|
--- |
|
base_model: llm-jp/llm-jp-3-13b-instruct2 |
|
tags: |
|
- text-generation-inference |
|
- transformers |
|
- unsloth |
|
- llama |
|
- trl |
|
license: apache-2.0 |
|
language: |
|
- ja

- en
|
--- |
|
|
|
# morizon/llm-jp-3-13b-instruct2-grpo-0215_lora |
|
This model is a LoRA-adapter model optimized for Japanese text-generation tasks.
|
|
|
- **Developed by:** morizon |
|
- **License:** apache-2.0 |
|
- **Finetuned from model:** llm-jp/llm-jp-3-13b-instruct2
|
|
|
This Llama model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library.
|
|
|
[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth) |
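
If you just want to try the adapter without the Unsloth/vLLM workflow shown in the Sample Use section below, it can also be loaded with plain `transformers` + `peft`. The following is a minimal sketch, not the original evaluation setup; the adapter repo id, dtype, and example prompt are assumptions:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "llm-jp/llm-jp-3-13b-instruct2"
adapter_id = "morizon/llm-jp-3-13b-instruct2-grpo-0215_lora"  # assumed: this repository

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16, device_map="auto")
model = PeftModel.from_pretrained(base, adapter_id)

# Build a chat-formatted prompt (example question; any Japanese instruction works)
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "日本で一番高い山は何ですか?"}],
    tokenize=False, add_generation_prompt=True,
)
inputs = tokenizer(prompt, return_tensors="pt").to(base.device)
outputs = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```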
|
|
|
## Sample Use |
|
|
|
|
|
```python
%%capture
# Skip restarting message in Colab
import sys; modules = list(sys.modules.keys())
for x in modules: sys.modules.pop(x) if "PIL" in x or "google" in x else None

!pip install unsloth vllm
!pip install --upgrade pillow
# If you are running this notebook locally, you also need to install `diffusers`
# !pip install diffusers
# Temporarily install a specific TRL nightly version
!pip install git+https://github.com/huggingface/trl.git@e95f9fb74a3c3647b86f251b7e230ec51c64b72b
```
|
|
|
```python
from unsloth import FastLanguageModel, PatchFastRL
PatchFastRL("GRPO", FastLanguageModel)

import re
import torch
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig
from trl import GRPOConfig, GRPOTrainer
from unsloth import is_bfloat16_supported
```
|
|
|
```python
model_id = "llm-jp/llm-jp-3-13b-instruct2"
adapter_id = "morizon/llm-jp-3-13b-instruct2-grpo-MATH-lighteval_step1000_lora"

# --- Load the model and apply LoRA ---
max_seq_length = 1024  # maximum length of the reasoning trace
lora_rank = 64         # LoRA rank (recommended: 64)

# Load the model and tokenizer via FastLanguageModel
# Note: set the model name to the one you actually want to use
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_id,
    max_seq_length=max_seq_length,
    load_in_4bit=True,    # 4-bit quantization (mind this setting when LoRA fine-tuning)
    fast_inference=True,  # enable fast inference with vLLM
    max_lora_rank=lora_rank,
    gpu_memory_utilization=0.7,
)

# Apply LoRA (PEFT)
model = FastLanguageModel.get_peft_model(
    model,
    r=lora_rank,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=lora_rank,
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)
```
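
As a quick sanity check (assuming the object returned by `get_peft_model` exposes the standard `peft` API, which Unsloth's wrapper normally does), you can confirm that only the LoRA adapter weights are trainable:

```python
# Expect only the LoRA adapter parameters to be reported as trainable.
model.print_trainable_parameters()
```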
|
|
|
```python
# --- Prepare the prompt and the dataset ---
# Recommended: drop the system prompt and put all instructions in the user prompt
USER_INSTRUCTION = (
    "Please ensure your response begins with \"<reasoning>\n\". "
    "Please reason step by step, and put your final answer within \\boxed{}. "
)

# Example test data (a list of dicts)
test_data = [
    {"id": 0, "text": "$x^{-1}>x$を満たす正の整数$x$の個数を求めなさい。", "gold": "0", "response": "", "type": "Algebra", "level": "Level 2"},
    # Add the test data you want to evaluate here
]

def extract_boxed_answer_rev(text: str) -> str:
    r"""
    Extract the contents of the first \boxed{...} in the text, handling nested braces.
    Example: r"\boxed{\frac{\pi}{6}}" -> "\frac{\pi}{6}"
    """
    key = r"\boxed{"
    start_idx = text.find(key)
    if start_idx == -1:
        return ""
    # Start right after "\boxed{"
    start_idx += len(key)
    brace_count = 1  # the opening { is already counted
    i = start_idx
    while i < len(text) and brace_count > 0:
        if text[i] == "{":
            brace_count += 1
        elif text[i] == "}":
            brace_count -= 1
        i += 1
    # i-1 is the position of the matching closing brace
    return text[start_idx:i-1].strip()

from vllm import SamplingParams

correct = 0
total = len(test_data)

# Lists for recording correct and incorrect cases
correct_cases = []
incorrect_cases = []

for item in test_data:
    # Build the prompt (prepend USER_INSTRUCTION)
    prompt = USER_INSTRUCTION + item["text"]
    text = tokenizer.apply_chat_template([
        {"role": "user", "content": prompt},
    ], tokenize=False, add_generation_prompt=True)

    # Run inference
    sampling_params = SamplingParams(
        temperature=0.6,
        max_tokens=2048,
    )
    output = model.fast_generate(
        text,
        sampling_params=sampling_params,
        lora_request=model.load_lora(adapter_id),
        # lora_request=model.load_lora("grpo_saved_lora"),
    )[0].outputs[0].text

    # Extract the answer from the \boxed{...} expression
    boxed_answer = extract_boxed_answer_rev(output)

    # Display the results
    print("\n----------Test ID:", item["id"], "----------")
    print("Prompt:")
    print(prompt)
    print("\nLLM Output:")
    print(output)
    print("\nExtracted Answer:")
    print(boxed_answer)
    print("Gold Answer:", item["gold"])

    # Count as correct when the extracted answer matches the gold answer
    if boxed_answer == item["gold"]:
        correct += 1
        correct_cases.append({
            "id": item["id"],
            "prompt": prompt,
            "LLM_output": output,
            "extracted_answer": boxed_answer,
            "gold": item["gold"]
        })
    else:
        incorrect_cases.append({
            "id": item["id"],
            "prompt": prompt,
            "LLM_output": output,
            "extracted_answer": boxed_answer,
            "gold": item["gold"]
        })

# Show correct cases
print("\n========== Correct cases ==========")
for case in correct_cases:
    print("\nTest ID:", case["id"])
    print("Prompt:")
    print(case["prompt"])
    print("LLM Output:")
    print(case["LLM_output"])
    print("Extracted Answer:", case["extracted_answer"])
    print("Gold Answer:", case["gold"])
    print("-" * 40)

# Show incorrect cases
print("\n========== Incorrect cases ==========")
for case in incorrect_cases:
    print("\nTest ID:", case["id"])
    print("Prompt:")
    print(case["prompt"])
    print("LLM Output:")
    print(case["LLM_output"])
    print("Extracted Answer:", case["extracted_answer"])
    print("Gold Answer:", case["gold"])
    print("-" * 40)

accuracy = correct / total * 100
print("\nOverall Accuracy: {}/{} ({:.2f}%)".format(correct, total, accuracy))
```
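
If you prefer to deploy a single set of weights rather than the base model plus a separate adapter, one option is to fold the LoRA weights into the base model with standard `peft` APIs. This is a sketch, not part of the original workflow; it assumes the same adapter id as in the example above, and the output directory name is arbitrary:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "llm-jp/llm-jp-3-13b-instruct2"
adapter_id = "morizon/llm-jp-3-13b-instruct2-grpo-MATH-lighteval_step1000_lora"

# Load the base model in full precision, attach the adapter, then merge the LoRA weights in.
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype="auto", device_map="auto")
merged = PeftModel.from_pretrained(base, adapter_id).merge_and_unload()

# Save the merged weights together with the tokenizer.
merged.save_pretrained("llm-jp-3-13b-instruct2-grpo-merged")
AutoTokenizer.from_pretrained(base_id).save_pretrained("llm-jp-3-13b-instruct2-grpo-merged")
```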
|
|