File size: 4,052 Bytes
751f584 ae9b00b 751f584 058a55a 751f584 ae9b00b 751f584 82485e4 ba0c76b 751f584 ae9b00b 0c6cd96 ae9b00b f8ce6af ae9b00b b96ade3 ae9b00b 1f67921 ae9b00b 0c6cd96 ae9b00b f8ce6af ae9b00b 0c6cd96 ae9b00b 4e411ef ae9b00b 058a55a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
---
base_model:
- google/gemma-2-27b
tags:
- text-generation-inference
- transformers
- unsloth
- gemma2
- trl
license: gemma
language:
- ja
---
# Uploaded model
- **Developed by:** hama-jp
- **License:** Gemma Terms of Use
- **Finetuned from model :** google/gemma-2-27b :: Improved using Qwen
This Gemma 2 model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library.
[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
# output.jsonlの生成方法
```python
%%capture
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git
# Install Flash Attention 2 for softcapping support
import torch
if torch.cuda.get_device_capability()[0] >= 8:
!pip install --no-deps packaging ninja einops "flash-attn>=2.6.3"
```
```python
from unsloth import FastLanguageModel
import torch
import json
# Loading options passed to FastLanguageModel.from_pretrained below.
max_seq_length = 4096
dtype = None  # NOTE(review): presumably lets Unsloth choose the dtype itself; confirm in the Unsloth docs
load_in_4bit = True

# Load the fine-tuned checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="hama-jp/gemma2-27b-sft-241213-lora-06",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
```
```python
#@title ELYZA-tasks-100-TVの読み込み
# Load the ELYZA-tasks-100-TV test set and build one prompt per task.
import json

# Path to the test file
file_path = 'elyza-tasks-100-TV_0.jsonl'

# Maps task_id -> {"input": ..., "text": ...}; "text" is filled in further down.
dataset_test = {}

# Read the JSONL file: one JSON object per line
with open(file_path, 'r', encoding='utf-8') as file:
    for line in file:
        line = line.strip()
        # Skip blank lines (e.g. an empty last line) instead of letting
        # json.loads raise on an empty string.
        if not line:
            continue
        task_data = json.loads(line)
        task_id = task_data.get("task_id")
        input_data = task_data.get("input")
        # Records without a task_id are ignored; the rest are keyed by task_id.
        if task_id is not None:
            dataset_test[task_id] = {"input": input_data}

EOS_TOKEN = tokenizer.eos_token

# Prompt template; the single {} placeholder receives the task input.
alpaca_prompt = """### 指示
以下の入力に従って適切に処理してください。
### 入力:
{}
### 出力:
"""

# Add a "text" key holding the formatted prompt to every entry.
# NOTE(review): EOS_TOKEN is appended to a prompt that is later used for
# generation. It is kept unchanged because this cell documents how
# output.jsonl was generated; confirm that this is intended.
for content in dataset_test.values():
    content["text"] = alpaca_prompt.format(content["input"]) + EOS_TOKEN
```
```python
from unsloth import FastLanguageModel
# Switch the loaded model to Unsloth's inference mode before calling generate().
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
def extract_response(full_text):
    """Return the answer part of a decoded prompt-plus-completion string.

    The marker is a newline followed by ``### 出力:``. Everything after the
    first occurrence of that marker is returned with surrounding whitespace
    removed.

    Args:
        full_text: Decoded text that may contain the marker.

    Returns:
        The stripped text after the first marker, or the whole stripped
        ``full_text`` when the marker does not occur.
    """
    response_marker = "\n### 出力:"
    # partition() splits at the first occurrence; `found` is empty when the
    # marker is absent, in which case the whole text is returned.
    _, found, response = full_text.partition(response_marker)
    if found:
        return response.strip()
    return full_text.strip()
# Generate one answer per task and write the results to output.jsonl.
with open("output.jsonl", "w", encoding="utf-8") as outfile:
    # Iterate over the tasks that were actually loaded instead of assuming
    # exactly 100 entries keyed 0..99; the dict keeps the file's order.
    for task_id, task in dataset_test.items():
        # Tokenize the prompt and move the tensors to the GPU
        inputs = tokenizer(task["text"], return_tensors="pt").to("cuda")
        # Generate output
        output = model.generate(
            **inputs,
            max_new_tokens=1024,
            temperature=0.15,
            repetition_penalty=1.05,
            use_cache=True,
            do_sample=True,
        )
        # Decode output text
        decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
        # Extract only the response part
        response_only = extract_response(decoded_output)
        # Print for debugging
        print("task_id:", task_id)
        print("input:", task["input"])
        print("output:", response_only)
        print("---")
        # Prepare a dictionary for JSONL
        result = {
            "task_id": task_id,
            "input": task["input"],
            "output": response_only,
        }
        # ensure_ascii=False keeps the Japanese text readable in the file
        outfile.write(json.dumps(result, ensure_ascii=False) + "\n")
``` |