import torch
import re
from typing import Any, Dict, List
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
class EndpointHandler:
    # Earlier initializer, kept for reference: loads a merged model and
    # tokenizer directly from `path` instead of applying a PEFT adapter.
    #
    # def __init__(self, path=""):
    #     # load model and tokenizer from path
    #     self.tokenizer = AutoTokenizer.from_pretrained(path)
    #     self.model = AutoModelForCausalLM.from_pretrained(
    #         path, device_map="auto", torch_dtype=torch.float16, trust_remote_code=True
    #     )
    #     self.device = "cuda" if torch.cuda.is_available() else "cpu"
def __init__(self, path="BarryL/suspicious-call-detect"):
model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
self.model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype=torch.bfloat16,
device_map={"": "cuda:0"},
)
self.model = PeftModel.from_pretrained(
self.model,
path,
device_map={"": "cuda:0"},
trust_remote_code=True
)
# 加载 tokenizer
try:
self.tokenizer = AutoTokenizer.from_pretrained(path)
except Exception as e:
print(f"Failed to load tokenizer from {path}: {e}")
# 设置设备
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.model.to(self.device)
self.model.eval()
    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, str]]:
        # process input
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", None)  # accepted but currently unused
        prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n"
        prompt += "### Instruction:\n"
        prompt += 'Three telecommunications experts analyze the behavior of a phone number based on its call records to evaluate whether it is suspicious behavior. The three experts will discuss together and provide the most confident probability value (0%–100%) of "Yes". No explanation is required.\n\n'
        prompt += f"### Question:\n{inputs}\n\n"
        prompt += "### Response:\n"
        self.tokenizer.pad_token = self.tokenizer.eos_token
        # deterministic beam search; sampling parameters are explicitly disabled
        generation_config = {
            "max_new_tokens": 128,
            "top_p": None,
            "do_sample": False,
            "num_beams": 5,
            "temperature": None,
        }
        self.model.generation_config.pad_token_id = self.tokenizer.pad_token_id
        llama3_prompt = [
            {
                "role": "system",
                "content": "Below is an instruction that describes a task. Write a response that appropriately completes the request."
            },
            {
                "role": "user",
                "content": ""
            }
        ]
        # preprocess: fill the user turn and render it with the Llama 3 chat template
        # (the template already inserts special tokens, so add_special_tokens is not passed)
        llama3_prompt[1]["content"] = prompt
        input_ids = self.tokenizer.apply_chat_template(
            llama3_prompt, add_generation_prompt=True, return_tensors="pt"
        ).to(self.device)
        prompt_len = input_ids.shape[-1]
        outputs = self.model.generate(input_ids, **generation_config)
        # decode only the newly generated tokens, not the prompt
        generated_answer = self.tokenizer.decode(outputs[0, prompt_len:], skip_special_tokens=True)
        # map the reported probability to a binary Yes/No label
        p = self.getProbability(generated_answer)
        if p > 50.0:
            generated_answer = "Yes"
        else:
            generated_answer = "No"
        prediction = [{"generated_text": generated_answer}]
        print("--prediction--", prediction)
        return prediction
    def getProbability(self, text):
        # extract a percentage such as "85%" first
        match = re.search(r'(\d+(?:\.\d+)?)%', text)
        if match:
            number = float(match.group(1))
        else:
            # if there is no '%', extract the first number
            match = re.search(r'(\d+(?:\.\d+)?)', text)
            if match:
                number = float(match.group(1))
            else:
                number = None  # no number found
        if number is not None:
            print(number)
            return number
        else:
            # if there is no number, find the first 'yes' or 'no', case-insensitively
            match = re.search(r'\b(yes|no)\b', text, re.IGNORECASE)
            if match:
                # print the matched 'yes' or 'no' in its original casing
                print(match.group(0))
                if match.group(0).lower() == 'yes':
                    return 100.0
                else:
                    return 0.0
            else:
                return 0.0
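

# A minimal local smoke test, assuming a CUDA-capable machine and access to the
# gated meta-llama/Meta-Llama-3.1-8B-Instruct weights. The call-record string
# below is an illustrative placeholder, not real data or the author's format.
if __name__ == "__main__":
    handler = EndpointHandler(path="BarryL/suspicious-call-detect")
    sample = {
        "inputs": "The number placed 312 outbound calls in 24 hours, "
                  "average duration 9 seconds, mostly to never-contacted numbers."
    }
    # expected shape: [{"generated_text": "Yes"}] or [{"generated_text": "No"}]
    print(handler(sample))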