import re
from typing import Any, Dict, List

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

class EndpointHandler:
    """Inference Endpoints handler for suspicious phone-number detection.

    Loads the Meta-Llama-3.1-8B-Instruct base model and attaches the
    fine-tuned PEFT adapter stored in this repository.
    """

    # Previous version, which loaded a standalone model directly from `path`:
    #
    # def __init__(self, path=""):
    #     # load model and tokenizer from path
    #     self.tokenizer = AutoTokenizer.from_pretrained(path)
    #     self.model = AutoModelForCausalLM.from_pretrained(
    #         path, device_map="auto", torch_dtype=torch.float16, trust_remote_code=True
    #     )
    #     self.device = "cuda" if torch.cuda.is_available() else "cpu"

    def __init__(self, path="BarryL/suspicious-call-detect"):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Load the Llama 3.1 8B Instruct base model, then attach the
        # fine-tuned PEFT adapter stored at `path`.
        model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.bfloat16,
            device_map={"": self.device},
        )
        self.model = PeftModel.from_pretrained(
            self.model,
            path,
            device_map={"": self.device},
        )

        # Prefer the tokenizer shipped with the adapter; fall back to the
        # base model's tokenizer if it cannot be loaded.
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(path)
        except Exception as e:
            print(f"Failed to load tokenizer from {path}: {e}")
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        self.model.eval()

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, str]]:
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", None)

        # Alpaca-style prompt: a fixed instruction describing the expert panel,
        # the call records as the question, and an empty response section.
        prompt = (
            "Below is an instruction that describes a task. "
            "Write a response that appropriately completes the request.\n\n"
        )
        prompt += "### Instruction:\n"
        prompt += (
            "Three telecommunications experts analyze the behavior of a phone number "
            "based on its call records to evaluate whether it is suspicious behavior. "
            "The three experts will discuss together and provide the most confident "
            'probability value (0%–100%) of "Yes". No explanation is required.\n\n'
        )
        prompt += f"### Question:\n{inputs}\n\n"
        prompt += "### Response:\n"

        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        self.model.generation_config.pad_token_id = self.tokenizer.pad_token_id

        # Deterministic decoding: beam search with sampling disabled.
        generation_config = {
            "max_new_tokens": 128,
            "top_p": None,
            "do_sample": False,
            "num_beams": 5,
            "temperature": None,
        }
        # Optionally let request "parameters" override the decoding defaults.
        if isinstance(parameters, dict):
            generation_config.update(parameters)

        # Wrap the prompt in the Llama 3 chat template.
        llama3_prompt = [
            {
                "role": "system",
                "content": "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
            },
            {
                "role": "user",
                "content": prompt,
            },
        ]

        input_ids = self.tokenizer.apply_chat_template(
            llama3_prompt,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(self.device)
        prompt_len = input_ids.shape[-1]
        outputs = self.model.generate(input_ids, **generation_config)
        generated_answer = self.tokenizer.decode(outputs[0, prompt_len:], skip_special_tokens=True)

        # Map the extracted probability to a binary label.
        p = self.getProbability(generated_answer)
        generated_answer = "Yes" if p > 50.0 else "No"

        prediction = [{"generated_text": generated_answer}]
        print("--prediction--", prediction)
        return prediction

    def getProbability(self, text):
        """Extract a 0-100 probability from the generated answer."""
        # Prefer an explicit percentage such as "85%".
        match = re.search(r"(\d+(?:\.\d+)?)%", text)
        if match:
            number = float(match.group(1))
        else:
            # Otherwise accept the first bare number in the text.
            match = re.search(r"(\d+(?:\.\d+)?)", text)
            number = float(match.group(1)) if match else None

        if number is not None:
            print(number)
            return number

        # No number at all: fall back to a yes/no keyword, defaulting to "No".
        match = re.search(r"\b(yes|no)\b", text, re.IGNORECASE)
        if match:
            print(match.group(0))
            return 100.0 if match.group(0).lower() == "yes" else 0.0
        return 0.0
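

# Minimal local smoke test for the handler (a sketch: the payload below is a
# hypothetical example of the "inputs" field, i.e. a textual summary of a
# phone number's call records, not real data). Running it requires access to
# the gated Llama 3.1 base model and to the adapter weights.
if __name__ == "__main__":
    handler = EndpointHandler()
    sample = {
        "inputs": (
            "Phone number A made 180 outbound calls within 24 hours, with an "
            "average call duration of 8 seconds and no incoming calls."
        ),
        "parameters": {"max_new_tokens": 64},
    }
    print(handler(sample))  # e.g. [{"generated_text": "Yes"}] or [{"generated_text": "No"}]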