import re
from typing import Any, Dict, List

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer


class EndpointHandler:
    """
    def __init__(self, path=""):
        # load model and tokenizer from path
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForCausalLM.from_pretrained(
            path, device_map="auto", torch_dtype=torch.float16, trust_remote_code=True
        )
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
    """

    def __init__(self, path="BarryL/suspicious-call-detect"):
        # Load the base model, then attach the LoRA adapter stored at `path`.
        model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.bfloat16,
            device_map={"": "cuda:0"},
        )
        self.model = PeftModel.from_pretrained(
            self.model, path, device_map={"": "cuda:0"}, trust_remote_code=True
        )

        # Load the tokenizer; fall back to the base model's tokenizer on failure.
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(path)
        except Exception as e:
            print(f"Failed to load tokenizer from {path}: {e}")
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Set the device.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model.to(self.device)
        self.model.eval()

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, str]]:
        # Process input.
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", None)  # currently unused

        # Build the instruction prompt around the call-record question.
        prompt = (
            "Below is an instruction that describes a task. "
            "Write a response that appropriately completes the request.\n\n"
        )
        prompt += "### Instruction:\n"
        prompt += (
            "Three telecommunications experts analyze the behavior of a phone "
            "number based on its call records to evaluate whether it is "
            "suspicious behavior. The three experts will discuss together and "
            'provide the most confident probability value (0%-100%) of "Yes". '
            "No explanation is required.\n\n"
        )
        prompt += f"### Question:\n{inputs}\n\n"
        prompt += "### Response:\n"

        self.tokenizer.pad_token = self.tokenizer.eos_token
        generation_config = {
            "max_new_tokens": 128,
            "top_p": None,
            "do_sample": False,  # deterministic beam search, no sampling
            "num_beams": 5,
            "temperature": None,
        }
        self.model.generation_config.pad_token_id = self.tokenizer.pad_token_id

        # Wrap the prompt in the Llama 3 chat template. The template already
        # inserts the special tokens, so none are added here.
        llama3_prompt = [
            {
                "role": "system",
                "content": "Below is an instruction that describes a task. "
                "Write a response that appropriately completes the request.",
            },
            {"role": "user", "content": prompt},
        ]
        input_ids = self.tokenizer.apply_chat_template(
            llama3_prompt,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(self.device)
        prompt_len = input_ids.shape[-1]

        outputs = self.model.generate(input_ids, **generation_config)
        generated_answer = self.tokenizer.decode(
            outputs[0, prompt_len:], skip_special_tokens=True
        )

        # Threshold the extracted probability into a binary label.
        p = self.getProbability(generated_answer)
        generated_answer = "Yes" if p > 50.0 else "No"

        prediction = [{"generated_text": generated_answer}]
        print("--prediction--", prediction)
        return prediction

    def getProbability(self, text: str) -> float:
        # Prefer an explicit percentage such as "85%".
        match = re.search(r"(\d+(?:\.\d+)?)%", text)
        if match:
            number = float(match.group(1))
        else:
            # If there is no '%', take the first bare number.
            match = re.search(r"(\d+(?:\.\d+)?)", text)
            if match:
                number = float(match.group(1))
            else:
                number = None  # no number found

        if number is not None:
            print(number)
            return number
        else:
            # With no number at all, fall back to the first 'yes' or 'no',
            # case-insensitively.
            match = re.search(r"\b(yes|no)\b", text, re.IGNORECASE)
            if match:
                # Print the matched 'yes' or 'no' in its original casing.
                print(match.group(0))
                return 100.0 if match.group(0).lower() == "yes" else 0.0
            else:
                return 0.0
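
# ---------------------------------------------------------------------------
# Minimal local smoke test (a sketch, not part of the Inference Endpoints
# contract): it instantiates the handler and sends one payload shaped like the
# {"inputs": ...} requests the endpoint receives. The call-record text below
# is a hypothetical illustration; running this requires a CUDA GPU and access
# to the gated Llama 3.1 base model.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    handler = EndpointHandler()  # loads base model + LoRA adapter from the Hub
    payload = {
        "inputs": (
            "The number placed 120 outbound calls in one day, "
            "with an average call duration of 8 seconds."  # hypothetical record
        )
    }
    result = handler(payload)  # e.g. [{"generated_text": "Yes"}]
    print(result)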