import re
from typing import Any, Dict, List

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

class EndpointHandler:
    """Inference Endpoints handler for suspicious phone-number detection.

    Loads the Meta-Llama-3.1-8B-Instruct base model and attaches the
    fine-tuned PEFT adapter stored in this repository.
    """

    # Previous version, which loaded a standalone model directly from `path`:
    #
    # def __init__(self, path=""):
    #     # load model and tokenizer from path
    #     self.tokenizer = AutoTokenizer.from_pretrained(path)
    #     self.model = AutoModelForCausalLM.from_pretrained(
    #         path, device_map="auto", torch_dtype=torch.float16, trust_remote_code=True
    #     )
    #     self.device = "cuda" if torch.cuda.is_available() else "cpu"

    def __init__(self, path="BarryL/suspicious-call-detect"):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Load the Llama 3.1 8B Instruct base model, then attach the
        # fine-tuned PEFT adapter stored at `path`.
        model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.bfloat16,
            device_map={"": self.device},
        )
        self.model = PeftModel.from_pretrained(
            self.model,
            path,
            device_map={"": self.device},
        )

        # Prefer the tokenizer shipped with the adapter; fall back to the
        # base model's tokenizer if it cannot be loaded.
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(path)
        except Exception as e:
            print(f"Failed to load tokenizer from {path}: {e}")
            self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        self.model.eval()

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, str]]:
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", None)

        # Alpaca-style prompt: a fixed instruction describing the expert panel,
        # the call records as the question, and an empty response section.
        prompt = (
            "Below is an instruction that describes a task. "
            "Write a response that appropriately completes the request.\n\n"
        )
        prompt += "### Instruction:\n"
        prompt += (
            "Three telecommunications experts analyze the behavior of a phone number "
            "based on its call records to evaluate whether it is suspicious behavior. "
            "The three experts will discuss together and provide the most confident "
            'probability value (0%–100%) of "Yes". No explanation is required.\n\n'
        )
        prompt += f"### Question:\n{inputs}\n\n"
        prompt += "### Response:\n"

        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
        self.model.generation_config.pad_token_id = self.tokenizer.pad_token_id

        # Deterministic decoding: beam search with sampling disabled.
        generation_config = {
            "max_new_tokens": 128,
            "top_p": None,
            "do_sample": False,
            "num_beams": 5,
            "temperature": None,
        }
        # Optionally let request "parameters" override the decoding defaults.
        if isinstance(parameters, dict):
            generation_config.update(parameters)

        # Wrap the prompt in the Llama 3 chat template.
        llama3_prompt = [
            {
                "role": "system",
                "content": "Below is an instruction that describes a task. Write a response that appropriately completes the request.",
            },
            {
                "role": "user",
                "content": prompt,
            },
        ]

        input_ids = self.tokenizer.apply_chat_template(
            llama3_prompt,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(self.device)
        prompt_len = input_ids.shape[-1]
        outputs = self.model.generate(input_ids, **generation_config)
        generated_answer = self.tokenizer.decode(outputs[0, prompt_len:], skip_special_tokens=True)

        # Map the extracted probability to a binary label.
        p = self.getProbability(generated_answer)
        generated_answer = "Yes" if p > 50.0 else "No"

        prediction = [{"generated_text": generated_answer}]
        print("--prediction--", prediction)
        return prediction

    def getProbability(self, text):
        """Extract a 0-100 probability from the generated answer."""
        # Prefer an explicit percentage such as "85%".
        match = re.search(r"(\d+(?:\.\d+)?)%", text)
        if match:
            number = float(match.group(1))
        else:
            # Otherwise accept the first bare number in the text.
            match = re.search(r"(\d+(?:\.\d+)?)", text)
            number = float(match.group(1)) if match else None

        if number is not None:
            print(number)
            return number

        # No number at all: fall back to a yes/no keyword, defaulting to "No".
        match = re.search(r"\b(yes|no)\b", text, re.IGNORECASE)
        if match:
            print(match.group(0))
            return 100.0 if match.group(0).lower() == "yes" else 0.0
        return 0.0
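

# Minimal local smoke test for the handler (a sketch: the payload below is a
# hypothetical example of the "inputs" field, i.e. a textual summary of a
# phone number's call records, not real data). Running it requires access to
# the gated Llama 3.1 base model and to the adapter weights.
if __name__ == "__main__":
    handler = EndpointHandler()
    sample = {
        "inputs": (
            "Phone number A made 180 outbound calls within 24 hours, with an "
            "average call duration of 8 seconds and no incoming calls."
        ),
        "parameters": {"max_new_tokens": 64},
    }
    print(handler(sample))  # e.g. [{"generated_text": "Yes"}] or [{"generated_text": "No"}]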