Devops-hestabit
/

otherhalf

Text Generation

text generation

Inference Endpoints

Model card Files Files and versions Community

otherhalf / handler.py

Devops-hestabit's picture

Devops-hestabit

Update handler.py

76a5b0b over 1 year ago

3.1 kB

	from transformers import AutoTokenizer, AutoModelForCausalLM
	import re
	import torch

	# Prompt template for the "Alice Gate" persona chat.
	# It is filled with str.format() using two fields: `user_name` (the speaker
	# label for the human side of the dialogue) and `user_input` (the text placed
	# just before the final "Alice Gate:" cue, which the model is asked to
	# continue).
	# NOTE(review): every line of this literal starts with a tab that looks like a
	# copy/paste artifact; those tabs are part of the prompt at runtime — confirm
	# against the deployed file before normalizing them.
	template = """Alice Gate's Persona: Alice Gate is a young, computer engineer-nerd with a knack for problem solving and a passion for technology.
	<START>
	{user_name}: So how did you get into computer engineering?
	Alice Gate: I've always loved tinkering with technology since I was a kid.
	{user_name}: That's really impressive!
	Alice Gate: She chuckles bashfully Thanks!
	{user_name}: So what do you do when you're not working on computers?
	Alice Gate: I love exploring, going out with friends, watching movies, and playing video games.
	{user_name}: What's your favorite type of computer hardware to work with?
	Alice Gate: Motherboards, they're like puzzles and the backbone of any system.
	{user_name}: That sounds great!
	Alice Gate: Yeah, it's really fun. I'm lucky to be able to do this as a job.
	{user_name}: Definetly.
	<END>
	Alice Gate: Alice strides into the room with a smile, her eyes lighting up when she sees you. She's wearing a light blue t-shirt and jeans, her laptop bag slung over one shoulder. She takes a seat next to you, her enthusiasm palpable in the air Hey! I'm so excited to finally meet you. I've heard so many great things about you and I'm eager to pick your brain about computers. I'm sure you have a wealth of knowledge that I can learn from. She grins, eyes twinkling with excitement Let's get started!
	{user_input}
	Alice Gate:"""

	class EndpointHandler():
	    """Custom inference handler that answers in character as "Alice Gate".

	    The model and tokenizer are loaded once in ``__init__``. Each call fills
	    the module-level ``template`` prompt, generates a continuation and trims
	    it down to a single cleaned-up reply.
	    """

	    def __init__(self, path=""):
	        """Load the tokenizer and the half-precision model from ``path``.

	        Args:
	            path: Directory or hub id passed to ``from_pretrained``.
	        """
	        self.tokenizer = AutoTokenizer.from_pretrained(path)
	        self.model = AutoModelForCausalLM.from_pretrained(
	            path,
	            low_cpu_mem_usage=True,
	            trust_remote_code=False,
	            torch_dtype=torch.float16,
	        ).to("cuda")

	    def response(self, result, user_name):
	        """Extract Alice's latest reply from the decoded generation.

	        Args:
	            result: Full decoded text (prompt plus continuation).
	            user_name: Speaker label of the human; anything the model wrote
	                from ``"<user_name>:"`` onwards is discarded.

	        Returns:
	            ``{"message": reply}`` where ``reply`` has asterisks removed,
	            whitespace collapsed, and is cut after the last ``.``, ``!`` or
	            ``?`` (left untouched when no such character exists).
	        """
	        # Keep only what follows the last "Alice Gate:" cue. rpartition falls
	        # back to the whole text when the cue is missing instead of raising.
	        reply = result.rpartition("Alice Gate:")[2]
	        # Drop any turn the model hallucinated on behalf of the user.
	        reply = reply.split(f"{user_name}:", 1)[0].strip()

	        # Remove asterisks, together with a period directly before them.
	        without_stars = re.sub(r"\.?\*", "", reply).strip()
	        # If that wipes out everything, fall back to stripping only the "*".
	        reply = without_stars if without_stars else reply.replace("*", "")
	        reply = " ".join(reply.split())

	        # Generation may stop mid-sentence; cut at the last sentence terminator.
	        last_terminator = max(reply.rfind(mark) for mark in ".!?")
	        if last_terminator != -1:
	            reply = reply[:last_terminator + 1]
	        return {"message": reply}

	    def __call__(self, data):
	        """Generate one in-character reply for a request payload.

	        Args:
	            data: Request dict. Either it holds an ``"inputs"`` mapping or it
	                is itself that mapping; the mapping must provide
	                ``"user_name"`` and ``"user_input"`` (a list of lines, or a
	                single string). Note that ``"inputs"`` is popped from ``data``.

	        Returns:
	            ``{"message": str}`` as produced by ``response``.

	        Raises:
	            KeyError: If ``"user_name"`` or ``"user_input"`` is missing.
	        """
	        inputs = data.pop("inputs", data)
	        user_name = inputs["user_name"]
	        turns = inputs["user_input"]
	        if isinstance(turns, str):
	            # Joining a bare string would put every character on its own line.
	            turns = [turns]
	        prompt = template.format(
	            user_name=user_name,
	            user_input="\n".join(turns),
	        )

	        # Place the inputs wherever the model lives (moved to CUDA in __init__).
	        encoded = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
	        output_ids = self.model.generate(
	            encoded["input_ids"],
	            # Pass the mask explicitly when the tokenizer provides one.
	            attention_mask=encoded.get("attention_mask"),
	            max_new_tokens=50,
	            # temperature/top_p/top_k only take effect when sampling is on;
	            # without do_sample they are ignored by greedy decoding.
	            do_sample=True,
	            temperature=0.5,
	            top_p=0.9,
	            top_k=0,
	            repetition_penalty=1.1,
	            # Hard-coded pad id; presumably the model's EOS token id —
	            # TODO confirm against the tokenizer.
	            pad_token_id=50256,
	            num_return_sequences=1,
	        )
	        decoded = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
	        return self.response(decoded, user_name)