File size: 2,821 Bytes

6815019
0d4fb66
17d05c4
0d4fb66
911f092
0d4fb66
911f092
0d4fb66
 
 
 
 
 
 
 
 
 
17d05c4
0d4fb66
17d05c4
0d4fb66
911f092
 
 
6815019
d1ea3f5
17d05c4
d1ea3f5
17d05c4
89f8d10
17d05c4
911f092
 
 
89f8d10
 
 
 
 
 
 
 
 
 
f5ea52f
89f8d10

from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers_stream_generator import init_stream_support
import re
# One-time setup from transformers_stream_generator, executed at import time.
# NOTE(review): presumably this is what makes the `do_stream = True` argument
# passed to `generate` in EndpointHandler.__call__ work — confirm against the
# library's documentation.
init_stream_support()

# Prompt template for the "Alice Gate" character: a persona line, a <START>
# marker followed by example dialogue, and finally the live conversation.
# It is filled with str.format() in EndpointHandler.__call__, which supplies
# two placeholders:
#   {user_name}  - the name used for the user's lines in the example dialogue
#   {user_input} - the conversation lines, already joined with newlines
template = """Alice Gate's Persona: Alice Gate is a young, computer engineer-nerd with a knack for problem solving and a passion for technology.
<START>
{user_name}: So how did you get into computer engineering?
Alice Gate: I've always loved tinkering with technology since I was a kid.
{user_name}: That's really impressive!
Alice Gate: *She chuckles bashfully* Thanks!
{user_name}: So what do you do when you're not working on computers?
Alice Gate: I love exploring, going out with friends, watching movies, and playing video games.
{user_name}: What's your favorite type of computer hardware to work with?
Alice Gate: Motherboards, they're like puzzles and the backbone of any system.
{user_name}: That sounds great!
Alice Gate: Yeah, it's really fun. I'm lucky to be able to do this as a job.
{user_name}: Awesome!
Alice Gate: *Alice strides into the room with a smile, her eyes lighting up when she sees you. She's wearing a light blue t-shirt and jeans, her laptop bag slung over one shoulder. She takes a seat next to you, her enthusiasm palpable in the air* Hey! I'm so excited to finally meet you. I've heard so many great things about you and I'm eager to pick your brain about computers. I'm sure you have a wealth of knowledge that I can learn from. *She grins, eyes twinkling with excitement* Let's get started!
{user_input}
"""

class EndpointHandler():
    """Callable handler that generates one in-character chat reply.

    On construction it loads a tokenizer and a causal language model from
    ``path``. Each call formats the module-level ``template`` with the
    request's ``user_name`` and ``user_input``, streams tokens from the
    model, and returns the first non-empty generated line with
    ``*action*`` spans removed and whitespace collapsed.
    """

    # Matches roleplay "action" spans wrapped in asterisks, e.g. *She grins*.
    # Raw string so that the backslash-star is a regex escape rather than an
    # (invalid) Python string escape.
    _ACTION_PATTERN = re.compile(r"\*.*?\*")

    def __init__(self, path = ""):
        """Load the tokenizer and model stored at ``path``.

        The model is loaded with ``device_map = "auto"`` and
        ``load_in_8bit = True``.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = AutoModelForCausalLM.from_pretrained(
            path,
            device_map = "auto",
            load_in_8bit = True,
        )

    @classmethod
    def _clean_reply(cls, text):
        """Drop ``*action*`` spans from ``text`` and collapse whitespace runs."""
        without_actions = cls._ACTION_PATTERN.sub("", text)
        # str.split() with no arguments never yields empty strings, so no
        # extra filtering is needed before re-joining.
        return " ".join(without_actions.split())

    def __call__(self, data):
        """Generate a reply for one request.

        Args:
            data: Request dict. The payload is taken from ``data["inputs"]``
                when that key exists (it is popped from ``data``), otherwise
                ``data`` itself is the payload. The payload must provide
                ``"user_name"`` (str) and ``"user_input"`` (a list of
                conversation lines, or a single string).

        Returns:
            ``{"message": <reply>}``. The reply is the first non-empty
            generated line; if the stream ends before a newline is produced,
            whatever was generated so far is returned instead of ``None``.

        Raises:
            KeyError: if ``"user_name"`` or ``"user_input"`` is missing.
        """
        inputs = data.pop("inputs", data)
        user_input = inputs["user_input"]
        if isinstance(user_input, str):
            # A bare string would otherwise be joined character by character.
            user_input = [user_input]
        prompt = template.format(
            user_name = inputs["user_name"],
            user_input = "\n".join(user_input)
        )
        input_ids = self.tokenizer(
            prompt,
            return_tensors = "pt"
        ).input_ids
        stream_generator = self.model.generate(
            input_ids,
            max_length = 2048,
            do_sample = True,
            do_stream = True,
            temperature = 0.5,
            top_p = 0.9,
            top_k = 0,
            repetition_penalty = 1.1,
            pad_token_id = 50256,
            num_return_sequences = 1
        )
        pieces = []
        for token in stream_generator:
            piece = self.tokenizer.decode(token)
            pieces.append(piece)
            # Only re-assemble the text when a newline arrives; this also
            # catches multi-newline tokens that an equality test would miss.
            if "\n" not in piece:
                continue
            text = "".join(pieces).lstrip()
            # Leading blank output is skipped: after lstrip() a newline can
            # only appear once some real content has been generated.
            if "\n" in text:
                first_line = text.split("\n", 1)[0]
                return {"message": self._clean_reply(first_line)}
        # Stream ended (e.g. end-of-sequence or length limit) without a
        # terminating newline: return what was generated.
        return {"message": self._clean_reply("".join(pieces))}