MrD05 committed
Commit 0d4fb66 · 1 Parent(s): b0d02fc

Update handler.py

Files changed (1)
  1. handler.py +48 -54
handler.py CHANGED
@@ -1,72 +1,66 @@
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
-from langchain.llms import HuggingFacePipeline
-from langchain import PromptTemplate, LLMChain
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, StoppingCriteria
+from accelerate import init_empty_weights
+from transformers_stream_generator import init_stream_support
+# from langchain.llms import HuggingFacePipeline
+# from langchain import PromptTemplate, LLMChain
 import torch
+import time
+init_stream_support()
 
-template = """{char_name}'s Persona: {char_persona}
+template = """Alice Gate's Persona: Alice Gate is a young, computer engineer-nerd with a knack for problem solving and a passion for technology.
 <START>
-{chat_history}
-{char_name}: {char_greeting}
+{user_name}: So how did you get into computer engineering?
+Alice Gate: I've always loved tinkering with technology since I was a kid.
+{user_name}: That's really impressive!
+Alice Gate: *She chuckles bashfully* Thanks!
+{user_name}: So what do you do when you're not working on computers?
+Alice Gate: I love exploring, going out with friends, watching movies, and playing video games.
+{user_name}: What's your favorite type of computer hardware to work with?
+Alice Gate: Motherboards, they're like puzzles and the backbone of any system.
+{user_name}: That sounds great!
+Alice Gate: Yeah, it's really fun. I'm lucky to be able to do this as a job.
 <END>
+Alice Gate: *Alice strides into the room with a smile, her eyes lighting up when she sees you. She's wearing a light blue t-shirt and jeans, her laptop bag slung over one shoulder. She takes a seat next to you, her enthusiasm palpable in the air* Hey! I'm so excited to finally meet you. I've heard so many great things about you and I'm eager to pick your brain about computers. I'm sure you have a wealth of knowledge that I can learn from. *She grins, eyes twinkling with excitement* Let's get started!
 {user_name}: {user_input}
-{char_name}: """
+"""
 
-#model_id="MrD05/kaido-6b"
 class EndpointHandler():
 
     def __init__(self, path=""):
-        tokenizer = AutoTokenizer.from_pretrained(path)
-        model = AutoModelForCausalLM.from_pretrained(path, device_map="auto", load_in_8bit=True)
-        local_llm = HuggingFacePipeline(
-            pipeline = pipeline(
-                "text-generation",
-                model = model,
-                tokenizer = tokenizer,
-                max_length = 2048,
-                temperature = 0.5,
-                top_p = 0.9,
-                top_k = 0,
-                repetition_penalty = 1.1,
-                pad_token_id = 50256,
-                num_return_sequences = 1,
-                torch_dtype=torch.float32
-
-            )
-        )
-        prompt_template = PromptTemplate(
-            template = template,
-            input_variables = [
-                "user_input",
-                "user_name",
-                "char_name",
-                "char_persona",
-                "char_greeting",
-                "chat_history"
-            ],
-            validate_template = True
-        )
-        self.llm_engine = LLMChain(
-            llm = local_llm,
-            prompt = prompt_template
-        )
+        self.tokenizer = AutoTokenizer.from_pretrained(path, torch_dtype=torch.float16)
+        self.model = AutoModelForCausalLM.from_pretrained(path, device_map="auto", load_in_8bit=True)
 
     def __call__(self, data):
         inputs = data.pop("inputs", data)
         try:
-            response = self.llm_engine.predict(
-                user_input = inputs["user_input"],
+            t0 = time.time()
+            prompt = template.format(
                 user_name = inputs["user_name"],
-                char_name = inputs["char_name"],
-                char_persona = inputs["char_persona"],
-                char_greeting = inputs["char_greeting"],
-                chat_history = inputs["chat_history"]
-            ).split("\n", 1)[0]
-            return {
-                "inputs": inputs,
-                "text": response
-            }
+                user_input = inputs["user_input"]
+            )
+            input_ids = self.tokenizer(
+                prompt,
+                return_tensors="pt"
+            ).input_ids.to('cuda')
+            stream_generator = self.model.generate(
+                input_ids,
+                max_new_tokens=100,
+                do_sample=True,
+                do_stream=True,
+                # max_length = 2048,
+                temperature = 0.5,
+                top_p = 0.9,
+                top_k = 0,
+                repetition_penalty = 1.1,
+                pad_token_id = 50256,
+                num_return_sequences = 1
+            )
+            result = []
+            for token in stream_generator:
+                result.append(self.tokenizer.decode(token))
+                if result[-1] == "\n":
+                    return "".join(result).replace("Alice Gate:", "").strip()
         except Exception as e:
             return {
-                "inputs": inputs,
                 "error": str(e)
             }
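
With this change the request schema shrinks to two fields, user_name and user_input; the persona, example chat, and greeting are now baked into the hard-coded Alice Gate template. A minimal local smoke test might look like the sketch below. The path value is an assumption borrowed from the model_id comment removed from the old file, and an 8-bit-capable GPU is required for load_in_8bit plus the .to('cuda') call.

    from handler import EndpointHandler

    # Hypothetical model path: "MrD05/kaido-6b" comes from the commented-out
    # model_id in the old version of this file and may not be the deployed model.
    handler = EndpointHandler(path="MrD05/kaido-6b")

    payload = {
        "inputs": {
            "user_name": "Bob",
            "user_input": "Hey Alice, what are you working on today?"
        }
    }

    # __call__ pops the "inputs" key, formats the template, and streams tokens
    # until the first newline: the return value is Alice Gate's first line as a
    # plain string, or {"error": ...} if generation raises.
    print(handler(payload))

Note that the success path now returns a bare string rather than the old {"inputs": ..., "text": ...} dict.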
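
One behavior worth knowing about the new streaming loop: it returns only when a decoded token is exactly "\n", so a generation that exhausts max_new_tokens without emitting a newline falls off the end of the for loop and the handler returns None. A minimal fallback sketch, assuming the same token-by-token stream interface (collect_first_line is a hypothetical helper, not part of this commit):

    def collect_first_line(stream_generator, tokenizer, stop="\n"):
        # Decode tokens as they stream in and cut at the first newline;
        # if the stream ends without one, return whatever was generated.
        result = []
        for token in stream_generator:
            piece = tokenizer.decode(token)
            result.append(piece)
            if stop in piece:
                break
        return "".join(result).replace("Alice Gate:", "").strip()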