cyqm
committed on
Commit
·
6957707
1
Parent(s):
a3651a1
Update debug handler
Browse files- handler.py +2 -3
handler.py
CHANGED
@@ -42,7 +42,6 @@ class EndpointHandler:
|
|
42 |
tokenized_prompt = self.tokenizer.apply_chat_template(
|
43 |
messages,
|
44 |
add_generation_prompt=True,
|
45 |
-
return_dict=True,
|
46 |
return_tensors="pt"
|
47 |
).to("cuda")
|
48 |
|
@@ -52,7 +51,7 @@ class EndpointHandler:
|
|
52 |
|
53 |
time_start = time.time()
|
54 |
out = self.model.generate(
|
55 |
-
|
56 |
max_new_tokens=max_new_tokens,
|
57 |
temperature=1.0,
|
58 |
do_sample=True,
|
@@ -63,7 +62,7 @@ class EndpointHandler:
|
|
63 |
|
64 |
print("Debug: `out`:", len(out[0]), self.tokenizer.decode(out[0]))
|
65 |
|
66 |
-
response = self.tokenizer.decode(out[0])
|
67 |
|
68 |
num_new_tokens = len(out[0]) - len(tokenized_prompt[0])
|
69 |
|
|
|
42 |
tokenized_prompt = self.tokenizer.apply_chat_template(
|
43 |
messages,
|
44 |
add_generation_prompt=True,
|
|
|
45 |
return_tensors="pt"
|
46 |
).to("cuda")
|
47 |
|
|
|
51 |
|
52 |
time_start = time.time()
|
53 |
out = self.model.generate(
|
54 |
+
tokenized_prompt,
|
55 |
max_new_tokens=max_new_tokens,
|
56 |
temperature=1.0,
|
57 |
do_sample=True,
|
|
|
62 |
|
63 |
print("Debug: `out`:", len(out[0]), self.tokenizer.decode(out[0]))
|
64 |
|
65 |
+
response = self.tokenizer.decode(out[0][len(tokenized_prompt[0]):])
|
66 |
|
67 |
num_new_tokens = len(out[0]) - len(tokenized_prompt[0])
|
68 |
|