Hjgugugjhuhjggg committed
Update app.py
app.py CHANGED
@@ -9,7 +9,6 @@ from transformers import (
     AutoModelForCausalLM,
     AutoTokenizer,
     GenerationConfig,
-    StoppingCriteriaList
 )
 import boto3
 import uvicorn
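
With this commit the StoppingCriteriaList import is dropped; stop sequences are instead handled further down by plain substring matching on the decoded output (the stop_index check in the last hunk). A minimal sketch of that idea, using a hypothetical helper name find_stop_index that the app itself does not define:

# Hypothetical helper illustrating substring-based stop handling;
# app.py inlines equivalent logic inside stream_text.
def find_stop_index(output_text: str, stop_sequences: list[str]) -> int:
    """Return the earliest index of any stop sequence, or -1 if none occur."""
    hits = [i for i in (output_text.find(s) for s in stop_sequences if s) if i != -1]
    return min(hits) if hits else -1
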
@@ -49,7 +48,6 @@ class GenerateRequest(BaseModel):
     repetition_penalty: float = 1.0
     num_return_sequences: int = 1
     do_sample: bool = True
-    chunk_delay: float = 0.0
     stop_sequences: list[str] = []
 
     @field_validator("model_name")
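
For reference, the request model after removing chunk_delay presumably looks roughly like the sketch below. Only the fields visible in this diff are certain; model_name, input_text, and the validator body are assumptions added for illustration:

from pydantic import BaseModel, field_validator

class GenerateRequest(BaseModel):
    # model_name and input_text are assumed from their use elsewhere in app.py;
    # the remaining fields are taken directly from this hunk.
    model_name: str
    input_text: str = ""
    repetition_penalty: float = 1.0
    num_return_sequences: int = 1
    do_sample: bool = True
    stop_sequences: list[str] = []

    @field_validator("model_name")
    @classmethod
    def validate_model_name(cls, value: str) -> str:
        # Illustrative check only; the real validator body is not shown in this diff.
        if not value:
            raise ValueError("model_name must not be empty")
        return value
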
@@ -139,7 +137,6 @@ async def generate(request: GenerateRequest):
     repetition_penalty = request.repetition_penalty
     num_return_sequences = request.num_return_sequences
     do_sample = request.do_sample
-    chunk_delay = request.chunk_delay
     stop_sequences = request.stop_sequences
 
     model, tokenizer = await model_loader.\
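
The values unpacked here are presumably collected into the GenerationConfig that generate() later hands to stream_text. A rough, self-contained sketch with placeholder values; max_new_tokens and the exact construction are assumptions not visible in this hunk:

from transformers import GenerationConfig

# Placeholder values standing in for the request fields unpacked above.
generation_config = GenerationConfig(
    max_new_tokens=256,          # assumed default, not shown in the diff
    do_sample=True,
    repetition_penalty=1.0,
    num_return_sequences=1,
)
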
@@ -161,7 +158,7 @@ async def generate(request: GenerateRequest):
         return StreamingResponse(
             stream_text(model, tokenizer, input_text,
                         generation_config, stop_sequences,
-                        device
+                        device),
             media_type="text/plain"
         )
     else:
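
This hunk supplies the "),"  that was missing after device, so the stream_text(...) call now closes before media_type and the file parses again. A minimal runnable sketch of the same wiring, assuming a FastAPI app (suggested by the uvicorn import and the StreamingResponse usage) and a stand-in generator named fake_stream:

import json
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()

async def fake_stream():
    # Stand-in for stream_text(...); yields newline-delimited JSON like app.py does.
    for char in "hi":
        yield json.dumps({"text": char, "is_end": False}) + "\n"
    yield json.dumps({"text": "", "is_end": True}) + "\n"

@app.post("/generate")
async def generate():
    # The generator call is closed before the media_type keyword, matching
    # StreamingResponse(content, media_type=...).
    return StreamingResponse(fake_stream(), media_type="text/plain")
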
@@ -175,7 +172,7 @@ async def generate(request: GenerateRequest):
 
 async def stream_text(model, tokenizer, input_text,
                       generation_config, stop_sequences,
-                      device,
+                      device, max_length=2048):
     encoded_input = tokenizer(
         input_text, return_tensors="pt",
         truncation=True, max_length=max_length
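
The matching fix on the callee side: the parameter list now closes with "device, max_length=2048):", and max_length drives the truncation below. A small illustrative tokenization snippet; the "gpt2" checkpoint and the .to(device) move are assumptions, not part of the diff:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # placeholder checkpoint
device = "cpu"
max_length = 2048
# Mirrors the truncation call in stream_text; inputs longer than max_length are cut.
encoded_input = tokenizer(
    "Hello there", return_tensors="pt",
    truncation=True, max_length=max_length
).to(device)
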
@@ -226,30 +223,22 @@ async def stream_text(model, tokenizer, input_text,
            if stop_index != -1:
                final_output = output_text[:stop_index]

-
-
+                for char in final_output:
+                    yield json.dumps({"text": char, "is_end": False}) + "\n"

-                for chunk in chunked_output:
-                    yield json.dumps({"text": chunk, "is_end": False}) + "\n"
-                    await asyncio.sleep(chunk_delay)

                yield json.dumps({"text": "", "is_end": True}) + "\n"
                break

            else:
-
-
-
-                yield json.dumps({"text": chunk, "is_end": False}) + "\n"
-                await asyncio.sleep(chunk_delay)
+
+                for char in new_text:
+                    yield json.dumps({"text": char, "is_end": False}) + "\n"

            if len(output_text) >= generation_config.max_new_tokens:
-
-
+                for char in output_text:
+                    yield json.dumps({"text": char, "is_end": False}) + "\n"

-                for chunk in chunked_output:
-                    yield json.dumps({"text": chunk, "is_end": False}) + "\n"
-                    await asyncio.sleep(chunk_delay)
                yield json.dumps({"text": "", "is_end": True}) + "\n"
                break

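
Net effect of this hunk: the chunked output and the asyncio.sleep(chunk_delay) pacing are gone, and every branch now emits one character per newline-delimited JSON record, followed by an is_end sentinel. A stripped-down, runnable sketch of that pattern (the real stream_text also runs the model and applies stop sequences):

import asyncio
import json

async def stream_chars(text: str):
    # One character per NDJSON line, then a final empty record marking the end.
    for char in text:
        yield json.dumps({"text": char, "is_end": False}) + "\n"
    yield json.dumps({"text": "", "is_end": True}) + "\n"

async def main():
    async for line in stream_chars("hola"):
        print(line, end="")

asyncio.run(main())
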
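For completeness, a sketch of how a client could consume the resulting text/plain NDJSON stream; the URL and payload fields are assumptions based on the request model above:

import json
import requests

# Assumed local endpoint and payload; adjust to the actual Space URL and schema.
resp = requests.post(
    "http://localhost:7860/generate",
    json={"model_name": "gpt2", "input_text": "Hello", "stop_sequences": []},
    stream=True,
)
for raw in resp.iter_lines():
    if not raw:
        continue
    event = json.loads(raw)
    if event["is_end"]:
        break
    print(event["text"], end="", flush=True)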