Hjgugugjhuhjggg committed
Commit fbbc32a · verified · 1 Parent(s): 564b6ea

Update app.py

Files changed (1)
  1. app.py +9 -20
app.py CHANGED
@@ -9,7 +9,6 @@ from transformers import (
     AutoModelForCausalLM,
     AutoTokenizer,
     GenerationConfig,
-    StoppingCriteriaList
 )
 import boto3
 import uvicorn
@@ -49,7 +48,6 @@ class GenerateRequest(BaseModel):
     repetition_penalty: float = 1.0
     num_return_sequences: int = 1
     do_sample: bool = True
-    chunk_delay: float = 0.0
     stop_sequences: list[str] = []
 
     @field_validator("model_name")
@@ -139,7 +137,6 @@ async def generate(request: GenerateRequest):
     repetition_penalty = request.repetition_penalty
     num_return_sequences = request.num_return_sequences
     do_sample = request.do_sample
-    chunk_delay = request.chunk_delay
     stop_sequences = request.stop_sequences
 
     model, tokenizer = await model_loader.\
@@ -161,7 +158,7 @@ async def generate(request: GenerateRequest):
         return StreamingResponse(
             stream_text(model, tokenizer, input_text,
                         generation_config, stop_sequences,
-                        device, chunk_delay),
+                        device),
             media_type="text/plain"
         )
     else:
@@ -175,7 +172,7 @@ async def generate(request: GenerateRequest):
 
 async def stream_text(model, tokenizer, input_text,
                       generation_config, stop_sequences,
-                      device, chunk_delay, max_length=2048):
+                      device, max_length=2048):
     encoded_input = tokenizer(
         input_text, return_tensors="pt",
         truncation=True, max_length=max_length
@@ -226,30 +223,22 @@ async def stream_text(model, tokenizer, input_text,
         if stop_index != -1:
             final_output = output_text[:stop_index]
 
-            chunked_output = [final_output[i:i+10]
-                              for i in range(0, len(final_output), 10)]
-
-            for chunk in chunked_output:
-                yield json.dumps({"text": chunk, "is_end": False}) + "\n"
-                await asyncio.sleep(chunk_delay)
+            for char in final_output:
+                yield json.dumps({"text": char, "is_end": False}) + "\n"
 
             yield json.dumps({"text": "", "is_end": True}) + "\n"
             break
 
         else:
-            chunked_output = [new_text[i:i+10]
-                              for i in range(0, len(new_text), 10)]
-            for chunk in chunked_output:
-                yield json.dumps({"text": chunk, "is_end": False}) + "\n"
-                await asyncio.sleep(chunk_delay)
+            for char in new_text:
+                yield json.dumps({"text": char, "is_end": False}) + "\n"
 
         if len(output_text) >= generation_config.max_new_tokens:
-            chunked_output = [output_text[i:i+10]
-                              for i in range(0, len(output_text), 10)]
-
-            for chunk in chunked_output:
-                yield json.dumps({"text": chunk, "is_end": False}) + "\n"
-                await asyncio.sleep(chunk_delay)
+            for char in output_text:
+                yield json.dumps({"text": char, "is_end": False}) + "\n"
             yield json.dumps({"text": "", "is_end": True}) + "\n"
             break
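After this commit, stream_text emits one NDJSON event per character and the server-side pacing is gone (both the chunk_delay request field and the 10-character chunking with asyncio.sleep were removed), so output arrives as fast as tokens decode. Below is a minimal client sketch for consuming the new stream; the /generate path and the payload field names are assumptions inferred from the GenerateRequest fields visible in this diff, not confirmed by it.

import json

import requests


def stream_generate(url, payload):
    # After this commit, each response line is one JSON object of the form
    # {"text": "<single character>", "is_end": false}, terminated by a final
    # {"text": "", "is_end": true} event.
    with requests.post(url, json=payload, stream=True) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines():
            if not line:
                continue
            event = json.loads(line)
            if event["is_end"]:
                break
            yield event["text"]


# Hypothetical usage -- host, path, and fields are guesses:
# for ch in stream_generate("http://localhost:8000/generate",
#                           {"model_name": "some-model", "input_text": "Hello"}):
#     print(ch, end="", flush=True)

Since the server no longer sleeps between chunks, any pacing a client wants (e.g. a typewriter effect) now has to happen client-side.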