Hjgugugjhuhjggg committed on
Commit b7a38a6 · verified · 1 Parent(s): c8741b0

Update app.py

Files changed (1): app.py (+56 -57)
app.py CHANGED
@@ -42,10 +42,10 @@ class GenerateRequest(BaseModel):
     input_text: str = ""
     task_type: str
     temperature: float = 1.0
-    max_new_tokens: int = 10
+    max_new_tokens: int = 3
     stream: bool = True
     top_p: float = 1.0
-    top_k: int = 50
+    top_k: int = 50
     repetition_penalty: float = 1.0
     num_return_sequences: int = 1
     do_sample: bool = True
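
For reference, a hypothetical request body exercising these fields; the model id and task_type value are illustrative assumptions, not taken from this commit:

    payload = {
        "model_name": "gpt2",         # illustrative model id (assumption)
        "input_text": "Hello",
        "task_type": "text-to-text",  # assumed value; must pass the task_type validator
        "max_new_tokens": 3,          # the new default set by this commit
        "stream": True,
    }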
@@ -65,6 +65,8 @@ class GenerateRequest(BaseModel):
             raise ValueError(f"task_type must be one of: {valid_types}")
         return v
 
+model_cache = {}
+
 class S3ModelLoader:
     def __init__(self, bucket_name, s3_client):
         self.bucket_name = bucket_name
@@ -75,8 +77,11 @@ class S3ModelLoader:
             f"{model_name.replace('/', '-')}"
 
     async def load_model_and_tokenizer(self, model_name):
-        s3_uri = self._get_s3_uri(model_name)
-        try:
+        if model_name in model_cache:
+            return model_cache[model_name]
+
+        s3_uri = self._get_s3_uri(model_name)
+        try:
             config = AutoConfig.from_pretrained(
                 s3_uri, local_files_only=False
             )
@@ -93,9 +98,9 @@ class S3ModelLoader:
                     tokenizer.pad_token_id is None:
                 tokenizer.pad_token_id = config.pad_token_id \
                     or tokenizer.eos_token_id
-
+            model_cache[model_name] = (model, tokenizer)
             return model, tokenizer
-        except (EnvironmentError, NoCredentialsError):
+        except (EnvironmentError, NoCredentialsError):
             try:
                 config = AutoConfig.from_pretrained(
                     model_name, token=HUGGINGFACE_HUB_TOKEN
@@ -130,7 +135,7 @@ class S3ModelLoader:
                 tokenizer = AutoTokenizer.from_pretrained(
                     s3_uri, config=config, local_files_only=False
                 )
-
+                model_cache[model_name] = (model, tokenizer)
                 return model, tokenizer
             except Exception as e:
                 raise HTTPException(
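
Taken together, the hunks above memoize model loads in a module-level dict keyed by model name. A minimal sketch of the pattern, with load_from_source standing in for the loader's S3-then-Hub fallback (a hypothetical helper, not part of the commit):

    model_cache = {}

    def get_model_and_tokenizer(model_name):
        # Return the cached pair if this model was loaded before.
        if model_name in model_cache:
            return model_cache[model_name]
        model, tokenizer = load_from_source(model_name)  # hypothetical loader
        model_cache[model_name] = (model, tokenizer)     # memoize for later requests
        return model, tokenizer

Note the dict is unbounded and has no locking, so two concurrent first requests for the same model can each trigger a load.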
@@ -188,11 +193,10 @@ async def generate(request: GenerateRequest):
 async def stream_text(model, tokenizer, input_text,
                       generation_config, stop_sequences,
                       device):
-    max_length=10 #Define the max length to cut the text and generate another response
-
+
     encoded_input = tokenizer(
         input_text, return_tensors="pt",
-        truncation=True, max_length=max_length
+        truncation=True
     ).to(device)
 
 
@@ -206,18 +210,6 @@ async def stream_text(model, tokenizer, input_text,
 
     output_text = ""
     while True:
-
-        input_length = encoded_input["input_ids"].shape[1]
-        remaining_tokens = max_length - input_length
-
-        if remaining_tokens <=0:
-            yield json.dumps({"text": "", "is_end": True}) + "\n"
-            break
-
-        generation_config.max_new_tokens = min(
-            remaining_tokens, generation_config.max_new_tokens
-        )
-
         outputs = model.generate(
             **encoded_input,
             do_sample=generation_config.do_sample,
@@ -242,48 +234,45 @@ async def stream_text(model, tokenizer, input_text,
 
         if stop_index != -1:
             final_output = output_text[:stop_index]
-
-            for chunk in [final_output[i:i+10] for i in range(0, len(final_output), 10)]:
-                for text in chunk.split():
-                    yield json.dumps({"text": text, "is_end": False}) + "\n"
+
+            for text in final_output.split():
+                yield json.dumps({"text": text, "is_end": False}) + "\n"
             yield json.dumps({"text": "", "is_end": True}) + "\n"
             break
         else:
-            for chunk in [new_text[i:i+10] for i in range(0, len(new_text), 10)]:
-                for text in chunk.split():
-                    yield json.dumps({"text": text, "is_end": False}) + "\n"
-
+            for text in new_text.split():
+                yield json.dumps({"text": text, "is_end": False}) + "\n"
 
-            if len(output_text) >= max_length:
-
-                encoded_input = tokenizer(
-                    output_text, return_tensors="pt",
-                    truncation=True, max_length=max_length
-                ).to(device)
-
-                output_text = ""
-
-            elif len(output_text) < max_length and len(new_text) == 0:
-
-                for chunk in [output_text[i:i+10] for i in range(0, len(output_text), 10)]:
-                    for text in chunk.split():
-                        yield json.dumps({"text": text, "is_end": False}) + "\n"
-
+            if len(new_text) == 0:
+                for text in output_text.split():
+                    yield json.dumps({"text": text, "is_end": False}) + "\n"
                 yield json.dumps({"text": "", "is_end": True}) + "\n"
                 break
-
+
+            encoded_input = tokenizer(
+                output_text, return_tensors="pt",
+                truncation=True
+            ).to(device)
+            output_text = ""
+
 
 @app.post("/generate-image")
 async def generate_image(request: GenerateRequest):
     try:
         validated_body = request
         device = "cuda" if torch.cuda.is_available() else "cpu"
-
-        image_generator = pipeline(
-            "text-to-image", model=validated_body.model_name,
-            device=device
-        )
-        image = image_generator(validated_body.input_text)[0]
+
+        if validated_body.model_name not in model_cache:
+            model = pipeline(
+                "text-to-image", model=validated_body.model_name,
+                device=device
+            )
+            model_cache[validated_body.model_name] = model
+        else:
+            model = model_cache[validated_body.model_name]
+
+        image = model(validated_body.input_text)[0]
 
         image_data = list(image.getdata())
 
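
The stream format itself is unchanged by these hunks: one JSON object per line, with is_end set to true on the final message. A sketch of a client, assuming the requests library and an illustrative URL:

    import json
    import requests

    def consume_stream(url, payload):
        # Read the NDJSON stream line by line until the end marker.
        with requests.post(url, json=payload, stream=True) as resp:
            for line in resp.iter_lines():
                if not line:          # skip blank keep-alive lines
                    continue
                msg = json.loads(line)
                if msg["is_end"]:
                    break
                print(msg["text"], end=" ")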
 
@@ -302,10 +291,15 @@ async def generate_text_to_speech(request: GenerateRequest):
         validated_body = request
         device = "cuda" if torch.cuda.is_available() else "cpu"
 
-        audio_generator = pipeline(
-            "text-to-speech", model=validated_body.model_name,
-            device=device
-        )
+        if validated_body.model_name not in model_cache:
+            audio_generator = pipeline(
+                "text-to-speech", model=validated_body.model_name,
+                device=device
+            )
+            model_cache[validated_body.model_name] = audio_generator
+        else:
+            audio_generator = model_cache[validated_body.model_name]
+
         audio = audio_generator(validated_body.input_text)
 
 
 
@@ -327,10 +321,15 @@ async def generate_video(request: GenerateRequest):
     try:
         validated_body = request
         device = "cuda" if torch.cuda.is_available() else "cpu"
-        video_generator = pipeline(
-            "text-to-video", model=validated_body.model_name,
-            device=device
-        )
+        if validated_body.model_name not in model_cache:
+            video_generator = pipeline(
+                "text-to-video", model=validated_body.model_name,
+                device=device
+            )
+            model_cache[validated_body.model_name] = video_generator
+        else:
+            video_generator = model_cache[validated_body.model_name]
+
         video = video_generator(validated_body.input_text)
 
 
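
The image, speech, and video hunks repeat the same check-or-create logic against the shared model_cache. A hypothetical helper that would consolidate it (get_cached_pipeline is not part of this commit):

    from transformers import pipeline

    def get_cached_pipeline(task, model_name, device):
        # One cache entry per model name, shared across all endpoints.
        if model_name not in model_cache:
            model_cache[model_name] = pipeline(
                task, model=model_name, device=device
            )
        return model_cache[model_name]

Because entries are keyed by model name alone, a name already cached by S3ModelLoader as a (model, tokenizer) tuple would be returned here unchanged, regardless of the requested task.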
 
 