Spaces:
Sleeping
Sleeping
Hjgugugjhuhjggg
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -10,29 +10,23 @@ from transformers import (
|
|
10 |
GenerationConfig,
|
11 |
StoppingCriteria,
|
12 |
StoppingCriteriaList,
|
|
|
13 |
)
|
14 |
-
import boto3
|
15 |
import uvicorn
|
16 |
import asyncio
|
17 |
import json
|
|
|
18 |
from huggingface_hub import login
|
19 |
from botocore.exceptions import NoCredentialsError
|
|
|
20 |
|
21 |
|
22 |
-
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
|
23 |
-
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
|
24 |
-
AWS_REGION = os.getenv("AWS_REGION")
|
25 |
-
S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME")
|
26 |
HUGGINGFACE_HUB_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN")
|
27 |
|
28 |
-
|
29 |
if HUGGINGFACE_HUB_TOKEN:
|
30 |
login(token=HUGGINGFACE_HUB_TOKEN,
|
31 |
add_to_git_credential=False)
|
32 |
|
33 |
-
s3_client = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY_ID,
|
34 |
-
aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
|
35 |
-
region_name=AWS_REGION)
|
36 |
|
37 |
app = FastAPI()
|
38 |
|
@@ -66,67 +60,42 @@ class GenerateRequest(BaseModel):
|
|
66 |
|
67 |
model_data = {} # Global dictionary to store model data
|
68 |
|
69 |
-
|
70 |
-
def __init__(self, bucket_name, s3_client):
|
71 |
-
self.bucket_name = bucket_name
|
72 |
-
self.s3_client = s3_client
|
73 |
-
|
74 |
-
def _get_s3_uri(self, model_name):
|
75 |
-
return f"s3://{self.bucket_name}/" \
|
76 |
-
f"{model_name.replace('/', '-')}"
|
77 |
-
|
78 |
-
async def load_model_and_tokenizer(self, model_name):
|
79 |
-
if model_name in model_data:
|
80 |
-
return model_data[model_name]["model"], model_data[model_name]["tokenizer"]
|
81 |
-
|
82 |
-
s3_uri = self._get_s3_uri(model_name)
|
83 |
-
try:
|
84 |
-
|
85 |
-
config = AutoConfig.from_pretrained(
|
86 |
-
s3_uri, local_files_only=False
|
87 |
-
)
|
88 |
-
|
89 |
-
model = AutoModelForCausalLM.from_pretrained(
|
90 |
-
s3_uri, config=config, local_files_only=False
|
91 |
-
)
|
92 |
-
|
93 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
94 |
-
s3_uri, config=config, local_files_only=False
|
95 |
-
)
|
96 |
-
|
97 |
-
if tokenizer.eos_token_id is not None and \
|
98 |
-
tokenizer.pad_token_id is None:
|
99 |
-
tokenizer.pad_token_id = config.pad_token_id \
|
100 |
-
or tokenizer.eos_token_id
|
101 |
-
model_data[model_name] = {"model":model, "tokenizer":tokenizer}
|
102 |
-
return model, tokenizer
|
103 |
-
except (EnvironmentError, NoCredentialsError):
|
104 |
-
try:
|
105 |
-
config = AutoConfig.from_pretrained(
|
106 |
-
model_name, token=HUGGINGFACE_HUB_TOKEN
|
107 |
-
)
|
108 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
109 |
-
model_name, config=config, token=HUGGINGFACE_HUB_TOKEN
|
110 |
-
)
|
111 |
-
|
112 |
-
model = AutoModelForCausalLM.from_pretrained(
|
113 |
-
model_name, config=config, token=HUGGINGFACE_HUB_TOKEN
|
114 |
-
)
|
115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
raise HTTPException(
|
126 |
-
status_code=500, detail=f"Error loading model: {e}"
|
127 |
-
)
|
128 |
|
129 |
-
model_loader = S3ModelLoader(S3_BUCKET_NAME, s3_client)
|
130 |
|
131 |
@app.post("/generate")
|
132 |
async def generate(request: GenerateRequest):
|
@@ -144,8 +113,8 @@ async def generate(request: GenerateRequest):
|
|
144 |
do_sample = request.do_sample
|
145 |
stop_sequences = request.stop_sequences
|
146 |
|
147 |
-
model, tokenizer = await
|
148 |
-
device = "
|
149 |
model.to(device)
|
150 |
|
151 |
if "text-to-text" == task_type:
|
@@ -231,18 +200,17 @@ async def stream_text(model, tokenizer, input_text,
|
|
231 |
|
232 |
if len(new_text) == 0:
|
233 |
if not stop_criteria(outputs.sequences, None):
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
break
|
238 |
|
239 |
output_text += new_text
|
240 |
|
241 |
-
|
242 |
-
yield json.dumps({"text": text, "is_end": False}) + "\n"
|
243 |
|
244 |
if stop_criteria(outputs.sequences, None):
|
245 |
-
yield
|
246 |
break
|
247 |
|
248 |
encoded_input = tokenizer(
|
@@ -250,8 +218,12 @@ async def stream_text(model, tokenizer, input_text,
|
|
250 |
truncation=True
|
251 |
).to(device)
|
252 |
output_text = ""
|
253 |
-
|
254 |
|
|
|
|
|
|
|
|
|
|
|
255 |
async def generate_text(model, tokenizer, input_text,
|
256 |
generation_config, stop_sequences,
|
257 |
device):
|
@@ -288,7 +260,7 @@ async def generate_text(model, tokenizer, input_text,
|
|
288 |
async def generate_image(request: GenerateRequest):
|
289 |
try:
|
290 |
validated_body = request
|
291 |
-
device = "
|
292 |
|
293 |
if validated_body.model_name not in model_data:
|
294 |
config = AutoConfig.from_pretrained(
|
@@ -306,7 +278,7 @@ async def generate_image(request: GenerateRequest):
|
|
306 |
|
307 |
image_data = list(image.getdata())
|
308 |
|
309 |
-
return
|
310 |
|
311 |
except Exception as e:
|
312 |
raise HTTPException(
|
@@ -319,7 +291,7 @@ async def generate_image(request: GenerateRequest):
|
|
319 |
async def generate_text_to_speech(request: GenerateRequest):
|
320 |
try:
|
321 |
validated_body = request
|
322 |
-
device = "
|
323 |
|
324 |
if validated_body.model_name not in model_data:
|
325 |
config = AutoConfig.from_pretrained(
|
@@ -341,7 +313,7 @@ async def generate_text_to_speech(request: GenerateRequest):
|
|
341 |
|
342 |
audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
|
343 |
|
344 |
-
return
|
345 |
|
346 |
except Exception as e:
|
347 |
raise HTTPException(
|
@@ -354,7 +326,7 @@ async def generate_text_to_speech(request: GenerateRequest):
|
|
354 |
async def generate_video(request: GenerateRequest):
|
355 |
try:
|
356 |
validated_body = request
|
357 |
-
device = "
|
358 |
if validated_body.model_name not in model_data:
|
359 |
config = AutoConfig.from_pretrained(
|
360 |
validated_body.model_name, token=HUGGINGFACE_HUB_TOKEN
|
@@ -373,7 +345,7 @@ async def generate_video(request: GenerateRequest):
|
|
373 |
|
374 |
video_base64 = base64.b64encode(video).decode('utf-8')
|
375 |
|
376 |
-
return
|
377 |
|
378 |
except Exception as e:
|
379 |
raise HTTPException(
|
@@ -381,5 +353,27 @@ async def generate_video(request: GenerateRequest):
|
|
381 |
detail=f"Internal server error: {str(e)}"
|
382 |
)
|
383 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
384 |
if __name__ == "__main__":
|
385 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
10 |
GenerationConfig,
|
11 |
StoppingCriteria,
|
12 |
StoppingCriteriaList,
|
13 |
+
pipeline
|
14 |
)
|
|
|
15 |
import uvicorn
|
16 |
import asyncio
|
17 |
import json
|
18 |
+
import base64
|
19 |
from huggingface_hub import login
|
20 |
from botocore.exceptions import NoCredentialsError
|
21 |
+
from functools import lru_cache
|
22 |
|
23 |
|
|
|
|
|
|
|
|
|
24 |
HUGGINGFACE_HUB_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN")
|
25 |
|
|
|
26 |
if HUGGINGFACE_HUB_TOKEN:
|
27 |
login(token=HUGGINGFACE_HUB_TOKEN,
|
28 |
add_to_git_credential=False)
|
29 |
|
|
|
|
|
|
|
30 |
|
31 |
app = FastAPI()
|
32 |
|
|
|
60 |
|
61 |
model_data = {} # Global dictionary to store model data
|
62 |
|
63 |
+
model_load_lock = asyncio.Lock() # Lock to avoid race conditions
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
+
async def _load_model_and_tokenizer(model_name):
    """Load the config, tokenizer and causal-LM model for ``model_name``.

    This coroutine function is intentionally NOT wrapped in
    ``functools.lru_cache``: ``lru_cache`` would memoize the coroutine
    object returned by the call rather than the loaded bundle, so a second
    call with the same name (for example a retry after a failed load)
    would await an already-consumed coroutine and raise ``RuntimeError``.

    Args:
        model_name: Model identifier handed to each ``from_pretrained`` call.

    Returns:
        A dict with the keys ``"model"`` and ``"tokenizer"``.

    Raises:
        HTTPException: With status 500 when any loading step fails; the
            original exception is attached as ``__cause__``.
    """
    try:
        config = AutoConfig.from_pretrained(
            model_name, token=HUGGINGFACE_HUB_TOKEN
        )
        tokenizer = AutoTokenizer.from_pretrained(
            model_name, config=config, token=HUGGINGFACE_HUB_TOKEN
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_name, config=config, token=HUGGINGFACE_HUB_TOKEN
        )

        if tokenizer.eos_token_id is not None and \
                tokenizer.pad_token_id is None:
            # Compare against None explicitly: a configured pad id of 0 is
            # falsy and would otherwise be silently replaced by the EOS id.
            config_pad_id = config.pad_token_id
            tokenizer.pad_token_id = (
                config_pad_id if config_pad_id is not None
                else tokenizer.eos_token_id
            )

        return {"model": model, "tokenizer": tokenizer}
    except Exception as e:
        # Chain the cause so the underlying failure stays in the traceback.
        raise HTTPException(
            status_code=500, detail=f"Error loading model: {e}"
        ) from e
|
89 |
|
90 |
+
async def load_model_and_tokenizer(model_name):
    """Return the ``(model, tokenizer)`` pair for ``model_name``.

    The lookup and the load both happen while ``model_load_lock`` is held.
    A name that is not yet in ``model_data`` is loaded through
    ``_load_model_and_tokenizer`` and stored there before returning.

    Args:
        model_name: Key into ``model_data`` and the name passed to the loader.

    Returns:
        A 2-tuple of the bundle's ``"model"`` and ``"tokenizer"`` entries.
    """
    async with model_load_lock:
        if model_name not in model_data:
            # Cache miss: load once and remember the bundle for later calls.
            model_data[model_name] = await _load_model_and_tokenizer(model_name)

        cached_bundle = model_data[model_name]
        return cached_bundle.get("model"), cached_bundle.get("tokenizer")
|
|
|
|
|
|
|
98 |
|
|
|
99 |
|
100 |
@app.post("/generate")
|
101 |
async def generate(request: GenerateRequest):
|
|
|
113 |
do_sample = request.do_sample
|
114 |
stop_sequences = request.stop_sequences
|
115 |
|
116 |
+
model, tokenizer = await load_model_and_tokenizer(model_name)
|
117 |
+
device = "cpu" # Force CPU
|
118 |
model.to(device)
|
119 |
|
120 |
if "text-to-text" == task_type:
|
|
|
200 |
|
201 |
if len(new_text) == 0:
|
202 |
if not stop_criteria(outputs.sequences, None):
|
203 |
+
yield {"text": output_text, "is_end": False}
|
204 |
+
|
205 |
+
yield {"text": "", "is_end": True}
|
206 |
break
|
207 |
|
208 |
output_text += new_text
|
209 |
|
210 |
+
yield {"text": new_text, "is_end": False}
|
|
|
211 |
|
212 |
if stop_criteria(outputs.sequences, None):
|
213 |
+
yield {"text": "", "is_end": True}
|
214 |
break
|
215 |
|
216 |
encoded_input = tokenizer(
|
|
|
218 |
truncation=True
|
219 |
).to(device)
|
220 |
output_text = ""
|
|
|
221 |
|
222 |
+
|
223 |
+
async def stream_json_responses(generator):
    """Re-emit each item of an async iterable as one JSON line.

    Args:
        generator: Async iterable yielding JSON-serializable objects.

    Yields:
        The ``json.dumps`` encoding of each item followed by a newline.
    """
    async for payload in generator:
        encoded = json.dumps(payload)
        yield f"{encoded}\n"
|
226 |
+
|
227 |
async def generate_text(model, tokenizer, input_text,
|
228 |
generation_config, stop_sequences,
|
229 |
device):
|
|
|
260 |
async def generate_image(request: GenerateRequest):
|
261 |
try:
|
262 |
validated_body = request
|
263 |
+
device = "cpu" # Force CPU
|
264 |
|
265 |
if validated_body.model_name not in model_data:
|
266 |
config = AutoConfig.from_pretrained(
|
|
|
278 |
|
279 |
image_data = list(image.getdata())
|
280 |
|
281 |
+
return JSONResponse({"image_data": image_data, "is_end": True})
|
282 |
|
283 |
except Exception as e:
|
284 |
raise HTTPException(
|
|
|
291 |
async def generate_text_to_speech(request: GenerateRequest):
|
292 |
try:
|
293 |
validated_body = request
|
294 |
+
device = "cpu" # Force CPU
|
295 |
|
296 |
if validated_body.model_name not in model_data:
|
297 |
config = AutoConfig.from_pretrained(
|
|
|
313 |
|
314 |
audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
|
315 |
|
316 |
+
return JSONResponse({"audio": audio_base64, "is_end": True})
|
317 |
|
318 |
except Exception as e:
|
319 |
raise HTTPException(
|
|
|
326 |
async def generate_video(request: GenerateRequest):
|
327 |
try:
|
328 |
validated_body = request
|
329 |
+
device = "cpu" # Force CPU
|
330 |
if validated_body.model_name not in model_data:
|
331 |
config = AutoConfig.from_pretrained(
|
332 |
validated_body.model_name, token=HUGGINGFACE_HUB_TOKEN
|
|
|
345 |
|
346 |
video_base64 = base64.b64encode(video).decode('utf-8')
|
347 |
|
348 |
+
return JSONResponse({"video": video_base64, "is_end": True})
|
349 |
|
350 |
except Exception as e:
|
351 |
raise HTTPException(
|
|
|
353 |
detail=f"Internal server error: {str(e)}"
|
354 |
)
|
355 |
|
356 |
+
@app.on_event("startup")
async def startup_event():
    """Preload every model named by a ``MODEL_NAME_*`` environment variable.

    A failure to load one model is printed and does not stop the remaining
    models from being attempted.
    """
    print("Loading models...")

    # A set collapses model names that several variables have in common.
    models_to_load = {
        value
        for key, value in os.environ.items()
        if key.startswith("MODEL_NAME_")
    }

    for name in models_to_load:
        try:
            await load_model_and_tokenizer(name)
            print(f"Model {name} loaded")
        except Exception as exc:
            print(f"Error loading model {name}: {exc}")

    print("Models loaded.")
|
377 |
+
|
378 |
if __name__ == "__main__":
|
379 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|