Hjgugugjhuhjggg committed on
Commit 2957fb3 · verified · 1 Parent(s): 492235f

Update app.py

Files changed (1)
  1. app.py +173 -169
app.py CHANGED
@@ -1,193 +1,197 @@
- from fastapi import FastAPI, HTTPException
- from pydantic import BaseModel
- import requests
- import boto3
- from dotenv import load_dotenv
  import os
- import uvicorn
- from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
- import torch
- import safetensors.torch
  from fastapi.responses import StreamingResponse
- from tqdm import tqdm

- # Load environment variables from the .env file
- load_dotenv()

- # Load the AWS credentials and the Hugging Face token from environment variables
  AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
  AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
  AWS_REGION = os.getenv("AWS_REGION")
- S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME")  # S3 bucket name
- HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")  # Hugging Face token
-
- # Amazon S3 client
- s3_client = boto3.client(
-     's3',
-     aws_access_key_id=AWS_ACCESS_KEY_ID,
-     aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
-     region_name=AWS_REGION
- )

- app = FastAPI()
-
- # Pydantic model for the request body of the /download_model/ endpoint
- class DownloadModelRequest(BaseModel):
      model_name: str
-     pipeline_task: str
-     input_text: str
-
- class S3DirectStream:
-     def __init__(self, bucket_name):
-         self.s3_client = boto3.client(
-             's3',
-             aws_access_key_id=AWS_ACCESS_KEY_ID,
-             aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
-             region_name=AWS_REGION
-         )
          self.bucket_name = bucket_name

-     def stream_from_s3(self, key):
-         try:
-             print(f"Downloading file {key} from S3...")
-             response = self.s3_client.get_object(Bucket=self.bucket_name, Key=key)
-             return response['Body']  # Return the body directly for the StreamingResponse
-         except self.s3_client.exceptions.NoSuchKey:
-             raise HTTPException(status_code=404, detail=f"The file {key} does not exist in the S3 bucket.")

-     def file_exists_in_s3(self, key):
          try:
-             self.s3_client.head_object(Bucket=self.bucket_name, Key=key)
-             return True
-         except self.s3_client.exceptions.ClientError:
-             return False
-
-     def load_model_from_stream(self, model_prefix):
-         try:
-             print(f"Loading model {model_prefix} from S3...")
-             if self.file_exists_in_s3(f"{model_prefix}/config.json") and \
-                (self.file_exists_in_s3(f"{model_prefix}/pytorch_model.bin") or self.file_exists_in_s3(f"{model_prefix}/model.safetensors")):
-                 print(f"Model {model_prefix} already exists in S3. No download needed.")
-                 return self.load_model_from_existing_s3(model_prefix)
-
-             print(f"Model {model_prefix} not found. Proceeding to download...")
-             self.download_and_upload_to_s3(model_prefix)
-             return self.load_model_from_stream(model_prefix)
-         except HTTPException as e:
-             print(f"Error loading the model: {e}")
-             return None
-
-     def load_model_from_existing_s3(self, model_prefix):
-         # Load the model and the required files from S3
-         print(f"Loading the {model_prefix} files from S3...")
-         config_stream = self.stream_from_s3(f"{model_prefix}/config.json")
-         config_data = config_stream.read().decode("utf-8")
-
-         print(f"Loading the language model {model_prefix}...")
-
-         # Check whether the file is a safetensors file or a binary file
-         if self.file_exists_in_s3(f"{model_prefix}/model.safetensors"):
-             # Use safetensors if the file is a safetensors file
-             model_stream = self.stream_from_s3(f"{model_prefix}/model.safetensors")
-             model = AutoModelForCausalLM.from_config(config_data)
-             model.load_state_dict(safetensors.torch.load_stream(model_stream))  # Load the model using safetensors
-         else:
-             # Load the model with pytorch if the file is a .bin
-             model_stream = self.stream_from_s3(f"{model_prefix}/pytorch_model.bin")
-             model = AutoModelForCausalLM.from_config(config_data)
-             model.load_state_dict(torch.load(model_stream, map_location="cpu"))

-         return model
-
-     def load_tokenizer_from_stream(self, model_prefix):
-         try:
-             if self.file_exists_in_s3(f"{model_prefix}/tokenizer.json"):
-                 print(f"Tokenizer for {model_prefix} already exists in S3. No download needed.")
-                 return self.load_tokenizer_from_existing_s3(model_prefix)
-
-             print(f"Tokenizer for {model_prefix} not found. Proceeding to download...")
-             self.download_and_upload_to_s3(model_prefix)
-             return self.load_tokenizer_from_stream(model_prefix)
-         except HTTPException as e:
-             print(f"Error loading the tokenizer: {e}")
-             return None
-
-     def load_tokenizer_from_existing_s3(self, model_prefix):
-         print(f"Loading the tokenizer for {model_prefix} from S3...")
-         tokenizer_stream = self.stream_from_s3(f"{model_prefix}/tokenizer.json")
-         tokenizer = AutoTokenizer.from_pretrained(tokenizer_stream)
-         return tokenizer
-
-     def download_and_upload_to_s3(self, model_prefix):
-         # Hugging Face file URLs
-         model_url = f"https://huggingface.co/{model_prefix}/resolve/main/pytorch_model.bin"
-         safetensors_url = f"https://huggingface.co/{model_prefix}/resolve/main/model.safetensors"
-         tokenizer_url = f"https://huggingface.co/{model_prefix}/resolve/main/tokenizer.json"
-         config_url = f"https://huggingface.co/{model_prefix}/resolve/main/config.json"
-
-         print(f"Downloading and uploading the files for model {model_prefix} to S3...")
-         self.download_and_upload_to_s3_url(model_url, f"{model_prefix}/pytorch_model.bin")
-         self.download_and_upload_to_s3_url(safetensors_url, f"{model_prefix}/model.safetensors")
-         self.download_and_upload_to_s3_url(tokenizer_url, f"{model_prefix}/tokenizer.json")
-         self.download_and_upload_to_s3_url(config_url, f"{model_prefix}/config.json")
-
-     def download_and_upload_to_s3_url(self, url: str, s3_key: str):
-         print(f"Downloading file from {url}...")
-         response = requests.get(url)
-         if response.status_code == 200:
-             # Upload the file to S3
-             print(f"Uploading file to S3 with key {s3_key}...")
-             self.s3_client.put_object(Bucket=self.bucket_name, Key=s3_key, Body=response.content)
-         else:
-             raise HTTPException(status_code=500, detail=f"Error downloading the file from {url}")


- @app.post("/predict/")
- async def predict(model_request: DownloadModelRequest):
      try:
-         print(f"Received a request to predict with model {model_request.model_name}...")
-         # Load the model and tokenizer from S3
-         streamer = S3DirectStream(S3_BUCKET_NAME)
-         model = streamer.load_model_from_stream(model_request.model_name)
-         tokenizer = streamer.load_tokenizer_from_stream(model_request.model_name)
-
-         # Pick the appropriate pipeline for the request
-         task = model_request.pipeline_task
-         if task not in ["text-generation", "sentiment-analysis", "translation", "fill-mask", "question-answering", "text-to-speech", "text-to-image", "text-to-audio", "text-to-video"]:
-             raise HTTPException(status_code=400, detail="Unsupported pipeline task")
-
-         # Build the pipeline dynamically from the task type
-         nlp_pipeline = pipeline(task, model=model, tokenizer=tokenizer)
-
-         # Run the pipeline on the input_text
-         input_text = model_request.input_text
-         outputs = nlp_pipeline(input_text)
-
-         # Handle the different response types depending on the pipeline
-         if task in ["text-generation", "translation", "fill-mask", "sentiment-analysis", "question-answering"]:
-             return {"response": outputs}
-
-         elif task == "text-to-image":
-             # Assume outputs is the generated image
-             s3_key = f"{model_request.model_name}/generated_image.png"  # Key for the image file
-             return StreamingResponse(streamer.stream_from_s3(s3_key), media_type="image/png")
-
-         elif task == "text-to-audio":
-             # Assume outputs is the generated audio
-             s3_key = f"{model_request.model_name}/generated_audio.wav"  # Key for the audio file
-             return StreamingResponse(streamer.stream_from_s3(s3_key), media_type="audio/wav")
-
-         elif task == "text-to-video":
-             # Assume outputs is the generated video
-             s3_key = f"{model_request.model_name}/generated_video.mp4"  # Key for the video file
-             return StreamingResponse(streamer.stream_from_s3(s3_key), media_type="video/mp4")

          else:
-             raise HTTPException(status_code=400, detail="Unknown task type")

      except Exception as e:
-         raise HTTPException(status_code=500, detail=f"Error processing the request: {str(e)}")


  if __name__ == "__main__":
-     uvicorn.run(app, host="0.0.0.0", port=7860)

  import os
+ import logging
+ import time
+ from io import BytesIO
+ from typing import Union
+
+ from fastapi import FastAPI, HTTPException, Response, Request, UploadFile, File
  from fastapi.responses import StreamingResponse
+ from pydantic import BaseModel, ValidationError, field_validator
+ from transformers import (
+     AutoConfig,
+     AutoModelForCausalLM,
+     AutoTokenizer,
+     pipeline,
+     GenerationConfig,
+     StoppingCriteriaList
+ )
+ import boto3
+ from huggingface_hub import hf_hub_download
+ import soundfile as sf
+ import numpy as np
+ import torch
+ import uvicorn

+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s")

  AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
  AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
  AWS_REGION = os.getenv("AWS_REGION")
+ S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME")
+ HUGGINGFACE_HUB_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN")

+ class GenerateRequest(BaseModel):
      model_name: str
+     input_text: str = ""
+     task_type: str
+     temperature: float = 1.0
+     max_new_tokens: int = 200
+     stream: bool = False
+     top_p: float = 1.0
+     top_k: int = 50
+     repetition_penalty: float = 1.0
+     num_return_sequences: int = 1
+     do_sample: bool = True
+     chunk_delay: float = 0.0
+     stop_sequences: list[str] = []
+
+     model_config = {"protected_namespaces": ()}
+
+     @field_validator("model_name")
+     def model_name_cannot_be_empty(cls, v):
+         if not v:
+             raise ValueError("model_name cannot be empty.")
+         return v
+
+     @field_validator("task_type")
+     def task_type_must_be_valid(cls, v):
+         valid_types = ["text-to-text", "text-to-image", "text-to-speech", "text-to-video"]
+         if v not in valid_types:
+             raise ValueError(f"task_type must be one of: {valid_types}")
+         return v
+
+ class S3ModelLoader:
+     def __init__(self, bucket_name, s3_client):
          self.bucket_name = bucket_name
+         self.s3_client = s3_client

+     def _get_s3_uri(self, model_name):
+         return f"s3://{self.bucket_name}/{model_name.replace('/', '-')}"

+     async def load_model_and_tokenizer(self, model_name):
+         s3_uri = self._get_s3_uri(model_name)
          try:
+             logging.info(f"Trying to load {model_name} from S3...")
+             config = AutoConfig.from_pretrained(s3_uri)
+             model = AutoModelForCausalLM.from_pretrained(s3_uri, config=config)
+             tokenizer = AutoTokenizer.from_pretrained(s3_uri, config=config)
+
+             if tokenizer.eos_token_id is not None and tokenizer.pad_token_id is None:
+                 tokenizer.pad_token_id = config.pad_token_id or tokenizer.eos_token_id
+
+             logging.info(f"Loaded {model_name} from S3 successfully.")
+             return model, tokenizer
+         except EnvironmentError:
+             logging.info(f"Model {model_name} not found in S3. Downloading...")
+             try:
+                 config = AutoConfig.from_pretrained(model_name)
+                 tokenizer = AutoTokenizer.from_pretrained(model_name, config=config)
+                 model = AutoModelForCausalLM.from_pretrained(model_name, config=config, token=HUGGINGFACE_HUB_TOKEN)
+
+                 if tokenizer.eos_token_id is not None and tokenizer.pad_token_id is None:
+                     tokenizer.pad_token_id = config.pad_token_id or tokenizer.eos_token_id
+
+                 logging.info(f"Downloaded {model_name} successfully.")
+                 logging.info(f"Saving {model_name} to S3...")
+                 model.save_pretrained(s3_uri)
+                 tokenizer.save_pretrained(s3_uri)
+                 logging.info(f"Saved {model_name} to S3 successfully.")
+                 return model, tokenizer
+             except Exception as e:
+                 logging.exception(f"Error downloading/uploading model: {e}")
+                 raise HTTPException(status_code=500, detail=f"Error loading model: {e}")

+ app = FastAPI()

+ s3_client = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY, region_name=AWS_REGION)
+ model_loader = S3ModelLoader(S3_BUCKET_NAME, s3_client)

+ @app.post("/generate")
+ async def generate(request: Request, body: GenerateRequest):
      try:
+         validated_body = GenerateRequest(**body.model_dump())
+         model, tokenizer = await model_loader.load_model_and_tokenizer(validated_body.model_name)
+         device = "cuda" if torch.cuda.is_available() else "cpu"
+         model.to(device)
+
+         if validated_body.task_type == "text-to-text":
+             generation_config = GenerationConfig(
+                 temperature=validated_body.temperature,
+                 max_new_tokens=validated_body.max_new_tokens,
+                 top_p=validated_body.top_p,
+                 top_k=validated_body.top_k,
+                 repetition_penalty=validated_body.repetition_penalty,
+                 do_sample=validated_body.do_sample,
+                 num_return_sequences=validated_body.num_return_sequences
+             )
+
+             async def stream_text():
+                 input_text = validated_body.input_text
+                 generated_text = ""
+                 max_length = model.config.max_position_embeddings
+
+                 while True:
+                     encoded_input = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=max_length).to(device)
+                     input_length = encoded_input["input_ids"].shape[1]
+                     remaining_tokens = max_length - input_length
+
+                     if remaining_tokens <= 0:
+                         break
+
+                     generation_config.max_new_tokens = min(remaining_tokens, validated_body.max_new_tokens)
+
+                     stopping_criteria = StoppingCriteriaList(
+                         [lambda _, outputs: tokenizer.decode(outputs[0][-1], skip_special_tokens=True) in validated_body.stop_sequences] if validated_body.stop_sequences else []
+                     )
+
+                     output = model.generate(**encoded_input, generation_config=generation_config, stopping_criteria=stopping_criteria)
+                     chunk = tokenizer.decode(output[0], skip_special_tokens=True)
+                     generated_text += chunk
+                     yield chunk
+                     time.sleep(validated_body.chunk_delay)
+                     input_text = generated_text
+
+             if validated_body.stream:
+                 return StreamingResponse(stream_text(), media_type="text/plain")
+             else:
+                 generated_text = ""
+                 async for chunk in stream_text():
+                     generated_text += chunk
+                 return {"result": generated_text}
+
+         elif validated_body.task_type == "text-to-image":
+             generator = pipeline("text-to-image", model=model, tokenizer=tokenizer, device=device)
+             image = generator(validated_body.input_text)[0]
+             image_bytes = image.tobytes()
+             return Response(content=image_bytes, media_type="image/png")
+
+         elif validated_body.task_type == "text-to-speech":
+             generator = pipeline("text-to-speech", model=model, tokenizer=tokenizer, device=device)
+             audio = generator(validated_body.input_text)
+             audio_bytesio = BytesIO()
+             sf.write(audio_bytesio, np.int16(audio["audio"]), audio["sampling_rate"], format="WAV")  # sf.write takes (file, data, samplerate); format is required when writing to a BytesIO
+             audio_bytes = audio_bytesio.getvalue()
+             return Response(content=audio_bytes, media_type="audio/wav")
+
+         elif validated_body.task_type == "text-to-video":
+             try:
+                 generator = pipeline("text-to-video", model=model, tokenizer=tokenizer, device=device)
+                 video = generator(validated_body.input_text)
+                 return Response(content=video, media_type="video/mp4")
+             except Exception as e:
+                 raise HTTPException(status_code=500, detail=f"Error in text-to-video generation: {e}")

          else:
+             raise HTTPException(status_code=400, detail="Unsupported task type")

+     except HTTPException as e:
+         raise e
+     except ValidationError as e:
+         raise HTTPException(status_code=422, detail=e.errors())
      except Exception as e:
+         logging.exception(f"An unexpected error occurred: {e}")
+         raise HTTPException(status_code=500, detail="An unexpected error occurred.")


  if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=7860)
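
For reference, a minimal client-side sketch of the new /generate endpoint this commit introduces. This example is not part of the commit: it assumes the server is running locally on port 7860 (as configured in the uvicorn.run call), that the requests package is installed, and "gpt2" stands in for any causal-LM repo id the loader can fetch.

# Hypothetical client for the /generate endpoint; not part of app.py.
# Assumes the server runs at localhost:7860 and `requests` is installed.
import requests

payload = {
    "model_name": "gpt2",          # example repo id; any causal LM works
    "input_text": "Once upon a time",
    "task_type": "text-to-text",   # must pass the task_type validator
    "max_new_tokens": 50,
    "stream": True,                # exercise the StreamingResponse branch
}

# requests' stream=True lets the client consume the chunked text/plain body
with requests.post("http://localhost:7860/generate", json=payload, stream=True) as resp:
    resp.raise_for_status()
    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)

With "stream": False the endpoint instead accumulates the chunks server-side and returns a JSON object of the form {"result": "..."}.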