Hjgugugjhuhjggg committed on
Commit
37276c2
verified
1 Parent(s): 944ca71

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -18
app.py CHANGED
@@ -10,26 +10,26 @@ from transformers import (
10
  StoppingCriteriaList,
11
  pipeline
12
  )
13
- import asyncio
14
  from io import BytesIO
15
- from botocore.exceptions import NoCredentialsError
16
  import boto3
 
17
  from huggingface_hub import snapshot_download
18
 
19
- # Diccionario global para almacenar los tokens y configuraciones de los modelos
20
- token_dict = {}
21
-
22
- # Configuración para acceso a modelos en Hugging Face o S3
23
  AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
24
  AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
25
  AWS_REGION = os.getenv("AWS_REGION")
26
  S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME")
27
  HUGGINGFACE_HUB_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN")
28
 
 
 
 
29
  # Inicialización de la aplicación FastAPI
30
  app = FastAPI()
31
 
32
- # Modelo de la solicitud para la API
33
  class GenerateRequest(BaseModel):
34
  model_name: str
35
  input_text: str
@@ -45,6 +45,7 @@ class GenerateRequest(BaseModel):
45
  chunk_delay: float = 0.0
46
  stop_sequences: list[str] = []
47
 
 
48
  class S3ModelLoader:
49
  def __init__(self, bucket_name, aws_access_key_id=None, aws_secret_access_key=None, aws_region=None):
50
  self.bucket_name = bucket_name
@@ -64,16 +65,18 @@ class S3ModelLoader:
64
 
65
  s3_uri = self._get_s3_uri(model_name)
66
  try:
67
- # Descargamos el modelo y el tokenizer desde Hugging Face directamente a S3
68
  model_path = snapshot_download(model_name, token=HUGGINGFACE_HUB_TOKEN)
69
-
 
70
  model = AutoModelForCausalLM.from_pretrained(model_path)
71
  tokenizer = AutoTokenizer.from_pretrained(model_path)
72
 
 
73
  if tokenizer.eos_token_id is None:
74
  tokenizer.eos_token_id = tokenizer.pad_token_id
75
 
76
- # Guardamos en el diccionario global
77
  token_dict[model_name] = {
78
  "model": model,
79
  "tokenizer": tokenizer,
@@ -81,7 +84,7 @@ class S3ModelLoader:
81
  "eos_token_id": tokenizer.eos_token_id
82
  }
83
 
84
- # Subimos los modelos al S3 si es necesario
85
  self.s3_client.upload_file(model_path, self.bucket_name, f'{model_name}/model')
86
  self.s3_client.upload_file(f'{model_path}/tokenizer', self.bucket_name, f'{model_name}/tokenizer')
87
 
@@ -91,9 +94,10 @@ class S3ModelLoader:
91
  except Exception as e:
92
  raise HTTPException(status_code=500, detail=f"Error loading model: {e}")
93
 
 
94
  model_loader = S3ModelLoader(S3_BUCKET_NAME, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION)
95
 
96
- # Función para hacer streaming de texto, generando un token a la vez
97
  async def stream_text(model, tokenizer, input_text, generation_config, stop_sequences, device, chunk_delay, max_length=2048):
98
  encoded_input = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=max_length).to(device)
99
  input_length = encoded_input["input_ids"].shape[1]
@@ -129,13 +133,13 @@ async def stream_text(model, tokenizer, input_text, generation_config, stop_sequ
129
  for token_id in output:
130
  token = tokenizer.decode(token_id, skip_special_tokens=True)
131
  yield token
132
- await asyncio.sleep(chunk_delay) # Simula el delay entre tokens
133
 
134
  if stop_sequences and any(stop in output_text for stop in stop_sequences):
135
  yield output_text
136
  return
137
 
138
- # Endpoint para la generación de texto
139
  @app.post("/generate")
140
  async def generate(request: GenerateRequest):
141
  try:
@@ -152,7 +156,7 @@ async def generate(request: GenerateRequest):
152
  chunk_delay = request.chunk_delay
153
  stop_sequences = request.stop_sequences
154
 
155
- # Cargar el modelo y el tokenizer desde el S3
156
  model_data = model_loader.load_model_and_tokenizer(model_name)
157
  model = model_data["model"]
158
  tokenizer = model_data["tokenizer"]
@@ -180,7 +184,7 @@ async def generate(request: GenerateRequest):
180
  except Exception as e:
181
  raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
182
 
183
- # Endpoint para la generación de imágenes
184
  @app.post("/generate-image")
185
  async def generate_image(request: GenerateRequest):
186
  try:
@@ -199,7 +203,7 @@ async def generate_image(request: GenerateRequest):
199
  except Exception as e:
200
  raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
201
 
202
- # Endpoint para la generación de texto a voz
203
  @app.post("/generate-text-to-speech")
204
  async def generate_text_to_speech(request: GenerateRequest):
205
  try:
@@ -218,7 +222,7 @@ async def generate_text_to_speech(request: GenerateRequest):
218
  except Exception as e:
219
  raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
220
 
221
- # Endpoint para la generación de video
222
  @app.post("/generate-video")
223
  async def generate_video(request: GenerateRequest):
224
  try:
 
10
  StoppingCriteriaList,
11
  pipeline
12
  )
 
13
  from io import BytesIO
14
+ import asyncio
15
  import boto3
16
+ from botocore.exceptions import NoCredentialsError
17
  from huggingface_hub import snapshot_download
18
 
19
+ # Configuración global
 
 
 
20
  AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
21
  AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
22
  AWS_REGION = os.getenv("AWS_REGION")
23
  S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME")
24
  HUGGINGFACE_HUB_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN")
25
 
26
+ # Diccionario global de tokens y configuraciones
27
+ token_dict = {}
28
+
29
  # Inicialización de la aplicación FastAPI
30
  app = FastAPI()
31
 
32
+ # Modelo de solicitud
33
  class GenerateRequest(BaseModel):
34
  model_name: str
35
  input_text: str
 
45
  chunk_delay: float = 0.0
46
  stop_sequences: list[str] = []
47
 
48
+ # Clase para cargar y gestionar los modelos desde S3
49
  class S3ModelLoader:
50
  def __init__(self, bucket_name, aws_access_key_id=None, aws_secret_access_key=None, aws_region=None):
51
  self.bucket_name = bucket_name
 
65
 
66
  s3_uri = self._get_s3_uri(model_name)
67
  try:
68
+ # Descargar el modelo desde Hugging Face y guardarlo en S3 si no existe
69
  model_path = snapshot_download(model_name, token=HUGGINGFACE_HUB_TOKEN)
70
+
71
+ # Cargar el modelo y tokenizer
72
  model = AutoModelForCausalLM.from_pretrained(model_path)
73
  tokenizer = AutoTokenizer.from_pretrained(model_path)
74
 
75
+ # Asignar EOS y PAD token si no están definidos
76
  if tokenizer.eos_token_id is None:
77
  tokenizer.eos_token_id = tokenizer.pad_token_id
78
 
79
+ # Guardar el modelo y el tokenizer en el diccionario
80
  token_dict[model_name] = {
81
  "model": model,
82
  "tokenizer": tokenizer,
 
84
  "eos_token_id": tokenizer.eos_token_id
85
  }
86
 
87
+ # Subir los archivos del modelo y tokenizer a S3 si no están allí
88
  self.s3_client.upload_file(model_path, self.bucket_name, f'{model_name}/model')
89
  self.s3_client.upload_file(f'{model_path}/tokenizer', self.bucket_name, f'{model_name}/tokenizer')
90
 
 
94
  except Exception as e:
95
  raise HTTPException(status_code=500, detail=f"Error loading model: {e}")
96
 
97
+ # Instanciación del cargador de modelos
98
  model_loader = S3ModelLoader(S3_BUCKET_NAME, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION)
99
 
100
+ # Función de generación de texto con streaming
101
  async def stream_text(model, tokenizer, input_text, generation_config, stop_sequences, device, chunk_delay, max_length=2048):
102
  encoded_input = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=max_length).to(device)
103
  input_length = encoded_input["input_ids"].shape[1]
 
133
  for token_id in output:
134
  token = tokenizer.decode(token_id, skip_special_tokens=True)
135
  yield token
136
+ await asyncio.sleep(chunk_delay)
137
 
138
  if stop_sequences and any(stop in output_text for stop in stop_sequences):
139
  yield output_text
140
  return
141
 
142
+ # Endpoint para generar texto
143
  @app.post("/generate")
144
  async def generate(request: GenerateRequest):
145
  try:
 
156
  chunk_delay = request.chunk_delay
157
  stop_sequences = request.stop_sequences
158
 
159
+ # Cargar el modelo y tokenizer desde S3 si no existe
160
  model_data = model_loader.load_model_and_tokenizer(model_name)
161
  model = model_data["model"]
162
  tokenizer = model_data["tokenizer"]
 
184
  except Exception as e:
185
  raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
186
 
187
+ # Endpoint para generar imágenes
188
  @app.post("/generate-image")
189
  async def generate_image(request: GenerateRequest):
190
  try:
 
203
  except Exception as e:
204
  raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
205
 
206
+ # Endpoint para generar texto a voz
207
  @app.post("/generate-text-to-speech")
208
  async def generate_text_to_speech(request: GenerateRequest):
209
  try:
 
222
  except Exception as e:
223
  raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
224
 
225
+ # Endpoint para generar video
226
  @app.post("/generate-video")
227
  async def generate_video(request: GenerateRequest):
228
  try: