Hjgugugjhuhjggg committed
Commit 5618c19 · verified · 1 Parent(s): 590a28e

Rename app.py to app.py.bkk

Files changed (1)
app.py → app.py.bkk +65 -80
app.py → app.py.bkk RENAMED
@@ -2,13 +2,12 @@ import os
 import json
 import logging
 import boto3
-from fastapi import FastAPI, HTTPException
+from fastapi import FastAPI, HTTPException, Query
 from fastapi.responses import JSONResponse
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer
 from huggingface_hub import hf_hub_download
 import asyncio
 
-# Logger configuration
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 console_handler = logging.StreamHandler()
@@ -33,17 +32,6 @@ s3_client = boto3.client(
 
 app = FastAPI()
 
-PIPELINE_MAP = {
-    "text-generation": "text-generation",
-    "sentiment-analysis": "sentiment-analysis",
-    "translation": "translation",
-    "fill-mask": "fill-mask",
-    "question-answering": "question-answering",
-    "text-to-speech": "text-to-speech",
-    "text-to-video": "text-to-video",
-    "text-to-image": "text-to-image"
-}
-
 class S3DirectStream:
     def __init__(self, bucket_name):
         self.s3_client = boto3.client(
@@ -73,33 +61,31 @@ class S3DirectStream:
 
     def _get_model_file_parts(self, model_name):
         try:
-            model_prefix = model_name.lower()
-            files = self.s3_client.list_objects_v2(Bucket=self.bucket_name, Prefix=model_prefix)
-            model_files = [obj['Key'] for obj in files.get('Contents', []) if model_prefix in obj['Key']]
+            s3_prefix = model_name.replace("/", "-").lower()
+            files = self.s3_client.list_objects_v2(Bucket=self.bucket_name, Prefix=s3_prefix)
+            model_files = [obj['Key'] for obj in files.get('Contents', []) if s3_prefix in obj['Key']]
             return model_files
         except Exception as e:
             raise HTTPException(status_code=500, detail=f"Error fetching files for model {model_name} from S3: {e}")
 
     async def load_model_from_s3(self, model_name):
         try:
-            profile, model = model_name.split("/", 1) if "/" in model_name else ("", model_name)
-
-            model_prefix = f"{profile}/{model}".lower()
-            model_files = await self.get_model_file_parts(model_prefix)
+            s3_prefix = model_name.replace("/", "-").lower()
+            model_files = self._get_model_file_parts(model_name)
 
             if not model_files:
-                await self.download_and_upload_to_s3(model_prefix, model)
+                await self.download_and_upload_to_s3(model_name)
 
-            config_stream = await self.stream_from_s3(f"{model_prefix}/config.json")
+            config_stream = await self.stream_from_s3(f"{s3_prefix}/config.json")
             config_data = config_stream.read()
 
             if not config_data:
-                raise HTTPException(status_code=500, detail=f"The configuration file {model_prefix}/config.json is empty or could not be read.")
+                raise HTTPException(status_code=500, detail=f"The configuration file {s3_prefix}/config.json is empty or could not be read.")
 
             config_text = config_data.decode("utf-8")
             config_json = json.loads(config_text)
 
-            model = AutoModelForCausalLM.from_pretrained(f"s3://{self.bucket_name}/{model_prefix}", config=config_json, from_tf=False)
+            model = AutoModelForCausalLM.from_pretrained(f"s3://{self.bucket_name}/{s3_prefix}", config=config_json, from_tf=False)
             return model
 
         except HTTPException as e:
@@ -109,21 +95,20 @@ class S3DirectStream:
 
     async def load_tokenizer_from_s3(self, model_name):
         try:
-            profile, model = model_name.split("/", 1) if "/" in model_name else ("", model_name)
-
-            tokenizer_stream = await self.stream_from_s3(f"{profile}/{model}/tokenizer.json")
+            s3_prefix = model_name.replace("/", "-").lower()
+            tokenizer_stream = await self.stream_from_s3(f"{s3_prefix}/tokenizer.json")
             tokenizer_data = tokenizer_stream.read().decode("utf-8")
 
-            tokenizer = AutoTokenizer.from_pretrained(f"s3://{self.bucket_name}/{profile}/{model}")
+            tokenizer = AutoTokenizer.from_pretrained(f"s3://{self.bucket_name}/{s3_prefix}")
             return tokenizer
         except Exception as e:
             raise HTTPException(status_code=500, detail=f"Error loading the tokenizer from S3: {e}")
 
     async def create_s3_folders(self, s3_key):
         try:
-            folder_keys = s3_key.split('/')
+            folder_keys = s3_key.split('-')
             for i in range(1, len(folder_keys)):
-                folder_key = '/'.join(folder_keys[:i]) + '/'
+                folder_key = '-'.join(folder_keys[:i]) + '/'
                 if not await self.file_exists_in_s3(folder_key):
                     logger.info(f"Creating folder in S3: {folder_key}")
                     self.s3_client.put_object(Bucket=self.bucket_name, Key=folder_key, Body='')
@@ -138,21 +123,48 @@ class S3DirectStream:
         except self.s3_client.exceptions.ClientError:
             return False
 
-    async def download_and_upload_to_s3(self, model_prefix, model_name):
+    async def download_and_upload_to_s3(self, model_name, force_download=False):
         try:
-            config_file = hf_hub_download(repo_id=model_name, filename="config.json", token=HUGGINGFACE_HUB_TOKEN)
-            tokenizer_file = hf_hub_download(repo_id=model_name, filename="tokenizer.json", token=HUGGINGFACE_HUB_TOKEN)
-
-            if not await self.file_exists_in_s3(f"{model_prefix}/config.json"):
-                with open(config_file, "rb") as file:
-                    self.s3_client.put_object(Bucket=self.bucket_name, Key=f"{model_prefix}/config.json", Body=file)
-
-            if not await self.file_exists_in_s3(f"{model_prefix}/tokenizer.json"):
-                with open(tokenizer_file, "rb") as file:
-                    self.s3_client.put_object(Bucket=self.bucket_name, Key=f"{model_prefix}/tokenizer.json", Body=file)
+            if force_download:
+                logger.info(f"Forcing download of model {model_name} and upload to S3.")
+
+            # hf_hub_download needs the original repo id; S3 keys use the flattened prefix
+            s3_prefix = model_name.replace("/", "-").lower()
+
+            if not await self.file_exists_in_s3(f"{s3_prefix}/config.json") or not await self.file_exists_in_s3(f"{s3_prefix}/tokenizer.json"):
+                config_file = hf_hub_download(repo_id=model_name, filename="config.json", token=HUGGINGFACE_HUB_TOKEN, force_download=force_download)
+                tokenizer_file = hf_hub_download(repo_id=model_name, filename="tokenizer.json", token=HUGGINGFACE_HUB_TOKEN, force_download=force_download)
+
+                await self.create_s3_folders(f"{s3_prefix}/")
+
+                if not await self.file_exists_in_s3(f"{s3_prefix}/config.json"):
+                    with open(config_file, "rb") as file:
+                        self.s3_client.put_object(Bucket=self.bucket_name, Key=f"{s3_prefix}/config.json", Body=file)
+
+                if not await self.file_exists_in_s3(f"{s3_prefix}/tokenizer.json"):
+                    with open(tokenizer_file, "rb") as file:
+                        self.s3_client.put_object(Bucket=self.bucket_name, Key=f"{s3_prefix}/tokenizer.json", Body=file)
+            else:
+                logger.info(f"Model files for {model_name} already exist in S3. No need to download them again.")
 
         except Exception as e:
             raise HTTPException(status_code=500, detail=f"Error downloading or uploading files from Hugging Face to S3: {e}")
+
+    async def resume_download(self, model_name):
+        try:
+            logger.info(f"Resuming download of model {model_name} from Hugging Face.")
+            config_file = hf_hub_download(repo_id=model_name, filename="config.json", token=HUGGINGFACE_HUB_TOKEN, resume_download=True)
+            tokenizer_file = hf_hub_download(repo_id=model_name, filename="tokenizer.json", token=HUGGINGFACE_HUB_TOKEN, resume_download=True)
+
+            s3_prefix = model_name.replace("/", "-").lower()
+
+            if not await self.file_exists_in_s3(f"{s3_prefix}/config.json"):
+                with open(config_file, "rb") as file:
+                    self.s3_client.put_object(Bucket=self.bucket_name, Key=f"{s3_prefix}/config.json", Body=file)
+
+            if not await self.file_exists_in_s3(f"{s3_prefix}/tokenizer.json"):
+                with open(tokenizer_file, "rb") as file:
+                    self.s3_client.put_object(Bucket=self.bucket_name, Key=f"{s3_prefix}/tokenizer.json", Body=file)
+
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=f"Error resuming download or uploading files from Hugging Face to S3: {e}")
 
 def split_text_by_tokens(text, tokenizer, max_tokens=MAX_TOKENS):
     tokens = tokenizer.encode(text)
@@ -165,54 +177,27 @@ def split_text_by_tokens(text, tokenizer, max_tokens=MAX_TOKENS):
 def continue_generation(input_text, model, tokenizer, max_tokens=MAX_TOKENS):
     generated_text = ""
     while len(input_text) > 0:
-        tokens = tokenizer.encode(input_text)
-        input_text = tokenizer.decode(tokens[:max_tokens])
-        output = model.generate(input_ids=tokenizer.encode(input_text, return_tensors="pt").input_ids)
-        generated_text += tokenizer.decode(output[0], skip_special_tokens=True)
-        input_text = input_text[len(input_text):]
+        chunks = split_text_by_tokens(input_text, tokenizer, max_tokens)
+        for chunk in chunks:
+            # generate() expects token ids, not raw text
+            output = model.generate(tokenizer.encode(chunk, return_tensors="pt"))
+            generated_text += tokenizer.decode(output[0], skip_special_tokens=True)
+        input_text = ""  # all chunks consumed; exit the loop
     return generated_text
 
-@app.post("/predict/")
-async def predict(model_request: dict):
+@app.post("/generate")
+async def generate_text(model_name: str = Query(...), input_text: str = Query(...)):
     try:
-        model_name = model_request.get("model_name")
-        task = model_request.get("pipeline_task")
-        input_text = model_request.get("input_text")
-
-        if not model_name or not task or not input_text:
-            raise HTTPException(status_code=400, detail="Missing parameters in the request.")
-
-        streamer = S3DirectStream(S3_BUCKET_NAME)
-
-        await streamer.create_s3_folders(model_name)
-
-        model = await streamer.load_model_from_s3(model_name)
-        tokenizer = await streamer.load_tokenizer_from_s3(model_name)
-
-        if task not in PIPELINE_MAP:
-            raise HTTPException(status_code=400, detail="Unsupported pipeline task")
-
-        nlp_pipeline = pipeline(PIPELINE_MAP[task], model=model, tokenizer=tokenizer)
-
-        result = await asyncio.to_thread(nlp_pipeline, input_text)
-
-        if len(result) > MAX_TOKENS:
-            chunks = split_text_by_tokens(result, tokenizer)
-            full_result = ""
-            for chunk in chunks:
-                full_result += continue_generation(chunk, model, tokenizer)
-            return {"result": full_result}
-
-        return {"result": result}
-
-    except HTTPException as e:
-        logger.error(f"Error during prediction: {str(e.detail)}")
-        return JSONResponse(status_code=e.status_code, content={"detail": str(e.detail)})
+        model_loader = S3DirectStream(S3_BUCKET_NAME)
+        model = await model_loader.load_model_from_s3(model_name)
+        tokenizer = await model_loader.load_tokenizer_from_s3(model_name)
+
+        generated_text = continue_generation(input_text, model, tokenizer)
+
+        return {"generated_text": generated_text}
 
     except Exception as e:
-        logger.error(f"Unexpected error: {str(e)}")
-        return JSONResponse(status_code=500, content={"detail": "Unexpected error. Please try again later."})
+        raise HTTPException(status_code=500, detail=str(e))
 
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    uvicorn.run(app, host="0.0.0.0", port=8000)
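One caveat with this revision: transformers cannot load models from `s3://` URLs, so the `AutoModelForCausalLM.from_pretrained(f"s3://...")` and `AutoTokenizer.from_pretrained(f"s3://...")` calls will fail at runtime regardless of the key layout. A minimal sketch of the usual workaround, assuming the flattened prefix already holds `config.json`, the tokenizer files, and the model weights (the `load_from_s3_prefix` helper and its arguments are illustrative, not part of this commit):

```python
import tempfile

import boto3
from transformers import AutoModelForCausalLM, AutoTokenizer


def load_from_s3_prefix(bucket_name: str, s3_prefix: str):
    """Materialize a model's S3 objects in a temp dir, then load from disk."""
    s3 = boto3.client("s3")
    local_dir = tempfile.mkdtemp(prefix="s3-model-")
    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket_name, Prefix=s3_prefix):
        for obj in page.get("Contents", []):
            filename = obj["Key"].rsplit("/", 1)[-1]
            if filename:  # skip the zero-byte "folder" placeholder keys
                s3.download_file(bucket_name, obj["Key"], f"{local_dir}/{filename}")
    # from_pretrained accepts a local directory containing config.json,
    # tokenizer files, and weights
    model = AutoModelForCausalLM.from_pretrained(local_dir)
    tokenizer = AutoTokenizer.from_pretrained(local_dir)
    return model, tokenizer
```

Note also that `download_and_upload_to_s3` only mirrors `config.json` and `tokenizer.json`; the weight files are never uploaded, so they would need to be mirrored as well (for example from a full `huggingface_hub.snapshot_download`) before a load like this can succeed.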
 
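Because `generate_text` declares both parameters with `Query(...)`, clients pass them in the query string even though the route is a POST. A minimal client sketch against a local instance on the port this commit configures (the host and the model id are assumptions for illustration):

```python
import requests

# Both endpoint parameters are declared as Query(...), so they belong
# in the query string rather than in a JSON body.
response = requests.post(
    "http://localhost:8000/generate",
    params={
        "model_name": "gpt2",  # hypothetical model id
        "input_text": "Once upon a time",
    },
    timeout=300,
)
response.raise_for_status()
print(response.json()["generated_text"])
```

Free-form prompts in a query string are length-limited and awkward to escape; a JSON body (a Pydantic model or `Body(...)` parameters) would be the more conventional FastAPI design for this endpoint.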