Hjgugugjhuhjggg committed (verified)
Commit 972e5ee
1 parent: 0b9d8bf

Update app.py

Files changed (1):
  app.py +52 -79
app.py CHANGED
@@ -6,22 +6,19 @@ import boto3
 from dotenv import load_dotenv
 import os
 import uvicorn
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextIteratorStreamer
 import safetensors.torch
+import torch
 from fastapi.responses import StreamingResponse
-from tqdm import tqdm
 
-# Load the environment variables from the .env file
 load_dotenv()
 
-# Load the AWS credentials and the Hugging Face token from the environment variables
 AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
 AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
 AWS_REGION = os.getenv("AWS_REGION")
-S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME")  # Name of the S3 bucket
-HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")  # Hugging Face token
+S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME")
+HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
 
-# Amazon S3 client
 s3_client = boto3.client(
     's3',
     aws_access_key_id=AWS_ACCESS_KEY_ID,
@@ -31,12 +28,11 @@ s3_client = boto3.client(
 
 app = FastAPI()
 
-# Pydantic model for the body of the /download_model/ request
 class DownloadModelRequest(BaseModel):
     model_name: str
     pipeline_task: str
     input_text: str
-    revision: str = "main"  # Default revision
+    revision: str = "main"
 
 class S3DirectStream:
     def __init__(self, bucket_name):
@@ -50,11 +46,10 @@ class S3DirectStream:
 
     def stream_from_s3(self, key):
         try:
-            print(f"Downloading file {key} from S3...")
             response = self.s3_client.get_object(Bucket=self.bucket_name, Key=key)
-            return response['Body']  # Return the body directly for the StreamingResponse
+            return response['Body']
         except self.s3_client.exceptions.NoSuchKey:
-            raise HTTPException(status_code=404, detail=f"The file {key} does not exist in the S3 bucket.")
+            raise HTTPException(status_code=404, detail=f"File {key} not found in S3")
 
     def file_exists_in_s3(self, key):
         try:
@@ -65,127 +60,105 @@
 
     def load_model_from_stream(self, model_prefix, revision):
         try:
-            print(f"Loading the model {model_prefix} from S3...")
             if self.file_exists_in_s3(f"{model_prefix}/config.json") and \
                (self.file_exists_in_s3(f"{model_prefix}/pytorch_model.bin") or self.file_exists_in_s3(f"{model_prefix}/model.safetensors")):
-                print(f"Model {model_prefix} already exists in S3. No need to download it.")
                 return self.load_model_from_existing_s3(model_prefix)
-
-            print(f"Model {model_prefix} not found. Proceeding to download...")
-            self.download_and_upload_to_s3(model_prefix, revision)  # We pass 'revision' here
+
+            self.download_and_upload_to_s3(model_prefix, revision)
             return self.load_model_from_stream(model_prefix, revision)
         except HTTPException as e:
-            print(f"Error loading the model: {e}")
             return None
 
     def load_model_from_existing_s3(self, model_prefix):
-        # Load the model and the required files from S3
-        print(f"Loading the {model_prefix} files from S3...")
         config_stream = self.stream_from_s3(f"{model_prefix}/config.json")
-        config_data = config_stream.read().decode("utf-8")
-
-        print(f"Loading the language model {model_prefix}...")
+        config = AutoConfig.from_pretrained(config_stream)  # Directly from stream
 
-        # Check whether the file is a safetensors file or a binary file
         if self.file_exists_in_s3(f"{model_prefix}/model.safetensors"):
-            # Use safetensors if the file is of the safetensors type
             model_stream = self.stream_from_s3(f"{model_prefix}/model.safetensors")
-            model = AutoModelForCausalLM.from_config(config_data)
-            model.load_state_dict(safetensors.torch.load_stream(model_stream))  # Load the model using safetensors
-        else:
-            # Load the model using PyTorch if the file is a .bin
+            model = AutoModelForCausalLM.from_config(config)
+            model.load_state_dict(safetensors.torch.load_stream(model_stream))
+        elif self.file_exists_in_s3(f"{model_prefix}/pytorch_model.bin"):
             model_stream = self.stream_from_s3(f"{model_prefix}/pytorch_model.bin")
-            model = AutoModelForCausalLM.from_config(config_data)
-            model.load_state_dict(torch.load(model_stream, map_location="cpu"))
-
+            model = AutoModelForCausalLM.from_config(config)
+            state_dict = torch.load(model_stream, map_location="cpu")  # Load directly
+            model.load_state_dict(state_dict)
+        else:
+            raise EnvironmentError(f"No model file found for {model_prefix} in S3")
         return model
 
+
+
     def load_tokenizer_from_stream(self, model_prefix):
         try:
             if self.file_exists_in_s3(f"{model_prefix}/tokenizer.json"):
-                print(f"Tokenizer for {model_prefix} already exists in S3. No need to download it.")
                 return self.load_tokenizer_from_existing_s3(model_prefix)
-
-            print(f"Tokenizer for {model_prefix} not found. Proceeding to download...")
-            self.download_and_upload_to_s3(model_prefix)  # We pass 'revision' here too
+            self.download_and_upload_to_s3(model_prefix)
             return self.load_tokenizer_from_stream(model_prefix)
         except HTTPException as e:
-            print(f"Error loading the tokenizer: {e}")
             return None
 
     def load_tokenizer_from_existing_s3(self, model_prefix):
-        print(f"Loading the tokenizer for {model_prefix} from S3...")
         tokenizer_stream = self.stream_from_s3(f"{model_prefix}/tokenizer.json")
-        tokenizer = AutoTokenizer.from_pretrained(tokenizer_stream)
+        tokenizer = AutoTokenizer.from_pretrained(tokenizer_stream)  # Directly from stream
        return tokenizer
 
-    def download_and_upload_to_s3(self, model_prefix, revision):
-        # URLs of the Hugging Face files
+
+    def download_and_upload_to_s3(self, model_prefix, revision="main"):
         model_url = f"https://huggingface.co/{model_prefix}/resolve/{revision}/pytorch_model.bin"
         safetensors_url = f"https://huggingface.co/{model_prefix}/resolve/{revision}/model.safetensors"
         tokenizer_url = f"https://huggingface.co/{model_prefix}/resolve/{revision}/tokenizer.json"
         config_url = f"https://huggingface.co/{model_prefix}/resolve/{revision}/config.json"
 
-        print(f"Downloading and uploading the files for model {model_prefix} to S3...")
         self.download_and_upload_to_s3_url(model_url, f"{model_prefix}/pytorch_model.bin")
         self.download_and_upload_to_s3_url(safetensors_url, f"{model_prefix}/model.safetensors")
         self.download_and_upload_to_s3_url(tokenizer_url, f"{model_prefix}/tokenizer.json")
         self.download_and_upload_to_s3_url(config_url, f"{model_prefix}/config.json")
 
-    def download_and_upload_to_s3_url(self, url: str, s3_key: str):
-        print(f"Downloading file from {url}...")
-        response = requests.get(url)
+
+    def download_and_upload_to_s3_url(self, url, s3_key):
+        response = requests.get(url, stream=True)
         if response.status_code == 200:
-            # Upload the file to S3
-            print(f"Uploading file to S3 with key {s3_key}...")
-            self.s3_client.put_object(Bucket=self.bucket_name, Key=s3_key, Body=response.content)
+            self.s3_client.upload_fileobj(response.raw, self.bucket_name, s3_key)  # Direct upload
+        elif response.status_code == 404:
+            raise HTTPException(status_code=404, detail=f"Error downloading file from {url}. File not found.")
+
         else:
-            raise HTTPException(status_code=500, detail=f"Error downloading the file from {url}")
+            raise HTTPException(status_code=500, detail=f"Error downloading file from {url}")
+
 
 
 @app.post("/predict/")
 async def predict(model_request: DownloadModelRequest):
     try:
-        print(f"Receiving request to predict with the model {model_request.model_name}...")
-
         model_name = model_request.model_name
         revision = model_request.revision
 
-        # Load the model and tokenizer from S3
         streamer = S3DirectStream(S3_BUCKET_NAME)
         model = streamer.load_model_from_stream(model_name, revision)
         tokenizer = streamer.load_tokenizer_from_stream(model_name)
 
-        # Get the appropriate pipeline for the request
         task = model_request.pipeline_task
-        if task not in ["text-generation", "sentiment-analysis", "translation", "fill-mask", "question-answering", "text-to-speech", "text-to-image", "text-to-audio", "text-to-video"]:
-            raise HTTPException(status_code=400, detail="Pipeline task not supported")
-
-        # Create the pipeline dynamically based on the task type
-        nlp_pipeline = pipeline(task, model=model, tokenizer=tokenizer, use_auth_token=HUGGINGFACE_TOKEN, revision=revision)
-
-        # Run the pipeline with the input_text
-        outputs = nlp_pipeline(model_request.input_text)
-
-        # Store the result in S3 depending on the task type
-        if task == "text-to-image":
-            s3_key = f"{model_request.model_name}/generated_image.png"
-            return StreamingResponse(streamer.stream_from_s3(s3_key), media_type="image/png")
-
-        elif task == "text-to-speech":
-            s3_key = f"{model_request.model_name}/generated_audio.wav"
-            return StreamingResponse(streamer.stream_from_s3(s3_key), media_type="audio/wav")
-
-        elif task == "text-to-video":
-            s3_key = f"{model_request.model_name}/generated_video.mp4"
-            return StreamingResponse(streamer.stream_from_s3(s3_key), media_type="video/mp4")
-
-        # Return text results or other task types
-        return {"result": outputs}
+        if task not in ["text-generation", "sentiment-analysis", "translation", "fill-mask", "question-answering", "summarization", "zero-shot-classification"]:
+            raise HTTPException(status_code=400, detail="Unsupported pipeline task")
+
+        if task == "text-generation":
+            text_streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+            inputs = tokenizer(model_request.input_text, return_tensors="pt").to(model.device)
+            generation_kwargs = dict(inputs, streamer=text_streamer)
+            model.generate(**generation_kwargs)
+            return StreamingResponse(iter([tokenizer.decode(token) for token in text_streamer]), media_type="text/event-stream")
+
+        else:
+            nlp_pipeline = pipeline(task, model=model, tokenizer=tokenizer, device_map="auto", trust_remote_code=True)
+            outputs = nlp_pipeline(model_request.input_text)
+            return {"result": outputs}
+
 
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error processing the request: {str(e)}")
+        print(f"Complete Error: {e}")
+        raise HTTPException(status_code=500, detail=f"Error processing request: {str(e)}")
+
 
 
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    uvicorn.run(app, host="0.0.0.0", port=7860)
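A few review notes on the new version of app.py.

AutoConfig.from_pretrained(config_stream) and AutoTokenizer.from_pretrained(tokenizer_stream) are handed boto3 StreamingBody objects, but both methods expect a local path or a Hub repo id, so these calls will most likely raise. A minimal sketch of one workaround is to materialize the S3 objects into a temporary directory and load from there (the materialize_to_tmpdir helper below is hypothetical, not part of this commit):

    import tempfile
    from pathlib import Path

    from transformers import AutoConfig, AutoTokenizer

    def materialize_to_tmpdir(streamer, model_prefix, filenames):
        # Hypothetical helper: copy the named S3 objects into a temp dir so
        # transformers can load them through its normal path-based API.
        tmpdir = Path(tempfile.mkdtemp())
        for name in filenames:
            body = streamer.stream_from_s3(f"{model_prefix}/{name}")
            (tmpdir / name).write_bytes(body.read())
        return tmpdir

    # Usage sketch (depending on the model, tokenizer_config.json and
    # special_tokens_map.json may also be needed for a clean load):
    # tmpdir = materialize_to_tmpdir(streamer, model_name, ["config.json", "tokenizer.json"])
    # config = AutoConfig.from_pretrained(tmpdir)
    # tokenizer = AutoTokenizer.from_pretrained(tmpdir)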
 
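As far as I can tell, safetensors.torch exposes load (from bytes) and load_file (from a path), but no load_stream, so the safetensors branch would raise AttributeError. A sketch of the same idea with the documented API, at the cost of buffering the whole file in memory:

    import safetensors.torch

    # model_stream is the boto3 StreamingBody for model.safetensors.
    state_dict = safetensors.torch.load(model_stream.read())  # deserialize from bytes
    model.load_state_dict(state_dict)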
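Similarly, torch.load wants a seekable file-like object, and a boto3 StreamingBody is not seekable, so the pytorch_model.bin branch will likely fail as written. Buffering through io.BytesIO is the usual fix:

    import io
    import torch

    # Read the S3 body fully, then hand torch.load a seekable buffer.
    buffer = io.BytesIO(model_stream.read())
    state_dict = torch.load(buffer, map_location="cpu")
    model.load_state_dict(state_dict)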
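HUGGINGFACE_TOKEN is read from the environment but never attached to the requests.get calls, so private or gated repos will fail to download. Also, most repos ship either pytorch_model.bin or model.safetensors, not both, so one of the four downloads will usually 404, and the new 404 branch turns that into an exception that aborts the whole upload. A sketch that authenticates and treats a missing file as optional (assuming the standard Bearer-token scheme for huggingface.co resolve URLs):

    import requests

    def fetch_optional(url, token=None):
        # Returns a streaming response, or None when the file is absent (404).
        headers = {"Authorization": f"Bearer {token}"} if token else {}
        response = requests.get(url, stream=True, headers=headers)
        if response.status_code == 404:
            return None
        response.raise_for_status()
        return response

    # In download_and_upload_to_s3, an upload would then simply be skipped
    # whenever fetch_optional(...) returns None.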
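In the text-generation branch, model.generate(**generation_kwargs) runs to completion before the response is built, so nothing is actually streamed, and TextIteratorStreamer yields decoded text chunks (strings), which makes tokenizer.decode(token) a type error. The pattern documented for TextIteratorStreamer runs generation in a background thread and iterates the streamer directly; a sketch (max_new_tokens=256 is an illustrative choice, not from this commit):

    from threading import Thread

    text_streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(model_request.input_text, return_tensors="pt").to(model.device)
    generation_kwargs = dict(inputs, streamer=text_streamer, max_new_tokens=256)

    # Generate in the background so the HTTP response can stream chunks as they arrive.
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    return StreamingResponse(text_streamer, media_type="text/event-stream")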
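Finally, load_model_from_stream and load_tokenizer_from_stream call themselves again after download_and_upload_to_s3; if the download keeps failing, that recursion never terminates, and the except branch returning None pushes the failure into predict, which then crashes on a None model. A minimal non-recursive sketch (_model_files_present is a hypothetical helper wrapping the existing file_exists_in_s3 checks):

    def load_model_from_stream(self, model_prefix, revision):
        # Download at most once, then load; avoids unbounded recursion.
        if not self._model_files_present(model_prefix):
            self.download_and_upload_to_s3(model_prefix, revision)
        if not self._model_files_present(model_prefix):
            raise HTTPException(status_code=404, detail=f"Model {model_prefix} unavailable in S3")
        return self.load_model_from_existing_s3(model_prefix)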