aws_test

Sleeping

App Files Community

Hjgugugjhuhjggg committed on Dec 5, 2024

Commit

3e67bfd

verified ·

1 Parent(s): 059d70c

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -10

app.py CHANGED Viewed

@@ -5,15 +5,22 @@ import boto3
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import JSONResponse
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import asyncio
-logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
 AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
 AWS_REGION = os.getenv("AWS_REGION")
 S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME")
 MAX_TOKENS = 1024
@@ -81,7 +88,7 @@ class S3DirectStream:
             model_files = await self.get_model_file_parts(model_prefix)
             if not model_files:
-                raise HTTPException(status_code=404, detail=f"Archivos del modelo {model_name} no encontrados en S3.")
             config_stream = await self.stream_from_s3(f"{model_prefix}/config.json")
             config_data = config_stream.read()
@@ -107,7 +114,7 @@ class S3DirectStream:
             tokenizer_stream = await self.stream_from_s3(f"{profile}/{model}/tokenizer.json")
             tokenizer_data = tokenizer_stream.read().decode("utf-8")
-            tokenizer = AutoTokenizer.from_pretrained(f"{profile}/{model}")
             return tokenizer
         except Exception as e:
             raise HTTPException(status_code=500, detail=f"Error al cargar el tokenizer desde S3: {e}")
@@ -131,6 +138,22 @@ class S3DirectStream:
         except self.s3_client.exceptions.ClientError:
             return False
 def split_text_by_tokens(text, tokenizer, max_tokens=MAX_TOKENS):
     tokens = tokenizer.encode(text)
     chunks = []
@@ -173,18 +196,22 @@ async def predict(model_request: dict):
         result = await asyncio.to_thread(nlp_pipeline, input_text)
-        chunks = split_text_by_tokens(result, tokenizer)
-        if len(chunks) > 1:
             full_result = ""
             for chunk in chunks:
                 full_result += continue_generation(chunk, model, tokenizer)
-            return JSONResponse(content={"result": full_result})
-        else:
-            return JSONResponse(content={"result": result})
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error al realizar la predicción: {e}")
 if __name__ == "__main__":
     import uvicorn

 from fastapi import FastAPI, HTTPException
 from fastapi.responses import JSONResponse
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from huggingface_hub import hf_hub_download
 import asyncio
+# Configuración del logger
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+console_handler = logging.StreamHandler()
+formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+console_handler.setFormatter(formatter)
+logger.addHandler(console_handler)
 AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
 AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
 AWS_REGION = os.getenv("AWS_REGION")
 S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME")
+HUGGINGFACE_HUB_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN")
 MAX_TOKENS = 1024
             model_files = await self.get_model_file_parts(model_prefix)
             if not model_files:
+                await self.download_and_upload_to_s3(model_prefix, model)
             config_stream = await self.stream_from_s3(f"{model_prefix}/config.json")
             config_data = config_stream.read()
             tokenizer_stream = await self.stream_from_s3(f"{profile}/{model}/tokenizer.json")
             tokenizer_data = tokenizer_stream.read().decode("utf-8")
+            tokenizer = AutoTokenizer.from_pretrained(f"s3://{self.bucket_name}/{profile}/{model}")
             return tokenizer
         except Exception as e:
             raise HTTPException(status_code=500, detail=f"Error al cargar el tokenizer desde S3: {e}")
         except self.s3_client.exceptions.ClientError:
             return False
+    async def download_and_upload_to_s3(self, model_prefix, model_name):
+        try:
+            config_file = hf_hub_download(repo_id=model_name, filename="config.json", token=HUGGINGFACE_HUB_TOKEN)
+            tokenizer_file = hf_hub_download(repo_id=model_name, filename="tokenizer.json", token=HUGGINGFACE_HUB_TOKEN)
+            if not await self.file_exists_in_s3(f"{model_prefix}/config.json"):
+                with open(config_file, "rb") as file:
+                    self.s3_client.put_object(Bucket=self.bucket_name, Key=f"{model_prefix}/config.json", Body=file)
+            if not await self.file_exists_in_s3(f"{model_prefix}/tokenizer.json"):
+                with open(tokenizer_file, "rb") as file:
+                    self.s3_client.put_object(Bucket=self.bucket_name, Key=f"{model_prefix}/tokenizer.json", Body=file)
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=f"Error al descargar o cargar archivos desde Hugging Face a S3: {e}")
 def split_text_by_tokens(text, tokenizer, max_tokens=MAX_TOKENS):
     tokens = tokenizer.encode(text)
     chunks = []
         result = await asyncio.to_thread(nlp_pipeline, input_text)
+        if len(result) > MAX_TOKENS:
+            chunks = split_text_by_tokens(result, tokenizer)
             full_result = ""
             for chunk in chunks:
                 full_result += continue_generation(chunk, model, tokenizer)
+            return {"result": full_result}
+        return {"result": result}
+    except HTTPException as e:
+        logger.error(f"Error al realizar la predicción: {str(e.detail)}")
+        return JSONResponse(status_code=e.status_code, content={"detail": str(e.detail)})
     except Exception as e:
+        logger.error(f"Error inesperado: {str(e)}")
+        return JSONResponse(status_code=500, content={"detail": "Error inesperado. Intenta más tarde."})
 if __name__ == "__main__":
     import uvicorn