aws_test

Sleeping

App Files Community

Hjgugugjhuhjggg committed on Dec 11, 2024

Commit

fcc4b80

verified ·

1 Parent(s): 0598c12

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -80

app.py CHANGED Viewed

@@ -1,24 +1,29 @@
 import os
 import logging
-import threading
-import boto3
-from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, StoppingCriteriaList, pipeline
-from fastapi import FastAPI, HTTPException, Request
-from pydantic import BaseModel, field_validator
-from huggingface_hub import hf_hub_download
-import requests
 import time
-import asyncio
-from fastapi.responses import StreamingResponse, Response
-import torch
 from io import BytesIO
-import numpy as np
 import soundfile as sf
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s")
-app = FastAPI()
 AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
 AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
 AWS_REGION = os.getenv("AWS_REGION")
@@ -40,6 +45,8 @@ class GenerateRequest(BaseModel):
     chunk_delay: float = 0.0
     stop_sequences: list[str] = []
     @field_validator("model_name")
     def model_name_cannot_be_empty(cls, v):
         if not v:
@@ -59,66 +66,42 @@ class S3ModelLoader:
         self.s3_client = s3_client
     def _get_s3_uri(self, model_name):
-        return f"s3://{self.bucket_name}/lilmeaty_garca/{model_name.replace('/', '-')}"
-    def _download_from_s3(self, model_name):
-        try:
-            logging.info(f"Attempting to load model {model_name} from S3...")
-            model_files = self.s3_client.list_objects_v2(Bucket=self.bucket_name, Prefix=f"lilmeaty_garca/{model_name}")
-            if "Contents" not in model_files:
-                raise FileNotFoundError(f"Model files not found in S3 for {model_name}")
-            s3_model_path = f"s3://{self.bucket_name}/lilmeaty_garca/{model_name.replace('/', '-')}"
-            logging.info(f"Model {model_name} found on S3 at {s3_model_path}")
-            return s3_model_path
-        except Exception as e:
-            logging.error(f"Error downloading from S3: {e}")
-            raise HTTPException(status_code=500, detail=f"Error downloading model from S3: {e}")
-    def download_model_from_huggingface(self, model_name):
-        try:
-            logging.info(f"Downloading model {model_name} from Hugging Face...")
-            model_dir = hf_hub_download(model_name, token=HUGGINGFACE_HUB_TOKEN)
-            model_files = os.listdir(model_dir)
-            for model_file in model_files:
-                s3_path = f"lilmeaty_garca/{model_name}/{model_file}"
-                self.s3_client.upload_file(os.path.join(model_dir, model_file), self.bucket_name, s3_path)
-            logging.info(f"Model {model_name} saved to S3 successfully.")
-        except Exception as e:
-            logging.error(f"Error downloading model {model_name} from Hugging Face: {e}")
-            raise HTTPException(status_code=500, detail=f"Error downloading model from Hugging Face: {e}")
-    def download_all_models_in_background(self):
-        models_url = "https://huggingface.co/api/models"
-        try:
-            response = requests.get(models_url)
-            if response.status_code != 200:
-                logging.error("Error getting Hugging Face model list.")
-                raise HTTPException(status_code=500, detail="Error getting model list.")
-            models = response.json()
-            for model in models:
-                model_name = model["id"]
-                self.download_model_from_huggingface(model_name)
-        except Exception as e:
-            logging.error(f"Error downloading models in the background: {e}")
-            raise HTTPException(status_code=500, detail="Error downloading models in the background.")
-    def run_in_background(self):
-        threading.Thread(target=self.download_all_models_in_background, daemon=True).start()
-    def load_model_and_tokenizer(self, model_name):
         try:
-            model_uri = self._download_from_s3(model_name)
-            model = AutoModelForCausalLM.from_pretrained(model_uri)
-            tokenizer = AutoTokenizer.from_pretrained(model_uri)
-            logging.info(f"Model {model_name} loaded successfully from {model_uri}.")
             return model, tokenizer
-        except Exception as e:
-            logging.error(f"Error loading model {model_name}: {e}")
-            raise HTTPException(status_code=500, detail=f"Error loading model {model_name}: {e}")
-@app.on_event("startup")
-async def startup_event():
-    model_loader.run_in_background()
 s3_client = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY, region_name=AWS_REGION)
 model_loader = S3ModelLoader(S3_BUCKET_NAME, s3_client)
@@ -186,21 +169,29 @@ async def generate(request: Request, body: GenerateRequest):
             generator = pipeline("text-to-speech", model=model, tokenizer=tokenizer, device=device)
             audio = generator(validated_body.input_text)
             audio_bytesio = BytesIO()
-            sf.write(audio_bytesio, audio["samples"], audio["rate"], format="WAV")
-            audio_bytesio.seek(0)
-            return StreamingResponse(audio_bytesio, media_type="audio/wav")
         elif validated_body.task_type == "text-to-video":
-            return {"error": "Text-to-video task type is not yet supported."}
         else:
-            raise HTTPException(status_code=400, detail="Invalid task type")
     except Exception as e:
-        logging.error(f"Error during generation: {e}")
-        raise HTTPException(status_code=500, detail=f"Internal Server Error: {e}")
-import uvicorn
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 import os
 import logging
 import time
 from io import BytesIO
+from typing import Union
+from fastapi import FastAPI, HTTPException, Response, Request, UploadFile, File
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel, ValidationError, field_validator
+from transformers import (
+    AutoConfig,
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    pipeline,
+    GenerationConfig,
+    StoppingCriteriaList
+)
+import boto3
+from huggingface_hub import hf_hub_download
 import soundfile as sf
+import numpy as np
+import torch
+import uvicorn
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s")
 AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
 AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
 AWS_REGION = os.getenv("AWS_REGION")
     chunk_delay: float = 0.0
     stop_sequences: list[str] = []
+    model_config = {"protected_namespaces": ()}
     @field_validator("model_name")
     def model_name_cannot_be_empty(cls, v):
         if not v:
         self.s3_client = s3_client
     def _get_s3_uri(self, model_name):
+        return f"s3://{self.bucket_name}/{model_name.replace('/', '-')}"
+    async def load_model_and_tokenizer(self, model_name):
+        s3_uri = self._get_s3_uri(model_name)
         try:
+            logging.info(f"Trying to load {model_name} from S3...")
+            config = AutoConfig.from_pretrained(s3_uri)
+            model = AutoModelForCausalLM.from_pretrained(s3_uri, config=config)
+            tokenizer = AutoTokenizer.from_pretrained(s3_uri, config=config)
+            if tokenizer.eos_token_id is not None and tokenizer.pad_token_id is None:
+                tokenizer.pad_token_id = config.pad_token_id or tokenizer.eos_token_id
+            logging.info(f"Loaded {model_name} from S3 successfully.")
             return model, tokenizer
+        except EnvironmentError:
+            logging.info(f"Model {model_name} not found in S3. Downloading...")
+            try:
+                config = AutoConfig.from_pretrained(model_name)
+                tokenizer = AutoTokenizer.from_pretrained(model_name, config=config)
+                model = AutoModelForCausalLM.from_pretrained(model_name, config=config, token=HUGGINGFACE_HUB_TOKEN)
+                if tokenizer.eos_token_id is not None and tokenizer.pad_token_id is None:
+                    tokenizer.pad_token_id = config.pad_token_id or tokenizer.eos_token_id
+                logging.info(f"Downloaded {model_name} successfully.")
+                logging.info(f"Saving {model_name} to S3...")
+                model.save_pretrained(s3_uri)
+                tokenizer.save_pretrained(s3_uri)
+                logging.info(f"Saved {model_name} to S3 successfully.")
+                return model, tokenizer
+            except Exception as e:
+                logging.exception(f"Error downloading/uploading model: {e}")
+                raise HTTPException(status_code=500, detail=f"Error loading model: {e}")
+app = FastAPI()
 s3_client = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY, region_name=AWS_REGION)
 model_loader = S3ModelLoader(S3_BUCKET_NAME, s3_client)
             generator = pipeline("text-to-speech", model=model, tokenizer=tokenizer, device=device)
             audio = generator(validated_body.input_text)
             audio_bytesio = BytesIO()
+            sf.write(audio_bytesio, audio["sampling_rate"], np.int16(audio["audio"]))
+            audio_bytes = audio_bytesio.getvalue()
+            return Response(content=audio_bytes, media_type="audio/wav")
         elif validated_body.task_type == "text-to-video":
+            try:
+                generator = pipeline("text-to-video", model=model, tokenizer=tokenizer, device=device)
+                video = generator(validated_body.input_text)
+                return Response(content=video, media_type="video/mp4")
+            except Exception as e:
+                raise HTTPException(status_code=500, detail=f"Error in text-to-video generation: {e}")
         else:
+            raise HTTPException(status_code=400, detail="Unsupported task type")
+    except HTTPException as e:
+        raise e
+    except ValidationError as e:
+        raise HTTPException(status_code=422, detail=e.errors())
     except Exception as e:
+        logging.exception(f"An unexpected error occurred: {e}")
+        raise HTTPException(status_code=500, detail="An unexpected error occurred.")
 if __name__ == "__main__":
+    uvicorn.run(app, host="0.0.0.0", port=7860)