Hjgugugjhuhjggg committed on
Commit
5d00129
verified
1 Parent(s): cb66b7a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -151
app.py CHANGED
@@ -1,182 +1,164 @@
1
  import os
2
- import json
3
- from fastapi import FastAPI, HTTPException
 
 
 
 
4
  from pydantic import BaseModel
5
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
 
 
 
 
 
6
  import boto3
7
- import logging
8
  from huggingface_hub import hf_hub_download
 
 
 
 
 
 
 
9
 
10
- # Configuración de AWS y Hugging Face
11
  AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
12
  AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
13
  AWS_REGION = os.getenv("AWS_REGION")
14
  S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME")
15
  HUGGINGFACE_HUB_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN")
16
 
17
- # Cliente de S3
18
- s3_client = boto3.client(
19
- 's3',
20
- aws_access_key_id=AWS_ACCESS_KEY_ID,
21
- aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
22
- region_name=AWS_REGION
23
- )
24
-
25
- app = FastAPI()
26
-
27
  class GenerateRequest(BaseModel):
28
  model_name: str
29
  input_text: str
30
  task_type: str
31
-
32
- class S3DirectStream:
33
- def __init__(self, bucket_name):
34
- self.s3_client = boto3.client(
35
- 's3',
36
- aws_access_key_id=AWS_ACCESS_KEY_ID,
37
- aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
38
- region_name=AWS_REGION
39
- )
 
 
 
40
  self.bucket_name = bucket_name
 
41
 
42
- async def download_and_upload_to_s3(self, model_name):
43
- try:
44
- model_name = model_name.replace("/", "-").lower()
45
-
46
- # Descargar el archivo config.json desde Hugging Face
47
- config_file = hf_hub_download(repo_id=model_name, filename="config.json", token=HUGGINGFACE_HUB_TOKEN)
48
- tokenizer_file = hf_hub_download(repo_id=model_name, filename="tokenizer.json", token=HUGGINGFACE_HUB_TOKEN)
49
-
50
- # Verificar si la carpeta y los archivos ya existen en S3
51
- if not await self.file_exists_in_s3(f"{model_name}/config.json"):
52
- logging.info(f"El archivo config.json no existe en S3. Subiendo desde Hugging Face...")
53
- self.create_folder_if_not_exists(model_name)
54
- with open(config_file, "rb") as file:
55
- self.s3_client.put_object(Bucket=self.bucket_name, Key=f"{model_name}/config.json", Body=file)
56
-
57
- if not await self.file_exists_in_s3(f"{model_name}/tokenizer.json"):
58
- logging.info(f"El archivo tokenizer.json no existe en S3. Subiendo desde Hugging Face...")
59
- self.create_folder_if_not_exists(model_name)
60
- with open(tokenizer_file, "rb") as file:
61
- self.s3_client.put_object(Bucket=self.bucket_name, Key=f"{model_name}/tokenizer.json", Body=file)
62
-
63
- except Exception as e:
64
- logging.error(f"Error al cargar el modelo desde Hugging Face a S3: {e}")
65
- raise HTTPException(status_code=500, detail=f"Error al cargar el modelo: {str(e)}")
66
-
67
- async def file_exists_in_s3(self, s3_key):
68
- try:
69
- self.s3_client.head_object(Bucket=self.bucket_name, Key=s3_key)
70
- return True
71
- except self.s3_client.exceptions.ClientError:
72
- return False
73
 
74
- def create_folder_if_not_exists(self, model_name):
75
- try:
76
- # Las carpetas no existen como tal en S3, pero se pueden crear archivos vacíos para simular carpetas
77
- # Crear un archivo vac铆o para simular la carpeta
78
- self.s3_client.put_object(Bucket=self.bucket_name, Key=f"{model_name}/")
79
- except Exception as e:
80
- logging.error(f"Error al crear la carpeta en S3: {e}")
81
- raise HTTPException(status_code=500, detail=f"Error al crear la carpeta en S3: {str(e)}")
82
-
83
- async def load_model_from_s3(self, model_name):
84
- try:
85
- model_name = model_name.replace("/", "-").lower()
86
- model_files = await self.get_model_file_parts(model_name)
87
-
88
- if not model_files:
89
- await self.download_and_upload_to_s3(model_name)
90
-
91
- # Cargar configuración del modelo desde S3
92
- config_data = await self.stream_from_s3(f"{model_name}/config.json")
93
- if isinstance(config_data, bytes):
94
- config_data = config_data.decode("utf-8")
95
-
96
- config_json = json.loads(config_data)
97
-
98
- # Cargar el modelo
99
- model = AutoModelForCausalLM.from_pretrained(f"s3://{self.bucket_name}/{model_name}", config=config_json)
100
- return model
101
-
102
- except HTTPException as e:
103
- raise e
104
- except Exception as e:
105
- logging.error(f"Error al cargar el modelo desde S3: {e}")
106
- raise HTTPException(status_code=500, detail=f"Error al cargar el modelo desde S3: {str(e)}")
107
-
108
- async def load_tokenizer_from_s3(self, model_name):
109
- try:
110
- model_name = model_name.replace("/", "-").lower()
111
- tokenizer_data = await self.stream_from_s3(f"{model_name}/tokenizer.json")
112
-
113
- if isinstance(tokenizer_data, bytes):
114
- tokenizer_data = tokenizer_data.decode("utf-8")
115
-
116
- tokenizer = AutoTokenizer.from_pretrained(f"s3://{self.bucket_name}/{model_name}")
117
- return tokenizer
118
- except Exception as e:
119
- logging.error(f"Error al cargar el tokenizer desde S3: {e}")
120
- raise HTTPException(status_code=500, detail=f"Error al cargar el tokenizer desde S3: {str(e)}")
121
-
122
- async def stream_from_s3(self, key):
123
  try:
124
- response = self.s3_client.get_object(Bucket=self.bucket_name, Key=key)
125
- return response['Body'].read()
126
- except self.s3_client.exceptions.NoSuchKey:
127
- raise HTTPException(status_code=404, detail=f"El archivo {key} no existe en el bucket S3.")
128
- except Exception as e:
129
- raise HTTPException(status_code=500, detail=f"Error al descargar {key} desde S3: {str(e)}")
130
-
131
- async def get_model_file_parts(self, model_name):
132
- try:
133
- model_name = model_name.replace("/", "-").lower()
134
- files = self.s3_client.list_objects_v2(Bucket=self.bucket_name, Prefix=model_name)
135
- model_files = [obj['Key'] for obj in files.get('Contents', []) if model_name in obj['Key']]
136
- return model_files
137
- except Exception as e:
138
- raise HTTPException(status_code=500, detail=f"Error al obtener archivos del modelo {model_name} desde S3: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
139
 
140
  @app.post("/generate")
141
- async def generate(request: GenerateRequest):
142
  try:
143
- model_name = request.model_name
144
- input_text = request.input_text
145
- task_type = request.task_type
146
-
147
- s3_direct_stream = S3DirectStream(S3_BUCKET_NAME)
148
-
149
- model = await s3_direct_stream.load_model_from_s3(model_name)
150
- tokenizer = await s3_direct_stream.load_tokenizer_from_s3(model_name)
151
-
152
- if task_type == "text-to-text":
153
- generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0)
154
- result = generator(input_text, max_length=1024, num_return_sequences=1)
155
- return {"result": result[0]["generated_text"]}
156
-
157
- elif task_type == "text-to-image":
158
- generator = pipeline("text-to-image", model=model, tokenizer=tokenizer, device=0)
159
- image = generator(input_text)
160
- return {"result": image}
161
-
162
- elif task_type == "text-to-speech":
163
- generator = pipeline("text-to-speech", model=model, tokenizer=tokenizer, device=0)
164
- audio = generator(input_text)
165
- return {"result": audio}
166
-
167
- elif task_type == "text-to-video":
168
- generator = pipeline("text-to-video", model=model, tokenizer=tokenizer, device=0)
169
- video = generator(input_text)
170
- return {"result": video}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
 
172
  else:
173
- raise HTTPException(status_code=400, detail="Tipo de tarea no soportada")
174
 
175
  except HTTPException as e:
176
  raise e
177
  except Exception as e:
178
  raise HTTPException(status_code=500, detail=str(e))
179
 
 
180
  if __name__ == "__main__":
181
- import uvicorn
182
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
import os
import asyncio
import logging
import tempfile
import time
from io import BytesIO

import boto3
import numpy as np
import soundfile as sf
import torch
import uvicorn
from fastapi import FastAPI, HTTPException, Response, Request
from fastapi.responses import StreamingResponse
from huggingface_hub import hf_hub_download
from pydantic import BaseModel
from tqdm import tqdm
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
    GenerationConfig
)

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
25
 
 
26
# Runtime configuration — all values are supplied via environment variables
# so no credentials live in the source tree.
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_REGION = os.getenv("AWS_REGION")
S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME")  # bucket used as the model cache
HUGGINGFACE_HUB_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN")  # auth for gated/private repos
31
 
 
 
 
 
 
 
 
 
 
 
32
class GenerateRequest(BaseModel):
    """Request payload for the /generate endpoint."""

    model_name: str  # Hugging Face repo id of the model to use
    input_text: str  # prompt handed to the model/pipeline
    task_type: str   # "text-to-text" | "text-to-image" | "text-to-speech" | "text-to-video"
    # Sampling / generation parameters (only used for text-to-text).
    temperature: float = 1.0
    max_new_tokens: int = 200        # tokens generated per chunk
    stream: bool = False             # True -> response is streamed chunk by chunk
    top_p: float = 1.0
    top_k: int = 50
    repetition_penalty: float = 1.0
    num_return_sequences: int = 1
    do_sample: bool = True
    chunk_delay: float = 0.0         # pause (seconds) between streamed chunks
45
+
46
class S3ModelLoader:
    """Load a causal-LM model + tokenizer, using an S3 bucket as a
    persistent cache in front of the Hugging Face Hub.

    Objects are stored flat under the key prefix "<org>-<name>/" (slashes
    in the repo id are replaced by dashes).
    """

    def __init__(self, bucket_name, s3_client):
        # Name of the S3 bucket used as the model cache.
        self.bucket_name = bucket_name
        # Pre-configured boto3 S3 client; credentials are the caller's concern.
        self.s3_client = s3_client

    def _get_s3_prefix(self, model_name):
        # "org/model" -> "org-model" (keys in the bucket are kept flat).
        return model_name.replace("/", "-")

    def _get_s3_uri(self, model_name):
        # Kept for backward compatibility with existing callers.
        return f"s3://{self.bucket_name}/{self._get_s3_prefix(model_name)}"

    def _download_dir_from_s3(self, prefix):
        """Copy every object under `prefix/` into a temp dir and return its
        path, or None when the prefix holds no objects (cache miss)."""
        listing = self.s3_client.list_objects_v2(
            Bucket=self.bucket_name, Prefix=f"{prefix}/"
        )
        contents = listing.get("Contents", [])
        if not contents:
            return None
        local_dir = tempfile.mkdtemp(prefix="s3-model-")
        for obj in contents:
            key = obj["Key"]
            rel = key[len(prefix) + 1:]
            if not rel:  # skip a "folder placeholder" object, if present
                continue
            dest = os.path.join(local_dir, rel)
            os.makedirs(os.path.dirname(dest), exist_ok=True)
            self.s3_client.download_file(self.bucket_name, key, dest)
        return local_dir

    def _upload_dir_to_s3(self, local_dir, prefix):
        """Upload every file under `local_dir` to s3://<bucket>/<prefix>/."""
        for root, _dirs, files in os.walk(local_dir):
            for fname in files:
                path = os.path.join(root, fname)
                rel = os.path.relpath(path, local_dir)
                key = f"{prefix}/{rel.replace(os.sep, '/')}"
                self.s3_client.upload_file(path, self.bucket_name, key)

    async def load_model_and_tokenizer(self, model_name):
        """Return (model, tokenizer) for `model_name`.

        Tries the S3 cache first; on a miss, downloads from the Hugging
        Face Hub and uploads the files to S3 so the next call is a hit.

        Raises HTTPException(500) when neither source can provide the model.
        """
        prefix = self._get_s3_prefix(model_name)
        try:
            logging.info(f"Trying to load {model_name} from S3...")
            # transformers cannot read "s3://" URIs directly (the previous
            # from_pretrained(s3_uri) call could never hit the cache), so
            # cached files are first copied to a local directory.
            local_dir = self._download_dir_from_s3(prefix)
            if local_dir is None:
                raise EnvironmentError(f"{model_name} not cached in S3")
            config = AutoConfig.from_pretrained(local_dir)
            model = AutoModelForCausalLM.from_pretrained(local_dir, config=config)
            tokenizer = AutoTokenizer.from_pretrained(local_dir)
            logging.info(f"Loaded {model_name} from S3 successfully.")
            return model, tokenizer
        except EnvironmentError:
            logging.info(f"Model {model_name} not found in S3. Downloading...")
            try:
                # from_pretrained shows its own progress bars; the previous
                # `_tqdm=t` keyword is not a valid argument and raised a
                # TypeError at runtime.
                model = AutoModelForCausalLM.from_pretrained(model_name, token=HUGGINGFACE_HUB_TOKEN)
                tokenizer = AutoTokenizer.from_pretrained(model_name, token=HUGGINGFACE_HUB_TOKEN)
                logging.info(f"Downloaded {model_name} successfully.")
                logging.info(f"Saving {model_name} to S3...")
                # save_pretrained cannot write to "s3://" either: stage the
                # files locally, then upload them with boto3.
                save_dir = tempfile.mkdtemp(prefix="hf-model-")
                model.save_pretrained(save_dir)
                tokenizer.save_pretrained(save_dir)
                self._upload_dir_to_s3(save_dir, prefix)
                logging.info(f"Saved {model_name} to S3 successfully.")
                return model, tokenizer
            except Exception as e:
                logging.error(f"Error downloading/uploading model: {e}")
                raise HTTPException(status_code=500, detail=f"Error loading model: {e}")
78
+
79
app = FastAPI()

# Single shared boto3 S3 client; credentials come from environment variables.
s3_client = boto3.client('s3', aws_access_key_id=AWS_ACCESS_KEY_ID, aws_secret_access_key=AWS_SECRET_ACCESS_KEY, region_name=AWS_REGION)
# Loader that caches Hugging Face models in the configured S3 bucket.
model_loader = S3ModelLoader(S3_BUCKET_NAME, s3_client)
83
 
84
@app.post("/generate")
async def generate(request: Request, body: GenerateRequest):
    """Generate text, image, speech or video from `body.input_text`,
    dispatching on `body.task_type`.

    Raises HTTPException(400) for an unknown task type and
    HTTPException(500) for any generation failure.
    """
    try:
        model, tokenizer = await model_loader.load_model_and_tokenizer(body.model_name)
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model.to(device)

        if body.task_type == "text-to-text":
            generation_config = GenerationConfig(
                temperature=body.temperature,
                max_new_tokens=body.max_new_tokens,
                top_p=body.top_p,
                top_k=body.top_k,
                repetition_penalty=body.repetition_penalty,
                do_sample=body.do_sample,
                num_return_sequences=body.num_return_sequences
            )

            async def stream_text():
                # Generate in chunks of up to `max_new_tokens`, feeding each
                # chunk back in, until the context window is exhausted.
                context = body.input_text
                max_length = model.config.max_position_embeddings

                while True:
                    encoded_input = tokenizer(context, return_tensors="pt", truncation=True, max_length=max_length).to(device)
                    input_length = encoded_input["input_ids"].shape[1]
                    remaining_tokens = max_length - input_length

                    if remaining_tokens <= 0:
                        break

                    generation_config.max_new_tokens = min(remaining_tokens, body.max_new_tokens)

                    with torch.no_grad():  # inference only — no autograd graph
                        output = model.generate(**encoded_input, generation_config=generation_config)

                    # Decode only the newly generated tokens; decoding
                    # output[0] in full re-emits the prompt every iteration,
                    # duplicating text in the streamed result.
                    new_tokens = output[0][input_length:]
                    if new_tokens.shape[0] == 0:
                        break  # nothing new produced (e.g. immediate EOS)
                    chunk = tokenizer.decode(new_tokens, skip_special_tokens=True)
                    yield chunk
                    # Non-blocking pause; time.sleep here would stall the
                    # whole event loop inside this async generator.
                    await asyncio.sleep(body.chunk_delay)
                    context += chunk

            if body.stream:
                return StreamingResponse(stream_text(), media_type="text/plain")
            else:
                generated_text = ""
                async for chunk in stream_text():
                    generated_text += chunk
                return {"result": generated_text}

        elif body.task_type == "text-to-image":
            generator = pipeline("text-to-image", model=model, tokenizer=tokenizer, device=device)
            image = generator(body.input_text)[0]
            # Encode a real PNG file; image.tobytes() returns raw pixel data
            # that no client could decode as image/png.
            # NOTE(review): assumes the pipeline returns a PIL image — confirm.
            buffer = BytesIO()
            image.save(buffer, format="PNG")
            return Response(content=buffer.getvalue(), media_type="image/png")

        elif body.task_type == "text-to-speech":
            generator = pipeline("text-to-speech", model=model, tokenizer=tokenizer, device=device)
            audio = generator(body.input_text)
            audio_bytesio = BytesIO()
            # sf.write's signature is (file, data, samplerate); the previous
            # call passed the sample rate as the data. `format` must be given
            # explicitly when writing to a buffer, and soundfile scales float
            # audio to PCM_16 itself — np.int16(...) would truncate values in
            # [-1, 1] to zero.
            sf.write(audio_bytesio, audio["audio"], audio["sampling_rate"], format="WAV", subtype="PCM_16")
            return Response(content=audio_bytesio.getvalue(), media_type="audio/wav")

        elif body.task_type == "text-to-video":
            try:
                generator = pipeline("text-to-video", model=model, tokenizer=tokenizer, device=device)
                video = generator(body.input_text)
                return Response(content=video, media_type="video/mp4")
            except Exception as e:
                raise HTTPException(status_code=500, detail=f"Error in text-to-video generation: {e}")

        else:
            raise HTTPException(status_code=400, detail="Unsupported task type")

    except HTTPException as e:
        raise e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
161
 
162
+
163
if __name__ == "__main__":
    # Development entry point: serve the API on all interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)