Hjgugugjhuhjggg committed
Commit 99136f3 · verified · 1 Parent(s): 8b0fc48

Update app.py

Files changed (1):
  1. app.py +26 -58
app.py CHANGED
@@ -43,10 +43,10 @@ class GenerateRequest(BaseModel):
     input_text: str = ""
     task_type: str
     temperature: float = 1.0
-    max_new_tokens: int = 3
+    max_new_tokens: int = 3
     stream: bool = True
     top_p: float = 1.0
-    top_k: int = 50 # Changed back to 50
+    top_k: int = 50
     repetition_penalty: float = 1.0
     num_return_sequences: int = 1
     do_sample: bool = True
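
For reference, a request matching this schema might look like the following; the host URL, `model_name` field, and `task_type` value are illustrative assumptions, not confirmed by the diff:

```python
import requests

# Hypothetical client call against this Space's /generate endpoint.
payload = {
    "model_name": "gpt2",
    "input_text": "Once upon a time",
    "task_type": "text-generation",
    "temperature": 0.8,
    "max_new_tokens": 3,
    "top_k": 50,
    "stream": True,
}
resp = requests.post("https://<space-host>/generate", json=payload, stream=True)
for line in resp.iter_lines():
    print(line)  # NDJSON chunks of the form {"text": ..., "is_end": ...}
```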
@@ -92,44 +92,33 @@ class S3ModelLoader:
             )
 
             tokenizer = AutoTokenizer.from_pretrained(
-                s3_uri, config=config, local_files_only=False, padding_side="left"
+                s3_uri, config=config, local_files_only=False
             )
-
-            eos_token_id = tokenizer.eos_token_id
-            pad_token_id = tokenizer.pad_token_id
-            eos_token = tokenizer.eos_token
-            pad_token = tokenizer.pad_token
-            padding = tokenizer.padding_side
-
-            if eos_token_id is not None and pad_token_id is None:
-                pad_token_id = config.pad_token_id or eos_token_id
-                tokenizer.pad_token_id = pad_token_id
 
-            model_cache[model_name] = (model, tokenizer,eos_token_id,
-                pad_token_id,eos_token,pad_token,padding)
-            return model, tokenizer,eos_token_id,pad_token_id,eos_token,pad_token,padding
+            if tokenizer.eos_token_id is not None and \
+                    tokenizer.pad_token_id is None:
+                tokenizer.pad_token_id = config.pad_token_id \
+                    or tokenizer.eos_token_id
+            model_cache[model_name] = (model, tokenizer)
+            return model, tokenizer
         except (EnvironmentError, NoCredentialsError):
             try:
                 config = AutoConfig.from_pretrained(
                     model_name, token=HUGGINGFACE_HUB_TOKEN
                 )
                 tokenizer = AutoTokenizer.from_pretrained(
-                    model_name, config=config, token=HUGGINGFACE_HUB_TOKEN, padding_side="left"
+                    model_name, config=config, token=HUGGINGFACE_HUB_TOKEN
                 )
 
                 model = AutoModelForCausalLM.from_pretrained(
                     model_name, config=config, token=HUGGINGFACE_HUB_TOKEN
                 )
-
-                eos_token_id = tokenizer.eos_token_id
-                pad_token_id = tokenizer.pad_token_id
-                eos_token = tokenizer.eos_token
-                pad_token = tokenizer.pad_token
-                padding = tokenizer.padding_side
 
-                if eos_token_id is not None and pad_token_id is None:
-                    pad_token_id = config.pad_token_id or eos_token_id
-                    tokenizer.pad_token_id = pad_token_id
+
+                if tokenizer.eos_token_id is not None and \
+                        tokenizer.pad_token_id is None:
+                    tokenizer.pad_token_id = config.pad_token_id \
+                        or tokenizer.eos_token_id
 
 
                 model.save_pretrained(s3_uri)
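
The dropped block tracked five tokenizer attributes in local variables; the replacement mutates the tokenizer in place instead. A minimal standalone sketch of the same EOS-as-PAD fallback, using gpt2 purely as an example of a tokenizer that ships without a pad token:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # gpt2 defines eos but no pad token
if tokenizer.eos_token_id is not None and tokenizer.pad_token_id is None:
    # Reuse the EOS token for padding, mirroring the logic in this commit.
    tokenizer.pad_token_id = tokenizer.eos_token_id
assert tokenizer.pad_token_id == tokenizer.eos_token_id
```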
@@ -145,22 +134,10 @@ class S3ModelLoader:
                 )
 
                 tokenizer = AutoTokenizer.from_pretrained(
-                    s3_uri, config=config, local_files_only=False, padding_side="left"
+                    s3_uri, config=config, local_files_only=False
                 )
-
-                eos_token_id = tokenizer.eos_token_id
-                pad_token_id = tokenizer.pad_token_id
-                eos_token = tokenizer.eos_token
-                pad_token = tokenizer.pad_token
-                padding = tokenizer.padding_side
-
-                if eos_token_id is not None and pad_token_id is None:
-                    pad_token_id = config.pad_token_id or eos_token_id
-                    tokenizer.pad_token_id = pad_token_id
-
-                model_cache[model_name] = (model, tokenizer,eos_token_id,
-                    pad_token_id,eos_token,pad_token,padding)
-                return model, tokenizer,eos_token_id,pad_token_id,eos_token,pad_token,padding
+                model_cache[model_name] = (model, tokenizer)
+                return model, tokenizer
             except Exception as e:
                 raise HTTPException(
                     status_code=500, detail=f"Error loading model: {e}"
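
With the cached tuple slimmed down to `(model, tokenizer)`, every call site stays in sync. A sketch of the cache-first lookup this implies; the cache-hit branch is assumed, since it falls outside the diff:

```python
# Hypothetical entry point; model_cache is the module-level dict
# populated by the loading paths shown above.
async def load_model_and_tokenizer(self, model_name):
    if model_name in model_cache:
        return model_cache[model_name]  # (model, tokenizer)
    # ... otherwise fall through to the S3 / Hub loading paths above
```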
@@ -184,7 +161,7 @@ async def generate(request: GenerateRequest):
     do_sample = request.do_sample
     stop_sequences = request.stop_sequences
 
-    model, tokenizer, eos_token_id, pad_token_id, eos_token, pad_token, padding = await model_loader.load_model_and_tokenizer(model_name)
+    model, tokenizer = await model_loader.load_model_and_tokenizer(model_name)
     device = "cuda" if torch.cuda.is_available() else "cpu"
     model.to(device)
 
@@ -197,17 +174,12 @@ async def generate(request: GenerateRequest):
             repetition_penalty=repetition_penalty,
             do_sample=do_sample,
             num_return_sequences=num_return_sequences,
-            pad_token_id=pad_token_id if pad_token_id is not None else None
         )
-
-
-        max_model_length = model.config.max_position_embeddings
-        input_text = input_text[:max_model_length]
 
         return StreamingResponse(
             stream_text(model, tokenizer, input_text,
                         generation_config, stop_sequences,
-                        device,pad_token_id, max_model_length),
+                        device),
             media_type="text/plain"
         )
     else:
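
Dropping the explicit `pad_token_id=...` pass-through is safe here because the pad token was already fixed on the tokenizer at load time, and `generate()` can fall back to the ids in the model's own config. A minimal sketch of the config being built, with values mirroring the request defaults above rather than the repo's exact call:

```python
from transformers import GenerationConfig

generation_config = GenerationConfig(
    temperature=1.0,
    max_new_tokens=3,
    top_p=1.0,
    top_k=50,
    repetition_penalty=1.0,
    num_return_sequences=1,
    do_sample=True,
)
```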
@@ -221,13 +193,11 @@ async def generate(request: GenerateRequest):
 
 async def stream_text(model, tokenizer, input_text,
                       generation_config, stop_sequences,
-                      device,pad_token_id, max_model_length):
+                      device):
 
     encoded_input = tokenizer(
         input_text, return_tensors="pt",
-        truncation=True,
-        padding = "max_length",
-        max_length=max_model_length
+        truncation=True
     ).to(device)
 
     stop_regex = re.compile(r'[\.\?\!\n]+')
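
The removed `padding="max_length"` / `max_length=max_model_length` pair padded every prompt out to the model's full context window, so each `generate()` call paid for the whole window; plain `truncation=True` only clips over-long prompts. An illustrative comparison, assuming a gpt2-style tokenizer with its pad token set as above:

```python
enc = tokenizer("Hello", return_tensors="pt", truncation=True)
print(enc["input_ids"].shape)   # e.g. torch.Size([1, 1]) — just the prompt

enc = tokenizer("Hello", return_tensors="pt", truncation=True,
                padding="max_length", max_length=1024)
print(enc["input_ids"].shape)   # torch.Size([1, 1024]) — padded to the window
```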
@@ -258,7 +228,6 @@ async def stream_text(model, tokenizer, input_text,
             num_return_sequences=generation_config.num_return_sequences,
             output_scores=True,
             return_dict_in_generate=True,
-            pad_token_id=pad_token_id if pad_token_id is not None else None
         )
 
         new_text = tokenizer.decode(
@@ -286,16 +255,15 @@ async def stream_text(model, tokenizer, input_text,
                 yield json.dumps({"text": text, "is_end": False}) + "\n"
             yield json.dumps({"text": "", "is_end": True}) + "\n"
             break
-
+
         encoded_input = tokenizer(
-            output_text, return_tensors="pt",
-            truncation=True,
-            padding = "max_length" ,
-            max_length = max_model_length
+            output_text, return_tensors="pt",
+            truncation=True
         ).to(device)
         output_text = ""
 
 
+
 @app.post("/generate-image")
 async def generate_image(request: GenerateRequest):
     try:
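
For context, `stream_text` is an async generator and `StreamingResponse` drains it chunk by chunk. A self-contained sketch of the NDJSON framing the endpoint emits, with placeholder text and a separate demo app rather than the Space's own `app`:

```python
import json
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app_demo = FastAPI()

@app_demo.get("/demo-stream")
async def demo_stream():
    async def gen():
        for chunk in ("Hello", " world"):
            yield json.dumps({"text": chunk, "is_end": False}) + "\n"
        yield json.dumps({"text": "", "is_end": True}) + "\n"
    return StreamingResponse(gen(), media_type="text/plain")
```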
 