Hjgugugjhuhjggg committed on
Commit
e079cb9
·
verified ·
1 Parent(s): 7c1d188

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -21
app.py CHANGED
@@ -12,8 +12,6 @@ from huggingface_hub import login
12
  from botocore.exceptions import NoCredentialsError
13
  from functools import cached_property
14
  import base64
15
- from optimum.onnxruntime import ORTModelForCausalLM
16
- import bitsandbytes as bnb
17
 
18
  AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
19
  AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
@@ -40,7 +38,7 @@ class GenerateRequest(BaseModel):
40
  num_return_sequences: int = 1
41
  do_sample: bool = False
42
  stop_sequences: list[str] = []
43
- quantize: bool = True
44
  use_onnx: bool = False
45
  @field_validator("model_name")
46
  def model_name_cannot_be_empty(cls, v):
@@ -64,15 +62,7 @@ class S3ModelLoader:
64
  s3_uri = self._get_s3_uri(model_name)
65
  try:
66
  config = AutoConfig.from_pretrained(s3_uri, local_files_only=False)
67
- if use_onnx:
68
- model = ORTModelForCausalLM.from_pretrained(s3_uri, config=config, local_files_only=False).to(self.device)
69
- elif quantize:
70
- model = AutoModelForCausalLM.from_pretrained(
71
- s3_uri, config=config, local_files_only=False,
72
- load_in_8bit=True
73
- ).to(self.device)
74
- else:
75
- model = AutoModelForCausalLM.from_pretrained(s3_uri, config=config, local_files_only=False).to(self.device)
76
  tokenizer = AutoTokenizer.from_pretrained(s3_uri, config=config, local_files_only=False)
77
  if tokenizer.eos_token_id is not None and tokenizer.pad_token_id is None:
78
  tokenizer.pad_token_id = config.pad_token_id or tokenizer.eos_token_id
@@ -81,15 +71,7 @@ class S3ModelLoader:
81
  try:
82
  config = AutoConfig.from_pretrained(model_name, token=HUGGINGFACE_HUB_TOKEN)
83
  tokenizer = AutoTokenizer.from_pretrained(model_name, config=config, token=HUGGINGFACE_HUB_TOKEN)
84
- if use_onnx:
85
- model = ORTModelForCausalLM.from_pretrained(model_name, config=config, token=HUGGINGFACE_HUB_TOKEN).to(self.device)
86
- elif quantize:
87
- model = AutoModelForCausalLM.from_pretrained(
88
- model_name, config=config, token=HUGGINGFACE_HUB_TOKEN,
89
- load_in_8bit=True
90
- ).to(self.device)
91
- else:
92
- model = AutoModelForCausalLM.from_pretrained(model_name, config=config, token=HUGGINGFACE_HUB_TOKEN).to(self.device)
93
  if tokenizer.eos_token_id is not None and tokenizer.pad_token_id is None:
94
  tokenizer.pad_token_id = config.pad_token_id or tokenizer.eos_token_id
95
  return model, tokenizer
 
12
  from botocore.exceptions import NoCredentialsError
13
  from functools import cached_property
14
  import base64
 
 
15
 
16
  AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
17
  AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
 
38
  num_return_sequences: int = 1
39
  do_sample: bool = False
40
  stop_sequences: list[str] = []
41
+ quantize: bool = False
42
  use_onnx: bool = False
43
  @field_validator("model_name")
44
  def model_name_cannot_be_empty(cls, v):
 
62
  s3_uri = self._get_s3_uri(model_name)
63
  try:
64
  config = AutoConfig.from_pretrained(s3_uri, local_files_only=False)
65
+ model = AutoModelForCausalLM.from_pretrained(s3_uri, config=config, local_files_only=False).to(self.device)
 
 
 
 
 
 
 
 
66
  tokenizer = AutoTokenizer.from_pretrained(s3_uri, config=config, local_files_only=False)
67
  if tokenizer.eos_token_id is not None and tokenizer.pad_token_id is None:
68
  tokenizer.pad_token_id = config.pad_token_id or tokenizer.eos_token_id
 
71
  try:
72
  config = AutoConfig.from_pretrained(model_name, token=HUGGINGFACE_HUB_TOKEN)
73
  tokenizer = AutoTokenizer.from_pretrained(model_name, config=config, token=HUGGINGFACE_HUB_TOKEN)
74
+ model = AutoModelForCausalLM.from_pretrained(model_name, config=config, token=HUGGINGFACE_HUB_TOKEN).to(self.device)
 
 
 
 
 
 
 
 
75
  if tokenizer.eos_token_id is not None and tokenizer.pad_token_id is None:
76
  tokenizer.pad_token_id = config.pad_token_id or tokenizer.eos_token_id
77
  return model, tokenizer