Hjgugugjhuhjggg committed on
Commit eec8624 · verified · 1 Parent(s): 0edd18a

Update app.py

Files changed (1): app.py (+9 -2)
app.py CHANGED
@@ -14,6 +14,7 @@ from functools import cached_property
 import base64
 from optimum.onnxruntime import ORTModelForCausalLM
 from optimum.bettertransformer import BetterTransformer
+import bitsandbytes as bnb
 
 AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
 AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
@@ -68,7 +69,10 @@ class S3ModelLoader:
         if use_onnx:
             model = ORTModelForCausalLM.from_pretrained(s3_uri, config=config, local_files_only=False).to(self.device)
         elif quantize:
-            model = AutoModelForCausalLM.from_pretrained(s3_uri, config=config, local_files_only=False, torch_dtype=torch.int8 if quantize else torch.float16).to(self.device)
+            model = AutoModelForCausalLM.from_pretrained(
+                s3_uri, config=config, local_files_only=False,
+                load_in_8bit=True
+            ).to(self.device)
         else:
             model = AutoModelForCausalLM.from_pretrained(s3_uri, config=config, local_files_only=False).to(self.device)
         if use_bettertransformer:
@@ -84,7 +88,10 @@ class S3ModelLoader:
         if use_onnx:
             model = ORTModelForCausalLM.from_pretrained(model_name, config=config, token=HUGGINGFACE_HUB_TOKEN).to(self.device)
         elif quantize:
-            model = AutoModelForCausalLM.from_pretrained(model_name, config=config, token=HUGGINGFACE_HUB_TOKEN, torch_dtype=torch.int8 if quantize else torch.float16).to(self.device)
+            model = AutoModelForCausalLM.from_pretrained(
+                model_name, config=config, token=HUGGINGFACE_HUB_TOKEN,
+                load_in_8bit=True
+            ).to(self.device)
         else:
             model = AutoModelForCausalLM.from_pretrained(model_name, config=config, token=HUGGINGFACE_HUB_TOKEN).to(self.device)
         if use_bettertransformer:
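
For reference, below is a minimal sketch of the 8-bit loading path that this commit switches to, written outside of S3ModelLoader and under stated assumptions: it assumes a transformers version that provides BitsAndBytesConfig, a CUDA-capable GPU with bitsandbytes installed, and it uses "gpt2" as a placeholder model id rather than anything resolved by app.py. Passing quantization_config is the currently documented form of what the diff does with the bare load_in_8bit=True kwarg; note also that bitsandbytes dispatches the quantized weights itself, so the trailing .to(self.device) in the diff may be redundant (recent transformers releases reject .to() on 8-bit models).

# Hedged sketch: load a causal LM in 8-bit via BitsAndBytesConfig.
# Assumptions: recent transformers + bitsandbytes, CUDA GPU available,
# "gpt2" is a placeholder model id (not taken from app.py).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "gpt2"  # placeholder; app.py resolves the real model name or S3 URI elsewhere

quant_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,  # preferred over the bare load_in_8bit=True kwarg
    device_map="auto",                 # bitsandbytes places weights on the GPU; no .to(device) needed
)

# Quick smoke test of the quantized model.
inputs = tokenizer("Hello", return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=8)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))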