Spaces:
Running
Running
Remove 8-bit quantization
Browse files
helper.py
CHANGED
@@ -74,7 +74,7 @@ def initialize_model_pipeline(model_name, force_cpu=False):
|
|
74 |
# Use 8-bit quantization for memory efficiency
|
75 |
model = AutoModelForCausalLM.from_pretrained(
|
76 |
model_name,
|
77 |
-
load_in_8bit=False
|
78 |
torch_dtype=MODEL_CONFIG["main_model"]["dtype"],
|
79 |
use_cache=True,
|
80 |
device_map="auto",
|
@@ -85,7 +85,7 @@ def initialize_model_pipeline(model_name, force_cpu=False):
|
|
85 |
|
86 |
model.config.use_cache = True
|
87 |
|
88 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
89 |
|
90 |
# Initialize pipeline
|
91 |
logger.info(f"Initializing pipeline with device: {device}")
|
|
|
74 |
# Use 8-bit quantization for memory efficiency
|
75 |
model = AutoModelForCausalLM.from_pretrained(
|
76 |
model_name,
|
77 |
+
load_in_8bit=False,
|
78 |
torch_dtype=MODEL_CONFIG["main_model"]["dtype"],
|
79 |
use_cache=True,
|
80 |
device_map="auto",
|
|
|
85 |
|
86 |
model.config.use_cache = True
|
87 |
|
88 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_key)
|
89 |
|
90 |
# Initialize pipeline
|
91 |
logger.info(f"Initializing pipeline with device: {device}")
|