prgrmc committed
Commit 7c921e8 · 1 parent: 0213327

Remove 8-bit quantization

Files changed (1): helper.py (+2, -2)
helper.py CHANGED
@@ -74,7 +74,7 @@ def initialize_model_pipeline(model_name, force_cpu=False):
     # Use 8-bit quantization for memory efficiency
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
-        load_in_8bit=False if device == "cpu" else True,
+        load_in_8bit=False,
         torch_dtype=MODEL_CONFIG["main_model"]["dtype"],
         use_cache=True,
         device_map="auto",
@@ -85,7 +85,7 @@ def initialize_model_pipeline(model_name, force_cpu=False):
 
     model.config.use_cache = True
 
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_key)
 
     # Initialize pipeline
     logger.info(f"Initializing pipeline with device: {device}")
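
For context, a minimal sketch of how the loading path in helper.py might read after this commit. Only the lines inside the hunks above are from the repository; the device selection, the MODEL_CONFIG shape, and where api_key comes from are assumptions reconstructed for illustration.

import logging

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

logger = logging.getLogger(__name__)

# Assumed shapes; the real helper.py defines these elsewhere.
MODEL_CONFIG = {"main_model": {"dtype": torch.float16}}
api_key = None  # hypothetical: a Hugging Face access token loaded elsewhere


def initialize_model_pipeline(model_name, force_cpu=False):
    device = "cpu" if force_cpu or not torch.cuda.is_available() else "cuda"

    # After this commit, 8-bit quantization is disabled unconditionally,
    # so bitsandbytes is no longer required on GPU runs.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        load_in_8bit=False,
        torch_dtype=MODEL_CONFIG["main_model"]["dtype"],
        use_cache=True,
        device_map="auto",
    )
    model.config.use_cache = True

    # The token is now passed, presumably so gated or private
    # checkpoints can be fetched.
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_key)

    # Initialize pipeline
    logger.info(f"Initializing pipeline with device: {device}")
    return pipeline("text-generation", model=model, tokenizer=tokenizer)

Note that the load_in_8bit kwarg, while still accepted here, is deprecated in recent transformers releases in favor of quantization_config=BitsAndBytesConfig(...); passing False simply skips quantization entirely.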