Update handler.py
Browse files- handler.py +2 -6
handler.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
2 |
from transformers_stream_generator import init_stream_support
|
3 |
import re
|
4 |
init_stream_support()
|
@@ -22,16 +22,12 @@ Alice Gate: *Alice strides into the room with a smile, her eyes lighting up when
|
|
22 |
|
23 |
class EndpointHandler():
|
24 |
|
25 |
-
def __init__(self, path = "
|
26 |
-
path = "."
|
27 |
-
# quantization_config = BitsAndBytesConfig(llm_int8_enable_fp32_cpu_offload = True)
|
28 |
self.tokenizer = AutoTokenizer.from_pretrained(path)
|
29 |
self.model = AutoModelForCausalLM.from_pretrained(
|
30 |
path,
|
31 |
device_map = "auto",
|
32 |
load_in_8bit = True,
|
33 |
-
torch_dtype = "auto",
|
34 |
-
low_cpu_mem_usage = True
|
35 |
)
|
36 |
|
37 |
def __call__(self, data):
|
|
|
1 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
2 |
from transformers_stream_generator import init_stream_support
|
3 |
import re
|
4 |
init_stream_support()
|
|
|
22 |
|
23 |
class EndpointHandler():
|
24 |
|
25 |
+
def __init__(self, path = "."):
    """Load the tokenizer and the 8-bit quantized causal LM from *path*.

    Args:
        path: Model directory or hub id passed straight to
            ``from_pretrained``. Defaults to ``"."`` (the deployment
            directory) — the previous default ``""`` is not a valid
            ``from_pretrained`` target and would fail on any call that
            relied on the default; the earlier revision of this file
            explicitly forced ``path = "."`` for the same reason.
    """
    self.tokenizer = AutoTokenizer.from_pretrained(path)
    self.model = AutoModelForCausalLM.from_pretrained(
        path,
        # Let accelerate place layers across available devices.
        device_map = "auto",
        # bitsandbytes int8 quantization to fit the model in memory.
        load_in_8bit = True,
    )
|
32 |
|
33 |
def __call__(self, data):
|