Update app.py
app.py
CHANGED
@@ -6,25 +6,6 @@ from transformers import TextStreamer
 
 import spaces
 
-quantization_config = BitsAndBytesConfig(
-    bnb_4bit_compute_dtype="float16",
-    bnb_4bit_quant_storage="uint8",
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_use_double_quant=True,
-    llm_int8_enable_fp32_cpu_offload=False,
-    llm_int8_has_fp16_weight=False,
-    llm_int8_skip_modules=None,
-    llm_int8_threshold=6.0,
-    load_in_4bit=True,
-    load_in_8bit=False,
-    quant_method="bitsandbytes"
-)
-
-# Load model and tokenizer
-model = AutoModelForCausalLM.from_pretrained("Rorical/0-roleplay", return_dict=True, trust_remote_code=True)
-tokenizer = AutoTokenizer.from_pretrained("Rorical/0-roleplay", trust_remote_code=True, quantization_config=quantization_config)
-tokenizer.chat_template = "{% for message in messages %}{{'<|im_start|>' + ((message['role'] + ':\n') if message['role'] != '' else '') + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>星野:\n' }}{% endif %}" # Be careful that this model used custom chat template.
-
 # Define the response function
 @spaces.GPU
 def respond(
@@ -35,7 +16,24 @@ def respond(
     temperature,
     top_p,
 ):
-
+    quantization_config = BitsAndBytesConfig(
+        bnb_4bit_compute_dtype="float16",
+        bnb_4bit_quant_storage="uint8",
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_use_double_quant=True,
+        llm_int8_enable_fp32_cpu_offload=False,
+        llm_int8_has_fp16_weight=False,
+        llm_int8_skip_modules=None,
+        llm_int8_threshold=6.0,
+        load_in_4bit=True,
+        load_in_8bit=False,
+        quant_method="bitsandbytes"
+    )
+
+    # Load model and tokenizer
+    model = AutoModelForCausalLM.from_pretrained("Rorical/0-roleplay", return_dict=True, trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained("Rorical/0-roleplay", trust_remote_code=True, quantization_config=quantization_config)
+    tokenizer.chat_template = "{% for message in messages %}{{'<|im_start|>' + ((message['role'] + ':\n') if message['role'] != '' else '') + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>星野:\n' }}{% endif %}" # Be careful that this model used custom chat template.
 
     # Construct the messages for the chat
     messages = [{"role": "", "content": system_message}]
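
The change moves the BitsAndBytesConfig construction and the model/tokenizer loading from module import time into the @spaces.GPU-decorated respond function, so they now run only while a GPU is attached to the call; the trade-off is that the model and tokenizer are re-created on every request.

The file also sets a custom chat template: each turn is wrapped in <|im_start|>/<|im_end|>, non-empty roles are prefixed as "role:", and the generation prompt opens a turn for the character 星野 (Hoshino). Below is a minimal sketch of how that template renders; the message contents and the "老师" user role are illustrative assumptions, only the empty system role appears in the diff.

# Sketch: rendering the custom chat template from this commit.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Rorical/0-roleplay", trust_remote_code=True)
tokenizer.chat_template = (
    "{% for message in messages %}"
    "{{'<|im_start|>' + ((message['role'] + ':\n') if message['role'] != '' else '') "
    "+ message['content'] + '<|im_end|>' + '\n'}}"
    "{% endfor %}"
    "{% if add_generation_prompt %}{{ '<|im_start|>星野:\n' }}{% endif %}"
)

messages = [
    {"role": "", "content": "system prompt goes here"},  # empty role: rendered without a "role:" prefix
    {"role": "老师", "content": "早上好"},                # illustrative user turn
]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# <|im_start|>system prompt goes here<|im_end|>
# <|im_start|>老师:
# 早上好<|im_end|>
# <|im_start|>星野: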
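One detail worth noting: in both the old and new versions, quantization_config is passed to AutoTokenizer.from_pretrained, where it does not quantize anything; in the Transformers API, bitsandbytes quantization takes effect when the config is passed to the model loader. The sketch below shows that usual pattern as an illustration, not the change made in this commit, with argument values mirroring the ones in app.py.

# Sketch (not part of this commit): BitsAndBytesConfig applied on the model loader,
# where 4-bit NF4 quantization actually takes effect.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    "Rorical/0-roleplay",
    trust_remote_code=True,
    quantization_config=quantization_config,  # weights quantized to 4-bit NF4 at load time
    device_map="auto",                        # place the quantized model on the available GPU
)
tokenizer = AutoTokenizer.from_pretrained("Rorical/0-roleplay", trust_remote_code=True)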