DeepSeek-R1-Qwen-14B-BnB4q

Running on Zero

nikravan committed 2 days ago

Commit

c808fa7

verified ·

1 Parent(s): 876497b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -85,8 +85,8 @@ def predict(message, history, system_prompt, temperature, max_new_tokens, top_k,
         attention_mask = attention_mask[:, -CONTEXT_LENGTH:]
     generate_kwargs = dict(
-        input_ids=input_ids.to(device),
-        attention_mask=attention_mask.to(device),
         streamer=streamer,
         do_sample=True,
         temperature=temperature,
@@ -115,7 +115,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     device_map="auto",
-    #quantization_config=quantization_config,
     #attn_implementation="flash_attention_2",
 )

         attention_mask = attention_mask[:, -CONTEXT_LENGTH:]
     generate_kwargs = dict(
+        input_ids=input_ids,
+        attention_mask=attention_mask,
         streamer=streamer,
         do_sample=True,
         temperature=temperature,
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     device_map="auto",
+    quantization_config=quantization_config,
     #attn_implementation="flash_attention_2",
 )