nikravan committed on
Commit
c808fa7
·
verified ·
1 Parent(s): 876497b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -85,8 +85,8 @@ def predict(message, history, system_prompt, temperature, max_new_tokens, top_k,
85
  attention_mask = attention_mask[:, -CONTEXT_LENGTH:]
86
 
87
  generate_kwargs = dict(
88
- input_ids=input_ids.to(device),
89
- attention_mask=attention_mask.to(device),
90
  streamer=streamer,
91
  do_sample=True,
92
  temperature=temperature,
@@ -115,7 +115,7 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
115
  model = AutoModelForCausalLM.from_pretrained(
116
  MODEL_ID,
117
  device_map="auto",
118
- #quantization_config=quantization_config,
119
  #attn_implementation="flash_attention_2",
120
  )
121
 
 
85
  attention_mask = attention_mask[:, -CONTEXT_LENGTH:]
86
 
87
  generate_kwargs = dict(
88
+ input_ids=input_ids,
89
+ attention_mask=attention_mask,
90
  streamer=streamer,
91
  do_sample=True,
92
  temperature=temperature,
 
115
  model = AutoModelForCausalLM.from_pretrained(
116
  MODEL_ID,
117
  device_map="auto",
118
+ quantization_config=quantization_config,
119
  #attn_implementation="flash_attention_2",
120
  )
121