Allen Park committed on
Commit
d901060
·
1 Parent(s): d59c183

uncommented device=cuda

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -5,7 +5,7 @@ import gradio as gr
5
  import spaces
6
 
7
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
8
- # device = "cuda" # for GPU usage or "cpu" for CPU usage
9
 
10
  tokenizer = AutoTokenizer.from_pretrained("PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct")
11
  model = AutoModelForCausalLM.from_pretrained("PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct", torch_dtype=torch.float16, device_map="auto")
@@ -34,7 +34,7 @@ Your output should be in JSON FORMAT with the keys "REASONING" and "SCORE":
34
  def model_call(question, document, answer):
35
  NEW_FORMAT = PROMPT.format(question=question, document=document, answer=answer)
36
  inputs = tokenizer(NEW_FORMAT, return_tensors="pt")
37
- input_ids = inputs.input_ids.to(model.device)
38
  attention_mask = inputs.attention_mask
39
  generate_kwargs = dict(
40
  input_ids=input_ids,
 
5
  import spaces
6
 
7
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
8
+ device = "cuda" # for GPU usage or "cpu" for CPU usage
9
 
10
  tokenizer = AutoTokenizer.from_pretrained("PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct")
11
  model = AutoModelForCausalLM.from_pretrained("PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct", torch_dtype=torch.float16, device_map="auto")
 
34
  def model_call(question, document, answer):
35
  NEW_FORMAT = PROMPT.format(question=question, document=document, answer=answer)
36
  inputs = tokenizer(NEW_FORMAT, return_tensors="pt")
37
+ input_ids = inputs.input_ids.to(device)
38
  attention_mask = inputs.attention_mask
39
  generate_kwargs = dict(
40
  input_ids=input_ids,