Allen Park committed on
Commit d59c183 · 1 Parent(s): f833ec9

comment out device='cuda' & input generate_kwargs

Files changed (1): app.py +10 -6
app.py CHANGED
@@ -5,7 +5,7 @@ import gradio as gr
 import spaces
 
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
-device = "cuda" # for GPU usage or "cpu" for CPU usage
+# device = "cuda" # for GPU usage or "cpu" for CPU usage
 
 tokenizer = AutoTokenizer.from_pretrained("PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct")
 model = AutoModelForCausalLM.from_pretrained("PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct", torch_dtype=torch.float16, device_map="auto")
@@ -33,13 +33,17 @@ Your output should be in JSON FORMAT with the keys "REASONING" and "SCORE":
 @spaces.GPU()
 def model_call(question, document, answer):
     NEW_FORMAT = PROMPT.format(question=question, document=document, answer=answer)
-    inputs = tokenizer(NEW_FORMAT, return_tensors="pt").to(device)
-    model.generate(
-        inputs.input_ids,
-        attention_mask=inputs.attention_mask,
+    inputs = tokenizer(NEW_FORMAT, return_tensors="pt")
+    input_ids = inputs.input_ids.to(model.device)
+    attention_mask = inputs.attention_mask
+    generate_kwargs = dict(
+        input_ids=input_ids,
+        do_sample=True,
+        attention_mask=attention_mask,
         pad_token_id=tokenizer.eos_token_id,
     )
-    generated_text = tokenizer.decode(inputs.input_ids[0])
+    outputs = model.generate(**generate_kwargs)
+    generated_text = tokenizer.decode(outputs[0])
     print(generated_text)
     return generated_text
 
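For reference, a condensed, self-contained sketch of the pattern this commit lands: tokenize on CPU, move the ids to whatever device device_map="auto" selected, pack the arguments into generate_kwargs, and capture generate()'s return value. The old code discarded that return value and decoded inputs.input_ids[0], i.e. it printed the prompt back out. The PROMPT body below is an abridged stand-in, since the full template sits outside this hunk; the sketch also moves attention_mask to model.device, which the diff leaves on CPU.

# Sketch of the pattern introduced by this commit; not a verbatim copy of app.py.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = "PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, torch_dtype=torch.float16, device_map="auto"
)

# Abridged stand-in for app.py's PROMPT template (full text not in this diff).
PROMPT = "{question}\n{document}\n{answer}"

def model_call(question, document, answer):
    new_format = PROMPT.format(question=question, document=document, answer=answer)
    inputs = tokenizer(new_format, return_tensors="pt")
    input_ids = inputs.input_ids.to(model.device)  # follow wherever device_map="auto" put the model
    attention_mask = inputs.attention_mask.to(model.device)
    generate_kwargs = dict(
        input_ids=input_ids,
        do_sample=True,
        attention_mask=attention_mask,
        pad_token_id=tokenizer.eos_token_id,
    )
    outputs = model.generate(**generate_kwargs)  # capture the generated token ids
    return tokenizer.decode(outputs[0])          # prompt followed by the completion

Note that decoding outputs[0] yields the prompt followed by the completion; slicing off the first input_ids.shape[-1] tokens before decoding would return only the newly generated text.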