allenpark committed on
Commit
80390bf
·
verified ·
1 Parent(s): f03cfb2

Remove .to(device) calls

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -5,13 +5,13 @@ import gradio as gr
5
  import spaces
6
 
7
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
8
- if torch.cuda.is_available():
9
- device = "cuda:0"
10
- else:
11
- device = "cpu"
12
 
13
  tokenizer = AutoTokenizer.from_pretrained("PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct")
14
- model = AutoModelForCausalLM.from_pretrained("PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct", torch_dtype=torch.float16, device_map="auto").to(device)
15
  model.gradient_checkpointing_enable()
16
 
17
  # def load_model_and_tokenizer(model_choice):
@@ -79,10 +79,10 @@ HEADER = """
79
  @spaces.GPU()
80
  # def model_call(question, document, answer, tokenizer, model):
81
  def model_call(question, document, answer):
82
- device = next(model.parameters()).device
83
  NEW_FORMAT = PROMPT.format(question=question, document=document, answer=answer)
84
  print("ENTIRE NEW_FORMAT", NEW_FORMAT)
85
- inputs = tokenizer(NEW_FORMAT, return_tensors="pt").to(device)
86
  print("INPUTS", inputs)
87
  input_ids = inputs.input_ids
88
  attention_mask = inputs.attention_mask
 
5
  import spaces
6
 
7
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
8
+ # if torch.cuda.is_available():
9
+ # device = "cuda:0"
10
+ # else:
11
+ # device = "cpu"
12
 
13
  tokenizer = AutoTokenizer.from_pretrained("PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct")
14
+ model = AutoModelForCausalLM.from_pretrained("PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct", torch_dtype=torch.float16, device_map="auto")
15
  model.gradient_checkpointing_enable()
16
 
17
  # def load_model_and_tokenizer(model_choice):
 
79
  @spaces.GPU()
80
  # def model_call(question, document, answer, tokenizer, model):
81
  def model_call(question, document, answer):
82
+ # device = next(model.parameters()).device
83
  NEW_FORMAT = PROMPT.format(question=question, document=document, answer=answer)
84
  print("ENTIRE NEW_FORMAT", NEW_FORMAT)
85
+ inputs = tokenizer(NEW_FORMAT, return_tensors="pt")
86
  print("INPUTS", inputs)
87
  input_ids = inputs.input_ids
88
  attention_mask = inputs.attention_mask