Adrien Dor committed on
Commit
72b4369
·
1 Parent(s): f6baa5b
Files changed (1) hide show
  1. model.py +5 -4
model.py CHANGED
@@ -5,20 +5,21 @@ import torch
5
  from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
6
 
7
  model_id = 'meta-llama/Llama-2-13b-chat-hf'
8
-
9
  if torch.cuda.is_available():
10
- config = AutoConfig.from_pretrained(model_id)
11
  config.pretraining_tp = 1
12
  model = AutoModelForCausalLM.from_pretrained(
13
  model_id,
14
  config=config,
15
  torch_dtype=torch.float16,
16
  load_in_4bit=True,
17
- device_map='auto'
 
18
  )
19
  else:
20
  model = None
21
- tokenizer = AutoTokenizer.from_pretrained(model_id)
22
 
23
 
24
  def get_prompt(message: str, chat_history: list[tuple[str, str]],
 
5
  from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
6
 
7
  model_id = 'meta-llama/Llama-2-13b-chat-hf'
8
+ access_token='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'
9
  if torch.cuda.is_available():
10
+ config = AutoConfig.from_pretrained(model_id, use_auth_token=access_token)
11
  config.pretraining_tp = 1
12
  model = AutoModelForCausalLM.from_pretrained(
13
  model_id,
14
  config=config,
15
  torch_dtype=torch.float16,
16
  load_in_4bit=True,
17
+ device_map='auto',
18
+ use_auth_token=access_token
19
  )
20
  else:
21
  model = None
22
+ tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=access_token)
23
 
24
 
25
  def get_prompt(message: str, chat_history: list[tuple[str, str]],