msy127 committed
Commit 91655f7 · 1 Parent(s): 7adfdd5

Upload app.py

Files changed (1)
  1. app.py +3 -20
app.py CHANGED
@@ -1,25 +1,8 @@
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-model_name = "NousResearch/Llama-2-7b-chat-hf"
-
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.bfloat16,
-)
-
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.bfloat16,
-    quantization_config=bnb_config)
-model.config.use_cache = False
-
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-tokenizer.pad_token = tokenizer.eos_token
-tokenizer.padding_side = "right"  # Fix weird overflow issue with fp16 training
+tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
+model = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
 
 def predict(message, chatbot, temperature=0.9, max_new_tokens=256, top_p=0.6, repetition_penalty=1.0,):
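
For reference, a minimal sketch of how the simplified loading above might be exercised end to end. The sampling values mirror predict()'s defaults; the explicit device selection is an assumption here, since this commit removes it from app.py, and the prompt string is purely illustrative.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf")
model = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-chat-hf")

# Assumption: restore the device pick this commit removed; without it the
# new code loads full-precision weights on CPU by default.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

prompt = "Hello, how are you?"  # illustrative input
inputs = tokenizer(prompt, return_tensors="pt").to(device)
with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        do_sample=True,
        temperature=0.9,          # predict()'s default
        max_new_tokens=256,       # predict()'s default
        top_p=0.6,                # predict()'s default
        repetition_penalty=1.0,   # predict()'s default
    )
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))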