liuguicheng committed on
Commit
3bc3047
1 Parent(s): d02db1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -1
app.py CHANGED
@@ -1,3 +1,37 @@
1
  import gradio as gr
2
 
3
- gr.Interface.load("models/codefuse-ai/CodeFuse-CodeLlama-34B").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
 
3
+ gr.Interface.load("models/codefuse-ai/CodeFuse-CodeLlama-34B").launch()
4
+
5
+ from transformers import (
6
+ AutoTokenizer,
7
+ AutoModelForCausalLM,
8
+ )
9
+ tokenizer = AutoTokenizer.from_pretrained(mode_name_or_path, trust_remote_code=True, use_fast=False, legacy=False)
10
+ tokenizer.padding_side = "left"
11
+ tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids("<unk>")
12
+ tokenizer.eos_token_id = tokenizer.convert_tokens_to_ids("</s>")
13
+ # 如果显存不够,可以考虑量化加载
14
+ model = AutoModelForCausalLM.from_pretrained(mode_name_or_path,
15
+ trust_remote_code=True,
16
+ load_in_4bit=False,
17
+ device_map="auto",
18
+ torch_dtype=torch.bfloat16)
19
+ model.eval()
20
+
21
+ HUMAN_ROLE_START_TAG = "<|role_start|>human<|role_end|>"
22
+ BOT_ROLE_START_TAG = "<|role_start|>bot<|role_end|>"
23
+
24
+ text = f"{HUMAN_ROLE_START_TAG}请用C++实现求解第n个斐波那契数{BOT_ROLE_START_TAG}"
25
+ inputs = tokenizer(text, return_tensors='pt', padding=True, add_special_tokens=False).to("cuda")
26
+ outputs = model.generate(
27
+ inputs=inputs["input_ids"],
28
+ attention_mask=inputs["attention_mask"],
29
+ max_new_tokens=512,
30
+ top_p=0.95,
31
+ temperature=0.1,
32
+ do_sample=True,
33
+ eos_token_id=tokenizer.eos_token_id,
34
+ pad_token_id=tokenizer.pad_token_id
35
+ )
36
+ gen_text = tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)
37
+ print(gen_text)