izh97 committed
Commit e8b0c24 · verified · 1 Parent(s): ef8e891

Upload app.py

Files changed (1)
  1. app.py +46 -0
app.py ADDED
@@ -0,0 +1,46 @@
+ import gradio as gr
+ import torch
+ from peft import AutoPeftModelForCausalLM
+ from transformers import AutoTokenizer, GenerationConfig
+
+ # Load the fine-tuned adapter and its tokenizer once at startup so every
+ # request reuses the same weights instead of reloading them on each call.
+ tokenizer = AutoTokenizer.from_pretrained("izh97/zephyr-beta-climate-change-assistant")
+ model = AutoPeftModelForCausalLM.from_pretrained(
+     "izh97/zephyr-beta-climate-change-assistant",
+     low_cpu_mem_usage=True,
+     return_dict=True,
+     torch_dtype=torch.float16,
+     device_map="cuda",
+ )
+
+ generation_config = GenerationConfig(
+     do_sample=True,
+     top_k=10,
+     temperature=0.2,
+     max_new_tokens=256,
+     pad_token_id=tokenizer.unk_token_id,
+ )
+
+ def ask(text):
+     # Gradio passes the user input as a plain string; wrap it in the
+     # chat-message format that apply_chat_template expects.
+     messages = [{"role": "user", "content": text}]
+     inputs = tokenizer.apply_chat_template(
+         messages,
+         tokenize=True,
+         add_generation_prompt=True,
+         return_dict=True,
+         return_tensors="pt",
+     ).to("cuda")
+
+     input_length = inputs["input_ids"].shape[1]
+     outputs = model.generate(
+         **inputs,
+         generation_config=generation_config,
+         return_dict_in_generate=True,
+     )
+
+     # Drop the prompt tokens so only the newly generated answer is decoded.
+     tokens = outputs.sequences[0, input_length:]
+     return tokenizer.decode(tokens, skip_special_tokens=True)
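
Note: the committed file imports gradio but never builds or launches an interface, so as uploaded the Space has no UI. A minimal sketch of the missing wiring, assuming a plain text-in/text-out demo (the title and labels below are illustrative, not from the commit):

```python
import gradio as gr

# Assumes `ask` is the function defined in app.py above.
demo = gr.Interface(
    fn=ask,
    inputs=gr.Textbox(label="Question"),
    outputs=gr.Textbox(label="Answer"),
    title="Zephyr climate change assistant",
)

demo.launch()
```

On Hugging Face Spaces, a Gradio app.py is expected to construct the interface and call launch() at module scope, so appending something along these lines is the usual pattern.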