Tonic committed
Commit 381bab8 · Parent: 20c9b6d

Update app.py

Files changed (1)
  1. app.py +26 -20
app.py CHANGED
@@ -5,6 +5,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import gradio as gr
 import sentencepiece
+import gc
 
 title = "# Welcome to 🙋🏻‍♂️Tonic's🌷Tulu Chat!"
 description = """[allenai/tulu-2-dpo-7b](https://huggingface.co/allenai/tulu-2-dpo-7b) and larger Tulu-2 models are Instruct Llama Finetunes using the [mistralai/Mistral-7B](https://huggingface.co/mistralai/Mistral-7B-v0.1) recipe. You can use [allenai/tulu-2-13b](https://huggingface.co/allenai/tulu-2-13b) here via API using Gradio by scrolling down and clicking Use 'Via API' or privately by [cloning this space on huggingface](https://huggingface.co/spaces/Tonic1/TuluDemo?duplicate=true) See also the large model here : [allenai/tulu-2-dpo-70b](https://huggingface.co/allenai/tulu-2-dpo-70b) . [Join my active builders' server on discord](https://discord.gg/VqTxc76K3u). Let's build together!. [Add this Space as a discord bot to your server by clicking this link](https://discord.com/oauth2/authorize?client_id=1176628808212828231&scope=bot+applications.commands&permissions=326417525824). Big thanks to 🤗Huggingface Organisation for the🫂Community Grant"""
@@ -39,26 +40,31 @@ class TuluChatBot:
         return prompt
 
     def Tulu(self, user_message, temperature, max_new_tokens, top_p, repetition_penalty, do_sample):
-        prompt = self.format_prompt(user_message)
-        inputs = self.tokenizer(prompt, return_tensors='pt', add_special_tokens=True)
-        input_ids = inputs["input_ids"].to(self.model.device)
-        attention_mask = inputs["attention_mask"].to(self.model.device)
-
-
-        output_ids = self.model.generate(
-            input_ids,
-            attention_mask=attention_mask,
-            max_length=input_ids.shape[1] + max_new_tokens,
-            temperature=temperature,
-            top_p=top_p,
-            repetition_penalty=repetition_penalty,
-            do_sample=do_sample
-        )
-
-        response = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
-        response = response.strip()
-        response = response.split("<|assistant|>\n")[-1]
-        return response
+        try:
+            prompt = self.format_prompt(user_message)
+            inputs = self.tokenizer(prompt, return_tensors='pt', add_special_tokens=True)
+            input_ids = inputs["input_ids"].to(self.model.device)
+            attention_mask = inputs["attention_mask"].to(self.model.device)
+
+
+            output_ids = self.model.generate(
+                input_ids,
+                attention_mask=attention_mask,
+                max_length=input_ids.shape[1] + max_new_tokens,
+                temperature=temperature,
+                top_p=top_p,
+                repetition_penalty=repetition_penalty,
+                do_sample=do_sample
+            )
+
+            response = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
+            response = response.strip()
+            response = response.split("<|assistant|>\n")[-1]
+            return response
+        finally:
+            del input_ids, attention_mask, output_ids
+            gc.collect()
+            torch.cuda.empty_cache()
 
 def gradio_Tulu(user_message, system_message, max_new_tokens, temperature, top_p, repetition_penalty, do_sample):
     Tulu_bot.set_system_message(system_message)
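What the commit does: `Tulu()` is wrapped in a `try`/`finally` block that deletes the intermediate tensors, runs `gc.collect()`, and calls `torch.cuda.empty_cache()` after every generation, so CUDA memory cached by one request is released before the next one arrives (presumably aimed at the shared Space GPU). One caveat worth noting: if an early statement such as the tokenizer call raises, `input_ids` and `output_ids` are never bound, and the `del` in `finally` will itself fail with an `UnboundLocalError`. Below is a minimal, self-contained sketch of the same cleanup pattern that avoids that edge case; the helper name `generate_reply` and the standalone `model`/`tokenizer` arguments are illustrative, not part of app.py.

```python
# Minimal sketch of the cleanup pattern this commit introduces.
# `generate_reply` is a hypothetical standalone helper, not the app's API.
import gc
import torch

def generate_reply(model, tokenizer, prompt, max_new_tokens=100):
    # Pre-bind the names so the `del` in `finally` is safe even if an
    # earlier statement raises before they are assigned.
    input_ids = attention_mask = output_ids = None
    try:
        inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
        input_ids = inputs["input_ids"].to(model.device)
        attention_mask = inputs["attention_mask"].to(model.device)
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=input_ids.shape[1] + max_new_tokens,
        )
        return tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()
    finally:
        # Drop the last references, collect Python garbage, then return
        # cached CUDA blocks to the driver.
        del input_ids, attention_mask, output_ids
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
```

Note that `torch.cuda.empty_cache()` only releases blocks held by PyTorch's caching allocator; it cannot free tensors that are still referenced, which is why dropping the references with `del` has to come first.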