Spaces:

Deci
/

DeciLM-7B-instruct

Paused

App Files Files Community

DeciLM-7B-instruct / app.py

harpreetsahota

Update app.py

0d4a6a6 11 months ago

raw

history blame contribute delete

5.43 kB

	import os
	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline, TextStreamer

	token = os.environ["HUGGINGFACEHUB_API_TOKEN"]

	model_id = 'Deci/DeciLM-7B-instruct'

	SYSTEM_PROMPT_TEMPLATE = """### System: You are an AI assistant that follows instruction extremely well. Help as much as you can.
	### User:

	{instruction}

	### Assistant:
	"""

	DESCRIPTION = """
	# <p style="text-align: center; color: #292b47;"> 🤖 <span style='color: #3264ff;'>DeciLM-7B-Instruct:</span> A Fast Instruction-Tuned Model💨 </p>
	<span style='color: #292b47;'>Welcome to <a href="https://huggingface.co/Deci/DeciLM-7B-instruct" style="color: #3264ff;">DeciLM-7B-Instruct</a>! DeciLM-7B-Instruct is a 7B parameter instruction-tuned language model and released under the Apache 2.0 license. It's an instruction-tuned model, not a chat-tuned model; you should prompt the model with an instruction that describes a task, and the model will respond appropriately to complete the task.</span>
	<p><span style='color: #292b47;'>Learn more about the base model <a href="https://huggingface.co/Deci/DeciLM-7B" style="color: #3264ff;">DeciLM-7B.</a></span></p>
	<p><span style='color: #292b47;'>Experience the speed of DeciLM-7B + Infery. Check out the demo 👉🏽 <a href="https://console.deci.ai/infery-llm-demo" style="color: #3264ff;">here.</a></span></p>

	"""

	bnb_config = BitsAndBytesConfig(
	load_in_4bit = True,
	bnb_4bit_compute_dtype=torch.bfloat16
	)

	if not torch.cuda.is_available():
	DESCRIPTION += 'You need a GPU for this example. Try using colab: '

	if torch.cuda.is_available():
	model = AutoModelForCausalLM.from_pretrained(model_id,
	device_map="auto",
	trust_remote_code=True,
	quantization_config=bnb_config
	)
	else:
	model = None

	tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
	tokenizer.pad_token = tokenizer.eos_token

	# Function to construct the prompt using the new system prompt template
	def get_prompt_with_template(message: str) -> str:
	return SYSTEM_PROMPT_TEMPLATE.format(instruction=message)

	# Function to generate the model's response
	def generate_model_response(message: str) -> str:
	prompt = get_prompt_with_template(message)
	inputs = tokenizer(prompt, return_tensors='pt')
	streamer = TextStreamer(tokenizer)
	if torch.cuda.is_available():
	inputs = inputs.to('cuda')
	# Include **generate_kwargs to include the user-defined options
	output = model.generate(**inputs,
	max_new_tokens=4096,
	do_sample=True,
	temperature=0.1,
	streamer=streamer
	)
	return tokenizer.decode(output[0], skip_special_tokens=True)

	def extract_response_content(full_response: str) -> str:
	response_start_index = full_response.find("### Assistant:")
	if response_start_index != -1:
	return full_response[response_start_index + len("### Assistant:"):].strip()
	else:
	return full_response

	def get_response_with_template(message: str) -> str:
	full_response = generate_model_response(message)
	return extract_response_content(full_response)


	with gr.Blocks(css="style.css") as demo:
	gr.Markdown(DESCRIPTION)
	gr.DuplicateButton(value='Duplicate Space for private use',
	elem_id='duplicate-button')
	with gr.Group():
	chatbot = gr.Textbox(label='DeciLM-7B-Instruct Output:')
	with gr.Row():
	textbox = gr.Textbox(
	container=False,
	show_label=False,
	placeholder='Type an instruction...',
	scale=10,
	elem_id="textbox"
	)
	submit_button = gr.Button(
	'💬 Submit',
	variant='primary',
	scale=1,
	min_width=0,
	elem_id="submit_button"
	)

	# Clear button to clear the chat history
	clear_button = gr.Button(
	'🗑️ Clear',
	variant='secondary',
	)

	clear_button.click(
	fn=lambda: ('',''),
	outputs=[textbox, chatbot],
	queue=False,
	api_name=False,
	)

	submit_button.click(
	fn=get_response_with_template,
	inputs=textbox,
	outputs= chatbot,
	queue=False,
	api_name=False,
	)

	gr.Examples(
	examples=[
	'Write detailed instructions for making chocolate chip pancakes.',
	'Write a 250-word article about your love of pancakes.',
	'Explain the plot of Back to the Future in three sentences.',
	'How do I make a trap beat?',
	'A step-by-step guide to learning Python in one month.',
	],
	inputs=textbox,
	outputs=chatbot,
	fn=get_response_with_template,
	cache_examples=True,
	elem_id="examples"
	)


	gr.HTML(label="Keep in touch", value="<img src='https://huggingface.co/spaces/Deci/DeciLM-7B-instruct/resolve/main/deci-coder-banner.png' alt='Keep in touch' style='display: block; color: #292b47; margin: auto; max-width: 800px;'>")

	demo.launch()