File size: 5,428 Bytes
7a7f507
 
 
9a81d74
7a7f507
 
 
fb54ff4
7a7f507
 
 
 
 
 
 
 
 
 
 
5157355
fb54ff4
1abd311
 
7a7f507
 
 
 
 
 
 
 
 
 
 
 
 
 
fb54ff4
7a7f507
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a81d74
7a7f507
 
 
 
 
 
9a81d74
 
7a7f507
0d4a6a6
7a7f507
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f0aaac5
7a7f507
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5157355
7a7f507
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import os
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline, TextStreamer

# Hugging Face access token, read from the environment (raises KeyError if unset);
# used below to authenticate the tokenizer download.
token = os.environ["HUGGINGFACEHUB_API_TOKEN"]

# Hub repository id of the instruction-tuned model this demo serves.
model_id = 'Deci/DeciLM-7B-instruct'

# Instruction-following prompt skeleton; the user's message is substituted
# for {instruction} before generation.
SYSTEM_PROMPT_TEMPLATE = """### System: You are an AI assistant that follows instruction extremely well. Help as much as you can.
### User:

{instruction}

### Assistant:
"""

# Markdown/HTML header rendered at the top of the Gradio page.
DESCRIPTION = """
# <p style="text-align: center; color: #292b47;"> 🤖 <span style='color: #3264ff;'>DeciLM-7B-Instruct:</span> A Fast Instruction-Tuned Model💨 </p>
<span style='color: #292b47;'>Welcome to <a href="https://huggingface.co/Deci/DeciLM-7B-instruct" style="color: #3264ff;">DeciLM-7B-Instruct</a>! DeciLM-7B-Instruct is a 7B parameter instruction-tuned language model and released under the Apache 2.0 license. It's an instruction-tuned model, not a chat-tuned model;  you should prompt the model with an instruction that describes a task, and the model will respond appropriately to complete the task.</span>
<p><span style='color: #292b47;'>Learn more about the base model <a href="https://huggingface.co/Deci/DeciLM-7B" style="color: #3264ff;">DeciLM-7B.</a></span></p>
<p><span style='color: #292b47;'>Experience the speed of DeciLM-7B + Infery. Check out the demo 👉🏽 <a href="https://console.deci.ai/infery-llm-demo" style="color: #3264ff;">here.</a></span></p>

"""

# 4-bit quantization (bitsandbytes) with bfloat16 compute so the 7B model
# fits in a smaller GPU memory footprint.
bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

# No CUDA available: surface a warning in the page header instead of loading.
if not torch.cuda.is_available():
    DESCRIPTION += 'You need a GPU for this example. Try using colab: '

if torch.cuda.is_available():
    # Load the quantized model; trust_remote_code is required for DeciLM's
    # custom architecture code from the Hub.
    model = AutoModelForCausalLM.from_pretrained(model_id,
                                                 device_map="auto",
                                                 trust_remote_code=True,
                                                 quantization_config=bnb_config
                                                 )
else:
    # NOTE(review): generation code below assumes `model` is not None;
    # on a CPU-only host any generate call will fail.
    model = None

# Tokenizer is loaded regardless of device; pad token aliased to EOS so
# generation does not complain about a missing pad token.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
tokenizer.pad_token = tokenizer.eos_token

# Build the full model prompt from a raw user instruction.
def get_prompt_with_template(message: str) -> str:
    """Embed *message* into the instruction slot of SYSTEM_PROMPT_TEMPLATE."""
    return SYSTEM_PROMPT_TEMPLATE.format_map({"instruction": message})

# Function to generate the model's response
def generate_model_response(message: str) -> str:
    """Generate the raw completion (prompt echo + answer) for *message*.

    Args:
        message: The user's instruction text.

    Returns:
        The full decoded sequence, including the prompt template; the
        assistant part is extracted later by ``extract_response_content``.

    Raises:
        gr.Error: when no GPU is available and ``model`` was never loaded
            (previously this crashed with AttributeError on ``model.generate``).
    """
    # Guard against the CPU-only path where module setup left model = None.
    if model is None:
        raise gr.Error("This demo needs a GPU; no CUDA device is available.")
    prompt = get_prompt_with_template(message)
    inputs = tokenizer(prompt, return_tensors='pt')
    # Streams tokens to stdout as they are generated (server-side logging).
    streamer = TextStreamer(tokenizer)
    if torch.cuda.is_available():
        inputs = inputs.to('cuda')
    # Low temperature keeps instruction-following output focused.
    output = model.generate(**inputs,
                            max_new_tokens=4096,
                            do_sample=True,
                            temperature=0.1,
                            streamer=streamer
                            )
    return tokenizer.decode(output[0], skip_special_tokens=True)

def extract_response_content(full_response: str) -> str:
    """Return only the assistant's answer from a full decoded sequence.

    Everything up to and including the first '### Assistant:' marker is
    dropped and the remainder is whitespace-stripped; if the marker is
    missing, the input is returned unchanged.
    """
    marker = "### Assistant:"
    _, found, tail = full_response.partition(marker)
    return tail.strip() if found else full_response

def get_response_with_template(message: str) -> str:
    """End-to-end helper: generate a completion, then strip the prompt echo."""
    return extract_response_content(generate_model_response(message))


# --- Gradio UI: layout, event wiring, and server launch ---
with gr.Blocks(css="style.css") as demo:
    # Page header and a button to duplicate the Space for private use.
    gr.Markdown(DESCRIPTION)
    gr.DuplicateButton(value='Duplicate Space for private use',
                       elem_id='duplicate-button')
    with gr.Group():
        # Output box; despite the name, a plain Textbox (not a chat widget).
        chatbot = gr.Textbox(label='DeciLM-7B-Instruct Output:')
        with gr.Row():
            # User instruction input.
            textbox = gr.Textbox(
                container=False,
                show_label=False,
                placeholder='Type an instruction...',
                scale=10,
                elem_id="textbox"
            )
            submit_button = gr.Button(
                '💬 Submit',
                variant='primary',
                scale=1,
                min_width=0,
                elem_id="submit_button"
            )

            # Clear button to clear the chat history
            clear_button = gr.Button(
                '🗑️ Clear',
                variant='secondary',
            )

    # Clear resets both the input and the output to empty strings.
    clear_button.click(
        fn=lambda: ('',''),
        outputs=[textbox, chatbot],
        queue=False,
        api_name=False,
    )

    # Submit runs the full generate-and-extract pipeline on the input text.
    submit_button.click(
        fn=get_response_with_template,
        inputs=textbox,
        outputs= chatbot,
        queue=False,
        api_name=False,
    )

    # Clickable example prompts; cache_examples=True precomputes their
    # outputs at startup (runs the model once per example).
    gr.Examples(
        examples=[
            'Write detailed instructions for making chocolate chip pancakes.',
            'Write a 250-word article about your love of pancakes.',
            'Explain the plot of Back to the Future in three sentences.',
            'How do I make a trap beat?',
            'A step-by-step guide to learning Python in one month.',
        ],
        inputs=textbox,
        outputs=chatbot,
        fn=get_response_with_template,
        cache_examples=True,
        elem_id="examples"
    )


    # Footer banner image.
    gr.HTML(label="Keep in touch", value="<img src='https://huggingface.co/spaces/Deci/DeciLM-7B-instruct/resolve/main/deci-coder-banner.png' alt='Keep in touch' style='display: block; color: #292b47; margin: auto; max-width: 800px;'>")

# Start the Gradio server (blocking call).
demo.launch()