File size: 5,428 Bytes
7a7f507
 
 
9a81d74
7a7f507
 
 
fb54ff4
7a7f507
 
 
 
 
 
 
 
 
 
 
5157355
fb54ff4
1abd311
 
7a7f507
 
 
 
 
 
 
 
 
 
 
 
 
 
fb54ff4
7a7f507
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a81d74
7a7f507
 
 
 
 
 
9a81d74
 
7a7f507
0d4a6a6
7a7f507
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f0aaac5
7a7f507
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5157355
7a7f507
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import os
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline, TextStreamer

# Hugging Face access token, read from the environment (raises KeyError if unset);
# used below to authenticate the tokenizer download.
token = os.environ["HUGGINGFACEHUB_API_TOKEN"]

# Hub repository id of the instruction-tuned model this demo serves.
model_id = 'Deci/DeciLM-7B-instruct'

# Instruction-following prompt skeleton; the user's message is substituted
# for {instruction} before generation.
SYSTEM_PROMPT_TEMPLATE = """### System: You are an AI assistant that follows instruction extremely well. Help as much as you can.
### User:

{instruction}

### Assistant:
"""

# Markdown/HTML header rendered at the top of the Gradio page.
DESCRIPTION = """
# <p style="text-align: center; color: #292b47;"> 🤖 <span style='color: #3264ff;'>DeciLM-7B-Instruct:</span> A Fast Instruction-Tuned Model💨 </p>
<span style='color: #292b47;'>Welcome to <a href="https://huggingface.co/Deci/DeciLM-7B-instruct" style="color: #3264ff;">DeciLM-7B-Instruct</a>! DeciLM-7B-Instruct is a 7B parameter instruction-tuned language model and released under the Apache 2.0 license. It's an instruction-tuned model, not a chat-tuned model;  you should prompt the model with an instruction that describes a task, and the model will respond appropriately to complete the task.</span>
<p><span style='color: #292b47;'>Learn more about the base model <a href="https://huggingface.co/Deci/DeciLM-7B" style="color: #3264ff;">DeciLM-7B.</a></span></p>
<p><span style='color: #292b47;'>Experience the speed of DeciLM-7B + Infery. Check out the demo 👉🏽 <a href="https://console.deci.ai/infery-llm-demo" style="color: #3264ff;">here.</a></span></p>

"""

# 4-bit quantization (bitsandbytes) with bfloat16 compute so the 7B model
# fits in a smaller GPU memory footprint.
bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

# No CUDA available: surface a warning in the page header instead of loading.
if not torch.cuda.is_available():
    DESCRIPTION += 'You need a GPU for this example. Try using colab: '

if torch.cuda.is_available():
    # Load the quantized model; trust_remote_code is required for DeciLM's
    # custom architecture code from the Hub.
    model = AutoModelForCausalLM.from_pretrained(model_id,
                                                 device_map="auto",
                                                 trust_remote_code=True,
                                                 quantization_config=bnb_config
                                                 )
else:
    # NOTE(review): generation code below assumes `model` is not None;
    # on a CPU-only host any generate call will fail.
    model = None

# Tokenizer is loaded regardless of device; pad token aliased to EOS so
# generation does not complain about a missing pad token.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
tokenizer.pad_token = tokenizer.eos_token

# Build the full model prompt from a raw user instruction.
def get_prompt_with_template(message: str) -> str:
    """Embed *message* into the instruction slot of SYSTEM_PROMPT_TEMPLATE."""
    return SYSTEM_PROMPT_TEMPLATE.format_map({"instruction": message})

# Function to generate the model's response
def generate_model_response(message: str) -> str:
    """Generate the raw completion (prompt echo + answer) for *message*.

    Args:
        message: The user's instruction text.

    Returns:
        The full decoded sequence, including the prompt template; the
        assistant part is extracted later by ``extract_response_content``.

    Raises:
        gr.Error: when no GPU is available and ``model`` was never loaded
            (previously this crashed with AttributeError on ``model.generate``).
    """
    # Guard against the CPU-only path where module setup left model = None.
    if model is None:
        raise gr.Error("This demo needs a GPU; no CUDA device is available.")
    prompt = get_prompt_with_template(message)
    inputs = tokenizer(prompt, return_tensors='pt')
    # Streams tokens to stdout as they are generated (server-side logging).
    streamer = TextStreamer(tokenizer)
    if torch.cuda.is_available():
        inputs = inputs.to('cuda')
    # Low temperature keeps instruction-following output focused.
    output = model.generate(**inputs,
                            max_new_tokens=4096,
                            do_sample=True,
                            temperature=0.1,
                            streamer=streamer
                            )
    return tokenizer.decode(output[0], skip_special_tokens=True)

def extract_response_content(full_response: str) -> str:
    """Return only the assistant's answer from a full decoded sequence.

    Everything up to and including the first '### Assistant:' marker is
    dropped and the remainder is whitespace-stripped; if the marker is
    missing, the input is returned unchanged.
    """
    marker = "### Assistant:"
    _, found, tail = full_response.partition(marker)
    return tail.strip() if found else full_response

def get_response_with_template(message: str) -> str:
    """End-to-end helper: generate a completion, then strip the prompt echo."""
    return extract_response_content(generate_model_response(message))


# --- Gradio UI: layout, event wiring, and server launch ---
with gr.Blocks(css="style.css") as demo:
    # Page header and a button to duplicate the Space for private use.
    gr.Markdown(DESCRIPTION)
    gr.DuplicateButton(value='Duplicate Space for private use',
                       elem_id='duplicate-button')
    with gr.Group():
        # Output box; despite the name, a plain Textbox (not a chat widget).
        chatbot = gr.Textbox(label='DeciLM-7B-Instruct Output:')
        with gr.Row():
            # User instruction input.
            textbox = gr.Textbox(
                container=False,
                show_label=False,
                placeholder='Type an instruction...',
                scale=10,
                elem_id="textbox"
            )
            submit_button = gr.Button(
                '💬 Submit',
                variant='primary',
                scale=1,
                min_width=0,
                elem_id="submit_button"
            )

            # Clear button to clear the chat history
            clear_button = gr.Button(
                '🗑️ Clear',
                variant='secondary',
            )

    # Clear resets both the input and the output to empty strings.
    clear_button.click(
        fn=lambda: ('',''),
        outputs=[textbox, chatbot],
        queue=False,
        api_name=False,
    )

    # Submit runs the full generate-and-extract pipeline on the input text.
    submit_button.click(
        fn=get_response_with_template,
        inputs=textbox,
        outputs= chatbot,
        queue=False,
        api_name=False,
    )

    # Clickable example prompts; cache_examples=True precomputes their
    # outputs at startup (runs the model once per example).
    gr.Examples(
        examples=[
            'Write detailed instructions for making chocolate chip pancakes.',
            'Write a 250-word article about your love of pancakes.',
            'Explain the plot of Back to the Future in three sentences.',
            'How do I make a trap beat?',
            'A step-by-step guide to learning Python in one month.',
        ],
        inputs=textbox,
        outputs=chatbot,
        fn=get_response_with_template,
        cache_examples=True,
        elem_id="examples"
    )


    # Footer banner image.
    gr.HTML(label="Keep in touch", value="<img src='https://huggingface.co/spaces/Deci/DeciLM-7B-instruct/resolve/main/deci-coder-banner.png' alt='Keep in touch' style='display: block; color: #292b47; margin: auto; max-width: 800px;'>")

# Start the Gradio server (blocking call).
demo.launch()