File size: 10,156 Bytes
1f4f7c2
 
 
 
281f2f2
d6553a2
 
a78d902
1f4f7c2
 
 
5119610
 
 
 
 
 
 
1f4f7c2
 
 
 
 
 
691de0c
1f4f7c2
 
 
 
 
281f2f2
1f4f7c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d6553a2
 
1f4f7c2
d6553a2
1f4f7c2
39c6017
1f4f7c2
e7f4f29
d6553a2
39c6017
 
 
d6553a2
1f4f7c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d6553a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14e8dbc
 
 
 
 
d6553a2
14e8dbc
 
 
 
 
 
d6553a2
 
 
 
 
a78d902
d6553a2
 
 
 
a78d902
 
369f6d3
d6553a2
 
a78d902
d6553a2
 
 
 
 
 
 
 
 
 
 
 
5119610
 
 
 
04b6719
d6553a2
5119610
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
04b6719
d6553a2
5119610
04b6719
 
5119610
 
 
 
04b6719
5119610
 
d6553a2
 
 
 
1f4f7c2
d6553a2
1f4f7c2
 
691de0c
1f4f7c2
 
 
d6553a2
1f4f7c2
 
7767b8c
 
d6553a2
 
 
1f4f7c2
 
 
d6553a2
 
 
 
 
1f4f7c2
 
 
 
 
d6553a2
281f2f2
 
1f4f7c2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
import os
from threading import Thread
from typing import Iterator

import gradio as gr
from gradio.themes.base import Base
from gradio.themes.utils import colors, sizes, fonts
import time
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Markdown blurb describing the demo. It is not referenced anywhere else in
# the visible part of this file.
# NOTE(review): the heading says "1B" while the link targets the 3B-Instruct
# repository, and the checkpoint actually loaded below is `model_id` —
# confirm which wording is intended before displaying this text.
DESCRIPTION = """\
# Llama 3.2 1B Instruct
Llama 3.2 1B is Meta's latest iteration of open LLMs.
This is a demo of [`meta-llama/Llama-3.2-3B-Instruct`](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct), fine-tuned for instruction following.
For more details, please check [our post](https://huggingface.co/blog/llama32).
"""

# Upper bound of the "Max new tokens" slider.
MAX_MAX_NEW_TOKENS = 2048
# Initial value of the "Max new tokens" slider.
DEFAULT_MAX_NEW_TOKENS = 1024
# Prompts longer than this many tokens are truncated in generate(), keeping
# only the most recent tokens. Overridable through the environment variable
# of the same name.
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

# First CUDA device when one is available, otherwise the CPU.
# NOTE(review): `device` is not referenced in the visible code; tensors are
# moved to `model.device` instead.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Hugging Face Hub repository providing both the tokenizer and the weights.
# NOTE(review): the repository name ends in "-GGUF" but no `gguf_file` is
# passed to from_pretrained — presumably the repo also ships regular
# weights; confirm.
model_id = "ussipan/SipanGPT-0.2-Llama-3.2-1B-GGUF"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# Switch the model to evaluation (inference) mode.
model.eval()

# Main Gradio inference function
def generate(
    message: str,
    chat_history: list[dict],
    max_new_tokens: int = 1024,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> Iterator[tuple[str, list[dict]]]:
    """Stream a sampled model reply to ``message`` given the prior chat.

    Args:
        message: The new user prompt.
        chat_history: Previous turns as message dicts (``role`` / ``content``
            keys, optionally ``metadata``). ``None`` is treated as empty.
        max_new_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        top_k: Number of highest-probability tokens kept for sampling.
        repetition_penalty: Penalty applied to repeated tokens.

    Yields:
        ``("", conversation)`` after every streamed text chunk. The empty
        string is meant for the textbox output (clearing it) and
        ``conversation`` is the full message list, whose last entry is the
        assistant reply accumulated so far.
    """
    history = chat_history or []
    # The 'metadata' key (presumably attached by the Chatbot component) is
    # stripped so that only the remaining keys reach the chat template.
    conversation = [
        {key: value for key, value in turn.items() if key != 'metadata'}
        for turn in history
    ]
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        # Keep only the most recent tokens so the newest turns survive.
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Se recortó la entrada de la conversación porque era más larga que {MAX_INPUT_TOKEN_LENGTH} tokens.")
    input_ids = input_ids.to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = {
        "input_ids": input_ids,
        "streamer": streamer,
        "max_new_tokens": max_new_tokens,
        "do_sample": True,
        "top_p": top_p,
        "top_k": top_k,
        "temperature": temperature,
        "num_beams": 1,
        "repetition_penalty": repetition_penalty,
    }
    # Generation runs in a worker thread so this generator can forward text
    # chunks from the streamer as soon as they are produced.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    conversation.append({"role": "assistant", "content": ""})
    reply = ""
    for text in streamer:
        reply += text
        conversation[-1]['content'] = reply
        yield "", conversation

    # The streamer is exhausted, so generation is finishing; wait for the
    # worker instead of leaving an un-joined thread behind on every request.
    worker.join()


# Implementing Gradio 5 features and building a ChatInterface UI yourself
# HTML passed as `placeholder=` to the gr.Chatbot created further down: an
# image, the model title, a "forked from" attribution link, two
# Spanish-language notices (the model is experimental; it was trained on a
# 5.4k-conversation dataset) and a link to that dataset.
PLACEHOLDER = """<div style="padding: 20px; text-align: center; display: flex; flex-direction: column; align-items: center;">
   <img src="https://corladlalibertad.org.pe/wp-content/uploads/2024/01/USS.jpg" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; margin-bottom: 10px;">
   <h1 style="font-size: 28px; margin: 0;">SipánGPT 0.2 Llama 3.2</h1>
   <p style="font-size: 8px; margin: 5px 0 0; opacity: 0.65;">
       <a href="https://huggingface.co/spaces/ysharma/Llama3-2_with_Gradio-5" target="_blank" style="color: inherit; text-decoration: none;">Forked from @ysharma</a>
   </p>
   <p style="font-size: 12px; margin: 5px 0 0; opacity: 0.9;">Este modelo es experimental, puede generar alucinaciones o respuestas incorrectas.</p>
   <p style="font-size: 12px; margin: 5px 0 0; opacity: 0.9;">Entrenado con un dataset de 5.4k conversaciones.</p>
   <p style="font-size: 12px; margin: 5px 0 0; opacity: 0.9;">
       <a href="https://huggingface.co/datasets/ussipan/sipangpt" target="_blank" style="color: inherit; text-decoration: none;">Ver el dataset aquí</a>
   </p>
</div>"""

def handle_retry(history, retry_data: gr.RetryData):
    """Regenerate the reply for the message selected by a retry event.

    The history is cut just before ``retry_data.index`` and the message at
    that index is sent to ``generate`` again with fixed sampling settings.
    Yields whatever ``generate`` yields.
    """
    idx = retry_data.index
    earlier_turns = history[:idx]
    prompt = history[idx]['content']
    yield from generate(
        prompt,
        chat_history=earlier_turns,
        max_new_tokens=1024,
        temperature=0.6,
        top_p=0.9,
        top_k=50,
        repetition_penalty=1.2,
    )

def handle_like(data: gr.LikeData):
    """Print the voted-on response, prefixed by whether it was liked."""
    prefix = (
        "Votaste positivamente esta respuesta: "
        if data.liked
        else "Votaste negativamente esta respuesta: "
    )
    print(prefix, data.value)

def handle_undo(history, undo_data: gr.UndoData):
    """Roll the chat back to just before the undone message.

    Returns a pair: the history up to (excluding) ``undo_data.index`` and
    the ``content`` of the message at that index.
    """
    idx = undo_data.index
    return history[:idx], history[idx]['content']

def chat_examples_fill(data: gr.SelectData):
    """Run the selected suggestion through ``generate`` with an empty history.

    The prompt is read from ``data.value['text']``; sampling settings are
    fixed. Yields whatever ``generate`` yields.
    """
    suggestion_text = data.value['text']
    yield from generate(
        suggestion_text,
        chat_history=[],
        max_new_tokens=1024,
        temperature=0.6,
        top_p=0.9,
        top_k=50,
        repetition_penalty=1.2,
    )

class SipanGPTTheme(Base):
    """Gradio theme built on ``Base`` with custom green primary/secondary hues.

    The constructor forwards the hue, size and font choices to ``Base`` and
    then overrides a set of theme variables for light mode and for dark mode.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.Color(
            name="custom_green",
            c50="#f0fde4",
            c100="#e1fbc8",
            c200="#c3f789",
            c300="#a5f34a",
            c400="#7dfa00",  # primary color
            c500="#5ef000",
            c600="#4cc700",
            c700="#39a000",
            c800="#2b7900",
            c900="#1d5200",
            c950="#102e00",
        ),
        secondary_hue: colors.Color | str = colors.Color(
            name="custom_secondary_green",
            c50="#edfce0",
            c100="#dbf9c1",
            c200="#b7f583",
            c300="#93f145",
            c400="#5fed00",  # secondary color
            c500="#4ed400",
            c600="#3fad00",
            c700="#308700",
            c800="#236100",
            c900="#153b00",
            c950="#0a1f00",
        ),
        neutral_hue: colors.Color | str = colors.gray,
        spacing_size: sizes.Size | str = sizes.spacing_md,
        radius_size: sizes.Size | str = sizes.radius_md,
        text_size: sizes.Size | str = sizes.text_md,
        font: fonts.Font | str | list[fonts.Font | str] = [
            fonts.GoogleFont("Exo 2"),
            "ui-sans-serif",
            "system-ui",
            "sans-serif",
        ],
        font_mono: fonts.Font | str | list[fonts.Font | str] = [
            fonts.GoogleFont("Fraunces"),
            "ui-monospace",
            "monospace",
        ],
    ):
        # Hand the palette, sizing and font choices to the base theme.
        base_options = {
            "primary_hue": primary_hue,
            "secondary_hue": secondary_hue,
            "neutral_hue": neutral_hue,
            "spacing_size": spacing_size,
            "radius_size": radius_size,
            "text_size": text_size,
            "font": font,
            "font_mono": font_mono,
        }
        super().__init__(**base_options)

        # Theme-variable overrides used in light mode.
        light_mode = {
            "body_background_fill": "*neutral_50",
            "body_text_color": "*neutral_900",
            "color_accent_soft": "*secondary_200",
            "button_primary_background_fill": "*primary_600",
            "button_primary_background_fill_hover": "*primary_500",
            "button_primary_text_color": "*neutral_50",
            "block_title_text_color": "*primary_600",
            "input_background_fill": "*neutral_200",
            "input_border_color": "*neutral_300",
            "input_placeholder_color": "*neutral_500",
            "block_background_fill": "*neutral_100",
            "block_label_background_fill": "*primary_100",
            "block_label_text_color": "*neutral_800",
            "checkbox_background_color": "*neutral_200",
            "checkbox_border_color": "*primary_500",
            "loader_color": "*primary_500",
            "slider_color": "*primary_500",
        }

        # The same variables, overridden for dark mode.
        # NOTE(review): "*neutral_850" references a shade that the hues
        # defined above do not list (they go 800, 900, 950) — confirm that
        # it resolves as intended.
        dark_mode = {
            "body_background_fill_dark": "*neutral_900",
            "body_text_color_dark": "*neutral_50",
            "color_accent_soft_dark": "*secondary_800",
            "button_primary_background_fill_dark": "*primary_700",
            "button_primary_background_fill_hover_dark": "*primary_600",
            "button_primary_text_color_dark": "*neutral_950",
            "block_title_text_color_dark": "*primary_400",
            "input_background_fill_dark": "*neutral_800",
            "input_border_color_dark": "*neutral_700",
            "input_placeholder_color_dark": "*neutral_400",
            "block_background_fill_dark": "*neutral_850",
            "block_label_background_fill_dark": "*primary_900",
            "block_label_text_color_dark": "*neutral_200",
            "checkbox_background_color_dark": "*neutral_800",
            "checkbox_border_color_dark": "*primary_600",
            "loader_color_dark": "*primary_400",
            "slider_color_dark": "*primary_600",
        }

        self.set(**light_mode, **dark_mode)

# Instantiate the custom theme
theme = SipanGPTTheme()

# Hand-built chat UI: a chatbot, a prompt textbox, a collapsible panel of
# sampling controls, and the event wiring that connects them to the handlers.
with gr.Blocks(theme=theme, fill_height=True) as demo:
    with gr.Column(elem_id="container", scale=1):
        # Conversation display; type="messages" means the history is a list
        # of message dicts, which is what generate() builds and yields.
        chatbot = gr.Chatbot(
            label="SipánGPT 0.2 Llama 3.2",
            show_label=False,
            type="messages",
            scale=1,
            # Starter prompts; chat_examples_fill() reads the 'text' key of
            # the one that is selected.
            suggestions = [
                {"text": "Háblame del reglamento de estudiantes de la universidad"},
                {"text": "Qué becas ofrece la universidad"},
                {"text": "Hablame sobre el temario del examen de admisión para pregrado"},
                {"text": "Cuando se fundó la universidad?"},
                ],
            placeholder = PLACEHOLDER,
            )

    # Prompt input with a built-in submit button.
    msg = gr.Textbox(submit_btn=True, show_label=False)
    # Sampling controls; their values are passed to generate() on submit.
    with gr.Accordion('Additional inputs', open=False):
        max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS, )
        temperature = gr.Slider(label="Temperature",minimum=0.1, maximum=4.0, step=0.1, value=0.6,)
        top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9, )
        top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50, )
        repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2, )

    # Submit: generate() yields ("", conversation), written to the textbox
    # and the chatbot respectively.
    msg.submit(generate, [msg, chatbot, max_new_tokens, temperature, top_p, top_k, repetition_penalty], [msg, chatbot])
    # Retry: only the history is passed, so handle_retry() uses its own fixed
    # sampling settings rather than the slider values.
    chatbot.retry(handle_retry, chatbot, [msg, chatbot])
    # Like/dislike: handle_like() only prints the vote; no inputs or outputs.
    chatbot.like(handle_like, None, None)
    # Undo: handle_undo() returns (truncated history, prompt), written to the
    # chatbot and the textbox respectively.
    chatbot.undo(handle_undo, chatbot, [chatbot, msg])
    # Suggestion click: chat_examples_fill() runs generate() with an empty
    # history.
    chatbot.suggestion_select(chat_examples_fill, None, [msg, chatbot] )


# Start the Gradio server when the module is executed.
demo.launch()