jhangmez committed on
Commit
2a5ca46
·
1 Parent(s): 418cb98

Se modificó para la versión 5

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. app.py +67 -101
README.md CHANGED
@@ -16,4 +16,4 @@ short_description: SipánGPT based on Llama-3.2-1B
16
  ---
17
 
18
  SipánGPT 0.3 Llama 3.2
19
- Entrenado con 5400 conversaciones.
 
16
  ---
17
 
18
  SipánGPT 0.3 Llama 3.2
19
+ Entrenado con 50000 conversaciones.
app.py CHANGED
@@ -1,12 +1,8 @@
1
  import os
 
2
  from threading import Thread
3
  from typing import Iterator
4
-
5
  import gradio as gr
6
- from gradio.themes.base import Base
7
- from gradio.themes.utils import colors, sizes, fonts
8
- import time
9
- import torch
10
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
11
 
12
  DESCRIPTION = """\
@@ -16,12 +12,7 @@ This is a demo of [`meta-llama/Llama-3.2-3B-Instruct`](https://huggingface.co/me
16
  For more details, please check [our post](https://huggingface.co/blog/llama32).
17
  """
18
 
19
- MAX_MAX_NEW_TOKENS = 2048
20
- DEFAULT_MAX_NEW_TOKENS = 1024
21
- MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
22
-
23
- device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
24
-
25
  model_id = "ussipan/SipanGPT-0.3-Llama-3.2-1B-GGUF"
26
  tokenizer = AutoTokenizer.from_pretrained(model_id)
27
  model = AutoModelForCausalLM.from_pretrained(
@@ -31,82 +22,48 @@ model = AutoModelForCausalLM.from_pretrained(
31
  )
32
  model.eval()
33
 
34
- # Main Gradio inference function
35
  def generate(
36
  message: str,
37
- chat_history: list[tuple[str, str]],
38
  max_new_tokens: int = 1024,
39
  temperature: float = 0.6,
40
- top_p: float = 0.9,
41
- top_k: int = 50,
42
- repetition_penalty: float = 1.2,
43
  ) -> Iterator[str]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- conversation = [{k: v for k, v in d.items() if k != 'metadata'} for d in chat_history]
46
- conversation.append({"role": "user", "content": message})
47
-
48
- input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
49
- if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
50
- input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
51
- gr.Warning(f"Se recortó la entrada de la conversación porque era más larga que {MAX_INPUT_TOKEN_LENGTH} tokens.")
52
- input_ids = input_ids.to(model.device)
53
-
54
- streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
55
- generate_kwargs = dict(
56
- {"input_ids": input_ids},
57
  streamer=streamer,
58
  max_new_tokens=max_new_tokens,
59
- do_sample=True,
60
- top_p=top_p,
61
- top_k=top_k,
62
  temperature=temperature,
63
- num_beams=1,
64
- repetition_penalty=repetition_penalty,
65
  )
66
- t = Thread(target=model.generate, kwargs=generate_kwargs)
67
- t.start()
 
68
 
69
  conversation.append({"role": "assistant", "content": ""})
70
- outputs = []
 
71
  for text in streamer:
72
- outputs.append(text)
73
- bot_response = "".join(outputs)
74
- conversation[-1]['content'] = bot_response
75
  yield "", conversation
76
 
77
-
78
- # Implementing Gradio 5 features and building a ChatInterface UI yourself
79
- PLACEHOLDER = """<div style="padding: 20px; text-align: center; display: flex; flex-direction: column; align-items: center;">
80
- <img src="https://corladlalibertad.org.pe/wp-content/uploads/2024/01/USS.jpg" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; margin-bottom: 10px;">
81
- <h1 style="font-size: 28px; margin: 0;">SipánGPT 0.3 Llama 3.2</h1>
82
- <p style="font-size: 8px; margin: 5px 0 0; opacity: 0.65;">
83
- <a href="https://huggingface.co/spaces/ysharma/Llama3-2_with_Gradio-5" target="_blank" style="color: inherit; text-decoration: none;">Forked from @ysharma</a>
84
- </p>
85
- <p style="font-size: 12px; margin: 5px 0 0; opacity: 0.9;">Este modelo es experimental, puede generar alucinaciones o respuestas incorrectas.</p>
86
- <p style="font-size: 12px; margin: 5px 0 0; opacity: 0.9;">Entrenado con un dataset de 50k conversaciones.</p>
87
- <p style="font-size: 12px; margin: 5px 0 0; opacity: 0.9;">
88
- <a href="https://huggingface.co/datasets/ussipan/sipangpt" target="_blank" style="color: inherit; text-decoration: none;">Ver el dataset aquí</a>
89
- </p>
90
- </div>"""
91
-
92
- def handle_retry(history, retry_data: gr.RetryData):
93
- new_history = history[:retry_data.index]
94
- previous_prompt = history[retry_data.index]['content']
95
- yield from generate(previous_prompt, chat_history = new_history, max_new_tokens = 1024, temperature = 0.6, top_p = 0.9, top_k = 50, repetition_penalty = 1.2)
96
-
97
  def handle_like(data: gr.LikeData):
98
- if data.liked:
99
- print("Votaste positivamente esta respuesta: ", data.value)
100
- else:
101
- print("Votaste negativamente esta respuesta: ", data.value)
102
-
103
- def handle_undo(history, undo_data: gr.UndoData):
104
- chatbot = history[:undo_data.index]
105
- prompt = history[undo_data.index]['content']
106
- return chatbot, prompt
107
-
108
- def chat_examples_fill(data: gr.SelectData):
109
- yield from generate(data.value['text'], chat_history = [], max_new_tokens = 1024, temperature = 0.6, top_p = 0.9, top_k = 50, repetition_penalty = 1.2)
110
 
111
  class SipanGPTTheme(Base):
112
  def __init__(
@@ -206,38 +163,47 @@ class SipanGPTTheme(Base):
206
  slider_color_dark="*primary_600",
207
  )
208
 
209
- # Uso del tema
210
  theme = SipanGPTTheme()
211
 
212
- with gr.Blocks(theme=theme, fill_height=True) as demo:
213
- with gr.Column(elem_id="container", scale=1):
214
- chatbot = gr.Chatbot(
215
- label="SipánGPT 0.3 Llama 3.2",
216
- show_label=False,
217
- type="messages",
218
- scale=1,
219
- suggestions = [
220
- {"text": "Háblame del reglamento de estudiantes de la universidad"},
221
- {"text": "Qué becas ofrece la universidad"},
222
- {"text": "Hablame sobre el temario del examen de admisión para pregrado"},
223
- {"text": "Cuando se fundó la universidad?"},
224
- ],
225
- placeholder = PLACEHOLDER,
226
- )
227
-
228
- msg = gr.Textbox(submit_btn=True, show_label=False)
229
- with gr.Accordion('Additional inputs', open=False):
230
- max_new_tokens = gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS, )
231
- temperature = gr.Slider(label="Temperature",minimum=0.1, maximum=4.0, step=0.1, value=0.6,)
232
- top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9, )
233
- top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50, )
234
- repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2, )
235
-
236
- msg.submit(generate, [msg, chatbot, max_new_tokens, temperature, top_p, top_k, repetition_penalty], [msg, chatbot])
237
- chatbot.retry(handle_retry, chatbot, [msg, chatbot])
238
- chatbot.like(handle_like, None, None)
239
- chatbot.undo(handle_undo, chatbot, [chatbot, msg])
240
- chatbot.suggestion_select(chat_examples_fill, None, [msg, chatbot] )
 
 
 
 
 
 
 
241
 
 
 
 
242
 
243
  demo.launch()
 
1
  import os
2
+ import torch
3
  from threading import Thread
4
  from typing import Iterator
 
5
  import gradio as gr
 
 
 
 
6
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
7
 
8
  DESCRIPTION = """\
 
12
  For more details, please check [our post](https://huggingface.co/blog/llama32).
13
  """
14
 
15
+ # Model setup
 
 
 
 
 
16
  model_id = "ussipan/SipanGPT-0.3-Llama-3.2-1B-GGUF"
17
  tokenizer = AutoTokenizer.from_pretrained(model_id)
18
  model = AutoModelForCausalLM.from_pretrained(
 
22
  )
23
  model.eval()
24
 
 
25
  def generate(
26
  message: str,
27
+ chat_history: list,
28
  max_new_tokens: int = 1024,
29
  temperature: float = 0.6,
 
 
 
30
  ) -> Iterator[str]:
31
+ conversation = chat_history + [{"role": "user", "content": message}]
32
+
33
+ input_ids = tokenizer.apply_chat_template(
34
+ conversation,
35
+ add_generation_prompt=True,
36
+ return_tensors="pt"
37
+ ).to(model.device)
38
+
39
+ streamer = TextIteratorStreamer(
40
+ tokenizer,
41
+ timeout=20.0,
42
+ skip_prompt=True,
43
+ skip_special_tokens=True
44
+ )
45
 
46
+ generation_kwargs = dict(
47
+ input_ids=input_ids,
 
 
 
 
 
 
 
 
 
 
48
  streamer=streamer,
49
  max_new_tokens=max_new_tokens,
 
 
 
50
  temperature=temperature,
51
+ do_sample=True,
 
52
  )
53
+
54
+ thread = Thread(target=model.generate, kwargs=generation_kwargs)
55
+ thread.start()
56
 
57
  conversation.append({"role": "assistant", "content": ""})
58
+ output = []
59
+
60
  for text in streamer:
61
+ output.append(text)
62
+ conversation[-1]["content"] = "".join(output)
 
63
  yield "", conversation
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  def handle_like(data: gr.LikeData):
66
+ print(f"El mensaje {data.index} fue puntuado como {'bueno' if data.liked else 'malo'}.")
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  class SipanGPTTheme(Base):
69
  def __init__(
 
163
  slider_color_dark="*primary_600",
164
  )
165
 
 
166
  theme = SipanGPTTheme()
167
 
168
+ with gr.Blocks(theme=theme, fill_height=True) as demo:
169
+ chatbot = gr.Chatbot(
170
+ label="SipánGPT 0.3 Llama 3.2",
171
+ examples=[{"text": "Que carreras existen en la uss?"}, {"text": "Quien es el decano de la facultad de ingenieria?"}, {"text": "Que maestrias tiene la universidad?"}],
172
+ value=[],
173
+ show_label=True,
174
+ type="messages",
175
+ bubble_full_width=False,
176
+ placeholder = PLACEHOLDER,
177
+ )
178
+
179
+ msg = gr.Textbox(
180
+ show_label=False,
181
+ placeholder="Escribe tu pregunta aquí...",
182
+ scale=4
183
+ )
184
+
185
+ with gr.Row():
186
+ submit = gr.Button("Enviar")
187
+ clear = gr.ClearButton([msg, chatbot])
188
+
189
+ with gr.Accordion("Parameters", open=False):
190
+ temperature = gr.Slider(
191
+ minimum=0.1,
192
+ maximum=2.0,
193
+ value=0.6,
194
+ step=0.1,
195
+ label="Temperatura",
196
+ )
197
+ max_new_tokens = gr.Slider(
198
+ minimum=1,
199
+ maximum=2048,
200
+ value=1024,
201
+ step=1,
202
+ label="Máximo de nuevos Tokens",
203
+ )
204
 
205
+ msg.submit(generate, [msg, chatbot, max_new_tokens, temperature], [msg, chatbot])
206
+ submit.click(generate, [msg, chatbot, max_new_tokens, temperature], [msg, chatbot])
207
+ chatbot.like(handle_like)
208
 
209
  demo.launch()