jhangmez committed on
Commit
1f4f7c2
·
1 Parent(s): 2704ff2

Se cambió todo a un código de web

Browse files
Files changed (2) hide show
  1. app.py +126 -59
  2. requirements.txt +3 -1
app.py CHANGED
@@ -1,63 +1,130 @@
 
 
 
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("ussipan/SipanGPT-0.1-Llama-3.2-1B-GGUF")
8
-
9
-
10
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for ``message`` given the prior ``history``.

    Builds an OpenAI-style message list (system prompt, alternating
    user/assistant turns from ``history``, then the new user message), sends
    it to the module-level ``client`` with streaming enabled, and yields the
    accumulated response text after every received chunk.

    Args:
        message: The new user message.
        history: Previous ``(user, assistant)`` turns; empty entries are
            skipped.
        system_message: Content of the leading system message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded sampling temperature.
        top_p: Forwarded nucleus-sampling threshold.

    Yields:
        The response text accumulated so far.
    """
    messages = [{"role": "system", "content": system_message}]

    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""

    # Use a distinct loop name so the `message` parameter is not shadowed.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some stream chunks carry no choices or a delta whose content is
        # None; concatenating None to a str would raise TypeError.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if token:
            response += token
        yield response
41
-
42
- """
43
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- """
45
- demo = gr.ChatInterface(
46
- respond,
47
- additional_inputs=[
48
- gr.Textbox(value="You are SipanGPT, an artificial intelligence assistant responsible for providing technical support for the Information Technology Department (DTI) to students, professors, and administrative staff at the Señor de Sipán University, a private university in the Lambayeque region of Peru.", label="System message"),
49
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
- gr.Slider(minimum=0.1, maximum=4.0, value=1.5, step=0.1, label="Temperature"),
51
- gr.Slider(
52
- minimum=0.1,
53
- maximum=1.0,
54
- value=0.95,
55
- step=0.05,
56
- label="Top-p (nucleus sampling)",
57
- ),
58
- ],
59
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
 
62
- if __name__ == "__main__":
63
- demo.launch()
 
1
+ import os
2
+ from threading import Thread
3
+ from typing import Iterator
4
+
5
  import gradio as gr
6
+ #import spaces
7
+ import torch
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
9
+
10
# Upper bound and initial value of the "Max new tokens" slider.
MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
# Prompts longer than this many tokens are cut down to their most recent
# tokens before generation; overridable through the environment.
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

# Not referenced elsewhere in this file: model placement is delegated to
# `device_map="auto"` below and inputs are moved to `model.device`.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Download model from Huggingface Hub
# Change this to meta-llama or the correct org name from Huggingface Hub
model_id = "ussipan/SipanGPT-0.1-Llama-3.2-1B-GGUF"

# NOTE(review): the repo name suggests it ships GGUF weights. transformers
# only loads a .gguf checkpoint when `gguf_file` names the file inside the
# repo, so the file name can be supplied through the GGUF_FILE environment
# variable. When it is unset the calls below are identical to the previous
# behaviour. Confirm what the repo actually contains.
GGUF_FILE = os.getenv("GGUF_FILE")
_gguf_kwargs = {"gguf_file": GGUF_FILE} if GGUF_FILE else {}

tokenizer = AutoTokenizer.from_pretrained(model_id, **_gguf_kwargs)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    **_gguf_kwargs,
)
# Inference only: switch off training-mode layers such as dropout.
model.eval()
26
+
27
# Main Gradio inference function
def generate(
    message: str,
    chat_history: list[dict],
    max_new_tokens: int = 1024,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> Iterator[tuple[str, list[dict]]]:
    """Stream the model's reply to ``message`` as Gradio chatbot updates.

    Args:
        message: The new user prompt.
        chat_history: Prior turns as message dicts (``role``/``content``);
            any ``metadata`` key is dropped before templating.
        max_new_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.
        top_k: Top-k sampling cutoff.
        repetition_penalty: Penalty applied to repeated tokens.

    Yields:
        ``("", conversation)`` after every streamed text fragment: an empty
        string for the textbox output and the conversation whose last entry
        is the assistant reply accumulated so far.
    """
    conversation = [
        {key: value for key, value in turn.items() if key != "metadata"}
        for turn in (chat_history or [])
    ]
    conversation.append({"role": "user", "content": message})

    # return_dict=True also yields the attention mask, so generate() does not
    # have to guess it from the padding token.
    encoded = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    )
    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        # Keep only the most recent tokens of an over-long prompt.
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        attention_mask = attention_mask[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Se recortó la entrada de la conversación porque era más larga que {MAX_INPUT_TOKEN_LENGTH} tokens.")
    input_ids = input_ids.to(model.device)
    attention_mask = attention_mask.to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = {
        "input_ids": input_ids,
        "attention_mask": attention_mask,
        "streamer": streamer,
        "max_new_tokens": int(max_new_tokens),
        "do_sample": True,
        "top_p": top_p,
        "top_k": int(top_k),
        "temperature": temperature,
        "num_beams": 1,
        "repetition_penalty": repetition_penalty,
    }
    # model.generate blocks until completion, so it runs in a worker thread
    # while this generator drains the streamer.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    conversation.append({"role": "assistant", "content": ""})
    bot_response = ""
    for text in streamer:
        bot_response += text
        conversation[-1]["content"] = bot_response
        yield "", conversation

    # The streamer is exhausted, so generation has finished; reap the thread.
    worker.join()
69
+
70
+
71
# Implementing Gradio 5 features and building a ChatInterface UI yourself
# HTML passed to the chatbot component as its `placeholder`.
PLACEHOLDER = "\n".join(
    [
        '<div style="padding: 20px; text-align: center; display: flex; flex-direction: column; align-items: center;">',
        '<img src="https://corladlalibertad.org.pe/wp-content/uploads/2024/01/USS.jpg" style="width: 80%; max-width: 550px; height: auto; opacity: 0.55; margin-bottom: 10px;">',
        '<h1 style="font-size: 28px; margin: 0;">SipánGPT 0.1 Llama 3.2</h1>',
        '<p style="font-size: 8px; margin: 5px 0 0; opacity: 0.65;">',
        '<a href="https://huggingface.co/spaces/ysharma/Llama3-2_with_Gradio-5" target="_blank" style="color: inherit; text-decoration: none;">Source Code</a>',
        '</p>',
        '</div>',
    ]
)
79
+
80
+
81
def handle_retry(
    history,
    retry_data: gr.RetryData,
    max_new_tokens: int = 1024,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
):
    """Regenerate the reply for the message at ``retry_data.index``.

    The history is cut just before the retried message, whose content is
    then sent through ``generate`` again.

    Args:
        history: Chatbot messages (dicts with a ``content`` key).
        retry_data: Gradio retry event; only ``index`` is read.
        max_new_tokens, temperature, top_p, top_k, repetition_penalty:
            Sampling settings forwarded to ``generate``. The defaults match
            the values that were previously hard-coded, so callers that pass
            only ``history`` behave as before.

    Yields:
        Whatever ``generate`` yields.
    """
    retry_index = retry_data.index
    previous_prompt = history[retry_index]['content']
    new_history = history[:retry_index]
    yield from generate(
        previous_prompt,
        chat_history=new_history,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=repetition_penalty,
    )
85
+
86
def handle_like(data: gr.LikeData):
    """Print whether the response in ``data.value`` was up- or down-voted."""
    if not data.liked:
        print("Votaste negativamente esta respuesta: ", data.value)
        return
    print("Votaste positivamente esta respuesta: ", data.value)
91
+
92
def handle_undo(history, undo_data: gr.UndoData):
    """Drop the messages from ``undo_data.index`` onward.

    Returns:
        A pair ``(truncated_history, prompt)``: the history before the undone
        message, and that message's ``content``.
    """
    cut = undo_data.index
    return history[:cut], history[cut]['content']
96
+
97
def chat_examples_fill(data: gr.SelectData):
    """Run the selected suggestion's text through ``generate`` with an empty history."""
    suggestion_text = data.value['text']
    stream = generate(
        suggestion_text,
        chat_history=[],
        max_new_tokens=1024,
        temperature=0.6,
        top_p=0.9,
        top_k=50,
        repetition_penalty=1.2,
    )
    yield from stream
99
+
100
+
101
# Chat UI: a chatbot, a prompt textbox, and an accordion of sampling sliders.
# NOTE(review): the nesting below was reconstructed from a flattened diff;
# confirm whether the textbox and accordion belong inside the column.
with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
    with gr.Column(elem_id="container", scale=1):
        chatbot = gr.Chatbot(
            label="SipánGPT 0.1 Llama 3.2",
            show_label=False,
            type="messages",
            scale=1,
            suggestions=[
                {"text": "Háblame del reglamento de estudiantes de la universidad"},
                {"text": "Qué becas ofrece la universidad"},
            ],
            placeholder=PLACEHOLDER,
        )

    msg = gr.Textbox(submit_btn=True, show_label=False)
    with gr.Accordion('Additional inputs', open=False):
        max_new_tokens = gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=MAX_MAX_NEW_TOKENS,
            step=1,
            value=DEFAULT_MAX_NEW_TOKENS,
        )
        temperature = gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6)
        top_p = gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9)
        top_k = gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50)
        repetition_penalty = gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2)

    # Submitting the textbox streams the reply; each update clears the
    # textbox (first output) and refreshes the chatbot (second output).
    msg.submit(
        generate,
        [msg, chatbot, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
        [msg, chatbot],
    )
    chatbot.retry(handle_retry, chatbot, [msg, chatbot])
    chatbot.like(handle_like, None, None)
    chatbot.undo(handle_undo, chatbot, [chatbot, msg])
    chatbot.suggestion_select(chat_examples_fill, None, [msg, chatbot])


# Only start the server when executed as a script, so importing this module
# (e.g. from tooling or tests) does not launch the app.
if __name__ == "__main__":
    demo.launch()
 
requirements.txt CHANGED
@@ -1 +1,3 @@
1
- huggingface_hub==0.22.2
 
 
 
1
+ accelerate==0.33.0
2
+ bitsandbytes==0.43.2
3
+ transformers