aiala committed on
Commit
0712e23
1 Parent(s): 5d068c3

Upload 5 files

Browse files
app.py CHANGED
@@ -1,63 +1,262 @@
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
-
9
-
10
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for ``message`` and yield the growing reply.

    Args:
        message: The new user message.
        history: Previous (user, assistant) turns; empty entries are skipped.
        system_message: Content of the leading system message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The accumulated response text after each streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is named `chunk` so it no longer shadows the
    # `message` parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        choices = chunk.choices
        token = choices[0].delta.content if choices else None

        # A streamed chunk may carry no text (content is None or there are no
        # choices); concatenating None to a str would raise TypeError.
        if token:
            response += token
        yield response
41
-
42
- """
43
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- """
45
- demo = gr.ChatInterface(
46
- respond,
47
- additional_inputs=[
48
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
49
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
51
- gr.Slider(
52
- minimum=0.1,
53
- maximum=1.0,
54
- value=0.95,
55
- step=0.05,
56
- label="Top-p (nucleus sampling)",
57
- ),
58
- ],
59
- )
60
-
61
-
62
- if __name__ == "__main__":
63
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
  import gradio as gr
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ import pandas as pd
5
+ from datetime import datetime, timedelta, timezone
6
+ import torch
7
+ from config import hugging_face_token, init_google_sheets_client, models, quantized_models, default_model_name, user_names, google_sheets_name, MAX_INTERACTIONS
8
+ import spaces
9
+
10
+ # Hack for ZeroGPU
11
+ torch.jit.script = lambda f: f
12
+
13
+ # Initialize Google Sheets client
14
+ client = init_google_sheets_client()
15
+ sheet = client.open(google_sheets_name)
16
+ stories_sheet = sheet.worksheet("Stories")
17
+ system_prompts_sheet = sheet.worksheet("System Prompts")
18
+
19
+ # Load stories from Google Sheets
20
def load_stories():
    """Read every row of the "Stories" worksheet into title/story dicts.

    Returns:
        A list of ``{"title": ..., "story": ...}`` dicts built from the first
        two columns of each row. Rows whose first cell is exactly "Title"
        (the header) are left out.
    """
    rows = stories_sheet.get_all_values()
    loaded = []
    for row in rows:
        if row[0] == "Title":
            # Header row: not a story.
            continue
        loaded.append({"title": row[0], "story": row[1]})
    return loaded
24
+
25
+ # Load system prompts from Google Sheets
26
def load_system_prompts():
    """Read the system prompts from the "System Prompts" worksheet.

    Returns:
        A list with the first cell of every row after the first one (the
        first row is treated as the header and dropped).
    """
    rows = system_prompts_sheet.get_all_values()
    prompts = []
    for row in rows[1:]:
        prompts.append(row[0])
    return prompts
30
+
31
+ # Load available stories and system prompts
32
+ stories = load_stories()
33
+ system_prompts = load_system_prompts()
34
+
35
+ # Initialize the selected model
36
+ selected_model = default_model_name
37
+ tokenizer, model = None, None
38
+
39
+ # Initialize the data list
40
+ data = []
41
+
42
+ # Load the model and tokenizer once at the beginning
43
def load_model(model_name):
    """Load the tokenizer and model for ``model_name`` into the module globals.

    The name is looked up in ``models`` first and then in ``quantized_models``.
    Models that are not listed in ``quantized_models`` are moved to CUDA after
    loading.

    Args:
        model_name: Key of the model in ``models`` or ``quantized_models``.

    Returns:
        The ``(tokenizer, model)`` pair that was loaded.

    Raises:
        ValueError: If ``model_name`` is in neither dictionary.
        Exception: Any loading error is printed and re-raised unchanged.
    """
    global tokenizer, model, selected_model
    try:
        # Resolve the checkpoint before touching the current model, so an
        # unknown name cannot unload a model that is working fine.
        if model_name in models:
            model_path = models[model_name]
        elif model_name in quantized_models:
            model_path = quantized_models[model_name]
        else:
            raise ValueError(f"Model {model_name} not found in either models or quantized_models.")

        # Release the memory of the previous model if one exists. Rebinding to
        # None (instead of `del`) keeps the global name defined, so a failure
        # below leaves `model is None` for callers to detect.
        if model is not None:
            model = None
            torch.cuda.empty_cache()

        tokenizer = AutoTokenizer.from_pretrained(
            model_path,
            padding_side='left',
            token=hugging_face_token,
            trust_remote_code=True,
        )

        # Ensure the padding token is set; reusing EOS adds no new vocabulary.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        model = AutoModelForCausalLM.from_pretrained(
            model_path,
            token=hugging_face_token,
            trust_remote_code=True,
        )

        # Only move to CUDA if it's not a quantized model
        if model_name not in quantized_models:
            model = model.to("cuda")

        selected_model = model_name
    except Exception as e:
        print(f"Error loading model {model_name}: {e}")
        raise
    return tokenizer, model
86
+
87
+
88
+
89
+ # Ensure the initial model is loaded
90
+ tokenizer, model = load_model(selected_model)
91
+
92
+ # Chat history
93
+ chat_history = []
94
+
95
+ # Function to handle interaction with model
96
@spaces.GPU
def interact(user_input, history, interaction_count, model_name):
    """Send ``user_input`` to the loaded model and record the exchange.

    Args:
        user_input: Text of the new user message.
        history: List of ``{"role", "content"}`` dicts for the conversation so
            far. It is copied, not modified in place.
        interaction_count: Number of interactions so far; once it reaches
            ``MAX_INTERACTIONS`` a closing sentence is appended to the input.
        model_name: Name of the active model; models listed in
            ``quantized_models`` are not moved between devices.

    Returns:
        A tuple ``("", formatted_history, history, interaction_count)`` where
        ``formatted_history`` is a list of ``(user, None)`` / ``(None,
        assistant)`` pairs, ``history`` is the updated message list and
        ``interaction_count`` has been incremented by one.

    Raises:
        gr.Error: Wraps any exception raised while generating the response.
    """
    global tokenizer, model
    try:
        if tokenizer is None or model is None:
            raise ValueError("Tokenizer or model is not initialized.")

        # Determine the device to use (either CUDA if available, or CPU)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Only move the model to the device if it's not a quantized model
        if model_name not in quantized_models:
            model = model.to(device)

        if interaction_count >= MAX_INTERACTIONS:
            user_input += ". Thank you for your questions. Our session is now over. Goodbye!"

        # Work on a copy so the caller's list is never mutated.
        history = list(history or [])
        messages = history + [{"role": "user", "content": user_input}]

        # Ensure roles alternate correctly
        for previous, current in zip(messages, messages[1:]):
            if previous.get("role") == current.get("role"):
                raise ValueError("Conversation roles must alternate user/assistant/user/assistant/...")

        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        # The chat template has already produced the prompt text, so special
        # tokens are not added again when tokenizing it.
        encoded = tokenizer(prompt, return_tensors='pt', add_special_tokens=False)
        input_ids = encoded["input_ids"].to(device)
        # Pass the mask explicitly: pad and EOS share one id here.
        attention_mask = encoded["attention_mask"].to(device)

        # NOTE(review): temperature is passed without do_sample=True;
        # presumably decoding stays greedy and the value is ignored — confirm intent.
        chat_history_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=100,
            pad_token_id=tokenizer.eos_token_id,
            temperature=0.1,
        )
        # Decode only the tokens generated after the prompt.
        response = tokenizer.decode(chat_history_ids[0, input_ids.shape[-1]:], skip_special_tokens=True)

        # Update chat history with generated response
        history.append({"role": "user", "content": user_input})
        history.append({"role": "assistant", "content": response})

        interaction_count += 1

        formatted_history = [
            (entry["content"], None) if entry["role"] == "user" else (None, entry["content"])
            for entry in history
            if entry["role"] in ["user", "assistant"]
        ]

        return "", formatted_history, history, interaction_count
    except Exception as e:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        print(f"Error during interaction: {e}")
        raise gr.Error(f"An error occurred during interaction: {str(e)}") from e
141
+
142
+
143
+ # Function to send selected story and initial message
144
def send_selected_story(title, model_name, system_prompt):
    """Start a new conversation about the story called ``title``.

    Resets the module-level ``data`` and ``chat_history``, loads
    ``model_name``, builds a system message from ``system_prompt`` plus the
    story text, and asks the model for an opening question via ``interact``.

    Args:
        title: Title of the story to look up in ``stories``.
        model_name: Name of the model to load and use.
        system_prompt: Text placed before the story in the system message.

    Returns:
        A tuple ``(formatted_history, chat_history, gr.update(value=[]),
        story_text)`` matching the four outputs wired to the button.

    Raises:
        gr.Error: If no story in ``stories`` has the given title.
    """
    global chat_history
    global selected_story
    global data  # Ensure data is reset

    # Validate the title before the expensive model load; returning nothing
    # here would leave the four bound outputs without values.
    story = next((candidate for candidate in stories if candidate["title"] == title), None)
    if story is None:
        print("Story title does not match.")
        raise gr.Error(f"Story '{title}' was not found.")

    data = []  # Reset data for new story
    interaction_count = 1  # Reset interaction count for new story
    load_model(model_name)  # Load the appropriate model (sets the module globals)
    selected_story = title

    # The fixed text guarantees the message is never empty after stripping.
    combined_message = f"{system_prompt}\nHere is the story:\n---\n{story['story']}\n---".strip()

    chat_history = [{"role": "system", "content": combined_message}]  # Reset chat history
    question_prompt = "Please ask a simple question about the story to encourage interaction."
    _, formatted_history, chat_history, _ = interact(question_prompt, chat_history, interaction_count, model_name)

    return formatted_history, chat_history, gr.update(value=[]), story["story"]
173
+
174
+
175
+ # Function to save comment and score
176
def save_comment_score(chat_responses, score, comment, story_name, user_name, system_prompt):
    """Store a score and comment for the current chat, locally and in Sheets.

    The row is appended to the module-level ``data`` list and to the worksheet
    named after ``user_name`` (created if it does not exist yet).

    Args:
        chat_responses: Sequence of ``(user, assistant)`` pairs; empty sides
            are skipped. ``None`` is treated as an empty chat.
        score: Score given by the evaluator.
        comment: Free-text comment.
        story_name: Title of the evaluated story.
        user_name: Evaluator name; also the worksheet title.
        system_prompt: System prompt used for the chat.

    Returns:
        A tuple of a DataFrame with the "Chat History", "Score" and "Comment"
        columns of ``data``, and a ``gr.update`` that clears the comment box.

    Raises:
        gr.Error: If no user name was selected.
    """
    # Imported here because this module references gspread's exceptions but
    # has no top-level import for the package.
    import gspread

    if not user_name:
        raise gr.Error("Please select a user name before saving.")

    # Create formatted chat history with roles
    transcript_lines = []
    for message in chat_responses or []:
        if message[0]:  # User message
            transcript_lines.append(f"User: {message[0]}\n")
        if message[1]:  # Assistant message
            transcript_lines.append(f"Assistant: {message[1]}\n")
    full_chat_history = "".join(transcript_lines)

    # Wall-clock time at GMT-3
    timestamp = datetime.now(timezone(timedelta(hours=-3)))
    timestamp_str = timestamp.strftime("%Y-%m-%d %H:%M:%S")
    model_name = selected_model

    row = [
        timestamp_str,
        user_name,
        model_name,
        system_prompt,
        story_name,
        full_chat_history,
        score,
        comment,
    ]

    # Append data to local data storage
    data.append(row)

    # Append data to Google Sheets, creating the user's worksheet on first use
    spreadsheet = client.open(google_sheets_name)
    try:
        user_sheet = spreadsheet.worksheet(user_name)
    except gspread.exceptions.WorksheetNotFound:
        user_sheet = spreadsheet.add_worksheet(title=user_name, rows="100", cols="20")

    user_sheet.append_row(row)

    df = pd.DataFrame(data, columns=["Timestamp", "User Name", "Model Name", "System Prompt", "Story Name", "Chat History", "Score", "Comment"])
    # Show only the required columns and clear the comment input box
    return df[["Chat History", "Score", "Comment"]], gr.update(value="")
212
+
213
+ # Function to load user guide from a file
214
def load_user_guide():
    """Return the full text of ``user_guide.txt`` from the working directory.

    The file is decoded as UTF-8 explicitly so accented characters do not
    depend on the platform's default encoding.
    """
    with open('user_guide.txt', 'r', encoding='utf-8') as file:
        return file.read()
217
+
218
+ # Combine both model dictionaries
219
+ all_models = {**models, **quantized_models}
220
+
221
+ # Create the chat interface using Gradio Blocks
222
+ with gr.Blocks() as demo:
223
+ with gr.Tabs():
224
+ with gr.TabItem("Chat"):
225
+ gr.Markdown("# Demo Chatbot V3")
226
+
227
+ gr.Markdown("## Context")
228
+ with gr.Group():
229
+ model_dropdown = gr.Dropdown(choices=list(all_models.keys()), label="Select Model", value=default_model_name)
230
+ user_dropdown = gr.Dropdown(choices=user_names, label="Select User Name")
231
+ initial_story = stories[0]["title"] if stories else None
232
+ story_dropdown = gr.Dropdown(choices=[story["title"] for story in stories], label="Select Story", value=initial_story)
233
+ system_prompt_dropdown = gr.Dropdown(choices=system_prompts, label="Select System Prompt", value=system_prompts[0])
234
+ send_story_button = gr.Button("Send Story")
235
+
236
+ gr.Markdown("## Chat")
237
+ with gr.Group():
238
+ selected_story_textbox = gr.Textbox(label="Selected Story", lines=10, interactive=False)
239
+ chatbot_output = gr.Chatbot(label="Chat History")
240
+ chatbot_input = gr.Textbox(placeholder="Type your message here...", label="User Input")
241
+ send_message_button = gr.Button("Send")
242
+
243
+ gr.Markdown("## Evaluation")
244
+ with gr.Group():
245
+ score_input = gr.Slider(minimum=0, maximum=5, step=1, label="Score")
246
+ comment_input = gr.Textbox(placeholder="Add a comment...", label="Comment")
247
+ save_button = gr.Button("Save Score and Comment")
248
+ data_table = gr.DataFrame(headers=["Chat History", "Score", "Comment"])
249
+
250
+ with gr.TabItem("User Guide"):
251
+ gr.Textbox(label="User Guide", value=load_user_guide(), lines=20)
252
+
253
+ chat_history_json = gr.JSON(value=[], visible=False)
254
+ interaction_count = gr.Number(value=0, visible=False)
255
+
256
+
257
+
258
+ send_story_button.click(fn=send_selected_story, inputs=[story_dropdown, model_dropdown, system_prompt_dropdown], outputs=[chatbot_output, chat_history_json, data_table, selected_story_textbox])
259
+ send_message_button.click(fn=interact, inputs=[chatbot_input, chat_history_json, interaction_count, model_dropdown], outputs=[chatbot_input, chatbot_output, chat_history_json, interaction_count])
260
+ save_button.click(fn=save_comment_score, inputs=[chatbot_output, score_input, comment_input, story_dropdown, user_dropdown, system_prompt_dropdown], outputs=[data_table, comment_input])
261
+
262
+ demo.launch()
config.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gspread
3
+ from oauth2client.service_account import ServiceAccountCredentials
4
+
5
+ # Read the authentication token from the environment variable
6
+ hugging_face_token = os.getenv("HUGGING_FACE_TOKEN")
7
+
8
+ # Google Sheets configuration
9
def init_google_sheets_client():
    """Create an authorized gspread client from a service-account key file.

    The key file path is read from the ``GOOGLE_SHEETS_KEYFILE`` environment
    variable and falls back to the original hard-coded file name, so the
    credentials can live outside the repository.

    Returns:
        The client returned by ``gspread.authorize`` for the loaded credentials.
    """
    scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
    # NOTE(review): service-account keys should not be committed; prefer
    # supplying the path through the environment variable.
    keyfile_path = os.getenv("GOOGLE_SHEETS_KEYFILE", 'tokyo-portal-326513-90aee094bab9.json')
    creds = ServiceAccountCredentials.from_json_keyfile_name(keyfile_path, scope)
    return gspread.authorize(creds)
13
+
14
+ # Google Sheets name
15
+ google_sheets_name = "Chatbot Test"
16
+
17
+ # Define available models
18
+ models = {
19
+ "Meta-Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
20
+ "Llama-2-7B-Chat": "meta-llama/Llama-2-7b-chat-hf",
21
+ "Yi-6B-Chat": "01-ai/Yi-6B-Chat",
22
+ "Qwen2-7B-Instruct": "Qwen/Qwen2-7B-Instruct"
23
+ }
24
+ # List of models fine-tuned in 4-bit or 8-bit
25
+ quantized_models = {
26
+ "Llama-3-8B-Finetuning-Stories": "rodrisouza/Llama-3-8B-Finetuning-Stories",
27
+ }
28
+
29
+ # Default model name
30
+ default_model_name = "Meta-Llama-3-8B-Instruct"
31
+
32
+ # Define available user names
33
+ user_names = ["Laura Musto", "Brian Carpenter", "Germán Capdehourat", "Isabel Amigo", "Aiala Rosá", "Luis Chiruzzo", "Igancio Sastre", "Santiago Góngora", "Ignacio Remersaro", "Rodrigo Souza"]
34
+
35
+ MAX_INTERACTIONS = 5
requirements.txt CHANGED
@@ -1 +1,9 @@
1
- huggingface_hub==0.22.2
 
 
 
 
 
 
 
 
 
1
+ huggingface_hub==0.22.2
2
+ minijinja
3
+ transformers
4
+ torch
5
+ pandas
6
+ gspread
7
+ oauth2client
8
+ accelerate
9
+ bitsandbytes
tokyo-portal-326513-90aee094bab9 (1).json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "service_account",
3
+ "project_id": "tokyo-portal-326513",
4
+ "private_key_id": "90aee094bab930aedc4ad7bdb17626ef345ef57a",
5
+ "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDA9rv7k4RnEi9G\nngtf7+cyv7ReziihB22cz3oWBypfjLw9kwoKTRiBQJyltSWhTUaC+ZBQdLcVQ0Xh\nKgw8PXZ25Q1xkrvLYbR5TD4Id1yfGk0s9ymhLa/OqsUmpbaleRsLcfxETpz6dfIN\n4t7md1S5OusXMirWm7PXjBVpU+fp2eCkJtrSFEWqdZo4LkJ3s6oOQzI5GWac0ntX\ntYcdxfl9Ul/XqE23s65rXB/rHxx9+NMwgnT4QZRXOuYhJqLYP5TPpNsz7MTRf9pt\n7ZCkIJQpwwAcvDW0onenOCCYMn11ntDJICiJsdyaoREfodx2D4dhNyxw2dT73wtm\nJsLETzNbAgMBAAECggEAAZyuElp3ozLMMU9Vsf37lPl0BMy9GydIO6SiFPgHWxf+\nZJdLYK2nNEE5bcr3HwNZwPNbyMhI1UUkGs7ynjg3OqQMOjQrHHZa/x5rcD+rBus9\n1M/VybJrgxL7JakBvx0lZDGWFW4Asrj0hjgse4lorEl5OE7je7p+RKE3dpNMXzA+\nkPe7y5RG74cwIEg2C7y/WOQxuDEELqyse7HOXR6+BSVntb6eVhrt4xTn3KoGWxhE\nWd9maD3rMsSkmQtk45wWSvwdj6Pkfp/DL5DX2iIprFIpbveYaa907W6BD8Kz4xnA\nFS5EDdqCxqURJk9p/ADxjSPxnbmqVPrUK3KsHWkyIQKBgQDgeUgEY4/bJKtx12Sl\nw/acvGA6OxHNVNN4R8YLC1xDUg+csy09vXTZ6VcHqae417k+KXe3beBoE2k7mX5b\nZNShtIBkm/g0Ei4t80mgEaatLBddZghoiLJvHOCdpTmMfNt41KWzyr139PgT/KSK\n5iQnTsbxPUmjgS0h58KfM6CfOwKBgQDcEIu4IGXIXQ1qydXZgvkYte3nrjJw+ump\n0OklIvZjOcptgWAjguNr/6l5tqXnc3VhD5Mkk1hRHckUHYU93RoalTk/xGWoHlnx\n/rfZq/PMHNd4GoRl8GHWGAzviaS1Nqcz+8vwXW8Sg5mnoIFFrWkg6NN418k2FlLs\nZ1Hfuia6YQKBgDt+qaHQKZ6xl2cy6ZAt4j0kiHSml09OvkXQ8CKZPjjxlQW5T7Jf\ndosTF2gGhVeuhYB+SSaqzqGHE4siYaUjkl/RkX4ZAK38a5TuOINeakjXuNGea3gZ\nUwG0K6xc0GX16c2avthqUdFrch0KBypxWP+6F2x1DAF0CVxFKwY7DsVlAoGBAIOa\nAXQjK7seqp9qBFEHWh/E7HjYW6Hk60d6f3KN7fp43O2PqQaTh51WTutSpvQ6v79D\nqSL+WppnzZAR91R/nNS42Huh34kiXXeydA/gHPO7a7+PXA36Kwf1agb1sWINRQB5\nXARqW7oeqQztl2Eryuq7UXu6z8FgqEZnozbAA9kBAoGBAI2cycVnPOHfRo24KLsI\nOIhfkgUn2VPgGpBig1QVaGdi0fLsKPX7HnHbui9m5dXwgps/w8sUd/hwoCu0zR/C\nedYc+G1VmUikJ2fLXj3OX1axS4oGTg3ZuN+O4/GsA65xrTotIJBxp9RQz+HXTzlU\nBGg3nYWf2DB1d3oZ8sFU4T3S\n-----END PRIVATE KEY-----\n",
6
+ "client_email": "[email protected]",
7
+ "client_id": "108995062104802726229",
8
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
9
+ "token_uri": "https://oauth2.googleapis.com/token",
10
+ "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
11
+ "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/chatbot-test%40tokyo-portal-326513.iam.gserviceaccount.com",
12
+ "universe_domain": "googleapis.com"
13
+ }
user_guide.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ La aplicación está organizada en tres bloques principales: Contexto, Chat y Evaluación.
2
+
3
+ 1. Contexto
4
+ En este bloque, configuramos el contexto antes de interactuar con el chat.
5
+ 1. Seleccionar el modelo: Elige el modelo que deseas probar de la lista de modelos disponibles.
6
+ 2. Seleccionar el usuario: Elige el usuario que está realizando las pruebas. Es importante seleccionar el usuario correcto para guardar los resultados adecuadamente.
7
+ 3. Seleccionar el cuento: Elige el cuento que se trabajará con el modelo.
8
+ 4. Seleccionar el System Prompt: Configura el mensaje de sistema que guiará la interacción con el modelo.
9
+ 5. Enviar el cuento: Haz clic en "Send Story" para enviar el cuento seleccionado y configurar el contexto.
10
+
11
+ 2. Chat
12
+ En este bloque, interactuamos con el modelo de lenguaje.
13
+ 1. Visualizar el cuento seleccionado: En el campo “Selected Story” se muestra el cuento enviado anteriormente en el contexto.
14
+ 2. Escribir el mensaje: Introduce tu mensaje en el campo “User Input”.
15
+ 3. Enviar el mensaje: Haz clic en "Send" para enviar tu mensaje al modelo y recibir una respuesta.
16
+
17
+ 3. Evaluación
18
+ En este bloque, evaluamos la última interacción realizada entre el usuario y el chat.
19
+ 1. Asignar un puntaje: Selecciona el puntaje que deseas asignar al resultado de la interacción.
20
+ 2. Escribir un comentario: Introduce un comentario sobre el resultado de la interacción.
21
+ 3. Guardar la evaluación: Haz clic en "Save Score and Comment" para guardar el puntaje y el comentario.
22
+
23
+ Obs: La lista de cuentos y system prompts disponibles, así como los resultados, se encuentran en el siguiente link.
24
+ https://docs.google.com/spreadsheets/d/1EY0vYkzsrSYW5PNWDDTUHlL4ahTym3Wj1etxRHQY3c8/edit?usp=sharing