Commit 69df8a4 • Parent(s): 6830e68
ignacio committed: "replicant y tres modelos" (replicate and three models)
app.py
CHANGED
@@ -4,8 +4,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import pandas as pd
 from datetime import datetime, timedelta, timezone
 import torch
-from config import hugging_face_token, replicate_token, init_google_sheets_client, …
-import spaces
+from config import hugging_face_token, replicate_token, init_google_sheets_client, huggingface_tokenizer, replicate_model, default_model_name, user_names, google_sheets_name, MAX_INTERACTIONS
 import replicate
 
 # Hack for ZeroGPU
@@ -47,19 +46,12 @@ replicate_api = replicate.Client(api_token=replicate_token)
 def load_model(model_name):
     global tokenizer, selected_model #model
     try:
-        # Release the memory of the previous model if exists
-        # not applicable for the API
-        #if model is not None:
-        #    del model
-        #    torch.cuda.empty_cache()
 
-        # Check if the model is …
-        if model_name in …
-            model_path = …
-        elif model_name in quantized_models:
-            model_path = quantized_models[model_name]
+        # Check if the model is in models and load accordingly
+        if model_name in huggingface_tokenizer:
+            model_path = huggingface_tokenizer[model_name]
         else:
-            raise ValueError(f"Model {model_name} not found in …
+            raise ValueError(f"Model {model_name} not found in models")
 
         tokenizer = AutoTokenizer.from_pretrained(
             model_path,
@@ -73,16 +65,6 @@ def load_model(model_name):
         tokenizer.pad_token = tokenizer.eos_token
         tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})
 
-        #model = AutoModelForCausalLM.from_pretrained(
-        #    model_path,
-        #    token=hugging_face_token,
-        #    trust_remote_code=True
-        #)
-        #
-        ## Only move to CUDA if it's not a quantized model
-        #if model_name not in quantized_models:
-        #    model = model.to("cuda")
-        #
         selected_model = model_name
     except Exception as e:
         print(f"Error loading model {model_name}: {e}")
@@ -99,66 +81,49 @@ tokenizer = load_model(selected_model) #, model
 chat_history = []
 
 # Function to handle interaction with model
-#@spaces.GPU
 def interact(user_input, history, interaction_count, model_name):
-    global tokenizer, model
-    … (old lines 105–145 elided in the diff view)
-        #response = tokenizer.decode(chat_history_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
-
-        # Update chat history with generated response
-        history.append({"role": "user", "content": user_input})
-        history.append({"role": "assistant", "content": response})
-
-        interaction_count += 1
-
-        formatted_history = [(entry["content"], None) if entry["role"] == "user" else (None, entry["content"]) for entry in history if entry["role"] in ["user", "assistant"]]
-
-        return "", formatted_history, history, interaction_count
-    except Exception as e:
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        print(f"Error during interaction: {e}")
-        raise gr.Error(f"An error occurred during interaction: {str(e)}")
+    #global tokenizer, model
+    tokenizer = load_model(model_name)
+
+    if tokenizer is None: #or model is None:
+        raise ValueError("Tokenizer or model is not initialized.")
+
+    if interaction_count >= MAX_INTERACTIONS:
+        user_input += ". Thank you for your questions. Our session is now over. Goodbye!"
+
+    messages = history + [{"role": "user", "content": user_input}]
+
+    # Ensure roles alternate correctly
+    for i in range(1, len(messages)):
+        if messages[i-1].get("role") == messages[i].get("role"):
+            raise ValueError("Conversation roles must alternate user/assistant/user/assistant/...")
+
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+    # Generate request
+    inpt = {"prompt": prompt,
+            "max_new_tokens": 100,
+            "temperature": 0.1,
+            "prompt_template": "{prompt}",}
+            #"num_return_sequences": 1,
+            #"pad_token_id": tokenizer.eos_token_id}
+
+    # Make request
+    response = replicate_api.run(
+        replicate_model[model_name],
+        input=inpt
+    )
+    response = "".join(response).strip()
+
+    # Update chat history with generated response
+    history.append({"role": "user", "content": user_input})
+    history.append({"role": "assistant", "content": response})
+
+    interaction_count += 1
+
+    formatted_history = [(entry["content"], None) if entry["role"] == "user" else (None, entry["content"]) for entry in history if entry["role"] in ["user", "assistant"]]
+
+    return "", formatted_history, history, interaction_count
 
 
 # Function to send selected story and initial message
@@ -168,7 +133,7 @@ def send_selected_story(title, model_name, system_prompt):
     global data # Ensure data is reset
     data = [] # Reset data for new story
     interaction_count = 1 # Reset interaction count for new story
-    tokenizer …
+    tokenizer = load_model(model_name) # Load the appropriate model
    selected_story = title
     for story in stories:
         if story["title"] == title:
@@ -237,7 +202,7 @@ def load_user_guide():
        return file.read()
 
 # Combine both model dictionaries
-all_models = {**…
+all_models = {**huggingface_tokenizer}
 
 # Create the chat interface using Gradio Blocks
 with gr.Blocks() as demo:
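Review note: the net effect of this commit is that generation moves from local AutoModelForCausalLM inference to the Replicate API, with the Hugging Face tokenizer kept only for its chat template. The snippet below is a minimal sketch of that flow outside Gradio, not the Space's code; it assumes a REPLICATE_API_TOKEN environment variable and one model pair mirroring config.py (gated models such as Llama 2 may additionally need a Hugging Face access token).

    import os

    import replicate
    from transformers import AutoTokenizer

    # Hypothetical one-entry registries mirroring config.py.
    huggingface_tokenizer = {"Llama-2-7B-Chat": "meta-llama/Llama-2-7b-chat-hf"}
    replicate_model = {"Llama-2-7B-Chat": "meta/llama-2-7b-chat"}

    def run_chat(model_name, messages):
        # The tokenizer only renders the conversation into the model's
        # prompt format; no local model weights are loaded.
        tokenizer = AutoTokenizer.from_pretrained(huggingface_tokenizer[model_name])
        prompt = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        client = replicate.Client(api_token=os.environ["REPLICATE_API_TOKEN"])
        # For language models, run() returns the completion as a sequence
        # of text chunks, hence the "".join() below.
        output = client.run(
            replicate_model[model_name],
            input={
                "prompt": prompt,
                "max_new_tokens": 100,
                "temperature": 0.1,
                # The chat template already added the special tokens, so
                # have Replicate use the prompt verbatim.
                "prompt_template": "{prompt}",
            },
        )
        return "".join(output).strip()

    # Roles must alternate user/assistant, as interact() enforces:
    # print(run_chat("Llama-2-7B-Chat", [{"role": "user", "content": "Hello!"}]))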
config.py
CHANGED
@@ -19,27 +19,21 @@ google_sheets_name = "Chatbot Test"
 huggingface_tokenizer = {
     "Meta-Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
     "Llama-2-7B-Chat": "meta-llama/Llama-2-7b-chat-hf",
-
-    #"Qwen2-7B-Instruct": "Qwen/Qwen2-7B-Instruct"
+    "mistralai/mistral-7b-instruct-v0.2": "mistralai/Mistral-7B-Instruct-v0.2",
 }
 
 # Available models for replicate
 replicate_model = {
-    "Meta-Llama-3-8B-Instruct": "meta/…
-    "Llama-2-7B-Chat": "meta/…
-
-    #"Qwen2-7B-Instruct": "Qwen/Qwen2-7B-Instruct"
+    "Meta-Llama-3-8B-Instruct": "meta/meta-llama-3-8b-instruct",
+    "Llama-2-7B-Chat": "meta/llama-2-7b-chat",
+    "mistralai/mistral-7b-instruct-v0.2": "mistralai/mistral-7b-instruct-v0.2",
 }
 
-# …
-
-#    "Llama-3-8B-Finetuning-Stories": "rodrisouza/Llama-3-8B-Finetuning-Stories",
-#}
-
-# Default model name
-default_model_name = "Meta-Llama-3-8B-Instruct"
+# Default model (first in list)
+default_model_name = list(replicate_model.items())[0][0]
 
 # Define available user names
-user_names = ["Laura Musto", "Brian Carpenter", "Germán Capdehourat", "Isabel Amigo", "Aiala Rosá", "Luis Chiruzzo", "…
+user_names = ["Laura Musto", "Brian Carpenter", "Germán Capdehourat", "Isabel Amigo", "Aiala Rosá", "Luis Chiruzzo", "Ignacio Sastre", "Santiago Góngora", "Ignacio Remersaro", "Rodrigo Souza"]
 
 MAX_INTERACTIONS = 5
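A side note on the new default-model line: list(replicate_model.items())[0][0] materializes every key/value pair just to read the first key. Since dicts preserve insertion order (Python 3.7+), next(iter(replicate_model)) expresses the same "first registered model" intent without the intermediate list; a quick sketch:

    replicate_model = {
        "Meta-Llama-3-8B-Instruct": "meta/meta-llama-3-8b-instruct",
        "Llama-2-7B-Chat": "meta/llama-2-7b-chat",
    }

    default_model_name = list(replicate_model.items())[0][0]   # as committed
    assert default_model_name == next(iter(replicate_model))   # lighter equivalent
    print(default_model_name)  # Meta-Llama-3-8B-Instruct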