ignacio committed
Commit
69df8a4
1 Parent(s): 6830e68

replicate and three models

Files changed (2)
  1. app.py +49 -84
  2. config.py +7 -13
app.py CHANGED
@@ -4,8 +4,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import pandas as pd
 from datetime import datetime, timedelta, timezone
 import torch
-from config import hugging_face_token, replicate_token, init_google_sheets_client, models, replicate_model, quantized_models, default_model_name, user_names, google_sheets_name, MAX_INTERACTIONS
-import spaces
+from config import hugging_face_token, replicate_token, init_google_sheets_client, huggingface_tokenizer, replicate_model, default_model_name, user_names, google_sheets_name, MAX_INTERACTIONS
 import replicate
 
 # Hack for ZeroGPU
@@ -47,19 +46,12 @@ replicate_api = replicate.Client(api_token=replicate_token)
 def load_model(model_name):
     global tokenizer, selected_model #model
     try:
-        # Release the memory of the previous model if exists
-        #not applicable for the API
-        #if model is not None:
-        #    del model
-        #    torch.cuda.empty_cache()
 
-        # Check if the model is in models or quantized_models and load accordingly
-        if model_name in models:
-            model_path = models[model_name]
-        elif model_name in quantized_models:
-            model_path = quantized_models[model_name]
+        # Check if the model is in models and load accordingly
+        if model_name in huggingface_tokenizer:
+            model_path = huggingface_tokenizer[model_name]
         else:
-            raise ValueError(f"Model {model_name} not found in either models or quantized_models.")
+            raise ValueError(f"Model {model_name} not found in models")
 
         tokenizer = AutoTokenizer.from_pretrained(
             model_path,
@@ -73,16 +65,6 @@ def load_model(model_name):
             tokenizer.pad_token = tokenizer.eos_token
             tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})
 
-        #model = AutoModelForCausalLM.from_pretrained(
-        #    model_path,
-        #    token=hugging_face_token,
-        #    trust_remote_code=True
-        #)
-        #
-        ## Only move to CUDA if it's not a quantized model
-        #if model_name not in quantized_models:
-        #    model = model.to("cuda")
-        #
         selected_model = model_name
     except Exception as e:
         print(f"Error loading model {model_name}: {e}")
@@ -99,66 +81,49 @@ tokenizer = load_model(selected_model) #, model
 chat_history = []
 
 # Function to handle interaction with model
-#@spaces.GPU
 def interact(user_input, history, interaction_count, model_name):
-    global tokenizer, model
-    try:
-        if tokenizer is None or model is None:
-            raise ValueError("Tokenizer or model is not initialized.")
-
-        ## Determine the device to use (either CUDA if available, or CPU)
-        #device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        #
-        ## Only move the model to the device if it's not a quantized model
-        #if model_name not in quantized_models:
-        #    model = model.to(device)
-
-        if interaction_count >= MAX_INTERACTIONS:
-            user_input += ". Thank you for your questions. Our session is now over. Goodbye!"
-
-        messages = history + [{"role": "user", "content": user_input}]
-
-        # Ensure roles alternate correctly
-        for i in range(1, len(messages)):
-            if messages[i-1].get("role") == messages[i].get("role"):
-                raise ValueError("Conversation roles must alternate user/assistant/user/assistant/...")
-
-        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-
-        # Move input tensor to the correct device
-        #input_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(device)
-
-        # Generate request
-        inpt = {"prompt": prompt,
-                "max_new_tokens": 100,
-                "temperature": 0.1,
-                "prompt_template": "{prompt}",}
-                #"num_return_sequences": 1,
-                #"pad_token_id": tokenizer.eos_token_id}
-
-        #make request
-        response = replicate_api.run(
-            replicate_model[model],
-            input=inpt
-        )
-        response = "".join(response).strip()
-        #chat_history_ids = model.generate(input_ids, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id, temperature=0.1)
-        #response = tokenizer.decode(chat_history_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
-
-        # Update chat history with generated response
-        history.append({"role": "user", "content": user_input})
-        history.append({"role": "assistant", "content": response})
-
-        interaction_count += 1
-
-        formatted_history = [(entry["content"], None) if entry["role"] == "user" else (None, entry["content"]) for entry in history if entry["role"] in ["user", "assistant"]]
-
-        return "", formatted_history, history, interaction_count
-    except Exception as e:
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        print(f"Error during interaction: {e}")
-        raise gr.Error(f"An error occurred during interaction: {str(e)}")
+    #global tokenizer, model
+    tokenizer = load_model(model_name)
+
+    if tokenizer is None: #or model is None:
+        raise ValueError("Tokenizer or model is not initialized.")
+
+    if interaction_count >= MAX_INTERACTIONS:
+        user_input += ". Thank you for your questions. Our session is now over. Goodbye!"
+
+    messages = history + [{"role": "user", "content": user_input}]
+
+    # Ensure roles alternate correctly
+    for i in range(1, len(messages)):
+        if messages[i-1].get("role") == messages[i].get("role"):
+            raise ValueError("Conversation roles must alternate user/assistant/user/assistant/...")
+
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+    # Generate request
+    inpt = {"prompt": prompt,
+            "max_new_tokens": 100,
+            "temperature": 0.1,
+            "prompt_template": "{prompt}",}
+            #"num_return_sequences": 1,
+            #"pad_token_id": tokenizer.eos_token_id}
+
+    #make request
+    response = replicate_api.run(
+        replicate_model[model_name],
+        input=inpt
+    )
+    response = "".join(response).strip()
+
+    # Update chat history with generated response
+    history.append({"role": "user", "content": user_input})
+    history.append({"role": "assistant", "content": response})
+
+    interaction_count += 1
+
+    formatted_history = [(entry["content"], None) if entry["role"] == "user" else (None, entry["content"]) for entry in history if entry["role"] in ["user", "assistant"]]
+
+    return "", formatted_history, history, interaction_count
 
 
 # Function to send selected story and initial message
@@ -168,7 +133,7 @@ def send_selected_story(title, model_name, system_prompt):
     global data # Ensure data is reset
     data = [] # Reset data for new story
    interaction_count = 1 # Reset interaction count for new story
-    tokenizer, model = load_model(model_name) # Load the appropriate model
+    tokenizer = load_model(model_name) # Load the appropriate model
     selected_story = title
     for story in stories:
         if story["title"] == title:
@@ -237,7 +202,7 @@ def load_user_guide():
        return file.read()
 
 # Combine both model dictionaries
-all_models = {**models, **quantized_models}
+all_models = {**huggingface_tokenizer}
 
 # Create the chat interface using Gradio Blocks
 with gr.Blocks() as demo:
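
Editor's note: the substantive change in app.py is that generation now goes through the Replicate API instead of a locally loaded AutoModelForCausalLM; only the tokenizer stays local, to format the chat history into a prompt. Below is a minimal sketch of that call pattern in isolation. The token strings and the user message are placeholders, and the input fields simply mirror the ones built in interact() above; this is not part of the commit.

import replicate
from transformers import AutoTokenizer

# Placeholder credentials; the app reads these from config.py instead.
replicate_api = replicate.Client(api_token="r8_...")  # hypothetical Replicate token
tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    token="hf_...",  # hypothetical Hugging Face token (the repo is gated)
)

# Format the conversation locally with the model's chat template.
messages = [{"role": "user", "content": "Hello!"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Run generation remotely; the input dict mirrors the one assembled in interact().
output = replicate_api.run(
    "meta/meta-llama-3-8b-instruct",
    input={
        "prompt": prompt,
        "max_new_tokens": 100,
        "temperature": 0.1,
        "prompt_template": "{prompt}",
    },
)

# Replicate streams language-model output as chunks of text, so join them into one reply.
print("".join(output).strip())

Formatting the prompt locally and passing prompt_template="{prompt}" means Replicate uses the already templated prompt as-is rather than applying its own default chat template, which is why both the sketch and app.py join the streamed chunks with "".join(...).strip().
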
config.py CHANGED
@@ -19,27 +19,21 @@ google_sheets_name = "Chatbot Test"
 huggingface_tokenizer = {
     "Meta-Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
     "Llama-2-7B-Chat": "meta-llama/Llama-2-7b-chat-hf",
-    #"Yi-6B-Chat": "01-ai/Yi-6B-Chat",
-    #"Qwen2-7B-Instruct": "Qwen/Qwen2-7B-Instruct"
+    "mistralai/mistral-7b-instruct-v0.2": "mistralai/Mistral-7B-Instruct-v0.2",
 }
 
 #Avaiable models for replicate
 replicate_model= {
-    "Meta-Llama-3-8B-Instruct": "meta/Meta-Llama-3-8B-Instruct",
-    "Llama-2-7B-Chat": "meta/Llama-2-7b-chat-hf",
-    #"Yi-6B-Chat": "01-ai/yi-34b-chat",
-    #"Qwen2-7B-Instruct": "Qwen/Qwen2-7B-Instruct"
+    "Meta-Llama-3-8B-Instruct": "meta/meta-llama-3-8b-instruct",
+    "Llama-2-7B-Chat": "meta/llama-2-7b-chat",
+    "mistralai/mistral-7b-instruct-v0.2": "mistralai/mistral-7b-instruct-v0.2",
 }
 
-# List of models fine-tuned in 4-bit or 8-bit
-#quantized_models = {
-#    "Llama-3-8B-Finetuning-Stories": "rodrisouza/Llama-3-8B-Finetuning-Stories",
-#}
 
-# Default model name
-default_model_name = "Meta-Llama-3-8B-Instruct"
+# Default model (first in list)
+default_model_name = list(replicate_model.items())[0][0]
 
 # Define available user names
-user_names = ["Laura Musto", "Brian Carpenter", "Germán Capdehourat", "Isabel Amigo", "Aiala Rosá", "Luis Chiruzzo", "Igancio Sastre", "Santiago Góngora", "Ignacio Remersaro", "Rodrigo Souza"]
+user_names = ["Laura Musto", "Brian Carpenter", "Germán Capdehourat", "Isabel Amigo", "Aiala Rosá", "Luis Chiruzzo", "Ignacio Sastre", "Santiago Góngora", "Ignacio Remersaro", "Rodrigo Souza"]
 
 MAX_INTERACTIONS = 5
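
Editor's note: app.py looks up both dictionaries with the same display-name key (huggingface_tokenizer for the local tokenizer, replicate_model for the remote slug), so the two mappings are expected to stay in sync. A small, hypothetical sanity check (not part of the commit), assuming config.py is importable as-is:

from config import huggingface_tokenizer, replicate_model, default_model_name

# Every model exposed through Replicate needs a matching tokenizer entry,
# because load_model() resolves the same key in huggingface_tokenizer.
missing = set(replicate_model) - set(huggingface_tokenizer)
assert not missing, f"models without a tokenizer mapping: {missing}"

# default_model_name is the first key of replicate_model in insertion order;
# next(iter(replicate_model)) would be an equivalent, simpler spelling.
assert default_model_name == next(iter(replicate_model))
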