ignacio committed
Commit
69df8a4
1 Parent(s): 6830e68

replicate and three models

Files changed (2)
  1. app.py +49 -84
  2. config.py +7 -13
app.py CHANGED
@@ -4,8 +4,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import pandas as pd
 from datetime import datetime, timedelta, timezone
 import torch
-from config import hugging_face_token, replicate_token, init_google_sheets_client, models, replicate_model, quantized_models, default_model_name, user_names, google_sheets_name, MAX_INTERACTIONS
-import spaces
+from config import hugging_face_token, replicate_token, init_google_sheets_client, huggingface_tokenizer, replicate_model, default_model_name, user_names, google_sheets_name, MAX_INTERACTIONS
 import replicate
 
 # Hack for ZeroGPU
@@ -47,19 +46,12 @@ replicate_api = replicate.Client(api_token=replicate_token)
 def load_model(model_name):
     global tokenizer, selected_model #model
     try:
-        # Release the memory of the previous model if exists
-        #not applicable for the API
-        #if model is not None:
-        #    del model
-        #    torch.cuda.empty_cache()
 
-        # Check if the model is in models or quantized_models and load accordingly
-        if model_name in models:
-            model_path = models[model_name]
-        elif model_name in quantized_models:
-            model_path = quantized_models[model_name]
+        # Check if the model is in models and load accordingly
+        if model_name in huggingface_tokenizer:
+            model_path = huggingface_tokenizer[model_name]
         else:
-            raise ValueError(f"Model {model_name} not found in either models or quantized_models.")
+            raise ValueError(f"Model {model_name} not found in models")
 
         tokenizer = AutoTokenizer.from_pretrained(
             model_path,
@@ -73,16 +65,6 @@ def load_model(model_name):
             tokenizer.pad_token = tokenizer.eos_token
             tokenizer.add_special_tokens({'pad_token': tokenizer.eos_token})
 
-        #model = AutoModelForCausalLM.from_pretrained(
-        #    model_path,
-        #    token=hugging_face_token,
-        #    trust_remote_code=True
-        #)
-        #
-        ## Only move to CUDA if it's not a quantized model
-        #if model_name not in quantized_models:
-        #    model = model.to("cuda")
-        #
         selected_model = model_name
     except Exception as e:
         print(f"Error loading model {model_name}: {e}")
@@ -99,66 +81,49 @@ tokenizer = load_model(selected_model) #, model
 chat_history = []
 
 # Function to handle interaction with model
-#@spaces.GPU
 def interact(user_input, history, interaction_count, model_name):
-    global tokenizer, model
-    try:
-        if tokenizer is None or model is None:
-            raise ValueError("Tokenizer or model is not initialized.")
-
-        ## Determine the device to use (either CUDA if available, or CPU)
-        #device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        #
-        ## Only move the model to the device if it's not a quantized model
-        #if model_name not in quantized_models:
-        #    model = model.to(device)
-
-        if interaction_count >= MAX_INTERACTIONS:
-            user_input += ". Thank you for your questions. Our session is now over. Goodbye!"
-
-        messages = history + [{"role": "user", "content": user_input}]
-
-        # Ensure roles alternate correctly
-        for i in range(1, len(messages)):
-            if messages[i-1].get("role") == messages[i].get("role"):
-                raise ValueError("Conversation roles must alternate user/assistant/user/assistant/...")
-
-        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-
-        # Move input tensor to the correct device
-        #input_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(device)
-
-        # Generate request
-        inpt = {"prompt": prompt,
-                "max_new_tokens": 100,
-                "temperature": 0.1,
-                "prompt_template": "{prompt}",}
-                #"num_return_sequences": 1,
-                #"pad_token_id": tokenizer.eos_token_id}
-
-        #make request
-        response = replicate_api.run(
-            replicate_model[model],
-            input=inpt
-        )
-        response = "".join(response).strip()
-        #chat_history_ids = model.generate(input_ids, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id, temperature=0.1)
-        #response = tokenizer.decode(chat_history_ids[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
-
-        # Update chat history with generated response
-        history.append({"role": "user", "content": user_input})
-        history.append({"role": "assistant", "content": response})
-
-        interaction_count += 1
-
-        formatted_history = [(entry["content"], None) if entry["role"] == "user" else (None, entry["content"]) for entry in history if entry["role"] in ["user", "assistant"]]
-
-        return "", formatted_history, history, interaction_count
-    except Exception as e:
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        print(f"Error during interaction: {e}")
-        raise gr.Error(f"An error occurred during interaction: {str(e)}")
+    #global tokenizer, model
+    tokenizer = load_model(model_name)
+
+    if tokenizer is None: #or model is None:
+        raise ValueError("Tokenizer or model is not initialized.")
+
+    if interaction_count >= MAX_INTERACTIONS:
+        user_input += ". Thank you for your questions. Our session is now over. Goodbye!"
+
+    messages = history + [{"role": "user", "content": user_input}]
+
+    # Ensure roles alternate correctly
+    for i in range(1, len(messages)):
+        if messages[i-1].get("role") == messages[i].get("role"):
+            raise ValueError("Conversation roles must alternate user/assistant/user/assistant/...")
+
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+    # Generate request
+    inpt = {"prompt": prompt,
+            "max_new_tokens": 100,
+            "temperature": 0.1,
+            "prompt_template": "{prompt}",}
+            #"num_return_sequences": 1,
+            #"pad_token_id": tokenizer.eos_token_id}
+
+    #make request
+    response = replicate_api.run(
+        replicate_model[model_name],
+        input=inpt
+    )
+    response = "".join(response).strip()
+
+    # Update chat history with generated response
+    history.append({"role": "user", "content": user_input})
+    history.append({"role": "assistant", "content": response})
+
+    interaction_count += 1
+
+    formatted_history = [(entry["content"], None) if entry["role"] == "user" else (None, entry["content"]) for entry in history if entry["role"] in ["user", "assistant"]]
+
+    return "", formatted_history, history, interaction_count
 
 
 # Function to send selected story and initial message
@@ -168,7 +133,7 @@ def send_selected_story(title, model_name, system_prompt):
     global data # Ensure data is reset
     data = [] # Reset data for new story
    interaction_count = 1 # Reset interaction count for new story
-    tokenizer, model = load_model(model_name) # Load the appropriate model
+    tokenizer = load_model(model_name) # Load the appropriate model
     selected_story = title
     for story in stories:
         if story["title"] == title:
@@ -237,7 +202,7 @@ def load_user_guide():
        return file.read()
 
 # Combine both model dictionaries
-all_models = {**models, **quantized_models}
+all_models = {**huggingface_tokenizer}
 
 # Create the chat interface using Gradio Blocks
 with gr.Blocks() as demo:
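
Editor's note: the substantive change in app.py is that generation now goes through the Replicate API instead of a locally loaded AutoModelForCausalLM; only the tokenizer stays local, to format the chat history into a prompt. Below is a minimal sketch of that call pattern in isolation. The token strings and the user message are placeholders, and the input fields simply mirror the ones built in interact() above; this is not part of the commit.

import replicate
from transformers import AutoTokenizer

# Placeholder credentials; the app reads these from config.py instead.
replicate_api = replicate.Client(api_token="r8_...")  # hypothetical Replicate token
tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    token="hf_...",  # hypothetical Hugging Face token (the repo is gated)
)

# Format the conversation locally with the model's chat template.
messages = [{"role": "user", "content": "Hello!"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Run generation remotely; the input dict mirrors the one assembled in interact().
output = replicate_api.run(
    "meta/meta-llama-3-8b-instruct",
    input={
        "prompt": prompt,
        "max_new_tokens": 100,
        "temperature": 0.1,
        "prompt_template": "{prompt}",
    },
)

# Replicate streams language-model output as chunks of text, so join them into one reply.
print("".join(output).strip())

Formatting the prompt locally and passing prompt_template="{prompt}" means Replicate uses the already templated prompt as-is rather than applying its own default chat template, which is why both the sketch and app.py join the streamed chunks with "".join(...).strip().
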
config.py CHANGED
@@ -19,27 +19,21 @@ google_sheets_name = "Chatbot Test"
 huggingface_tokenizer = {
     "Meta-Llama-3-8B-Instruct": "meta-llama/Meta-Llama-3-8B-Instruct",
     "Llama-2-7B-Chat": "meta-llama/Llama-2-7b-chat-hf",
-    #"Yi-6B-Chat": "01-ai/Yi-6B-Chat",
-    #"Qwen2-7B-Instruct": "Qwen/Qwen2-7B-Instruct"
+    "mistralai/mistral-7b-instruct-v0.2": "mistralai/Mistral-7B-Instruct-v0.2",
 }
 
 #Avaiable models for replicate
 replicate_model= {
-    "Meta-Llama-3-8B-Instruct": "meta/Meta-Llama-3-8B-Instruct",
-    "Llama-2-7B-Chat": "meta/Llama-2-7b-chat-hf",
-    #"Yi-6B-Chat": "01-ai/yi-34b-chat",
-    #"Qwen2-7B-Instruct": "Qwen/Qwen2-7B-Instruct"
+    "Meta-Llama-3-8B-Instruct": "meta/meta-llama-3-8b-instruct",
+    "Llama-2-7B-Chat": "meta/llama-2-7b-chat",
+    "mistralai/mistral-7b-instruct-v0.2": "mistralai/mistral-7b-instruct-v0.2",
 }
 
-# List of models fine-tuned in 4-bit or 8-bit
-#quantized_models = {
-#    "Llama-3-8B-Finetuning-Stories": "rodrisouza/Llama-3-8B-Finetuning-Stories",
-#}
 
-# Default model name
-default_model_name = "Meta-Llama-3-8B-Instruct"
+# Default model (first in list)
+default_model_name = list(replicate_model.items())[0][0]
 
 # Define available user names
-user_names = ["Laura Musto", "Brian Carpenter", "Germán Capdehourat", "Isabel Amigo", "Aiala Rosá", "Luis Chiruzzo", "Igancio Sastre", "Santiago Góngora", "Ignacio Remersaro", "Rodrigo Souza"]
+user_names = ["Laura Musto", "Brian Carpenter", "Germán Capdehourat", "Isabel Amigo", "Aiala Rosá", "Luis Chiruzzo", "Ignacio Sastre", "Santiago Góngora", "Ignacio Remersaro", "Rodrigo Souza"]
 
 MAX_INTERACTIONS = 5
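
Editor's note: app.py looks up both dictionaries with the same display-name key (huggingface_tokenizer for the local tokenizer, replicate_model for the remote slug), so the two mappings are expected to stay in sync. A small, hypothetical sanity check (not part of the commit), assuming config.py is importable as-is:

from config import huggingface_tokenizer, replicate_model, default_model_name

# Every model exposed through Replicate needs a matching tokenizer entry,
# because load_model() resolves the same key in huggingface_tokenizer.
missing = set(replicate_model) - set(huggingface_tokenizer)
assert not missing, f"models without a tokenizer mapping: {missing}"

# default_model_name is the first key of replicate_model in insertion order;
# next(iter(replicate_model)) would be an equivalent, simpler spelling.
assert default_model_name == next(iter(replicate_model))
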