Update app.py
app.py CHANGED
@@ -4,10 +4,11 @@ import torch
 import spaces
 
 # Load the model and tokenizer
-model_name = "mrcuddle/
+model_name = "mrcuddle/Dark-Hermes3-Llama3.2-3B"
 device = "cuda" if torch.cuda.is_available() else "cpu" # Detect GPU or default to CPU
+dtype = torch.bfloat16 if device == "cuda" else torch.float32 # Use bfloat16 for mixed precision on GPU
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name).to(device) #
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=dtype).to(device) # Ensure model is on the correct device
 model.eval() # Ensure the model is in evaluation mode
 
 # Define the system prompt
@@ -30,8 +31,8 @@ def chatbot(message, history):
     conversation += "".join([f"User: {msg}\nBot: {resp}\n" for msg, resp in history])
     conversation += f"User: {message}\nBot:"
 
-    # Tokenize the input and move it to the correct device
-    inputs = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=1024).to(device)
+    # Tokenize the input and move it to the correct device and dtype
+    inputs = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=1024).to(device, dtype=dtype)
 
     # Generate a response
     outputs = model.generate(
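For context, the loading pattern introduced in the first hunk can be exercised on its own. The sketch below is a minimal, self-contained version of the new lines; the model name and the device/dtype logic are taken directly from the diff, while the imports and surrounding structure are filled in as a plausible reconstruction, not a copy of the rest of app.py.

    # Minimal sketch of the mixed-precision loading pattern from the diff.
    # Assumes torch and transformers are installed.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_name = "mrcuddle/Dark-Hermes3-Llama3.2-3B"
    device = "cuda" if torch.cuda.is_available() else "cpu"  # Detect GPU or default to CPU
    dtype = torch.bfloat16 if device == "cuda" else torch.float32  # bfloat16 only on GPU

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=dtype).to(device)
    model.eval()  # Inference only; disables dropout and similar training behavior

Loading the weights directly in bfloat16 roughly halves GPU memory use compared to float32, which matters on the small GPUs typically backing a Space.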
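One caveat worth flagging on the second hunk: the tokenizer's output (input_ids, attention_mask) consists of integer tensors that must stay torch.long for the embedding lookup, so casting them with .to(device, dtype=dtype) is risky; depending on the transformers version, BatchEncoding.to() may also accept only a device argument. A commonly used, safer pattern is to move the encoding to the device and let the model's bfloat16 weights handle precision, as sketched below. The sketch reuses tokenizer, model, and device from the previous snippet; the example conversation string and the max_new_tokens value are illustrative, since the original generate() arguments are cut off in the diff.

    # Safer variant of the tokenization step: move to device, do not cast token ids.
    conversation = "User: Hello!\nBot:"  # example prompt in the app's chat format
    inputs = tokenizer(
        conversation,
        return_tensors="pt",
        truncation=True,
        max_length=1024,
    ).to(device)  # device move only; token ids remain integer tensors

    with torch.no_grad():  # generation needs no gradients
        output_ids = model.generate(**inputs, max_new_tokens=256)  # hypothetical budget
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))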