Spaces:

PhantHive
/

Phearion-bigbrain-v0.0.1

Paused

Phearion-bigbrain-v0.0.1

File size: 1,000 Bytes

8194866
3bf71d2
 
 
8194866
d037b1c
 
3bf71d2
1116122
fcdc41c
1116122
3bf71d2
d037b1c
 
 
 
3bf71d2
fcdc41c
3bf71d2
 
d08a677
3bf71d2
 
1fdbfe6
3bf71d2
d037b1c
1fdbfe6
3bf71d2
d08a677
3bf71d2

import gradio as gr
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Hub repository ids: the base checkpoint and the PEFT adapter layered on it.
BASE_MODEL_ID = "mistralai/Mistral-7B-v0.1"
ADAPTER_ID = "phearion/bigbrain-v0.0.1"

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# All loading happens once, at script start-up, so each request only pays
# for generation.
# NOTE(review): `config` is not referenced by the other code visible in this
# file; it is kept so the module-level name stays available.
config = PeftConfig.from_pretrained(ADAPTER_ID)

# Load the base causal LM, wrap it with the adapter weights, then place the
# combined model on the selected device.
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID),
    ADAPTER_ID,
).to(device)

# The tokenizer comes from the base checkpoint, not from the adapter repo.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)



def greet(text):
    """Generate a short model completion for the user's text.

    The input is wrapped in the prompt template ``'<text>' ->: `` before
    being tokenized and passed to the module-level ``model``.

    Args:
        text: Raw string entered in the Gradio text box.

    Returns:
        The first sequence returned by ``model.generate`` (limited to 20
        newly generated tokens), decoded with special tokens stripped.
    """
    # The tokenizer produces CPU tensors, while the model has been moved to
    # `device`. Move the inputs as well, otherwise generation fails with a
    # device mismatch whenever CUDA is in use.
    batch = tokenizer(f"'{text}' ->: ", return_tensors='pt').to(device)

    # Inference only: skip gradient tracking to save memory and time.
    with torch.no_grad():
        output_tokens = model.generate(**batch, max_new_tokens=20)

    return tokenizer.decode(output_tokens[0], skip_special_tokens=True)


# Expose greet through a minimal Gradio UI (one text input, one text output)
# and start serving it as soon as the script runs.
iface = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)
iface.launch()