SITCCSA / README.md
KameronB's picture
Fixed the load_state_dict_from_url statement
8de7b7f verified
metadata
license: mit
import torch
from torch import nn
from transformers import RobertaConfig, RobertaModel, RobertaTokenizer
import torch.optim as optim


class HelpdeskReviewModel(nn.Module):
    """RoBERTa encoder followed by a three-layer MLP head with four sigmoid outputs.

    Constructing an instance builds the network on top of the ``roberta-base``
    checkpoint and then overwrites all weights with the fine-tuned state dict
    published at ``WEIGHTS_URL``. The file is fetched through
    ``torch.hub.load_state_dict_from_url`` and stored under ``WEIGHTS_DIR``.

    The submodule attribute names (``bert``, ``fc1``, ``fc2``, ``fc3``,
    ``output``) are the keys of the state dict, so they must not be renamed.
    """

    # Where the fine-tuned checkpoint lives and where it is stored locally.
    WEIGHTS_URL = "https://huggingface.co/KameronB/SITCCSA/resolve/main/pytorch_model.bin"
    WEIGHTS_DIR = "./model"
    WEIGHTS_FILE = "sitccsa_pytorch_model.bin"

    def __init__(self):
        super().__init__()
        self.bert = RobertaModel.from_pretrained('roberta-base')
        self.drop = nn.Dropout(p=0.2)

        # Fully connected layers narrowing the pooled encoder output.
        self.fc1 = nn.Linear(self.bert.config.hidden_size, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)

        # Activation shared by the three hidden layers.
        self.relu = nn.ReLU()

        # Four outputs, each squashed independently into (0, 1).
        self.output = nn.Linear(128, 4)
        self.sigmoid = nn.Sigmoid()

        # Always deserialize onto the CPU: without map_location a checkpoint
        # holding CUDA tensors cannot be loaded on a CPU-only machine. The
        # module's parameters live on the CPU at this point anyway, so the
        # result is the same on GPU hosts; callers move the model afterwards.
        # NOTE(review): this unpickles a file fetched over the network; if the
        # installed torch version supports it, consider weights_only=True.
        state_dict = torch.hub.load_state_dict_from_url(
            self.WEIGHTS_URL,
            model_dir=self.WEIGHTS_DIR,
            file_name=self.WEIGHTS_FILE,
            map_location="cpu",
        )
        self.load_state_dict(state_dict)

    def forward(self, input_ids, attention_mask):
        """Run the encoder and the MLP head.

        Args:
            input_ids: token id tensor passed straight to the encoder
                (presumably ``(batch, seq_len)`` -- confirm against caller).
            attention_mask: attention mask tensor passed straight to the encoder.

        Returns:
            Tensor of sigmoid activations whose last dimension has size 4.
        """
        # With return_dict=False the encoder returns a tuple; only the second
        # element (the pooled output) is used here.
        _, pooled_output = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False,
        )
        hidden = self.drop(pooled_output)

        # Hidden layers, each followed by ReLU.
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.relu(layer(hidden))

        return self.sigmoid(self.output(hidden))

# Tokenizer of the 'roberta-base' checkpoint, the same checkpoint the encoder
# inside HelpdeskReviewModel is built from.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
# Instantiating the model also fetches and loads the fine-tuned weights
# (done inside HelpdeskReviewModel.__init__).
model = HelpdeskReviewModel()


def make_prediction(model, tokenizer, text, max_length=512):
    """Score a single text with *model* and return its outputs as a numpy array.

    The model is switched to evaluation mode before the forward pass (and is
    left in that mode), so dropout layers do not randomly perturb the result.
    When CUDA is available the model and the inputs are moved to the GPU.

    Args:
        model: ``nn.Module`` called as ``model(input_ids, attention_mask=...)``.
        tokenizer: object exposing ``encode_plus`` that returns a mapping with
            ``'input_ids'`` and ``'attention_mask'`` tensors.
        text: the string to score.
        max_length: length every input is padded or truncated to.

    Returns:
        The first row of the model output, moved to the CPU and converted to
        a numpy array.
    """
    # Tokenize the input text and convert it to tensors.
    encoded = tokenizer.encode_plus(
        text,
        add_special_tokens=True,  # Add the tokenizer's own special tokens.
        max_length=max_length,    # Pad & truncate every input to this length.
        padding='max_length',
        truncation=True,
        return_tensors="pt",      # Return PyTorch tensors.
    )
    input_ids = encoded['input_ids']
    attention_mask = encoded['attention_mask']

    # Use the GPU when there is one; model and inputs must share a device.
    if torch.cuda.is_available():
        input_ids = input_ids.cuda()
        attention_mask = attention_mask.cuda()
        model = model.cuda()

    # A freshly constructed nn.Module is in training mode, which keeps
    # dropout active and makes predictions non-deterministic.
    model.eval()

    # No gradients are needed for inference.
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)

    # Back to the CPU (a no-op when already there) and on to numpy.
    return outputs.cpu().numpy()[0]

# Demo: run a handful of sample helpdesk reviews through the model and print
# the raw output array produced for each one.
texts = [
    "This Agent is TERRIBLE!: The agent I spoke to on the phone did not seem to have any idea of what he was doing.",
    "Excellent work!: The tech that installed my software was amazing! Thank you!",
    "Good Work: The person who anwsered the phone did a pretty good job. It took a bit longer than I would have liked, but they got the job done.",
    "Bad Computer: My Computer is a piece of junk!!!",
    "Poor Service: I sent David and email and it took him over 30 seconds to respond. The service is so slow that I missed the solar eclipse.",
    "Very Slow: The technician was very slow.",
    "Thank you!: Stanley did a great job installing my software!",
    "You need better training: These agents need better training, they cant even seem to do simple troubleshooting.",
    "The technician threatened my life: The technician threatened my life!",
]
for review in texts:
    print(make_prediction(model, tokenizer, review))