ahxxm/ahxxm-image-classifier

Trained on images that I labeled myself.
Inference takes two steps: embed the image with UForm, then score the embedding with the classifier:
from pathlib import Path

from PIL import Image
import torch
import torch.nn as nn
import uform

# A Path (not a plain str) so that `.name` is available for the text input below.
path = Path("image.jpg")

# Step 1: generate a 768-dimension multimodal embedding for the image.
uf_model = uform.get_model("unum-cloud/uform-vl-english")
# Preprocess inside the context manager so the image file is closed deterministically.
with Image.open(path) as img:
    image_data = uf_model.preprocess_image(img)
text_data = uf_model.preprocess_text(path.name)  # filename not necessarily useful, but encode anyway
memb = uf_model.encode_multimodal(image=image_data, text=text_data)
# Drop the autograd graph and hand the embedding over as a NumPy array.
memb = memb.detach().numpy()
# Sanity check: one image in, one 768-wide embedding out.
assert memb.shape == (1, 768)

# Step 2: define the classifier, load its weights, run inference and apply a sigmoid
class ImageBinaryClassifier(nn.Module):
    """Fully connected binary classifier over 768-dimension embeddings.

    The network is a stack of Linear + ReLU pairs
    (768 -> 2048 -> 2048 -> 2048 -> 256) followed by a final Linear layer
    that emits a single raw logit; no sigmoid is applied inside the module.
    """

    def __init__(self):
        super().__init__()
        # Input width followed by the width of every hidden layer.
        widths = (768, 2048, 2048, 2048, 256)
        stack = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        # Output head: one logit per sample, no activation.
        stack.append(nn.Linear(widths[-1], 1))
        # Keep the attribute name and module order so state_dict keys
        # ("layers.0.weight", "layers.2.weight", ...) stay the same.
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Return the raw logit(s) produced by the layer stack for ``x``."""
        logits = self.layers(x)
        return logits

# Rebuild the classifier and restore the trained weights on CPU.
saved_model = ImageBinaryClassifier()
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state dict needs.
state_dict = torch.load("model.pt", map_location=torch.device("cpu"), weights_only=True)
saved_model.load_state_dict(state_dict)
saved_model = torch.compile(saved_model)
saved_model.eval()

# nn.Linear only accepts tensors, while the embedding arrives as a NumPy array;
# as_tensor also passes an existing float32 tensor through unchanged.
features = torch.as_tensor(memb, dtype=torch.float32)
# Inference only: skip building an autograd graph.
with torch.no_grad():
    logit = saved_model(features)
# Squash the single raw logit into a probability in [0, 1].
prob = float(torch.sigmoid(logit))