Spaces:

Johnny-Z
/

aes-test

Running

App Files Files Community

aes-test / app.py

Johnny-Z

Upload 2 files

d4c8d5e verified about 21 hours ago

raw

history blame contribute delete

3.28 kB

	import gradio as gr
	import numpy as np
	import torch
	from transformers import SiglipImageProcessor, SiglipVisionModel
	from PIL import Image
	from sklearn.metrics.pairwise import cosine_similarity
	import torch.nn as nn
	import torch.nn.functional as F

	# Inference runs on the CPU with a fixed budget of four intra-op threads.
	device = torch.device('cpu')
	torch.set_num_threads(4)

	# The vision encoder and its image preprocessor are loaded from the same
	# checkpoint id, so the id is spelled once.
	_SIGLIP_CHECKPOINT = 'google/siglip-so400m-patch14-384'
	selected_model = SiglipVisionModel.from_pretrained(
	    _SIGLIP_CHECKPOINT,
	    attn_implementation="sdpa",
	)
	selected_model = selected_model.to(device)
	processor = SiglipImageProcessor.from_pretrained(_SIGLIP_CHECKPOINT)

	class MLP(nn.Module):
	    """Feed-forward head that maps each input vector to a single value.

	    The network is a stack of Linear -> LayerNorm -> Mish stages of widths
	    2048, 512 and 128 (with dropout after the first stage only), followed
	    by a final Linear layer with one output feature.

	    Args:
	        input_size: Number of features in each input vector.
	        xcol: Stored as ``self.xcol``; not otherwise used by this class.
	        ycol: Stored as ``self.ycol``; not otherwise used by this class.
	    """

	    def __init__(self, input_size, xcol='emb', ycol='avg_rating'):
	        super().__init__()
	        self.input_size = input_size
	        self.xcol, self.ycol = xcol, ycol

	        # Modules are created in stack order, so the Sequential indices (and
	        # therefore the state-dict keys) run layers.0 ... layers.10.
	        hidden_widths = (2048, 512, 128)
	        stack = []
	        fan_in = self.input_size
	        for position, fan_out in enumerate(hidden_widths):
	            stack.extend((nn.Linear(fan_in, fan_out), nn.LayerNorm(fan_out), nn.Mish()))
	            if position == 0:
	                # Only the widest stage is followed by dropout.
	                stack.append(nn.Dropout(0.2))
	            fan_in = fan_out
	        stack.append(nn.Linear(fan_in, 1))
	        self.layers = nn.Sequential(*stack)

	    def forward(self, x):
	        """Return the output of the layer stack applied to ``x``."""
	        return self.layers(x)

	# Build the scoring head for 1152-feature inputs and restore its weights.
	mlp = MLP(1152)
	# weights_only=True limits unpickling to tensors and plain containers, which
	# is all a state dict needs and avoids running arbitrary pickled code from
	# the checkpoint file. The tensors are mapped onto the same `device` that
	# the rest of the module uses instead of a separately hard-coded one.
	mlp.load_state_dict(
	    torch.load(
	        "./aesthetic_predictor_siglip_huber_v1_ad_mlp_ep20.pth",
	        map_location=device,
	        weights_only=True,
	    )
	)
	# eval() switches the module to inference behaviour (e.g. disables dropout).
	mlp.to(device).eval()

	def normalized(a, axis=-1, order=2):
	    """Scale ``a`` to unit norm along ``axis``.

	    Args:
	        a: Array-like of values to normalise.
	        axis: Axis along which each norm is computed.
	        order: Norm order forwarded to ``np.linalg.norm``.

	    Returns:
	        ``a`` divided by its norm along ``axis``. Slices whose norm is zero
	        are divided by 1, so they are returned unchanged. Because the norms
	        are promoted to at least one dimension before being expanded, a 1-D
	        input yields a 2-D result with a leading axis of length 1.
	    """
	    norms = np.atleast_1d(np.linalg.norm(a, ord=order, axis=axis))
	    # Replace zero norms in place to avoid dividing by zero.
	    np.putmask(norms, norms == 0, 1)
	    divisor = np.expand_dims(norms, axis)
	    return a / divisor

	def process_image(image, processor, model, device):
	    """Embed one image and score the embedding with the module-level ``mlp``.

	    Args:
	        image: PIL image to evaluate.
	        processor: Callable image preprocessor; invoked as
	            ``processor(img, return_tensors="pt")`` and its result is moved
	            to ``device``.
	        model: Vision model called with the processor output; its
	            ``pooler_output`` is used as the image embedding.
	        device: Torch device on which the model inputs and the MLP input
	            are placed.

	    Returns:
	        A tuple ``(im_emb_arr, prediction_value)``: the normalised embedding
	        as a NumPy array, and the MLP output as a Python float.
	    """
	    # Composite onto an opaque white canvas so transparent pixels become
	    # white before the alpha channel is dropped.
	    rgba = image.convert('RGBA')
	    background = Image.new('RGBA', rgba.size, (255, 255, 255, 255))
	    rgb = Image.alpha_composite(background, rgba).convert('RGB')

	    inputs = processor(rgb, return_tensors="pt").to(device)

	    # Both the encoder and the scoring head are inference-only, so the whole
	    # path runs without autograd bookkeeping.
	    with torch.no_grad():
	        pooler_output = model(**inputs).pooler_output
	        im_emb_arr = normalized(pooler_output.cpu().detach().numpy())
	        # Cast and place in one step. The previous
	        # `.to(device).type(torch.FloatTensor)` always produced a CPU tensor,
	        # which would not match the MLP if `device` were not the CPU.
	        features = torch.from_numpy(im_emb_arr).to(device=device, dtype=torch.float32)
	        prediction = mlp(features)

	    prediction_value = prediction.item()
	    return im_emb_arr, prediction_value


	def infer(image1, image2):
	    """Score two images and measure how similar their embeddings are.

	    Args:
	        image1: First image, passed to ``process_image``.
	        image2: Second image, passed to ``process_image``.

	    Returns:
	        A 3-tuple ``(similarity, score1, score2)``: the cosine similarity of
	        the two embeddings followed by each image's predicted score. If
	        anything raises, the error is printed and the tuple holds the string
	        ``"Error"`` in all three positions instead.
	    """
	    try:
	        emb_a, score_a = process_image(image1, processor, selected_model, device)
	        emb_b, score_b = process_image(image2, processor, selected_model, device)
	        # cosine_similarity returns a matrix; with one row per side the only
	        # entry is at [0][0].
	        similarity = cosine_similarity(emb_a, emb_b)[0][0]
	    except Exception as e:
	        # Best-effort handler: report the failure and return placeholders.
	        print(f"Error during inference: {e}")
	        return ("Error",) * 3
	    else:
	        return similarity, score_a, score_b


	# Page layout: a heading, the two image inputs, the two score outputs, the
	# similarity output, and the submit button, each group in its own row.
	with gr.Blocks() as iface:
	    gr.Markdown("# Image Aesthetic Predictor\nUpload two images to calculate aesthetic score.")

	    # Inputs are delivered to the handler as PIL images.
	    with gr.Row():
	        image1 = gr.Image(type="pil")
	        image2 = gr.Image(type="pil")

	    with gr.Row():
	        prediction1 = gr.Textbox(label="Aesthetic Score 1")
	        prediction2 = gr.Textbox(label="Aesthetic Score 2")

	    with gr.Row():
	        feature_similarity = gr.Textbox(label="Feature Similarity")

	    with gr.Row():
	        submit_btn = gr.Button("Submit")

	    # The outputs list follows the order of the tuple returned by `infer`:
	    # similarity first, then the two scores.
	    submit_btn.click(
	        fn=infer,
	        inputs=[image1, image2],
	        outputs=[feature_similarity, prediction1, prediction2],
	    )

	# Enable the request queue with max_size=10, then start the app.
	iface.queue(max_size=10).launch()