Geonmo committed on
Commit
ddf780e
·
1 Parent(s): 88b7229
Files changed (4) hide show
  1. app.py +35 -10
  2. example1.jpg +0 -0
  3. example2.jpg +0 -0
  4. example3.jpg +0 -0
app.py CHANGED
@@ -61,13 +61,15 @@ def normalized(a, axis=-1, order=2):
61
 
62
  def load_models():
63
  model = MLP(768)
64
- s = torch.load("sac+logos+ava1-l14-linearMSE.pth")
 
 
 
65
 
66
  model.load_state_dict(s)
67
- model.to("cuda")
68
  model.eval()
69
 
70
- device = "cuda" if torch.cuda.is_available() else "cpu"
71
  model2, preprocess = clip.load("ViT-L/14", device=device)
72
 
73
  model_dict = {}
@@ -82,8 +84,14 @@ def predict(image):
82
  image_input = model_dict['clip_preprocess'](image).unsqueeze(0).to(model_dict['device'])
83
  with torch.no_grad():
84
  image_features = model_dict['clip_model'].encode_image(image_input)
85
- im_emb_arr = normalized(image_features.detach().cpu().numpy())
86
- prediction = model_dict['classifier'](torch.from_numpy(im_emb_arr).to(model_dict['device']).type(torch.cuda.FloatTensor))
 
 
 
 
 
 
87
  score = prediction.item()
88
 
89
  return {'aesthetic score': score}
@@ -101,8 +109,25 @@ if __name__ == '__main__':
101
 
102
  title = 'image aesthetic predictor'
103
 
104
- gr.Interface(predict,
105
- inputs,
106
- outputs,
107
- title=title,
108
- ).launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  def load_models():
63
  model = MLP(768)
64
+
65
+ device = "cuda" if torch.cuda.is_available() else "cpu"
66
+
67
+ s = torch.load("sac+logos+ava1-l14-linearMSE.pth", map_location=device)
68
 
69
  model.load_state_dict(s)
70
+ model.to(device)
71
  model.eval()
72
 
 
73
  model2, preprocess = clip.load("ViT-L/14", device=device)
74
 
75
  model_dict = {}
 
84
  image_input = model_dict['clip_preprocess'](image).unsqueeze(0).to(model_dict['device'])
85
  with torch.no_grad():
86
  image_features = model_dict['clip_model'].encode_image(image_input)
87
+ if model_dict['device'] == 'cuda':
88
+ im_emb_arr = normalized(image_features.detach().cpu().numpy())
89
+ im_emb = torch.from_numpy(im_emb_arr).to(model_dict['device']).type(torch.cuda.FloatTensor)
90
+ else:
91
+ im_emb_arr = normalized(image_features.detach().numpy())
92
+ im_emb = torch.from_numpy(im_emb_arr).to(model_dict['device']).type(torch.FloatTensor)
93
+
94
+ prediction = model_dict['classifier'](im_emb)
95
  score = prediction.item()
96
 
97
  return {'aesthetic score': score}
 
109
 
110
  title = 'image aesthetic predictor'
111
 
112
+ examples = ['example1.jpg', 'example2.jpg', 'example3.jpg']
113
+
114
+ description = """
115
+ # Image Aesthetic Predictor Demo
116
+ This model (Image Aesthetic Predictor) is trained by LAION Team. See [https://github.com/christophschuhmann/improved-aesthetic-predictor](https://github.com/christophschuhmann/improved-aesthetic-predictor)
117
+ 1. This model is desgined by adding five MLP layers on top of (frozen) CLIP ViT-L/14 and only the MLP layers are fine-tuned with a lot of images by a regression loss term such as MSE and MAE.
118
+ 2. Output is bounded from 0 to 10. The higher the better.
119
+ """
120
+
121
+ article = "<p style='text-align: center'><a href='https://laion.ai/blog/laion-aesthetics/'>LAION aeshetics blog post</a></p>"
122
+
123
+ with gr.Blocks() as demo:
124
+ gr.Markdown(description)
125
+ with gr.Row():
126
+ with gr.Column():
127
+ image_input = gr.Image(type='pil', label='Input image')
128
+ submit_botton = gr.Button('Submit')
129
+ json_output = gr.JSON(label='Output')
130
+ submit_botton.click(predict, inputs=image_input, outputs=json_output)
131
+ gr.Examples(examples=examples, inputs=image_input)
132
+ gr.HTML(article)
133
+ demo.launch()
example1.jpg ADDED
example2.jpg ADDED
example3.jpg ADDED