Chaitanya Garg committed
Commit 9bb0389 · 1 Parent(s): 87886d5
Browse files
- ViT.py +19 -0
- ViTModel.pt +3 -0
- app.py +32 -0
- examples/example1.jpg +0 -0
- examples/example2.jpg +0 -0
- examples/example3.jpg +0 -0
- examples/example4.jpg +0 -0
- examples/example5.jpg +0 -0
- helper.py +18 -0
- model.py +17 -0
- partViT.py +70 -0
- predictor.py +24 -0
- requirements.txt +3 -0
ViT.py
ADDED
@@ -0,0 +1,19 @@
+import torch
+from torch import nn
+from partViT import patchNPositionalEmbeddingMaker, transformerEncoderBlock
+
+class ViT(nn.Module):
+    def __init__(self, inChannels, outChannels, patchSize, imgSize, hiddenLayer, numHeads, MLPdropOut, numTransformLayers, numClasses, embeddingDropOut=0.1, attnDropOut=0):
+        super().__init__()
+        self.EmbeddingMaker = patchNPositionalEmbeddingMaker(inChannels, outChannels, patchSize, imgSize)
+        # self.transformerEncodingBlock = transformerEncoderBlock(outChannels, hiddenLayer, numHeads, MLPdropOut, attnDropOut)
+        self.embeddingDrop = nn.Dropout(embeddingDropOut)
+        self.TransformEncoder = nn.Sequential(*[transformerEncoderBlock(outChannels, hiddenLayer, numHeads, MLPdropOut, attnDropOut) for _ in range(numTransformLayers)])
+        self.Classifier = nn.Sequential(nn.LayerNorm(normalized_shape=outChannels),
+                                        nn.Linear(outChannels, numClasses))
+    def forward(self, x):
+        x = self.EmbeddingMaker(x)
+        x = self.embeddingDrop(x)
+        x = self.TransformEncoder(x)
+        x = self.Classifier(x[:, 0])
+        return x
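For readers following the diff, here is a minimal sketch (not part of the committed files) of how this ViT class could be instantiated with the ViT-Base/16-style hyperparameters that model.py uses later in this commit; the dummy batch and the shape check are assumptions added purely for illustration.

# Illustrative sketch only -- not part of the commit.
# ViT-Base/16-style settings: 16x16 patches of a 224x224 RGB image,
# 768-dim embeddings, 3072 MLP units, 12 heads, 12 encoder layers, 101 classes.
import torch
from ViT import ViT

model = ViT(inChannels=3, outChannels=768, patchSize=16, imgSize=224,
            hiddenLayer=3072, numHeads=12, MLPdropOut=0.1,
            numTransformLayers=12, numClasses=101)
dummyBatch = torch.randn(2, 3, 224, 224)   # two fake RGB images
logits = model(dummyBatch)                 # classification head reads the class token
print(logits.shape)                        # expected: torch.Size([2, 101])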
ViTModel.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf9a58f8286e2d46f20877ab7bea7b38be359b1aa175273df92ac9150e8257d7
+size 343564994
app.py
ADDED
@@ -0,0 +1,32 @@
+### Imports for Modules ###
+import gradio as gr
+import os
+import torch
+from typing import Tuple, Dict
+from timeit import default_timer as timer
+
+### Functional Imports
+from predictor import predictionMaker
+
+exampleList = [["examples/" + example] for example in os.listdir("examples")]
+
+title = "Food Vision👀 on Food101 Using ViT"
+description = "Trained a ViT to classify images of food based on [Food101](https://pytorch.org/vision/main/generated/torchvision.datasets.Food101.html)."
+article = "Created by [Eternal Bliassard](https://github.com/EternalBlissard)."
+
+# Create the Gradio demo
+demo = gr.Interface(fn=predictionMaker,
+                    inputs=[gr.Image(type="pil")],
+                    outputs=[gr.Label(num_top_classes=3, label="Predictions"),
+                             gr.Number(label="Prediction time (s)")],
+                    examples=exampleList,
+                    title=title,
+                    description=description,
+                    article=article)
+
+# Launch the demo!
+demo.launch()
examples/example1.jpg
ADDED
examples/example2.jpg
ADDED
examples/example3.jpg
ADDED
examples/example4.jpg
ADDED
examples/example5.jpg
ADDED
helper.py
ADDED
@@ -0,0 +1,18 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import torch
+import random
+import zipfile
+from pathlib import Path
+import requests
+
+def setAllSeeds(seed):
+    os.environ['MY_GLOBAL_SEED'] = str(seed)
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    torch.cuda.manual_seed_all(seed)
model.py
ADDED
@@ -0,0 +1,17 @@
+import torch
+import torchvision
+from torch import nn
+from helper import setAllSeeds
+from ViT import ViT
+
+def getViT(seed, classNames, DEVICE):
+    setAllSeeds(seed)
+    ViTModel = ViT(3, 768, 16, 224, 3072, 12, 0.1, 12, len(classNames)).to(DEVICE)
+    vitWeights = torchvision.models.ViT_B_16_Weights.DEFAULT
+    vitTransforms = vitWeights.transforms()
+    vit = torchvision.models.vit_b_16(weights=vitWeights).to(DEVICE)
+    for param in vit.parameters():
+        param.requires_grad = False
+    vit.heads = nn.Linear(in_features=768, out_features=len(classNames)).to(DEVICE)
+    return vit, vitTransforms
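A minimal usage sketch for getViT (not part of the commit): the three-label class list, the "cpu" device, and the blank placeholder image below are assumptions chosen for illustration; the deployed app passes the full list of 101 Food101 class names instead.

# Illustrative sketch only -- placeholder labels and device, not the app's real inputs.
import torch
from PIL import Image
from model import getViT

classNames = ["Pizza", "Sushi", "Steak"]                 # stand-in for the 101 Food101 labels
model, transforms = getViT(seed=42, classNames=classNames, DEVICE="cpu")

# transforms is the pretrained ViT_B_16 preprocessing (resize, crop, normalize);
# model is the frozen torchvision ViT-B/16 with a freshly initialised 3-class head.
img = Image.new("RGB", (256, 256))                       # stand-in for a real photo
batch = transforms(img).unsqueeze(0)                     # shape: (1, 3, 224, 224)
model.eval()
with torch.inference_mode():
    print(model(batch).shape)                            # expected: torch.Size([1, 3])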
partViT.py
ADDED
@@ -0,0 +1,70 @@
+from torch import nn
+import torch
+
+class multiHeadSelfAttentionBlock(nn.Module):
+    def __init__(self, embeddingDim=768, numHeads=12, attnDropOut=0):
+        super().__init__()
+        self.layerNorm = nn.LayerNorm(normalized_shape=embeddingDim)
+        self.multiheadAttn = nn.MultiheadAttention(embed_dim=embeddingDim, num_heads=numHeads, dropout=attnDropOut, batch_first=True)
+
+    def forward(self, x):
+        layNorm = self.layerNorm(x)
+        attnOutPut, _ = self.multiheadAttn(query=layNorm, key=layNorm, value=layNorm)
+        return attnOutPut
+
+class MLPBlock(nn.Module):
+    def __init__(self, embeddingDim, hiddenLayer, dropOut=0.1):
+        super().__init__()
+        self.MLP = nn.Sequential(
+            nn.LayerNorm(normalized_shape=embeddingDim),
+            nn.Linear(embeddingDim, hiddenLayer),
+            nn.GELU(),
+            nn.Dropout(dropOut),
+            nn.Linear(hiddenLayer, embeddingDim),
+            nn.Dropout(dropOut)
+        )
+
+    def forward(self, x):
+        return self.MLP(x)
+
+class transformerEncoderBlock(nn.Module):
+    def __init__(self, embeddingDim, hiddenLayer, numHeads, MLPdropOut, attnDropOut=0):
+        super().__init__()
+        self.MSABlock = multiHeadSelfAttentionBlock(embeddingDim, numHeads, attnDropOut)
+        self.MLPBlock = MLPBlock(embeddingDim, hiddenLayer, MLPdropOut)
+
+    def forward(self, x):
+        x = self.MSABlock(x) + x
+        x = self.MLPBlock(x) + x
+        return x
+
+class patchNPositionalEmbeddingMaker(nn.Module):
+    def __init__(self, inChannels, outChannels, patchSize, imgSize):
+        super().__init__()
+        self.outChannels = outChannels
+
+        # outChannels is the same as embeddingDim
+        self.patchSize = patchSize
+        self.numPatches = int(imgSize**2 / patchSize**2)
+        self.patchMaker = nn.Conv2d(inChannels, outChannels, kernel_size=patchSize, stride=patchSize, padding=0)
+        self.flattener = nn.Flatten(start_dim=2, end_dim=3)
+        self.classEmbedding = nn.Parameter(torch.randn(1, 1, self.outChannels), requires_grad=True)
+        self.PositionalEmbedding = nn.Parameter(torch.randn(1, self.numPatches+1, self.outChannels), requires_grad=True)
+
+    def forward(self, x):
+        batchSize = x.shape[0]
+        imgRes = x.shape[-1]
+        assert imgRes % self.patchSize == 0, 'Input size must be div by patchSize'
+        x = self.patchMaker(x)
+        x = self.flattener(x)
+        x = x.permute(0, 2, 1)
+        classToken = self.classEmbedding.expand(batchSize, -1, -1)
+        x = torch.cat((classToken, x), dim=1)
+        x = x + self.PositionalEmbedding
+        # batchSize = x.shape[0]
+        # embeddingDim = x.shape[-1]
+        return x
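A shape-tracing sketch (not part of the commit) showing how these blocks compose: the batch size and image size below are arbitrary choices for illustration, using the same ViT-B/16 dimensions found elsewhere in the repo.

# Illustrative sketch only -- traces tensor shapes through the embedding and one encoder block.
import torch
from partViT import patchNPositionalEmbeddingMaker, transformerEncoderBlock

embedder = patchNPositionalEmbeddingMaker(inChannels=3, outChannels=768,
                                          patchSize=16, imgSize=224)
block = transformerEncoderBlock(embeddingDim=768, hiddenLayer=3072,
                                numHeads=12, MLPdropOut=0.1)

imgs = torch.randn(2, 3, 224, 224)   # fake batch of two 224x224 RGB images
tokens = embedder(imgs)              # (2, 197, 768): 14*14 = 196 patch tokens + 1 class token
out = block(tokens)                  # residual MSA + MLP blocks preserve the shape: (2, 197, 768)
print(tokens.shape, out.shape)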
predictor.py
ADDED
@@ -0,0 +1,24 @@
+### Imports for Modules ###
+import gradio as gr
+import os
+import torch
+from typing import Tuple, Dict
+from timeit import default_timer as timer
+
+### Functional Imports
+from model import getViT
+
+classNames = ['Apple Pie', 'Baby Back Ribs', 'Baklava', 'Beef Carpaccio', 'Beef Tartare', 'Beet Salad', 'Beignets', 'Bibimbap', 'Bread Pudding', 'Breakfast Burrito', 'Bruschetta', 'Caesar Salad', 'Cannoli', 'Caprese Salad', 'Carrot Cake', 'Ceviche', 'Cheese Plate', 'Cheesecake', 'Chicken Curry', 'Chicken Quesadilla', 'Chicken Wings', 'Chocolate Cake', 'Chocolate Mousse', 'Churros', 'Clam Chowder', 'Club Sandwich', 'Crab Cakes', 'Creme Brulee', 'Croque Madame', 'Cup Cakes', 'Deviled Eggs', 'Donuts', 'Dumplings', 'Edamame', 'Eggs Benedict', 'Escargots', 'Falafel', 'Filet Mignon', 'Fish And Chips', 'Foie Gras', 'French Fries', 'French Onion Soup', 'French Toast', 'Fried Calamari', 'Fried Rice', 'Frozen Yogurt', 'Garlic Bread', 'Gnocchi', 'Greek Salad', 'Grilled Cheese Sandwich', 'Grilled Salmon', 'Guacamole', 'Gyoza', 'Hamburger', 'Hot And Sour Soup', 'Hot Dog', 'Huevos Rancheros', 'Hummus', 'Ice Cream', 'Lasagna', 'Lobster Bisque', 'Lobster Roll Sandwich', 'Macaroni And Cheese', 'Macarons', 'Miso Soup', 'Mussels', 'Nachos', 'Omelette', 'Onion Rings', 'Oysters', 'Pad Thai', 'Paella', 'Pancakes', 'Panna Cotta', 'Peking Duck', 'Pho', 'Pizza', 'Pork Chop', 'Poutine', 'Prime Rib', 'Pulled Pork Sandwich', 'Ramen', 'Ravioli', 'Red Velvet Cake', 'Risotto', 'Samosa', 'Sashimi', 'Scallops', 'Seaweed Salad', 'Shrimp And Grits', 'Spaghetti Bolognese', 'Spaghetti Carbonara', 'Spring Rolls', 'Steak', 'Strawberry Shortcake', 'Sushi', 'Tacos', 'Takoyaki', 'Tiramisu', 'Tuna Tartare', 'Waffles']
+ViTModel, VitTransforms = getViT(42, classNames, torch.device("cpu"))  # CPU device matches the map_location below
+ViTModel.load_state_dict(torch.load(f="ViTModel.pt", map_location=torch.device("cpu")))
+
+def predictionMaker(img):
+    startTime = timer()
+    img = VitTransforms(img).unsqueeze(0)
+    ViTModel.eval()
+    with torch.inference_mode():
+        predProbs = torch.softmax(ViTModel(img), dim=1)
+    predDict = {classNames[i]: float(predProbs[0][i]) for i in range(len(classNames))}
+    endTime = timer()
+    predTime = round(endTime-startTime, 4)
+    return predDict, predTime
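A quick way to exercise predictionMaker outside Gradio (not part of the commit). It assumes ViTModel.pt and the examples folder added in this commit are present locally, and that the saved state dict matches the model returned by getViT.

# Illustrative sketch only -- runs the predictor on one of the committed example images.
from PIL import Image
from predictor import predictionMaker   # importing also builds the model and loads ViTModel.pt

img = Image.open("examples/example1.jpg")
predDict, predTime = predictionMaker(img)

# predDict maps all 101 class names to probabilities; show the three most likely.
top3 = sorted(predDict.items(), key=lambda kv: kv[1], reverse=True)[:3]
print(top3, f"predicted in {predTime}s")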
requirements.txt
ADDED
@@ -0,0 +1,3 @@
+torch==2.2.0
+torchvision==0.17.0
+gradio==4.20.0