Sirreajohn committed
Commit 9d6c5d2 · 1 Parent(s): 5348292

initial_commit

__pycache__/models.cpython-39.pyc ADDED
Binary file (825 Bytes).
 
app.py ADDED
@@ -0,0 +1,64 @@
+ import time
+ import torch
+ import gradio as gr
+ from pathlib import Path
+ from models import *
+
+ # Map the models' output indices to human-readable class names.
+ class_idx_to_names = {
+     0: "pizza",
+     1: "steak",
+     2: "sushi"
+ }
+
+ # Example images bundled with the Space.
+ examples = [[str(path)] for path in Path("examples").glob("*")]
+
+ def predict_one(model, transforms, image, device, class_idx_to_names):
+     model.eval()
+     model = model.to(device)
+     with torch.inference_mode():
+         start_time = time.perf_counter()
+         image_transformed = transforms(image).unsqueeze(dim=0).to(device)
+
+         y_logits = model(image_transformed)
+         y_probs = torch.softmax(y_logits, dim=1)
+
+         end_time = time.perf_counter()
+
+     predictions = {class_idx_to_names[index]: prob.item() for index, prob in enumerate(y_probs[0])}
+
+     return predictions, end_time - start_time
+
+ def predict(image, model_choice):
+     # Default to the EfficientNet-B2 extractor when no model is selected.
+     if model_choice is None or model_choice == "effnet_b2":
+         model, transforms = get_effnet_b2()
+     else:
+         model, transforms = get_vit_16_base_transformer()
+
+     predictions, time_taken = predict_one(model, transforms, image, "cpu", class_idx_to_names)
+     return predictions, time_taken
+
+
+ title = "Food Recognition 🍕🍕"
+ desc = "A dual-model app ft. an EfficientNetB2 feature extractor and a Vision Transformer."
+ article = '''
+ ## Stats on different models
+ ---
+ | Model Name      | Train Loss | Test Loss | Train Accuracy | Test Accuracy | Num Parameters | Model Size |
+ |-----------------|------------|-----------|----------------|---------------|----------------|------------|
+ | EfficientNet_b2 | 0.340270   | 0.301134  | 0.906250       | 0.953409      | 7705221        | 29.91 MB   |
+ | ViT_Base_16     | 0.040448   | 0.055140  | 0.995833       | 0.981250      | 85800963       | 327.39 MB  |
+ '''
+
+ demo = gr.Interface(fn=predict,
+                     inputs=[gr.Image(type="pil", label="Upload a JPEG or PNG"),
+                             gr.Radio(["effnet_b2", "ViT (Vision Transformer)"], label="Choose a model (defaults to effnet_b2)")],
+                     outputs=[gr.Label(num_top_classes=3, label="Predictions"),
+                              gr.Number(label="Prediction time in seconds")],
+                     examples=examples,
+                     title=title,
+                     description=desc,
+                     article=article)
+
+ demo.launch(debug=False)
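For a quick local check, `predict()` can be exercised without the Gradio UI. A minimal smoke-test sketch, not part of the commit: it assumes it replaces the `demo.launch(debug=False)` line at the bottom of app.py (launching the interface would otherwise block the script). The image path is one of the example files added in this commit.

```python
# Smoke test for predict(); a sketch, assuming it replaces demo.launch() in app.py.
from PIL import Image

image = Image.open("examples/108310.jpg")              # example image from this commit
predictions, time_taken = predict(image, "effnet_b2")
print(predictions)                                     # dict mapping class name -> probability
print(f"inference took {time_taken:.3f}s on CPU")
```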
examples/108310.jpg ADDED
examples/1203702.jpg ADDED
examples/2572488.jpg ADDED
examples/296426.jpg ADDED
examples/511818.jpg ADDED
models.py ADDED
@@ -0,0 +1,19 @@
+ import torch
+
+ from torchvision.models import vit_b_16, ViT_B_16_Weights
+ from torchvision.models import efficientnet_b2, EfficientNet_B2_Weights
+
+
+ def get_vit_16_base_transformer():
+     # Forward slashes keep the checkpoint path valid on the Linux host;
+     # map_location="cpu" lets weights saved on a GPU load on a CPU-only machine.
+     vit_b_16_model = torch.load("models/vit_16_base_custom_head_3_classes.pth", map_location="cpu")
+     vit_b_16_transforms = ViT_B_16_Weights.DEFAULT.transforms()
+
+     return vit_b_16_model, vit_b_16_transforms
+
+ def get_effnet_b2():
+     eff_net_b2_model = torch.load("models/eff_netb2_custom_head_3_classes.pth", map_location="cpu")
+     eff_net_b2_transforms = EfficientNet_B2_Weights.DEFAULT.transforms()
+
+     return eff_net_b2_model, eff_net_b2_transforms
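`torch.load` here unpickles whole model objects, which ties each checkpoint to the exact class definitions present at save time. A common, more portable alternative is to persist only the `state_dict` and rebuild the architecture at load time. A sketch of that pattern, not what this commit does: the 3-class head and the file name below are assumptions inferred from the checkpoint names above.

```python
import torch
from torchvision.models import efficientnet_b2

def get_effnet_b2_from_state_dict():
    # Rebuild the architecture, then swap in a 3-class head (pizza/steak/sushi).
    model = efficientnet_b2()
    model.classifier[1] = torch.nn.Linear(in_features=1408, out_features=3)
    # Hypothetical file name; this commit ships pickled models, not state_dicts.
    state_dict = torch.load("models/effnet_b2_state_dict.pth", map_location="cpu")
    model.load_state_dict(state_dict)
    return model
```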
models/eff_netb2_custom_head_3_classes.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:34f7f58fa9ead3866305089bca9f47ab2115e783820d471a060b10aaf43165ac
+ size 31362521
models/vit_16_base_custom_head_3_classes.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:96d4e7ebcdf7e83e23b9e9816e2c6d58b0f5836e6143334440c1bd6b0e9dbe3d
+ size 343289909
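The .pth files are committed as Git LFS pointers: the `oid` is the SHA-256 of the actual file contents and `size` is its byte count. A small sketch for verifying a downloaded checkpoint against its pointer (file path from this commit):

```python
import hashlib

def file_sha256(path: str) -> str:
    # Stream the file in 1 MiB chunks so large checkpoints don't fill memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Should print the oid from the pointer above:
# 34f7f58fa9ead3866305089bca9f47ab2115e783820d471a060b10aaf43165ac
print(file_sha256("models/eff_netb2_custom_head_3_classes.pth"))
```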
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ torch
+ torchvision
+ gradio