file added
- app.py +30 -4
- gcvit/__init__.py +2 -0
- gcvit/__pycache__/__init__.cpython-38.pyc +0 -0
- gcvit/layers/__init__.py +7 -0
- gcvit/layers/__pycache__/__init__.cpython-38.pyc +0 -0
- gcvit/layers/__pycache__/attention.cpython-38.pyc +0 -0
- gcvit/layers/__pycache__/block.cpython-38.pyc +0 -0
- gcvit/layers/__pycache__/drop.cpython-38.pyc +0 -0
- gcvit/layers/__pycache__/embedding.cpython-38.pyc +0 -0
- gcvit/layers/__pycache__/feature.cpython-38.pyc +0 -0
- gcvit/layers/__pycache__/level.cpython-38.pyc +0 -0
- gcvit/layers/__pycache__/window.cpython-38.pyc +0 -0
- gcvit/layers/attention.py +96 -0
- gcvit/layers/block.py +99 -0
- gcvit/layers/drop.py +40 -0
- gcvit/layers/embedding.py +27 -0
- gcvit/layers/feature.py +202 -0
- gcvit/layers/level.py +93 -0
- gcvit/layers/window.py +15 -0
- gcvit/models/__init__.py +1 -0
- gcvit/models/__pycache__/__init__.cpython-38.pyc +0 -0
- gcvit/models/__pycache__/gcvit.cpython-38.pyc +0 -0
- gcvit/models/gcvit.py +145 -0
- gcvit/utils/__init__.py +1 -0
- gcvit/utils/gradcam.py +69 -0
- gcvit/version.py +1 -0
- requirements.txt +5 -0
- setup.py +50 -0
app.py
CHANGED
@@ -1,7 +1,33 @@
+import tensorflow as tf
 import gradio as gr
+import gcvit
+from gcvit.utils import get_gradcam_model, get_gradcam_prediction
 
-def
-
+def predict_fn(image, model_name):
+    """A predict function that will be invoked by gradio."""
+    model = getattr(gcvit, model_name)(pretrain=True)
+    gradcam_model = get_gradcam_model(model)
+    preds, overlay = get_gradcam_prediction(image, gradcam_model, cmap='jet', alpha=0.4, pred_index=None)
+    preds = {x[1]: x[2] for x in preds}
+    return [preds, overlay]
 
-
-
+demo = gr.Interface(
+    fn=predict_fn,
+    inputs=[
+        gr.inputs.Image(label="Input Image"),
+        gr.Radio(['GCViTTiny', 'GCViTSmall', 'GCViTBase'], value='GCViTTiny', label='Model Size')
+    ],
+    outputs=[
+        gr.outputs.Label(label="Prediction"),
+        gr.outputs.Image(label="GradCAM"),
+    ],
+    title="Global Context Vision Transformer (GCViT) Demo",
+    description="ImageNet Pretrain.",
+    examples=[
+        ["example/african_elephant.png"],
+        ["example/chelsea.png"],
+        ["example/german_shepherd.jpg"],
+        ["example/panda.jpg"],
+    ],
+)
+demo.launch()
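
Not part of the commit: a quick sanity check of the handler outside the Gradio UI, as a minimal sketch that assumes one of the Space's bundled example images is on disk:

    import numpy as np
    from PIL import Image

    # 'example/panda.jpg' is one of the demo's bundled examples
    image = np.array(Image.open('example/panda.jpg'))
    preds, overlay = predict_fn(image, 'GCViTTiny')  # {class_name: prob} dict and a PIL overlay
    print(sorted(preds.items(), key=lambda kv: kv[1], reverse=True)[:3])

Note that predict_fn rebuilds the model on every call, which is fine for a demo but slow for batch use.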
gcvit/__init__.py
ADDED
@@ -0,0 +1,2 @@
+from .models import GCViT, GCViTTiny, GCViTSmall, GCViTBase
+from .version import __version__
gcvit/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (228 Bytes).
gcvit/layers/__init__.py
ADDED
@@ -0,0 +1,7 @@
+from .window import window_partition, window_reverse
+from .attention import WindowAttention
+from .drop import DropPath, Identity
+from .embedding import PatchEmbed
+from .feature import Mlp, FeatExtract, ReduceSize, SE, Resizing
+from .block import GCViTBlock
+from .level import GCViTLayer
gcvit/layers/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (530 Bytes).
gcvit/layers/__pycache__/attention.cpython-38.pyc
ADDED
Binary file (3.58 kB).
gcvit/layers/__pycache__/block.cpython-38.pyc
ADDED
Binary file (3 kB).
gcvit/layers/__pycache__/drop.cpython-38.pyc
ADDED
Binary file (1.8 kB).
gcvit/layers/__pycache__/embedding.cpython-38.pyc
ADDED
Binary file (1.39 kB).
gcvit/layers/__pycache__/feature.cpython-38.pyc
ADDED
Binary file (5.5 kB).
gcvit/layers/__pycache__/level.cpython-38.pyc
ADDED
Binary file (3 kB).
gcvit/layers/__pycache__/window.cpython-38.pyc
ADDED
Binary file (801 Bytes).
gcvit/layers/attention.py
ADDED
@@ -0,0 +1,96 @@
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+
+
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class WindowAttention(tf.keras.layers.Layer):
+    def __init__(self, window_size, num_heads, global_query, qkv_bias=True, qk_scale=None, attn_dropout=0., proj_dropout=0.,
+                 **kwargs):
+        super().__init__(**kwargs)
+        window_size = (window_size, window_size)
+        self.window_size = window_size
+        self.num_heads = num_heads
+        self.global_query = global_query
+        self.qkv_bias = qkv_bias
+        self.qk_scale = qk_scale
+        self.attn_dropout = attn_dropout
+        self.proj_dropout = proj_dropout
+
+    def build(self, input_shape):
+        dim = input_shape[0][-1]
+        head_dim = dim // self.num_heads
+        self.scale = self.qk_scale or head_dim ** -0.5
+        self.qkv_size = 3 - int(self.global_query)
+        self.qkv = tf.keras.layers.Dense(dim * self.qkv_size, use_bias=self.qkv_bias, name='qkv')
+        self.relative_position_bias_table = self.add_weight(
+            'relative_position_bias_table',
+            shape=[(2 * self.window_size[0] - 1) * (2 * self.window_size[1] - 1), self.num_heads],
+            initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
+            trainable=True,
+            dtype=self.dtype)
+        self.attn_drop = tf.keras.layers.Dropout(self.attn_dropout, name='attn_drop')
+        self.proj = tf.keras.layers.Dense(dim, name='proj')
+        self.proj_drop = tf.keras.layers.Dropout(self.proj_dropout, name='proj_drop')
+        self.softmax = tf.keras.layers.Activation('softmax', name='softmax')
+        self.relative_position_index = self.get_relative_position_index()
+        super().build(input_shape)
+
+    def get_relative_position_index(self):
+        coords_h = tf.range(self.window_size[0])
+        coords_w = tf.range(self.window_size[1])
+        coords = tf.stack(tf.meshgrid(coords_h, coords_w, indexing='ij'), axis=0)
+        coords_flatten = tf.reshape(coords, [2, -1])
+        relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :]
+        relative_coords = tf.transpose(relative_coords, perm=[1, 2, 0])
+        relative_coords_xx = (relative_coords[:, :, 0] + self.window_size[0] - 1)
+        relative_coords_yy = (relative_coords[:, :, 1] + self.window_size[1] - 1)
+        relative_coords_xx = relative_coords_xx * (2 * self.window_size[1] - 1)
+        relative_position_index = (relative_coords_xx + relative_coords_yy)
+        return relative_position_index
+
+    def call(self, inputs, **kwargs):
+        if self.global_query:
+            inputs, q_global = inputs
+            B = tf.shape(q_global)[0]  # B, N, C
+        else:
+            inputs = inputs[0]
+        B_, N, C = tf.unstack(tf.shape(inputs), num=3)  # B*num_window, num_tokens, channels
+        qkv = self.qkv(inputs)
+        qkv = tf.reshape(qkv, [B_, N, self.qkv_size, self.num_heads, C // self.num_heads])
+        qkv = tf.transpose(qkv, [2, 0, 3, 1, 4])
+        if self.global_query:
+            k, v = tf.unstack(qkv, num=2, axis=0)  # for an unknown shape, num=None would throw an error
+            q_global = tf.repeat(q_global, repeats=B_ // B, axis=0)  # num_windows = B_//B => q_global is shared by all windows of an image
+            q = tf.reshape(q_global, shape=[B_, N, self.num_heads, C // self.num_heads])
+            q = tf.transpose(q, perm=[0, 2, 1, 3])
+        else:
+            q, k, v = tf.unstack(qkv, num=3, axis=0)
+        q = q * self.scale
+        attn = (q @ tf.transpose(k, perm=[0, 1, 3, 2]))
+        relative_position_bias = tf.gather(self.relative_position_bias_table, tf.reshape(self.relative_position_index, shape=[-1]))
+        relative_position_bias = tf.reshape(relative_position_bias,
+                                            shape=[self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1])
+        relative_position_bias = tf.transpose(relative_position_bias, perm=[2, 0, 1])
+        attn = attn + relative_position_bias[tf.newaxis,]
+        attn = self.softmax(attn)
+        attn = self.attn_drop(attn)
+
+        x = tf.transpose((attn @ v), perm=[0, 2, 1, 3])  # B_, num_tokens, num_heads, channels_per_head
+        x = tf.reshape(x, shape=[B_, N, C])
+        x = self.proj(x)
+        x = self.proj_drop(x)
+        return x
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            'window_size': self.window_size,
+            'num_heads': self.num_heads,
+            'global_query': self.global_query,
+            'qkv_bias': self.qkv_bias,
+            'qk_scale': self.qk_scale,
+            'attn_dropout': self.attn_dropout,
+            'proj_dropout': self.proj_dropout
+        })
+        return config
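
A rough shape check for the layer above, with illustrative values only; note that call expects its inputs wrapped in a list, since it unpacks inputs[0]:

    import tensorflow as tf
    from gcvit.layers import WindowAttention

    attn = WindowAttention(window_size=7, num_heads=4, global_query=False)
    x = tf.random.normal((8, 49, 96))  # (batch*num_windows, tokens_per_window, channels)
    y = attn([x])                      # local self-attention with relative position bias
    print(y.shape)                     # (8, 49, 96)

With global_query=True the layer only projects k and v from its input (qkv_size becomes 2) and takes q from the stage-level global query instead.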
gcvit/layers/block.py
ADDED
@@ -0,0 +1,99 @@
+import tensorflow as tf
+
+from .attention import WindowAttention
+from .drop import DropPath
+from .window import window_partition, window_reverse
+from .feature import Mlp, FeatExtract
+
+
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class GCViTBlock(tf.keras.layers.Layer):
+    def __init__(self, window_size, num_heads, global_query, mlp_ratio=4., qkv_bias=True, qk_scale=None, drop=0.,
+                 attn_drop=0., path_drop=0., act_layer='gelu', layer_scale=None, **kwargs):
+        super().__init__(**kwargs)
+        self.window_size = window_size
+        self.num_heads = num_heads
+        self.global_query = global_query
+        self.mlp_ratio = mlp_ratio
+        self.qkv_bias = qkv_bias
+        self.qk_scale = qk_scale
+        self.drop = drop
+        self.attn_drop = attn_drop
+        self.path_drop = path_drop
+        self.act_layer = act_layer
+        self.layer_scale = layer_scale
+
+    def build(self, input_shape):
+        B, H, W, C = input_shape[0]
+        self.norm1 = tf.keras.layers.LayerNormalization(axis=-1, epsilon=1e-05, name='norm1')
+        self.attn = WindowAttention(window_size=self.window_size,
+                                    num_heads=self.num_heads,
+                                    global_query=self.global_query,
+                                    qkv_bias=self.qkv_bias,
+                                    qk_scale=self.qk_scale,
+                                    attn_dropout=self.attn_drop,
+                                    proj_dropout=self.drop,
+                                    name='attn')
+        self.drop_path1 = DropPath(self.path_drop)
+        self.drop_path2 = DropPath(self.path_drop)
+        self.norm2 = tf.keras.layers.LayerNormalization(axis=-1, epsilon=1e-05, name='norm2')
+        self.mlp = Mlp(hidden_features=int(C * self.mlp_ratio), dropout=self.drop, act_layer=self.act_layer, name='mlp')
+        if self.layer_scale is not None:
+            self.gamma1 = self.add_weight(
+                'gamma1',
+                shape=[C],
+                initializer=tf.keras.initializers.Constant(self.layer_scale),
+                trainable=True,
+                dtype=self.dtype)
+            self.gamma2 = self.add_weight(
+                'gamma2',
+                shape=[C],
+                initializer=tf.keras.initializers.Constant(self.layer_scale),
+                trainable=True,
+                dtype=self.dtype)
+        else:
+            self.gamma1 = 1.0
+            self.gamma2 = 1.0
+        self.num_windows = int(H // self.window_size) * int(W // self.window_size)
+        super().build(input_shape)
+
+    def call(self, inputs, **kwargs):
+        if self.global_query:
+            inputs, q_global = inputs
+        else:
+            inputs = inputs[0]
+        B, H, W, C = tf.unstack(tf.shape(inputs), num=4)
+        x = self.norm1(inputs)
+        # create windows and concatenate them along the batch axis
+        x = window_partition(x, self.window_size)  # (B_, win_h, win_w, C)
+        # flatten patch
+        x = tf.reshape(x, shape=[-1, self.window_size * self.window_size, C])  # (B_, N, C) => (batch*num_win, num_token, feature)
+        # attention
+        if self.global_query:
+            x = self.attn([x, q_global])
+        else:
+            x = self.attn([x])
+        # reverse window partition
+        x = window_reverse(x, self.window_size, H, W, C)
+        # FFN
+        x = inputs + self.drop_path1(x * self.gamma1)
+        x = x + self.drop_path2(self.gamma2 * self.mlp(self.norm2(x)))
+        return x
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            'window_size': self.window_size,
+            'num_heads': self.num_heads,
+            'global_query': self.global_query,
+            'mlp_ratio': self.mlp_ratio,
+            'qkv_bias': self.qkv_bias,
+            'qk_scale': self.qk_scale,
+            'drop': self.drop,
+            'attn_drop': self.attn_drop,
+            'path_drop': self.path_drop,
+            'act_layer': self.act_layer,
+            'layer_scale': self.layer_scale,
+            'num_windows': self.num_windows,
+        })
+        return config
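
A minimal local-attention sketch with hypothetical sizes; H and W must be divisible by window_size, and inputs are again passed as a list:

    import tensorflow as tf
    from gcvit.layers import GCViTBlock

    blk = GCViTBlock(window_size=7, num_heads=4, global_query=False)
    x = tf.random.normal((1, 28, 28, 96))
    y = blk([x])    # partition -> window attention -> reverse -> MLP, each with a residual
    print(y.shape)  # (1, 28, 28, 96)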
gcvit/layers/drop.py
ADDED
@@ -0,0 +1,40 @@
+import tensorflow as tf
+
+
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class Identity(tf.keras.layers.Layer):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    def call(self, x):
+        return tf.identity(x)
+
+    def get_config(self):
+        config = super().get_config()
+        return config
+
+
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class DropPath(tf.keras.layers.Layer):
+    def __init__(self, drop_prob=0., scale_by_keep=True, **kwargs):
+        super().__init__(**kwargs)
+        self.drop_prob = drop_prob
+        self.scale_by_keep = scale_by_keep
+
+    def call(self, x, training=None):
+        if self.drop_prob == 0. or not training:
+            return x
+        keep_prob = 1 - self.drop_prob
+        shape = (tf.shape(x)[0],) + (1,) * (len(tf.shape(x)) - 1)
+        random_tensor = keep_prob + tf.random.uniform(shape, 0, 1)
+        random_tensor = tf.floor(random_tensor)
+        if keep_prob > 0.0 and self.scale_by_keep:
+            x = (x / keep_prob)
+        return x * random_tensor
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            "drop_prob": self.drop_prob,
+            "scale_by_keep": self.scale_by_keep
+        })
+        return config
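
DropPath implements stochastic depth: at inference it is the identity, while in training it zeroes whole samples and rescales the survivors by 1/keep_prob. A small sketch:

    import tensorflow as tf
    from gcvit.layers import DropPath

    dp = DropPath(drop_prob=0.5)
    x = tf.ones((4, 3))
    print(dp(x, training=False))  # unchanged
    print(dp(x, training=True))   # roughly half the rows zeroed, the rest scaled to 2.0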
gcvit/layers/embedding.py
ADDED
@@ -0,0 +1,27 @@
+import tensorflow as tf
+
+from .feature import ReduceSize
+
+
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class PatchEmbed(tf.keras.layers.Layer):
+    def __init__(self, dim, **kwargs):
+        super().__init__(**kwargs)
+        self.dim = dim
+
+    def build(self, input_shape):
+        self.pad = tf.keras.layers.ZeroPadding2D(1, name='pad')
+        self.proj = tf.keras.layers.Conv2D(self.dim, kernel_size=3, strides=2, name='proj')
+        self.conv_down = ReduceSize(keep_dim=True, name='conv_down')
+        super().build(input_shape)
+
+    def call(self, inputs, **kwargs):
+        x = self.pad(inputs)
+        x = self.proj(x)
+        x = self.conv_down(x)
+        return x
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({'dim': self.dim})
+        return config
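
PatchEmbed downsamples by 4x overall: the strided 3x3 conv halves the resolution and the ReduceSize(keep_dim=True) stem halves it again without changing the channel count. A sketch with assumed sizes:

    import tensorflow as tf
    from gcvit.layers import PatchEmbed

    pe = PatchEmbed(dim=64)
    x = tf.random.normal((1, 224, 224, 3))
    print(pe(x).shape)  # (1, 56, 56, 64)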
gcvit/layers/feature.py
ADDED
@@ -0,0 +1,202 @@
+import tensorflow as tf
+import tensorflow_addons as tfa
+
+H_AXIS = -3
+W_AXIS = -2
+
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class Mlp(tf.keras.layers.Layer):
+    def __init__(self, hidden_features=None, out_features=None, act_layer='gelu', dropout=0., **kwargs):
+        super().__init__(**kwargs)
+        self.hidden_features = hidden_features
+        self.out_features = out_features
+        self.act_layer = act_layer
+        self.dropout = dropout
+
+    def build(self, input_shape):
+        self.in_features = input_shape[-1]
+        self.hidden_features = self.hidden_features or self.in_features
+        self.out_features = self.out_features or self.in_features
+        self.fc1 = tf.keras.layers.Dense(self.hidden_features, name="fc1")
+        self.act = tf.keras.layers.Activation(self.act_layer, name="act")
+        self.fc2 = tf.keras.layers.Dense(self.out_features, name="fc2")
+        self.drop1 = tf.keras.layers.Dropout(self.dropout, name="drop1")
+        self.drop2 = tf.keras.layers.Dropout(self.dropout, name="drop2")
+        super().build(input_shape)
+
+    def call(self, inputs, **kwargs):
+        x = self.fc1(inputs)
+        x = self.act(x)
+        x = self.drop1(x)
+        x = self.fc2(x)
+        x = self.drop2(x)
+        return x
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            "hidden_features": self.hidden_features,
+            "out_features": self.out_features,
+            "act_layer": self.act_layer,
+            "dropout": self.dropout
+        })
+        return config
+
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class SE(tf.keras.layers.Layer):
+    def __init__(self, oup=None, expansion=0.25, **kwargs):
+        super().__init__(**kwargs)
+        self.expansion = expansion
+        self.oup = oup
+
+    def build(self, input_shape):
+        inp = input_shape[-1]
+        self.oup = self.oup or inp
+        self.avg_pool = tfa.layers.AdaptiveAveragePooling2D(1, name="avg_pool")
+        self.fc = [
+            tf.keras.layers.Dense(int(inp * self.expansion), use_bias=False, name='fc/0'),
+            tf.keras.layers.Activation('gelu', name='fc/1'),
+            tf.keras.layers.Dense(self.oup, use_bias=False, name='fc/2'),
+            tf.keras.layers.Activation('sigmoid', name='fc/3')
+        ]
+        super().build(input_shape)
+
+    def call(self, inputs, **kwargs):
+        b, _, _, c = tf.unstack(tf.shape(inputs), num=4)
+        x = tf.reshape(self.avg_pool(inputs), (b, c))
+        for layer in self.fc:
+            x = layer(x)
+        x = tf.reshape(x, (b, 1, 1, c))
+        return x * inputs
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            'expansion': self.expansion,
+            'oup': self.oup,
+        })
+        return config
+
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class ReduceSize(tf.keras.layers.Layer):
+    def __init__(self, keep_dim=False, **kwargs):
+        super().__init__(**kwargs)
+        self.keep_dim = keep_dim
+
+    def build(self, input_shape):
+        dim = input_shape[-1]
+        dim_out = dim if self.keep_dim else 2 * dim
+        self.pad1 = tf.keras.layers.ZeroPadding2D(1, name='pad1')
+        self.pad2 = tf.keras.layers.ZeroPadding2D(1, name='pad2')
+        self.conv = [
+            tf.keras.layers.DepthwiseConv2D(kernel_size=3, strides=1, padding='valid', use_bias=False, name='conv/0'),
+            tf.keras.layers.Activation('gelu', name='conv/1'),
+            SE(name='conv/2'),
+            tf.keras.layers.Conv2D(dim, kernel_size=1, strides=1, padding='valid', use_bias=False, name='conv/3')
+        ]
+        self.reduction = tf.keras.layers.Conv2D(dim_out, kernel_size=3, strides=2, padding='valid', use_bias=False,
+                                                name='reduction')
+        self.norm1 = tf.keras.layers.LayerNormalization(axis=-1, epsilon=1e-05, name='norm1')  # eps like PyTorch
+        self.norm2 = tf.keras.layers.LayerNormalization(axis=-1, epsilon=1e-05, name='norm2')
+        super().build(input_shape)
+
+    def call(self, inputs, **kwargs):
+        x = self.norm1(inputs)
+        xr = self.pad1(x)  # if pad had weights it would've thrown an error with .save_weights()
+        for layer in self.conv:
+            xr = layer(xr)
+        x = x + xr
+        x = self.pad2(x)
+        x = self.reduction(x)
+        x = self.norm2(x)
+        return x
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            "keep_dim": self.keep_dim,
+        })
+        return config
+
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class FeatExtract(tf.keras.layers.Layer):
+    def __init__(self, keep_dim=False, **kwargs):
+        super().__init__(**kwargs)
+        self.keep_dim = keep_dim
+
+    def build(self, input_shape):
+        dim = input_shape[-1]
+        self.pad1 = tf.keras.layers.ZeroPadding2D(1, name='pad1')
+        self.pad2 = tf.keras.layers.ZeroPadding2D(1, name='pad2')
+        self.conv = [
+            tf.keras.layers.DepthwiseConv2D(kernel_size=3, strides=1, padding='valid', use_bias=False, name='conv/0'),
+            tf.keras.layers.Activation('gelu', name='conv/1'),
+            SE(name='conv/2'),
+            tf.keras.layers.Conv2D(dim, kernel_size=1, strides=1, padding='valid', use_bias=False, name='conv/3')
+        ]
+        if not self.keep_dim:
+            self.pool = tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='valid', name='pool')
+        # else:
+        #     self.pool = tf.keras.layers.Activation('linear', name='identity')  # hack for PyTorch nn.Identity layer ;)
+        super().build(input_shape)
+
+    def call(self, inputs, **kwargs):
+        x = inputs
+        xr = self.pad1(x)
+        for layer in self.conv:
+            xr = layer(xr)
+        x = x + xr  # if pad had weights it would've thrown an error with .save_weights()
+        if not self.keep_dim:
+            x = self.pad2(x)
+            x = self.pool(x)
+        return x
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            "keep_dim": self.keep_dim,
+        })
+        return config
+
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class Resizing(tf.keras.layers.Layer):
+    def __init__(self,
+                 height,
+                 width,
+                 interpolation='bilinear',
+                 **kwargs):
+        self.height = height
+        self.width = width
+        self.interpolation = interpolation
+        super().__init__(**kwargs)
+
+    def call(self, inputs):
+        # tf.image.resize will always output float32 and operate more efficiently on
+        # float32 unless interpolation is nearest, in which case output type matches
+        # input type.
+        if self.interpolation == 'nearest':
+            input_dtype = self.compute_dtype
+        else:
+            input_dtype = tf.float32
+        inputs = tf.cast(inputs, dtype=input_dtype)
+        size = [self.height, self.width]
+        outputs = tf.image.resize(
+            inputs,
+            size=size,
+            method=self.interpolation)
+        return tf.cast(outputs, self.compute_dtype)
+
+    def compute_output_shape(self, input_shape):
+        input_shape = tf.TensorShape(input_shape).as_list()
+        input_shape[H_AXIS] = self.height
+        input_shape[W_AXIS] = self.width
+        return tf.TensorShape(input_shape)
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            'height': self.height,
+            'width': self.width,
+            'interpolation': self.interpolation,
+        })
+        return config
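
ReduceSize and FeatExtract are the two convolutional downsamplers: both apply an SE-gated residual conv block, then ReduceSize halves the resolution with a strided conv (doubling channels unless keep_dim=True) while FeatExtract uses max-pooling, or no pooling at all when keep_dim=True. A shape sketch with assumed sizes:

    import tensorflow as tf
    from gcvit.layers import ReduceSize, FeatExtract

    x = tf.random.normal((1, 56, 56, 64))
    print(ReduceSize(keep_dim=False)(x).shape)   # (1, 28, 28, 128)
    print(FeatExtract(keep_dim=False)(x).shape)  # (1, 28, 28, 64)
    print(FeatExtract(keep_dim=True)(x).shape)   # (1, 56, 56, 64)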
gcvit/layers/level.py
ADDED
@@ -0,0 +1,93 @@
+import tensorflow as tf
+
+from .feature import FeatExtract, ReduceSize, Resizing
+from .block import GCViTBlock
+
+@tf.keras.utils.register_keras_serializable(package="gcvit")
+class GCViTLayer(tf.keras.layers.Layer):
+    def __init__(self, depth, num_heads, window_size, keep_dims, downsample=True, mlp_ratio=4., qkv_bias=True,
+                 qk_scale=None, drop=0., attn_drop=0., path_drop=0., layer_scale=None, resize_query=False, **kwargs):
+        super().__init__(**kwargs)
+        self.depth = depth
+        self.num_heads = num_heads
+        self.window_size = window_size
+        self.keep_dims = keep_dims
+        self.downsample = downsample
+        self.mlp_ratio = mlp_ratio
+        self.qkv_bias = qkv_bias
+        self.qk_scale = qk_scale
+        self.drop = drop
+        self.attn_drop = attn_drop
+        self.path_drop = path_drop
+        self.layer_scale = layer_scale
+        self.resize_query = resize_query
+
+    def build(self, input_shape):
+        path_drop = [self.path_drop] * self.depth if not isinstance(self.path_drop, list) else self.path_drop
+        self.blocks = [
+            GCViTBlock(window_size=self.window_size,
+                       num_heads=self.num_heads,
+                       global_query=bool(i % 2),
+                       mlp_ratio=self.mlp_ratio,
+                       qkv_bias=self.qkv_bias,
+                       qk_scale=self.qk_scale,
+                       drop=self.drop,
+                       attn_drop=self.attn_drop,
+                       path_drop=path_drop[i],
+                       layer_scale=self.layer_scale,
+                       name=f'blocks/{i}')
+            for i in range(self.depth)]
+        self.down = ReduceSize(keep_dim=False, name='downsample')
+        self.to_q_global = [
+            FeatExtract(keep_dim, name=f'to_q_global/{i}')
+            for i, keep_dim in enumerate(self.keep_dims)]
+        self.resize = Resizing(self.window_size, self.window_size, interpolation='bicubic')
+        super().build(input_shape)
+
+    def call(self, inputs, **kwargs):
+        height, width = tf.unstack(tf.shape(inputs)[1:3], num=2)
+        # pad to a multiple of window_size
+        h_pad = (self.window_size - height % self.window_size) % self.window_size
+        w_pad = (self.window_size - width % self.window_size) % self.window_size
+        x = tf.pad(inputs, [[0, 0],
+                            [h_pad // 2, (h_pad // 2 + h_pad % 2)],  # padding in both directions, unlike tfgcvit
+                            [w_pad // 2, (w_pad // 2 + w_pad % 2)],
+                            [0, 0]])
+        # generate the global query
+        q_global = x  # (B, H, W, C)
+        for layer in self.to_q_global:
+            q_global = layer(q_global)  # official impl issue: https://github.com/NVlabs/GCVit/issues/13
+        # resize query to fit key-value, but this results in poor scores with the official weights?
+        if self.resize_query:
+            q_global = self.resize(q_global)  # to avoid a mismatch between feat_map and q_global: https://github.com/NVlabs/GCVit/issues/9
+        # feature_map -> windows -> window_attention -> feature_map
+        for i, blk in enumerate(self.blocks):
+            if i % 2:
+                x = blk([x, q_global])
+            else:
+                x = blk([x])
+        x = x[:, :height, :width, :]  # https://github.com/NVlabs/GCVit/issues/9
+        # set shape for [B, ?, ?, C]
+        x.set_shape(inputs.shape)  # `tf.reshape` creates a new tensor with new_shape
+        # downsample
+        if self.downsample:
+            x = self.down(x)
+        return x
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({
+            'depth': self.depth,
+            'num_heads': self.num_heads,
+            'window_size': self.window_size,
+            'keep_dims': self.keep_dims,
+            'downsample': self.downsample,
+            'mlp_ratio': self.mlp_ratio,
+            'qkv_bias': self.qkv_bias,
+            'qk_scale': self.qk_scale,
+            'drop': self.drop,
+            'attn_drop': self.attn_drop,
+            'path_drop': self.path_drop,
+            'layer_scale': self.layer_scale
+        })
+        return config
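
A stage-level sketch with assumed sizes: for window_size=7 and a 28x28 input, keep_dims=(False, False) makes the two FeatExtract layers shrink the global-query map 28 -> 14 -> 7 so it matches the 7x7 window tokens, and the final ReduceSize halves the map and doubles the channels:

    import tensorflow as tf
    from gcvit.layers import GCViTLayer

    level = GCViTLayer(depth=2, num_heads=4, window_size=7, keep_dims=(False, False))
    x = tf.random.normal((1, 28, 28, 96))
    print(level(x).shape)  # (1, 14, 14, 192); block 0 is local, block 1 uses the global query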
gcvit/layers/window.py
ADDED
@@ -0,0 +1,15 @@
+import tensorflow as tf
+
+def window_partition(x, window_size):
+    B, H, W, C = tf.unstack(tf.shape(x), num=4)
+    x = tf.reshape(x, shape=[-1, H // window_size, window_size, W // window_size, window_size, C])
+    x = tf.transpose(x, perm=[0, 1, 3, 2, 4, 5])
+    windows = tf.reshape(x, shape=[-1, window_size, window_size, C])
+    return windows
+
+
+def window_reverse(windows, window_size, H, W, C):
+    x = tf.reshape(windows, shape=[-1, H // window_size, W // window_size, window_size, window_size, C])
+    x = tf.transpose(x, perm=[0, 1, 3, 2, 4, 5])
+    x = tf.reshape(x, shape=[-1, H, W, C])
+    return x
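
The two helpers are exact inverses as long as H and W are multiples of window_size; a quick round-trip check with illustrative sizes:

    import tensorflow as tf
    from gcvit.layers import window_partition, window_reverse

    x = tf.random.normal((2, 14, 14, 32))
    w = window_partition(x, 7)            # (8, 7, 7, 32): four 7x7 windows per image
    y = window_reverse(w, 7, 14, 14, 32)  # back to (2, 14, 14, 32)
    print(w.shape, bool(tf.reduce_all(x == y)))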
gcvit/models/__init__.py
ADDED
@@ -0,0 +1 @@
+from .gcvit import GCViT, GCViTTiny, GCViTSmall, GCViTBase
gcvit/models/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (234 Bytes).
gcvit/models/__pycache__/gcvit.cpython-38.pyc
ADDED
Binary file (4.08 kB).
gcvit/models/gcvit.py
ADDED
@@ -0,0 +1,145 @@
+import numpy as np
+import tensorflow as tf
+
+from ..layers import PatchEmbed, GCViTLayer, Identity
+
+
+BASE_URL = 'https://github.com/awsaf49/gcvit-tf/releases/download'
+TAG = 'v1.0.0'
+NAME2CONFIG = {
+    'gcvit_tiny': {'window_size': (7, 7, 14, 7),
+                   'dim': 64,
+                   'depths': (3, 4, 19, 5),
+                   'num_heads': (2, 4, 8, 16),
+                   'path_drop': 0.2,},
+    'gcvit_small': {'window_size': (7, 7, 14, 7),
+                    'dim': 96,
+                    'depths': (3, 4, 19, 5),
+                    'num_heads': (3, 6, 12, 24),
+                    'mlp_ratio': 2.,
+                    'path_drop': 0.3,
+                    'layer_scale': 1e-5,},
+    'gcvit_base': {'window_size': (7, 7, 14, 7),
+                   'dim': 128,
+                   'depths': (3, 4, 19, 5),
+                   'num_heads': (4, 8, 16, 32),
+                   'mlp_ratio': 2.,
+                   'path_drop': 0.5,
+                   'layer_scale': 1e-5,},
+}
+
+@tf.keras.utils.register_keras_serializable(package='gcvit')
+class GCViT(tf.keras.Model):
+    def __init__(self, window_size, dim, depths, num_heads,
+                 drop_rate=0., mlp_ratio=3., qkv_bias=True, qk_scale=None, attn_drop=0., path_drop=0.1, layer_scale=None, resize_query=False,
+                 global_pool='avg', num_classes=1000, head_act='softmax', **kwargs):
+        super().__init__(**kwargs)
+        self.window_size = window_size
+        self.dim = dim
+        self.depths = depths
+        self.num_heads = num_heads
+        self.drop_rate = drop_rate
+        self.mlp_ratio = mlp_ratio
+        self.qkv_bias = qkv_bias
+        self.qk_scale = qk_scale
+        self.attn_drop = attn_drop
+        self.path_drop = path_drop
+        self.layer_scale = layer_scale
+        self.resize_query = resize_query
+        self.global_pool = global_pool
+        self.num_classes = num_classes
+        self.head_act = head_act
+
+        self.patch_embed = PatchEmbed(dim=dim, name='patch_embed')
+        self.pos_drop = tf.keras.layers.Dropout(drop_rate, name='pos_drop')
+        path_drops = np.linspace(0., path_drop, sum(depths))
+        keep_dims = [(False, False, False), (False, False), (True,), (True,),]
+        self.levels = []
+        for i in range(len(depths)):
+            path_drop = path_drops[sum(depths[:i]):sum(depths[:i + 1])].tolist()
+            level = GCViTLayer(depth=depths[i], num_heads=num_heads[i], window_size=window_size[i], keep_dims=keep_dims[i],
+                               downsample=(i < len(depths) - 1), mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale,
+                               drop=drop_rate, attn_drop=attn_drop, path_drop=path_drop, layer_scale=layer_scale, resize_query=resize_query,
+                               name=f'levels/{i}')
+            self.levels.append(level)
+        self.norm = tf.keras.layers.LayerNormalization(axis=-1, epsilon=1e-05, name='norm')
+        if global_pool == 'avg':
+            self.pool = tf.keras.layers.GlobalAveragePooling2D(name='pool')
+        elif global_pool == 'max':
+            self.pool = tf.keras.layers.GlobalMaxPooling2D(name='pool')
+        elif global_pool is None:
+            self.pool = Identity(name='pool')
+        else:
+            raise ValueError(f'Expecting pooling to be one of None/avg/max. Found: {global_pool}')
+        self.head = [tf.keras.layers.Dense(num_classes, name='head/fc'),
+                     tf.keras.layers.Activation(head_act, name='head/act')]
+
+    def reset_classifier(self, num_classes, head_act, global_pool=None):
+        self.num_classes = num_classes
+        if global_pool is not None:
+            self.global_pool = global_pool
+        self.head[0] = tf.keras.layers.Dense(num_classes, name='head/fc') if num_classes else Identity(name='head/fc')
+        self.head[1] = tf.keras.layers.Activation(head_act, name='head/act') if head_act else Identity(name='head/act')
+        super().build((1, 224, 224, 3))
+
+    def forward_features(self, inputs):
+        x = self.patch_embed(inputs)
+        x = self.pos_drop(x)
+        x = tf.cast(x, dtype=tf.float32)
+        for level in self.levels:
+            x = level(x)
+        x = self.norm(x)
+        return x
+
+    def forward_head(self, inputs, pre_logits=False):
+        x = inputs
+        if self.global_pool in ['avg', 'max']:
+            x = self.pool(x)
+        if not pre_logits:
+            for layer in self.head:
+                x = layer(x)
+        return x
+
+    def call(self, inputs, **kwargs):
+        x = self.forward_features(inputs)
+        x = self.forward_head(x)
+        return x
+
+    def build_graph(self, input_shape=(224, 224, 3)):
+        """https://www.kaggle.com/code/ipythonx/tf-hybrid-efficientnet-swin-transformer-gradcam"""
+        x = tf.keras.Input(shape=input_shape)
+        return tf.keras.Model(inputs=[x], outputs=self.call(x), name=self.name)
+
+# load standard models
+def GCViTTiny(pretrain=False, **kwargs):
+    name = 'gcvit_tiny'
+    config = NAME2CONFIG[name]
+    ckpt_link = '{}/{}/{}_weights.h5'.format(BASE_URL, TAG, name)
+    model = GCViT(name=name, **config, **kwargs)
+    model(tf.random.uniform(shape=(1, 224, 224, 3)))
+    if pretrain:
+        ckpt_path = tf.keras.utils.get_file('{}_weights.h5'.format(name), ckpt_link)
+        model.load_weights(ckpt_path)
+    return model
+
+def GCViTSmall(pretrain=False, **kwargs):
+    name = 'gcvit_small'
+    config = NAME2CONFIG[name]
+    ckpt_link = '{}/{}/{}_weights.h5'.format(BASE_URL, TAG, name)
+    model = GCViT(name=name, **config, **kwargs)
+    model(tf.random.uniform(shape=(1, 224, 224, 3)))
+    if pretrain:
+        ckpt_path = tf.keras.utils.get_file('{}_weights.h5'.format(name), ckpt_link)
+        model.load_weights(ckpt_path)
+    return model
+
+def GCViTBase(pretrain=False, **kwargs):
+    name = 'gcvit_base'
+    config = NAME2CONFIG[name]
+    ckpt_link = '{}/{}/{}_weights.h5'.format(BASE_URL, TAG, name)
+    model = GCViT(name=name, **config, **kwargs)
+    model(tf.random.uniform(shape=(1, 224, 224, 3)))
+    if pretrain:
+        ckpt_path = tf.keras.utils.get_file('{}_weights.h5'.format(name), ckpt_link)
+        model.load_weights(ckpt_path)
+    return model
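
End-to-end usage is then one call per factory; a minimal sketch (the pretrained weights download from the GitHub release on first use):

    import tensorflow as tf
    import gcvit

    model = gcvit.GCViTTiny(pretrain=True)  # builds the model and loads gcvit_tiny weights
    x = tf.random.uniform((1, 224, 224, 3))
    probs = model(x)                        # (1, 1000) softmax probabilities
    print(int(tf.argmax(probs, axis=-1)[0]))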
gcvit/utils/__init__.py
ADDED
@@ -0,0 +1 @@
+from .gradcam import process_image, get_gradcam_model, get_gradcam_prediction
gcvit/utils/gradcam.py
ADDED
@@ -0,0 +1,69 @@
+import tensorflow as tf
+import matplotlib.cm as cm
+import numpy as np
+try:
+    from tensorflow.keras.utils import array_to_img, img_to_array
+except ImportError:  # older TF keeps these under keras.preprocessing
+    from tensorflow.keras.preprocessing.image import array_to_img, img_to_array
+
+def process_image(img, size=(224, 224)):
+    img_array = tf.keras.applications.imagenet_utils.preprocess_input(img, mode='torch')
+    img_array = tf.image.resize(img_array, size,)[None,]
+    return img_array
+
+def get_gradcam_model(model):
+    inp = tf.keras.Input(shape=(224, 224, 3))
+    feats = model.forward_features(inp)
+    preds = model.forward_head(feats)
+    return tf.keras.models.Model(inp, [preds, feats])
+
+def get_gradcam_prediction(img, grad_model, process=True, decode=True, pred_index=None, cmap='jet', alpha=0.4):
+    """Grad-CAM for a single image
+
+    Args:
+        img (np.ndarray): processed or raw image without a batch dim, e.g. (224, 224, 3)
+        grad_model (tf.keras.Model): model that outputs both the feature map and the prediction
+        process (bool, optional): apply ImageNet pre-processing. Defaults to True.
+        decode (bool, optional): decode predictions to ImageNet labels. Defaults to True.
+        pred_index (int, optional): target a particular class. Defaults to None.
+        cmap (str, optional): colormap. Defaults to 'jet'.
+        alpha (float, optional): opacity. Defaults to 0.4.
+
+    Returns:
+        preds_decode: top-5 predictions
+        overlay: Grad-CAM heatmap overlaid on the input image
+    """
+    # process image for inference
+    if process:
+        img_array = process_image(img)
+    else:
+        img_array = tf.convert_to_tensor(img)[None,]
+    if img.min() != img.max():
+        img = (img - img.min()) / (img.max() - img.min())
+        img = np.uint8(img * 255.0)
+    # get prediction
+    with tf.GradientTape(persistent=True) as tape:
+        preds, feats = grad_model(img_array)
+        if pred_index is None:
+            pred_index = tf.argmax(preds[0])
+        class_channel = preds[:, pred_index]
+    # compute heatmap
+    grads = tape.gradient(class_channel, feats)
+    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
+    feats = feats[0]
+    heatmap = feats @ pooled_grads[..., tf.newaxis]
+    heatmap = tf.squeeze(heatmap)
+    heatmap = tf.maximum(heatmap, 0) / tf.math.reduce_max(heatmap)
+    heatmap = heatmap.numpy()
+    heatmap = np.uint8(255 * heatmap)
+    # colorize heatmap
+    cmap = cm.get_cmap(cmap)
+    colors = cmap(np.arange(256))[:, :3]
+    heatmap = colors[heatmap]
+    heatmap = array_to_img(heatmap)
+    heatmap = heatmap.resize((img.shape[1], img.shape[0]))
+    heatmap = img_to_array(heatmap)
+    overlay = img + heatmap * alpha
+    overlay = array_to_img(overlay)
+    # decode prediction
+    preds_decode = tf.keras.applications.imagenet_utils.decode_predictions(preds.numpy())[0] if decode else preds
+    return preds_decode, overlay
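
Putting the pieces together, a sketch of the full Grad-CAM path used by app.py (the image path is hypothetical):

    import numpy as np
    from PIL import Image
    import gcvit
    from gcvit.utils import get_gradcam_model, get_gradcam_prediction

    model = gcvit.GCViTTiny(pretrain=True)
    grad_model = get_gradcam_model(model)            # outputs (predictions, last feature map)
    img = np.array(Image.open('example/panda.jpg'))  # hypothetical path, RGB uint8
    preds, overlay = get_gradcam_prediction(img, grad_model)
    print(preds[:2])                                 # top (class_id, name, prob) tuples
    overlay.save('gradcam_overlay.png')              # PIL image with the heatmap blended in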
gcvit/version.py
ADDED
@@ -0,0 +1 @@
+__version__ = "1.0.3"
requirements.txt
ADDED
@@ -0,0 +1,5 @@
+tensorflow==2.4.1
+tensorflow_addons==0.14.0
+gradio==3.1.0
+numpy
+matplotlib
setup.py
ADDED
@@ -0,0 +1,50 @@
+from setuptools import setup, find_packages
+from codecs import open
+from os import path
+
+here = path.abspath(path.dirname(__file__))
+
+# Get the long description from the README file
+with open(path.join(here, "README.md"), encoding="utf-8") as f:
+    long_description = f.read()
+
+with open(path.join(here, 'requirements.txt')) as f:
+    install_requires = [x for x in f.read().splitlines() if len(x)]
+
+exec(open("gcvit/version.py").read())
+
+setup(
+    name="gcvit",
+    version=__version__,
+    description="Tensorflow 2.0 Implementation of GCViT: Global Context Vision Transformer. https://github.com/awsaf49/gcvit-tf",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/awsaf49/gcvit-tf",
+    author="Awsaf",
+    author_email="[email protected]",
+    classifiers=[
+        # How mature is this project? Common values are
+        #   3 - Alpha
+        #   4 - Beta
+        #   5 - Production/Stable
+        "Development Status :: 3 - Alpha",
+        "Intended Audience :: Developers",
+        "Intended Audience :: Science/Research",
+        "License :: OSI Approved :: Apache Software License",
+        "Programming Language :: Python :: 3.6",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Topic :: Scientific/Engineering",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
+        "Topic :: Software Development",
+        "Topic :: Software Development :: Libraries",
+        "Topic :: Software Development :: Libraries :: Python Modules",
+    ],
+    # Note that this is a string of words separated by whitespace, not a list.
+    keywords="tensorflow computer_vision image classification transformer",
+    packages=find_packages(exclude=["tests"]),
+    include_package_data=True,
+    install_requires=install_requires,
+    python_requires=">=3.6",
+    license="MIT",
+)