tensorgirl committed
Commit · f203678
1 Parent(s): 8522232
Upload AugViTForImageClassification
Browse files
- README.md +46 -0
- augvit_config.py +31 -0
- augvit_model.py +178 -0
- config.json +20 -0
- tf_model.h5 +3 -0
README.md
ADDED
@@ -0,0 +1,46 @@
---
tags:
- generated_from_keras_callback
model-index:
- name: TFaugvit
  results: []
---

<!-- This model card has been generated automatically according to the information Keras had access to. You should
probably proofread and complete it, then remove this comment. -->

# TFaugvit

This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
It achieves the following results on the evaluation set:


## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- optimizer: None
- training_precision: float32

### Training results


### Framework versions

- Transformers 4.33.2
- TensorFlow 2.13.0
- Tokenizers 0.13.3
augvit_config.py
ADDED
@@ -0,0 +1,31 @@
from transformers import PretrainedConfig


class AugViTConfig(PretrainedConfig):
    model_type = "augvit"

    def __init__(
        self,
        image_size: int = 32,
        patch_size: int = 4,
        num_classes: int = 10,
        dim: int = 128,
        depth: int = 6,
        heads: int = 16,
        mlp_dim: int = 256,
        dropout: float = 0.1,
        emb_dropout: float = 0.1,
        **kwargs,
    ):
        # Mirrors the constructor arguments of the AUGViT model in augvit_model.py.
        self.image_size = image_size
        self.patch_size = patch_size
        self.num_classes = num_classes
        self.dim = dim
        self.depth = depth
        self.heads = heads
        self.mlp_dim = mlp_dim
        self.dropout = dropout
        self.emb_dropout = emb_dropout
        super().__init__(**kwargs)
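A minimal usage sketch for the config class above (the module name augvit_config and the output directory are illustrative assumptions, not part of this commit):

from augvit_config import AugViTConfig

# Instantiate with the defaults above and serialize; save_pretrained writes a
# config.json like the one added later in this commit.
config = AugViTConfig(image_size=32, patch_size=4, num_classes=10)
print(config.model_type)  # "augvit"
config.save_pretrained("./augvit-checkpoint")  # hypothetical output directory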
augvit_model.py
ADDED
@@ -0,0 +1,178 @@
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Layer
from tensorflow.keras import Sequential
import tensorflow.keras.layers as nn

from tensorflow import einsum
from einops import rearrange, repeat
from einops.layers.tensorflow import Rearrange
import numpy as np

from transformers import TFPreTrainedModel
from .augvit_config import AugViTConfig


def pair(t):
    return t if isinstance(t, tuple) else (t, t)


def gelu(x):
    # Tanh approximation of the GELU activation.
    cdf = 0.5 * (1.0 + tf.tanh(
        (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
    return x * cdf


class PreNorm(Layer):
    def __init__(self, fn, name):
        super(PreNorm, self).__init__(name=name)
        self.norm = nn.LayerNormalization(name=f'{name}/layernorm')
        self.fn = fn

    def call(self, x, training=True):
        return self.fn(self.norm(x), training=training)


class MLP(Layer):
    def __init__(self, dim, hidden_dim, name, dropout=0.0):
        super(MLP, self).__init__(name=name)
        self.net = Sequential([
            nn.Dense(units=hidden_dim, activation=gelu, name=f'{name}/den1'),
            nn.Dropout(rate=dropout, name=f'{name}/drop1'),
            nn.Dense(units=dim, name=f'{name}/den2'),
            nn.Dropout(rate=dropout, name=f'{name}/drop2')
        ], name=f'{name}/seq1')

    def call(self, x, training=True):
        return self.net(x, training=training)


class Attention(Layer):
    def __init__(self, dim, name, heads=8, dim_head=64, dropout=0.0):
        super(Attention, self).__init__(name=name)
        inner_dim = dim_head * heads
        project_out = not (heads == 1 and dim_head == dim)
        self.heads = heads
        self.scale = dim_head ** -0.5

        self.attend = nn.Softmax(name=f'{name}/soft')
        self.to_qkv = nn.Dense(units=inner_dim * 3, use_bias=False, name=f'{name}/den1')

        if project_out:
            self.to_out = [
                nn.Dense(units=dim, name=f'{name}/den2'),
                nn.Dropout(rate=dropout, name=f'{name}/drop1')
            ]
        else:
            self.to_out = []
        self.to_out = Sequential(self.to_out, name=f'{name}/seq')

    def call(self, x, training=True):
        # One dense projection produces queries, keys and values; split and
        # reshape into per-head tensors.
        qkv = self.to_qkv(x)
        qkv = tf.split(qkv, num_or_size_splits=3, axis=-1)
        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h=self.heads), qkv)

        # dots = tf.matmul(q, tf.transpose(k, perm=[0, 1, 3, 2])) * self.scale
        dots = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale
        attn = self.attend(dots)

        # x = tf.matmul(attn, v)
        x = einsum('b h i j, b h j d -> b h i d', attn, v)
        x = rearrange(x, 'b h n d -> b n (h d)')
        x = self.to_out(x, training=training)

        return x


class Transformer(Layer):
    def __init__(self, dim, depth, heads, dim_head, mlp_dim, name, dropout=0.0):
        super(Transformer, self).__init__(name=name)

        self.layers = []

        for i in range(depth):
            self.layers.append([
                PreNorm(Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout, name=f'{name}/att{i}'), name=f'{name}preno{i}'),
                PreNorm(nn.Dense(dim, activation=gelu, name=f'{name}/den{i}'), name=f'{name}preno1{i}'),
                PreNorm(MLP(dim, mlp_dim, dropout=dropout, name=f'{name}/mlp{i}'), name=f'{name}preno2{i}'),
                PreNorm(nn.Dense(dim, activation=gelu, name=f'{name}/den2{i}'), name=f'{name}preno3{i}'),
            ])

    def call(self, x, training=True):
        # Each block combines the usual residual connection with an augmented
        # shortcut: a learned dense path added alongside attention and the MLP.
        for attn, aug_attn, mlp, augs in self.layers:
            x = attn(x, training=training) + x + aug_attn(x, training=training)
            x = mlp(x, training=training) + x + augs(x, training=training)
        return x


class AUGViT(Model):
    def __init__(self, image_size, patch_size, num_classes, dim, depth, heads, mlp_dim, name='augvit',
                 pool='cls', dim_head=64, dropout=0.0, emb_dropout=0.0):

        super(AUGViT, self).__init__(name=name)

        image_height, image_width = pair(image_size)
        patch_height, patch_width = pair(patch_size)

        assert image_height % patch_height == 0 and image_width % patch_width == 0, 'Image dimensions must be divisible by the patch size.'

        num_patches = (image_height // patch_height) * (image_width // patch_width)
        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'

        # Flatten each patch and project it to the embedding dimension.
        self.patch_embedding = Sequential([
            Rearrange('b (h p1) (w p2) c -> b (h w) (p1 p2 c)', p1=patch_height, p2=patch_width),
            nn.Dense(units=dim, name='patchden')
        ], name='patch_embedding')

        self.pos_embedding = tf.Variable(initial_value=tf.random.normal([1, num_patches + 1, dim]), name='pos_emb')
        self.cls_token = tf.Variable(initial_value=tf.random.normal([1, 1, dim]), name='cls')
        self.dropout = nn.Dropout(rate=emb_dropout, name='drop')

        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout=dropout, name='trans')

        self.pool = pool

        self.mlp_head = Sequential([
            nn.LayerNormalization(name='layernorm'),
            nn.Dense(units=num_classes, name='dense12')
        ], name='mlp_head')

    def call(self, img, training=True, **kwargs):
        x = self.patch_embedding(img)
        b, n, d = x.shape

        # Prepend a learnable class token and add positional embeddings.
        cls_tokens = repeat(self.cls_token, '() n d -> b n d', b=b)
        x = tf.concat([cls_tokens, x], axis=1)
        x += self.pos_embedding[:, :(n + 1)]
        x = self.dropout(x, training=training)

        x = self.transformer(x, training=training)

        # Pool either the class token or the mean over all patch tokens.
        if self.pool == 'mean':
            x = tf.reduce_mean(x, axis=1)
        else:
            x = x[:, 0]

        x = self.mlp_head(x)

        return x


class AugViTForImageClassification(TFPreTrainedModel):
    config_class = AugViTConfig

    def __init__(self, config):
        super().__init__(config)
        self.model = AUGViT(
            image_size=config.image_size,
            patch_size=config.patch_size,
            num_classes=config.num_classes,
            dim=config.dim,
            depth=config.depth,
            heads=config.heads,
            mlp_dim=config.mlp_dim,
            dropout=config.dropout,
            emb_dropout=config.emb_dropout
        )

    def call(self, inputs, **kwargs):
        logits = self.model(inputs)
        return logits
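A minimal forward-pass sketch for the bare AUGViT model above, using the defaults from augvit_config.py and random images (illustrative only; real inputs would come from a dataset):

import tensorflow as tf

# Defaults from AugViTConfig: 32x32 images in 4x4 patches -> 64 patch tokens.
vit = AUGViT(image_size=32, patch_size=4, num_classes=10,
             dim=128, depth=6, heads=16, mlp_dim=256)

images = tf.random.normal([2, 32, 32, 3])  # channels-last, as the Rearrange pattern expects
logits = vit(images, training=False)
print(logits.shape)  # (2, 10)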
config.json
ADDED
@@ -0,0 +1,20 @@
{
  "architectures": [
    "AugViTForImageClassification"
  ],
  "auto_map": {
    "AutoConfig": "augvit_config.AugViTConfig",
    "TFAutoModelForImageClassification": "augvit_model.AugViTForImageClassification"
  },
  "depth": 1,
  "dim": 128,
  "dropout": 0.1,
  "emb_dropout": 0.1,
  "heads": 16,
  "image_size": 32,
  "mlp_dim": 256,
  "model_type": "augvit",
  "num_classes": 10,
  "patch_size": 4,
  "transformers_version": "4.33.2"
}
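The auto_map block above is what lets transformers import these custom classes at load time. A hedged loading sketch (the repo id is a placeholder, not taken from this commit):

from transformers import AutoConfig, TFAutoModelForImageClassification

# trust_remote_code fetches augvit_config.py / augvit_model.py from the repo
# and resolves the classes through the auto_map entries in config.json.
config = AutoConfig.from_pretrained("<user>/<augvit-repo>", trust_remote_code=True)
model = TFAutoModelForImageClassification.from_pretrained(
    "<user>/<augvit-repo>", trust_remote_code=True
)
# With image_size=32 and patch_size=4 the encoder sees (32 // 4) ** 2 = 64
# patch tokens, plus one cls token.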
tf_model.h5
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2d4253d3d1d254a42edd6baf604ba35d0317f6754cec27fe47739ba60908c235
size 2613128