tensorgirl committed on
Commit
f203678
·
1 Parent(s): 8522232

Upload AugViTForImageClassification

Browse files
Files changed (5) hide show
  1. README.md +46 -0
  2. augvit_config.py +31 -0
  3. augvit_model.py +178 -0
  4. config.json +20 -0
  5. tf_model.h5 +3 -0
README.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - generated_from_keras_callback
4
+ model-index:
5
+ - name: TFaugvit
6
+ results: []
7
+ ---
8
+
9
+ <!-- This model card has been generated automatically according to the information Keras had access to. You should
10
+ probably proofread and complete it, then remove this comment. -->
11
+
12
+ # TFaugvit
13
+
14
+ This model is a fine-tuned version of an unspecified base model on an unknown dataset.
15
+ It achieves the following results on the evaluation set:
16
+
17
+
18
+ ## Model description
19
+
20
+ More information needed
21
+
22
+ ## Intended uses & limitations
23
+
24
+ More information needed
25
+
26
+ ## Training and evaluation data
27
+
28
+ More information needed
29
+
30
+ ## Training procedure
31
+
32
+ ### Training hyperparameters
33
+
34
+ The following hyperparameters were used during training:
35
+ - optimizer: None
36
+ - training_precision: float32
37
+
38
+ ### Training results
39
+
40
+
41
+
42
+ ### Framework versions
43
+
44
+ - Transformers 4.33.2
45
+ - TensorFlow 2.13.0
46
+ - Tokenizers 0.13.3
augvit_config.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import PretrainedConfig
2
+ from typing import List
3
+
4
+
5
class AugViTConfig(PretrainedConfig):
    """Hyperparameter container for the AugViT image classifier.

    Every argument is stored unchanged as an attribute of the same name;
    any remaining keyword arguments are forwarded to ``PretrainedConfig``.

    Args:
        image_size: Input image size.
        patch_size: Size of the patches the image is split into.
        num_classes: Number of output classes.
        dim: Embedding width.
        depth: Number of transformer blocks.
        heads: Number of attention heads.
        mlp_dim: Hidden width of the feed-forward sub-layer.
        dropout: Dropout rate used inside the transformer blocks.
        emb_dropout: Dropout rate applied to the embeddings.
        **kwargs: Passed through to ``PretrainedConfig.__init__``.
    """

    model_type = "augvit"

    def __init__(
        self,
        image_size: int = 32,
        patch_size: int = 4,
        num_classes: int = 10,
        dim: int = 128,
        depth: int = 6,
        heads: int = 16,
        mlp_dim: int = 256,
        # Dropout rates are fractions, so they are floats (defaults are 0.1).
        dropout: float = 0.1,
        emb_dropout: float = 0.1,
        **kwargs,
    ):
        self.image_size = image_size
        self.patch_size = patch_size
        self.num_classes = num_classes
        self.dim = dim
        self.depth = depth
        self.heads = heads
        self.mlp_dim = mlp_dim
        self.dropout = dropout
        self.emb_dropout = emb_dropout
        # The base initializer runs last, after the model-specific fields are set.
        super().__init__(**kwargs)
augvit_model.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+ from tensorflow.keras import Model
3
+ from tensorflow.keras.layers import Layer
4
+ from tensorflow.keras import Sequential
5
+ import tensorflow.keras.layers as nn
6
+
7
+ from tensorflow import einsum
8
+ from einops import rearrange, repeat
9
+ from einops.layers.tensorflow import Rearrange
10
+ import numpy as np
11
+
12
+
13
def pair(t):
    """Return *t* as a 2-element tuple.

    A tuple or list is converted to a tuple unchanged in content; any other
    value is duplicated into ``(t, t)``. Lists are accepted because a tuple
    stored in a JSON config is read back as a list.
    """
    if isinstance(t, (tuple, list)):
        return tuple(t)
    return (t, t)
15
def gelu(x):
    """Tanh-based approximation of the GELU activation.

    Computes ``x * 0.5 * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x**3)))``.
    """
    scale = np.sqrt(2 / np.pi)
    inner = scale * (x + 0.044715 * tf.pow(x, 3))
    cdf = 0.5 * (1.0 + tf.tanh(inner))
    return x * cdf
20
+
21
class PreNorm(Layer):
    """Wrapper that layer-normalizes its input before calling ``fn``.

    Args:
        fn: The wrapped callable/layer, invoked as ``fn(x, training=...)``.
        name: Layer name; also prefixes the inner normalization layer's name.
    """

    def __init__(self, fn, name):
        super().__init__(name=name)
        # The norm is assigned before fn, keeping sub-layer tracking order stable.
        self.norm = nn.LayerNormalization(name=f'{name}/layernorm')
        self.fn = fn

    def call(self, x, training=True):
        """Normalize ``x`` and pass the result to the wrapped layer."""
        normalized = self.norm(x)
        return self.fn(normalized, training=training)
29
+
30
+
31
class MLP(Layer):
    """Two-layer feed-forward block: Dense(gelu) -> Dropout -> Dense -> Dropout.

    Args:
        dim: Output width of the second dense layer.
        hidden_dim: Width of the first (GELU-activated) dense layer.
        name: Layer name; also prefixes the names of all inner layers.
        dropout: Rate used by both dropout layers.
    """

    def __init__(self, dim, hidden_dim, name, dropout=0.0):
        super().__init__(name=name)
        stages = [
            nn.Dense(units=hidden_dim, activation=gelu, name=f'{name}/den1'),
            nn.Dropout(rate=dropout, name=f'{name}/drop1'),
            nn.Dense(units=dim, name=f'{name}/den2'),
            nn.Dropout(rate=dropout, name=f'{name}/drop2'),
        ]
        self.net = Sequential(stages, name=f'{name}/seq1')

    def call(self, x, training=True):
        """Run ``x`` through the feed-forward stack."""
        return self.net(x, training=training)
44
+
45
class Attention(Layer):
    """Multi-head scaled dot-product self-attention.

    Args:
        dim: Width of the output projection.
        name: Layer name; also prefixes the names of all inner layers.
        heads: Number of attention heads.
        dim_head: Width of each head.
        dropout: Dropout rate applied after the output projection.
    """

    def __init__(self, dim, name, heads=8, dim_head=64, dropout=0.0):
        super().__init__(name=name)
        inner_dim = heads * dim_head
        # The output projection is skipped only for a single head whose
        # width already equals ``dim``.
        needs_projection = heads != 1 or dim_head != dim

        self.heads = heads
        self.scale = dim_head ** -0.5

        self.attend = nn.Softmax(name=f'{name}/soft')
        # One bias-free projection produces q, k and v together.
        self.to_qkv = nn.Dense(units=inner_dim * 3, use_bias=False, name=f'{name}/den1')

        out_layers = []
        if needs_projection:
            out_layers = [
                nn.Dense(units=dim, name=f'{name}/den2'),
                nn.Dropout(rate=dropout, name=f'{name}/drop1'),
            ]
        self.to_out = Sequential(out_layers, name=f'{name}/seq')

    def call(self, x, training=True):
        """Attend over axis 1 of ``x`` (laid out as ``b n features``)."""
        projected = self.to_qkv(x)
        chunks = tf.split(projected, num_or_size_splits=3, axis=-1)
        # Split the feature axis into heads: (b, n, h*d) -> (b, h, n, d).
        q, k, v = [
            rearrange(chunk, 'b n (h d) -> b h n d', h=self.heads)
            for chunk in chunks
        ]

        scores = einsum('b h i d, b h j d -> b h i j', q, k) * self.scale
        weights = self.attend(scores)

        mixed = einsum('b h i j, b h j d -> b h i d', weights, v)
        # Merge the heads back into one feature axis.
        merged = rearrange(mixed, 'b h n d -> b n (h d)')
        return self.to_out(merged, training=training)
80
+
81
class Transformer(Layer):
    """Stack of ``depth`` augmented transformer blocks.

    Each block holds four pre-normalized sub-layers: self-attention, a
    GELU dense branch that augments the attention step, an MLP, and a GELU
    dense branch that augments the MLP step.

    Args:
        dim: Model width (output width of every sub-layer).
        depth: Number of blocks.
        heads: Number of attention heads.
        dim_head: Width of each attention head.
        mlp_dim: Hidden width of the MLP.
        name: Layer name; also used to build the sub-layer names.
        dropout: Dropout rate passed to the attention and MLP sub-layers.
    """

    def __init__(self, dim, depth, heads, dim_head, mlp_dim, name, dropout=0.0):
        # Keyword arguments avoid depending on the positional order of
        # Layer.__init__ parameters.
        super().__init__(trainable=True, name=name)

        self.layers = []

        # NOTE: the name strings below (including the missing '/' before
        # 'preno') are kept exactly as-is, since layer names identify the
        # saved weights.
        for i in range(depth):
            self.layers.append([
                PreNorm(Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout, name=f'{name}/att{i}'), name=f'{name}preno{i}'),
                PreNorm(nn.Dense(dim, activation=gelu, name=f'{name}/den{i}'), name=f'{name}preno1{i}'),
                PreNorm(MLP(dim, mlp_dim, dropout=dropout, name=f'{name}/mlp{i}'), name=f'{name}preno2{i}'),
                PreNorm(nn.Dense(dim, activation=gelu, name=f'{name}/den2{i}'), name=f'{name}preno3{i}'),
            ])

    def call(self, x, training=True):
        """Apply every block in order and return the transformed tensor."""
        for attn, aug_attn, mlp, augs in self.layers:
            # Residual connection plus the augmentation branch, per step.
            x = attn(x, training=training) + x + aug_attn(x, training=training)
            x = mlp(x, training=training) + x + augs(x, training=training)
        return x
101
+
102
class AUGViT(Model):
    """Augmented Vision Transformer for image classification.

    The image is cut into patches, each patch is linearly embedded, a
    class token is prepended, position embeddings are added, and the
    sequence is run through ``Transformer``. The pooled result goes through
    a LayerNormalization + Dense head.

    Args:
        image_size: Image size, an int (square) or ``(height, width)`` tuple.
        patch_size: Patch size, an int (square) or ``(height, width)`` tuple.
        num_classes: Number of units of the final dense layer.
        dim: Embedding width.
        depth: Number of transformer blocks.
        heads: Number of attention heads.
        mlp_dim: Hidden width of the transformer MLP.
        name: Model name.
        pool: ``'cls'`` to read the class token, ``'mean'`` to average all
            tokens.
        dim_head: Width of each attention head.
        dropout: Dropout rate inside the transformer.
        emb_dropout: Dropout rate applied to the embedded sequence.

    Raises:
        AssertionError: If the image size is not divisible by the patch
            size, or ``pool`` is neither ``'cls'`` nor ``'mean'``.
    """

    def __init__(self, image_size, patch_size, num_classes, dim, depth, heads, mlp_dim, name='augvit',
                 pool='cls', dim_head=64, dropout=0.0, emb_dropout=0.0):

        super().__init__(name=name)

        image_height, image_width = pair(image_size)
        patch_height, patch_width = pair(patch_size)

        assert image_height % patch_height == 0 and image_width % patch_width == 0, 'Image dimensions must be divisible by the patch size.'

        num_patches = (image_height // patch_height) * (image_width // patch_width)
        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'

        # Channels-last image -> sequence of flattened patches -> embedding.
        self.patch_embedding = Sequential([
            Rearrange('b (h p1) (w p2) c -> b (h w) (p1 p2 c)', p1=patch_height, p2=patch_width),
            nn.Dense(units=dim, name='patchden')
        ], name='patch_embedding')

        # One position vector per patch, plus one for the class token.
        self.pos_embedding = tf.Variable(initial_value=tf.random.normal([1, num_patches + 1, dim]), name='pos_emb')
        self.cls_token = tf.Variable(initial_value=tf.random.normal([1, 1, dim]), name='cls')
        self.dropout = nn.Dropout(rate=emb_dropout, name='drop')

        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout=dropout, name='trans')

        self.pool = pool

        self.mlp_head = Sequential([
            nn.LayerNormalization(name='layernorm'),
            nn.Dense(units=num_classes, name='dense12')
        ], name='mlp_head')

    def call(self, img, training=True, **kwargs):
        """Return the classification head output for a batch of images.

        Args:
            img: Rank-4, channels-last image batch (``b h w c``).
            training: Forwarded to the dropout and transformer layers.
                NOTE: defaults to ``True``; pass ``training=False``
                explicitly for inference.
            **kwargs: Accepted and ignored.
        """
        x = self.patch_embedding(img)
        n = x.shape[1]

        # Use the runtime batch size: the static one is None whenever the
        # batch dimension is unknown (symbolic build, dynamic batches).
        batch = tf.shape(x)[0]
        cls_tokens = tf.tile(self.cls_token, [batch, 1, 1])
        x = tf.concat([cls_tokens, x], axis=1)
        x += self.pos_embedding[:, :(n + 1)]
        x = self.dropout(x, training=training)

        x = self.transformer(x, training=training)

        if self.pool == 'mean':
            x = tf.reduce_mean(x, axis=1)
        else:
            # The class token sits at position 0 of the sequence.
            x = x[:, 0]

        x = self.mlp_head(x)

        return x
153
+
154
+
155
+
156
+
157
+ from transformers import TFPreTrainedModel
158
+ from .augvit_config import AugViTConfig
159
+
160
class AugViTForImageClassification(TFPreTrainedModel):
    """``TFPreTrainedModel`` wrapper that builds an ``AUGViT`` from a config.

    Args:
        config: An ``AugViTConfig`` supplying the ``AUGViT`` hyperparameters.
    """

    config_class = AugViTConfig

    def __init__(self, config):
        super().__init__(config)
        self.model = AUGViT(
            image_size=config.image_size,
            patch_size=config.patch_size,
            num_classes=config.num_classes,
            dim=config.dim,
            depth=config.depth,
            heads=config.heads,
            mlp_dim=config.mlp_dim,
            dropout=config.dropout,
            emb_dropout=config.emb_dropout,
        )

    def call(self, input, training=False, **kwargs):
        """Return the classification logits for a batch of images.

        Args:
            input: Image batch passed straight to the wrapped ``AUGViT``.
            training: Whether to run in training mode. It is forwarded
                explicitly because the wrapped model's own ``call`` defaults
                to ``training=True``, which would otherwise leave dropout
                active during a plain inference call.
            **kwargs: Accepted and ignored.
        """
        logits = self.model(input, training=training)
        return logits
config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "AugViTForImageClassification"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "augvit_config.AugViTConfig",
7
+ "TFAutoModelForImageClassification": "augvit_model.AugViTForImageClassification"
8
+ },
9
+ "depth": 1,
10
+ "dim": 128,
11
+ "dropout": 0.1,
12
+ "emb_dropout": 0.1,
13
+ "heads": 16,
14
+ "image_size": 32,
15
+ "mlp_dim": 256,
16
+ "model_type": "augvit",
17
+ "num_classes": 10,
18
+ "patch_size": 4,
19
+ "transformers_version": "4.33.2"
20
+ }
tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d4253d3d1d254a42edd6baf604ba35d0317f6754cec27fe47739ba60908c235
3
+ size 2613128