dima806 committed
Commit 6ed5990 · verified · 1 parent: a881dac

Upload folder using huggingface_hub

checkpoint-15190/config.json ADDED
@@ -0,0 +1,46 @@
+ {
+   "_name_or_path": "google/vit-base-patch16-224-in21k",
+   "architectures": [
+     "ViTForImageClassification"
+   ],
+   "attention_probs_dropout_prob": 0.0,
+   "encoder_stride": 16,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.0,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "Down",
+     "1": "Left Swipe",
+     "2": "Right Swipe",
+     "3": "Stop",
+     "4": "Stop Gesture",
+     "5": "Swipe",
+     "6": "Thumbs Down",
+     "7": "Thumbs Up",
+     "8": "Up"
+   },
+   "image_size": 224,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "Down": 0,
+     "Left Swipe": 1,
+     "Right Swipe": 2,
+     "Stop": 3,
+     "Stop Gesture": 4,
+     "Swipe": 5,
+     "Thumbs Down": 6,
+     "Thumbs Up": 7,
+     "Up": 8
+   },
+   "layer_norm_eps": 1e-12,
+   "model_type": "vit",
+   "num_attention_heads": 12,
+   "num_channels": 3,
+   "num_hidden_layers": 12,
+   "patch_size": 16,
+   "problem_type": "single_label_classification",
+   "qkv_bias": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.48.1"
+ }
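
This config records the backbone (google/vit-base-patch16-224-in21k) and the nine gesture classes via id2label/label2id. A minimal loading sketch, assuming the model is published under the repo id dima806/smart_tv_hand_gestures_image_detection (the diff itself does not state the repo id):

```python
# Minimal sketch: load the fine-tuned ViT gesture classifier described by this config.
# Assumption: the Hub repo id below; the diff only shows the uploaded files.
from transformers import ViTForImageClassification

repo_id = "dima806/smart_tv_hand_gestures_image_detection"  # assumed repo id
model = ViTForImageClassification.from_pretrained(repo_id)

print(model.config.id2label)  # {0: 'Down', 1: 'Left Swipe', ..., 8: 'Up'}
```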
checkpoint-15190/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:79bb991cfdeabcaebb97dfb043d3bdc3aaf43549e81ce77f9b3516b1c47410ff
+ size 343245508
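
The .safetensors, .pt, .pth, and .bin entries in this commit are Git LFS pointer files: the repository stores only the spec version, the SHA-256 object id, and the byte size, while the ~343 MB weight blob itself lives in LFS storage. A sketch of fetching the resolved file with huggingface_hub, again assuming the repo id:

```python
# Sketch: download the actual weights behind the LFS pointer.
# hf_hub_download returns a local cache path to the resolved file, not the 3-line pointer.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="dima806/smart_tv_hand_gestures_image_detection",  # assumed repo id
    filename="checkpoint-15190/model.safetensors",
)
print(path)  # local path to the ~343 MB safetensors file
```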
checkpoint-15190/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:66585cc84314597e9f83c866cc3a45589d70485d4c14887c504cadf0152eb202
+ size 686611898
checkpoint-15190/preprocessor_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "do_convert_rgb": null,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [
+     0.5,
+     0.5,
+     0.5
+   ],
+   "image_processor_type": "ViTImageProcessor",
+   "image_std": [
+     0.5,
+     0.5,
+     0.5
+   ],
+   "resample": 2,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "height": 224,
+     "width": 224
+   }
+ }
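
This preprocessor config resizes frames to 224×224 (resample 2 is PIL bilinear), rescales pixel values by 1/255 (0.00392156862745098), and normalizes with mean = std = 0.5, mapping pixels into [-1, 1]. A sketch of the equivalent call, with a hypothetical input image:

```python
# Sketch: apply the preprocessing this config describes to one frame.
# "gesture.jpg" is a hypothetical input file, not part of the repository.
from PIL import Image
from transformers import ViTImageProcessor

processor = ViTImageProcessor(
    size={"height": 224, "width": 224},
    image_mean=[0.5, 0.5, 0.5],
    image_std=[0.5, 0.5, 0.5],
)
inputs = processor(images=Image.open("gesture.jpg"), return_tensors="pt")
print(inputs["pixel_values"].shape)  # torch.Size([1, 3, 224, 224])
```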
checkpoint-15190/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3a303ba2ab58b508c69de3f998fdc7fd9d7e762ee82e6c757111acf74c5df157
+ size 14244
checkpoint-15190/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d8f632990a80e8bdd73cf5af524bdc72f4527d9e44b0fc21b8981de9abbaed27
+ size 1064
checkpoint-15190/trainer_state.json ADDED
@@ -0,0 +1,343 @@
+ {
+   "best_metric": 0.15759548544883728,
+   "best_model_checkpoint": "smart_tv_hand_gestures_image_detection/checkpoint-15190",
+   "epoch": 10.0,
+   "eval_steps": 500,
+   "global_step": 15190,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.32916392363396973,
+       "grad_norm": 1.4565895795822144,
+       "learning_rate": 9.702774108322324e-07,
+       "loss": 2.1185,
+       "step": 500
+     },
+     {
+       "epoch": 0.6583278472679395,
+       "grad_norm": 1.595917820930481,
+       "learning_rate": 9.372523117569352e-07,
+       "loss": 1.8436,
+       "step": 1000
+     },
+     {
+       "epoch": 0.9874917709019092,
+       "grad_norm": 2.2012250423431396,
+       "learning_rate": 9.04227212681638e-07,
+       "loss": 1.4844,
+       "step": 1500
+     },
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.9256790123456791,
+       "eval_loss": 1.3686660528182983,
+       "eval_model_preparation_time": 0.0043,
+       "eval_runtime": 139.1921,
+       "eval_samples_per_second": 87.289,
+       "eval_steps_per_second": 10.913,
+       "step": 1519
+     },
+     {
+       "epoch": 1.316655694535879,
+       "grad_norm": 1.8626253604888916,
+       "learning_rate": 8.712021136063408e-07,
+       "loss": 1.1357,
+       "step": 2000
+     },
+     {
+       "epoch": 1.6458196181698486,
+       "grad_norm": 1.5412793159484863,
+       "learning_rate": 8.381770145310435e-07,
+       "loss": 0.8734,
+       "step": 2500
+     },
+     {
+       "epoch": 1.9749835418038182,
+       "grad_norm": 1.9414352178573608,
+       "learning_rate": 8.051519154557464e-07,
+       "loss": 0.6898,
+       "step": 3000
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.98559670781893,
+       "eval_loss": 0.6899833679199219,
+       "eval_model_preparation_time": 0.0043,
+       "eval_runtime": 140.1792,
+       "eval_samples_per_second": 86.675,
+       "eval_steps_per_second": 10.836,
+       "step": 3038
+     },
+     {
+       "epoch": 2.3041474654377883,
+       "grad_norm": 1.410414218902588,
+       "learning_rate": 7.72126816380449e-07,
+       "loss": 0.5598,
+       "step": 3500
+     },
+     {
+       "epoch": 2.633311389071758,
+       "grad_norm": 1.2400822639465332,
+       "learning_rate": 7.391017173051519e-07,
+       "loss": 0.4651,
+       "step": 4000
+     },
+     {
+       "epoch": 2.9624753127057275,
+       "grad_norm": 0.9760797023773193,
+       "learning_rate": 7.060766182298547e-07,
+       "loss": 0.3972,
+       "step": 4500
+     },
+     {
+       "epoch": 3.0,
+       "eval_accuracy": 0.9925925925925926,
+       "eval_loss": 0.42788437008857727,
+       "eval_model_preparation_time": 0.0043,
+       "eval_runtime": 138.4606,
+       "eval_samples_per_second": 87.751,
+       "eval_steps_per_second": 10.971,
+       "step": 4557
+     },
+     {
+       "epoch": 3.291639236339697,
+       "grad_norm": 0.7729864120483398,
+       "learning_rate": 6.730515191545574e-07,
+       "loss": 0.3433,
+       "step": 5000
+     },
+     {
+       "epoch": 3.6208031599736668,
+       "grad_norm": 0.7019414305686951,
+       "learning_rate": 6.400264200792602e-07,
+       "loss": 0.3052,
+       "step": 5500
+     },
+     {
+       "epoch": 3.9499670836076364,
+       "grad_norm": 0.6397891044616699,
+       "learning_rate": 6.070013210039629e-07,
+       "loss": 0.2739,
+       "step": 6000
+     },
+     {
+       "epoch": 4.0,
+       "eval_accuracy": 0.9956378600823045,
+       "eval_loss": 0.3098331689834595,
+       "eval_model_preparation_time": 0.0043,
+       "eval_runtime": 140.6468,
+       "eval_samples_per_second": 86.387,
+       "eval_steps_per_second": 10.8,
+       "step": 6076
+     },
+     {
+       "epoch": 4.279131007241606,
+       "grad_norm": 0.9204681515693665,
+       "learning_rate": 5.739762219286658e-07,
+       "loss": 0.2502,
+       "step": 6500
+     },
+     {
+       "epoch": 4.6082949308755765,
+       "grad_norm": 0.8557950854301453,
+       "learning_rate": 5.409511228533686e-07,
+       "loss": 0.2293,
+       "step": 7000
+     },
+     {
+       "epoch": 4.937458854509546,
+       "grad_norm": 0.5376387238502502,
+       "learning_rate": 5.079260237780713e-07,
+       "loss": 0.2135,
+       "step": 7500
+     },
+     {
+       "epoch": 5.0,
+       "eval_accuracy": 0.9967901234567901,
+       "eval_loss": 0.2464514970779419,
+       "eval_model_preparation_time": 0.0043,
+       "eval_runtime": 143.0601,
+       "eval_samples_per_second": 84.929,
+       "eval_steps_per_second": 10.618,
+       "step": 7595
+     },
+     {
+       "epoch": 5.266622778143516,
+       "grad_norm": 2.1749770641326904,
+       "learning_rate": 4.749009247027741e-07,
+       "loss": 0.1991,
+       "step": 8000
+     },
+     {
+       "epoch": 5.595786701777485,
+       "grad_norm": 0.4924432039260864,
+       "learning_rate": 4.418758256274769e-07,
+       "loss": 0.1887,
+       "step": 8500
+     },
+     {
+       "epoch": 5.924950625411455,
+       "grad_norm": 0.44754961133003235,
+       "learning_rate": 4.088507265521796e-07,
+       "loss": 0.1787,
+       "step": 9000
+     },
+     {
+       "epoch": 6.0,
+       "eval_accuracy": 0.9973662551440329,
+       "eval_loss": 0.20901748538017273,
+       "eval_model_preparation_time": 0.0043,
+       "eval_runtime": 143.6019,
+       "eval_samples_per_second": 84.609,
+       "eval_steps_per_second": 10.578,
+       "step": 9114
+     },
+     {
+       "epoch": 6.254114549045425,
+       "grad_norm": 1.0733314752578735,
+       "learning_rate": 3.758256274768824e-07,
+       "loss": 0.1706,
+       "step": 9500
+     },
+     {
+       "epoch": 6.583278472679394,
+       "grad_norm": 0.45371586084365845,
+       "learning_rate": 3.428005284015852e-07,
+       "loss": 0.1637,
+       "step": 10000
+     },
+     {
+       "epoch": 6.912442396313364,
+       "grad_norm": 0.3785471022129059,
+       "learning_rate": 3.09775429326288e-07,
+       "loss": 0.1571,
+       "step": 10500
+     },
+     {
+       "epoch": 7.0,
+       "eval_accuracy": 0.9988477366255144,
+       "eval_loss": 0.1836911290884018,
+       "eval_model_preparation_time": 0.0043,
+       "eval_runtime": 141.3146,
+       "eval_samples_per_second": 85.978,
+       "eval_steps_per_second": 10.749,
+       "step": 10633
+     },
+     {
+       "epoch": 7.2416063199473335,
+       "grad_norm": 0.774605393409729,
+       "learning_rate": 2.7675033025099076e-07,
+       "loss": 0.1517,
+       "step": 11000
+     },
+     {
+       "epoch": 7.570770243581303,
+       "grad_norm": 0.49195748567581177,
+       "learning_rate": 2.437252311756935e-07,
+       "loss": 0.1477,
+       "step": 11500
+     },
+     {
+       "epoch": 7.899934167215273,
+       "grad_norm": 0.32700517773628235,
+       "learning_rate": 2.107001321003963e-07,
+       "loss": 0.1439,
+       "step": 12000
+     },
+     {
+       "epoch": 8.0,
+       "eval_accuracy": 0.9981069958847737,
+       "eval_loss": 0.17038238048553467,
+       "eval_model_preparation_time": 0.0043,
+       "eval_runtime": 142.0518,
+       "eval_samples_per_second": 85.532,
+       "eval_steps_per_second": 10.693,
+       "step": 12152
+     },
+     {
+       "epoch": 8.229098090849243,
+       "grad_norm": 0.36188894510269165,
+       "learning_rate": 1.7767503302509906e-07,
+       "loss": 0.1402,
+       "step": 12500
+     },
+     {
+       "epoch": 8.558262014483212,
+       "grad_norm": 0.38871052861213684,
+       "learning_rate": 1.4464993394980185e-07,
+       "loss": 0.1385,
+       "step": 13000
+     },
+     {
+       "epoch": 8.887425938117183,
+       "grad_norm": 0.33406054973602295,
+       "learning_rate": 1.1162483487450462e-07,
+       "loss": 0.1359,
+       "step": 13500
+     },
+     {
+       "epoch": 9.0,
+       "eval_accuracy": 0.9986008230452675,
+       "eval_loss": 0.16055729985237122,
+       "eval_model_preparation_time": 0.0043,
+       "eval_runtime": 143.3485,
+       "eval_samples_per_second": 84.758,
+       "eval_steps_per_second": 10.597,
+       "step": 13671
+     },
+     {
+       "epoch": 9.216589861751151,
+       "grad_norm": 0.32042938470840454,
+       "learning_rate": 7.85997357992074e-08,
+       "loss": 0.1341,
+       "step": 14000
+     },
+     {
+       "epoch": 9.545753785385122,
+       "grad_norm": 0.29274484515190125,
+       "learning_rate": 4.557463672391017e-08,
+       "loss": 0.1333,
+       "step": 14500
+     },
+     {
+       "epoch": 9.874917709019092,
+       "grad_norm": 0.3197895288467407,
+       "learning_rate": 1.2549537648612946e-08,
+       "loss": 0.1331,
+       "step": 15000
+     },
+     {
+       "epoch": 10.0,
+       "eval_accuracy": 0.9990123456790123,
+       "eval_loss": 0.15759548544883728,
+       "eval_model_preparation_time": 0.0043,
+       "eval_runtime": 142.0855,
+       "eval_samples_per_second": 85.512,
+       "eval_steps_per_second": 10.691,
+       "step": 15190
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 15190,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 10,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 3.76634697648169e+19,
+   "train_batch_size": 32,
+   "trial_name": null,
+   "trial_params": null
+ }
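
The trainer state covers 10 epochs (15,190 optimizer steps at batch size 32) with a learning rate decaying roughly linearly from ~1e-6 to ~1.3e-8; eval accuracy rises from 0.926 after epoch 1 to 0.999 at epoch 10, where the best eval loss (0.1576) is reached at this checkpoint. A short sketch for pulling the per-epoch eval metrics out of log_history:

```python
# Sketch: summarize the per-epoch evaluation entries recorded in trainer_state.json.
import json

with open("checkpoint-15190/trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    if "eval_accuracy" in entry:  # evaluation entries carry eval_* keys
        print(f"epoch {entry['epoch']:>4}: "
              f"accuracy={entry['eval_accuracy']:.4f}  loss={entry['eval_loss']:.4f}")
```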
checkpoint-15190/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e7dd678aabd9d0ea23f0200be2e7cc55ac5dee2fd8ecb3c063ce03e74171147
+ size 5304
config.json ADDED
@@ -0,0 +1,46 @@
+ {
+   "_name_or_path": "google/vit-base-patch16-224-in21k",
+   "architectures": [
+     "ViTForImageClassification"
+   ],
+   "attention_probs_dropout_prob": 0.0,
+   "encoder_stride": 16,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.0,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "Down",
+     "1": "Left Swipe",
+     "2": "Right Swipe",
+     "3": "Stop",
+     "4": "Stop Gesture",
+     "5": "Swipe",
+     "6": "Thumbs Down",
+     "7": "Thumbs Up",
+     "8": "Up"
+   },
+   "image_size": 224,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "Down": 0,
+     "Left Swipe": 1,
+     "Right Swipe": 2,
+     "Stop": 3,
+     "Stop Gesture": 4,
+     "Swipe": 5,
+     "Thumbs Down": 6,
+     "Thumbs Up": 7,
+     "Up": 8
+   },
+   "layer_norm_eps": 1e-12,
+   "model_type": "vit",
+   "num_attention_heads": 12,
+   "num_channels": 3,
+   "num_hidden_layers": 12,
+   "patch_size": 16,
+   "problem_type": "single_label_classification",
+   "qkv_bias": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.48.1"
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:79bb991cfdeabcaebb97dfb043d3bdc3aaf43549e81ce77f9b3516b1c47410ff
+ size 343245508
preprocessor_config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "do_convert_rgb": null,
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [
+     0.5,
+     0.5,
+     0.5
+   ],
+   "image_processor_type": "ViTImageProcessor",
+   "image_std": [
+     0.5,
+     0.5,
+     0.5
+   ],
+   "resample": 2,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "height": 224,
+     "width": 224
+   }
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e7dd678aabd9d0ea23f0200be2e7cc55ac5dee2fd8ecb3c063ce03e74171147
+ size 5304