ppparkker committed on
Commit
e5ded48
·
verified ·
1 Parent(s): 344fd27

End of training

Browse files
Files changed (4) hide show
  1. README.md +11 -13
  2. config.json +5 -5
  3. model.safetensors +1 -1
  4. training_args.bin +1 -1
README.md CHANGED
@@ -16,8 +16,8 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [team-lucid/hubert-base-korean](https://huggingface.co/team-lucid/hubert-base-korean) on the None dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 9.8047
20
- - Per: 2.5339
21
  - Learning Rate: 0.0000
22
 
23
  ## Model description
@@ -53,17 +53,15 @@ The following hyperparameters were used during training:
53
 
54
  | Training Loss | Epoch | Step | Validation Loss | Per | Rate |
55
  |:-------------:|:------:|:----:|:---------------:|:------:|:------:|
56
- | 10.7575 | 0.8547 | 50 | 11.3211 | 1.3831 | 0.0001 |
57
- | 6.5834 | 1.7094 | 100 | 10.3838 | 2.6287 | 0.0001 |
58
- | 5.7953 | 2.5641 | 150 | 10.0381 | 2.7805 | 0.0001 |
59
- | 5.4816 | 3.4188 | 200 | 9.9584 | 2.7593 | 0.0001 |
60
- | 5.4448 | 4.2735 | 250 | 9.9493 | 2.7246 | 0.0001 |
61
- | 5.1187 | 5.1282 | 300 | 9.9573 | 2.6414 | 0.0001 |
62
- | 4.5692 | 5.9829 | 350 | 9.9676 | 2.5711 | 0.0000 |
63
- | 5.1398 | 6.8376 | 400 | 9.8478 | 2.5756 | 0.0000 |
64
- | 4.7656 | 7.6923 | 450 | 9.8491 | 2.5501 | 0.0000 |
65
- | 5.0038 | 8.5470 | 500 | 9.8228 | 2.5269 | 0.0000 |
66
- | 5.3115 | 9.4017 | 550 | 9.8047 | 2.5339 | 0.0000 |
67
 
68
 
69
  ### Framework versions
 
16
 
17
  This model is a fine-tuned version of [team-lucid/hubert-base-korean](https://huggingface.co/team-lucid/hubert-base-korean) on the None dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 9.0800
20
+ - Per: 0.8663
21
  - Learning Rate: 0.0000
22
 
23
  ## Model description
 
53
 
54
  | Training Loss | Epoch | Step | Validation Loss | Per | Rate |
55
  |:-------------:|:------:|:----:|:---------------:|:------:|:------:|
56
+ | 9.8433 | 1.0417 | 50 | 10.8927 | 1.9730 | 0.0001 |
57
+ | 6.2994 | 2.0833 | 100 | 10.1801 | 1.4889 | 0.0001 |
58
+ | 5.6979 | 3.125 | 150 | 9.8748 | 1.1627 | 0.0001 |
59
+ | 5.5696 | 4.1667 | 200 | 9.6279 | 0.9856 | 0.0001 |
60
+ | 5.5354 | 5.2083 | 250 | 9.4447 | 0.9282 | 0.0001 |
61
+ | 5.3749 | 6.25 | 300 | 9.3013 | 0.8952 | 4e-05 |
62
+ | 5.6517 | 7.2917 | 350 | 9.1784 | 0.8771 | 0.0000 |
63
+ | 5.1293 | 8.3333 | 400 | 9.0618 | 0.8661 | 0.0000 |
64
+ | 5.5912 | 9.375 | 450 | 9.0800 | 0.8663 | 0.0000 |
 
 
65
 
66
 
67
  ### Framework versions
config.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "_name_or_path": "team-lucid/hubert-base-korean",
3
- "activation_dropout": 0.05,
4
  "apply_spec_augment": true,
5
  "architectures": [
6
  "HubertForCTC"
7
  ],
8
- "attention_dropout": 0.01,
9
  "auto_map": {
10
  "FlaxAutoModel": "team-lucid/hubert-base-korean--modeling_flax_hubert.FlaxHubertModel"
11
  },
@@ -46,13 +46,13 @@
46
  "feat_extract_activation": "gelu",
47
  "feat_extract_dropout": 0.0,
48
  "feat_extract_norm": "group",
49
- "feat_proj_dropout": 0.05,
50
  "feat_proj_layer_norm": true,
51
  "final_dropout": 0.1,
52
  "final_proj_size": 256,
53
  "gradient_checkpointing": false,
54
  "hidden_act": "gelu",
55
- "hidden_dropout": 0.05,
56
  "hidden_dropout_prob": 0.1,
57
  "hidden_size": 768,
58
  "initializer_range": 0.02,
@@ -64,7 +64,7 @@
64
  "mask_feature_prob": 0.0,
65
  "mask_time_length": 10,
66
  "mask_time_min_masks": 2,
67
- "mask_time_prob": 0.1,
68
  "model_type": "hubert",
69
  "num_attention_heads": 12,
70
  "num_conv_pos_embedding_groups": 16,
 
1
  {
2
  "_name_or_path": "team-lucid/hubert-base-korean",
3
+ "activation_dropout": 0.0,
4
  "apply_spec_augment": true,
5
  "architectures": [
6
  "HubertForCTC"
7
  ],
8
+ "attention_dropout": 0.0,
9
  "auto_map": {
10
  "FlaxAutoModel": "team-lucid/hubert-base-korean--modeling_flax_hubert.FlaxHubertModel"
11
  },
 
46
  "feat_extract_activation": "gelu",
47
  "feat_extract_dropout": 0.0,
48
  "feat_extract_norm": "group",
49
+ "feat_proj_dropout": 0.0,
50
  "feat_proj_layer_norm": true,
51
  "final_dropout": 0.1,
52
  "final_proj_size": 256,
53
  "gradient_checkpointing": false,
54
  "hidden_act": "gelu",
55
+ "hidden_dropout": 0.0,
56
  "hidden_dropout_prob": 0.1,
57
  "hidden_size": 768,
58
  "initializer_range": 0.02,
 
64
  "mask_feature_prob": 0.0,
65
  "mask_time_length": 10,
66
  "mask_time_min_masks": 2,
67
+ "mask_time_prob": 0.05,
68
  "model_type": "hubert",
69
  "num_attention_heads": 12,
70
  "num_conv_pos_embedding_groups": 16,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11fedae78d85046497f3ce8a233a9904ce954e7e6eacd82c59034e120bace9f8
3
  size 377647624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:449046bda55eb2c295045fad580dd3c4d47d03e9557b8557af628039d2b1c7e0
3
  size 377647624
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05e8c0e0ddea3f29f722a59cbf32ca7d2cdcfd90d776f426cefb721f35733a36
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8bda180d779a216d8d6970fe6011f6e2233e2c8b5f5621d73d6325251a38957
3
  size 5240