ppparkker
/

for_test13

@@ -16,8 +16,8 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [team-lucid/hubert-base-korean](https://huggingface.co/team-lucid/hubert-base-korean) on an unspecified dataset.
 It achieves the following results on the evaluation set:
-- Loss: 9.8047
-- Per: 2.5339
 - Learning Rate: 0.0000
 ## Model description
@@ -53,17 +53,15 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch  | Step | Validation Loss | Per    | Rate   |
 |:-------------:|:------:|:----:|:---------------:|:------:|:------:|
-| 10.7575       | 0.8547 | 50   | 11.3211         | 1.3831 | 0.0001 |
-| 6.5834        | 1.7094 | 100  | 10.3838         | 2.6287 | 0.0001 |
-| 5.7953        | 2.5641 | 150  | 10.0381         | 2.7805 | 0.0001 |
-| 5.4816        | 3.4188 | 200  | 9.9584          | 2.7593 | 0.0001 |
-| 5.4448        | 4.2735 | 250  | 9.9493          | 2.7246 | 0.0001 |
-| 5.1187        | 5.1282 | 300  | 9.9573          | 2.6414 | 0.0001 |
-| 4.5692        | 5.9829 | 350  | 9.9676          | 2.5711 | 0.0000 |
-| 5.1398        | 6.8376 | 400  | 9.8478          | 2.5756 | 0.0000 |
-| 4.7656        | 7.6923 | 450  | 9.8491          | 2.5501 | 0.0000 |
-| 5.0038        | 8.5470 | 500  | 9.8228          | 2.5269 | 0.0000 |
-| 5.3115        | 9.4017 | 550  | 9.8047          | 2.5339 | 0.0000 |
 ### Framework versions

 This model is a fine-tuned version of [team-lucid/hubert-base-korean](https://huggingface.co/team-lucid/hubert-base-korean) on an unspecified dataset.
 It achieves the following results on the evaluation set:
+- Loss: 9.0800
+- Per: 0.8663
 - Learning Rate: 0.0000
 ## Model description
 | Training Loss | Epoch  | Step | Validation Loss | Per    | Rate   |
 |:-------------:|:------:|:----:|:---------------:|:------:|:------:|
+| 9.8433        | 1.0417 | 50   | 10.8927         | 1.9730 | 0.0001 |
+| 6.2994        | 2.0833 | 100  | 10.1801         | 1.4889 | 0.0001 |
+| 5.6979        | 3.125  | 150  | 9.8748          | 1.1627 | 0.0001 |
+| 5.5696        | 4.1667 | 200  | 9.6279          | 0.9856 | 0.0001 |
+| 5.5354        | 5.2083 | 250  | 9.4447          | 0.9282 | 0.0001 |
+| 5.3749        | 6.25   | 300  | 9.3013          | 0.8952 | 4e-05  |
+| 5.6517        | 7.2917 | 350  | 9.1784          | 0.8771 | 0.0000 |
+| 5.1293        | 8.3333 | 400  | 9.0618          | 0.8661 | 0.0000 |
+| 5.5912        | 9.375  | 450  | 9.0800          | 0.8663 | 0.0000 |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,11 +1,11 @@
 {
   "_name_or_path": "team-lucid/hubert-base-korean",
-  "activation_dropout": 0.05,
   "apply_spec_augment": true,
   "architectures": [
     "HubertForCTC"
   ],
-  "attention_dropout": 0.01,
   "auto_map": {
     "FlaxAutoModel": "team-lucid/hubert-base-korean--modeling_flax_hubert.FlaxHubertModel"
   },
@@ -46,13 +46,13 @@
   "feat_extract_activation": "gelu",
   "feat_extract_dropout": 0.0,
   "feat_extract_norm": "group",
-  "feat_proj_dropout": 0.05,
   "feat_proj_layer_norm": true,
   "final_dropout": 0.1,
   "final_proj_size": 256,
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
-  "hidden_dropout": 0.05,
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "initializer_range": 0.02,
@@ -64,7 +64,7 @@
   "mask_feature_prob": 0.0,
   "mask_time_length": 10,
   "mask_time_min_masks": 2,
-  "mask_time_prob": 0.1,
   "model_type": "hubert",
   "num_attention_heads": 12,
   "num_conv_pos_embedding_groups": 16,

 {
   "_name_or_path": "team-lucid/hubert-base-korean",
+  "activation_dropout": 0.0,
   "apply_spec_augment": true,
   "architectures": [
     "HubertForCTC"
   ],
+  "attention_dropout": 0.0,
   "auto_map": {
     "FlaxAutoModel": "team-lucid/hubert-base-korean--modeling_flax_hubert.FlaxHubertModel"
   },
   "feat_extract_activation": "gelu",
   "feat_extract_dropout": 0.0,
   "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.0,
   "feat_proj_layer_norm": true,
   "final_dropout": 0.1,
   "final_proj_size": 256,
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "initializer_range": 0.02,
   "mask_feature_prob": 0.0,
   "mask_time_length": 10,
   "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
   "model_type": "hubert",
   "num_attention_heads": 12,
   "num_conv_pos_embedding_groups": 16,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:11fedae78d85046497f3ce8a233a9904ce954e7e6eacd82c59034e120bace9f8
 size 377647624

 version https://git-lfs.github.com/spec/v1
+oid sha256:449046bda55eb2c295045fad580dd3c4d47d03e9557b8557af628039d2b1c7e0
 size 377647624

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05e8c0e0ddea3f29f722a59cbf32ca7d2cdcfd90d776f426cefb721f35733a36
 size 5240

 version https://git-lfs.github.com/spec/v1
+oid sha256:b8bda180d779a216d8d6970fe6011f6e2233e2c8b5f5621d73d6325251a38957
 size 5240