Pubudu committed on
Commit
bfe4ea4
·
verified ·
1 Parent(s): 250c4b5

Model save

Browse files
README.md CHANGED
@@ -14,11 +14,11 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  This model is a fine-tuned version of [facebook/mbart-large-cc25](https://huggingface.co/facebook/mbart-large-cc25) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
- - Loss: 3.2042
18
- - Gen Len: 18.5844
19
- - Rouge-1: 34.6893
20
- - Rouge-2: 17.6676
21
- - Rouge-l: 34.0228
22
 
23
  ## Model description
24
 
@@ -49,25 +49,33 @@ The following hyperparameters were used during training:
49
 
50
  ### Training results
51
 
52
- | Training Loss | Epoch | Step | Validation Loss | Gen Len | Rouge-1 | Rouge-2 | Rouge-l |
53
- |:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:-------:|
54
- | No log | 1.0 | 642 | 3.6377 | 22.68 | 23.5122 | 9.8274 | 22.6444 |
55
- | No log | 2.0 | 1284 | 3.4690 | 24.1111 | 26.1024 | 11.7671 | 25.2784 |
56
- | No log | 3.0 | 1926 | 3.2594 | 23.4756 | 30.4311 | 14.7772 | 29.7887 |
57
- | No log | 4.0 | 2568 | 3.2171 | 21.3644 | 33.2453 | 16.8783 | 32.3833 |
58
- | No log | 5.0 | 3210 | 3.1916 | 19.3667 | 33.1313 | 15.8622 | 32.4001 |
59
- | No log | 6.0 | 3852 | 3.1366 | 21.8133 | 32.3945 | 16.0472 | 31.6275 |
60
- | No log | 7.0 | 4494 | 3.1354 | 20.72 | 33.2417 | 16.5622 | 32.4859 |
61
- | 3.4117 | 8.0 | 5136 | 3.1412 | 19.2578 | 34.706 | 17.7827 | 33.877 |
62
- | 3.4117 | 9.0 | 5778 | 3.1538 | 18.1556 | 32.9258 | 17.2139 | 32.2229 |
63
- | 3.4117 | 10.0 | 6420 | 3.1681 | 19.1867 | 34.1901 | 17.269 | 33.3286 |
64
- | 3.4117 | 11.0 | 7062 | 3.1708 | 18.2978 | 34.4327 | 17.98 | 33.6562 |
65
- | 3.4117 | 12.0 | 7704 | 3.2042 | 18.5844 | 34.6893 | 17.6676 | 34.0228 |
 
 
 
 
 
 
 
 
66
 
67
 
68
  ### Framework versions
69
 
70
  - Transformers 4.35.2
71
  - Pytorch 2.2.1+cu121
72
- - Datasets 2.18.0
73
  - Tokenizers 0.15.2
 
14
 
15
  This model is a fine-tuned version of [facebook/mbart-large-cc25](https://huggingface.co/facebook/mbart-large-cc25) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Loss: 2.2706
18
+ - Gen Len: 26.5311
19
+ - Rouge-1: 74.3728
20
+ - Rouge-2: 66.4207
21
+ - Rouge-l: 73.2288
22
 
23
  ## Model description
24
 
 
49
 
50
  ### Training results
51
 
52
+ | Training Loss | Epoch | Step | Validation Loss | Gen Len | Rouge-1 | Rouge-2 | Rouge-l |
53
+ |:-------------:|:-----:|:-----:|:---------------:|:-------:|:-------:|:-------:|:-------:|
54
+ | No log | 1.0 | 642 | 3.0984 | 27.1578 | 35.9221 | 20.801 | 33.0593 |
55
+ | No log | 2.0 | 1284 | 2.8489 | 23.6511 | 39.2966 | 22.654 | 37.1498 |
56
+ | No log | 3.0 | 1926 | 2.5933 | 26.7267 | 47.1185 | 30.2869 | 44.1491 |
57
+ | No log | 4.0 | 2568 | 2.4764 | 25.22 | 50.0244 | 34.9212 | 48.057 |
58
+ | No log | 5.0 | 3210 | 2.4122 | 26.6533 | 54.5479 | 40.7397 | 52.5031 |
59
+ | No log | 6.0 | 3852 | 2.3427 | 26.2667 | 57.6653 | 44.2728 | 55.9363 |
60
+ | No log | 7.0 | 4494 | 2.3345 | 26.1711 | 62.4422 | 50.3255 | 60.7065 |
61
+ | 2.5283 | 8.0 | 5136 | 2.3083 | 26.3044 | 65.6385 | 55.386 | 64.2792 |
62
+ | 2.5283 | 9.0 | 5778 | 2.2888 | 26.0844 | 69.2728 | 59.5819 | 68.0772 |
63
+ | 2.5283 | 10.0 | 6420 | 2.2962 | 26.5111 | 69.5351 | 60.4728 | 68.5563 |
64
+ | 2.5283 | 11.0 | 7062 | 2.2949 | 26.4467 | 71.1387 | 62.2653 | 69.9201 |
65
+ | 2.5283 | 12.0 | 7704 | 2.2703 | 26.3911 | 72.6498 | 63.8234 | 71.505 |
66
+ | 2.5283 | 13.0 | 8346 | 2.2668 | 26.3556 | 73.2494 | 65.1934 | 72.2586 |
67
+ | 2.5283 | 14.0 | 8988 | 2.2672 | 26.5333 | 73.2228 | 64.7993 | 71.9902 |
68
+ | 2.5283 | 15.0 | 9630 | 2.2614 | 27.2356 | 72.7529 | 64.3941 | 71.5976 |
69
+ | 1.7733 | 16.0 | 10272 | 2.2660 | 26.5311 | 73.5002 | 65.0894 | 72.2767 |
70
+ | 1.7733 | 17.0 | 10914 | 2.2625 | 26.4089 | 73.9016 | 66.1357 | 72.7275 |
71
+ | 1.7733 | 18.0 | 11556 | 2.2674 | 26.0111 | 74.1365 | 66.7011 | 73.0304 |
72
+ | 1.7733 | 19.0 | 12198 | 2.2645 | 26.7356 | 74.1459 | 66.8989 | 73.1443 |
73
+ | 1.7733 | 20.0 | 12840 | 2.2706 | 26.5311 | 74.3728 | 66.4207 | 73.2288 |
74
 
75
 
76
  ### Framework versions
77
 
78
  - Transformers 4.35.2
79
  - Pytorch 2.2.1+cu121
80
+ - Datasets 2.19.0
81
  - Tokenizers 0.15.2
mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_army_first3/adapter_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "architecture": "union",
4
+ "configs": [
5
+ {
6
+ "architecture": "prefix_tuning",
7
+ "bottleneck_size": 800,
8
+ "cross_prefix": true,
9
+ "dropout": 0.0,
10
+ "encoder_prefix": true,
11
+ "flat": false,
12
+ "leave_out": [],
13
+ "non_linearity": "tanh",
14
+ "prefix_length": 12,
15
+ "shared_gating": true,
16
+ "use_gating": false
17
+ },
18
+ {
19
+ "adapter_residual_before_ln": false,
20
+ "cross_adapter": false,
21
+ "factorized_phm_W": true,
22
+ "factorized_phm_rule": false,
23
+ "hypercomplex_nonlinearity": "glorot-uniform",
24
+ "init_weights": "mam_adapter",
25
+ "inv_adapter": null,
26
+ "inv_adapter_reduction_factor": null,
27
+ "is_parallel": true,
28
+ "learn_phm": true,
29
+ "leave_out": [],
30
+ "ln_after": false,
31
+ "ln_before": false,
32
+ "mh_adapter": false,
33
+ "non_linearity": "relu",
34
+ "original_ln_after": true,
35
+ "original_ln_before": false,
36
+ "output_adapter": true,
37
+ "phm_bias": true,
38
+ "phm_c_init": "normal",
39
+ "phm_dim": 4,
40
+ "phm_init_range": 0.0001,
41
+ "phm_layer": false,
42
+ "phm_rank": 1,
43
+ "reduction_factor": 2,
44
+ "residual_before_ln": true,
45
+ "scaling": 4.0,
46
+ "shared_W_phm": false,
47
+ "shared_phm_rule": true,
48
+ "use_gating": false
49
+ }
50
+ ]
51
+ },
52
+ "config_id": "4524cf66398dd5c5",
53
+ "hidden_size": 1024,
54
+ "model_class": "MBartForConditionalGeneration",
55
+ "model_name": "facebook/mbart-large-cc25",
56
+ "model_type": "mbart",
57
+ "name": "mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_army_first3",
58
+ "version": "0.1.1"
59
+ }
mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_army_first3/head_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": null,
3
+ "hidden_size": 1024,
4
+ "label2id": {
5
+ "LABEL_0": 0,
6
+ "LABEL_1": 1,
7
+ "LABEL_2": 2
8
+ },
9
+ "model_class": "MBartForConditionalGeneration",
10
+ "model_name": "facebook/mbart-large-cc25",
11
+ "model_type": "mbart",
12
+ "name": null,
13
+ "num_labels": 3,
14
+ "version": "0.1.1"
15
+ }
mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_army_first3/pytorch_adapter.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73d286633212892569cf3db0d670685c96948e37c91d22e364b2d37fe47dcd16
3
+ size 347204524
mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_army_first3/pytorch_model_head.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11d847a7c53a8cf4cbf1435e76e4bd7db89bde724cc5aaa97a7111e858f6d02a
3
+ size 1025116314