Model save
Browse files
- README.md +21 -27
- mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_dinamina_first3/adapter_config.json +59 -0
- mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_dinamina_first3/head_config.json +15 -0
- mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_dinamina_first3/pytorch_adapter.bin +3 -0
- mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_dinamina_first3/pytorch_model_head.bin +3 -0
README.md
CHANGED
@@ -14,11 +14,11 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [facebook/mbart-large-cc25](https://huggingface.co/facebook/mbart-large-cc25) on an unknown dataset.
 It achieves the following results on the evaluation set:
- - Loss: 2.
- - Gen Len:
- - Rouge-1:
- - Rouge-2:
- - Rouge-l:
+ - Loss: 2.5883
+ - Gen Len: 16.3289
+ - Rouge-1: 67.054
+ - Rouge-2: 58.2501
+ - Rouge-l: 66.8589
 
 ## Model description
 
@@ -49,28 +49,22 @@ The following hyperparameters were used during training:
 
 ### Training results
 
- | Training Loss | Epoch | Step
- |
- | No log | 1.0 | 642
- | No log | 2.0 | 1284
- | No log | 3.0 | 1926
- | No log | 4.0 | 2568
- | No log | 5.0 | 3210
- | No log | 6.0 | 3852
- | No log | 7.0 | 4494
- | 2.
- | 2.
- | 2.
- | 2.
- | 2.
- | 2.
- | 2.
- | 2.5283 | 15.0 | 9630 | 2.2614 | 27.2356 | 72.7529 | 64.3941 | 71.5976 |
- | 1.7733 | 16.0 | 10272 | 2.2660 | 26.5311 | 73.5002 | 65.0894 | 72.2767 |
- | 1.7733 | 17.0 | 10914 | 2.2625 | 26.4089 | 73.9016 | 66.1357 | 72.7275 |
- | 1.7733 | 18.0 | 11556 | 2.2674 | 26.0111 | 74.1365 | 66.7011 | 73.0304 |
- | 1.7733 | 19.0 | 12198 | 2.2645 | 26.7356 | 74.1459 | 66.8989 | 73.1443 |
- | 1.7733 | 20.0 | 12840 | 2.2706 | 26.5311 | 74.3728 | 66.4207 | 73.2288 |
+ | Training Loss | Epoch | Step | Validation Loss | Gen Len | Rouge-1 | Rouge-2 | Rouge-l |
+ |:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:-------:|
+ | No log | 1.0 | 642 | 3.1489 | 17.9067 | 34.9145 | 18.886 | 34.0576 |
+ | No log | 2.0 | 1284 | 2.9582 | 20.4022 | 36.7329 | 20.2718 | 36.107 |
+ | No log | 3.0 | 1926 | 2.7904 | 17.6689 | 41.2473 | 24.743 | 40.7266 |
+ | No log | 4.0 | 2568 | 2.6926 | 16.3289 | 48.3255 | 31.9378 | 47.3541 |
+ | No log | 5.0 | 3210 | 2.6226 | 16.1467 | 52.4513 | 38.1209 | 52.1258 |
+ | No log | 6.0 | 3852 | 2.5896 | 16.6178 | 54.5309 | 41.7086 | 54.1448 |
+ | No log | 7.0 | 4494 | 2.6139 | 16.6844 | 59.101 | 46.7108 | 58.6428 |
+ | 2.6207 | 8.0 | 5136 | 2.5894 | 16.5756 | 63.603 | 52.3404 | 63.3017 |
+ | 2.6207 | 9.0 | 5778 | 2.5514 | 16.38 | 64.0458 | 54.1763 | 63.7781 |
+ | 2.6207 | 10.0 | 6420 | 2.5533 | 16.2444 | 65.4167 | 55.446 | 65.0577 |
+ | 2.6207 | 11.0 | 7062 | 2.5621 | 16.5644 | 67.0846 | 57.8265 | 66.7532 |
+ | 2.6207 | 12.0 | 7704 | 2.5537 | 16.3689 | 66.2595 | 56.4953 | 65.7888 |
+ | 2.6207 | 13.0 | 8346 | 2.5779 | 16.1778 | 66.3899 | 56.6339 | 66.1145 |
+ | 2.6207 | 14.0 | 8988 | 2.5883 | 16.3289 | 67.054 | 58.2501 | 66.8589 |
 
 
 ### Framework versions
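The card itself contains no usage example. Below is a minimal, hedged sketch of how the adapter added in this commit might be loaded for inference. Assumptions not stated in the card: the AdapterHub `adapters` package (the config files below record version 0.1.1), a local copy of the `mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_dinamina_first3` directory, and a placeholder input text, since the training dataset is listed as unknown.

```python
# Hedged sketch, not the authors' code: load the base mBART model, attach the
# adapter saved in this commit, and generate from a placeholder input.
import adapters
from transformers import AutoTokenizer, MBartForConditionalGeneration

base = "facebook/mbart-large-cc25"
adapter_dir = "mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_dinamina_first3"  # local copy of this repo's folder

tokenizer = AutoTokenizer.from_pretrained(base)
model = MBartForConditionalGeneration.from_pretrained(base)
adapters.init(model)  # add adapter support to the plain transformers model

adapter_name = model.load_adapter(adapter_dir)  # reads adapter_config.json and pytorch_adapter.bin
model.set_active_adapters(adapter_name)

# Placeholder input: the dataset (and hence language/task specifics) is unknown;
# the mBART tokenizer's src_lang/tgt_lang may need to be set accordingly.
inputs = tokenizer("Some source text for the fine-tuned task.", return_tensors="pt")
output_ids = model.generate(**inputs, max_length=64, num_beams=4)
print(tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0])
```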
mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_dinamina_first3/adapter_config.json
ADDED
@@ -0,0 +1,59 @@
{
  "config": {
    "architecture": "union",
    "configs": [
      {
        "architecture": "prefix_tuning",
        "bottleneck_size": 800,
        "cross_prefix": true,
        "dropout": 0.0,
        "encoder_prefix": true,
        "flat": false,
        "leave_out": [],
        "non_linearity": "tanh",
        "prefix_length": 12,
        "shared_gating": true,
        "use_gating": false
      },
      {
        "adapter_residual_before_ln": false,
        "cross_adapter": false,
        "factorized_phm_W": true,
        "factorized_phm_rule": false,
        "hypercomplex_nonlinearity": "glorot-uniform",
        "init_weights": "mam_adapter",
        "inv_adapter": null,
        "inv_adapter_reduction_factor": null,
        "is_parallel": true,
        "learn_phm": true,
        "leave_out": [],
        "ln_after": false,
        "ln_before": false,
        "mh_adapter": false,
        "non_linearity": "relu",
        "original_ln_after": true,
        "original_ln_before": false,
        "output_adapter": true,
        "phm_bias": true,
        "phm_c_init": "normal",
        "phm_dim": 4,
        "phm_init_range": 0.0001,
        "phm_layer": false,
        "phm_rank": 1,
        "reduction_factor": 2,
        "residual_before_ln": true,
        "scaling": 4.0,
        "shared_W_phm": false,
        "shared_phm_rule": true,
        "use_gating": false
      }
    ]
  },
  "config_id": "4524cf66398dd5c5",
  "hidden_size": 1024,
  "model_class": "MBartForConditionalGeneration",
  "model_name": "facebook/mbart-large-cc25",
  "model_type": "mbart",
  "name": "mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_dinamina_first3",
  "version": "0.1.1"
}
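The adapter_config.json above records a "union" of two methods: a prefix-tuning block (prefix length 12, bottleneck size 800, tanh) and a parallel bottleneck adapter (reduction factor 2, scaling 4.0, `mam_adapter` initialization), i.e. a Mix-and-Match-style setup. As a hedged sketch, an equivalent configuration could be declared before training with the AdapterHub `adapters` package roughly as follows; the adapter name `prefix_plus_parallel_bn` is illustrative, not the name used in this repo.

```python
# Sketch (not the authors' training script): declare an adapter setup matching
# the "union" config above and prepare it for training.
import adapters
from adapters import ConfigUnion, ParBnConfig, PrefixTuningConfig
from transformers import MBartForConditionalGeneration

model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25")
adapters.init(model)

config = ConfigUnion(
    # Mirrors the first entry: prefix tuning with prefix_length 12 and bottleneck_size 800.
    PrefixTuningConfig(prefix_length=12, bottleneck_size=800, non_linearity="tanh"),
    # Mirrors the second entry: parallel bottleneck adapter, reduction factor 2,
    # scaling 4.0, "mam_adapter" initialization.
    ParBnConfig(reduction_factor=2, scaling=4.0, non_linearity="relu",
                init_weights="mam_adapter"),
)

model.add_adapter("prefix_plus_parallel_bn", config=config)
model.train_adapter("prefix_plus_parallel_bn")  # freeze the base model, train only the adapter weights
```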
mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_dinamina_first3/head_config.json
ADDED
@@ -0,0 +1,15 @@
{
  "config": null,
  "hidden_size": 1024,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2
  },
  "model_class": "MBartForConditionalGeneration",
  "model_name": "facebook/mbart-large-cc25",
  "model_type": "mbart",
  "name": null,
  "num_labels": 3,
  "version": "0.1.1"
}
mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_dinamina_first3/pytorch_adapter.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:92d1f22649bafb8bcda1dd3fe4c235d2a74fe0ca65849d8d5a17ae629ff68b2f
size 347207148
mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_dinamina_first3/pytorch_model_head.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:11d847a7c53a8cf4cbf1435e76e4bd7db89bde724cc5aaa97a7111e858f6d02a
size 1025116314
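The two .bin entries above are Git LFS pointer files: the repository records only the `oid` (a SHA-256 of the actual file) and its `size`, while the weights themselves live in LFS storage. As a small sketch, a downloaded copy can be checked against its pointer using only the standard library; the path below assumes the repo was cloned and the weights pulled (e.g. with `git lfs pull`).

```python
# Sketch: verify a downloaded LFS-tracked file against the sha256 oid and size
# recorded in its pointer above.
import hashlib
from pathlib import Path

path = Path("mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_dinamina_first3/pytorch_adapter.bin")
expected_oid = "92d1f22649bafb8bcda1dd3fe4c235d2a74fe0ca65849d8d5a17ae629ff68b2f"
expected_size = 347207148

digest = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)

assert path.stat().st_size == expected_size, "size does not match the LFS pointer"
assert digest.hexdigest() == expected_oid, "sha256 does not match the LFS pointer"
print(f"{path.name} matches its LFS pointer")
```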