Pubudu committed on
Commit
bfe4ea4
·
verified ·
1 Parent(s): 250c4b5

Model save

Browse files
README.md CHANGED
@@ -14,11 +14,11 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  This model is a fine-tuned version of [facebook/mbart-large-cc25](https://huggingface.co/facebook/mbart-large-cc25) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
- - Loss: 3.2042
18
- - Gen Len: 18.5844
19
- - Rouge-1: 34.6893
20
- - Rouge-2: 17.6676
21
- - Rouge-l: 34.0228
22
 
23
  ## Model description
24
 
@@ -49,25 +49,33 @@ The following hyperparameters were used during training:
49
 
50
  ### Training results
51
 
52
- | Training Loss | Epoch | Step | Validation Loss | Gen Len | Rouge-1 | Rouge-2 | Rouge-l |
53
- |:-------------:|:-----:|:----:|:---------------:|:-------:|:-------:|:-------:|:-------:|
54
- | No log | 1.0 | 642 | 3.6377 | 22.68 | 23.5122 | 9.8274 | 22.6444 |
55
- | No log | 2.0 | 1284 | 3.4690 | 24.1111 | 26.1024 | 11.7671 | 25.2784 |
56
- | No log | 3.0 | 1926 | 3.2594 | 23.4756 | 30.4311 | 14.7772 | 29.7887 |
57
- | No log | 4.0 | 2568 | 3.2171 | 21.3644 | 33.2453 | 16.8783 | 32.3833 |
58
- | No log | 5.0 | 3210 | 3.1916 | 19.3667 | 33.1313 | 15.8622 | 32.4001 |
59
- | No log | 6.0 | 3852 | 3.1366 | 21.8133 | 32.3945 | 16.0472 | 31.6275 |
60
- | No log | 7.0 | 4494 | 3.1354 | 20.72 | 33.2417 | 16.5622 | 32.4859 |
61
- | 3.4117 | 8.0 | 5136 | 3.1412 | 19.2578 | 34.706 | 17.7827 | 33.877 |
62
- | 3.4117 | 9.0 | 5778 | 3.1538 | 18.1556 | 32.9258 | 17.2139 | 32.2229 |
63
- | 3.4117 | 10.0 | 6420 | 3.1681 | 19.1867 | 34.1901 | 17.269 | 33.3286 |
64
- | 3.4117 | 11.0 | 7062 | 3.1708 | 18.2978 | 34.4327 | 17.98 | 33.6562 |
65
- | 3.4117 | 12.0 | 7704 | 3.2042 | 18.5844 | 34.6893 | 17.6676 | 34.0228 |
 
 
 
 
 
 
 
 
66
 
67
 
68
  ### Framework versions
69
 
70
  - Transformers 4.35.2
71
  - Pytorch 2.2.1+cu121
72
- - Datasets 2.18.0
73
  - Tokenizers 0.15.2
 
14
 
15
  This model is a fine-tuned version of [facebook/mbart-large-cc25](https://huggingface.co/facebook/mbart-large-cc25) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Loss: 2.2706
18
+ - Gen Len: 26.5311
19
+ - Rouge-1: 74.3728
20
+ - Rouge-2: 66.4207
21
+ - Rouge-l: 73.2288
22
 
23
  ## Model description
24
 
 
49
 
50
  ### Training results
51
 
52
+ | Training Loss | Epoch | Step | Validation Loss | Gen Len | Rouge-1 | Rouge-2 | Rouge-l |
53
+ |:-------------:|:-----:|:-----:|:---------------:|:-------:|:-------:|:-------:|:-------:|
54
+ | No log | 1.0 | 642 | 3.0984 | 27.1578 | 35.9221 | 20.801 | 33.0593 |
55
+ | No log | 2.0 | 1284 | 2.8489 | 23.6511 | 39.2966 | 22.654 | 37.1498 |
56
+ | No log | 3.0 | 1926 | 2.5933 | 26.7267 | 47.1185 | 30.2869 | 44.1491 |
57
+ | No log | 4.0 | 2568 | 2.4764 | 25.22 | 50.0244 | 34.9212 | 48.057 |
58
+ | No log | 5.0 | 3210 | 2.4122 | 26.6533 | 54.5479 | 40.7397 | 52.5031 |
59
+ | No log | 6.0 | 3852 | 2.3427 | 26.2667 | 57.6653 | 44.2728 | 55.9363 |
60
+ | No log | 7.0 | 4494 | 2.3345 | 26.1711 | 62.4422 | 50.3255 | 60.7065 |
61
+ | 2.5283 | 8.0 | 5136 | 2.3083 | 26.3044 | 65.6385 | 55.386 | 64.2792 |
62
+ | 2.5283 | 9.0 | 5778 | 2.2888 | 26.0844 | 69.2728 | 59.5819 | 68.0772 |
63
+ | 2.5283 | 10.0 | 6420 | 2.2962 | 26.5111 | 69.5351 | 60.4728 | 68.5563 |
64
+ | 2.5283 | 11.0 | 7062 | 2.2949 | 26.4467 | 71.1387 | 62.2653 | 69.9201 |
65
+ | 2.5283 | 12.0 | 7704 | 2.2703 | 26.3911 | 72.6498 | 63.8234 | 71.505 |
66
+ | 2.5283 | 13.0 | 8346 | 2.2668 | 26.3556 | 73.2494 | 65.1934 | 72.2586 |
67
+ | 2.5283 | 14.0 | 8988 | 2.2672 | 26.5333 | 73.2228 | 64.7993 | 71.9902 |
68
+ | 2.5283 | 15.0 | 9630 | 2.2614 | 27.2356 | 72.7529 | 64.3941 | 71.5976 |
69
+ | 1.7733 | 16.0 | 10272 | 2.2660 | 26.5311 | 73.5002 | 65.0894 | 72.2767 |
70
+ | 1.7733 | 17.0 | 10914 | 2.2625 | 26.4089 | 73.9016 | 66.1357 | 72.7275 |
71
+ | 1.7733 | 18.0 | 11556 | 2.2674 | 26.0111 | 74.1365 | 66.7011 | 73.0304 |
72
+ | 1.7733 | 19.0 | 12198 | 2.2645 | 26.7356 | 74.1459 | 66.8989 | 73.1443 |
73
+ | 1.7733 | 20.0 | 12840 | 2.2706 | 26.5311 | 74.3728 | 66.4207 | 73.2288 |
74
 
75
 
76
  ### Framework versions
77
 
78
  - Transformers 4.35.2
79
  - Pytorch 2.2.1+cu121
80
+ - Datasets 2.19.0
81
  - Tokenizers 0.15.2
mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_army_first3/adapter_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "architecture": "union",
4
+ "configs": [
5
+ {
6
+ "architecture": "prefix_tuning",
7
+ "bottleneck_size": 800,
8
+ "cross_prefix": true,
9
+ "dropout": 0.0,
10
+ "encoder_prefix": true,
11
+ "flat": false,
12
+ "leave_out": [],
13
+ "non_linearity": "tanh",
14
+ "prefix_length": 12,
15
+ "shared_gating": true,
16
+ "use_gating": false
17
+ },
18
+ {
19
+ "adapter_residual_before_ln": false,
20
+ "cross_adapter": false,
21
+ "factorized_phm_W": true,
22
+ "factorized_phm_rule": false,
23
+ "hypercomplex_nonlinearity": "glorot-uniform",
24
+ "init_weights": "mam_adapter",
25
+ "inv_adapter": null,
26
+ "inv_adapter_reduction_factor": null,
27
+ "is_parallel": true,
28
+ "learn_phm": true,
29
+ "leave_out": [],
30
+ "ln_after": false,
31
+ "ln_before": false,
32
+ "mh_adapter": false,
33
+ "non_linearity": "relu",
34
+ "original_ln_after": true,
35
+ "original_ln_before": false,
36
+ "output_adapter": true,
37
+ "phm_bias": true,
38
+ "phm_c_init": "normal",
39
+ "phm_dim": 4,
40
+ "phm_init_range": 0.0001,
41
+ "phm_layer": false,
42
+ "phm_rank": 1,
43
+ "reduction_factor": 2,
44
+ "residual_before_ln": true,
45
+ "scaling": 4.0,
46
+ "shared_W_phm": false,
47
+ "shared_phm_rule": true,
48
+ "use_gating": false
49
+ }
50
+ ]
51
+ },
52
+ "config_id": "4524cf66398dd5c5",
53
+ "hidden_size": 1024,
54
+ "model_class": "MBartForConditionalGeneration",
55
+ "model_name": "facebook/mbart-large-cc25",
56
+ "model_type": "mbart",
57
+ "name": "mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_army_first3",
58
+ "version": "0.1.1"
59
+ }
mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_army_first3/head_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": null,
3
+ "hidden_size": 1024,
4
+ "label2id": {
5
+ "LABEL_0": 0,
6
+ "LABEL_1": 1,
7
+ "LABEL_2": 2
8
+ },
9
+ "model_class": "MBartForConditionalGeneration",
10
+ "model_name": "facebook/mbart-large-cc25",
11
+ "model_type": "mbart",
12
+ "name": null,
13
+ "num_labels": 3,
14
+ "version": "0.1.1"
15
+ }
mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_army_first3/pytorch_adapter.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73d286633212892569cf3db0d670685c96948e37c91d22e364b2d37fe47dcd16
3
+ size 347204524
mbart-large-cc25_prefix_tuning_12_par_bn_rf_2_army_first3/pytorch_model_head.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11d847a7c53a8cf4cbf1435e76e4bd7db89bde724cc5aaa97a7111e858f6d02a
3
+ size 1025116314