Add verifyToken field to verify that evaluation results are produced by Hugging Face's automatic model evaluator

#8
by autoevaluator HF staff - opened
Files changed (1) hide show
  1. README.md +74 -50
README.md CHANGED
@@ -1,9 +1,9 @@
1
  ---
2
- tags:
3
- - summarization
4
  language:
5
  - en
6
  license: mit
 
 
7
  model-index:
8
  - name: facebook/bart-large-xsum
9
  results:
@@ -16,30 +16,36 @@ model-index:
16
  config: 3.0.0
17
  split: test
18
  metrics:
19
- - name: ROUGE-1
20
- type: rouge
21
  value: 25.2697
 
22
  verified: true
23
- - name: ROUGE-2
24
- type: rouge
25
  value: 7.6638
 
26
  verified: true
27
- - name: ROUGE-L
28
- type: rouge
29
  value: 17.1808
 
30
  verified: true
31
- - name: ROUGE-LSUM
32
- type: rouge
33
  value: 21.7933
 
34
  verified: true
35
- - name: loss
36
- type: loss
37
  value: 3.5042972564697266
 
38
  verified: true
39
- - name: gen_len
40
- type: gen_len
41
  value: 27.4462
 
42
  verified: true
 
43
  - task:
44
  type: summarization
45
  name: Summarization
@@ -49,30 +55,36 @@ model-index:
49
  config: default
50
  split: test
51
  metrics:
52
- - name: ROUGE-1
53
- type: rouge
54
  value: 45.4525
 
55
  verified: true
56
- - name: ROUGE-2
57
- type: rouge
58
  value: 22.3455
 
59
  verified: true
60
- - name: ROUGE-L
61
- type: rouge
62
  value: 37.2302
 
63
  verified: true
64
- - name: ROUGE-LSUM
65
- type: rouge
66
  value: 37.2323
 
67
  verified: true
68
- - name: loss
69
- type: loss
70
  value: 2.3128726482391357
 
71
  verified: true
72
- - name: gen_len
73
- type: gen_len
74
  value: 25.5435
 
75
  verified: true
 
76
  - task:
77
  type: summarization
78
  name: Summarization
@@ -82,30 +94,36 @@ model-index:
82
  config: samsum
83
  split: train
84
  metrics:
85
- - name: ROUGE-1
86
- type: rouge
87
  value: 24.7852
 
88
  verified: true
89
- - name: ROUGE-2
90
- type: rouge
91
  value: 5.2533
 
92
  verified: true
93
- - name: ROUGE-L
94
- type: rouge
95
  value: 18.6792
 
96
  verified: true
97
- - name: ROUGE-LSUM
98
- type: rouge
99
  value: 20.629
 
100
  verified: true
101
- - name: loss
102
- type: loss
103
  value: 3.746837854385376
 
104
  verified: true
105
- - name: gen_len
106
- type: gen_len
107
  value: 23.1206
 
108
  verified: true
 
109
  - task:
110
  type: summarization
111
  name: Summarization
@@ -115,30 +133,36 @@ model-index:
115
  config: samsum
116
  split: test
117
  metrics:
118
- - name: ROUGE-1
119
- type: rouge
120
  value: 24.9158
 
121
  verified: true
122
- - name: ROUGE-2
123
- type: rouge
124
  value: 5.5837
 
125
  verified: true
126
- - name: ROUGE-L
127
- type: rouge
128
  value: 18.8935
 
129
  verified: true
130
- - name: ROUGE-LSUM
131
- type: rouge
132
  value: 20.76
 
133
  verified: true
134
- - name: loss
135
- type: loss
136
  value: 3.775235891342163
 
137
  verified: true
138
- - name: gen_len
139
- type: gen_len
140
  value: 23.0928
 
141
  verified: true
 
142
  ---
143
  ### Bart model finetuned on xsum
144
 
 
1
  ---
 
 
2
  language:
3
  - en
4
  license: mit
5
+ tags:
6
+ - summarization
7
  model-index:
8
  - name: facebook/bart-large-xsum
9
  results:
 
16
  config: 3.0.0
17
  split: test
18
  metrics:
19
+ - type: rouge
 
20
  value: 25.2697
21
+ name: ROUGE-1
22
  verified: true
23
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMTM3ZWYzNDIyZWRlYzI2MDRkNmQwNzg4NTBhMzMzMmEwNGI5Mjg4ZGNkYzc0ODJjMWNjM2VkMDczNzk4M2ZhYiIsInZlcnNpb24iOjF9.FhfTibmxB-KfZdA0QA-dlaW2s837Y34litHb4SomxCTctYAuwwuFXhRjaYd1a3Q0RurJAOS5v31-LyQVnBiOBw
24
+ - type: rouge
25
  value: 7.6638
26
+ name: ROUGE-2
27
  verified: true
28
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNGI5NWVhYzAxYjRlMjRjNDlmZTRiNmY0Y2ZhYjJhNDA0MWRlZDUwZGIyZDg3MmViOTQ1MmQ3YjQxZjg4MWZhMyIsInZlcnNpb24iOjF9.CR6lwIak_ku4EiobhSpyAhtJmHdqJaBldAgJLGgrI1FZ4fQGWVcz1ugfD5O0amFeA5vYGO4_mppjuRhGR2ZMAA
29
+ - type: rouge
30
  value: 17.1808
31
+ name: ROUGE-L
32
  verified: true
33
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZGFhMmQwMWQyYzZhM2ZmOGMzYzI1YTg5MGQ3YWNmYTZjOWNhYjg1YzQ3MDA4MzA4Y2QxZWVlMjgyOWNkZjE3ZCIsInZlcnNpb24iOjF9.bbTmYKsUG57-9gCbk4f789A1GzvUpzjrAGI_GBGgg9TK-Lu56x38scURnsAENrKmEqOjBSTqROkEMZSJQ0bRAg
34
+ - type: rouge
35
  value: 21.7933
36
+ name: ROUGE-LSUM
37
  verified: true
38
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzQ0ODY2M2Y4YjMxZTMxNTczYWVmOTQ0MDY1Y2Y5YWM3ZTU1ZTE2YWQyNWM4ZDIyNGZlZDYyM2VkNjQ2MjI2MyIsInZlcnNpb24iOjF9.28iOKCynvIt6kK5mhM6ZzKJsnwjVv_CDMG8veAB0JYeZ4yyrM-tyQUzaorFAbEyb9JBJnpn7YJR9ntGTP3YcCw
39
+ - type: loss
40
  value: 3.5042972564697266
41
+ name: loss
42
  verified: true
43
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNmQxMTc2OWU5MWFjODNkNGRjYzU5NWRkOWVlYzY3OGRkMGY2N2NlYmM4MTJkZDRkZTBkMDI0ZWJmYTUxYzZjMSIsInZlcnNpb24iOjF9.PbOoV_245iT1FuAOf03tKAIkBVFRop4XsB26v5qF4dPPeZRP2M2pXJOZli0hILH6hUX0-D3IGDV-a8segNCICw
44
+ - type: gen_len
45
  value: 27.4462
46
+ name: gen_len
47
  verified: true
48
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiN2Q1YmRkNTE5OTVlYjdlYzc4YTIyYjhjNjIyOWUxMTc5MDBiNTVjYzQ3NWEyODdhZGFjNzUzNjIzMTcxNzhlYSIsInZlcnNpb24iOjF9.szl4fWDoBqVXKbBQxXV9DFgk9UbFLedmiZmGBI1sKoN69jw8IZopOs4VYtyY5TbpzsVGzpHoZnRCCrVQG8V3AQ
49
  - task:
50
  type: summarization
51
  name: Summarization
 
55
  config: default
56
  split: test
57
  metrics:
58
+ - type: rouge
 
59
  value: 45.4525
60
+ name: ROUGE-1
61
  verified: true
62
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYTIxOWFlMWQxYzBiMzg1ODkxMmI3NTQwYmFiYzlkZDc4YTc3OWNhNDc3YTNmNjdhOGQyNGQ3NWExYTJhMGVjZiIsInZlcnNpb24iOjF9.CEDDzEKRIdxVndsQ5R9P6ROu70YMTUFtKQcDNI2BaHpoux3uqR20xBnd9xVJbaihnTm8Rn3Gz3FSEU6HloteBg
63
+ - type: rouge
64
  value: 22.3455
65
+ name: ROUGE-2
66
  verified: true
67
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOTBjNTgzNmY4ZGU0MzcyYzNmOGVmYTAwODgyNjNkNjMwNTA2YWRkNDM2NzM2NDQxN2IzOWMxMGFjZDkwY2I0YiIsInZlcnNpb24iOjF9.30NFIQgNYUdqCgIni38Nd0mPjkFAqEQqCnB0p58Csiukp8oZ9NSRUJJHKsxdQ_3mcmkwz4l8C87AdarL-X2wBw
68
+ - type: rouge
69
  value: 37.2302
70
+ name: ROUGE-L
71
  verified: true
72
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNTg5Yzg3OGM0MTM0ODI0NTJiMmQwM2Q5M2ExZWMwZDU2YjZjNzJmZGVmNWVmMWExYzYxMzRhOTg1MzUyMTY0ZCIsInZlcnNpb24iOjF9.7Os0OXC-gX5s0kcEhiKdSv9j40g_EOIrOXGSMPAZJz3NDP1EkKddSPpha8mwFFkphxYbryg69Z6b1NE3TBa5AQ
73
+ - type: rouge
74
  value: 37.2323
75
+ name: ROUGE-LSUM
76
  verified: true
77
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzAyMDUzN2RlZDE5MjdiZGM1NjU2ZjQ1NGJlZTZmNjNlMjc2ZWUxZWU3YmY2OGY0YmJiZWViOTUyMDg2MDdhMiIsInZlcnNpb24iOjF9.M18WnqZ79MnAwz17NRHQ2iqv2_JneQ2SIV2sx10Pi3ACLYLordzcYAeQAGcfh38qQow-TyPXS-MC2Alwjvj3Bw
78
+ - type: loss
79
  value: 2.3128726482391357
80
+ name: loss
81
  verified: true
82
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYzAwZGZkYTVkZGM1MTNhNjA3NzBmNDMxNGJiODA0MzVhYmZkYjIzYzc1OGYxMjNiMDFhZjkxOGZmYTk1YjUyYyIsInZlcnNpb24iOjF9.i3pCisuPdpYFNLjniPejbseEeh3j6elXWze19As4pUJb3Gxp8uStckPR5rhmV_r-FDP7wKFY2GrqJZrWGGsVBg
83
+ - type: gen_len
84
  value: 25.5435
85
+ name: gen_len
86
  verified: true
87
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZTE3ODlkZDhhMTEwNTlhNzVjMWMxMGQyZDc0OTc0NWY0MDBlMzUzNGI3MGQwNmJmNzQ3NTQ5MjhhNDhiYTM5YSIsInZlcnNpb24iOjF9.e7nHzg3OH3zkWiCj3iZVAAQG6Zy0E16_MJzBEEyGTlSVuPGMziNfcjRvLD6WeY_6lXUonEwc9lur0X-qUvB7Aw
88
  - task:
89
  type: summarization
90
  name: Summarization
 
94
  config: samsum
95
  split: train
96
  metrics:
97
+ - type: rouge
 
98
  value: 24.7852
99
+ name: ROUGE-1
100
  verified: true
101
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiY2Y0NDZlNGM4M2IzMDNhZmI4MTI2ZTI0MmQ4ODYxYTk1Zjc5ZGYyYWQzNmMyN2Q2MDViNDVjNDBlYjVkNDM0MiIsInZlcnNpb24iOjF9.oSFDaNHNgTQ5WnrJqn01jXG2u-5HPhIrmPtyc_LT2kGwGs3dBe89HVsRFs3c6oAumvstKadMJ4TcQy6pDdg1Dw
102
+ - type: rouge
103
  value: 5.2533
104
+ name: ROUGE-2
105
  verified: true
106
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOTc1ZTJhY2U1MDNlM2EyN2Q0NWJkMDZlYTdkYzM3M2ZmY2JlMzU4ZDQwYWMzZjhhMTU5Y2VlMTIwNWYzNWM1MiIsInZlcnNpb24iOjF9.meJyYKPZRtmT2YLBiBWBxPf1XiZVFDjbFrc5_SVJv3EWvNuXlTZy0qUVbZgE9rnKLA1ND_0Yj1o-qlY8G6iVDg
107
+ - type: rouge
108
  value: 18.6792
109
+ name: ROUGE-L
110
  verified: true
111
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYzYxMjIzNWIzOTRlYWYwMmQ5YjFiNmM1NDRkMzVhZGI0OTJkMjE0OThmNGEwOWRkZjJiMGYyMTc0Zjc3NjUyNSIsInZlcnNpb24iOjF9.VlXVgQSBVdDjduKV_kg2TRoinJn7kkfsTcLJa_iwDTn2Lw0ZyyOBTcGdfWFcXeteee9m0-iA7uZBGkiaKtQkBw
112
+ - type: rouge
113
  value: 20.629
114
+ name: ROUGE-LSUM
115
  verified: true
116
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMzQ3NmM1ZTcyMGM3ZDRkZmJmN2IyZmNmOTFkYzYzZmFjNTJmYWVhYmViZGIwY2U0YzIzNWFjYzZjYjc2NGZhMyIsInZlcnNpb24iOjF9.POIXnLpVaPYKk07apBROnvbevoI4LNfs9LAelqJmL5aZsQrvb9w_mUj_y8cr_JtWMcYioKvMQfCNqweMR0QlCw
117
+ - type: loss
118
  value: 3.746837854385376
119
+ name: loss
120
  verified: true
121
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiMDUxODlhM2NmNTNkY2E5OWE0MmVhMzEzOWFlZDUzMGUzNjQ0OWJmYmUxYWY2NzU3ZjQwZjYyMGQ0MDlkMDA2MyIsInZlcnNpb24iOjF9.dtkcai-opGLauvudNLIxw0GtkNF5DlcUG7A7h2xi42ymyUNigrAg0PcjcjuUt8uW4SEf4oTON5nmlLu924m_Dg
122
+ - type: gen_len
123
  value: 23.1206
124
+ name: gen_len
125
  verified: true
126
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiNWMzOTVkOWYyYmJkZjY1MDJkMDdkMDhiMmEwNWJjNDU2ZDliMDg4ZTY0ODllM2VlMDUzNWY1NGViZjMyODZlNSIsInZlcnNpb24iOjF9.sLobtAdG5opp6UgM0sMzOQdXKQRbPNFFYng0Yv62loV8Ihwz5aDr8v0rlmmmvjUI45zXKpFdll0MP2mEjD91Aw
127
  - task:
128
  type: summarization
129
  name: Summarization
 
133
  config: samsum
134
  split: test
135
  metrics:
136
+ - type: rouge
 
137
  value: 24.9158
138
+ name: ROUGE-1
139
  verified: true
140
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYmZmYzYxNDU1YWVmYTNhZTAzYWNhNjE4YmQzNzUwMmJmZjM0OTdlZGY4NDJiNmU1OGM1NGUyZGU3MjEyNmZjZCIsInZlcnNpb24iOjF9.4_VqXLFvNv4EMJSg-vMYoj1BGp5ayLay8soylnHwEqicYeLyYNjeN1aYO4HFn9juBejXWLb1Yhe5n3nET8tRBg
141
+ - type: rouge
142
  value: 5.5837
143
+ name: ROUGE-2
144
  verified: true
145
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYzU2YWVjOWY3YzAzNjc2NWYxZTkzZDgyMzA4NzY0YWJmNjdlZTVlYWY0MmRkNjhiMjc1ZWZmNGRiYTJiNjNhNiIsInZlcnNpb24iOjF9.10Zo_slj3TtOkQ1ve2w2As8NrLUU1tjqyku7UMREqtYigd56p4SRuFZtr-cfZW1nrVfXrOw0BQlWMF1LevDECA
146
+ - type: rouge
147
  value: 18.8935
148
+ name: ROUGE-L
149
  verified: true
150
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiYWYwMWY3NmEwZDYxYTMwYmM2NjY1YmFmOWI3Mjg3NmI2MGM5MDQwNzAyMGY0ZWVjNzc0MjJiY2IwNDhjNmEyZSIsInZlcnNpb24iOjF9.sbb6WadT2lIB1JgWxGgMg2hzkSp5sTn5qBbUfUZFupv1ugvpGEE6bCo7fNuYCQRu0qOYvWGNyibhYIAoJqymDQ
151
+ - type: rouge
152
  value: 20.76
153
+ name: ROUGE-LSUM
154
  verified: true
155
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiOTcyNzJhMTU4YjM1ZDgyZWJiMTAyYzAyYTU4Y2E0M2M1YjVmODQzM2JlMmM2YmM3MWZlZTVhNWJkMzdjODhmNCIsInZlcnNpb24iOjF9.YQJDobcKtaOIro8g7Y7opjpfKZ081aJvYKCpzkBhDA5di1GIKtIjGkHqdulqtcGog_L5IcEfr9QBmwIGRFNqCw
156
+ - type: loss
157
  value: 3.775235891342163
158
+ name: loss
159
  verified: true
160
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiM2ViMmVhNGFkZmFiYmU5NmQxZmIzMTBkYTYwOGQ5NzA3MWQxMmZmZjljMmNkNjNkZGFlMTI0MGY2ZDQ3MjAxNSIsInZlcnNpb24iOjF9.YH3xzE3aQCPUXm1591TdRyJo2UM62QcP1705EKxHmg7BzS5VJmZI0-fpEMxegB1aMzNiEr7WSJ7pOWFG_1MwBQ
161
+ - type: gen_len
162
  value: 23.0928
163
+ name: gen_len
164
  verified: true
165
+ verifyToken: eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJoYXNoIjoiZWZmZTQ0NjNkZWVlNWUwODNhZWMzMzQ2MGM5NGY2ZDY1OGUzM2JmYzRlN2JjNTczMmRlNWI0MjRhNGM5NjJmYyIsInZlcnNpb24iOjF9.9jC1XMqvVzK6bjwltfHJPswBWIwqbiguGX3onycpTSgbONtx1nsvB163sOfwzRppfGcLPC8E_lmEYqvgej7eCg
166
  ---
167
  ### Bart model finetuned on xsum
168