JP-SystemsX committed · Commit 163dff6 · 1 parent: 8d7d29c
Added documentation to nDCG.py
Files changed:
- Testing.py +15 -0
- nDCG.py +107 -11
Testing.py
CHANGED
@@ -16,3 +16,18 @@ print(metric.compute(predictions=[a], references=[c]))
 print(metric.compute(predictions=[a], references=[c]))
 print(metric.compute(predictions=[a,a], references=[c,a]))
 print(metric.cache_file_name)
+
+nDCG_metric = ev.load("nDCG.py")
+results = nDCG_metric.compute(references=[[10, 0, 0, 1, 5]], predictions=[[.1, .2, .3, 4, 70]])
+print(results)
+
+nDCG_metric = ev.load("nDCG.py")
+results = nDCG_metric.compute(references=[[10, 0, 0, 1, 5]], predictions=[[.1, .2, .3, 4, 70]], k=3)
+print(results)
+
+nDCG_metric = ev.load("nDCG.py")
+results = nDCG_metric.compute(references=[[1, 0, 0, 0, 0]], predictions=[[1, 1, 0, 0, 0]], k=1)
+print(results)
+
+results = nDCG_metric.compute(references=[[1, 0, 0, 0, 0]], predictions=[[1, 1, 0, 0, 0]], k=1, ignore_ties=True)
+print(results)
nDCG.py
CHANGED
@@ -2,24 +2,120 @@ import evaluate as ev
 from sklearn.metrics import ndcg_score
 import datasets
 
+_DESCRIPTION = """
+Compute Normalized Discounted Cumulative Gain.
+
+Sums the true scores ranked in the order induced by the predicted scores,
+after applying a logarithmic discount. Then divides by the best possible
+score (ideal DCG, obtained for a perfect ranking) to obtain a score between
+0 and 1.
+
+This ranking metric returns a high value if true labels are ranked high by
+``predictions``.
+
+If a value for k is given to the metric, it will only consider the k highest
+scores in the ranking.
+
+References
+----------
+`Wikipedia entry for Discounted Cumulative Gain
+<https://en.wikipedia.org/wiki/Discounted_cumulative_gain>`_
+
+Jarvelin, K., & Kekalainen, J. (2002).
+Cumulated gain-based evaluation of IR techniques. ACM Transactions on
+Information Systems (TOIS), 20(4), 422-446.
+
+Wang, Y., Wang, L., Li, Y., He, D., Chen, W., & Liu, T. Y. (2013, May).
+A theoretical analysis of NDCG ranking measures. In Proceedings of the 26th
+Annual Conference on Learning Theory (COLT 2013).
+
+McSherry, F., & Najork, M. (2008, March). Computing information retrieval
+performance measures efficiently in the presence of tied scores. In
+European Conference on Information Retrieval (pp. 414-421). Springer,
+Berlin, Heidelberg.
+"""
+
+_KWARGS_DESCRIPTION = """
+Args:
+    references (`list` of `float`): True relevance scores.
+
+    predictions (`list` of `float`): Either predicted relevance, probability estimates, or confidence values.
+
+    k (`int`): If set, only the k highest scores in the ranking are considered; otherwise all outputs are used.
+        Defaults to None.
+
+    sample_weight (`list` of `float`): Sample weights. Defaults to None.
+
+    ignore_ties (`bool`): If set to True, assumes that there are no ties (which is likely if the predictions
+        are continuous) for efficiency gains. Defaults to False.
+
+Returns:
+    normalized_discounted_cumulative_gain (`float`): The averaged nDCG score over all samples.
+        The minimum possible value is 0.0; the maximum possible value is 1.0.
+
+Examples:
+    Example 1 - A simple example:
+        >>> nDCG_metric = evaluate.load("JP-SystemsX/nDCG")
+        >>> results = nDCG_metric.compute(references=[[10, 0, 0, 1, 5]], predictions=[[.1, .2, .3, 4, 70]])
+        >>> print(results)
+        {'nDCG': 0.6956940443813076}
+    Example 2 - The same as Example 1, except with k set to 3:
+        >>> nDCG_metric = evaluate.load("JP-SystemsX/nDCG")
+        >>> results = nDCG_metric.compute(references=[[10, 0, 0, 1, 5]], predictions=[[.1, .2, .3, 4, 70]], k=3)
+        >>> print(results)
+        {'nDCG@3': 0.4123818817534531}
+    Example 3 - There is only one relevant label, but a tie means the model cannot decide which candidate it is:
+        >>> nDCG_metric = evaluate.load("JP-SystemsX/nDCG")
+        >>> results = nDCG_metric.compute(references=[[1, 0, 0, 0, 0]], predictions=[[1, 1, 0, 0, 0]], k=1)
+        >>> print(results)
+        {'nDCG@1': 0.5}
+        >>> # That is, the score is computed for both tied candidates and their average is returned.
+    Example 4 - The same as Example 3, except ignore_ties is set to True:
+        >>> nDCG_metric = evaluate.load("JP-SystemsX/nDCG")
+        >>> results = nDCG_metric.compute(references=[[1, 0, 0, 0, 0]], predictions=[[1, 1, 0, 0, 0]], k=1, ignore_ties=True)
+        >>> print(results)
+        {'nDCG@1': 0.0}
+        >>> # Alternative result: {'nDCG@1': 1.0}
+        >>> # That is, one of the two tied candidates is chosen and the score is computed only for that one,
+        >>> # so the result may vary depending on which candidate is chosen.
+"""
+
+_CITATION = """
+@article{scikit-learn,
+  title={Scikit-learn: Machine Learning in {P}ython},
+  author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
+          and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
+          and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
+          Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
+  journal={Journal of Machine Learning Research},
+  volume={12},
+  pages={2825--2830},
+  year={2011}
+}
+"""
+
 @ev.utils.file_utils.add_start_docstrings("_DESCRIPTION", "_KWARGS_DESCRIPTION")
 class nDCG(ev.Metric):
     def _info(self):
         return ev.MetricInfo(
-
-
-
-            inputs_description="None",
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            inputs_description=_KWARGS_DESCRIPTION,
             features=datasets.Features({
                 'predictions': datasets.Sequence(datasets.Value('float')),
                 'references': datasets.Sequence(datasets.Value('float'))
             }),
-
+            reference_urls=["https://scikit-learn.org/stable/modules/generated/sklearn.metrics.ndcg_score.html"],
         )
 
-    def _compute(self, predictions, references, sample_weight=None, k=
-
-
-
-
-
+    def _compute(self, predictions, references, sample_weight=None, k=None, ignore_ties=False):
+        score = ndcg_score(y_true=references,
+                           y_score=predictions,
+                           k=k,
+                           sample_weight=sample_weight,
+                           ignore_ties=ignore_ties
+                           )
+        if k is not None:
+            return {"nDCG@" + str(k): score}
+        else:
+            return {"nDCG": score}
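For reference, the value in Example 1 of the new docstring can be reproduced by hand from the definition in _DESCRIPTION. A minimal sketch, assuming scikit-learn's defaults of linear gains and a log-base-2 discount (defaults the wrapper relies on but the diff itself does not state):

import math

references = [10, 0, 0, 1, 5]
predictions = [.1, .2, .3, 4, 70]

# True scores reordered by descending predicted score: [5, 1, 0, 0, 10].
ranked = [rel for _, rel in sorted(zip(predictions, references), reverse=True)]

# DCG discounts the true score at rank i (1-based) by log2(i + 1);
# the ideal DCG applies the same sum to the perfect ordering.
dcg = sum(rel / math.log2(i + 2) for i, rel in enumerate(ranked))
idcg = sum(rel / math.log2(i + 2) for i, rel in enumerate(sorted(references, reverse=True)))

print(dcg / idcg)  # ~0.6957, matching {'nDCG': 0.6956940443813076} from Example 1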