JP-SystemsX committed
Commit 163dff6
1 Parent(s): 8d7d29c

Added documentation to nDCG.py

Files changed (2)
  1. Testing.py +15 -0
  2. nDCG.py +107 -11
Testing.py CHANGED
@@ -16,3 +16,18 @@ print(metric.compute(predictions=[a], references=[c]))
  print(metric.compute(predictions=[a], references=[c]))
  print(metric.compute(predictions=[a,a], references=[c,a]))
  print(metric.cache_file_name)
+
+ nDCG_metric = ev.load("nDCG.py")
+ results = nDCG_metric.compute(references=[[10, 0, 0, 1, 5]], predictions=[[.1, .2, .3, 4, 70]])
+ print(results)
+
+ nDCG_metric = ev.load("nDCG.py")
+ results = nDCG_metric.compute(references=[[10, 0, 0, 1, 5]], predictions=[[.1, .2, .3, 4, 70]], k=3)
+ print(results)
+
+ nDCG_metric = ev.load("nDCG.py")
+ results = nDCG_metric.compute(references=[[1, 0, 0, 0, 0]], predictions=[[1, 1, 0, 0, 0]], k=1)
+ print(results)
+
+ results = nDCG_metric.compute(references=[[1, 0, 0, 0, 0]], predictions=[[1, 1, 0, 0, 0]], k=1, ignore_ties=True)
+ print(results)
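
For context, a minimal sketch (not part of the commit) of the arithmetic behind the first compute call above, assuming sklearn's log2(rank + 1) discount convention:

import numpy as np

def dcg(gains):
    # Discounted cumulative gain: gains weighted by a logarithmic rank discount.
    ranks = np.arange(1, len(gains) + 1)
    return float(np.sum(gains / np.log2(ranks + 1)))

y_true = np.array([10, 0, 0, 1, 5], dtype=float)
y_score = np.array([.1, .2, .3, 4, 70])

order = np.argsort(-y_score)  # indices sorted by predicted score, best first
print(dcg(y_true[order]) / dcg(np.sort(y_true)[::-1]))  # ~0.6956940443813076

The DCG of the predicted ranking (true gains 5, 1, 0, 0, 10 in predicted order) divided by the ideal DCG (gains sorted 10, 5, 1, 0, 0) gives the nDCG the metric reports.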
nDCG.py CHANGED
@@ -2,24 +2,120 @@ import evaluate as ev
  from sklearn.metrics import ndcg_score
  import datasets

+ _DESCRIPTION = """
+ Compute Normalized Discounted Cumulative Gain.
+
+ Sums the true scores ranked in the order induced by the predicted scores,
+ after applying a logarithmic discount, then divides by the best possible
+ score (the ideal DCG, obtained for a perfect ranking) to obtain a score
+ between 0 and 1.
+
+ This ranking metric returns a high value if true labels are ranked high by
+ ``predictions``.
+
+ If a value for ``k`` is given to the metric, only the k highest scores in
+ the ranking are considered.
+
+ References
+ ----------
+ `Wikipedia entry for Discounted Cumulative Gain
+ <https://en.wikipedia.org/wiki/Discounted_cumulative_gain>`_
+
+ Jarvelin, K., & Kekalainen, J. (2002).
+ Cumulated gain-based evaluation of IR techniques. ACM Transactions on
+ Information Systems (TOIS), 20(4), 422-446.
+
+ Wang, Y., Wang, L., Li, Y., He, D., Chen, W., & Liu, T. Y. (2013, May).
+ A theoretical analysis of NDCG ranking measures. In Proceedings of the 26th
+ Annual Conference on Learning Theory (COLT 2013).
+
+ McSherry, F., & Najork, M. (2008, March). Computing information retrieval
+ performance measures efficiently in the presence of tied scores. In
+ European Conference on Information Retrieval (pp. 414-421). Springer,
+ Berlin, Heidelberg.
+ """
+
+ _KWARGS_DESCRIPTION = """
+ Args:
+     references (`list` of `float`): True relevance scores.
+
+     predictions (`list` of `float`): Predicted relevance, probability estimates, or confidence values.
+
+     k (`int`): If set, only the k highest scores in the ranking are considered; otherwise all outputs are used.
+         Defaults to None.
+
+     sample_weight (`list` of `float`): Sample weights. Defaults to None.
+
+     ignore_ties (`bool`): If True, assumes that there are no ties (likely when predictions are continuous)
+         for efficiency gains. Defaults to False.
+
+ Returns:
+     normalized_discounted_cumulative_gain (`float`): The nDCG score averaged over all samples.
+         The minimum possible value is 0.0; the maximum possible value is 1.0.
+
+ Examples:
+     Example 1 - A simple example:
+         >>> nDCG_metric = evaluate.load("JP-SystemsX/nDCG")
+         >>> results = nDCG_metric.compute(references=[[10, 0, 0, 1, 5]], predictions=[[.1, .2, .3, 4, 70]])
+         >>> print(results)
+         {'nDCG': 0.6956940443813076}
+     Example 2 - The same as Example 1, except with k set to 3:
+         >>> nDCG_metric = evaluate.load("JP-SystemsX/nDCG")
+         >>> results = nDCG_metric.compute(references=[[10, 0, 0, 1, 5]], predictions=[[.1, .2, .3, 4, 70]], k=3)
+         >>> print(results)
+         {'nDCG@3': 0.4123818817534531}
+     Example 3 - There is only one relevant label, but there is a tie and the model cannot decide which candidate it is:
+         >>> nDCG_metric = evaluate.load("JP-SystemsX/nDCG")
+         >>> results = nDCG_metric.compute(references=[[1, 0, 0, 0, 0]], predictions=[[1, 1, 0, 0, 0]], k=1)
+         >>> print(results)
+         {'nDCG@1': 0.5}
+         >>> # That is, it scores both tied orderings and returns their average.
+     Example 4 - The same as Example 3, except ignore_ties is set to True:
+         >>> nDCG_metric = evaluate.load("JP-SystemsX/nDCG")
+         >>> results = nDCG_metric.compute(references=[[1, 0, 0, 0, 0]], predictions=[[1, 1, 0, 0, 0]], k=1, ignore_ties=True)
+         >>> print(results)
+         {'nDCG@1': 0.0}
+         >>> # Alternative result: {'nDCG@1': 1.0}
+         >>> # That is, it commits to one of the 2 tied candidates and scores only that ordering,
+         >>> # so the result may vary depending on which one is chosen.
+ """
+
+ _CITATION = """
+ @article{scikit-learn,
+     title={Scikit-learn: Machine Learning in {P}ython},
+     author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
+         and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
+         and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
+         Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
+     journal={Journal of Machine Learning Research},
+     volume={12},
+     pages={2825--2830},
+     year={2011}
+ }
+ """
+
  @ev.utils.file_utils.add_start_docstrings("_DESCRIPTION", "_KWARGS_DESCRIPTION")
  class nDCG(ev.Metric):
      def _info(self):
          return ev.MetricInfo(
-             module_type="metric",
-             description="nDCG",
-             citation="None",
-             inputs_description="None",
+             description=_DESCRIPTION,
+             citation=_CITATION,
+             inputs_description=_KWARGS_DESCRIPTION,
              features=datasets.Features({
                  'predictions': datasets.Sequence(datasets.Value('float')),
                  'references': datasets.Sequence(datasets.Value('float'))
              }),
-             homepage="none",
+             reference_urls=["https://scikit-learn.org/stable/modules/generated/sklearn.metrics.ndcg_score.html"],
          )

-     def _compute(self, predictions, references, sample_weight=None, k=5):
-         """Returns the scores"""
-         score = ndcg_score(references, predictions, k=k, sample_weight=sample_weight)
-         return {
-             "nDCG@" + str(k): score
-         }
+     def _compute(self, predictions, references, sample_weight=None, k=None, ignore_ties=False):
+         score = ndcg_score(y_true=references,
+                            y_score=predictions,
+                            k=k,
+                            sample_weight=sample_weight,
+                            ignore_ties=ignore_ties
+                            )
+         if k is not None:
+             return {"nDCG@" + str(k): score}
+         else:
+             return {"nDCG": score}
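
The tie handling documented in Examples 3 and 4 comes directly from sklearn's ndcg_score, which _compute wraps. A minimal sketch (not part of the commit) that reproduces it:

from sklearn.metrics import ndcg_score

y_true = [[1, 0, 0, 0, 0]]
y_score = [[1, 1, 0, 0, 0]]  # the top two candidates are tied

# Default: ties are averaged over the tied orderings (McSherry & Najork, 2008),
# so the tied top-1 slot contributes the mean of the candidates' gains: (1 + 0) / 2.
print(ndcg_score(y_true, y_score, k=1))  # 0.5

# ignore_ties=True commits to a single sort order for speed, so the score is
# 0.0 or 1.0 depending on which tied item ends up on top.
print(ndcg_score(y_true, y_score, k=1, ignore_ties=True))  # 0.0 (or 1.0)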