update app
ece.py
CHANGED
@@ -209,29 +209,17 @@ class ECE(evaluate.EvaluationModule):
     4. apply L^p norm distance and weights
     """

-    def __init__(self, n_bins=10, bin_range=None, scheme="equal-range", proxy="upper-edge", p=1, **kwargs):
-        import pdb; pdb.set_trace()  # breakpoint bf1c56c7 //
-
-        super(evaluate.EvaluationModule, self).__init__(**kwargs)
-        self.n_bins = n_bins
-        self.bin_range = bin_range
-        self.scheme = scheme
-        self.proxy = proxy
-        self.p = p
-
     def _info(self):
         # TODO: Specifies the evaluate.EvaluationModuleInfo object
         return evaluate.EvaluationModuleInfo(
-            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
-            # This defines the format of each prediction and reference
            features=datasets.Features(
                {
-                    "predictions": datasets.Value("float32"),
-                    "references": datasets.Value("int64"),
+                    "predictions": datasets.Array2D(datasets.Value("float32")),  # or Sequence
+                    "references": datasets.Sequence(datasets.Value("int64")),
                }
            ),
            # Homepage of the module for documentation
@@ -241,42 +229,43 @@ class ECE(evaluate.EvaluationModule):
            reference_urls=["http://path.to.reference.url/new_module"],
        )

-    def _compute(self, predictions, references):
-        ...
-            "ECE": ECE,
-        }
-
-def test_ECE():
-    N = 10  # N evaluation instances {(x_i,y_i)}_{i=1}^N
-    K = 5  # K class problem
-
-    def random_mc_instance(concentration=1, onehot=False):
-        reference = np.argmax(
-            np.random.dirichlet(([concentration for _ in range(K)])), -1
-        )  # class targets
-        prediction = np.random.dirichlet(([concentration for _ in range(K)]))  # probabilities
-        if onehot:
-            reference = np.eye(K)[np.argmax(reference, -1)]
-        return reference, prediction
-
-    references, predictions = list(zip(*[random_mc_instance() for i in range(N)]))
-    references = np.array(references, dtype=np.int64)
-    predictions = np.array(predictions, dtype=np.float32)
-    res = ECE()._compute(predictions, references)
-    print(f"ECE: {res['ECE']}")
-
-if __name__ == "__main__":
-    ...
-    import pdb; pdb.set_trace()  # breakpoint 058892ee //
+    def init_kwargs(
+        self, n_bins=10, bin_range=None, scheme="equal-range", proxy="upper-edge", p=1, **kwargs
+    ):
+        # super(evaluate.EvaluationModule, self).__init__(**kwargs)
+        self.n_bins = n_bins
+        self.bin_range = bin_range
+        self.scheme = scheme
+        self.proxy = proxy
+        self.p = p
+
+    def _compute(self, predictions, references, **kwargs):
+
+        # convert to numpy arrays
+        references = np.array(references, dtype=np.int64)
+        predictions = np.array(predictions, dtype=np.float32)
+
+        assert (
+            predictions.shape[0] == references.shape[0]
+        ), "Need to pass similar predictions and references"
+
+        # Assert that arrays are 2D
+        if len(predictions.shape) != 2:
+            raise ValueError("Expected `predictions` to be a 2D vector (N x K)")
+
+        if len(references.shape) != 1:
+            # could check if wrongly passed as onehot
+            if (references.shape[-1] == predictions.shape[1]) and (
+                np.sum(references) == predictions.shape[0]
+            ):
+                references = np.argmax(references, -1)
+            else:
+                raise ValueError("Expected `references` to be a 1D vector (N,)")
+
+        self.init_kwargs(**kwargs)
+
+        """Returns the scores"""
+        ECE = top_1_CE(references, predictions, **self.__dict__)
+        return {
+            "ECE": ECE,
+        }
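Note on the new `features` schema: the `# or Sequence` comment suggests the declaration was still unsettled. In `datasets`, `Array2D` takes an explicit `shape` and `dtype` rather than a nested `Value`, so `datasets.Array2D(datasets.Value("float32"))` will not construct as written. A minimal sketch of a per-example declaration, assuming each prediction is a length-K probability vector and each reference a single class index:

import datasets

# Sketch only: per-example schema for an (N, K) prediction matrix fed row by row.
# A fixed-shape alternative would be datasets.Array2D(shape=(N, K), dtype="float32"),
# but Array2D needs a known shape, so the variable-length Sequence form is the
# usual choice here.
features = datasets.Features(
    {
        "predictions": datasets.Sequence(datasets.Value("float32")),  # K class probabilities
        "references": datasets.Value("int64"),  # single class index
    }
)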
tests.py
CHANGED
@@ -1,3 +1,5 @@
+import numpy as np
+
 test_cases = [
     {
         "predictions": [0, 0],
@@ -14,4 +16,24 @@ test_cases = [
         "references": [1, 1],
         "result": {"metric_score": 0.5}
     }
-]
+]
+
+
+def test_ECE():
+    N = 10  # N evaluation instances {(x_i,y_i)}_{i=1}^N
+    K = 5  # K class problem
+
+    def random_mc_instance(concentration=1, onehot=False):
+        reference = np.argmax(
+            np.random.dirichlet(([concentration for _ in range(K)])), -1
+        )  # class targets
+        prediction = np.random.dirichlet(([concentration for _ in range(K)]))  # probabilities
+        if onehot:
+            reference = np.eye(K)[np.argmax(reference, -1)]
+        return reference, prediction
+
+    references, predictions = list(zip(*[random_mc_instance() for i in range(N)]))
+    references = np.array(references, dtype=np.int64)
+    predictions = np.array(predictions, dtype=np.float32)
+    res = ECE()._compute(predictions, references)
+    print(f"ECE: {res['ECE']}")
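One gap worth flagging: tests.py gains the `import numpy as np` it needs but never imports the `ECE` class that `test_ECE` calls, so the test raises `NameError` when run in isolation, and the older `test_cases` entries still expect a `metric_score` key rather than the module's `ECE` output. A plausible way to run the smoke test, assuming ece.py sits next to tests.py, is an import at the top of the file and a main guard at the bottom (neither is part of the committed diff):

from ece import ECE  # hypothetical import, not in the diff; place at the top of tests.py

if __name__ == "__main__":
    test_ECE()  # smoke test; prints the ECE of random Dirichlet predictions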