# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""brier_score metric for multiclass problem."""
import numpy as np
import evaluate
import datasets
_CITATION = """
@article{brier1950verification,
title={Verification of forecasts expressed in terms of probability},
author={Brier, Glenn W},
journal={Monthly weather review},
volume={78},
number={1},
pages={1--3},
year={1950}
}
"""

_DESCRIPTION = """
Measure comparing true observed labels with predicted class probabilities in multiclass classification tasks; lower scores indicate better probability estimates.
"""

_KWARGS_DESCRIPTION = """
Multiclass Brier Score: measure comparing true observed labels with predicted class probabilities in multiclass classification tasks.
Args:
    pred_probs: array-like of shape (n_samples, n_classes). Predicted probability of each class for every sample; each row should sum to 1.
    references: array-like of shape (n_samples,). Ground-truth class indices.
Returns:
    brier_score: float, Brier score averaged over all samples.
Examples:
    >>> brier_metric = multiclass_brier_score()
    >>> brier_score = brier_metric.compute(pred_probs=[[0.0, 1.0, 0.0]], references=[1])
    >>> print(brier_score)
    {'brier_score': 0.0}

    >>> brier_metric = multiclass_brier_score()
    >>> brier_score = brier_metric.compute(pred_probs=[[0.1, 0.1, 0.8]], references=[2])
    >>> print(round(brier_score['brier_score'], 2))
    0.06

    >>> brier_metric = multiclass_brier_score()
    >>> brier_score = brier_metric.compute(pred_probs=[[0.1, 0.1, 0.8], [0.0, 1.0, 0.0]], references=[2, 1])
    >>> print(round(brier_score['brier_score'], 2))
    0.03
"""


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class multiclass_brier_score(evaluate.Metric):
    """Multiclass Brier score metric."""

    def _info(self):
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference.
            features=datasets.Features({
                'pred_probs': datasets.Sequence(datasets.Value("float")),
                'references': datasets.Value('int32'),
            }),
            reference_urls=["https://search.r-project.org/CRAN/refmans/mlr3measures/html/mbrier.html"],
        )

    def _compute(self, pred_probs: np.ndarray, references: np.ndarray):
        """
        brier_score = 1/n * sum_{i=1}^n sum_{j=1}^m (y_{ij} - p_{ij})^2

        Args:
            pred_probs: numpy array of shape (n, m), where n is the number of samples and m is the number of classes.
            references: numpy array of shape (n,), where n is the number of samples.
        """
        assert len(pred_probs) == len(references), "The length of the predictions and references should be the same"
        pred_probs = np.array(pred_probs)
        references = np.asarray(references)
        n = len(references)
        m = pred_probs.shape[1]
        # Generate the one-hot encoding for the references: y_{ij} = 1 iff sample i has true label j.
        references_onehot = np.zeros((n, m))
        references_onehot[np.arange(n), references] = 1  # shape: (n, m)
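        # e.g. with references = [2, 1] and m = 3 (as in the doctest above),
        # references_onehot == [[0, 0, 1], [0, 1, 0]].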
        # Sum of squared differences over all classes, averaged over the n samples.
        brier_score = np.sum((references_onehot - pred_probs) ** 2) / float(n)
        return {
            "brier_score": brier_score,
        }


if __name__ == "__main__":
    import doctest

    doctest.testmod()
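
    # A minimal usage sketch (an assumption: running this file directly rather
    # than loading it from the Hub via evaluate.load). Direct instantiation
    # mirrors the doctest examples above.
    metric = multiclass_brier_score()
    result = metric.compute(
        pred_probs=[[0.7, 0.2, 0.1], [0.1, 0.8, 0.1]],
        references=[0, 1],
    )
    # ((0.3^2 + 0.2^2 + 0.1^2) + (0.1^2 + 0.2^2 + 0.1^2)) / 2 = (0.14 + 0.06) / 2 = 0.1
    print(result)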