"""Metrics to assess performance on regression task
|
||
|
|
||
|
Functions named as ``*_score`` return a scalar value to maximize: the higher
|
||
|
the better
|
||
|
|
||
|
Function named as ``*_error`` or ``*_loss`` return a scalar value to minimize:
|
||
|
the lower the better
|
||
|
"""
|
||
|
|
||
|
# Authors: Alexandre Gramfort <alexandre.gramfort@inria.fr>
#          Mathieu Blondel <mathieu@mblondel.org>
#          Olivier Grisel <olivier.grisel@ensta.org>
#          Arnaud Joly <a.joly@ulg.ac.be>
#          Jochen Wersdorfer <jochen@wersdoerfer.de>
#          Lars Buitinck
#          Joel Nothman <joel.nothman@gmail.com>
#          Karan Desai <karandesai281196@gmail.com>
#          Noel Dawe <noel@dawe.me>
#          Manoj Kumar <manojkumarsivaraj334@gmail.com>
#          Michael Eickenberg <michael.eickenberg@gmail.com>
#          Konstantin Shmelkov <konstantin.shmelkov@polytechnique.edu>
# License: BSD 3 clause

from __future__ import division

import numpy as np

from ..utils.validation import check_array, check_consistent_length
from ..utils.validation import column_or_1d
from ..externals.six import string_types


__all__ = [
    "mean_absolute_error",
    "mean_squared_error",
    "mean_squared_log_error",
    "median_absolute_error",
    "r2_score",
    "explained_variance_score"
]


def _check_reg_targets(y_true, y_pred, multioutput):
    """Check that y_true and y_pred belong to the same regression task

    Parameters
    ----------
    y_true : array-like

    y_pred : array-like

    multioutput : array-like or string in ['raw_values', 'uniform_average',
        'variance_weighted'] or None
        None is accepted due to backward compatibility of r2_score().

    Returns
    -------
    type_true : one of {'continuous', 'continuous-multioutput'}
        The type of the true target data, as output by
        'utils.multiclass.type_of_target'

    y_true : array-like of shape = (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples, n_outputs)
        Estimated target values.

    multioutput : array-like of shape = (n_outputs) or string in
        ['raw_values', 'uniform_average', 'variance_weighted'] or None
        Custom output weights if ``multioutput`` is array-like or
        just the corresponding argument if ``multioutput`` is a
        correct keyword.

    """
    check_consistent_length(y_true, y_pred)
    y_true = check_array(y_true, ensure_2d=False)
    y_pred = check_array(y_pred, ensure_2d=False)

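    # Promote 1d targets to column vectors so that every metric in this
    # module can assume a uniform (n_samples, n_outputs) layout.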
    if y_true.ndim == 1:
        y_true = y_true.reshape((-1, 1))

    if y_pred.ndim == 1:
        y_pred = y_pred.reshape((-1, 1))

    if y_true.shape[1] != y_pred.shape[1]:
        raise ValueError("y_true and y_pred have different number of "
                         "outputs ({0}!={1})".format(y_true.shape[1],
                                                     y_pred.shape[1]))

    n_outputs = y_true.shape[1]
    allowed_multioutput_str = ('raw_values', 'uniform_average',
                               'variance_weighted')
    if isinstance(multioutput, string_types):
        if multioutput not in allowed_multioutput_str:
            raise ValueError("Allowed 'multioutput' string values are {}. "
                             "You provided multioutput={!r}".format(
                                 allowed_multioutput_str, multioutput))
    elif multioutput is not None:
        multioutput = check_array(multioutput, ensure_2d=False)
        if n_outputs == 1:
            raise ValueError("Custom weights are useful only in "
                             "multi-output cases.")
        elif n_outputs != len(multioutput):
            raise ValueError(("There must be equally many custom weights "
                              "(%d) as outputs (%d).") %
                             (len(multioutput), n_outputs))
    y_type = 'continuous' if n_outputs == 1 else 'continuous-multioutput'

    return y_type, y_true, y_pred, multioutput


def mean_absolute_error(y_true, y_pred,
                        sample_weight=None,
                        multioutput='uniform_average'):
    """Mean absolute error regression loss

    Read more in the :ref:`User Guide <mean_absolute_error>`.

    Parameters
    ----------
    y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape = (n_samples), optional
        Sample weights.

    multioutput : string in ['raw_values', 'uniform_average'] \
            or array-like of shape (n_outputs)
        Defines how multiple output values are aggregated.
        An array-like value defines the weights used to average the errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        If multioutput is 'raw_values', then the mean absolute error is
        returned for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then the
        weighted average of all output errors is returned.

        MAE output is non-negative floating point. The best value is 0.0.

    Examples
    --------
    >>> from sklearn.metrics import mean_absolute_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> mean_absolute_error(y_true, y_pred)
    0.5
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> mean_absolute_error(y_true, y_pred)
    0.75
    >>> mean_absolute_error(y_true, y_pred, multioutput='raw_values')
    array([ 0.5,  1. ])
    >>> mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])
    ... # doctest: +ELLIPSIS
    0.849...
    """
    y_type, y_true, y_pred, multioutput = _check_reg_targets(
        y_true, y_pred, multioutput)
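    # First average |y_pred - y_true| over the samples (axis=0), optionally
    # weighted by sample_weight; this yields one error value per output.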
    output_errors = np.average(np.abs(y_pred - y_true),
                               weights=sample_weight, axis=0)
    if isinstance(multioutput, string_types):
        if multioutput == 'raw_values':
            return output_errors
        elif multioutput == 'uniform_average':
            # pass None as weights to np.average: uniform mean
            multioutput = None

    return np.average(output_errors, weights=multioutput)


def mean_squared_error(y_true, y_pred,
                       sample_weight=None,
                       multioutput='uniform_average'):
    """Mean squared error regression loss

    Read more in the :ref:`User Guide <mean_squared_error>`.

    Parameters
    ----------
    y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape = (n_samples), optional
        Sample weights.

    multioutput : string in ['raw_values', 'uniform_average'] \
            or array-like of shape (n_outputs)
        Defines how multiple output values are aggregated.
        An array-like value defines the weights used to average the errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        A non-negative floating point value (the best value is 0.0), or an
        array of floating point values, one for each individual target.

    Examples
    --------
    >>> from sklearn.metrics import mean_squared_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> mean_squared_error(y_true, y_pred)
    0.375
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> mean_squared_error(y_true, y_pred)  # doctest: +ELLIPSIS
    0.708...
    >>> mean_squared_error(y_true, y_pred, multioutput='raw_values')
    ... # doctest: +ELLIPSIS
    array([ 0.416...,  1.        ])
    >>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7])
    ... # doctest: +ELLIPSIS
    0.824...

    """
    y_type, y_true, y_pred, multioutput = _check_reg_targets(
        y_true, y_pred, multioutput)
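    # Mean of the squared residuals per output; taking np.sqrt of the
    # aggregated result would give the root mean squared error (RMSE).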
    output_errors = np.average((y_true - y_pred) ** 2, axis=0,
                               weights=sample_weight)
    if isinstance(multioutput, string_types):
        if multioutput == 'raw_values':
            return output_errors
        elif multioutput == 'uniform_average':
            # pass None as weights to np.average: uniform mean
            multioutput = None

    return np.average(output_errors, weights=multioutput)


def mean_squared_log_error(y_true, y_pred,
                           sample_weight=None,
                           multioutput='uniform_average'):
    """Mean squared logarithmic error regression loss

    Read more in the :ref:`User Guide <mean_squared_log_error>`.

    Parameters
    ----------
    y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape = (n_samples), optional
        Sample weights.

    multioutput : string in ['raw_values', 'uniform_average'] \
            or array-like of shape = (n_outputs)
        Defines how multiple output values are aggregated.
        An array-like value defines the weights used to average the errors.

        'raw_values' :
            Returns a full set of errors when the input is of multioutput
            format.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        A non-negative floating point value (the best value is 0.0), or an
        array of floating point values, one for each individual target.

    Examples
    --------
    >>> from sklearn.metrics import mean_squared_log_error
    >>> y_true = [3, 5, 2.5, 7]
    >>> y_pred = [2.5, 5, 4, 8]
    >>> mean_squared_log_error(y_true, y_pred)  # doctest: +ELLIPSIS
    0.039...
    >>> y_true = [[0.5, 1], [1, 2], [7, 6]]
    >>> y_pred = [[0.5, 2], [1, 2.5], [8, 8]]
    >>> mean_squared_log_error(y_true, y_pred)  # doctest: +ELLIPSIS
    0.044...
    >>> mean_squared_log_error(y_true, y_pred, multioutput='raw_values')
    ... # doctest: +ELLIPSIS
    array([ 0.004...,  0.083...])
    >>> mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])
    ... # doctest: +ELLIPSIS
    0.060...

    """
    y_type, y_true, y_pred, multioutput = _check_reg_targets(
        y_true, y_pred, multioutput)

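    # np.log of a negative number is NaN, so reject negative values in
    # either array up front rather than returning a corrupted score.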
    if not (y_true >= 0).all() or not (y_pred >= 0).all():
        raise ValueError("Mean Squared Logarithmic Error cannot be used when "
                         "targets contain negative values.")

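    # MSLE is the plain mean squared error computed in log-space:
    # log(1 + x), i.e. np.log1p(x), applied to both targets.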
    return mean_squared_error(np.log(y_true + 1), np.log(y_pred + 1),
                              sample_weight, multioutput)


def median_absolute_error(y_true, y_pred):
    """Median absolute error regression loss

    Read more in the :ref:`User Guide <median_absolute_error>`.

    Parameters
    ----------
    y_true : array-like of shape = (n_samples)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples)
        Estimated target values.

    Returns
    -------
    loss : float
        A non-negative floating point value (the best value is 0.0).

    Examples
    --------
    >>> from sklearn.metrics import median_absolute_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> median_absolute_error(y_true, y_pred)
    0.5

    """
    y_type, y_true, y_pred, _ = _check_reg_targets(y_true, y_pred,
                                                   'uniform_average')
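    # Unlike the means above, this metric takes no sample_weight (np.median
    # has no weighted variant), and multioutput targets are rejected here.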
    if y_type == 'continuous-multioutput':
        raise ValueError("Multioutput not supported in median_absolute_error")
    return np.median(np.abs(y_pred - y_true))


def explained_variance_score(y_true, y_pred,
                             sample_weight=None,
                             multioutput='uniform_average'):
    """Explained variance regression score function

    Best possible score is 1.0, lower values are worse.

    Read more in the :ref:`User Guide <explained_variance_score>`.

    Parameters
    ----------
    y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape = (n_samples), optional
        Sample weights.

    multioutput : string in ['raw_values', 'uniform_average', \
            'variance_weighted'] or array-like of shape (n_outputs)
        Defines how multiple output scores are aggregated.
        An array-like value defines the weights used to average the scores.

        'raw_values' :
            Returns a full set of scores in case of multioutput input.

        'uniform_average' :
            Scores of all outputs are averaged with uniform weight.

        'variance_weighted' :
            Scores of all outputs are averaged, weighted by the variances
            of each individual output.

    Returns
    -------
    score : float or ndarray of floats
        The explained variance or ndarray if 'multioutput' is 'raw_values'.

    Notes
    -----
    This is not a symmetric function.

    Examples
    --------
    >>> from sklearn.metrics import explained_variance_score
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> explained_variance_score(y_true, y_pred)  # doctest: +ELLIPSIS
    0.957...
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> explained_variance_score(y_true, y_pred, multioutput='uniform_average')
    ... # doctest: +ELLIPSIS
    0.983...

    """
    y_type, y_true, y_pred, multioutput = _check_reg_targets(
        y_true, y_pred, multioutput)

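    # Per output: explained variance = 1 - Var[y_true - y_pred] / Var[y_true],
    # with both variances optionally sample-weighted; the residual variance is
    # taken around the mean residual, not around zero.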
    y_diff_avg = np.average(y_true - y_pred, weights=sample_weight, axis=0)
    numerator = np.average((y_true - y_pred - y_diff_avg) ** 2,
                           weights=sample_weight, axis=0)

    y_true_avg = np.average(y_true, weights=sample_weight, axis=0)
    denominator = np.average((y_true - y_true_avg) ** 2,
                             weights=sample_weight, axis=0)

    nonzero_numerator = numerator != 0
    nonzero_denominator = denominator != 0
    valid_score = nonzero_numerator & nonzero_denominator
    output_scores = np.ones(y_true.shape[1])

    output_scores[valid_score] = 1 - (numerator[valid_score] /
                                      denominator[valid_score])
    output_scores[nonzero_numerator & ~nonzero_denominator] = 0.
    if isinstance(multioutput, string_types):
        if multioutput == 'raw_values':
            # return scores individually
            return output_scores
        elif multioutput == 'uniform_average':
            # passing None as weights to np.average results in a uniform mean
            avg_weights = None
        elif multioutput == 'variance_weighted':
            avg_weights = denominator
    else:
        avg_weights = multioutput

    return np.average(output_scores, weights=avg_weights)


def r2_score(y_true, y_pred, sample_weight=None,
             multioutput="uniform_average"):
    """R^2 (coefficient of determination) regression score function.

    Best possible score is 1.0 and it can be negative (because the
    model can be arbitrarily worse). A constant model that always
    predicts the expected value of y, disregarding the input features,
    would get an R^2 score of 0.0.

    Read more in the :ref:`User Guide <r2_score>`.

    Parameters
    ----------
    y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape = (n_samples), optional
        Sample weights.

    multioutput : string in ['raw_values', 'uniform_average', \
            'variance_weighted'] or None or array-like of shape (n_outputs)
        Defines how multiple output scores are aggregated.
        An array-like value defines the weights used to average the scores.
        Default is "uniform_average".

        'raw_values' :
            Returns a full set of scores in case of multioutput input.

        'uniform_average' :
            Scores of all outputs are averaged with uniform weight.

        'variance_weighted' :
            Scores of all outputs are averaged, weighted by the variances
            of each individual output.

        .. versionchanged:: 0.19
            Default value of multioutput is 'uniform_average'.

    Returns
    -------
    z : float or ndarray of floats
        The R^2 score or ndarray of scores if 'multioutput' is
        'raw_values'.

    Notes
    -----
    This is not a symmetric function.

    Unlike most other scores, R^2 score may be negative (it need not actually
    be the square of a quantity R).

    References
    ----------
    .. [1] `Wikipedia entry on the Coefficient of determination
            <https://en.wikipedia.org/wiki/Coefficient_of_determination>`_

    Examples
    --------
    >>> from sklearn.metrics import r2_score
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> r2_score(y_true, y_pred)  # doctest: +ELLIPSIS
    0.948...
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> r2_score(y_true, y_pred, multioutput='variance_weighted')
    ... # doctest: +ELLIPSIS
    0.938...
    >>> y_true = [1, 2, 3]
    >>> y_pred = [1, 2, 3]
    >>> r2_score(y_true, y_pred)
    1.0
    >>> y_true = [1, 2, 3]
    >>> y_pred = [2, 2, 2]
    >>> r2_score(y_true, y_pred)
    0.0
    >>> y_true = [1, 2, 3]
    >>> y_pred = [3, 2, 1]
    >>> r2_score(y_true, y_pred)
    -3.0
    """
    y_type, y_true, y_pred, multioutput = _check_reg_targets(
        y_true, y_pred, multioutput)

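    # Reshape sample weights to a column vector so they broadcast across
    # every output in the per-output sums below.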
    if sample_weight is not None:
        sample_weight = column_or_1d(sample_weight)
        weight = sample_weight[:, np.newaxis]
    else:
        weight = 1.

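    # R^2 = 1 - SS_res / SS_tot per output: the numerator is the (weighted)
    # residual sum of squares, the denominator the (weighted) total sum of
    # squares around the weighted mean of y_true.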
    numerator = (weight * (y_true - y_pred) ** 2).sum(axis=0,
                                                      dtype=np.float64)
    denominator = (weight * (y_true - np.average(
        y_true, axis=0, weights=sample_weight)) ** 2).sum(axis=0,
                                                          dtype=np.float64)
    nonzero_denominator = denominator != 0
    nonzero_numerator = numerator != 0
    valid_score = nonzero_denominator & nonzero_numerator
    output_scores = np.ones([y_true.shape[1]])
    output_scores[valid_score] = 1 - (numerator[valid_score] /
                                      denominator[valid_score])
    # arbitrarily set to zero to avoid -inf scores: having a constant
    # y_true is not interesting for scoring a regression anyway
    output_scores[nonzero_numerator & ~nonzero_denominator] = 0.
    if isinstance(multioutput, string_types):
        if multioutput == 'raw_values':
            # return scores individually
            return output_scores
        elif multioutput == 'uniform_average':
            # passing None as weights results in a uniform mean
            avg_weights = None
        elif multioutput == 'variance_weighted':
            avg_weights = denominator
            # avoid failing on constant y or one-element arrays
            if not np.any(nonzero_denominator):
                if not np.any(nonzero_numerator):
                    return 1.0
                else:
                    return 0.0
    else:
        avg_weights = multioutput

    return np.average(output_scores, weights=avg_weights)