[BACKPORT] Add _binary_roc_auc_score method (#2403) (#2477)
Xuye (Chris) Qin authored Sep 21, 2021
1 parent 0ac9abd commit cc5b577
Showing 2 changed files with 64 additions and 0 deletions.
37 changes: 37 additions & 0 deletions mars/learn/metrics/_ranking.py
@@ -185,6 +185,43 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None,
    return ret.execute(session=session, **(run_kwargs or dict()))


def _binary_roc_auc_score(y_true, y_score, sample_weight=None,
                          max_fpr=None, session=None, run_kwargs=None):
    """Binary roc auc score."""

    from numpy import interp

    if len(mt.unique(y_true).execute()) != 2:
        raise ValueError("Only one class present in y_true. ROC AUC score "
                         "is not defined in that case.")

    fpr, tpr, _ = roc_curve(y_true, y_score, sample_weight=sample_weight,
                            session=session, run_kwargs=run_kwargs)
    fpr, tpr = mt.ExecutableTuple([fpr, tpr]).fetch(session=session)

    if max_fpr is None or max_fpr == 1:
        return auc(fpr, tpr, session=session,
                   run_kwargs=run_kwargs).fetch(session=session)
    if max_fpr <= 0 or max_fpr > 1:
        raise ValueError(f"Expected max_fpr in range (0, 1], got: {max_fpr}")

    # Add a single point at max_fpr by linear interpolation
    stop = mt.searchsorted(fpr, max_fpr, 'right').execute(
        session=session, **(run_kwargs or dict())).fetch(session=session)
    x_interp = [fpr[stop - 1], fpr[stop]]
    y_interp = [tpr[stop - 1], tpr[stop]]
    tpr = list(tpr[:stop])
    tpr.append(interp(max_fpr, x_interp, y_interp))
    fpr = list(fpr[:stop])
    fpr.append(max_fpr)
    partial_auc = auc(fpr, tpr, session=session, run_kwargs=run_kwargs)

    # McClish correction: standardize result to be 0.5 if non-discriminant
    # and 1 if maximal
    min_area = 0.5 * max_fpr**2
    max_area = max_fpr
    return 0.5 * (1 + (partial_auc.fetch(session=session) - min_area)
                  / (max_area - min_area))


def roc_curve(y_true, y_score, pos_label=None, sample_weight=None,
              drop_intermediate=True, session=None, run_kwargs=None):
    """Compute Receiver operating characteristic (ROC)
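For readers following the new helper: the sketch below reproduces the same partial-AUC truncation and McClish correction in plain NumPy/scikit-learn. The example data and variable names are illustrative (not from the commit); the mars version above performs the identical steps on distributed tensors.

    import numpy as np
    from sklearn.metrics import roc_curve, auc

    # Hypothetical example data; a perfectly-ranking score for clarity.
    y_true = np.array([0, 1, 0, 1, 1, 0, 1, 0])
    y_score = np.array([0.1, 0.8, 0.3, 0.7, 0.9, 0.4, 0.6, 0.2])
    max_fpr = 0.5

    fpr, tpr, _ = roc_curve(y_true, y_score)

    # Truncate the curve at max_fpr, adding one linearly interpolated
    # point, mirroring the mt.searchsorted / numpy.interp steps above.
    stop = np.searchsorted(fpr, max_fpr, side='right')
    x_interp = [fpr[stop - 1], fpr[stop]]
    y_interp = [tpr[stop - 1], tpr[stop]]
    fpr_part = np.append(fpr[:stop], max_fpr)
    tpr_part = np.append(tpr[:stop], np.interp(max_fpr, x_interp, y_interp))
    partial_auc = auc(fpr_part, tpr_part)

    # McClish correction: a non-discriminant classifier follows tpr == fpr,
    # so its area up to max_fpr is 0.5 * max_fpr**2; a maximal classifier
    # reaches tpr == 1 immediately, giving area max_fpr. The rescaling maps
    # those two extremes to 0.5 and 1 respectively.
    min_area = 0.5 * max_fpr**2
    max_area = max_fpr
    score = 0.5 * (1 + (partial_auc - min_area) / (max_area - min_area))
    print(score)  # 1.0 for this perfectly-ranked example
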
27 changes: 27 additions & 0 deletions mars/learn/metrics/tests/test_ranking.py
@@ -25,13 +25,16 @@
    from sklearn.exceptions import UndefinedMetricWarning
    from sklearn.utils import check_random_state
    from sklearn.utils._testing import assert_warns
    from sklearn.metrics._ranking import _binary_roc_auc_score as sk_binary_roc_auc_score
except ImportError:  # pragma: no cover
    sklearn = None
import pytest


from .... import dataframe as md
from .... import tensor as mt
from .. import roc_curve, auc, accuracy_score
from .._ranking import _binary_roc_auc_score


def test_roc_curve(setup):
@@ -149,6 +152,30 @@ def test_roc_curve_one_label(setup):
    assert fpr.shape == thresholds.shape


def test_binary_roc_auc_score(setup):
    # Test that the binary ROC AUC score matches scikit-learn's result
    rs = np.random.RandomState(0)
    raw_X = rs.randint(0, 2, size=10)
    raw_Y = rs.rand(10).astype('float32')

    X = mt.tensor(raw_X)
    Y = mt.tensor(raw_Y)

    for max_fpr in (np.random.rand(), None):
        # Calculate the score using both frameworks
        score = _binary_roc_auc_score(X, Y, max_fpr=max_fpr)
        expected_score = sk_binary_roc_auc_score(raw_X, raw_Y, max_fpr=max_fpr)

        # Both scores should be equal
        np.testing.assert_almost_equal(score, expected_score, decimal=6)

    with pytest.raises(ValueError):
        _binary_roc_auc_score(mt.tensor([0]), Y)

    with pytest.raises(ValueError):
        _binary_roc_auc_score(X, Y, max_fpr=0)


def test_roc_curve_drop_intermediate(setup):
    # Test that drop_intermediate drops the correct thresholds
    y_true = [0, 0, 0, 0, 1, 1]
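Usage note: the helper is private (hence the underscore import in the test), but it can be exercised end-to-end the same way the test does. A minimal sketch, assuming mars creates a default local session implicitly on first execution (the test relies on its setup fixture instead):

    import numpy as np
    import mars.tensor as mt
    from mars.learn.metrics._ranking import _binary_roc_auc_score

    # Same style of inputs as the test above; values are illustrative.
    rs = np.random.RandomState(0)
    raw_x = rs.randint(0, 2, size=10)   # contains both classes for seed 0
    raw_y = rs.rand(10).astype('float32')

    score = _binary_roc_auc_score(mt.tensor(raw_x), mt.tensor(raw_y),
                                  max_fpr=0.5)
    print(score)
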
