[BACKPORT] Add _binary_roc_auc_score method (#2403) (#2477)
Xuye (Chris) Qin authored Sep 21, 2021
1 parent 0ac9abd commit cc5b577
Showing 2 changed files with 64 additions and 0 deletions.
37 changes: 37 additions & 0 deletions mars/learn/metrics/_ranking.py
@@ -185,6 +185,43 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None,
    return ret.execute(session=session, **(run_kwargs or dict()))


def _binary_roc_auc_score(y_true, y_score, sample_weight=None,
                          max_fpr=None, session=None, run_kwargs=None):
    """Binary roc auc score."""

    from numpy import interp

    if len(mt.unique(y_true).execute()) != 2:
        raise ValueError("Only one class present in y_true. ROC AUC score "
                         "is not defined in that case.")

    fpr, tpr, _ = roc_curve(y_true, y_score, sample_weight=sample_weight,
                            session=session, run_kwargs=run_kwargs)
    fpr, tpr = mt.ExecutableTuple([fpr, tpr]).fetch(session=session)

    if max_fpr is None or max_fpr == 1:
        return auc(fpr, tpr, session=session,
                   run_kwargs=run_kwargs).fetch(session=session)
    if max_fpr <= 0 or max_fpr > 1:
        raise ValueError(f"Expected max_fpr in range (0, 1], got: {max_fpr}")

    # Add a single point at max_fpr by linear interpolation
    stop = mt.searchsorted(fpr, max_fpr, 'right').execute(
        session=session, **(run_kwargs or dict())).fetch(session=session)
    x_interp = [fpr[stop - 1], fpr[stop]]
    y_interp = [tpr[stop - 1], tpr[stop]]
    tpr = list(tpr[:stop])
    tpr.append(interp(max_fpr, x_interp, y_interp))
    fpr = list(fpr[:stop])
    fpr.append(max_fpr)
    partial_auc = auc(fpr, tpr, session=session, run_kwargs=run_kwargs)

    # McClish correction: standardize result to be 0.5 if non-discriminant
    # and 1 if maximal
    min_area = 0.5 * max_fpr**2
    max_area = max_fpr
    return 0.5 * (1 + (partial_auc.fetch(session=session) - min_area)
                  / (max_area - min_area))


def roc_curve(y_true, y_score, pos_label=None, sample_weight=None,
              drop_intermediate=True, session=None, run_kwargs=None):
    """Compute Receiver operating characteristic (ROC)
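For readers following the new helper: the sketch below reproduces the same partial-AUC truncation and McClish correction in plain NumPy/scikit-learn. The example data and variable names are illustrative (not from the commit); the mars version above performs the identical steps on distributed tensors.

    import numpy as np
    from sklearn.metrics import roc_curve, auc

    # Hypothetical example data; a perfectly-ranking score for clarity.
    y_true = np.array([0, 1, 0, 1, 1, 0, 1, 0])
    y_score = np.array([0.1, 0.8, 0.3, 0.7, 0.9, 0.4, 0.6, 0.2])
    max_fpr = 0.5

    fpr, tpr, _ = roc_curve(y_true, y_score)

    # Truncate the curve at max_fpr, adding one linearly interpolated
    # point, mirroring the mt.searchsorted / numpy.interp steps above.
    stop = np.searchsorted(fpr, max_fpr, side='right')
    x_interp = [fpr[stop - 1], fpr[stop]]
    y_interp = [tpr[stop - 1], tpr[stop]]
    fpr_part = np.append(fpr[:stop], max_fpr)
    tpr_part = np.append(tpr[:stop], np.interp(max_fpr, x_interp, y_interp))
    partial_auc = auc(fpr_part, tpr_part)

    # McClish correction: a non-discriminant classifier follows tpr == fpr,
    # so its area up to max_fpr is 0.5 * max_fpr**2; a maximal classifier
    # reaches tpr == 1 immediately, giving area max_fpr. The rescaling maps
    # those two extremes to 0.5 and 1 respectively.
    min_area = 0.5 * max_fpr**2
    max_area = max_fpr
    score = 0.5 * (1 + (partial_auc - min_area) / (max_area - min_area))
    print(score)  # 1.0 for this perfectly-ranked example
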
27 changes: 27 additions & 0 deletions mars/learn/metrics/tests/test_ranking.py
@@ -25,13 +25,16 @@
    from sklearn.exceptions import UndefinedMetricWarning
    from sklearn.utils import check_random_state
    from sklearn.utils._testing import assert_warns
    from sklearn.metrics._ranking import _binary_roc_auc_score as sk_binary_roc_auc_score
except ImportError:  # pragma: no cover
    sklearn = None
import pytest


from .... import dataframe as md
from .... import tensor as mt
from .. import roc_curve, auc, accuracy_score
from .._ranking import _binary_roc_auc_score


def test_roc_curve(setup):
@@ -149,6 +152,30 @@ def test_roc_curve_one_label(setup):
    assert fpr.shape == thresholds.shape


def test_binary_roc_auc_score(setup):
    # Test that the binary ROC AUC score matches scikit-learn's result
    rs = np.random.RandomState(0)
    raw_X = rs.randint(0, 2, size=10)
    raw_Y = rs.rand(10).astype('float32')

    X = mt.tensor(raw_X)
    Y = mt.tensor(raw_Y)

    for max_fpr in (np.random.rand(), None):
        # Calculate the score using both frameworks
        score = _binary_roc_auc_score(X, Y, max_fpr=max_fpr)
        expected_score = sk_binary_roc_auc_score(raw_X, raw_Y, max_fpr=max_fpr)

        # Both scores should be equal
        np.testing.assert_almost_equal(score, expected_score, decimal=6)

    with pytest.raises(ValueError):
        _binary_roc_auc_score(mt.tensor([0]), Y)

    with pytest.raises(ValueError):
        _binary_roc_auc_score(X, Y, max_fpr=0)


def test_roc_curve_drop_intermediate(setup):
    # Test that drop_intermediate drops the correct thresholds
    y_true = [0, 0, 0, 0, 1, 1]
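Usage note: the helper is private (hence the underscore import in the test), but it can be exercised end-to-end the same way the test does. A minimal sketch, assuming mars creates a default local session implicitly on first execution (the test relies on its setup fixture instead):

    import numpy as np
    import mars.tensor as mt
    from mars.learn.metrics._ranking import _binary_roc_auc_score

    # Same style of inputs as the test above; values are illustrative.
    rs = np.random.RandomState(0)
    raw_x = rs.randint(0, 2, size=10)   # contains both classes for seed 0
    raw_y = rs.rand(10).astype('float32')

    score = _binary_roc_auc_score(mt.tensor(raw_x), mt.tensor(raw_y),
                                  max_fpr=0.5)
    print(score)
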
