From 010a211d267f059d9965483e5bd83e50b7b91fd4 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 27 Oct 2017 16:05:25 +0200 Subject: [PATCH 01/10] minor corrections in docstring --- sklearn/neighbors/nca.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/neighbors/nca.py b/sklearn/neighbors/nca.py index a3d35e4dd0a2d..4385c43639d01 100644 --- a/sklearn/neighbors/nca.py +++ b/sklearn/neighbors/nca.py @@ -87,10 +87,10 @@ class NeighborhoodComponentAnalysis(BaseEstimator, TransformerMixin): Attributes ---------- transformation_ : array, shape (n_features_out, n_features) - The linear transformation learned during fitting. + The linear transformation learned during fitting. n_iter_ : int - Counts the number of iterations performed by the optimizer. + Counts the number of iterations performed by the optimizer. opt_result_ : scipy.optimize.OptimizeResult (optional) A dictionary of information representing the optimization result. @@ -121,9 +121,9 @@ class NeighborhoodComponentAnalysis(BaseEstimator, TransformerMixin): Notes ----- Neighborhood Component Analysis (NCA) is a machine learning algorithm for - metric learning. It learns a linear transformation of the space in a - supervised fashion to improve the classification accuracy of a - stochastic nearest neighbors rule in this new space. + metric learning. It learns a linear transformation in a supervised fashion + to improve the classification accuracy of a stochastic nearest neighbors + rule in the new space. .. warning:: From 9f1157d5fcefbd6af899673a50e6a3df532a2759 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 27 Oct 2017 16:12:08 +0200 Subject: [PATCH 02/10] minor corrections in docstring --- sklearn/neighbors/nca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/neighbors/nca.py b/sklearn/neighbors/nca.py index 4385c43639d01..a825a1ab44de4 100644 --- a/sklearn/neighbors/nca.py +++ b/sklearn/neighbors/nca.py @@ -27,7 +27,7 @@ class NeighborhoodComponentAnalysis(BaseEstimator, TransformerMixin): Parameters ---------- - n_features_out: int, optional (default=None) + n_features_out : int, optional (default=None) Preferred dimensionality of the embedding. 
init : string or numpy array, optional (default='pca') From 22838c5ddb27d0283254b465594172bcaaa6c52c Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Fri, 27 Oct 2017 17:01:24 +0200 Subject: [PATCH 03/10] remove comment --- sklearn/neighbors/nca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/neighbors/nca.py b/sklearn/neighbors/nca.py index a825a1ab44de4..bd1b4721a8280 100644 --- a/sklearn/neighbors/nca.py +++ b/sklearn/neighbors/nca.py @@ -445,7 +445,7 @@ def _loss_grad_lbfgs(self, transformation, X, y, diffs, ci = masks[:, y[i]] p_i_j = soft[ci] not_ci = np.logical_not(ci) - diff_ci = diffs[i, ci, :] # n_samples * n_features + diff_ci = diffs[i, ci, :] diff_not_ci = diffs[i, not_ci, :] sum_ci = diff_ci.T.dot( (p_i_j[:, np.newaxis] * diff_embedded[ci, :])) From 8a9ff5bb3945e371083a88738d8802b3acfa3bae Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Mon, 30 Oct 2017 10:33:41 +0100 Subject: [PATCH 04/10] Add verbose during iterations --- sklearn/neighbors/nca.py | 21 +++++++++++++++++++++ sklearn/neighbors/tests/test_nca.py | 1 + 2 files changed, 22 insertions(+) diff --git a/sklearn/neighbors/nca.py b/sklearn/neighbors/nca.py index bd1b4721a8280..50ac51fd654de 100644 --- a/sklearn/neighbors/nca.py +++ b/sklearn/neighbors/nca.py @@ -430,6 +430,19 @@ def _loss_grad_lbfgs(self, transformation, X, y, diffs, The new (flattened) gradient of the loss. """ + if self.n_iter_ == 0: + self.n_iter_ += 1 + if self.verbose: + header_fields = ['Iteration', 'Objective Value', 'Time(s)'] + header_fmt = '{:>10} {:>20} {:>10}' + header = header_fmt.format(*header_fields) + cls_name = self.__class__.__name__ + print('[{}]'.format(cls_name)) + print('[{}] {}\n[{}] {}'.format(cls_name, header, + cls_name, '-' * len(header))) + + t_funcall = time.time() + transformation = transformation.reshape(-1, X.shape[1]) loss = 0 gradient = np.zeros(transformation.shape) @@ -454,6 +467,14 @@ def _loss_grad_lbfgs(self, transformation, X, y, diffs, p_i = np.sum(p_i_j) gradient += 2 * (p_i * (sum_ci.T + sum_not_ci.T) - sum_ci.T) loss += p_i + + if self.verbose: + t_funcall = time.time() - t_funcall + values_fmt = '[{}] {:>10} {:>20.6e} {:>10.2f}' + print(values_fmt.format(self.__class__.__name__, self.n_iter_, + loss, t_funcall)) + sys.stdout.flush() + return - loss, - gradient.ravel() diff --git a/sklearn/neighbors/tests/test_nca.py b/sklearn/neighbors/tests/test_nca.py index fccc046e51892..a60e4399fe62b 100644 --- a/sklearn/neighbors/tests/test_nca.py +++ b/sklearn/neighbors/tests/test_nca.py @@ -60,6 +60,7 @@ def test_finite_differences(): X, y, init = nca._validate_params(X, y) masks = _make_masks(y) diffs = X[:, np.newaxis] - X[np.newaxis] + nca.n_iter_ = 0 point = nca._initialize(X, init) # compute the gradient at `point` From 76f3544b369cdd4d7907470a011a83b65c031f62 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 31 Oct 2017 10:22:49 +0100 Subject: [PATCH 05/10] Update code according to code review: https://github.com/wdevazelhes/scikit-learn/pull/1#pullrequestreview-72533389 --- doc/modules/classes.rst | 2 +- sklearn/neighbors/__init__.py | 4 +- sklearn/neighbors/nca.py | 77 +++++++++++--------- sklearn/neighbors/tests/test_nca.py | 106 +++++++++++++++++++++------- 4 files changed, 125 insertions(+), 64 deletions(-) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index fbb773e8024f0..17f2704e0829b 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -1113,7 +1113,7 @@ Model validation neighbors.RadiusNeighborsRegressor 
neighbors.NearestCentroid neighbors.NearestNeighbors - neighbors.NeighborhoodComponentAnalysis + neighbors.NeighborhoodComponentsAnalysis .. autosummary:: :toctree: generated/ diff --git a/sklearn/neighbors/__init__.py b/sklearn/neighbors/__init__.py index 8e211ef9ec448..367928fad5b5a 100644 --- a/sklearn/neighbors/__init__.py +++ b/sklearn/neighbors/__init__.py @@ -14,7 +14,7 @@ from .kde import KernelDensity from .approximate import LSHForest from .lof import LocalOutlierFactor -from .nca import NeighborhoodComponentAnalysis +from .nca import NeighborhoodComponentsAnalysis __all__ = ['BallTree', 'DistanceMetric', @@ -30,4 +30,4 @@ 'KernelDensity', 'LSHForest', 'LocalOutlierFactor', - 'NeighborhoodComponentAnalysis'] + 'NeighborhoodComponentsAnalysis'] diff --git a/sklearn/neighbors/nca.py b/sklearn/neighbors/nca.py index 50ac51fd654de..1c755427c918c 100644 --- a/sklearn/neighbors/nca.py +++ b/sklearn/neighbors/nca.py @@ -12,6 +12,7 @@ import time from scipy.misc import logsumexp from scipy.optimize import minimize +from sklearn.preprocessing import OneHotEncoder from ..base import BaseEstimator, TransformerMixin from ..preprocessing import LabelEncoder @@ -22,8 +23,8 @@ from ..externals.six import integer_types -class NeighborhoodComponentAnalysis(BaseEstimator, TransformerMixin): - """Neighborhood Component Analysis +class NeighborhoodComponentsAnalysis(BaseEstimator, TransformerMixin): + """Neighborhood Components Analysis Parameters ---------- @@ -98,16 +99,16 @@ class NeighborhoodComponentAnalysis(BaseEstimator, TransformerMixin): Examples -------- - >>> from sklearn.neighbors.nca import NeighborhoodComponentAnalysis + >>> from sklearn.neighbors.nca import NeighborhoodComponentsAnalysis >>> from sklearn.neighbors import KNeighborsClassifier >>> from sklearn.datasets import load_iris >>> from sklearn.model_selection import train_test_split >>> X, y = load_iris(return_X_y=True) >>> X_train, X_test, y_train, y_test = train_test_split(X, y, ... stratify=y, test_size=0.7, random_state=42) - >>> nca = NeighborhoodComponentAnalysis(None,random_state=42) + >>> nca = NeighborhoodComponentsAnalysis(None,random_state=42) >>> nca.fit(X_train, y_train) # doctest: +ELLIPSIS - NeighborhoodComponentAnalysis(...) + NeighborhoodComponentsAnalysis(...) >>> knn = KNeighborsClassifier(n_neighbors=3) >>> knn.fit(X_train, y_train) # doctest: +ELLIPSIS KNeighborsClassifier(...) @@ -123,13 +124,7 @@ class NeighborhoodComponentAnalysis(BaseEstimator, TransformerMixin): Neighborhood Component Analysis (NCA) is a machine learning algorithm for metric learning. It learns a linear transformation in a supervised fashion to improve the classification accuracy of a stochastic nearest neighbors - rule in the new space. - - .. warning:: - - As NCA is optimizing a non-convex objective function, it will - likely end up in a local optimum. Several runs with independent random - init might be necessary to get a good convergence. + rule in the transformed space. References ---------- @@ -137,9 +132,13 @@ class NeighborhoodComponentAnalysis(BaseEstimator, TransformerMixin): "Neighbourhood Components Analysis". Advances in Neural Information Processing Systems. 17, 513-520, 2005. http://www.cs.nyu.edu/~roweis/papers/ncanips.pdf + + .. 
[2] Wikipedia entry on Neighborhood Components Analysis + https://en.wikipedia.org/wiki/Neighbourhood_components_analysis + """ - def __init__(self, n_features_out=None, init='identity', max_iter=50, + def __init__(self, n_features_out=None, init='pca', max_iter=50, tol=1e-5, callback=None, store_opt_result=False, verbose=0, random_state=None): @@ -167,7 +166,7 @@ def fit(self, X, y): Returns ------- self : object - returns a trained NeighborhoodComponentAnalysis model. + returns a trained NeighborhoodComponentsAnalysis model. """ # Verify inputs X and y and NCA parameters, and transform a copy if @@ -182,7 +181,8 @@ def fit(self, X, y): # Compute arrays that stay fixed during optimization: # mask for fast lookup of same-class samples - masks = _make_masks(y_valid) + masks = OneHotEncoder(sparse=False, + dtype=bool).fit_transform(y_valid[:, np.newaxis]) # pairwise differences diffs = X_valid[:, np.newaxis] - X_valid[np.newaxis] @@ -193,7 +193,7 @@ def fit(self, X, y): disp = self.verbose - 2 if self.verbose > 1 else -1 optimizer_params = {'method': 'L-BFGS-B', 'fun': self._loss_grad_lbfgs, - 'args': (X_valid, y_valid, diffs, masks), + 'args': (X_valid, y_valid, diffs, masks, -1.0), 'jac': True, 'x0': transformation, 'tol': self.tol, @@ -401,7 +401,7 @@ def _callback(self, transformation): self.n_iter_ += 1 def _loss_grad_lbfgs(self, transformation, X, y, diffs, - masks): + masks, sign=1.0): """Compute the loss and the loss gradient w.r.t. ``transformation``. Parameters @@ -448,23 +448,29 @@ def _loss_grad_lbfgs(self, transformation, X, y, diffs, gradient = np.zeros(transformation.shape) X_embedded = transformation.dot(X.T).T - # for every sample, compute its contribution to loss and gradient + # for every sample x_i, compute its contribution to loss and gradient for i in range(X.shape[0]): + # compute distances to x_i in embedded space diff_embedded = X_embedded[i] - X_embedded - sum_of_squares = np.einsum('ij,ij->i', diff_embedded, - diff_embedded) - sum_of_squares[i] = np.inf - soft = np.exp(-sum_of_squares - logsumexp(-sum_of_squares)) - ci = masks[:, y[i]] - p_i_j = soft[ci] - not_ci = np.logical_not(ci) + dist_embedded = np.einsum('ij,ij->i', diff_embedded, + diff_embedded) + dist_embedded[i] = np.inf + + # compute exponentiated distances (use the log-sum-exp trick to + # avoid numerical instabilities + exp_dist_embedded = np.exp(-dist_embedded - + logsumexp(-dist_embedded)) + ci = masks[:, y[i]] # samples that are in the same class as x_i + p_i_j = exp_dist_embedded[ci] diff_ci = diffs[i, ci, :] - diff_not_ci = diffs[i, not_ci, :] + diff_not_ci = diffs[i, ~ci, :] sum_ci = diff_ci.T.dot( (p_i_j[:, np.newaxis] * diff_embedded[ci, :])) - sum_not_ci = diff_not_ci.T.dot((soft[not_ci][:, np.newaxis] * - diff_embedded[not_ci, :])) - p_i = np.sum(p_i_j) + sum_not_ci = diff_not_ci.T.dot((exp_dist_embedded[~ci][:, + np.newaxis] * + diff_embedded[~ci, :])) + p_i = np.sum(p_i_j) # probability of x_i to be correctly + # classified gradient += 2 * (p_i * (sum_ci.T + sum_not_ci.T) - sum_ci.T) loss += p_i @@ -475,7 +481,7 @@ def _loss_grad_lbfgs(self, transformation, X, y, diffs, loss, t_funcall)) sys.stdout.flush() - return - loss, - gradient.ravel() + return sign * loss, sign * gradient.ravel() ########################## @@ -538,8 +544,9 @@ def _make_masks(y): masks: array, shape (n_samples, n_classes) One-hot encoding of ``y``. 
""" - - n = y.shape[0] - masks = np.zeros((n, y.max() + 1)) - masks[np.arange(n), y] = [1] - return masks.astype(bool) + masks = OneHotEncoder(sparse=False, dtype=bool).fit_transform(y[:, + np.newaxis]) + # n = y.shape[0] + # masks = np.zeros((n, y.max() + 1), dtype=bool) + # masks[np.arange(n), y] = [True] + return masks diff --git a/sklearn/neighbors/tests/test_nca.py b/sklearn/neighbors/tests/test_nca.py index a60e4399fe62b..5adcdd3404830 100644 --- a/sklearn/neighbors/tests/test_nca.py +++ b/sklearn/neighbors/tests/test_nca.py @@ -1,9 +1,10 @@ import numpy as np +from numpy.testing import assert_array_equal from sklearn.utils import check_random_state from sklearn.utils.testing import assert_raises, assert_equal from sklearn.datasets import load_iris, make_classification from sklearn.model_selection import train_test_split -from sklearn.neighbors.nca import NeighborhoodComponentAnalysis, _make_masks +from sklearn.neighbors.nca import NeighborhoodComponentsAnalysis, _make_masks from sklearn.metrics import pairwise_distances @@ -55,7 +56,7 @@ def test_finite_differences(): y = random_state.randint(0, n_labels, (n_samples)) point = random_state.randn(num_dims, n_features) X = random_state.randn(n_samples, n_features) - nca = NeighborhoodComponentAnalysis(None, init=point) + nca = NeighborhoodComponentsAnalysis(None, init=point) X, y, init = nca._validate_params(X, y) masks = _make_masks(y) @@ -99,8 +100,8 @@ def test_simple_example(): """ X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) y = np.array([1, 0, 1, 0]) - nca = NeighborhoodComponentAnalysis(n_features_out=2, init='identity', - random_state=42) + nca = NeighborhoodComponentsAnalysis(n_features_out=2, init='identity', + random_state=42) nca.fit(X, y) X_transformed = nca.transform(X) np.testing.assert_equal(pairwise_distances(X_transformed).argsort()[:, 1], @@ -111,7 +112,7 @@ def test_params_validation(): # Test that invalid parameters raise value error X = np.arange(12).reshape(4, 3) y = [1, 1, 2, 2] - NCA = NeighborhoodComponentAnalysis + NCA = NeighborhoodComponentsAnalysis # TypeError assert_raises(TypeError, NCA(max_iter='21').fit, X, y) @@ -136,7 +137,8 @@ def test_transformation_dimensions(): # Fail if transformation input dimension does not match inputs dimensions transformation = np.array([[1, 2], [3, 4]]) assert_raises(ValueError, - NeighborhoodComponentAnalysis(None, init=transformation).fit, + NeighborhoodComponentsAnalysis(None, + init=transformation).fit, X, y) # Fail if transformation output dimension is larger than @@ -144,12 +146,13 @@ def test_transformation_dimensions(): transformation = np.array([[1, 2], [3, 4], [5, 6]]) # len(transformation) > len(transformation[0]) assert_raises(ValueError, - NeighborhoodComponentAnalysis(None, init=transformation).fit, + NeighborhoodComponentsAnalysis(None, + init=transformation).fit, X, y) # Pass otherwise transformation = np.arange(9).reshape(3, 3) - NeighborhoodComponentAnalysis(None, init=transformation).fit(X, y) + NeighborhoodComponentsAnalysis(None, init=transformation).fit(X, y) def test_n_features_out(): @@ -159,15 +162,15 @@ def test_n_features_out(): transformation = np.array([[1, 2, 3], [4, 5, 6]]) # n_features_out = X.shape[1] != transformation.shape[0] - nca = NeighborhoodComponentAnalysis(n_features_out=3, init=transformation) + nca = NeighborhoodComponentsAnalysis(n_features_out=3, init=transformation) assert_raises(ValueError, nca.fit, X, y) # n_features_out > X.shape[1] - nca = NeighborhoodComponentAnalysis(n_features_out=5, init=transformation) + nca = 
NeighborhoodComponentsAnalysis(n_features_out=5, init=transformation) assert_raises(ValueError, nca.fit, X, y) # n_features_out < X.shape[1] - nca = NeighborhoodComponentAnalysis(n_features_out=2, init='identity') + nca = NeighborhoodComponentsAnalysis(n_features_out=2, init='identity') nca.fit(X, y) @@ -177,49 +180,100 @@ def test_init_transformation(): X_train, X_test, y_train, y_test = train_test_split(X, y) # Start learning from scratch - nca = NeighborhoodComponentAnalysis(None, init='identity') + nca = NeighborhoodComponentsAnalysis(None, init='identity') nca.fit(X_train, y_train) # Initialize with random - nca_random = NeighborhoodComponentAnalysis(None, init='random') + nca_random = NeighborhoodComponentsAnalysis(None, init='random') nca_random.fit(X_train, y_train) # Initialize with PCA - nca_pca = NeighborhoodComponentAnalysis(None, init='pca') + nca_pca = NeighborhoodComponentsAnalysis(None, init='pca') nca_pca.fit(X_train, y_train) init = np.random.rand(X.shape[1], X.shape[1]) - nca = NeighborhoodComponentAnalysis(None, init=init) + nca = NeighborhoodComponentsAnalysis(None, init=init) nca.fit(X_train, y_train) # init.shape[1] must match X.shape[1] init = np.random.rand(X.shape[1], X.shape[1] + 1) - nca = NeighborhoodComponentAnalysis(None, init=init) + nca = NeighborhoodComponentsAnalysis(None, init=init) assert_raises(ValueError, nca.fit, X_train, y_train) # init.shape[0] must be <= init.shape[1] init = np.random.rand(X.shape[1] + 1, X.shape[1]) - nca = NeighborhoodComponentAnalysis(None, init=init) + nca = NeighborhoodComponentsAnalysis(None, init=init) assert_raises(ValueError, nca.fit, X_train, y_train) # init.shape[0] must match n_features_out init = np.random.rand(X.shape[1], X.shape[1]) - nca = NeighborhoodComponentAnalysis(n_features_out=X.shape[1] - 2, - init=init) + nca = NeighborhoodComponentsAnalysis(n_features_out=X.shape[1] - 2, + init=init) assert_raises(ValueError, nca.fit, X_train, y_train) def test_verbose(): - nca = NeighborhoodComponentAnalysis(None, verbose=1) + nca = NeighborhoodComponentsAnalysis(None, verbose=1) nca.fit(iris_data, iris_target) +def test_singleton_class(): + X = iris_data + y = iris_target + X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, stratify=y) + + # one singleton class + singleton_class = 1 + ind_singleton, = np.where(y_tr == singleton_class) + y_tr[ind_singleton] = 2 + y_tr[ind_singleton[0]] = singleton_class + + nca = NeighborhoodComponentsAnalysis(max_iter=30) + nca.fit(X_tr, y_tr) + + # One non-singleton class + X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, stratify=y) + ind_1, = np.where(y_tr == 1) + ind_2, = np.where(y_tr == 2) + y_tr[ind_1] = 0 + y_tr[ind_1[0]] = 1 + y_tr[ind_2] = 0 + y_tr[ind_2[0]] = 2 + + nca = NeighborhoodComponentsAnalysis(max_iter=30) + nca.fit(X_tr, y_tr) + + # Only singleton classes + X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, stratify=y) + ind_0, = np.where(y_tr == 0) + ind_1, = np.where(y_tr == 1) + ind_2, = np.where(y_tr == 2) + X_tr = X_tr[[ind_0[0], ind_1[0], ind_2[0]]] + y_tr = y_tr[[ind_0[0], ind_1[0], ind_2[0]]] + + nca = NeighborhoodComponentsAnalysis(init='identity', max_iter=30) + nca.fit(X_tr, y_tr) + assert_array_equal(X, nca.transform(X)) + + +def test_one_class(): + X = iris_data[iris_target == 0] + y = iris_target[iris_target == 0] + X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3) + + nca = NeighborhoodComponentsAnalysis(max_iter=30, + n_features_out=X.shape[1], + init='identity') + nca.fit(X_tr, y_tr) + 
assert_array_equal(X, nca.transform(X)) + + def test_callable(): X = iris_data y = iris_target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) - nca = NeighborhoodComponentAnalysis(None, callback='my_cb') + nca = NeighborhoodComponentsAnalysis(None, callback='my_cb') assert_raises(ValueError, nca.fit, X_train, y_train) max_iter = 10 @@ -228,8 +282,8 @@ def my_cb(transformation, n_iter): rem_iter = max_iter - n_iter print('{} iterations remaining...'.format(rem_iter)) - nca = NeighborhoodComponentAnalysis(None, max_iter=max_iter, - callback=my_cb, verbose=1) + nca = NeighborhoodComponentsAnalysis(None, max_iter=max_iter, + callback=my_cb, verbose=1) nca.fit(X_train, y_train) @@ -238,7 +292,7 @@ def test_terminate_early(): y = iris_target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) - nca = NeighborhoodComponentAnalysis(None, max_iter=5) + nca = NeighborhoodComponentsAnalysis(None, max_iter=5) nca.fit(X_train, y_train) @@ -247,8 +301,8 @@ def test_store_opt_result(): y = iris_target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) - nca = NeighborhoodComponentAnalysis(None, max_iter=5, - store_opt_result=True) + nca = NeighborhoodComponentsAnalysis(None, max_iter=5, + store_opt_result=True) nca.fit(X_train, y_train) transformation = nca.opt_result_.x assert_equal(transformation.size, X.shape[1]**2) From 0cee29d78fecc0bf29b60f1008017f75c399a9d5 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 31 Oct 2017 10:28:59 +0100 Subject: [PATCH 06/10] Remove _make_masks and use OneHotEncoder instead --- sklearn/neighbors/nca.py | 21 --------------------- sklearn/neighbors/tests/test_nca.py | 6 ++++-- 2 files changed, 4 insertions(+), 23 deletions(-) diff --git a/sklearn/neighbors/nca.py b/sklearn/neighbors/nca.py index 1c755427c918c..eb85dc0e540c8 100644 --- a/sklearn/neighbors/nca.py +++ b/sklearn/neighbors/nca.py @@ -529,24 +529,3 @@ def _check_scalar(x, name, target_type, min_val=None, max_val=None): if max_val is not None and x > max_val: raise ValueError('`{}`= {}, must be <= {}.'.format(name, x, max_val)) - - -def _make_masks(y): - """Create one-hot encoding of vector ``y``. - - Parameters - ---------- - y : array, shape (n_samples,) - Data samples labels. - - Returns - ------- - masks: array, shape (n_samples, n_classes) - One-hot encoding of ``y``. 
- """ - masks = OneHotEncoder(sparse=False, dtype=bool).fit_transform(y[:, - np.newaxis]) - # n = y.shape[0] - # masks = np.zeros((n, y.max() + 1), dtype=bool) - # masks[np.arange(n), y] = [True] - return masks diff --git a/sklearn/neighbors/tests/test_nca.py b/sklearn/neighbors/tests/test_nca.py index 5adcdd3404830..3999a7dff93cc 100644 --- a/sklearn/neighbors/tests/test_nca.py +++ b/sklearn/neighbors/tests/test_nca.py @@ -1,10 +1,11 @@ import numpy as np from numpy.testing import assert_array_equal +from sklearn.preprocessing import OneHotEncoder from sklearn.utils import check_random_state from sklearn.utils.testing import assert_raises, assert_equal from sklearn.datasets import load_iris, make_classification from sklearn.model_selection import train_test_split -from sklearn.neighbors.nca import NeighborhoodComponentsAnalysis, _make_masks +from sklearn.neighbors.nca import NeighborhoodComponentsAnalysis from sklearn.metrics import pairwise_distances @@ -59,7 +60,8 @@ def test_finite_differences(): nca = NeighborhoodComponentsAnalysis(None, init=point) X, y, init = nca._validate_params(X, y) - masks = _make_masks(y) + masks = OneHotEncoder(sparse=False, + dtype=bool).fit_transform(y[:, np.newaxis]) diffs = X[:, np.newaxis] - X[np.newaxis] nca.n_iter_ = 0 From 507b763c5d99e5c3d4fb2cb556587c4ba1b69a92 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 31 Oct 2017 10:42:23 +0100 Subject: [PATCH 07/10] precise that distances are squared --- sklearn/neighbors/nca.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/neighbors/nca.py b/sklearn/neighbors/nca.py index eb85dc0e540c8..806a954a2ba02 100644 --- a/sklearn/neighbors/nca.py +++ b/sklearn/neighbors/nca.py @@ -450,7 +450,7 @@ def _loss_grad_lbfgs(self, transformation, X, y, diffs, # for every sample x_i, compute its contribution to loss and gradient for i in range(X.shape[0]): - # compute distances to x_i in embedded space + # compute squared distances to x_i in embedded space diff_embedded = X_embedded[i] - X_embedded dist_embedded = np.einsum('ij,ij->i', diff_embedded, diff_embedded) From f6d09738968d3a2dd367a6b4ee55901f7031a2d4 Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 31 Oct 2017 13:33:53 +0100 Subject: [PATCH 08/10] simplify tests and remove None parameter in nca --- sklearn/neighbors/tests/test_nca.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/sklearn/neighbors/tests/test_nca.py b/sklearn/neighbors/tests/test_nca.py index 3999a7dff93cc..7473392da7660 100644 --- a/sklearn/neighbors/tests/test_nca.py +++ b/sklearn/neighbors/tests/test_nca.py @@ -50,14 +50,10 @@ def test_finite_differences(): """ # Initialize `transformation`, `X` and `y` and `NCA` random_state = check_random_state(0) - n_features = 10 - num_dims = 2 - n_samples = 100 - n_labels = 3 - y = random_state.randint(0, n_labels, (n_samples)) - point = random_state.randn(num_dims, n_features) - X = random_state.randn(n_samples, n_features) - nca = NeighborhoodComponentsAnalysis(None, init=point) + X, y = make_classification(random_state=random_state) + point = random_state.randn(random_state.randint(1, X.shape[1] + 1), + X.shape[1]) + nca = NeighborhoodComponentsAnalysis(init=point) X, y, init = nca._validate_params(X, y) masks = OneHotEncoder(sparse=False, From 4c0494d3ab8c66a8ea47d89de36a185709a97d2b Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 31 Oct 2017 15:12:10 +0100 Subject: [PATCH 09/10] remove useless None --- sklearn/neighbors/nca.py | 2 +- 
sklearn/neighbors/tests/test_nca.py | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/sklearn/neighbors/nca.py b/sklearn/neighbors/nca.py index 806a954a2ba02..4179faa9f6315 100644 --- a/sklearn/neighbors/nca.py +++ b/sklearn/neighbors/nca.py @@ -106,7 +106,7 @@ class NeighborhoodComponentsAnalysis(BaseEstimator, TransformerMixin): >>> X, y = load_iris(return_X_y=True) >>> X_train, X_test, y_train, y_test = train_test_split(X, y, ... stratify=y, test_size=0.7, random_state=42) - >>> nca = NeighborhoodComponentsAnalysis(None,random_state=42) + >>> nca = NeighborhoodComponentsAnalysis(random_state=42) >>> nca.fit(X_train, y_train) # doctest: +ELLIPSIS NeighborhoodComponentsAnalysis(...) >>> knn = KNeighborsClassifier(n_neighbors=3) diff --git a/sklearn/neighbors/tests/test_nca.py b/sklearn/neighbors/tests/test_nca.py index 7473392da7660..0e2f573d84ec2 100644 --- a/sklearn/neighbors/tests/test_nca.py +++ b/sklearn/neighbors/tests/test_nca.py @@ -135,7 +135,7 @@ def test_transformation_dimensions(): # Fail if transformation input dimension does not match inputs dimensions transformation = np.array([[1, 2], [3, 4]]) assert_raises(ValueError, - NeighborhoodComponentsAnalysis(None, + NeighborhoodComponentsAnalysis( init=transformation).fit, X, y) @@ -144,13 +144,13 @@ def test_transformation_dimensions(): transformation = np.array([[1, 2], [3, 4], [5, 6]]) # len(transformation) > len(transformation[0]) assert_raises(ValueError, - NeighborhoodComponentsAnalysis(None, + NeighborhoodComponentsAnalysis( init=transformation).fit, X, y) # Pass otherwise transformation = np.arange(9).reshape(3, 3) - NeighborhoodComponentsAnalysis(None, init=transformation).fit(X, y) + NeighborhoodComponentsAnalysis(init=transformation).fit(X, y) def test_n_features_out(): @@ -178,29 +178,29 @@ def test_init_transformation(): X_train, X_test, y_train, y_test = train_test_split(X, y) # Start learning from scratch - nca = NeighborhoodComponentsAnalysis(None, init='identity') + nca = NeighborhoodComponentsAnalysis(init='identity') nca.fit(X_train, y_train) # Initialize with random - nca_random = NeighborhoodComponentsAnalysis(None, init='random') + nca_random = NeighborhoodComponentsAnalysis(init='random') nca_random.fit(X_train, y_train) # Initialize with PCA - nca_pca = NeighborhoodComponentsAnalysis(None, init='pca') + nca_pca = NeighborhoodComponentsAnalysis(init='pca') nca_pca.fit(X_train, y_train) init = np.random.rand(X.shape[1], X.shape[1]) - nca = NeighborhoodComponentsAnalysis(None, init=init) + nca = NeighborhoodComponentsAnalysis(init=init) nca.fit(X_train, y_train) # init.shape[1] must match X.shape[1] init = np.random.rand(X.shape[1], X.shape[1] + 1) - nca = NeighborhoodComponentsAnalysis(None, init=init) + nca = NeighborhoodComponentsAnalysis(init=init) assert_raises(ValueError, nca.fit, X_train, y_train) # init.shape[0] must be <= init.shape[1] init = np.random.rand(X.shape[1] + 1, X.shape[1]) - nca = NeighborhoodComponentsAnalysis(None, init=init) + nca = NeighborhoodComponentsAnalysis(init=init) assert_raises(ValueError, nca.fit, X_train, y_train) # init.shape[0] must match n_features_out @@ -211,7 +211,7 @@ def test_init_transformation(): def test_verbose(): - nca = NeighborhoodComponentsAnalysis(None, verbose=1) + nca = NeighborhoodComponentsAnalysis(verbose=1) nca.fit(iris_data, iris_target) @@ -271,7 +271,7 @@ def test_callable(): y = iris_target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) - nca = 
NeighborhoodComponentsAnalysis(None, callback='my_cb') + nca = NeighborhoodComponentsAnalysis(callback='my_cb') assert_raises(ValueError, nca.fit, X_train, y_train) max_iter = 10 @@ -280,7 +280,7 @@ def my_cb(transformation, n_iter): rem_iter = max_iter - n_iter print('{} iterations remaining...'.format(rem_iter)) - nca = NeighborhoodComponentsAnalysis(None, max_iter=max_iter, + nca = NeighborhoodComponentsAnalysis(max_iter=max_iter, callback=my_cb, verbose=1) nca.fit(X_train, y_train) @@ -290,7 +290,7 @@ def test_terminate_early(): y = iris_target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) - nca = NeighborhoodComponentsAnalysis(None, max_iter=5) + nca = NeighborhoodComponentsAnalysis(max_iter=5) nca.fit(X_train, y_train) @@ -299,7 +299,7 @@ def test_store_opt_result(): y = iris_target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) - nca = NeighborhoodComponentsAnalysis(None, max_iter=5, + nca = NeighborhoodComponentsAnalysis(max_iter=5, store_opt_result=True) nca.fit(X_train, y_train) transformation = nca.opt_result_.x From fb8cefa524f58346879aeb3fa679ea976914837a Mon Sep 17 00:00:00 2001 From: William de Vazelhes Date: Tue, 31 Oct 2017 16:15:20 +0100 Subject: [PATCH 10/10] simplify tests --- sklearn/neighbors/tests/test_nca.py | 129 ++++++++++++---------------- 1 file changed, 55 insertions(+), 74 deletions(-) diff --git a/sklearn/neighbors/tests/test_nca.py b/sklearn/neighbors/tests/test_nca.py index 0e2f573d84ec2..7f3edaa9beee2 100644 --- a/sklearn/neighbors/tests/test_nca.py +++ b/sklearn/neighbors/tests/test_nca.py @@ -4,7 +4,6 @@ from sklearn.utils import check_random_state from sklearn.utils.testing import assert_raises, assert_equal from sklearn.datasets import load_iris, make_classification -from sklearn.model_selection import train_test_split from sklearn.neighbors.nca import NeighborhoodComponentsAnalysis from sklearn.metrics import pairwise_distances @@ -18,6 +17,24 @@ EPS = np.finfo(float).eps +def test_simple_example(): + """Test on a simple example. + + Puts four points in the input space where the opposite labels points are + next to each other. After transform the same labels points should be next + to each other. + + """ + X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) + y = np.array([1, 0, 1, 0]) + nca = NeighborhoodComponentsAnalysis(n_features_out=2, init='identity', + random_state=42) + nca.fit(X, y) + Xansformed = nca.transform(X) + np.testing.assert_equal(pairwise_distances(Xansformed).argsort()[:, 1], + np.array([2, 3, 0, 1])) + + def test_finite_differences(): r"""Test gradient of loss function @@ -49,10 +66,9 @@ def test_finite_differences(): """ # Initialize `transformation`, `X` and `y` and `NCA` - random_state = check_random_state(0) - X, y = make_classification(random_state=random_state) - point = random_state.randn(random_state.randint(1, X.shape[1] + 1), - X.shape[1]) + X = iris_data + y = iris_target + point = rng.randn(rng.randint(1, X.shape[1] + 1), X.shape[1]) nca = NeighborhoodComponentsAnalysis(init=point) X, y, init = nca._validate_params(X, y) @@ -67,7 +83,7 @@ def test_finite_differences(): masks) # create a random direction of norm 1 - random_direction = random_state.randn(*point.shape) + random_direction = rng.randn(*point.shape) random_direction /= np.linalg.norm(random_direction) # computes projected gradient @@ -88,24 +104,6 @@ def test_finite_differences(): np.testing.assert_almost_equal(relative_error, 0.) -def test_simple_example(): - """Test on a simple example. 
- - Puts four points in the input space where the opposite labels points are - next to each other. After transform the same labels points should be next - to each other. - - """ - X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]]) - y = np.array([1, 0, 1, 0]) - nca = NeighborhoodComponentsAnalysis(n_features_out=2, init='identity', - random_state=42) - nca.fit(X, y) - X_transformed = nca.transform(X) - np.testing.assert_equal(pairwise_distances(X_transformed).argsort()[:, 1], - np.array([2, 3, 0, 1])) - - def test_params_validation(): # Test that invalid parameters raise value error X = np.arange(12).reshape(4, 3) @@ -116,8 +114,7 @@ def test_params_validation(): assert_raises(TypeError, NCA(max_iter='21').fit, X, y) assert_raises(TypeError, NCA(verbose='true').fit, X, y) assert_raises(TypeError, NCA(tol=1).fit, X, y) - assert_raises(TypeError, NCA(n_features_out='invalid').fit, - X, y) + assert_raises(TypeError, NCA(n_features_out='invalid').fit, X, y) # ValueError assert_raises(ValueError, NCA(init=1).fit, X, y) @@ -135,8 +132,7 @@ def test_transformation_dimensions(): # Fail if transformation input dimension does not match inputs dimensions transformation = np.array([[1, 2], [3, 4]]) assert_raises(ValueError, - NeighborhoodComponentsAnalysis( - init=transformation).fit, + NeighborhoodComponentsAnalysis(init=transformation).fit, X, y) # Fail if transformation output dimension is larger than @@ -144,8 +140,7 @@ def test_transformation_dimensions(): transformation = np.array([[1, 2], [3, 4], [5, 6]]) # len(transformation) > len(transformation[0]) assert_raises(ValueError, - NeighborhoodComponentsAnalysis( - init=transformation).fit, + NeighborhoodComponentsAnalysis(init=transformation).fit, X, y) # Pass otherwise @@ -175,104 +170,99 @@ def test_n_features_out(): def test_init_transformation(): X, y = make_classification(n_samples=30, n_features=5, n_redundant=0, random_state=0) - X_train, X_test, y_train, y_test = train_test_split(X, y) # Start learning from scratch nca = NeighborhoodComponentsAnalysis(init='identity') - nca.fit(X_train, y_train) + nca.fit(X, y) # Initialize with random nca_random = NeighborhoodComponentsAnalysis(init='random') - nca_random.fit(X_train, y_train) + nca_random.fit(X, y) # Initialize with PCA nca_pca = NeighborhoodComponentsAnalysis(init='pca') - nca_pca.fit(X_train, y_train) + nca_pca.fit(X, y) init = np.random.rand(X.shape[1], X.shape[1]) nca = NeighborhoodComponentsAnalysis(init=init) - nca.fit(X_train, y_train) + nca.fit(X, y) # init.shape[1] must match X.shape[1] init = np.random.rand(X.shape[1], X.shape[1] + 1) nca = NeighborhoodComponentsAnalysis(init=init) - assert_raises(ValueError, nca.fit, X_train, y_train) + assert_raises(ValueError, nca.fit, X, y) # init.shape[0] must be <= init.shape[1] init = np.random.rand(X.shape[1] + 1, X.shape[1]) nca = NeighborhoodComponentsAnalysis(init=init) - assert_raises(ValueError, nca.fit, X_train, y_train) + assert_raises(ValueError, nca.fit, X, y) # init.shape[0] must match n_features_out init = np.random.rand(X.shape[1], X.shape[1]) nca = NeighborhoodComponentsAnalysis(n_features_out=X.shape[1] - 2, init=init) - assert_raises(ValueError, nca.fit, X_train, y_train) + assert_raises(ValueError, nca.fit, X, y) def test_verbose(): nca = NeighborhoodComponentsAnalysis(verbose=1) nca.fit(iris_data, iris_target) + # TODO: rather assert that some message is printed def test_singleton_class(): X = iris_data y = iris_target - X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, stratify=y) # one singleton class 
singleton_class = 1
    # work on a copy of y: this test reassigns labels in place and must not
    # mutate the module-level iris_target array shared with the other tests
    y = y.copy()
    ind_singleton, = np.where(y == singleton_class)
    y[ind_singleton] = 2
    y[ind_singleton[0]] = singleton_class

    nca = NeighborhoodComponentsAnalysis(max_iter=30)
    nca.fit(X, y)

    # One non-singleton class
    ind_1, = np.where(y == 1)
    ind_2, = np.where(y == 2)
    y[ind_1] = 0
    y[ind_1[0]] = 1
    y[ind_2] = 0
    y[ind_2[0]] = 2

    nca = NeighborhoodComponentsAnalysis(max_iter=30)
    nca.fit(X, y)

    # Only singleton classes
    ind_0, = np.where(y == 0)
    ind_1, = np.where(y == 1)
    ind_2, = np.where(y == 2)
    X = X[[ind_0[0], ind_1[0], ind_2[0]]]
    y = y[[ind_0[0], ind_1[0], ind_2[0]]]

    nca = NeighborhoodComponentsAnalysis(init='identity', max_iter=30)
    nca.fit(X, y)
    assert_array_equal(X, nca.transform(X))


def test_one_class():
    X = iris_data[iris_target == 0]
    y = iris_target[iris_target == 0]

    nca = NeighborhoodComponentsAnalysis(max_iter=30,
                                         n_features_out=X.shape[1],
                                         init='identity')
    nca.fit(X, y)
    assert_array_equal(X, nca.transform(X))


def test_callable():
    X = iris_data
    y = iris_target

    nca = NeighborhoodComponentsAnalysis(callback='my_cb')
    assert_raises(ValueError, nca.fit, X, y)

    max_iter = 10

    def my_cb(transformation, n_iter):
        rem_iter = max_iter - n_iter
        print('{} iterations remaining...'.format(rem_iter))

    nca = NeighborhoodComponentsAnalysis(max_iter=max_iter,
                                         callback=my_cb, verbose=1)
    nca.fit(X, y)
    # TODO: rather assert that some message is printed


def test_store_opt_result():
    X = iris_data
    y = iris_target

    nca = NeighborhoodComponentsAnalysis(max_iter=5,
                                         store_opt_result=True)
    nca.fit(X, y)
    transformation = nca.opt_result_.x
    assert_equal(transformation.size, X.shape[1]**2)
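
A side note on the verbose output added in PATCH 04: the two format strings
fully determine the table layout, so its shape can be previewed by evaluating
them standalone. The snippet below just runs the patch's own header_fmt and
values_fmt; the iteration number, objective value and timing are sample
values of our own, not output captured from a real fit:

    cls_name = 'NeighborhoodComponentsAnalysis'

    header_fields = ['Iteration', 'Objective Value', 'Time(s)']
    header_fmt = '{:>10} {:>20} {:>10}'
    header = header_fmt.format(*header_fields)
    print('[{}] {}\n[{}] {}'.format(cls_name, header, cls_name,
                                    '-' * len(header)))

    values_fmt = '[{}] {:>10} {:>20.6e} {:>10.2f}'
    print(values_fmt.format(cls_name, 1, -25.386, 0.03))
    # the three columns are right-aligned to widths 10, 20 and 10, so the
    # per-iteration rows line up under the header

Note that since the loss returned to L-BFGS is the negated sum of the
per-sample probabilities, the printed objective values are negative and
decrease as the optimization progresses.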
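On the _make_masks removal (PATCHES 05/06): OneHotEncoder is a drop-in
replacement for the deleted helper as long as the labels are the contiguous
integers 0..n_classes-1 that LabelEncoder produces in _validate_params. A
standalone equivalence check (our own sketch, not code from the series):

    import numpy as np
    from sklearn.preprocessing import OneHotEncoder

    y = np.array([0, 2, 1, 2, 0])  # contiguous labels, as after LabelEncoder

    # the masks computed since PATCH 06: one boolean column per class, so
    # that masks[:, y[i]] flags the samples in the same class as x_i
    masks = OneHotEncoder(sparse=False,
                          dtype=bool).fit_transform(y[:, np.newaxis])

    # the masks computed by the removed _make_masks
    n = y.shape[0]
    expected = np.zeros((n, y.max() + 1), dtype=bool)
    expected[np.arange(n), y] = True

    assert np.array_equal(masks, expected)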
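Finally, the gradient test simplified in PATCHES 08 and 10 can be reproduced
end to end with scipy.optimize.check_grad instead of the hand-rolled
directional finite difference. The sketch below is not code from the series:
it re-derives the loss/gradient pair of _loss_grad_lbfgs on small random
two-class data (all variable names are ours) to show that the analytic
gradient matches its finite-difference approximation:

    import numpy as np
    from scipy.optimize import check_grad
    from scipy.special import logsumexp  # scipy.misc.logsumexp in older scipy

    rng = np.random.RandomState(42)
    X = rng.randn(20, 3)
    y = rng.randint(0, 2, 20)
    masks = y[:, np.newaxis] == y[np.newaxis, :]  # same-class indicator
    diffs = X[:, np.newaxis] - X[np.newaxis]      # pairwise x_i - x_j


    def loss_grad(flat_transformation):
        # returns (-loss, -gradient), the minimization form fed to L-BFGS
        transformation = flat_transformation.reshape(-1, X.shape[1])
        X_embedded = X.dot(transformation.T)
        loss = 0.
        gradient = np.zeros(transformation.shape)
        for i in range(X.shape[0]):
            diff_embedded = X_embedded[i] - X_embedded
            dist_embedded = np.einsum('ij,ij->i', diff_embedded,
                                      diff_embedded)
            dist_embedded[i] = np.inf  # exclude the self-match
            # softmax over negative squared distances (log-sum-exp trick)
            softmax = np.exp(-dist_embedded - logsumexp(-dist_embedded))
            ci = masks[i]  # softmax[i] is 0, so including i is harmless
            p_i = softmax[ci].sum()  # probability of classifying x_i well
            sum_ci = (softmax[ci][:, np.newaxis] *
                      diff_embedded[ci]).T.dot(diffs[i, ci])
            sum_all = (softmax[:, np.newaxis] *
                       diff_embedded).T.dot(diffs[i])
            gradient += 2 * (p_i * sum_all - sum_ci)
            loss += p_i
        return -loss, -gradient.ravel()


    point = rng.randn(2, X.shape[1])
    error = check_grad(lambda t: loss_grad(t)[0],
                       lambda t: loss_grad(t)[1], point.ravel())
    assert error < 1e-4  # finite-difference error, around 1e-6 in practice

The pair returns (-loss, -gradient) because scipy.optimize.minimize
minimizes; PATCH 05 threads a `sign` argument through _loss_grad_lbfgs for
the same purpose.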