laywerrobot/lib/python3.6/site-packages/sklearn/metrics/tests/test_pairwise.py

import numpy as np
from numpy import linalg

from scipy.sparse import dok_matrix, csr_matrix, issparse
from scipy.spatial.distance import cosine, cityblock, minkowski, wminkowski

from sklearn.utils.testing import assert_greater
from sklearn.utils.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_almost_equal
from sklearn.utils.testing import assert_equal
from sklearn.utils.testing import assert_array_equal
from sklearn.utils.testing import assert_raises
from sklearn.utils.testing import assert_raises_regexp
from sklearn.utils.testing import assert_true
from sklearn.utils.testing import assert_warns
from sklearn.utils.testing import ignore_warnings

from sklearn.externals.six import iteritems

from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics.pairwise import manhattan_distances
from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics.pairwise import chi2_kernel, additive_chi2_kernel
from sklearn.metrics.pairwise import polynomial_kernel
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.metrics.pairwise import laplacian_kernel
from sklearn.metrics.pairwise import sigmoid_kernel
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import cosine_distances
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.metrics.pairwise import pairwise_distances_argmin_min
from sklearn.metrics.pairwise import pairwise_distances_argmin
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS
from sklearn.metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS
from sklearn.metrics.pairwise import PAIRWISE_BOOLEAN_FUNCTIONS
from sklearn.metrics.pairwise import PAIRED_DISTANCES
from sklearn.metrics.pairwise import check_pairwise_arrays
from sklearn.metrics.pairwise import check_paired_arrays
from sklearn.metrics.pairwise import paired_distances
from sklearn.metrics.pairwise import paired_euclidean_distances
from sklearn.metrics.pairwise import paired_manhattan_distances
from sklearn.preprocessing import normalize
from sklearn.exceptions import DataConversionWarning


def test_pairwise_distances():
    # Test the pairwise_distance helper function.
    rng = np.random.RandomState(0)
    # Euclidean distance should be equivalent to calling the function.
    X = rng.random_sample((5, 4))
    S = pairwise_distances(X, metric="euclidean")
    S2 = euclidean_distances(X)
    assert_array_almost_equal(S, S2)
    # Euclidean distance, with Y != X.
    Y = rng.random_sample((2, 4))
    S = pairwise_distances(X, Y, metric="euclidean")
    S2 = euclidean_distances(X, Y)
    assert_array_almost_equal(S, S2)
    # Test with tuples as X and Y
    X_tuples = tuple([tuple([v for v in row]) for row in X])
    Y_tuples = tuple([tuple([v for v in row]) for row in Y])
    S2 = pairwise_distances(X_tuples, Y_tuples, metric="euclidean")
    assert_array_almost_equal(S, S2)
    # "cityblock" uses scikit-learn metric, cityblock (function) is
    # scipy.spatial.
    S = pairwise_distances(X, metric="cityblock")
    S2 = pairwise_distances(X, metric=cityblock)
    assert_equal(S.shape[0], S.shape[1])
    assert_equal(S.shape[0], X.shape[0])
    assert_array_almost_equal(S, S2)
    # The manhattan metric should be equivalent to cityblock.
    S = pairwise_distances(X, Y, metric="manhattan")
    S2 = pairwise_distances(X, Y, metric=cityblock)
    assert_equal(S.shape[0], X.shape[0])
    assert_equal(S.shape[1], Y.shape[0])
    assert_array_almost_equal(S, S2)
    # Using size_threshold argument should raise
    # a deprecation warning
    assert_warns(DeprecationWarning,
                 manhattan_distances, X, Y, size_threshold=10)
    # Test cosine as a string metric versus cosine callable
    # The string "cosine" uses sklearn.metric,
    # while the function cosine is scipy.spatial
    S = pairwise_distances(X, Y, metric="cosine")
    S2 = pairwise_distances(X, Y, metric=cosine)
    assert_equal(S.shape[0], X.shape[0])
    assert_equal(S.shape[1], Y.shape[0])
    assert_array_almost_equal(S, S2)
    # Test with sparse X and Y,
    # currently only supported for Euclidean, L1 and cosine.
    X_sparse = csr_matrix(X)
    Y_sparse = csr_matrix(Y)
    S = pairwise_distances(X_sparse, Y_sparse, metric="euclidean")
    S2 = euclidean_distances(X_sparse, Y_sparse)
    assert_array_almost_equal(S, S2)
    S = pairwise_distances(X_sparse, Y_sparse, metric="cosine")
    S2 = cosine_distances(X_sparse, Y_sparse)
    assert_array_almost_equal(S, S2)
    S = pairwise_distances(X_sparse, Y_sparse.tocsc(), metric="manhattan")
    S2 = manhattan_distances(X_sparse.tobsr(), Y_sparse.tocoo())
    assert_array_almost_equal(S, S2)
    S2 = manhattan_distances(X, Y)
    assert_array_almost_equal(S, S2)
    # Test with scipy.spatial.distance metric, with a kwd
    kwds = {"p": 2.0}
    S = pairwise_distances(X, Y, metric="minkowski", **kwds)
    S2 = pairwise_distances(X, Y, metric=minkowski, **kwds)
    assert_array_almost_equal(S, S2)
    # same with Y = None
    kwds = {"p": 2.0}
    S = pairwise_distances(X, metric="minkowski", **kwds)
    S2 = pairwise_distances(X, metric=minkowski, **kwds)
    assert_array_almost_equal(S, S2)
    # Test that scipy distance metrics throw an error if sparse matrix given
    assert_raises(TypeError, pairwise_distances, X_sparse, metric="minkowski")
    assert_raises(TypeError, pairwise_distances, X, Y_sparse,
                  metric="minkowski")

    # Test that a value error is raised if the metric is unknown
    assert_raises(ValueError, pairwise_distances, X, Y, metric="blah")


# ignore conversion to boolean in pairwise_distances
@ignore_warnings(category=DataConversionWarning)
def test_pairwise_boolean_distance():
    # test that we convert to boolean arrays for boolean distances
    rng = np.random.RandomState(0)
    X = rng.randn(5, 4)
    Y = X.copy()
    Y[0, 0] = 1 - Y[0, 0]

    for metric in PAIRWISE_BOOLEAN_FUNCTIONS:
        for Z in [Y, None]:
            res = pairwise_distances(X, Z, metric=metric)
            res[np.isnan(res)] = 0
            assert_true(np.sum(res != 0) == 0)


def test_pairwise_precomputed():
    for func in [pairwise_distances, pairwise_kernels]:
        # Test correct shape
        assert_raises_regexp(ValueError, '.* shape .*',
                             func, np.zeros((5, 3)), metric='precomputed')
        # with two args
        assert_raises_regexp(ValueError, '.* shape .*',
                             func, np.zeros((5, 3)), np.zeros((4, 4)),
                             metric='precomputed')
        # even if shape[1] agrees (although thus second arg is spurious)
        assert_raises_regexp(ValueError, '.* shape .*',
                             func, np.zeros((5, 3)), np.zeros((4, 3)),
                             metric='precomputed')

        # Test not copied (if appropriate dtype)
        S = np.zeros((5, 5))
        S2 = func(S, metric="precomputed")
        assert_true(S is S2)
        # with two args
        S = np.zeros((5, 3))
        S2 = func(S, np.zeros((3, 3)), metric="precomputed")
        assert_true(S is S2)

        # Test always returns float dtype
        S = func(np.array([[1]], dtype='int'), metric='precomputed')
        assert_equal('f', S.dtype.kind)

        # Test converts list to array-like
        S = func([[1.]], metric='precomputed')
        assert_true(isinstance(S, np.ndarray))


def check_pairwise_parallel(func, metric, kwds):
    rng = np.random.RandomState(0)
    for make_data in (np.array, csr_matrix):
        X = make_data(rng.random_sample((5, 4)))
        Y = make_data(rng.random_sample((3, 4)))

        try:
            S = func(X, metric=metric, n_jobs=1, **kwds)
        except (TypeError, ValueError) as exc:
            # Not all metrics support sparse input
            # ValueError may be triggered by bad callable
            if make_data is csr_matrix:
                assert_raises(type(exc), func, X, metric=metric,
                              n_jobs=2, **kwds)
                continue
            else:
                raise
        S2 = func(X, metric=metric, n_jobs=2, **kwds)
        assert_array_almost_equal(S, S2)

        S = func(X, Y, metric=metric, n_jobs=1, **kwds)
        S2 = func(X, Y, metric=metric, n_jobs=2, **kwds)
        assert_array_almost_equal(S, S2)


def test_pairwise_parallel():
    wminkowski_kwds = {'w': np.arange(1, 5).astype('double'), 'p': 1}
    metrics = [(pairwise_distances, 'euclidean', {}),
               (pairwise_distances, wminkowski, wminkowski_kwds),
               (pairwise_distances, 'wminkowski', wminkowski_kwds),
               (pairwise_kernels, 'polynomial', {'degree': 1}),
               (pairwise_kernels, callable_rbf_kernel, {'gamma': .1}),
               ]
    for func, metric, kwds in metrics:
        yield check_pairwise_parallel, func, metric, kwds


def test_pairwise_callable_nonstrict_metric():
    # paired_distances should allow callable metric where metric(x, x) != 0
    # Knowing that the callable is a strict metric would allow the diagonal to
    # be left uncalculated and set to 0.
    assert_equal(pairwise_distances([[1.]], metric=lambda x, y: 5)[0, 0], 5)


def callable_rbf_kernel(x, y, **kwds):
    # Callable version of pairwise.rbf_kernel.
    K = rbf_kernel(np.atleast_2d(x), np.atleast_2d(y), **kwds)
    return K


def test_pairwise_kernels():    # Test the pairwise_kernels helper function.

    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((2, 4))
    # Test with all metrics that should be in PAIRWISE_KERNEL_FUNCTIONS.
    test_metrics = ["rbf", "laplacian", "sigmoid", "polynomial", "linear",
                    "chi2", "additive_chi2"]
    for metric in test_metrics:
        function = PAIRWISE_KERNEL_FUNCTIONS[metric]
        # Test with Y=None
        K1 = pairwise_kernels(X, metric=metric)
        K2 = function(X)
        assert_array_almost_equal(K1, K2)
        # Test with Y=Y
        K1 = pairwise_kernels(X, Y=Y, metric=metric)
        K2 = function(X, Y=Y)
        assert_array_almost_equal(K1, K2)
        # Test with tuples as X and Y
        X_tuples = tuple([tuple([v for v in row]) for row in X])
        Y_tuples = tuple([tuple([v for v in row]) for row in Y])
        K2 = pairwise_kernels(X_tuples, Y_tuples, metric=metric)
        assert_array_almost_equal(K1, K2)

        # Test with sparse X and Y
        X_sparse = csr_matrix(X)
        Y_sparse = csr_matrix(Y)
        if metric in ["chi2", "additive_chi2"]:
            # these don't support sparse matrices yet
            assert_raises(ValueError, pairwise_kernels,
                          X_sparse, Y=Y_sparse, metric=metric)
            continue
        K1 = pairwise_kernels(X_sparse, Y=Y_sparse, metric=metric)
        assert_array_almost_equal(K1, K2)
    # Test with a callable function, with given keywords.
    metric = callable_rbf_kernel
    kwds = {'gamma': 0.1}
    K1 = pairwise_kernels(X, Y=Y, metric=metric, **kwds)
    K2 = rbf_kernel(X, Y=Y, **kwds)
    assert_array_almost_equal(K1, K2)

    # callable function, X=Y
    K1 = pairwise_kernels(X, Y=X, metric=metric, **kwds)
    K2 = rbf_kernel(X, Y=X, **kwds)
    assert_array_almost_equal(K1, K2)


def test_pairwise_kernels_filter_param():
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((2, 4))
    K = rbf_kernel(X, Y, gamma=0.1)
    params = {"gamma": 0.1, "blabla": ":)"}
    K2 = pairwise_kernels(X, Y, metric="rbf", filter_params=True, **params)
    assert_array_almost_equal(K, K2)

    assert_raises(TypeError, pairwise_kernels, X, Y, "rbf", **params)


def test_paired_distances():
    # Test the pairwise_distance helper function.
    rng = np.random.RandomState(0)
    # Euclidean distance should be equivalent to calling the function.
    X = rng.random_sample((5, 4))
    # Euclidean distance, with Y != X.
    Y = rng.random_sample((5, 4))
    for metric, func in iteritems(PAIRED_DISTANCES):
        S = paired_distances(X, Y, metric=metric)
        S2 = func(X, Y)
        assert_array_almost_equal(S, S2)
        S3 = func(csr_matrix(X), csr_matrix(Y))
        assert_array_almost_equal(S, S3)
        if metric in PAIRWISE_DISTANCE_FUNCTIONS:
            # Check the pairwise_distances implementation
            # gives the same value
            distances = PAIRWISE_DISTANCE_FUNCTIONS[metric](X, Y)
            distances = np.diag(distances)
            assert_array_almost_equal(distances, S)

    # Check the callable implementation
    S = paired_distances(X, Y, metric='manhattan')
    S2 = paired_distances(X, Y, metric=lambda x, y: np.abs(x - y).sum(axis=0))
    assert_array_almost_equal(S, S2)

    # Test that a value error is raised when the lengths of X and Y should not
    # differ
    Y = rng.random_sample((3, 4))
    assert_raises(ValueError, paired_distances, X, Y)


def test_pairwise_distances_argmin_min():
    # Check pairwise minimum distances computation for any metric
    X = [[0], [1]]
    Y = [[-1], [2]]

    Xsp = dok_matrix(X)
    Ysp = csr_matrix(Y, dtype=np.float32)

    # euclidean metric
    D, E = pairwise_distances_argmin_min(X, Y, metric="euclidean")
    D2 = pairwise_distances_argmin(X, Y, metric="euclidean")
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(D2, [0, 1])
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # sparse matrix case
    Dsp, Esp = pairwise_distances_argmin_min(Xsp, Ysp, metric="euclidean")
    assert_array_equal(Dsp, D)
    assert_array_equal(Esp, E)
    # We don't want np.matrix here
    assert_equal(type(Dsp), np.ndarray)
    assert_equal(type(Esp), np.ndarray)

    # Non-euclidean scikit-learn metric
    D, E = pairwise_distances_argmin_min(X, Y, metric="manhattan")
    D2 = pairwise_distances_argmin(X, Y, metric="manhattan")
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(D2, [0, 1])
    assert_array_almost_equal(E, [1., 1.])
    D, E = pairwise_distances_argmin_min(Xsp, Ysp, metric="manhattan")
    D2 = pairwise_distances_argmin(Xsp, Ysp, metric="manhattan")
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # Non-euclidean Scipy distance (callable)
    D, E = pairwise_distances_argmin_min(X, Y, metric=minkowski,
                                         metric_kwargs={"p": 2})
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # Non-euclidean Scipy distance (string)
    D, E = pairwise_distances_argmin_min(X, Y, metric="minkowski",
                                         metric_kwargs={"p": 2})
    assert_array_almost_equal(D, [0, 1])
    assert_array_almost_equal(E, [1., 1.])

    # Compare with naive implementation
    rng = np.random.RandomState(0)
    X = rng.randn(97, 149)
    Y = rng.randn(111, 149)

    dist = pairwise_distances(X, Y, metric="manhattan")
    dist_orig_ind = dist.argmin(axis=0)
    dist_orig_val = dist[dist_orig_ind, range(len(dist_orig_ind))]

    dist_chunked_ind, dist_chunked_val = pairwise_distances_argmin_min(
        X, Y, axis=0, metric="manhattan", batch_size=50)
    np.testing.assert_almost_equal(dist_orig_ind, dist_chunked_ind, decimal=7)
    np.testing.assert_almost_equal(dist_orig_val, dist_chunked_val, decimal=7)


def test_euclidean_distances():
    # Check the pairwise Euclidean distances computation
    X = [[0]]
    Y = [[1], [2]]
    D = euclidean_distances(X, Y)
    assert_array_almost_equal(D, [[1., 2.]])

    X = csr_matrix(X)
    Y = csr_matrix(Y)
    D = euclidean_distances(X, Y)
    assert_array_almost_equal(D, [[1., 2.]])

    rng = np.random.RandomState(0)
    X = rng.random_sample((10, 4))
    Y = rng.random_sample((20, 4))
    X_norm_sq = (X ** 2).sum(axis=1).reshape(1, -1)
    Y_norm_sq = (Y ** 2).sum(axis=1).reshape(1, -1)

    # check that we still get the right answers with {X,Y}_norm_squared
    D1 = euclidean_distances(X, Y)
    D2 = euclidean_distances(X, Y, X_norm_squared=X_norm_sq)
    D3 = euclidean_distances(X, Y, Y_norm_squared=Y_norm_sq)
    D4 = euclidean_distances(X, Y, X_norm_squared=X_norm_sq,
                             Y_norm_squared=Y_norm_sq)
    assert_array_almost_equal(D2, D1)
    assert_array_almost_equal(D3, D1)
    assert_array_almost_equal(D4, D1)

    # check we get the wrong answer with wrong {X,Y}_norm_squared
    X_norm_sq *= 0.5
    Y_norm_sq *= 0.5
    wrong_D = euclidean_distances(X, Y,
                                  X_norm_squared=np.zeros_like(X_norm_sq),
                                  Y_norm_squared=np.zeros_like(Y_norm_sq))
    assert_greater(np.max(np.abs(wrong_D - D1)), .01)


def test_cosine_distances():
    # Check the pairwise Cosine distances computation
    rng = np.random.RandomState(1337)
    x = np.abs(rng.rand(910))
    XA = np.vstack([x, x])
    D = cosine_distances(XA)
    assert_array_almost_equal(D, [[0., 0.], [0., 0.]])
    # check that all elements are in [0, 2]
    assert_true(np.all(D >= 0.))
    assert_true(np.all(D <= 2.))
    # check that diagonal elements are equal to 0
    assert_array_almost_equal(D[np.diag_indices_from(D)], [0., 0.])

    XB = np.vstack([x, -x])
    D2 = cosine_distances(XB)
    # check that all elements are in [0, 2]
    assert_true(np.all(D2 >= 0.))
    assert_true(np.all(D2 <= 2.))
    # check that diagonal elements are equal to 0 and non diagonal to 2
    assert_array_almost_equal(D2, [[0., 2.], [2., 0.]])

    # check large random matrix
    X = np.abs(rng.rand(1000, 5000))
    D = cosine_distances(X)
    # check that diagonal elements are equal to 0
    assert_array_almost_equal(D[np.diag_indices_from(D)], [0.] * D.shape[0])
    assert_true(np.all(D >= 0.))
    assert_true(np.all(D <= 2.))


# Paired distances

def test_paired_euclidean_distances():
    # Check the paired Euclidean distances computation
    X = [[0], [0]]
    Y = [[1], [2]]
    D = paired_euclidean_distances(X, Y)
    assert_array_almost_equal(D, [1., 2.])


def test_paired_manhattan_distances():
    # Check the paired manhattan distances computation
    X = [[0], [0]]
    Y = [[1], [2]]
    D = paired_manhattan_distances(X, Y)
    assert_array_almost_equal(D, [1., 2.])


def test_chi_square_kernel():
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((10, 4))
    K_add = additive_chi2_kernel(X, Y)
    gamma = 0.1
    K = chi2_kernel(X, Y, gamma=gamma)
    assert_equal(K.dtype, np.float)
    for i, x in enumerate(X):
        for j, y in enumerate(Y):
            chi2 = -np.sum((x - y) ** 2 / (x + y))
            chi2_exp = np.exp(gamma * chi2)
            assert_almost_equal(K_add[i, j], chi2)
            assert_almost_equal(K[i, j], chi2_exp)

    # check diagonal is ones for data with itself
    K = chi2_kernel(Y)
    assert_array_equal(np.diag(K), 1)
    # check off-diagonal is < 1 but > 0:
    assert_true(np.all(K > 0))
    assert_true(np.all(K - np.diag(np.diag(K)) < 1))
    # check that float32 is preserved
    X = rng.random_sample((5, 4)).astype(np.float32)
    Y = rng.random_sample((10, 4)).astype(np.float32)
    K = chi2_kernel(X, Y)
    assert_equal(K.dtype, np.float32)

    # check integer type gets converted,
    # check that zeros are handled
    X = rng.random_sample((10, 4)).astype(np.int32)
    K = chi2_kernel(X, X)
    assert_true(np.isfinite(K).all())
    assert_equal(K.dtype, np.float)

    # check that kernel of similar things is greater than dissimilar ones
    X = [[.3, .7], [1., 0]]
    Y = [[0, 1], [.9, .1]]
    K = chi2_kernel(X, Y)
    assert_greater(K[0, 0], K[0, 1])
    assert_greater(K[1, 1], K[1, 0])

    # test negative input
    assert_raises(ValueError, chi2_kernel, [[0, -1]])
    assert_raises(ValueError, chi2_kernel, [[0, -1]], [[-1, -1]])
    assert_raises(ValueError, chi2_kernel, [[0, 1]], [[-1, -1]])

    # different n_features in X and Y
    assert_raises(ValueError, chi2_kernel, [[0, 1]], [[.2, .2, .6]])

    # sparse matrices
    assert_raises(ValueError, chi2_kernel, csr_matrix(X), csr_matrix(Y))
    assert_raises(ValueError, additive_chi2_kernel,
                  csr_matrix(X), csr_matrix(Y))


def test_kernel_symmetry():
    # Valid kernels should be symmetric
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    for kernel in (linear_kernel, polynomial_kernel, rbf_kernel,
                   laplacian_kernel, sigmoid_kernel, cosine_similarity):
        K = kernel(X, X)
        assert_array_almost_equal(K, K.T, 15)


def test_kernel_sparse():
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    X_sparse = csr_matrix(X)
    for kernel in (linear_kernel, polynomial_kernel, rbf_kernel,
                   laplacian_kernel, sigmoid_kernel, cosine_similarity):
        K = kernel(X, X)
        K2 = kernel(X_sparse, X_sparse)
        assert_array_almost_equal(K, K2)


def test_linear_kernel():
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    K = linear_kernel(X, X)
    # the diagonal elements of a linear kernel are their squared norm
    assert_array_almost_equal(K.flat[::6], [linalg.norm(x) ** 2 for x in X])


def test_rbf_kernel():
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    K = rbf_kernel(X, X)
    # the diagonal elements of a rbf kernel are 1
    assert_array_almost_equal(K.flat[::6], np.ones(5))


def test_laplacian_kernel():
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    K = laplacian_kernel(X, X)
    # the diagonal elements of a laplacian kernel are 1
    assert_array_almost_equal(np.diag(K), np.ones(5))

    # off-diagonal elements are < 1 but > 0:
    assert_true(np.all(K > 0))
    assert_true(np.all(K - np.diag(np.diag(K)) < 1))


def test_cosine_similarity_sparse_output():
    # Test if cosine_similarity correctly produces sparse output.

    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((3, 4))
    Xcsr = csr_matrix(X)
    Ycsr = csr_matrix(Y)

    K1 = cosine_similarity(Xcsr, Ycsr, dense_output=False)
    assert_true(issparse(K1))

    K2 = pairwise_kernels(Xcsr, Y=Ycsr, metric="cosine")
    assert_array_almost_equal(K1.todense(), K2)


def test_cosine_similarity():
    # Test the cosine_similarity.

    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((3, 4))
    Xcsr = csr_matrix(X)
    Ycsr = csr_matrix(Y)

    for X_, Y_ in ((X, None), (X, Y),
                   (Xcsr, None), (Xcsr, Ycsr)):
        # Test that the cosine is kernel is equal to a linear kernel when data
        # has been previously normalized by L2-norm.
        K1 = pairwise_kernels(X_, Y=Y_, metric="cosine")
        X_ = normalize(X_)
        if Y_ is not None:
            Y_ = normalize(Y_)
        K2 = pairwise_kernels(X_, Y=Y_, metric="linear")
        assert_array_almost_equal(K1, K2)


def test_check_dense_matrices():
    # Ensure that pairwise array check works for dense matrices.
    # Check that if XB is None, XB is returned as reference to XA
    XA = np.resize(np.arange(40), (5, 8))
    XA_checked, XB_checked = check_pairwise_arrays(XA, None)
    assert_true(XA_checked is XB_checked)
    assert_array_equal(XA, XA_checked)


def test_check_XB_returned():
    # Ensure that if XA and XB are given correctly, they return as equal.
    # Check that if XB is not None, it is returned equal.
    # Note that the second dimension of XB is the same as XA.
    XA = np.resize(np.arange(40), (5, 8))
    XB = np.resize(np.arange(32), (4, 8))
    XA_checked, XB_checked = check_pairwise_arrays(XA, XB)
    assert_array_equal(XA, XA_checked)
    assert_array_equal(XB, XB_checked)

    XB = np.resize(np.arange(40), (5, 8))
    XA_checked, XB_checked = check_paired_arrays(XA, XB)
    assert_array_equal(XA, XA_checked)
    assert_array_equal(XB, XB_checked)


def test_check_different_dimensions():
    # Ensure an error is raised if the dimensions are different.
    XA = np.resize(np.arange(45), (5, 9))
    XB = np.resize(np.arange(32), (4, 8))
    assert_raises(ValueError, check_pairwise_arrays, XA, XB)

    XB = np.resize(np.arange(4 * 9), (4, 9))
    assert_raises(ValueError, check_paired_arrays, XA, XB)


def test_check_invalid_dimensions():
    # Ensure an error is raised on 1D input arrays.
    # The modified tests are not 1D. In the old test, the array was internally
    # converted to 2D anyways
    XA = np.arange(45).reshape(9, 5)
    XB = np.arange(32).reshape(4, 8)
    assert_raises(ValueError, check_pairwise_arrays, XA, XB)
    XA = np.arange(45).reshape(9, 5)
    XB = np.arange(32).reshape(4, 8)
    assert_raises(ValueError, check_pairwise_arrays, XA, XB)


def test_check_sparse_arrays():
    # Ensures that checks return valid sparse matrices.
    rng = np.random.RandomState(0)
    XA = rng.random_sample((5, 4))
    XA_sparse = csr_matrix(XA)
    XB = rng.random_sample((5, 4))
    XB_sparse = csr_matrix(XB)
    XA_checked, XB_checked = check_pairwise_arrays(XA_sparse, XB_sparse)
    # compare their difference because testing csr matrices for
    # equality with '==' does not work as expected.
    assert_true(issparse(XA_checked))
    assert_equal(abs(XA_sparse - XA_checked).sum(), 0)
    assert_true(issparse(XB_checked))
    assert_equal(abs(XB_sparse - XB_checked).sum(), 0)

    XA_checked, XA_2_checked = check_pairwise_arrays(XA_sparse, XA_sparse)
    assert_true(issparse(XA_checked))
    assert_equal(abs(XA_sparse - XA_checked).sum(), 0)
    assert_true(issparse(XA_2_checked))
    assert_equal(abs(XA_2_checked - XA_checked).sum(), 0)


def tuplify(X):
    # Turns a numpy matrix (any n-dimensional array) into tuples.
    s = X.shape
    if len(s) > 1:
        # Tuplify each sub-array in the input.
        return tuple(tuplify(row) for row in X)
    else:
        # Single dimension input, just return tuple of contents.
        return tuple(r for r in X)


def test_check_tuple_input():
    # Ensures that checks return valid tuples.
    rng = np.random.RandomState(0)
    XA = rng.random_sample((5, 4))
    XA_tuples = tuplify(XA)
    XB = rng.random_sample((5, 4))
    XB_tuples = tuplify(XB)
    XA_checked, XB_checked = check_pairwise_arrays(XA_tuples, XB_tuples)
    assert_array_equal(XA_tuples, XA_checked)
    assert_array_equal(XB_tuples, XB_checked)


def test_check_preserve_type():
    # Ensures that type float32 is preserved.
    XA = np.resize(np.arange(40), (5, 8)).astype(np.float32)
    XB = np.resize(np.arange(40), (5, 8)).astype(np.float32)

    XA_checked, XB_checked = check_pairwise_arrays(XA, None)
    assert_equal(XA_checked.dtype, np.float32)

    # both float32
    XA_checked, XB_checked = check_pairwise_arrays(XA, XB)
    assert_equal(XA_checked.dtype, np.float32)
    assert_equal(XB_checked.dtype, np.float32)

    # mismatched A
    XA_checked, XB_checked = check_pairwise_arrays(XA.astype(np.float),
                                                   XB)
    assert_equal(XA_checked.dtype, np.float)
    assert_equal(XB_checked.dtype, np.float)

    # mismatched B
    XA_checked, XB_checked = check_pairwise_arrays(XA,
                                                   XB.astype(np.float))
    assert_equal(XA_checked.dtype, np.float)
    assert_equal(XB_checked.dtype, np.float)