laywerrobot/lib/python3.6/site-packages/sklearn/decomposition/tests/test_sparse_pca.py

# Author: Vlad Niculae
# License: BSD 3 clause

import sys

import numpy as np

from sklearn.utils.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_equal
from sklearn.utils.testing import assert_array_equal
from sklearn.utils.testing import SkipTest
from sklearn.utils.testing import assert_true
from sklearn.utils.testing import assert_false
from sklearn.utils.testing import assert_warns_message
from sklearn.utils.testing import if_safe_multiprocessing_with_blas

from sklearn.decomposition import SparsePCA, MiniBatchSparsePCA
from sklearn.utils import check_random_state


def generate_toy_data(n_components, n_samples, image_size, random_state=None):
    """Create a noisy toy matrix ``Y = U V + noise`` with patch-shaped rows in V.

    Parameters
    ----------
    n_components : int
        Number of rows of ``V`` (and columns of ``U``). Only three patch
        positions are hard-coded below, so a larger value raises IndexError.
    n_samples : int
        Number of rows of ``U`` and ``Y``.
    image_size : sequence of two ints
        Shape of the image each row of ``V`` is flattened from.
    random_state : optional
        Passed to ``check_random_state`` to obtain the random generator.

    Returns
    -------
    Y, U, V : arrays
        ``Y`` is ``np.dot(U, V)`` plus noise scaled by 0.1.
    """
    n_features = image_size[0] * image_size[1]

    rng = check_random_state(random_state)
    U = rng.randn(n_samples, n_components)
    # Every row of V is overwritten in the loop below; the draw is kept
    # because it advances the generator state used for the noise.
    V = rng.randn(n_components, n_features)

    centers = [(3, 3), (6, 7), (8, 1)]
    sz = [1, 2, 1]
    for k in range(n_components):
        cx, cy = centers[k]
        half = sz[k]
        # Image that is zero everywhere except a square patch of ones
        # around the k-th center (slices are clipped at the image border).
        img = np.zeros(image_size)
        img[cx - half:cx + half, cy - half:cy + half] = 1.0
        V[k, :] = img.ravel()

    # Y is defined by : Y = UV + noise
    Y = np.dot(U, V)
    Y += 0.1 * rng.randn(*Y.shape)  # Add noise
    return Y, U, V

# SparsePCA can be a bit slow. To avoid having test times go up, we
# test different aspects of the code in the same test


def test_correct_shapes():
    """SparsePCA yields components_ and transformed data of the expected shape.

    Checked for 8 components and for 13 components (the overcomplete case,
    more components than the 10 features of X).
    """
    rng = np.random.RandomState(0)
    n_samples, n_features = 12, 10
    X = rng.randn(n_samples, n_features)
    # The second value tests the overcomplete decomposition.
    for n_components in (8, 13):
        spca = SparsePCA(n_components=n_components, random_state=rng)
        U = spca.fit_transform(X)
        assert_equal(spca.components_.shape, (n_components, n_features))
        assert_equal(U.shape, (n_samples, n_components))


def test_fit_transform():
    """Compare the 'lars' and 'cd' solvers and check the ridge_alpha warning.

    Both estimators are fit on the same toy data with identical settings and
    their components_ must agree; calling transform with ``ridge_alpha`` must
    emit a DeprecationWarning.
    """
    alpha = 1
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array

    # Settings shared by both solvers.
    common = dict(n_components=3, alpha=alpha, random_state=0)

    spca_lars = SparsePCA(method='lars', **common)
    spca_lars.fit(Y)

    # Test that CD gives similar results
    spca_lasso = SparsePCA(method='cd', **common)
    spca_lasso.fit(Y)
    assert_array_almost_equal(spca_lasso.components_, spca_lars.components_)

    # Test that deprecated ridge_alpha parameter throws warning
    warning_msg = "The ridge_alpha parameter on transform()"
    for ridge_alpha in (0.01, None):
        assert_warns_message(DeprecationWarning, warning_msg,
                             spca_lars.transform, Y, ridge_alpha=ridge_alpha)


@if_safe_multiprocessing_with_blas
def test_fit_transform_parallel():
    """SparsePCA with ``n_jobs=2`` transforms the data like the default fit."""
    alpha = 1
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array

    # Reference estimator, fit without n_jobs.
    spca_lars = SparsePCA(n_components=3, method='lars', alpha=alpha,
                          random_state=0)
    spca_lars.fit(Y)
    U1 = spca_lars.transform(Y)

    # Test multiple CPUs
    spca = SparsePCA(n_components=3, n_jobs=2, method='lars', alpha=alpha,
                     random_state=0)
    spca.fit(Y)
    U2 = spca.transform(Y)

    # Guard against the trivial all-zero solution before comparing.
    all_zero = np.all(spca_lars.components_ == 0)
    assert_true(not all_zero)
    assert_array_almost_equal(U1, U2)


def test_transform_nan():
    """SparsePCA must not return NaN when a feature is 0 in all samples."""
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
    # Zero out the first feature for every sample.
    Y[:, 0] = 0
    transformed = SparsePCA(n_components=8).fit_transform(Y)
    has_nan = np.any(np.isnan(transformed))
    assert_false(has_nan)


def test_fit_transform_tall():
    """'lars' and 'cd' give matching transforms on a tall (65 x 64) array."""
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 65, (8, 8), random_state=rng)  # tall array

    # The same rng object is handed to both estimators, so the statement
    # order below is significant and mirrors the sequence of fits.
    lars_model = SparsePCA(n_components=3, method='lars', random_state=rng)
    U1 = lars_model.fit_transform(Y)

    cd_model = SparsePCA(n_components=3, method='cd', random_state=rng)
    cd_model.fit(Y)
    U2 = cd_model.transform(Y)

    assert_array_almost_equal(U1, U2)


def test_initialization():
    """With ``max_iter=0`` the fitted components_ equal the supplied V_init."""
    rng = np.random.RandomState(0)
    U_init = rng.randn(5, 3)
    V_init = rng.randn(3, 4)
    model = SparsePCA(
        n_components=3,
        U_init=U_init,
        V_init=V_init,
        max_iter=0,
        random_state=rng,
    )
    X = rng.randn(5, 4)
    model.fit(X)
    assert_array_equal(model.components_, V_init)


def test_mini_batch_correct_shapes():
    """MiniBatchSparsePCA yields components_ and output of the expected shape.

    Checked for 8 components and for 13 components (the overcomplete case,
    more components than the 10 features of X).
    """
    rng = np.random.RandomState(0)
    n_samples, n_features = 12, 10
    X = rng.randn(n_samples, n_features)
    # The second value tests the overcomplete decomposition.
    for n_components in (8, 13):
        pca = MiniBatchSparsePCA(n_components=n_components, random_state=rng)
        U = pca.fit_transform(X)
        assert_equal(pca.components_.shape, (n_components, n_features))
        assert_equal(U.shape, (n_samples, n_components))


def test_mini_batch_fit_transform():
    """Skipped test comparing MiniBatchSparsePCA fits (n_jobs=2, 'cd').

    The function raises SkipTest as its first statement, so none of the
    statements after the raise are ever executed.
    """
    raise SkipTest("skipping mini_batch_fit_transform.")
    # NOTE: unreachable from here on because of the unconditional skip above.
    alpha = 1
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
    spca_lars = MiniBatchSparsePCA(n_components=3, random_state=0,
                                   alpha=alpha)
    spca_lars.fit(Y)
    U1 = spca_lars.transform(Y)

    # Test multiple CPUs
    fake_parallelism = sys.platform == 'win32'  # fake parallelism for win32
    if fake_parallelism:
        import sklearn.externals.joblib.parallel as joblib_par
        saved_mp = joblib_par.multiprocessing
        joblib_par.multiprocessing = None
    try:
        # On other platforms we can efficiently use parallelism.
        parallel_model = MiniBatchSparsePCA(n_components=3, n_jobs=2,
                                            alpha=alpha, random_state=0)
        U2 = parallel_model.fit(Y).transform(Y)
    finally:
        if fake_parallelism:
            # Restore the module attribute that was disabled above.
            joblib_par.multiprocessing = saved_mp

    all_zero = np.all(spca_lars.components_ == 0)
    assert_true(not all_zero)
    assert_array_almost_equal(U1, U2)

    # Test that CD gives similar results
    spca_lasso = MiniBatchSparsePCA(n_components=3, method='cd', alpha=alpha,
                                    random_state=0)
    spca_lasso.fit(Y)
    assert_array_almost_equal(spca_lasso.components_, spca_lars.components_)
first commit 2020-08-27 21:55:39 +02:00			`# Author: Vlad Niculae`
			`# License: BSD 3 clause`

			`import sys`

			`import numpy as np`

			`from sklearn.utils.testing import assert_array_almost_equal`
			`from sklearn.utils.testing import assert_equal`
			`from sklearn.utils.testing import assert_array_equal`
			`from sklearn.utils.testing import SkipTest`
			`from sklearn.utils.testing import assert_true`
			`from sklearn.utils.testing import assert_false`
			`from sklearn.utils.testing import assert_warns_message`
			`from sklearn.utils.testing import if_safe_multiprocessing_with_blas`

			`from sklearn.decomposition import SparsePCA, MiniBatchSparsePCA`
			`from sklearn.utils import check_random_state`


			`def generate_toy_data(n_components, n_samples, image_size, random_state=None):`
			`n_features = image_size[0] * image_size[1]`

			`rng = check_random_state(random_state)`
			`U = rng.randn(n_samples, n_components)`
			`V = rng.randn(n_components, n_features)`

			`centers = [(3, 3), (6, 7), (8, 1)]`
			`sz = [1, 2, 1]`
			`for k in range(n_components):`
			`img = np.zeros(image_size)`
			`xmin, xmax = centers[k][0] - sz[k], centers[k][0] + sz[k]`
			`ymin, ymax = centers[k][1] - sz[k], centers[k][1] + sz[k]`
			`img[xmin:xmax][:, ymin:ymax] = 1.0`
			`V[k, :] = img.ravel()`

			`# Y is defined by : Y = UV + noise`
			`Y = np.dot(U, V)`
			`Y += 0.1 * rng.randn(Y.shape[0], Y.shape[1]) # Add noise`
			`return Y, U, V`

			`# SparsePCA can be a bit slow. To avoid having test times go up, we`
			`# test different aspects of the code in the same test`


			`def test_correct_shapes():`
			`rng = np.random.RandomState(0)`
			`X = rng.randn(12, 10)`
			`spca = SparsePCA(n_components=8, random_state=rng)`
			`U = spca.fit_transform(X)`
			`assert_equal(spca.components_.shape, (8, 10))`
			`assert_equal(U.shape, (12, 8))`
			`# test overcomplete decomposition`
			`spca = SparsePCA(n_components=13, random_state=rng)`
			`U = spca.fit_transform(X)`
			`assert_equal(spca.components_.shape, (13, 10))`
			`assert_equal(U.shape, (12, 13))`


			`def test_fit_transform():`
			`alpha = 1`
			`rng = np.random.RandomState(0)`
			`Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng) # wide array`
			`spca_lars = SparsePCA(n_components=3, method='lars', alpha=alpha,`
			`random_state=0)`
			`spca_lars.fit(Y)`

			`# Test that CD gives similar results`
			`spca_lasso = SparsePCA(n_components=3, method='cd', random_state=0,`
			`alpha=alpha)`
			`spca_lasso.fit(Y)`
			`assert_array_almost_equal(spca_lasso.components_, spca_lars.components_)`

			`# Test that deprecated ridge_alpha parameter throws warning`
			`warning_msg = "The ridge_alpha parameter on transform()"`
			`assert_warns_message(DeprecationWarning, warning_msg, spca_lars.transform,`
			`Y, ridge_alpha=0.01)`
			`assert_warns_message(DeprecationWarning, warning_msg, spca_lars.transform,`
			`Y, ridge_alpha=None)`


			`@if_safe_multiprocessing_with_blas`
			`def test_fit_transform_parallel():`
			`alpha = 1`
			`rng = np.random.RandomState(0)`
			`Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng) # wide array`
			`spca_lars = SparsePCA(n_components=3, method='lars', alpha=alpha,`
			`random_state=0)`
			`spca_lars.fit(Y)`
			`U1 = spca_lars.transform(Y)`
			`# Test multiple CPUs`
			`spca = SparsePCA(n_components=3, n_jobs=2, method='lars', alpha=alpha,`
			`random_state=0).fit(Y)`
			`U2 = spca.transform(Y)`
			`assert_true(not np.all(spca_lars.components_ == 0))`
			`assert_array_almost_equal(U1, U2)`


			`def test_transform_nan():`
			`# Test that SparsePCA won't return NaN when there is 0 feature in all`
			`# samples.`
			`rng = np.random.RandomState(0)`
			`Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng) # wide array`
			`Y[:, 0] = 0`
			`estimator = SparsePCA(n_components=8)`
			`assert_false(np.any(np.isnan(estimator.fit_transform(Y))))`


			`def test_fit_transform_tall():`
			`rng = np.random.RandomState(0)`
			`Y, _, _ = generate_toy_data(3, 65, (8, 8), random_state=rng) # tall array`
			`spca_lars = SparsePCA(n_components=3, method='lars',`
			`random_state=rng)`
			`U1 = spca_lars.fit_transform(Y)`
			`spca_lasso = SparsePCA(n_components=3, method='cd', random_state=rng)`
			`U2 = spca_lasso.fit(Y).transform(Y)`
			`assert_array_almost_equal(U1, U2)`


			`def test_initialization():`
			`rng = np.random.RandomState(0)`
			`U_init = rng.randn(5, 3)`
			`V_init = rng.randn(3, 4)`
			`model = SparsePCA(n_components=3, U_init=U_init, V_init=V_init, max_iter=0,`
			`random_state=rng)`
			`model.fit(rng.randn(5, 4))`
			`assert_array_equal(model.components_, V_init)`


			`def test_mini_batch_correct_shapes():`
			`rng = np.random.RandomState(0)`
			`X = rng.randn(12, 10)`
			`pca = MiniBatchSparsePCA(n_components=8, random_state=rng)`
			`U = pca.fit_transform(X)`
			`assert_equal(pca.components_.shape, (8, 10))`
			`assert_equal(U.shape, (12, 8))`
			`# test overcomplete decomposition`
			`pca = MiniBatchSparsePCA(n_components=13, random_state=rng)`
			`U = pca.fit_transform(X)`
			`assert_equal(pca.components_.shape, (13, 10))`
			`assert_equal(U.shape, (12, 13))`


			`def test_mini_batch_fit_transform():`
			`raise SkipTest("skipping mini_batch_fit_transform.")`
			`alpha = 1`
			`rng = np.random.RandomState(0)`
			`Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng) # wide array`
			`spca_lars = MiniBatchSparsePCA(n_components=3, random_state=0,`
			`alpha=alpha).fit(Y)`
			`U1 = spca_lars.transform(Y)`
			`# Test multiple CPUs`
			`if sys.platform == 'win32': # fake parallelism for win32`
			`import sklearn.externals.joblib.parallel as joblib_par`
			`_mp = joblib_par.multiprocessing`
			`joblib_par.multiprocessing = None`
			`try:`
			`U2 = MiniBatchSparsePCA(n_components=3, n_jobs=2, alpha=alpha,`
			`random_state=0).fit(Y).transform(Y)`
			`finally:`
			`joblib_par.multiprocessing = _mp`
			`else: # we can efficiently use parallelism`
			`U2 = MiniBatchSparsePCA(n_components=3, n_jobs=2, alpha=alpha,`
			`random_state=0).fit(Y).transform(Y)`
			`assert_true(not np.all(spca_lars.components_ == 0))`
			`assert_array_almost_equal(U1, U2)`
			`# Test that CD gives similar results`
			`spca_lasso = MiniBatchSparsePCA(n_components=3, method='cd', alpha=alpha,`
			`random_state=0).fit(Y)`
			`assert_array_almost_equal(spca_lasso.components_, spca_lars.components_)`