import numpy as np import itertools from sklearn.exceptions import ConvergenceWarning from sklearn.utils import check_array from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_true from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_raises from sklearn.utils.testing import ignore_warnings from sklearn.utils.testing import TempMemmap from sklearn.decomposition import DictionaryLearning from sklearn.decomposition import MiniBatchDictionaryLearning from sklearn.decomposition import SparseCoder from sklearn.decomposition import dict_learning_online from sklearn.decomposition import sparse_encode rng_global = np.random.RandomState(0) n_samples, n_features = 10, 8 X = rng_global.randn(n_samples, n_features) def test_sparse_encode_shapes_omp(): rng = np.random.RandomState(0) algorithms = ['omp', 'lasso_lars', 'lasso_cd', 'lars', 'threshold'] for n_components, n_samples in itertools.product([1, 5], [1, 9]): X_ = rng.randn(n_samples, n_features) dictionary = rng.randn(n_components, n_features) for algorithm, n_jobs in itertools.product(algorithms, [1, 3]): code = sparse_encode(X_, dictionary, algorithm=algorithm, n_jobs=n_jobs) assert_equal(code.shape, (n_samples, n_components)) def test_dict_learning_shapes(): n_components = 5 dico = DictionaryLearning(n_components, random_state=0).fit(X) assert_equal(dico.components_.shape, (n_components, n_features)) n_components = 1 dico = DictionaryLearning(n_components, random_state=0).fit(X) assert_equal(dico.components_.shape, (n_components, n_features)) assert_equal(dico.transform(X).shape, (X.shape[0], n_components)) def test_dict_learning_overcomplete(): n_components = 12 dico = DictionaryLearning(n_components, random_state=0).fit(X) assert_true(dico.components_.shape == (n_components, n_features)) def test_dict_learning_reconstruction(): n_components = 12 dico = DictionaryLearning(n_components, transform_algorithm='omp', transform_alpha=0.001, random_state=0) code = dico.fit(X).transform(X) assert_array_almost_equal(np.dot(code, dico.components_), X) dico.set_params(transform_algorithm='lasso_lars') code = dico.transform(X) assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2) # used to test lars here too, but there's no guarantee the number of # nonzero atoms is right. def test_dict_learning_reconstruction_parallel(): # regression test that parallel reconstruction works with n_jobs=-1 n_components = 12 dico = DictionaryLearning(n_components, transform_algorithm='omp', transform_alpha=0.001, random_state=0, n_jobs=-1) code = dico.fit(X).transform(X) assert_array_almost_equal(np.dot(code, dico.components_), X) dico.set_params(transform_algorithm='lasso_lars') code = dico.transform(X) assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2) def test_dict_learning_lassocd_readonly_data(): n_components = 12 with TempMemmap(X) as X_read_only: dico = DictionaryLearning(n_components, transform_algorithm='lasso_cd', transform_alpha=0.001, random_state=0, n_jobs=-1) with ignore_warnings(category=ConvergenceWarning): code = dico.fit(X_read_only).transform(X_read_only) assert_array_almost_equal(np.dot(code, dico.components_), X_read_only, decimal=2) def test_dict_learning_nonzero_coefs(): n_components = 4 dico = DictionaryLearning(n_components, transform_algorithm='lars', transform_n_nonzero_coefs=3, random_state=0) code = dico.fit(X).transform(X[np.newaxis, 1]) assert_true(len(np.flatnonzero(code)) == 3) dico.set_params(transform_algorithm='omp') code = dico.transform(X[np.newaxis, 1]) assert_equal(len(np.flatnonzero(code)), 3) def test_dict_learning_unknown_fit_algorithm(): n_components = 5 dico = DictionaryLearning(n_components, fit_algorithm='') assert_raises(ValueError, dico.fit, X) def test_dict_learning_split(): n_components = 5 dico = DictionaryLearning(n_components, transform_algorithm='threshold', random_state=0) code = dico.fit(X).transform(X) dico.split_sign = True split_code = dico.transform(X) assert_array_equal(split_code[:, :n_components] - split_code[:, n_components:], code) def test_dict_learning_online_shapes(): rng = np.random.RandomState(0) n_components = 8 code, dictionary = dict_learning_online(X, n_components=n_components, alpha=1, random_state=rng) assert_equal(code.shape, (n_samples, n_components)) assert_equal(dictionary.shape, (n_components, n_features)) assert_equal(np.dot(code, dictionary).shape, X.shape) def test_dict_learning_online_verbosity(): n_components = 5 # test verbosity from sklearn.externals.six.moves import cStringIO as StringIO import sys old_stdout = sys.stdout try: sys.stdout = StringIO() dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=1, random_state=0) dico.fit(X) dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=2, random_state=0) dico.fit(X) dict_learning_online(X, n_components=n_components, alpha=1, verbose=1, random_state=0) dict_learning_online(X, n_components=n_components, alpha=1, verbose=2, random_state=0) finally: sys.stdout = old_stdout assert_true(dico.components_.shape == (n_components, n_features)) def test_dict_learning_online_estimator_shapes(): n_components = 5 dico = MiniBatchDictionaryLearning(n_components, n_iter=20, random_state=0) dico.fit(X) assert_true(dico.components_.shape == (n_components, n_features)) def test_dict_learning_online_overcomplete(): n_components = 12 dico = MiniBatchDictionaryLearning(n_components, n_iter=20, random_state=0).fit(X) assert_true(dico.components_.shape == (n_components, n_features)) def test_dict_learning_online_initialization(): n_components = 12 rng = np.random.RandomState(0) V = rng.randn(n_components, n_features) dico = MiniBatchDictionaryLearning(n_components, n_iter=0, dict_init=V, random_state=0).fit(X) assert_array_equal(dico.components_, V) def test_dict_learning_online_partial_fit(): n_components = 12 rng = np.random.RandomState(0) V = rng.randn(n_components, n_features) # random init V /= np.sum(V ** 2, axis=1)[:, np.newaxis] dict1 = MiniBatchDictionaryLearning(n_components, n_iter=10 * len(X), batch_size=1, alpha=1, shuffle=False, dict_init=V, random_state=0).fit(X) dict2 = MiniBatchDictionaryLearning(n_components, alpha=1, n_iter=1, dict_init=V, random_state=0) for i in range(10): for sample in X: dict2.partial_fit(sample[np.newaxis, :]) assert_true(not np.all(sparse_encode(X, dict1.components_, alpha=1) == 0)) assert_array_almost_equal(dict1.components_, dict2.components_, decimal=2) def test_sparse_encode_shapes(): n_components = 12 rng = np.random.RandomState(0) V = rng.randn(n_components, n_features) # random init V /= np.sum(V ** 2, axis=1)[:, np.newaxis] for algo in ('lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'): code = sparse_encode(X, V, algorithm=algo) assert_equal(code.shape, (n_samples, n_components)) def test_sparse_encode_input(): n_components = 100 rng = np.random.RandomState(0) V = rng.randn(n_components, n_features) # random init V /= np.sum(V ** 2, axis=1)[:, np.newaxis] Xf = check_array(X, order='F') for algo in ('lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'): a = sparse_encode(X, V, algorithm=algo) b = sparse_encode(Xf, V, algorithm=algo) assert_array_almost_equal(a, b) def test_sparse_encode_error(): n_components = 12 rng = np.random.RandomState(0) V = rng.randn(n_components, n_features) # random init V /= np.sum(V ** 2, axis=1)[:, np.newaxis] code = sparse_encode(X, V, alpha=0.001) assert_true(not np.all(code == 0)) assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1) def test_sparse_encode_error_default_sparsity(): rng = np.random.RandomState(0) X = rng.randn(100, 64) D = rng.randn(2, 64) code = ignore_warnings(sparse_encode)(X, D, algorithm='omp', n_nonzero_coefs=None) assert_equal(code.shape, (100, 2)) def test_unknown_method(): n_components = 12 rng = np.random.RandomState(0) V = rng.randn(n_components, n_features) # random init assert_raises(ValueError, sparse_encode, X, V, algorithm="") def test_sparse_coder_estimator(): n_components = 12 rng = np.random.RandomState(0) V = rng.randn(n_components, n_features) # random init V /= np.sum(V ** 2, axis=1)[:, np.newaxis] code = SparseCoder(dictionary=V, transform_algorithm='lasso_lars', transform_alpha=0.001).transform(X) assert_true(not np.all(code == 0)) assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1)