laywerrobot/lib/python3.6/site-packages/sklearn/covariance/tests/test_graph_lasso.py

157 lines
6 KiB
Python
Raw Normal View History

2020-08-27 21:55:39 +02:00
""" Test the graph_lasso module.
"""
import sys
import numpy as np
from scipy import linalg
from sklearn.utils.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_array_less
from sklearn.utils.testing import assert_warns_message
from sklearn.covariance import (graph_lasso, GraphLasso, GraphLassoCV,
empirical_covariance)
from sklearn.datasets.samples_generator import make_sparse_spd_matrix
from sklearn.externals.six.moves import StringIO
from sklearn.utils import check_random_state
from sklearn import datasets
from numpy.testing import assert_equal
def test_graph_lasso(random_state=0):
# Sample data from a sparse multivariate normal
dim = 20
n_samples = 100
random_state = check_random_state(random_state)
prec = make_sparse_spd_matrix(dim, alpha=.95,
random_state=random_state)
cov = linalg.inv(prec)
X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
emp_cov = empirical_covariance(X)
for alpha in (0., .1, .25):
covs = dict()
icovs = dict()
for method in ('cd', 'lars'):
cov_, icov_, costs = graph_lasso(emp_cov, alpha=alpha, mode=method,
return_costs=True)
covs[method] = cov_
icovs[method] = icov_
costs, dual_gap = np.array(costs).T
# Check that the costs always decrease (doesn't hold if alpha == 0)
if not alpha == 0:
assert_array_less(np.diff(costs), 0)
# Check that the 2 approaches give similar results
assert_array_almost_equal(covs['cd'], covs['lars'], decimal=4)
assert_array_almost_equal(icovs['cd'], icovs['lars'], decimal=4)
# Smoke test the estimator
model = GraphLasso(alpha=.25).fit(X)
model.score(X)
assert_array_almost_equal(model.covariance_, covs['cd'], decimal=4)
assert_array_almost_equal(model.covariance_, covs['lars'], decimal=4)
# For a centered matrix, assume_centered could be chosen True or False
# Check that this returns indeed the same result for centered data
Z = X - X.mean(0)
precs = list()
for assume_centered in (False, True):
prec_ = GraphLasso(assume_centered=assume_centered).fit(Z).precision_
precs.append(prec_)
assert_array_almost_equal(precs[0], precs[1])
def test_graph_lasso_iris():
# Hard-coded solution from R glasso package for alpha=1.0
# The iris datasets in R and scikit-learn do not match in a few places,
# these values are for the scikit-learn version.
cov_R = np.array([
[0.68112222, 0.0, 0.2651911, 0.02467558],
[0.00, 0.1867507, 0.0, 0.00],
[0.26519111, 0.0, 3.0924249, 0.28774489],
[0.02467558, 0.0, 0.2877449, 0.57853156]
])
icov_R = np.array([
[1.5188780, 0.0, -0.1302515, 0.0],
[0.0, 5.354733, 0.0, 0.0],
[-0.1302515, 0.0, 0.3502322, -0.1686399],
[0.0, 0.0, -0.1686399, 1.8123908]
])
X = datasets.load_iris().data
emp_cov = empirical_covariance(X)
for method in ('cd', 'lars'):
cov, icov = graph_lasso(emp_cov, alpha=1.0, return_costs=False,
mode=method)
assert_array_almost_equal(cov, cov_R)
assert_array_almost_equal(icov, icov_R)
def test_graph_lasso_iris_singular():
# Small subset of rows to test the rank-deficient case
# Need to choose samples such that none of the variances are zero
indices = np.arange(10, 13)
# Hard-coded solution from R glasso package for alpha=0.01
cov_R = np.array([
[0.08, 0.056666662595, 0.00229729713223, 0.00153153142149],
[0.056666662595, 0.082222222222, 0.00333333333333, 0.00222222222222],
[0.002297297132, 0.003333333333, 0.00666666666667, 0.00009009009009],
[0.001531531421, 0.002222222222, 0.00009009009009, 0.00222222222222]
])
icov_R = np.array([
[24.42244057, -16.831679593, 0.0, 0.0],
[-16.83168201, 24.351841681, -6.206896552, -12.5],
[0.0, -6.206896171, 153.103448276, 0.0],
[0.0, -12.499999143, 0.0, 462.5]
])
X = datasets.load_iris().data[indices, :]
emp_cov = empirical_covariance(X)
for method in ('cd', 'lars'):
cov, icov = graph_lasso(emp_cov, alpha=0.01, return_costs=False,
mode=method)
assert_array_almost_equal(cov, cov_R, decimal=5)
assert_array_almost_equal(icov, icov_R, decimal=5)
def test_graph_lasso_cv(random_state=1):
# Sample data from a sparse multivariate normal
dim = 5
n_samples = 6
random_state = check_random_state(random_state)
prec = make_sparse_spd_matrix(dim, alpha=.96,
random_state=random_state)
cov = linalg.inv(prec)
X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
# Capture stdout, to smoke test the verbose mode
orig_stdout = sys.stdout
try:
sys.stdout = StringIO()
# We need verbose very high so that Parallel prints on stdout
GraphLassoCV(verbose=100, alphas=5, tol=1e-1).fit(X)
finally:
sys.stdout = orig_stdout
# Smoke test with specified alphas
GraphLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1).fit(X)
def test_deprecated_grid_scores(random_state=1):
dim = 5
n_samples = 6
random_state = check_random_state(random_state)
prec = make_sparse_spd_matrix(dim, alpha=.96,
random_state=random_state)
cov = linalg.inv(prec)
X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
graph_lasso = GraphLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1)
graph_lasso.fit(X)
depr_message = ("Attribute grid_scores was deprecated in version "
"0.19 and will be removed in 0.21. Use "
"``grid_scores_`` instead")
assert_warns_message(DeprecationWarning, depr_message,
lambda: graph_lasso.grid_scores)
assert_equal(graph_lasso.grid_scores, graph_lasso.grid_scores_)