701 lines
26 KiB
Python
701 lines
26 KiB
Python
|
"""GraphLasso: sparse inverse covariance estimation with an l1-penalized
|
||
|
estimator.
|
||
|
"""
|
||
|
|
||
|
# Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
|
||
|
# License: BSD 3 clause
|
||
|
# Copyright: INRIA
|
||
|
import warnings
|
||
|
import operator
|
||
|
import sys
|
||
|
import time
|
||
|
|
||
|
import numpy as np
|
||
|
from scipy import linalg
|
||
|
|
||
|
from .empirical_covariance_ import (empirical_covariance, EmpiricalCovariance,
|
||
|
log_likelihood)
|
||
|
|
||
|
from ..exceptions import ConvergenceWarning
|
||
|
from ..utils.validation import check_random_state, check_array
|
||
|
from ..utils import deprecated
|
||
|
from ..utils.fixes import _Sequence as Sequence
|
||
|
from ..linear_model import lars_path
|
||
|
from ..linear_model import cd_fast
|
||
|
from ..model_selection import check_cv, cross_val_score
|
||
|
from ..externals.joblib import Parallel, delayed
|
||
|
|
||
|
|
||
|
# Helper functions to compute the objective and dual objective functions
|
||
|
# of the l1-penalized estimator
|
||
|
def _objective(mle, precision_, alpha):
|
||
|
"""Evaluation of the graph-lasso objective function
|
||
|
|
||
|
the objective function is made of a shifted scaled version of the
|
||
|
normalized log-likelihood (i.e. its empirical mean over the samples) and a
|
||
|
penalisation term to promote sparsity
|
||
|
"""
|
||
|
p = precision_.shape[0]
|
||
|
cost = - 2. * log_likelihood(mle, precision_) + p * np.log(2 * np.pi)
|
||
|
cost += alpha * (np.abs(precision_).sum()
|
||
|
- np.abs(np.diag(precision_)).sum())
|
||
|
return cost
|
||
|
|
||
|
|
||
|
def _dual_gap(emp_cov, precision_, alpha):
|
||
|
"""Expression of the dual gap convergence criterion
|
||
|
|
||
|
The specific definition is given in Duchi "Projected Subgradient Methods
|
||
|
for Learning Sparse Gaussians".
|
||
|
"""
|
||
|
gap = np.sum(emp_cov * precision_)
|
||
|
gap -= precision_.shape[0]
|
||
|
gap += alpha * (np.abs(precision_).sum()
|
||
|
- np.abs(np.diag(precision_)).sum())
|
||
|
return gap
|
||
|
|
||
|
|
||
|
def alpha_max(emp_cov):
|
||
|
"""Find the maximum alpha for which there are some non-zeros off-diagonal.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
emp_cov : 2D array, (n_features, n_features)
|
||
|
The sample covariance matrix
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
|
||
|
This results from the bound for the all the Lasso that are solved
|
||
|
in GraphLasso: each time, the row of cov corresponds to Xy. As the
|
||
|
bound for alpha is given by `max(abs(Xy))`, the result follows.
|
||
|
|
||
|
"""
|
||
|
A = np.copy(emp_cov)
|
||
|
A.flat[::A.shape[0] + 1] = 0
|
||
|
return np.max(np.abs(A))
|
||
|
|
||
|
|
||
|
# The g-lasso algorithm
|
||
|
|
||
|
def graph_lasso(emp_cov, alpha, cov_init=None, mode='cd', tol=1e-4,
|
||
|
enet_tol=1e-4, max_iter=100, verbose=False,
|
||
|
return_costs=False, eps=np.finfo(np.float64).eps,
|
||
|
return_n_iter=False):
|
||
|
"""l1-penalized covariance estimator
|
||
|
|
||
|
Read more in the :ref:`User Guide <sparse_inverse_covariance>`.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
emp_cov : 2D ndarray, shape (n_features, n_features)
|
||
|
Empirical covariance from which to compute the covariance estimate.
|
||
|
|
||
|
alpha : positive float
|
||
|
The regularization parameter: the higher alpha, the more
|
||
|
regularization, the sparser the inverse covariance.
|
||
|
|
||
|
cov_init : 2D array (n_features, n_features), optional
|
||
|
The initial guess for the covariance.
|
||
|
|
||
|
mode : {'cd', 'lars'}
|
||
|
The Lasso solver to use: coordinate descent or LARS. Use LARS for
|
||
|
very sparse underlying graphs, where p > n. Elsewhere prefer cd
|
||
|
which is more numerically stable.
|
||
|
|
||
|
tol : positive float, optional
|
||
|
The tolerance to declare convergence: if the dual gap goes below
|
||
|
this value, iterations are stopped.
|
||
|
|
||
|
enet_tol : positive float, optional
|
||
|
The tolerance for the elastic net solver used to calculate the descent
|
||
|
direction. This parameter controls the accuracy of the search direction
|
||
|
for a given column update, not of the overall parameter estimate. Only
|
||
|
used for mode='cd'.
|
||
|
|
||
|
max_iter : integer, optional
|
||
|
The maximum number of iterations.
|
||
|
|
||
|
verbose : boolean, optional
|
||
|
If verbose is True, the objective function and dual gap are
|
||
|
printed at each iteration.
|
||
|
|
||
|
return_costs : boolean, optional
|
||
|
If return_costs is True, the objective function and dual gap
|
||
|
at each iteration are returned.
|
||
|
|
||
|
eps : float, optional
|
||
|
The machine-precision regularization in the computation of the
|
||
|
Cholesky diagonal factors. Increase this for very ill-conditioned
|
||
|
systems.
|
||
|
|
||
|
return_n_iter : bool, optional
|
||
|
Whether or not to return the number of iterations.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
covariance : 2D ndarray, shape (n_features, n_features)
|
||
|
The estimated covariance matrix.
|
||
|
|
||
|
precision : 2D ndarray, shape (n_features, n_features)
|
||
|
The estimated (sparse) precision matrix.
|
||
|
|
||
|
costs : list of (objective, dual_gap) pairs
|
||
|
The list of values of the objective function and the dual gap at
|
||
|
each iteration. Returned only if return_costs is True.
|
||
|
|
||
|
n_iter : int
|
||
|
Number of iterations. Returned only if `return_n_iter` is set to True.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
GraphLasso, GraphLassoCV
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
The algorithm employed to solve this problem is the GLasso algorithm,
|
||
|
from the Friedman 2008 Biostatistics paper. It is the same algorithm
|
||
|
as in the R `glasso` package.
|
||
|
|
||
|
One possible difference with the `glasso` R package is that the
|
||
|
diagonal coefficients are not penalized.
|
||
|
|
||
|
"""
|
||
|
_, n_features = emp_cov.shape
|
||
|
if alpha == 0:
|
||
|
if return_costs:
|
||
|
precision_ = linalg.inv(emp_cov)
|
||
|
cost = - 2. * log_likelihood(emp_cov, precision_)
|
||
|
cost += n_features * np.log(2 * np.pi)
|
||
|
d_gap = np.sum(emp_cov * precision_) - n_features
|
||
|
if return_n_iter:
|
||
|
return emp_cov, precision_, (cost, d_gap), 0
|
||
|
else:
|
||
|
return emp_cov, precision_, (cost, d_gap)
|
||
|
else:
|
||
|
if return_n_iter:
|
||
|
return emp_cov, linalg.inv(emp_cov), 0
|
||
|
else:
|
||
|
return emp_cov, linalg.inv(emp_cov)
|
||
|
if cov_init is None:
|
||
|
covariance_ = emp_cov.copy()
|
||
|
else:
|
||
|
covariance_ = cov_init.copy()
|
||
|
# As a trivial regularization (Tikhonov like), we scale down the
|
||
|
# off-diagonal coefficients of our starting point: This is needed, as
|
||
|
# in the cross-validation the cov_init can easily be
|
||
|
# ill-conditioned, and the CV loop blows. Beside, this takes
|
||
|
# conservative stand-point on the initial conditions, and it tends to
|
||
|
# make the convergence go faster.
|
||
|
covariance_ *= 0.95
|
||
|
diagonal = emp_cov.flat[::n_features + 1]
|
||
|
covariance_.flat[::n_features + 1] = diagonal
|
||
|
precision_ = linalg.pinvh(covariance_)
|
||
|
|
||
|
indices = np.arange(n_features)
|
||
|
costs = list()
|
||
|
# The different l1 regression solver have different numerical errors
|
||
|
if mode == 'cd':
|
||
|
errors = dict(over='raise', invalid='ignore')
|
||
|
else:
|
||
|
errors = dict(invalid='raise')
|
||
|
try:
|
||
|
# be robust to the max_iter=0 edge case, see:
|
||
|
# https://github.com/scikit-learn/scikit-learn/issues/4134
|
||
|
d_gap = np.inf
|
||
|
for i in range(max_iter):
|
||
|
for idx in range(n_features):
|
||
|
sub_covariance = np.ascontiguousarray(
|
||
|
covariance_[indices != idx].T[indices != idx])
|
||
|
row = emp_cov[idx, indices != idx]
|
||
|
with np.errstate(**errors):
|
||
|
if mode == 'cd':
|
||
|
# Use coordinate descent
|
||
|
coefs = -(precision_[indices != idx, idx]
|
||
|
/ (precision_[idx, idx] + 1000 * eps))
|
||
|
coefs, _, _, _ = cd_fast.enet_coordinate_descent_gram(
|
||
|
coefs, alpha, 0, sub_covariance, row, row,
|
||
|
max_iter, enet_tol, check_random_state(None), False)
|
||
|
else:
|
||
|
# Use LARS
|
||
|
_, _, coefs = lars_path(
|
||
|
sub_covariance, row, Xy=row, Gram=sub_covariance,
|
||
|
alpha_min=alpha / (n_features - 1), copy_Gram=True,
|
||
|
eps=eps, method='lars', return_path=False)
|
||
|
# Update the precision matrix
|
||
|
precision_[idx, idx] = (
|
||
|
1. / (covariance_[idx, idx]
|
||
|
- np.dot(covariance_[indices != idx, idx], coefs)))
|
||
|
precision_[indices != idx, idx] = (- precision_[idx, idx]
|
||
|
* coefs)
|
||
|
precision_[idx, indices != idx] = (- precision_[idx, idx]
|
||
|
* coefs)
|
||
|
coefs = np.dot(sub_covariance, coefs)
|
||
|
covariance_[idx, indices != idx] = coefs
|
||
|
covariance_[indices != idx, idx] = coefs
|
||
|
d_gap = _dual_gap(emp_cov, precision_, alpha)
|
||
|
cost = _objective(emp_cov, precision_, alpha)
|
||
|
if verbose:
|
||
|
print(
|
||
|
'[graph_lasso] Iteration % 3i, cost % 3.2e, dual gap %.3e'
|
||
|
% (i, cost, d_gap))
|
||
|
if return_costs:
|
||
|
costs.append((cost, d_gap))
|
||
|
if np.abs(d_gap) < tol:
|
||
|
break
|
||
|
if not np.isfinite(cost) and i > 0:
|
||
|
raise FloatingPointError('Non SPD result: the system is '
|
||
|
'too ill-conditioned for this solver')
|
||
|
else:
|
||
|
warnings.warn('graph_lasso: did not converge after %i iteration:'
|
||
|
' dual gap: %.3e' % (max_iter, d_gap),
|
||
|
ConvergenceWarning)
|
||
|
except FloatingPointError as e:
|
||
|
e.args = (e.args[0]
|
||
|
+ '. The system is too ill-conditioned for this solver',)
|
||
|
raise e
|
||
|
|
||
|
if return_costs:
|
||
|
if return_n_iter:
|
||
|
return covariance_, precision_, costs, i + 1
|
||
|
else:
|
||
|
return covariance_, precision_, costs
|
||
|
else:
|
||
|
if return_n_iter:
|
||
|
return covariance_, precision_, i + 1
|
||
|
else:
|
||
|
return covariance_, precision_
|
||
|
|
||
|
|
||
|
class GraphLasso(EmpiricalCovariance):
|
||
|
"""Sparse inverse covariance estimation with an l1-penalized estimator.
|
||
|
|
||
|
Read more in the :ref:`User Guide <sparse_inverse_covariance>`.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
alpha : positive float, default 0.01
|
||
|
The regularization parameter: the higher alpha, the more
|
||
|
regularization, the sparser the inverse covariance.
|
||
|
|
||
|
mode : {'cd', 'lars'}, default 'cd'
|
||
|
The Lasso solver to use: coordinate descent or LARS. Use LARS for
|
||
|
very sparse underlying graphs, where p > n. Elsewhere prefer cd
|
||
|
which is more numerically stable.
|
||
|
|
||
|
tol : positive float, default 1e-4
|
||
|
The tolerance to declare convergence: if the dual gap goes below
|
||
|
this value, iterations are stopped.
|
||
|
|
||
|
enet_tol : positive float, optional
|
||
|
The tolerance for the elastic net solver used to calculate the descent
|
||
|
direction. This parameter controls the accuracy of the search direction
|
||
|
for a given column update, not of the overall parameter estimate. Only
|
||
|
used for mode='cd'.
|
||
|
|
||
|
max_iter : integer, default 100
|
||
|
The maximum number of iterations.
|
||
|
|
||
|
verbose : boolean, default False
|
||
|
If verbose is True, the objective function and dual gap are
|
||
|
plotted at each iteration.
|
||
|
|
||
|
assume_centered : boolean, default False
|
||
|
If True, data are not centered before computation.
|
||
|
Useful when working with data whose mean is almost, but not exactly
|
||
|
zero.
|
||
|
If False, data are centered before computation.
|
||
|
|
||
|
Attributes
|
||
|
----------
|
||
|
covariance_ : array-like, shape (n_features, n_features)
|
||
|
Estimated covariance matrix
|
||
|
|
||
|
precision_ : array-like, shape (n_features, n_features)
|
||
|
Estimated pseudo inverse matrix.
|
||
|
|
||
|
n_iter_ : int
|
||
|
Number of iterations run.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
graph_lasso, GraphLassoCV
|
||
|
"""
|
||
|
|
||
|
def __init__(self, alpha=.01, mode='cd', tol=1e-4, enet_tol=1e-4,
|
||
|
max_iter=100, verbose=False, assume_centered=False):
|
||
|
super(GraphLasso, self).__init__(assume_centered=assume_centered)
|
||
|
self.alpha = alpha
|
||
|
self.mode = mode
|
||
|
self.tol = tol
|
||
|
self.enet_tol = enet_tol
|
||
|
self.max_iter = max_iter
|
||
|
self.verbose = verbose
|
||
|
|
||
|
def fit(self, X, y=None):
|
||
|
"""Fits the GraphLasso model to X.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
X : ndarray, shape (n_samples, n_features)
|
||
|
Data from which to compute the covariance estimate
|
||
|
y : (ignored)
|
||
|
"""
|
||
|
# Covariance does not make sense for a single feature
|
||
|
X = check_array(X, ensure_min_features=2, ensure_min_samples=2,
|
||
|
estimator=self)
|
||
|
|
||
|
if self.assume_centered:
|
||
|
self.location_ = np.zeros(X.shape[1])
|
||
|
else:
|
||
|
self.location_ = X.mean(0)
|
||
|
emp_cov = empirical_covariance(
|
||
|
X, assume_centered=self.assume_centered)
|
||
|
self.covariance_, self.precision_, self.n_iter_ = graph_lasso(
|
||
|
emp_cov, alpha=self.alpha, mode=self.mode, tol=self.tol,
|
||
|
enet_tol=self.enet_tol, max_iter=self.max_iter,
|
||
|
verbose=self.verbose, return_n_iter=True)
|
||
|
return self
|
||
|
|
||
|
|
||
|
# Cross-validation with GraphLasso
|
||
|
def graph_lasso_path(X, alphas, cov_init=None, X_test=None, mode='cd',
|
||
|
tol=1e-4, enet_tol=1e-4, max_iter=100, verbose=False):
|
||
|
"""l1-penalized covariance estimator along a path of decreasing alphas
|
||
|
|
||
|
Read more in the :ref:`User Guide <sparse_inverse_covariance>`.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
X : 2D ndarray, shape (n_samples, n_features)
|
||
|
Data from which to compute the covariance estimate.
|
||
|
|
||
|
alphas : list of positive floats
|
||
|
The list of regularization parameters, decreasing order.
|
||
|
|
||
|
X_test : 2D array, shape (n_test_samples, n_features), optional
|
||
|
Optional test matrix to measure generalisation error.
|
||
|
|
||
|
mode : {'cd', 'lars'}
|
||
|
The Lasso solver to use: coordinate descent or LARS. Use LARS for
|
||
|
very sparse underlying graphs, where p > n. Elsewhere prefer cd
|
||
|
which is more numerically stable.
|
||
|
|
||
|
tol : positive float, optional
|
||
|
The tolerance to declare convergence: if the dual gap goes below
|
||
|
this value, iterations are stopped.
|
||
|
|
||
|
enet_tol : positive float, optional
|
||
|
The tolerance for the elastic net solver used to calculate the descent
|
||
|
direction. This parameter controls the accuracy of the search direction
|
||
|
for a given column update, not of the overall parameter estimate. Only
|
||
|
used for mode='cd'.
|
||
|
|
||
|
max_iter : integer, optional
|
||
|
The maximum number of iterations.
|
||
|
|
||
|
verbose : integer, optional
|
||
|
The higher the verbosity flag, the more information is printed
|
||
|
during the fitting.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
covariances_ : List of 2D ndarray, shape (n_features, n_features)
|
||
|
The estimated covariance matrices.
|
||
|
|
||
|
precisions_ : List of 2D ndarray, shape (n_features, n_features)
|
||
|
The estimated (sparse) precision matrices.
|
||
|
|
||
|
scores_ : List of float
|
||
|
The generalisation error (log-likelihood) on the test data.
|
||
|
Returned only if test data is passed.
|
||
|
"""
|
||
|
inner_verbose = max(0, verbose - 1)
|
||
|
emp_cov = empirical_covariance(X)
|
||
|
if cov_init is None:
|
||
|
covariance_ = emp_cov.copy()
|
||
|
else:
|
||
|
covariance_ = cov_init
|
||
|
covariances_ = list()
|
||
|
precisions_ = list()
|
||
|
scores_ = list()
|
||
|
if X_test is not None:
|
||
|
test_emp_cov = empirical_covariance(X_test)
|
||
|
|
||
|
for alpha in alphas:
|
||
|
try:
|
||
|
# Capture the errors, and move on
|
||
|
covariance_, precision_ = graph_lasso(
|
||
|
emp_cov, alpha=alpha, cov_init=covariance_, mode=mode, tol=tol,
|
||
|
enet_tol=enet_tol, max_iter=max_iter, verbose=inner_verbose)
|
||
|
covariances_.append(covariance_)
|
||
|
precisions_.append(precision_)
|
||
|
if X_test is not None:
|
||
|
this_score = log_likelihood(test_emp_cov, precision_)
|
||
|
except FloatingPointError:
|
||
|
this_score = -np.inf
|
||
|
covariances_.append(np.nan)
|
||
|
precisions_.append(np.nan)
|
||
|
if X_test is not None:
|
||
|
if not np.isfinite(this_score):
|
||
|
this_score = -np.inf
|
||
|
scores_.append(this_score)
|
||
|
if verbose == 1:
|
||
|
sys.stderr.write('.')
|
||
|
elif verbose > 1:
|
||
|
if X_test is not None:
|
||
|
print('[graph_lasso_path] alpha: %.2e, score: %.2e'
|
||
|
% (alpha, this_score))
|
||
|
else:
|
||
|
print('[graph_lasso_path] alpha: %.2e' % alpha)
|
||
|
if X_test is not None:
|
||
|
return covariances_, precisions_, scores_
|
||
|
return covariances_, precisions_
|
||
|
|
||
|
|
||
|
class GraphLassoCV(GraphLasso):
|
||
|
"""Sparse inverse covariance w/ cross-validated choice of the l1 penalty
|
||
|
|
||
|
Read more in the :ref:`User Guide <sparse_inverse_covariance>`.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
alphas : integer, or list positive float, optional
|
||
|
If an integer is given, it fixes the number of points on the
|
||
|
grids of alpha to be used. If a list is given, it gives the
|
||
|
grid to be used. See the notes in the class docstring for
|
||
|
more details.
|
||
|
|
||
|
n_refinements : strictly positive integer
|
||
|
The number of times the grid is refined. Not used if explicit
|
||
|
values of alphas are passed.
|
||
|
|
||
|
cv : int, cross-validation generator or an iterable, optional
|
||
|
Determines the cross-validation splitting strategy.
|
||
|
Possible inputs for cv are:
|
||
|
|
||
|
- None, to use the default 3-fold cross-validation,
|
||
|
- integer, to specify the number of folds.
|
||
|
- An object to be used as a cross-validation generator.
|
||
|
- An iterable yielding train/test splits.
|
||
|
|
||
|
For integer/None inputs :class:`KFold` is used.
|
||
|
|
||
|
Refer :ref:`User Guide <cross_validation>` for the various
|
||
|
cross-validation strategies that can be used here.
|
||
|
|
||
|
tol : positive float, optional
|
||
|
The tolerance to declare convergence: if the dual gap goes below
|
||
|
this value, iterations are stopped.
|
||
|
|
||
|
enet_tol : positive float, optional
|
||
|
The tolerance for the elastic net solver used to calculate the descent
|
||
|
direction. This parameter controls the accuracy of the search direction
|
||
|
for a given column update, not of the overall parameter estimate. Only
|
||
|
used for mode='cd'.
|
||
|
|
||
|
max_iter : integer, optional
|
||
|
Maximum number of iterations.
|
||
|
|
||
|
mode : {'cd', 'lars'}
|
||
|
The Lasso solver to use: coordinate descent or LARS. Use LARS for
|
||
|
very sparse underlying graphs, where number of features is greater
|
||
|
than number of samples. Elsewhere prefer cd which is more numerically
|
||
|
stable.
|
||
|
|
||
|
n_jobs : int, optional
|
||
|
number of jobs to run in parallel (default 1).
|
||
|
|
||
|
verbose : boolean, optional
|
||
|
If verbose is True, the objective function and duality gap are
|
||
|
printed at each iteration.
|
||
|
|
||
|
assume_centered : Boolean
|
||
|
If True, data are not centered before computation.
|
||
|
Useful when working with data whose mean is almost, but not exactly
|
||
|
zero.
|
||
|
If False, data are centered before computation.
|
||
|
|
||
|
Attributes
|
||
|
----------
|
||
|
covariance_ : numpy.ndarray, shape (n_features, n_features)
|
||
|
Estimated covariance matrix.
|
||
|
|
||
|
precision_ : numpy.ndarray, shape (n_features, n_features)
|
||
|
Estimated precision matrix (inverse covariance).
|
||
|
|
||
|
alpha_ : float
|
||
|
Penalization parameter selected.
|
||
|
|
||
|
cv_alphas_ : list of float
|
||
|
All penalization parameters explored.
|
||
|
|
||
|
grid_scores_ : 2D numpy.ndarray (n_alphas, n_folds)
|
||
|
Log-likelihood score on left-out data across folds.
|
||
|
|
||
|
n_iter_ : int
|
||
|
Number of iterations run for the optimal alpha.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
graph_lasso, GraphLasso
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
The search for the optimal penalization parameter (alpha) is done on an
|
||
|
iteratively refined grid: first the cross-validated scores on a grid are
|
||
|
computed, then a new refined grid is centered around the maximum, and so
|
||
|
on.
|
||
|
|
||
|
One of the challenges which is faced here is that the solvers can
|
||
|
fail to converge to a well-conditioned estimate. The corresponding
|
||
|
values of alpha then come out as missing values, but the optimum may
|
||
|
be close to these missing values.
|
||
|
"""
|
||
|
|
||
|
def __init__(self, alphas=4, n_refinements=4, cv=None, tol=1e-4,
|
||
|
enet_tol=1e-4, max_iter=100, mode='cd', n_jobs=1,
|
||
|
verbose=False, assume_centered=False):
|
||
|
super(GraphLassoCV, self).__init__(
|
||
|
mode=mode, tol=tol, verbose=verbose, enet_tol=enet_tol,
|
||
|
max_iter=max_iter, assume_centered=assume_centered)
|
||
|
self.alphas = alphas
|
||
|
self.n_refinements = n_refinements
|
||
|
self.cv = cv
|
||
|
self.n_jobs = n_jobs
|
||
|
|
||
|
@property
|
||
|
@deprecated("Attribute grid_scores was deprecated in version 0.19 and "
|
||
|
"will be removed in 0.21. Use ``grid_scores_`` instead")
|
||
|
def grid_scores(self):
|
||
|
return self.grid_scores_
|
||
|
|
||
|
def fit(self, X, y=None):
|
||
|
"""Fits the GraphLasso covariance model to X.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
X : ndarray, shape (n_samples, n_features)
|
||
|
Data from which to compute the covariance estimate
|
||
|
y : (ignored)
|
||
|
"""
|
||
|
# Covariance does not make sense for a single feature
|
||
|
X = check_array(X, ensure_min_features=2, estimator=self)
|
||
|
if self.assume_centered:
|
||
|
self.location_ = np.zeros(X.shape[1])
|
||
|
else:
|
||
|
self.location_ = X.mean(0)
|
||
|
emp_cov = empirical_covariance(
|
||
|
X, assume_centered=self.assume_centered)
|
||
|
|
||
|
cv = check_cv(self.cv, y, classifier=False)
|
||
|
|
||
|
# List of (alpha, scores, covs)
|
||
|
path = list()
|
||
|
n_alphas = self.alphas
|
||
|
inner_verbose = max(0, self.verbose - 1)
|
||
|
|
||
|
if isinstance(n_alphas, Sequence):
|
||
|
alphas = self.alphas
|
||
|
n_refinements = 1
|
||
|
else:
|
||
|
n_refinements = self.n_refinements
|
||
|
alpha_1 = alpha_max(emp_cov)
|
||
|
alpha_0 = 1e-2 * alpha_1
|
||
|
alphas = np.logspace(np.log10(alpha_0), np.log10(alpha_1),
|
||
|
n_alphas)[::-1]
|
||
|
|
||
|
t0 = time.time()
|
||
|
for i in range(n_refinements):
|
||
|
with warnings.catch_warnings():
|
||
|
# No need to see the convergence warnings on this grid:
|
||
|
# they will always be points that will not converge
|
||
|
# during the cross-validation
|
||
|
warnings.simplefilter('ignore', ConvergenceWarning)
|
||
|
# Compute the cross-validated loss on the current grid
|
||
|
|
||
|
# NOTE: Warm-restarting graph_lasso_path has been tried, and
|
||
|
# this did not allow to gain anything (same execution time with
|
||
|
# or without).
|
||
|
this_path = Parallel(
|
||
|
n_jobs=self.n_jobs,
|
||
|
verbose=self.verbose
|
||
|
)(delayed(graph_lasso_path)(X[train], alphas=alphas,
|
||
|
X_test=X[test], mode=self.mode,
|
||
|
tol=self.tol,
|
||
|
enet_tol=self.enet_tol,
|
||
|
max_iter=int(.1 * self.max_iter),
|
||
|
verbose=inner_verbose)
|
||
|
for train, test in cv.split(X, y))
|
||
|
|
||
|
# Little danse to transform the list in what we need
|
||
|
covs, _, scores = zip(*this_path)
|
||
|
covs = zip(*covs)
|
||
|
scores = zip(*scores)
|
||
|
path.extend(zip(alphas, scores, covs))
|
||
|
path = sorted(path, key=operator.itemgetter(0), reverse=True)
|
||
|
|
||
|
# Find the maximum (avoid using built in 'max' function to
|
||
|
# have a fully-reproducible selection of the smallest alpha
|
||
|
# in case of equality)
|
||
|
best_score = -np.inf
|
||
|
last_finite_idx = 0
|
||
|
for index, (alpha, scores, _) in enumerate(path):
|
||
|
this_score = np.mean(scores)
|
||
|
if this_score >= .1 / np.finfo(np.float64).eps:
|
||
|
this_score = np.nan
|
||
|
if np.isfinite(this_score):
|
||
|
last_finite_idx = index
|
||
|
if this_score >= best_score:
|
||
|
best_score = this_score
|
||
|
best_index = index
|
||
|
|
||
|
# Refine the grid
|
||
|
if best_index == 0:
|
||
|
# We do not need to go back: we have chosen
|
||
|
# the highest value of alpha for which there are
|
||
|
# non-zero coefficients
|
||
|
alpha_1 = path[0][0]
|
||
|
alpha_0 = path[1][0]
|
||
|
elif (best_index == last_finite_idx
|
||
|
and not best_index == len(path) - 1):
|
||
|
# We have non-converged models on the upper bound of the
|
||
|
# grid, we need to refine the grid there
|
||
|
alpha_1 = path[best_index][0]
|
||
|
alpha_0 = path[best_index + 1][0]
|
||
|
elif best_index == len(path) - 1:
|
||
|
alpha_1 = path[best_index][0]
|
||
|
alpha_0 = 0.01 * path[best_index][0]
|
||
|
else:
|
||
|
alpha_1 = path[best_index - 1][0]
|
||
|
alpha_0 = path[best_index + 1][0]
|
||
|
|
||
|
if not isinstance(n_alphas, Sequence):
|
||
|
alphas = np.logspace(np.log10(alpha_1), np.log10(alpha_0),
|
||
|
n_alphas + 2)
|
||
|
alphas = alphas[1:-1]
|
||
|
|
||
|
if self.verbose and n_refinements > 1:
|
||
|
print('[GraphLassoCV] Done refinement % 2i out of %i: % 3is'
|
||
|
% (i + 1, n_refinements, time.time() - t0))
|
||
|
|
||
|
path = list(zip(*path))
|
||
|
grid_scores = list(path[1])
|
||
|
alphas = list(path[0])
|
||
|
# Finally, compute the score with alpha = 0
|
||
|
alphas.append(0)
|
||
|
grid_scores.append(cross_val_score(EmpiricalCovariance(), X,
|
||
|
cv=cv, n_jobs=self.n_jobs,
|
||
|
verbose=inner_verbose))
|
||
|
self.grid_scores_ = np.array(grid_scores)
|
||
|
best_alpha = alphas[best_index]
|
||
|
self.alpha_ = best_alpha
|
||
|
self.cv_alphas_ = alphas
|
||
|
|
||
|
# Finally fit the model with the selected alpha
|
||
|
self.covariance_, self.precision_, self.n_iter_ = graph_lasso(
|
||
|
emp_cov, alpha=best_alpha, mode=self.mode, tol=self.tol,
|
||
|
enet_tol=self.enet_tol, max_iter=self.max_iter,
|
||
|
verbose=inner_verbose, return_n_iter=True)
|
||
|
return self
|