laywerrobot/lib/python3.6/site-packages/sklearn/linear_model/omp.py

871 lines
31 KiB
Python
Raw Normal View History

2020-08-27 21:55:39 +02:00
"""Orthogonal matching pursuit algorithms
"""
# Author: Vlad Niculae
#
# License: BSD 3 clause
import warnings
import numpy as np
from scipy import linalg
from scipy.linalg.lapack import get_lapack_funcs
from .base import LinearModel, _pre_fit
from ..base import RegressorMixin
from ..utils import as_float_array, check_array, check_X_y
from ..model_selection import check_cv
from ..externals.joblib import Parallel, delayed
solve_triangular_args = {'check_finite': False}
premature = """ Orthogonal matching pursuit ended prematurely due to linear
dependence in the dictionary. The requested precision might not have been met.
"""
def _cholesky_omp(X, y, n_nonzero_coefs, tol=None, copy_X=True,
return_path=False):
"""Orthogonal Matching Pursuit step using the Cholesky decomposition.
Parameters
----------
X : array, shape (n_samples, n_features)
Input dictionary. Columns are assumed to have unit norm.
y : array, shape (n_samples,)
Input targets
n_nonzero_coefs : int
Targeted number of non-zero elements
tol : float
Targeted squared error, if not None overrides n_nonzero_coefs.
copy_X : bool, optional
Whether the design matrix X must be copied by the algorithm. A false
value is only helpful if X is already Fortran-ordered, otherwise a
copy is made anyway.
return_path : bool, optional. Default: False
Whether to return every value of the nonzero coefficients along the
forward path. Useful for cross-validation.
Returns
-------
gamma : array, shape (n_nonzero_coefs,)
Non-zero elements of the solution
idx : array, shape (n_nonzero_coefs,)
Indices of the positions of the elements in gamma within the solution
vector
coef : array, shape (n_features, n_nonzero_coefs)
The first k values of column k correspond to the coefficient value
for the active features at that step. The lower left triangle contains
garbage. Only returned if ``return_path=True``.
n_active : int
Number of active features at convergence.
"""
if copy_X:
X = X.copy('F')
else: # even if we are allowed to overwrite, still copy it if bad order
X = np.asfortranarray(X)
min_float = np.finfo(X.dtype).eps
nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (X,))
potrs, = get_lapack_funcs(('potrs',), (X,))
alpha = np.dot(X.T, y)
residual = y
gamma = np.empty(0)
n_active = 0
indices = np.arange(X.shape[1]) # keeping track of swapping
max_features = X.shape[1] if tol is not None else n_nonzero_coefs
if solve_triangular_args:
# new scipy, don't need to initialize because check_finite=False
L = np.empty((max_features, max_features), dtype=X.dtype)
else:
# old scipy, we need the garbage upper triangle to be non-Inf
L = np.zeros((max_features, max_features), dtype=X.dtype)
L[0, 0] = 1.
if return_path:
coefs = np.empty_like(L)
while True:
lam = np.argmax(np.abs(np.dot(X.T, residual)))
if lam < n_active or alpha[lam] ** 2 < min_float:
# atom already selected or inner product too small
warnings.warn(premature, RuntimeWarning, stacklevel=2)
break
if n_active > 0:
# Updates the Cholesky decomposition of X' X
L[n_active, :n_active] = np.dot(X[:, :n_active].T, X[:, lam])
linalg.solve_triangular(L[:n_active, :n_active],
L[n_active, :n_active],
trans=0, lower=1,
overwrite_b=True,
**solve_triangular_args)
v = nrm2(L[n_active, :n_active]) ** 2
if 1 - v <= min_float: # selected atoms are dependent
warnings.warn(premature, RuntimeWarning, stacklevel=2)
break
L[n_active, n_active] = np.sqrt(1 - v)
X.T[n_active], X.T[lam] = swap(X.T[n_active], X.T[lam])
alpha[n_active], alpha[lam] = alpha[lam], alpha[n_active]
indices[n_active], indices[lam] = indices[lam], indices[n_active]
n_active += 1
# solves LL'x = y as a composition of two triangular systems
gamma, _ = potrs(L[:n_active, :n_active], alpha[:n_active], lower=True,
overwrite_b=False)
if return_path:
coefs[:n_active, n_active - 1] = gamma
residual = y - np.dot(X[:, :n_active], gamma)
if tol is not None and nrm2(residual) ** 2 <= tol:
break
elif n_active == max_features:
break
if return_path:
return gamma, indices[:n_active], coefs[:, :n_active], n_active
else:
return gamma, indices[:n_active], n_active
def _gram_omp(Gram, Xy, n_nonzero_coefs, tol_0=None, tol=None,
copy_Gram=True, copy_Xy=True, return_path=False):
"""Orthogonal Matching Pursuit step on a precomputed Gram matrix.
This function uses the Cholesky decomposition method.
Parameters
----------
Gram : array, shape (n_features, n_features)
Gram matrix of the input data matrix
Xy : array, shape (n_features,)
Input targets
n_nonzero_coefs : int
Targeted number of non-zero elements
tol_0 : float
Squared norm of y, required if tol is not None.
tol : float
Targeted squared error, if not None overrides n_nonzero_coefs.
copy_Gram : bool, optional
Whether the gram matrix must be copied by the algorithm. A false
value is only helpful if it is already Fortran-ordered, otherwise a
copy is made anyway.
copy_Xy : bool, optional
Whether the covariance vector Xy must be copied by the algorithm.
If False, it may be overwritten.
return_path : bool, optional. Default: False
Whether to return every value of the nonzero coefficients along the
forward path. Useful for cross-validation.
Returns
-------
gamma : array, shape (n_nonzero_coefs,)
Non-zero elements of the solution
idx : array, shape (n_nonzero_coefs,)
Indices of the positions of the elements in gamma within the solution
vector
coefs : array, shape (n_features, n_nonzero_coefs)
The first k values of column k correspond to the coefficient value
for the active features at that step. The lower left triangle contains
garbage. Only returned if ``return_path=True``.
n_active : int
Number of active features at convergence.
"""
Gram = Gram.copy('F') if copy_Gram else np.asfortranarray(Gram)
if copy_Xy:
Xy = Xy.copy()
min_float = np.finfo(Gram.dtype).eps
nrm2, swap = linalg.get_blas_funcs(('nrm2', 'swap'), (Gram,))
potrs, = get_lapack_funcs(('potrs',), (Gram,))
indices = np.arange(len(Gram)) # keeping track of swapping
alpha = Xy
tol_curr = tol_0
delta = 0
gamma = np.empty(0)
n_active = 0
max_features = len(Gram) if tol is not None else n_nonzero_coefs
if solve_triangular_args:
# new scipy, don't need to initialize because check_finite=False
L = np.empty((max_features, max_features), dtype=Gram.dtype)
else:
# old scipy, we need the garbage upper triangle to be non-Inf
L = np.zeros((max_features, max_features), dtype=Gram.dtype)
L[0, 0] = 1.
if return_path:
coefs = np.empty_like(L)
while True:
lam = np.argmax(np.abs(alpha))
if lam < n_active or alpha[lam] ** 2 < min_float:
# selected same atom twice, or inner product too small
warnings.warn(premature, RuntimeWarning, stacklevel=3)
break
if n_active > 0:
L[n_active, :n_active] = Gram[lam, :n_active]
linalg.solve_triangular(L[:n_active, :n_active],
L[n_active, :n_active],
trans=0, lower=1,
overwrite_b=True,
**solve_triangular_args)
v = nrm2(L[n_active, :n_active]) ** 2
if 1 - v <= min_float: # selected atoms are dependent
warnings.warn(premature, RuntimeWarning, stacklevel=3)
break
L[n_active, n_active] = np.sqrt(1 - v)
Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam])
Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam])
indices[n_active], indices[lam] = indices[lam], indices[n_active]
Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active]
n_active += 1
# solves LL'x = y as a composition of two triangular systems
gamma, _ = potrs(L[:n_active, :n_active], Xy[:n_active], lower=True,
overwrite_b=False)
if return_path:
coefs[:n_active, n_active - 1] = gamma
beta = np.dot(Gram[:, :n_active], gamma)
alpha = Xy - beta
if tol is not None:
tol_curr += delta
delta = np.inner(gamma, beta[:n_active])
tol_curr -= delta
if abs(tol_curr) <= tol:
break
elif n_active == max_features:
break
if return_path:
return gamma, indices[:n_active], coefs[:, :n_active], n_active
else:
return gamma, indices[:n_active], n_active
def orthogonal_mp(X, y, n_nonzero_coefs=None, tol=None, precompute=False,
copy_X=True, return_path=False,
return_n_iter=False):
"""Orthogonal Matching Pursuit (OMP)
Solves n_targets Orthogonal Matching Pursuit problems.
An instance of the problem has the form:
When parametrized by the number of non-zero coefficients using
`n_nonzero_coefs`:
argmin ||y - X\gamma||^2 subject to ||\gamma||_0 <= n_{nonzero coefs}
When parametrized by error using the parameter `tol`:
argmin ||\gamma||_0 subject to ||y - X\gamma||^2 <= tol
Read more in the :ref:`User Guide <omp>`.
Parameters
----------
X : array, shape (n_samples, n_features)
Input data. Columns are assumed to have unit norm.
y : array, shape (n_samples,) or (n_samples, n_targets)
Input targets
n_nonzero_coefs : int
Desired number of non-zero entries in the solution. If None (by
default) this value is set to 10% of n_features.
tol : float
Maximum norm of the residual. If not None, overrides n_nonzero_coefs.
precompute : {True, False, 'auto'},
Whether to perform precomputations. Improves performance when n_targets
or n_samples is very large.
copy_X : bool, optional
Whether the design matrix X must be copied by the algorithm. A false
value is only helpful if X is already Fortran-ordered, otherwise a
copy is made anyway.
return_path : bool, optional. Default: False
Whether to return every value of the nonzero coefficients along the
forward path. Useful for cross-validation.
return_n_iter : bool, optional default False
Whether or not to return the number of iterations.
Returns
-------
coef : array, shape (n_features,) or (n_features, n_targets)
Coefficients of the OMP solution. If `return_path=True`, this contains
the whole coefficient path. In this case its shape is
(n_features, n_features) or (n_features, n_targets, n_features) and
iterating over the last axis yields coefficients in increasing order
of active features.
n_iters : array-like or int
Number of active features across every target. Returned only if
`return_n_iter` is set to True.
See also
--------
OrthogonalMatchingPursuit
orthogonal_mp_gram
lars_path
decomposition.sparse_encode
Notes
-----
Orthogonal matching pursuit was introduced in S. Mallat, Z. Zhang,
Matching pursuits with time-frequency dictionaries, IEEE Transactions on
Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.
(http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)
This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,
M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal
Matching Pursuit Technical Report - CS Technion, April 2008.
http://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf
"""
X = check_array(X, order='F', copy=copy_X)
copy_X = False
if y.ndim == 1:
y = y.reshape(-1, 1)
y = check_array(y)
if y.shape[1] > 1: # subsequent targets will be affected
copy_X = True
if n_nonzero_coefs is None and tol is None:
# default for n_nonzero_coefs is 0.1 * n_features
# but at least one.
n_nonzero_coefs = max(int(0.1 * X.shape[1]), 1)
if tol is not None and tol < 0:
raise ValueError("Epsilon cannot be negative")
if tol is None and n_nonzero_coefs <= 0:
raise ValueError("The number of atoms must be positive")
if tol is None and n_nonzero_coefs > X.shape[1]:
raise ValueError("The number of atoms cannot be more than the number "
"of features")
if precompute == 'auto':
precompute = X.shape[0] > X.shape[1]
if precompute:
G = np.dot(X.T, X)
G = np.asfortranarray(G)
Xy = np.dot(X.T, y)
if tol is not None:
norms_squared = np.sum((y ** 2), axis=0)
else:
norms_squared = None
return orthogonal_mp_gram(G, Xy, n_nonzero_coefs, tol, norms_squared,
copy_Gram=copy_X, copy_Xy=False,
return_path=return_path)
if return_path:
coef = np.zeros((X.shape[1], y.shape[1], X.shape[1]))
else:
coef = np.zeros((X.shape[1], y.shape[1]))
n_iters = []
for k in range(y.shape[1]):
out = _cholesky_omp(
X, y[:, k], n_nonzero_coefs, tol,
copy_X=copy_X, return_path=return_path)
if return_path:
_, idx, coefs, n_iter = out
coef = coef[:, :, :len(idx)]
for n_active, x in enumerate(coefs.T):
coef[idx[:n_active + 1], k, n_active] = x[:n_active + 1]
else:
x, idx, n_iter = out
coef[idx, k] = x
n_iters.append(n_iter)
if y.shape[1] == 1:
n_iters = n_iters[0]
if return_n_iter:
return np.squeeze(coef), n_iters
else:
return np.squeeze(coef)
def orthogonal_mp_gram(Gram, Xy, n_nonzero_coefs=None, tol=None,
norms_squared=None, copy_Gram=True,
copy_Xy=True, return_path=False,
return_n_iter=False):
"""Gram Orthogonal Matching Pursuit (OMP)
Solves n_targets Orthogonal Matching Pursuit problems using only
the Gram matrix X.T * X and the product X.T * y.
Read more in the :ref:`User Guide <omp>`.
Parameters
----------
Gram : array, shape (n_features, n_features)
Gram matrix of the input data: X.T * X
Xy : array, shape (n_features,) or (n_features, n_targets)
Input targets multiplied by X: X.T * y
n_nonzero_coefs : int
Desired number of non-zero entries in the solution. If None (by
default) this value is set to 10% of n_features.
tol : float
Maximum norm of the residual. If not None, overrides n_nonzero_coefs.
norms_squared : array-like, shape (n_targets,)
Squared L2 norms of the lines of y. Required if tol is not None.
copy_Gram : bool, optional
Whether the gram matrix must be copied by the algorithm. A false
value is only helpful if it is already Fortran-ordered, otherwise a
copy is made anyway.
copy_Xy : bool, optional
Whether the covariance vector Xy must be copied by the algorithm.
If False, it may be overwritten.
return_path : bool, optional. Default: False
Whether to return every value of the nonzero coefficients along the
forward path. Useful for cross-validation.
return_n_iter : bool, optional default False
Whether or not to return the number of iterations.
Returns
-------
coef : array, shape (n_features,) or (n_features, n_targets)
Coefficients of the OMP solution. If `return_path=True`, this contains
the whole coefficient path. In this case its shape is
(n_features, n_features) or (n_features, n_targets, n_features) and
iterating over the last axis yields coefficients in increasing order
of active features.
n_iters : array-like or int
Number of active features across every target. Returned only if
`return_n_iter` is set to True.
See also
--------
OrthogonalMatchingPursuit
orthogonal_mp
lars_path
decomposition.sparse_encode
Notes
-----
Orthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,
Matching pursuits with time-frequency dictionaries, IEEE Transactions on
Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.
(http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)
This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,
M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal
Matching Pursuit Technical Report - CS Technion, April 2008.
http://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf
"""
Gram = check_array(Gram, order='F', copy=copy_Gram)
Xy = np.asarray(Xy)
if Xy.ndim > 1 and Xy.shape[1] > 1:
# or subsequent target will be affected
copy_Gram = True
if Xy.ndim == 1:
Xy = Xy[:, np.newaxis]
if tol is not None:
norms_squared = [norms_squared]
if n_nonzero_coefs is None and tol is None:
n_nonzero_coefs = int(0.1 * len(Gram))
if tol is not None and norms_squared is None:
raise ValueError('Gram OMP needs the precomputed norms in order '
'to evaluate the error sum of squares.')
if tol is not None and tol < 0:
raise ValueError("Epsilon cannot be negative")
if tol is None and n_nonzero_coefs <= 0:
raise ValueError("The number of atoms must be positive")
if tol is None and n_nonzero_coefs > len(Gram):
raise ValueError("The number of atoms cannot be more than the number "
"of features")
if return_path:
coef = np.zeros((len(Gram), Xy.shape[1], len(Gram)))
else:
coef = np.zeros((len(Gram), Xy.shape[1]))
n_iters = []
for k in range(Xy.shape[1]):
out = _gram_omp(
Gram, Xy[:, k], n_nonzero_coefs,
norms_squared[k] if tol is not None else None, tol,
copy_Gram=copy_Gram, copy_Xy=copy_Xy,
return_path=return_path)
if return_path:
_, idx, coefs, n_iter = out
coef = coef[:, :, :len(idx)]
for n_active, x in enumerate(coefs.T):
coef[idx[:n_active + 1], k, n_active] = x[:n_active + 1]
else:
x, idx, n_iter = out
coef[idx, k] = x
n_iters.append(n_iter)
if Xy.shape[1] == 1:
n_iters = n_iters[0]
if return_n_iter:
return np.squeeze(coef), n_iters
else:
return np.squeeze(coef)
class OrthogonalMatchingPursuit(LinearModel, RegressorMixin):
"""Orthogonal Matching Pursuit model (OMP)
Read more in the :ref:`User Guide <omp>`.
Parameters
----------
n_nonzero_coefs : int, optional
Desired number of non-zero entries in the solution. If None (by
default) this value is set to 10% of n_features.
tol : float, optional
Maximum norm of the residual. If not None, overrides n_nonzero_coefs.
fit_intercept : boolean, optional
whether to calculate the intercept for this model. If set
to false, no intercept will be used in calculations
(e.g. data is expected to be already centered).
normalize : boolean, optional, default True
This parameter is ignored when ``fit_intercept`` is set to False.
If True, the regressors X will be normalized before regression by
subtracting the mean and dividing by the l2-norm.
If you wish to standardize, please use
:class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
on an estimator with ``normalize=False``.
precompute : {True, False, 'auto'}, default 'auto'
Whether to use a precomputed Gram and Xy matrix to speed up
calculations. Improves performance when `n_targets` or `n_samples` is
very large. Note that if you already have such matrices, you can pass
them directly to the fit method.
Attributes
----------
coef_ : array, shape (n_features,) or (n_targets, n_features)
parameter vector (w in the formula)
intercept_ : float or array, shape (n_targets,)
independent term in decision function.
n_iter_ : int or array-like
Number of active features across every target.
Notes
-----
Orthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,
Matching pursuits with time-frequency dictionaries, IEEE Transactions on
Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.
(http://blanche.polytechnique.fr/~mallat/papiers/MallatPursuit93.pdf)
This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,
M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal
Matching Pursuit Technical Report - CS Technion, April 2008.
http://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf
See also
--------
orthogonal_mp
orthogonal_mp_gram
lars_path
Lars
LassoLars
decomposition.sparse_encode
"""
def __init__(self, n_nonzero_coefs=None, tol=None, fit_intercept=True,
normalize=True, precompute='auto'):
self.n_nonzero_coefs = n_nonzero_coefs
self.tol = tol
self.fit_intercept = fit_intercept
self.normalize = normalize
self.precompute = precompute
def fit(self, X, y):
"""Fit the model using X, y as training data.
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training data.
y : array-like, shape (n_samples,) or (n_samples, n_targets)
Target values. Will be cast to X's dtype if necessary
Returns
-------
self : object
returns an instance of self.
"""
X, y = check_X_y(X, y, multi_output=True, y_numeric=True)
n_features = X.shape[1]
X, y, X_offset, y_offset, X_scale, Gram, Xy = \
_pre_fit(X, y, None, self.precompute, self.normalize,
self.fit_intercept, copy=True)
if y.ndim == 1:
y = y[:, np.newaxis]
if self.n_nonzero_coefs is None and self.tol is None:
# default for n_nonzero_coefs is 0.1 * n_features
# but at least one.
self.n_nonzero_coefs_ = max(int(0.1 * n_features), 1)
else:
self.n_nonzero_coefs_ = self.n_nonzero_coefs
if Gram is False:
coef_, self.n_iter_ = orthogonal_mp(
X, y, self.n_nonzero_coefs_, self.tol,
precompute=False, copy_X=True,
return_n_iter=True)
else:
norms_sq = np.sum(y ** 2, axis=0) if self.tol is not None else None
coef_, self.n_iter_ = orthogonal_mp_gram(
Gram, Xy=Xy, n_nonzero_coefs=self.n_nonzero_coefs_,
tol=self.tol, norms_squared=norms_sq,
copy_Gram=True, copy_Xy=True,
return_n_iter=True)
self.coef_ = coef_.T
self._set_intercept(X_offset, y_offset, X_scale)
return self
def _omp_path_residues(X_train, y_train, X_test, y_test, copy=True,
fit_intercept=True, normalize=True, max_iter=100):
"""Compute the residues on left-out data for a full LARS path
Parameters
-----------
X_train : array, shape (n_samples, n_features)
The data to fit the LARS on
y_train : array, shape (n_samples)
The target variable to fit LARS on
X_test : array, shape (n_samples, n_features)
The data to compute the residues on
y_test : array, shape (n_samples)
The target variable to compute the residues on
copy : boolean, optional
Whether X_train, X_test, y_train and y_test should be copied. If
False, they may be overwritten.
fit_intercept : boolean
whether to calculate the intercept for this model. If set
to false, no intercept will be used in calculations
(e.g. data is expected to be already centered).
normalize : boolean, optional, default True
This parameter is ignored when ``fit_intercept`` is set to False.
If True, the regressors X will be normalized before regression by
subtracting the mean and dividing by the l2-norm.
If you wish to standardize, please use
:class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
on an estimator with ``normalize=False``.
max_iter : integer, optional
Maximum numbers of iterations to perform, therefore maximum features
to include. 100 by default.
Returns
-------
residues : array, shape (n_samples, max_features)
Residues of the prediction on the test data
"""
if copy:
X_train = X_train.copy()
y_train = y_train.copy()
X_test = X_test.copy()
y_test = y_test.copy()
if fit_intercept:
X_mean = X_train.mean(axis=0)
X_train -= X_mean
X_test -= X_mean
y_mean = y_train.mean(axis=0)
y_train = as_float_array(y_train, copy=False)
y_train -= y_mean
y_test = as_float_array(y_test, copy=False)
y_test -= y_mean
if normalize:
norms = np.sqrt(np.sum(X_train ** 2, axis=0))
nonzeros = np.flatnonzero(norms)
X_train[:, nonzeros] /= norms[nonzeros]
coefs = orthogonal_mp(X_train, y_train, n_nonzero_coefs=max_iter, tol=None,
precompute=False, copy_X=False,
return_path=True)
if coefs.ndim == 1:
coefs = coefs[:, np.newaxis]
if normalize:
coefs[nonzeros] /= norms[nonzeros][:, np.newaxis]
return np.dot(coefs.T, X_test.T) - y_test
class OrthogonalMatchingPursuitCV(LinearModel, RegressorMixin):
"""Cross-validated Orthogonal Matching Pursuit model (OMP)
Read more in the :ref:`User Guide <omp>`.
Parameters
----------
copy : bool, optional
Whether the design matrix X must be copied by the algorithm. A false
value is only helpful if X is already Fortran-ordered, otherwise a
copy is made anyway.
fit_intercept : boolean, optional
whether to calculate the intercept for this model. If set
to false, no intercept will be used in calculations
(e.g. data is expected to be already centered).
normalize : boolean, optional, default True
This parameter is ignored when ``fit_intercept`` is set to False.
If True, the regressors X will be normalized before regression by
subtracting the mean and dividing by the l2-norm.
If you wish to standardize, please use
:class:`sklearn.preprocessing.StandardScaler` before calling ``fit``
on an estimator with ``normalize=False``.
max_iter : integer, optional
Maximum numbers of iterations to perform, therefore maximum features
to include. 10% of ``n_features`` but at least 5 if available.
cv : int, cross-validation generator or an iterable, optional
Determines the cross-validation splitting strategy.
Possible inputs for cv are:
- None, to use the default 3-fold cross-validation,
- integer, to specify the number of folds.
- An object to be used as a cross-validation generator.
- An iterable yielding train/test splits.
For integer/None inputs, :class:`KFold` is used.
Refer :ref:`User Guide <cross_validation>` for the various
cross-validation strategies that can be used here.
n_jobs : integer, optional
Number of CPUs to use during the cross validation. If ``-1``, use
all the CPUs
verbose : boolean or integer, optional
Sets the verbosity amount
Attributes
----------
intercept_ : float or array, shape (n_targets,)
Independent term in decision function.
coef_ : array, shape (n_features,) or (n_targets, n_features)
Parameter vector (w in the problem formulation).
n_nonzero_coefs_ : int
Estimated number of non-zero coefficients giving the best mean squared
error over the cross-validation folds.
n_iter_ : int or array-like
Number of active features across every target for the model refit with
the best hyperparameters got by cross-validating across all folds.
See also
--------
orthogonal_mp
orthogonal_mp_gram
lars_path
Lars
LassoLars
OrthogonalMatchingPursuit
LarsCV
LassoLarsCV
decomposition.sparse_encode
"""
def __init__(self, copy=True, fit_intercept=True, normalize=True,
max_iter=None, cv=None, n_jobs=1, verbose=False):
self.copy = copy
self.fit_intercept = fit_intercept
self.normalize = normalize
self.max_iter = max_iter
self.cv = cv
self.n_jobs = n_jobs
self.verbose = verbose
def fit(self, X, y):
"""Fit the model using X, y as training data.
Parameters
----------
X : array-like, shape [n_samples, n_features]
Training data.
y : array-like, shape [n_samples]
Target values. Will be cast to X's dtype if necessary
Returns
-------
self : object
returns an instance of self.
"""
X, y = check_X_y(X, y, y_numeric=True, ensure_min_features=2,
estimator=self)
X = as_float_array(X, copy=False, force_all_finite=False)
cv = check_cv(self.cv, classifier=False)
max_iter = (min(max(int(0.1 * X.shape[1]), 5), X.shape[1])
if not self.max_iter
else self.max_iter)
cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
delayed(_omp_path_residues)(
X[train], y[train], X[test], y[test], self.copy,
self.fit_intercept, self.normalize, max_iter)
for train, test in cv.split(X))
min_early_stop = min(fold.shape[0] for fold in cv_paths)
mse_folds = np.array([(fold[:min_early_stop] ** 2).mean(axis=1)
for fold in cv_paths])
best_n_nonzero_coefs = np.argmin(mse_folds.mean(axis=0)) + 1
self.n_nonzero_coefs_ = best_n_nonzero_coefs
omp = OrthogonalMatchingPursuit(n_nonzero_coefs=best_n_nonzero_coefs,
fit_intercept=self.fit_intercept,
normalize=self.normalize)
omp.fit(X, y)
self.coef_ = omp.coef_
self.intercept_ = omp.intercept_
self.n_iter_ = omp.n_iter_
return self