"""Principal Component Analysis Base Classes"""

# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>
#         Olivier Grisel <olivier.grisel@ensta.org>
#         Mathieu Blondel <mathieu@mblondel.org>
#         Denis A. Engemann <denis-alexander.engemann@inria.fr>
#         Kyle Kastner <kastnerkyle@gmail.com>
#
# License: BSD 3 clause

import numpy as np
from scipy import linalg

from ..base import BaseEstimator, TransformerMixin
from ..utils import check_array
from ..utils.validation import check_is_fitted
from ..externals import six
from abc import ABCMeta, abstractmethod


class _BasePCA(six.with_metaclass(ABCMeta, BaseEstimator, TransformerMixin)):
    """Base class for PCA methods.

    Warning: This class should not be used directly.
    Use derived classes instead.
    """
    def get_covariance(self):
        """Compute data covariance with the generative model.

        ``cov = components_.T * S**2 * components_ + sigma2 * eye(n_features)``

        where S**2 contains the explained variances, and sigma2 contains the
        noise variances.

        Returns
        -------
        cov : array, shape=(n_features, n_features)
            Estimated covariance of data.
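
        Examples
        --------
        A minimal usage sketch (the returned values depend on the fitted
        model, so the output is skipped):

        >>> import numpy as np
        >>> from sklearn.decomposition import PCA
        >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
        >>> pca = PCA(n_components=1).fit(X)
        >>> pca.get_covariance()  # doctest: +SKIP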
        """
        components_ = self.components_
        exp_var = self.explained_variance_
        if self.whiten:
            components_ = components_ * np.sqrt(exp_var[:, np.newaxis])
        exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.)
        cov = np.dot(components_.T * exp_var_diff, components_)
        cov.flat[::len(cov) + 1] += self.noise_variance_  # modify diag inplace
        return cov

    def get_precision(self):
        """Compute data precision matrix with the generative model.

        Equals the inverse of the covariance but computed with
        the matrix inversion lemma for efficiency.

        Returns
        -------
        precision : array, shape=(n_features, n_features)
            Estimated precision of data.
        """
        n_features = self.components_.shape[1]

        # handle corner cases first
        if self.n_components_ == 0:
            return np.eye(n_features) / self.noise_variance_
        if self.n_components_ == n_features:
            return linalg.inv(self.get_covariance())

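        # The identity used below (Woodbury / matrix inversion lemma):
        # writing W for components_ (rescaled when whitening is on),
        # v = exp_var_diff and s = noise_variance_, the model covariance is
        #   cov = W.T @ diag(v) @ W + s * I
        # and its inverse is
        #   inv(cov) = I / s - W.T @ inv(diag(1 / v) + W @ W.T / s) @ W / s**2
        # so only an (n_components, n_components) matrix is inverted.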
        # Get precision using matrix inversion lemma
        components_ = self.components_
        exp_var = self.explained_variance_
        if self.whiten:
            components_ = components_ * np.sqrt(exp_var[:, np.newaxis])
        exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.)
        precision = np.dot(components_, components_.T) / self.noise_variance_
        precision.flat[::len(precision) + 1] += 1. / exp_var_diff
        precision = np.dot(components_.T,
                           np.dot(linalg.inv(precision), components_))
        precision /= -(self.noise_variance_ ** 2)
        precision.flat[::len(precision) + 1] += 1. / self.noise_variance_
        return precision

    @abstractmethod
    def fit(self, X, y=None):
        """Placeholder for fit. Subclasses should implement this method!

        Fit the model with X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
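
        Examples
        --------
        ``fit`` is abstract here; a minimal sketch with a concrete
        subclass (the output repr varies across versions, hence skipped):

        >>> import numpy as np
        >>> from sklearn.decomposition import PCA
        >>> X = np.array([[1, 2], [3, 4], [5, 6]])
        >>> PCA(n_components=1).fit(X)  # doctest: +SKIP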
        """

    def transform(self, X):
        """Apply dimensionality reduction to X.

        X is projected on the first principal components previously extracted
        from a training set.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            New data, where n_samples is the number of samples
            and n_features is the number of features.

        Returns
        -------
        X_new : array-like, shape (n_samples, n_components)
            Projection of X in the first principal components.

        Examples
        --------

        >>> import numpy as np
        >>> from sklearn.decomposition import IncrementalPCA
        >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
        >>> ipca = IncrementalPCA(n_components=2, batch_size=3)
        >>> ipca.fit(X)
        IncrementalPCA(batch_size=3, copy=True, n_components=2, whiten=False)
        >>> ipca.transform(X)  # doctest: +SKIP
        """
        check_is_fitted(self, ['mean_', 'components_'], all_or_any=all)

        X = check_array(X)
        if self.mean_ is not None:
            X = X - self.mean_
        X_transformed = np.dot(X, self.components_.T)
        if self.whiten:
            X_transformed /= np.sqrt(self.explained_variance_)
        return X_transformed

    def inverse_transform(self, X):
        """Transform data back to its original space.

        In other words, return an input X_original whose transform would be X.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_components)
            New data, where n_samples is the number of samples
            and n_components is the number of components.

        Returns
        -------
        X_original : array-like, shape (n_samples, n_features)

        Notes
        -----
        If whitening is enabled, inverse_transform will compute the
        exact inverse operation, which includes reversing whitening.
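
        Examples
        --------
        A round-trip sketch; with n_components equal to n_features the
        reconstruction is exact (output skipped):

        >>> import numpy as np
        >>> from sklearn.decomposition import PCA
        >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
        >>> pca = PCA(n_components=2).fit(X)
        >>> pca.inverse_transform(pca.transform(X))  # doctest: +SKIP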
        """
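        # When whitening was enabled, transform scaled each component by
        # 1 / sqrt(explained_variance_); multiply that factor back in here
        # before projecting to the original feature space.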
        if self.whiten:
            return np.dot(X, np.sqrt(self.explained_variance_[:, np.newaxis]) *
                          self.components_) + self.mean_
        else:
            return np.dot(X, self.components_) + self.mean_