alpcentaur
/
basabuuka_prototyp


								"""Base classes for all estimators."""


								# Author: Gael Varoquaux <gael.varoquaux@normalesup.org>

								# License: BSD 3 clause


								import copy

								import warnings

								from collections import defaultdict


								import numpy as np

								from scipy import sparse

								from .externals import six

								from .utils.fixes import signature

								from . import __version__


								##############################################################################

								def _first_and_last_element(arr):
								    """Return the first and the last element of an array or sparse matrix.

								    Parameters
								    ----------
								    arr : numpy array, sparse matrix, or any object exposing ``data``
								        Container to inspect.

								    Returns
								    -------
								    first, last : tuple
								        For numpy arrays and for objects with a ``data`` attribute these
								        are the first and last entries of the flattened values (the
								        stored ``data`` buffer when ``arr`` is a scipy sparse matrix).
								        Otherwise they are ``arr[0, 0]`` and ``arr[-1, -1]``.
								    """
								    has_flat_values = isinstance(arr, np.ndarray) or hasattr(arr, 'data')
								    if not has_flat_values:
								        # Sparse matrices without a .data attribute (only dok_matrix at
								        # the time of writing); direct indexing is fast for those.
								        return arr[0, 0], arr[-1, -1]

								    # numpy array, or sparse matrix with a .data attribute: for the
								    # sparse case look only at the explicitly stored values.
								    if sparse.issparse(arr):
								        values = arr.data
								    else:
								        values = arr
								    return values.flat[0], values.flat[-1]


								def clone(estimator, safe=True):
								    """Constructs a new estimator with the same parameters.

								    Clone does a deep copy of the model in an estimator
								    without actually copying attached data. It yields a new estimator
								    with the same parameters that has not been fit on any data.

								    Parameters
								    ----------
								    estimator : estimator object, or list, tuple or set of objects
								        The estimator or group of estimators to be cloned. Containers of
								        exactly type ``list``, ``tuple``, ``set`` or ``frozenset`` are
								        rebuilt with every element cloned.

								    safe : boolean, optional
								        If safe is false, clone will fall back to a deep copy on objects
								        that are not estimators (objects without ``get_params`` and
								        class objects).

								    Returns
								    -------
								    new_object : object
								        A new instance of ``estimator.__class__`` built from cloned
								        parameters, a container of clones, or a deep copy when the
								        input is not an estimator and ``safe`` is false.

								    Raises
								    ------
								    TypeError
								        If ``safe`` is true and ``estimator`` is a class or does not
								        implement ``get_params``.
								    RuntimeError
								        If the constructor of the estimator does not store a parameter
								        as-is (the value read back through ``get_params`` is not the
								        very object that was passed in).
								    """
								    estimator_type = type(estimator)
								    # XXX: not handling dictionaries
								    if estimator_type in (list, tuple, set, frozenset):
								        return estimator_type([clone(e, safe=safe) for e in estimator])

								    # A class (as opposed to an instance) may well expose ``get_params``
								    # as an attribute, but calling it unbound fails with a confusing
								    # "missing self" error. Treat classes like any other non-estimator.
								    is_class = isinstance(estimator, type)
								    if is_class or not hasattr(estimator, 'get_params'):
								        if not safe:
								            return copy.deepcopy(estimator)
								        if is_class:
								            raise TypeError("Cannot clone object. You should provide an "
								                            "instance of scikit-learn estimator instead "
								                            "of a class.")
								        raise TypeError("Cannot clone object '%s' (type %s): "
								                        "it does not seem to be a scikit-learn estimator "
								                        "as it does not implement a 'get_params' methods."
								                        % (repr(estimator), type(estimator)))

								    klass = estimator.__class__
								    # Parameters are cloned with safe=False so that plain values
								    # (numbers, strings, arrays, ...) are simply deep-copied. A fresh
								    # dict is built so the mapping returned by get_params is not mutated.
								    new_object_params = {
								        name: clone(param, safe=False)
								        for name, param in estimator.get_params(deep=False).items()
								    }
								    new_object = klass(**new_object_params)
								    params_set = new_object.get_params(deep=False)

								    # quick sanity check of the parameters of the clone: the constructor
								    # must have stored each argument untouched (identity, not equality).
								    for name in new_object_params:
								        param1 = new_object_params[name]
								        param2 = params_set[name]
								        if param1 is not param2:
								            raise RuntimeError('Cannot clone object %s, as the constructor '
								                               'either does not set or modifies parameter %s' %
								                               (estimator, name))
								    return new_object


								###############################################################################

								def _pprint(params, offset=0, printer=repr):
								    """Pretty print the dictionary 'params'

								    Entries are rendered as ``key=value`` in sorted key order, separated
								    by ``', '`` and wrapped onto a new, indented line whenever the current
								    line would reach 75 characters or the entry itself contains a newline.

								    Parameters
								    ----------
								    params : dict
								        The dictionary to pretty print

								    offset : int
								        The offset in characters to add at the begin of each line.

								    printer : callable
								        The function to convert entries to strings, typically
								        the builtin str or repr

								    Returns
								    -------
								    lines : str
								        The formatted parameters, with trailing spaces stripped from
								        every line.
								    """
								    # Do a multi-line justified repr. The numpy print options are global
								    # state, so they are restored in a ``finally`` clause: a failing
								    # ``printer`` must not leave them altered for the rest of the process.
								    options = np.get_printoptions()
								    np.set_printoptions(precision=5, threshold=64, edgeitems=2)
								    try:
								        params_list = list()
								        this_line_length = offset
								        line_sep = ',\n' + (1 + offset // 2) * ' '
								        for i, (k, v) in enumerate(sorted(params.items())):
								            if type(v) is float:
								                # use str for representing floating point numbers
								                # this way we get consistent representation across
								                # architectures and versions.
								                this_repr = '%s=%s' % (k, str(v))
								            else:
								                # use repr of the rest
								                this_repr = '%s=%s' % (k, printer(v))
								            if len(this_repr) > 500:
								                # keep the head and the tail of very long entries
								                this_repr = this_repr[:300] + '...' + this_repr[-100:]
								            if i > 0:
								                if (this_line_length + len(this_repr) >= 75 or
								                        '\n' in this_repr):
								                    params_list.append(line_sep)
								                    this_line_length = len(line_sep)
								                else:
								                    params_list.append(', ')
								                    this_line_length += 2
								            params_list.append(this_repr)
								            this_line_length += len(this_repr)
								    finally:
								        np.set_printoptions(**options)

								    lines = ''.join(params_list)
								    # Strip trailing space to avoid nightmare in doctests
								    lines = '\n'.join(l.rstrip(' ') for l in lines.split('\n'))
								    return lines


								###############################################################################

								class BaseEstimator(object):
								    """Base class for all estimators in scikit-learn

								    Provides parameter introspection (``get_params`` / ``set_params``),
								    a parameter-based ``__repr__`` and pickling hooks that record the
								    library version.

								    Notes
								    -----
								    All estimators should specify all the parameters that can be set
								    at the class level in their ``__init__`` as explicit keyword
								    arguments (no ``*args`` or ``**kwargs``).
								    """

								    @classmethod
								    def _get_param_names(cls):
								        """Get parameter names for the estimator

								        Returns
								        -------
								        names : list of str
								            Sorted names of the ``__init__`` arguments, excluding
								            ``self`` and any ``**kwargs``. Empty when the class does not
								            define its own constructor.

								        Raises
								        ------
								        RuntimeError
								            If the constructor accepts ``*args``.
								        """
								        # fetch the constructor or the original constructor before
								        # deprecation wrapping if any
								        init = getattr(cls.__init__, 'deprecated_original', cls.__init__)
								        if init is object.__init__:
								            # No explicit constructor to introspect
								            return []

								        # introspect the constructor arguments to find the model parameters
								        # to represent
								        init_signature = signature(init)
								        # Consider the constructor parameters excluding 'self'
								        parameters = [p for p in init_signature.parameters.values()
								                      if p.name != 'self' and p.kind != p.VAR_KEYWORD]
								        for p in parameters:
								            if p.kind == p.VAR_POSITIONAL:
								                raise RuntimeError("scikit-learn estimators should always "
								                                   "specify their parameters in the signature"
								                                   " of their __init__ (no varargs)."
								                                   " %s with constructor %s doesn't "
								                                   " follow this convention."
								                                   % (cls, init_signature))
								        # Extract and sort argument names excluding 'self'
								        return sorted([p.name for p in parameters])

								    def get_params(self, deep=True):
								        """Get parameters for this estimator.

								        Parameters
								        ----------
								        deep : boolean, optional
								            If True, will return the parameters for this estimator and
								            contained subobjects that are estimators.

								        Returns
								        -------
								        params : mapping of string to any
								            Parameter names mapped to their values. Parameters of nested
								            objects are reported as ``<name>__<sub_name>``.
								        """
								        out = dict()
								        for key in self._get_param_names():
								            # a constructor argument that was never stored reads as None
								            value = getattr(self, key, None)
								            if deep and hasattr(value, 'get_params'):
								                deep_items = value.get_params().items()
								                out.update((key + '__' + k, val) for k, val in deep_items)
								            out[key] = value
								        return out

								    def set_params(self, **params):
								        """Set the parameters of this estimator.

								        The method works on simple estimators as well as on nested objects
								        (such as pipelines). The latter have parameters of the form
								        ``<component>__<parameter>`` so that it's possible to update each
								        component of a nested object.

								        Returns
								        -------
								        self

								        Raises
								        ------
								        ValueError
								            If a key (or the part of it before the first ``__``) is not
								            one of the names returned by ``get_params``.
								        """
								        if not params:
								            # Simple optimization to gain speed (inspect is slow)
								            return self
								        valid_params = self.get_params(deep=True)

								        nested_params = defaultdict(dict)  # grouped by prefix
								        for key, value in params.items():
								            key, delim, sub_key = key.partition('__')
								            if key not in valid_params:
								                raise ValueError('Invalid parameter %s for estimator %s. '
								                                 'Check the list of available parameters '
								                                 'with `estimator.get_params().keys()`.' %
								                                 (key, self))

								            if delim:
								                nested_params[key][sub_key] = value
								            else:
								                setattr(self, key, value)
								                # keep the lookup table in sync so that nested keys
								                # are forwarded to a component replaced in this call
								                valid_params[key] = value

								        for key, sub_params in nested_params.items():
								            valid_params[key].set_params(**sub_params)

								        return self

								    def __repr__(self):
								        class_name = self.__class__.__name__
								        return '%s(%s)' % (class_name, _pprint(self.get_params(deep=False),
								                                               offset=len(class_name),),)

								    def __getstate__(self):
								        """Return the pickling state, tagged with the library version.

								        The ``_sklearn_version`` entry is only added for classes whose
								        module name starts with ``'sklearn.'``.
								        """
								        try:
								            state = super(BaseEstimator, self).__getstate__()
								        except AttributeError:
								            state = self.__dict__.copy()
								        else:
								            if state is None:
								                # Python 3.11+ provides object.__getstate__, which
								                # returns None for an instance without attributes.
								                state = self.__dict__.copy()

								        if type(self).__module__.startswith('sklearn.'):
								            return dict(state.items(), _sklearn_version=__version__)
								        else:
								            return state

								    def __setstate__(self, state):
								        """Restore the pickled state, warning on a version mismatch.

								        For classes whose module name starts with ``'sklearn.'`` the
								        ``_sklearn_version`` entry is removed from ``state`` and compared
								        with the running version; a ``UserWarning`` is issued when they
								        differ. A missing entry is reported as ``"pre-0.18"``.
								        """
								        if type(self).__module__.startswith('sklearn.'):
								            pickle_version = state.pop("_sklearn_version", "pre-0.18")
								            if pickle_version != __version__:
								                warnings.warn(
								                    "Trying to unpickle estimator {0} from version {1} when "
								                    "using version {2}. This might lead to breaking code or "
								                    "invalid results. Use at your own risk.".format(
								                        self.__class__.__name__, pickle_version, __version__),
								                    UserWarning)
								        try:
								            super(BaseEstimator, self).__setstate__(state)
								        except AttributeError:
								            self.__dict__.update(state)


								###############################################################################

								class ClassifierMixin(object):
								    """Mixin adding a default ``score`` to scikit-learn classifiers.

								    Classes using this mixin are tagged with
								    ``_estimator_type = "classifier"``.
								    """

								    _estimator_type = "classifier"

								    def score(self, X, y, sample_weight=None):
								        """Return the mean accuracy of ``self.predict(X)`` against ``y``.

								        In multi-label classification, this is the subset accuracy
								        which is a harsh metric since you require for each sample that
								        each label set be correctly predicted.

								        Parameters
								        ----------
								        X : array-like, shape = (n_samples, n_features)
								            Test samples.

								        y : array-like, shape = (n_samples) or (n_samples, n_outputs)
								            True labels for X.

								        sample_weight : array-like, shape = [n_samples], optional
								            Sample weights, forwarded to ``accuracy_score``.

								        Returns
								        -------
								        score : float
								            Mean accuracy of self.predict(X) wrt. y.
								        """
								        # imported here rather than at module level
								        from .metrics import accuracy_score

								        predicted = self.predict(X)
								        return accuracy_score(y, predicted, sample_weight=sample_weight)


								###############################################################################

								class RegressorMixin(object):
								    """Mixin adding a default ``score`` to scikit-learn regressors.

								    Classes using this mixin are tagged with
								    ``_estimator_type = "regressor"``.
								    """

								    _estimator_type = "regressor"

								    def score(self, X, y, sample_weight=None):
								        """Return the coefficient of determination R^2 of the prediction.

								        The coefficient R^2 is defined as (1 - u/v), where u is the residual
								        sum of squares ((y_true - y_pred) ** 2).sum() and v is the total
								        sum of squares ((y_true - y_true.mean()) ** 2).sum().
								        The best possible score is 1.0 and it can be negative (because the
								        model can be arbitrarily worse). A constant model that always
								        predicts the expected value of y, disregarding the input features,
								        would get a R^2 score of 0.0.

								        Parameters
								        ----------
								        X : array-like, shape = (n_samples, n_features)
								            Test samples. For some estimators this may be a
								            precomputed kernel matrix instead, shape = (n_samples,
								            n_samples_fitted), where n_samples_fitted is the number of
								            samples used in the fitting for the estimator.

								        y : array-like, shape = (n_samples) or (n_samples, n_outputs)
								            True values for X.

								        sample_weight : array-like, shape = [n_samples], optional
								            Sample weights, forwarded to ``r2_score``.

								        Returns
								        -------
								        score : float
								            R^2 of self.predict(X) wrt. y, computed with
								            ``multioutput='variance_weighted'``.
								        """
								        # imported here rather than at module level
								        from .metrics import r2_score

								        predicted = self.predict(X)
								        return r2_score(y, predicted, sample_weight=sample_weight,
								                        multioutput='variance_weighted')


								###############################################################################

								class ClusterMixin(object):
								    """Mixin adding a default ``fit_predict`` to cluster estimators.

								    Classes using this mixin are tagged with
								    ``_estimator_type = "clusterer"``.
								    """

								    _estimator_type = "clusterer"

								    def fit_predict(self, X, y=None):
								        """Fit the clustering on X and return the resulting labels.

								        Parameters
								        ----------
								        X : ndarray, shape (n_samples, n_features)
								            Input data.

								        y : Ignored
								            not used, present for API consistency by convention.

								        Returns
								        -------
								        labels : ndarray, shape (n_samples,)
								            The ``labels_`` attribute as it stands after ``fit(X)``.
								        """
								        # Generic fallback: fit, then read back the labels stored by fit.
								        # Subclasses should override this when the algorithm offers a
								        # more efficient route.
								        self.fit(X)
								        labels = self.labels_
								        return labels


								class BiclusterMixin(object):
								    """Mixin with helpers shared by all bicluster estimators.

								    Every helper reads the ``rows_`` and ``columns_`` attributes of the
								    instance, so they must be set before any of them is used.
								    """

								    @property
								    def biclusters_(self):
								        """Row and column indicators as one ``(rows_, columns_)`` pair."""
								        return (self.rows_, self.columns_)

								    def get_indices(self, i):
								        """Row and column indices of the i'th bicluster.

								        Only works if ``rows_`` and ``columns_`` attributes exist.

								        Parameters
								        ----------
								        i : int
								            The index of the cluster.

								        Returns
								        -------
								        row_ind : np.array, dtype=np.intp
								            Indices of rows in the dataset that belong to the bicluster.
								        col_ind : np.array, dtype=np.intp
								            Indices of columns in the dataset that belong to the bicluster.
								        """
								        indicators = (self.rows_[i], self.columns_[i])
								        # positions of the non-zero entries along the first axis
								        return tuple(np.nonzero(mask)[0] for mask in indicators)

								    def get_shape(self, i):
								        """Shape of the i'th bicluster.

								        Parameters
								        ----------
								        i : int
								            The index of the cluster.

								        Returns
								        -------
								        shape : (int, int)
								            Number of rows and columns (resp.) in the bicluster.
								        """
								        return tuple(len(ind) for ind in self.get_indices(i))

								    def get_submatrix(self, i, data):
								        """Return the part of ``data`` covered by bicluster `i`.

								        Parameters
								        ----------
								        i : int
								            The index of the cluster.
								        data : array
								            The data.

								        Returns
								        -------
								        submatrix : array
								            The submatrix corresponding to bicluster i.

								        Notes
								        -----
								        Works with sparse matrices. Only works if ``rows_`` and
								        ``columns_`` attributes exist.
								        """
								        # imported here rather than at module level
								        from .utils.validation import check_array

								        checked = check_array(data, accept_sparse='csr')
								        rows, cols = self.get_indices(i)
								        # a column vector of row indices combined with the column
								        # indices selects the full rows x cols block
								        return checked[rows[:, np.newaxis], cols]


								###############################################################################

								class TransformerMixin(object):
								    """Mixin adding a default ``fit_transform`` to transformers."""

								    def fit_transform(self, X, y=None, **fit_params):
								        """Fit to data, then transform it.

								        Fits transformer to X and y with optional parameters fit_params
								        and returns a transformed version of X.

								        Parameters
								        ----------
								        X : numpy array of shape [n_samples, n_features]
								            Training set.

								        y : numpy array of shape [n_samples]
								            Target values. When None, ``fit`` is called without it.

								        **fit_params : dict
								            Extra keyword arguments forwarded to ``fit``.

								        Returns
								        -------
								        X_new : numpy array of shape [n_samples, n_features_new]
								            Transformed array.
								        """
								        # Generic fallback; subclasses should override it when fitting
								        # and transforming can be done more efficiently in one pass.
								        if y is not None:
								            # fit method of arity 2 (supervised transformation)
								            fitted = self.fit(X, y, **fit_params)
								        else:
								            # fit method of arity 1 (unsupervised transformation)
								            fitted = self.fit(X, **fit_params)
								        return fitted.transform(X)


								class DensityMixin(object):
								    """Mixin tagging density estimators in scikit-learn.

								    Classes using this mixin are tagged with
								    ``_estimator_type = "DensityEstimator"``.
								    """

								    _estimator_type = "DensityEstimator"

								    def score(self, X, y=None):
								        """Placeholder for the score of the model on the data X.

								        This default implementation does nothing and returns None.

								        Parameters
								        ----------
								        X : array-like, shape = (n_samples, n_features)

								        y : optional
								            Unused by this default implementation.

								        Returns
								        -------
								        score : float
								            Intended return type; this default returns None.
								        """
								        return None


								class OutlierMixin(object):
								    """Mixin adding a default ``fit_predict`` to outlier detectors.

								    Classes using this mixin are tagged with
								    ``_estimator_type = "outlier_detector"``.
								    """

								    _estimator_type = "outlier_detector"

								    def fit_predict(self, X, y=None):
								        """Fit the detector on X, then predict on that same X.

								        Returns -1 for outliers and 1 for inliers.

								        Parameters
								        ----------
								        X : ndarray, shape (n_samples, n_features)
								            Input data.

								        y : Ignored
								            not used, present for API consistency by convention.

								        Returns
								        -------
								        y : ndarray, shape (n_samples,)
								            1 for inliers, -1 for outliers.
								        """
								        # override for transductive outlier detectors like
								        # LocalOutlierFactor
								        fitted = self.fit(X)
								        return fitted.predict(X)


								###############################################################################

								class MetaEstimatorMixin(object):
								    """Marker mixin for all meta estimators in scikit-learn.

								    It defines no attributes or methods of its own.
								    """
								    # For now this class is only a tag: it adds no behaviour.


								###############################################################################


								def is_classifier(estimator):
								    """Tell whether the given estimator is (probably) a classifier.

								    The decision relies solely on the ``_estimator_type`` attribute.

								    Parameters
								    ----------
								    estimator : object
								        Estimator object to test.

								    Returns
								    -------
								    out : bool
								        True if estimator is a classifier and False otherwise.
								    """
								    # objects without the tag are treated as "not a classifier"
								    estimator_kind = getattr(estimator, "_estimator_type", None)
								    return estimator_kind == "classifier"


								def is_regressor(estimator):
								    """Tell whether the given estimator is (probably) a regressor.

								    The decision relies solely on the ``_estimator_type`` attribute.

								    Parameters
								    ----------
								    estimator : object
								        Estimator object to test.

								    Returns
								    -------
								    out : bool
								        True if estimator is a regressor and False otherwise.
								    """
								    # objects without the tag are treated as "not a regressor"
								    estimator_kind = getattr(estimator, "_estimator_type", None)
								    return estimator_kind == "regressor"


								def is_outlier_detector(estimator):
								    """Tell whether the given estimator is (probably) an outlier detector.

								    The decision relies solely on the ``_estimator_type`` attribute.

								    Parameters
								    ----------
								    estimator : object
								        Estimator object to test.

								    Returns
								    -------
								    out : bool
								        True if estimator is an outlier detector and False otherwise.
								    """
								    # objects without the tag are treated as "not an outlier detector"
								    estimator_kind = getattr(estimator, "_estimator_type", None)
								    return estimator_kind == "outlier_detector"