import numpy as np

from ..base import BaseEstimator, ClassifierMixin
from .testing import assert_true
from .validation import _num_samples, check_array


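# Minimal stand-in for a pandas ``.iloc`` indexer: slicing it returns a new
# MockDataFrame wrapping the sliced underlying array.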
class ArraySlicingWrapper(object):
    def __init__(self, array):
        self.array = array

    def __getitem__(self, aslice):
        return MockDataFrame(self.array[aslice])


class MockDataFrame(object):
    # have shape and length but don't support indexing.
    def __init__(self, array):
        self.array = array
        self.values = array
        self.shape = array.shape
        self.ndim = array.ndim
        # ugly hack to make iloc work.
        self.iloc = ArraySlicingWrapper(array)

    def __len__(self):
        return len(self.array)

    def __array__(self, dtype=None):
        # Pandas data frames also are array-like: we want to make sure that
        # input validation in cross-validation does not try to call that
        # method.
        return self.array

    def __eq__(self, other):
        return MockDataFrame(self.array == other.array)

    def __ne__(self, other):
        return not self == other

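
# Illustrative usage sketch (a hypothetical example, not part of this module):
# MockDataFrame mimics just enough of a pandas DataFrame (shape, len(),
# .values, .iloc slicing, __array__) for tests to exercise DataFrame handling
# without a pandas dependency.
#
#     df = MockDataFrame(np.arange(6).reshape(3, 2))
#     len(df)          # 3
#     df.shape         # (3, 2)
#     df.iloc[:2]      # MockDataFrame wrapping the first two rows
#     np.asarray(df)   # the underlying ndarray, via __array__

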
class CheckingClassifier(BaseEstimator, ClassifierMixin):
    """Dummy classifier to test pipelining and meta-estimators.

    Checks some property of X and y in fit / predict.
    This allows testing whether pipelines / cross-validation or
    meta-estimators changed the input.
    """
    def __init__(self, check_y=None, check_X=None, foo_param=0,
                 expected_fit_params=None):
        self.check_y = check_y
        self.check_X = check_X
        self.foo_param = foo_param
        self.expected_fit_params = expected_fit_params

    def fit(self, X, y, **fit_params):
        assert_true(len(X) == len(y))
        if self.check_X is not None:
            assert_true(self.check_X(X))
        if self.check_y is not None:
            assert_true(self.check_y(y))
        self.classes_ = np.unique(check_array(y, ensure_2d=False,
                                              allow_nd=True))
        if self.expected_fit_params:
            missing = set(self.expected_fit_params) - set(fit_params)
            assert_true(len(missing) == 0, 'Expected fit parameter(s) %s not '
                                           'seen.' % list(missing))
            for key, value in fit_params.items():
                assert_true(len(value) == len(X),
                            'Fit parameter %s has length %d; '
                            'expected %d.' % (key, len(value), len(X)))

        return self

    def predict(self, T):
        if self.check_X is not None:
            assert_true(self.check_X(T))
        # Always predict the first class seen in ``fit``.
        return self.classes_[np.zeros(_num_samples(T), dtype=int)]

    def score(self, X=None, Y=None):
        if self.foo_param > 1:
            score = 1.
        else:
            score = 0.
        return score
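

# Illustrative usage sketch (a hypothetical test, not part of this module):
# CheckingClassifier lets a test assert that a pipeline, cross-validation
# routine or other meta-estimator passes X, y and fit parameters through
# unchanged.
#
#     X = np.ones((10, 3))
#     y = np.arange(10) % 2
#
#     clf = CheckingClassifier(check_X=lambda X: X.shape[1] == 3,
#                              expected_fit_params=['sample_weight'])
#     clf.fit(X, y, sample_weight=np.ones(10))   # raises if a check fails
#     clf.predict(X)                             # always predicts classes_[0]
#
# ``foo_param`` only affects ``score`` (1.0 if foo_param > 1, else 0.0), which
# makes the expected outcome of grid searches over it easy to assert in tests.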