1039 lines
42 KiB
Python
1039 lines
42 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
from __future__ import division, print_function
|
||
|
|
||
|
from functools import partial
|
||
|
|
||
|
import pytest
|
||
|
import warnings
|
||
|
import numpy as np
|
||
|
|
||
|
import pandas as pd
|
||
|
from pandas import Series, isna
|
||
|
from pandas.core.dtypes.common import is_integer_dtype
|
||
|
import pandas.core.nanops as nanops
|
||
|
import pandas.util.testing as tm
|
||
|
import pandas.util._test_decorators as td
|
||
|
from pandas.compat.numpy import _np_version_under1p13
|
||
|
|
||
|
use_bn = nanops._USE_BOTTLENECK
|
||
|
|
||
|
|
||
|
class TestnanopsDataFrame(object):
|
||
|
|
||
|
def setup_method(self, method):
|
||
|
np.random.seed(11235)
|
||
|
nanops._USE_BOTTLENECK = False
|
||
|
|
||
|
self.arr_shape = (11, 7, 5)
|
||
|
|
||
|
self.arr_float = np.random.randn(*self.arr_shape)
|
||
|
self.arr_float1 = np.random.randn(*self.arr_shape)
|
||
|
self.arr_complex = self.arr_float + self.arr_float1 * 1j
|
||
|
self.arr_int = np.random.randint(-10, 10, self.arr_shape)
|
||
|
self.arr_bool = np.random.randint(0, 2, self.arr_shape) == 0
|
||
|
self.arr_str = np.abs(self.arr_float).astype('S')
|
||
|
self.arr_utf = np.abs(self.arr_float).astype('U')
|
||
|
self.arr_date = np.random.randint(0, 20000,
|
||
|
self.arr_shape).astype('M8[ns]')
|
||
|
self.arr_tdelta = np.random.randint(0, 20000,
|
||
|
self.arr_shape).astype('m8[ns]')
|
||
|
|
||
|
self.arr_nan = np.tile(np.nan, self.arr_shape)
|
||
|
self.arr_float_nan = np.vstack([self.arr_float, self.arr_nan])
|
||
|
self.arr_float1_nan = np.vstack([self.arr_float1, self.arr_nan])
|
||
|
self.arr_nan_float1 = np.vstack([self.arr_nan, self.arr_float1])
|
||
|
self.arr_nan_nan = np.vstack([self.arr_nan, self.arr_nan])
|
||
|
|
||
|
self.arr_inf = self.arr_float * np.inf
|
||
|
self.arr_float_inf = np.vstack([self.arr_float, self.arr_inf])
|
||
|
self.arr_float1_inf = np.vstack([self.arr_float1, self.arr_inf])
|
||
|
self.arr_inf_float1 = np.vstack([self.arr_inf, self.arr_float1])
|
||
|
self.arr_inf_inf = np.vstack([self.arr_inf, self.arr_inf])
|
||
|
|
||
|
self.arr_nan_inf = np.vstack([self.arr_nan, self.arr_inf])
|
||
|
self.arr_float_nan_inf = np.vstack([self.arr_float, self.arr_nan,
|
||
|
self.arr_inf])
|
||
|
self.arr_nan_float1_inf = np.vstack([self.arr_float, self.arr_inf,
|
||
|
self.arr_nan])
|
||
|
self.arr_nan_nan_inf = np.vstack([self.arr_nan, self.arr_nan,
|
||
|
self.arr_inf])
|
||
|
self.arr_obj = np.vstack([self.arr_float.astype(
|
||
|
'O'), self.arr_int.astype('O'), self.arr_bool.astype(
|
||
|
'O'), self.arr_complex.astype('O'), self.arr_str.astype(
|
||
|
'O'), self.arr_utf.astype('O'), self.arr_date.astype('O'),
|
||
|
self.arr_tdelta.astype('O')])
|
||
|
|
||
|
with np.errstate(invalid='ignore'):
|
||
|
self.arr_nan_nanj = self.arr_nan + self.arr_nan * 1j
|
||
|
self.arr_complex_nan = np.vstack([self.arr_complex,
|
||
|
self.arr_nan_nanj])
|
||
|
|
||
|
self.arr_nan_infj = self.arr_inf * 1j
|
||
|
self.arr_complex_nan_infj = np.vstack([self.arr_complex,
|
||
|
self.arr_nan_infj])
|
||
|
|
||
|
self.arr_float_2d = self.arr_float[:, :, 0]
|
||
|
self.arr_float1_2d = self.arr_float1[:, :, 0]
|
||
|
self.arr_complex_2d = self.arr_complex[:, :, 0]
|
||
|
self.arr_int_2d = self.arr_int[:, :, 0]
|
||
|
self.arr_bool_2d = self.arr_bool[:, :, 0]
|
||
|
self.arr_str_2d = self.arr_str[:, :, 0]
|
||
|
self.arr_utf_2d = self.arr_utf[:, :, 0]
|
||
|
self.arr_date_2d = self.arr_date[:, :, 0]
|
||
|
self.arr_tdelta_2d = self.arr_tdelta[:, :, 0]
|
||
|
|
||
|
self.arr_nan_2d = self.arr_nan[:, :, 0]
|
||
|
self.arr_float_nan_2d = self.arr_float_nan[:, :, 0]
|
||
|
self.arr_float1_nan_2d = self.arr_float1_nan[:, :, 0]
|
||
|
self.arr_nan_float1_2d = self.arr_nan_float1[:, :, 0]
|
||
|
self.arr_nan_nan_2d = self.arr_nan_nan[:, :, 0]
|
||
|
self.arr_nan_nanj_2d = self.arr_nan_nanj[:, :, 0]
|
||
|
self.arr_complex_nan_2d = self.arr_complex_nan[:, :, 0]
|
||
|
|
||
|
self.arr_inf_2d = self.arr_inf[:, :, 0]
|
||
|
self.arr_float_inf_2d = self.arr_float_inf[:, :, 0]
|
||
|
self.arr_nan_inf_2d = self.arr_nan_inf[:, :, 0]
|
||
|
self.arr_float_nan_inf_2d = self.arr_float_nan_inf[:, :, 0]
|
||
|
self.arr_nan_nan_inf_2d = self.arr_nan_nan_inf[:, :, 0]
|
||
|
|
||
|
self.arr_float_1d = self.arr_float[:, 0, 0]
|
||
|
self.arr_float1_1d = self.arr_float1[:, 0, 0]
|
||
|
self.arr_complex_1d = self.arr_complex[:, 0, 0]
|
||
|
self.arr_int_1d = self.arr_int[:, 0, 0]
|
||
|
self.arr_bool_1d = self.arr_bool[:, 0, 0]
|
||
|
self.arr_str_1d = self.arr_str[:, 0, 0]
|
||
|
self.arr_utf_1d = self.arr_utf[:, 0, 0]
|
||
|
self.arr_date_1d = self.arr_date[:, 0, 0]
|
||
|
self.arr_tdelta_1d = self.arr_tdelta[:, 0, 0]
|
||
|
|
||
|
self.arr_nan_1d = self.arr_nan[:, 0, 0]
|
||
|
self.arr_float_nan_1d = self.arr_float_nan[:, 0, 0]
|
||
|
self.arr_float1_nan_1d = self.arr_float1_nan[:, 0, 0]
|
||
|
self.arr_nan_float1_1d = self.arr_nan_float1[:, 0, 0]
|
||
|
self.arr_nan_nan_1d = self.arr_nan_nan[:, 0, 0]
|
||
|
self.arr_nan_nanj_1d = self.arr_nan_nanj[:, 0, 0]
|
||
|
self.arr_complex_nan_1d = self.arr_complex_nan[:, 0, 0]
|
||
|
|
||
|
self.arr_inf_1d = self.arr_inf.ravel()
|
||
|
self.arr_float_inf_1d = self.arr_float_inf[:, 0, 0]
|
||
|
self.arr_nan_inf_1d = self.arr_nan_inf[:, 0, 0]
|
||
|
self.arr_float_nan_inf_1d = self.arr_float_nan_inf[:, 0, 0]
|
||
|
self.arr_nan_nan_inf_1d = self.arr_nan_nan_inf[:, 0, 0]
|
||
|
|
||
|
def teardown_method(self, method):
|
||
|
nanops._USE_BOTTLENECK = use_bn
|
||
|
|
||
|
def check_results(self, targ, res, axis, check_dtype=True):
|
||
|
res = getattr(res, 'asm8', res)
|
||
|
res = getattr(res, 'values', res)
|
||
|
|
||
|
# timedeltas are a beast here
|
||
|
def _coerce_tds(targ, res):
|
||
|
if hasattr(targ, 'dtype') and targ.dtype == 'm8[ns]':
|
||
|
if len(targ) == 1:
|
||
|
targ = targ[0].item()
|
||
|
res = res.item()
|
||
|
else:
|
||
|
targ = targ.view('i8')
|
||
|
return targ, res
|
||
|
|
||
|
try:
|
||
|
if axis != 0 and hasattr(
|
||
|
targ, 'shape') and targ.ndim and targ.shape != res.shape:
|
||
|
res = np.split(res, [targ.shape[0]], axis=0)[0]
|
||
|
except:
|
||
|
targ, res = _coerce_tds(targ, res)
|
||
|
|
||
|
try:
|
||
|
tm.assert_almost_equal(targ, res, check_dtype=check_dtype)
|
||
|
except:
|
||
|
|
||
|
# handle timedelta dtypes
|
||
|
if hasattr(targ, 'dtype') and targ.dtype == 'm8[ns]':
|
||
|
targ, res = _coerce_tds(targ, res)
|
||
|
tm.assert_almost_equal(targ, res, check_dtype=check_dtype)
|
||
|
return
|
||
|
|
||
|
# There are sometimes rounding errors with
|
||
|
# complex and object dtypes.
|
||
|
# If it isn't one of those, re-raise the error.
|
||
|
if not hasattr(res, 'dtype') or res.dtype.kind not in ['c', 'O']:
|
||
|
raise
|
||
|
# convert object dtypes to something that can be split into
|
||
|
# real and imaginary parts
|
||
|
if res.dtype.kind == 'O':
|
||
|
if targ.dtype.kind != 'O':
|
||
|
res = res.astype(targ.dtype)
|
||
|
else:
|
||
|
try:
|
||
|
res = res.astype('c16')
|
||
|
except:
|
||
|
res = res.astype('f8')
|
||
|
try:
|
||
|
targ = targ.astype('c16')
|
||
|
except:
|
||
|
targ = targ.astype('f8')
|
||
|
# there should never be a case where numpy returns an object
|
||
|
# but nanops doesn't, so make that an exception
|
||
|
elif targ.dtype.kind == 'O':
|
||
|
raise
|
||
|
tm.assert_almost_equal(targ.real, res.real,
|
||
|
check_dtype=check_dtype)
|
||
|
tm.assert_almost_equal(targ.imag, res.imag,
|
||
|
check_dtype=check_dtype)
|
||
|
|
||
|
def check_fun_data(self, testfunc, targfunc, testarval, targarval,
|
||
|
targarnanval, check_dtype=True, empty_targfunc=None,
|
||
|
**kwargs):
|
||
|
for axis in list(range(targarval.ndim)) + [None]:
|
||
|
for skipna in [False, True]:
|
||
|
targartempval = targarval if skipna else targarnanval
|
||
|
if skipna and empty_targfunc and isna(targartempval).all():
|
||
|
targ = empty_targfunc(targartempval, axis=axis, **kwargs)
|
||
|
else:
|
||
|
targ = targfunc(targartempval, axis=axis, **kwargs)
|
||
|
|
||
|
try:
|
||
|
res = testfunc(testarval, axis=axis, skipna=skipna,
|
||
|
**kwargs)
|
||
|
self.check_results(targ, res, axis,
|
||
|
check_dtype=check_dtype)
|
||
|
if skipna:
|
||
|
res = testfunc(testarval, axis=axis, **kwargs)
|
||
|
self.check_results(targ, res, axis,
|
||
|
check_dtype=check_dtype)
|
||
|
if axis is None:
|
||
|
res = testfunc(testarval, skipna=skipna, **kwargs)
|
||
|
self.check_results(targ, res, axis,
|
||
|
check_dtype=check_dtype)
|
||
|
if skipna and axis is None:
|
||
|
res = testfunc(testarval, **kwargs)
|
||
|
self.check_results(targ, res, axis,
|
||
|
check_dtype=check_dtype)
|
||
|
except BaseException as exc:
|
||
|
exc.args += ('axis: %s of %s' % (axis, testarval.ndim - 1),
|
||
|
'skipna: %s' % skipna, 'kwargs: %s' % kwargs)
|
||
|
raise
|
||
|
|
||
|
if testarval.ndim <= 1:
|
||
|
return
|
||
|
|
||
|
try:
|
||
|
testarval2 = np.take(testarval, 0, axis=-1)
|
||
|
targarval2 = np.take(targarval, 0, axis=-1)
|
||
|
targarnanval2 = np.take(targarnanval, 0, axis=-1)
|
||
|
except ValueError:
|
||
|
return
|
||
|
self.check_fun_data(testfunc, targfunc, testarval2, targarval2,
|
||
|
targarnanval2, check_dtype=check_dtype,
|
||
|
empty_targfunc=empty_targfunc, **kwargs)
|
||
|
|
||
|
def check_fun(self, testfunc, targfunc, testar, targar=None,
|
||
|
targarnan=None, empty_targfunc=None, **kwargs):
|
||
|
if targar is None:
|
||
|
targar = testar
|
||
|
if targarnan is None:
|
||
|
targarnan = testar
|
||
|
testarval = getattr(self, testar)
|
||
|
targarval = getattr(self, targar)
|
||
|
targarnanval = getattr(self, targarnan)
|
||
|
try:
|
||
|
self.check_fun_data(testfunc, targfunc, testarval, targarval,
|
||
|
targarnanval, empty_targfunc=empty_targfunc,
|
||
|
**kwargs)
|
||
|
except BaseException as exc:
|
||
|
exc.args += ('testar: %s' % testar, 'targar: %s' % targar,
|
||
|
'targarnan: %s' % targarnan)
|
||
|
raise
|
||
|
|
||
|
def check_funs(self, testfunc, targfunc, allow_complex=True,
|
||
|
allow_all_nan=True, allow_str=True, allow_date=True,
|
||
|
allow_tdelta=True, allow_obj=True, **kwargs):
|
||
|
self.check_fun(testfunc, targfunc, 'arr_float', **kwargs)
|
||
|
self.check_fun(testfunc, targfunc, 'arr_float_nan', 'arr_float',
|
||
|
**kwargs)
|
||
|
self.check_fun(testfunc, targfunc, 'arr_int', **kwargs)
|
||
|
self.check_fun(testfunc, targfunc, 'arr_bool', **kwargs)
|
||
|
objs = [self.arr_float.astype('O'), self.arr_int.astype('O'),
|
||
|
self.arr_bool.astype('O')]
|
||
|
|
||
|
if allow_all_nan:
|
||
|
self.check_fun(testfunc, targfunc, 'arr_nan', **kwargs)
|
||
|
|
||
|
if allow_complex:
|
||
|
self.check_fun(testfunc, targfunc, 'arr_complex', **kwargs)
|
||
|
self.check_fun(testfunc, targfunc, 'arr_complex_nan',
|
||
|
'arr_complex', **kwargs)
|
||
|
if allow_all_nan:
|
||
|
self.check_fun(testfunc, targfunc, 'arr_nan_nanj', **kwargs)
|
||
|
objs += [self.arr_complex.astype('O')]
|
||
|
|
||
|
if allow_str:
|
||
|
self.check_fun(testfunc, targfunc, 'arr_str', **kwargs)
|
||
|
self.check_fun(testfunc, targfunc, 'arr_utf', **kwargs)
|
||
|
objs += [self.arr_str.astype('O'), self.arr_utf.astype('O')]
|
||
|
|
||
|
if allow_date:
|
||
|
try:
|
||
|
targfunc(self.arr_date)
|
||
|
except TypeError:
|
||
|
pass
|
||
|
else:
|
||
|
self.check_fun(testfunc, targfunc, 'arr_date', **kwargs)
|
||
|
objs += [self.arr_date.astype('O')]
|
||
|
|
||
|
if allow_tdelta:
|
||
|
try:
|
||
|
targfunc(self.arr_tdelta)
|
||
|
except TypeError:
|
||
|
pass
|
||
|
else:
|
||
|
self.check_fun(testfunc, targfunc, 'arr_tdelta', **kwargs)
|
||
|
objs += [self.arr_tdelta.astype('O')]
|
||
|
|
||
|
if allow_obj:
|
||
|
self.arr_obj = np.vstack(objs)
|
||
|
# some nanops handle object dtypes better than their numpy
|
||
|
# counterparts, so the numpy functions need to be given something
|
||
|
# else
|
||
|
if allow_obj == 'convert':
|
||
|
targfunc = partial(self._badobj_wrap, func=targfunc,
|
||
|
allow_complex=allow_complex)
|
||
|
self.check_fun(testfunc, targfunc, 'arr_obj', **kwargs)
|
||
|
|
||
|
def _badobj_wrap(self, value, func, allow_complex=True, **kwargs):
|
||
|
if value.dtype.kind == 'O':
|
||
|
if allow_complex:
|
||
|
value = value.astype('c16')
|
||
|
else:
|
||
|
value = value.astype('f8')
|
||
|
return func(value, **kwargs)
|
||
|
|
||
|
def test_nanany(self):
|
||
|
self.check_funs(nanops.nanany, np.any, allow_all_nan=False,
|
||
|
allow_str=False, allow_date=False, allow_tdelta=False)
|
||
|
|
||
|
def test_nanall(self):
|
||
|
self.check_funs(nanops.nanall, np.all, allow_all_nan=False,
|
||
|
allow_str=False, allow_date=False, allow_tdelta=False)
|
||
|
|
||
|
def test_nansum(self):
|
||
|
self.check_funs(nanops.nansum, np.sum, allow_str=False,
|
||
|
allow_date=False, allow_tdelta=True, check_dtype=False,
|
||
|
empty_targfunc=np.nansum)
|
||
|
|
||
|
def test_nanmean(self):
|
||
|
self.check_funs(nanops.nanmean, np.mean, allow_complex=False,
|
||
|
allow_obj=False, allow_str=False, allow_date=False,
|
||
|
allow_tdelta=True)
|
||
|
|
||
|
def test_nanmean_overflow(self):
|
||
|
# GH 10155
|
||
|
# In the previous implementation mean can overflow for int dtypes, it
|
||
|
# is now consistent with numpy
|
||
|
|
||
|
for a in [2 ** 55, -2 ** 55, 20150515061816532]:
|
||
|
s = Series(a, index=range(500), dtype=np.int64)
|
||
|
result = s.mean()
|
||
|
np_result = s.values.mean()
|
||
|
assert result == a
|
||
|
assert result == np_result
|
||
|
assert result.dtype == np.float64
|
||
|
|
||
|
def test_returned_dtype(self):
|
||
|
|
||
|
dtypes = [np.int16, np.int32, np.int64, np.float32, np.float64]
|
||
|
if hasattr(np, 'float128'):
|
||
|
dtypes.append(np.float128)
|
||
|
|
||
|
for dtype in dtypes:
|
||
|
s = Series(range(10), dtype=dtype)
|
||
|
group_a = ['mean', 'std', 'var', 'skew', 'kurt']
|
||
|
group_b = ['min', 'max']
|
||
|
for method in group_a + group_b:
|
||
|
result = getattr(s, method)()
|
||
|
if is_integer_dtype(dtype) and method in group_a:
|
||
|
assert result.dtype == np.float64
|
||
|
else:
|
||
|
assert result.dtype == dtype
|
||
|
|
||
|
def test_nanmedian(self):
|
||
|
with warnings.catch_warnings(record=True):
|
||
|
self.check_funs(nanops.nanmedian, np.median, allow_complex=False,
|
||
|
allow_str=False, allow_date=False,
|
||
|
allow_tdelta=True, allow_obj='convert')
|
||
|
|
||
|
@pytest.mark.parametrize('ddof', range(3))
|
||
|
def test_nanvar(self, ddof):
|
||
|
self.check_funs(nanops.nanvar, np.var, allow_complex=False,
|
||
|
allow_str=False, allow_date=False,
|
||
|
allow_tdelta=True, allow_obj='convert', ddof=ddof)
|
||
|
|
||
|
@pytest.mark.parametrize('ddof', range(3))
|
||
|
def test_nanstd(self, ddof):
|
||
|
self.check_funs(nanops.nanstd, np.std, allow_complex=False,
|
||
|
allow_str=False, allow_date=False,
|
||
|
allow_tdelta=True, allow_obj='convert', ddof=ddof)
|
||
|
|
||
|
@td.skip_if_no('scipy', min_version='0.17.0')
|
||
|
@pytest.mark.parametrize('ddof', range(3))
|
||
|
def test_nansem(self, ddof):
|
||
|
from scipy.stats import sem
|
||
|
with np.errstate(invalid='ignore'):
|
||
|
self.check_funs(nanops.nansem, sem, allow_complex=False,
|
||
|
allow_str=False, allow_date=False,
|
||
|
allow_tdelta=False, allow_obj='convert', ddof=ddof)
|
||
|
|
||
|
def _minmax_wrap(self, value, axis=None, func=None):
|
||
|
|
||
|
# numpy warns if all nan
|
||
|
res = func(value, axis)
|
||
|
if res.dtype.kind == 'm':
|
||
|
res = np.atleast_1d(res)
|
||
|
return res
|
||
|
|
||
|
def test_nanmin(self):
|
||
|
with warnings.catch_warnings(record=True):
|
||
|
func = partial(self._minmax_wrap, func=np.min)
|
||
|
self.check_funs(nanops.nanmin, func,
|
||
|
allow_str=False, allow_obj=False)
|
||
|
|
||
|
def test_nanmax(self):
|
||
|
with warnings.catch_warnings(record=True):
|
||
|
func = partial(self._minmax_wrap, func=np.max)
|
||
|
self.check_funs(nanops.nanmax, func,
|
||
|
allow_str=False, allow_obj=False)
|
||
|
|
||
|
def _argminmax_wrap(self, value, axis=None, func=None):
|
||
|
res = func(value, axis)
|
||
|
nans = np.min(value, axis)
|
||
|
nullnan = isna(nans)
|
||
|
if res.ndim:
|
||
|
res[nullnan] = -1
|
||
|
elif (hasattr(nullnan, 'all') and nullnan.all() or
|
||
|
not hasattr(nullnan, 'all') and nullnan):
|
||
|
res = -1
|
||
|
return res
|
||
|
|
||
|
def test_nanargmax(self):
|
||
|
with warnings.catch_warnings(record=True):
|
||
|
func = partial(self._argminmax_wrap, func=np.argmax)
|
||
|
self.check_funs(nanops.nanargmax, func,
|
||
|
allow_str=False, allow_obj=False,
|
||
|
allow_date=True, allow_tdelta=True)
|
||
|
|
||
|
def test_nanargmin(self):
|
||
|
with warnings.catch_warnings(record=True):
|
||
|
func = partial(self._argminmax_wrap, func=np.argmin)
|
||
|
self.check_funs(nanops.nanargmin, func, allow_str=False,
|
||
|
allow_obj=False)
|
||
|
|
||
|
def _skew_kurt_wrap(self, values, axis=None, func=None):
|
||
|
if not isinstance(values.dtype.type, np.floating):
|
||
|
values = values.astype('f8')
|
||
|
result = func(values, axis=axis, bias=False)
|
||
|
# fix for handling cases where all elements in an axis are the same
|
||
|
if isinstance(result, np.ndarray):
|
||
|
result[np.max(values, axis=axis) == np.min(values, axis=axis)] = 0
|
||
|
return result
|
||
|
elif np.max(values) == np.min(values):
|
||
|
return 0.
|
||
|
return result
|
||
|
|
||
|
@td.skip_if_no('scipy', min_version='0.17.0')
|
||
|
def test_nanskew(self):
|
||
|
from scipy.stats import skew
|
||
|
func = partial(self._skew_kurt_wrap, func=skew)
|
||
|
with np.errstate(invalid='ignore'):
|
||
|
self.check_funs(nanops.nanskew, func, allow_complex=False,
|
||
|
allow_str=False, allow_date=False,
|
||
|
allow_tdelta=False)
|
||
|
|
||
|
@td.skip_if_no('scipy', min_version='0.17.0')
|
||
|
def test_nankurt(self):
|
||
|
from scipy.stats import kurtosis
|
||
|
func1 = partial(kurtosis, fisher=True)
|
||
|
func = partial(self._skew_kurt_wrap, func=func1)
|
||
|
with np.errstate(invalid='ignore'):
|
||
|
self.check_funs(nanops.nankurt, func, allow_complex=False,
|
||
|
allow_str=False, allow_date=False,
|
||
|
allow_tdelta=False)
|
||
|
|
||
|
@td.skip_if_no("numpy", min_version="1.10.0")
|
||
|
def test_nanprod(self):
|
||
|
self.check_funs(nanops.nanprod, np.prod, allow_str=False,
|
||
|
allow_date=False, allow_tdelta=False,
|
||
|
empty_targfunc=np.nanprod)
|
||
|
|
||
|
def check_nancorr_nancov_2d(self, checkfun, targ0, targ1, **kwargs):
|
||
|
res00 = checkfun(self.arr_float_2d, self.arr_float1_2d, **kwargs)
|
||
|
res01 = checkfun(self.arr_float_2d, self.arr_float1_2d,
|
||
|
min_periods=len(self.arr_float_2d) - 1, **kwargs)
|
||
|
tm.assert_almost_equal(targ0, res00)
|
||
|
tm.assert_almost_equal(targ0, res01)
|
||
|
|
||
|
res10 = checkfun(self.arr_float_nan_2d, self.arr_float1_nan_2d,
|
||
|
**kwargs)
|
||
|
res11 = checkfun(self.arr_float_nan_2d, self.arr_float1_nan_2d,
|
||
|
min_periods=len(self.arr_float_2d) - 1, **kwargs)
|
||
|
tm.assert_almost_equal(targ1, res10)
|
||
|
tm.assert_almost_equal(targ1, res11)
|
||
|
|
||
|
targ2 = np.nan
|
||
|
res20 = checkfun(self.arr_nan_2d, self.arr_float1_2d, **kwargs)
|
||
|
res21 = checkfun(self.arr_float_2d, self.arr_nan_2d, **kwargs)
|
||
|
res22 = checkfun(self.arr_nan_2d, self.arr_nan_2d, **kwargs)
|
||
|
res23 = checkfun(self.arr_float_nan_2d, self.arr_nan_float1_2d,
|
||
|
**kwargs)
|
||
|
res24 = checkfun(self.arr_float_nan_2d, self.arr_nan_float1_2d,
|
||
|
min_periods=len(self.arr_float_2d) - 1, **kwargs)
|
||
|
res25 = checkfun(self.arr_float_2d, self.arr_float1_2d,
|
||
|
min_periods=len(self.arr_float_2d) + 1, **kwargs)
|
||
|
tm.assert_almost_equal(targ2, res20)
|
||
|
tm.assert_almost_equal(targ2, res21)
|
||
|
tm.assert_almost_equal(targ2, res22)
|
||
|
tm.assert_almost_equal(targ2, res23)
|
||
|
tm.assert_almost_equal(targ2, res24)
|
||
|
tm.assert_almost_equal(targ2, res25)
|
||
|
|
||
|
def check_nancorr_nancov_1d(self, checkfun, targ0, targ1, **kwargs):
|
||
|
res00 = checkfun(self.arr_float_1d, self.arr_float1_1d, **kwargs)
|
||
|
res01 = checkfun(self.arr_float_1d, self.arr_float1_1d,
|
||
|
min_periods=len(self.arr_float_1d) - 1, **kwargs)
|
||
|
tm.assert_almost_equal(targ0, res00)
|
||
|
tm.assert_almost_equal(targ0, res01)
|
||
|
|
||
|
res10 = checkfun(self.arr_float_nan_1d, self.arr_float1_nan_1d,
|
||
|
**kwargs)
|
||
|
res11 = checkfun(self.arr_float_nan_1d, self.arr_float1_nan_1d,
|
||
|
min_periods=len(self.arr_float_1d) - 1, **kwargs)
|
||
|
tm.assert_almost_equal(targ1, res10)
|
||
|
tm.assert_almost_equal(targ1, res11)
|
||
|
|
||
|
targ2 = np.nan
|
||
|
res20 = checkfun(self.arr_nan_1d, self.arr_float1_1d, **kwargs)
|
||
|
res21 = checkfun(self.arr_float_1d, self.arr_nan_1d, **kwargs)
|
||
|
res22 = checkfun(self.arr_nan_1d, self.arr_nan_1d, **kwargs)
|
||
|
res23 = checkfun(self.arr_float_nan_1d, self.arr_nan_float1_1d,
|
||
|
**kwargs)
|
||
|
res24 = checkfun(self.arr_float_nan_1d, self.arr_nan_float1_1d,
|
||
|
min_periods=len(self.arr_float_1d) - 1, **kwargs)
|
||
|
res25 = checkfun(self.arr_float_1d, self.arr_float1_1d,
|
||
|
min_periods=len(self.arr_float_1d) + 1, **kwargs)
|
||
|
tm.assert_almost_equal(targ2, res20)
|
||
|
tm.assert_almost_equal(targ2, res21)
|
||
|
tm.assert_almost_equal(targ2, res22)
|
||
|
tm.assert_almost_equal(targ2, res23)
|
||
|
tm.assert_almost_equal(targ2, res24)
|
||
|
tm.assert_almost_equal(targ2, res25)
|
||
|
|
||
|
def test_nancorr(self):
|
||
|
targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
|
||
|
targ1 = np.corrcoef(self.arr_float_2d.flat,
|
||
|
self.arr_float1_2d.flat)[0, 1]
|
||
|
self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1)
|
||
|
targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1]
|
||
|
targ1 = np.corrcoef(self.arr_float_1d.flat,
|
||
|
self.arr_float1_1d.flat)[0, 1]
|
||
|
self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1,
|
||
|
method='pearson')
|
||
|
|
||
|
def test_nancorr_pearson(self):
|
||
|
targ0 = np.corrcoef(self.arr_float_2d, self.arr_float1_2d)[0, 1]
|
||
|
targ1 = np.corrcoef(self.arr_float_2d.flat,
|
||
|
self.arr_float1_2d.flat)[0, 1]
|
||
|
self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1,
|
||
|
method='pearson')
|
||
|
targ0 = np.corrcoef(self.arr_float_1d, self.arr_float1_1d)[0, 1]
|
||
|
targ1 = np.corrcoef(self.arr_float_1d.flat,
|
||
|
self.arr_float1_1d.flat)[0, 1]
|
||
|
self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1,
|
||
|
method='pearson')
|
||
|
|
||
|
@td.skip_if_no_scipy
|
||
|
def test_nancorr_kendall(self):
|
||
|
from scipy.stats import kendalltau
|
||
|
targ0 = kendalltau(self.arr_float_2d, self.arr_float1_2d)[0]
|
||
|
targ1 = kendalltau(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0]
|
||
|
self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1,
|
||
|
method='kendall')
|
||
|
targ0 = kendalltau(self.arr_float_1d, self.arr_float1_1d)[0]
|
||
|
targ1 = kendalltau(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0]
|
||
|
self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1,
|
||
|
method='kendall')
|
||
|
|
||
|
@td.skip_if_no_scipy
|
||
|
def test_nancorr_spearman(self):
|
||
|
from scipy.stats import spearmanr
|
||
|
targ0 = spearmanr(self.arr_float_2d, self.arr_float1_2d)[0]
|
||
|
targ1 = spearmanr(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0]
|
||
|
self.check_nancorr_nancov_2d(nanops.nancorr, targ0, targ1,
|
||
|
method='spearman')
|
||
|
targ0 = spearmanr(self.arr_float_1d, self.arr_float1_1d)[0]
|
||
|
targ1 = spearmanr(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0]
|
||
|
self.check_nancorr_nancov_1d(nanops.nancorr, targ0, targ1,
|
||
|
method='spearman')
|
||
|
|
||
|
def test_nancov(self):
|
||
|
targ0 = np.cov(self.arr_float_2d, self.arr_float1_2d)[0, 1]
|
||
|
targ1 = np.cov(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1]
|
||
|
self.check_nancorr_nancov_2d(nanops.nancov, targ0, targ1)
|
||
|
targ0 = np.cov(self.arr_float_1d, self.arr_float1_1d)[0, 1]
|
||
|
targ1 = np.cov(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
|
||
|
self.check_nancorr_nancov_1d(nanops.nancov, targ0, targ1)
|
||
|
|
||
|
def check_nancomp(self, checkfun, targ0):
|
||
|
arr_float = self.arr_float
|
||
|
arr_float1 = self.arr_float1
|
||
|
arr_nan = self.arr_nan
|
||
|
arr_nan_nan = self.arr_nan_nan
|
||
|
arr_float_nan = self.arr_float_nan
|
||
|
arr_float1_nan = self.arr_float1_nan
|
||
|
arr_nan_float1 = self.arr_nan_float1
|
||
|
|
||
|
while targ0.ndim:
|
||
|
try:
|
||
|
res0 = checkfun(arr_float, arr_float1)
|
||
|
tm.assert_almost_equal(targ0, res0)
|
||
|
|
||
|
if targ0.ndim > 1:
|
||
|
targ1 = np.vstack([targ0, arr_nan])
|
||
|
else:
|
||
|
targ1 = np.hstack([targ0, arr_nan])
|
||
|
res1 = checkfun(arr_float_nan, arr_float1_nan)
|
||
|
tm.assert_numpy_array_equal(targ1, res1, check_dtype=False)
|
||
|
|
||
|
targ2 = arr_nan_nan
|
||
|
res2 = checkfun(arr_float_nan, arr_nan_float1)
|
||
|
tm.assert_numpy_array_equal(targ2, res2, check_dtype=False)
|
||
|
except Exception as exc:
|
||
|
exc.args += ('ndim: %s' % arr_float.ndim, )
|
||
|
raise
|
||
|
|
||
|
try:
|
||
|
arr_float = np.take(arr_float, 0, axis=-1)
|
||
|
arr_float1 = np.take(arr_float1, 0, axis=-1)
|
||
|
arr_nan = np.take(arr_nan, 0, axis=-1)
|
||
|
arr_nan_nan = np.take(arr_nan_nan, 0, axis=-1)
|
||
|
arr_float_nan = np.take(arr_float_nan, 0, axis=-1)
|
||
|
arr_float1_nan = np.take(arr_float1_nan, 0, axis=-1)
|
||
|
arr_nan_float1 = np.take(arr_nan_float1, 0, axis=-1)
|
||
|
targ0 = np.take(targ0, 0, axis=-1)
|
||
|
except ValueError:
|
||
|
break
|
||
|
|
||
|
def test_nangt(self):
|
||
|
targ0 = self.arr_float > self.arr_float1
|
||
|
self.check_nancomp(nanops.nangt, targ0)
|
||
|
|
||
|
def test_nange(self):
|
||
|
targ0 = self.arr_float >= self.arr_float1
|
||
|
self.check_nancomp(nanops.nange, targ0)
|
||
|
|
||
|
def test_nanlt(self):
|
||
|
targ0 = self.arr_float < self.arr_float1
|
||
|
self.check_nancomp(nanops.nanlt, targ0)
|
||
|
|
||
|
def test_nanle(self):
|
||
|
targ0 = self.arr_float <= self.arr_float1
|
||
|
self.check_nancomp(nanops.nanle, targ0)
|
||
|
|
||
|
def test_naneq(self):
|
||
|
targ0 = self.arr_float == self.arr_float1
|
||
|
self.check_nancomp(nanops.naneq, targ0)
|
||
|
|
||
|
def test_nanne(self):
|
||
|
targ0 = self.arr_float != self.arr_float1
|
||
|
self.check_nancomp(nanops.nanne, targ0)
|
||
|
|
||
|
def check_bool(self, func, value, correct, *args, **kwargs):
|
||
|
while getattr(value, 'ndim', True):
|
||
|
try:
|
||
|
res0 = func(value, *args, **kwargs)
|
||
|
if correct:
|
||
|
assert res0
|
||
|
else:
|
||
|
assert not res0
|
||
|
except BaseException as exc:
|
||
|
exc.args += ('dim: %s' % getattr(value, 'ndim', value), )
|
||
|
raise
|
||
|
if not hasattr(value, 'ndim'):
|
||
|
break
|
||
|
try:
|
||
|
value = np.take(value, 0, axis=-1)
|
||
|
except ValueError:
|
||
|
break
|
||
|
|
||
|
def test__has_infs(self):
|
||
|
pairs = [('arr_complex', False), ('arr_int', False),
|
||
|
('arr_bool', False), ('arr_str', False), ('arr_utf', False),
|
||
|
('arr_complex', False), ('arr_complex_nan', False),
|
||
|
('arr_nan_nanj', False), ('arr_nan_infj', True),
|
||
|
('arr_complex_nan_infj', True)]
|
||
|
pairs_float = [('arr_float', False), ('arr_nan', False),
|
||
|
('arr_float_nan', False), ('arr_nan_nan', False),
|
||
|
('arr_float_inf', True), ('arr_inf', True),
|
||
|
('arr_nan_inf', True), ('arr_float_nan_inf', True),
|
||
|
('arr_nan_nan_inf', True)]
|
||
|
|
||
|
for arr, correct in pairs:
|
||
|
val = getattr(self, arr)
|
||
|
try:
|
||
|
self.check_bool(nanops._has_infs, val, correct)
|
||
|
except BaseException as exc:
|
||
|
exc.args += (arr, )
|
||
|
raise
|
||
|
|
||
|
for arr, correct in pairs_float:
|
||
|
val = getattr(self, arr)
|
||
|
try:
|
||
|
self.check_bool(nanops._has_infs, val, correct)
|
||
|
self.check_bool(nanops._has_infs, val.astype('f4'), correct)
|
||
|
self.check_bool(nanops._has_infs, val.astype('f2'), correct)
|
||
|
except BaseException as exc:
|
||
|
exc.args += (arr, )
|
||
|
raise
|
||
|
|
||
|
def test__isfinite(self):
|
||
|
pairs = [('arr_complex', False), ('arr_int', False),
|
||
|
('arr_bool', False), ('arr_str', False), ('arr_utf', False),
|
||
|
('arr_complex', False), ('arr_complex_nan', True),
|
||
|
('arr_nan_nanj', True), ('arr_nan_infj', True),
|
||
|
('arr_complex_nan_infj', True)]
|
||
|
pairs_float = [('arr_float', False), ('arr_nan', True),
|
||
|
('arr_float_nan', True), ('arr_nan_nan', True),
|
||
|
('arr_float_inf', True), ('arr_inf', True),
|
||
|
('arr_nan_inf', True), ('arr_float_nan_inf', True),
|
||
|
('arr_nan_nan_inf', True)]
|
||
|
|
||
|
func1 = lambda x: np.any(nanops._isfinite(x).ravel())
|
||
|
|
||
|
# TODO: unused?
|
||
|
# func2 = lambda x: np.any(nanops._isfinite(x).values.ravel())
|
||
|
|
||
|
for arr, correct in pairs:
|
||
|
val = getattr(self, arr)
|
||
|
try:
|
||
|
self.check_bool(func1, val, correct)
|
||
|
except BaseException as exc:
|
||
|
exc.args += (arr, )
|
||
|
raise
|
||
|
|
||
|
for arr, correct in pairs_float:
|
||
|
val = getattr(self, arr)
|
||
|
try:
|
||
|
self.check_bool(func1, val, correct)
|
||
|
self.check_bool(func1, val.astype('f4'), correct)
|
||
|
self.check_bool(func1, val.astype('f2'), correct)
|
||
|
except BaseException as exc:
|
||
|
exc.args += (arr, )
|
||
|
raise
|
||
|
|
||
|
def test__bn_ok_dtype(self):
|
||
|
assert nanops._bn_ok_dtype(self.arr_float.dtype, 'test')
|
||
|
assert nanops._bn_ok_dtype(self.arr_complex.dtype, 'test')
|
||
|
assert nanops._bn_ok_dtype(self.arr_int.dtype, 'test')
|
||
|
assert nanops._bn_ok_dtype(self.arr_bool.dtype, 'test')
|
||
|
assert nanops._bn_ok_dtype(self.arr_str.dtype, 'test')
|
||
|
assert nanops._bn_ok_dtype(self.arr_utf.dtype, 'test')
|
||
|
assert not nanops._bn_ok_dtype(self.arr_date.dtype, 'test')
|
||
|
assert not nanops._bn_ok_dtype(self.arr_tdelta.dtype, 'test')
|
||
|
assert not nanops._bn_ok_dtype(self.arr_obj.dtype, 'test')
|
||
|
|
||
|
|
||
|
class TestEnsureNumeric(object):
|
||
|
|
||
|
def test_numeric_values(self):
|
||
|
# Test integer
|
||
|
assert nanops._ensure_numeric(1) == 1
|
||
|
|
||
|
# Test float
|
||
|
assert nanops._ensure_numeric(1.1) == 1.1
|
||
|
|
||
|
# Test complex
|
||
|
assert nanops._ensure_numeric(1 + 2j) == 1 + 2j
|
||
|
|
||
|
def test_ndarray(self):
|
||
|
# Test numeric ndarray
|
||
|
values = np.array([1, 2, 3])
|
||
|
assert np.allclose(nanops._ensure_numeric(values), values)
|
||
|
|
||
|
# Test object ndarray
|
||
|
o_values = values.astype(object)
|
||
|
assert np.allclose(nanops._ensure_numeric(o_values), values)
|
||
|
|
||
|
# Test convertible string ndarray
|
||
|
s_values = np.array(['1', '2', '3'], dtype=object)
|
||
|
assert np.allclose(nanops._ensure_numeric(s_values), values)
|
||
|
|
||
|
# Test non-convertible string ndarray
|
||
|
s_values = np.array(['foo', 'bar', 'baz'], dtype=object)
|
||
|
pytest.raises(ValueError, lambda: nanops._ensure_numeric(s_values))
|
||
|
|
||
|
def test_convertable_values(self):
|
||
|
assert np.allclose(nanops._ensure_numeric('1'), 1.0)
|
||
|
assert np.allclose(nanops._ensure_numeric('1.1'), 1.1)
|
||
|
assert np.allclose(nanops._ensure_numeric('1+1j'), 1 + 1j)
|
||
|
|
||
|
def test_non_convertable_values(self):
|
||
|
pytest.raises(TypeError, lambda: nanops._ensure_numeric('foo'))
|
||
|
pytest.raises(TypeError, lambda: nanops._ensure_numeric({}))
|
||
|
pytest.raises(TypeError, lambda: nanops._ensure_numeric([]))
|
||
|
|
||
|
|
||
|
class TestNanvarFixedValues(object):
|
||
|
|
||
|
# xref GH10242
|
||
|
|
||
|
def setup_method(self, method):
|
||
|
# Samples from a normal distribution.
|
||
|
self.variance = variance = 3.0
|
||
|
self.samples = self.prng.normal(scale=variance ** 0.5, size=100000)
|
||
|
|
||
|
def test_nanvar_all_finite(self):
|
||
|
samples = self.samples
|
||
|
actual_variance = nanops.nanvar(samples)
|
||
|
tm.assert_almost_equal(actual_variance, self.variance,
|
||
|
check_less_precise=2)
|
||
|
|
||
|
def test_nanvar_nans(self):
|
||
|
samples = np.nan * np.ones(2 * self.samples.shape[0])
|
||
|
samples[::2] = self.samples
|
||
|
|
||
|
actual_variance = nanops.nanvar(samples, skipna=True)
|
||
|
tm.assert_almost_equal(actual_variance, self.variance,
|
||
|
check_less_precise=2)
|
||
|
|
||
|
actual_variance = nanops.nanvar(samples, skipna=False)
|
||
|
tm.assert_almost_equal(actual_variance, np.nan, check_less_precise=2)
|
||
|
|
||
|
def test_nanstd_nans(self):
|
||
|
samples = np.nan * np.ones(2 * self.samples.shape[0])
|
||
|
samples[::2] = self.samples
|
||
|
|
||
|
actual_std = nanops.nanstd(samples, skipna=True)
|
||
|
tm.assert_almost_equal(actual_std, self.variance ** 0.5,
|
||
|
check_less_precise=2)
|
||
|
|
||
|
actual_std = nanops.nanvar(samples, skipna=False)
|
||
|
tm.assert_almost_equal(actual_std, np.nan,
|
||
|
check_less_precise=2)
|
||
|
|
||
|
def test_nanvar_axis(self):
|
||
|
# Generate some sample data.
|
||
|
samples_norm = self.samples
|
||
|
samples_unif = self.prng.uniform(size=samples_norm.shape[0])
|
||
|
samples = np.vstack([samples_norm, samples_unif])
|
||
|
|
||
|
actual_variance = nanops.nanvar(samples, axis=1)
|
||
|
tm.assert_almost_equal(actual_variance, np.array(
|
||
|
[self.variance, 1.0 / 12]), check_less_precise=2)
|
||
|
|
||
|
def test_nanvar_ddof(self):
|
||
|
n = 5
|
||
|
samples = self.prng.uniform(size=(10000, n + 1))
|
||
|
samples[:, -1] = np.nan # Force use of our own algorithm.
|
||
|
|
||
|
variance_0 = nanops.nanvar(samples, axis=1, skipna=True, ddof=0).mean()
|
||
|
variance_1 = nanops.nanvar(samples, axis=1, skipna=True, ddof=1).mean()
|
||
|
variance_2 = nanops.nanvar(samples, axis=1, skipna=True, ddof=2).mean()
|
||
|
|
||
|
# The unbiased estimate.
|
||
|
var = 1.0 / 12
|
||
|
tm.assert_almost_equal(variance_1, var,
|
||
|
check_less_precise=2)
|
||
|
|
||
|
# The underestimated variance.
|
||
|
tm.assert_almost_equal(variance_0, (n - 1.0) / n * var,
|
||
|
check_less_precise=2)
|
||
|
|
||
|
# The overestimated variance.
|
||
|
tm.assert_almost_equal(variance_2, (n - 1.0) / (n - 2.0) * var,
|
||
|
check_less_precise=2)
|
||
|
|
||
|
def test_ground_truth(self):
|
||
|
# Test against values that were precomputed with Numpy.
|
||
|
samples = np.empty((4, 4))
|
||
|
samples[:3, :3] = np.array([[0.97303362, 0.21869576, 0.55560287
|
||
|
], [0.72980153, 0.03109364, 0.99155171],
|
||
|
[0.09317602, 0.60078248, 0.15871292]])
|
||
|
samples[3] = samples[:, 3] = np.nan
|
||
|
|
||
|
# Actual variances along axis=0, 1 for ddof=0, 1, 2
|
||
|
variance = np.array([[[0.13762259, 0.05619224, 0.11568816
|
||
|
], [0.20643388, 0.08428837, 0.17353224],
|
||
|
[0.41286776, 0.16857673, 0.34706449]],
|
||
|
[[0.09519783, 0.16435395, 0.05082054
|
||
|
], [0.14279674, 0.24653093, 0.07623082],
|
||
|
[0.28559348, 0.49306186, 0.15246163]]])
|
||
|
|
||
|
# Test nanvar.
|
||
|
for axis in range(2):
|
||
|
for ddof in range(3):
|
||
|
var = nanops.nanvar(samples, skipna=True, axis=axis, ddof=ddof)
|
||
|
tm.assert_almost_equal(var[:3], variance[axis, ddof])
|
||
|
assert np.isnan(var[3])
|
||
|
|
||
|
# Test nanstd.
|
||
|
for axis in range(2):
|
||
|
for ddof in range(3):
|
||
|
std = nanops.nanstd(samples, skipna=True, axis=axis, ddof=ddof)
|
||
|
tm.assert_almost_equal(std[:3], variance[axis, ddof] ** 0.5)
|
||
|
assert np.isnan(std[3])
|
||
|
|
||
|
def test_nanstd_roundoff(self):
|
||
|
# Regression test for GH 10242 (test data taken from GH 10489). Ensure
|
||
|
# that variance is stable.
|
||
|
data = Series(766897346 * np.ones(10))
|
||
|
for ddof in range(3):
|
||
|
result = data.std(ddof=ddof)
|
||
|
assert result == 0.0
|
||
|
|
||
|
@property
|
||
|
def prng(self):
|
||
|
return np.random.RandomState(1234)
|
||
|
|
||
|
|
||
|
class TestNanskewFixedValues(object):
|
||
|
|
||
|
# xref GH 11974
|
||
|
|
||
|
def setup_method(self, method):
|
||
|
# Test data + skewness value (computed with scipy.stats.skew)
|
||
|
self.samples = np.sin(np.linspace(0, 1, 200))
|
||
|
self.actual_skew = -0.1875895205961754
|
||
|
|
||
|
def test_constant_series(self):
|
||
|
# xref GH 11974
|
||
|
for val in [3075.2, 3075.3, 3075.5]:
|
||
|
data = val * np.ones(300)
|
||
|
skew = nanops.nanskew(data)
|
||
|
assert skew == 0.0
|
||
|
|
||
|
def test_all_finite(self):
|
||
|
alpha, beta = 0.3, 0.1
|
||
|
left_tailed = self.prng.beta(alpha, beta, size=100)
|
||
|
assert nanops.nanskew(left_tailed) < 0
|
||
|
|
||
|
alpha, beta = 0.1, 0.3
|
||
|
right_tailed = self.prng.beta(alpha, beta, size=100)
|
||
|
assert nanops.nanskew(right_tailed) > 0
|
||
|
|
||
|
def test_ground_truth(self):
|
||
|
skew = nanops.nanskew(self.samples)
|
||
|
tm.assert_almost_equal(skew, self.actual_skew)
|
||
|
|
||
|
def test_axis(self):
|
||
|
samples = np.vstack([self.samples,
|
||
|
np.nan * np.ones(len(self.samples))])
|
||
|
skew = nanops.nanskew(samples, axis=1)
|
||
|
tm.assert_almost_equal(skew, np.array([self.actual_skew, np.nan]))
|
||
|
|
||
|
def test_nans(self):
|
||
|
samples = np.hstack([self.samples, np.nan])
|
||
|
skew = nanops.nanskew(samples, skipna=False)
|
||
|
assert np.isnan(skew)
|
||
|
|
||
|
def test_nans_skipna(self):
|
||
|
samples = np.hstack([self.samples, np.nan])
|
||
|
skew = nanops.nanskew(samples, skipna=True)
|
||
|
tm.assert_almost_equal(skew, self.actual_skew)
|
||
|
|
||
|
@property
|
||
|
def prng(self):
|
||
|
return np.random.RandomState(1234)
|
||
|
|
||
|
|
||
|
class TestNankurtFixedValues(object):
|
||
|
|
||
|
# xref GH 11974
|
||
|
|
||
|
def setup_method(self, method):
|
||
|
# Test data + kurtosis value (computed with scipy.stats.kurtosis)
|
||
|
self.samples = np.sin(np.linspace(0, 1, 200))
|
||
|
self.actual_kurt = -1.2058303433799713
|
||
|
|
||
|
def test_constant_series(self):
|
||
|
# xref GH 11974
|
||
|
for val in [3075.2, 3075.3, 3075.5]:
|
||
|
data = val * np.ones(300)
|
||
|
kurt = nanops.nankurt(data)
|
||
|
assert kurt == 0.0
|
||
|
|
||
|
def test_all_finite(self):
|
||
|
alpha, beta = 0.3, 0.1
|
||
|
left_tailed = self.prng.beta(alpha, beta, size=100)
|
||
|
assert nanops.nankurt(left_tailed) < 0
|
||
|
|
||
|
alpha, beta = 0.1, 0.3
|
||
|
right_tailed = self.prng.beta(alpha, beta, size=100)
|
||
|
assert nanops.nankurt(right_tailed) > 0
|
||
|
|
||
|
def test_ground_truth(self):
|
||
|
kurt = nanops.nankurt(self.samples)
|
||
|
tm.assert_almost_equal(kurt, self.actual_kurt)
|
||
|
|
||
|
def test_axis(self):
|
||
|
samples = np.vstack([self.samples,
|
||
|
np.nan * np.ones(len(self.samples))])
|
||
|
kurt = nanops.nankurt(samples, axis=1)
|
||
|
tm.assert_almost_equal(kurt, np.array([self.actual_kurt, np.nan]))
|
||
|
|
||
|
def test_nans(self):
|
||
|
samples = np.hstack([self.samples, np.nan])
|
||
|
kurt = nanops.nankurt(samples, skipna=False)
|
||
|
assert np.isnan(kurt)
|
||
|
|
||
|
def test_nans_skipna(self):
|
||
|
samples = np.hstack([self.samples, np.nan])
|
||
|
kurt = nanops.nankurt(samples, skipna=True)
|
||
|
tm.assert_almost_equal(kurt, self.actual_kurt)
|
||
|
|
||
|
@property
|
||
|
def prng(self):
|
||
|
return np.random.RandomState(1234)
|
||
|
|
||
|
|
||
|
def test_use_bottleneck():
|
||
|
|
||
|
if nanops._BOTTLENECK_INSTALLED:
|
||
|
|
||
|
pd.set_option('use_bottleneck', True)
|
||
|
assert pd.get_option('use_bottleneck')
|
||
|
|
||
|
pd.set_option('use_bottleneck', False)
|
||
|
assert not pd.get_option('use_bottleneck')
|
||
|
|
||
|
pd.set_option('use_bottleneck', use_bn)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("numpy_op, expected", [
|
||
|
(np.sum, 10),
|
||
|
(np.nansum, 10),
|
||
|
(np.mean, 2.5),
|
||
|
(np.nanmean, 2.5),
|
||
|
(np.median, 2.5),
|
||
|
(np.nanmedian, 2.5),
|
||
|
(np.min, 1),
|
||
|
(np.max, 4),
|
||
|
])
|
||
|
def test_numpy_ops(numpy_op, expected):
|
||
|
# GH8383
|
||
|
result = numpy_op(pd.Series([1, 2, 3, 4]))
|
||
|
assert result == expected
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("numpy_op, expected", [
|
||
|
(np.nanmin, 1),
|
||
|
(np.nanmax, 4),
|
||
|
])
|
||
|
def test_numpy_ops_np_version_under1p13(numpy_op, expected):
|
||
|
# GH8383
|
||
|
result = numpy_op(pd.Series([1, 2, 3, 4]))
|
||
|
if _np_version_under1p13:
|
||
|
# bug for numpy < 1.13, where result is a series, should be a scalar
|
||
|
with pytest.raises(ValueError):
|
||
|
assert result == expected
|
||
|
else:
|
||
|
assert result == expected
|