3906 lines
146 KiB
Python
3906 lines
146 KiB
Python
from itertools import product
|
|
import pytest
|
|
import warnings
|
|
from warnings import catch_warnings
|
|
|
|
from datetime import datetime, timedelta
|
|
from numpy.random import randn
|
|
import numpy as np
|
|
from pandas import _np_version_under1p12
|
|
|
|
import pandas as pd
|
|
from pandas import (Series, DataFrame, bdate_range,
|
|
isna, notna, concat, Timestamp, Index)
|
|
import pandas.core.window as rwindow
|
|
import pandas.tseries.offsets as offsets
|
|
from pandas.core.base import SpecificationError
|
|
from pandas.errors import UnsupportedFunctionCall
|
|
from pandas.core.sorting import safe_sort
|
|
import pandas.util.testing as tm
|
|
import pandas.util._test_decorators as td
|
|
from pandas.compat import range, zip
|
|
|
|
N, K = 100, 10
|
|
|
|
|
|
def assert_equal(left, right):
|
|
if isinstance(left, Series):
|
|
tm.assert_series_equal(left, right)
|
|
else:
|
|
tm.assert_frame_equal(left, right)
|
|
|
|
|
|
@pytest.fixture(params=[True, False])
|
|
def raw(request):
|
|
return request.param
|
|
|
|
|
|
@pytest.fixture(params=['triang', 'blackman', 'hamming', 'bartlett', 'bohman',
|
|
'blackmanharris', 'nuttall', 'barthann'])
|
|
def win_types(request):
|
|
return request.param
|
|
|
|
|
|
@pytest.fixture(params=['kaiser', 'gaussian', 'general_gaussian'])
|
|
def win_types_special(request):
|
|
return request.param
|
|
|
|
|
|
class Base(object):
|
|
|
|
_nan_locs = np.arange(20, 40)
|
|
_inf_locs = np.array([])
|
|
|
|
def _create_data(self):
|
|
arr = randn(N)
|
|
arr[self._nan_locs] = np.NaN
|
|
|
|
self.arr = arr
|
|
self.rng = bdate_range(datetime(2009, 1, 1), periods=N)
|
|
self.series = Series(arr.copy(), index=self.rng)
|
|
self.frame = DataFrame(randn(N, K), index=self.rng,
|
|
columns=np.arange(K))
|
|
|
|
|
|
class TestApi(Base):
|
|
|
|
def setup_method(self, method):
|
|
self._create_data()
|
|
|
|
def test_getitem(self):
|
|
|
|
r = self.frame.rolling(window=5)
|
|
tm.assert_index_equal(r._selected_obj.columns, self.frame.columns)
|
|
|
|
r = self.frame.rolling(window=5)[1]
|
|
assert r._selected_obj.name == self.frame.columns[1]
|
|
|
|
# technically this is allowed
|
|
r = self.frame.rolling(window=5)[1, 3]
|
|
tm.assert_index_equal(r._selected_obj.columns,
|
|
self.frame.columns[[1, 3]])
|
|
|
|
r = self.frame.rolling(window=5)[[1, 3]]
|
|
tm.assert_index_equal(r._selected_obj.columns,
|
|
self.frame.columns[[1, 3]])
|
|
|
|
def test_select_bad_cols(self):
|
|
df = DataFrame([[1, 2]], columns=['A', 'B'])
|
|
g = df.rolling(window=5)
|
|
pytest.raises(KeyError, g.__getitem__, ['C']) # g[['C']]
|
|
|
|
pytest.raises(KeyError, g.__getitem__, ['A', 'C']) # g[['A', 'C']]
|
|
with tm.assert_raises_regex(KeyError, '^[^A]+$'):
|
|
# A should not be referenced as a bad column...
|
|
# will have to rethink regex if you change message!
|
|
g[['A', 'C']]
|
|
|
|
def test_attribute_access(self):
|
|
|
|
df = DataFrame([[1, 2]], columns=['A', 'B'])
|
|
r = df.rolling(window=5)
|
|
tm.assert_series_equal(r.A.sum(), r['A'].sum())
|
|
pytest.raises(AttributeError, lambda: r.F)
|
|
|
|
def tests_skip_nuisance(self):
|
|
|
|
df = DataFrame({'A': range(5), 'B': range(5, 10), 'C': 'foo'})
|
|
r = df.rolling(window=3)
|
|
result = r[['A', 'B']].sum()
|
|
expected = DataFrame({'A': [np.nan, np.nan, 3, 6, 9],
|
|
'B': [np.nan, np.nan, 18, 21, 24]},
|
|
columns=list('AB'))
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_skip_sum_object_raises(self):
|
|
df = DataFrame({'A': range(5), 'B': range(5, 10), 'C': 'foo'})
|
|
r = df.rolling(window=3)
|
|
|
|
with tm.assert_raises_regex(TypeError, 'cannot handle this type'):
|
|
r.sum()
|
|
|
|
def test_agg(self):
|
|
df = DataFrame({'A': range(5), 'B': range(0, 10, 2)})
|
|
|
|
r = df.rolling(window=3)
|
|
a_mean = r['A'].mean()
|
|
a_std = r['A'].std()
|
|
a_sum = r['A'].sum()
|
|
b_mean = r['B'].mean()
|
|
b_std = r['B'].std()
|
|
b_sum = r['B'].sum()
|
|
|
|
result = r.aggregate([np.mean, np.std])
|
|
expected = concat([a_mean, a_std, b_mean, b_std], axis=1)
|
|
expected.columns = pd.MultiIndex.from_product([['A', 'B'], ['mean',
|
|
'std']])
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = r.aggregate({'A': np.mean, 'B': np.std})
|
|
|
|
expected = concat([a_mean, b_std], axis=1)
|
|
tm.assert_frame_equal(result, expected, check_like=True)
|
|
|
|
result = r.aggregate({'A': ['mean', 'std']})
|
|
expected = concat([a_mean, a_std], axis=1)
|
|
expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'), ('A',
|
|
'std')])
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = r['A'].aggregate(['mean', 'sum'])
|
|
expected = concat([a_mean, a_sum], axis=1)
|
|
expected.columns = ['mean', 'sum']
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
with catch_warnings(record=True):
|
|
result = r.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}})
|
|
expected = concat([a_mean, a_sum], axis=1)
|
|
expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'),
|
|
('A', 'sum')])
|
|
tm.assert_frame_equal(result, expected, check_like=True)
|
|
|
|
with catch_warnings(record=True):
|
|
result = r.aggregate({'A': {'mean': 'mean',
|
|
'sum': 'sum'},
|
|
'B': {'mean2': 'mean',
|
|
'sum2': 'sum'}})
|
|
expected = concat([a_mean, a_sum, b_mean, b_sum], axis=1)
|
|
exp_cols = [('A', 'mean'), ('A', 'sum'), ('B', 'mean2'), ('B', 'sum2')]
|
|
expected.columns = pd.MultiIndex.from_tuples(exp_cols)
|
|
tm.assert_frame_equal(result, expected, check_like=True)
|
|
|
|
result = r.aggregate({'A': ['mean', 'std'], 'B': ['mean', 'std']})
|
|
expected = concat([a_mean, a_std, b_mean, b_std], axis=1)
|
|
|
|
exp_cols = [('A', 'mean'), ('A', 'std'), ('B', 'mean'), ('B', 'std')]
|
|
expected.columns = pd.MultiIndex.from_tuples(exp_cols)
|
|
tm.assert_frame_equal(result, expected, check_like=True)
|
|
|
|
def test_agg_apply(self, raw):
|
|
|
|
# passed lambda
|
|
df = DataFrame({'A': range(5), 'B': range(0, 10, 2)})
|
|
|
|
r = df.rolling(window=3)
|
|
a_sum = r['A'].sum()
|
|
|
|
result = r.agg({'A': np.sum, 'B': lambda x: np.std(x, ddof=1)})
|
|
rcustom = r['B'].apply(lambda x: np.std(x, ddof=1), raw=raw)
|
|
expected = concat([a_sum, rcustom], axis=1)
|
|
tm.assert_frame_equal(result, expected, check_like=True)
|
|
|
|
def test_agg_consistency(self):
|
|
|
|
df = DataFrame({'A': range(5), 'B': range(0, 10, 2)})
|
|
r = df.rolling(window=3)
|
|
|
|
result = r.agg([np.sum, np.mean]).columns
|
|
expected = pd.MultiIndex.from_product([list('AB'), ['sum', 'mean']])
|
|
tm.assert_index_equal(result, expected)
|
|
|
|
result = r['A'].agg([np.sum, np.mean]).columns
|
|
expected = Index(['sum', 'mean'])
|
|
tm.assert_index_equal(result, expected)
|
|
|
|
result = r.agg({'A': [np.sum, np.mean]}).columns
|
|
expected = pd.MultiIndex.from_tuples([('A', 'sum'), ('A', 'mean')])
|
|
tm.assert_index_equal(result, expected)
|
|
|
|
def test_agg_nested_dicts(self):
|
|
|
|
# API change for disallowing these types of nested dicts
|
|
df = DataFrame({'A': range(5), 'B': range(0, 10, 2)})
|
|
r = df.rolling(window=3)
|
|
|
|
def f():
|
|
r.aggregate({'r1': {'A': ['mean', 'sum']},
|
|
'r2': {'B': ['mean', 'sum']}})
|
|
|
|
pytest.raises(SpecificationError, f)
|
|
|
|
expected = concat([r['A'].mean(), r['A'].std(),
|
|
r['B'].mean(), r['B'].std()], axis=1)
|
|
expected.columns = pd.MultiIndex.from_tuples([('ra', 'mean'), (
|
|
'ra', 'std'), ('rb', 'mean'), ('rb', 'std')])
|
|
with catch_warnings(record=True):
|
|
result = r[['A', 'B']].agg({'A': {'ra': ['mean', 'std']},
|
|
'B': {'rb': ['mean', 'std']}})
|
|
tm.assert_frame_equal(result, expected, check_like=True)
|
|
|
|
with catch_warnings(record=True):
|
|
result = r.agg({'A': {'ra': ['mean', 'std']},
|
|
'B': {'rb': ['mean', 'std']}})
|
|
expected.columns = pd.MultiIndex.from_tuples([('A', 'ra', 'mean'), (
|
|
'A', 'ra', 'std'), ('B', 'rb', 'mean'), ('B', 'rb', 'std')])
|
|
tm.assert_frame_equal(result, expected, check_like=True)
|
|
|
|
def test_count_nonnumeric_types(self):
|
|
# GH12541
|
|
cols = ['int', 'float', 'string', 'datetime', 'timedelta', 'periods',
|
|
'fl_inf', 'fl_nan', 'str_nan', 'dt_nat', 'periods_nat']
|
|
|
|
df = DataFrame(
|
|
{'int': [1, 2, 3],
|
|
'float': [4., 5., 6.],
|
|
'string': list('abc'),
|
|
'datetime': pd.date_range('20170101', periods=3),
|
|
'timedelta': pd.timedelta_range('1 s', periods=3, freq='s'),
|
|
'periods': [pd.Period('2012-01'), pd.Period('2012-02'),
|
|
pd.Period('2012-03')],
|
|
'fl_inf': [1., 2., np.Inf],
|
|
'fl_nan': [1., 2., np.NaN],
|
|
'str_nan': ['aa', 'bb', np.NaN],
|
|
'dt_nat': [Timestamp('20170101'), Timestamp('20170203'),
|
|
Timestamp(None)],
|
|
'periods_nat': [pd.Period('2012-01'), pd.Period('2012-02'),
|
|
pd.Period(None)]},
|
|
columns=cols)
|
|
|
|
expected = DataFrame(
|
|
{'int': [1., 2., 2.],
|
|
'float': [1., 2., 2.],
|
|
'string': [1., 2., 2.],
|
|
'datetime': [1., 2., 2.],
|
|
'timedelta': [1., 2., 2.],
|
|
'periods': [1., 2., 2.],
|
|
'fl_inf': [1., 2., 2.],
|
|
'fl_nan': [1., 2., 1.],
|
|
'str_nan': [1., 2., 1.],
|
|
'dt_nat': [1., 2., 1.],
|
|
'periods_nat': [1., 2., 1.]},
|
|
columns=cols)
|
|
|
|
result = df.rolling(window=2).count()
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = df.rolling(1).count()
|
|
expected = df.notna().astype(float)
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
@td.skip_if_no_scipy
|
|
def test_window_with_args(self):
|
|
# make sure that we are aggregating window functions correctly with arg
|
|
r = Series(np.random.randn(100)).rolling(window=10, min_periods=1,
|
|
win_type='gaussian')
|
|
expected = concat([r.mean(std=10), r.mean(std=.01)], axis=1)
|
|
expected.columns = ['<lambda>', '<lambda>']
|
|
result = r.aggregate([lambda x: x.mean(std=10),
|
|
lambda x: x.mean(std=.01)])
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def a(x):
|
|
return x.mean(std=10)
|
|
|
|
def b(x):
|
|
return x.mean(std=0.01)
|
|
|
|
expected = concat([r.mean(std=10), r.mean(std=.01)], axis=1)
|
|
expected.columns = ['a', 'b']
|
|
result = r.aggregate([a, b])
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_preserve_metadata(self):
|
|
# GH 10565
|
|
s = Series(np.arange(100), name='foo')
|
|
|
|
s2 = s.rolling(30).sum()
|
|
s3 = s.rolling(20).sum()
|
|
assert s2.name == 'foo'
|
|
assert s3.name == 'foo'
|
|
|
|
|
|
class TestWindow(Base):
|
|
|
|
def setup_method(self, method):
|
|
self._create_data()
|
|
|
|
@td.skip_if_no_scipy
|
|
@pytest.mark.parametrize(
|
|
'which', ['series', 'frame'])
|
|
def test_constructor(self, which):
|
|
# GH 12669
|
|
|
|
o = getattr(self, which)
|
|
c = o.rolling
|
|
|
|
# valid
|
|
c(win_type='boxcar', window=2, min_periods=1)
|
|
c(win_type='boxcar', window=2, min_periods=1, center=True)
|
|
c(win_type='boxcar', window=2, min_periods=1, center=False)
|
|
|
|
# not valid
|
|
for w in [2., 'foo', np.array([2])]:
|
|
with pytest.raises(ValueError):
|
|
c(win_type='boxcar', window=2, min_periods=w)
|
|
with pytest.raises(ValueError):
|
|
c(win_type='boxcar', window=2, min_periods=1, center=w)
|
|
|
|
for wt in ['foobar', 1]:
|
|
with pytest.raises(ValueError):
|
|
c(win_type=wt, window=2)
|
|
|
|
@td.skip_if_no_scipy
|
|
@pytest.mark.parametrize(
|
|
'which', ['series', 'frame'])
|
|
def test_constructor_with_win_type(self, which, win_types):
|
|
# GH 12669
|
|
o = getattr(self, which)
|
|
c = o.rolling
|
|
c(win_type=win_types, window=2)
|
|
|
|
@pytest.mark.parametrize(
|
|
'method', ['sum', 'mean'])
|
|
def test_numpy_compat(self, method):
|
|
# see gh-12811
|
|
w = rwindow.Window(Series([2, 4, 6]), window=[0, 2])
|
|
|
|
msg = "numpy operations are not valid with window objects"
|
|
|
|
tm.assert_raises_regex(UnsupportedFunctionCall, msg,
|
|
getattr(w, method), 1, 2, 3)
|
|
tm.assert_raises_regex(UnsupportedFunctionCall, msg,
|
|
getattr(w, method), dtype=np.float64)
|
|
|
|
|
|
class TestRolling(Base):
|
|
|
|
def setup_method(self, method):
|
|
self._create_data()
|
|
|
|
def test_doc_string(self):
|
|
|
|
df = DataFrame({'B': [0, 1, 2, np.nan, 4]})
|
|
df
|
|
df.rolling(2).sum()
|
|
df.rolling(2, min_periods=1).sum()
|
|
|
|
@pytest.mark.parametrize(
|
|
'which', ['series', 'frame'])
|
|
def test_constructor(self, which):
|
|
# GH 12669
|
|
|
|
o = getattr(self, which)
|
|
c = o.rolling
|
|
|
|
# valid
|
|
c(window=2)
|
|
c(window=2, min_periods=1)
|
|
c(window=2, min_periods=1, center=True)
|
|
c(window=2, min_periods=1, center=False)
|
|
|
|
# GH 13383
|
|
with pytest.raises(ValueError):
|
|
c(0)
|
|
c(-1)
|
|
|
|
# not valid
|
|
for w in [2., 'foo', np.array([2])]:
|
|
with pytest.raises(ValueError):
|
|
c(window=w)
|
|
with pytest.raises(ValueError):
|
|
c(window=2, min_periods=w)
|
|
with pytest.raises(ValueError):
|
|
c(window=2, min_periods=1, center=w)
|
|
|
|
@td.skip_if_no_scipy
|
|
@pytest.mark.parametrize(
|
|
'which', ['series', 'frame'])
|
|
def test_constructor_with_win_type(self, which):
|
|
# GH 13383
|
|
o = getattr(self, which)
|
|
c = o.rolling
|
|
with pytest.raises(ValueError):
|
|
c(-1, win_type='boxcar')
|
|
|
|
@pytest.mark.parametrize(
|
|
'window', [timedelta(days=3), pd.Timedelta(days=3)])
|
|
def test_constructor_with_timedelta_window(self, window):
|
|
# GH 15440
|
|
n = 10
|
|
df = DataFrame({'value': np.arange(n)},
|
|
index=pd.date_range('2015-12-24', periods=n, freq="D"))
|
|
expected_data = np.append([0., 1.], np.arange(3., 27., 3))
|
|
|
|
result = df.rolling(window=window).sum()
|
|
expected = DataFrame({'value': expected_data},
|
|
index=pd.date_range('2015-12-24', periods=n,
|
|
freq="D"))
|
|
tm.assert_frame_equal(result, expected)
|
|
expected = df.rolling('3D').sum()
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
@pytest.mark.parametrize(
|
|
'window', [timedelta(days=3), pd.Timedelta(days=3), '3D'])
|
|
def test_constructor_timedelta_window_and_minperiods(self, window, raw):
|
|
# GH 15305
|
|
n = 10
|
|
df = DataFrame({'value': np.arange(n)},
|
|
index=pd.date_range('2017-08-08', periods=n, freq="D"))
|
|
expected = DataFrame(
|
|
{'value': np.append([np.NaN, 1.], np.arange(3., 27., 3))},
|
|
index=pd.date_range('2017-08-08', periods=n, freq="D"))
|
|
result_roll_sum = df.rolling(window=window, min_periods=2).sum()
|
|
result_roll_generic = df.rolling(window=window,
|
|
min_periods=2).apply(sum, raw=raw)
|
|
tm.assert_frame_equal(result_roll_sum, expected)
|
|
tm.assert_frame_equal(result_roll_generic, expected)
|
|
|
|
@pytest.mark.parametrize(
|
|
'method', ['std', 'mean', 'sum', 'max', 'min', 'var'])
|
|
def test_numpy_compat(self, method):
|
|
# see gh-12811
|
|
r = rwindow.Rolling(Series([2, 4, 6]), window=2)
|
|
|
|
msg = "numpy operations are not valid with window objects"
|
|
|
|
tm.assert_raises_regex(UnsupportedFunctionCall, msg,
|
|
getattr(r, method), 1, 2, 3)
|
|
tm.assert_raises_regex(UnsupportedFunctionCall, msg,
|
|
getattr(r, method), dtype=np.float64)
|
|
|
|
def test_closed(self):
|
|
df = DataFrame({'A': [0, 1, 2, 3, 4]})
|
|
# closed only allowed for datetimelike
|
|
with pytest.raises(ValueError):
|
|
df.rolling(window=3, closed='neither')
|
|
|
|
@pytest.mark.parametrize('roller', ['1s', 1])
|
|
def tests_empty_df_rolling(self, roller):
|
|
# GH 15819 Verifies that datetime and integer rolling windows can be
|
|
# applied to empty DataFrames
|
|
expected = DataFrame()
|
|
result = DataFrame().rolling(roller).sum()
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
# Verifies that datetime and integer rolling windows can be applied to
|
|
# empty DataFrames with datetime index
|
|
expected = DataFrame(index=pd.DatetimeIndex([]))
|
|
result = DataFrame(index=pd.DatetimeIndex([])).rolling(roller).sum()
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_missing_minp_zero(self):
|
|
# https://github.com/pandas-dev/pandas/pull/18921
|
|
# minp=0
|
|
x = pd.Series([np.nan])
|
|
result = x.rolling(1, min_periods=0).sum()
|
|
expected = pd.Series([0.0])
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# minp=1
|
|
result = x.rolling(1, min_periods=1).sum()
|
|
expected = pd.Series([np.nan])
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
def test_missing_minp_zero_variable(self):
|
|
# https://github.com/pandas-dev/pandas/pull/18921
|
|
x = pd.Series([np.nan] * 4,
|
|
index=pd.DatetimeIndex(['2017-01-01', '2017-01-04',
|
|
'2017-01-06', '2017-01-07']))
|
|
result = x.rolling(pd.Timedelta("2d"), min_periods=0).sum()
|
|
expected = pd.Series(0.0, index=x.index)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
def test_multi_index_names(self):
|
|
|
|
# GH 16789, 16825
|
|
cols = pd.MultiIndex.from_product([['A', 'B'], ['C', 'D', 'E']],
|
|
names=['1', '2'])
|
|
df = DataFrame(np.ones((10, 6)), columns=cols)
|
|
result = df.rolling(3).cov()
|
|
|
|
tm.assert_index_equal(result.columns, df.columns)
|
|
assert result.index.names == [None, '1', '2']
|
|
|
|
@pytest.mark.parametrize('klass', [pd.Series, pd.DataFrame])
|
|
def test_iter_raises(self, klass):
|
|
# https://github.com/pandas-dev/pandas/issues/11704
|
|
# Iteration over a Window
|
|
obj = klass([1, 2, 3, 4])
|
|
with pytest.raises(NotImplementedError):
|
|
iter(obj.rolling(2))
|
|
|
|
|
|
class TestExpanding(Base):
|
|
|
|
def setup_method(self, method):
|
|
self._create_data()
|
|
|
|
def test_doc_string(self):
|
|
|
|
df = DataFrame({'B': [0, 1, 2, np.nan, 4]})
|
|
df
|
|
df.expanding(2).sum()
|
|
|
|
@pytest.mark.parametrize(
|
|
'which', ['series', 'frame'])
|
|
def test_constructor(self, which):
|
|
# GH 12669
|
|
|
|
o = getattr(self, which)
|
|
c = o.expanding
|
|
|
|
# valid
|
|
c(min_periods=1)
|
|
c(min_periods=1, center=True)
|
|
c(min_periods=1, center=False)
|
|
|
|
# not valid
|
|
for w in [2., 'foo', np.array([2])]:
|
|
with pytest.raises(ValueError):
|
|
c(min_periods=w)
|
|
with pytest.raises(ValueError):
|
|
c(min_periods=1, center=w)
|
|
|
|
@pytest.mark.parametrize(
|
|
'method', ['std', 'mean', 'sum', 'max', 'min', 'var'])
|
|
def test_numpy_compat(self, method):
|
|
# see gh-12811
|
|
e = rwindow.Expanding(Series([2, 4, 6]), window=2)
|
|
|
|
msg = "numpy operations are not valid with window objects"
|
|
|
|
tm.assert_raises_regex(UnsupportedFunctionCall, msg,
|
|
getattr(e, method), 1, 2, 3)
|
|
tm.assert_raises_regex(UnsupportedFunctionCall, msg,
|
|
getattr(e, method), dtype=np.float64)
|
|
|
|
@pytest.mark.parametrize(
|
|
'expander',
|
|
[1, pytest.param('ls', marks=pytest.mark.xfail(
|
|
reason='GH 16425 expanding with '
|
|
'offset not supported'))])
|
|
def test_empty_df_expanding(self, expander):
|
|
# GH 15819 Verifies that datetime and integer expanding windows can be
|
|
# applied to empty DataFrames
|
|
|
|
expected = DataFrame()
|
|
result = DataFrame().expanding(expander).sum()
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
# Verifies that datetime and integer expanding windows can be applied
|
|
# to empty DataFrames with datetime index
|
|
expected = DataFrame(index=pd.DatetimeIndex([]))
|
|
result = DataFrame(
|
|
index=pd.DatetimeIndex([])).expanding(expander).sum()
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_missing_minp_zero(self):
|
|
# https://github.com/pandas-dev/pandas/pull/18921
|
|
# minp=0
|
|
x = pd.Series([np.nan])
|
|
result = x.expanding(min_periods=0).sum()
|
|
expected = pd.Series([0.0])
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# minp=1
|
|
result = x.expanding(min_periods=1).sum()
|
|
expected = pd.Series([np.nan])
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
@pytest.mark.parametrize('klass', [pd.Series, pd.DataFrame])
|
|
def test_iter_raises(self, klass):
|
|
# https://github.com/pandas-dev/pandas/issues/11704
|
|
# Iteration over a Window
|
|
obj = klass([1, 2, 3, 4])
|
|
with pytest.raises(NotImplementedError):
|
|
iter(obj.expanding(2))
|
|
|
|
|
|
class TestEWM(Base):
|
|
|
|
def setup_method(self, method):
|
|
self._create_data()
|
|
|
|
def test_doc_string(self):
|
|
|
|
df = DataFrame({'B': [0, 1, 2, np.nan, 4]})
|
|
df
|
|
df.ewm(com=0.5).mean()
|
|
|
|
@pytest.mark.parametrize(
|
|
'which', ['series', 'frame'])
|
|
def test_constructor(self, which):
|
|
o = getattr(self, which)
|
|
c = o.ewm
|
|
|
|
# valid
|
|
c(com=0.5)
|
|
c(span=1.5)
|
|
c(alpha=0.5)
|
|
c(halflife=0.75)
|
|
c(com=0.5, span=None)
|
|
c(alpha=0.5, com=None)
|
|
c(halflife=0.75, alpha=None)
|
|
|
|
# not valid: mutually exclusive
|
|
with pytest.raises(ValueError):
|
|
c(com=0.5, alpha=0.5)
|
|
with pytest.raises(ValueError):
|
|
c(span=1.5, halflife=0.75)
|
|
with pytest.raises(ValueError):
|
|
c(alpha=0.5, span=1.5)
|
|
|
|
# not valid: com < 0
|
|
with pytest.raises(ValueError):
|
|
c(com=-0.5)
|
|
|
|
# not valid: span < 1
|
|
with pytest.raises(ValueError):
|
|
c(span=0.5)
|
|
|
|
# not valid: halflife <= 0
|
|
with pytest.raises(ValueError):
|
|
c(halflife=0)
|
|
|
|
# not valid: alpha <= 0 or alpha > 1
|
|
for alpha in (-0.5, 1.5):
|
|
with pytest.raises(ValueError):
|
|
c(alpha=alpha)
|
|
|
|
@pytest.mark.parametrize(
|
|
'method', ['std', 'mean', 'var'])
|
|
def test_numpy_compat(self, method):
|
|
# see gh-12811
|
|
e = rwindow.EWM(Series([2, 4, 6]), alpha=0.5)
|
|
|
|
msg = "numpy operations are not valid with window objects"
|
|
|
|
tm.assert_raises_regex(UnsupportedFunctionCall, msg,
|
|
getattr(e, method), 1, 2, 3)
|
|
tm.assert_raises_regex(UnsupportedFunctionCall, msg,
|
|
getattr(e, method), dtype=np.float64)
|
|
|
|
|
|
# gh-12373 : rolling functions error on float32 data
|
|
# make sure rolling functions works for different dtypes
|
|
#
|
|
# NOTE that these are yielded tests and so _create_data
|
|
# is explicitly called.
|
|
#
|
|
# further note that we are only checking rolling for fully dtype
|
|
# compliance (though both expanding and ewm inherit)
|
|
class Dtype(object):
|
|
window = 2
|
|
|
|
funcs = {
|
|
'count': lambda v: v.count(),
|
|
'max': lambda v: v.max(),
|
|
'min': lambda v: v.min(),
|
|
'sum': lambda v: v.sum(),
|
|
'mean': lambda v: v.mean(),
|
|
'std': lambda v: v.std(),
|
|
'var': lambda v: v.var(),
|
|
'median': lambda v: v.median()
|
|
}
|
|
|
|
def get_expects(self):
|
|
expects = {
|
|
'sr1': {
|
|
'count': Series([1, 2, 2, 2, 2], dtype='float64'),
|
|
'max': Series([np.nan, 1, 2, 3, 4], dtype='float64'),
|
|
'min': Series([np.nan, 0, 1, 2, 3], dtype='float64'),
|
|
'sum': Series([np.nan, 1, 3, 5, 7], dtype='float64'),
|
|
'mean': Series([np.nan, .5, 1.5, 2.5, 3.5], dtype='float64'),
|
|
'std': Series([np.nan] + [np.sqrt(.5)] * 4, dtype='float64'),
|
|
'var': Series([np.nan, .5, .5, .5, .5], dtype='float64'),
|
|
'median': Series([np.nan, .5, 1.5, 2.5, 3.5], dtype='float64')
|
|
},
|
|
'sr2': {
|
|
'count': Series([1, 2, 2, 2, 2], dtype='float64'),
|
|
'max': Series([np.nan, 10, 8, 6, 4], dtype='float64'),
|
|
'min': Series([np.nan, 8, 6, 4, 2], dtype='float64'),
|
|
'sum': Series([np.nan, 18, 14, 10, 6], dtype='float64'),
|
|
'mean': Series([np.nan, 9, 7, 5, 3], dtype='float64'),
|
|
'std': Series([np.nan] + [np.sqrt(2)] * 4, dtype='float64'),
|
|
'var': Series([np.nan, 2, 2, 2, 2], dtype='float64'),
|
|
'median': Series([np.nan, 9, 7, 5, 3], dtype='float64')
|
|
},
|
|
'df': {
|
|
'count': DataFrame({0: Series([1, 2, 2, 2, 2]),
|
|
1: Series([1, 2, 2, 2, 2])},
|
|
dtype='float64'),
|
|
'max': DataFrame({0: Series([np.nan, 2, 4, 6, 8]),
|
|
1: Series([np.nan, 3, 5, 7, 9])},
|
|
dtype='float64'),
|
|
'min': DataFrame({0: Series([np.nan, 0, 2, 4, 6]),
|
|
1: Series([np.nan, 1, 3, 5, 7])},
|
|
dtype='float64'),
|
|
'sum': DataFrame({0: Series([np.nan, 2, 6, 10, 14]),
|
|
1: Series([np.nan, 4, 8, 12, 16])},
|
|
dtype='float64'),
|
|
'mean': DataFrame({0: Series([np.nan, 1, 3, 5, 7]),
|
|
1: Series([np.nan, 2, 4, 6, 8])},
|
|
dtype='float64'),
|
|
'std': DataFrame({0: Series([np.nan] + [np.sqrt(2)] * 4),
|
|
1: Series([np.nan] + [np.sqrt(2)] * 4)},
|
|
dtype='float64'),
|
|
'var': DataFrame({0: Series([np.nan, 2, 2, 2, 2]),
|
|
1: Series([np.nan, 2, 2, 2, 2])},
|
|
dtype='float64'),
|
|
'median': DataFrame({0: Series([np.nan, 1, 3, 5, 7]),
|
|
1: Series([np.nan, 2, 4, 6, 8])},
|
|
dtype='float64'),
|
|
}
|
|
}
|
|
return expects
|
|
|
|
def _create_dtype_data(self, dtype):
|
|
sr1 = Series(np.arange(5), dtype=dtype)
|
|
sr2 = Series(np.arange(10, 0, -2), dtype=dtype)
|
|
df = DataFrame(np.arange(10).reshape((5, 2)), dtype=dtype)
|
|
|
|
data = {
|
|
'sr1': sr1,
|
|
'sr2': sr2,
|
|
'df': df
|
|
}
|
|
|
|
return data
|
|
|
|
def _create_data(self):
|
|
self.data = self._create_dtype_data(self.dtype)
|
|
self.expects = self.get_expects()
|
|
|
|
def test_dtypes(self):
|
|
self._create_data()
|
|
for f_name, d_name in product(self.funcs.keys(), self.data.keys()):
|
|
|
|
f = self.funcs[f_name]
|
|
d = self.data[d_name]
|
|
exp = self.expects[d_name][f_name]
|
|
self.check_dtypes(f, f_name, d, d_name, exp)
|
|
|
|
def check_dtypes(self, f, f_name, d, d_name, exp):
|
|
roll = d.rolling(window=self.window)
|
|
result = f(roll)
|
|
tm.assert_almost_equal(result, exp)
|
|
|
|
|
|
class TestDtype_object(Dtype):
|
|
dtype = object
|
|
|
|
|
|
class Dtype_integer(Dtype):
|
|
pass
|
|
|
|
|
|
class TestDtype_int8(Dtype_integer):
|
|
dtype = np.int8
|
|
|
|
|
|
class TestDtype_int16(Dtype_integer):
|
|
dtype = np.int16
|
|
|
|
|
|
class TestDtype_int32(Dtype_integer):
|
|
dtype = np.int32
|
|
|
|
|
|
class TestDtype_int64(Dtype_integer):
|
|
dtype = np.int64
|
|
|
|
|
|
class Dtype_uinteger(Dtype):
|
|
pass
|
|
|
|
|
|
class TestDtype_uint8(Dtype_uinteger):
|
|
dtype = np.uint8
|
|
|
|
|
|
class TestDtype_uint16(Dtype_uinteger):
|
|
dtype = np.uint16
|
|
|
|
|
|
class TestDtype_uint32(Dtype_uinteger):
|
|
dtype = np.uint32
|
|
|
|
|
|
class TestDtype_uint64(Dtype_uinteger):
|
|
dtype = np.uint64
|
|
|
|
|
|
class Dtype_float(Dtype):
|
|
pass
|
|
|
|
|
|
class TestDtype_float16(Dtype_float):
|
|
dtype = np.float16
|
|
|
|
|
|
class TestDtype_float32(Dtype_float):
|
|
dtype = np.float32
|
|
|
|
|
|
class TestDtype_float64(Dtype_float):
|
|
dtype = np.float64
|
|
|
|
|
|
class TestDtype_category(Dtype):
|
|
dtype = 'category'
|
|
include_df = False
|
|
|
|
def _create_dtype_data(self, dtype):
|
|
sr1 = Series(range(5), dtype=dtype)
|
|
sr2 = Series(range(10, 0, -2), dtype=dtype)
|
|
|
|
data = {
|
|
'sr1': sr1,
|
|
'sr2': sr2
|
|
}
|
|
|
|
return data
|
|
|
|
|
|
class DatetimeLike(Dtype):
|
|
|
|
def check_dtypes(self, f, f_name, d, d_name, exp):
|
|
|
|
roll = d.rolling(window=self.window)
|
|
|
|
if f_name == 'count':
|
|
result = f(roll)
|
|
tm.assert_almost_equal(result, exp)
|
|
|
|
else:
|
|
|
|
# other methods not Implemented ATM
|
|
with pytest.raises(NotImplementedError):
|
|
f(roll)
|
|
|
|
|
|
class TestDtype_timedelta(DatetimeLike):
|
|
dtype = np.dtype('m8[ns]')
|
|
|
|
|
|
class TestDtype_datetime(DatetimeLike):
|
|
dtype = np.dtype('M8[ns]')
|
|
|
|
|
|
class TestDtype_datetime64UTC(DatetimeLike):
|
|
dtype = 'datetime64[ns, UTC]'
|
|
|
|
def _create_data(self):
|
|
pytest.skip("direct creation of extension dtype "
|
|
"datetime64[ns, UTC] is not supported ATM")
|
|
|
|
|
|
class TestMoments(Base):
|
|
|
|
def setup_method(self, method):
|
|
self._create_data()
|
|
|
|
def test_centered_axis_validation(self):
|
|
|
|
# ok
|
|
Series(np.ones(10)).rolling(window=3, center=True, axis=0).mean()
|
|
|
|
# bad axis
|
|
with pytest.raises(ValueError):
|
|
Series(np.ones(10)).rolling(window=3, center=True, axis=1).mean()
|
|
|
|
# ok ok
|
|
DataFrame(np.ones((10, 10))).rolling(window=3, center=True,
|
|
axis=0).mean()
|
|
DataFrame(np.ones((10, 10))).rolling(window=3, center=True,
|
|
axis=1).mean()
|
|
|
|
# bad axis
|
|
with pytest.raises(ValueError):
|
|
(DataFrame(np.ones((10, 10)))
|
|
.rolling(window=3, center=True, axis=2).mean())
|
|
|
|
def test_rolling_sum(self):
|
|
self._check_moment_func(np.nansum, name='sum',
|
|
zero_min_periods_equal=False)
|
|
|
|
def test_rolling_count(self):
|
|
counter = lambda x: np.isfinite(x).astype(float).sum()
|
|
self._check_moment_func(counter, name='count', has_min_periods=False,
|
|
fill_value=0)
|
|
|
|
def test_rolling_mean(self):
|
|
self._check_moment_func(np.mean, name='mean')
|
|
|
|
@td.skip_if_no_scipy
|
|
def test_cmov_mean(self):
|
|
# GH 8238
|
|
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48,
|
|
10.63, 14.48])
|
|
result = Series(vals).rolling(5, center=True).mean()
|
|
expected = Series([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516,
|
|
12.818, 12.952, np.nan, np.nan])
|
|
tm.assert_series_equal(expected, result)
|
|
|
|
@td.skip_if_no_scipy
|
|
def test_cmov_window(self):
|
|
# GH 8238
|
|
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48,
|
|
10.63, 14.48])
|
|
result = Series(vals).rolling(5, win_type='boxcar', center=True).mean()
|
|
expected = Series([np.nan, np.nan, 9.962, 11.27, 11.564, 12.516,
|
|
12.818, 12.952, np.nan, np.nan])
|
|
tm.assert_series_equal(expected, result)
|
|
|
|
@td.skip_if_no_scipy
|
|
def test_cmov_window_corner(self):
|
|
# GH 8238
|
|
# all nan
|
|
vals = pd.Series([np.nan] * 10)
|
|
result = vals.rolling(5, center=True, win_type='boxcar').mean()
|
|
assert np.isnan(result).all()
|
|
|
|
# empty
|
|
vals = pd.Series([])
|
|
result = vals.rolling(5, center=True, win_type='boxcar').mean()
|
|
assert len(result) == 0
|
|
|
|
# shorter than window
|
|
vals = pd.Series(np.random.randn(5))
|
|
result = vals.rolling(10, win_type='boxcar').mean()
|
|
assert np.isnan(result).all()
|
|
assert len(result) == 5
|
|
|
|
@td.skip_if_no_scipy
|
|
def test_cmov_window_frame(self):
|
|
# Gh 8238
|
|
vals = np.array([[12.18, 3.64], [10.18, 9.16], [13.24, 14.61],
|
|
[4.51, 8.11], [6.15, 11.44], [9.14, 6.21],
|
|
[11.31, 10.67], [2.94, 6.51], [9.42, 8.39], [12.44,
|
|
7.34]])
|
|
|
|
xp = np.array([[np.nan, np.nan], [np.nan, np.nan], [9.252, 9.392],
|
|
[8.644, 9.906], [8.87, 10.208], [6.81, 8.588],
|
|
[7.792, 8.644], [9.05, 7.824], [np.nan, np.nan
|
|
], [np.nan, np.nan]])
|
|
|
|
# DataFrame
|
|
rs = DataFrame(vals).rolling(5, win_type='boxcar', center=True).mean()
|
|
tm.assert_frame_equal(DataFrame(xp), rs)
|
|
|
|
# invalid method
|
|
with pytest.raises(AttributeError):
|
|
(DataFrame(vals).rolling(5, win_type='boxcar', center=True)
|
|
.std())
|
|
|
|
# sum
|
|
xp = np.array([[np.nan, np.nan], [np.nan, np.nan], [46.26, 46.96],
|
|
[43.22, 49.53], [44.35, 51.04], [34.05, 42.94],
|
|
[38.96, 43.22], [45.25, 39.12], [np.nan, np.nan
|
|
], [np.nan, np.nan]])
|
|
|
|
rs = DataFrame(vals).rolling(5, win_type='boxcar', center=True).sum()
|
|
tm.assert_frame_equal(DataFrame(xp), rs)
|
|
|
|
@td.skip_if_no_scipy
|
|
def test_cmov_window_na_min_periods(self):
|
|
# min_periods
|
|
vals = Series(np.random.randn(10))
|
|
vals[4] = np.nan
|
|
vals[8] = np.nan
|
|
|
|
xp = vals.rolling(5, min_periods=4, center=True).mean()
|
|
rs = vals.rolling(5, win_type='boxcar', min_periods=4,
|
|
center=True).mean()
|
|
tm.assert_series_equal(xp, rs)
|
|
|
|
@td.skip_if_no_scipy
|
|
def test_cmov_window_regular(self, win_types):
|
|
# GH 8238
|
|
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48,
|
|
10.63, 14.48])
|
|
xps = {
|
|
'hamming': [np.nan, np.nan, 8.71384, 9.56348, 12.38009, 14.03687,
|
|
13.8567, 11.81473, np.nan, np.nan],
|
|
'triang': [np.nan, np.nan, 9.28667, 10.34667, 12.00556, 13.33889,
|
|
13.38, 12.33667, np.nan, np.nan],
|
|
'barthann': [np.nan, np.nan, 8.4425, 9.1925, 12.5575, 14.3675,
|
|
14.0825, 11.5675, np.nan, np.nan],
|
|
'bohman': [np.nan, np.nan, 7.61599, 9.1764, 12.83559, 14.17267,
|
|
14.65923, 11.10401, np.nan, np.nan],
|
|
'blackmanharris': [np.nan, np.nan, 6.97691, 9.16438, 13.05052,
|
|
14.02156, 15.10512, 10.74574, np.nan, np.nan],
|
|
'nuttall': [np.nan, np.nan, 7.04618, 9.16786, 13.02671, 14.03559,
|
|
15.05657, 10.78514, np.nan, np.nan],
|
|
'blackman': [np.nan, np.nan, 7.73345, 9.17869, 12.79607, 14.20036,
|
|
14.57726, 11.16988, np.nan, np.nan],
|
|
'bartlett': [np.nan, np.nan, 8.4425, 9.1925, 12.5575, 14.3675,
|
|
14.0825, 11.5675, np.nan, np.nan]
|
|
}
|
|
|
|
xp = Series(xps[win_types])
|
|
rs = Series(vals).rolling(5, win_type=win_types, center=True).mean()
|
|
tm.assert_series_equal(xp, rs)
|
|
|
|
@td.skip_if_no_scipy
|
|
def test_cmov_window_regular_linear_range(self, win_types):
|
|
# GH 8238
|
|
vals = np.array(range(10), dtype=np.float)
|
|
xp = vals.copy()
|
|
xp[:2] = np.nan
|
|
xp[-2:] = np.nan
|
|
xp = Series(xp)
|
|
|
|
rs = Series(vals).rolling(5, win_type=win_types, center=True).mean()
|
|
tm.assert_series_equal(xp, rs)
|
|
|
|
@td.skip_if_no_scipy
|
|
def test_cmov_window_regular_missing_data(self, win_types):
|
|
# GH 8238
|
|
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, np.nan,
|
|
10.63, 14.48])
|
|
xps = {
|
|
'bartlett': [np.nan, np.nan, 9.70333, 10.5225, 8.4425, 9.1925,
|
|
12.5575, 14.3675, 15.61667, 13.655],
|
|
'blackman': [np.nan, np.nan, 9.04582, 11.41536, 7.73345, 9.17869,
|
|
12.79607, 14.20036, 15.8706, 13.655],
|
|
'barthann': [np.nan, np.nan, 9.70333, 10.5225, 8.4425, 9.1925,
|
|
12.5575, 14.3675, 15.61667, 13.655],
|
|
'bohman': [np.nan, np.nan, 8.9444, 11.56327, 7.61599, 9.1764,
|
|
12.83559, 14.17267, 15.90976, 13.655],
|
|
'hamming': [np.nan, np.nan, 9.59321, 10.29694, 8.71384, 9.56348,
|
|
12.38009, 14.20565, 15.24694, 13.69758],
|
|
'nuttall': [np.nan, np.nan, 8.47693, 12.2821, 7.04618, 9.16786,
|
|
13.02671, 14.03673, 16.08759, 13.65553],
|
|
'triang': [np.nan, np.nan, 9.33167, 9.76125, 9.28667, 10.34667,
|
|
12.00556, 13.82125, 14.49429, 13.765],
|
|
'blackmanharris': [np.nan, np.nan, 8.42526, 12.36824, 6.97691,
|
|
9.16438, 13.05052, 14.02175, 16.1098, 13.65509]
|
|
}
|
|
|
|
xp = Series(xps[win_types])
|
|
rs = Series(vals).rolling(5, win_type=win_types, min_periods=3).mean()
|
|
tm.assert_series_equal(xp, rs)
|
|
|
|
@td.skip_if_no_scipy
|
|
def test_cmov_window_special(self, win_types_special):
|
|
# GH 8238
|
|
kwds = {
|
|
'kaiser': {'beta': 1.},
|
|
'gaussian': {'std': 1.},
|
|
'general_gaussian': {'power': 2., 'width': 2.}}
|
|
|
|
vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48,
|
|
10.63, 14.48])
|
|
|
|
xps = {
|
|
'gaussian': [np.nan, np.nan, 8.97297, 9.76077, 12.24763, 13.89053,
|
|
13.65671, 12.01002, np.nan, np.nan],
|
|
'general_gaussian': [np.nan, np.nan, 9.85011, 10.71589, 11.73161,
|
|
13.08516, 12.95111, 12.74577, np.nan, np.nan],
|
|
'kaiser': [np.nan, np.nan, 9.86851, 11.02969, 11.65161, 12.75129,
|
|
12.90702, 12.83757, np.nan, np.nan]
|
|
}
|
|
|
|
xp = Series(xps[win_types_special])
|
|
rs = Series(vals).rolling(
|
|
5, win_type=win_types_special, center=True).mean(
|
|
**kwds[win_types_special])
|
|
tm.assert_series_equal(xp, rs)
|
|
|
|
@td.skip_if_no_scipy
|
|
def test_cmov_window_special_linear_range(self, win_types_special):
|
|
# GH 8238
|
|
kwds = {
|
|
'kaiser': {'beta': 1.},
|
|
'gaussian': {'std': 1.},
|
|
'general_gaussian': {'power': 2., 'width': 2.},
|
|
'slepian': {'width': 0.5}}
|
|
|
|
vals = np.array(range(10), dtype=np.float)
|
|
xp = vals.copy()
|
|
xp[:2] = np.nan
|
|
xp[-2:] = np.nan
|
|
xp = Series(xp)
|
|
|
|
rs = Series(vals).rolling(
|
|
5, win_type=win_types_special, center=True).mean(
|
|
**kwds[win_types_special])
|
|
tm.assert_series_equal(xp, rs)
|
|
|
|
def test_rolling_median(self):
|
|
self._check_moment_func(np.median, name='median')
|
|
|
|
def test_rolling_min(self):
|
|
self._check_moment_func(np.min, name='min')
|
|
|
|
a = pd.Series([1, 2, 3, 4, 5])
|
|
result = a.rolling(window=100, min_periods=1).min()
|
|
expected = pd.Series(np.ones(len(a)))
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
with pytest.raises(ValueError):
|
|
pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).min()
|
|
|
|
def test_rolling_max(self):
|
|
self._check_moment_func(np.max, name='max')
|
|
|
|
a = pd.Series([1, 2, 3, 4, 5], dtype=np.float64)
|
|
b = a.rolling(window=100, min_periods=1).max()
|
|
tm.assert_almost_equal(a, b)
|
|
|
|
with pytest.raises(ValueError):
|
|
pd.Series([1, 2, 3]).rolling(window=3, min_periods=5).max()
|
|
|
|
@pytest.mark.parametrize('q', [0.0, .1, .5, .9, 1.0])
|
|
def test_rolling_quantile(self, q):
|
|
|
|
def scoreatpercentile(a, per):
|
|
values = np.sort(a, axis=0)
|
|
|
|
idx = int(per / 1. * (values.shape[0] - 1))
|
|
|
|
if idx == values.shape[0] - 1:
|
|
retval = values[-1]
|
|
|
|
else:
|
|
qlow = float(idx) / float(values.shape[0] - 1)
|
|
qhig = float(idx + 1) / float(values.shape[0] - 1)
|
|
vlow = values[idx]
|
|
vhig = values[idx + 1]
|
|
retval = vlow + (vhig - vlow) * (per - qlow) / (qhig - qlow)
|
|
|
|
return retval
|
|
|
|
def quantile_func(x):
|
|
return scoreatpercentile(x, q)
|
|
|
|
self._check_moment_func(quantile_func, name='quantile',
|
|
quantile=q)
|
|
|
|
def test_rolling_quantile_np_percentile(self):
|
|
# #9413: Tests that rolling window's quantile default behavior
|
|
# is analogus to Numpy's percentile
|
|
row = 10
|
|
col = 5
|
|
idx = pd.date_range('20100101', periods=row, freq='B')
|
|
df = DataFrame(np.random.rand(row * col).reshape((row, -1)), index=idx)
|
|
|
|
df_quantile = df.quantile([0.25, 0.5, 0.75], axis=0)
|
|
np_percentile = np.percentile(df, [25, 50, 75], axis=0)
|
|
|
|
tm.assert_almost_equal(df_quantile.values, np.array(np_percentile))
|
|
|
|
@pytest.mark.skipif(_np_version_under1p12,
|
|
reason='numpy midpoint interpolation is broken')
|
|
@pytest.mark.parametrize('quantile', [0.0, 0.1, 0.45, 0.5, 1])
|
|
@pytest.mark.parametrize('interpolation', ['linear', 'lower', 'higher',
|
|
'nearest', 'midpoint'])
|
|
@pytest.mark.parametrize('data', [[1., 2., 3., 4., 5., 6., 7.],
|
|
[8., 1., 3., 4., 5., 2., 6., 7.],
|
|
[0., np.nan, 0.2, np.nan, 0.4],
|
|
[np.nan, np.nan, np.nan, np.nan],
|
|
[np.nan, 0.1, np.nan, 0.3, 0.4, 0.5],
|
|
[0.5], [np.nan, 0.7, 0.6]])
|
|
def test_rolling_quantile_interpolation_options(self, quantile,
|
|
interpolation, data):
|
|
# Tests that rolling window's quantile behavior is analogous to
|
|
# Series' quantile for each interpolation option
|
|
s = Series(data)
|
|
|
|
q1 = s.quantile(quantile, interpolation)
|
|
q2 = s.expanding(min_periods=1).quantile(
|
|
quantile, interpolation).iloc[-1]
|
|
|
|
if np.isnan(q1):
|
|
assert np.isnan(q2)
|
|
else:
|
|
assert q1 == q2
|
|
|
|
def test_invalid_quantile_value(self):
|
|
data = np.arange(5)
|
|
s = Series(data)
|
|
|
|
with pytest.raises(ValueError, match="Interpolation 'invalid'"
|
|
" is not supported"):
|
|
s.rolling(len(data), min_periods=1).quantile(
|
|
0.5, interpolation='invalid')
|
|
|
|
def test_rolling_quantile_param(self):
|
|
ser = Series([0.0, .1, .5, .9, 1.0])
|
|
|
|
with pytest.raises(ValueError):
|
|
ser.rolling(3).quantile(-0.1)
|
|
|
|
with pytest.raises(ValueError):
|
|
ser.rolling(3).quantile(10.0)
|
|
|
|
with pytest.raises(TypeError):
|
|
ser.rolling(3).quantile('foo')
|
|
|
|
def test_rolling_apply(self, raw):
|
|
# suppress warnings about empty slices, as we are deliberately testing
|
|
# with a 0-length Series
|
|
|
|
with warnings.catch_warnings():
|
|
warnings.filterwarnings("ignore",
|
|
message=".*(empty slice|0 for slice).*",
|
|
category=RuntimeWarning)
|
|
|
|
def f(x):
|
|
return x[np.isfinite(x)].mean()
|
|
|
|
self._check_moment_func(np.mean, name='apply', func=f, raw=raw)
|
|
|
|
expected = Series([])
|
|
result = expected.rolling(10).apply(lambda x: x.mean(), raw=raw)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# gh-8080
|
|
s = Series([None, None, None])
|
|
result = s.rolling(2, min_periods=0).apply(lambda x: len(x), raw=raw)
|
|
expected = Series([1., 2., 2.])
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
result = s.rolling(2, min_periods=0).apply(len, raw=raw)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
@pytest.mark.parametrize('klass', [Series, DataFrame])
|
|
@pytest.mark.parametrize(
|
|
'method', [lambda x: x.rolling(window=2), lambda x: x.expanding()])
|
|
def test_apply_future_warning(self, klass, method):
|
|
|
|
# gh-5071
|
|
s = klass(np.arange(3))
|
|
|
|
with tm.assert_produces_warning(FutureWarning):
|
|
method(s).apply(lambda x: len(x))
|
|
|
|
def test_rolling_apply_out_of_bounds(self, raw):
|
|
# gh-1850
|
|
vals = pd.Series([1, 2, 3, 4])
|
|
|
|
result = vals.rolling(10).apply(np.sum, raw=raw)
|
|
assert result.isna().all()
|
|
|
|
result = vals.rolling(10, min_periods=1).apply(np.sum, raw=raw)
|
|
expected = pd.Series([1, 3, 6, 10], dtype=float)
|
|
tm.assert_almost_equal(result, expected)
|
|
|
|
@pytest.mark.parametrize('window', [2, '2s'])
|
|
def test_rolling_apply_with_pandas_objects(self, window):
|
|
# 5071
|
|
df = pd.DataFrame({'A': np.random.randn(5),
|
|
'B': np.random.randint(0, 10, size=5)},
|
|
index=pd.date_range('20130101', periods=5, freq='s'))
|
|
|
|
# we have an equal spaced timeseries index
|
|
# so simulate removing the first period
|
|
def f(x):
|
|
if x.index[0] == df.index[0]:
|
|
return np.nan
|
|
return x.iloc[-1]
|
|
|
|
result = df.rolling(window).apply(f, raw=False)
|
|
expected = df.iloc[2:].reindex_like(df)
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
with pytest.raises(AttributeError):
|
|
df.rolling(window).apply(f, raw=True)
|
|
|
|
def test_rolling_std(self):
|
|
self._check_moment_func(lambda x: np.std(x, ddof=1),
|
|
name='std')
|
|
self._check_moment_func(lambda x: np.std(x, ddof=0),
|
|
name='std', ddof=0)
|
|
|
|
def test_rolling_std_1obs(self):
|
|
vals = pd.Series([1., 2., 3., 4., 5.])
|
|
|
|
result = vals.rolling(1, min_periods=1).std()
|
|
expected = pd.Series([np.nan] * 5)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
result = vals.rolling(1, min_periods=1).std(ddof=0)
|
|
expected = pd.Series([0.] * 5)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
result = (pd.Series([np.nan, np.nan, 3, 4, 5])
|
|
.rolling(3, min_periods=2).std())
|
|
assert np.isnan(result[2])
|
|
|
|
def test_rolling_std_neg_sqrt(self):
|
|
# unit test from Bottleneck
|
|
|
|
# Test move_nanstd for neg sqrt.
|
|
|
|
a = pd.Series([0.0011448196318903589, 0.00028718669878572767,
|
|
0.00028718669878572767, 0.00028718669878572767,
|
|
0.00028718669878572767])
|
|
b = a.rolling(window=3).std()
|
|
assert np.isfinite(b[2:]).all()
|
|
|
|
b = a.ewm(span=3).std()
|
|
assert np.isfinite(b[2:]).all()
|
|
|
|
def test_rolling_var(self):
|
|
self._check_moment_func(lambda x: np.var(x, ddof=1),
|
|
name='var')
|
|
self._check_moment_func(lambda x: np.var(x, ddof=0),
|
|
name='var', ddof=0)
|
|
|
|
@td.skip_if_no_scipy
|
|
def test_rolling_skew(self):
|
|
from scipy.stats import skew
|
|
self._check_moment_func(lambda x: skew(x, bias=False), name='skew')
|
|
|
|
@td.skip_if_no_scipy
|
|
def test_rolling_kurt(self):
|
|
from scipy.stats import kurtosis
|
|
self._check_moment_func(lambda x: kurtosis(x, bias=False),
|
|
name='kurt')
|
|
|
|
def _check_moment_func(self, static_comp, name, has_min_periods=True,
|
|
has_center=True, has_time_rule=True,
|
|
fill_value=None, zero_min_periods_equal=True,
|
|
**kwargs):
|
|
|
|
def get_result(obj, window, min_periods=None, center=False):
|
|
r = obj.rolling(window=window, min_periods=min_periods,
|
|
center=center)
|
|
return getattr(r, name)(**kwargs)
|
|
|
|
series_result = get_result(self.series, window=50)
|
|
assert isinstance(series_result, Series)
|
|
tm.assert_almost_equal(series_result.iloc[-1],
|
|
static_comp(self.series[-50:]))
|
|
|
|
frame_result = get_result(self.frame, window=50)
|
|
assert isinstance(frame_result, DataFrame)
|
|
tm.assert_series_equal(
|
|
frame_result.iloc[-1, :],
|
|
self.frame.iloc[-50:, :].apply(static_comp, axis=0, raw=raw),
|
|
check_names=False)
|
|
|
|
# check time_rule works
|
|
if has_time_rule:
|
|
win = 25
|
|
minp = 10
|
|
series = self.series[::2].resample('B').mean()
|
|
frame = self.frame[::2].resample('B').mean()
|
|
|
|
if has_min_periods:
|
|
series_result = get_result(series, window=win,
|
|
min_periods=minp)
|
|
frame_result = get_result(frame, window=win,
|
|
min_periods=minp)
|
|
else:
|
|
series_result = get_result(series, window=win)
|
|
frame_result = get_result(frame, window=win)
|
|
|
|
last_date = series_result.index[-1]
|
|
prev_date = last_date - 24 * offsets.BDay()
|
|
|
|
trunc_series = self.series[::2].truncate(prev_date, last_date)
|
|
trunc_frame = self.frame[::2].truncate(prev_date, last_date)
|
|
|
|
tm.assert_almost_equal(series_result[-1],
|
|
static_comp(trunc_series))
|
|
|
|
tm.assert_series_equal(frame_result.xs(last_date),
|
|
trunc_frame.apply(static_comp, raw=raw),
|
|
check_names=False)
|
|
|
|
# excluding NaNs correctly
|
|
obj = Series(randn(50))
|
|
obj[:10] = np.NaN
|
|
obj[-10:] = np.NaN
|
|
if has_min_periods:
|
|
result = get_result(obj, 50, min_periods=30)
|
|
tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10]))
|
|
|
|
# min_periods is working correctly
|
|
result = get_result(obj, 20, min_periods=15)
|
|
assert isna(result.iloc[23])
|
|
assert not isna(result.iloc[24])
|
|
|
|
assert not isna(result.iloc[-6])
|
|
assert isna(result.iloc[-5])
|
|
|
|
obj2 = Series(randn(20))
|
|
result = get_result(obj2, 10, min_periods=5)
|
|
assert isna(result.iloc[3])
|
|
assert notna(result.iloc[4])
|
|
|
|
if zero_min_periods_equal:
|
|
# min_periods=0 may be equivalent to min_periods=1
|
|
result0 = get_result(obj, 20, min_periods=0)
|
|
result1 = get_result(obj, 20, min_periods=1)
|
|
tm.assert_almost_equal(result0, result1)
|
|
else:
|
|
result = get_result(obj, 50)
|
|
tm.assert_almost_equal(result.iloc[-1], static_comp(obj[10:-10]))
|
|
|
|
# window larger than series length (#7297)
|
|
if has_min_periods:
|
|
for minp in (0, len(self.series) - 1, len(self.series)):
|
|
result = get_result(self.series, len(self.series) + 1,
|
|
min_periods=minp)
|
|
expected = get_result(self.series, len(self.series),
|
|
min_periods=minp)
|
|
nan_mask = isna(result)
|
|
tm.assert_series_equal(nan_mask, isna(expected))
|
|
|
|
nan_mask = ~nan_mask
|
|
tm.assert_almost_equal(result[nan_mask],
|
|
expected[nan_mask])
|
|
else:
|
|
result = get_result(self.series, len(self.series) + 1)
|
|
expected = get_result(self.series, len(self.series))
|
|
nan_mask = isna(result)
|
|
tm.assert_series_equal(nan_mask, isna(expected))
|
|
|
|
nan_mask = ~nan_mask
|
|
tm.assert_almost_equal(result[nan_mask], expected[nan_mask])
|
|
|
|
# check center=True
|
|
if has_center:
|
|
if has_min_periods:
|
|
result = get_result(obj, 20, min_periods=15, center=True)
|
|
expected = get_result(
|
|
pd.concat([obj, Series([np.NaN] * 9)]), 20,
|
|
min_periods=15)[9:].reset_index(drop=True)
|
|
else:
|
|
result = get_result(obj, 20, center=True)
|
|
expected = get_result(
|
|
pd.concat([obj, Series([np.NaN] * 9)]),
|
|
20)[9:].reset_index(drop=True)
|
|
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# shifter index
|
|
s = ['x%d' % x for x in range(12)]
|
|
|
|
if has_min_periods:
|
|
minp = 10
|
|
|
|
series_xp = get_result(
|
|
self.series.reindex(list(self.series.index) + s),
|
|
window=25,
|
|
min_periods=minp).shift(-12).reindex(self.series.index)
|
|
frame_xp = get_result(
|
|
self.frame.reindex(list(self.frame.index) + s),
|
|
window=25,
|
|
min_periods=minp).shift(-12).reindex(self.frame.index)
|
|
|
|
series_rs = get_result(self.series, window=25,
|
|
min_periods=minp, center=True)
|
|
frame_rs = get_result(self.frame, window=25, min_periods=minp,
|
|
center=True)
|
|
|
|
else:
|
|
series_xp = get_result(
|
|
self.series.reindex(list(self.series.index) + s),
|
|
window=25).shift(-12).reindex(self.series.index)
|
|
frame_xp = get_result(
|
|
self.frame.reindex(list(self.frame.index) + s),
|
|
window=25).shift(-12).reindex(self.frame.index)
|
|
|
|
series_rs = get_result(self.series, window=25, center=True)
|
|
frame_rs = get_result(self.frame, window=25, center=True)
|
|
|
|
if fill_value is not None:
|
|
series_xp = series_xp.fillna(fill_value)
|
|
frame_xp = frame_xp.fillna(fill_value)
|
|
tm.assert_series_equal(series_xp, series_rs)
|
|
tm.assert_frame_equal(frame_xp, frame_rs)
|
|
|
|
def test_ewma(self):
|
|
self._check_ew(name='mean')
|
|
|
|
vals = pd.Series(np.zeros(1000))
|
|
vals[5] = 1
|
|
result = vals.ewm(span=100, adjust=False).mean().sum()
|
|
assert np.abs(result - 1) < 1e-2
|
|
|
|
@pytest.mark.parametrize('adjust', [True, False])
|
|
@pytest.mark.parametrize('ignore_na', [True, False])
|
|
def test_ewma_cases(self, adjust, ignore_na):
|
|
# try adjust/ignore_na args matrix
|
|
|
|
s = Series([1.0, 2.0, 4.0, 8.0])
|
|
|
|
if adjust:
|
|
expected = Series([1.0, 1.6, 2.736842, 4.923077])
|
|
else:
|
|
expected = Series([1.0, 1.333333, 2.222222, 4.148148])
|
|
|
|
result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean()
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
def test_ewma_nan_handling(self):
|
|
s = Series([1.] + [np.nan] * 5 + [1.])
|
|
result = s.ewm(com=5).mean()
|
|
tm.assert_series_equal(result, Series([1.] * len(s)))
|
|
|
|
s = Series([np.nan] * 2 + [1.] + [np.nan] * 2 + [1.])
|
|
result = s.ewm(com=5).mean()
|
|
tm.assert_series_equal(result, Series([np.nan] * 2 + [1.] * 4))
|
|
|
|
# GH 7603
|
|
s0 = Series([np.nan, 1., 101.])
|
|
s1 = Series([1., np.nan, 101.])
|
|
s2 = Series([np.nan, 1., np.nan, np.nan, 101., np.nan])
|
|
s3 = Series([1., np.nan, 101., 50.])
|
|
com = 2.
|
|
alpha = 1. / (1. + com)
|
|
|
|
def simple_wma(s, w):
|
|
return (s.multiply(w).cumsum() / w.cumsum()).fillna(method='ffill')
|
|
|
|
for (s, adjust, ignore_na, w) in [
|
|
(s0, True, False, [np.nan, (1. - alpha), 1.]),
|
|
(s0, True, True, [np.nan, (1. - alpha), 1.]),
|
|
(s0, False, False, [np.nan, (1. - alpha), alpha]),
|
|
(s0, False, True, [np.nan, (1. - alpha), alpha]),
|
|
(s1, True, False, [(1. - alpha) ** 2, np.nan, 1.]),
|
|
(s1, True, True, [(1. - alpha), np.nan, 1.]),
|
|
(s1, False, False, [(1. - alpha) ** 2, np.nan, alpha]),
|
|
(s1, False, True, [(1. - alpha), np.nan, alpha]),
|
|
(s2, True, False, [np.nan, (1. - alpha) **
|
|
3, np.nan, np.nan, 1., np.nan]),
|
|
(s2, True, True, [np.nan, (1. - alpha),
|
|
np.nan, np.nan, 1., np.nan]),
|
|
(s2, False, False, [np.nan, (1. - alpha) **
|
|
3, np.nan, np.nan, alpha, np.nan]),
|
|
(s2, False, True, [np.nan, (1. - alpha),
|
|
np.nan, np.nan, alpha, np.nan]),
|
|
(s3, True, False, [(1. - alpha) **
|
|
3, np.nan, (1. - alpha), 1.]),
|
|
(s3, True, True, [(1. - alpha) **
|
|
2, np.nan, (1. - alpha), 1.]),
|
|
(s3, False, False, [(1. - alpha) ** 3, np.nan,
|
|
(1. - alpha) * alpha,
|
|
alpha * ((1. - alpha) ** 2 + alpha)]),
|
|
(s3, False, True, [(1. - alpha) ** 2,
|
|
np.nan, (1. - alpha) * alpha, alpha])]:
|
|
expected = simple_wma(s, Series(w))
|
|
result = s.ewm(com=com, adjust=adjust, ignore_na=ignore_na).mean()
|
|
|
|
tm.assert_series_equal(result, expected)
|
|
if ignore_na is False:
|
|
# check that ignore_na defaults to False
|
|
result = s.ewm(com=com, adjust=adjust).mean()
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
def test_ewmvar(self):
|
|
self._check_ew(name='var')
|
|
|
|
def test_ewmvol(self):
|
|
self._check_ew(name='vol')
|
|
|
|
def test_ewma_span_com_args(self):
|
|
A = self.series.ewm(com=9.5).mean()
|
|
B = self.series.ewm(span=20).mean()
|
|
tm.assert_almost_equal(A, B)
|
|
|
|
with pytest.raises(ValueError):
|
|
self.series.ewm(com=9.5, span=20)
|
|
with pytest.raises(ValueError):
|
|
self.series.ewm().mean()
|
|
|
|
def test_ewma_halflife_arg(self):
|
|
A = self.series.ewm(com=13.932726172912965).mean()
|
|
B = self.series.ewm(halflife=10.0).mean()
|
|
tm.assert_almost_equal(A, B)
|
|
|
|
with pytest.raises(ValueError):
|
|
self.series.ewm(span=20, halflife=50)
|
|
with pytest.raises(ValueError):
|
|
self.series.ewm(com=9.5, halflife=50)
|
|
with pytest.raises(ValueError):
|
|
self.series.ewm(com=9.5, span=20, halflife=50)
|
|
with pytest.raises(ValueError):
|
|
self.series.ewm()
|
|
|
|
def test_ewm_alpha(self):
|
|
# GH 10789
|
|
s = Series(self.arr)
|
|
a = s.ewm(alpha=0.61722699889169674).mean()
|
|
b = s.ewm(com=0.62014947789973052).mean()
|
|
c = s.ewm(span=2.240298955799461).mean()
|
|
d = s.ewm(halflife=0.721792864318).mean()
|
|
tm.assert_series_equal(a, b)
|
|
tm.assert_series_equal(a, c)
|
|
tm.assert_series_equal(a, d)
|
|
|
|
def test_ewm_alpha_arg(self):
|
|
# GH 10789
|
|
s = self.series
|
|
with pytest.raises(ValueError):
|
|
s.ewm()
|
|
with pytest.raises(ValueError):
|
|
s.ewm(com=10.0, alpha=0.5)
|
|
with pytest.raises(ValueError):
|
|
s.ewm(span=10.0, alpha=0.5)
|
|
with pytest.raises(ValueError):
|
|
s.ewm(halflife=10.0, alpha=0.5)
|
|
|
|
def test_ewm_domain_checks(self):
|
|
# GH 12492
|
|
s = Series(self.arr)
|
|
# com must satisfy: com >= 0
|
|
pytest.raises(ValueError, s.ewm, com=-0.1)
|
|
s.ewm(com=0.0)
|
|
s.ewm(com=0.1)
|
|
# span must satisfy: span >= 1
|
|
pytest.raises(ValueError, s.ewm, span=-0.1)
|
|
pytest.raises(ValueError, s.ewm, span=0.0)
|
|
pytest.raises(ValueError, s.ewm, span=0.9)
|
|
s.ewm(span=1.0)
|
|
s.ewm(span=1.1)
|
|
# halflife must satisfy: halflife > 0
|
|
pytest.raises(ValueError, s.ewm, halflife=-0.1)
|
|
pytest.raises(ValueError, s.ewm, halflife=0.0)
|
|
s.ewm(halflife=0.1)
|
|
# alpha must satisfy: 0 < alpha <= 1
|
|
pytest.raises(ValueError, s.ewm, alpha=-0.1)
|
|
pytest.raises(ValueError, s.ewm, alpha=0.0)
|
|
s.ewm(alpha=0.1)
|
|
s.ewm(alpha=1.0)
|
|
pytest.raises(ValueError, s.ewm, alpha=1.1)
|
|
|
|
@pytest.mark.parametrize('method', ['mean', 'vol', 'var'])
|
|
def test_ew_empty_series(self, method):
|
|
vals = pd.Series([], dtype=np.float64)
|
|
|
|
ewm = vals.ewm(3)
|
|
result = getattr(ewm, method)()
|
|
tm.assert_almost_equal(result, vals)
|
|
|
|
def _check_ew(self, name=None, preserve_nan=False):
|
|
series_result = getattr(self.series.ewm(com=10), name)()
|
|
assert isinstance(series_result, Series)
|
|
|
|
frame_result = getattr(self.frame.ewm(com=10), name)()
|
|
assert type(frame_result) == DataFrame
|
|
|
|
result = getattr(self.series.ewm(com=10), name)()
|
|
if preserve_nan:
|
|
assert result[self._nan_locs].isna().all()
|
|
|
|
# excluding NaNs correctly
|
|
arr = randn(50)
|
|
arr[:10] = np.NaN
|
|
arr[-10:] = np.NaN
|
|
s = Series(arr)
|
|
|
|
# check min_periods
|
|
# GH 7898
|
|
result = getattr(s.ewm(com=50, min_periods=2), name)()
|
|
assert result[:11].isna().all()
|
|
assert not result[11:].isna().any()
|
|
|
|
for min_periods in (0, 1):
|
|
result = getattr(s.ewm(com=50, min_periods=min_periods), name)()
|
|
if name == 'mean':
|
|
assert result[:10].isna().all()
|
|
assert not result[10:].isna().any()
|
|
else:
|
|
# ewm.std, ewm.vol, ewm.var (with bias=False) require at least
|
|
# two values
|
|
assert result[:11].isna().all()
|
|
assert not result[11:].isna().any()
|
|
|
|
# check series of length 0
|
|
result = getattr(Series().ewm(com=50, min_periods=min_periods),
|
|
name)()
|
|
tm.assert_series_equal(result, Series())
|
|
|
|
# check series of length 1
|
|
result = getattr(Series([1.]).ewm(50, min_periods=min_periods),
|
|
name)()
|
|
if name == 'mean':
|
|
tm.assert_series_equal(result, Series([1.]))
|
|
else:
|
|
# ewm.std, ewm.vol, ewm.var with bias=False require at least
|
|
# two values
|
|
tm.assert_series_equal(result, Series([np.NaN]))
|
|
|
|
# pass in ints
|
|
result2 = getattr(Series(np.arange(50)).ewm(span=10), name)()
|
|
assert result2.dtype == np.float_
|
|
|
|
|
|
class TestPairwise(object):
|
|
|
|
# GH 7738
|
|
df1s = [DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0, 1]),
|
|
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 0]),
|
|
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1, 1]),
|
|
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]],
|
|
columns=['C', 'C']),
|
|
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[1., 0]),
|
|
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=[0., 1]),
|
|
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1]], columns=['C', 1]),
|
|
DataFrame([[2., 4.], [1., 2.], [5., 2.], [8., 1.]],
|
|
columns=[1, 0.]),
|
|
DataFrame([[2, 4.], [1, 2.], [5, 2.], [8, 1.]],
|
|
columns=[0, 1.]),
|
|
DataFrame([[2, 4], [1, 2], [5, 2], [8, 1.]],
|
|
columns=[1., 'X']), ]
|
|
df2 = DataFrame([[None, 1, 1], [None, 1, 2],
|
|
[None, 3, 2], [None, 8, 1]], columns=['Y', 'Z', 'X'])
|
|
s = Series([1, 1, 3, 8])
|
|
|
|
def compare(self, result, expected):
|
|
|
|
# since we have sorted the results
|
|
# we can only compare non-nans
|
|
result = result.dropna().values
|
|
expected = expected.dropna().values
|
|
|
|
tm.assert_numpy_array_equal(result, expected, check_dtype=False)
|
|
|
|
@pytest.mark.parametrize('f', [lambda x: x.cov(), lambda x: x.corr()])
|
|
def test_no_flex(self, f):
|
|
|
|
# DataFrame methods (which do not call _flex_binary_moment())
|
|
|
|
results = [f(df) for df in self.df1s]
|
|
for (df, result) in zip(self.df1s, results):
|
|
tm.assert_index_equal(result.index, df.columns)
|
|
tm.assert_index_equal(result.columns, df.columns)
|
|
for i, result in enumerate(results):
|
|
if i > 0:
|
|
self.compare(result, results[0])
|
|
|
|
@pytest.mark.parametrize(
|
|
'f', [lambda x: x.expanding().cov(pairwise=True),
|
|
lambda x: x.expanding().corr(pairwise=True),
|
|
lambda x: x.rolling(window=3).cov(pairwise=True),
|
|
lambda x: x.rolling(window=3).corr(pairwise=True),
|
|
lambda x: x.ewm(com=3).cov(pairwise=True),
|
|
lambda x: x.ewm(com=3).corr(pairwise=True)])
|
|
def test_pairwise_with_self(self, f):
|
|
|
|
# DataFrame with itself, pairwise=True
|
|
# note that we may construct the 1st level of the MI
|
|
# in a non-monotonic way, so compare accordingly
|
|
results = []
|
|
for i, df in enumerate(self.df1s):
|
|
result = f(df)
|
|
tm.assert_index_equal(result.index.levels[0],
|
|
df.index,
|
|
check_names=False)
|
|
tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
|
|
safe_sort(df.columns.unique()))
|
|
tm.assert_index_equal(result.columns, df.columns)
|
|
results.append(result)
|
|
|
|
for i, result in enumerate(results):
|
|
if i > 0:
|
|
self.compare(result, results[0])
|
|
|
|
@pytest.mark.parametrize(
|
|
'f', [lambda x: x.expanding().cov(pairwise=False),
|
|
lambda x: x.expanding().corr(pairwise=False),
|
|
lambda x: x.rolling(window=3).cov(pairwise=False),
|
|
lambda x: x.rolling(window=3).corr(pairwise=False),
|
|
lambda x: x.ewm(com=3).cov(pairwise=False),
|
|
lambda x: x.ewm(com=3).corr(pairwise=False), ])
|
|
def test_no_pairwise_with_self(self, f):
|
|
|
|
# DataFrame with itself, pairwise=False
|
|
results = [f(df) for df in self.df1s]
|
|
for (df, result) in zip(self.df1s, results):
|
|
tm.assert_index_equal(result.index, df.index)
|
|
tm.assert_index_equal(result.columns, df.columns)
|
|
for i, result in enumerate(results):
|
|
if i > 0:
|
|
self.compare(result, results[0])
|
|
|
|
@pytest.mark.parametrize(
|
|
'f', [lambda x, y: x.expanding().cov(y, pairwise=True),
|
|
lambda x, y: x.expanding().corr(y, pairwise=True),
|
|
lambda x, y: x.rolling(window=3).cov(y, pairwise=True),
|
|
lambda x, y: x.rolling(window=3).corr(y, pairwise=True),
|
|
lambda x, y: x.ewm(com=3).cov(y, pairwise=True),
|
|
lambda x, y: x.ewm(com=3).corr(y, pairwise=True), ])
|
|
def test_pairwise_with_other(self, f):
|
|
|
|
# DataFrame with another DataFrame, pairwise=True
|
|
results = [f(df, self.df2) for df in self.df1s]
|
|
for (df, result) in zip(self.df1s, results):
|
|
tm.assert_index_equal(result.index.levels[0],
|
|
df.index,
|
|
check_names=False)
|
|
tm.assert_numpy_array_equal(safe_sort(result.index.levels[1]),
|
|
safe_sort(self.df2.columns.unique()))
|
|
for i, result in enumerate(results):
|
|
if i > 0:
|
|
self.compare(result, results[0])
|
|
|
|
@pytest.mark.parametrize(
|
|
'f', [lambda x, y: x.expanding().cov(y, pairwise=False),
|
|
lambda x, y: x.expanding().corr(y, pairwise=False),
|
|
lambda x, y: x.rolling(window=3).cov(y, pairwise=False),
|
|
lambda x, y: x.rolling(window=3).corr(y, pairwise=False),
|
|
lambda x, y: x.ewm(com=3).cov(y, pairwise=False),
|
|
lambda x, y: x.ewm(com=3).corr(y, pairwise=False), ])
|
|
def test_no_pairwise_with_other(self, f):
|
|
|
|
# DataFrame with another DataFrame, pairwise=False
|
|
results = [f(df, self.df2) if df.columns.is_unique else None
|
|
for df in self.df1s]
|
|
for (df, result) in zip(self.df1s, results):
|
|
if result is not None:
|
|
with catch_warnings(record=True):
|
|
# we can have int and str columns
|
|
expected_index = df.index.union(self.df2.index)
|
|
expected_columns = df.columns.union(self.df2.columns)
|
|
tm.assert_index_equal(result.index, expected_index)
|
|
tm.assert_index_equal(result.columns, expected_columns)
|
|
else:
|
|
tm.assert_raises_regex(
|
|
ValueError, "'arg1' columns are not unique", f, df,
|
|
self.df2)
|
|
tm.assert_raises_regex(
|
|
ValueError, "'arg2' columns are not unique", f,
|
|
self.df2, df)
|
|
|
|
@pytest.mark.parametrize(
|
|
'f', [lambda x, y: x.expanding().cov(y),
|
|
lambda x, y: x.expanding().corr(y),
|
|
lambda x, y: x.rolling(window=3).cov(y),
|
|
lambda x, y: x.rolling(window=3).corr(y),
|
|
lambda x, y: x.ewm(com=3).cov(y),
|
|
lambda x, y: x.ewm(com=3).corr(y), ])
|
|
def test_pairwise_with_series(self, f):
|
|
|
|
# DataFrame with a Series
|
|
results = ([f(df, self.s) for df in self.df1s] +
|
|
[f(self.s, df) for df in self.df1s])
|
|
for (df, result) in zip(self.df1s, results):
|
|
tm.assert_index_equal(result.index, df.index)
|
|
tm.assert_index_equal(result.columns, df.columns)
|
|
for i, result in enumerate(results):
|
|
if i > 0:
|
|
self.compare(result, results[0])
|
|
|
|
|
|
# create the data only once, since the tests do not modify it
|
|
def _create_consistency_data():
|
|
def create_series():
|
|
return [Series(),
|
|
Series([np.nan]),
|
|
Series([np.nan, np.nan]),
|
|
Series([3.]),
|
|
Series([np.nan, 3.]),
|
|
Series([3., np.nan]),
|
|
Series([1., 3.]),
|
|
Series([2., 2.]),
|
|
Series([3., 1.]),
|
|
Series([5., 5., 5., 5., np.nan, np.nan, np.nan, 5., 5., np.nan,
|
|
np.nan]),
|
|
Series([np.nan, 5., 5., 5., np.nan, np.nan, np.nan, 5., 5.,
|
|
np.nan, np.nan]),
|
|
Series([np.nan, np.nan, 5., 5., np.nan, np.nan, np.nan, 5., 5.,
|
|
np.nan, np.nan]),
|
|
Series([np.nan, 3., np.nan, 3., 4., 5., 6., np.nan, np.nan, 7.,
|
|
12., 13., 14., 15.]),
|
|
Series([np.nan, 5., np.nan, 2., 4., 0., 9., np.nan, np.nan, 3.,
|
|
12., 13., 14., 15.]),
|
|
Series([2., 3., np.nan, 3., 4., 5., 6., np.nan, np.nan, 7.,
|
|
12., 13., 14., 15.]),
|
|
Series([2., 5., np.nan, 2., 4., 0., 9., np.nan, np.nan, 3.,
|
|
12., 13., 14., 15.]),
|
|
Series(range(10)),
|
|
Series(range(20, 0, -2)), ]
|
|
|
|
def create_dataframes():
|
|
return ([DataFrame(),
|
|
DataFrame(columns=['a']),
|
|
DataFrame(columns=['a', 'a']),
|
|
DataFrame(columns=['a', 'b']),
|
|
DataFrame(np.arange(10).reshape((5, 2))),
|
|
DataFrame(np.arange(25).reshape((5, 5))),
|
|
DataFrame(np.arange(25).reshape((5, 5)),
|
|
columns=['a', 'b', 99, 'd', 'd'])] +
|
|
[DataFrame(s) for s in create_series()])
|
|
|
|
def is_constant(x):
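# an object counts as 'constant' if all of its non-NaN values are identical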
|
|
values = x.values.ravel()
|
|
return len(set(values[notna(values)])) == 1
|
|
|
|
def no_nans(x):
|
|
return x.notna().all().all()
|
|
|
|
# each entry is a tuple of (object, is_constant, no_nans)
|
|
data = create_series() + create_dataframes()
|
|
|
|
return [(x, is_constant(x), no_nans(x)) for x in data]
|
|
|
|
|
|
_consistency_data = _create_consistency_data()
|
|
|
|
|
|
def _rolling_consistency_cases():
|
|
for window in [1, 2, 3, 10, 20]:
|
|
for min_periods in set([0, 1, 2, 3, 4, window]):
|
|
if min_periods and (min_periods > window):
|
|
continue
|
|
for center in [False, True]:
|
|
yield window, min_periods, center
|
|
|
|
|
|
class TestMomentsConsistency(Base):
|
|
base_functions = [
|
|
(lambda v: Series(v).count(), None, 'count'),
|
|
(lambda v: Series(v).max(), None, 'max'),
|
|
(lambda v: Series(v).min(), None, 'min'),
|
|
(lambda v: Series(v).sum(), None, 'sum'),
|
|
(lambda v: Series(v).mean(), None, 'mean'),
|
|
(lambda v: Series(v).std(), 1, 'std'),
|
|
(lambda v: Series(v).cov(Series(v)), None, 'cov'),
|
|
(lambda v: Series(v).corr(Series(v)), None, 'corr'),
|
|
(lambda v: Series(v).var(), 1, 'var'),
|
|
|
|
# restore once GH 8086 is fixed
|
|
# (lambda v: Series(v).skew(), 3, 'skew'),
|
|
# (lambda v: Series(v).kurt(), 4, 'kurt'),
|
|
|
|
# restore once GH 8084 is fixed
|
|
# (lambda v: Series(v).quantile(0.3), None, 'quantile'),
|
|
|
|
(lambda v: Series(v).median(), None, 'median'),
|
|
(np.nanmax, 1, 'max'),
|
|
(np.nanmin, 1, 'min'),
|
|
(np.nansum, 1, 'sum'),
|
|
(np.nanmean, 1, 'mean'),
|
|
(lambda v: np.nanstd(v, ddof=1), 1, 'std'),
|
|
(lambda v: np.nanvar(v, ddof=1), 1, 'var'),
|
|
(np.nanmedian, 1, 'median'),
|
|
]
|
|
no_nan_functions = [
|
|
(np.max, None, 'max'),
|
|
(np.min, None, 'min'),
|
|
(np.sum, None, 'sum'),
|
|
(np.mean, None, 'mean'),
|
|
(lambda v: np.std(v, ddof=1), 1, 'std'),
|
|
(lambda v: np.var(v, ddof=1), 1, 'var'),
|
|
(np.median, None, 'median'),
|
|
]
|
|
|
|
def _create_data(self):
|
|
super(TestMomentsConsistency, self)._create_data()
|
|
self.data = _consistency_data
|
|
|
|
def setup_method(self, method):
|
|
self._create_data()
|
|
|
|
def _test_moments_consistency(self, min_periods, count, mean, mock_mean,
|
|
corr, var_unbiased=None, std_unbiased=None,
|
|
cov_unbiased=None, var_biased=None,
|
|
std_biased=None, cov_biased=None,
|
|
var_debiasing_factors=None):
|
|
def _non_null_values(x):
|
|
values = x.values.ravel()
|
|
return set(values[notna(values)].tolist())
|
|
|
|
for (x, is_constant, no_nans) in self.data:
|
|
count_x = count(x)
|
|
mean_x = mean(x)
|
|
|
|
if mock_mean:
|
|
# check that mean equals mock_mean
|
|
expected = mock_mean(x)
|
|
assert_equal(mean_x, expected.astype('float64'))
|
|
|
|
# check that correlation of a series with itself is either 1 or NaN
|
|
corr_x_x = corr(x, x)
|
|
|
|
# assert _non_null_values(corr_x_x).issubset(set([1.]))
|
|
# restore once rolling_cov(x, x) is identically equal to var(x)
|
|
|
|
if is_constant:
|
|
exp = x.max() if isinstance(x, Series) else x.max().max()
|
|
|
|
# check mean of constant series
|
|
expected = x * np.nan
|
|
expected[count_x >= max(min_periods, 1)] = exp
|
|
assert_equal(mean_x, expected)
|
|
|
|
# check correlation of constant series with itself is NaN
|
|
expected[:] = np.nan
|
|
assert_equal(corr_x_x, expected)
|
|
|
|
if var_unbiased and var_biased and var_debiasing_factors:
|
|
# check variance debiasing factors
|
|
var_unbiased_x = var_unbiased(x)
|
|
var_biased_x = var_biased(x)
|
|
var_debiasing_factors_x = var_debiasing_factors(x)
|
|
assert_equal(var_unbiased_x, var_biased_x *
|
|
var_debiasing_factors_x)
|
|
|
|
for (std, var, cov) in [(std_biased, var_biased, cov_biased),
|
|
(std_unbiased, var_unbiased, cov_unbiased)
|
|
]:
|
|
|
|
# check that var(x), std(x), and cov(x) are all >= 0
|
|
var_x = var(x)
|
|
std_x = std(x)
|
|
assert not (var_x < 0).any().any()
|
|
assert not (std_x < 0).any().any()
|
|
if cov:
|
|
cov_x_x = cov(x, x)
|
|
assert not (cov_x_x < 0).any().any()
|
|
|
|
# check that var(x) == cov(x, x)
|
|
assert_equal(var_x, cov_x_x)
|
|
|
|
# check that var(x) == std(x)^2
|
|
assert_equal(var_x, std_x * std_x)
|
|
|
|
if var is var_biased:
|
|
# check that biased var(x) == mean(x^2) - mean(x)^2
|
|
mean_x2 = mean(x * x)
|
|
assert_equal(var_x, mean_x2 - (mean_x * mean_x))
|
|
|
|
if is_constant:
|
|
# check that variance of constant series is identically 0
|
|
assert not (var_x > 0).any().any()
|
|
expected = x * np.nan
|
|
expected[count_x >= max(min_periods, 1)] = 0.
|
|
if var is var_unbiased:
|
|
expected[count_x < 2] = np.nan
|
|
assert_equal(var_x, expected)
|
|
|
|
if isinstance(x, Series):
|
|
for (y, is_constant, no_nans) in self.data:
|
|
if not x.isna().equals(y.isna()):
|
|
# can only easily test two Series with similar
|
|
# structure
|
|
continue
|
|
|
|
# check that corr(x, y) is symmetric
|
|
corr_x_y = corr(x, y)
|
|
corr_y_x = corr(y, x)
|
|
assert_equal(corr_x_y, corr_y_x)
|
|
|
|
if cov:
|
|
# check that cov(x, y) is symmetric
|
|
cov_x_y = cov(x, y)
|
|
cov_y_x = cov(y, x)
|
|
assert_equal(cov_x_y, cov_y_x)
|
|
|
|
# check that cov(x, y) == (var(x+y) - var(x) -
|
|
# var(y)) / 2
|
|
var_x_plus_y = var(x + y)
|
|
var_y = var(y)
|
|
assert_equal(cov_x_y, 0.5 *
|
|
(var_x_plus_y - var_x - var_y))
|
|
|
|
# check that corr(x, y) == cov(x, y) / (std(x) *
|
|
# std(y))
|
|
std_y = std(y)
|
|
assert_equal(corr_x_y, cov_x_y / (std_x * std_y))
|
|
|
|
if cov is cov_biased:
|
|
# check that biased cov(x, y) == mean(x*y) -
|
|
# mean(x)*mean(y)
|
|
mean_y = mean(y)
|
|
mean_x_times_y = mean(x * y)
|
|
assert_equal(cov_x_y, mean_x_times_y -
|
|
(mean_x * mean_y))
|
|
|
|
@pytest.mark.slow
|
|
@pytest.mark.parametrize(
|
|
'min_periods, adjust, ignore_na', product([0, 1, 2, 3, 4],
|
|
[True, False],
|
|
[False, True]))
|
|
def test_ewm_consistency(self, min_periods, adjust, ignore_na):
|
|
def _weights(s, com, adjust, ignore_na):
|
|
if isinstance(s, DataFrame):
|
|
if not len(s.columns):
|
|
return DataFrame(index=s.index, columns=s.columns)
|
|
w = concat([
|
|
_weights(s.iloc[:, i], com=com, adjust=adjust,
|
|
ignore_na=ignore_na)
|
|
for i, _ in enumerate(s.columns)], axis=1)
|
|
w.index = s.index
|
|
w.columns = s.columns
|
|
return w
|
|
|
|
w = Series(np.nan, index=s.index)
|
|
alpha = 1. / (1. + com)
|
|
if ignore_na:
|
|
w[s.notna()] = _weights(s[s.notna()], com=com,
|
|
adjust=adjust, ignore_na=False)
|
|
elif adjust:
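# with adjust=True the i-th observation gets weight (1. - alpha) ** -i,
# so dividing by the cumulative weight sum reproduces the adjusted EWMA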
|
|
for i in range(len(s)):
|
|
if s.iat[i] == s.iat[i]:  # NaN != NaN, so this is True only for non-NaN values
|
|
w.iat[i] = pow(1. / (1. - alpha), i)
|
|
else:
|
|
sum_wts = 0.
|
|
prev_i = -1
|
|
for i in range(len(s)):
|
|
if s.iat[i] == s.iat[i]:  # NaN != NaN, so this is True only for non-NaN values
|
|
if prev_i == -1:
|
|
w.iat[i] = 1.
|
|
else:
|
|
w.iat[i] = alpha * sum_wts / pow(1. - alpha,
|
|
i - prev_i)
|
|
sum_wts += w.iat[i]
|
|
prev_i = i
|
|
return w
|
|
|
|
def _variance_debiasing_factors(s, com, adjust, ignore_na):
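# the unbiased weighted variance scales the biased one by
# (sum(w)) ** 2 / ((sum(w)) ** 2 - sum(w ** 2)), computed here cumulatively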
|
|
weights = _weights(s, com=com, adjust=adjust, ignore_na=ignore_na)
|
|
cum_sum = weights.cumsum().fillna(method='ffill')
|
|
cum_sum_sq = (weights * weights).cumsum().fillna(method='ffill')
|
|
numerator = cum_sum * cum_sum
|
|
denominator = numerator - cum_sum_sq
|
|
denominator[denominator <= 0.] = np.nan
|
|
return numerator / denominator
|
|
|
|
def _ewma(s, com, min_periods, adjust, ignore_na):
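# reference EWMA: weighted cumulative sum of the values divided by the
# cumulative sum of the weights, with positions that have seen fewer than
# min_periods non-NaN observations masked to NaN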
|
|
weights = _weights(s, com=com, adjust=adjust, ignore_na=ignore_na)
|
|
result = s.multiply(weights).cumsum().divide(weights.cumsum(
|
|
)).fillna(method='ffill')
|
|
result[s.expanding().count() < (max(min_periods, 1) if min_periods
|
|
else 1)] = np.nan
|
|
return result
|
|
|
|
com = 3.
|
|
# test consistency between different ewm* moments
|
|
self._test_moments_consistency(
|
|
min_periods=min_periods,
|
|
count=lambda x: x.expanding().count(),
|
|
mean=lambda x: x.ewm(com=com, min_periods=min_periods,
|
|
adjust=adjust,
|
|
ignore_na=ignore_na).mean(),
|
|
mock_mean=lambda x: _ewma(x, com=com,
|
|
min_periods=min_periods,
|
|
adjust=adjust,
|
|
ignore_na=ignore_na),
|
|
corr=lambda x, y: x.ewm(com=com, min_periods=min_periods,
|
|
adjust=adjust,
|
|
ignore_na=ignore_na).corr(y),
|
|
var_unbiased=lambda x: (
|
|
x.ewm(com=com, min_periods=min_periods,
|
|
adjust=adjust,
|
|
ignore_na=ignore_na).var(bias=False)),
|
|
std_unbiased=lambda x: (
|
|
x.ewm(com=com, min_periods=min_periods,
|
|
adjust=adjust, ignore_na=ignore_na)
|
|
.std(bias=False)),
|
|
cov_unbiased=lambda x, y: (
|
|
x.ewm(com=com, min_periods=min_periods,
|
|
adjust=adjust, ignore_na=ignore_na)
|
|
.cov(y, bias=False)),
|
|
var_biased=lambda x: (
|
|
x.ewm(com=com, min_periods=min_periods,
|
|
adjust=adjust, ignore_na=ignore_na)
|
|
.var(bias=True)),
|
|
std_biased=lambda x: x.ewm(com=com, min_periods=min_periods,
|
|
adjust=adjust,
|
|
ignore_na=ignore_na).std(bias=True),
|
|
cov_biased=lambda x, y: (
|
|
x.ewm(com=com, min_periods=min_periods,
|
|
adjust=adjust, ignore_na=ignore_na)
|
|
.cov(y, bias=True)),
|
|
var_debiasing_factors=lambda x: (
|
|
_variance_debiasing_factors(x, com=com, adjust=adjust,
|
|
ignore_na=ignore_na)))
|
|
|
|
@pytest.mark.slow
|
|
@pytest.mark.parametrize(
|
|
'min_periods', [0, 1, 2, 3, 4])
|
|
def test_expanding_consistency(self, min_periods):
|
|
|
|
# suppress warnings about empty slices, as we are deliberately testing
|
|
# with empty/0-length Series/DataFrames
|
|
with warnings.catch_warnings():
|
|
warnings.filterwarnings("ignore",
|
|
message=".*(empty slice|0 for slice).*",
|
|
category=RuntimeWarning)
|
|
|
|
# test consistency between different expanding_* moments
|
|
self._test_moments_consistency(
|
|
min_periods=min_periods,
|
|
count=lambda x: x.expanding().count(),
|
|
mean=lambda x: x.expanding(
|
|
min_periods=min_periods).mean(),
|
|
mock_mean=lambda x: x.expanding(
|
|
min_periods=min_periods).sum() / x.expanding().count(),
|
|
corr=lambda x, y: x.expanding(
|
|
min_periods=min_periods).corr(y),
|
|
var_unbiased=lambda x: x.expanding(
|
|
min_periods=min_periods).var(),
|
|
std_unbiased=lambda x: x.expanding(
|
|
min_periods=min_periods).std(),
|
|
cov_unbiased=lambda x, y: x.expanding(
|
|
min_periods=min_periods).cov(y),
|
|
var_biased=lambda x: x.expanding(
|
|
min_periods=min_periods).var(ddof=0),
|
|
std_biased=lambda x: x.expanding(
|
|
min_periods=min_periods).std(ddof=0),
|
|
cov_biased=lambda x, y: x.expanding(
|
|
min_periods=min_periods).cov(y, ddof=0),
|
|
var_debiasing_factors=lambda x: (
|
|
x.expanding().count() /
|
|
(x.expanding().count() - 1.)
|
|
.replace(0., np.nan)))
|
|
|
|
# test consistency between expanding_xyz() and either (a)
|
|
# expanding_apply of Series.xyz(), or (b) expanding_apply of
|
|
# np.nanxyz()
|
|
for (x, is_constant, no_nans) in self.data:
|
|
functions = self.base_functions
|
|
|
|
# GH 8269
|
|
if no_nans:
|
|
functions = self.base_functions + self.no_nan_functions
|
|
for (f, require_min_periods, name) in functions:
|
|
expanding_f = getattr(
|
|
x.expanding(min_periods=min_periods), name)
|
|
|
|
if (require_min_periods and
|
|
(min_periods is not None) and
|
|
(min_periods < require_min_periods)):
|
|
continue
|
|
|
|
if name == 'count':
|
|
expanding_f_result = expanding_f()
|
|
expanding_apply_f_result = x.expanding(
|
|
min_periods=0).apply(func=f, raw=True)
|
|
else:
|
|
if name in ['cov', 'corr']:
|
|
expanding_f_result = expanding_f(
|
|
pairwise=False)
|
|
else:
|
|
expanding_f_result = expanding_f()
|
|
expanding_apply_f_result = x.expanding(
|
|
min_periods=min_periods).apply(func=f, raw=True)
|
|
|
|
# GH 9422
|
|
if name in ['sum', 'prod']:
|
|
assert_equal(expanding_f_result,
|
|
expanding_apply_f_result)
|
|
|
|
@pytest.mark.slow
|
|
@pytest.mark.parametrize(
|
|
'window,min_periods,center', list(_rolling_consistency_cases()))
|
|
def test_rolling_consistency(self, window, min_periods, center):
|
|
|
|
# suppress warnings about empty slices, as we are deliberately testing
|
|
# with empty/0-length Series/DataFrames
|
|
with warnings.catch_warnings():
|
|
warnings.filterwarnings("ignore",
|
|
message=".*(empty slice|0 for slice).*",
|
|
category=RuntimeWarning)
|
|
|
|
# test consistency between different rolling_* moments
|
|
self._test_moments_consistency(
|
|
min_periods=min_periods,
|
|
count=lambda x: (
|
|
x.rolling(window=window, center=center)
|
|
.count()),
|
|
mean=lambda x: (
|
|
x.rolling(window=window, min_periods=min_periods,
|
|
center=center).mean()),
|
|
mock_mean=lambda x: (
|
|
x.rolling(window=window,
|
|
min_periods=min_periods,
|
|
center=center).sum()
|
|
.divide(x.rolling(window=window,
|
|
min_periods=min_periods,
|
|
center=center).count())),
|
|
corr=lambda x, y: (
|
|
x.rolling(window=window, min_periods=min_periods,
|
|
center=center).corr(y)),
|
|
|
|
var_unbiased=lambda x: (
|
|
x.rolling(window=window, min_periods=min_periods,
|
|
center=center).var()),
|
|
|
|
std_unbiased=lambda x: (
|
|
x.rolling(window=window, min_periods=min_periods,
|
|
center=center).std()),
|
|
|
|
cov_unbiased=lambda x, y: (
|
|
x.rolling(window=window, min_periods=min_periods,
|
|
center=center).cov(y)),
|
|
|
|
var_biased=lambda x: (
|
|
x.rolling(window=window, min_periods=min_periods,
|
|
center=center).var(ddof=0)),
|
|
|
|
std_biased=lambda x: (
|
|
x.rolling(window=window, min_periods=min_periods,
|
|
center=center).std(ddof=0)),
|
|
|
|
cov_biased=lambda x, y: (
|
|
x.rolling(window=window, min_periods=min_periods,
|
|
center=center).cov(y, ddof=0)),
|
|
var_debiasing_factors=lambda x: (
|
|
x.rolling(window=window, center=center).count()
|
|
.divide((x.rolling(window=window, center=center)
|
|
.count() - 1.)
|
|
.replace(0., np.nan))))
|
|
|
|
# test consistency between rolling_xyz() and either (a)
|
|
# rolling_apply of Series.xyz(), or (b) rolling_apply of
|
|
# np.nanxyz()
|
|
for (x, is_constant, no_nans) in self.data:
|
|
functions = self.base_functions
|
|
|
|
# GH 8269
|
|
if no_nans:
|
|
functions = self.base_functions + self.no_nan_functions
|
|
for (f, require_min_periods, name) in functions:
|
|
rolling_f = getattr(
|
|
x.rolling(window=window, center=center,
|
|
min_periods=min_periods), name)
|
|
|
|
if require_min_periods and (
|
|
min_periods is not None) and (
|
|
min_periods < require_min_periods):
|
|
continue
|
|
|
|
if name == 'count':
|
|
rolling_f_result = rolling_f()
|
|
rolling_apply_f_result = x.rolling(
|
|
window=window, min_periods=0,
|
|
center=center).apply(func=f, raw=True)
|
|
else:
|
|
if name in ['cov', 'corr']:
|
|
rolling_f_result = rolling_f(
|
|
pairwise=False)
|
|
else:
|
|
rolling_f_result = rolling_f()
|
|
rolling_apply_f_result = x.rolling(
|
|
window=window, min_periods=min_periods,
|
|
center=center).apply(func=f, raw=True)
|
|
|
|
# GH 9422
|
|
if name in ['sum', 'prod']:
|
|
assert_equal(rolling_f_result,
|
|
rolling_apply_f_result)
|
|
|
|
# binary moments
|
|
def test_rolling_cov(self):
|
|
A = self.series
|
|
B = A + randn(len(A))
|
|
|
|
result = A.rolling(window=50, min_periods=25).cov(B)
|
|
tm.assert_almost_equal(result[-1], np.cov(A[-50:], B[-50:])[0, 1])
|
|
|
|
def test_rolling_cov_pairwise(self):
|
|
self._check_pairwise_moment('rolling', 'cov', window=10, min_periods=5)
|
|
|
|
def test_rolling_corr(self):
|
|
A = self.series
|
|
B = A + randn(len(A))
|
|
|
|
result = A.rolling(window=50, min_periods=25).corr(B)
|
|
tm.assert_almost_equal(result[-1], np.corrcoef(A[-50:], B[-50:])[0, 1])
|
|
|
|
# test for correct bias correction
|
|
a = tm.makeTimeSeries()
|
|
b = tm.makeTimeSeries()
|
|
a[:5] = np.nan
|
|
b[:10] = np.nan
|
|
|
|
result = a.rolling(window=len(a), min_periods=1).corr(b)
|
|
tm.assert_almost_equal(result[-1], a.corr(b))
|
|
|
|
def test_rolling_corr_pairwise(self):
|
|
self._check_pairwise_moment('rolling', 'corr', window=10,
|
|
min_periods=5)
|
|
|
|
@pytest.mark.parametrize('window', range(7))
|
|
def test_rolling_corr_with_zero_variance(self, window):
|
|
# GH 18430
|
|
s = pd.Series(np.zeros(20))
|
|
other = pd.Series(np.arange(20))
|
|
|
|
assert s.rolling(window=window).corr(other=other).isna().all()
|
|
|
|
def _check_pairwise_moment(self, dispatch, name, **kwargs):
|
|
def get_result(obj, obj2=None):
|
|
return getattr(getattr(obj, dispatch)(**kwargs), name)(obj2)
|
|
|
|
result = get_result(self.frame)
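# the pairwise result is a MultiIndexed frame; slice out the series of
# moments between column 1 and column 5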
|
|
result = result.loc[(slice(None), 1), 5]
|
|
result.index = result.index.droplevel(1)
|
|
expected = get_result(self.frame[1], self.frame[5])
|
|
tm.assert_series_equal(result, expected, check_names=False)
|
|
|
|
def test_flex_binary_moment(self):
|
|
# GH3155
|
|
# don't blow the stack
|
|
pytest.raises(TypeError, rwindow._flex_binary_moment, 5, 6, None)
|
|
|
|
def test_corr_sanity(self):
|
|
# GH 3155
|
|
df = DataFrame(np.array(
|
|
[[0.87024726, 0.18505595], [0.64355431, 0.3091617],
|
|
[0.92372966, 0.50552513], [0.00203756, 0.04520709],
|
|
[0.84780328, 0.33394331], [0.78369152, 0.63919667]]))
|
|
|
|
res = df[0].rolling(5, center=True).corr(df[1])
|
|
assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res)
|
|
|
|
# and some fuzzing
|
|
for _ in range(10):
|
|
df = DataFrame(np.random.rand(30, 2))
|
|
res = df[0].rolling(5, center=True).corr(df[1])
|
|
try:
|
|
assert all(np.abs(np.nan_to_num(x)) <= 1 for x in res)
|
|
except AssertionError:
|
|
print(res)
raise
|
|
|
|
@pytest.mark.parametrize('method', ['corr', 'cov'])
|
|
def test_flex_binary_frame(self, method):
|
|
series = self.frame[1]
|
|
|
|
res = getattr(series.rolling(window=10), method)(self.frame)
|
|
res2 = getattr(self.frame.rolling(window=10), method)(series)
|
|
exp = self.frame.apply(lambda x: getattr(
|
|
series.rolling(window=10), method)(x))
|
|
|
|
tm.assert_frame_equal(res, exp)
|
|
tm.assert_frame_equal(res2, exp)
|
|
|
|
frame2 = self.frame.copy()
|
|
frame2.values[:] = np.random.randn(*frame2.shape)
|
|
|
|
res3 = getattr(self.frame.rolling(window=10), method)(frame2)
|
|
exp = DataFrame(dict((k, getattr(self.frame[k].rolling(
|
|
window=10), method)(frame2[k])) for k in self.frame))
|
|
tm.assert_frame_equal(res3, exp)
|
|
|
|
def test_ewmcov(self):
|
|
self._check_binary_ew('cov')
|
|
|
|
def test_ewmcov_pairwise(self):
|
|
self._check_pairwise_moment('ewm', 'cov', span=10, min_periods=5)
|
|
|
|
def test_ewmcorr(self):
|
|
self._check_binary_ew('corr')
|
|
|
|
def test_ewmcorr_pairwise(self):
|
|
self._check_pairwise_moment('ewm', 'corr', span=10, min_periods=5)
|
|
|
|
def _check_binary_ew(self, name):
|
|
def func(A, B, com, **kwargs):
|
|
return getattr(A.ewm(com, **kwargs), name)(B)
|
|
|
|
A = Series(randn(50), index=np.arange(50))
|
|
B = A[2:] + randn(48)
|
|
|
|
A[:10] = np.NaN
|
|
B[-10:] = np.NaN
|
|
|
|
result = func(A, B, 20, min_periods=5)
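# A has 10 leading NaNs, so with min_periods=5 the first defined value
# appears at index 14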
|
|
assert np.isnan(result.values[:14]).all()
|
|
assert not np.isnan(result.values[14:]).any()
|
|
|
|
# GH 7898
|
|
for min_periods in (0, 1, 2):
|
|
result = func(A, B, 20, min_periods=min_periods)
|
|
# binary functions (ewmcov, ewmcorr) with bias=False require at
|
|
# least two values
|
|
assert np.isnan(result.values[:11]).all()
|
|
assert not np.isnan(result.values[11:]).any()
|
|
|
|
# check series of length 0
|
|
result = func(Series([]), Series([]), 50, min_periods=min_periods)
|
|
tm.assert_series_equal(result, Series([]))
|
|
|
|
# check series of length 1
|
|
result = func(
|
|
Series([1.]), Series([1.]), 50, min_periods=min_periods)
|
|
tm.assert_series_equal(result, Series([np.NaN]))
|
|
|
|
pytest.raises(Exception, func, A, randn(50), 20, min_periods=5)
|
|
|
|
def test_expanding_apply_args_kwargs(self, raw):
|
|
|
|
def mean_w_arg(x, const):
|
|
return np.mean(x) + const
|
|
|
|
df = DataFrame(np.random.rand(20, 3))
|
|
|
|
expected = df.expanding().apply(np.mean, raw=raw) + 20.
|
|
|
|
result = df.expanding().apply(mean_w_arg,
|
|
raw=raw,
|
|
args=(20, ))
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = df.expanding().apply(mean_w_arg,
|
|
raw=raw,
|
|
kwargs={'const': 20})
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_expanding_corr(self):
|
|
A = self.series.dropna()
|
|
B = (A + randn(len(A)))[:-5]
|
|
|
|
result = A.expanding().corr(B)
|
|
|
|
rolling_result = A.rolling(window=len(A), min_periods=1).corr(B)
|
|
|
|
tm.assert_almost_equal(rolling_result, result)
|
|
|
|
def test_expanding_count(self):
|
|
result = self.series.expanding().count()
|
|
tm.assert_almost_equal(result, self.series.rolling(
|
|
window=len(self.series)).count())
|
|
|
|
def test_expanding_quantile(self):
|
|
result = self.series.expanding().quantile(0.5)
|
|
|
|
rolling_result = self.series.rolling(window=len(self.series),
|
|
min_periods=1).quantile(0.5)
|
|
|
|
tm.assert_almost_equal(result, rolling_result)
|
|
|
|
def test_expanding_cov(self):
|
|
A = self.series
|
|
B = (A + randn(len(A)))[:-5]
|
|
|
|
result = A.expanding().cov(B)
|
|
|
|
rolling_result = A.rolling(window=len(A), min_periods=1).cov(B)
|
|
|
|
tm.assert_almost_equal(rolling_result, result)
|
|
|
|
def test_expanding_cov_pairwise(self):
|
|
result = self.frame.expanding().cov()
|
|
|
|
rolling_result = self.frame.rolling(window=len(self.frame),
|
|
min_periods=1).cov()
|
|
|
|
tm.assert_frame_equal(result, rolling_result)
|
|
|
|
def test_expanding_corr_pairwise(self):
|
|
result = self.frame.expanding().corr()
|
|
|
|
rolling_result = self.frame.rolling(window=len(self.frame),
|
|
min_periods=1).corr()
|
|
tm.assert_frame_equal(result, rolling_result)
|
|
|
|
def test_expanding_cov_diff_index(self):
|
|
# GH 7512
|
|
s1 = Series([1, 2, 3], index=[0, 1, 2])
|
|
s2 = Series([1, 3], index=[0, 2])
|
|
result = s1.expanding().cov(s2)
|
|
expected = Series([None, None, 2.0])
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
s2a = Series([1, None, 3], index=[0, 1, 2])
|
|
result = s1.expanding().cov(s2a)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
s1 = Series([7, 8, 10], index=[0, 1, 3])
|
|
s2 = Series([7, 9, 10], index=[0, 2, 3])
|
|
result = s1.expanding().cov(s2)
|
|
expected = Series([None, None, None, 4.5])
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
def test_expanding_corr_diff_index(self):
|
|
# GH 7512
|
|
s1 = Series([1, 2, 3], index=[0, 1, 2])
|
|
s2 = Series([1, 3], index=[0, 2])
|
|
result = s1.expanding().corr(s2)
|
|
expected = Series([None, None, 1.0])
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
s2a = Series([1, None, 3], index=[0, 1, 2])
|
|
result = s1.expanding().corr(s2a)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
s1 = Series([7, 8, 10], index=[0, 1, 3])
|
|
s2 = Series([7, 9, 10], index=[0, 2, 3])
|
|
result = s1.expanding().corr(s2)
|
|
expected = Series([None, None, None, 1.])
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
def test_rolling_cov_diff_length(self):
|
|
# GH 7512
|
|
s1 = Series([1, 2, 3], index=[0, 1, 2])
|
|
s2 = Series([1, 3], index=[0, 2])
|
|
result = s1.rolling(window=3, min_periods=2).cov(s2)
|
|
expected = Series([None, None, 2.0])
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
s2a = Series([1, None, 3], index=[0, 1, 2])
|
|
result = s1.rolling(window=3, min_periods=2).cov(s2a)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
def test_rolling_corr_diff_length(self):
|
|
# GH 7512
|
|
s1 = Series([1, 2, 3], index=[0, 1, 2])
|
|
s2 = Series([1, 3], index=[0, 2])
|
|
result = s1.rolling(window=3, min_periods=2).corr(s2)
|
|
expected = Series([None, None, 1.0])
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
s2a = Series([1, None, 3], index=[0, 1, 2])
|
|
result = s1.rolling(window=3, min_periods=2).corr(s2a)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
@pytest.mark.parametrize(
|
|
'f',
|
|
[
|
|
lambda x: (x.rolling(window=10, min_periods=5)
|
|
.cov(x, pairwise=False)),
|
|
lambda x: (x.rolling(window=10, min_periods=5)
|
|
.corr(x, pairwise=False)),
|
|
lambda x: x.rolling(window=10, min_periods=5).max(),
|
|
lambda x: x.rolling(window=10, min_periods=5).min(),
|
|
lambda x: x.rolling(window=10, min_periods=5).sum(),
|
|
lambda x: x.rolling(window=10, min_periods=5).mean(),
|
|
lambda x: x.rolling(window=10, min_periods=5).std(),
|
|
lambda x: x.rolling(window=10, min_periods=5).var(),
|
|
lambda x: x.rolling(window=10, min_periods=5).skew(),
|
|
lambda x: x.rolling(window=10, min_periods=5).kurt(),
|
|
lambda x: x.rolling(
|
|
window=10, min_periods=5).quantile(quantile=0.5),
|
|
lambda x: x.rolling(window=10, min_periods=5).median(),
|
|
lambda x: x.rolling(window=10, min_periods=5).apply(
|
|
sum, raw=False),
|
|
lambda x: x.rolling(window=10, min_periods=5).apply(
|
|
sum, raw=True),
|
|
lambda x: x.rolling(win_type='boxcar',
|
|
window=10, min_periods=5).mean()])
|
|
def test_rolling_functions_window_non_shrinkage(self, f):
|
|
# GH 7764
|
|
s = Series(range(4))
|
|
s_expected = Series(np.nan, index=s.index)
|
|
df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]], columns=['A', 'B'])
|
|
df_expected = DataFrame(np.nan, index=df.index, columns=df.columns)
|
|
|
|
try:
|
|
s_result = f(s)
|
|
tm.assert_series_equal(s_result, s_expected)
|
|
|
|
df_result = f(df)
|
|
tm.assert_frame_equal(df_result, df_expected)
|
|
except ImportError:
|
|
|
|
# scipy needed for rolling_window
|
|
pytest.skip("scipy not available")
|
|
|
|
def test_rolling_functions_window_non_shrinkage_binary(self):
|
|
|
|
# corr/cov return a MI DataFrame
|
|
df = DataFrame([[1, 5], [3, 2], [3, 9], [-1, 0]],
|
|
columns=Index(['A', 'B'], name='foo'),
|
|
index=Index(range(4), name='bar'))
|
|
df_expected = DataFrame(
|
|
columns=Index(['A', 'B'], name='foo'),
|
|
index=pd.MultiIndex.from_product([df.index, df.columns],
|
|
names=['bar', 'foo']),
|
|
dtype='float64')
|
|
functions = [lambda x: (x.rolling(window=10, min_periods=5)
|
|
.cov(x, pairwise=True)),
|
|
lambda x: (x.rolling(window=10, min_periods=5)
|
|
.corr(x, pairwise=True))]
|
|
for f in functions:
|
|
df_result = f(df)
|
|
tm.assert_frame_equal(df_result, df_expected)
|
|
|
|
def test_moment_functions_zero_length(self):
|
|
# GH 8056
|
|
s = Series()
|
|
s_expected = s
|
|
df1 = DataFrame()
|
|
df1_expected = df1
|
|
df2 = DataFrame(columns=['a'])
|
|
df2['a'] = df2['a'].astype('float64')
|
|
df2_expected = df2
|
|
|
|
functions = [lambda x: x.expanding().count(),
|
|
lambda x: x.expanding(min_periods=5).cov(
|
|
x, pairwise=False),
|
|
lambda x: x.expanding(min_periods=5).corr(
|
|
x, pairwise=False),
|
|
lambda x: x.expanding(min_periods=5).max(),
|
|
lambda x: x.expanding(min_periods=5).min(),
|
|
lambda x: x.expanding(min_periods=5).sum(),
|
|
lambda x: x.expanding(min_periods=5).mean(),
|
|
lambda x: x.expanding(min_periods=5).std(),
|
|
lambda x: x.expanding(min_periods=5).var(),
|
|
lambda x: x.expanding(min_periods=5).skew(),
|
|
lambda x: x.expanding(min_periods=5).kurt(),
|
|
lambda x: x.expanding(min_periods=5).quantile(0.5),
|
|
lambda x: x.expanding(min_periods=5).median(),
|
|
lambda x: x.expanding(min_periods=5).apply(
|
|
sum, raw=False),
|
|
lambda x: x.expanding(min_periods=5).apply(
|
|
sum, raw=True),
|
|
lambda x: x.rolling(window=10).count(),
|
|
lambda x: x.rolling(window=10, min_periods=5).cov(
|
|
x, pairwise=False),
|
|
lambda x: x.rolling(window=10, min_periods=5).corr(
|
|
x, pairwise=False),
|
|
lambda x: x.rolling(window=10, min_periods=5).max(),
|
|
lambda x: x.rolling(window=10, min_periods=5).min(),
|
|
lambda x: x.rolling(window=10, min_periods=5).sum(),
|
|
lambda x: x.rolling(window=10, min_periods=5).mean(),
|
|
lambda x: x.rolling(window=10, min_periods=5).std(),
|
|
lambda x: x.rolling(window=10, min_periods=5).var(),
|
|
lambda x: x.rolling(window=10, min_periods=5).skew(),
|
|
lambda x: x.rolling(window=10, min_periods=5).kurt(),
|
|
lambda x: x.rolling(
|
|
window=10, min_periods=5).quantile(0.5),
|
|
lambda x: x.rolling(window=10, min_periods=5).median(),
|
|
lambda x: x.rolling(window=10, min_periods=5).apply(
|
|
sum, raw=False),
|
|
lambda x: x.rolling(window=10, min_periods=5).apply(
|
|
sum, raw=True),
|
|
lambda x: x.rolling(win_type='boxcar',
|
|
window=10, min_periods=5).mean(),
|
|
]
|
|
for f in functions:
|
|
try:
|
|
s_result = f(s)
|
|
tm.assert_series_equal(s_result, s_expected)
|
|
|
|
df1_result = f(df1)
|
|
tm.assert_frame_equal(df1_result, df1_expected)
|
|
|
|
df2_result = f(df2)
|
|
tm.assert_frame_equal(df2_result, df2_expected)
|
|
except ImportError:
|
|
|
|
# scipy needed for rolling_window
|
|
continue
|
|
|
|
def test_moment_functions_zero_length_pairwise(self):
|
|
|
|
df1 = DataFrame()
|
|
df1_expected = df1
|
|
df2 = DataFrame(columns=Index(['a'], name='foo'),
|
|
index=Index([], name='bar'))
|
|
df2['a'] = df2['a'].astype('float64')
|
|
|
|
df1_expected = DataFrame(
|
|
index=pd.MultiIndex.from_product([df1.index, df1.columns]),
|
|
columns=Index([]))
|
|
df2_expected = DataFrame(
|
|
index=pd.MultiIndex.from_product([df2.index, df2.columns],
|
|
names=['bar', 'foo']),
|
|
columns=Index(['a'], name='foo'),
|
|
dtype='float64')
|
|
|
|
functions = [lambda x: (x.expanding(min_periods=5)
|
|
.cov(x, pairwise=True)),
|
|
lambda x: (x.expanding(min_periods=5)
|
|
.corr(x, pairwise=True)),
|
|
lambda x: (x.rolling(window=10, min_periods=5)
|
|
.cov(x, pairwise=True)),
|
|
lambda x: (x.rolling(window=10, min_periods=5)
|
|
.corr(x, pairwise=True)),
|
|
]
|
|
for f in functions:
|
|
df1_result = f(df1)
|
|
tm.assert_frame_equal(df1_result, df1_expected)
|
|
|
|
df2_result = f(df2)
|
|
tm.assert_frame_equal(df2_result, df2_expected)
|
|
|
|
def test_expanding_cov_pairwise_diff_length(self):
|
|
# GH 7512
|
|
df1 = DataFrame([[1, 5], [3, 2], [3, 9]],
|
|
columns=Index(['A', 'B'], name='foo'))
|
|
df1a = DataFrame([[1, 5], [3, 9]],
|
|
index=[0, 2],
|
|
columns=Index(['A', 'B'], name='foo'))
|
|
df2 = DataFrame([[5, 6], [None, None], [2, 1]],
|
|
columns=Index(['X', 'Y'], name='foo'))
|
|
df2a = DataFrame([[5, 6], [2, 1]],
|
|
index=[0, 2],
|
|
columns=Index(['X', 'Y'], name='foo'))
|
|
# TODO: xref gh-15826
|
|
# .loc is not preserving the names
|
|
result1 = df1.expanding().cov(df2, pairwise=True).loc[2]
|
|
result2 = df1.expanding().cov(df2a, pairwise=True).loc[2]
|
|
result3 = df1a.expanding().cov(df2, pairwise=True).loc[2]
|
|
result4 = df1a.expanding().cov(df2a, pairwise=True).loc[2]
|
|
expected = DataFrame([[-3.0, -6.0], [-5.0, -10.0]],
|
|
columns=Index(['A', 'B'], name='foo'),
|
|
index=Index(['X', 'Y'], name='foo'))
|
|
tm.assert_frame_equal(result1, expected)
|
|
tm.assert_frame_equal(result2, expected)
|
|
tm.assert_frame_equal(result3, expected)
|
|
tm.assert_frame_equal(result4, expected)
|
|
|
|
def test_expanding_corr_pairwise_diff_length(self):
|
|
# GH 7512
|
|
df1 = DataFrame([[1, 2], [3, 2], [3, 4]],
|
|
columns=['A', 'B'],
|
|
index=Index(range(3), name='bar'))
|
|
df1a = DataFrame([[1, 2], [3, 4]],
|
|
index=Index([0, 2], name='bar'),
|
|
columns=['A', 'B'])
|
|
df2 = DataFrame([[5, 6], [None, None], [2, 1]],
|
|
columns=['X', 'Y'],
|
|
index=Index(range(3), name='bar'))
|
|
df2a = DataFrame([[5, 6], [2, 1]],
|
|
index=Index([0, 2], name='bar'),
|
|
columns=['X', 'Y'])
|
|
result1 = df1.expanding().corr(df2, pairwise=True).loc[2]
|
|
result2 = df1.expanding().corr(df2a, pairwise=True).loc[2]
|
|
result3 = df1a.expanding().corr(df2, pairwise=True).loc[2]
|
|
result4 = df1a.expanding().corr(df2a, pairwise=True).loc[2]
|
|
expected = DataFrame([[-1.0, -1.0], [-1.0, -1.0]],
|
|
columns=['A', 'B'],
|
|
index=Index(['X', 'Y']))
|
|
tm.assert_frame_equal(result1, expected)
|
|
tm.assert_frame_equal(result2, expected)
|
|
tm.assert_frame_equal(result3, expected)
|
|
tm.assert_frame_equal(result4, expected)
|
|
|
|
def test_rolling_skew_edge_cases(self):
|
|
|
|
all_nan = Series([np.NaN] * 5)
|
|
|
|
# yields all NaN (0 variance)
|
|
d = Series([1] * 5)
|
|
x = d.rolling(window=5).skew()
|
|
tm.assert_series_equal(all_nan, x)
|
|
|
|
# yields all NaN (window too small)
|
|
d = Series(np.random.randn(5))
|
|
x = d.rolling(window=2).skew()
|
|
tm.assert_series_equal(all_nan, x)
|
|
|
|
# yields [NaN, NaN, NaN, 0.177994, 1.548824]
|
|
d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401
|
|
])
|
|
expected = Series([np.NaN, np.NaN, np.NaN, 0.177994, 1.548824])
|
|
x = d.rolling(window=4).skew()
|
|
tm.assert_series_equal(expected, x)
|
|
|
|
def test_rolling_kurt_edge_cases(self):
|
|
|
|
all_nan = Series([np.NaN] * 5)
|
|
|
|
# yields all NaN (0 variance)
|
|
d = Series([1] * 5)
|
|
x = d.rolling(window=5).kurt()
|
|
tm.assert_series_equal(all_nan, x)
|
|
|
|
# yields all NaN (window too small)
|
|
d = Series(np.random.randn(5))
|
|
x = d.rolling(window=3).kurt()
|
|
tm.assert_series_equal(all_nan, x)
|
|
|
|
# yields [NaN, NaN, NaN, 1.224307, 2.671499]
|
|
d = Series([-1.50837035, -0.1297039, 0.19501095, 1.73508164, 0.41941401
|
|
])
|
|
expected = Series([np.NaN, np.NaN, np.NaN, 1.224307, 2.671499])
|
|
x = d.rolling(window=4).kurt()
|
|
tm.assert_series_equal(expected, x)
|
|
|
|
def test_rolling_skew_eq_value_fperr(self):
|
|
# GH 18804: rolling skew over a window of all-equal values should return NaN
|
|
a = Series([1.1] * 15).rolling(window=10).skew()
|
|
assert np.isnan(a).all()
|
|
|
|
def test_rolling_kurt_eq_value_fperr(self):
|
|
# GH 18804: rolling kurt over a window of all-equal values should return NaN
|
|
a = Series([1.1] * 15).rolling(window=10).kurt()
|
|
assert np.isnan(a).all()
|
|
|
|
@pytest.mark.parametrize('func,static_comp', [('sum', np.sum),
|
|
('mean', np.mean),
|
|
('max', np.max),
|
|
('min', np.min)],
|
|
ids=['sum', 'mean', 'max', 'min'])
|
|
def test_expanding_func(self, func, static_comp):
|
|
def expanding_func(x, min_periods=1, center=False, axis=0):
|
|
exp = x.expanding(min_periods=min_periods,
|
|
center=center, axis=axis)
|
|
return getattr(exp, func)()
|
|
self._check_expanding(expanding_func, static_comp, preserve_nan=False)
|
|
|
|
def test_expanding_apply(self, raw):
|
|
|
|
def expanding_mean(x, min_periods=1):
|
|
|
|
exp = x.expanding(min_periods=min_periods)
|
|
result = exp.apply(lambda x: x.mean(), raw=raw)
|
|
return result
|
|
|
|
# TODO(jreback), needed to add preserve_nan=False
|
|
# here to make this pass
|
|
self._check_expanding(expanding_mean, np.mean, preserve_nan=False)
|
|
|
|
ser = Series([])
|
|
tm.assert_series_equal(ser, ser.expanding().apply(
|
|
lambda x: x.mean(), raw=raw))
|
|
|
|
# GH 8080
|
|
s = Series([None, None, None])
|
|
result = s.expanding(min_periods=0).apply(lambda x: len(x), raw=raw)
|
|
expected = Series([1., 2., 3.])
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
def _check_expanding(self, func, static_comp, has_min_periods=True,
|
|
has_time_rule=True, preserve_nan=True):
|
|
|
|
series_result = func(self.series)
|
|
assert isinstance(series_result, Series)
|
|
frame_result = func(self.frame)
|
|
assert isinstance(frame_result, DataFrame)
|
|
|
|
result = func(self.series)
|
|
tm.assert_almost_equal(result[10], static_comp(self.series[:11]))
|
|
|
|
if preserve_nan:
|
|
assert result.iloc[self._nan_locs].isna().all()
|
|
|
|
ser = Series(randn(50))
|
|
|
|
if has_min_periods:
|
|
result = func(ser, min_periods=30)
|
|
assert result[:29].isna().all()
|
|
tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50]))
|
|
|
|
# min_periods is working correctly
|
|
result = func(ser, min_periods=15)
|
|
assert isna(result.iloc[13])
|
|
assert notna(result.iloc[14])
|
|
|
|
ser2 = Series(randn(20))
|
|
result = func(ser2, min_periods=5)
|
|
assert isna(result[3])
|
|
assert notna(result[4])
|
|
|
|
# min_periods=0
|
|
result0 = func(ser, min_periods=0)
|
|
result1 = func(ser, min_periods=1)
|
|
tm.assert_almost_equal(result0, result1)
|
|
else:
|
|
result = func(ser)
|
|
tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50]))
|
|
|
|
def test_rolling_max_gh6297(self):
|
|
"""Replicate result expected in GH #6297"""
|
|
|
|
indices = [datetime(1975, 1, i) for i in range(1, 6)]
|
|
# So that we can have 2 datapoints on one of the days
|
|
indices.append(datetime(1975, 1, 3, 6, 0))
|
|
series = Series(range(1, 7), index=indices)
|
|
# Use floats instead of ints as values
|
|
series = series.map(lambda x: float(x))
|
|
# Sort chronologically
|
|
series = series.sort_index()
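# resampling to daily max collapses the two 1975-01-03 points (3.0 and 6.0)
# into 6.0; a rolling max with window=1 is then a no-op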
|
|
|
|
expected = Series([1.0, 2.0, 6.0, 4.0, 5.0],
|
|
index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
|
|
x = series.resample('D').max().rolling(window=1).max()
|
|
tm.assert_series_equal(expected, x)
|
|
|
|
def test_rolling_max_resample(self):
|
|
|
|
indices = [datetime(1975, 1, i) for i in range(1, 6)]
|
|
# So that we can have 3 datapoints on last day (4, 10, and 20)
|
|
indices.append(datetime(1975, 1, 5, 1))
|
|
indices.append(datetime(1975, 1, 5, 2))
|
|
series = Series(list(range(0, 5)) + [10, 20], index=indices)
|
|
# Use floats instead of ints as values
|
|
series = series.map(lambda x: float(x))
|
|
# Sort chronologically
|
|
series = series.sort_index()
|
|
|
|
# Default how should be max
|
|
expected = Series([0.0, 1.0, 2.0, 3.0, 20.0],
|
|
index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
|
|
x = series.resample('D').max().rolling(window=1).max()
|
|
tm.assert_series_equal(expected, x)
|
|
|
|
# Now specify median (10.0)
|
|
expected = Series([0.0, 1.0, 2.0, 3.0, 10.0],
|
|
index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
|
|
x = series.resample('D').median().rolling(window=1).max()
|
|
tm.assert_series_equal(expected, x)
|
|
|
|
# Now specify mean (4+10+20)/3
|
|
v = (4.0 + 10.0 + 20.0) / 3.0
|
|
expected = Series([0.0, 1.0, 2.0, 3.0, v],
|
|
index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
|
|
x = series.resample('D').mean().rolling(window=1).max()
|
|
tm.assert_series_equal(expected, x)
|
|
|
|
def test_rolling_min_resample(self):
|
|
|
|
indices = [datetime(1975, 1, i) for i in range(1, 6)]
|
|
# So that we can have 3 datapoints on last day (4, 10, and 20)
|
|
indices.append(datetime(1975, 1, 5, 1))
|
|
indices.append(datetime(1975, 1, 5, 2))
|
|
series = Series(list(range(0, 5)) + [10, 20], index=indices)
|
|
# Use floats instead of ints as values
|
|
series = series.map(lambda x: float(x))
|
|
# Sort chronologically
|
|
series = series.sort_index()
|
|
|
|
# Default how should be min
|
|
expected = Series([0.0, 1.0, 2.0, 3.0, 4.0],
|
|
index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
|
|
r = series.resample('D').min().rolling(window=1)
|
|
tm.assert_series_equal(expected, r.min())
|
|
|
|
def test_rolling_median_resample(self):
|
|
|
|
indices = [datetime(1975, 1, i) for i in range(1, 6)]
|
|
# So that we can have 3 datapoints on last day (4, 10, and 20)
|
|
indices.append(datetime(1975, 1, 5, 1))
|
|
indices.append(datetime(1975, 1, 5, 2))
|
|
series = Series(list(range(0, 5)) + [10, 20], index=indices)
|
|
# Use floats instead of ints as values
|
|
series = series.map(lambda x: float(x))
|
|
# Sort chronologically
|
|
series = series.sort_index()
|
|
|
|
# Default how should be median
|
|
expected = Series([0.0, 1.0, 2.0, 3.0, 10],
|
|
index=[datetime(1975, 1, i, 0) for i in range(1, 6)])
|
|
x = series.resample('D').median().rolling(window=1).median()
|
|
tm.assert_series_equal(expected, x)
|
|
|
|
def test_rolling_median_memory_error(self):
|
|
# GH11722
|
|
n = 20000
|
|
Series(np.random.randn(n)).rolling(window=2, center=False).median()
|
|
Series(np.random.randn(n)).rolling(window=2, center=False).median()
|
|
|
|
def test_rolling_min_max_numeric_types(self):
|
|
|
|
# GH12373
|
|
types_test = [np.dtype("f{}".format(width)) for width in [4, 8]]
|
|
types_test.extend([np.dtype("{}{}".format(sign, width))
|
|
for width in [1, 2, 4, 8] for sign in "ui"])
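# i.e. f4/f8 plus every signed/unsigned integer dtype (i1..i8, u1..u8)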
|
|
for data_type in types_test:
|
|
# Just testing that these don't throw exceptions and that
|
|
# the return type is float64. Other tests will cover quantitative
|
|
# correctness
|
|
result = (DataFrame(np.arange(20, dtype=data_type))
|
|
.rolling(window=5).max())
|
|
assert result.dtypes[0] == np.dtype("f8")
|
|
result = (DataFrame(np.arange(20, dtype=data_type))
|
|
.rolling(window=5).min())
|
|
assert result.dtypes[0] == np.dtype("f8")
|
|
|
|
|
|
class TestGrouperGrouping(object):
|
|
|
|
def setup_method(self, method):
|
|
self.series = Series(np.arange(10))
|
|
self.frame = DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8,
|
|
'B': np.arange(40)})
|
|
|
|
def test_mutated(self):
|
|
|
|
def f():
|
|
self.frame.groupby('A', foo=1)
|
|
pytest.raises(TypeError, f)
|
|
|
|
g = self.frame.groupby('A')
|
|
assert not g.mutated
|
|
g = self.frame.groupby('A', mutated=True)
|
|
assert g.mutated
|
|
|
|
def test_getitem(self):
|
|
g = self.frame.groupby('A')
|
|
g_mutated = self.frame.groupby('A', mutated=True)
|
|
|
|
expected = g_mutated.B.apply(lambda x: x.rolling(2).mean())
|
|
|
|
result = g.rolling(2).mean().B
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
result = g.rolling(2).B.mean()
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
result = g.B.rolling(2).mean()
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
result = self.frame.B.groupby(self.frame.A).rolling(2).mean()
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
def test_getitem_multiple(self):
|
|
|
|
# GH 13174
|
|
g = self.frame.groupby('A')
|
|
r = g.rolling(2)
|
|
g_mutated = self.frame.groupby('A', mutated=True)
|
|
expected = g_mutated.B.apply(lambda x: x.rolling(2).count())
|
|
|
|
result = r.B.count()
|
|
tm.assert_series_equal(result, expected)
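# a repeated selection from the same rolling groupby should give the same result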
|
|
|
|
result = r.B.count()
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
def test_rolling(self):
|
|
g = self.frame.groupby('A')
|
|
r = g.rolling(window=4)
|
|
|
|
for f in ['sum', 'mean', 'min', 'max', 'count', 'kurt', 'skew']:
|
|
|
|
result = getattr(r, f)()
|
|
expected = g.apply(lambda x: getattr(x.rolling(4), f)())
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
for f in ['std', 'var']:
|
|
result = getattr(r, f)(ddof=1)
|
|
expected = g.apply(lambda x: getattr(x.rolling(4), f)(ddof=1))
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = r.quantile(0.5)
|
|
expected = g.apply(lambda x: x.rolling(4).quantile(0.5))
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_rolling_corr_cov(self):
|
|
g = self.frame.groupby('A')
|
|
r = g.rolling(window=4)
|
|
|
|
for f in ['corr', 'cov']:
|
|
result = getattr(r, f)(self.frame)
|
|
|
|
def func(x):
|
|
return getattr(x.rolling(4), f)(self.frame)
|
|
expected = g.apply(func)
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = getattr(r.B, f)(pairwise=True)
|
|
|
|
def func(x):
|
|
return getattr(x.B.rolling(4), f)(pairwise=True)
|
|
expected = g.apply(func)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
def test_rolling_apply(self, raw):
|
|
g = self.frame.groupby('A')
|
|
r = g.rolling(window=4)
|
|
|
|
# reduction
|
|
result = r.apply(lambda x: x.sum(), raw=raw)
|
|
expected = g.apply(
|
|
lambda x: x.rolling(4).apply(lambda y: y.sum(), raw=raw))
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_rolling_apply_mutability(self):
|
|
# GH 14013
|
|
df = pd.DataFrame({'A': ['foo'] * 3 + ['bar'] * 3, 'B': [1] * 6})
|
|
g = df.groupby('A')
|
|
|
|
mi = pd.MultiIndex.from_tuples([('bar', 3), ('bar', 4), ('bar', 5),
|
|
('foo', 0), ('foo', 1), ('foo', 2)])
|
|
|
|
mi.names = ['A', None]
|
|
# Grouped column should not be a part of the output
|
|
expected = pd.DataFrame([np.nan, 2., 2.] * 2, columns=['B'], index=mi)
|
|
|
|
result = g.rolling(window=2).sum()
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
# Call an arbitrary function on the groupby
|
|
g.sum()
|
|
|
|
# Make sure nothing has been mutated
|
|
result = g.rolling(window=2).sum()
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_expanding(self):
|
|
g = self.frame.groupby('A')
|
|
r = g.expanding()
|
|
|
|
for f in ['sum', 'mean', 'min', 'max', 'count', 'kurt', 'skew']:
|
|
|
|
result = getattr(r, f)()
|
|
expected = g.apply(lambda x: getattr(x.expanding(), f)())
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
for f in ['std', 'var']:
|
|
result = getattr(r, f)(ddof=0)
|
|
expected = g.apply(lambda x: getattr(x.expanding(), f)(ddof=0))
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = r.quantile(0.5)
|
|
expected = g.apply(lambda x: x.expanding().quantile(0.5))
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_expanding_corr_cov(self):
|
|
g = self.frame.groupby('A')
|
|
r = g.expanding()
|
|
|
|
for f in ['corr', 'cov']:
|
|
result = getattr(r, f)(self.frame)
|
|
|
|
def func(x):
|
|
return getattr(x.expanding(), f)(self.frame)
|
|
expected = g.apply(func)
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = getattr(r.B, f)(pairwise=True)
|
|
|
|
def func(x):
|
|
return getattr(x.B.expanding(), f)(pairwise=True)
|
|
expected = g.apply(func)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
def test_expanding_apply(self, raw):
|
|
g = self.frame.groupby('A')
|
|
r = g.expanding()
|
|
|
|
# reduction
|
|
result = r.apply(lambda x: x.sum(), raw=raw)
|
|
expected = g.apply(
|
|
lambda x: x.expanding().apply(lambda y: y.sum(), raw=raw))
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
class TestRollingTS(object):
|
|
|
|
# rolling time-series friendly
|
|
# xref GH13327
|
|
|
|
def setup_method(self, method):
|
|
|
|
self.regular = DataFrame({'A': pd.date_range('20130101',
|
|
periods=5,
|
|
freq='s'),
|
|
'B': range(5)}).set_index('A')
|
|
|
|
self.ragged = DataFrame({'B': range(5)})
|
|
self.ragged.index = [Timestamp('20130101 09:00:00'),
|
|
Timestamp('20130101 09:00:02'),
|
|
Timestamp('20130101 09:00:03'),
|
|
Timestamp('20130101 09:00:05'),
|
|
Timestamp('20130101 09:00:06')]
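# unlike self.regular, the ragged index is irregularly spaced
# (gaps of 2s, 1s, 2s and 1s)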
|
|
|
|
def test_doc_string(self):
|
|
|
|
df = DataFrame({'B': [0, 1, 2, np.nan, 4]},
|
|
index=[Timestamp('20130101 09:00:00'),
|
|
Timestamp('20130101 09:00:02'),
|
|
Timestamp('20130101 09:00:03'),
|
|
Timestamp('20130101 09:00:05'),
|
|
Timestamp('20130101 09:00:06')])
|
|
df
|
|
df.rolling('2s').sum()
|
|
|
|
def test_valid(self):
|
|
|
|
df = self.regular
|
|
|
|
# not a valid freq
|
|
with pytest.raises(ValueError):
|
|
df.rolling(window='foobar')
|
|
|
|
# not a datetimelike index
|
|
with pytest.raises(ValueError):
|
|
df.reset_index().rolling(window='foobar')
|
|
|
|
# non-fixed freqs
|
|
for freq in ['2MS', pd.offsets.MonthBegin(2)]:
|
|
with pytest.raises(ValueError):
|
|
df.rolling(window=freq)
|
|
|
|
for freq in ['1D', pd.offsets.Day(2), '2ms']:
|
|
df.rolling(window=freq)
|
|
|
|
# non-integer min_periods
|
|
for minp in [1.0, 'foo', np.array([1, 2, 3])]:
|
|
with pytest.raises(ValueError):
|
|
df.rolling(window='1D', min_periods=minp)
|
|
|
|
# center is not implemented
|
|
with pytest.raises(NotImplementedError):
|
|
df.rolling(window='1D', center=True)
|
|
|
|
def test_on(self):
|
|
|
|
df = self.regular
|
|
|
|
# not a valid column
|
|
with pytest.raises(ValueError):
|
|
df.rolling(window='2s', on='foobar')
|
|
|
|
# column is valid
|
|
df = df.copy()
|
|
df['C'] = pd.date_range('20130101', periods=len(df))
|
|
df.rolling(window='2d', on='C').sum()
|
|
|
|
# invalid columns
|
|
with pytest.raises(ValueError):
|
|
df.rolling(window='2d', on='B')
|
|
|
|
# ok even though on non-selected
|
|
df.rolling(window='2d', on='C').B.sum()
|
|
|
|
def test_monotonic_on(self):
|
|
|
|
# on/index must be monotonic
|
|
df = DataFrame({'A': pd.date_range('20130101',
|
|
periods=5,
|
|
freq='s'),
|
|
'B': range(5)})
|
|
|
|
assert df.A.is_monotonic
|
|
df.rolling('2s', on='A').sum()
|
|
|
|
df = df.set_index('A')
|
|
assert df.index.is_monotonic
|
|
df.rolling('2s').sum()
|
|
|
|
# non-monotonic
|
|
df.index = reversed(df.index.tolist())
|
|
assert not df.index.is_monotonic
|
|
|
|
with pytest.raises(ValueError):
|
|
df.rolling('2s').sum()
|
|
|
|
df = df.reset_index()
|
|
with pytest.raises(ValueError):
|
|
df.rolling('2s', on='A').sum()
|
|
|
|
def test_frame_on(self):
|
|
|
|
df = DataFrame({'B': range(5),
|
|
'C': pd.date_range('20130101 09:00:00',
|
|
periods=5,
|
|
freq='3s')})
|
|
|
|
df['A'] = [Timestamp('20130101 09:00:00'),
|
|
Timestamp('20130101 09:00:02'),
|
|
Timestamp('20130101 09:00:03'),
|
|
Timestamp('20130101 09:00:05'),
|
|
Timestamp('20130101 09:00:06')]
|
|
|
|
# simulate the same computation via set_index vs. using 'on'
|
|
expected = (df.set_index('A')
|
|
.rolling('2s')
|
|
.B
|
|
.sum()
|
|
.reset_index(drop=True)
|
|
)
|
|
|
|
result = (df.rolling('2s', on='A')
|
|
.B
|
|
.sum()
|
|
)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# test as a frame
|
|
# the 'on' column should be ignored as an aggregation column;
# note that the expected is built by setting the index, computing, and
# resetting it, so its columns end up in a different order than the
# actual result, which keeps the original ordering
|
|
expected = (df.set_index('A')
|
|
.rolling('2s')[['B']]
|
|
.sum()
|
|
.reset_index()[['B', 'A']]
|
|
)
|
|
|
|
result = (df.rolling('2s', on='A')[['B']]
|
|
.sum()
|
|
)
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_frame_on2(self):
|
|
|
|
# using multiple aggregation columns
|
|
df = DataFrame({'A': [0, 1, 2, 3, 4],
|
|
'B': [0, 1, 2, np.nan, 4],
|
|
'C': Index([Timestamp('20130101 09:00:00'),
|
|
Timestamp('20130101 09:00:02'),
|
|
Timestamp('20130101 09:00:03'),
|
|
Timestamp('20130101 09:00:05'),
|
|
Timestamp('20130101 09:00:06')])},
|
|
columns=['A', 'C', 'B'])
|
|
|
|
expected1 = DataFrame({'A': [0., 1, 3, 3, 7],
|
|
'B': [0, 1, 3, np.nan, 4],
|
|
'C': df['C']},
|
|
columns=['A', 'C', 'B'])
|
|
|
|
result = df.rolling('2s', on='C').sum()
|
|
expected = expected1
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
expected = Series([0, 1, 3, np.nan, 4], name='B')
|
|
result = df.rolling('2s', on='C').B.sum()
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
expected = expected1[['A', 'B', 'C']]
|
|
result = df.rolling('2s', on='C')[['A', 'B', 'C']].sum()
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_basic_regular(self):
|
|
|
|
df = self.regular.copy()
|
|
|
|
df.index = pd.date_range('20130101', periods=5, freq='D')
|
|
expected = df.rolling(window=1, min_periods=1).sum()
|
|
result = df.rolling(window='1D').sum()
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
df.index = pd.date_range('20130101', periods=5, freq='2D')
|
|
expected = df.rolling(window=1, min_periods=1).sum()
|
|
result = df.rolling(window='2D', min_periods=1).sum()
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
expected = df.rolling(window=1, min_periods=1).sum()
|
|
result = df.rolling(window='2D', min_periods=1).sum()
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
expected = df.rolling(window=1).sum()
|
|
result = df.rolling(window='2D').sum()
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_min_periods(self):
|
|
|
|
# compare for min_periods
|
|
df = self.regular
|
|
|
|
# these specs are slightly different, but agree on this regular
# 1s-frequency data
|
|
expected = df.rolling(2, min_periods=1).sum()
|
|
result = df.rolling('2s').sum()
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
expected = df.rolling(2, min_periods=1).sum()
|
|
result = df.rolling('2s', min_periods=1).sum()
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_closed(self):
|
|
|
|
# xref GH13965
|
|
|
|
df = DataFrame({'A': [1] * 5},
|
|
index=[Timestamp('20130101 09:00:01'),
|
|
Timestamp('20130101 09:00:02'),
|
|
Timestamp('20130101 09:00:03'),
|
|
Timestamp('20130101 09:00:04'),
|
|
Timestamp('20130101 09:00:06')])
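# with a '2s' offset window, `closed` controls whether the window
# endpoints (t - 2s and t itself) are included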
|
|
|
|
# closed must be 'right', 'left', 'both', 'neither'
|
|
with pytest.raises(ValueError):
|
|
self.regular.rolling(window='2s', closed="blabla")
|
|
|
|
        expected = df.copy()
        expected["A"] = [1.0, 2, 2, 2, 1]
        result = df.rolling('2s', closed='right').sum()
        tm.assert_frame_equal(result, expected)

        # default should be 'right'
        result = df.rolling('2s').sum()
        tm.assert_frame_equal(result, expected)

        expected = df.copy()
        expected["A"] = [1.0, 2, 3, 3, 2]
        result = df.rolling('2s', closed='both').sum()
        tm.assert_frame_equal(result, expected)

        expected = df.copy()
        expected["A"] = [np.nan, 1.0, 2, 2, 1]
        result = df.rolling('2s', closed='left').sum()
        tm.assert_frame_equal(result, expected)

        expected = df.copy()
        expected["A"] = [np.nan, 1.0, 1, 1, np.nan]
        result = df.rolling('2s', closed='neither').sum()
        tm.assert_frame_equal(result, expected)

    def test_ragged_sum(self):

        df = self.ragged
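        # self.ragged holds B = [0, 1, 2, 3, 4] at 09:00:00, :02, :03, :05
        # and :06, so e.g. the '2s' window ending at :06 covers the values
        # at :05 and :06, i.e. {3, 4}, which sums to the 7 expected below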
        result = df.rolling(window='1s', min_periods=1).sum()
        expected = df.copy()
        expected['B'] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='2s', min_periods=1).sum()
        expected = df.copy()
        expected['B'] = [0.0, 1, 3, 3, 7]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='2s', min_periods=2).sum()
        expected = df.copy()
        expected['B'] = [np.nan, np.nan, 3, np.nan, 7]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='3s', min_periods=1).sum()
        expected = df.copy()
        expected['B'] = [0.0, 1, 3, 5, 7]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='3s').sum()
        expected = df.copy()
        expected['B'] = [0.0, 1, 3, 5, 7]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='4s', min_periods=1).sum()
        expected = df.copy()
        expected['B'] = [0.0, 1, 3, 6, 9]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='4s', min_periods=3).sum()
        expected = df.copy()
        expected['B'] = [np.nan, np.nan, 3, 6, 9]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='5s', min_periods=1).sum()
        expected = df.copy()
        expected['B'] = [0.0, 1, 3, 6, 10]
        tm.assert_frame_equal(result, expected)

    def test_ragged_mean(self):

        df = self.ragged
        result = df.rolling(window='1s', min_periods=1).mean()
        expected = df.copy()
        expected['B'] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='2s', min_periods=1).mean()
        expected = df.copy()
        expected['B'] = [0.0, 1, 1.5, 3.0, 3.5]
        tm.assert_frame_equal(result, expected)

    def test_ragged_median(self):

        df = self.ragged
        result = df.rolling(window='1s', min_periods=1).median()
        expected = df.copy()
        expected['B'] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='2s', min_periods=1).median()
        expected = df.copy()
        expected['B'] = [0.0, 1, 1.5, 3.0, 3.5]
        tm.assert_frame_equal(result, expected)

    def test_ragged_quantile(self):

        df = self.ragged
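        # every window here holds at most two observations, so the 0.5
        # quantile is just the midpoint of the window and matches the mean
        # and median computed above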
        result = df.rolling(window='1s', min_periods=1).quantile(0.5)
        expected = df.copy()
        expected['B'] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='2s', min_periods=1).quantile(0.5)
        expected = df.copy()
        expected['B'] = [0.0, 1, 1.5, 3.0, 3.5]
        tm.assert_frame_equal(result, expected)

    def test_ragged_std(self):

        df = self.ragged
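        # each '1s' window holds a single observation: std with ddof=0 is
        # 0.0, while ddof=1 divides by (n - 1) = 0 and yields NaN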
        result = df.rolling(window='1s', min_periods=1).std(ddof=0)
        expected = df.copy()
        expected['B'] = [0.0] * 5
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='1s', min_periods=1).std(ddof=1)
        expected = df.copy()
        expected['B'] = [np.nan] * 5
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='3s', min_periods=1).std(ddof=0)
        expected = df.copy()
        expected['B'] = [0.0] + [0.5] * 4
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='5s', min_periods=1).std(ddof=1)
        expected = df.copy()
        expected['B'] = [np.nan, 0.707107, 1.0, 1.0, 1.290994]
        tm.assert_frame_equal(result, expected)

    def test_ragged_var(self):

        df = self.ragged
        result = df.rolling(window='1s', min_periods=1).var(ddof=0)
        expected = df.copy()
        expected['B'] = [0.0] * 5
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='1s', min_periods=1).var(ddof=1)
        expected = df.copy()
        expected['B'] = [np.nan] * 5
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='3s', min_periods=1).var(ddof=0)
        expected = df.copy()
        expected['B'] = [0.0] + [0.25] * 4
        tm.assert_frame_equal(result, expected)
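
        # worked example for the last value below: the '5s' window ending
        # at 09:00:06 holds B = {1, 2, 3, 4}, mean 2.5, and with ddof=1 the
        # variance is (2.25 + 0.25 + 0.25 + 2.25) / 3 = 5 / 3 = 1 + 2/3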
        result = df.rolling(window='5s', min_periods=1).var(ddof=1)
        expected = df.copy()
        expected['B'] = [np.nan, 0.5, 1.0, 1.0, 1 + 2 / 3.]
        tm.assert_frame_equal(result, expected)

    def test_ragged_skew(self):
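
        # rolling skew needs at least 3 observations in a window (and
        # rolling kurt, below, at least 4); smaller windows yield NaN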
        df = self.ragged
        result = df.rolling(window='3s', min_periods=1).skew()
        expected = df.copy()
        expected['B'] = [np.nan] * 5
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='5s', min_periods=1).skew()
        expected = df.copy()
        expected['B'] = [np.nan] * 2 + [0.0, 0.0, 0.0]
        tm.assert_frame_equal(result, expected)

    def test_ragged_kurt(self):

        df = self.ragged
        result = df.rolling(window='3s', min_periods=1).kurt()
        expected = df.copy()
        expected['B'] = [np.nan] * 5
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='5s', min_periods=1).kurt()
        expected = df.copy()
        expected['B'] = [np.nan] * 4 + [-1.2]
        tm.assert_frame_equal(result, expected)

    def test_ragged_count(self):

        df = self.ragged
        result = df.rolling(window='1s', min_periods=1).count()
        expected = df.copy()
        expected['B'] = [1.0, 1, 1, 1, 1]
        tm.assert_frame_equal(result, expected)

        df = self.ragged
        result = df.rolling(window='1s').count()
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='2s', min_periods=1).count()
        expected = df.copy()
        expected['B'] = [1.0, 1, 2, 1, 2]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='2s', min_periods=2).count()
        expected = df.copy()
        expected['B'] = [np.nan, np.nan, 2, np.nan, 2]
        tm.assert_frame_equal(result, expected)

    def test_regular_min(self):

        df = DataFrame({'A': pd.date_range('20130101',
                                           periods=5,
                                           freq='s'),
                        'B': [0.0, 1, 2, 3, 4]}).set_index('A')
        result = df.rolling('1s').min()
        expected = df.copy()
        expected['B'] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        df = DataFrame({'A': pd.date_range('20130101',
                                           periods=5,
                                           freq='s'),
                        'B': [5, 4, 3, 4, 5]}).set_index('A')

        result = df.rolling('2s').min()
        expected = df.copy()
        expected['B'] = [5.0, 4, 3, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling('5s').min()
        expected = df.copy()
        expected['B'] = [5.0, 4, 3, 3, 3]
        tm.assert_frame_equal(result, expected)

    def test_ragged_min(self):

        df = self.ragged

        result = df.rolling(window='1s', min_periods=1).min()
        expected = df.copy()
        expected['B'] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='2s', min_periods=1).min()
        expected = df.copy()
        expected['B'] = [0.0, 1, 1, 3, 3]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='5s', min_periods=1).min()
        expected = df.copy()
        expected['B'] = [0.0, 0, 0, 1, 1]
        tm.assert_frame_equal(result, expected)

    def test_perf_min(self):

        N = 10000
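
        # on a regular 1-second index, the '2s' and '200s' offset windows
        # cover the same rows as fixed windows of 2 and 200, so each pair
        # below should agree; the assertion is a loose elementwise check
        # rather than an exact frame comparison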
        dfp = DataFrame({'B': np.random.randn(N)},
                        index=pd.date_range('20130101',
                                            periods=N,
                                            freq='s'))
        expected = dfp.rolling(2, min_periods=1).min()
        result = dfp.rolling('2s').min()
        assert ((result - expected) < 0.01).all().bool()

        expected = dfp.rolling(200, min_periods=1).min()
        result = dfp.rolling('200s').min()
        assert ((result - expected) < 0.01).all().bool()

    def test_ragged_max(self):

        df = self.ragged

        result = df.rolling(window='1s', min_periods=1).max()
        expected = df.copy()
        expected['B'] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='2s', min_periods=1).max()
        expected = df.copy()
        expected['B'] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='5s', min_periods=1).max()
        expected = df.copy()
        expected['B'] = [0.0, 1, 2, 3, 4]
        tm.assert_frame_equal(result, expected)

    def test_ragged_apply(self, raw):

        df = self.ragged

        f = lambda x: 1
        result = df.rolling(window='1s', min_periods=1).apply(f, raw=raw)
        expected = df.copy()
        expected['B'] = 1.
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='2s', min_periods=1).apply(f, raw=raw)
        expected = df.copy()
        expected['B'] = 1.
        tm.assert_frame_equal(result, expected)

        result = df.rolling(window='5s', min_periods=1).apply(f, raw=raw)
        expected = df.copy()
        expected['B'] = 1.
        tm.assert_frame_equal(result, expected)

    def test_all(self):

        # simple comparison of integer vs time-based windowing
        df = self.regular * 2
        er = df.rolling(window=1)
        r = df.rolling(window='1s')

        for f in ['sum', 'mean', 'count', 'median', 'std',
                  'var', 'kurt', 'skew', 'min', 'max']:

            result = getattr(r, f)()
            expected = getattr(er, f)()
            tm.assert_frame_equal(result, expected)

        result = r.quantile(0.5)
        expected = er.quantile(0.5)
        tm.assert_frame_equal(result, expected)

    def test_all_apply(self, raw):

        df = self.regular * 2
        er = df.rolling(window=1)
        r = df.rolling(window='1s')

        result = r.apply(lambda x: 1, raw=raw)
        expected = er.apply(lambda x: 1, raw=raw)
        tm.assert_frame_equal(result, expected)

    def test_all2(self):

        # more sophisticated comparison of integer vs.
        # time-based windowing
        df = DataFrame({'B': np.arange(50)},
                       index=pd.date_range('20130101',
                                           periods=50, freq='H')
                       )
        # in-range data
        dft = df.between_time("09:00", "16:00")

        r = dft.rolling(window='5H')

        for f in ['sum', 'mean', 'count', 'median', 'std',
                  'var', 'kurt', 'skew', 'min', 'max']:

            result = getattr(r, f)()

            # we need to roll the days separately
            # to compare with a time-based roll
            # finally groupby-apply will return a multi-index
            # so we need to drop the day
            def agg_by_day(x):
                x = x.between_time("09:00", "16:00")
                return getattr(x.rolling(5, min_periods=1), f)()
            expected = df.groupby(df.index.day).apply(
                agg_by_day).reset_index(level=0, drop=True)

            tm.assert_frame_equal(result, expected)

    def test_groupby_monotonic(self):

        # GH 15130
        # we don't need to validate monotonicity when grouping

        data = [
            ['David', '1/1/2015', 100], ['David', '1/5/2015', 500],
            ['David', '5/30/2015', 50], ['David', '7/25/2015', 50],
            ['Ryan', '1/4/2014', 100], ['Ryan', '1/19/2015', 500],
            ['Ryan', '3/31/2016', 50], ['Joe', '7/1/2015', 100],
            ['Joe', '9/9/2015', 500], ['Joe', '10/15/2015', 50]]

        df = DataFrame(data=data, columns=['name', 'date', 'amount'])
        df['date'] = pd.to_datetime(df['date'])
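
        # 'date' is increasing within each name but not across the frame as
        # a whole, so only per-group monotonicity should be required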
        expected = df.set_index('date').groupby('name').apply(
            lambda x: x.rolling('180D')['amount'].sum())
        result = df.groupby('name').rolling('180D', on='date')['amount'].sum()
        tm.assert_series_equal(result, expected)

    def test_non_monotonic(self):
        # GH 13966 (similar to #15130, closed by #15175)

        dates = pd.date_range(start='2016-01-01 09:30:00',
                              periods=20, freq='s')
        df = DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8,
                        'B': np.concatenate((dates, dates)),
                        'C': np.arange(40)})
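
        # 'B' repeats the same 20 timestamps, so it is not monotonic over
        # the whole frame, but it is monotonic within each 'A' group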
        result = df.groupby('A').rolling('4s', on='B').C.mean()
        expected = df.set_index('B').groupby('A').apply(
            lambda x: x.rolling('4s')['C'].mean())
        tm.assert_series_equal(result, expected)

        df2 = df.sort_values('B')
        result = df2.groupby('A').rolling('4s', on='B').C.mean()
        tm.assert_series_equal(result, expected)

    def test_rolling_cov_offset(self):
        # GH16058

        idx = pd.date_range('2017-01-01', periods=24, freq='1h')
        ss = Series(np.arange(len(idx)), index=idx)
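
        # ss increases by 1 each hour, so every full two-observation window
        # has sample covariance (variance of the series with itself) 0.5,
        # and every three-observation window has 1.0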
        result = ss.rolling('2h').cov()
        expected = Series([np.nan] + [0.5] * (len(idx) - 1), index=idx)
        tm.assert_series_equal(result, expected)

        expected2 = ss.rolling(2, min_periods=1).cov()
        tm.assert_series_equal(result, expected2)

        result = ss.rolling('3h').cov()
        expected = Series([np.nan, 0.5] + [1.0] * (len(idx) - 2), index=idx)
        tm.assert_series_equal(result, expected)

        expected2 = ss.rolling(3, min_periods=1).cov()
        tm.assert_series_equal(result, expected2)