3414 lines
131 KiB
Python
3414 lines
131 KiB
Python
# pylint: disable=E1101
|
|
|
|
from warnings import catch_warnings
|
|
from datetime import datetime, timedelta
|
|
from functools import partial
|
|
from textwrap import dedent
|
|
from operator import methodcaller
|
|
|
|
import pytz
|
|
import pytest
|
|
import dateutil
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
import pandas.tseries.offsets as offsets
|
|
import pandas.util.testing as tm
|
|
from pandas import (Series, DataFrame, Panel, Index, isna,
|
|
notna, Timestamp)
|
|
|
|
from pandas.compat import range, lrange, zip, product, OrderedDict
|
|
from pandas.errors import UnsupportedFunctionCall
|
|
from pandas.core.groupby.groupby import DataError
|
|
import pandas.core.common as com
|
|
|
|
from pandas.tseries.frequencies import to_offset
|
|
from pandas.core.indexes.datetimes import date_range
|
|
from pandas.tseries.offsets import Minute, BDay
|
|
from pandas.core.indexes.period import period_range, PeriodIndex, Period
|
|
from pandas.core.resample import DatetimeIndex, TimeGrouper
|
|
from pandas.core.indexes.timedeltas import timedelta_range, TimedeltaIndex
|
|
from pandas.util.testing import (assert_series_equal, assert_almost_equal,
|
|
assert_frame_equal, assert_index_equal)
|
|
from pandas._libs.tslibs.period import IncompatibleFrequency
|
|
from pandas._libs.tslibs.ccalendar import DAYS, MONTHS
|
|
|
|
bday = BDay()
|
|
|
|
# The various methods we support
|
|
downsample_methods = ['min', 'max', 'first', 'last', 'sum', 'mean', 'sem',
|
|
'median', 'prod', 'var', 'ohlc']
|
|
upsample_methods = ['count', 'size']
|
|
series_methods = ['nunique']
|
|
resample_methods = downsample_methods + upsample_methods + series_methods
|
|
|
|
|
|
def _simple_ts(start, end, freq='D'):
|
|
rng = date_range(start, end, freq=freq)
|
|
return Series(np.random.randn(len(rng)), index=rng)
|
|
|
|
|
|
def _simple_pts(start, end, freq='D'):
|
|
rng = period_range(start, end, freq=freq)
|
|
return Series(np.random.randn(len(rng)), index=rng)
|
|
|
|
|
|
class TestResampleAPI(object):
|
|
|
|
def setup_method(self, method):
|
|
dti = DatetimeIndex(start=datetime(2005, 1, 1),
|
|
end=datetime(2005, 1, 10), freq='Min')
|
|
|
|
self.series = Series(np.random.rand(len(dti)), dti)
|
|
self.frame = DataFrame(
|
|
{'A': self.series, 'B': self.series, 'C': np.arange(len(dti))})
|
|
|
|
def test_str(self):
|
|
|
|
r = self.series.resample('H')
|
|
assert ('DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, '
|
|
'label=left, convention=start, base=0]' in str(r))
|
|
|
|
def test_api(self):
|
|
|
|
r = self.series.resample('H')
|
|
result = r.mean()
|
|
assert isinstance(result, Series)
|
|
assert len(result) == 217
|
|
|
|
r = self.series.to_frame().resample('H')
|
|
result = r.mean()
|
|
assert isinstance(result, DataFrame)
|
|
assert len(result) == 217
|
|
|
|
def test_groupby_resample_api(self):
|
|
|
|
# GH 12448
|
|
# .groupby(...).resample(...) hitting warnings
|
|
# when appropriate
|
|
df = DataFrame({'date': pd.date_range(start='2016-01-01',
|
|
periods=4,
|
|
freq='W'),
|
|
'group': [1, 1, 2, 2],
|
|
'val': [5, 6, 7, 8]}).set_index('date')
|
|
|
|
# replication step
|
|
i = pd.date_range('2016-01-03', periods=8).tolist() + \
|
|
pd.date_range('2016-01-17', periods=8).tolist()
|
|
index = pd.MultiIndex.from_arrays([[1] * 8 + [2] * 8, i],
|
|
names=['group', 'date'])
|
|
expected = DataFrame({'val': [5] * 7 + [6] + [7] * 7 + [8]},
|
|
index=index)
|
|
result = df.groupby('group').apply(
|
|
lambda x: x.resample('1D').ffill())[['val']]
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_groupby_resample_on_api(self):
|
|
|
|
# GH 15021
|
|
# .groupby(...).resample(on=...) results in an unexpected
|
|
# keyword warning.
|
|
df = DataFrame({'key': ['A', 'B'] * 5,
|
|
'dates': pd.date_range('2016-01-01', periods=10),
|
|
'values': np.random.randn(10)})
|
|
|
|
expected = df.set_index('dates').groupby('key').resample('D').mean()
|
|
|
|
result = df.groupby('key').resample('D', on='dates').mean()
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_pipe(self):
|
|
# GH17905
|
|
|
|
# series
|
|
r = self.series.resample('H')
|
|
expected = r.max() - r.mean()
|
|
result = r.pipe(lambda x: x.max() - x.mean())
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# dataframe
|
|
r = self.frame.resample('H')
|
|
expected = r.max() - r.mean()
|
|
result = r.pipe(lambda x: x.max() - x.mean())
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_getitem(self):
|
|
|
|
r = self.frame.resample('H')
|
|
tm.assert_index_equal(r._selected_obj.columns, self.frame.columns)
|
|
|
|
r = self.frame.resample('H')['B']
|
|
assert r._selected_obj.name == self.frame.columns[1]
|
|
|
|
# technically this is allowed
|
|
r = self.frame.resample('H')['A', 'B']
|
|
tm.assert_index_equal(r._selected_obj.columns,
|
|
self.frame.columns[[0, 1]])
|
|
|
|
r = self.frame.resample('H')['A', 'B']
|
|
tm.assert_index_equal(r._selected_obj.columns,
|
|
self.frame.columns[[0, 1]])
|
|
|
|
def test_select_bad_cols(self):
|
|
|
|
g = self.frame.resample('H')
|
|
pytest.raises(KeyError, g.__getitem__, ['D'])
|
|
|
|
pytest.raises(KeyError, g.__getitem__, ['A', 'D'])
|
|
with tm.assert_raises_regex(KeyError, '^[^A]+$'):
|
|
# A should not be referenced as a bad column...
|
|
# will have to rethink regex if you change message!
|
|
g[['A', 'D']]
|
|
|
|
def test_attribute_access(self):
|
|
|
|
r = self.frame.resample('H')
|
|
tm.assert_series_equal(r.A.sum(), r['A'].sum())
|
|
|
|
def test_api_compat_before_use(self):
|
|
|
|
# make sure that we are setting the binner
|
|
# on these attributes
|
|
for attr in ['groups', 'ngroups', 'indices']:
|
|
rng = pd.date_range('1/1/2012', periods=100, freq='S')
|
|
ts = Series(np.arange(len(rng)), index=rng)
|
|
rs = ts.resample('30s')
|
|
|
|
# before use
|
|
getattr(rs, attr)
|
|
|
|
# after grouper is initialized is ok
|
|
rs.mean()
|
|
getattr(rs, attr)
|
|
|
|
def tests_skip_nuisance(self):
|
|
|
|
df = self.frame
|
|
df['D'] = 'foo'
|
|
r = df.resample('H')
|
|
result = r[['A', 'B']].sum()
|
|
expected = pd.concat([r.A.sum(), r.B.sum()], axis=1)
|
|
assert_frame_equal(result, expected)
|
|
|
|
expected = r[['A', 'B', 'C']].sum()
|
|
result = r.sum()
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_downsample_but_actually_upsampling(self):
|
|
|
|
# this is reindex / asfreq
|
|
rng = pd.date_range('1/1/2012', periods=100, freq='S')
|
|
ts = Series(np.arange(len(rng), dtype='int64'), index=rng)
|
|
result = ts.resample('20s').asfreq()
|
|
expected = Series([0, 20, 40, 60, 80],
|
|
index=pd.date_range('2012-01-01 00:00:00',
|
|
freq='20s',
|
|
periods=5))
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_combined_up_downsampling_of_irregular(self):
|
|
|
|
# since we are reallydoing an operation like this
|
|
# ts2.resample('2s').mean().ffill()
|
|
# preserve these semantics
|
|
|
|
rng = pd.date_range('1/1/2012', periods=100, freq='S')
|
|
ts = Series(np.arange(len(rng)), index=rng)
|
|
ts2 = ts.iloc[[0, 1, 2, 3, 5, 7, 11, 15, 16, 25, 30]]
|
|
|
|
with tm.assert_produces_warning(FutureWarning,
|
|
check_stacklevel=False):
|
|
result = ts2.resample('2s', how='mean', fill_method='ffill')
|
|
expected = ts2.resample('2s').mean().ffill()
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_transform(self):
|
|
|
|
r = self.series.resample('20min')
|
|
expected = self.series.groupby(
|
|
pd.Grouper(freq='20min')).transform('mean')
|
|
result = r.transform('mean')
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_fillna(self):
|
|
|
|
# need to upsample here
|
|
rng = pd.date_range('1/1/2012', periods=10, freq='2S')
|
|
ts = Series(np.arange(len(rng), dtype='int64'), index=rng)
|
|
r = ts.resample('s')
|
|
|
|
expected = r.ffill()
|
|
result = r.fillna(method='ffill')
|
|
assert_series_equal(result, expected)
|
|
|
|
expected = r.bfill()
|
|
result = r.fillna(method='bfill')
|
|
assert_series_equal(result, expected)
|
|
|
|
with pytest.raises(ValueError):
|
|
r.fillna(0)
|
|
|
|
def test_apply_without_aggregation(self):
|
|
|
|
# both resample and groupby should work w/o aggregation
|
|
r = self.series.resample('20min')
|
|
g = self.series.groupby(pd.Grouper(freq='20min'))
|
|
|
|
for t in [g, r]:
|
|
result = t.apply(lambda x: x)
|
|
assert_series_equal(result, self.series)
|
|
|
|
def test_agg_consistency(self):
|
|
|
|
# make sure that we are consistent across
|
|
# similar aggregations with and w/o selection list
|
|
df = DataFrame(np.random.randn(1000, 3),
|
|
index=pd.date_range('1/1/2012', freq='S', periods=1000),
|
|
columns=['A', 'B', 'C'])
|
|
|
|
r = df.resample('3T')
|
|
|
|
with tm.assert_produces_warning(FutureWarning,
|
|
check_stacklevel=False):
|
|
expected = r[['A', 'B', 'C']].agg({'r1': 'mean', 'r2': 'sum'})
|
|
result = r.agg({'r1': 'mean', 'r2': 'sum'})
|
|
assert_frame_equal(result, expected)
|
|
|
|
# TODO: once GH 14008 is fixed, move these tests into
|
|
# `Base` test class
|
|
def test_agg(self):
|
|
# test with all three Resampler apis and TimeGrouper
|
|
|
|
np.random.seed(1234)
|
|
index = date_range(datetime(2005, 1, 1),
|
|
datetime(2005, 1, 10), freq='D')
|
|
index.name = 'date'
|
|
df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index)
|
|
df_col = df.reset_index()
|
|
df_mult = df_col.copy()
|
|
df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index],
|
|
names=['index', 'date'])
|
|
r = df.resample('2D')
|
|
cases = [
|
|
r,
|
|
df_col.resample('2D', on='date'),
|
|
df_mult.resample('2D', level='date'),
|
|
df.groupby(pd.Grouper(freq='2D'))
|
|
]
|
|
|
|
a_mean = r['A'].mean()
|
|
a_std = r['A'].std()
|
|
a_sum = r['A'].sum()
|
|
b_mean = r['B'].mean()
|
|
b_std = r['B'].std()
|
|
b_sum = r['B'].sum()
|
|
|
|
expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
|
|
expected.columns = pd.MultiIndex.from_product([['A', 'B'],
|
|
['mean', 'std']])
|
|
for t in cases:
|
|
result = t.aggregate([np.mean, np.std])
|
|
assert_frame_equal(result, expected)
|
|
|
|
expected = pd.concat([a_mean, b_std], axis=1)
|
|
for t in cases:
|
|
result = t.aggregate({'A': np.mean,
|
|
'B': np.std})
|
|
assert_frame_equal(result, expected, check_like=True)
|
|
|
|
expected = pd.concat([a_mean, a_std], axis=1)
|
|
expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'),
|
|
('A', 'std')])
|
|
for t in cases:
|
|
result = t.aggregate({'A': ['mean', 'std']})
|
|
assert_frame_equal(result, expected)
|
|
|
|
expected = pd.concat([a_mean, a_sum], axis=1)
|
|
expected.columns = ['mean', 'sum']
|
|
for t in cases:
|
|
result = t['A'].aggregate(['mean', 'sum'])
|
|
assert_frame_equal(result, expected)
|
|
|
|
expected = pd.concat([a_mean, a_sum], axis=1)
|
|
expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'),
|
|
('A', 'sum')])
|
|
for t in cases:
|
|
with tm.assert_produces_warning(FutureWarning,
|
|
check_stacklevel=False):
|
|
result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'}})
|
|
assert_frame_equal(result, expected, check_like=True)
|
|
|
|
expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
|
|
expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'),
|
|
('A', 'sum'),
|
|
('B', 'mean2'),
|
|
('B', 'sum2')])
|
|
for t in cases:
|
|
with tm.assert_produces_warning(FutureWarning,
|
|
check_stacklevel=False):
|
|
result = t.aggregate({'A': {'mean': 'mean', 'sum': 'sum'},
|
|
'B': {'mean2': 'mean', 'sum2': 'sum'}})
|
|
assert_frame_equal(result, expected, check_like=True)
|
|
|
|
expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1)
|
|
expected.columns = pd.MultiIndex.from_tuples([('A', 'mean'),
|
|
('A', 'std'),
|
|
('B', 'mean'),
|
|
('B', 'std')])
|
|
for t in cases:
|
|
result = t.aggregate({'A': ['mean', 'std'],
|
|
'B': ['mean', 'std']})
|
|
assert_frame_equal(result, expected, check_like=True)
|
|
|
|
expected = pd.concat([a_mean, a_sum, b_mean, b_sum], axis=1)
|
|
expected.columns = pd.MultiIndex.from_tuples([('r1', 'A', 'mean'),
|
|
('r1', 'A', 'sum'),
|
|
('r2', 'B', 'mean'),
|
|
('r2', 'B', 'sum')])
|
|
|
|
def test_agg_misc(self):
|
|
# test with all three Resampler apis and TimeGrouper
|
|
|
|
np.random.seed(1234)
|
|
index = date_range(datetime(2005, 1, 1),
|
|
datetime(2005, 1, 10), freq='D')
|
|
index.name = 'date'
|
|
df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index)
|
|
df_col = df.reset_index()
|
|
df_mult = df_col.copy()
|
|
df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index],
|
|
names=['index', 'date'])
|
|
|
|
r = df.resample('2D')
|
|
cases = [
|
|
r,
|
|
df_col.resample('2D', on='date'),
|
|
df_mult.resample('2D', level='date'),
|
|
df.groupby(pd.Grouper(freq='2D'))
|
|
]
|
|
|
|
# passed lambda
|
|
for t in cases:
|
|
result = t.agg({'A': np.sum,
|
|
'B': lambda x: np.std(x, ddof=1)})
|
|
rcustom = t['B'].apply(lambda x: np.std(x, ddof=1))
|
|
expected = pd.concat([r['A'].sum(), rcustom], axis=1)
|
|
assert_frame_equal(result, expected, check_like=True)
|
|
|
|
# agg with renamers
|
|
expected = pd.concat([t['A'].sum(),
|
|
t['B'].sum(),
|
|
t['A'].mean(),
|
|
t['B'].mean()],
|
|
axis=1)
|
|
expected.columns = pd.MultiIndex.from_tuples([('result1', 'A'),
|
|
('result1', 'B'),
|
|
('result2', 'A'),
|
|
('result2', 'B')])
|
|
|
|
for t in cases:
|
|
with tm.assert_produces_warning(FutureWarning,
|
|
check_stacklevel=False):
|
|
result = t[['A', 'B']].agg(OrderedDict([('result1', np.sum),
|
|
('result2', np.mean)]))
|
|
assert_frame_equal(result, expected, check_like=True)
|
|
|
|
# agg with different hows
|
|
expected = pd.concat([t['A'].sum(),
|
|
t['A'].std(),
|
|
t['B'].mean(),
|
|
t['B'].std()],
|
|
axis=1)
|
|
expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'),
|
|
('A', 'std'),
|
|
('B', 'mean'),
|
|
('B', 'std')])
|
|
for t in cases:
|
|
result = t.agg(OrderedDict([('A', ['sum', 'std']),
|
|
('B', ['mean', 'std'])]))
|
|
assert_frame_equal(result, expected, check_like=True)
|
|
|
|
# equivalent of using a selection list / or not
|
|
for t in cases:
|
|
result = t[['A', 'B']].agg({'A': ['sum', 'std'],
|
|
'B': ['mean', 'std']})
|
|
assert_frame_equal(result, expected, check_like=True)
|
|
|
|
# series like aggs
|
|
for t in cases:
|
|
with tm.assert_produces_warning(FutureWarning,
|
|
check_stacklevel=False):
|
|
result = t['A'].agg({'A': ['sum', 'std']})
|
|
expected = pd.concat([t['A'].sum(),
|
|
t['A'].std()],
|
|
axis=1)
|
|
expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'),
|
|
('A', 'std')])
|
|
assert_frame_equal(result, expected, check_like=True)
|
|
|
|
expected = pd.concat([t['A'].agg(['sum', 'std']),
|
|
t['A'].agg(['mean', 'std'])],
|
|
axis=1)
|
|
expected.columns = pd.MultiIndex.from_tuples([('A', 'sum'),
|
|
('A', 'std'),
|
|
('B', 'mean'),
|
|
('B', 'std')])
|
|
with tm.assert_produces_warning(FutureWarning,
|
|
check_stacklevel=False):
|
|
result = t['A'].agg({'A': ['sum', 'std'],
|
|
'B': ['mean', 'std']})
|
|
assert_frame_equal(result, expected, check_like=True)
|
|
|
|
# errors
|
|
# invalid names in the agg specification
|
|
for t in cases:
|
|
def f():
|
|
with tm.assert_produces_warning(FutureWarning,
|
|
check_stacklevel=False):
|
|
t[['A']].agg({'A': ['sum', 'std'],
|
|
'B': ['mean', 'std']})
|
|
|
|
pytest.raises(KeyError, f)
|
|
|
|
def test_agg_nested_dicts(self):
|
|
|
|
np.random.seed(1234)
|
|
index = date_range(datetime(2005, 1, 1),
|
|
datetime(2005, 1, 10), freq='D')
|
|
index.name = 'date'
|
|
df = DataFrame(np.random.rand(10, 2), columns=list('AB'), index=index)
|
|
df_col = df.reset_index()
|
|
df_mult = df_col.copy()
|
|
df_mult.index = pd.MultiIndex.from_arrays([range(10), df.index],
|
|
names=['index', 'date'])
|
|
r = df.resample('2D')
|
|
cases = [
|
|
r,
|
|
df_col.resample('2D', on='date'),
|
|
df_mult.resample('2D', level='date'),
|
|
df.groupby(pd.Grouper(freq='2D'))
|
|
]
|
|
|
|
for t in cases:
|
|
def f():
|
|
t.aggregate({'r1': {'A': ['mean', 'sum']},
|
|
'r2': {'B': ['mean', 'sum']}})
|
|
pytest.raises(ValueError, f)
|
|
|
|
for t in cases:
|
|
expected = pd.concat([t['A'].mean(), t['A'].std(), t['B'].mean(),
|
|
t['B'].std()], axis=1)
|
|
expected.columns = pd.MultiIndex.from_tuples([('ra', 'mean'), (
|
|
'ra', 'std'), ('rb', 'mean'), ('rb', 'std')])
|
|
|
|
with tm.assert_produces_warning(FutureWarning,
|
|
check_stacklevel=False):
|
|
result = t[['A', 'B']].agg({'A': {'ra': ['mean', 'std']},
|
|
'B': {'rb': ['mean', 'std']}})
|
|
assert_frame_equal(result, expected, check_like=True)
|
|
|
|
with tm.assert_produces_warning(FutureWarning,
|
|
check_stacklevel=False):
|
|
result = t.agg({'A': {'ra': ['mean', 'std']},
|
|
'B': {'rb': ['mean', 'std']}})
|
|
assert_frame_equal(result, expected, check_like=True)
|
|
|
|
def test_try_aggregate_non_existing_column(self):
|
|
# GH 16766
|
|
data = [
|
|
{'dt': datetime(2017, 6, 1, 0), 'x': 1.0, 'y': 2.0},
|
|
{'dt': datetime(2017, 6, 1, 1), 'x': 2.0, 'y': 2.0},
|
|
{'dt': datetime(2017, 6, 1, 2), 'x': 3.0, 'y': 1.5}
|
|
]
|
|
df = DataFrame(data).set_index('dt')
|
|
|
|
# Error as we don't have 'z' column
|
|
with pytest.raises(KeyError):
|
|
df.resample('30T').agg({'x': ['mean'],
|
|
'y': ['median'],
|
|
'z': ['sum']})
|
|
|
|
def test_selection_api_validation(self):
|
|
# GH 13500
|
|
index = date_range(datetime(2005, 1, 1),
|
|
datetime(2005, 1, 10), freq='D')
|
|
|
|
rng = np.arange(len(index), dtype=np.int64)
|
|
df = DataFrame({'date': index, 'a': rng},
|
|
index=pd.MultiIndex.from_arrays([rng, index],
|
|
names=['v', 'd']))
|
|
df_exp = DataFrame({'a': rng}, index=index)
|
|
|
|
# non DatetimeIndex
|
|
with pytest.raises(TypeError):
|
|
df.resample('2D', level='v')
|
|
|
|
with pytest.raises(ValueError):
|
|
df.resample('2D', on='date', level='d')
|
|
|
|
with pytest.raises(TypeError):
|
|
df.resample('2D', on=['a', 'date'])
|
|
|
|
with pytest.raises(KeyError):
|
|
df.resample('2D', level=['a', 'date'])
|
|
|
|
# upsampling not allowed
|
|
with pytest.raises(ValueError):
|
|
df.resample('2D', level='d').asfreq()
|
|
|
|
with pytest.raises(ValueError):
|
|
df.resample('2D', on='date').asfreq()
|
|
|
|
exp = df_exp.resample('2D').sum()
|
|
exp.index.name = 'date'
|
|
assert_frame_equal(exp, df.resample('2D', on='date').sum())
|
|
|
|
exp.index.name = 'd'
|
|
assert_frame_equal(exp, df.resample('2D', level='d').sum())
|
|
|
|
|
|
class Base(object):
|
|
"""
|
|
base class for resampling testing, calling
|
|
.create_series() generates a series of each index type
|
|
"""
|
|
|
|
def create_index(self, *args, **kwargs):
|
|
""" return the _index_factory created using the args, kwargs """
|
|
factory = self._index_factory()
|
|
return factory(*args, **kwargs)
|
|
|
|
@pytest.fixture
|
|
def _index_start(self):
|
|
return datetime(2005, 1, 1)
|
|
|
|
@pytest.fixture
|
|
def _index_end(self):
|
|
return datetime(2005, 1, 10)
|
|
|
|
@pytest.fixture
|
|
def _index_freq(self):
|
|
return 'D'
|
|
|
|
@pytest.fixture
|
|
def index(self, _index_start, _index_end, _index_freq):
|
|
return self.create_index(_index_start, _index_end, freq=_index_freq)
|
|
|
|
@pytest.fixture
|
|
def _series_name(self):
|
|
raise com.AbstractMethodError(self)
|
|
|
|
@pytest.fixture
|
|
def _static_values(self, index):
|
|
return np.arange(len(index))
|
|
|
|
@pytest.fixture
|
|
def series(self, index, _series_name, _static_values):
|
|
return Series(_static_values, index=index, name=_series_name)
|
|
|
|
@pytest.fixture
|
|
def frame(self, index, _static_values):
|
|
return DataFrame({'value': _static_values}, index=index)
|
|
|
|
@pytest.fixture(params=[Series, DataFrame])
|
|
def series_and_frame(self, request, index, _series_name, _static_values):
|
|
if request.param == Series:
|
|
return Series(_static_values, index=index, name=_series_name)
|
|
if request.param == DataFrame:
|
|
return DataFrame({'value': _static_values}, index=index)
|
|
|
|
@pytest.mark.parametrize('freq', ['2D', '1H'])
|
|
def test_asfreq(self, series_and_frame, freq):
|
|
obj = series_and_frame
|
|
|
|
result = obj.resample(freq).asfreq()
|
|
if freq == '2D':
|
|
new_index = obj.index.take(np.arange(0, len(obj.index), 2))
|
|
new_index.freq = to_offset('2D')
|
|
else:
|
|
new_index = self.create_index(obj.index[0], obj.index[-1],
|
|
freq=freq)
|
|
expected = obj.reindex(new_index)
|
|
assert_almost_equal(result, expected)
|
|
|
|
def test_asfreq_fill_value(self):
|
|
# test for fill value during resampling, issue 3715
|
|
|
|
s = self.create_series()
|
|
|
|
result = s.resample('1H').asfreq()
|
|
new_index = self.create_index(s.index[0], s.index[-1], freq='1H')
|
|
expected = s.reindex(new_index)
|
|
assert_series_equal(result, expected)
|
|
|
|
frame = s.to_frame('value')
|
|
frame.iloc[1] = None
|
|
result = frame.resample('1H').asfreq(fill_value=4.0)
|
|
new_index = self.create_index(frame.index[0],
|
|
frame.index[-1], freq='1H')
|
|
expected = frame.reindex(new_index, fill_value=4.0)
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_resample_interpolate(self):
|
|
# # 12925
|
|
df = self.create_series().to_frame('value')
|
|
assert_frame_equal(
|
|
df.resample('1T').asfreq().interpolate(),
|
|
df.resample('1T').interpolate())
|
|
|
|
def test_raises_on_non_datetimelike_index(self):
|
|
# this is a non datetimelike index
|
|
xp = DataFrame()
|
|
pytest.raises(TypeError, lambda: xp.resample('A').mean())
|
|
|
|
def test_resample_empty_series(self):
|
|
# GH12771 & GH12868
|
|
|
|
s = self.create_series()[:0]
|
|
|
|
for freq in ['M', 'D', 'H']:
|
|
# need to test for ohlc from GH13083
|
|
methods = [method for method in resample_methods
|
|
if method != 'ohlc']
|
|
for method in methods:
|
|
result = getattr(s.resample(freq), method)()
|
|
|
|
expected = s.copy()
|
|
expected.index = s.index._shallow_copy(freq=freq)
|
|
assert_index_equal(result.index, expected.index)
|
|
assert result.index.freq == expected.index.freq
|
|
assert_series_equal(result, expected, check_dtype=False)
|
|
|
|
def test_resample_empty_dataframe(self):
|
|
# GH13212
|
|
index = self.create_series().index[:0]
|
|
f = DataFrame(index=index)
|
|
|
|
for freq in ['M', 'D', 'H']:
|
|
# count retains dimensions too
|
|
methods = downsample_methods + upsample_methods
|
|
for method in methods:
|
|
result = getattr(f.resample(freq), method)()
|
|
if method != 'size':
|
|
expected = f.copy()
|
|
else:
|
|
# GH14962
|
|
expected = Series([])
|
|
|
|
expected.index = f.index._shallow_copy(freq=freq)
|
|
assert_index_equal(result.index, expected.index)
|
|
assert result.index.freq == expected.index.freq
|
|
assert_almost_equal(result, expected, check_dtype=False)
|
|
|
|
# test size for GH13212 (currently stays as df)
|
|
|
|
@pytest.mark.parametrize("index", tm.all_timeseries_index_generator(0))
|
|
@pytest.mark.parametrize(
|
|
"dtype",
|
|
[np.float, np.int, np.object, 'datetime64[ns]'])
|
|
def test_resample_empty_dtypes(self, index, dtype):
|
|
|
|
# Empty series were sometimes causing a segfault (for the functions
|
|
# with Cython bounds-checking disabled) or an IndexError. We just run
|
|
# them to ensure they no longer do. (GH #10228)
|
|
for how in downsample_methods + upsample_methods:
|
|
empty_series = Series([], index, dtype)
|
|
try:
|
|
getattr(empty_series.resample('d'), how)()
|
|
except DataError:
|
|
# Ignore these since some combinations are invalid
|
|
# (ex: doing mean with dtype of np.object)
|
|
pass
|
|
|
|
def test_resample_loffset_arg_type(self):
|
|
# GH 13218, 15002
|
|
df = self.create_series().to_frame('value')
|
|
expected_means = [df.values[i:i + 2].mean()
|
|
for i in range(0, len(df.values), 2)]
|
|
expected_index = self.create_index(df.index[0],
|
|
periods=len(df.index) / 2,
|
|
freq='2D')
|
|
|
|
# loffset coerces PeriodIndex to DateTimeIndex
|
|
if isinstance(expected_index, PeriodIndex):
|
|
expected_index = expected_index.to_timestamp()
|
|
|
|
expected_index += timedelta(hours=2)
|
|
expected = DataFrame({'value': expected_means}, index=expected_index)
|
|
|
|
for arg in ['mean', {'value': 'mean'}, ['mean']]:
|
|
|
|
result_agg = df.resample('2D', loffset='2H').agg(arg)
|
|
|
|
with tm.assert_produces_warning(FutureWarning,
|
|
check_stacklevel=False):
|
|
result_how = df.resample('2D', how=arg, loffset='2H')
|
|
|
|
if isinstance(arg, list):
|
|
expected.columns = pd.MultiIndex.from_tuples([('value',
|
|
'mean')])
|
|
|
|
# GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex
|
|
if isinstance(expected.index, TimedeltaIndex):
|
|
with pytest.raises(AssertionError):
|
|
assert_frame_equal(result_agg, expected)
|
|
assert_frame_equal(result_how, expected)
|
|
else:
|
|
assert_frame_equal(result_agg, expected)
|
|
assert_frame_equal(result_how, expected)
|
|
|
|
def test_apply_to_empty_series(self):
|
|
# GH 14313
|
|
series = self.create_series()[:0]
|
|
|
|
for freq in ['M', 'D', 'H']:
|
|
result = series.resample(freq).apply(lambda x: 1)
|
|
expected = series.resample(freq).apply(np.sum)
|
|
|
|
assert_series_equal(result, expected, check_dtype=False)
|
|
|
|
|
|
class TestDatetimeIndex(Base):
|
|
_index_factory = lambda x: date_range
|
|
|
|
@pytest.fixture
|
|
def _series_name(self):
|
|
return 'dti'
|
|
|
|
def setup_method(self, method):
|
|
dti = DatetimeIndex(start=datetime(2005, 1, 1),
|
|
end=datetime(2005, 1, 10), freq='Min')
|
|
|
|
self.series = Series(np.random.rand(len(dti)), dti)
|
|
|
|
def create_series(self):
|
|
i = date_range(datetime(2005, 1, 1),
|
|
datetime(2005, 1, 10), freq='D')
|
|
|
|
return Series(np.arange(len(i)), index=i, name='dti')
|
|
|
|
def test_custom_grouper(self):
|
|
|
|
dti = DatetimeIndex(freq='Min', start=datetime(2005, 1, 1),
|
|
end=datetime(2005, 1, 10))
|
|
|
|
s = Series(np.array([1] * len(dti)), index=dti, dtype='int64')
|
|
|
|
b = TimeGrouper(Minute(5))
|
|
g = s.groupby(b)
|
|
|
|
# check all cython functions work
|
|
funcs = ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var']
|
|
for f in funcs:
|
|
g._cython_agg_general(f)
|
|
|
|
b = TimeGrouper(Minute(5), closed='right', label='right')
|
|
g = s.groupby(b)
|
|
# check all cython functions work
|
|
funcs = ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var']
|
|
for f in funcs:
|
|
g._cython_agg_general(f)
|
|
|
|
assert g.ngroups == 2593
|
|
assert notna(g.mean()).all()
|
|
|
|
# construct expected val
|
|
arr = [1] + [5] * 2592
|
|
idx = dti[0:-1:5]
|
|
idx = idx.append(dti[-1:])
|
|
expect = Series(arr, index=idx)
|
|
|
|
# GH2763 - return in put dtype if we can
|
|
result = g.agg(np.sum)
|
|
assert_series_equal(result, expect)
|
|
|
|
df = DataFrame(np.random.rand(len(dti), 10),
|
|
index=dti, dtype='float64')
|
|
r = df.groupby(b).agg(np.sum)
|
|
|
|
assert len(r.columns) == 10
|
|
assert len(r.index) == 2593
|
|
|
|
def test_resample_basic(self):
|
|
rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min',
|
|
name='index')
|
|
s = Series(np.random.randn(14), index=rng)
|
|
|
|
result = s.resample('5min', closed='right', label='right').mean()
|
|
|
|
exp_idx = date_range('1/1/2000', periods=4, freq='5min', name='index')
|
|
expected = Series([s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()],
|
|
index=exp_idx)
|
|
assert_series_equal(result, expected)
|
|
assert result.index.name == 'index'
|
|
|
|
result = s.resample('5min', closed='left', label='right').mean()
|
|
|
|
exp_idx = date_range('1/1/2000 00:05', periods=3, freq='5min',
|
|
name='index')
|
|
expected = Series([s[:5].mean(), s[5:10].mean(),
|
|
s[10:].mean()], index=exp_idx)
|
|
assert_series_equal(result, expected)
|
|
|
|
s = self.series
|
|
result = s.resample('5Min').last()
|
|
grouper = TimeGrouper(Minute(5), closed='left', label='left')
|
|
expect = s.groupby(grouper).agg(lambda x: x[-1])
|
|
assert_series_equal(result, expect)
|
|
|
|
def test_resample_string_kwargs(self):
|
|
# Test for issue #19303
|
|
rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min',
|
|
name='index')
|
|
s = Series(np.random.randn(14), index=rng)
|
|
|
|
# Check that wrong keyword argument strings raise an error
|
|
with pytest.raises(ValueError):
|
|
s.resample('5min', label='righttt').mean()
|
|
with pytest.raises(ValueError):
|
|
s.resample('5min', closed='righttt').mean()
|
|
with pytest.raises(ValueError):
|
|
s.resample('5min', convention='starttt').mean()
|
|
|
|
def test_resample_how(self):
|
|
rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min',
|
|
name='index')
|
|
s = Series(np.random.randn(14), index=rng)
|
|
grouplist = np.ones_like(s)
|
|
grouplist[0] = 0
|
|
grouplist[1:6] = 1
|
|
grouplist[6:11] = 2
|
|
grouplist[11:] = 3
|
|
args = downsample_methods
|
|
|
|
def _ohlc(group):
|
|
if isna(group).all():
|
|
return np.repeat(np.nan, 4)
|
|
return [group[0], group.max(), group.min(), group[-1]]
|
|
|
|
inds = date_range('1/1/2000', periods=4, freq='5min', name='index')
|
|
|
|
for arg in args:
|
|
if arg == 'ohlc':
|
|
func = _ohlc
|
|
else:
|
|
func = arg
|
|
try:
|
|
result = getattr(s.resample(
|
|
'5min', closed='right', label='right'), arg)()
|
|
|
|
expected = s.groupby(grouplist).agg(func)
|
|
assert result.index.name == 'index'
|
|
if arg == 'ohlc':
|
|
expected = DataFrame(expected.values.tolist())
|
|
expected.columns = ['open', 'high', 'low', 'close']
|
|
expected.index = Index(inds, name='index')
|
|
assert_frame_equal(result, expected)
|
|
else:
|
|
expected.index = inds
|
|
assert_series_equal(result, expected)
|
|
except BaseException as exc:
|
|
|
|
exc.args += ('how=%s' % arg,)
|
|
raise
|
|
|
|
def test_numpy_compat(self):
|
|
# see gh-12811
|
|
s = Series([1, 2, 3, 4, 5], index=date_range(
|
|
'20130101', periods=5, freq='s'))
|
|
r = s.resample('2s')
|
|
|
|
msg = "numpy operations are not valid with resample"
|
|
|
|
for func in ('min', 'max', 'sum', 'prod',
|
|
'mean', 'var', 'std'):
|
|
tm.assert_raises_regex(UnsupportedFunctionCall, msg,
|
|
getattr(r, func),
|
|
func, 1, 2, 3)
|
|
tm.assert_raises_regex(UnsupportedFunctionCall, msg,
|
|
getattr(r, func), axis=1)
|
|
|
|
def test_resample_how_callables(self):
|
|
# GH 7929
|
|
data = np.arange(5, dtype=np.int64)
|
|
ind = pd.DatetimeIndex(start='2014-01-01', periods=len(data), freq='d')
|
|
df = DataFrame({"A": data, "B": data}, index=ind)
|
|
|
|
def fn(x, a=1):
|
|
return str(type(x))
|
|
|
|
class FnClass(object):
|
|
|
|
def __call__(self, x):
|
|
return str(type(x))
|
|
|
|
df_standard = df.resample("M").apply(fn)
|
|
df_lambda = df.resample("M").apply(lambda x: str(type(x)))
|
|
df_partial = df.resample("M").apply(partial(fn))
|
|
df_partial2 = df.resample("M").apply(partial(fn, a=2))
|
|
df_class = df.resample("M").apply(FnClass())
|
|
|
|
assert_frame_equal(df_standard, df_lambda)
|
|
assert_frame_equal(df_standard, df_partial)
|
|
assert_frame_equal(df_standard, df_partial2)
|
|
assert_frame_equal(df_standard, df_class)
|
|
|
|
def test_resample_with_timedeltas(self):
|
|
|
|
expected = DataFrame({'A': np.arange(1480)})
|
|
expected = expected.groupby(expected.index // 30).sum()
|
|
expected.index = pd.timedelta_range('0 days', freq='30T', periods=50)
|
|
|
|
df = DataFrame({'A': np.arange(1480)}, index=pd.to_timedelta(
|
|
np.arange(1480), unit='T'))
|
|
result = df.resample('30T').sum()
|
|
|
|
assert_frame_equal(result, expected)
|
|
|
|
s = df['A']
|
|
result = s.resample('30T').sum()
|
|
assert_series_equal(result, expected['A'])
|
|
|
|
def test_resample_single_period_timedelta(self):
|
|
|
|
s = Series(list(range(5)), index=pd.timedelta_range(
|
|
'1 day', freq='s', periods=5))
|
|
result = s.resample('2s').sum()
|
|
expected = Series([1, 5, 4], index=pd.timedelta_range(
|
|
'1 day', freq='2s', periods=3))
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_timedelta_idempotency(self):
|
|
|
|
# GH 12072
|
|
index = pd.timedelta_range('0', periods=9, freq='10L')
|
|
series = Series(range(9), index=index)
|
|
result = series.resample('10L').mean()
|
|
expected = series
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_rounding(self):
|
|
# GH 8371
|
|
# odd results when rounding is needed
|
|
|
|
data = """date,time,value
|
|
11-08-2014,00:00:01.093,1
|
|
11-08-2014,00:00:02.159,1
|
|
11-08-2014,00:00:02.667,1
|
|
11-08-2014,00:00:03.175,1
|
|
11-08-2014,00:00:07.058,1
|
|
11-08-2014,00:00:07.362,1
|
|
11-08-2014,00:00:08.324,1
|
|
11-08-2014,00:00:08.830,1
|
|
11-08-2014,00:00:08.982,1
|
|
11-08-2014,00:00:09.815,1
|
|
11-08-2014,00:00:10.540,1
|
|
11-08-2014,00:00:11.061,1
|
|
11-08-2014,00:00:11.617,1
|
|
11-08-2014,00:00:13.607,1
|
|
11-08-2014,00:00:14.535,1
|
|
11-08-2014,00:00:15.525,1
|
|
11-08-2014,00:00:17.960,1
|
|
11-08-2014,00:00:20.674,1
|
|
11-08-2014,00:00:21.191,1"""
|
|
|
|
from pandas.compat import StringIO
|
|
df = pd.read_csv(StringIO(data), parse_dates={'timestamp': [
|
|
'date', 'time']}, index_col='timestamp')
|
|
df.index.name = None
|
|
result = df.resample('6s').sum()
|
|
expected = DataFrame({'value': [
|
|
4, 9, 4, 2
|
|
]}, index=date_range('2014-11-08', freq='6s', periods=4))
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = df.resample('7s').sum()
|
|
expected = DataFrame({'value': [
|
|
4, 10, 4, 1
|
|
]}, index=date_range('2014-11-08', freq='7s', periods=4))
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = df.resample('11s').sum()
|
|
expected = DataFrame({'value': [
|
|
11, 8
|
|
]}, index=date_range('2014-11-08', freq='11s', periods=2))
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = df.resample('13s').sum()
|
|
expected = DataFrame({'value': [
|
|
13, 6
|
|
]}, index=date_range('2014-11-08', freq='13s', periods=2))
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = df.resample('17s').sum()
|
|
expected = DataFrame({'value': [
|
|
16, 3
|
|
]}, index=date_range('2014-11-08', freq='17s', periods=2))
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_resample_basic_from_daily(self):
|
|
# from daily
|
|
dti = DatetimeIndex(start=datetime(2005, 1, 1),
|
|
end=datetime(2005, 1, 10), freq='D', name='index')
|
|
|
|
s = Series(np.random.rand(len(dti)), dti)
|
|
|
|
# to weekly
|
|
result = s.resample('w-sun').last()
|
|
|
|
assert len(result) == 3
|
|
assert (result.index.dayofweek == [6, 6, 6]).all()
|
|
assert result.iloc[0] == s['1/2/2005']
|
|
assert result.iloc[1] == s['1/9/2005']
|
|
assert result.iloc[2] == s.iloc[-1]
|
|
|
|
result = s.resample('W-MON').last()
|
|
assert len(result) == 2
|
|
assert (result.index.dayofweek == [0, 0]).all()
|
|
assert result.iloc[0] == s['1/3/2005']
|
|
assert result.iloc[1] == s['1/10/2005']
|
|
|
|
result = s.resample('W-TUE').last()
|
|
assert len(result) == 2
|
|
assert (result.index.dayofweek == [1, 1]).all()
|
|
assert result.iloc[0] == s['1/4/2005']
|
|
assert result.iloc[1] == s['1/10/2005']
|
|
|
|
result = s.resample('W-WED').last()
|
|
assert len(result) == 2
|
|
assert (result.index.dayofweek == [2, 2]).all()
|
|
assert result.iloc[0] == s['1/5/2005']
|
|
assert result.iloc[1] == s['1/10/2005']
|
|
|
|
result = s.resample('W-THU').last()
|
|
assert len(result) == 2
|
|
assert (result.index.dayofweek == [3, 3]).all()
|
|
assert result.iloc[0] == s['1/6/2005']
|
|
assert result.iloc[1] == s['1/10/2005']
|
|
|
|
result = s.resample('W-FRI').last()
|
|
assert len(result) == 2
|
|
assert (result.index.dayofweek == [4, 4]).all()
|
|
assert result.iloc[0] == s['1/7/2005']
|
|
assert result.iloc[1] == s['1/10/2005']
|
|
|
|
# to biz day
|
|
result = s.resample('B').last()
|
|
assert len(result) == 7
|
|
assert (result.index.dayofweek == [4, 0, 1, 2, 3, 4, 0]).all()
|
|
|
|
assert result.iloc[0] == s['1/2/2005']
|
|
assert result.iloc[1] == s['1/3/2005']
|
|
assert result.iloc[5] == s['1/9/2005']
|
|
assert result.index.name == 'index'
|
|
|
|
def test_resample_upsampling_picked_but_not_correct(self):
|
|
|
|
# Test for issue #3020
|
|
dates = date_range('01-Jan-2014', '05-Jan-2014', freq='D')
|
|
series = Series(1, index=dates)
|
|
|
|
result = series.resample('D').mean()
|
|
assert result.index[0] == dates[0]
|
|
|
|
# GH 5955
|
|
# incorrect deciding to upsample when the axis frequency matches the
|
|
# resample frequency
|
|
|
|
import datetime
|
|
s = Series(np.arange(1., 6), index=[datetime.datetime(
|
|
1975, 1, i, 12, 0) for i in range(1, 6)])
|
|
expected = Series(np.arange(1., 6), index=date_range(
|
|
'19750101', periods=5, freq='D'))
|
|
|
|
result = s.resample('D').count()
|
|
assert_series_equal(result, Series(1, index=expected.index))
|
|
|
|
result1 = s.resample('D').sum()
|
|
result2 = s.resample('D').mean()
|
|
assert_series_equal(result1, expected)
|
|
assert_series_equal(result2, expected)
|
|
|
|
def test_resample_frame_basic(self):
|
|
df = tm.makeTimeDataFrame()
|
|
|
|
b = TimeGrouper('M')
|
|
g = df.groupby(b)
|
|
|
|
# check all cython functions work
|
|
funcs = ['add', 'mean', 'prod', 'min', 'max', 'var']
|
|
for f in funcs:
|
|
g._cython_agg_general(f)
|
|
|
|
result = df.resample('A').mean()
|
|
assert_series_equal(result['A'], df['A'].resample('A').mean())
|
|
|
|
result = df.resample('M').mean()
|
|
assert_series_equal(result['A'], df['A'].resample('M').mean())
|
|
|
|
df.resample('M', kind='period').mean()
|
|
df.resample('W-WED', kind='period').mean()
|
|
|
|
def test_resample_loffset(self):
|
|
rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min')
|
|
s = Series(np.random.randn(14), index=rng)
|
|
|
|
result = s.resample('5min', closed='right', label='right',
|
|
loffset=timedelta(minutes=1)).mean()
|
|
idx = date_range('1/1/2000', periods=4, freq='5min')
|
|
expected = Series([s[0], s[1:6].mean(), s[6:11].mean(), s[11:].mean()],
|
|
index=idx + timedelta(minutes=1))
|
|
assert_series_equal(result, expected)
|
|
|
|
expected = s.resample(
|
|
'5min', closed='right', label='right',
|
|
loffset='1min').mean()
|
|
assert_series_equal(result, expected)
|
|
|
|
expected = s.resample(
|
|
'5min', closed='right', label='right',
|
|
loffset=Minute(1)).mean()
|
|
assert_series_equal(result, expected)
|
|
|
|
assert result.index.freq == Minute(5)
|
|
|
|
# from daily
|
|
dti = DatetimeIndex(start=datetime(2005, 1, 1),
|
|
end=datetime(2005, 1, 10), freq='D')
|
|
ser = Series(np.random.rand(len(dti)), dti)
|
|
|
|
# to weekly
|
|
result = ser.resample('w-sun').last()
|
|
expected = ser.resample('w-sun', loffset=-bday).last()
|
|
assert result.index[0] - bday == expected.index[0]
|
|
|
|
def test_resample_loffset_upsample(self):
|
|
# GH 20744
|
|
rng = date_range('1/1/2000 00:00:00', '1/1/2000 00:13:00', freq='min')
|
|
s = Series(np.random.randn(14), index=rng)
|
|
|
|
result = s.resample('5min', closed='right', label='right',
|
|
loffset=timedelta(minutes=1)).ffill()
|
|
idx = date_range('1/1/2000', periods=4, freq='5min')
|
|
expected = Series([s[0], s[5], s[10], s[-1]],
|
|
index=idx + timedelta(minutes=1))
|
|
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_loffset_count(self):
|
|
# GH 12725
|
|
start_time = '1/1/2000 00:00:00'
|
|
rng = date_range(start_time, periods=100, freq='S')
|
|
ts = Series(np.random.randn(len(rng)), index=rng)
|
|
|
|
result = ts.resample('10S', loffset='1s').count()
|
|
|
|
expected_index = (
|
|
date_range(start_time, periods=10, freq='10S') +
|
|
timedelta(seconds=1)
|
|
)
|
|
expected = Series(10, index=expected_index)
|
|
|
|
assert_series_equal(result, expected)
|
|
|
|
# Same issue should apply to .size() since it goes through
|
|
# same code path
|
|
result = ts.resample('10S', loffset='1s').size()
|
|
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_upsample(self):
|
|
# from daily
|
|
dti = DatetimeIndex(start=datetime(2005, 1, 1),
|
|
end=datetime(2005, 1, 10), freq='D', name='index')
|
|
|
|
s = Series(np.random.rand(len(dti)), dti)
|
|
|
|
# to minutely, by padding
|
|
result = s.resample('Min').pad()
|
|
assert len(result) == 12961
|
|
assert result[0] == s[0]
|
|
assert result[-1] == s[-1]
|
|
|
|
assert result.index.name == 'index'
|
|
|
|
def test_resample_how_method(self):
|
|
# GH9915
|
|
s = Series([11, 22],
|
|
index=[Timestamp('2015-03-31 21:48:52.672000'),
|
|
Timestamp('2015-03-31 21:49:52.739000')])
|
|
expected = Series([11, np.NaN, np.NaN, np.NaN, np.NaN, np.NaN, 22],
|
|
index=[Timestamp('2015-03-31 21:48:50'),
|
|
Timestamp('2015-03-31 21:49:00'),
|
|
Timestamp('2015-03-31 21:49:10'),
|
|
Timestamp('2015-03-31 21:49:20'),
|
|
Timestamp('2015-03-31 21:49:30'),
|
|
Timestamp('2015-03-31 21:49:40'),
|
|
Timestamp('2015-03-31 21:49:50')])
|
|
assert_series_equal(s.resample("10S").mean(), expected)
|
|
|
|
def test_resample_extra_index_point(self):
|
|
# GH 9756
|
|
index = DatetimeIndex(start='20150101', end='20150331', freq='BM')
|
|
expected = DataFrame({'A': Series([21, 41, 63], index=index)})
|
|
|
|
index = DatetimeIndex(start='20150101', end='20150331', freq='B')
|
|
df = DataFrame(
|
|
{'A': Series(range(len(index)), index=index)}, dtype='int64')
|
|
result = df.resample('BM').last()
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_upsample_with_limit(self):
|
|
rng = date_range('1/1/2000', periods=3, freq='5t')
|
|
ts = Series(np.random.randn(len(rng)), rng)
|
|
|
|
result = ts.resample('t').ffill(limit=2)
|
|
expected = ts.reindex(result.index, method='ffill', limit=2)
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_nearest_upsample_with_limit(self):
|
|
rng = date_range('1/1/2000', periods=3, freq='5t')
|
|
ts = Series(np.random.randn(len(rng)), rng)
|
|
|
|
result = ts.resample('t').nearest(limit=2)
|
|
expected = ts.reindex(result.index, method='nearest', limit=2)
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_ohlc(self):
|
|
s = self.series
|
|
|
|
grouper = TimeGrouper(Minute(5))
|
|
expect = s.groupby(grouper).agg(lambda x: x[-1])
|
|
result = s.resample('5Min').ohlc()
|
|
|
|
assert len(result) == len(expect)
|
|
assert len(result.columns) == 4
|
|
|
|
xs = result.iloc[-2]
|
|
assert xs['open'] == s[-6]
|
|
assert xs['high'] == s[-6:-1].max()
|
|
assert xs['low'] == s[-6:-1].min()
|
|
assert xs['close'] == s[-2]
|
|
|
|
xs = result.iloc[0]
|
|
assert xs['open'] == s[0]
|
|
assert xs['high'] == s[:5].max()
|
|
assert xs['low'] == s[:5].min()
|
|
assert xs['close'] == s[4]
|
|
|
|
def test_resample_ohlc_result(self):
|
|
|
|
# GH 12332
|
|
index = pd.date_range('1-1-2000', '2-15-2000', freq='h')
|
|
index = index.union(pd.date_range('4-15-2000', '5-15-2000', freq='h'))
|
|
s = Series(range(len(index)), index=index)
|
|
|
|
a = s.loc[:'4-15-2000'].resample('30T').ohlc()
|
|
assert isinstance(a, DataFrame)
|
|
|
|
b = s.loc[:'4-14-2000'].resample('30T').ohlc()
|
|
assert isinstance(b, DataFrame)
|
|
|
|
# GH12348
|
|
# raising on odd period
|
|
rng = date_range('2013-12-30', '2014-01-07')
|
|
index = rng.drop([Timestamp('2014-01-01'),
|
|
Timestamp('2013-12-31'),
|
|
Timestamp('2014-01-04'),
|
|
Timestamp('2014-01-05')])
|
|
df = DataFrame(data=np.arange(len(index)), index=index)
|
|
result = df.resample('B').mean()
|
|
expected = df.reindex(index=date_range(rng[0], rng[-1], freq='B'))
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_resample_ohlc_dataframe(self):
|
|
df = (
|
|
DataFrame({
|
|
'PRICE': {
|
|
Timestamp('2011-01-06 10:59:05', tz=None): 24990,
|
|
Timestamp('2011-01-06 12:43:33', tz=None): 25499,
|
|
Timestamp('2011-01-06 12:54:09', tz=None): 25499},
|
|
'VOLUME': {
|
|
Timestamp('2011-01-06 10:59:05', tz=None): 1500000000,
|
|
Timestamp('2011-01-06 12:43:33', tz=None): 5000000000,
|
|
Timestamp('2011-01-06 12:54:09', tz=None): 100000000}})
|
|
).reindex(['VOLUME', 'PRICE'], axis=1)
|
|
res = df.resample('H').ohlc()
|
|
exp = pd.concat([df['VOLUME'].resample('H').ohlc(),
|
|
df['PRICE'].resample('H').ohlc()],
|
|
axis=1,
|
|
keys=['VOLUME', 'PRICE'])
|
|
assert_frame_equal(exp, res)
|
|
|
|
df.columns = [['a', 'b'], ['c', 'd']]
|
|
res = df.resample('H').ohlc()
|
|
exp.columns = pd.MultiIndex.from_tuples([
|
|
('a', 'c', 'open'), ('a', 'c', 'high'), ('a', 'c', 'low'),
|
|
('a', 'c', 'close'), ('b', 'd', 'open'), ('b', 'd', 'high'),
|
|
('b', 'd', 'low'), ('b', 'd', 'close')])
|
|
assert_frame_equal(exp, res)
|
|
|
|
# dupe columns fail atm
|
|
# df.columns = ['PRICE', 'PRICE']
|
|
|
|
def test_resample_dup_index(self):
|
|
|
|
# GH 4812
|
|
# dup columns with resample raising
|
|
df = DataFrame(np.random.randn(4, 12), index=[2000, 2000, 2000, 2000],
|
|
columns=[Period(year=2000, month=i + 1, freq='M')
|
|
for i in range(12)])
|
|
df.iloc[3, :] = np.nan
|
|
result = df.resample('Q', axis=1).mean()
|
|
expected = df.groupby(lambda x: int((x.month - 1) / 3), axis=1).mean()
|
|
expected.columns = [
|
|
Period(year=2000, quarter=i + 1, freq='Q') for i in range(4)]
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_resample_reresample(self):
|
|
dti = DatetimeIndex(start=datetime(2005, 1, 1),
|
|
end=datetime(2005, 1, 10), freq='D')
|
|
s = Series(np.random.rand(len(dti)), dti)
|
|
bs = s.resample('B', closed='right', label='right').mean()
|
|
result = bs.resample('8H').mean()
|
|
assert len(result) == 22
|
|
assert isinstance(result.index.freq, offsets.DateOffset)
|
|
assert result.index.freq == offsets.Hour(8)
|
|
|
|
def test_resample_timestamp_to_period(self):
|
|
ts = _simple_ts('1/1/1990', '1/1/2000')
|
|
|
|
result = ts.resample('A-DEC', kind='period').mean()
|
|
expected = ts.resample('A-DEC').mean()
|
|
expected.index = period_range('1990', '2000', freq='a-dec')
|
|
assert_series_equal(result, expected)
|
|
|
|
result = ts.resample('A-JUN', kind='period').mean()
|
|
expected = ts.resample('A-JUN').mean()
|
|
expected.index = period_range('1990', '2000', freq='a-jun')
|
|
assert_series_equal(result, expected)
|
|
|
|
result = ts.resample('M', kind='period').mean()
|
|
expected = ts.resample('M').mean()
|
|
expected.index = period_range('1990-01', '2000-01', freq='M')
|
|
assert_series_equal(result, expected)
|
|
|
|
result = ts.resample('M', kind='period').mean()
|
|
expected = ts.resample('M').mean()
|
|
expected.index = period_range('1990-01', '2000-01', freq='M')
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_ohlc_5min(self):
|
|
def _ohlc(group):
|
|
if isna(group).all():
|
|
return np.repeat(np.nan, 4)
|
|
return [group[0], group.max(), group.min(), group[-1]]
|
|
|
|
rng = date_range('1/1/2000 00:00:00', '1/1/2000 5:59:50', freq='10s')
|
|
ts = Series(np.random.randn(len(rng)), index=rng)
|
|
|
|
resampled = ts.resample('5min', closed='right',
|
|
label='right').ohlc()
|
|
|
|
assert (resampled.loc['1/1/2000 00:00'] == ts[0]).all()
|
|
|
|
exp = _ohlc(ts[1:31])
|
|
assert (resampled.loc['1/1/2000 00:05'] == exp).all()
|
|
|
|
exp = _ohlc(ts['1/1/2000 5:55:01':])
|
|
assert (resampled.loc['1/1/2000 6:00:00'] == exp).all()
|
|
|
|
def test_downsample_non_unique(self):
|
|
rng = date_range('1/1/2000', '2/29/2000')
|
|
rng2 = rng.repeat(5).values
|
|
ts = Series(np.random.randn(len(rng2)), index=rng2)
|
|
|
|
result = ts.resample('M').mean()
|
|
|
|
expected = ts.groupby(lambda x: x.month).mean()
|
|
assert len(result) == 2
|
|
assert_almost_equal(result[0], expected[1])
|
|
assert_almost_equal(result[1], expected[2])
|
|
|
|
def test_asfreq_non_unique(self):
|
|
# GH #1077
|
|
rng = date_range('1/1/2000', '2/29/2000')
|
|
rng2 = rng.repeat(2).values
|
|
ts = Series(np.random.randn(len(rng2)), index=rng2)
|
|
|
|
pytest.raises(Exception, ts.asfreq, 'B')
|
|
|
|
def test_resample_axis1(self):
|
|
rng = date_range('1/1/2000', '2/29/2000')
|
|
df = DataFrame(np.random.randn(3, len(rng)), columns=rng,
|
|
index=['a', 'b', 'c'])
|
|
|
|
result = df.resample('M', axis=1).mean()
|
|
expected = df.T.resample('M').mean().T
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_resample_panel(self):
|
|
rng = date_range('1/1/2000', '6/30/2000')
|
|
n = len(rng)
|
|
|
|
with catch_warnings(record=True):
|
|
panel = Panel(np.random.randn(3, n, 5),
|
|
items=['one', 'two', 'three'],
|
|
major_axis=rng,
|
|
minor_axis=['a', 'b', 'c', 'd', 'e'])
|
|
|
|
result = panel.resample('M', axis=1).mean()
|
|
|
|
def p_apply(panel, f):
|
|
result = {}
|
|
for item in panel.items:
|
|
result[item] = f(panel[item])
|
|
return Panel(result, items=panel.items)
|
|
|
|
expected = p_apply(panel, lambda x: x.resample('M').mean())
|
|
tm.assert_panel_equal(result, expected)
|
|
|
|
panel2 = panel.swapaxes(1, 2)
|
|
result = panel2.resample('M', axis=2).mean()
|
|
expected = p_apply(panel2,
|
|
lambda x: x.resample('M', axis=1).mean())
|
|
tm.assert_panel_equal(result, expected)
|
|
|
|
def test_resample_panel_numpy(self):
|
|
rng = date_range('1/1/2000', '6/30/2000')
|
|
n = len(rng)
|
|
|
|
with catch_warnings(record=True):
|
|
panel = Panel(np.random.randn(3, n, 5),
|
|
items=['one', 'two', 'three'],
|
|
major_axis=rng,
|
|
minor_axis=['a', 'b', 'c', 'd', 'e'])
|
|
|
|
result = panel.resample('M', axis=1).apply(lambda x: x.mean(1))
|
|
expected = panel.resample('M', axis=1).mean()
|
|
tm.assert_panel_equal(result, expected)
|
|
|
|
panel = panel.swapaxes(1, 2)
|
|
result = panel.resample('M', axis=2).apply(lambda x: x.mean(2))
|
|
expected = panel.resample('M', axis=2).mean()
|
|
tm.assert_panel_equal(result, expected)
|
|
|
|
def test_resample_anchored_ticks(self):
|
|
# If a fixed delta (5 minute, 4 hour) evenly divides a day, we should
|
|
# "anchor" the origin at midnight so we get regular intervals rather
|
|
# than starting from the first timestamp which might start in the
|
|
# middle of a desired interval
|
|
|
|
rng = date_range('1/1/2000 04:00:00', periods=86400, freq='s')
|
|
ts = Series(np.random.randn(len(rng)), index=rng)
|
|
ts[:2] = np.nan # so results are the same
|
|
|
|
freqs = ['t', '5t', '15t', '30t', '4h', '12h']
|
|
for freq in freqs:
|
|
result = ts[2:].resample(freq, closed='left', label='left').mean()
|
|
expected = ts.resample(freq, closed='left', label='left').mean()
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_single_group(self):
|
|
mysum = lambda x: x.sum()
|
|
|
|
rng = date_range('2000-1-1', '2000-2-10', freq='D')
|
|
ts = Series(np.random.randn(len(rng)), index=rng)
|
|
assert_series_equal(ts.resample('M').sum(),
|
|
ts.resample('M').apply(mysum))
|
|
|
|
rng = date_range('2000-1-1', '2000-1-10', freq='D')
|
|
ts = Series(np.random.randn(len(rng)), index=rng)
|
|
assert_series_equal(ts.resample('M').sum(),
|
|
ts.resample('M').apply(mysum))
|
|
|
|
# GH 3849
|
|
s = Series([30.1, 31.6], index=[Timestamp('20070915 15:30:00'),
|
|
Timestamp('20070915 15:40:00')])
|
|
expected = Series([0.75], index=[Timestamp('20070915')])
|
|
result = s.resample('D').apply(lambda x: np.std(x))
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_base(self):
|
|
rng = date_range('1/1/2000 00:00:00', '1/1/2000 02:00', freq='s')
|
|
ts = Series(np.random.randn(len(rng)), index=rng)
|
|
|
|
resampled = ts.resample('5min', base=2).mean()
|
|
exp_rng = date_range('12/31/1999 23:57:00', '1/1/2000 01:57',
|
|
freq='5min')
|
|
tm.assert_index_equal(resampled.index, exp_rng)
|
|
|
|
def test_resample_base_with_timedeltaindex(self):
|
|
|
|
# GH 10530
|
|
rng = timedelta_range(start='0s', periods=25, freq='s')
|
|
ts = Series(np.random.randn(len(rng)), index=rng)
|
|
|
|
with_base = ts.resample('2s', base=5).mean()
|
|
without_base = ts.resample('2s').mean()
|
|
|
|
exp_without_base = timedelta_range(start='0s', end='25s', freq='2s')
|
|
exp_with_base = timedelta_range(start='5s', end='29s', freq='2s')
|
|
|
|
tm.assert_index_equal(without_base.index, exp_without_base)
|
|
tm.assert_index_equal(with_base.index, exp_with_base)
|
|
|
|
def test_resample_categorical_data_with_timedeltaindex(self):
|
|
# GH #12169
|
|
df = DataFrame({'Group_obj': 'A'},
|
|
index=pd.to_timedelta(list(range(20)), unit='s'))
|
|
df['Group'] = df['Group_obj'].astype('category')
|
|
result = df.resample('10s').agg(lambda x: (x.value_counts().index[0]))
|
|
expected = DataFrame({'Group_obj': ['A', 'A'],
|
|
'Group': ['A', 'A']},
|
|
index=pd.to_timedelta([0, 10], unit='s'))
|
|
expected = expected.reindex(['Group_obj', 'Group'], axis=1)
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_resample_daily_anchored(self):
|
|
rng = date_range('1/1/2000 0:00:00', periods=10000, freq='T')
|
|
ts = Series(np.random.randn(len(rng)), index=rng)
|
|
ts[:2] = np.nan # so results are the same
|
|
|
|
result = ts[2:].resample('D', closed='left', label='left').mean()
|
|
expected = ts.resample('D', closed='left', label='left').mean()
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_to_period_monthly_buglet(self):
|
|
# GH #1259
|
|
|
|
rng = date_range('1/1/2000', '12/31/2000')
|
|
ts = Series(np.random.randn(len(rng)), index=rng)
|
|
|
|
result = ts.resample('M', kind='period').mean()
|
|
exp_index = period_range('Jan-2000', 'Dec-2000', freq='M')
|
|
tm.assert_index_equal(result.index, exp_index)
|
|
|
|
def test_period_with_agg(self):
|
|
|
|
# aggregate a period resampler with a lambda
|
|
s2 = Series(np.random.randint(0, 5, 50),
|
|
index=pd.period_range('2012-01-01', freq='H', periods=50),
|
|
dtype='float64')
|
|
|
|
expected = s2.to_timestamp().resample('D').mean().to_period()
|
|
result = s2.resample('D').agg(lambda x: x.mean())
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_segfault(self):
|
|
# GH 8573
|
|
# segfaulting in older versions
|
|
all_wins_and_wagers = [
|
|
(1, datetime(2013, 10, 1, 16, 20), 1, 0),
|
|
(2, datetime(2013, 10, 1, 16, 10), 1, 0),
|
|
(2, datetime(2013, 10, 1, 18, 15), 1, 0),
|
|
(2, datetime(2013, 10, 1, 16, 10, 31), 1, 0)]
|
|
|
|
df = DataFrame.from_records(all_wins_and_wagers,
|
|
columns=("ID", "timestamp", "A", "B")
|
|
).set_index("timestamp")
|
|
result = df.groupby("ID").resample("5min").sum()
|
|
expected = df.groupby("ID").apply(lambda x: x.resample("5min").sum())
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_resample_dtype_preservation(self):
|
|
|
|
# GH 12202
|
|
# validation tests for dtype preservation
|
|
|
|
df = DataFrame({'date': pd.date_range(start='2016-01-01',
|
|
periods=4, freq='W'),
|
|
'group': [1, 1, 2, 2],
|
|
'val': Series([5, 6, 7, 8],
|
|
dtype='int32')}
|
|
).set_index('date')
|
|
|
|
result = df.resample('1D').ffill()
|
|
assert result.val.dtype == np.int32
|
|
|
|
result = df.groupby('group').resample('1D').ffill()
|
|
assert result.val.dtype == np.int32
|
|
|
|
def test_resample_dtype_coerceion(self):
|
|
|
|
pytest.importorskip('scipy.interpolate')
|
|
|
|
# GH 16361
|
|
df = {"a": [1, 3, 1, 4]}
|
|
df = DataFrame(df, index=pd.date_range("2017-01-01", "2017-01-04"))
|
|
|
|
expected = (df.astype("float64")
|
|
.resample("H")
|
|
.mean()
|
|
["a"]
|
|
.interpolate("cubic")
|
|
)
|
|
|
|
result = df.resample("H")["a"].mean().interpolate("cubic")
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
result = df.resample("H").mean()["a"].interpolate("cubic")
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
def test_weekly_resample_buglet(self):
|
|
# #1327
|
|
rng = date_range('1/1/2000', freq='B', periods=20)
|
|
ts = Series(np.random.randn(len(rng)), index=rng)
|
|
|
|
resampled = ts.resample('W').mean()
|
|
expected = ts.resample('W-SUN').mean()
|
|
assert_series_equal(resampled, expected)
|
|
|
|
def test_monthly_resample_error(self):
|
|
# #1451
|
|
dates = date_range('4/16/2012 20:00', periods=5000, freq='h')
|
|
ts = Series(np.random.randn(len(dates)), index=dates)
|
|
# it works!
|
|
ts.resample('M')
|
|
|
|
def test_nanosecond_resample_error(self):
|
|
# GH 12307 - Values falls after last bin when
|
|
# Resampling using pd.tseries.offsets.Nano as period
|
|
start = 1443707890427
|
|
exp_start = 1443707890400
|
|
indx = pd.date_range(
|
|
start=pd.to_datetime(start),
|
|
periods=10,
|
|
freq='100n'
|
|
)
|
|
ts = Series(range(len(indx)), index=indx)
|
|
r = ts.resample(pd.tseries.offsets.Nano(100))
|
|
result = r.agg('mean')
|
|
|
|
exp_indx = pd.date_range(
|
|
start=pd.to_datetime(exp_start),
|
|
periods=10,
|
|
freq='100n'
|
|
)
|
|
exp = Series(range(len(exp_indx)), index=exp_indx)
|
|
|
|
assert_series_equal(result, exp)
|
|
|
|
def test_resample_anchored_intraday(self):
|
|
# #1471, #1458
|
|
|
|
rng = date_range('1/1/2012', '4/1/2012', freq='100min')
|
|
df = DataFrame(rng.month, index=rng)
|
|
|
|
result = df.resample('M').mean()
|
|
expected = df.resample(
|
|
'M', kind='period').mean().to_timestamp(how='end')
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = df.resample('M', closed='left').mean()
|
|
exp = df.tshift(1, freq='D').resample('M', kind='period').mean()
|
|
exp = exp.to_timestamp(how='end')
|
|
|
|
tm.assert_frame_equal(result, exp)
|
|
|
|
rng = date_range('1/1/2012', '4/1/2012', freq='100min')
|
|
df = DataFrame(rng.month, index=rng)
|
|
|
|
result = df.resample('Q').mean()
|
|
expected = df.resample(
|
|
'Q', kind='period').mean().to_timestamp(how='end')
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = df.resample('Q', closed='left').mean()
|
|
expected = df.tshift(1, freq='D').resample('Q', kind='period',
|
|
closed='left').mean()
|
|
expected = expected.to_timestamp(how='end')
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
ts = _simple_ts('2012-04-29 23:00', '2012-04-30 5:00', freq='h')
|
|
resampled = ts.resample('M').mean()
|
|
assert len(resampled) == 1
|
|
|
|
def test_resample_anchored_monthstart(self):
|
|
ts = _simple_ts('1/1/2000', '12/31/2002')
|
|
|
|
freqs = ['MS', 'BMS', 'QS-MAR', 'AS-DEC', 'AS-JUN']
|
|
|
|
for freq in freqs:
|
|
ts.resample(freq).mean()
|
|
|
|
def test_resample_anchored_multiday(self):
|
|
# When resampling a range spanning multiple days, ensure that the
|
|
# start date gets used to determine the offset. Fixes issue where
|
|
# a one day period is not a multiple of the frequency.
|
|
#
|
|
# See: https://github.com/pandas-dev/pandas/issues/8683
|
|
|
|
index = pd.date_range(
|
|
'2014-10-14 23:06:23.206', periods=3, freq='400L'
|
|
) | pd.date_range(
|
|
'2014-10-15 23:00:00', periods=2, freq='2200L')
|
|
|
|
s = Series(np.random.randn(5), index=index)
|
|
|
|
# Ensure left closing works
|
|
result = s.resample('2200L').mean()
|
|
assert result.index[-1] == Timestamp('2014-10-15 23:00:02.000')
|
|
|
|
# Ensure right closing works
|
|
result = s.resample('2200L', label='right').mean()
|
|
assert result.index[-1] == Timestamp('2014-10-15 23:00:04.200')
|
|
|
|
def test_corner_cases(self):
|
|
# miscellaneous test coverage
|
|
|
|
rng = date_range('1/1/2000', periods=12, freq='t')
|
|
ts = Series(np.random.randn(len(rng)), index=rng)
|
|
|
|
result = ts.resample('5t', closed='right', label='left').mean()
|
|
ex_index = date_range('1999-12-31 23:55', periods=4, freq='5t')
|
|
tm.assert_index_equal(result.index, ex_index)
|
|
|
|
len0pts = _simple_pts('2007-01', '2010-05', freq='M')[:0]
|
|
# it works
|
|
result = len0pts.resample('A-DEC').mean()
|
|
assert len(result) == 0
|
|
|
|
# resample to periods
|
|
ts = _simple_ts('2000-04-28', '2000-04-30 11:00', freq='h')
|
|
result = ts.resample('M', kind='period').mean()
|
|
assert len(result) == 1
|
|
assert result.index[0] == Period('2000-04', freq='M')
|
|
|
|
def test_anchored_lowercase_buglet(self):
|
|
dates = date_range('4/16/2012 20:00', periods=50000, freq='s')
|
|
ts = Series(np.random.randn(len(dates)), index=dates)
|
|
# it works!
|
|
ts.resample('d').mean()
|
|
|
|
def test_upsample_apply_functions(self):
|
|
# #1596
|
|
rng = pd.date_range('2012-06-12', periods=4, freq='h')
|
|
|
|
ts = Series(np.random.randn(len(rng)), index=rng)
|
|
|
|
result = ts.resample('20min').aggregate(['mean', 'sum'])
|
|
assert isinstance(result, DataFrame)
|
|
|
|
def test_resample_not_monotonic(self):
|
|
rng = pd.date_range('2012-06-12', periods=200, freq='h')
|
|
ts = Series(np.random.randn(len(rng)), index=rng)
|
|
|
|
ts = ts.take(np.random.permutation(len(ts)))
|
|
|
|
result = ts.resample('D').sum()
|
|
exp = ts.sort_index().resample('D').sum()
|
|
assert_series_equal(result, exp)
|
|
|
|
def test_resample_median_bug_1688(self):
|
|
|
|
for dtype in ['int64', 'int32', 'float64', 'float32']:
|
|
df = DataFrame([1, 2], index=[datetime(2012, 1, 1, 0, 0, 0),
|
|
datetime(2012, 1, 1, 0, 5, 0)],
|
|
dtype=dtype)
|
|
|
|
result = df.resample("T").apply(lambda x: x.mean())
|
|
exp = df.asfreq('T')
|
|
tm.assert_frame_equal(result, exp)
|
|
|
|
result = df.resample("T").median()
|
|
exp = df.asfreq('T')
|
|
tm.assert_frame_equal(result, exp)
|
|
|
|
def test_how_lambda_functions(self):
|
|
|
|
ts = _simple_ts('1/1/2000', '4/1/2000')
|
|
|
|
result = ts.resample('M').apply(lambda x: x.mean())
|
|
exp = ts.resample('M').mean()
|
|
tm.assert_series_equal(result, exp)
|
|
|
|
foo_exp = ts.resample('M').mean()
|
|
foo_exp.name = 'foo'
|
|
bar_exp = ts.resample('M').std()
|
|
bar_exp.name = 'bar'
|
|
|
|
result = ts.resample('M').apply(
|
|
[lambda x: x.mean(), lambda x: x.std(ddof=1)])
|
|
result.columns = ['foo', 'bar']
|
|
tm.assert_series_equal(result['foo'], foo_exp)
|
|
tm.assert_series_equal(result['bar'], bar_exp)
|
|
|
|
# this is a MI Series, so comparing the names of the results
|
|
# doesn't make sense
|
|
result = ts.resample('M').aggregate({'foo': lambda x: x.mean(),
|
|
'bar': lambda x: x.std(ddof=1)})
|
|
tm.assert_series_equal(result['foo'], foo_exp, check_names=False)
|
|
tm.assert_series_equal(result['bar'], bar_exp, check_names=False)
|
|
|
|
def test_resample_unequal_times(self):
|
|
# #1772
|
|
start = datetime(1999, 3, 1, 5)
|
|
# end hour is less than start
|
|
end = datetime(2012, 7, 31, 4)
|
|
bad_ind = date_range(start, end, freq="30min")
|
|
df = DataFrame({'close': 1}, index=bad_ind)
|
|
|
|
# it works!
|
|
df.resample('AS').sum()
|
|
|
|
def test_resample_consistency(self):
|
|
|
|
# GH 6418
|
|
# resample with bfill / limit / reindex consistency
|
|
|
|
i30 = pd.date_range('2002-02-02', periods=4, freq='30T')
|
|
s = Series(np.arange(4.), index=i30)
|
|
s[2] = np.NaN
|
|
|
|
# Upsample by factor 3 with reindex() and resample() methods:
|
|
i10 = pd.date_range(i30[0], i30[-1], freq='10T')
|
|
|
|
s10 = s.reindex(index=i10, method='bfill')
|
|
s10_2 = s.reindex(index=i10, method='bfill', limit=2)
|
|
rl = s.reindex_like(s10, method='bfill', limit=2)
|
|
r10_2 = s.resample('10Min').bfill(limit=2)
|
|
r10 = s.resample('10Min').bfill()
|
|
|
|
# s10_2, r10, r10_2, rl should all be equal
|
|
assert_series_equal(s10_2, r10)
|
|
assert_series_equal(s10_2, r10_2)
|
|
assert_series_equal(s10_2, rl)
|
|
|
|
def test_resample_timegrouper(self):
|
|
# GH 7227
|
|
dates1 = [datetime(2014, 10, 1), datetime(2014, 9, 3),
|
|
datetime(2014, 11, 5), datetime(2014, 9, 5),
|
|
datetime(2014, 10, 8), datetime(2014, 7, 15)]
|
|
|
|
dates2 = dates1[:2] + [pd.NaT] + dates1[2:4] + [pd.NaT] + dates1[4:]
|
|
dates3 = [pd.NaT] + dates1 + [pd.NaT]
|
|
|
|
for dates in [dates1, dates2, dates3]:
|
|
df = DataFrame(dict(A=dates, B=np.arange(len(dates))))
|
|
result = df.set_index('A').resample('M').count()
|
|
exp_idx = pd.DatetimeIndex(['2014-07-31', '2014-08-31',
|
|
'2014-09-30',
|
|
'2014-10-31', '2014-11-30'],
|
|
freq='M', name='A')
|
|
expected = DataFrame({'B': [1, 0, 2, 2, 1]}, index=exp_idx)
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = df.groupby(pd.Grouper(freq='M', key='A')).count()
|
|
assert_frame_equal(result, expected)
|
|
|
|
df = DataFrame(dict(A=dates, B=np.arange(len(dates)), C=np.arange(
|
|
len(dates))))
|
|
result = df.set_index('A').resample('M').count()
|
|
expected = DataFrame({'B': [1, 0, 2, 2, 1], 'C': [1, 0, 2, 2, 1]},
|
|
index=exp_idx, columns=['B', 'C'])
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = df.groupby(pd.Grouper(freq='M', key='A')).count()
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_resample_nunique(self):
|
|
|
|
# GH 12352
|
|
df = DataFrame({
|
|
'ID': {Timestamp('2015-06-05 00:00:00'): '0010100903',
|
|
Timestamp('2015-06-08 00:00:00'): '0010150847'},
|
|
'DATE': {Timestamp('2015-06-05 00:00:00'): '2015-06-05',
|
|
Timestamp('2015-06-08 00:00:00'): '2015-06-08'}})
|
|
r = df.resample('D')
|
|
g = df.groupby(pd.Grouper(freq='D'))
|
|
expected = df.groupby(pd.Grouper(freq='D')).ID.apply(lambda x:
|
|
x.nunique())
|
|
assert expected.name == 'ID'
|
|
|
|
for t in [r, g]:
|
|
result = r.ID.nunique()
|
|
assert_series_equal(result, expected)
|
|
|
|
result = df.ID.resample('D').nunique()
|
|
assert_series_equal(result, expected)
|
|
|
|
result = df.ID.groupby(pd.Grouper(freq='D')).nunique()
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_nunique_with_date_gap(self):
|
|
# GH 13453
|
|
index = pd.date_range('1-1-2000', '2-15-2000', freq='h')
|
|
index2 = pd.date_range('4-15-2000', '5-15-2000', freq='h')
|
|
index3 = index.append(index2)
|
|
s = Series(range(len(index3)), index=index3, dtype='int64')
|
|
r = s.resample('M')
|
|
|
|
# Since all elements are unique, these should all be the same
|
|
results = [
|
|
r.count(),
|
|
r.nunique(),
|
|
r.agg(Series.nunique),
|
|
r.agg('nunique')
|
|
]
|
|
|
|
assert_series_equal(results[0], results[1])
|
|
assert_series_equal(results[0], results[2])
|
|
assert_series_equal(results[0], results[3])
|
|
|
|
def test_resample_group_info(self): # GH10914
|
|
for n, k in product((10000, 100000), (10, 100, 1000)):
|
|
dr = date_range(start='2015-08-27', periods=n // 10, freq='T')
|
|
ts = Series(np.random.randint(0, n // k, n).astype('int64'),
|
|
index=np.random.choice(dr, n))
|
|
|
|
left = ts.resample('30T').nunique()
|
|
ix = date_range(start=ts.index.min(), end=ts.index.max(),
|
|
freq='30T')
|
|
|
|
vals = ts.values
|
|
bins = np.searchsorted(ix.values, ts.index, side='right')
|
|
|
|
sorter = np.lexsort((vals, bins))
|
|
vals, bins = vals[sorter], bins[sorter]
|
|
|
|
mask = np.r_[True, vals[1:] != vals[:-1]]
|
|
mask |= np.r_[True, bins[1:] != bins[:-1]]
|
|
|
|
arr = np.bincount(bins[mask] - 1,
|
|
minlength=len(ix)).astype('int64', copy=False)
|
|
right = Series(arr, index=ix)
|
|
|
|
assert_series_equal(left, right)
|
|
|
|
def test_resample_size(self):
|
|
n = 10000
|
|
dr = date_range('2015-09-19', periods=n, freq='T')
|
|
ts = Series(np.random.randn(n), index=np.random.choice(dr, n))
|
|
|
|
left = ts.resample('7T').size()
|
|
ix = date_range(start=left.index.min(), end=ts.index.max(), freq='7T')
|
|
|
|
bins = np.searchsorted(ix.values, ts.index.values, side='right')
|
|
val = np.bincount(bins, minlength=len(ix) + 1)[1:].astype('int64',
|
|
copy=False)
|
|
|
|
right = Series(val, index=ix)
|
|
assert_series_equal(left, right)
|
|
|
|
def test_resample_across_dst(self):
|
|
# The test resamples a DatetimeIndex with values before and after a
|
|
# DST change
|
|
# Issue: 14682
|
|
|
|
# The DatetimeIndex we will start with
|
|
# (note that DST happens at 03:00+02:00 -> 02:00+01:00)
|
|
# 2016-10-30 02:23:00+02:00, 2016-10-30 02:23:00+01:00
|
|
df1 = DataFrame([1477786980, 1477790580], columns=['ts'])
|
|
dti1 = DatetimeIndex(pd.to_datetime(df1.ts, unit='s')
|
|
.dt.tz_localize('UTC')
|
|
.dt.tz_convert('Europe/Madrid'))
|
|
|
|
# The expected DatetimeIndex after resampling.
|
|
# 2016-10-30 02:00:00+02:00, 2016-10-30 02:00:00+01:00
|
|
df2 = DataFrame([1477785600, 1477789200], columns=['ts'])
|
|
dti2 = DatetimeIndex(pd.to_datetime(df2.ts, unit='s')
|
|
.dt.tz_localize('UTC')
|
|
.dt.tz_convert('Europe/Madrid'))
|
|
df = DataFrame([5, 5], index=dti1)
|
|
|
|
result = df.resample(rule='H').sum()
|
|
expected = DataFrame([5, 5], index=dti2)
|
|
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_resample_dst_anchor(self):
|
|
# 5172
|
|
dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz='US/Eastern')
|
|
df = DataFrame([5], index=dti)
|
|
assert_frame_equal(df.resample(rule='D').sum(),
|
|
DataFrame([5], index=df.index.normalize()))
|
|
df.resample(rule='MS').sum()
|
|
assert_frame_equal(
|
|
df.resample(rule='MS').sum(),
|
|
DataFrame([5], index=DatetimeIndex([datetime(2012, 11, 1)],
|
|
tz='US/Eastern')))
|
|
|
|
dti = date_range('2013-09-30', '2013-11-02', freq='30Min',
|
|
tz='Europe/Paris')
|
|
values = range(dti.size)
|
|
df = DataFrame({"a": values,
|
|
"b": values,
|
|
"c": values}, index=dti, dtype='int64')
|
|
how = {"a": "min", "b": "max", "c": "count"}
|
|
|
|
assert_frame_equal(
|
|
df.resample("W-MON").agg(how)[["a", "b", "c"]],
|
|
DataFrame({"a": [0, 48, 384, 720, 1056, 1394],
|
|
"b": [47, 383, 719, 1055, 1393, 1586],
|
|
"c": [48, 336, 336, 336, 338, 193]},
|
|
index=date_range('9/30/2013', '11/4/2013',
|
|
freq='W-MON', tz='Europe/Paris')),
|
|
'W-MON Frequency')
|
|
|
|
assert_frame_equal(
|
|
df.resample("2W-MON").agg(how)[["a", "b", "c"]],
|
|
DataFrame({"a": [0, 48, 720, 1394],
|
|
"b": [47, 719, 1393, 1586],
|
|
"c": [48, 672, 674, 193]},
|
|
index=date_range('9/30/2013', '11/11/2013',
|
|
freq='2W-MON', tz='Europe/Paris')),
|
|
'2W-MON Frequency')
|
|
|
|
assert_frame_equal(
|
|
df.resample("MS").agg(how)[["a", "b", "c"]],
|
|
DataFrame({"a": [0, 48, 1538],
|
|
"b": [47, 1537, 1586],
|
|
"c": [48, 1490, 49]},
|
|
index=date_range('9/1/2013', '11/1/2013',
|
|
freq='MS', tz='Europe/Paris')),
|
|
'MS Frequency')
|
|
|
|
assert_frame_equal(
|
|
df.resample("2MS").agg(how)[["a", "b", "c"]],
|
|
DataFrame({"a": [0, 1538],
|
|
"b": [1537, 1586],
|
|
"c": [1538, 49]},
|
|
index=date_range('9/1/2013', '11/1/2013',
|
|
freq='2MS', tz='Europe/Paris')),
|
|
'2MS Frequency')
|
|
|
|
df_daily = df['10/26/2013':'10/29/2013']
|
|
assert_frame_equal(
|
|
df_daily.resample("D").agg({"a": "min", "b": "max", "c": "count"})
|
|
[["a", "b", "c"]],
|
|
DataFrame({"a": [1248, 1296, 1346, 1394],
|
|
"b": [1295, 1345, 1393, 1441],
|
|
"c": [48, 50, 48, 48]},
|
|
index=date_range('10/26/2013', '10/29/2013',
|
|
freq='D', tz='Europe/Paris')),
|
|
'D Frequency')
|
|
|
|
def test_resample_with_nat(self):
|
|
# GH 13020
|
|
index = DatetimeIndex([pd.NaT,
|
|
'1970-01-01 00:00:00',
|
|
pd.NaT,
|
|
'1970-01-01 00:00:01',
|
|
'1970-01-01 00:00:02'])
|
|
frame = DataFrame([2, 3, 5, 7, 11], index=index)
|
|
|
|
index_1s = DatetimeIndex(['1970-01-01 00:00:00',
|
|
'1970-01-01 00:00:01',
|
|
'1970-01-01 00:00:02'])
|
|
frame_1s = DataFrame([3, 7, 11], index=index_1s)
|
|
assert_frame_equal(frame.resample('1s').mean(), frame_1s)
|
|
|
|
index_2s = DatetimeIndex(['1970-01-01 00:00:00',
|
|
'1970-01-01 00:00:02'])
|
|
frame_2s = DataFrame([5, 11], index=index_2s)
|
|
assert_frame_equal(frame.resample('2s').mean(), frame_2s)
|
|
|
|
index_3s = DatetimeIndex(['1970-01-01 00:00:00'])
|
|
frame_3s = DataFrame([7], index=index_3s)
|
|
assert_frame_equal(frame.resample('3s').mean(), frame_3s)
|
|
|
|
assert_frame_equal(frame.resample('60s').mean(), frame_3s)
|
|
|
|
def test_resample_timedelta_values(self):
|
|
# GH 13119
|
|
# check that timedelta dtype is preserved when NaT values are
|
|
# introduced by the resampling
|
|
|
|
times = timedelta_range('1 day', '4 day', freq='4D')
|
|
df = DataFrame({'time': times}, index=times)
|
|
|
|
times2 = timedelta_range('1 day', '4 day', freq='2D')
|
|
exp = Series(times2, index=times2, name='time')
|
|
exp.iloc[1] = pd.NaT
|
|
|
|
res = df.resample('2D').first()['time']
|
|
tm.assert_series_equal(res, exp)
|
|
res = df['time'].resample('2D').first()
|
|
tm.assert_series_equal(res, exp)
|
|
|
|
def test_resample_datetime_values(self):
|
|
# GH 13119
|
|
# check that datetime dtype is preserved when NaT values are
|
|
# introduced by the resampling
|
|
|
|
dates = [datetime(2016, 1, 15), datetime(2016, 1, 19)]
|
|
df = DataFrame({'timestamp': dates}, index=dates)
|
|
|
|
exp = Series([datetime(2016, 1, 15), pd.NaT, datetime(2016, 1, 19)],
|
|
index=date_range('2016-01-15', periods=3, freq='2D'),
|
|
name='timestamp')
|
|
|
|
res = df.resample('2D').first()['timestamp']
|
|
tm.assert_series_equal(res, exp)
|
|
res = df['timestamp'].resample('2D').first()
|
|
tm.assert_series_equal(res, exp)
|
|
|
|
|
|
class TestPeriodIndex(Base):
|
|
_index_factory = lambda x: period_range
|
|
|
|
@pytest.fixture
|
|
def _series_name(self):
|
|
return 'pi'
|
|
|
|
def create_series(self):
|
|
# TODO: replace calls to .create_series() by injecting the series
|
|
# fixture
|
|
i = period_range(datetime(2005, 1, 1),
|
|
datetime(2005, 1, 10), freq='D')
|
|
|
|
return Series(np.arange(len(i)), index=i, name='pi')
|
|
|
|
@pytest.mark.parametrize('freq', ['2D', '1H', '2H'])
|
|
@pytest.mark.parametrize('kind', ['period', None, 'timestamp'])
|
|
def test_asfreq(self, series_and_frame, freq, kind):
|
|
# GH 12884, 15944
|
|
# make sure .asfreq() returns PeriodIndex (except kind='timestamp')
|
|
|
|
obj = series_and_frame
|
|
if kind == 'timestamp':
|
|
expected = obj.to_timestamp().resample(freq).asfreq()
|
|
else:
|
|
start = obj.index[0].to_timestamp(how='start')
|
|
end = (obj.index[-1] + 1).to_timestamp(how='start')
|
|
new_index = date_range(start=start, end=end, freq=freq,
|
|
closed='left')
|
|
expected = obj.to_timestamp().reindex(new_index).to_period(freq)
|
|
result = obj.resample(freq, kind=kind).asfreq()
|
|
assert_almost_equal(result, expected)
|
|
|
|
def test_asfreq_fill_value(self):
|
|
# test for fill value during resampling, issue 3715
|
|
|
|
s = self.create_series()
|
|
new_index = date_range(s.index[0].to_timestamp(how='start'),
|
|
(s.index[-1]).to_timestamp(how='start'),
|
|
freq='1H')
|
|
expected = s.to_timestamp().reindex(new_index, fill_value=4.0)
|
|
result = s.resample('1H', kind='timestamp').asfreq(fill_value=4.0)
|
|
assert_series_equal(result, expected)
|
|
|
|
frame = s.to_frame('value')
|
|
new_index = date_range(frame.index[0].to_timestamp(how='start'),
|
|
(frame.index[-1]).to_timestamp(how='start'),
|
|
freq='1H')
|
|
expected = frame.to_timestamp().reindex(new_index, fill_value=3.0)
|
|
result = frame.resample('1H', kind='timestamp').asfreq(fill_value=3.0)
|
|
assert_frame_equal(result, expected)
|
|
|
|
@pytest.mark.parametrize('freq', ['H', '12H', '2D', 'W'])
|
|
@pytest.mark.parametrize('kind', [None, 'period', 'timestamp'])
|
|
def test_selection(self, index, freq, kind):
|
|
# This is a bug, these should be implemented
|
|
# GH 14008
|
|
rng = np.arange(len(index), dtype=np.int64)
|
|
df = DataFrame({'date': index, 'a': rng},
|
|
index=pd.MultiIndex.from_arrays([rng, index],
|
|
names=['v', 'd']))
|
|
with pytest.raises(NotImplementedError):
|
|
df.resample(freq, on='date', kind=kind)
|
|
with pytest.raises(NotImplementedError):
|
|
df.resample(freq, level='d', kind=kind)
|
|
|
|
def test_annual_upsample_D_s_f(self):
|
|
self._check_annual_upsample_cases('D', 'start', 'ffill')
|
|
|
|
def test_annual_upsample_D_e_f(self):
|
|
self._check_annual_upsample_cases('D', 'end', 'ffill')
|
|
|
|
def test_annual_upsample_D_s_b(self):
|
|
self._check_annual_upsample_cases('D', 'start', 'bfill')
|
|
|
|
def test_annual_upsample_D_e_b(self):
|
|
self._check_annual_upsample_cases('D', 'end', 'bfill')
|
|
|
|
def test_annual_upsample_B_s_f(self):
|
|
self._check_annual_upsample_cases('B', 'start', 'ffill')
|
|
|
|
def test_annual_upsample_B_e_f(self):
|
|
self._check_annual_upsample_cases('B', 'end', 'ffill')
|
|
|
|
def test_annual_upsample_B_s_b(self):
|
|
self._check_annual_upsample_cases('B', 'start', 'bfill')
|
|
|
|
def test_annual_upsample_B_e_b(self):
|
|
self._check_annual_upsample_cases('B', 'end', 'bfill')
|
|
|
|
def test_annual_upsample_M_s_f(self):
|
|
self._check_annual_upsample_cases('M', 'start', 'ffill')
|
|
|
|
def test_annual_upsample_M_e_f(self):
|
|
self._check_annual_upsample_cases('M', 'end', 'ffill')
|
|
|
|
def test_annual_upsample_M_s_b(self):
|
|
self._check_annual_upsample_cases('M', 'start', 'bfill')
|
|
|
|
def test_annual_upsample_M_e_b(self):
|
|
self._check_annual_upsample_cases('M', 'end', 'bfill')
|
|
|
|
def _check_annual_upsample_cases(self, targ, conv, meth, end='12/31/1991'):
|
|
for month in MONTHS:
|
|
ts = _simple_pts('1/1/1990', end, freq='A-%s' % month)
|
|
|
|
result = getattr(ts.resample(targ, convention=conv), meth)()
|
|
expected = result.to_timestamp(targ, how=conv)
|
|
expected = expected.asfreq(targ, meth).to_period()
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_basic_downsample(self):
|
|
ts = _simple_pts('1/1/1990', '6/30/1995', freq='M')
|
|
result = ts.resample('a-dec').mean()
|
|
|
|
expected = ts.groupby(ts.index.year).mean()
|
|
expected.index = period_range('1/1/1990', '6/30/1995', freq='a-dec')
|
|
assert_series_equal(result, expected)
|
|
|
|
# this is ok
|
|
assert_series_equal(ts.resample('a-dec').mean(), result)
|
|
assert_series_equal(ts.resample('a').mean(), result)
|
|
|
|
def test_not_subperiod(self):
|
|
# These are incompatible period rules for resampling
|
|
ts = _simple_pts('1/1/1990', '6/30/1995', freq='w-wed')
|
|
pytest.raises(ValueError, lambda: ts.resample('a-dec').mean())
|
|
pytest.raises(ValueError, lambda: ts.resample('q-mar').mean())
|
|
pytest.raises(ValueError, lambda: ts.resample('M').mean())
|
|
pytest.raises(ValueError, lambda: ts.resample('w-thu').mean())
|
|
|
|
@pytest.mark.parametrize('freq', ['D', '2D'])
|
|
def test_basic_upsample(self, freq):
|
|
ts = _simple_pts('1/1/1990', '6/30/1995', freq='M')
|
|
result = ts.resample('a-dec').mean()
|
|
|
|
resampled = result.resample(freq, convention='end').ffill()
|
|
expected = result.to_timestamp(freq, how='end')
|
|
expected = expected.asfreq(freq, 'ffill').to_period(freq)
|
|
assert_series_equal(resampled, expected)
|
|
|
|
def test_upsample_with_limit(self):
|
|
rng = period_range('1/1/2000', periods=5, freq='A')
|
|
ts = Series(np.random.randn(len(rng)), rng)
|
|
|
|
result = ts.resample('M', convention='end').ffill(limit=2)
|
|
expected = ts.asfreq('M').reindex(result.index, method='ffill',
|
|
limit=2)
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_annual_upsample(self):
|
|
ts = _simple_pts('1/1/1990', '12/31/1995', freq='A-DEC')
|
|
df = DataFrame({'a': ts})
|
|
rdf = df.resample('D').ffill()
|
|
exp = df['a'].resample('D').ffill()
|
|
assert_series_equal(rdf['a'], exp)
|
|
|
|
rng = period_range('2000', '2003', freq='A-DEC')
|
|
ts = Series([1, 2, 3, 4], index=rng)
|
|
|
|
result = ts.resample('M').ffill()
|
|
ex_index = period_range('2000-01', '2003-12', freq='M')
|
|
|
|
expected = ts.asfreq('M', how='start').reindex(ex_index,
|
|
method='ffill')
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_quarterly_upsample(self):
|
|
targets = ['D', 'B', 'M']
|
|
|
|
for month in MONTHS:
|
|
ts = _simple_pts('1/1/1990', '12/31/1995', freq='Q-%s' % month)
|
|
|
|
for targ, conv in product(targets, ['start', 'end']):
|
|
result = ts.resample(targ, convention=conv).ffill()
|
|
expected = result.to_timestamp(targ, how=conv)
|
|
expected = expected.asfreq(targ, 'ffill').to_period()
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_monthly_upsample(self):
|
|
targets = ['D', 'B']
|
|
|
|
ts = _simple_pts('1/1/1990', '12/31/1995', freq='M')
|
|
|
|
for targ, conv in product(targets, ['start', 'end']):
|
|
result = ts.resample(targ, convention=conv).ffill()
|
|
expected = result.to_timestamp(targ, how=conv)
|
|
expected = expected.asfreq(targ, 'ffill').to_period()
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_basic(self):
|
|
# GH3609
|
|
s = Series(range(100), index=date_range(
|
|
'20130101', freq='s', periods=100, name='idx'), dtype='float')
|
|
s[10:30] = np.nan
|
|
index = PeriodIndex([
|
|
Period('2013-01-01 00:00', 'T'),
|
|
Period('2013-01-01 00:01', 'T')], name='idx')
|
|
expected = Series([34.5, 79.5], index=index)
|
|
result = s.to_period().resample('T', kind='period').mean()
|
|
assert_series_equal(result, expected)
|
|
result2 = s.resample('T', kind='period').mean()
|
|
assert_series_equal(result2, expected)
|
|
|
|
@pytest.mark.parametrize('freq,expected_vals', [('M', [31, 29, 31, 9]),
|
|
('2M', [31 + 29, 31 + 9])])
|
|
def test_resample_count(self, freq, expected_vals):
|
|
# GH12774
|
|
series = Series(1, index=pd.period_range(start='2000', periods=100))
|
|
result = series.resample(freq).count()
|
|
expected_index = pd.period_range(start='2000', freq=freq,
|
|
periods=len(expected_vals))
|
|
expected = Series(expected_vals, index=expected_index)
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_same_freq(self):
|
|
|
|
# GH12770
|
|
series = Series(range(3), index=pd.period_range(
|
|
start='2000', periods=3, freq='M'))
|
|
expected = series
|
|
|
|
for method in resample_methods:
|
|
result = getattr(series.resample('M'), method)()
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_incompat_freq(self):
|
|
|
|
with pytest.raises(IncompatibleFrequency):
|
|
Series(range(3), index=pd.period_range(
|
|
start='2000', periods=3, freq='M')).resample('W').mean()
|
|
|
|
def test_with_local_timezone_pytz(self):
|
|
# see gh-5430
|
|
local_timezone = pytz.timezone('America/Los_Angeles')
|
|
|
|
start = datetime(year=2013, month=11, day=1, hour=0, minute=0,
|
|
tzinfo=pytz.utc)
|
|
# 1 day later
|
|
end = datetime(year=2013, month=11, day=2, hour=0, minute=0,
|
|
tzinfo=pytz.utc)
|
|
|
|
index = pd.date_range(start, end, freq='H')
|
|
|
|
series = Series(1, index=index)
|
|
series = series.tz_convert(local_timezone)
|
|
result = series.resample('D', kind='period').mean()
|
|
|
|
# Create the expected series
|
|
# Index is moved back a day with the timezone conversion from UTC to
|
|
# Pacific
|
|
expected_index = (pd.period_range(start=start, end=end, freq='D') - 1)
|
|
expected = Series(1, index=expected_index)
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_with_pytz(self):
|
|
# GH 13238
|
|
s = Series(2, index=pd.date_range('2017-01-01', periods=48, freq="H",
|
|
tz="US/Eastern"))
|
|
result = s.resample("D").mean()
|
|
expected = Series(2, index=pd.DatetimeIndex(['2017-01-01',
|
|
'2017-01-02'],
|
|
tz="US/Eastern"))
|
|
assert_series_equal(result, expected)
|
|
# Especially assert that the timezone is LMT for pytz
|
|
assert result.index.tz == pytz.timezone('US/Eastern')
|
|
|
|
def test_with_local_timezone_dateutil(self):
|
|
# see gh-5430
|
|
local_timezone = 'dateutil/America/Los_Angeles'
|
|
|
|
start = datetime(year=2013, month=11, day=1, hour=0, minute=0,
|
|
tzinfo=dateutil.tz.tzutc())
|
|
# 1 day later
|
|
end = datetime(year=2013, month=11, day=2, hour=0, minute=0,
|
|
tzinfo=dateutil.tz.tzutc())
|
|
|
|
index = pd.date_range(start, end, freq='H', name='idx')
|
|
|
|
series = Series(1, index=index)
|
|
series = series.tz_convert(local_timezone)
|
|
result = series.resample('D', kind='period').mean()
|
|
|
|
# Create the expected series
|
|
# Index is moved back a day with the timezone conversion from UTC to
|
|
# Pacific
|
|
expected_index = (pd.period_range(start=start, end=end, freq='D',
|
|
name='idx') - 1)
|
|
expected = Series(1, index=expected_index)
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_fill_method_and_how_upsample(self):
|
|
# GH2073
|
|
s = Series(np.arange(9, dtype='int64'),
|
|
index=date_range('2010-01-01', periods=9, freq='Q'))
|
|
last = s.resample('M').ffill()
|
|
both = s.resample('M').ffill().resample('M').last().astype('int64')
|
|
assert_series_equal(last, both)
|
|
|
|
def test_weekly_upsample(self):
|
|
targets = ['D', 'B']
|
|
|
|
for day in DAYS:
|
|
ts = _simple_pts('1/1/1990', '12/31/1995', freq='W-%s' % day)
|
|
|
|
for targ, conv in product(targets, ['start', 'end']):
|
|
result = ts.resample(targ, convention=conv).ffill()
|
|
expected = result.to_timestamp(targ, how=conv)
|
|
expected = expected.asfreq(targ, 'ffill').to_period()
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_to_timestamps(self):
|
|
ts = _simple_pts('1/1/1990', '12/31/1995', freq='M')
|
|
|
|
result = ts.resample('A-DEC', kind='timestamp').mean()
|
|
expected = ts.to_timestamp(how='end').resample('A-DEC').mean()
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_to_quarterly(self):
|
|
for month in MONTHS:
|
|
ts = _simple_pts('1990', '1992', freq='A-%s' % month)
|
|
quar_ts = ts.resample('Q-%s' % month).ffill()
|
|
|
|
stamps = ts.to_timestamp('D', how='start')
|
|
qdates = period_range(ts.index[0].asfreq('D', 'start'),
|
|
ts.index[-1].asfreq('D', 'end'),
|
|
freq='Q-%s' % month)
|
|
|
|
expected = stamps.reindex(qdates.to_timestamp('D', 's'),
|
|
method='ffill')
|
|
expected.index = qdates
|
|
|
|
assert_series_equal(quar_ts, expected)
|
|
|
|
# conforms, but different month
|
|
ts = _simple_pts('1990', '1992', freq='A-JUN')
|
|
|
|
for how in ['start', 'end']:
|
|
result = ts.resample('Q-MAR', convention=how).ffill()
|
|
expected = ts.asfreq('Q-MAR', how=how)
|
|
expected = expected.reindex(result.index, method='ffill')
|
|
|
|
# .to_timestamp('D')
|
|
# expected = expected.resample('Q-MAR').ffill()
|
|
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_fill_missing(self):
|
|
rng = PeriodIndex([2000, 2005, 2007, 2009], freq='A')
|
|
|
|
s = Series(np.random.randn(4), index=rng)
|
|
|
|
stamps = s.to_timestamp()
|
|
filled = s.resample('A').ffill()
|
|
expected = stamps.resample('A').ffill().to_period('A')
|
|
assert_series_equal(filled, expected)
|
|
|
|
def test_cant_fill_missing_dups(self):
|
|
rng = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq='A')
|
|
s = Series(np.random.randn(5), index=rng)
|
|
pytest.raises(Exception, lambda: s.resample('A').ffill())
|
|
|
|
@pytest.mark.parametrize('freq', ['5min'])
|
|
@pytest.mark.parametrize('kind', ['period', None, 'timestamp'])
|
|
def test_resample_5minute(self, freq, kind):
|
|
rng = period_range('1/1/2000', '1/5/2000', freq='T')
|
|
ts = Series(np.random.randn(len(rng)), index=rng)
|
|
expected = ts.to_timestamp().resample(freq).mean()
|
|
if kind != 'timestamp':
|
|
expected = expected.to_period(freq)
|
|
result = ts.resample(freq, kind=kind).mean()
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_upsample_daily_business_daily(self):
|
|
ts = _simple_pts('1/1/2000', '2/1/2000', freq='B')
|
|
|
|
result = ts.resample('D').asfreq()
|
|
expected = ts.asfreq('D').reindex(period_range('1/3/2000', '2/1/2000'))
|
|
assert_series_equal(result, expected)
|
|
|
|
ts = _simple_pts('1/1/2000', '2/1/2000')
|
|
result = ts.resample('H', convention='s').asfreq()
|
|
exp_rng = period_range('1/1/2000', '2/1/2000 23:00', freq='H')
|
|
expected = ts.asfreq('H', how='s').reindex(exp_rng)
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_irregular_sparse(self):
|
|
dr = date_range(start='1/1/2012', freq='5min', periods=1000)
|
|
s = Series(np.array(100), index=dr)
|
|
# subset the data.
|
|
subset = s[:'2012-01-04 06:55']
|
|
|
|
result = subset.resample('10min').apply(len)
|
|
expected = s.resample('10min').apply(len).loc[result.index]
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_weekly_all_na(self):
|
|
rng = date_range('1/1/2000', periods=10, freq='W-WED')
|
|
ts = Series(np.random.randn(len(rng)), index=rng)
|
|
|
|
result = ts.resample('W-THU').asfreq()
|
|
|
|
assert result.isna().all()
|
|
|
|
result = ts.resample('W-THU').asfreq().ffill()[:-1]
|
|
expected = ts.asfreq('W-THU').ffill()
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_tz_localized(self):
|
|
dr = date_range(start='2012-4-13', end='2012-5-1')
|
|
ts = Series(lrange(len(dr)), dr)
|
|
|
|
ts_utc = ts.tz_localize('UTC')
|
|
ts_local = ts_utc.tz_convert('America/Los_Angeles')
|
|
|
|
result = ts_local.resample('W').mean()
|
|
|
|
ts_local_naive = ts_local.copy()
|
|
ts_local_naive.index = [x.replace(tzinfo=None)
|
|
for x in ts_local_naive.index.to_pydatetime()]
|
|
|
|
exp = ts_local_naive.resample(
|
|
'W').mean().tz_localize('America/Los_Angeles')
|
|
|
|
assert_series_equal(result, exp)
|
|
|
|
# it works
|
|
result = ts_local.resample('D').mean()
|
|
|
|
# #2245
|
|
idx = date_range('2001-09-20 15:59', '2001-09-20 16:00', freq='T',
|
|
tz='Australia/Sydney')
|
|
s = Series([1, 2], index=idx)
|
|
|
|
result = s.resample('D', closed='right', label='right').mean()
|
|
ex_index = date_range('2001-09-21', periods=1, freq='D',
|
|
tz='Australia/Sydney')
|
|
expected = Series([1.5], index=ex_index)
|
|
|
|
assert_series_equal(result, expected)
|
|
|
|
# for good measure
|
|
result = s.resample('D', kind='period').mean()
|
|
ex_index = period_range('2001-09-20', periods=1, freq='D')
|
|
expected = Series([1.5], index=ex_index)
|
|
assert_series_equal(result, expected)
|
|
|
|
# GH 6397
|
|
# comparing an offset that doesn't propagate tz's
|
|
rng = date_range('1/1/2011', periods=20000, freq='H')
|
|
rng = rng.tz_localize('EST')
|
|
ts = DataFrame(index=rng)
|
|
ts['first'] = np.random.randn(len(rng))
|
|
ts['second'] = np.cumsum(np.random.randn(len(rng)))
|
|
expected = DataFrame(
|
|
{
|
|
'first': ts.resample('A').sum()['first'],
|
|
'second': ts.resample('A').mean()['second']},
|
|
columns=['first', 'second'])
|
|
result = ts.resample(
|
|
'A').agg({'first': np.sum,
|
|
'second': np.mean}).reindex(columns=['first', 'second'])
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_closed_left_corner(self):
|
|
# #1465
|
|
s = Series(np.random.randn(21),
|
|
index=date_range(start='1/1/2012 9:30',
|
|
freq='1min', periods=21))
|
|
s[0] = np.nan
|
|
|
|
result = s.resample('10min', closed='left', label='right').mean()
|
|
exp = s[1:].resample('10min', closed='left', label='right').mean()
|
|
assert_series_equal(result, exp)
|
|
|
|
result = s.resample('10min', closed='left', label='left').mean()
|
|
exp = s[1:].resample('10min', closed='left', label='left').mean()
|
|
|
|
ex_index = date_range(start='1/1/2012 9:30', freq='10min', periods=3)
|
|
|
|
tm.assert_index_equal(result.index, ex_index)
|
|
assert_series_equal(result, exp)
|
|
|
|
def test_quarterly_resampling(self):
|
|
rng = period_range('2000Q1', periods=10, freq='Q-DEC')
|
|
ts = Series(np.arange(10), index=rng)
|
|
|
|
result = ts.resample('A').mean()
|
|
exp = ts.to_timestamp().resample('A').mean().to_period()
|
|
assert_series_equal(result, exp)
|
|
|
|
def test_resample_weekly_bug_1726(self):
|
|
# 8/6/12 is a Monday
|
|
ind = DatetimeIndex(start="8/6/2012", end="8/26/2012", freq="D")
|
|
n = len(ind)
|
|
data = [[x] * 5 for x in range(n)]
|
|
df = DataFrame(data, columns=['open', 'high', 'low', 'close', 'vol'],
|
|
index=ind)
|
|
|
|
# it works!
|
|
df.resample('W-MON', closed='left', label='left').first()
|
|
|
|
def test_resample_with_dst_time_change(self):
|
|
# GH 15549
|
|
index = pd.DatetimeIndex([1457537600000000000, 1458059600000000000],
|
|
tz='UTC').tz_convert('America/Chicago')
|
|
df = pd.DataFrame([1, 2], index=index)
|
|
result = df.resample('12h', closed='right',
|
|
label='right').last().ffill()
|
|
|
|
expected_index_values = ['2016-03-09 12:00:00-06:00',
|
|
'2016-03-10 00:00:00-06:00',
|
|
'2016-03-10 12:00:00-06:00',
|
|
'2016-03-11 00:00:00-06:00',
|
|
'2016-03-11 12:00:00-06:00',
|
|
'2016-03-12 00:00:00-06:00',
|
|
'2016-03-12 12:00:00-06:00',
|
|
'2016-03-13 00:00:00-06:00',
|
|
'2016-03-13 13:00:00-05:00',
|
|
'2016-03-14 01:00:00-05:00',
|
|
'2016-03-14 13:00:00-05:00',
|
|
'2016-03-15 01:00:00-05:00',
|
|
'2016-03-15 13:00:00-05:00']
|
|
index = pd.DatetimeIndex(expected_index_values,
|
|
tz='UTC').tz_convert('America/Chicago')
|
|
expected = pd.DataFrame([1.0, 1.0, 1.0, 1.0, 1.0,
|
|
1.0, 1.0, 1.0, 1.0, 1.0,
|
|
1.0, 1.0, 2.0], index=index)
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_resample_bms_2752(self):
|
|
# GH2753
|
|
foo = Series(index=pd.bdate_range('20000101', '20000201'))
|
|
res1 = foo.resample("BMS").mean()
|
|
res2 = foo.resample("BMS").mean().resample("B").mean()
|
|
assert res1.index[0] == Timestamp('20000103')
|
|
assert res1.index[0] == res2.index[0]
|
|
|
|
# def test_monthly_convention_span(self):
|
|
# rng = period_range('2000-01', periods=3, freq='M')
|
|
# ts = Series(np.arange(3), index=rng)
|
|
|
|
# # hacky way to get same thing
|
|
# exp_index = period_range('2000-01-01', '2000-03-31', freq='D')
|
|
# expected = ts.asfreq('D', how='end').reindex(exp_index)
|
|
# expected = expected.fillna(method='bfill')
|
|
|
|
# result = ts.resample('D', convention='span').mean()
|
|
|
|
# assert_series_equal(result, expected)
|
|
|
|
def test_default_right_closed_label(self):
|
|
end_freq = ['D', 'Q', 'M', 'D']
|
|
end_types = ['M', 'A', 'Q', 'W']
|
|
|
|
for from_freq, to_freq in zip(end_freq, end_types):
|
|
idx = DatetimeIndex(start='8/15/2012', periods=100, freq=from_freq)
|
|
df = DataFrame(np.random.randn(len(idx), 2), idx)
|
|
|
|
resampled = df.resample(to_freq).mean()
|
|
assert_frame_equal(resampled, df.resample(to_freq, closed='right',
|
|
label='right').mean())
|
|
|
|
def test_default_left_closed_label(self):
|
|
others = ['MS', 'AS', 'QS', 'D', 'H']
|
|
others_freq = ['D', 'Q', 'M', 'H', 'T']
|
|
|
|
for from_freq, to_freq in zip(others_freq, others):
|
|
idx = DatetimeIndex(start='8/15/2012', periods=100, freq=from_freq)
|
|
df = DataFrame(np.random.randn(len(idx), 2), idx)
|
|
|
|
resampled = df.resample(to_freq).mean()
|
|
assert_frame_equal(resampled, df.resample(to_freq, closed='left',
|
|
label='left').mean())
|
|
|
|
def test_all_values_single_bin(self):
|
|
# 2070
|
|
index = period_range(start="2012-01-01", end="2012-12-31", freq="M")
|
|
s = Series(np.random.randn(len(index)), index=index)
|
|
|
|
result = s.resample("A").mean()
|
|
tm.assert_almost_equal(result[0], s.mean())
|
|
|
|
def test_evenly_divisible_with_no_extra_bins(self):
|
|
# 4076
|
|
# when the frequency is evenly divisible, sometimes extra bins
|
|
|
|
df = DataFrame(np.random.randn(9, 3),
|
|
index=date_range('2000-1-1', periods=9))
|
|
result = df.resample('5D').mean()
|
|
expected = pd.concat(
|
|
[df.iloc[0:5].mean(), df.iloc[5:].mean()], axis=1).T
|
|
expected.index = [Timestamp('2000-1-1'), Timestamp('2000-1-6')]
|
|
assert_frame_equal(result, expected)
|
|
|
|
index = date_range(start='2001-5-4', periods=28)
|
|
df = DataFrame(
|
|
[{'REST_KEY': 1, 'DLY_TRN_QT': 80, 'DLY_SLS_AMT': 90,
|
|
'COOP_DLY_TRN_QT': 30, 'COOP_DLY_SLS_AMT': 20}] * 28 +
|
|
[{'REST_KEY': 2, 'DLY_TRN_QT': 70, 'DLY_SLS_AMT': 10,
|
|
'COOP_DLY_TRN_QT': 50, 'COOP_DLY_SLS_AMT': 20}] * 28,
|
|
index=index.append(index)).sort_index()
|
|
|
|
index = date_range('2001-5-4', periods=4, freq='7D')
|
|
expected = DataFrame(
|
|
[{'REST_KEY': 14, 'DLY_TRN_QT': 14, 'DLY_SLS_AMT': 14,
|
|
'COOP_DLY_TRN_QT': 14, 'COOP_DLY_SLS_AMT': 14}] * 4,
|
|
index=index)
|
|
result = df.resample('7D').count()
|
|
assert_frame_equal(result, expected)
|
|
|
|
expected = DataFrame(
|
|
[{'REST_KEY': 21, 'DLY_TRN_QT': 1050, 'DLY_SLS_AMT': 700,
|
|
'COOP_DLY_TRN_QT': 560, 'COOP_DLY_SLS_AMT': 280}] * 4,
|
|
index=index)
|
|
result = df.resample('7D').sum()
|
|
assert_frame_equal(result, expected)
|
|
|
|
@pytest.mark.parametrize('kind', ['period', None, 'timestamp'])
|
|
@pytest.mark.parametrize('agg_arg', ['mean', {'value': 'mean'}, ['mean']])
|
|
def test_loffset_returns_datetimeindex(self, frame, kind, agg_arg):
|
|
# make sure passing loffset returns DatetimeIndex in all cases
|
|
# basic method taken from Base.test_resample_loffset_arg_type()
|
|
df = frame
|
|
expected_means = [df.values[i:i + 2].mean()
|
|
for i in range(0, len(df.values), 2)]
|
|
expected_index = self.create_index(df.index[0],
|
|
periods=len(df.index) / 2,
|
|
freq='2D')
|
|
|
|
# loffset coerces PeriodIndex to DateTimeIndex
|
|
expected_index = expected_index.to_timestamp()
|
|
expected_index += timedelta(hours=2)
|
|
expected = DataFrame({'value': expected_means}, index=expected_index)
|
|
|
|
result_agg = df.resample('2D', loffset='2H', kind=kind).agg(agg_arg)
|
|
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
|
result_how = df.resample('2D', how=agg_arg, loffset='2H',
|
|
kind=kind)
|
|
if isinstance(agg_arg, list):
|
|
expected.columns = pd.MultiIndex.from_tuples([('value', 'mean')])
|
|
assert_frame_equal(result_agg, expected)
|
|
assert_frame_equal(result_how, expected)
|
|
|
|
@pytest.mark.parametrize('freq, period_mult', [('H', 24), ('12H', 2)])
|
|
@pytest.mark.parametrize('kind', [None, 'period'])
|
|
def test_upsampling_ohlc(self, freq, period_mult, kind):
|
|
# GH 13083
|
|
pi = PeriodIndex(start='2000', freq='D', periods=10)
|
|
s = Series(range(len(pi)), index=pi)
|
|
expected = s.to_timestamp().resample(freq).ohlc().to_period(freq)
|
|
|
|
# timestamp-based resampling doesn't include all sub-periods
|
|
# of the last original period, so extend accordingly:
|
|
new_index = PeriodIndex(start='2000', freq=freq,
|
|
periods=period_mult * len(pi))
|
|
expected = expected.reindex(new_index)
|
|
result = s.resample(freq, kind=kind).ohlc()
|
|
assert_frame_equal(result, expected)
|
|
|
|
@pytest.mark.parametrize('periods, values',
|
|
[([pd.NaT, '1970-01-01 00:00:00', pd.NaT,
|
|
'1970-01-01 00:00:02', '1970-01-01 00:00:03'],
|
|
[2, 3, 5, 7, 11]),
|
|
([pd.NaT, pd.NaT, '1970-01-01 00:00:00', pd.NaT,
|
|
pd.NaT, pd.NaT, '1970-01-01 00:00:02',
|
|
'1970-01-01 00:00:03', pd.NaT, pd.NaT],
|
|
[1, 2, 3, 5, 6, 8, 7, 11, 12, 13])])
|
|
@pytest.mark.parametrize('freq, expected_values',
|
|
[('1s', [3, np.NaN, 7, 11]),
|
|
('2s', [3, int((7 + 11) / 2)]),
|
|
('3s', [int((3 + 7) / 2), 11])])
|
|
def test_resample_with_nat(self, periods, values, freq, expected_values):
|
|
# GH 13224
|
|
index = PeriodIndex(periods, freq='S')
|
|
frame = DataFrame(values, index=index)
|
|
|
|
expected_index = period_range('1970-01-01 00:00:00',
|
|
periods=len(expected_values), freq=freq)
|
|
expected = DataFrame(expected_values, index=expected_index)
|
|
result = frame.resample(freq).mean()
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_resample_with_only_nat(self):
|
|
# GH 13224
|
|
pi = PeriodIndex([pd.NaT] * 3, freq='S')
|
|
frame = DataFrame([2, 3, 5], index=pi)
|
|
expected_index = PeriodIndex(data=[], freq=pi.freq)
|
|
expected = DataFrame([], index=expected_index)
|
|
result = frame.resample('1s').mean()
|
|
assert_frame_equal(result, expected)
|
|
|
|
|
|
class TestTimedeltaIndex(Base):
|
|
_index_factory = lambda x: timedelta_range
|
|
|
|
@pytest.fixture
|
|
def _index_start(self):
|
|
return '1 day'
|
|
|
|
@pytest.fixture
|
|
def _index_end(self):
|
|
return '10 day'
|
|
|
|
@pytest.fixture
|
|
def _series_name(self):
|
|
return 'tdi'
|
|
|
|
def create_series(self):
|
|
i = timedelta_range('1 day',
|
|
'10 day', freq='D')
|
|
|
|
return Series(np.arange(len(i)), index=i, name='tdi')
|
|
|
|
def test_asfreq_bug(self):
|
|
import datetime as dt
|
|
df = DataFrame(data=[1, 3],
|
|
index=[dt.timedelta(), dt.timedelta(minutes=3)])
|
|
result = df.resample('1T').asfreq()
|
|
expected = DataFrame(data=[1, np.nan, np.nan, 3],
|
|
index=timedelta_range('0 day',
|
|
periods=4,
|
|
freq='1T'))
|
|
assert_frame_equal(result, expected)
|
|
|
|
|
|
class TestResamplerGrouper(object):
|
|
|
|
def setup_method(self, method):
|
|
self.frame = DataFrame({'A': [1] * 20 + [2] * 12 + [3] * 8,
|
|
'B': np.arange(40)},
|
|
index=date_range('1/1/2000',
|
|
freq='s',
|
|
periods=40))
|
|
|
|
def test_tab_complete_ipython6_warning(self, ip):
|
|
from IPython.core.completer import provisionalcompleter
|
|
code = dedent("""\
|
|
import pandas.util.testing as tm
|
|
s = tm.makeTimeSeries()
|
|
rs = s.resample("D")
|
|
""")
|
|
ip.run_code(code)
|
|
|
|
with tm.assert_produces_warning(None):
|
|
with provisionalcompleter('ignore'):
|
|
list(ip.Completer.completions('rs.', 1))
|
|
|
|
def test_deferred_with_groupby(self):
|
|
|
|
# GH 12486
|
|
# support deferred resample ops with groupby
|
|
data = [['2010-01-01', 'A', 2], ['2010-01-02', 'A', 3],
|
|
['2010-01-05', 'A', 8], ['2010-01-10', 'A', 7],
|
|
['2010-01-13', 'A', 3], ['2010-01-01', 'B', 5],
|
|
['2010-01-03', 'B', 2], ['2010-01-04', 'B', 1],
|
|
['2010-01-11', 'B', 7], ['2010-01-14', 'B', 3]]
|
|
|
|
df = DataFrame(data, columns=['date', 'id', 'score'])
|
|
df.date = pd.to_datetime(df.date)
|
|
f = lambda x: x.set_index('date').resample('D').asfreq()
|
|
expected = df.groupby('id').apply(f)
|
|
result = df.set_index('date').groupby('id').resample('D').asfreq()
|
|
assert_frame_equal(result, expected)
|
|
|
|
df = DataFrame({'date': pd.date_range(start='2016-01-01',
|
|
periods=4,
|
|
freq='W'),
|
|
'group': [1, 1, 2, 2],
|
|
'val': [5, 6, 7, 8]}).set_index('date')
|
|
|
|
f = lambda x: x.resample('1D').ffill()
|
|
expected = df.groupby('group').apply(f)
|
|
result = df.groupby('group').resample('1D').ffill()
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_getitem(self):
|
|
g = self.frame.groupby('A')
|
|
|
|
expected = g.B.apply(lambda x: x.resample('2s').mean())
|
|
|
|
result = g.resample('2s').B.mean()
|
|
assert_series_equal(result, expected)
|
|
|
|
result = g.B.resample('2s').mean()
|
|
assert_series_equal(result, expected)
|
|
|
|
result = g.resample('2s').mean().B
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_getitem_multiple(self):
|
|
|
|
# GH 13174
|
|
# multiple calls after selection causing an issue with aliasing
|
|
data = [{'id': 1, 'buyer': 'A'}, {'id': 2, 'buyer': 'B'}]
|
|
df = DataFrame(data, index=pd.date_range('2016-01-01', periods=2))
|
|
r = df.groupby('id').resample('1D')
|
|
result = r['buyer'].count()
|
|
expected = Series([1, 1],
|
|
index=pd.MultiIndex.from_tuples(
|
|
[(1, Timestamp('2016-01-01')),
|
|
(2, Timestamp('2016-01-02'))],
|
|
names=['id', None]),
|
|
name='buyer')
|
|
assert_series_equal(result, expected)
|
|
|
|
result = r['buyer'].count()
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_groupby_resample_on_api_with_getitem(self):
|
|
# GH 17813
|
|
df = pd.DataFrame({'id': list('aabbb'),
|
|
'date': pd.date_range('1-1-2016', periods=5),
|
|
'data': 1})
|
|
exp = df.set_index('date').groupby('id').resample('2D')['data'].sum()
|
|
result = df.groupby('id').resample('2D', on='date')['data'].sum()
|
|
assert_series_equal(result, exp)
|
|
|
|
def test_nearest(self):
|
|
|
|
# GH 17496
|
|
# Resample nearest
|
|
index = pd.date_range('1/1/2000', periods=3, freq='T')
|
|
result = Series(range(3), index=index).resample('20s').nearest()
|
|
|
|
expected = Series(
|
|
[0, 0, 1, 1, 1, 2, 2],
|
|
index=pd.DatetimeIndex(
|
|
['2000-01-01 00:00:00', '2000-01-01 00:00:20',
|
|
'2000-01-01 00:00:40', '2000-01-01 00:01:00',
|
|
'2000-01-01 00:01:20', '2000-01-01 00:01:40',
|
|
'2000-01-01 00:02:00'],
|
|
dtype='datetime64[ns]',
|
|
freq='20S'))
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_methods(self):
|
|
g = self.frame.groupby('A')
|
|
r = g.resample('2s')
|
|
|
|
for f in ['first', 'last', 'median', 'sem', 'sum', 'mean',
|
|
'min', 'max']:
|
|
result = getattr(r, f)()
|
|
expected = g.apply(lambda x: getattr(x.resample('2s'), f)())
|
|
assert_frame_equal(result, expected)
|
|
|
|
for f in ['size']:
|
|
result = getattr(r, f)()
|
|
expected = g.apply(lambda x: getattr(x.resample('2s'), f)())
|
|
assert_series_equal(result, expected)
|
|
|
|
for f in ['count']:
|
|
result = getattr(r, f)()
|
|
expected = g.apply(lambda x: getattr(x.resample('2s'), f)())
|
|
assert_frame_equal(result, expected)
|
|
|
|
# series only
|
|
for f in ['nunique']:
|
|
result = getattr(r.B, f)()
|
|
expected = g.B.apply(lambda x: getattr(x.resample('2s'), f)())
|
|
assert_series_equal(result, expected)
|
|
|
|
for f in ['nearest', 'backfill', 'ffill', 'asfreq']:
|
|
result = getattr(r, f)()
|
|
expected = g.apply(lambda x: getattr(x.resample('2s'), f)())
|
|
assert_frame_equal(result, expected)
|
|
|
|
result = r.ohlc()
|
|
expected = g.apply(lambda x: x.resample('2s').ohlc())
|
|
assert_frame_equal(result, expected)
|
|
|
|
for f in ['std', 'var']:
|
|
result = getattr(r, f)(ddof=1)
|
|
expected = g.apply(lambda x: getattr(x.resample('2s'), f)(ddof=1))
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_apply(self):
|
|
|
|
g = self.frame.groupby('A')
|
|
r = g.resample('2s')
|
|
|
|
# reduction
|
|
expected = g.resample('2s').sum()
|
|
|
|
def f(x):
|
|
return x.resample('2s').sum()
|
|
|
|
result = r.apply(f)
|
|
assert_frame_equal(result, expected)
|
|
|
|
def f(x):
|
|
return x.resample('2s').apply(lambda y: y.sum())
|
|
|
|
result = g.apply(f)
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_apply_with_mutated_index(self):
|
|
# GH 15169
|
|
index = pd.date_range('1-1-2015', '12-31-15', freq='D')
|
|
df = DataFrame(data={'col1': np.random.rand(len(index))}, index=index)
|
|
|
|
def f(x):
|
|
s = Series([1, 2], index=['a', 'b'])
|
|
return s
|
|
|
|
expected = df.groupby(pd.Grouper(freq='M')).apply(f)
|
|
|
|
result = df.resample('M').apply(f)
|
|
assert_frame_equal(result, expected)
|
|
|
|
# A case for series
|
|
expected = df['col1'].groupby(pd.Grouper(freq='M')).apply(f)
|
|
result = df['col1'].resample('M').apply(f)
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_resample_groupby_with_label(self):
|
|
# GH 13235
|
|
index = date_range('2000-01-01', freq='2D', periods=5)
|
|
df = DataFrame(index=index,
|
|
data={'col0': [0, 0, 1, 1, 2], 'col1': [1, 1, 1, 1, 1]}
|
|
)
|
|
result = df.groupby('col0').resample('1W', label='left').sum()
|
|
|
|
mi = [np.array([0, 0, 1, 2]),
|
|
pd.to_datetime(np.array(['1999-12-26', '2000-01-02',
|
|
'2000-01-02', '2000-01-02'])
|
|
)
|
|
]
|
|
mindex = pd.MultiIndex.from_arrays(mi, names=['col0', None])
|
|
expected = DataFrame(data={'col0': [0, 0, 2, 2], 'col1': [1, 1, 2, 1]},
|
|
index=mindex
|
|
)
|
|
|
|
assert_frame_equal(result, expected)
|
|
|
|
def test_consistency_with_window(self):
|
|
|
|
# consistent return values with window
|
|
df = self.frame
|
|
expected = pd.Int64Index([1, 2, 3], name='A')
|
|
result = df.groupby('A').resample('2s').mean()
|
|
assert result.index.nlevels == 2
|
|
tm.assert_index_equal(result.index.levels[0], expected)
|
|
|
|
result = df.groupby('A').rolling(20).mean()
|
|
assert result.index.nlevels == 2
|
|
tm.assert_index_equal(result.index.levels[0], expected)
|
|
|
|
def test_median_duplicate_columns(self):
|
|
# GH 14233
|
|
|
|
df = DataFrame(np.random.randn(20, 3),
|
|
columns=list('aaa'),
|
|
index=pd.date_range('2012-01-01', periods=20, freq='s'))
|
|
df2 = df.copy()
|
|
df2.columns = ['a', 'b', 'c']
|
|
expected = df2.resample('5s').median()
|
|
result = df.resample('5s').median()
|
|
expected.columns = result.columns
|
|
assert_frame_equal(result, expected)
|
|
|
|
|
|
class TestTimeGrouper(object):
|
|
|
|
def setup_method(self, method):
|
|
self.ts = Series(np.random.randn(1000),
|
|
index=date_range('1/1/2000', periods=1000))
|
|
|
|
def test_apply(self):
|
|
with tm.assert_produces_warning(FutureWarning,
|
|
check_stacklevel=False):
|
|
grouper = pd.TimeGrouper(freq='A', label='right', closed='right')
|
|
|
|
grouped = self.ts.groupby(grouper)
|
|
|
|
f = lambda x: x.sort_values()[-3:]
|
|
|
|
applied = grouped.apply(f)
|
|
expected = self.ts.groupby(lambda x: x.year).apply(f)
|
|
|
|
applied.index = applied.index.droplevel(0)
|
|
expected.index = expected.index.droplevel(0)
|
|
assert_series_equal(applied, expected)
|
|
|
|
def test_count(self):
|
|
self.ts[::3] = np.nan
|
|
|
|
expected = self.ts.groupby(lambda x: x.year).count()
|
|
|
|
with tm.assert_produces_warning(FutureWarning,
|
|
check_stacklevel=False):
|
|
grouper = pd.TimeGrouper(freq='A', label='right', closed='right')
|
|
result = self.ts.groupby(grouper).count()
|
|
expected.index = result.index
|
|
assert_series_equal(result, expected)
|
|
|
|
result = self.ts.resample('A').count()
|
|
expected.index = result.index
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_numpy_reduction(self):
|
|
result = self.ts.resample('A', closed='right').prod()
|
|
|
|
expected = self.ts.groupby(lambda x: x.year).agg(np.prod)
|
|
expected.index = result.index
|
|
|
|
assert_series_equal(result, expected)
|
|
|
|
def test_apply_iteration(self):
|
|
# #2300
|
|
N = 1000
|
|
ind = pd.date_range(start="2000-01-01", freq="D", periods=N)
|
|
df = DataFrame({'open': 1, 'close': 2}, index=ind)
|
|
tg = TimeGrouper('M')
|
|
|
|
_, grouper, _ = tg._get_grouper(df)
|
|
|
|
# Errors
|
|
grouped = df.groupby(grouper, group_keys=False)
|
|
f = lambda df: df['close'] / df['open']
|
|
|
|
# it works!
|
|
result = grouped.apply(f)
|
|
tm.assert_index_equal(result.index, df.index)
|
|
|
|
def test_panel_aggregation(self):
|
|
ind = pd.date_range('1/1/2000', periods=100)
|
|
data = np.random.randn(2, len(ind), 4)
|
|
|
|
with catch_warnings(record=True):
|
|
wp = Panel(data, items=['Item1', 'Item2'], major_axis=ind,
|
|
minor_axis=['A', 'B', 'C', 'D'])
|
|
|
|
tg = TimeGrouper('M', axis=1)
|
|
_, grouper, _ = tg._get_grouper(wp)
|
|
bingrouped = wp.groupby(grouper)
|
|
binagg = bingrouped.mean()
|
|
|
|
def f(x):
|
|
assert (isinstance(x, Panel))
|
|
return x.mean(1)
|
|
|
|
result = bingrouped.agg(f)
|
|
tm.assert_panel_equal(result, binagg)
|
|
|
|
def test_fails_on_no_datetime_index(self):
|
|
index_names = ('Int64Index', 'Index', 'Float64Index', 'MultiIndex')
|
|
index_funcs = (tm.makeIntIndex,
|
|
tm.makeUnicodeIndex, tm.makeFloatIndex,
|
|
lambda m: tm.makeCustomIndex(m, 2))
|
|
n = 2
|
|
for name, func in zip(index_names, index_funcs):
|
|
index = func(n)
|
|
df = DataFrame({'a': np.random.randn(n)}, index=index)
|
|
with tm.assert_raises_regex(TypeError,
|
|
"Only valid with "
|
|
"DatetimeIndex, TimedeltaIndex "
|
|
"or PeriodIndex, but got an "
|
|
"instance of %r" % name):
|
|
df.groupby(TimeGrouper('D'))
|
|
|
|
def test_aaa_group_order(self):
|
|
# GH 12840
|
|
# check TimeGrouper perform stable sorts
|
|
n = 20
|
|
data = np.random.randn(n, 4)
|
|
df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
|
|
df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2),
|
|
datetime(2013, 1, 3), datetime(2013, 1, 4),
|
|
datetime(2013, 1, 5)] * 4
|
|
grouped = df.groupby(TimeGrouper(key='key', freq='D'))
|
|
|
|
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 1)),
|
|
df[::5])
|
|
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 2)),
|
|
df[1::5])
|
|
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 3)),
|
|
df[2::5])
|
|
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 4)),
|
|
df[3::5])
|
|
tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 5)),
|
|
df[4::5])
|
|
|
|
def test_aggregate_normal(self):
|
|
# check TimeGrouper's aggregation is identical as normal groupby
|
|
|
|
n = 20
|
|
data = np.random.randn(n, 4)
|
|
normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
|
|
normal_df['key'] = [1, 2, 3, 4, 5] * 4
|
|
|
|
dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
|
|
dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2),
|
|
datetime(2013, 1, 3), datetime(2013, 1, 4),
|
|
datetime(2013, 1, 5)] * 4
|
|
|
|
normal_grouped = normal_df.groupby('key')
|
|
dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D'))
|
|
|
|
for func in ['min', 'max', 'prod', 'var', 'std', 'mean']:
|
|
expected = getattr(normal_grouped, func)()
|
|
dt_result = getattr(dt_grouped, func)()
|
|
expected.index = date_range(start='2013-01-01', freq='D',
|
|
periods=5, name='key')
|
|
assert_frame_equal(expected, dt_result)
|
|
|
|
for func in ['count', 'sum']:
|
|
expected = getattr(normal_grouped, func)()
|
|
expected.index = date_range(start='2013-01-01', freq='D',
|
|
periods=5, name='key')
|
|
dt_result = getattr(dt_grouped, func)()
|
|
assert_frame_equal(expected, dt_result)
|
|
|
|
# GH 7453
|
|
for func in ['size']:
|
|
expected = getattr(normal_grouped, func)()
|
|
expected.index = date_range(start='2013-01-01', freq='D',
|
|
periods=5, name='key')
|
|
dt_result = getattr(dt_grouped, func)()
|
|
assert_series_equal(expected, dt_result)
|
|
|
|
# GH 7453
|
|
for func in ['first', 'last']:
|
|
expected = getattr(normal_grouped, func)()
|
|
expected.index = date_range(start='2013-01-01', freq='D',
|
|
periods=5, name='key')
|
|
dt_result = getattr(dt_grouped, func)()
|
|
assert_frame_equal(expected, dt_result)
|
|
|
|
# if TimeGrouper is used included, 'nth' doesn't work yet
|
|
|
|
"""
|
|
for func in ['nth']:
|
|
expected = getattr(normal_grouped, func)(3)
|
|
expected.index = date_range(start='2013-01-01',
|
|
freq='D', periods=5, name='key')
|
|
dt_result = getattr(dt_grouped, func)(3)
|
|
assert_frame_equal(expected, dt_result)
|
|
"""
|
|
|
|
@pytest.mark.parametrize('method, unit', [
|
|
('sum', 0),
|
|
('prod', 1),
|
|
])
|
|
def test_resample_entirly_nat_window(self, method, unit):
|
|
s = pd.Series([0] * 2 + [np.nan] * 2,
|
|
index=pd.date_range('2017', periods=4))
|
|
# 0 / 1 by default
|
|
result = methodcaller(method)(s.resample("2d"))
|
|
expected = pd.Series([0.0, unit],
|
|
index=pd.to_datetime(['2017-01-01',
|
|
'2017-01-03']))
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# min_count=0
|
|
result = methodcaller(method, min_count=0)(s.resample("2d"))
|
|
expected = pd.Series([0.0, unit],
|
|
index=pd.to_datetime(['2017-01-01',
|
|
'2017-01-03']))
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# min_count=1
|
|
result = methodcaller(method, min_count=1)(s.resample("2d"))
|
|
expected = pd.Series([0.0, np.nan],
|
|
index=pd.to_datetime(['2017-01-01',
|
|
'2017-01-03']))
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
@pytest.mark.parametrize('func, fill_value', [
|
|
('min', np.nan),
|
|
('max', np.nan),
|
|
('sum', 0),
|
|
('prod', 1),
|
|
('count', 0),
|
|
])
|
|
def test_aggregate_with_nat(self, func, fill_value):
|
|
# check TimeGrouper's aggregation is identical as normal groupby
|
|
# if NaT is included, 'var', 'std', 'mean', 'first','last'
|
|
# and 'nth' doesn't work yet
|
|
|
|
n = 20
|
|
data = np.random.randn(n, 4).astype('int64')
|
|
normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
|
|
normal_df['key'] = [1, 2, np.nan, 4, 5] * 4
|
|
|
|
dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
|
|
dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT,
|
|
datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4
|
|
|
|
normal_grouped = normal_df.groupby('key')
|
|
dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D'))
|
|
|
|
normal_result = getattr(normal_grouped, func)()
|
|
dt_result = getattr(dt_grouped, func)()
|
|
|
|
pad = DataFrame([[fill_value] * 4], index=[3],
|
|
columns=['A', 'B', 'C', 'D'])
|
|
expected = normal_result.append(pad)
|
|
expected = expected.sort_index()
|
|
expected.index = date_range(start='2013-01-01', freq='D',
|
|
periods=5, name='key')
|
|
assert_frame_equal(expected, dt_result)
|
|
assert dt_result.index.name == 'key'
|
|
|
|
def test_aggregate_with_nat_size(self):
|
|
# GH 9925
|
|
n = 20
|
|
data = np.random.randn(n, 4).astype('int64')
|
|
normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
|
|
normal_df['key'] = [1, 2, np.nan, 4, 5] * 4
|
|
|
|
dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D'])
|
|
dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT,
|
|
datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4
|
|
|
|
normal_grouped = normal_df.groupby('key')
|
|
dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D'))
|
|
|
|
normal_result = normal_grouped.size()
|
|
dt_result = dt_grouped.size()
|
|
|
|
pad = Series([0], index=[3])
|
|
expected = normal_result.append(pad)
|
|
expected = expected.sort_index()
|
|
expected.index = date_range(start='2013-01-01', freq='D',
|
|
periods=5, name='key')
|
|
assert_series_equal(expected, dt_result)
|
|
assert dt_result.index.name == 'key'
|
|
|
|
def test_repr(self):
|
|
# GH18203
|
|
result = repr(TimeGrouper(key='A', freq='H'))
|
|
expected = ("TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, "
|
|
"closed='left', label='left', how='mean', "
|
|
"convention='e', base=0)")
|
|
assert result == expected
|
|
|
|
@pytest.mark.parametrize('method, unit', [
|
|
('sum', 0),
|
|
('prod', 1),
|
|
])
|
|
def test_upsample_sum(self, method, unit):
|
|
s = pd.Series(1, index=pd.date_range("2017", periods=2, freq="H"))
|
|
resampled = s.resample("30T")
|
|
index = pd.to_datetime(['2017-01-01T00:00:00',
|
|
'2017-01-01T00:30:00',
|
|
'2017-01-01T01:00:00'])
|
|
|
|
# 0 / 1 by default
|
|
result = methodcaller(method)(resampled)
|
|
expected = pd.Series([1, unit, 1], index=index)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# min_count=0
|
|
result = methodcaller(method, min_count=0)(resampled)
|
|
expected = pd.Series([1, unit, 1], index=index)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# min_count=1
|
|
result = methodcaller(method, min_count=1)(resampled)
|
|
expected = pd.Series([1, np.nan, 1], index=index)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# min_count>1
|
|
result = methodcaller(method, min_count=2)(resampled)
|
|
expected = pd.Series([np.nan, np.nan, np.nan], index=index)
|
|
tm.assert_series_equal(result, expected)
|