laywerrobot/lib/python3.6/site-packages/pandas/tests/indexes/period/test_indexing.py
2020-08-27 21:55:39 +02:00

635 lines
24 KiB
Python

from datetime import datetime, timedelta
import pytest
import numpy as np
import pandas as pd
from pandas.util import testing as tm
from pandas.compat import lrange
from pandas._libs import tslibs
from pandas import (PeriodIndex, Series, DatetimeIndex,
period_range, Period, notna)
from pandas._libs.tslibs import period as libperiod
class TestGetItem(object):
def test_getitem(self):
idx1 = pd.period_range('2011-01-01', '2011-01-31', freq='D',
name='idx')
for idx in [idx1]:
result = idx[0]
assert result == pd.Period('2011-01-01', freq='D')
result = idx[-1]
assert result == pd.Period('2011-01-31', freq='D')
result = idx[0:5]
expected = pd.period_range('2011-01-01', '2011-01-05', freq='D',
name='idx')
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == 'D'
result = idx[0:10:2]
expected = pd.PeriodIndex(['2011-01-01', '2011-01-03',
'2011-01-05',
'2011-01-07', '2011-01-09'],
freq='D', name='idx')
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == 'D'
result = idx[-20:-5:3]
expected = pd.PeriodIndex(['2011-01-12', '2011-01-15',
'2011-01-18',
'2011-01-21', '2011-01-24'],
freq='D', name='idx')
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == 'D'
result = idx[4::-1]
expected = PeriodIndex(['2011-01-05', '2011-01-04', '2011-01-03',
'2011-01-02', '2011-01-01'],
freq='D', name='idx')
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == 'D'
def test_getitem_index(self):
idx = period_range('2007-01', periods=10, freq='M', name='x')
result = idx[[1, 3, 5]]
exp = pd.PeriodIndex(['2007-02', '2007-04', '2007-06'],
freq='M', name='x')
tm.assert_index_equal(result, exp)
result = idx[[True, True, False, False, False,
True, True, False, False, False]]
exp = pd.PeriodIndex(['2007-01', '2007-02', '2007-06', '2007-07'],
freq='M', name='x')
tm.assert_index_equal(result, exp)
def test_getitem_partial(self):
rng = period_range('2007-01', periods=50, freq='M')
ts = Series(np.random.randn(len(rng)), rng)
pytest.raises(KeyError, ts.__getitem__, '2006')
result = ts['2008']
assert (result.index.year == 2008).all()
result = ts['2008':'2009']
assert len(result) == 24
result = ts['2008-1':'2009-12']
assert len(result) == 24
result = ts['2008Q1':'2009Q4']
assert len(result) == 24
result = ts[:'2009']
assert len(result) == 36
result = ts['2009':]
assert len(result) == 50 - 24
exp = result
result = ts[24:]
tm.assert_series_equal(exp, result)
ts = ts[10:].append(ts[10:])
tm.assert_raises_regex(KeyError,
"left slice bound for non-unique "
"label: '2008'",
ts.__getitem__, slice('2008', '2009'))
def test_getitem_datetime(self):
rng = period_range(start='2012-01-01', periods=10, freq='W-MON')
ts = Series(lrange(len(rng)), index=rng)
dt1 = datetime(2011, 10, 2)
dt4 = datetime(2012, 4, 20)
rs = ts[dt1:dt4]
tm.assert_series_equal(rs, ts)
def test_getitem_nat(self):
idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M')
assert idx[0] == pd.Period('2011-01', freq='M')
assert idx[1] is pd.NaT
s = pd.Series([0, 1, 2], index=idx)
assert s[pd.NaT] == 1
s = pd.Series(idx, index=idx)
assert (s[pd.Period('2011-01', freq='M')] ==
pd.Period('2011-01', freq='M'))
assert s[pd.NaT] is pd.NaT
def test_getitem_list_periods(self):
# GH 7710
rng = period_range(start='2012-01-01', periods=10, freq='D')
ts = Series(lrange(len(rng)), index=rng)
exp = ts.iloc[[1]]
tm.assert_series_equal(ts[[Period('2012-01-02', freq='D')]], exp)
def test_getitem_seconds(self):
# GH 6716
didx = DatetimeIndex(start='2013/01/01 09:00:00', freq='S',
periods=4000)
pidx = PeriodIndex(start='2013/01/01 09:00:00', freq='S', periods=4000)
for idx in [didx, pidx]:
# getitem against index should raise ValueError
values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H',
'2013/02/01 09:00']
for v in values:
# GH7116
# these show deprecations as we are trying
# to slice with non-integer indexers
# with pytest.raises(IndexError):
# idx[v]
continue
s = Series(np.random.rand(len(idx)), index=idx)
tm.assert_series_equal(s['2013/01/01 10:00'], s[3600:3660])
tm.assert_series_equal(s['2013/01/01 9H'], s[:3600])
for d in ['2013/01/01', '2013/01', '2013']:
tm.assert_series_equal(s[d], s)
def test_getitem_day(self):
# GH 6716
# Confirm DatetimeIndex and PeriodIndex works identically
didx = DatetimeIndex(start='2013/01/01', freq='D', periods=400)
pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400)
for idx in [didx, pidx]:
# getitem against index should raise ValueError
values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H',
'2013/02/01 09:00']
for v in values:
# GH7116
# these show deprecations as we are trying
# to slice with non-integer indexers
# with pytest.raises(IndexError):
# idx[v]
continue
s = Series(np.random.rand(len(idx)), index=idx)
tm.assert_series_equal(s['2013/01'], s[0:31])
tm.assert_series_equal(s['2013/02'], s[31:59])
tm.assert_series_equal(s['2014'], s[365:])
invalid = ['2013/02/01 9H', '2013/02/01 09:00']
for v in invalid:
with pytest.raises(KeyError):
s[v]
class TestWhere(object):
@pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
def test_where(self, klass):
i = period_range('20130101', periods=5, freq='D')
cond = [True] * len(i)
expected = i
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)
cond = [False] + [True] * (len(i) - 1)
expected = PeriodIndex([pd.NaT] + i[1:].tolist(), freq='D')
result = i.where(klass(cond))
tm.assert_index_equal(result, expected)
def test_where_other(self):
i = period_range('20130101', periods=5, freq='D')
for arr in [np.nan, pd.NaT]:
result = i.where(notna(i), other=np.nan)
expected = i
tm.assert_index_equal(result, expected)
i2 = i.copy()
i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(),
freq='D')
result = i.where(notna(i2), i2)
tm.assert_index_equal(result, i2)
i2 = i.copy()
i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(),
freq='D')
result = i.where(notna(i2), i2.values)
tm.assert_index_equal(result, i2)
class TestTake(object):
def test_take(self):
# GH#10295
idx1 = pd.period_range('2011-01-01', '2011-01-31', freq='D',
name='idx')
for idx in [idx1]:
result = idx.take([0])
assert result == pd.Period('2011-01-01', freq='D')
result = idx.take([5])
assert result == pd.Period('2011-01-06', freq='D')
result = idx.take([0, 1, 2])
expected = pd.period_range('2011-01-01', '2011-01-03', freq='D',
name='idx')
tm.assert_index_equal(result, expected)
assert result.freq == 'D'
assert result.freq == expected.freq
result = idx.take([0, 2, 4])
expected = pd.PeriodIndex(['2011-01-01', '2011-01-03',
'2011-01-05'], freq='D', name='idx')
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == 'D'
result = idx.take([7, 4, 1])
expected = pd.PeriodIndex(['2011-01-08', '2011-01-05',
'2011-01-02'],
freq='D', name='idx')
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == 'D'
result = idx.take([3, 2, 5])
expected = PeriodIndex(['2011-01-04', '2011-01-03', '2011-01-06'],
freq='D', name='idx')
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == 'D'
result = idx.take([-3, 2, 5])
expected = PeriodIndex(['2011-01-29', '2011-01-03', '2011-01-06'],
freq='D', name='idx')
tm.assert_index_equal(result, expected)
assert result.freq == expected.freq
assert result.freq == 'D'
def test_take_misc(self):
index = PeriodIndex(start='1/1/10', end='12/31/12', freq='D',
name='idx')
expected = PeriodIndex([datetime(2010, 1, 6), datetime(2010, 1, 7),
datetime(2010, 1, 9), datetime(2010, 1, 13)],
freq='D', name='idx')
taken1 = index.take([5, 6, 8, 12])
taken2 = index[[5, 6, 8, 12]]
for taken in [taken1, taken2]:
tm.assert_index_equal(taken, expected)
assert isinstance(taken, PeriodIndex)
assert taken.freq == index.freq
assert taken.name == expected.name
def test_take_fill_value(self):
# GH#12631
idx = pd.PeriodIndex(['2011-01-01', '2011-02-01', '2011-03-01'],
name='xxx', freq='D')
result = idx.take(np.array([1, 0, -1]))
expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'],
name='xxx', freq='D')
tm.assert_index_equal(result, expected)
# fill_value
result = idx.take(np.array([1, 0, -1]), fill_value=True)
expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', 'NaT'],
name='xxx', freq='D')
tm.assert_index_equal(result, expected)
# allow_fill=False
result = idx.take(np.array([1, 0, -1]), allow_fill=False,
fill_value=True)
expected = pd.PeriodIndex(['2011-02-01', '2011-01-01', '2011-03-01'],
name='xxx', freq='D')
tm.assert_index_equal(result, expected)
msg = ('When allow_fill=True and fill_value is not None, '
'all indices must be >= -1')
with tm.assert_raises_regex(ValueError, msg):
idx.take(np.array([1, 0, -2]), fill_value=True)
with tm.assert_raises_regex(ValueError, msg):
idx.take(np.array([1, 0, -5]), fill_value=True)
with pytest.raises(IndexError):
idx.take(np.array([1, -5]))
class TestIndexing(object):
def test_get_loc_msg(self):
idx = period_range('2000-1-1', freq='A', periods=10)
bad_period = Period('2012', 'A')
pytest.raises(KeyError, idx.get_loc, bad_period)
try:
idx.get_loc(bad_period)
except KeyError as inst:
assert inst.args[0] == bad_period
def test_get_loc_nat(self):
didx = DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03'])
pidx = PeriodIndex(['2011-01-01', 'NaT', '2011-01-03'], freq='M')
# check DatetimeIndex compat
for idx in [didx, pidx]:
assert idx.get_loc(pd.NaT) == 1
assert idx.get_loc(None) == 1
assert idx.get_loc(float('nan')) == 1
assert idx.get_loc(np.nan) == 1
def test_get_loc(self):
# GH 17717
p0 = pd.Period('2017-09-01')
p1 = pd.Period('2017-09-02')
p2 = pd.Period('2017-09-03')
# get the location of p1/p2 from
# monotonic increasing PeriodIndex with non-duplicate
idx0 = pd.PeriodIndex([p0, p1, p2])
expected_idx1_p1 = 1
expected_idx1_p2 = 2
assert idx0.get_loc(p1) == expected_idx1_p1
assert idx0.get_loc(str(p1)) == expected_idx1_p1
assert idx0.get_loc(p2) == expected_idx1_p2
assert idx0.get_loc(str(p2)) == expected_idx1_p2
pytest.raises(tslibs.parsing.DateParseError, idx0.get_loc, 'foo')
pytest.raises(KeyError, idx0.get_loc, 1.1)
pytest.raises(TypeError, idx0.get_loc, idx0)
# get the location of p1/p2 from
# monotonic increasing PeriodIndex with duplicate
idx1 = pd.PeriodIndex([p1, p1, p2])
expected_idx1_p1 = slice(0, 2)
expected_idx1_p2 = 2
assert idx1.get_loc(p1) == expected_idx1_p1
assert idx1.get_loc(str(p1)) == expected_idx1_p1
assert idx1.get_loc(p2) == expected_idx1_p2
assert idx1.get_loc(str(p2)) == expected_idx1_p2
pytest.raises(tslibs.parsing.DateParseError, idx1.get_loc, 'foo')
pytest.raises(KeyError, idx1.get_loc, 1.1)
pytest.raises(TypeError, idx1.get_loc, idx1)
# get the location of p1/p2 from
# non-monotonic increasing/decreasing PeriodIndex with duplicate
idx2 = pd.PeriodIndex([p2, p1, p2])
expected_idx2_p1 = 1
expected_idx2_p2 = np.array([True, False, True])
assert idx2.get_loc(p1) == expected_idx2_p1
assert idx2.get_loc(str(p1)) == expected_idx2_p1
tm.assert_numpy_array_equal(idx2.get_loc(p2), expected_idx2_p2)
tm.assert_numpy_array_equal(idx2.get_loc(str(p2)), expected_idx2_p2)
def test_is_monotonic_increasing(self):
# GH 17717
p0 = pd.Period('2017-09-01')
p1 = pd.Period('2017-09-02')
p2 = pd.Period('2017-09-03')
idx_inc0 = pd.PeriodIndex([p0, p1, p2])
idx_inc1 = pd.PeriodIndex([p0, p1, p1])
idx_dec0 = pd.PeriodIndex([p2, p1, p0])
idx_dec1 = pd.PeriodIndex([p2, p1, p1])
idx = pd.PeriodIndex([p1, p2, p0])
assert idx_inc0.is_monotonic_increasing
assert idx_inc1.is_monotonic_increasing
assert not idx_dec0.is_monotonic_increasing
assert not idx_dec1.is_monotonic_increasing
assert not idx.is_monotonic_increasing
def test_is_monotonic_decreasing(self):
# GH 17717
p0 = pd.Period('2017-09-01')
p1 = pd.Period('2017-09-02')
p2 = pd.Period('2017-09-03')
idx_inc0 = pd.PeriodIndex([p0, p1, p2])
idx_inc1 = pd.PeriodIndex([p0, p1, p1])
idx_dec0 = pd.PeriodIndex([p2, p1, p0])
idx_dec1 = pd.PeriodIndex([p2, p1, p1])
idx = pd.PeriodIndex([p1, p2, p0])
assert not idx_inc0.is_monotonic_decreasing
assert not idx_inc1.is_monotonic_decreasing
assert idx_dec0.is_monotonic_decreasing
assert idx_dec1.is_monotonic_decreasing
assert not idx.is_monotonic_decreasing
def test_is_unique(self):
# GH 17717
p0 = pd.Period('2017-09-01')
p1 = pd.Period('2017-09-02')
p2 = pd.Period('2017-09-03')
idx0 = pd.PeriodIndex([p0, p1, p2])
assert idx0.is_unique
idx1 = pd.PeriodIndex([p1, p1, p2])
assert not idx1.is_unique
def test_contains(self):
# GH 17717
p0 = pd.Period('2017-09-01')
p1 = pd.Period('2017-09-02')
p2 = pd.Period('2017-09-03')
p3 = pd.Period('2017-09-04')
ps0 = [p0, p1, p2]
idx0 = pd.PeriodIndex(ps0)
for p in ps0:
assert idx0.contains(p)
assert p in idx0
assert idx0.contains(str(p))
assert str(p) in idx0
assert idx0.contains('2017-09-01 00:00:01')
assert '2017-09-01 00:00:01' in idx0
assert idx0.contains('2017-09')
assert '2017-09' in idx0
assert not idx0.contains(p3)
assert p3 not in idx0
def test_get_value(self):
# GH 17717
p0 = pd.Period('2017-09-01')
p1 = pd.Period('2017-09-02')
p2 = pd.Period('2017-09-03')
idx0 = pd.PeriodIndex([p0, p1, p2])
input0 = np.array([1, 2, 3])
expected0 = 2
result0 = idx0.get_value(input0, p1)
assert result0 == expected0
idx1 = pd.PeriodIndex([p1, p1, p2])
input1 = np.array([1, 2, 3])
expected1 = np.array([1, 2])
result1 = idx1.get_value(input1, p1)
tm.assert_numpy_array_equal(result1, expected1)
idx2 = pd.PeriodIndex([p1, p2, p1])
input2 = np.array([1, 2, 3])
expected2 = np.array([1, 3])
result2 = idx2.get_value(input2, p1)
tm.assert_numpy_array_equal(result2, expected2)
def test_get_indexer(self):
# GH 17717
p1 = pd.Period('2017-09-01')
p2 = pd.Period('2017-09-04')
p3 = pd.Period('2017-09-07')
tp0 = pd.Period('2017-08-31')
tp1 = pd.Period('2017-09-02')
tp2 = pd.Period('2017-09-05')
tp3 = pd.Period('2017-09-09')
idx = pd.PeriodIndex([p1, p2, p3])
tm.assert_numpy_array_equal(idx.get_indexer(idx),
np.array([0, 1, 2], dtype=np.intp))
target = pd.PeriodIndex([tp0, tp1, tp2, tp3])
tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'),
np.array([-1, 0, 1, 2], dtype=np.intp))
tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'),
np.array([0, 1, 2, -1], dtype=np.intp))
tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'),
np.array([0, 0, 1, 2], dtype=np.intp))
res = idx.get_indexer(target, 'nearest',
tolerance=pd.Timedelta('1 day'))
tm.assert_numpy_array_equal(res,
np.array([0, 0, 1, -1], dtype=np.intp))
def test_get_indexer_non_unique(self):
# GH 17717
p1 = pd.Period('2017-09-02')
p2 = pd.Period('2017-09-03')
p3 = pd.Period('2017-09-04')
p4 = pd.Period('2017-09-05')
idx1 = pd.PeriodIndex([p1, p2, p1])
idx2 = pd.PeriodIndex([p2, p1, p3, p4])
result = idx1.get_indexer_non_unique(idx2)
expected_indexer = np.array([1, 0, 2, -1, -1], dtype=np.intp)
expected_missing = np.array([2, 3], dtype=np.int64)
tm.assert_numpy_array_equal(result[0], expected_indexer)
tm.assert_numpy_array_equal(result[1], expected_missing)
# TODO: This method came from test_period; de-dup with version above
def test_get_loc2(self):
idx = pd.period_range('2000-01-01', periods=3)
for method in [None, 'pad', 'backfill', 'nearest']:
assert idx.get_loc(idx[1], method) == 1
assert idx.get_loc(idx[1].asfreq('H', how='start'), method) == 1
assert idx.get_loc(idx[1].to_timestamp(), method) == 1
assert idx.get_loc(idx[1].to_timestamp()
.to_pydatetime(), method) == 1
assert idx.get_loc(str(idx[1]), method) == 1
idx = pd.period_range('2000-01-01', periods=5)[::2]
assert idx.get_loc('2000-01-02T12', method='nearest',
tolerance='1 day') == 1
assert idx.get_loc('2000-01-02T12', method='nearest',
tolerance=pd.Timedelta('1D')) == 1
assert idx.get_loc('2000-01-02T12', method='nearest',
tolerance=np.timedelta64(1, 'D')) == 1
assert idx.get_loc('2000-01-02T12', method='nearest',
tolerance=timedelta(1)) == 1
with tm.assert_raises_regex(ValueError,
'unit abbreviation w/o a number'):
idx.get_loc('2000-01-10', method='nearest', tolerance='foo')
msg = 'Input has different freq from PeriodIndex\\(freq=D\\)'
with tm.assert_raises_regex(ValueError, msg):
idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour')
with pytest.raises(KeyError):
idx.get_loc('2000-01-10', method='nearest', tolerance='1 day')
with pytest.raises(
ValueError,
match='list-like tolerance size must match target index size'):
idx.get_loc('2000-01-10', method='nearest',
tolerance=[pd.Timedelta('1 day').to_timedelta64(),
pd.Timedelta('1 day').to_timedelta64()])
# TODO: This method came from test_period; de-dup with version above
def test_get_indexer2(self):
idx = pd.period_range('2000-01-01', periods=3).asfreq('H', how='start')
tm.assert_numpy_array_equal(idx.get_indexer(idx),
np.array([0, 1, 2], dtype=np.intp))
target = pd.PeriodIndex(['1999-12-31T23', '2000-01-01T12',
'2000-01-02T01'], freq='H')
tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'),
np.array([-1, 0, 1], dtype=np.intp))
tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'),
np.array([0, 1, 2], dtype=np.intp))
tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'),
np.array([0, 1, 1], dtype=np.intp))
tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest',
tolerance='1 hour'),
np.array([0, -1, 1], dtype=np.intp))
msg = 'Input has different freq from PeriodIndex\\(freq=H\\)'
with tm.assert_raises_regex(ValueError, msg):
idx.get_indexer(target, 'nearest', tolerance='1 minute')
tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest',
tolerance='1 day'),
np.array([0, 1, 1], dtype=np.intp))
tol_raw = [pd.Timedelta('1 hour'),
pd.Timedelta('1 hour'),
np.timedelta64(1, 'D'), ]
tm.assert_numpy_array_equal(
idx.get_indexer(target, 'nearest',
tolerance=[np.timedelta64(x) for x in tol_raw]),
np.array([0, -1, 1], dtype=np.intp))
tol_bad = [pd.Timedelta('2 hour').to_timedelta64(),
pd.Timedelta('1 hour').to_timedelta64(),
np.timedelta64(1, 'M'), ]
with pytest.raises(
libperiod.IncompatibleFrequency,
match='Input has different freq from'):
idx.get_indexer(target, 'nearest', tolerance=tol_bad)
def test_indexing(self):
# GH 4390, iat incorrectly indexing
index = period_range('1/1/2001', periods=10)
s = Series(np.random.randn(10), index=index)
expected = s[index[0]]
result = s.iat[0]
assert expected == result
def test_period_index_indexer(self):
# GH4125
idx = pd.period_range('2002-01', '2003-12', freq='M')
df = pd.DataFrame(pd.np.random.randn(24, 10), index=idx)
tm.assert_frame_equal(df, df.loc[idx])
tm.assert_frame_equal(df, df.loc[list(idx)])
tm.assert_frame_equal(df, df.loc[list(idx)])
tm.assert_frame_equal(df.iloc[0:5], df.loc[idx[0:5]])
tm.assert_frame_equal(df, df.loc[list(idx)])