laywerrobot/lib/python3.6/site-packages/pandas/tests/indexes/period/test_period.py

547 lines
19 KiB
Python
Raw Normal View History

2020-08-27 21:55:39 +02:00
import pytest
import numpy as np
import pandas as pd
import pandas.util._test_decorators as td
from pandas.util import testing as tm
from pandas import (PeriodIndex, period_range, DatetimeIndex, NaT,
Index, Period, Series, DataFrame, date_range,
offsets)
from ..datetimelike import DatetimeLike
class TestPeriodIndex(DatetimeLike):
_holder = PeriodIndex
def setup_method(self, method):
self.indices = dict(index=tm.makePeriodIndex(10),
index_dec=period_range('20130101', periods=10,
freq='D')[::-1])
self.setup_indices()
def create_index(self):
return period_range('20130101', periods=5, freq='D')
def test_pickle_compat_construction(self):
pass
@pytest.mark.parametrize('freq', ['D', 'M', 'A'])
def test_pickle_round_trip(self, freq):
idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq=freq)
result = tm.round_trip_pickle(idx)
tm.assert_index_equal(result, idx)
def test_where(self):
# This is handled in test_indexing
pass
def test_repeat(self):
# GH10183
idx = pd.period_range('2000-01-01', periods=3, freq='D')
res = idx.repeat(3)
exp = PeriodIndex(idx.values.repeat(3), freq='D')
tm.assert_index_equal(res, exp)
assert res.freqstr == 'D'
def test_fillna_period(self):
# GH 11343
idx = pd.PeriodIndex(['2011-01-01 09:00', pd.NaT,
'2011-01-01 11:00'], freq='H')
exp = pd.PeriodIndex(['2011-01-01 09:00', '2011-01-01 10:00',
'2011-01-01 11:00'], freq='H')
tm.assert_index_equal(
idx.fillna(pd.Period('2011-01-01 10:00', freq='H')), exp)
exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'), 'x',
pd.Period('2011-01-01 11:00', freq='H')], dtype=object)
tm.assert_index_equal(idx.fillna('x'), exp)
exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'),
pd.Period('2011-01-01', freq='D'),
pd.Period('2011-01-01 11:00', freq='H')], dtype=object)
tm.assert_index_equal(idx.fillna(
pd.Period('2011-01-01', freq='D')), exp)
def test_no_millisecond_field(self):
with pytest.raises(AttributeError):
DatetimeIndex.millisecond
with pytest.raises(AttributeError):
DatetimeIndex([]).millisecond
def test_difference_freq(self):
# GH14323: difference of Period MUST preserve frequency
# but the ability to union results must be preserved
index = period_range("20160920", "20160925", freq="D")
other = period_range("20160921", "20160924", freq="D")
expected = PeriodIndex(["20160920", "20160925"], freq='D')
idx_diff = index.difference(other)
tm.assert_index_equal(idx_diff, expected)
tm.assert_attr_equal('freq', idx_diff, expected)
other = period_range("20160922", "20160925", freq="D")
idx_diff = index.difference(other)
expected = PeriodIndex(["20160920", "20160921"], freq='D')
tm.assert_index_equal(idx_diff, expected)
tm.assert_attr_equal('freq', idx_diff, expected)
def test_hash_error(self):
index = period_range('20010101', periods=10)
with tm.assert_raises_regex(TypeError, "unhashable type: %r" %
type(index).__name__):
hash(index)
def test_make_time_series(self):
index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009')
series = Series(1, index=index)
assert isinstance(series, Series)
def test_shallow_copy_empty(self):
# GH13067
idx = PeriodIndex([], freq='M')
result = idx._shallow_copy()
expected = idx
tm.assert_index_equal(result, expected)
def test_dtype_str(self):
pi = pd.PeriodIndex([], freq='M')
assert pi.dtype_str == 'period[M]'
assert pi.dtype_str == str(pi.dtype)
pi = pd.PeriodIndex([], freq='3M')
assert pi.dtype_str == 'period[3M]'
assert pi.dtype_str == str(pi.dtype)
def test_view_asi8(self):
idx = pd.PeriodIndex([], freq='M')
exp = np.array([], dtype=np.int64)
tm.assert_numpy_array_equal(idx.view('i8'), exp)
tm.assert_numpy_array_equal(idx.asi8, exp)
idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M')
exp = np.array([492, -9223372036854775808], dtype=np.int64)
tm.assert_numpy_array_equal(idx.view('i8'), exp)
tm.assert_numpy_array_equal(idx.asi8, exp)
exp = np.array([14975, -9223372036854775808], dtype=np.int64)
idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D')
tm.assert_numpy_array_equal(idx.view('i8'), exp)
tm.assert_numpy_array_equal(idx.asi8, exp)
def test_values(self):
idx = pd.PeriodIndex([], freq='M')
exp = np.array([], dtype=np.object)
tm.assert_numpy_array_equal(idx.values, exp)
tm.assert_numpy_array_equal(idx.get_values(), exp)
exp = np.array([], dtype=np.int64)
tm.assert_numpy_array_equal(idx._ndarray_values, exp)
idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M')
exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], dtype=object)
tm.assert_numpy_array_equal(idx.values, exp)
tm.assert_numpy_array_equal(idx.get_values(), exp)
exp = np.array([492, -9223372036854775808], dtype=np.int64)
tm.assert_numpy_array_equal(idx._ndarray_values, exp)
idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D')
exp = np.array([pd.Period('2011-01-01', freq='D'), pd.NaT],
dtype=object)
tm.assert_numpy_array_equal(idx.values, exp)
tm.assert_numpy_array_equal(idx.get_values(), exp)
exp = np.array([14975, -9223372036854775808], dtype=np.int64)
tm.assert_numpy_array_equal(idx._ndarray_values, exp)
def test_period_index_length(self):
pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009')
assert len(pi) == 9
pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009')
assert len(pi) == 4 * 9
pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009')
assert len(pi) == 12 * 9
start = Period('02-Apr-2005', 'B')
i1 = PeriodIndex(start=start, periods=20)
assert len(i1) == 20
assert i1.freq == start.freq
assert i1[0] == start
end_intv = Period('2006-12-31', 'W')
i1 = PeriodIndex(end=end_intv, periods=10)
assert len(i1) == 10
assert i1.freq == end_intv.freq
assert i1[-1] == end_intv
end_intv = Period('2006-12-31', '1w')
i2 = PeriodIndex(end=end_intv, periods=10)
assert len(i1) == len(i2)
assert (i1 == i2).all()
assert i1.freq == i2.freq
end_intv = Period('2006-12-31', ('w', 1))
i2 = PeriodIndex(end=end_intv, periods=10)
assert len(i1) == len(i2)
assert (i1 == i2).all()
assert i1.freq == i2.freq
try:
PeriodIndex(start=start, end=end_intv)
raise AssertionError('Cannot allow mixed freq for start and end')
except ValueError:
pass
end_intv = Period('2005-05-01', 'B')
i1 = PeriodIndex(start=start, end=end_intv)
try:
PeriodIndex(start=start)
raise AssertionError(
'Must specify periods if missing start or end')
except ValueError:
pass
# infer freq from first element
i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')])
assert len(i2) == 2
assert i2[0] == end_intv
i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')]))
assert len(i2) == 2
assert i2[0] == end_intv
# Mixed freq should fail
vals = [end_intv, Period('2006-12-31', 'w')]
pytest.raises(ValueError, PeriodIndex, vals)
vals = np.array(vals)
pytest.raises(ValueError, PeriodIndex, vals)
def test_fields(self):
# year, month, day, hour, minute
# second, weekofyear, week, dayofweek, weekday, dayofyear, quarter
# qyear
pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2005')
self._check_all_fields(pi)
pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2002')
self._check_all_fields(pi)
pi = PeriodIndex(freq='M', start='1/1/2001', end='1/1/2002')
self._check_all_fields(pi)
pi = PeriodIndex(freq='D', start='12/1/2001', end='6/1/2001')
self._check_all_fields(pi)
pi = PeriodIndex(freq='B', start='12/1/2001', end='6/1/2001')
self._check_all_fields(pi)
pi = PeriodIndex(freq='H', start='12/31/2001', end='1/1/2002 23:00')
self._check_all_fields(pi)
pi = PeriodIndex(freq='Min', start='12/31/2001', end='1/1/2002 00:20')
self._check_all_fields(pi)
pi = PeriodIndex(freq='S', start='12/31/2001 00:00:00',
end='12/31/2001 00:05:00')
self._check_all_fields(pi)
end_intv = Period('2006-12-31', 'W')
i1 = PeriodIndex(end=end_intv, periods=10)
self._check_all_fields(i1)
def _check_all_fields(self, periodindex):
fields = ['year', 'month', 'day', 'hour', 'minute', 'second',
'weekofyear', 'week', 'dayofweek', 'dayofyear',
'quarter', 'qyear', 'days_in_month']
periods = list(periodindex)
s = pd.Series(periodindex)
for field in fields:
field_idx = getattr(periodindex, field)
assert len(periodindex) == len(field_idx)
for x, val in zip(periods, field_idx):
assert getattr(x, field) == val
if len(s) == 0:
continue
field_s = getattr(s.dt, field)
assert len(periodindex) == len(field_s)
for x, val in zip(periods, field_s):
assert getattr(x, field) == val
def test_period_set_index_reindex(self):
# GH 6631
df = DataFrame(np.random.random(6))
idx1 = period_range('2011/01/01', periods=6, freq='M')
idx2 = period_range('2013', periods=6, freq='A')
df = df.set_index(idx1)
tm.assert_index_equal(df.index, idx1)
df = df.set_index(idx2)
tm.assert_index_equal(df.index, idx2)
def test_factorize(self):
idx1 = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02',
'2014-03', '2014-03'], freq='M')
exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
exp_idx = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M')
arr, idx = idx1.factorize()
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
arr, idx = idx1.factorize(sort=True)
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
idx2 = pd.PeriodIndex(['2014-03', '2014-03', '2014-02', '2014-01',
'2014-03', '2014-01'], freq='M')
exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp)
arr, idx = idx2.factorize(sort=True)
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp)
exp_idx = PeriodIndex(['2014-03', '2014-02', '2014-01'], freq='M')
arr, idx = idx2.factorize()
tm.assert_numpy_array_equal(arr, exp_arr)
tm.assert_index_equal(idx, exp_idx)
def test_is_(self):
create_index = lambda: PeriodIndex(freq='A', start='1/1/2001',
end='12/1/2009')
index = create_index()
assert index.is_(index)
assert not index.is_(create_index())
assert index.is_(index.view())
assert index.is_(index.view().view().view().view().view())
assert index.view().is_(index)
ind2 = index.view()
index.name = "Apple"
assert ind2.is_(index)
assert not index.is_(index[:])
assert not index.is_(index.asfreq('M'))
assert not index.is_(index.asfreq('A'))
assert not index.is_(index - 2)
assert not index.is_(index - 0)
def test_contains(self):
rng = period_range('2007-01', freq='M', periods=10)
assert Period('2007-01', freq='M') in rng
assert not Period('2007-01', freq='D') in rng
assert not Period('2007-01', freq='2M') in rng
def test_contains_nat(self):
# see gh-13582
idx = period_range('2007-01', freq='M', periods=10)
assert pd.NaT not in idx
assert None not in idx
assert float('nan') not in idx
assert np.nan not in idx
idx = pd.PeriodIndex(['2011-01', 'NaT', '2011-02'], freq='M')
assert pd.NaT in idx
assert None in idx
assert float('nan') in idx
assert np.nan in idx
def test_periods_number_check(self):
with pytest.raises(ValueError):
period_range('2011-1-1', '2012-1-1', 'B')
def test_index_duplicate_periods(self):
# monotonic
idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq='A-JUN')
ts = Series(np.random.randn(len(idx)), index=idx)
result = ts[2007]
expected = ts[1:3]
tm.assert_series_equal(result, expected)
result[:] = 1
assert (ts[1:3] == 1).all()
# not monotonic
idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq='A-JUN')
ts = Series(np.random.randn(len(idx)), index=idx)
result = ts[2007]
expected = ts[idx == 2007]
tm.assert_series_equal(result, expected)
def test_index_unique(self):
idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq='A-JUN')
expected = PeriodIndex([2000, 2007, 2009], freq='A-JUN')
tm.assert_index_equal(idx.unique(), expected)
assert idx.nunique() == 3
idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq='A-JUN',
tz='US/Eastern')
expected = PeriodIndex([2000, 2007, 2009], freq='A-JUN',
tz='US/Eastern')
tm.assert_index_equal(idx.unique(), expected)
assert idx.nunique() == 3
def test_shift(self):
# This is tested in test_arithmetic
pass
@td.skip_if_32bit
def test_ndarray_compat_properties(self):
super(TestPeriodIndex, self).test_ndarray_compat_properties()
def test_negative_ordinals(self):
Period(ordinal=-1000, freq='A')
Period(ordinal=0, freq='A')
idx1 = PeriodIndex(ordinal=[-1, 0, 1], freq='A')
idx2 = PeriodIndex(ordinal=np.array([-1, 0, 1]), freq='A')
tm.assert_index_equal(idx1, idx2)
def test_pindex_fieldaccessor_nat(self):
idx = PeriodIndex(['2011-01', '2011-02', 'NaT',
'2012-03', '2012-04'], freq='D', name='name')
exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64, name='name')
tm.assert_index_equal(idx.year, exp)
exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name='name')
tm.assert_index_equal(idx.month, exp)
def test_pindex_qaccess(self):
pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q')
s = Series(np.random.rand(len(pi)), index=pi).cumsum()
# Todo: fix these accessors!
assert s['05Q4'] == s[2]
def test_numpy_repeat(self):
index = period_range('20010101', periods=2)
expected = PeriodIndex([Period('2001-01-01'), Period('2001-01-01'),
Period('2001-01-02'), Period('2001-01-02')])
tm.assert_index_equal(np.repeat(index, 2), expected)
msg = "the 'axis' parameter is not supported"
tm.assert_raises_regex(
ValueError, msg, np.repeat, index, 2, axis=1)
def test_pindex_multiples(self):
pi = PeriodIndex(start='1/1/11', end='12/31/11', freq='2M')
expected = PeriodIndex(['2011-01', '2011-03', '2011-05', '2011-07',
'2011-09', '2011-11'], freq='2M')
tm.assert_index_equal(pi, expected)
assert pi.freq == offsets.MonthEnd(2)
assert pi.freqstr == '2M'
pi = period_range(start='1/1/11', end='12/31/11', freq='2M')
tm.assert_index_equal(pi, expected)
assert pi.freq == offsets.MonthEnd(2)
assert pi.freqstr == '2M'
pi = period_range(start='1/1/11', periods=6, freq='2M')
tm.assert_index_equal(pi, expected)
assert pi.freq == offsets.MonthEnd(2)
assert pi.freqstr == '2M'
def test_iteration(self):
index = PeriodIndex(start='1/1/10', periods=4, freq='B')
result = list(index)
assert isinstance(result[0], Period)
assert result[0].freq == index.freq
def test_is_full(self):
index = PeriodIndex([2005, 2007, 2009], freq='A')
assert not index.is_full
index = PeriodIndex([2005, 2006, 2007], freq='A')
assert index.is_full
index = PeriodIndex([2005, 2005, 2007], freq='A')
assert not index.is_full
index = PeriodIndex([2005, 2005, 2006], freq='A')
assert index.is_full
index = PeriodIndex([2006, 2005, 2005], freq='A')
pytest.raises(ValueError, getattr, index, 'is_full')
assert index[:0].is_full
def test_with_multi_index(self):
# #1705
index = date_range('1/1/2012', periods=4, freq='12H')
index_as_arrays = [index.to_period(freq='D'), index.hour]
s = Series([0, 1, 2, 3], index_as_arrays)
assert isinstance(s.index.levels[0], PeriodIndex)
assert isinstance(s.index.values[0][0], Period)
def test_convert_array_of_periods(self):
rng = period_range('1/1/2000', periods=20, freq='D')
periods = list(rng)
result = pd.Index(periods)
assert isinstance(result, PeriodIndex)
def test_append_concat(self):
# #1815
d1 = date_range('12/31/1990', '12/31/1999', freq='A-DEC')
d2 = date_range('12/31/2000', '12/31/2009', freq='A-DEC')
s1 = Series(np.random.randn(10), d1)
s2 = Series(np.random.randn(10), d2)
s1 = s1.to_period()
s2 = s2.to_period()
# drops index
result = pd.concat([s1, s2])
assert isinstance(result.index, PeriodIndex)
assert result.index[0] == s1.index[0]
def test_pickle_freq(self):
# GH2891
prng = period_range('1/1/2011', '1/1/2012', freq='M')
new_prng = tm.round_trip_pickle(prng)
assert new_prng.freq == offsets.MonthEnd()
assert new_prng.freqstr == 'M'
def test_map(self):
# test_map_dictlike generally tests
index = PeriodIndex([2005, 2007, 2009], freq='A')
result = index.map(lambda x: x.ordinal)
exp = Index([x.ordinal for x in index])
tm.assert_index_equal(result, exp)
def test_join_self(self, join_type):
index = period_range('1/1/2000', periods=10)
joined = index.join(index, how=join_type)
assert index is joined
def test_insert(self):
# GH 18295 (test missing)
expected = PeriodIndex(
['2017Q1', pd.NaT, '2017Q2', '2017Q3', '2017Q4'], freq='Q')
for na in (np.nan, pd.NaT, None):
result = period_range('2017Q1', periods=4, freq='Q').insert(1, na)
tm.assert_index_equal(result, expected)