401 lines
15 KiB
Python
401 lines
15 KiB
Python
import pytest
|
|
|
|
import numpy as np
|
|
from datetime import timedelta
|
|
|
|
import pandas as pd
|
|
import pandas.util.testing as tm
|
|
from pandas import to_timedelta
|
|
from pandas import (Series, Timedelta, Timestamp, TimedeltaIndex,
|
|
timedelta_range,
|
|
_np_version_under1p10)
|
|
from pandas._libs.tslib import iNaT
|
|
from pandas.tests.test_base import Ops
|
|
from pandas.tseries.offsets import Day, Hour
|
|
from pandas.core.dtypes.generic import ABCDateOffset
|
|
|
|
|
|
class TestTimedeltaIndexOps(Ops):
|
|
def setup_method(self, method):
|
|
super(TestTimedeltaIndexOps, self).setup_method(method)
|
|
mask = lambda x: isinstance(x, TimedeltaIndex)
|
|
self.is_valid_objs = [o for o in self.objs if mask(o)]
|
|
self.not_valid_objs = []
|
|
|
|
def test_ops_properties(self):
|
|
f = lambda x: isinstance(x, TimedeltaIndex)
|
|
self.check_ops_properties(TimedeltaIndex._field_ops, f)
|
|
self.check_ops_properties(TimedeltaIndex._object_ops, f)
|
|
|
|
def test_minmax(self):
|
|
|
|
# monotonic
|
|
idx1 = TimedeltaIndex(['1 days', '2 days', '3 days'])
|
|
assert idx1.is_monotonic
|
|
|
|
# non-monotonic
|
|
idx2 = TimedeltaIndex(['1 days', np.nan, '3 days', 'NaT'])
|
|
assert not idx2.is_monotonic
|
|
|
|
for idx in [idx1, idx2]:
|
|
assert idx.min() == Timedelta('1 days')
|
|
assert idx.max() == Timedelta('3 days')
|
|
assert idx.argmin() == 0
|
|
assert idx.argmax() == 2
|
|
|
|
for op in ['min', 'max']:
|
|
# Return NaT
|
|
obj = TimedeltaIndex([])
|
|
assert pd.isna(getattr(obj, op)())
|
|
|
|
obj = TimedeltaIndex([pd.NaT])
|
|
assert pd.isna(getattr(obj, op)())
|
|
|
|
obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT])
|
|
assert pd.isna(getattr(obj, op)())
|
|
|
|
def test_numpy_minmax(self):
|
|
dr = pd.date_range(start='2016-01-15', end='2016-01-20')
|
|
td = TimedeltaIndex(np.asarray(dr))
|
|
|
|
assert np.min(td) == Timedelta('16815 days')
|
|
assert np.max(td) == Timedelta('16820 days')
|
|
|
|
errmsg = "the 'out' parameter is not supported"
|
|
tm.assert_raises_regex(ValueError, errmsg, np.min, td, out=0)
|
|
tm.assert_raises_regex(ValueError, errmsg, np.max, td, out=0)
|
|
|
|
assert np.argmin(td) == 0
|
|
assert np.argmax(td) == 5
|
|
|
|
if not _np_version_under1p10:
|
|
errmsg = "the 'out' parameter is not supported"
|
|
tm.assert_raises_regex(
|
|
ValueError, errmsg, np.argmin, td, out=0)
|
|
tm.assert_raises_regex(
|
|
ValueError, errmsg, np.argmax, td, out=0)
|
|
|
|
def test_value_counts_unique(self):
|
|
# GH 7735
|
|
|
|
idx = timedelta_range('1 days 09:00:00', freq='H', periods=10)
|
|
# create repeated values, 'n'th element is repeated by n+1 times
|
|
idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1)))
|
|
|
|
exp_idx = timedelta_range('1 days 18:00:00', freq='-1H', periods=10)
|
|
expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64')
|
|
|
|
for obj in [idx, Series(idx)]:
|
|
tm.assert_series_equal(obj.value_counts(), expected)
|
|
|
|
expected = timedelta_range('1 days 09:00:00', freq='H', periods=10)
|
|
tm.assert_index_equal(idx.unique(), expected)
|
|
|
|
idx = TimedeltaIndex(['1 days 09:00:00', '1 days 09:00:00',
|
|
'1 days 09:00:00', '1 days 08:00:00',
|
|
'1 days 08:00:00', pd.NaT])
|
|
|
|
exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00'])
|
|
expected = Series([3, 2], index=exp_idx)
|
|
|
|
for obj in [idx, Series(idx)]:
|
|
tm.assert_series_equal(obj.value_counts(), expected)
|
|
|
|
exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00',
|
|
pd.NaT])
|
|
expected = Series([3, 2, 1], index=exp_idx)
|
|
|
|
for obj in [idx, Series(idx)]:
|
|
tm.assert_series_equal(obj.value_counts(dropna=False), expected)
|
|
|
|
tm.assert_index_equal(idx.unique(), exp_idx)
|
|
|
|
def test_nonunique_contains(self):
|
|
# GH 9512
|
|
for idx in map(TimedeltaIndex, ([0, 1, 0], [0, 0, -1], [0, -1, -1],
|
|
['00:01:00', '00:01:00', '00:02:00'],
|
|
['00:01:00', '00:01:00', '00:00:01'])):
|
|
assert idx[0] in idx
|
|
|
|
def test_unknown_attribute(self):
|
|
# see gh-9680
|
|
tdi = pd.timedelta_range(start=0, periods=10, freq='1s')
|
|
ts = pd.Series(np.random.normal(size=10), index=tdi)
|
|
assert 'foo' not in ts.__dict__.keys()
|
|
pytest.raises(AttributeError, lambda: ts.foo)
|
|
|
|
def test_order(self):
|
|
# GH 10295
|
|
idx1 = TimedeltaIndex(['1 day', '2 day', '3 day'], freq='D',
|
|
name='idx')
|
|
idx2 = TimedeltaIndex(
|
|
['1 hour', '2 hour', '3 hour'], freq='H', name='idx')
|
|
|
|
for idx in [idx1, idx2]:
|
|
ordered = idx.sort_values()
|
|
tm.assert_index_equal(ordered, idx)
|
|
assert ordered.freq == idx.freq
|
|
|
|
ordered = idx.sort_values(ascending=False)
|
|
expected = idx[::-1]
|
|
tm.assert_index_equal(ordered, expected)
|
|
assert ordered.freq == expected.freq
|
|
assert ordered.freq.n == -1
|
|
|
|
ordered, indexer = idx.sort_values(return_indexer=True)
|
|
tm.assert_index_equal(ordered, idx)
|
|
tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]),
|
|
check_dtype=False)
|
|
assert ordered.freq == idx.freq
|
|
|
|
ordered, indexer = idx.sort_values(return_indexer=True,
|
|
ascending=False)
|
|
tm.assert_index_equal(ordered, idx[::-1])
|
|
assert ordered.freq == expected.freq
|
|
assert ordered.freq.n == -1
|
|
|
|
idx1 = TimedeltaIndex(['1 hour', '3 hour', '5 hour',
|
|
'2 hour ', '1 hour'], name='idx1')
|
|
exp1 = TimedeltaIndex(['1 hour', '1 hour', '2 hour',
|
|
'3 hour', '5 hour'], name='idx1')
|
|
|
|
idx2 = TimedeltaIndex(['1 day', '3 day', '5 day',
|
|
'2 day', '1 day'], name='idx2')
|
|
|
|
# TODO(wesm): unused?
|
|
# exp2 = TimedeltaIndex(['1 day', '1 day', '2 day',
|
|
# '3 day', '5 day'], name='idx2')
|
|
|
|
# idx3 = TimedeltaIndex([pd.NaT, '3 minute', '5 minute',
|
|
# '2 minute', pd.NaT], name='idx3')
|
|
# exp3 = TimedeltaIndex([pd.NaT, pd.NaT, '2 minute', '3 minute',
|
|
# '5 minute'], name='idx3')
|
|
|
|
for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]:
|
|
ordered = idx.sort_values()
|
|
tm.assert_index_equal(ordered, expected)
|
|
assert ordered.freq is None
|
|
|
|
ordered = idx.sort_values(ascending=False)
|
|
tm.assert_index_equal(ordered, expected[::-1])
|
|
assert ordered.freq is None
|
|
|
|
ordered, indexer = idx.sort_values(return_indexer=True)
|
|
tm.assert_index_equal(ordered, expected)
|
|
|
|
exp = np.array([0, 4, 3, 1, 2])
|
|
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
|
|
assert ordered.freq is None
|
|
|
|
ordered, indexer = idx.sort_values(return_indexer=True,
|
|
ascending=False)
|
|
tm.assert_index_equal(ordered, expected[::-1])
|
|
|
|
exp = np.array([2, 1, 3, 4, 0])
|
|
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
|
|
assert ordered.freq is None
|
|
|
|
def test_drop_duplicates_metadata(self):
|
|
# GH 10115
|
|
idx = pd.timedelta_range('1 day', '31 day', freq='D', name='idx')
|
|
result = idx.drop_duplicates()
|
|
tm.assert_index_equal(idx, result)
|
|
assert idx.freq == result.freq
|
|
|
|
idx_dup = idx.append(idx)
|
|
assert idx_dup.freq is None # freq is reset
|
|
result = idx_dup.drop_duplicates()
|
|
tm.assert_index_equal(idx, result)
|
|
assert result.freq is None
|
|
|
|
def test_drop_duplicates(self):
|
|
# to check Index/Series compat
|
|
base = pd.timedelta_range('1 day', '31 day', freq='D', name='idx')
|
|
idx = base.append(base[:5])
|
|
|
|
res = idx.drop_duplicates()
|
|
tm.assert_index_equal(res, base)
|
|
res = Series(idx).drop_duplicates()
|
|
tm.assert_series_equal(res, Series(base))
|
|
|
|
res = idx.drop_duplicates(keep='last')
|
|
exp = base[5:].append(base[:5])
|
|
tm.assert_index_equal(res, exp)
|
|
res = Series(idx).drop_duplicates(keep='last')
|
|
tm.assert_series_equal(res, Series(exp, index=np.arange(5, 36)))
|
|
|
|
res = idx.drop_duplicates(keep=False)
|
|
tm.assert_index_equal(res, base[5:])
|
|
res = Series(idx).drop_duplicates(keep=False)
|
|
tm.assert_series_equal(res, Series(base[5:], index=np.arange(5, 31)))
|
|
|
|
@pytest.mark.parametrize('freq', ['D', '3D', '-3D',
|
|
'H', '2H', '-2H',
|
|
'T', '2T', 'S', '-3S'])
|
|
def test_infer_freq(self, freq):
|
|
# GH#11018
|
|
idx = pd.timedelta_range('1', freq=freq, periods=10)
|
|
result = pd.TimedeltaIndex(idx.asi8, freq='infer')
|
|
tm.assert_index_equal(idx, result)
|
|
assert result.freq == freq
|
|
|
|
def test_nat_new(self):
|
|
|
|
idx = pd.timedelta_range('1', freq='D', periods=5, name='x')
|
|
result = idx._nat_new()
|
|
exp = pd.TimedeltaIndex([pd.NaT] * 5, name='x')
|
|
tm.assert_index_equal(result, exp)
|
|
|
|
result = idx._nat_new(box=False)
|
|
exp = np.array([iNaT] * 5, dtype=np.int64)
|
|
tm.assert_numpy_array_equal(result, exp)
|
|
|
|
def test_shift(self):
|
|
pass # handled in test_arithmetic.py
|
|
|
|
def test_repeat(self):
|
|
index = pd.timedelta_range('1 days', periods=2, freq='D')
|
|
exp = pd.TimedeltaIndex(['1 days', '1 days', '2 days', '2 days'])
|
|
for res in [index.repeat(2), np.repeat(index, 2)]:
|
|
tm.assert_index_equal(res, exp)
|
|
assert res.freq is None
|
|
|
|
index = TimedeltaIndex(['1 days', 'NaT', '3 days'])
|
|
exp = TimedeltaIndex(['1 days', '1 days', '1 days',
|
|
'NaT', 'NaT', 'NaT',
|
|
'3 days', '3 days', '3 days'])
|
|
for res in [index.repeat(3), np.repeat(index, 3)]:
|
|
tm.assert_index_equal(res, exp)
|
|
assert res.freq is None
|
|
|
|
def test_nat(self):
|
|
assert pd.TimedeltaIndex._na_value is pd.NaT
|
|
assert pd.TimedeltaIndex([])._na_value is pd.NaT
|
|
|
|
idx = pd.TimedeltaIndex(['1 days', '2 days'])
|
|
assert idx._can_hold_na
|
|
|
|
tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
|
|
assert not idx.hasnans
|
|
tm.assert_numpy_array_equal(idx._nan_idxs,
|
|
np.array([], dtype=np.intp))
|
|
|
|
idx = pd.TimedeltaIndex(['1 days', 'NaT'])
|
|
assert idx._can_hold_na
|
|
|
|
tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
|
|
assert idx.hasnans
|
|
tm.assert_numpy_array_equal(idx._nan_idxs,
|
|
np.array([1], dtype=np.intp))
|
|
|
|
def test_equals(self):
|
|
# GH 13107
|
|
idx = pd.TimedeltaIndex(['1 days', '2 days', 'NaT'])
|
|
assert idx.equals(idx)
|
|
assert idx.equals(idx.copy())
|
|
assert idx.equals(idx.astype(object))
|
|
assert idx.astype(object).equals(idx)
|
|
assert idx.astype(object).equals(idx.astype(object))
|
|
assert not idx.equals(list(idx))
|
|
assert not idx.equals(pd.Series(idx))
|
|
|
|
idx2 = pd.TimedeltaIndex(['2 days', '1 days', 'NaT'])
|
|
assert not idx.equals(idx2)
|
|
assert not idx.equals(idx2.copy())
|
|
assert not idx.equals(idx2.astype(object))
|
|
assert not idx.astype(object).equals(idx2)
|
|
assert not idx.astype(object).equals(idx2.astype(object))
|
|
assert not idx.equals(list(idx2))
|
|
assert not idx.equals(pd.Series(idx2))
|
|
|
|
@pytest.mark.parametrize('values', [['0 days', '2 days', '4 days'], []])
|
|
@pytest.mark.parametrize('freq', ['2D', Day(2), '48H', Hour(48)])
|
|
def test_freq_setter(self, values, freq):
|
|
# GH 20678
|
|
idx = TimedeltaIndex(values)
|
|
|
|
# can set to an offset, converting from string if necessary
|
|
idx.freq = freq
|
|
assert idx.freq == freq
|
|
assert isinstance(idx.freq, ABCDateOffset)
|
|
|
|
# can reset to None
|
|
idx.freq = None
|
|
assert idx.freq is None
|
|
|
|
def test_freq_setter_errors(self):
|
|
# GH 20678
|
|
idx = TimedeltaIndex(['0 days', '2 days', '4 days'])
|
|
|
|
# setting with an incompatible freq
|
|
msg = ('Inferred frequency 2D from passed values does not conform to '
|
|
'passed frequency 5D')
|
|
with tm.assert_raises_regex(ValueError, msg):
|
|
idx.freq = '5D'
|
|
|
|
# setting with a non-fixed frequency
|
|
msg = '<2 \* BusinessDays> is a non-fixed frequency'
|
|
with tm.assert_raises_regex(ValueError, msg):
|
|
idx.freq = '2B'
|
|
|
|
# setting with non-freq string
|
|
with tm.assert_raises_regex(ValueError, 'Invalid frequency'):
|
|
idx.freq = 'foo'
|
|
|
|
|
|
class TestTimedeltas(object):
|
|
|
|
def test_timedelta_ops(self):
|
|
# GH4984
|
|
# make sure ops return Timedelta
|
|
s = Series([Timestamp('20130101') + timedelta(seconds=i * i)
|
|
for i in range(10)])
|
|
td = s.diff()
|
|
|
|
result = td.mean()
|
|
expected = to_timedelta(timedelta(seconds=9))
|
|
assert result == expected
|
|
|
|
result = td.to_frame().mean()
|
|
assert result[0] == expected
|
|
|
|
result = td.quantile(.1)
|
|
expected = Timedelta(np.timedelta64(2600, 'ms'))
|
|
assert result == expected
|
|
|
|
result = td.median()
|
|
expected = to_timedelta('00:00:09')
|
|
assert result == expected
|
|
|
|
result = td.to_frame().median()
|
|
assert result[0] == expected
|
|
|
|
# GH 6462
|
|
# consistency in returned values for sum
|
|
result = td.sum()
|
|
expected = to_timedelta('00:01:21')
|
|
assert result == expected
|
|
|
|
result = td.to_frame().sum()
|
|
assert result[0] == expected
|
|
|
|
# std
|
|
result = td.std()
|
|
expected = to_timedelta(Series(td.dropna().values).std())
|
|
assert result == expected
|
|
|
|
result = td.to_frame().std()
|
|
assert result[0] == expected
|
|
|
|
# invalid ops
|
|
for op in ['skew', 'kurt', 'sem', 'prod']:
|
|
pytest.raises(TypeError, getattr(td, op))
|
|
|
|
# GH 10040
|
|
# make sure NaT is properly handled by median()
|
|
s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07')])
|
|
assert s.diff().median() == timedelta(days=4)
|
|
|
|
s = Series([Timestamp('2015-02-03'), Timestamp('2015-02-07'),
|
|
Timestamp('2015-02-15')])
|
|
assert s.diff().median() == timedelta(days=6)
|