1876 lines
68 KiB
Python
1876 lines
68 KiB
Python
|
# coding=utf-8
|
||
|
# pylint: disable-msg=E1101,W0612
|
||
|
|
||
|
import pytest
|
||
|
import pytz
|
||
|
|
||
|
from collections import Iterable
|
||
|
from datetime import datetime, timedelta
|
||
|
import operator
|
||
|
from itertools import product, starmap
|
||
|
|
||
|
from numpy import nan
|
||
|
import numpy as np
|
||
|
import pandas as pd
|
||
|
|
||
|
from pandas import (Index, Series, DataFrame, isna, bdate_range,
|
||
|
NaT, date_range, timedelta_range, Categorical)
|
||
|
from pandas.core.indexes.datetimes import Timestamp
|
||
|
from pandas.core.indexes.timedeltas import Timedelta
|
||
|
import pandas.core.nanops as nanops
|
||
|
|
||
|
from pandas.errors import PerformanceWarning
|
||
|
from pandas.compat import range, zip
|
||
|
from pandas import compat
|
||
|
from pandas.util.testing import (assert_series_equal, assert_almost_equal,
|
||
|
assert_frame_equal, assert_index_equal)
|
||
|
import pandas.util.testing as tm
|
||
|
|
||
|
from .common import TestData
|
||
|
|
||
|
|
||
|
class TestSeriesComparisons(object):
|
||
|
def test_series_comparison_scalars(self):
|
||
|
series = Series(date_range('1/1/2000', periods=10))
|
||
|
|
||
|
val = datetime(2000, 1, 4)
|
||
|
result = series > val
|
||
|
expected = Series([x > val for x in series])
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
val = series[5]
|
||
|
result = series > val
|
||
|
expected = Series([x > val for x in series])
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
def test_comparisons(self):
|
||
|
left = np.random.randn(10)
|
||
|
right = np.random.randn(10)
|
||
|
left[:3] = np.nan
|
||
|
|
||
|
result = nanops.nangt(left, right)
|
||
|
with np.errstate(invalid='ignore'):
|
||
|
expected = (left > right).astype('O')
|
||
|
expected[:3] = np.nan
|
||
|
|
||
|
assert_almost_equal(result, expected)
|
||
|
|
||
|
s = Series(['a', 'b', 'c'])
|
||
|
s2 = Series([False, True, False])
|
||
|
|
||
|
# it works!
|
||
|
exp = Series([False, False, False])
|
||
|
assert_series_equal(s == s2, exp)
|
||
|
assert_series_equal(s2 == s, exp)
|
||
|
|
||
|
def test_operator_series_comparison_zerorank(self):
|
||
|
# GH 13006
|
||
|
result = np.float64(0) > pd.Series([1, 2, 3])
|
||
|
expected = 0.0 > pd.Series([1, 2, 3])
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
result = pd.Series([1, 2, 3]) < np.float64(0)
|
||
|
expected = pd.Series([1, 2, 3]) < 0.0
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
result = np.array([0, 1, 2])[0] > pd.Series([0, 1, 2])
|
||
|
expected = 0.0 > pd.Series([1, 2, 3])
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
def test_object_comparisons(self):
|
||
|
s = Series(['a', 'b', np.nan, 'c', 'a'])
|
||
|
|
||
|
result = s == 'a'
|
||
|
expected = Series([True, False, False, False, True])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
result = s < 'a'
|
||
|
expected = Series([False, False, False, False, False])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
result = s != 'a'
|
||
|
expected = -(s == 'a')
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_categorical_comparisons(self):
    """Categorical Series allow ``==`` / ``!=`` but reject ``<`` / ``>``.

    See GH 8938.
    """
    # GH 8938
    # allow equality comparisons
    cat = Series(list('abc'), dtype="category")
    same_values = Series(list('abc'), dtype="object")
    other_values = Series(['a', 'b', 'cc'], dtype="object")
    shuffled = Series(list('acb'), dtype="object")
    same_cat = Categorical(list('abc'))
    shuffled_cat = Categorical(list('acb'))

    # vs scalar
    assert not (cat == 'a').all()
    assert ((cat != 'a') == ~(cat == 'a')).all()

    assert not ('a' == cat).all()
    assert (cat == 'a')[0]
    assert ('a' == cat)[0]
    assert not ('a' != cat)[0]

    # vs list-like
    assert (cat == cat).all()
    assert not (cat != cat).all()
    assert (cat == list(cat)).all()

    # Same values in the same order: equal in both operand orders, and
    # ``!=`` is the inverse of ``==``.
    mirrored = [(cat, same_values), (same_values, cat)]
    for lhs, rhs in mirrored:
        assert (lhs == rhs).all()
    for lhs, rhs in mirrored:
        assert ((~(lhs == rhs)) == (lhs != rhs)).all()

    # Different values, or the same values in another order: not all
    # entries are equal.
    for other in (other_values, shuffled):
        assert not (cat == other).all()
        assert not (other == cat).all()

    # vs a cat-like
    assert (cat == same_cat).all()
    assert (same_cat == cat).all()
    assert not (cat == shuffled_cat).all()
    assert not (shuffled_cat == cat).all()

    for other in (same_cat, shuffled_cat):
        assert ((~(cat == other) == (cat != other)).all())
        assert ((~(other == cat) == (other != cat)).all())

    # non-equality is not comparable
    for compare in (operator.lt, operator.gt):
        pytest.raises(TypeError, compare, cat, same_values)
        pytest.raises(TypeError, compare, same_values, cat)
|
||
|
|
||
|
def test_comparison_tuples(self):
|
||
|
# GH11339
|
||
|
# comparisons vs tuple
|
||
|
s = Series([(1, 1), (1, 2)])
|
||
|
|
||
|
result = s == (1, 2)
|
||
|
expected = Series([False, True])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
result = s != (1, 2)
|
||
|
expected = Series([True, False])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
result = s == (0, 0)
|
||
|
expected = Series([False, False])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
result = s != (0, 0)
|
||
|
expected = Series([True, True])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
s = Series([(1, 1), (1, 1)])
|
||
|
|
||
|
result = s == (1, 1)
|
||
|
expected = Series([True, True])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
result = s != (1, 1)
|
||
|
expected = Series([False, False])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
s = Series([frozenset([1]), frozenset([1, 2])])
|
||
|
|
||
|
result = s == frozenset([1])
|
||
|
expected = Series([True, False])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_comparison_operators_with_nas(self):
|
||
|
ser = Series(bdate_range('1/1/2000', periods=10), dtype=object)
|
||
|
ser[::2] = np.nan
|
||
|
|
||
|
# test that comparisons work
|
||
|
ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne']
|
||
|
for op in ops:
|
||
|
val = ser[5]
|
||
|
|
||
|
f = getattr(operator, op)
|
||
|
result = f(ser, val)
|
||
|
|
||
|
expected = f(ser.dropna(), val).reindex(ser.index)
|
||
|
|
||
|
if op == 'ne':
|
||
|
expected = expected.fillna(True).astype(bool)
|
||
|
else:
|
||
|
expected = expected.fillna(False).astype(bool)
|
||
|
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
# fffffffuuuuuuuuuuuu
|
||
|
# result = f(val, s)
|
||
|
# expected = f(val, s.dropna()).reindex(s.index)
|
||
|
# assert_series_equal(result, expected)
|
||
|
|
||
|
# boolean &, |, ^ should work with object arrays and propagate NAs
|
||
|
|
||
|
ops = ['and_', 'or_', 'xor']
|
||
|
mask = ser.isna()
|
||
|
for bool_op in ops:
|
||
|
func = getattr(operator, bool_op)
|
||
|
|
||
|
filled = ser.fillna(ser[0])
|
||
|
|
||
|
result = func(ser < ser[9], ser > ser[3])
|
||
|
|
||
|
expected = func(filled < filled[9], filled > filled[3])
|
||
|
expected[mask] = False
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_comparison_object_numeric_nas(self):
|
||
|
ser = Series(np.random.randn(10), dtype=object)
|
||
|
shifted = ser.shift(2)
|
||
|
|
||
|
ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne']
|
||
|
for op in ops:
|
||
|
func = getattr(operator, op)
|
||
|
|
||
|
result = func(ser, shifted)
|
||
|
expected = func(ser.astype(float), shifted.astype(float))
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_comparison_invalid(self):
    """Comparing an integer Series with a datetime Series raises.

    Every comparison operator is expected to raise ``TypeError``, in
    both operand orders.  See GH4968.
    """
    # GH4968
    # invalid date/int comparisons
    ints = Series(range(5))
    dates = Series(date_range('20010101', periods=5))

    comparisons = (operator.eq, operator.ne, operator.ge,
                   operator.gt, operator.lt, operator.le)
    for lhs, rhs in [(ints, dates), (dates, ints)]:
        for compare in comparisons:
            pytest.raises(TypeError, compare, lhs, rhs)
|
||
|
|
||
|
def test_unequal_categorical_comparison_raises_type_error(self):
    """Ordering comparisons on categoricals raise where they are invalid."""
    # unequal comparison should raise for unordered cats, whether the
    # categorical is unordered by default or explicitly
    unordered_variants = (Categorical(list("abc")),
                          Categorical(list("abc"), ordered=False))
    for values in unordered_variants:
        cat = Series(values)
        with pytest.raises(TypeError):
            cat > "b"

    # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057
    # and following comparisons with scalars not in categories should raise
    # for unequal comps, but not for equal/not equal
    cat = Series(Categorical(list("abc"), ordered=True))

    for compare in (operator.lt, operator.gt):
        pytest.raises(TypeError, compare, cat, "d")
    for compare in (operator.lt, operator.gt):
        pytest.raises(TypeError, compare, "d", cat)

    tm.assert_series_equal(cat == "d", Series([False, False, False]))
    tm.assert_series_equal(cat != "d", Series([True, True, True]))
|
||
|
|
||
|
@pytest.mark.parametrize('dtype', [None, object])
def test_more_na_comparisons(self, dtype):
    """``==`` / ``!=`` on string Series holding NaN, vs Series and NaN."""
    left = Series(['a', np.nan, 'c'], dtype=dtype)
    right = Series(['a', np.nan, 'd'], dtype=dtype)

    # (operator, right-hand operand, expected mask)
    cases = [
        (operator.eq, right, [True, False, False]),
        (operator.ne, right, [False, True, True]),
        (operator.eq, np.nan, [False, False, False]),
        (operator.ne, np.nan, [True, True, True]),
    ]
    for compare, other, outcome in cases:
        assert_series_equal(compare(left, other), Series(outcome))
|
||
|
|
||
|
@pytest.mark.parametrize('pair', [
    ([pd.Timestamp('2011-01-01'), NaT, pd.Timestamp('2011-01-03')],
     [NaT, NaT, pd.Timestamp('2011-01-03')]),

    ([pd.Timedelta('1 days'), NaT, pd.Timedelta('3 days')],
     [NaT, NaT, pd.Timedelta('3 days')]),

    ([pd.Period('2011-01', freq='M'), NaT, pd.Period('2011-03', freq='M')],
     [NaT, NaT, pd.Period('2011-03', freq='M')])])
@pytest.mark.parametrize('reverse', [True, False])
@pytest.mark.parametrize('box', [Series, Index])
@pytest.mark.parametrize('dtype', [None, object])
def test_nat_comparisons(self, dtype, box, reverse, pair):
    """Element-wise comparisons where either side may hold ``NaT``.

    In each ``pair`` only the last position holds the same valid value
    on both sides; the first two positions have ``NaT`` on at least one
    side.
    """
    lhs_values, rhs_values = pair
    if reverse:
        # add lhs / rhs switched data
        lhs_values, rhs_values = rhs_values, lhs_values

    left = Series(lhs_values, dtype=dtype)
    # ``box`` is Series or Index
    right = box(rhs_values, dtype=dtype)

    # (operator, expected mask)
    checks = [
        (operator.eq, [False, False, True]),
        (operator.ne, [True, True, False]),
        (operator.lt, [False, False, False]),
        (operator.gt, [False, False, False]),
        (operator.ge, [False, False, True]),
        (operator.le, [False, False, True]),
    ]
    for compare, outcome in checks:
        assert_series_equal(compare(left, right), Series(outcome))
|
||
|
|
||
|
@pytest.mark.parametrize('data', [
    [pd.Timestamp('2011-01-01'), NaT, pd.Timestamp('2011-01-03')],
    [pd.Timedelta('1 days'), NaT, pd.Timedelta('3 days')],
    [pd.Period('2011-01', freq='M'), NaT, pd.Period('2011-03', freq='M')]
])
@pytest.mark.parametrize('dtype', [None, object])
def test_nat_comparisons_scalar(self, dtype, data):
    """Comparing a Series with the scalar ``pd.NaT``.

    Only ``!=`` is expected to be True; every other comparison is
    expected to be False, in both operand orders.
    """
    left = Series(data, dtype=dtype)
    never = Series([False, False, False])
    always = Series([True, True, True])

    assert_series_equal(left == pd.NaT, never)
    assert_series_equal(pd.NaT == left, never)

    assert_series_equal(left != pd.NaT, always)
    assert_series_equal(pd.NaT != left, always)

    # Ordering comparisons, with NaT on either side.
    for compare in (operator.lt, operator.le, operator.gt, operator.ge):
        assert_series_equal(compare(left, pd.NaT), never)
        assert_series_equal(compare(pd.NaT, left), never)
|
||
|
|
||
|
def test_comparison_different_length(self):
    """Comparing Series of different lengths raises ``ValueError``."""
    longer = Series(['a', 'b', 'c'])
    shorter = Series(['b', 'a'])
    with pytest.raises(ValueError):
        longer.__lt__(shorter)

    shorter = Series([1, 2])
    longer = Series([2, 3, 4])
    with pytest.raises(ValueError):
        shorter.__eq__(longer)
|
||
|
|
||
|
def test_comparison_label_based(self):
    """``&``, ``|`` and ``^`` between Series line up on index labels.

    See GH 4947.
    """
    # GH 4947
    # comparisons should be label based
    a = Series([True, False, True], list('bca'))
    b = Series([False, True, False], list('abc'))

    same_labels = [
        (operator.and_, [False, True, False]),
        (operator.or_, [True, True, False]),
        (operator.xor, [True, False, False]),
    ]
    for combine, outcome in same_labels:
        expected = Series(outcome, list('abc'))
        assert_series_equal(combine(a, b), expected)

    # rhs is bigger
    a = Series([True, False, True], list('bca'))
    b = Series([False, True, False, True], list('abcd'))

    bigger_rhs = [
        (operator.and_, [False, True, False, False]),
        (operator.or_, [True, True, False, False]),
    ]
    for combine, outcome in bigger_rhs:
        expected = Series(outcome, list('abcd'))
        assert_series_equal(combine(a, b), expected)

    # filling

    # vs empty
    expected = Series([False, False, False], list('bca'))
    assert_series_equal(a & Series([]), expected)

    expected = Series([True, False, True], list('bca'))
    assert_series_equal(a | Series([]), expected)

    # vs non-matching
    expected = Series([False, False, False, False], list('abcz'))
    assert_series_equal(a & Series([1], ['z']), expected)

    expected = Series([True, True, False, False], list('abcz'))
    assert_series_equal(a | Series([1], ['z']), expected)

    # identity
    # we would like s[s|e] == s to hold for any e, whether empty or not
    neutral_operands = [Series([]), Series([1], ['z']),
                        Series(np.nan, b.index), Series(np.nan, a.index)]
    for e in neutral_operands:
        assert_series_equal(a[a | e], a[a])

    # A string operand: on Python 3 a RuntimeWarning is expected while
    # the operands are combined.
    strings = Series(['z'])
    if compat.PY3:
        with tm.assert_produces_warning(RuntimeWarning):
            result = a[a | strings]
    else:
        result = a[a | strings]
    assert_series_equal(result, a[a])

    # vs scalars
    index = list('bca')
    t = Series([True, False, True])

    def check_scalars(combine, scalars, outcome):
        # Combine a freshly built labelled Series with each scalar.
        for v in scalars:
            result = combine(Series([True, False, True], index=index), v)
            assert_series_equal(result, Series(outcome, index=index))

    check_scalars(operator.or_, [True, 1, 2], [True, True, True])

    for v in [np.nan, 'foo']:
        pytest.raises(TypeError, operator.or_, t, v)

    check_scalars(operator.or_, [False, 0], [True, False, True])
    check_scalars(operator.and_, [True, 1], [True, False, True])
    check_scalars(operator.and_, [False, 0], [False, False, False])

    pytest.raises(TypeError, operator.and_, t, np.nan)
|
||
|
|
||
|
def test_comparison_flex_basic(self):
    """Flex comparison methods agree with the matching operators.

    ``eq``/``ne``/``le``/``lt``/``gt``/``ge`` are checked with and
    without an explicit ``axis``; ``axis=1`` must raise ``ValueError``.
    """
    left = pd.Series(np.random.randn(10))
    right = pd.Series(np.random.randn(10))
    # Force some ties: random floats practically never compare equal, and
    # without ties ``le`` vs ``lt`` (and ``ge`` vs ``gt``) give identical
    # results, so a mix-up between them would go unnoticed.
    right.iloc[:3] = left.iloc[:3]

    # Each flex method paired with the operator it must agree with.
    # (Previously ``le`` was checked against ``<`` and ``lt`` against
    # ``<=``.)
    flex_ops = [('eq', operator.eq), ('ne', operator.ne),
                ('le', operator.le), ('lt', operator.lt),
                ('gt', operator.gt), ('ge', operator.ge)]

    for name, compare in flex_ops:
        assert_series_equal(getattr(left, name)(right),
                            compare(left, right))

    # axis
    for axis in [0, None, 'index']:
        for name, compare in flex_ops:
            assert_series_equal(getattr(left, name)(right, axis=axis),
                                compare(left, right))

    # A Series has no axis 1.  Every method is covered, including ``lt``
    # (the list used to contain 'le' twice and no 'lt').
    msg = 'No axis named 1 for object type'
    for name, _ in flex_ops:
        with tm.assert_raises_regex(ValueError, msg):
            getattr(left, name)(right, axis=1)
|
||
|
|
||
|
def test_comparison_flex_alignment(self):
|
||
|
left = Series([1, 3, 2], index=list('abc'))
|
||
|
right = Series([2, 2, 2], index=list('bcd'))
|
||
|
|
||
|
exp = pd.Series([False, False, True, False], index=list('abcd'))
|
||
|
assert_series_equal(left.eq(right), exp)
|
||
|
|
||
|
exp = pd.Series([True, True, False, True], index=list('abcd'))
|
||
|
assert_series_equal(left.ne(right), exp)
|
||
|
|
||
|
exp = pd.Series([False, False, True, False], index=list('abcd'))
|
||
|
assert_series_equal(left.le(right), exp)
|
||
|
|
||
|
exp = pd.Series([False, False, False, False], index=list('abcd'))
|
||
|
assert_series_equal(left.lt(right), exp)
|
||
|
|
||
|
exp = pd.Series([False, True, True, False], index=list('abcd'))
|
||
|
assert_series_equal(left.ge(right), exp)
|
||
|
|
||
|
exp = pd.Series([False, True, False, False], index=list('abcd'))
|
||
|
assert_series_equal(left.gt(right), exp)
|
||
|
|
||
|
def test_comparison_flex_alignment_fill(self):
|
||
|
left = Series([1, 3, 2], index=list('abc'))
|
||
|
right = Series([2, 2, 2], index=list('bcd'))
|
||
|
|
||
|
exp = pd.Series([False, False, True, True], index=list('abcd'))
|
||
|
assert_series_equal(left.eq(right, fill_value=2), exp)
|
||
|
|
||
|
exp = pd.Series([True, True, False, False], index=list('abcd'))
|
||
|
assert_series_equal(left.ne(right, fill_value=2), exp)
|
||
|
|
||
|
exp = pd.Series([False, False, True, True], index=list('abcd'))
|
||
|
assert_series_equal(left.le(right, fill_value=0), exp)
|
||
|
|
||
|
exp = pd.Series([False, False, False, True], index=list('abcd'))
|
||
|
assert_series_equal(left.lt(right, fill_value=0), exp)
|
||
|
|
||
|
exp = pd.Series([True, True, True, False], index=list('abcd'))
|
||
|
assert_series_equal(left.ge(right, fill_value=0), exp)
|
||
|
|
||
|
exp = pd.Series([True, True, False, False], index=list('abcd'))
|
||
|
assert_series_equal(left.gt(right, fill_value=0), exp)
|
||
|
|
||
|
def test_ne(self):
|
||
|
ts = Series([3, 4, 5, 6, 7], [3, 4, 5, 6, 7], dtype=float)
|
||
|
expected = [True, True, False, True, True]
|
||
|
assert tm.equalContents(ts.index != 5, expected)
|
||
|
assert tm.equalContents(~(ts.index == 5), expected)
|
||
|
|
||
|
def test_comp_ops_df_compat(self):
    """Comparison operators reject differently-labelled operands.

    ``==``, ``!=`` and ``<`` must raise ``ValueError`` for Series with
    non-identical indexes and for the DataFrames built from them with
    ``to_frame()``.  See GH 1134.
    """
    # GH 1134
    s1 = pd.Series([1, 2, 3], index=list('ABC'), name='x')
    s2 = pd.Series([2, 2, 2], index=list('ABD'), name='x')

    s3 = pd.Series([1, 2, 3], index=list('ABC'), name='x')
    s4 = pd.Series([2, 2, 2, 2], index=list('ABCD'), name='x')

    checked_ops = (operator.eq, operator.ne, operator.lt)
    series_msg = "Can only compare identically-labeled Series objects"
    frame_msg = "Can only compare identically-labeled DataFrame objects"

    for left, right in [(s1, s2), (s2, s1), (s3, s4), (s4, s3)]:

        for compare in checked_ops:
            with tm.assert_raises_regex(ValueError, series_msg):
                compare(left, right)

        for compare in checked_ops:
            with tm.assert_raises_regex(ValueError, frame_msg):
                compare(left.to_frame(), right.to_frame())
|
||
|
|
||
|
|
||
|
class TestTimedeltaSeriesArithmetic(object):
|
||
|
|
||
|
def test_timedelta64_operations_with_DateOffset(self):
|
||
|
# GH 10699
|
||
|
td = Series([timedelta(minutes=5, seconds=3)] * 3)
|
||
|
result = td + pd.offsets.Minute(1)
|
||
|
expected = Series([timedelta(minutes=6, seconds=3)] * 3)
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
result = td - pd.offsets.Minute(1)
|
||
|
expected = Series([timedelta(minutes=4, seconds=3)] * 3)
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
with tm.assert_produces_warning(PerformanceWarning):
|
||
|
result = td + Series([pd.offsets.Minute(1), pd.offsets.Second(3),
|
||
|
pd.offsets.Hour(2)])
|
||
|
expected = Series([timedelta(minutes=6, seconds=3), timedelta(
|
||
|
minutes=5, seconds=6), timedelta(hours=2, minutes=5, seconds=3)])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
result = td + pd.offsets.Minute(1) + pd.offsets.Second(12)
|
||
|
expected = Series([timedelta(minutes=6, seconds=15)] * 3)
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
# valid DateOffsets
|
||
|
for do in ['Hour', 'Minute', 'Second', 'Day', 'Micro', 'Milli',
|
||
|
'Nano']:
|
||
|
op = getattr(pd.offsets, do)
|
||
|
td + op(5)
|
||
|
op(5) + td
|
||
|
td - op(5)
|
||
|
op(5) - td
|
||
|
|
||
|
def test_timedelta64_operations_with_timedeltas(self):
|
||
|
# td operate with td
|
||
|
td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
|
||
|
td2 = timedelta(minutes=5, seconds=4)
|
||
|
result = td1 - td2
|
||
|
expected = (Series([timedelta(seconds=0)] * 3) -
|
||
|
Series([timedelta(seconds=1)] * 3))
|
||
|
assert result.dtype == 'm8[ns]'
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
result2 = td2 - td1
|
||
|
expected = (Series([timedelta(seconds=1)] * 3) -
|
||
|
Series([timedelta(seconds=0)] * 3))
|
||
|
assert_series_equal(result2, expected)
|
||
|
|
||
|
# roundtrip
|
||
|
assert_series_equal(result + td2, td1)
|
||
|
|
||
|
# Now again, using pd.to_timedelta, which should build
|
||
|
# a Series or a scalar, depending on input.
|
||
|
td1 = Series(pd.to_timedelta(['00:05:03'] * 3))
|
||
|
td2 = pd.to_timedelta('00:05:04')
|
||
|
result = td1 - td2
|
||
|
expected = (Series([timedelta(seconds=0)] * 3) -
|
||
|
Series([timedelta(seconds=1)] * 3))
|
||
|
assert result.dtype == 'm8[ns]'
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
result2 = td2 - td1
|
||
|
expected = (Series([timedelta(seconds=1)] * 3) -
|
||
|
Series([timedelta(seconds=0)] * 3))
|
||
|
assert_series_equal(result2, expected)
|
||
|
|
||
|
# roundtrip
|
||
|
assert_series_equal(result + td2, td1)
|
||
|
|
||
|
def test_operators_timedelta64(self):
|
||
|
# series ops
|
||
|
v1 = date_range('2012-1-1', periods=3, freq='D')
|
||
|
v2 = date_range('2012-1-2', periods=3, freq='D')
|
||
|
rs = Series(v2) - Series(v1)
|
||
|
xp = Series(1e9 * 3600 * 24,
|
||
|
rs.index).astype('int64').astype('timedelta64[ns]')
|
||
|
assert_series_equal(rs, xp)
|
||
|
assert rs.dtype == 'timedelta64[ns]'
|
||
|
|
||
|
df = DataFrame(dict(A=v1))
|
||
|
td = Series([timedelta(days=i) for i in range(3)])
|
||
|
assert td.dtype == 'timedelta64[ns]'
|
||
|
|
||
|
# series on the rhs
|
||
|
result = df['A'] - df['A'].shift()
|
||
|
assert result.dtype == 'timedelta64[ns]'
|
||
|
|
||
|
result = df['A'] + td
|
||
|
assert result.dtype == 'M8[ns]'
|
||
|
|
||
|
# scalar Timestamp on rhs
|
||
|
maxa = df['A'].max()
|
||
|
assert isinstance(maxa, Timestamp)
|
||
|
|
||
|
resultb = df['A'] - df['A'].max()
|
||
|
assert resultb.dtype == 'timedelta64[ns]'
|
||
|
|
||
|
# timestamp on lhs
|
||
|
result = resultb + df['A']
|
||
|
values = [Timestamp('20111230'), Timestamp('20120101'),
|
||
|
Timestamp('20120103')]
|
||
|
expected = Series(values, name='A')
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
# datetimes on rhs
|
||
|
result = df['A'] - datetime(2001, 1, 1)
|
||
|
expected = Series(
|
||
|
[timedelta(days=4017 + i) for i in range(3)], name='A')
|
||
|
assert_series_equal(result, expected)
|
||
|
assert result.dtype == 'm8[ns]'
|
||
|
|
||
|
d = datetime(2001, 1, 1, 3, 4)
|
||
|
resulta = df['A'] - d
|
||
|
assert resulta.dtype == 'm8[ns]'
|
||
|
|
||
|
# roundtrip
|
||
|
resultb = resulta + d
|
||
|
assert_series_equal(df['A'], resultb)
|
||
|
|
||
|
# timedeltas on rhs
|
||
|
td = timedelta(days=1)
|
||
|
resulta = df['A'] + td
|
||
|
resultb = resulta - td
|
||
|
assert_series_equal(resultb, df['A'])
|
||
|
assert resultb.dtype == 'M8[ns]'
|
||
|
|
||
|
# roundtrip
|
||
|
td = timedelta(minutes=5, seconds=3)
|
||
|
resulta = df['A'] + td
|
||
|
resultb = resulta - td
|
||
|
assert_series_equal(df['A'], resultb)
|
||
|
assert resultb.dtype == 'M8[ns]'
|
||
|
|
||
|
# inplace
|
||
|
value = rs[2] + np.timedelta64(timedelta(minutes=5, seconds=1))
|
||
|
rs[2] += np.timedelta64(timedelta(minutes=5, seconds=1))
|
||
|
assert rs[2] == value
|
||
|
|
||
|
def test_timedelta64_ops_nat(self):
|
||
|
# GH 11349
|
||
|
timedelta_series = Series([NaT, Timedelta('1s')])
|
||
|
nat_series_dtype_timedelta = Series([NaT, NaT],
|
||
|
dtype='timedelta64[ns]')
|
||
|
single_nat_dtype_timedelta = Series([NaT], dtype='timedelta64[ns]')
|
||
|
|
||
|
# subtraction
|
||
|
assert_series_equal(timedelta_series - NaT,
|
||
|
nat_series_dtype_timedelta)
|
||
|
assert_series_equal(-NaT + timedelta_series,
|
||
|
nat_series_dtype_timedelta)
|
||
|
|
||
|
assert_series_equal(timedelta_series - single_nat_dtype_timedelta,
|
||
|
nat_series_dtype_timedelta)
|
||
|
assert_series_equal(-single_nat_dtype_timedelta + timedelta_series,
|
||
|
nat_series_dtype_timedelta)
|
||
|
|
||
|
# addition
|
||
|
assert_series_equal(nat_series_dtype_timedelta + NaT,
|
||
|
nat_series_dtype_timedelta)
|
||
|
assert_series_equal(NaT + nat_series_dtype_timedelta,
|
||
|
nat_series_dtype_timedelta)
|
||
|
|
||
|
assert_series_equal(nat_series_dtype_timedelta +
|
||
|
single_nat_dtype_timedelta,
|
||
|
nat_series_dtype_timedelta)
|
||
|
assert_series_equal(single_nat_dtype_timedelta +
|
||
|
nat_series_dtype_timedelta,
|
||
|
nat_series_dtype_timedelta)
|
||
|
|
||
|
assert_series_equal(timedelta_series + NaT,
|
||
|
nat_series_dtype_timedelta)
|
||
|
assert_series_equal(NaT + timedelta_series,
|
||
|
nat_series_dtype_timedelta)
|
||
|
|
||
|
assert_series_equal(timedelta_series + single_nat_dtype_timedelta,
|
||
|
nat_series_dtype_timedelta)
|
||
|
assert_series_equal(single_nat_dtype_timedelta + timedelta_series,
|
||
|
nat_series_dtype_timedelta)
|
||
|
|
||
|
assert_series_equal(nat_series_dtype_timedelta + NaT,
|
||
|
nat_series_dtype_timedelta)
|
||
|
assert_series_equal(NaT + nat_series_dtype_timedelta,
|
||
|
nat_series_dtype_timedelta)
|
||
|
|
||
|
assert_series_equal(nat_series_dtype_timedelta +
|
||
|
single_nat_dtype_timedelta,
|
||
|
nat_series_dtype_timedelta)
|
||
|
assert_series_equal(single_nat_dtype_timedelta +
|
||
|
nat_series_dtype_timedelta,
|
||
|
nat_series_dtype_timedelta)
|
||
|
|
||
|
# multiplication
|
||
|
assert_series_equal(nat_series_dtype_timedelta * 1.0,
|
||
|
nat_series_dtype_timedelta)
|
||
|
assert_series_equal(1.0 * nat_series_dtype_timedelta,
|
||
|
nat_series_dtype_timedelta)
|
||
|
|
||
|
assert_series_equal(timedelta_series * 1, timedelta_series)
|
||
|
assert_series_equal(1 * timedelta_series, timedelta_series)
|
||
|
|
||
|
assert_series_equal(timedelta_series * 1.5,
|
||
|
Series([NaT, Timedelta('1.5s')]))
|
||
|
assert_series_equal(1.5 * timedelta_series,
|
||
|
Series([NaT, Timedelta('1.5s')]))
|
||
|
|
||
|
assert_series_equal(timedelta_series * nan,
|
||
|
nat_series_dtype_timedelta)
|
||
|
assert_series_equal(nan * timedelta_series,
|
||
|
nat_series_dtype_timedelta)
|
||
|
|
||
|
# division
|
||
|
assert_series_equal(timedelta_series / 2,
|
||
|
Series([NaT, Timedelta('0.5s')]))
|
||
|
assert_series_equal(timedelta_series / 2.0,
|
||
|
Series([NaT, Timedelta('0.5s')]))
|
||
|
assert_series_equal(timedelta_series / nan,
|
||
|
nat_series_dtype_timedelta)
|
||
|
|
||
|
@pytest.mark.parametrize('scalar_td', [timedelta(minutes=5, seconds=4),
                                       Timedelta(minutes=5, seconds=4),
                                       Timedelta('5m4s').to_timedelta64()])
def test_operators_timedelta64_with_timedelta(self, scalar_td):
    """Smoke test: ``+``, ``-`` and ``/`` with a timedelta-like scalar.

    Nothing is asserted; each operation only has to run without
    raising, with the Series on either side of the operator.
    """
    # smoke tests
    deltas = Series([timedelta(minutes=5, seconds=3)] * 3)
    deltas.iloc[2] = np.nan

    binary_ops = [lambda x, y: x + y,
                  lambda x, y: x - y,
                  lambda x, y: x / y]
    for apply_op in binary_ops:
        apply_op(deltas, scalar_td)
        apply_op(scalar_td, deltas)
|
||
|
|
||
|
|
||
|
class TestDatetimeSeriesArithmetic(object):
|
||
|
@pytest.mark.parametrize(
    'box, assert_func',
    [(Series, tm.assert_series_equal),
     (pd.Index, tm.assert_index_equal)])
def test_sub_datetime64_not_ns(self, box, assert_func):
    """Subtract a day-resolution ``np.datetime64`` from datetime data.

    ``box`` wraps the data (Series or Index) and ``assert_func`` is the
    matching equality assertion.  See GH#7996.
    """
    # GH#7996
    day_scalar = np.datetime64('2013-01-01')
    assert day_scalar.dtype == 'datetime64[D]'

    dates = box(date_range('20130101', periods=3))
    offsets = box([Timedelta(days=0), Timedelta(days=1),
                   Timedelta(days=2)])

    assert_func(dates - day_scalar, offsets)

    # Reversing the operands negates the result.
    assert_func(day_scalar - dates, -offsets)
|
||
|
|
||
|
def test_operators_datetimelike_invalid(self, all_arithmetic_operators):
    """Undefined datetime-like arithmetic raises ``TypeError``.

    ``all_arithmetic_operators`` is the dunder name of the operator
    under test (it is compared against names such as ``'__sub__'``);
    operators that are valid for a given pair of operands are skipped.
    """
    # these are all TypeError ops
    op_str = all_arithmetic_operators

    def assert_invalid(lhs, rhs):
        # check that we are getting a TypeError
        # with 'operate' (from core/ops.py) for the ops that are not
        # defined
        method = getattr(lhs, op_str, None)
        with tm.assert_raises_regex(TypeError, 'operate|cannot'):
            method(rhs)

    # ## timedelta64 ###
    td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
    td1.iloc[2] = np.nan

    # ## datetime64 ###
    dt1 = Series([Timestamp('20111230'), Timestamp('20120101'),
                  Timestamp('20120103')])
    dt1.iloc[2] = np.nan
    dt2 = Series([Timestamp('20111231'), Timestamp('20120102'),
                  Timestamp('20120104')])

    # datetime64 with datetime64: subtraction is skipped here
    subtraction = ['__sub__', '__rsub__']
    if op_str not in subtraction:
        assert_invalid(dt1, dt2)

    # ## datetime64 with timedelta ###
    # TODO(jreback) __rsub__ should raise?
    if op_str not in ['__add__', '__radd__', '__sub__']:
        assert_invalid(dt1, td1)

    # 8260, 10763
    # datetime64 with tz
    tz = 'US/Eastern'
    dt1 = Series(date_range('2000-01-01 09:00:00', periods=5,
                            tz=tz), name='foo')
    dt2 = dt1.copy()
    dt2.iloc[2] = np.nan
    td1 = Series(timedelta_range('1 days 1 min', periods=5, freq='H'))
    td2 = td1.copy()
    td2.iloc[1] = np.nan

    if op_str not in ['__add__', '__radd__'] + subtraction:
        assert_invalid(dt2, td2)
|
||
|
|
||
|
def test_operators_datetimelike(self):
|
||
|
|
||
|
# ## timedelta64 ###
|
||
|
td1 = Series([timedelta(minutes=5, seconds=3)] * 3)
|
||
|
td1.iloc[2] = np.nan
|
||
|
|
||
|
# ## datetime64 ###
|
||
|
dt1 = Series([Timestamp('20111230'), Timestamp('20120101'),
|
||
|
Timestamp('20120103')])
|
||
|
dt1.iloc[2] = np.nan
|
||
|
dt2 = Series([Timestamp('20111231'), Timestamp('20120102'),
|
||
|
Timestamp('20120104')])
|
||
|
dt1 - dt2
|
||
|
dt2 - dt1
|
||
|
|
||
|
# ## datetime64 with timetimedelta ###
|
||
|
dt1 + td1
|
||
|
td1 + dt1
|
||
|
dt1 - td1
|
||
|
# TODO: Decide if this ought to work.
|
||
|
# td1 - dt1
|
||
|
|
||
|
# ## timetimedelta with datetime64 ###
|
||
|
td1 + dt1
|
||
|
dt1 + td1
|
||
|
|
||
|
def test_operators_datetimelike_with_timezones(self):
    """Check tz-aware datetime64 Series arithmetic against timedeltas.

    Adding or subtracting a timedelta (scalar or Series) must equal the
    same operation done on the tz-naive values and then re-localized.
    Subtracting the tz-aware Series from a timedelta must raise TypeError.
    """
    tz = 'US/Eastern'
    dt1 = Series(date_range('2000-01-01 09:00:00', periods=5, tz=tz),
                 name='foo')
    dt2 = dt1.copy()
    dt2.iloc[2] = np.nan

    td1 = Series(timedelta_range('1 days 1 min', periods=5, freq='H'))
    td2 = td1.copy()
    td2.iloc[1] = np.nan

    def relocalized(dt, op, delta):
        # apply ``op`` on the tz-naive values, then attach ``tz`` again
        return op(dt.dt.tz_localize(None), delta).dt.tz_localize(tz)

    # dt1/td1 are complete, dt2/td2 each carry one missing value
    pairs = [(dt1, td1), (dt2, td2)]

    # scalar timedelta, added from either side
    for dt, td in pairs:
        exp = relocalized(dt, operator.add, td[0])
        assert_series_equal(dt + td[0], exp)

    # odd numpy behavior with scalar timedeltas
    for dt, td in pairs:
        exp = relocalized(dt, operator.add, td[0])
        assert_series_equal(td[0] + dt, exp)

    # scalar timedelta subtracted; the reverse direction is invalid
    for dt, td in pairs:
        exp = relocalized(dt, operator.sub, td[0])
        assert_series_equal(dt - td[0], exp)
        with pytest.raises(TypeError):
            td[0] - dt

    # Series of timedeltas
    for dt, td in pairs:
        assert_series_equal(dt + td, relocalized(dt, operator.add, td))
    for dt, td in pairs:
        assert_series_equal(dt - td, relocalized(dt, operator.sub, td))

    for dt, td in pairs:
        with pytest.raises(TypeError):
            td - dt
|
||
|
|
||
|
def test_sub_single_tz(self):
|
||
|
# GH12290
|
||
|
s1 = Series([pd.Timestamp('2016-02-10', tz='America/Sao_Paulo')])
|
||
|
s2 = Series([pd.Timestamp('2016-02-08', tz='America/Sao_Paulo')])
|
||
|
result = s1 - s2
|
||
|
expected = Series([Timedelta('2days')])
|
||
|
assert_series_equal(result, expected)
|
||
|
result = s2 - s1
|
||
|
expected = Series([Timedelta('-2days')])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_dt64tz_series_sub_dtitz(self):
|
||
|
# GH#19071 subtracting tzaware DatetimeIndex from tzaware Series
|
||
|
# (with same tz) raises, fixed by #19024
|
||
|
dti = pd.date_range('1999-09-30', periods=10, tz='US/Pacific')
|
||
|
ser = pd.Series(dti)
|
||
|
expected = pd.Series(pd.TimedeltaIndex(['0days'] * 10))
|
||
|
|
||
|
res = dti - ser
|
||
|
tm.assert_series_equal(res, expected)
|
||
|
res = ser - dti
|
||
|
tm.assert_series_equal(res, expected)
|
||
|
|
||
|
def test_sub_datetime_compat(self):
    """Subtract a tz-aware ``datetime`` / ``Timestamp`` from a Series."""
    # see gh-14088
    s = Series([datetime(2016, 8, 23, 12, tzinfo=pytz.utc), pd.NaT])
    dt = datetime(2016, 8, 22, 12, tzinfo=pytz.utc)
    exp = Series([Timedelta('1 days'), pd.NaT])

    # the stdlib datetime and its Timestamp wrapper must behave alike
    for subtrahend in (dt, Timestamp(dt)):
        assert_series_equal(s - subtrahend, exp)
|
||
|
|
||
|
def test_dt64_series_with_timedelta(self):
|
||
|
# scalar timedeltas/np.timedelta64 objects
|
||
|
# operate with np.timedelta64 correctly
|
||
|
s = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')])
|
||
|
|
||
|
result = s + np.timedelta64(1, 's')
|
||
|
result2 = np.timedelta64(1, 's') + s
|
||
|
expected = Series([Timestamp('20130101 9:01:01'),
|
||
|
Timestamp('20130101 9:02:01')])
|
||
|
assert_series_equal(result, expected)
|
||
|
assert_series_equal(result2, expected)
|
||
|
|
||
|
result = s + np.timedelta64(5, 'ms')
|
||
|
result2 = np.timedelta64(5, 'ms') + s
|
||
|
expected = Series([Timestamp('20130101 9:01:00.005'),
|
||
|
Timestamp('20130101 9:02:00.005')])
|
||
|
assert_series_equal(result, expected)
|
||
|
assert_series_equal(result2, expected)
|
||
|
|
||
|
def test_dt64_series_add_tick_DateOffset(self):
|
||
|
# GH 4532
|
||
|
# operate with pd.offsets
|
||
|
ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')])
|
||
|
expected = Series([Timestamp('20130101 9:01:05'),
|
||
|
Timestamp('20130101 9:02:05')])
|
||
|
|
||
|
result = ser + pd.offsets.Second(5)
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
result2 = pd.offsets.Second(5) + ser
|
||
|
assert_series_equal(result2, expected)
|
||
|
|
||
|
def test_dt64_series_sub_tick_DateOffset(self):
    """Subtract a ``Second`` offset from a datetime64 Series.

    ``offset - Series`` is not a valid operation and must raise TypeError.
    """
    # GH 4532
    # operate with pd.offsets
    ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')])
    expected = Series([Timestamp('20130101 9:00:55'),
                       Timestamp('20130101 9:01:55')])

    five_seconds = pd.offsets.Second(5)
    assert_series_equal(ser - five_seconds, expected)
    # adding the negated offset is equivalent to subtracting it
    assert_series_equal(-five_seconds + ser, expected)

    pytest.raises(TypeError, lambda: pd.offsets.Second(5) - ser)
|
||
|
|
||
|
@pytest.mark.parametrize('cls_name', ['Day', 'Hour', 'Minute', 'Second',
                                      'Milli', 'Micro', 'Nano'])
def test_dt64_series_with_tick_DateOffset_smoke(self, cls_name):
    """Smoke-test adding each tick offset class to a datetime64 Series."""
    # GH 4532
    # smoke tests for valid DateOffsets
    ser = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')])

    make_offset = getattr(pd.offsets, cls_name)
    # results are discarded; the additions only have to succeed
    ser + make_offset(5)
    make_offset(5) + ser
|
||
|
|
||
|
def test_dt64_series_add_mixed_tick_DateOffset(self):
|
||
|
# GH 4532
|
||
|
# operate with pd.offsets
|
||
|
s = Series([Timestamp('20130101 9:01'), Timestamp('20130101 9:02')])
|
||
|
|
||
|
result = s + pd.offsets.Milli(5)
|
||
|
result2 = pd.offsets.Milli(5) + s
|
||
|
expected = Series([Timestamp('20130101 9:01:00.005'),
|
||
|
Timestamp('20130101 9:02:00.005')])
|
||
|
assert_series_equal(result, expected)
|
||
|
assert_series_equal(result2, expected)
|
||
|
|
||
|
result = s + pd.offsets.Minute(5) + pd.offsets.Milli(5)
|
||
|
expected = Series([Timestamp('20130101 9:06:00.005'),
|
||
|
Timestamp('20130101 9:07:00.005')])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_dt64_series_sub_NaT(self):
|
||
|
# GH#18808
|
||
|
dti = pd.DatetimeIndex([pd.NaT, pd.Timestamp('19900315')])
|
||
|
ser = pd.Series(dti)
|
||
|
res = ser - pd.NaT
|
||
|
expected = pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]')
|
||
|
tm.assert_series_equal(res, expected)
|
||
|
|
||
|
dti_tz = dti.tz_localize('Asia/Tokyo')
|
||
|
ser_tz = pd.Series(dti_tz)
|
||
|
res = ser_tz - pd.NaT
|
||
|
expected = pd.Series([pd.NaT, pd.NaT], dtype='timedelta64[ns]')
|
||
|
tm.assert_series_equal(res, expected)
|
||
|
|
||
|
def test_datetime64_ops_nat(self):
    """Check ``NaT`` arithmetic with datetime64 Series.

    Adding ``NaT`` (or its negation) yields an all-NaT datetime64 Series,
    while negating a datetime64 Series itself must raise TypeError.
    """
    # GH 11349
    datetime_series = Series([NaT, Timestamp('19900315')])
    nat_series_dtype_timestamp = Series([NaT, NaT], dtype='datetime64[ns]')
    single_nat_dtype_datetime = Series([NaT], dtype='datetime64[ns]')

    # subtraction
    assert_series_equal(-NaT + datetime_series, nat_series_dtype_timestamp)
    with pytest.raises(TypeError):
        -single_nat_dtype_datetime + datetime_series

    assert_series_equal(-NaT + nat_series_dtype_timestamp,
                        nat_series_dtype_timestamp)
    with pytest.raises(TypeError):
        -single_nat_dtype_datetime + nat_series_dtype_timestamp

    # addition (each operand order is checked once; the original repeated
    # this exact pair of assertions a second time)
    assert_series_equal(nat_series_dtype_timestamp + NaT,
                        nat_series_dtype_timestamp)
    assert_series_equal(NaT + nat_series_dtype_timestamp,
                        nat_series_dtype_timestamp)
|
||
|
|
||
|
@pytest.mark.parametrize('dt64_series', [
    Series([Timestamp('19900315'), Timestamp('19900315')]),
    Series([NaT, Timestamp('19900315')]),
    Series([NaT, NaT], dtype='datetime64[ns]')])
@pytest.mark.parametrize('one', [1, 1.0, np.array(1)])
def test_dt64_mul_div_numeric_invalid(self, one, dt64_series):
    """Multiplying or dividing datetime64 data by a number must raise."""
    invalid_operations = [
        # multiplication
        lambda: dt64_series * one,
        lambda: one * dt64_series,
        # division
        lambda: dt64_series / one,
        lambda: one / dt64_series,
    ]
    for attempt in invalid_operations:
        pytest.raises(TypeError, attempt)
|
||
|
|
||
|
def test_dt64_series_arith_overflow(self):
    """Arithmetic that overflows must raise unless the entry is NaT."""
    # GH#12534, fixed by #19024
    dt = pd.Timestamp('1700-01-31')
    td = pd.Timedelta('20000 Days')
    ser = pd.Series(pd.date_range('1949-09-30', freq='100Y', periods=4))

    overflowing = [lambda: ser - dt, lambda: dt - ser,
                   lambda: ser + td, lambda: td + ser]
    for attempt in overflowing:
        with pytest.raises(OverflowError):
            attempt()

    # with the last entry set to NaT, adding ``td`` is expected to succeed
    ser.iloc[-1] = pd.NaT
    shifted = pd.Series(['2004-10-03', '2104-10-04', '2204-10-04', 'NaT'],
                        dtype='datetime64[ns]')
    tm.assert_series_equal(ser + td, shifted)
    tm.assert_series_equal(td + ser, shifted)

    # with only the first entry left, subtracting ``dt`` is expected to succeed
    ser.iloc[1:] = pd.NaT
    gap = pd.Series(['91279 Days', 'NaT', 'NaT', 'NaT'],
                    dtype='timedelta64[ns]')
    tm.assert_series_equal(ser - dt, gap)
    tm.assert_series_equal(dt - ser, -gap)
|
||
|
|
||
|
@pytest.mark.parametrize('op', ['__add__', '__radd__',
                                '__sub__', '__rsub__'])
@pytest.mark.parametrize('tz', [None, 'Asia/Tokyo'])
def test_dt64_series_add_intlike(self, tz, op):
    """Adding/subtracting integer-like operands to datetime64 must raise."""
    # GH#19123
    dti = pd.DatetimeIndex(['2016-01-02', '2016-02-03', 'NaT'], tz=tz)
    ser = Series(dti)

    other = Series([20, 30, 40], dtype='uint8')

    # scalar, Series, ndarray and Index flavours of integer data
    for operand in (1, other, other.values, pd.Index(other)):
        pytest.raises(TypeError, getattr(ser, op), operand)
|
||
|
|
||
|
|
||
|
class TestSeriesOperators(TestData):
|
||
|
@pytest.mark.parametrize(
    'ts',
    [
        (lambda x: x, lambda x: x * 2, False),
        (lambda x: x, lambda x: x[::2], False),
        (lambda x: x, lambda x: 5, True),
        (lambda x: tm.makeFloatSeries(),
         lambda x: tm.makeFloatSeries(),
         True)
    ])
@pytest.mark.parametrize('opname', ['add', 'sub', 'mul', 'floordiv',
                                    'truediv', 'div', 'pow'])
def test_op_method(self, opname, ts):
    """Check that ``Series.{opname}`` behaves like ``Series.__{opname}__``.

    ``ts`` is a triple ``(make_series, make_other, check_reverse)``: the
    two callables build the operands from ``self.ts`` and the flag says
    whether the reflected method (``r`` + opname) is checked as well.
    """
    series = ts[0](self.ts)
    other = ts[1](self.ts)
    check_reverse = ts[2]

    if opname == 'div' and compat.PY3:
        # the test is skipped on Python 3, i.e. it only runs on Python 2
        pytest.skip('div test only for Py2')

    op = getattr(Series, opname)

    # compare the operation *name*; ``op`` is a function and can never
    # be equal to the string 'div'
    if opname == 'div':
        alt = operator.truediv
    else:
        alt = getattr(operator, opname)

    result = op(series, other)
    expected = alt(series, other)
    assert_almost_equal(result, expected)
    if check_reverse:
        rop = getattr(Series, "r" + opname)
        result = rop(series, other)
        expected = alt(other, series)
        assert_almost_equal(result, expected)
|
||
|
|
||
|
def test_neg(self):
    """Unary minus must match multiplication by -1."""
    negated = -self.series
    scaled = -1 * self.series
    assert_series_equal(negated, scaled)
|
||
|
|
||
|
def test_invert(self):
    """Unary minus on a boolean mask must match ``~`` on the same mask."""
    negated = -(self.series < 0)
    inverted = ~(self.series < 0)
    assert_series_equal(negated, inverted)
|
||
|
|
||
|
def test_operators(self):
    """Compare operator results with element-wise ``Series.combine``.

    Arithmetic operators (plain and with swapped operands) and comparison
    operators are each checked against ``combine`` with the same function.
    """
    def compare_with_combine(lhs, rhs, func, pos_only=False,
                             check_dtype=True):
        if pos_only:
            # pow is only checked on absolute values
            lhs, rhs = np.abs(lhs), np.abs(rhs)

        vectorized = func(lhs, rhs)
        elementwise = lhs.combine(rhs, func)
        assert_series_equal(vectorized, elementwise,
                            check_dtype=check_dtype)

    def swapped(func):
        # same binary function with its two arguments exchanged
        return lambda x, y: func(y, x)

    def check_arithmetic(series, other):
        for name in ('add', 'sub', 'mul', 'truediv', 'floordiv', 'mod'):
            compare_with_combine(series, other, getattr(operator, name))

        compare_with_combine(series, other, operator.pow, pos_only=True)

        for func in (operator.add, operator.sub, operator.truediv,
                     operator.floordiv, operator.mul):
            compare_with_combine(series, other, swapped(func))
        compare_with_combine(series, other, swapped(operator.pow),
                             pos_only=True)
        compare_with_combine(series, other, swapped(operator.mod))

    check_arithmetic(self.ts, self.ts * 2)
    check_arithmetic(self.ts, self.ts * 0)
    check_arithmetic(self.ts, self.ts[::2])
    check_arithmetic(self.ts, 5)

    def check_comparators(series, other, check_dtype=True):
        for func in (operator.gt, operator.ge, operator.eq,
                     operator.lt, operator.le):
            compare_with_combine(series, other, func,
                                 check_dtype=check_dtype)

    check_comparators(self.ts, 5)
    check_comparators(self.ts, self.ts + 1, check_dtype=False)
|
||
|
|
||
|
def test_divmod(self):
    """Check ``divmod`` on a Series against ``divmod`` on its raw values."""
    def check(series, other):
        results = divmod(series, other)

        length_differs = (isinstance(other, Iterable) and
                          len(series) != len(other))
        if length_differs:
            # if the lengths don't match, this is the test where we use
            # `self.ts[::2]`. Pad every other value with nan.
            padded = [item for value in other
                      for item in (value, np.nan)]
        else:
            padded = other
        other_np = np.asarray(padded)

        with np.errstate(all='ignore'):
            expecteds = divmod(series.values, other_np)

        for result, expected in zip(results, expecteds):
            # check the values, name, and index separately
            assert_almost_equal(np.asarray(result), expected)

            assert result.name == series.name
            assert_index_equal(result.index, series.index)

    for other in (self.ts * 2, self.ts * 0, self.ts[::2], 5):
        check(self.ts, other)
|
||
|
|
||
|
def test_operators_empty_int_corner(self):
|
||
|
s1 = Series([], [], dtype=np.int32)
|
||
|
s2 = Series({'x': 0.})
|
||
|
assert_series_equal(s1 * s2, Series([np.nan], index=['x']))
|
||
|
|
||
|
@pytest.mark.parametrize("m", [1, 3, 10])
@pytest.mark.parametrize("unit", ['D', 'h', 'm', 's', 'ms', 'us', 'ns'])
def test_timedelta64_conversions(self, m, unit):
    """Divide a timedelta Series by ``np.timedelta64`` and vice versa."""
    startdate = Series(date_range('2013-01-01', '2013-01-03'))
    enddate = Series(date_range('2013-03-01', '2013-03-03'))

    s1 = enddate - startdate
    s1[2] = np.nan

    divisor = np.timedelta64(m, unit)

    # op
    expected = s1.apply(lambda x: x / np.timedelta64(m, unit))
    assert_series_equal(s1 / divisor, expected)

    # reverse op
    expected = s1.apply(
        lambda x: Timedelta(np.timedelta64(m, unit)) / x)
    assert_series_equal(divisor / s1, expected)
|
||
|
|
||
|
@pytest.mark.parametrize('op', [operator.add, operator.sub])
def test_timedelta64_equal_timedelta_supported_ops(self, op):
    """``np.timedelta64`` and ``datetime.timedelta`` operands must agree.

    Every 0/1 combination of days, hours, minutes, seconds and
    microseconds is built both ways and applied to the same Series.
    """
    ser = Series([Timestamp('20130301'), Timestamp('20130228 23:00:00'),
                  Timestamp('20130228 22:00:00'),
                  Timestamp('20130228 21:00:00')])

    intervals = 'D', 'h', 'm', 's', 'us'

    def as_numpy_delta(*amounts):
        # one np.timedelta64 per (amount, unit) pair, summed together
        return sum(np.timedelta64(amount, unit)
                   for amount, unit in zip(amounts, intervals))

    for d, h, m, s, us in product(range(2), repeat=5):
        nptd = as_numpy_delta(d, h, m, s, us)
        pytd = timedelta(days=d, hours=h, minutes=m, seconds=s,
                         microseconds=us)

        assert_series_equal(op(ser, nptd), op(ser, pytd))
|
||
|
|
||
|
def test_ops_nat_mixed_datetime64_timedelta64(self):
    """Check NaT-only Series arithmetic mixing datetime64 and timedelta64.

    The single-element operands carry an explicit dtype, so each check
    states whether the NaT stands for a datetime or for a timedelta.
    """
    # GH 11349
    timedelta_series = Series([NaT, Timedelta('1s')])
    datetime_series = Series([NaT, Timestamp('19900315')])
    nat_series_dtype_timedelta = Series([NaT, NaT],
                                        dtype='timedelta64[ns]')
    nat_series_dtype_timestamp = Series([NaT, NaT], dtype='datetime64[ns]')
    single_nat_dtype_datetime = Series([NaT], dtype='datetime64[ns]')
    single_nat_dtype_timedelta = Series([NaT], dtype='timedelta64[ns]')

    # subtraction
    assert_series_equal(datetime_series - single_nat_dtype_datetime,
                        nat_series_dtype_timedelta)

    assert_series_equal(datetime_series - single_nat_dtype_timedelta,
                        nat_series_dtype_timestamp)
    assert_series_equal(-single_nat_dtype_timedelta + datetime_series,
                        nat_series_dtype_timestamp)

    # a bare NaT would be ambiguous (datetime64 or timedelta64), so the
    # NaT operand is wrapped in a Series whose dtype says which is meant
    assert_series_equal(nat_series_dtype_timestamp -
                        single_nat_dtype_datetime,
                        nat_series_dtype_timedelta)

    assert_series_equal(nat_series_dtype_timestamp -
                        single_nat_dtype_timedelta,
                        nat_series_dtype_timestamp)
    assert_series_equal(-single_nat_dtype_timedelta +
                        nat_series_dtype_timestamp,
                        nat_series_dtype_timestamp)

    with pytest.raises(TypeError):
        timedelta_series - single_nat_dtype_datetime

    # addition (the original repeated the first pair of assertions
    # verbatim; each distinct check is kept once)
    assert_series_equal(nat_series_dtype_timestamp +
                        single_nat_dtype_timedelta,
                        nat_series_dtype_timestamp)
    assert_series_equal(single_nat_dtype_timedelta +
                        nat_series_dtype_timestamp,
                        nat_series_dtype_timestamp)

    assert_series_equal(nat_series_dtype_timedelta +
                        single_nat_dtype_datetime,
                        nat_series_dtype_timestamp)
    assert_series_equal(single_nat_dtype_datetime +
                        nat_series_dtype_timedelta,
                        nat_series_dtype_timestamp)
|
||
|
|
||
|
def test_ops_datetimelike_align(self):
|
||
|
# GH 7500
|
||
|
# datetimelike ops need to align
|
||
|
dt = Series(date_range('2012-1-1', periods=3, freq='D'))
|
||
|
dt.iloc[2] = np.nan
|
||
|
dt2 = dt[::-1]
|
||
|
|
||
|
expected = Series([timedelta(0), timedelta(0), pd.NaT])
|
||
|
# name is reset
|
||
|
result = dt2 - dt
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
expected = Series(expected, name=0)
|
||
|
result = (dt2.to_frame() - dt.to_frame())[0]
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_operators_bitwise(self):
    """Exercise ``&``, ``|`` and ``^`` on boolean and integer Series.

    Covers Series-vs-Series (aligned, empty and partially overlapping
    indexes), Series-vs-scalar, mixed integer widths, operands that must
    raise TypeError, and operands with NaN / float / string values.
    """
    # GH 9016: support bitwise op for integer types
    index = list('bca')

    # boolean fixtures, named after their True/False pattern
    s_tft = Series([True, False, True], index=index)
    s_fff = Series([False, False, False], index=index)
    s_tff = Series([True, False, False], index=index)
    s_empty = Series([])

    # TODO: unused
    # s_0101 = Series([0, 1, 0, 1])

    # integer fixtures, named after their values
    s_0123 = Series(range(4), dtype='int64')
    s_3333 = Series([3] * 4)
    s_4444 = Series([4] * 4)

    # boolean Series against an empty Series
    res = s_tft & s_empty
    expected = s_fff
    assert_series_equal(res, expected)

    res = s_tft | s_empty
    expected = s_tft
    assert_series_equal(res, expected)

    # integer Series against integer Series
    res = s_0123 & s_3333
    expected = Series(range(4), dtype='int64')
    assert_series_equal(res, expected)

    res = s_0123 | s_4444
    expected = Series(range(4, 8), dtype='int64')
    assert_series_equal(res, expected)

    # single value labelled 'b'; the expectations are reindexed to 'abc'
    s_a0b1c0 = Series([1], list('b'))

    res = s_tft & s_a0b1c0
    expected = s_tff.reindex(list('abc'))
    assert_series_equal(res, expected)

    res = s_tft | s_a0b1c0
    expected = s_tft.reindex(list('abc'))
    assert_series_equal(res, expected)

    # scalar operand 0
    n0 = 0
    res = s_tft & n0
    expected = s_fff
    assert_series_equal(res, expected)

    res = s_0123 & n0
    expected = Series([0] * 4)
    assert_series_equal(res, expected)

    # scalar operand 1
    n1 = 1
    res = s_tft & n1
    expected = s_tft
    assert_series_equal(res, expected)

    res = s_0123 & n1
    expected = Series([0, 1, 0, 1])
    assert_series_equal(res, expected)

    # operands with different integer widths
    s_1111 = Series([1] * 4, dtype='int8')
    res = s_0123 & s_1111
    expected = Series([0, 1, 0, 1], dtype='int64')
    assert_series_equal(res, expected)

    res = s_0123.astype(np.int16) | s_1111.astype(np.int32)
    expected = Series([1, 1, 3, 3], dtype='int32')
    assert_series_equal(res, expected)

    # string, NaN and float operands are rejected
    pytest.raises(TypeError, lambda: s_1111 & 'a')
    pytest.raises(TypeError, lambda: s_1111 & ['a', 'b', 'c', 'd'])
    pytest.raises(TypeError, lambda: s_0123 & np.NaN)
    pytest.raises(TypeError, lambda: s_0123 & 3.14)
    pytest.raises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2])

    # s_0123 will be all false now because of reindexing like s_tft
    if compat.PY3:
        # unable to sort incompatible object via .union.
        exp = Series([False] * 7, index=['b', 'c', 'a', 0, 1, 2, 3])
        with tm.assert_produces_warning(RuntimeWarning):
            assert_series_equal(s_tft & s_0123, exp)
    else:
        exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
        assert_series_equal(s_tft & s_0123, exp)

    # s_tft will be all false now because of reindexing like s_0123
    if compat.PY3:
        # unable to sort incompatible object via .union.
        exp = Series([False] * 7, index=[0, 1, 2, 3, 'b', 'c', 'a'])
        with tm.assert_produces_warning(RuntimeWarning):
            assert_series_equal(s_0123 & s_tft, exp)
    else:
        exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
        assert_series_equal(s_0123 & s_tft, exp)

    # integer Series against boolean scalars and list-likes
    assert_series_equal(s_0123 & False, Series([False] * 4))
    assert_series_equal(s_0123 ^ False, Series([False, True, True, True]))
    assert_series_equal(s_0123 & [False], Series([False] * 4))
    # NOTE(review): ``(False)`` is just ``False``, not a one-element tuple
    assert_series_equal(s_0123 & (False), Series([False] * 4))
    assert_series_equal(s_0123 & Series([False, np.NaN, False, False]),
                        Series([False] * 4))

    # float and string/NaN Series operands give a boolean result here
    s_ftft = Series([False, True, False, True])
    assert_series_equal(s_0123 & Series([0.1, 4, -3.14, 2]), s_ftft)

    s_abNd = Series(['a', 'b', np.NaN, 'd'])
    res = s_0123 & s_abNd
    expected = s_ftft
    assert_series_equal(res, expected)
|
||
|
|
||
|
def test_scalar_na_cmp_corners(self):
    """Corner cases of ``&`` with datetimes, NaN values and DataFrames."""
    s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10])

    def tester(a, b):
        return a & b

    with pytest.raises(TypeError):
        tester(s, datetime(2005, 1, 1))

    s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)])
    s[::2] = np.nan

    # every second position was set to NaN above and is expected False
    expected = Series(True, index=s.index)
    expected[::2] = False
    assert_series_equal(tester(s, list(s)), expected)

    d = DataFrame({'A': s})
    # TODO: Fix this exception - needs to be fixed! (see GH5035)
    # (previously this was a TypeError because series returned
    # NotImplemented

    # this is an alignment issue; these are equivalent
    # https://github.com/pandas-dev/pandas/issues/5284

    with pytest.raises(ValueError):
        d.__and__(s, axis='columns')
    with pytest.raises(ValueError):
        tester(s, d)

    # this is wrong as its not a boolean result
    # result = d.__and__(s,axis='index')
|
||
|
|
||
|
def test_operators_corner(self):
    """Addition with empty Series and float + int addition."""
    series = self.ts

    empty = Series([], index=Index([]))

    # nothing aligns with an empty Series, so every value is NaN
    total = series + empty
    assert np.isnan(total).all()

    total = empty + Series([], index=Index([]))
    assert len(total) == 0

    # TODO: this returned NotImplemented earlier, what to do?
    # deltas = Series([timedelta(1)] * 5, index=np.arange(5))
    # sub_deltas = deltas[::2]
    # deltas5 = deltas * 5
    # deltas = deltas + sub_deltas

    # float + int
    int_ts = self.ts.astype(int)[:-5]
    added = self.ts + int_ts
    raw_sum = self.ts.values[:-5] + int_ts.values
    expected = Series(raw_sum, index=self.ts.index[:-5], name='ts')
    tm.assert_series_equal(added[:-5], expected)
|
||
|
|
||
|
@pytest.mark.parametrize('op', [operator.add, operator.sub, operator.mul,
                                operator.truediv, operator.floordiv])
def test_operators_reverse_object(self, op):
    """Reflected ops on an object-dtype Series match the float result."""
    # GH 56
    arr = Series(np.random.randn(10), index=np.arange(10), dtype=object)

    from_object = op(1., arr)
    from_float = op(1., arr.astype(float))
    assert_series_equal(from_object.astype(float), from_float)
|
||
|
|
||
|
def test_operators_combine(self):
    """Check the flex arithmetic methods against the plain operators.

    Each ``Series`` method (``add``, ``radd``, ...) is compared with the
    equivalent function from ``operator``, and its ``fill_value`` handling
    is compared with an element-by-element reference computation.
    """
    def _check_fill(meth, op, a, b, fill_value=0):
        exp_index = a.index.union(b.index)
        a = a.reindex(exp_index)
        b = b.reindex(exp_index)

        amask = isna(a)
        bmask = isna(b)

        exp_values = []
        for i in range(len(exp_index)):
            with np.errstate(all='ignore'):
                if amask[i]:
                    if bmask[i]:
                        # missing on both sides stays missing
                        exp_values.append(nan)
                        continue
                    exp_values.append(op(fill_value, b[i]))
                elif bmask[i]:
                    # amask[i] is known to be False in this branch, so
                    # only ``b`` has to be replaced by the fill value
                    exp_values.append(op(a[i], fill_value))
                else:
                    exp_values.append(op(a[i], b[i]))

        result = meth(a, b, fill_value=fill_value)
        expected = Series(exp_values, exp_index)
        assert_series_equal(result, expected)

    a = Series([nan, 1., 2., 3., nan], index=np.arange(5))
    b = Series([nan, 1, nan, 3, nan, 4.], index=np.arange(6))

    # (Series method, reference function, fill value) triples
    pairings = []
    for op in ['add', 'sub', 'mul', 'pow', 'truediv', 'floordiv']:
        fv = 0
        lop = getattr(Series, op)
        lequiv = getattr(operator, op)
        rop = getattr(Series, 'r' + op)
        # bind op at definition time...
        requiv = lambda x, y, op=op: getattr(operator, op)(y, x)
        pairings.append((lop, lequiv, fv))
        pairings.append((rop, requiv, fv))

    if compat.PY3:
        pairings.append((Series.div, operator.truediv, 1))
        pairings.append((Series.rdiv, lambda x, y: operator.truediv(y, x),
                         1))
    else:
        pairings.append((Series.div, operator.div, 1))
        pairings.append((Series.rdiv, lambda x, y: operator.div(y, x), 1))

    for op, equiv_op, fv in pairings:
        result = op(a, b)
        exp = equiv_op(a, b)
        assert_series_equal(result, exp)
        _check_fill(op, equiv_op, a, b, fill_value=fv)
        # should accept axis=0 or axis='rows'
        op(a, b, axis=0)
|
||
|
|
||
|
def test_operators_na_handling(self):
|
||
|
from decimal import Decimal
|
||
|
from datetime import date
|
||
|
s = Series([Decimal('1.3'), Decimal('2.3')],
|
||
|
index=[date(2012, 1, 1), date(2012, 1, 2)])
|
||
|
|
||
|
result = s + s.shift(1)
|
||
|
result2 = s.shift(1) + s
|
||
|
assert isna(result[0])
|
||
|
assert isna(result2[0])
|
||
|
|
||
|
s = Series(['foo', 'bar', 'baz', np.nan])
|
||
|
result = 'prefix_' + s
|
||
|
expected = Series(['prefix_foo', 'prefix_bar', 'prefix_baz', np.nan])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
result = s + '_suffix'
|
||
|
expected = Series(['foo_suffix', 'bar_suffix', 'baz_suffix', np.nan])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_datetime64_with_index(self):
    """Subtracting an Index must match subtracting ``index.to_series()``."""
    # arithmetic integer ops with an index
    ser = Series(np.random.randn(5))
    via_series = ser - ser.index.to_series()
    assert_series_equal(ser - ser.index, via_series)

    # GH 4629
    # arithmetic datetime64 ops with an index
    ser = Series(date_range('20130101', periods=5),
                 index=date_range('20130101', periods=5))
    via_series = ser - ser.index.to_series()
    assert_series_equal(ser - ser.index, via_series)

    # GH#18850
    pytest.raises(TypeError, lambda: ser - ser.index.to_period())

    df = DataFrame(np.random.randn(5, 2),
                   index=date_range('20130101', periods=5))
    df['date'] = Timestamp('20130102')
    df['expected'] = df['date'] - df.index.to_series()
    df['result'] = df['date'] - df.index
    assert_series_equal(df['result'], df['expected'], check_names=False)
|
||
|
|
||
|
def test_op_duplicate_index(self):
|
||
|
# GH14227
|
||
|
s1 = Series([1, 2], index=[1, 1])
|
||
|
s2 = Series([10, 10], index=[1, 2])
|
||
|
result = s1 + s2
|
||
|
expected = pd.Series([11, 12, np.nan], index=[1, 1, 2])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
@pytest.mark.parametrize(
    "test_input,error_type",
    [
        (pd.Series([]), ValueError),

        # For strings, or any Series with dtype 'O'
        (pd.Series(['foo', 'bar', 'baz']), TypeError),
        (pd.Series([(1,), (2,)]), TypeError),

        # For mixed data types
        (
            pd.Series(['foo', 'foo', 'bar', 'bar', None, np.nan, 'baz']),
            TypeError
        ),
    ]
)
def test_assert_idxminmax_raises(self, test_input, error_type):
    """
    Cases where ``Series.idxmin`` and ``Series.idxmax`` should raise an
    exception, both with the default arguments and with ``skipna=False``
    """
    for locate in (test_input.idxmin, test_input.idxmax):
        with pytest.raises(error_type):
            locate()
        with pytest.raises(error_type):
            locate(skipna=False)
|
||
|
|
||
|
def test_idxminmax_with_inf(self):
|
||
|
# For numeric data with NA and Inf (GH #13595)
|
||
|
s = pd.Series([0, -np.inf, np.inf, np.nan])
|
||
|
|
||
|
assert s.idxmin() == 1
|
||
|
assert np.isnan(s.idxmin(skipna=False))
|
||
|
|
||
|
assert s.idxmax() == 2
|
||
|
assert np.isnan(s.idxmax(skipna=False))
|
||
|
|
||
|
# Using old-style behavior that treats floating point nan, -inf, and
|
||
|
# +inf as missing
|
||
|
with pd.option_context('mode.use_inf_as_na', True):
|
||
|
assert s.idxmin() == 0
|
||
|
assert np.isnan(s.idxmin(skipna=False))
|
||
|
assert s.idxmax() == 0
|
||
|
np.isnan(s.idxmax(skipna=False))
|
||
|
|
||
|
|
||
|
class TestSeriesOperationsDataFrameCompat(object):
|
||
|
def test_operators_frame(self):
    """Ops with a DataFrame column must match ops with the Series itself."""
    # rpow does not work with DataFrame
    ts = tm.makeTimeSeries()
    ts.name = 'ts'

    column = DataFrame({'A': ts})['A']

    # (Series op Series, Series op frame column) pairs
    pairs = [
        (ts + ts, ts + column),
        (ts ** ts, ts ** column),
        (ts < ts, ts < column),
        (ts / ts, ts / column),
    ]
    for with_series, with_column in pairs:
        assert_series_equal(with_series, with_column, check_names=False)
|
||
|
|
||
|
def test_series_frame_radd_bug(self):
    """Reflected addition of a string, and of a datetime, to a Series."""
    # GH#353
    vals = Series(tm.rands_array(5, 10))
    prefixed = vals.map(lambda x: 'foo_' + x)
    assert_series_equal('foo_' + vals, prefixed)

    frame = DataFrame({'vals': vals})
    assert_frame_equal('foo_' + frame, DataFrame({'vals': prefixed}))

    ts = tm.makeTimeSeries()
    ts.name = 'ts'

    # really raise this time
    pytest.raises(TypeError, lambda: datetime.now() + ts)
    pytest.raises(TypeError, lambda: ts + datetime.now())
|
||
|
|
||
|
def test_bool_ops_df_compat(self):
|
||
|
# GH 1134
|
||
|
s1 = pd.Series([True, False, True], index=list('ABC'), name='x')
|
||
|
s2 = pd.Series([True, True, False], index=list('ABD'), name='x')
|
||
|
|
||
|
exp = pd.Series([True, False, False, False],
|
||
|
index=list('ABCD'), name='x')
|
||
|
assert_series_equal(s1 & s2, exp)
|
||
|
assert_series_equal(s2 & s1, exp)
|
||
|
|
||
|
# True | np.nan => True
|
||
|
exp = pd.Series([True, True, True, False],
|
||
|
index=list('ABCD'), name='x')
|
||
|
assert_series_equal(s1 | s2, exp)
|
||
|
# np.nan | True => np.nan, filled with False
|
||
|
exp = pd.Series([True, True, False, False],
|
||
|
index=list('ABCD'), name='x')
|
||
|
assert_series_equal(s2 | s1, exp)
|
||
|
|
||
|
# DataFrame doesn't fill nan with False
|
||
|
exp = pd.DataFrame({'x': [True, False, np.nan, np.nan]},
|
||
|
index=list('ABCD'))
|
||
|
assert_frame_equal(s1.to_frame() & s2.to_frame(), exp)
|
||
|
assert_frame_equal(s2.to_frame() & s1.to_frame(), exp)
|
||
|
|
||
|
exp = pd.DataFrame({'x': [True, True, np.nan, np.nan]},
|
||
|
index=list('ABCD'))
|
||
|
assert_frame_equal(s1.to_frame() | s2.to_frame(), exp)
|
||
|
assert_frame_equal(s2.to_frame() | s1.to_frame(), exp)
|
||
|
|
||
|
# different length
|
||
|
s3 = pd.Series([True, False, True], index=list('ABC'), name='x')
|
||
|
s4 = pd.Series([True, True, True, True], index=list('ABCD'), name='x')
|
||
|
|
||
|
exp = pd.Series([True, False, True, False],
|
||
|
index=list('ABCD'), name='x')
|
||
|
assert_series_equal(s3 & s4, exp)
|
||
|
assert_series_equal(s4 & s3, exp)
|
||
|
|
||
|
# np.nan | True => np.nan, filled with False
|
||
|
exp = pd.Series([True, True, True, False],
|
||
|
index=list('ABCD'), name='x')
|
||
|
assert_series_equal(s3 | s4, exp)
|
||
|
# True | np.nan => True
|
||
|
exp = pd.Series([True, True, True, True],
|
||
|
index=list('ABCD'), name='x')
|
||
|
assert_series_equal(s4 | s3, exp)
|
||
|
|
||
|
exp = pd.DataFrame({'x': [True, False, True, np.nan]},
|
||
|
index=list('ABCD'))
|
||
|
assert_frame_equal(s3.to_frame() & s4.to_frame(), exp)
|
||
|
assert_frame_equal(s4.to_frame() & s3.to_frame(), exp)
|
||
|
|
||
|
exp = pd.DataFrame({'x': [True, True, True, np.nan]},
|
||
|
index=list('ABCD'))
|
||
|
assert_frame_equal(s3.to_frame() | s4.to_frame(), exp)
|
||
|
assert_frame_equal(s4.to_frame() | s3.to_frame(), exp)
|
||
|
|
||
|
def test_arith_ops_df_compat(self):
|
||
|
# GH#1134
|
||
|
s1 = pd.Series([1, 2, 3], index=list('ABC'), name='x')
|
||
|
s2 = pd.Series([2, 2, 2], index=list('ABD'), name='x')
|
||
|
|
||
|
exp = pd.Series([3.0, 4.0, np.nan, np.nan],
|
||
|
index=list('ABCD'), name='x')
|
||
|
assert_series_equal(s1 + s2, exp)
|
||
|
assert_series_equal(s2 + s1, exp)
|
||
|
|
||
|
exp = pd.DataFrame({'x': [3.0, 4.0, np.nan, np.nan]},
|
||
|
index=list('ABCD'))
|
||
|
assert_frame_equal(s1.to_frame() + s2.to_frame(), exp)
|
||
|
assert_frame_equal(s2.to_frame() + s1.to_frame(), exp)
|
||
|
|
||
|
# different length
|
||
|
s3 = pd.Series([1, 2, 3], index=list('ABC'), name='x')
|
||
|
s4 = pd.Series([2, 2, 2, 2], index=list('ABCD'), name='x')
|
||
|
|
||
|
exp = pd.Series([3, 4, 5, np.nan],
|
||
|
index=list('ABCD'), name='x')
|
||
|
assert_series_equal(s3 + s4, exp)
|
||
|
assert_series_equal(s4 + s3, exp)
|
||
|
|
||
|
exp = pd.DataFrame({'x': [3, 4, 5, np.nan]},
|
||
|
index=list('ABCD'))
|
||
|
assert_frame_equal(s3.to_frame() + s4.to_frame(), exp)
|
||
|
assert_frame_equal(s4.to_frame() + s3.to_frame(), exp)
|