1211 lines
43 KiB
Python
1211 lines
43 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
from __future__ import print_function
|
||
|
from collections import deque
|
||
|
from datetime import datetime
|
||
|
from decimal import Decimal
|
||
|
import operator
|
||
|
|
||
|
import pytest
|
||
|
|
||
|
from numpy import nan, random
|
||
|
import numpy as np
|
||
|
|
||
|
from pandas.compat import range
|
||
|
from pandas import compat
|
||
|
from pandas import (DataFrame, Series, MultiIndex, Timestamp,
|
||
|
date_range)
|
||
|
import pandas.core.common as com
|
||
|
import pandas.io.formats.printing as printing
|
||
|
import pandas as pd
|
||
|
|
||
|
from pandas.util.testing import (assert_numpy_array_equal,
|
||
|
assert_series_equal,
|
||
|
assert_frame_equal)
|
||
|
|
||
|
import pandas.util.testing as tm
|
||
|
|
||
|
from pandas.tests.frame.common import (TestData, _check_mixed_float,
|
||
|
_check_mixed_int)
|
||
|
|
||
|
|
||
|
class TestDataFrameOperators(TestData):
|
||
|
|
||
|
def test_operators(self):
|
||
|
garbage = random.random(4)
|
||
|
colSeries = Series(garbage, index=np.array(self.frame.columns))
|
||
|
|
||
|
idSum = self.frame + self.frame
|
||
|
seriesSum = self.frame + colSeries
|
||
|
|
||
|
for col, series in compat.iteritems(idSum):
|
||
|
for idx, val in compat.iteritems(series):
|
||
|
origVal = self.frame[col][idx] * 2
|
||
|
if not np.isnan(val):
|
||
|
assert val == origVal
|
||
|
else:
|
||
|
assert np.isnan(origVal)
|
||
|
|
||
|
for col, series in compat.iteritems(seriesSum):
|
||
|
for idx, val in compat.iteritems(series):
|
||
|
origVal = self.frame[col][idx] + colSeries[col]
|
||
|
if not np.isnan(val):
|
||
|
assert val == origVal
|
||
|
else:
|
||
|
assert np.isnan(origVal)
|
||
|
|
||
|
added = self.frame2 + self.frame2
|
||
|
expected = self.frame2 * 2
|
||
|
assert_frame_equal(added, expected)
|
||
|
|
||
|
df = DataFrame({'a': ['a', None, 'b']})
|
||
|
assert_frame_equal(df + df, DataFrame({'a': ['aa', np.nan, 'bb']}))
|
||
|
|
||
|
# Test for issue #10181
|
||
|
for dtype in ('float', 'int64'):
|
||
|
frames = [
|
||
|
DataFrame(dtype=dtype),
|
||
|
DataFrame(columns=['A'], dtype=dtype),
|
||
|
DataFrame(index=[0], dtype=dtype),
|
||
|
]
|
||
|
for df in frames:
|
||
|
assert (df + df).equals(df)
|
||
|
assert_frame_equal(df + df, df)
|
||
|
|
||
|
def test_ops_np_scalar(self):
|
||
|
vals, xs = np.random.rand(5, 3), [nan, 7, -23, 2.718, -3.14, np.inf]
|
||
|
f = lambda x: DataFrame(x, index=list('ABCDE'),
|
||
|
columns=['jim', 'joe', 'jolie'])
|
||
|
|
||
|
df = f(vals)
|
||
|
|
||
|
for x in xs:
|
||
|
assert_frame_equal(df / np.array(x), f(vals / x))
|
||
|
assert_frame_equal(np.array(x) * df, f(vals * x))
|
||
|
assert_frame_equal(df + np.array(x), f(vals + x))
|
||
|
assert_frame_equal(np.array(x) - df, f(x - vals))
|
||
|
|
||
|
def test_operators_boolean(self):
|
||
|
|
||
|
# GH 5808
|
||
|
# empty frames, non-mixed dtype
|
||
|
|
||
|
result = DataFrame(index=[1]) & DataFrame(index=[1])
|
||
|
assert_frame_equal(result, DataFrame(index=[1]))
|
||
|
|
||
|
result = DataFrame(index=[1]) | DataFrame(index=[1])
|
||
|
assert_frame_equal(result, DataFrame(index=[1]))
|
||
|
|
||
|
result = DataFrame(index=[1]) & DataFrame(index=[1, 2])
|
||
|
assert_frame_equal(result, DataFrame(index=[1, 2]))
|
||
|
|
||
|
result = DataFrame(index=[1], columns=['A']) & DataFrame(
|
||
|
index=[1], columns=['A'])
|
||
|
assert_frame_equal(result, DataFrame(index=[1], columns=['A']))
|
||
|
|
||
|
result = DataFrame(True, index=[1], columns=['A']) & DataFrame(
|
||
|
True, index=[1], columns=['A'])
|
||
|
assert_frame_equal(result, DataFrame(True, index=[1], columns=['A']))
|
||
|
|
||
|
result = DataFrame(True, index=[1], columns=['A']) | DataFrame(
|
||
|
True, index=[1], columns=['A'])
|
||
|
assert_frame_equal(result, DataFrame(True, index=[1], columns=['A']))
|
||
|
|
||
|
# boolean ops
|
||
|
result = DataFrame(1, index=[1], columns=['A']) | DataFrame(
|
||
|
True, index=[1], columns=['A'])
|
||
|
assert_frame_equal(result, DataFrame(1, index=[1], columns=['A']))
|
||
|
|
||
|
def f():
|
||
|
DataFrame(1.0, index=[1], columns=['A']) | DataFrame(
|
||
|
True, index=[1], columns=['A'])
|
||
|
pytest.raises(TypeError, f)
|
||
|
|
||
|
def f():
|
||
|
DataFrame('foo', index=[1], columns=['A']) | DataFrame(
|
||
|
True, index=[1], columns=['A'])
|
||
|
pytest.raises(TypeError, f)
|
||
|
|
||
|
def test_operators_none_as_na(self):
|
||
|
df = DataFrame({"col1": [2, 5.0, 123, None],
|
||
|
"col2": [1, 2, 3, 4]}, dtype=object)
|
||
|
|
||
|
ops = [operator.add, operator.sub, operator.mul, operator.truediv]
|
||
|
|
||
|
# since filling converts dtypes from object, changed expected to be
|
||
|
# object
|
||
|
for op in ops:
|
||
|
filled = df.fillna(np.nan)
|
||
|
result = op(df, 3)
|
||
|
expected = op(filled, 3).astype(object)
|
||
|
expected[com.isna(expected)] = None
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = op(df, df)
|
||
|
expected = op(filled, filled).astype(object)
|
||
|
expected[com.isna(expected)] = None
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = op(df, df.fillna(7))
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = op(df.fillna(7), df)
|
||
|
assert_frame_equal(result, expected, check_dtype=False)
|
||
|
|
||
|
def test_comparison_invalid(self):
|
||
|
|
||
|
def check(df, df2):
|
||
|
|
||
|
for (x, y) in [(df, df2), (df2, df)]:
|
||
|
pytest.raises(TypeError, lambda: x == y)
|
||
|
pytest.raises(TypeError, lambda: x != y)
|
||
|
pytest.raises(TypeError, lambda: x >= y)
|
||
|
pytest.raises(TypeError, lambda: x > y)
|
||
|
pytest.raises(TypeError, lambda: x < y)
|
||
|
pytest.raises(TypeError, lambda: x <= y)
|
||
|
|
||
|
# GH4968
|
||
|
# invalid date/int comparisons
|
||
|
df = DataFrame(np.random.randint(10, size=(10, 1)), columns=['a'])
|
||
|
df['dates'] = date_range('20010101', periods=len(df))
|
||
|
|
||
|
df2 = df.copy()
|
||
|
df2['dates'] = df['a']
|
||
|
check(df, df2)
|
||
|
|
||
|
df = DataFrame(np.random.randint(10, size=(10, 2)), columns=['a', 'b'])
|
||
|
df2 = DataFrame({'a': date_range('20010101', periods=len(
|
||
|
df)), 'b': date_range('20100101', periods=len(df))})
|
||
|
check(df, df2)
|
||
|
|
||
|
def test_timestamp_compare(self):
|
||
|
# make sure we can compare Timestamps on the right AND left hand side
|
||
|
# GH4982
|
||
|
df = DataFrame({'dates1': date_range('20010101', periods=10),
|
||
|
'dates2': date_range('20010102', periods=10),
|
||
|
'intcol': np.random.randint(1000000000, size=10),
|
||
|
'floatcol': np.random.randn(10),
|
||
|
'stringcol': list(tm.rands(10))})
|
||
|
df.loc[np.random.rand(len(df)) > 0.5, 'dates2'] = pd.NaT
|
||
|
ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq',
|
||
|
'ne': 'ne'}
|
||
|
|
||
|
for left, right in ops.items():
|
||
|
left_f = getattr(operator, left)
|
||
|
right_f = getattr(operator, right)
|
||
|
|
||
|
# no nats
|
||
|
expected = left_f(df, Timestamp('20010109'))
|
||
|
result = right_f(Timestamp('20010109'), df)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
# nats
|
||
|
expected = left_f(df, Timestamp('nat'))
|
||
|
result = right_f(Timestamp('nat'), df)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_logical_operators(self):
|
||
|
|
||
|
def _check_bin_op(op):
|
||
|
result = op(df1, df2)
|
||
|
expected = DataFrame(op(df1.values, df2.values), index=df1.index,
|
||
|
columns=df1.columns)
|
||
|
assert result.values.dtype == np.bool_
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def _check_unary_op(op):
|
||
|
result = op(df1)
|
||
|
expected = DataFrame(op(df1.values), index=df1.index,
|
||
|
columns=df1.columns)
|
||
|
assert result.values.dtype == np.bool_
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
df1 = {'a': {'a': True, 'b': False, 'c': False, 'd': True, 'e': True},
|
||
|
'b': {'a': False, 'b': True, 'c': False,
|
||
|
'd': False, 'e': False},
|
||
|
'c': {'a': False, 'b': False, 'c': True,
|
||
|
'd': False, 'e': False},
|
||
|
'd': {'a': True, 'b': False, 'c': False, 'd': True, 'e': True},
|
||
|
'e': {'a': True, 'b': False, 'c': False, 'd': True, 'e': True}}
|
||
|
|
||
|
df2 = {'a': {'a': True, 'b': False, 'c': True, 'd': False, 'e': False},
|
||
|
'b': {'a': False, 'b': True, 'c': False,
|
||
|
'd': False, 'e': False},
|
||
|
'c': {'a': True, 'b': False, 'c': True, 'd': False, 'e': False},
|
||
|
'd': {'a': False, 'b': False, 'c': False,
|
||
|
'd': True, 'e': False},
|
||
|
'e': {'a': False, 'b': False, 'c': False,
|
||
|
'd': False, 'e': True}}
|
||
|
|
||
|
df1 = DataFrame(df1)
|
||
|
df2 = DataFrame(df2)
|
||
|
|
||
|
_check_bin_op(operator.and_)
|
||
|
_check_bin_op(operator.or_)
|
||
|
_check_bin_op(operator.xor)
|
||
|
|
||
|
# operator.neg is deprecated in numpy >= 1.9
|
||
|
_check_unary_op(operator.inv)
|
||
|
|
||
|
@pytest.mark.parametrize('op,res', [('__eq__', False),
|
||
|
('__ne__', True)])
|
||
|
def test_logical_typeerror_with_non_valid(self, op, res):
|
||
|
# we are comparing floats vs a string
|
||
|
result = getattr(self.frame, op)('foo')
|
||
|
assert bool(result.all().all()) is res
|
||
|
|
||
|
def test_logical_with_nas(self):
|
||
|
d = DataFrame({'a': [np.nan, False], 'b': [True, True]})
|
||
|
|
||
|
# GH4947
|
||
|
# bool comparisons should return bool
|
||
|
result = d['a'] | d['b']
|
||
|
expected = Series([False, True])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
# GH4604, automatic casting here
|
||
|
result = d['a'].fillna(False) | d['b']
|
||
|
expected = Series([True, True])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
result = d['a'].fillna(False, downcast=False) | d['b']
|
||
|
expected = Series([True, True])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
@pytest.mark.parametrize('df,expected', [
|
||
|
(pd.DataFrame({'a': [-1, 1]}), pd.DataFrame({'a': [1, -1]})),
|
||
|
(pd.DataFrame({'a': [False, True]}),
|
||
|
pd.DataFrame({'a': [True, False]})),
|
||
|
(pd.DataFrame({'a': pd.Series(pd.to_timedelta([-1, 1]))}),
|
||
|
pd.DataFrame({'a': pd.Series(pd.to_timedelta([1, -1]))}))
|
||
|
])
|
||
|
def test_neg_numeric(self, df, expected):
|
||
|
assert_frame_equal(-df, expected)
|
||
|
assert_series_equal(-df['a'], expected['a'])
|
||
|
|
||
|
@pytest.mark.parametrize('df, expected', [
|
||
|
(np.array([1, 2], dtype=object), np.array([-1, -2], dtype=object)),
|
||
|
([Decimal('1.0'), Decimal('2.0')], [Decimal('-1.0'), Decimal('-2.0')]),
|
||
|
])
|
||
|
def test_neg_object(self, df, expected):
|
||
|
# GH 21380
|
||
|
df = pd.DataFrame({'a': df})
|
||
|
expected = pd.DataFrame({'a': expected})
|
||
|
assert_frame_equal(-df, expected)
|
||
|
assert_series_equal(-df['a'], expected['a'])
|
||
|
|
||
|
@pytest.mark.parametrize('df', [
|
||
|
pd.DataFrame({'a': ['a', 'b']}),
|
||
|
pd.DataFrame({'a': pd.to_datetime(['2017-01-22', '1970-01-01'])}),
|
||
|
])
|
||
|
def test_neg_raises(self, df):
|
||
|
with pytest.raises(TypeError):
|
||
|
(- df)
|
||
|
with pytest.raises(TypeError):
|
||
|
(- df['a'])
|
||
|
|
||
|
def test_invert(self):
|
||
|
assert_frame_equal(-(self.frame < 0), ~(self.frame < 0))
|
||
|
|
||
|
@pytest.mark.parametrize('df', [
|
||
|
pd.DataFrame({'a': [-1, 1]}),
|
||
|
pd.DataFrame({'a': [False, True]}),
|
||
|
pd.DataFrame({'a': pd.Series(pd.to_timedelta([-1, 1]))}),
|
||
|
])
|
||
|
def test_pos_numeric(self, df):
|
||
|
# GH 16073
|
||
|
assert_frame_equal(+df, df)
|
||
|
assert_series_equal(+df['a'], df['a'])
|
||
|
|
||
|
@pytest.mark.parametrize('df', [
|
||
|
pd.DataFrame({'a': ['a', 'b']}),
|
||
|
pd.DataFrame({'a': np.array([-1, 2], dtype=object)}),
|
||
|
pd.DataFrame({'a': [Decimal('-1.0'), Decimal('2.0')]}),
|
||
|
])
|
||
|
def test_pos_object(self, df):
|
||
|
# GH 21380
|
||
|
assert_frame_equal(+df, df)
|
||
|
assert_series_equal(+df['a'], df['a'])
|
||
|
|
||
|
@pytest.mark.parametrize('df', [
|
||
|
pd.DataFrame({'a': pd.to_datetime(['2017-01-22', '1970-01-01'])}),
|
||
|
])
|
||
|
def test_pos_raises(self, df):
|
||
|
with pytest.raises(TypeError):
|
||
|
(+ df)
|
||
|
with pytest.raises(TypeError):
|
||
|
(+ df['a'])
|
||
|
|
||
|
def test_arith_flex_frame(self):
|
||
|
ops = ['add', 'sub', 'mul', 'div', 'truediv', 'pow', 'floordiv', 'mod']
|
||
|
if not compat.PY3:
|
||
|
aliases = {}
|
||
|
else:
|
||
|
aliases = {'div': 'truediv'}
|
||
|
|
||
|
for op in ops:
|
||
|
try:
|
||
|
alias = aliases.get(op, op)
|
||
|
f = getattr(operator, alias)
|
||
|
result = getattr(self.frame, op)(2 * self.frame)
|
||
|
exp = f(self.frame, 2 * self.frame)
|
||
|
assert_frame_equal(result, exp)
|
||
|
|
||
|
# vs mix float
|
||
|
result = getattr(self.mixed_float, op)(2 * self.mixed_float)
|
||
|
exp = f(self.mixed_float, 2 * self.mixed_float)
|
||
|
assert_frame_equal(result, exp)
|
||
|
_check_mixed_float(result, dtype=dict(C=None))
|
||
|
|
||
|
# vs mix int
|
||
|
if op in ['add', 'sub', 'mul']:
|
||
|
result = getattr(self.mixed_int, op)(2 + self.mixed_int)
|
||
|
exp = f(self.mixed_int, 2 + self.mixed_int)
|
||
|
|
||
|
# no overflow in the uint
|
||
|
dtype = None
|
||
|
if op in ['sub']:
|
||
|
dtype = dict(B='uint64', C=None)
|
||
|
elif op in ['add', 'mul']:
|
||
|
dtype = dict(C=None)
|
||
|
assert_frame_equal(result, exp)
|
||
|
_check_mixed_int(result, dtype=dtype)
|
||
|
|
||
|
# rops
|
||
|
r_f = lambda x, y: f(y, x)
|
||
|
result = getattr(self.frame, 'r' + op)(2 * self.frame)
|
||
|
exp = r_f(self.frame, 2 * self.frame)
|
||
|
assert_frame_equal(result, exp)
|
||
|
|
||
|
# vs mix float
|
||
|
result = getattr(self.mixed_float, op)(
|
||
|
2 * self.mixed_float)
|
||
|
exp = f(self.mixed_float, 2 * self.mixed_float)
|
||
|
assert_frame_equal(result, exp)
|
||
|
_check_mixed_float(result, dtype=dict(C=None))
|
||
|
|
||
|
result = getattr(self.intframe, op)(2 * self.intframe)
|
||
|
exp = f(self.intframe, 2 * self.intframe)
|
||
|
assert_frame_equal(result, exp)
|
||
|
|
||
|
# vs mix int
|
||
|
if op in ['add', 'sub', 'mul']:
|
||
|
result = getattr(self.mixed_int, op)(
|
||
|
2 + self.mixed_int)
|
||
|
exp = f(self.mixed_int, 2 + self.mixed_int)
|
||
|
|
||
|
# no overflow in the uint
|
||
|
dtype = None
|
||
|
if op in ['sub']:
|
||
|
dtype = dict(B='uint64', C=None)
|
||
|
elif op in ['add', 'mul']:
|
||
|
dtype = dict(C=None)
|
||
|
assert_frame_equal(result, exp)
|
||
|
_check_mixed_int(result, dtype=dtype)
|
||
|
except:
|
||
|
printing.pprint_thing("Failing operation %r" % op)
|
||
|
raise
|
||
|
|
||
|
# ndim >= 3
|
||
|
ndim_5 = np.ones(self.frame.shape + (3, 4, 5))
|
||
|
msg = "Unable to coerce to Series/DataFrame"
|
||
|
with tm.assert_raises_regex(ValueError, msg):
|
||
|
f(self.frame, ndim_5)
|
||
|
|
||
|
with tm.assert_raises_regex(ValueError, msg):
|
||
|
getattr(self.frame, op)(ndim_5)
|
||
|
|
||
|
# res_add = self.frame.add(self.frame)
|
||
|
# res_sub = self.frame.sub(self.frame)
|
||
|
# res_mul = self.frame.mul(self.frame)
|
||
|
# res_div = self.frame.div(2 * self.frame)
|
||
|
|
||
|
# assert_frame_equal(res_add, self.frame + self.frame)
|
||
|
# assert_frame_equal(res_sub, self.frame - self.frame)
|
||
|
# assert_frame_equal(res_mul, self.frame * self.frame)
|
||
|
# assert_frame_equal(res_div, self.frame / (2 * self.frame))
|
||
|
|
||
|
const_add = self.frame.add(1)
|
||
|
assert_frame_equal(const_add, self.frame + 1)
|
||
|
|
||
|
# corner cases
|
||
|
result = self.frame.add(self.frame[:0])
|
||
|
assert_frame_equal(result, self.frame * np.nan)
|
||
|
|
||
|
result = self.frame[:0].add(self.frame)
|
||
|
assert_frame_equal(result, self.frame * np.nan)
|
||
|
with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
|
||
|
self.frame.add(self.frame.iloc[0], fill_value=3)
|
||
|
with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
|
||
|
self.frame.add(self.frame.iloc[0], axis='index', fill_value=3)
|
||
|
|
||
|
def test_arith_flex_zero_len_raises(self):
|
||
|
# GH#19522 passing fill_value to frame flex arith methods should
|
||
|
# raise even in the zero-length special cases
|
||
|
ser_len0 = pd.Series([])
|
||
|
df_len0 = pd.DataFrame([], columns=['A', 'B'])
|
||
|
df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
|
||
|
|
||
|
with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
|
||
|
df.add(ser_len0, fill_value='E')
|
||
|
|
||
|
with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
|
||
|
df_len0.sub(df['A'], axis=None, fill_value=3)
|
||
|
|
||
|
def test_binary_ops_align(self):
|
||
|
|
||
|
# test aligning binary ops
|
||
|
|
||
|
# GH 6681
|
||
|
index = MultiIndex.from_product([list('abc'),
|
||
|
['one', 'two', 'three'],
|
||
|
[1, 2, 3]],
|
||
|
names=['first', 'second', 'third'])
|
||
|
|
||
|
df = DataFrame(np.arange(27 * 3).reshape(27, 3),
|
||
|
index=index,
|
||
|
columns=['value1', 'value2', 'value3']).sort_index()
|
||
|
|
||
|
idx = pd.IndexSlice
|
||
|
for op in ['add', 'sub', 'mul', 'div', 'truediv']:
|
||
|
opa = getattr(operator, op, None)
|
||
|
if opa is None:
|
||
|
continue
|
||
|
|
||
|
x = Series([1.0, 10.0, 100.0], [1, 2, 3])
|
||
|
result = getattr(df, op)(x, level='third', axis=0)
|
||
|
|
||
|
expected = pd.concat([opa(df.loc[idx[:, :, i], :], v)
|
||
|
for i, v in x.iteritems()]).sort_index()
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
x = Series([1.0, 10.0], ['two', 'three'])
|
||
|
result = getattr(df, op)(x, level='second', axis=0)
|
||
|
|
||
|
expected = (pd.concat([opa(df.loc[idx[:, i], :], v)
|
||
|
for i, v in x.iteritems()])
|
||
|
.reindex_like(df).sort_index())
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
# GH9463 (alignment level of dataframe with series)
|
||
|
|
||
|
midx = MultiIndex.from_product([['A', 'B'], ['a', 'b']])
|
||
|
df = DataFrame(np.ones((2, 4), dtype='int64'), columns=midx)
|
||
|
s = pd.Series({'a': 1, 'b': 2})
|
||
|
|
||
|
df2 = df.copy()
|
||
|
df2.columns.names = ['lvl0', 'lvl1']
|
||
|
s2 = s.copy()
|
||
|
s2.index.name = 'lvl1'
|
||
|
|
||
|
# different cases of integer/string level names:
|
||
|
res1 = df.mul(s, axis=1, level=1)
|
||
|
res2 = df.mul(s2, axis=1, level=1)
|
||
|
res3 = df2.mul(s, axis=1, level=1)
|
||
|
res4 = df2.mul(s2, axis=1, level=1)
|
||
|
res5 = df2.mul(s, axis=1, level='lvl1')
|
||
|
res6 = df2.mul(s2, axis=1, level='lvl1')
|
||
|
|
||
|
exp = DataFrame(np.array([[1, 2, 1, 2], [1, 2, 1, 2]], dtype='int64'),
|
||
|
columns=midx)
|
||
|
|
||
|
for res in [res1, res2]:
|
||
|
assert_frame_equal(res, exp)
|
||
|
|
||
|
exp.columns.names = ['lvl0', 'lvl1']
|
||
|
for res in [res3, res4, res5, res6]:
|
||
|
assert_frame_equal(res, exp)
|
||
|
|
||
|
def test_arith_mixed(self):
|
||
|
|
||
|
left = DataFrame({'A': ['a', 'b', 'c'],
|
||
|
'B': [1, 2, 3]})
|
||
|
|
||
|
result = left + left
|
||
|
expected = DataFrame({'A': ['aa', 'bb', 'cc'],
|
||
|
'B': [2, 4, 6]})
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_arith_getitem_commute(self):
|
||
|
df = DataFrame({'A': [1.1, 3.3], 'B': [2.5, -3.9]})
|
||
|
|
||
|
self._test_op(df, operator.add)
|
||
|
self._test_op(df, operator.sub)
|
||
|
self._test_op(df, operator.mul)
|
||
|
self._test_op(df, operator.truediv)
|
||
|
self._test_op(df, operator.floordiv)
|
||
|
self._test_op(df, operator.pow)
|
||
|
|
||
|
self._test_op(df, lambda x, y: y + x)
|
||
|
self._test_op(df, lambda x, y: y - x)
|
||
|
self._test_op(df, lambda x, y: y * x)
|
||
|
self._test_op(df, lambda x, y: y / x)
|
||
|
self._test_op(df, lambda x, y: y ** x)
|
||
|
|
||
|
self._test_op(df, lambda x, y: x + y)
|
||
|
self._test_op(df, lambda x, y: x - y)
|
||
|
self._test_op(df, lambda x, y: x * y)
|
||
|
self._test_op(df, lambda x, y: x / y)
|
||
|
self._test_op(df, lambda x, y: x ** y)
|
||
|
|
||
|
@staticmethod
|
||
|
def _test_op(df, op):
|
||
|
result = op(df, 1)
|
||
|
|
||
|
if not df.columns.is_unique:
|
||
|
raise ValueError("Only unique columns supported by this test")
|
||
|
|
||
|
for col in result.columns:
|
||
|
assert_series_equal(result[col], op(df[col], 1))
|
||
|
|
||
|
def test_bool_flex_frame(self):
|
||
|
data = np.random.randn(5, 3)
|
||
|
other_data = np.random.randn(5, 3)
|
||
|
df = DataFrame(data)
|
||
|
other = DataFrame(other_data)
|
||
|
ndim_5 = np.ones(df.shape + (1, 3))
|
||
|
|
||
|
# Unaligned
|
||
|
def _check_unaligned_frame(meth, op, df, other):
|
||
|
part_o = other.loc[3:, 1:].copy()
|
||
|
rs = meth(part_o)
|
||
|
xp = op(df, part_o.reindex(index=df.index, columns=df.columns))
|
||
|
assert_frame_equal(rs, xp)
|
||
|
|
||
|
# DataFrame
|
||
|
assert df.eq(df).values.all()
|
||
|
assert not df.ne(df).values.any()
|
||
|
for op in ['eq', 'ne', 'gt', 'lt', 'ge', 'le']:
|
||
|
f = getattr(df, op)
|
||
|
o = getattr(operator, op)
|
||
|
# No NAs
|
||
|
assert_frame_equal(f(other), o(df, other))
|
||
|
_check_unaligned_frame(f, o, df, other)
|
||
|
# ndarray
|
||
|
assert_frame_equal(f(other.values), o(df, other.values))
|
||
|
# scalar
|
||
|
assert_frame_equal(f(0), o(df, 0))
|
||
|
# NAs
|
||
|
msg = "Unable to coerce to Series/DataFrame"
|
||
|
assert_frame_equal(f(np.nan), o(df, np.nan))
|
||
|
with tm.assert_raises_regex(ValueError, msg):
|
||
|
f(ndim_5)
|
||
|
|
||
|
# Series
|
||
|
def _test_seq(df, idx_ser, col_ser):
|
||
|
idx_eq = df.eq(idx_ser, axis=0)
|
||
|
col_eq = df.eq(col_ser)
|
||
|
idx_ne = df.ne(idx_ser, axis=0)
|
||
|
col_ne = df.ne(col_ser)
|
||
|
assert_frame_equal(col_eq, df == Series(col_ser))
|
||
|
assert_frame_equal(col_eq, -col_ne)
|
||
|
assert_frame_equal(idx_eq, -idx_ne)
|
||
|
assert_frame_equal(idx_eq, df.T.eq(idx_ser).T)
|
||
|
assert_frame_equal(col_eq, df.eq(list(col_ser)))
|
||
|
assert_frame_equal(idx_eq, df.eq(Series(idx_ser), axis=0))
|
||
|
assert_frame_equal(idx_eq, df.eq(list(idx_ser), axis=0))
|
||
|
|
||
|
idx_gt = df.gt(idx_ser, axis=0)
|
||
|
col_gt = df.gt(col_ser)
|
||
|
idx_le = df.le(idx_ser, axis=0)
|
||
|
col_le = df.le(col_ser)
|
||
|
|
||
|
assert_frame_equal(col_gt, df > Series(col_ser))
|
||
|
assert_frame_equal(col_gt, -col_le)
|
||
|
assert_frame_equal(idx_gt, -idx_le)
|
||
|
assert_frame_equal(idx_gt, df.T.gt(idx_ser).T)
|
||
|
|
||
|
idx_ge = df.ge(idx_ser, axis=0)
|
||
|
col_ge = df.ge(col_ser)
|
||
|
idx_lt = df.lt(idx_ser, axis=0)
|
||
|
col_lt = df.lt(col_ser)
|
||
|
assert_frame_equal(col_ge, df >= Series(col_ser))
|
||
|
assert_frame_equal(col_ge, -col_lt)
|
||
|
assert_frame_equal(idx_ge, -idx_lt)
|
||
|
assert_frame_equal(idx_ge, df.T.ge(idx_ser).T)
|
||
|
|
||
|
idx_ser = Series(np.random.randn(5))
|
||
|
col_ser = Series(np.random.randn(3))
|
||
|
_test_seq(df, idx_ser, col_ser)
|
||
|
|
||
|
# list/tuple
|
||
|
_test_seq(df, idx_ser.values, col_ser.values)
|
||
|
|
||
|
# NA
|
||
|
df.loc[0, 0] = np.nan
|
||
|
rs = df.eq(df)
|
||
|
assert not rs.loc[0, 0]
|
||
|
rs = df.ne(df)
|
||
|
assert rs.loc[0, 0]
|
||
|
rs = df.gt(df)
|
||
|
assert not rs.loc[0, 0]
|
||
|
rs = df.lt(df)
|
||
|
assert not rs.loc[0, 0]
|
||
|
rs = df.ge(df)
|
||
|
assert not rs.loc[0, 0]
|
||
|
rs = df.le(df)
|
||
|
assert not rs.loc[0, 0]
|
||
|
|
||
|
# complex
|
||
|
arr = np.array([np.nan, 1, 6, np.nan])
|
||
|
arr2 = np.array([2j, np.nan, 7, None])
|
||
|
df = DataFrame({'a': arr})
|
||
|
df2 = DataFrame({'a': arr2})
|
||
|
rs = df.gt(df2)
|
||
|
assert not rs.values.any()
|
||
|
rs = df.ne(df2)
|
||
|
assert rs.values.all()
|
||
|
|
||
|
arr3 = np.array([2j, np.nan, None])
|
||
|
df3 = DataFrame({'a': arr3})
|
||
|
rs = df3.gt(2j)
|
||
|
assert not rs.values.any()
|
||
|
|
||
|
# corner, dtype=object
|
||
|
df1 = DataFrame({'col': ['foo', np.nan, 'bar']})
|
||
|
df2 = DataFrame({'col': ['foo', datetime.now(), 'bar']})
|
||
|
result = df1.ne(df2)
|
||
|
exp = DataFrame({'col': [False, True, False]})
|
||
|
assert_frame_equal(result, exp)
|
||
|
|
||
|
def test_dti_tz_convert_to_utc(self):
|
||
|
base = pd.DatetimeIndex(['2011-01-01', '2011-01-02',
|
||
|
'2011-01-03'], tz='UTC')
|
||
|
idx1 = base.tz_convert('Asia/Tokyo')[:2]
|
||
|
idx2 = base.tz_convert('US/Eastern')[1:]
|
||
|
|
||
|
df1 = DataFrame({'A': [1, 2]}, index=idx1)
|
||
|
df2 = DataFrame({'A': [1, 1]}, index=idx2)
|
||
|
exp = DataFrame({'A': [np.nan, 3, np.nan]}, index=base)
|
||
|
assert_frame_equal(df1 + df2, exp)
|
||
|
|
||
|
def test_arith_flex_series(self):
|
||
|
df = self.simple
|
||
|
|
||
|
row = df.xs('a')
|
||
|
col = df['two']
|
||
|
# after arithmetic refactor, add truediv here
|
||
|
ops = ['add', 'sub', 'mul', 'mod']
|
||
|
for op in ops:
|
||
|
f = getattr(df, op)
|
||
|
op = getattr(operator, op)
|
||
|
assert_frame_equal(f(row), op(df, row))
|
||
|
assert_frame_equal(f(col, axis=0), op(df.T, col).T)
|
||
|
|
||
|
# special case for some reason
|
||
|
assert_frame_equal(df.add(row, axis=None), df + row)
|
||
|
|
||
|
# cases which will be refactored after big arithmetic refactor
|
||
|
assert_frame_equal(df.div(row), df / row)
|
||
|
assert_frame_equal(df.div(col, axis=0), (df.T / col).T)
|
||
|
|
||
|
# broadcasting issue in GH7325
|
||
|
df = DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype='int64')
|
||
|
expected = DataFrame([[nan, np.inf], [1.0, 1.5], [1.0, 1.25]])
|
||
|
result = df.div(df[0], axis='index')
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
df = DataFrame(np.arange(3 * 2).reshape((3, 2)), dtype='float64')
|
||
|
expected = DataFrame([[np.nan, np.inf], [1.0, 1.5], [1.0, 1.25]])
|
||
|
result = df.div(df[0], axis='index')
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_arith_non_pandas_object(self):
|
||
|
df = self.simple
|
||
|
|
||
|
val1 = df.xs('a').values
|
||
|
added = DataFrame(df.values + val1, index=df.index, columns=df.columns)
|
||
|
assert_frame_equal(df + val1, added)
|
||
|
|
||
|
added = DataFrame((df.values.T + val1).T,
|
||
|
index=df.index, columns=df.columns)
|
||
|
assert_frame_equal(df.add(val1, axis=0), added)
|
||
|
|
||
|
val2 = list(df['two'])
|
||
|
|
||
|
added = DataFrame(df.values + val2, index=df.index, columns=df.columns)
|
||
|
assert_frame_equal(df + val2, added)
|
||
|
|
||
|
added = DataFrame((df.values.T + val2).T, index=df.index,
|
||
|
columns=df.columns)
|
||
|
assert_frame_equal(df.add(val2, axis='index'), added)
|
||
|
|
||
|
val3 = np.random.rand(*df.shape)
|
||
|
added = DataFrame(df.values + val3, index=df.index, columns=df.columns)
|
||
|
assert_frame_equal(df.add(val3), added)
|
||
|
|
||
|
@pytest.mark.parametrize('values', [[1, 2], (1, 2), np.array([1, 2]),
|
||
|
range(1, 3), deque([1, 2])])
|
||
|
def test_arith_alignment_non_pandas_object(self, values):
|
||
|
# GH 17901
|
||
|
df = DataFrame({'A': [1, 1], 'B': [1, 1]})
|
||
|
expected = DataFrame({'A': [2, 2], 'B': [3, 3]})
|
||
|
result = df + values
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_combineFrame(self):
|
||
|
frame_copy = self.frame.reindex(self.frame.index[::2])
|
||
|
|
||
|
del frame_copy['D']
|
||
|
frame_copy['C'][:5] = nan
|
||
|
|
||
|
added = self.frame + frame_copy
|
||
|
|
||
|
indexer = added['A'].dropna().index
|
||
|
exp = (self.frame['A'] * 2).copy()
|
||
|
|
||
|
tm.assert_series_equal(added['A'].dropna(), exp.loc[indexer])
|
||
|
|
||
|
exp.loc[~exp.index.isin(indexer)] = np.nan
|
||
|
tm.assert_series_equal(added['A'], exp.loc[added['A'].index])
|
||
|
|
||
|
assert np.isnan(added['C'].reindex(frame_copy.index)[:5]).all()
|
||
|
|
||
|
# assert(False)
|
||
|
|
||
|
assert np.isnan(added['D']).all()
|
||
|
|
||
|
self_added = self.frame + self.frame
|
||
|
tm.assert_index_equal(self_added.index, self.frame.index)
|
||
|
|
||
|
added_rev = frame_copy + self.frame
|
||
|
assert np.isnan(added['D']).all()
|
||
|
assert np.isnan(added_rev['D']).all()
|
||
|
|
||
|
# corner cases
|
||
|
|
||
|
# empty
|
||
|
plus_empty = self.frame + self.empty
|
||
|
assert np.isnan(plus_empty.values).all()
|
||
|
|
||
|
empty_plus = self.empty + self.frame
|
||
|
assert np.isnan(empty_plus.values).all()
|
||
|
|
||
|
empty_empty = self.empty + self.empty
|
||
|
assert empty_empty.empty
|
||
|
|
||
|
# out of order
|
||
|
reverse = self.frame.reindex(columns=self.frame.columns[::-1])
|
||
|
|
||
|
assert_frame_equal(reverse + self.frame, self.frame * 2)
|
||
|
|
||
|
# mix vs float64, upcast
|
||
|
added = self.frame + self.mixed_float
|
||
|
_check_mixed_float(added, dtype='float64')
|
||
|
added = self.mixed_float + self.frame
|
||
|
_check_mixed_float(added, dtype='float64')
|
||
|
|
||
|
# mix vs mix
|
||
|
added = self.mixed_float + self.mixed_float2
|
||
|
_check_mixed_float(added, dtype=dict(C=None))
|
||
|
added = self.mixed_float2 + self.mixed_float
|
||
|
_check_mixed_float(added, dtype=dict(C=None))
|
||
|
|
||
|
# with int
|
||
|
added = self.frame + self.mixed_int
|
||
|
_check_mixed_float(added, dtype='float64')
|
||
|
|
||
|
def test_combineSeries(self):
|
||
|
|
||
|
# Series
|
||
|
series = self.frame.xs(self.frame.index[0])
|
||
|
|
||
|
added = self.frame + series
|
||
|
|
||
|
for key, s in compat.iteritems(added):
|
||
|
assert_series_equal(s, self.frame[key] + series[key])
|
||
|
|
||
|
larger_series = series.to_dict()
|
||
|
larger_series['E'] = 1
|
||
|
larger_series = Series(larger_series)
|
||
|
larger_added = self.frame + larger_series
|
||
|
|
||
|
for key, s in compat.iteritems(self.frame):
|
||
|
assert_series_equal(larger_added[key], s + series[key])
|
||
|
assert 'E' in larger_added
|
||
|
assert np.isnan(larger_added['E']).all()
|
||
|
|
||
|
# no upcast needed
|
||
|
added = self.mixed_float + series
|
||
|
_check_mixed_float(added)
|
||
|
|
||
|
# vs mix (upcast) as needed
|
||
|
added = self.mixed_float + series.astype('float32')
|
||
|
_check_mixed_float(added, dtype=dict(C=None))
|
||
|
added = self.mixed_float + series.astype('float16')
|
||
|
_check_mixed_float(added, dtype=dict(C=None))
|
||
|
|
||
|
# these raise with numexpr.....as we are adding an int64 to an
|
||
|
# uint64....weird vs int
|
||
|
|
||
|
# added = self.mixed_int + (100*series).astype('int64')
|
||
|
# _check_mixed_int(added, dtype = dict(A = 'int64', B = 'float64', C =
|
||
|
# 'int64', D = 'int64'))
|
||
|
# added = self.mixed_int + (100*series).astype('int32')
|
||
|
# _check_mixed_int(added, dtype = dict(A = 'int32', B = 'float64', C =
|
||
|
# 'int32', D = 'int64'))
|
||
|
|
||
|
# TimeSeries
|
||
|
ts = self.tsframe['A']
|
||
|
|
||
|
# 10890
|
||
|
# we no longer allow auto timeseries broadcasting
|
||
|
# and require explicit broadcasting
|
||
|
added = self.tsframe.add(ts, axis='index')
|
||
|
|
||
|
for key, col in compat.iteritems(self.tsframe):
|
||
|
result = col + ts
|
||
|
assert_series_equal(added[key], result, check_names=False)
|
||
|
assert added[key].name == key
|
||
|
if col.name == ts.name:
|
||
|
assert result.name == 'A'
|
||
|
else:
|
||
|
assert result.name is None
|
||
|
|
||
|
smaller_frame = self.tsframe[:-5]
|
||
|
smaller_added = smaller_frame.add(ts, axis='index')
|
||
|
|
||
|
tm.assert_index_equal(smaller_added.index, self.tsframe.index)
|
||
|
|
||
|
smaller_ts = ts[:-5]
|
||
|
smaller_added2 = self.tsframe.add(smaller_ts, axis='index')
|
||
|
assert_frame_equal(smaller_added, smaller_added2)
|
||
|
|
||
|
# length 0, result is all-nan
|
||
|
result = self.tsframe.add(ts[:0], axis='index')
|
||
|
expected = DataFrame(np.nan, index=self.tsframe.index,
|
||
|
columns=self.tsframe.columns)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
# Frame is all-nan
|
||
|
result = self.tsframe[:0].add(ts, axis='index')
|
||
|
expected = DataFrame(np.nan, index=self.tsframe.index,
|
||
|
columns=self.tsframe.columns)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
# empty but with non-empty index
|
||
|
frame = self.tsframe[:1].reindex(columns=[])
|
||
|
result = frame.mul(ts, axis='index')
|
||
|
assert len(result) == len(ts)
|
||
|
|
||
|
def test_combineFunc(self):
|
||
|
result = self.frame * 2
|
||
|
tm.assert_numpy_array_equal(result.values, self.frame.values * 2)
|
||
|
|
||
|
# vs mix
|
||
|
result = self.mixed_float * 2
|
||
|
for c, s in compat.iteritems(result):
|
||
|
tm.assert_numpy_array_equal(
|
||
|
s.values, self.mixed_float[c].values * 2)
|
||
|
_check_mixed_float(result, dtype=dict(C=None))
|
||
|
|
||
|
result = self.empty * 2
|
||
|
assert result.index is self.empty.index
|
||
|
assert len(result.columns) == 0
|
||
|
|
||
|
def test_comparisons(self):
|
||
|
df1 = tm.makeTimeDataFrame()
|
||
|
df2 = tm.makeTimeDataFrame()
|
||
|
|
||
|
row = self.simple.xs('a')
|
||
|
ndim_5 = np.ones(df1.shape + (1, 1, 1))
|
||
|
|
||
|
def test_comp(func):
|
||
|
result = func(df1, df2)
|
||
|
tm.assert_numpy_array_equal(result.values,
|
||
|
func(df1.values, df2.values))
|
||
|
with tm.assert_raises_regex(ValueError,
|
||
|
'Wrong number of dimensions'):
|
||
|
func(df1, ndim_5)
|
||
|
|
||
|
result2 = func(self.simple, row)
|
||
|
tm.assert_numpy_array_equal(result2.values,
|
||
|
func(self.simple.values, row.values))
|
||
|
|
||
|
result3 = func(self.frame, 0)
|
||
|
tm.assert_numpy_array_equal(result3.values,
|
||
|
func(self.frame.values, 0))
|
||
|
|
||
|
with tm.assert_raises_regex(ValueError,
|
||
|
'Can only compare identically'
|
||
|
'-labeled DataFrame'):
|
||
|
func(self.simple, self.simple[:2])
|
||
|
|
||
|
test_comp(operator.eq)
|
||
|
test_comp(operator.ne)
|
||
|
test_comp(operator.lt)
|
||
|
test_comp(operator.gt)
|
||
|
test_comp(operator.ge)
|
||
|
test_comp(operator.le)
|
||
|
|
||
|
def test_comparison_protected_from_errstate(self):
|
||
|
missing_df = tm.makeDataFrame()
|
||
|
missing_df.iloc[0]['A'] = np.nan
|
||
|
with np.errstate(invalid='ignore'):
|
||
|
expected = missing_df.values < 0
|
||
|
with np.errstate(invalid='raise'):
|
||
|
result = (missing_df < 0).values
|
||
|
tm.assert_numpy_array_equal(result, expected)
|
||
|
|
||
|
def test_boolean_comparison(self):
|
||
|
|
||
|
# GH 4576
|
||
|
# boolean comparisons with a tuple/list give unexpected results
|
||
|
df = DataFrame(np.arange(6).reshape((3, 2)))
|
||
|
b = np.array([2, 2])
|
||
|
b_r = np.atleast_2d([2, 2])
|
||
|
b_c = b_r.T
|
||
|
l = (2, 2, 2)
|
||
|
tup = tuple(l)
|
||
|
|
||
|
# gt
|
||
|
expected = DataFrame([[False, False], [False, True], [True, True]])
|
||
|
result = df > b
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.values > b
|
||
|
assert_numpy_array_equal(result, expected.values)
|
||
|
|
||
|
result = df > l
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df > tup
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df > b_r
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.values > b_r
|
||
|
assert_numpy_array_equal(result, expected.values)
|
||
|
|
||
|
pytest.raises(ValueError, df.__gt__, b_c)
|
||
|
pytest.raises(ValueError, df.values.__gt__, b_c)
|
||
|
|
||
|
# ==
|
||
|
expected = DataFrame([[False, False], [True, False], [False, False]])
|
||
|
result = df == b
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df == l
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df == tup
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df == b_r
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.values == b_r
|
||
|
assert_numpy_array_equal(result, expected.values)
|
||
|
|
||
|
pytest.raises(ValueError, lambda: df == b_c)
|
||
|
assert df.values.shape != b_c.shape
|
||
|
|
||
|
# with alignment
|
||
|
df = DataFrame(np.arange(6).reshape((3, 2)),
|
||
|
columns=list('AB'), index=list('abc'))
|
||
|
expected.index = df.index
|
||
|
expected.columns = df.columns
|
||
|
|
||
|
result = df == l
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df == tup
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_combine_generic(self):
|
||
|
df1 = self.frame
|
||
|
df2 = self.frame.loc[self.frame.index[:-5], ['A', 'B', 'C']]
|
||
|
|
||
|
combined = df1.combine(df2, np.add)
|
||
|
combined2 = df2.combine(df1, np.add)
|
||
|
assert combined['D'].isna().all()
|
||
|
assert combined2['D'].isna().all()
|
||
|
|
||
|
chunk = combined.loc[combined.index[:-5], ['A', 'B', 'C']]
|
||
|
chunk2 = combined2.loc[combined2.index[:-5], ['A', 'B', 'C']]
|
||
|
|
||
|
exp = self.frame.loc[self.frame.index[:-5],
|
||
|
['A', 'B', 'C']].reindex_like(chunk) * 2
|
||
|
assert_frame_equal(chunk, exp)
|
||
|
assert_frame_equal(chunk2, exp)
|
||
|
|
||
|
def test_inplace_ops_alignment(self):
|
||
|
|
||
|
# inplace ops / ops alignment
|
||
|
# GH 8511
|
||
|
|
||
|
columns = list('abcdefg')
|
||
|
X_orig = DataFrame(np.arange(10 * len(columns))
|
||
|
.reshape(-1, len(columns)),
|
||
|
columns=columns, index=range(10))
|
||
|
Z = 100 * X_orig.iloc[:, 1:-1].copy()
|
||
|
block1 = list('bedcf')
|
||
|
subs = list('bcdef')
|
||
|
|
||
|
# add
|
||
|
X = X_orig.copy()
|
||
|
result1 = (X[block1] + Z).reindex(columns=subs)
|
||
|
|
||
|
X[block1] += Z
|
||
|
result2 = X.reindex(columns=subs)
|
||
|
|
||
|
X = X_orig.copy()
|
||
|
result3 = (X[block1] + Z[block1]).reindex(columns=subs)
|
||
|
|
||
|
X[block1] += Z[block1]
|
||
|
result4 = X.reindex(columns=subs)
|
||
|
|
||
|
assert_frame_equal(result1, result2)
|
||
|
assert_frame_equal(result1, result3)
|
||
|
assert_frame_equal(result1, result4)
|
||
|
|
||
|
# sub
|
||
|
X = X_orig.copy()
|
||
|
result1 = (X[block1] - Z).reindex(columns=subs)
|
||
|
|
||
|
X[block1] -= Z
|
||
|
result2 = X.reindex(columns=subs)
|
||
|
|
||
|
X = X_orig.copy()
|
||
|
result3 = (X[block1] - Z[block1]).reindex(columns=subs)
|
||
|
|
||
|
X[block1] -= Z[block1]
|
||
|
result4 = X.reindex(columns=subs)
|
||
|
|
||
|
assert_frame_equal(result1, result2)
|
||
|
assert_frame_equal(result1, result3)
|
||
|
assert_frame_equal(result1, result4)
|
||
|
|
||
|
def test_inplace_ops_identity(self):
|
||
|
|
||
|
# GH 5104
|
||
|
# make sure that we are actually changing the object
|
||
|
s_orig = Series([1, 2, 3])
|
||
|
df_orig = DataFrame(np.random.randint(0, 5, size=10).reshape(-1, 5))
|
||
|
|
||
|
# no dtype change
|
||
|
s = s_orig.copy()
|
||
|
s2 = s
|
||
|
s += 1
|
||
|
assert_series_equal(s, s2)
|
||
|
assert_series_equal(s_orig + 1, s)
|
||
|
assert s is s2
|
||
|
assert s._data is s2._data
|
||
|
|
||
|
df = df_orig.copy()
|
||
|
df2 = df
|
||
|
df += 1
|
||
|
assert_frame_equal(df, df2)
|
||
|
assert_frame_equal(df_orig + 1, df)
|
||
|
assert df is df2
|
||
|
assert df._data is df2._data
|
||
|
|
||
|
# dtype change
|
||
|
s = s_orig.copy()
|
||
|
s2 = s
|
||
|
s += 1.5
|
||
|
assert_series_equal(s, s2)
|
||
|
assert_series_equal(s_orig + 1.5, s)
|
||
|
|
||
|
df = df_orig.copy()
|
||
|
df2 = df
|
||
|
df += 1.5
|
||
|
assert_frame_equal(df, df2)
|
||
|
assert_frame_equal(df_orig + 1.5, df)
|
||
|
assert df is df2
|
||
|
assert df._data is df2._data
|
||
|
|
||
|
# mixed dtype
|
||
|
arr = np.random.randint(0, 10, size=5)
|
||
|
df_orig = DataFrame({'A': arr.copy(), 'B': 'foo'})
|
||
|
df = df_orig.copy()
|
||
|
df2 = df
|
||
|
df['A'] += 1
|
||
|
expected = DataFrame({'A': arr.copy() + 1, 'B': 'foo'})
|
||
|
assert_frame_equal(df, expected)
|
||
|
assert_frame_equal(df2, expected)
|
||
|
assert df._data is df2._data
|
||
|
|
||
|
df = df_orig.copy()
|
||
|
df2 = df
|
||
|
df['A'] += 1.5
|
||
|
expected = DataFrame({'A': arr.copy() + 1.5, 'B': 'foo'})
|
||
|
assert_frame_equal(df, expected)
|
||
|
assert_frame_equal(df2, expected)
|
||
|
assert df._data is df2._data
|
||
|
|
||
|
@pytest.mark.parametrize('op', ['add', 'and', 'div', 'floordiv', 'mod',
|
||
|
'mul', 'or', 'pow', 'sub', 'truediv',
|
||
|
'xor'])
|
||
|
def test_inplace_ops_identity2(self, op):
|
||
|
|
||
|
if compat.PY3 and op == 'div':
|
||
|
return
|
||
|
|
||
|
df = DataFrame({'a': [1., 2., 3.],
|
||
|
'b': [1, 2, 3]})
|
||
|
|
||
|
operand = 2
|
||
|
if op in ('and', 'or', 'xor'):
|
||
|
# cannot use floats for boolean ops
|
||
|
df['a'] = [True, False, True]
|
||
|
|
||
|
df_copy = df.copy()
|
||
|
iop = '__i{}__'.format(op)
|
||
|
op = '__{}__'.format(op)
|
||
|
|
||
|
# no id change and value is correct
|
||
|
getattr(df, iop)(operand)
|
||
|
expected = getattr(df_copy, op)(operand)
|
||
|
assert_frame_equal(df, expected)
|
||
|
expected = id(df)
|
||
|
assert id(df) == expected
|
||
|
|
||
|
def test_alignment_non_pandas(self):
|
||
|
index = ['A', 'B', 'C']
|
||
|
columns = ['X', 'Y', 'Z']
|
||
|
df = pd.DataFrame(np.random.randn(3, 3), index=index, columns=columns)
|
||
|
|
||
|
align = pd.core.ops._align_method_FRAME
|
||
|
for val in [[1, 2, 3], (1, 2, 3), np.array([1, 2, 3], dtype=np.int64),
|
||
|
range(1, 4)]:
|
||
|
|
||
|
tm.assert_series_equal(align(df, val, 'index'),
|
||
|
Series([1, 2, 3], index=df.index))
|
||
|
tm.assert_series_equal(align(df, val, 'columns'),
|
||
|
Series([1, 2, 3], index=df.columns))
|
||
|
|
||
|
# length mismatch
|
||
|
msg = 'Unable to coerce to Series, length must be 3: given 2'
|
||
|
for val in [[1, 2], (1, 2), np.array([1, 2]), range(1, 3)]:
|
||
|
|
||
|
with tm.assert_raises_regex(ValueError, msg):
|
||
|
align(df, val, 'index')
|
||
|
|
||
|
with tm.assert_raises_regex(ValueError, msg):
|
||
|
align(df, val, 'columns')
|
||
|
|
||
|
val = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
|
||
|
tm.assert_frame_equal(align(df, val, 'index'),
|
||
|
DataFrame(val, index=df.index,
|
||
|
columns=df.columns))
|
||
|
tm.assert_frame_equal(align(df, val, 'columns'),
|
||
|
DataFrame(val, index=df.index,
|
||
|
columns=df.columns))
|
||
|
|
||
|
# shape mismatch
|
||
|
msg = 'Unable to coerce to DataFrame, shape must be'
|
||
|
val = np.array([[1, 2, 3], [4, 5, 6]])
|
||
|
with tm.assert_raises_regex(ValueError, msg):
|
||
|
align(df, val, 'index')
|
||
|
|
||
|
with tm.assert_raises_regex(ValueError, msg):
|
||
|
align(df, val, 'columns')
|
||
|
|
||
|
val = np.zeros((3, 3, 3))
|
||
|
with pytest.raises(ValueError):
|
||
|
align(df, val, 'index')
|
||
|
with pytest.raises(ValueError):
|
||
|
align(df, val, 'columns')
|