445 lines
17 KiB
Python
445 lines
17 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
from __future__ import print_function
|
||
|
# pylint: disable-msg=W0612,E1101
|
||
|
|
||
|
from warnings import catch_warnings
|
||
|
import re
|
||
|
import operator
|
||
|
import pytest
|
||
|
|
||
|
from numpy.random import randn
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
from pandas.core.api import DataFrame, Panel
|
||
|
from pandas.core.computation import expressions as expr
|
||
|
from pandas import compat, _np_version_under1p11, _np_version_under1p13
|
||
|
from pandas.util.testing import (assert_almost_equal, assert_series_equal,
|
||
|
assert_frame_equal, assert_panel_equal)
|
||
|
from pandas.io.formats.printing import pprint_thing
|
||
|
import pandas.util.testing as tm
|
||
|
|
||
|
|
||
|
_frame = DataFrame(randn(10000, 4), columns=list('ABCD'), dtype='float64')
|
||
|
_frame2 = DataFrame(randn(100, 4), columns=list('ABCD'), dtype='float64')
|
||
|
_mixed = DataFrame({'A': _frame['A'].copy(),
|
||
|
'B': _frame['B'].astype('float32'),
|
||
|
'C': _frame['C'].astype('int64'),
|
||
|
'D': _frame['D'].astype('int32')})
|
||
|
_mixed2 = DataFrame({'A': _frame2['A'].copy(),
|
||
|
'B': _frame2['B'].astype('float32'),
|
||
|
'C': _frame2['C'].astype('int64'),
|
||
|
'D': _frame2['D'].astype('int32')})
|
||
|
_integer = DataFrame(
|
||
|
np.random.randint(1, 100,
|
||
|
size=(10001, 4)),
|
||
|
columns=list('ABCD'), dtype='int64')
|
||
|
_integer2 = DataFrame(np.random.randint(1, 100, size=(101, 4)),
|
||
|
columns=list('ABCD'), dtype='int64')
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
_frame_panel = Panel(dict(ItemA=_frame.copy(),
|
||
|
ItemB=(_frame.copy() + 3),
|
||
|
ItemC=_frame.copy(),
|
||
|
ItemD=_frame.copy()))
|
||
|
_frame2_panel = Panel(dict(ItemA=_frame2.copy(),
|
||
|
ItemB=(_frame2.copy() + 3),
|
||
|
ItemC=_frame2.copy(),
|
||
|
ItemD=_frame2.copy()))
|
||
|
_integer_panel = Panel(dict(ItemA=_integer,
|
||
|
ItemB=(_integer + 34).astype('int64')))
|
||
|
_integer2_panel = Panel(dict(ItemA=_integer2,
|
||
|
ItemB=(_integer2 + 34).astype('int64')))
|
||
|
_mixed_panel = Panel(dict(ItemA=_mixed, ItemB=(_mixed + 3)))
|
||
|
_mixed2_panel = Panel(dict(ItemA=_mixed2, ItemB=(_mixed2 + 3)))
|
||
|
|
||
|
|
||
|
@pytest.mark.skipif(not expr._USE_NUMEXPR, reason='not using numexpr')
|
||
|
class TestExpressions(object):
|
||
|
|
||
|
def setup_method(self, method):
|
||
|
|
||
|
self.frame = _frame.copy()
|
||
|
self.frame2 = _frame2.copy()
|
||
|
self.mixed = _mixed.copy()
|
||
|
self.mixed2 = _mixed2.copy()
|
||
|
self.integer = _integer.copy()
|
||
|
self._MIN_ELEMENTS = expr._MIN_ELEMENTS
|
||
|
|
||
|
def teardown_method(self, method):
|
||
|
expr._MIN_ELEMENTS = self._MIN_ELEMENTS
|
||
|
|
||
|
def run_arithmetic(self, df, other, assert_func, check_dtype=False,
|
||
|
test_flex=True):
|
||
|
expr._MIN_ELEMENTS = 0
|
||
|
operations = ['add', 'sub', 'mul', 'mod', 'truediv', 'floordiv']
|
||
|
if not compat.PY3:
|
||
|
operations.append('div')
|
||
|
for arith in operations:
|
||
|
|
||
|
operator_name = arith
|
||
|
if arith == 'div':
|
||
|
operator_name = 'truediv'
|
||
|
|
||
|
if test_flex:
|
||
|
op = lambda x, y: getattr(df, arith)(y)
|
||
|
op.__name__ = arith
|
||
|
else:
|
||
|
op = getattr(operator, operator_name)
|
||
|
expr.set_use_numexpr(False)
|
||
|
expected = op(df, other)
|
||
|
expr.set_use_numexpr(True)
|
||
|
|
||
|
result = op(df, other)
|
||
|
try:
|
||
|
if check_dtype:
|
||
|
if arith == 'truediv':
|
||
|
assert expected.dtype.kind == 'f'
|
||
|
assert_func(expected, result)
|
||
|
except Exception:
|
||
|
pprint_thing("Failed test with operator %r" % op.__name__)
|
||
|
raise
|
||
|
|
||
|
def test_integer_arithmetic(self):
|
||
|
self.run_arithmetic(self.integer, self.integer,
|
||
|
assert_frame_equal)
|
||
|
self.run_arithmetic(self.integer.iloc[:, 0],
|
||
|
self.integer.iloc[:, 0], assert_series_equal,
|
||
|
check_dtype=True)
|
||
|
|
||
|
def run_binary(self, df, other, assert_func, test_flex=False,
|
||
|
numexpr_ops=set(['gt', 'lt', 'ge', 'le', 'eq', 'ne'])):
|
||
|
"""
|
||
|
tests solely that the result is the same whether or not numexpr is
|
||
|
enabled. Need to test whether the function does the correct thing
|
||
|
elsewhere.
|
||
|
"""
|
||
|
expr._MIN_ELEMENTS = 0
|
||
|
expr.set_test_mode(True)
|
||
|
operations = ['gt', 'lt', 'ge', 'le', 'eq', 'ne']
|
||
|
|
||
|
for arith in operations:
|
||
|
if test_flex:
|
||
|
op = lambda x, y: getattr(df, arith)(y)
|
||
|
op.__name__ = arith
|
||
|
else:
|
||
|
op = getattr(operator, arith)
|
||
|
expr.set_use_numexpr(False)
|
||
|
expected = op(df, other)
|
||
|
expr.set_use_numexpr(True)
|
||
|
expr.get_test_result()
|
||
|
result = op(df, other)
|
||
|
used_numexpr = expr.get_test_result()
|
||
|
try:
|
||
|
if arith in numexpr_ops:
|
||
|
assert used_numexpr, "Did not use numexpr as expected."
|
||
|
else:
|
||
|
assert not used_numexpr, "Used numexpr unexpectedly."
|
||
|
assert_func(expected, result)
|
||
|
except Exception:
|
||
|
pprint_thing("Failed test with operation %r" % arith)
|
||
|
pprint_thing("test_flex was %r" % test_flex)
|
||
|
raise
|
||
|
|
||
|
def run_frame(self, df, other, binary_comp=None, run_binary=True,
|
||
|
**kwargs):
|
||
|
self.run_arithmetic(df, other, assert_frame_equal,
|
||
|
test_flex=False, **kwargs)
|
||
|
self.run_arithmetic(df, other, assert_frame_equal, test_flex=True,
|
||
|
**kwargs)
|
||
|
if run_binary:
|
||
|
if binary_comp is None:
|
||
|
expr.set_use_numexpr(False)
|
||
|
binary_comp = other + 1
|
||
|
expr.set_use_numexpr(True)
|
||
|
self.run_binary(df, binary_comp, assert_frame_equal,
|
||
|
test_flex=False, **kwargs)
|
||
|
self.run_binary(df, binary_comp, assert_frame_equal,
|
||
|
test_flex=True, **kwargs)
|
||
|
|
||
|
def run_series(self, ser, other, binary_comp=None, **kwargs):
|
||
|
self.run_arithmetic(ser, other, assert_series_equal,
|
||
|
test_flex=False, **kwargs)
|
||
|
self.run_arithmetic(ser, other, assert_almost_equal,
|
||
|
test_flex=True, **kwargs)
|
||
|
# series doesn't uses vec_compare instead of numexpr...
|
||
|
# if binary_comp is None:
|
||
|
# binary_comp = other + 1
|
||
|
# self.run_binary(ser, binary_comp, assert_frame_equal,
|
||
|
# test_flex=False, **kwargs)
|
||
|
# self.run_binary(ser, binary_comp, assert_frame_equal,
|
||
|
# test_flex=True, **kwargs)
|
||
|
|
||
|
def run_panel(self, panel, other, binary_comp=None, run_binary=True,
|
||
|
assert_func=assert_panel_equal, **kwargs):
|
||
|
self.run_arithmetic(panel, other, assert_func, test_flex=False,
|
||
|
**kwargs)
|
||
|
self.run_arithmetic(panel, other, assert_func, test_flex=True,
|
||
|
**kwargs)
|
||
|
if run_binary:
|
||
|
if binary_comp is None:
|
||
|
binary_comp = other + 1
|
||
|
self.run_binary(panel, binary_comp, assert_func,
|
||
|
test_flex=False, **kwargs)
|
||
|
self.run_binary(panel, binary_comp, assert_func,
|
||
|
test_flex=True, **kwargs)
|
||
|
|
||
|
def test_integer_arithmetic_frame(self):
|
||
|
self.run_frame(self.integer, self.integer)
|
||
|
|
||
|
def test_integer_arithmetic_series(self):
|
||
|
self.run_series(self.integer.iloc[:, 0], self.integer.iloc[:, 0])
|
||
|
|
||
|
@pytest.mark.slow
|
||
|
def test_integer_panel(self):
|
||
|
self.run_panel(_integer2_panel, np.random.randint(1, 100))
|
||
|
|
||
|
def test_float_arithemtic_frame(self):
|
||
|
self.run_frame(self.frame2, self.frame2)
|
||
|
|
||
|
def test_float_arithmetic_series(self):
|
||
|
self.run_series(self.frame2.iloc[:, 0], self.frame2.iloc[:, 0])
|
||
|
|
||
|
@pytest.mark.slow
|
||
|
def test_float_panel(self):
|
||
|
self.run_panel(_frame2_panel, np.random.randn() + 0.1, binary_comp=0.8)
|
||
|
|
||
|
def test_mixed_arithmetic_frame(self):
|
||
|
# TODO: FIGURE OUT HOW TO GET IT TO WORK...
|
||
|
# can't do arithmetic because comparison methods try to do *entire*
|
||
|
# frame instead of by-column
|
||
|
self.run_frame(self.mixed2, self.mixed2, run_binary=False)
|
||
|
|
||
|
def test_mixed_arithmetic_series(self):
|
||
|
for col in self.mixed2.columns:
|
||
|
self.run_series(self.mixed2[col], self.mixed2[col], binary_comp=4)
|
||
|
|
||
|
@pytest.mark.slow
|
||
|
def test_mixed_panel(self):
|
||
|
self.run_panel(_mixed2_panel, np.random.randint(1, 100),
|
||
|
binary_comp=-2)
|
||
|
|
||
|
def test_float_arithemtic(self):
|
||
|
self.run_arithmetic(self.frame, self.frame, assert_frame_equal)
|
||
|
self.run_arithmetic(self.frame.iloc[:, 0], self.frame.iloc[:, 0],
|
||
|
assert_series_equal, check_dtype=True)
|
||
|
|
||
|
def test_mixed_arithmetic(self):
|
||
|
self.run_arithmetic(self.mixed, self.mixed, assert_frame_equal)
|
||
|
for col in self.mixed.columns:
|
||
|
self.run_arithmetic(self.mixed[col], self.mixed[col],
|
||
|
assert_series_equal)
|
||
|
|
||
|
def test_integer_with_zeros(self):
|
||
|
self.integer *= np.random.randint(0, 2, size=np.shape(self.integer))
|
||
|
self.run_arithmetic(self.integer, self.integer,
|
||
|
assert_frame_equal)
|
||
|
self.run_arithmetic(self.integer.iloc[:, 0],
|
||
|
self.integer.iloc[:, 0], assert_series_equal)
|
||
|
|
||
|
def test_invalid(self):
|
||
|
|
||
|
# no op
|
||
|
result = expr._can_use_numexpr(operator.add, None, self.frame,
|
||
|
self.frame, 'evaluate')
|
||
|
assert not result
|
||
|
|
||
|
# mixed
|
||
|
result = expr._can_use_numexpr(operator.add, '+', self.mixed,
|
||
|
self.frame, 'evaluate')
|
||
|
assert not result
|
||
|
|
||
|
# min elements
|
||
|
result = expr._can_use_numexpr(operator.add, '+', self.frame2,
|
||
|
self.frame2, 'evaluate')
|
||
|
assert not result
|
||
|
|
||
|
# ok, we only check on first part of expression
|
||
|
result = expr._can_use_numexpr(operator.add, '+', self.frame,
|
||
|
self.frame2, 'evaluate')
|
||
|
assert result
|
||
|
|
||
|
def test_binary_ops(self):
|
||
|
def testit():
|
||
|
|
||
|
for f, f2 in [(self.frame, self.frame2),
|
||
|
(self.mixed, self.mixed2)]:
|
||
|
|
||
|
for op, op_str in [('add', '+'), ('sub', '-'), ('mul', '*'),
|
||
|
('div', '/'), ('pow', '**')]:
|
||
|
|
||
|
# numpy >= 1.11 doesn't handle integers
|
||
|
# raised to integer powers
|
||
|
# https://github.com/pandas-dev/pandas/issues/15363
|
||
|
if op == 'pow' and not _np_version_under1p11:
|
||
|
continue
|
||
|
|
||
|
if op == 'div':
|
||
|
op = getattr(operator, 'truediv', None)
|
||
|
else:
|
||
|
op = getattr(operator, op, None)
|
||
|
if op is not None:
|
||
|
result = expr._can_use_numexpr(op, op_str, f, f,
|
||
|
'evaluate')
|
||
|
assert result != f._is_mixed_type
|
||
|
|
||
|
result = expr.evaluate(op, op_str, f, f,
|
||
|
use_numexpr=True)
|
||
|
expected = expr.evaluate(op, op_str, f, f,
|
||
|
use_numexpr=False)
|
||
|
|
||
|
if isinstance(result, DataFrame):
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
else:
|
||
|
tm.assert_numpy_array_equal(result,
|
||
|
expected.values)
|
||
|
|
||
|
result = expr._can_use_numexpr(op, op_str, f2, f2,
|
||
|
'evaluate')
|
||
|
assert not result
|
||
|
|
||
|
expr.set_use_numexpr(False)
|
||
|
testit()
|
||
|
expr.set_use_numexpr(True)
|
||
|
expr.set_numexpr_threads(1)
|
||
|
testit()
|
||
|
expr.set_numexpr_threads()
|
||
|
testit()
|
||
|
|
||
|
def test_boolean_ops(self):
|
||
|
def testit():
|
||
|
for f, f2 in [(self.frame, self.frame2),
|
||
|
(self.mixed, self.mixed2)]:
|
||
|
|
||
|
f11 = f
|
||
|
f12 = f + 1
|
||
|
|
||
|
f21 = f2
|
||
|
f22 = f2 + 1
|
||
|
|
||
|
for op, op_str in [('gt', '>'), ('lt', '<'), ('ge', '>='),
|
||
|
('le', '<='), ('eq', '=='), ('ne', '!=')]:
|
||
|
|
||
|
op = getattr(operator, op)
|
||
|
|
||
|
result = expr._can_use_numexpr(op, op_str, f11, f12,
|
||
|
'evaluate')
|
||
|
assert result != f11._is_mixed_type
|
||
|
|
||
|
result = expr.evaluate(op, op_str, f11, f12,
|
||
|
use_numexpr=True)
|
||
|
expected = expr.evaluate(op, op_str, f11, f12,
|
||
|
use_numexpr=False)
|
||
|
if isinstance(result, DataFrame):
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
else:
|
||
|
tm.assert_numpy_array_equal(result, expected.values)
|
||
|
|
||
|
result = expr._can_use_numexpr(op, op_str, f21, f22,
|
||
|
'evaluate')
|
||
|
assert not result
|
||
|
|
||
|
expr.set_use_numexpr(False)
|
||
|
testit()
|
||
|
expr.set_use_numexpr(True)
|
||
|
expr.set_numexpr_threads(1)
|
||
|
testit()
|
||
|
expr.set_numexpr_threads()
|
||
|
testit()
|
||
|
|
||
|
def test_where(self):
|
||
|
def testit():
|
||
|
for f in [self.frame, self.frame2, self.mixed, self.mixed2]:
|
||
|
|
||
|
for cond in [True, False]:
|
||
|
|
||
|
c = np.empty(f.shape, dtype=np.bool_)
|
||
|
c.fill(cond)
|
||
|
result = expr.where(c, f.values, f.values + 1)
|
||
|
expected = np.where(c, f.values, f.values + 1)
|
||
|
tm.assert_numpy_array_equal(result, expected)
|
||
|
|
||
|
expr.set_use_numexpr(False)
|
||
|
testit()
|
||
|
expr.set_use_numexpr(True)
|
||
|
expr.set_numexpr_threads(1)
|
||
|
testit()
|
||
|
expr.set_numexpr_threads()
|
||
|
testit()
|
||
|
|
||
|
def test_bool_ops_raise_on_arithmetic(self):
|
||
|
df = DataFrame({'a': np.random.rand(10) > 0.5,
|
||
|
'b': np.random.rand(10) > 0.5})
|
||
|
names = 'div', 'truediv', 'floordiv', 'pow'
|
||
|
ops = '/', '/', '//', '**'
|
||
|
msg = 'operator %r not implemented for bool dtypes'
|
||
|
for op, name in zip(ops, names):
|
||
|
if not compat.PY3 or name != 'div':
|
||
|
f = getattr(operator, name)
|
||
|
err_msg = re.escape(msg % op)
|
||
|
|
||
|
with tm.assert_raises_regex(NotImplementedError, err_msg):
|
||
|
f(df, df)
|
||
|
|
||
|
with tm.assert_raises_regex(NotImplementedError, err_msg):
|
||
|
f(df.a, df.b)
|
||
|
|
||
|
with tm.assert_raises_regex(NotImplementedError, err_msg):
|
||
|
f(df.a, True)
|
||
|
|
||
|
with tm.assert_raises_regex(NotImplementedError, err_msg):
|
||
|
f(False, df.a)
|
||
|
|
||
|
with tm.assert_raises_regex(TypeError, err_msg):
|
||
|
f(False, df)
|
||
|
|
||
|
with tm.assert_raises_regex(TypeError, err_msg):
|
||
|
f(df, True)
|
||
|
|
||
|
def test_bool_ops_warn_on_arithmetic(self):
|
||
|
n = 10
|
||
|
df = DataFrame({'a': np.random.rand(n) > 0.5,
|
||
|
'b': np.random.rand(n) > 0.5})
|
||
|
names = 'add', 'mul', 'sub'
|
||
|
ops = '+', '*', '-'
|
||
|
subs = {'+': '|', '*': '&', '-': '^'}
|
||
|
sub_funcs = {'|': 'or_', '&': 'and_', '^': 'xor'}
|
||
|
for op, name in zip(ops, names):
|
||
|
f = getattr(operator, name)
|
||
|
fe = getattr(operator, sub_funcs[subs[op]])
|
||
|
|
||
|
# >= 1.13.0 these are now TypeErrors
|
||
|
if op == '-' and not _np_version_under1p13:
|
||
|
continue
|
||
|
|
||
|
with tm.use_numexpr(True, min_elements=5):
|
||
|
with tm.assert_produces_warning(check_stacklevel=False):
|
||
|
r = f(df, df)
|
||
|
e = fe(df, df)
|
||
|
tm.assert_frame_equal(r, e)
|
||
|
|
||
|
with tm.assert_produces_warning(check_stacklevel=False):
|
||
|
r = f(df.a, df.b)
|
||
|
e = fe(df.a, df.b)
|
||
|
tm.assert_series_equal(r, e)
|
||
|
|
||
|
with tm.assert_produces_warning(check_stacklevel=False):
|
||
|
r = f(df.a, True)
|
||
|
e = fe(df.a, True)
|
||
|
tm.assert_series_equal(r, e)
|
||
|
|
||
|
with tm.assert_produces_warning(check_stacklevel=False):
|
||
|
r = f(False, df.a)
|
||
|
e = fe(False, df.a)
|
||
|
tm.assert_series_equal(r, e)
|
||
|
|
||
|
with tm.assert_produces_warning(check_stacklevel=False):
|
||
|
r = f(False, df)
|
||
|
e = fe(False, df)
|
||
|
tm.assert_frame_equal(r, e)
|
||
|
|
||
|
with tm.assert_produces_warning(check_stacklevel=False):
|
||
|
r = f(df, True)
|
||
|
e = fe(df, True)
|
||
|
tm.assert_frame_equal(r, e)
|