laywerrobot/lib/python3.6/site-packages/pandas/tests/frame/test_arithmetic.py
2020-08-27 21:55:39 +02:00

277 lines
11 KiB
Python

# -*- coding: utf-8 -*-
import pytest
import numpy as np
from pandas.compat import range
import pandas as pd
import pandas.util.testing as tm
# -------------------------------------------------------------------
# Comparisons
class TestFrameComparisons(object):
def test_df_boolean_comparison_error(self):
# GH#4576
# boolean comparisons with a tuple/list give unexpected results
df = pd.DataFrame(np.arange(6).reshape((3, 2)))
# not shape compatible
with pytest.raises(ValueError):
df == (2, 2)
with pytest.raises(ValueError):
df == [2, 2]
def test_df_float_none_comparison(self):
df = pd.DataFrame(np.random.randn(8, 3), index=range(8),
columns=['A', 'B', 'C'])
with pytest.raises(TypeError):
df.__eq__(None)
def test_df_string_comparison(self):
df = pd.DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}])
mask_a = df.a > 1
tm.assert_frame_equal(df[mask_a], df.loc[1:1, :])
tm.assert_frame_equal(df[-mask_a], df.loc[0:0, :])
mask_b = df.b == "foo"
tm.assert_frame_equal(df[mask_b], df.loc[0:0, :])
tm.assert_frame_equal(df[-mask_b], df.loc[1:1, :])
@pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le'])
def test_df_flex_cmp_constant_return_types(self, opname):
# GH#15077, non-empty DataFrame
df = pd.DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]})
const = 2
result = getattr(df, opname)(const).get_dtype_counts()
tm.assert_series_equal(result, pd.Series([2], ['bool']))
@pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le'])
def test_df_flex_cmp_constant_return_types_empty(self, opname):
# GH#15077 empty DataFrame
df = pd.DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]})
const = 2
empty = df.iloc[:0]
result = getattr(empty, opname)(const).get_dtype_counts()
tm.assert_series_equal(result, pd.Series([2], ['bool']))
@pytest.mark.parametrize('timestamps', [
[pd.Timestamp('2012-01-01 13:00:00+00:00')] * 2,
[pd.Timestamp('2012-01-01 13:00:00')] * 2])
def test_tz_aware_scalar_comparison(self, timestamps):
# Test for issue #15966
df = pd.DataFrame({'test': timestamps})
expected = pd.DataFrame({'test': [False, False]})
tm.assert_frame_equal(df == -1, expected)
# -------------------------------------------------------------------
# Arithmetic
class TestFrameFlexArithmetic(object):
def test_df_add_flex_filled_mixed_dtypes(self):
# GH#19611
dti = pd.date_range('2016-01-01', periods=3)
ser = pd.Series(['1 Day', 'NaT', '2 Days'], dtype='timedelta64[ns]')
df = pd.DataFrame({'A': dti, 'B': ser})
other = pd.DataFrame({'A': ser, 'B': ser})
fill = pd.Timedelta(days=1).to_timedelta64()
result = df.add(other, fill_value=fill)
expected = pd.DataFrame(
{'A': pd.Series(['2016-01-02', '2016-01-03', '2016-01-05'],
dtype='datetime64[ns]'),
'B': ser * 2})
tm.assert_frame_equal(result, expected)
class TestFrameMulDiv(object):
"""Tests for DataFrame multiplication and division"""
# ------------------------------------------------------------------
# Mod By Zero
def test_df_mod_zero_df(self):
# GH#3590, modulo as ints
df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
# this is technically wrong, as the integer portion is coerced to float
# ###
first = pd.Series([0, 0, 0, 0], dtype='float64')
second = pd.Series([np.nan, np.nan, np.nan, 0])
expected = pd.DataFrame({'first': first, 'second': second})
result = df % df
tm.assert_frame_equal(result, expected)
def test_df_mod_zero_array(self):
# GH#3590, modulo as ints
df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
# this is technically wrong, as the integer portion is coerced to float
# ###
first = pd.Series([0, 0, 0, 0], dtype='float64')
second = pd.Series([np.nan, np.nan, np.nan, 0])
expected = pd.DataFrame({'first': first, 'second': second})
# numpy has a slightly different (wrong) treatment
with np.errstate(all='ignore'):
arr = df.values % df.values
result2 = pd.DataFrame(arr, index=df.index,
columns=df.columns, dtype='float64')
result2.iloc[0:3, 1] = np.nan
tm.assert_frame_equal(result2, expected)
def test_df_mod_zero_int(self):
# GH#3590, modulo as ints
df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
result = df % 0
expected = pd.DataFrame(np.nan, index=df.index, columns=df.columns)
tm.assert_frame_equal(result, expected)
# numpy has a slightly different (wrong) treatment
with np.errstate(all='ignore'):
arr = df.values.astype('float64') % 0
result2 = pd.DataFrame(arr, index=df.index, columns=df.columns)
tm.assert_frame_equal(result2, expected)
def test_df_mod_zero_series_does_not_commute(self):
# GH#3590, modulo as ints
# not commutative with series
df = pd.DataFrame(np.random.randn(10, 5))
ser = df[0]
res = ser % df
res2 = df % ser
assert not res.fillna(0).equals(res2.fillna(0))
# ------------------------------------------------------------------
# Division By Zero
def test_df_div_zero_df(self):
# integer div, but deal with the 0's (GH#9144)
df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
result = df / df
first = pd.Series([1.0, 1.0, 1.0, 1.0])
second = pd.Series([np.nan, np.nan, np.nan, 1])
expected = pd.DataFrame({'first': first, 'second': second})
tm.assert_frame_equal(result, expected)
def test_df_div_zero_array(self):
# integer div, but deal with the 0's (GH#9144)
df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
first = pd.Series([1.0, 1.0, 1.0, 1.0])
second = pd.Series([np.nan, np.nan, np.nan, 1])
expected = pd.DataFrame({'first': first, 'second': second})
with np.errstate(all='ignore'):
arr = df.values.astype('float') / df.values
result = pd.DataFrame(arr, index=df.index,
columns=df.columns)
tm.assert_frame_equal(result, expected)
def test_df_div_zero_int(self):
# integer div, but deal with the 0's (GH#9144)
df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
result = df / 0
expected = pd.DataFrame(np.inf, index=df.index, columns=df.columns)
expected.iloc[0:3, 1] = np.nan
tm.assert_frame_equal(result, expected)
# numpy has a slightly different (wrong) treatment
with np.errstate(all='ignore'):
arr = df.values.astype('float64') / 0
result2 = pd.DataFrame(arr, index=df.index,
columns=df.columns)
tm.assert_frame_equal(result2, expected)
def test_df_div_zero_series_does_not_commute(self):
# integer div, but deal with the 0's (GH#9144)
df = pd.DataFrame(np.random.randn(10, 5))
ser = df[0]
res = ser / df
res2 = df / ser
assert not res.fillna(0).equals(res2.fillna(0))
class TestFrameArithmetic(object):
@pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano')
def test_df_sub_datetime64_not_ns(self):
df = pd.DataFrame(pd.date_range('20130101', periods=3))
dt64 = np.datetime64('2013-01-01')
assert dt64.dtype == 'datetime64[D]'
res = df - dt64
expected = pd.DataFrame([pd.Timedelta(days=0), pd.Timedelta(days=1),
pd.Timedelta(days=2)])
tm.assert_frame_equal(res, expected)
@pytest.mark.parametrize('data', [
[1, 2, 3],
[1.1, 2.2, 3.3],
[pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.NaT],
['x', 'y', 1]])
@pytest.mark.parametrize('dtype', [None, object])
def test_df_radd_str_invalid(self, dtype, data):
df = pd.DataFrame(data, dtype=dtype)
with pytest.raises(TypeError):
'foo_' + df
@pytest.mark.parametrize('dtype', [None, object])
def test_df_with_dtype_radd_int(self, dtype):
df = pd.DataFrame([1, 2, 3], dtype=dtype)
expected = pd.DataFrame([2, 3, 4], dtype=dtype)
result = 1 + df
tm.assert_frame_equal(result, expected)
result = df + 1
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize('dtype', [None, object])
def test_df_with_dtype_radd_nan(self, dtype):
df = pd.DataFrame([1, 2, 3], dtype=dtype)
expected = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype)
result = np.nan + df
tm.assert_frame_equal(result, expected)
result = df + np.nan
tm.assert_frame_equal(result, expected)
def test_df_radd_str(self):
df = pd.DataFrame(['x', np.nan, 'x'])
tm.assert_frame_equal('a' + df, pd.DataFrame(['ax', np.nan, 'ax']))
tm.assert_frame_equal(df + 'a', pd.DataFrame(['xa', np.nan, 'xa']))
class TestPeriodFrameArithmetic(object):
def test_ops_frame_period(self):
# GH 13043
df = pd.DataFrame({'A': [pd.Period('2015-01', freq='M'),
pd.Period('2015-02', freq='M')],
'B': [pd.Period('2014-01', freq='M'),
pd.Period('2014-02', freq='M')]})
assert df['A'].dtype == object
assert df['B'].dtype == object
p = pd.Period('2015-03', freq='M')
# dtype will be object because of original dtype
exp = pd.DataFrame({'A': np.array([2, 1], dtype=object),
'B': np.array([14, 13], dtype=object)})
tm.assert_frame_equal(p - df, exp)
tm.assert_frame_equal(df - p, -1 * exp)
df2 = pd.DataFrame({'A': [pd.Period('2015-05', freq='M'),
pd.Period('2015-06', freq='M')],
'B': [pd.Period('2015-05', freq='M'),
pd.Period('2015-06', freq='M')]})
assert df2['A'].dtype == object
assert df2['B'].dtype == object
exp = pd.DataFrame({'A': np.array([4, 4], dtype=object),
'B': np.array([16, 16], dtype=object)})
tm.assert_frame_equal(df2 - df, exp)
tm.assert_frame_equal(df - df2, -1 * exp)