277 lines
11 KiB
Python
277 lines
11 KiB
Python
# -*- coding: utf-8 -*-
|
|
import pytest
|
|
import numpy as np
|
|
|
|
from pandas.compat import range
|
|
|
|
import pandas as pd
|
|
import pandas.util.testing as tm
|
|
|
|
|
|
# -------------------------------------------------------------------
|
|
# Comparisons
|
|
|
|
class TestFrameComparisons(object):
|
|
def test_df_boolean_comparison_error(self):
|
|
# GH#4576
|
|
# boolean comparisons with a tuple/list give unexpected results
|
|
df = pd.DataFrame(np.arange(6).reshape((3, 2)))
|
|
|
|
# not shape compatible
|
|
with pytest.raises(ValueError):
|
|
df == (2, 2)
|
|
with pytest.raises(ValueError):
|
|
df == [2, 2]
|
|
|
|
def test_df_float_none_comparison(self):
|
|
df = pd.DataFrame(np.random.randn(8, 3), index=range(8),
|
|
columns=['A', 'B', 'C'])
|
|
|
|
with pytest.raises(TypeError):
|
|
df.__eq__(None)
|
|
|
|
def test_df_string_comparison(self):
|
|
df = pd.DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}])
|
|
mask_a = df.a > 1
|
|
tm.assert_frame_equal(df[mask_a], df.loc[1:1, :])
|
|
tm.assert_frame_equal(df[-mask_a], df.loc[0:0, :])
|
|
|
|
mask_b = df.b == "foo"
|
|
tm.assert_frame_equal(df[mask_b], df.loc[0:0, :])
|
|
tm.assert_frame_equal(df[-mask_b], df.loc[1:1, :])
|
|
|
|
@pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le'])
|
|
def test_df_flex_cmp_constant_return_types(self, opname):
|
|
# GH#15077, non-empty DataFrame
|
|
df = pd.DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]})
|
|
const = 2
|
|
|
|
result = getattr(df, opname)(const).get_dtype_counts()
|
|
tm.assert_series_equal(result, pd.Series([2], ['bool']))
|
|
|
|
@pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le'])
|
|
def test_df_flex_cmp_constant_return_types_empty(self, opname):
|
|
# GH#15077 empty DataFrame
|
|
df = pd.DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]})
|
|
const = 2
|
|
|
|
empty = df.iloc[:0]
|
|
result = getattr(empty, opname)(const).get_dtype_counts()
|
|
tm.assert_series_equal(result, pd.Series([2], ['bool']))
|
|
|
|
@pytest.mark.parametrize('timestamps', [
|
|
[pd.Timestamp('2012-01-01 13:00:00+00:00')] * 2,
|
|
[pd.Timestamp('2012-01-01 13:00:00')] * 2])
|
|
def test_tz_aware_scalar_comparison(self, timestamps):
|
|
# Test for issue #15966
|
|
df = pd.DataFrame({'test': timestamps})
|
|
expected = pd.DataFrame({'test': [False, False]})
|
|
tm.assert_frame_equal(df == -1, expected)
|
|
|
|
|
|
# -------------------------------------------------------------------
|
|
# Arithmetic
|
|
|
|
class TestFrameFlexArithmetic(object):
|
|
def test_df_add_flex_filled_mixed_dtypes(self):
|
|
# GH#19611
|
|
dti = pd.date_range('2016-01-01', periods=3)
|
|
ser = pd.Series(['1 Day', 'NaT', '2 Days'], dtype='timedelta64[ns]')
|
|
df = pd.DataFrame({'A': dti, 'B': ser})
|
|
other = pd.DataFrame({'A': ser, 'B': ser})
|
|
fill = pd.Timedelta(days=1).to_timedelta64()
|
|
result = df.add(other, fill_value=fill)
|
|
|
|
expected = pd.DataFrame(
|
|
{'A': pd.Series(['2016-01-02', '2016-01-03', '2016-01-05'],
|
|
dtype='datetime64[ns]'),
|
|
'B': ser * 2})
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
class TestFrameMulDiv(object):
|
|
"""Tests for DataFrame multiplication and division"""
|
|
# ------------------------------------------------------------------
|
|
# Mod By Zero
|
|
|
|
def test_df_mod_zero_df(self):
|
|
# GH#3590, modulo as ints
|
|
df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
|
|
|
|
# this is technically wrong, as the integer portion is coerced to float
|
|
# ###
|
|
first = pd.Series([0, 0, 0, 0], dtype='float64')
|
|
second = pd.Series([np.nan, np.nan, np.nan, 0])
|
|
expected = pd.DataFrame({'first': first, 'second': second})
|
|
result = df % df
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_df_mod_zero_array(self):
|
|
# GH#3590, modulo as ints
|
|
df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
|
|
|
|
# this is technically wrong, as the integer portion is coerced to float
|
|
# ###
|
|
first = pd.Series([0, 0, 0, 0], dtype='float64')
|
|
second = pd.Series([np.nan, np.nan, np.nan, 0])
|
|
expected = pd.DataFrame({'first': first, 'second': second})
|
|
|
|
# numpy has a slightly different (wrong) treatment
|
|
with np.errstate(all='ignore'):
|
|
arr = df.values % df.values
|
|
result2 = pd.DataFrame(arr, index=df.index,
|
|
columns=df.columns, dtype='float64')
|
|
result2.iloc[0:3, 1] = np.nan
|
|
tm.assert_frame_equal(result2, expected)
|
|
|
|
def test_df_mod_zero_int(self):
|
|
# GH#3590, modulo as ints
|
|
df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
|
|
|
|
result = df % 0
|
|
expected = pd.DataFrame(np.nan, index=df.index, columns=df.columns)
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
# numpy has a slightly different (wrong) treatment
|
|
with np.errstate(all='ignore'):
|
|
arr = df.values.astype('float64') % 0
|
|
result2 = pd.DataFrame(arr, index=df.index, columns=df.columns)
|
|
tm.assert_frame_equal(result2, expected)
|
|
|
|
def test_df_mod_zero_series_does_not_commute(self):
|
|
# GH#3590, modulo as ints
|
|
# not commutative with series
|
|
df = pd.DataFrame(np.random.randn(10, 5))
|
|
ser = df[0]
|
|
res = ser % df
|
|
res2 = df % ser
|
|
assert not res.fillna(0).equals(res2.fillna(0))
|
|
|
|
# ------------------------------------------------------------------
|
|
# Division By Zero
|
|
|
|
def test_df_div_zero_df(self):
|
|
# integer div, but deal with the 0's (GH#9144)
|
|
df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
|
|
result = df / df
|
|
|
|
first = pd.Series([1.0, 1.0, 1.0, 1.0])
|
|
second = pd.Series([np.nan, np.nan, np.nan, 1])
|
|
expected = pd.DataFrame({'first': first, 'second': second})
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_df_div_zero_array(self):
|
|
# integer div, but deal with the 0's (GH#9144)
|
|
df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
|
|
|
|
first = pd.Series([1.0, 1.0, 1.0, 1.0])
|
|
second = pd.Series([np.nan, np.nan, np.nan, 1])
|
|
expected = pd.DataFrame({'first': first, 'second': second})
|
|
|
|
with np.errstate(all='ignore'):
|
|
arr = df.values.astype('float') / df.values
|
|
result = pd.DataFrame(arr, index=df.index,
|
|
columns=df.columns)
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_df_div_zero_int(self):
|
|
# integer div, but deal with the 0's (GH#9144)
|
|
df = pd.DataFrame({'first': [3, 4, 5, 8], 'second': [0, 0, 0, 3]})
|
|
|
|
result = df / 0
|
|
expected = pd.DataFrame(np.inf, index=df.index, columns=df.columns)
|
|
expected.iloc[0:3, 1] = np.nan
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
# numpy has a slightly different (wrong) treatment
|
|
with np.errstate(all='ignore'):
|
|
arr = df.values.astype('float64') / 0
|
|
result2 = pd.DataFrame(arr, index=df.index,
|
|
columns=df.columns)
|
|
tm.assert_frame_equal(result2, expected)
|
|
|
|
def test_df_div_zero_series_does_not_commute(self):
|
|
# integer div, but deal with the 0's (GH#9144)
|
|
df = pd.DataFrame(np.random.randn(10, 5))
|
|
ser = df[0]
|
|
res = ser / df
|
|
res2 = df / ser
|
|
assert not res.fillna(0).equals(res2.fillna(0))
|
|
|
|
|
|
class TestFrameArithmetic(object):
|
|
|
|
@pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano')
|
|
def test_df_sub_datetime64_not_ns(self):
|
|
df = pd.DataFrame(pd.date_range('20130101', periods=3))
|
|
dt64 = np.datetime64('2013-01-01')
|
|
assert dt64.dtype == 'datetime64[D]'
|
|
res = df - dt64
|
|
expected = pd.DataFrame([pd.Timedelta(days=0), pd.Timedelta(days=1),
|
|
pd.Timedelta(days=2)])
|
|
tm.assert_frame_equal(res, expected)
|
|
|
|
@pytest.mark.parametrize('data', [
|
|
[1, 2, 3],
|
|
[1.1, 2.2, 3.3],
|
|
[pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'), pd.NaT],
|
|
['x', 'y', 1]])
|
|
@pytest.mark.parametrize('dtype', [None, object])
|
|
def test_df_radd_str_invalid(self, dtype, data):
|
|
df = pd.DataFrame(data, dtype=dtype)
|
|
with pytest.raises(TypeError):
|
|
'foo_' + df
|
|
|
|
@pytest.mark.parametrize('dtype', [None, object])
|
|
def test_df_with_dtype_radd_int(self, dtype):
|
|
df = pd.DataFrame([1, 2, 3], dtype=dtype)
|
|
expected = pd.DataFrame([2, 3, 4], dtype=dtype)
|
|
result = 1 + df
|
|
tm.assert_frame_equal(result, expected)
|
|
result = df + 1
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
@pytest.mark.parametrize('dtype', [None, object])
|
|
def test_df_with_dtype_radd_nan(self, dtype):
|
|
df = pd.DataFrame([1, 2, 3], dtype=dtype)
|
|
expected = pd.DataFrame([np.nan, np.nan, np.nan], dtype=dtype)
|
|
result = np.nan + df
|
|
tm.assert_frame_equal(result, expected)
|
|
result = df + np.nan
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_df_radd_str(self):
|
|
df = pd.DataFrame(['x', np.nan, 'x'])
|
|
tm.assert_frame_equal('a' + df, pd.DataFrame(['ax', np.nan, 'ax']))
|
|
tm.assert_frame_equal(df + 'a', pd.DataFrame(['xa', np.nan, 'xa']))
|
|
|
|
|
|
class TestPeriodFrameArithmetic(object):
|
|
|
|
def test_ops_frame_period(self):
|
|
# GH 13043
|
|
df = pd.DataFrame({'A': [pd.Period('2015-01', freq='M'),
|
|
pd.Period('2015-02', freq='M')],
|
|
'B': [pd.Period('2014-01', freq='M'),
|
|
pd.Period('2014-02', freq='M')]})
|
|
assert df['A'].dtype == object
|
|
assert df['B'].dtype == object
|
|
|
|
p = pd.Period('2015-03', freq='M')
|
|
# dtype will be object because of original dtype
|
|
exp = pd.DataFrame({'A': np.array([2, 1], dtype=object),
|
|
'B': np.array([14, 13], dtype=object)})
|
|
tm.assert_frame_equal(p - df, exp)
|
|
tm.assert_frame_equal(df - p, -1 * exp)
|
|
|
|
df2 = pd.DataFrame({'A': [pd.Period('2015-05', freq='M'),
|
|
pd.Period('2015-06', freq='M')],
|
|
'B': [pd.Period('2015-05', freq='M'),
|
|
pd.Period('2015-06', freq='M')]})
|
|
assert df2['A'].dtype == object
|
|
assert df2['B'].dtype == object
|
|
|
|
exp = pd.DataFrame({'A': np.array([4, 4], dtype=object),
|
|
'B': np.array([16, 16], dtype=object)})
|
|
tm.assert_frame_equal(df2 - df, exp)
|
|
tm.assert_frame_equal(df - df2, -1 * exp)
|