laywerrobot/lib/python3.6/site-packages/pandas/tests/indexing/test_coercion.py

921 lines
34 KiB
Python
Raw Normal View History

2020-08-27 21:55:39 +02:00
# -*- coding: utf-8 -*-
import itertools
import pytest
import numpy as np
import pandas as pd
import pandas.util.testing as tm
import pandas.compat as compat
###############################################################
# Index / Series common tests which may trigger dtype coercions
###############################################################
@pytest.fixture(autouse=True, scope='class')
def check_comprehensiveness(request):
    """Verify that the requesting test class covers every combination.

    For each ``(klass, dtype, method)`` combination built from the class
    attributes ``klasses``, ``dtypes`` and ``method``, at least one collected
    test item must have a name containing all three strings.

    Raises
    ------
    AssertionError
        If some combination has no matching collected test.
    """
    cls = request.cls
    # Names of the collected items are the same for every combination,
    # so read them once.
    collected_names = [item.name for item in request.node.session.items]

    def has_test(combo):
        klass, dtype, method = combo
        return any(klass in name and dtype in name and method in name
                   for name in collected_names)

    for combo in itertools.product(cls.klasses, cls.dtypes, [cls.method]):
        if not has_test(combo):
            msg = 'test method is not defined: {0}, {1}'
            # ``cls`` is already the class object; ``type(cls)`` would only
            # report its metaclass, hiding which test class is incomplete.
            raise AssertionError(msg.format(cls.__name__, combo))

    yield
class CoercionBase(object):
    """Common configuration and assertion helper for coercion test classes."""

    # Labels that test names are matched against by check_comprehensiveness.
    klasses = ['index', 'series']
    dtypes = ['object', 'int64', 'float64', 'complex128', 'bool',
              'datetime64', 'datetime64tz', 'timedelta64', 'period']

    @property
    def method(self):
        """Placeholder; subclasses replace it with the method name string."""
        raise NotImplementedError(self)

    def _assert(self, left, right, dtype):
        """Assert ``left`` equals ``right`` and that both have ``dtype``.

        Raises NotImplementedError when ``left`` is neither a Series nor an
        Index.
        """
        if isinstance(left, pd.Series):
            assert_equal = tm.assert_series_equal
        elif isinstance(left, pd.Index):
            assert_equal = tm.assert_index_equal
        else:
            raise NotImplementedError
        assert_equal(left, right)

        # explicitly check dtype to avoid any unexpected result
        for obj in (left, right):
            assert obj.dtype == dtype
class TestSetitemCoercion(CoercionBase):
    """Dtype coercion of Series values and Series index on item assignment."""

    method = 'setitem'

    def _assert_setitem_series_conversion(self, original_series, loc_value,
                                          expected_series, expected_dtype):
        """Assign ``loc_value`` at key 1 of a copy and check the outcome.

        The copy must equal ``expected_series`` and have ``expected_dtype``.
        """
        result = original_series.copy()
        result[1] = loc_value
        tm.assert_series_equal(result, expected_series)
        # check dtype explicitly for sure
        assert result.dtype == expected_dtype

        # .loc works by a different rule, so this check is disabled for now:
        # result = original_series.copy()
        # result.loc[1] = loc_value
        # tm.assert_series_equal(result, expected_series)

    @pytest.mark.parametrize("val,exp_dtype", [
        (1, np.object),
        (1.1, np.object),
        (1 + 1j, np.object),
        (True, np.object),
    ])
    def test_setitem_series_object(self, val, exp_dtype):
        ser = pd.Series(list('abcd'))
        assert ser.dtype == np.object

        expected = pd.Series(['a', val, 'c', 'd'])
        self._assert_setitem_series_conversion(ser, val, expected, exp_dtype)

    @pytest.mark.parametrize("val,exp_dtype", [
        (1, np.int64),
        (1.1, np.float64),
        (1 + 1j, np.complex128),
        (True, np.object),
    ])
    def test_setitem_series_int64(self, val, exp_dtype):
        ser = pd.Series([1, 2, 3, 4])
        assert ser.dtype == np.int64

        if exp_dtype is np.float64:
            # Pin the result asserted today, then report the known gap.
            truncated = pd.Series([1, 1, 3, 4])
            self._assert_setitem_series_conversion(ser, 1.1, truncated,
                                                   np.int64)
            pytest.xfail("GH12747 The result must be float")

        expected = pd.Series([1, val, 3, 4])
        self._assert_setitem_series_conversion(ser, val, expected, exp_dtype)

    @pytest.mark.parametrize("val,exp_dtype", [
        (np.int32(1), np.int8),
        (np.int16(2**9), np.int16),
    ])
    def test_setitem_series_int8(self, val, exp_dtype):
        ser = pd.Series([1, 2, 3, 4], dtype=np.int8)
        assert ser.dtype == np.int8

        if exp_dtype is np.int16:
            # Pin the result asserted today, then report the known gap.
            wrapped = pd.Series([1, 0, 3, 4], dtype=np.int8)
            self._assert_setitem_series_conversion(ser, val, wrapped, np.int8)
            pytest.xfail("BUG: it must be Series([1, 1, 3, 4], dtype=np.int16")

        expected = pd.Series([1, val, 3, 4], dtype=np.int8)
        self._assert_setitem_series_conversion(ser, val, expected, exp_dtype)

    @pytest.mark.parametrize("val,exp_dtype", [
        (1, np.float64),
        (1.1, np.float64),
        (1 + 1j, np.complex128),
        (True, np.object),
    ])
    def test_setitem_series_float64(self, val, exp_dtype):
        ser = pd.Series([1.1, 2.2, 3.3, 4.4])
        assert ser.dtype == np.float64

        expected = pd.Series([1.1, val, 3.3, 4.4])
        self._assert_setitem_series_conversion(ser, val, expected, exp_dtype)

    @pytest.mark.parametrize("val,exp_dtype", [
        (1, np.complex128),
        (1.1, np.complex128),
        (1 + 1j, np.complex128),
        (True, np.object),
    ])
    def test_setitem_series_complex128(self, val, exp_dtype):
        ser = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j])
        assert ser.dtype == np.complex128

        expected = pd.Series([1 + 1j, val, 3 + 3j, 4 + 4j])
        self._assert_setitem_series_conversion(ser, val, expected, exp_dtype)

    @pytest.mark.parametrize("val,exp_dtype", [
        (1, np.int64),
        (3, np.int64),
        (1.1, np.float64),
        (1 + 1j, np.complex128),
        (True, np.bool),
    ])
    def test_setitem_series_bool(self, val, exp_dtype):
        ser = pd.Series([True, False, True, False])
        assert ser.dtype == np.bool

        # Numeric target dtypes share one pinned result and differ only in
        # the xfail reason.
        pending = (
            (np.int64, "TODO_GH12747 The result must be int"),
            (np.float64, "TODO_GH12747 The result must be float"),
            (np.complex128, "TODO_GH12747 The result must be complex"),
        )
        for dtype, reason in pending:
            if exp_dtype is dtype:
                still_bool = pd.Series([True, True, True, False])
                self._assert_setitem_series_conversion(ser, val, still_bool,
                                                       np.bool)
                pytest.xfail(reason)

        expected = pd.Series([True, val, True, False])
        self._assert_setitem_series_conversion(ser, val, expected, exp_dtype)

    @pytest.mark.parametrize("val,exp_dtype", [
        (pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
        (1, np.object),
        ('x', np.object),
    ])
    def test_setitem_series_datetime64(self, val, exp_dtype):
        first = pd.Timestamp('2011-01-01')
        third = pd.Timestamp('2011-01-03')
        fourth = pd.Timestamp('2011-01-04')

        ser = pd.Series([first, pd.Timestamp('2011-01-02'), third, fourth])
        assert ser.dtype == 'datetime64[ns]'

        expected = pd.Series([first, val, third, fourth])
        self._assert_setitem_series_conversion(ser, val, expected, exp_dtype)

    @pytest.mark.parametrize("val,exp_dtype", [
        (pd.Timestamp('2012-01-01', tz='US/Eastern'),
         'datetime64[ns, US/Eastern]'),
        (pd.Timestamp('2012-01-01', tz='US/Pacific'), np.object),
        (pd.Timestamp('2012-01-01'), np.object),
        (1, np.object),
    ])
    def test_setitem_series_datetime64tz(self, val, exp_dtype):
        tz = 'US/Eastern'
        first = pd.Timestamp('2011-01-01', tz=tz)
        third = pd.Timestamp('2011-01-03', tz=tz)
        fourth = pd.Timestamp('2011-01-04', tz=tz)

        ser = pd.Series([first, pd.Timestamp('2011-01-02', tz=tz),
                         third, fourth])
        assert ser.dtype == 'datetime64[ns, US/Eastern]'

        expected = pd.Series([first, val, third, fourth])
        self._assert_setitem_series_conversion(ser, val, expected, exp_dtype)

    @pytest.mark.parametrize("val,exp_dtype", [
        (pd.Timedelta('12 day'), 'timedelta64[ns]'),
        (1, np.object),
        ('x', np.object),
    ])
    def test_setitem_series_timedelta64(self, val, exp_dtype):
        first = pd.Timedelta('1 day')
        third = pd.Timedelta('3 day')
        fourth = pd.Timedelta('4 day')

        ser = pd.Series([first, pd.Timedelta('2 day'), third, fourth])
        assert ser.dtype == 'timedelta64[ns]'

        expected = pd.Series([first, val, third, fourth])
        self._assert_setitem_series_conversion(ser, val, expected, exp_dtype)

    def _assert_setitem_index_conversion(self, original_series, loc_key,
                                         expected_index, expected_dtype):
        """Assign 5 under the new key ``loc_key`` and check the index.

        The assignment is done once with ``[]`` and once with ``.loc``, each
        on a fresh copy; both must give values 1..5 on ``expected_index``
        with index dtype ``expected_dtype``.
        """
        def set_by_getitem(ser):
            ser[loc_key] = 5

        def set_by_loc(ser):
            ser.loc[loc_key] = 5

        for assign in (set_by_getitem, set_by_loc):
            result = original_series.copy()
            assign(result)
            expected = pd.Series([1, 2, 3, 4, 5], index=expected_index)
            tm.assert_series_equal(result, expected)
            # check dtype explicitly for sure
            assert result.index.dtype == expected_dtype

    @pytest.mark.parametrize("val,exp_dtype", [
        ('x', np.object),
        (5, IndexError),
        (1.1, np.object),
    ])
    def test_setitem_index_object(self, val, exp_dtype):
        ser = pd.Series([1, 2, 3, 4], index=list('abcd'))
        assert ser.index.dtype == np.object

        if exp_dtype is IndexError:
            target = ser.copy()
            with pytest.raises(exp_dtype):
                target[5] = 5
            return

        exp_index = pd.Index(list('abcd') + [val])
        self._assert_setitem_index_conversion(ser, val, exp_index, exp_dtype)

    @pytest.mark.parametrize("val,exp_dtype", [
        (5, np.int64),
        (1.1, np.float64),
        ('x', np.object),
    ])
    def test_setitem_index_int64(self, val, exp_dtype):
        ser = pd.Series([1, 2, 3, 4])
        assert ser.index.dtype == np.int64

        exp_index = pd.Index([0, 1, 2, 3, val])
        self._assert_setitem_index_conversion(ser, val, exp_index, exp_dtype)

    @pytest.mark.parametrize("val,exp_dtype", [
        (5, IndexError),
        (5.1, np.float64),
        ('x', np.object),
    ])
    def test_setitem_index_float64(self, val, exp_dtype):
        ser = pd.Series([1, 2, 3, 4], index=[1.1, 2.1, 3.1, 4.1])
        assert ser.index.dtype == np.float64

        if exp_dtype is IndexError:
            # float + int -> int
            target = ser.copy()
            with pytest.raises(exp_dtype):
                target[5] = 5
            pytest.xfail("TODO_GH12747 The result must be float")

        exp_index = pd.Index([1.1, 2.1, 3.1, 4.1, val])
        self._assert_setitem_index_conversion(ser, val, exp_index, exp_dtype)

    # The remaining tests are empty placeholders: their names supply the
    # klass/dtype/method combinations that check_comprehensiveness looks for.

    def test_setitem_series_period(self):
        pass

    def test_setitem_index_complex128(self):
        pass

    def test_setitem_index_bool(self):
        pass

    def test_setitem_index_datetime64(self):
        pass

    def test_setitem_index_datetime64tz(self):
        pass

    def test_setitem_index_timedelta64(self):
        pass

    def test_setitem_index_period(self):
        pass
class TestInsertIndexCoercion(CoercionBase):
    """Dtype coercion triggered by ``Index.insert``."""

    klasses = ['index']
    method = 'insert'

    def _assert_insert_conversion(self, original, value,
                                  expected, expected_dtype):
        """Insert ``value`` at position 1 of a copy and check the result."""
        result = original.copy().insert(1, value)
        tm.assert_index_equal(result, expected)
        assert result.dtype == expected_dtype

    @pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
        (1, 1, np.object),
        (1.1, 1.1, np.object),
        (False, False, np.object),
        ('x', 'x', np.object),
    ])
    def test_insert_index_object(self, insert, coerced_val, coerced_dtype):
        idx = pd.Index(list('abcd'))
        assert idx.dtype == np.object

        expected = pd.Index(['a', coerced_val, 'b', 'c', 'd'])
        self._assert_insert_conversion(idx, insert, expected, coerced_dtype)

    @pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
        (1, 1, np.int64),
        (1.1, 1.1, np.float64),
        (False, 0, np.int64),
        ('x', 'x', np.object),
    ])
    def test_insert_index_int64(self, insert, coerced_val, coerced_dtype):
        idx = pd.Int64Index([1, 2, 3, 4])
        assert idx.dtype == np.int64

        expected = pd.Index([1, coerced_val, 2, 3, 4])
        self._assert_insert_conversion(idx, insert, expected, coerced_dtype)

    @pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
        (1, 1., np.float64),
        (1.1, 1.1, np.float64),
        (False, 0., np.float64),
        ('x', 'x', np.object),
    ])
    def test_insert_index_float64(self, insert, coerced_val, coerced_dtype):
        idx = pd.Float64Index([1., 2., 3., 4.])
        assert idx.dtype == np.float64

        expected = pd.Index([1., coerced_val, 2., 3., 4.])
        self._assert_insert_conversion(idx, insert, expected, coerced_dtype)

    @pytest.mark.parametrize('fill_val,exp_dtype', [
        (pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
        (pd.Timestamp('2012-01-01', tz='US/Eastern'),
         'datetime64[ns, US/Eastern]'),
    ], ids=['datetime64', 'datetime64tz'])
    def test_insert_index_datetimes(self, fill_val, exp_dtype):
        idx = pd.DatetimeIndex(
            ['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04'],
            tz=fill_val.tz)
        assert idx.dtype == exp_dtype

        expected = pd.DatetimeIndex(
            ['2011-01-01', fill_val.date(), '2011-01-02', '2011-01-03',
             '2011-01-04'],
            tz=fill_val.tz)
        self._assert_insert_conversion(idx, fill_val, expected, exp_dtype)

        tz_msg = "Passed item and index have different timezone"
        if fill_val.tz:
            # a naive timestamp is only a mismatch for the tz-aware index
            with tm.assert_raises_regex(ValueError, tz_msg):
                idx.insert(1, pd.Timestamp('2012-01-01'))

        with tm.assert_raises_regex(ValueError, tz_msg):
            idx.insert(1, pd.Timestamp('2012-01-01', tz='Asia/Tokyo'))

        label_msg = "cannot insert DatetimeIndex with incompatible label"
        with tm.assert_raises_regex(TypeError, label_msg):
            idx.insert(1, 1)

        # The test is always reported as xfail once the checks above pass.
        pytest.xfail("ToDo: must coerce to object")

    def test_insert_index_timedelta64(self):
        idx = pd.TimedeltaIndex(['1 day', '2 day', '3 day', '4 day'])
        assert idx.dtype == 'timedelta64[ns]'

        # timedelta64 + timedelta64 => timedelta64
        expected = pd.TimedeltaIndex(['1 day', '10 day', '2 day', '3 day',
                                      '4 day'])
        self._assert_insert_conversion(idx, pd.Timedelta('10 day'),
                                       expected, 'timedelta64[ns]')

        # ToDo: must coerce to object
        msg = "cannot insert TimedeltaIndex with incompatible label"
        for incompatible in (pd.Timestamp('2012-01-01'), 1):
            with tm.assert_raises_regex(TypeError, msg):
                idx.insert(1, incompatible)

    @pytest.mark.parametrize("insert, coerced_val, coerced_dtype", [
        (pd.Period('2012-01', freq='M'), '2012-01', 'period[M]'),
        (pd.Timestamp('2012-01-01'), pd.Timestamp('2012-01-01'), np.object),
        (1, 1, np.object),
        ('x', 'x', np.object),
    ])
    def test_insert_index_period(self, insert, coerced_val, coerced_dtype):
        idx = pd.PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
                             freq='M')
        assert idx.dtype == 'period[M]'

        # The expected index is a PeriodIndex only for a Period insert.
        index_type = (pd.PeriodIndex if isinstance(insert, pd.Period)
                      else pd.Index)
        expected_values = [pd.Period('2011-01', freq='M'),
                           coerced_val,
                           pd.Period('2011-02', freq='M'),
                           pd.Period('2011-03', freq='M'),
                           pd.Period('2011-04', freq='M')]
        expected = index_type(expected_values, freq='M')
        self._assert_insert_conversion(idx, insert, expected, coerced_dtype)

    # Empty placeholders: their names supply the dtype combinations that
    # check_comprehensiveness looks for.

    def test_insert_index_complex128(self):
        pass

    def test_insert_index_bool(self):
        pass
class TestWhereCoercion(CoercionBase):
    """Dtype coercion triggered by ``where`` on Series and Index."""

    method = 'where'

    def _assert_where_conversion(self, original, cond, values,
                                 expected, expected_dtype):
        """Call ``where(cond, values)`` on a copy and check the result."""
        result = original.copy().where(cond, values)
        self._assert(result, expected, expected_dtype)

    @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
                             ids=['series', 'index'])
    @pytest.mark.parametrize("fill_val,exp_dtype", [
        (1, np.object),
        (1.1, np.object),
        (1 + 1j, np.object),
        (True, np.object),
    ])
    def test_where_object(self, klass, fill_val, exp_dtype):
        container = klass(list('abcd'))
        assert container.dtype == np.object
        mask = klass([True, False, True, False])

        # For a Series the expected replacement of True is written as 1.
        ret_val = 1 if (fill_val is True and klass is pd.Series) else fill_val

        # scalar replacement
        expected = klass(['a', ret_val, 'c', ret_val])
        self._assert_where_conversion(container, mask, fill_val, expected,
                                      exp_dtype)

        # container replacement
        if fill_val is True:
            other = klass([True, False, True, True])
        else:
            other = klass(fill_val * x for x in [5, 6, 7, 8])

        expected = klass(['a', other[1], 'c', other[3]])
        self._assert_where_conversion(container, mask, other, expected,
                                      exp_dtype)

    @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
                             ids=['series', 'index'])
    @pytest.mark.parametrize("fill_val,exp_dtype", [
        (1, np.int64),
        (1.1, np.float64),
        (1 + 1j, np.complex128),
        (True, np.object),
    ])
    def test_where_int64(self, klass, fill_val, exp_dtype):
        if klass is pd.Index and exp_dtype is np.complex128:
            pytest.skip("Complex Index not supported")
        container = klass([1, 2, 3, 4])
        assert container.dtype == np.int64
        mask = klass([True, False, True, False])

        # scalar replacement
        expected = klass([1, fill_val, 3, fill_val])
        self._assert_where_conversion(container, mask, fill_val, expected,
                                      exp_dtype)

        # container replacement
        if fill_val is True:
            other = klass([True, False, True, True])
        else:
            other = klass(x * fill_val for x in [5, 6, 7, 8])
        expected = klass([1, other[1], 3, other[3]])
        self._assert_where_conversion(container, mask, other, expected,
                                      exp_dtype)

    @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
                             ids=['series', 'index'])
    @pytest.mark.parametrize("fill_val, exp_dtype", [
        (1, np.float64),
        (1.1, np.float64),
        (1 + 1j, np.complex128),
        (True, np.object),
    ])
    def test_where_float64(self, klass, fill_val, exp_dtype):
        if klass is pd.Index and exp_dtype is np.complex128:
            pytest.skip("Complex Index not supported")
        container = klass([1.1, 2.2, 3.3, 4.4])
        assert container.dtype == np.float64
        mask = klass([True, False, True, False])

        # scalar replacement
        expected = klass([1.1, fill_val, 3.3, fill_val])
        self._assert_where_conversion(container, mask, fill_val, expected,
                                      exp_dtype)

        # container replacement
        if fill_val is True:
            other = klass([True, False, True, True])
        else:
            other = klass(x * fill_val for x in [5, 6, 7, 8])
        expected = klass([1.1, other[1], 3.3, other[3]])
        self._assert_where_conversion(container, mask, other, expected,
                                      exp_dtype)

    @pytest.mark.parametrize("fill_val,exp_dtype", [
        (1, np.complex128),
        (1.1, np.complex128),
        (1 + 1j, np.complex128),
        (True, np.object),
    ])
    def test_where_series_complex128(self, fill_val, exp_dtype):
        ser = pd.Series([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j])
        assert ser.dtype == np.complex128
        mask = pd.Series([True, False, True, False])

        # scalar replacement
        expected = pd.Series([1 + 1j, fill_val, 3 + 3j, fill_val])
        self._assert_where_conversion(ser, mask, fill_val, expected,
                                      exp_dtype)

        # Series replacement
        if fill_val is True:
            other = pd.Series([True, False, True, True])
        else:
            other = pd.Series(x * fill_val for x in [5, 6, 7, 8])
        expected = pd.Series([1 + 1j, other[1], 3 + 3j, other[3]])
        self._assert_where_conversion(ser, mask, other, expected, exp_dtype)

    @pytest.mark.parametrize("fill_val,exp_dtype", [
        (1, np.object),
        (1.1, np.object),
        (1 + 1j, np.object),
        (True, np.bool),
    ])
    def test_where_series_bool(self, fill_val, exp_dtype):
        ser = pd.Series([True, False, True, False])
        assert ser.dtype == np.bool
        mask = pd.Series([True, False, True, False])

        # scalar replacement
        expected = pd.Series([True, fill_val, True, fill_val])
        self._assert_where_conversion(ser, mask, fill_val, expected,
                                      exp_dtype)

        # Series replacement
        if fill_val is True:
            other = pd.Series([True, False, True, True])
        else:
            other = pd.Series(x * fill_val for x in [5, 6, 7, 8])
        expected = pd.Series([True, other[1], True, other[3]])
        self._assert_where_conversion(ser, mask, other, expected, exp_dtype)

    @pytest.mark.parametrize("fill_val,exp_dtype", [
        (pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
        (pd.Timestamp('2012-01-01', tz='US/Eastern'), np.object),
    ], ids=['datetime64', 'datetime64tz'])
    def test_where_series_datetime64(self, fill_val, exp_dtype):
        first = pd.Timestamp('2011-01-01')
        third = pd.Timestamp('2011-01-03')

        ser = pd.Series([first,
                         pd.Timestamp('2011-01-02'),
                         third,
                         pd.Timestamp('2011-01-04')])
        assert ser.dtype == 'datetime64[ns]'
        mask = pd.Series([True, False, True, False])

        # scalar replacement
        expected = pd.Series([first, fill_val, third, fill_val])
        self._assert_where_conversion(ser, mask, fill_val, expected,
                                      exp_dtype)

        # Series replacement
        other = pd.Series(pd.date_range(fill_val, periods=4))
        if fill_val.tz:
            # Pin the result asserted today, then report the known gap.
            shifted = pd.Series([first,
                                 pd.Timestamp('2012-01-02 05:00'),
                                 third,
                                 pd.Timestamp('2012-01-04 05:00')])
            self._assert_where_conversion(ser, mask, other, shifted,
                                          'datetime64[ns]')
            pytest.xfail("ToDo: do not coerce to UTC, must be object")

        expected = pd.Series([first, other[1], third, other[3]])
        self._assert_where_conversion(ser, mask, other, expected, exp_dtype)

    @pytest.mark.parametrize("fill_val,exp_dtype", [
        (pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
        (pd.Timestamp('2012-01-01', tz='US/Eastern'), np.object),
    ], ids=['datetime64', 'datetime64tz'])
    def test_where_index_datetime(self, fill_val, exp_dtype):
        idx = pd.Index([pd.Timestamp('2011-01-01'),
                        pd.Timestamp('2011-01-02'),
                        pd.Timestamp('2011-01-03'),
                        pd.Timestamp('2011-01-04')])
        assert idx.dtype == 'datetime64[ns]'
        mask = pd.Index([True, False, True, False])

        # scalar replacement is expected to raise
        msg = ("Index\\(\\.\\.\\.\\) must be called with a collection "
               "of some kind")
        with tm.assert_raises_regex(TypeError, msg):
            idx.where(mask, fill_val)

        # Index replacement
        other = pd.Index(pd.date_range(fill_val, periods=4))
        expected = pd.Index([pd.Timestamp('2011-01-01'),
                             pd.Timestamp('2012-01-02'),
                             pd.Timestamp('2011-01-03'),
                             pd.Timestamp('2012-01-04')])

        if fill_val.tz:
            self._assert_where_conversion(idx, mask, other, expected,
                                          'datetime64[ns]')
            pytest.xfail("ToDo: do not ignore timezone, must be object")
        self._assert_where_conversion(idx, mask, other, expected, exp_dtype)
        # The test is always reported as xfail once the checks above pass.
        pytest.xfail("datetime64 + datetime64 -> datetime64 must support"
                     " scalar")

    # Empty placeholders: their names supply the klass/dtype combinations
    # that check_comprehensiveness looks for.

    def test_where_index_complex128(self):
        pass

    def test_where_index_bool(self):
        pass

    def test_where_series_datetime64tz(self):
        pass

    def test_where_series_timedelta64(self):
        pass

    def test_where_series_period(self):
        pass

    def test_where_index_datetime64tz(self):
        pass

    def test_where_index_timedelta64(self):
        pass

    def test_where_index_period(self):
        pass
class TestFillnaSeriesCoercion(CoercionBase):
    """Dtype coercion triggered by ``fillna`` on Series and Index."""

    # not indexing, but placed here for consistency

    method = 'fillna'

    def test_has_comprehensive_tests(self):
        pass

    def _assert_fillna_conversion(self, original, value,
                                  expected, expected_dtype):
        """Call ``fillna(value)`` on a copy and check the result."""
        result = original.copy().fillna(value)
        self._assert(result, expected, expected_dtype)

    @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
                             ids=['series', 'index'])
    @pytest.mark.parametrize("fill_val, fill_dtype", [
        (1, np.object),
        (1.1, np.object),
        (1 + 1j, np.object),
        (True, np.object),
    ])
    def test_fillna_object(self, klass, fill_val, fill_dtype):
        container = klass(['a', np.nan, 'c', 'd'])
        assert container.dtype == np.object

        expected = klass(['a', fill_val, 'c', 'd'])
        self._assert_fillna_conversion(container, fill_val, expected,
                                       fill_dtype)

    @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
                             ids=['series', 'index'])
    @pytest.mark.parametrize("fill_val,fill_dtype", [
        (1, np.float64),
        (1.1, np.float64),
        (1 + 1j, np.complex128),
        (True, np.object),
    ])
    def test_fillna_float64(self, klass, fill_val, fill_dtype):
        container = klass([1.1, np.nan, 3.3, 4.4])
        assert container.dtype == np.float64

        expected = klass([1.1, fill_val, 3.3, 4.4])

        # float + complex: a complex Index is not supported, so the expected
        # dtype is complex for Series but object for Index
        if fill_dtype == np.complex128 and klass == pd.Index:
            fill_dtype = np.object
        self._assert_fillna_conversion(container, fill_val, expected,
                                       fill_dtype)

    @pytest.mark.parametrize("fill_val,fill_dtype", [
        (1, np.complex128),
        (1.1, np.complex128),
        (1 + 1j, np.complex128),
        (True, np.object),
    ])
    def test_fillna_series_complex128(self, fill_val, fill_dtype):
        ser = pd.Series([1 + 1j, np.nan, 3 + 3j, 4 + 4j])
        assert ser.dtype == np.complex128

        expected = pd.Series([1 + 1j, fill_val, 3 + 3j, 4 + 4j])
        self._assert_fillna_conversion(ser, fill_val, expected, fill_dtype)

    @pytest.mark.parametrize("klass", [pd.Series, pd.Index],
                             ids=['series', 'index'])
    @pytest.mark.parametrize("fill_val,fill_dtype", [
        (pd.Timestamp('2012-01-01'), 'datetime64[ns]'),
        (pd.Timestamp('2012-01-01', tz='US/Eastern'), np.object),
        (1, np.object),
        ('x', np.object),
    ], ids=['datetime64', 'datetime64tz', 'object', 'object'])
    def test_fillna_datetime(self, klass, fill_val, fill_dtype):
        first = pd.Timestamp('2011-01-01')
        third = pd.Timestamp('2011-01-03')
        fourth = pd.Timestamp('2011-01-04')

        container = klass([first, pd.NaT, third, fourth])
        assert container.dtype == 'datetime64[ns]'

        expected = klass([first, fill_val, third, fourth])
        self._assert_fillna_conversion(container, fill_val, expected,
                                       fill_dtype)

    @pytest.mark.parametrize("klass", [pd.Series, pd.Index])
    @pytest.mark.parametrize("fill_val,fill_dtype", [
        (pd.Timestamp('2012-01-01', tz='US/Eastern'),
         'datetime64[ns, US/Eastern]'),
        (pd.Timestamp('2012-01-01'), np.object),
        (pd.Timestamp('2012-01-01', tz='Asia/Tokyo'), np.object),
        (1, np.object),
        ('x', np.object),
    ])
    def test_fillna_datetime64tz(self, klass, fill_val, fill_dtype):
        tz = 'US/Eastern'
        first = pd.Timestamp('2011-01-01', tz=tz)
        third = pd.Timestamp('2011-01-03', tz=tz)
        fourth = pd.Timestamp('2011-01-04', tz=tz)

        container = klass([first, pd.NaT, third, fourth])
        assert container.dtype == 'datetime64[ns, US/Eastern]'

        expected = klass([first, fill_val, third, fourth])
        self._assert_fillna_conversion(container, fill_val, expected,
                                       fill_dtype)

    # Empty placeholders: their names supply the klass/dtype combinations
    # that check_comprehensiveness looks for.

    def test_fillna_series_int64(self):
        pass

    def test_fillna_index_int64(self):
        pass

    def test_fillna_series_bool(self):
        pass

    def test_fillna_index_bool(self):
        pass

    def test_fillna_series_timedelta64(self):
        pass

    def test_fillna_series_period(self):
        pass

    def test_fillna_index_timedelta64(self):
        pass

    def test_fillna_index_period(self):
        pass
class TestReplaceSeriesCoercion(CoercionBase):
    """Dtype coercion triggered by ``Series.replace``."""

    klasses = ['series']
    method = 'replace'

    # Two sample values per dtype string; a test replaces the values stored
    # under ``from_key`` with the values stored under ``to_key``.
    rep = {}
    rep['object'] = ['a', 'b']
    rep['int64'] = [4, 5]
    rep['float64'] = [1.1, 2.2]
    rep['complex128'] = [1 + 1j, 2 + 2j]
    rep['bool'] = [True, False]
    rep['datetime64[ns]'] = [pd.Timestamp('2011-01-01'),
                             pd.Timestamp('2011-01-03')]

    for tz in ['UTC', 'US/Eastern']:
        # to test tz => different tz replacement
        key = 'datetime64[ns, {0}]'.format(tz)
        rep[key] = [pd.Timestamp('2011-01-01', tz=tz),
                    pd.Timestamp('2011-01-03', tz=tz)]

    rep['timedelta64[ns]'] = [pd.Timedelta('1 day'),
                              pd.Timedelta('2 day')]

    def _make_replacer(self, how, from_key, to_key):
        """Build the ``replace`` argument mapping from_key to to_key values.

        Parameters
        ----------
        how : {'dict', 'series'}
            Container type used for the mapping.
        from_key, to_key : str
            Keys of ``rep`` giving the values to replace and their
            replacements.

        Raises
        ------
        ValueError
            If ``how`` is neither 'dict' nor 'series'.
        """
        if how == 'dict':
            return dict(zip(self.rep[from_key], self.rep[to_key]))
        if how == 'series':
            return pd.Series(self.rep[to_key], index=self.rep[from_key])
        raise ValueError(how)

    @pytest.mark.parametrize('how', ['dict', 'series'])
    @pytest.mark.parametrize('to_key', [
        'object', 'int64', 'float64', 'complex128', 'bool', 'datetime64[ns]',
        'datetime64[ns, UTC]', 'datetime64[ns, US/Eastern]', 'timedelta64[ns]'
    ], ids=['object', 'int64', 'float64', 'complex128', 'bool',
            'datetime64', 'datetime64tz', 'datetime64tz', 'timedelta64'])
    @pytest.mark.parametrize('from_key', [
        'object', 'int64', 'float64', 'complex128', 'bool', 'datetime64[ns]',
        'datetime64[ns, UTC]', 'datetime64[ns, US/Eastern]', 'timedelta64[ns]']
    )
    def test_replace_series(self, how, to_key, from_key):
        """Replace every value of a ``from_key`` Series with ``to_key`` ones."""
        if from_key == 'bool' and how == 'series' and compat.PY3:
            # doesn't work in PY3, though ...dict_from_bool works fine
            pytest.skip("doesn't work as in PY3")

        index = pd.Index([3, 4], name='xxx')
        obj = pd.Series(self.rep[from_key], index=index, name='yyy')
        assert obj.dtype == from_key

        if (from_key.startswith('datetime') and to_key.startswith('datetime')):
            # tested below
            return
        elif from_key in ['datetime64[ns, US/Eastern]', 'datetime64[ns, UTC]']:
            # tested below
            return

        replacer = self._make_replacer(how, from_key, to_key)
        result = obj.replace(replacer)

        # Compare ``to_key`` by equality: the previous ``to_key in ('int64')``
        # was a substring test on a string, not tuple membership.
        keeps_from_dtype = (
            (from_key == 'float64' and to_key == 'int64') or
            (from_key == 'complex128' and to_key in ('int64', 'float64')))

        if keeps_from_dtype:
            if compat.is_platform_32bit() or compat.is_platform_windows():
                pytest.skip("32-bit platform buggy: {0} -> {1}".format
                            (from_key, to_key))

            # Expected: do not downcast by replacement
            exp = pd.Series(self.rep[to_key], index=index,
                            name='yyy', dtype=from_key)
        else:
            exp = pd.Series(self.rep[to_key], index=index, name='yyy')
            assert exp.dtype == to_key

        tm.assert_series_equal(result, exp)

    # TODO(jbrockmendel) commented out to only have a single xfail printed
    @pytest.mark.xfail(reason='GH #18376, tzawareness-compat bug '
                              'in BlockManager.replace_list')
    # @pytest.mark.parametrize('how', ['dict', 'series'])
    # @pytest.mark.parametrize('to_key', ['timedelta64[ns]', 'bool', 'object',
    #                                     'complex128', 'float64', 'int64'])
    # @pytest.mark.parametrize('from_key', ['datetime64[ns, UTC]',
    #                                       'datetime64[ns, US/Eastern]'])
    # def test_replace_series_datetime_tz(self, how, to_key, from_key):
    def test_replace_series_datetime_tz(self):
        """Single fixed case of tz-aware datetime -> timedelta replacement."""
        how = 'series'
        from_key = 'datetime64[ns, US/Eastern]'
        to_key = 'timedelta64[ns]'

        index = pd.Index([3, 4], name='xxx')
        obj = pd.Series(self.rep[from_key], index=index, name='yyy')
        assert obj.dtype == from_key

        replacer = self._make_replacer(how, from_key, to_key)
        result = obj.replace(replacer)

        exp = pd.Series(self.rep[to_key], index=index, name='yyy')
        assert exp.dtype == to_key

        tm.assert_series_equal(result, exp)

    # TODO(jreback) commented out to only have a single xfail printed
    @pytest.mark.xfail(reason="different tz, "
                              "currently mask_missing raises SystemError")
    # @pytest.mark.parametrize('how', ['dict', 'series'])
    # @pytest.mark.parametrize('to_key', [
    #     'datetime64[ns]', 'datetime64[ns, UTC]',
    #     'datetime64[ns, US/Eastern]'])
    # @pytest.mark.parametrize('from_key', [
    #     'datetime64[ns]', 'datetime64[ns, UTC]',
    #     'datetime64[ns, US/Eastern]'])
    # def test_replace_series_datetime_datetime(self, how, to_key, from_key):
    def test_replace_series_datetime_datetime(self):
        """Single fixed case of datetime -> datetime replacement."""
        how = 'dict'
        to_key = 'datetime64[ns]'
        from_key = 'datetime64[ns]'

        index = pd.Index([3, 4], name='xxx')
        obj = pd.Series(self.rep[from_key], index=index, name='yyy')
        assert obj.dtype == from_key

        replacer = self._make_replacer(how, from_key, to_key)
        result = obj.replace(replacer)

        exp = pd.Series(self.rep[to_key], index=index, name='yyy')
        assert exp.dtype == to_key

        tm.assert_series_equal(result, exp)

    # Empty placeholder: its name supplies the series/period combination
    # that check_comprehensiveness looks for.
    def test_replace_series_period(self):
        pass