# coding=utf-8 # pylint: disable-msg=E1101,W0612 import pytest import pandas as pd import numpy as np from pandas import (Series, date_range, isna, Index, Timestamp) from pandas.compat import lrange, range from pandas.core.dtypes.common import is_integer from pandas.core.indexing import IndexingError from pandas.tseries.offsets import BDay from pandas.util.testing import (assert_series_equal) import pandas.util.testing as tm def test_getitem_boolean(test_data): s = test_data.series mask = s > s.median() # passing list is OK result = s[list(mask)] expected = s[mask] assert_series_equal(result, expected) tm.assert_index_equal(result.index, s.index[mask]) def test_getitem_boolean_empty(): s = Series([], dtype=np.int64) s.index.name = 'index_name' s = s[s.isna()] assert s.index.name == 'index_name' assert s.dtype == np.int64 # GH5877 # indexing with empty series s = Series(['A', 'B']) expected = Series(np.nan, index=['C'], dtype=object) result = s[Series(['C'], dtype=object)] assert_series_equal(result, expected) s = Series(['A', 'B']) expected = Series(dtype=object, index=Index([], dtype='int64')) result = s[Series([], dtype=object)] assert_series_equal(result, expected) # invalid because of the boolean indexer # that's empty or not-aligned def f(): s[Series([], dtype=bool)] pytest.raises(IndexingError, f) def f(): s[Series([True], dtype=bool)] pytest.raises(IndexingError, f) def test_getitem_boolean_object(test_data): # using column from DataFrame s = test_data.series mask = s > s.median() omask = mask.astype(object) # getitem result = s[omask] expected = s[mask] assert_series_equal(result, expected) # setitem s2 = s.copy() cop = s.copy() cop[omask] = 5 s2[mask] = 5 assert_series_equal(cop, s2) # nans raise exception omask[5:10] = np.nan pytest.raises(Exception, s.__getitem__, omask) pytest.raises(Exception, s.__setitem__, omask, 5) def test_getitem_setitem_boolean_corner(test_data): ts = test_data.ts mask_shifted = ts.shift(1, freq=BDay()) > ts.median() # these used to raise...?? pytest.raises(Exception, ts.__getitem__, mask_shifted) pytest.raises(Exception, ts.__setitem__, mask_shifted, 1) # ts[mask_shifted] # ts[mask_shifted] = 1 pytest.raises(Exception, ts.loc.__getitem__, mask_shifted) pytest.raises(Exception, ts.loc.__setitem__, mask_shifted, 1) # ts.loc[mask_shifted] # ts.loc[mask_shifted] = 2 def test_setitem_boolean(test_data): mask = test_data.series > test_data.series.median() # similar indexed series result = test_data.series.copy() result[mask] = test_data.series * 2 expected = test_data.series * 2 assert_series_equal(result[mask], expected[mask]) # needs alignment result = test_data.series.copy() result[mask] = (test_data.series * 2)[0:5] expected = (test_data.series * 2)[0:5].reindex_like(test_data.series) expected[-mask] = test_data.series[mask] assert_series_equal(result[mask], expected[mask]) def test_get_set_boolean_different_order(test_data): ordered = test_data.series.sort_values() # setting copy = test_data.series.copy() copy[ordered > 0] = 0 expected = test_data.series.copy() expected[expected > 0] = 0 assert_series_equal(copy, expected) # getting sel = test_data.series[ordered > 0] exp = test_data.series[test_data.series > 0] assert_series_equal(sel, exp) def test_where_unsafe(): # unsafe dtype changes for dtype in [np.int8, np.int16, np.int32, np.int64, np.float16, np.float32, np.float64]: s = Series(np.arange(10), dtype=dtype) mask = s < 5 s[mask] = lrange(2, 7) expected = Series(lrange(2, 7) + lrange(5, 10), dtype=dtype) assert_series_equal(s, expected) assert s.dtype == expected.dtype # these are allowed operations, but are upcasted for dtype in [np.int64, np.float64]: s = Series(np.arange(10), dtype=dtype) mask = s < 5 values = [2.5, 3.5, 4.5, 5.5, 6.5] s[mask] = values expected = Series(values + lrange(5, 10), dtype='float64') assert_series_equal(s, expected) assert s.dtype == expected.dtype # GH 9731 s = Series(np.arange(10), dtype='int64') mask = s > 5 values = [2.5, 3.5, 4.5, 5.5] s[mask] = values expected = Series(lrange(6) + values, dtype='float64') assert_series_equal(s, expected) # can't do these as we are forced to change the itemsize of the input # to something we cannot for dtype in [np.int8, np.int16, np.int32, np.float16, np.float32]: s = Series(np.arange(10), dtype=dtype) mask = s < 5 values = [2.5, 3.5, 4.5, 5.5, 6.5] pytest.raises(Exception, s.__setitem__, tuple(mask), values) # GH3235 s = Series(np.arange(10), dtype='int64') mask = s < 5 s[mask] = lrange(2, 7) expected = Series(lrange(2, 7) + lrange(5, 10), dtype='int64') assert_series_equal(s, expected) assert s.dtype == expected.dtype s = Series(np.arange(10), dtype='int64') mask = s > 5 s[mask] = [0] * 4 expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype='int64') assert_series_equal(s, expected) s = Series(np.arange(10)) mask = s > 5 def f(): s[mask] = [5, 4, 3, 2, 1] pytest.raises(ValueError, f) def f(): s[mask] = [0] * 5 pytest.raises(ValueError, f) # dtype changes s = Series([1, 2, 3, 4]) result = s.where(s > 2, np.nan) expected = Series([np.nan, np.nan, 3, 4]) assert_series_equal(result, expected) # GH 4667 # setting with None changes dtype s = Series(range(10)).astype(float) s[8] = None result = s[8] assert isna(result) s = Series(range(10)).astype(float) s[s > 8] = None result = s[isna(s)] expected = Series(np.nan, index=[9]) assert_series_equal(result, expected) def test_where_raise_on_error_deprecation(): # gh-14968 # deprecation of raise_on_error s = Series(np.random.randn(5)) cond = s > 0 with tm.assert_produces_warning(FutureWarning): s.where(cond, raise_on_error=True) with tm.assert_produces_warning(FutureWarning): s.mask(cond, raise_on_error=True) def test_where(): s = Series(np.random.randn(5)) cond = s > 0 rs = s.where(cond).dropna() rs2 = s[cond] assert_series_equal(rs, rs2) rs = s.where(cond, -s) assert_series_equal(rs, s.abs()) rs = s.where(cond) assert (s.shape == rs.shape) assert (rs is not s) # test alignment cond = Series([True, False, False, True, False], index=s.index) s2 = -(s.abs()) expected = s2[cond].reindex(s2.index[:3]).reindex(s2.index) rs = s2.where(cond[:3]) assert_series_equal(rs, expected) expected = s2.abs() expected.iloc[0] = s2[0] rs = s2.where(cond[:3], -s2) assert_series_equal(rs, expected) def test_where_error(): s = Series(np.random.randn(5)) cond = s > 0 pytest.raises(ValueError, s.where, 1) pytest.raises(ValueError, s.where, cond[:3].values, -s) # GH 2745 s = Series([1, 2]) s[[True, False]] = [0, 1] expected = Series([0, 2]) assert_series_equal(s, expected) # failures pytest.raises(ValueError, s.__setitem__, tuple([[[True, False]]]), [0, 2, 3]) pytest.raises(ValueError, s.__setitem__, tuple([[[True, False]]]), []) @pytest.mark.parametrize('klass', [list, tuple, np.array, Series]) def test_where_array_like(klass): # see gh-15414 s = Series([1, 2, 3]) cond = [False, True, True] expected = Series([np.nan, 2, 3]) result = s.where(klass(cond)) assert_series_equal(result, expected) @pytest.mark.parametrize('cond', [ [1, 0, 1], Series([2, 5, 7]), ["True", "False", "True"], [Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")] ]) def test_where_invalid_input(cond): # see gh-15414: only boolean arrays accepted s = Series([1, 2, 3]) msg = "Boolean array expected for the condition" with tm.assert_raises_regex(ValueError, msg): s.where(cond) msg = "Array conditional must be same shape as self" with tm.assert_raises_regex(ValueError, msg): s.where([True]) def test_where_ndframe_align(): msg = "Array conditional must be same shape as self" s = Series([1, 2, 3]) cond = [True] with tm.assert_raises_regex(ValueError, msg): s.where(cond) expected = Series([1, np.nan, np.nan]) out = s.where(Series(cond)) tm.assert_series_equal(out, expected) cond = np.array([False, True, False, True]) with tm.assert_raises_regex(ValueError, msg): s.where(cond) expected = Series([np.nan, 2, np.nan]) out = s.where(Series(cond)) tm.assert_series_equal(out, expected) def test_where_setitem_invalid(): # GH 2702 # make sure correct exceptions are raised on invalid list assignment # slice s = Series(list('abc')) def f(): s[0:3] = list(range(27)) pytest.raises(ValueError, f) s[0:3] = list(range(3)) expected = Series([0, 1, 2]) assert_series_equal(s.astype(np.int64), expected, ) # slice with step s = Series(list('abcdef')) def f(): s[0:4:2] = list(range(27)) pytest.raises(ValueError, f) s = Series(list('abcdef')) s[0:4:2] = list(range(2)) expected = Series([0, 'b', 1, 'd', 'e', 'f']) assert_series_equal(s, expected) # neg slices s = Series(list('abcdef')) def f(): s[:-1] = list(range(27)) pytest.raises(ValueError, f) s[-3:-1] = list(range(2)) expected = Series(['a', 'b', 'c', 0, 1, 'f']) assert_series_equal(s, expected) # list s = Series(list('abc')) def f(): s[[0, 1, 2]] = list(range(27)) pytest.raises(ValueError, f) s = Series(list('abc')) def f(): s[[0, 1, 2]] = list(range(2)) pytest.raises(ValueError, f) # scalar s = Series(list('abc')) s[0] = list(range(10)) expected = Series([list(range(10)), 'b', 'c']) assert_series_equal(s, expected) @pytest.mark.parametrize('size', range(2, 6)) @pytest.mark.parametrize('mask', [ [True, False, False, False, False], [True, False], [False] ]) @pytest.mark.parametrize('item', [ 2.0, np.nan, np.finfo(np.float).max, np.finfo(np.float).min ]) # Test numpy arrays, lists and tuples as the input to be # broadcast @pytest.mark.parametrize('box', [ lambda x: np.array([x]), lambda x: [x], lambda x: (x,) ]) def test_broadcast(size, mask, item, box): selection = np.resize(mask, size) data = np.arange(size, dtype=float) # Construct the expected series by taking the source # data or item based on the selection expected = Series([item if use_item else data[ i] for i, use_item in enumerate(selection)]) s = Series(data) s[selection] = box(item) assert_series_equal(s, expected) s = Series(data) result = s.where(~selection, box(item)) assert_series_equal(result, expected) s = Series(data) result = s.mask(selection, box(item)) assert_series_equal(result, expected) def test_where_inplace(): s = Series(np.random.randn(5)) cond = s > 0 rs = s.copy() rs.where(cond, inplace=True) assert_series_equal(rs.dropna(), s[cond]) assert_series_equal(rs, s.where(cond)) rs = s.copy() rs.where(cond, -s, inplace=True) assert_series_equal(rs, s.where(cond, -s)) def test_where_dups(): # GH 4550 # where crashes with dups in index s1 = Series(list(range(3))) s2 = Series(list(range(3))) comb = pd.concat([s1, s2]) result = comb.where(comb < 2) expected = Series([0, 1, np.nan, 0, 1, np.nan], index=[0, 1, 2, 0, 1, 2]) assert_series_equal(result, expected) # GH 4548 # inplace updating not working with dups comb[comb < 1] = 5 expected = Series([5, 1, 2, 5, 1, 2], index=[0, 1, 2, 0, 1, 2]) assert_series_equal(comb, expected) comb[comb < 2] += 10 expected = Series([5, 11, 2, 5, 11, 2], index=[0, 1, 2, 0, 1, 2]) assert_series_equal(comb, expected) def test_where_numeric_with_string(): # GH 9280 s = pd.Series([1, 2, 3]) w = s.where(s > 1, 'X') assert not is_integer(w[0]) assert is_integer(w[1]) assert is_integer(w[2]) assert isinstance(w[0], str) assert w.dtype == 'object' w = s.where(s > 1, ['X', 'Y', 'Z']) assert not is_integer(w[0]) assert is_integer(w[1]) assert is_integer(w[2]) assert isinstance(w[0], str) assert w.dtype == 'object' w = s.where(s > 1, np.array(['X', 'Y', 'Z'])) assert not is_integer(w[0]) assert is_integer(w[1]) assert is_integer(w[2]) assert isinstance(w[0], str) assert w.dtype == 'object' def test_where_timedelta_coerce(): s = Series([1, 2], dtype='timedelta64[ns]') expected = Series([10, 10]) mask = np.array([False, False]) rs = s.where(mask, [10, 10]) assert_series_equal(rs, expected) rs = s.where(mask, 10) assert_series_equal(rs, expected) rs = s.where(mask, 10.0) assert_series_equal(rs, expected) rs = s.where(mask, [10.0, 10.0]) assert_series_equal(rs, expected) rs = s.where(mask, [10.0, np.nan]) expected = Series([10, None], dtype='object') assert_series_equal(rs, expected) def test_where_datetime_conversion(): s = Series(date_range('20130102', periods=2)) expected = Series([10, 10]) mask = np.array([False, False]) rs = s.where(mask, [10, 10]) assert_series_equal(rs, expected) rs = s.where(mask, 10) assert_series_equal(rs, expected) rs = s.where(mask, 10.0) assert_series_equal(rs, expected) rs = s.where(mask, [10.0, 10.0]) assert_series_equal(rs, expected) rs = s.where(mask, [10.0, np.nan]) expected = Series([10, None], dtype='object') assert_series_equal(rs, expected) # GH 15701 timestamps = ['2016-12-31 12:00:04+00:00', '2016-12-31 12:00:04.010000+00:00'] s = Series([pd.Timestamp(t) for t in timestamps]) rs = s.where(Series([False, True])) expected = Series([pd.NaT, s[1]]) assert_series_equal(rs, expected) def test_mask(): # compare with tested results in test_where s = Series(np.random.randn(5)) cond = s > 0 rs = s.where(~cond, np.nan) assert_series_equal(rs, s.mask(cond)) rs = s.where(~cond) rs2 = s.mask(cond) assert_series_equal(rs, rs2) rs = s.where(~cond, -s) rs2 = s.mask(cond, -s) assert_series_equal(rs, rs2) cond = Series([True, False, False, True, False], index=s.index) s2 = -(s.abs()) rs = s2.where(~cond[:3]) rs2 = s2.mask(cond[:3]) assert_series_equal(rs, rs2) rs = s2.where(~cond[:3], -s2) rs2 = s2.mask(cond[:3], -s2) assert_series_equal(rs, rs2) pytest.raises(ValueError, s.mask, 1) pytest.raises(ValueError, s.mask, cond[:3].values, -s) # dtype changes s = Series([1, 2, 3, 4]) result = s.mask(s > 2, np.nan) expected = Series([1, 2, np.nan, np.nan]) assert_series_equal(result, expected) def test_mask_inplace(): s = Series(np.random.randn(5)) cond = s > 0 rs = s.copy() rs.mask(cond, inplace=True) assert_series_equal(rs.dropna(), s[~cond]) assert_series_equal(rs, s.mask(cond)) rs = s.copy() rs.mask(cond, -s, inplace=True) assert_series_equal(rs, s.mask(cond, -s))