laywerrobot/lib/python3.6/site-packages/pandas/tests/indexing/test_ix.py

338 lines
12 KiB
Python
Raw Normal View History

2020-08-27 21:55:39 +02:00
""" test indexing with ix """
import pytest
from warnings import catch_warnings
import numpy as np
import pandas as pd
from pandas.core.dtypes.common import is_scalar
from pandas.compat import lrange
from pandas import Series, DataFrame, option_context, MultiIndex
from pandas.util import testing as tm
from pandas.errors import PerformanceWarning
class TestIX(object):
def test_ix_deprecation(self):
# GH 15114
df = DataFrame({'A': [1, 2, 3]})
with tm.assert_produces_warning(DeprecationWarning,
check_stacklevel=False):
df.ix[1, 'A']
def test_ix_loc_setitem_consistency(self):
# GH 5771
# loc with slice and series
s = Series(0, index=[4, 5, 6])
s.loc[4:5] += 1
expected = Series([1, 1, 0], index=[4, 5, 6])
tm.assert_series_equal(s, expected)
# GH 5928
# chained indexing assignment
df = DataFrame({'a': [0, 1, 2]})
expected = df.copy()
with catch_warnings(record=True):
expected.ix[[0, 1, 2], 'a'] = -expected.ix[[0, 1, 2], 'a']
with catch_warnings(record=True):
df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]]
tm.assert_frame_equal(df, expected)
df = DataFrame({'a': [0, 1, 2], 'b': [0, 1, 2]})
with catch_warnings(record=True):
df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]].astype(
'float64') + 0.5
expected = DataFrame({'a': [0.5, -0.5, -1.5], 'b': [0, 1, 2]})
tm.assert_frame_equal(df, expected)
# GH 8607
# ix setitem consistency
df = DataFrame({'delta': [1174, 904, 161],
'elapsed': [7673, 9277, 1470],
'timestamp': [1413840976, 1413842580, 1413760580]})
expected = DataFrame({'delta': [1174, 904, 161],
'elapsed': [7673, 9277, 1470],
'timestamp': pd.to_datetime(
[1413840976, 1413842580, 1413760580],
unit='s')
})
df2 = df.copy()
df2['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
tm.assert_frame_equal(df2, expected)
df2 = df.copy()
df2.loc[:, 'timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
tm.assert_frame_equal(df2, expected)
df2 = df.copy()
with catch_warnings(record=True):
df2.ix[:, 2] = pd.to_datetime(df['timestamp'], unit='s')
tm.assert_frame_equal(df2, expected)
def test_ix_loc_consistency(self):
# GH 8613
# some edge cases where ix/loc should return the same
# this is not an exhaustive case
def compare(result, expected):
if is_scalar(expected):
assert result == expected
else:
assert expected.equals(result)
# failure cases for .loc, but these work for .ix
df = DataFrame(np.random.randn(5, 4), columns=list('ABCD'))
for key in [slice(1, 3), tuple([slice(0, 2), slice(0, 2)]),
tuple([slice(0, 2), df.columns[0:2]])]:
for index in [tm.makeStringIndex, tm.makeUnicodeIndex,
tm.makeDateIndex, tm.makePeriodIndex,
tm.makeTimedeltaIndex]:
df.index = index(len(df.index))
with catch_warnings(record=True):
df.ix[key]
pytest.raises(TypeError, lambda: df.loc[key])
df = DataFrame(np.random.randn(5, 4), columns=list('ABCD'),
index=pd.date_range('2012-01-01', periods=5))
for key in ['2012-01-03',
'2012-01-31',
slice('2012-01-03', '2012-01-03'),
slice('2012-01-03', '2012-01-04'),
slice('2012-01-03', '2012-01-06', 2),
slice('2012-01-03', '2012-01-31'),
tuple([[True, True, True, False, True]]), ]:
# getitem
# if the expected raises, then compare the exceptions
try:
with catch_warnings(record=True):
expected = df.ix[key]
except KeyError:
pytest.raises(KeyError, lambda: df.loc[key])
continue
result = df.loc[key]
compare(result, expected)
# setitem
df1 = df.copy()
df2 = df.copy()
with catch_warnings(record=True):
df1.ix[key] = 10
df2.loc[key] = 10
compare(df2, df1)
# edge cases
s = Series([1, 2, 3, 4], index=list('abde'))
result1 = s['a':'c']
with catch_warnings(record=True):
result2 = s.ix['a':'c']
result3 = s.loc['a':'c']
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
# now work rather than raising KeyError
s = Series(range(5), [-2, -1, 1, 2, 3])
with catch_warnings(record=True):
result1 = s.ix[-10:3]
result2 = s.loc[-10:3]
tm.assert_series_equal(result1, result2)
with catch_warnings(record=True):
result1 = s.ix[0:3]
result2 = s.loc[0:3]
tm.assert_series_equal(result1, result2)
def test_ix_weird_slicing(self):
# http://stackoverflow.com/q/17056560/1240268
df = DataFrame({'one': [1, 2, 3, np.nan, np.nan],
'two': [1, 2, 3, 4, 5]})
df.loc[df['one'] > 1, 'two'] = -df['two']
expected = DataFrame({'one': {0: 1.0,
1: 2.0,
2: 3.0,
3: np.nan,
4: np.nan},
'two': {0: 1,
1: -2,
2: -3,
3: 4,
4: 5}})
tm.assert_frame_equal(df, expected)
def test_ix_general(self):
# ix general issues
# GH 2817
data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}}
df = DataFrame(data).set_index(keys=['col', 'year'])
key = 4.0, 2012
# emits a PerformanceWarning, ok
with tm.assert_produces_warning(PerformanceWarning):
tm.assert_frame_equal(df.loc[key], df.iloc[2:])
# this is ok
df.sort_index(inplace=True)
res = df.loc[key]
# col has float dtype, result should be Float64Index
index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3],
names=['col', 'year'])
expected = DataFrame({'amount': [222, 333, 444]}, index=index)
tm.assert_frame_equal(res, expected)
def test_ix_assign_column_mixed(self):
# GH #1142
df = DataFrame(tm.getSeriesData())
df['foo'] = 'bar'
orig = df.loc[:, 'B'].copy()
df.loc[:, 'B'] = df.loc[:, 'B'] + 1
tm.assert_series_equal(df.B, orig + 1)
# GH 3668, mixed frame with series value
df = DataFrame({'x': lrange(10), 'y': lrange(10, 20), 'z': 'bar'})
expected = df.copy()
for i in range(5):
indexer = i * 2
v = 1000 + i * 200
expected.loc[indexer, 'y'] = v
assert expected.loc[indexer, 'y'] == v
df.loc[df.x % 2 == 0, 'y'] = df.loc[df.x % 2 == 0, 'y'] * 100
tm.assert_frame_equal(df, expected)
# GH 4508, making sure consistency of assignments
df = DataFrame({'a': [1, 2, 3], 'b': [0, 1, 2]})
df.loc[[0, 2, ], 'b'] = [100, -100]
expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]})
tm.assert_frame_equal(df, expected)
df = DataFrame({'a': lrange(4)})
df['b'] = np.nan
df.loc[[1, 3], 'b'] = [100, -100]
expected = DataFrame({'a': [0, 1, 2, 3],
'b': [np.nan, 100, np.nan, -100]})
tm.assert_frame_equal(df, expected)
# ok, but chained assignments are dangerous
# if we turn off chained assignment it will work
with option_context('chained_assignment', None):
df = DataFrame({'a': lrange(4)})
df['b'] = np.nan
df['b'].loc[[1, 3]] = [100, -100]
tm.assert_frame_equal(df, expected)
def test_ix_get_set_consistency(self):
# GH 4544
# ix/loc get/set not consistent when
# a mixed int/string index
df = DataFrame(np.arange(16).reshape((4, 4)),
columns=['a', 'b', 8, 'c'],
index=['e', 7, 'f', 'g'])
with catch_warnings(record=True):
assert df.ix['e', 8] == 2
assert df.loc['e', 8] == 2
with catch_warnings(record=True):
df.ix['e', 8] = 42
assert df.ix['e', 8] == 42
assert df.loc['e', 8] == 42
df.loc['e', 8] = 45
with catch_warnings(record=True):
assert df.ix['e', 8] == 45
assert df.loc['e', 8] == 45
def test_ix_slicing_strings(self):
# see gh-3836
data = {'Classification':
['SA EQUITY CFD', 'bbb', 'SA EQUITY', 'SA SSF', 'aaa'],
'Random': [1, 2, 3, 4, 5],
'X': ['correct', 'wrong', 'correct', 'correct', 'wrong']}
df = DataFrame(data)
x = df[~df.Classification.isin(['SA EQUITY CFD', 'SA EQUITY', 'SA SSF'
])]
with catch_warnings(record=True):
df.ix[x.index, 'X'] = df['Classification']
expected = DataFrame({'Classification': {0: 'SA EQUITY CFD',
1: 'bbb',
2: 'SA EQUITY',
3: 'SA SSF',
4: 'aaa'},
'Random': {0: 1,
1: 2,
2: 3,
3: 4,
4: 5},
'X': {0: 'correct',
1: 'bbb',
2: 'correct',
3: 'correct',
4: 'aaa'}}) # bug was 4: 'bbb'
tm.assert_frame_equal(df, expected)
def test_ix_setitem_out_of_bounds_axis_0(self):
df = DataFrame(
np.random.randn(2, 5), index=["row%s" % i for i in range(2)],
columns=["col%s" % i for i in range(5)])
with catch_warnings(record=True):
pytest.raises(ValueError, df.ix.__setitem__, (2, 0), 100)
def test_ix_setitem_out_of_bounds_axis_1(self):
df = DataFrame(
np.random.randn(5, 2), index=["row%s" % i for i in range(5)],
columns=["col%s" % i for i in range(2)])
with catch_warnings(record=True):
pytest.raises(ValueError, df.ix.__setitem__, (0, 2), 100)
def test_ix_empty_list_indexer_is_ok(self):
with catch_warnings(record=True):
from pandas.util.testing import makeCustomDataframe as mkdf
df = mkdf(5, 2)
# vertical empty
tm.assert_frame_equal(df.ix[:, []], df.iloc[:, :0],
check_index_type=True,
check_column_type=True)
# horizontal empty
tm.assert_frame_equal(df.ix[[], :], df.iloc[:0, :],
check_index_type=True,
check_column_type=True)
# horizontal empty
tm.assert_frame_equal(df.ix[[]], df.iloc[:0, :],
check_index_type=True,
check_column_type=True)
def test_ix_duplicate_returns_series(self):
df = DataFrame(np.random.randn(3, 3), index=[0.1, 0.2, 0.2],
columns=list('abc'))
with catch_warnings(record=True):
r = df.ix[0.2, 'a']
e = df.loc[0.2, 'a']
tm.assert_series_equal(r, e)