338 lines
12 KiB
Python
338 lines
12 KiB
Python
|
""" test indexing with ix """
|
||
|
|
||
|
import pytest
|
||
|
|
||
|
from warnings import catch_warnings
|
||
|
|
||
|
import numpy as np
|
||
|
import pandas as pd
|
||
|
|
||
|
from pandas.core.dtypes.common import is_scalar
|
||
|
from pandas.compat import lrange
|
||
|
from pandas import Series, DataFrame, option_context, MultiIndex
|
||
|
from pandas.util import testing as tm
|
||
|
from pandas.errors import PerformanceWarning
|
||
|
|
||
|
|
||
|
class TestIX(object):
|
||
|
|
||
|
def test_ix_deprecation(self):
|
||
|
# GH 15114
|
||
|
|
||
|
df = DataFrame({'A': [1, 2, 3]})
|
||
|
with tm.assert_produces_warning(DeprecationWarning,
|
||
|
check_stacklevel=False):
|
||
|
df.ix[1, 'A']
|
||
|
|
||
|
def test_ix_loc_setitem_consistency(self):
|
||
|
|
||
|
# GH 5771
|
||
|
# loc with slice and series
|
||
|
s = Series(0, index=[4, 5, 6])
|
||
|
s.loc[4:5] += 1
|
||
|
expected = Series([1, 1, 0], index=[4, 5, 6])
|
||
|
tm.assert_series_equal(s, expected)
|
||
|
|
||
|
# GH 5928
|
||
|
# chained indexing assignment
|
||
|
df = DataFrame({'a': [0, 1, 2]})
|
||
|
expected = df.copy()
|
||
|
with catch_warnings(record=True):
|
||
|
expected.ix[[0, 1, 2], 'a'] = -expected.ix[[0, 1, 2], 'a']
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]]
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
df = DataFrame({'a': [0, 1, 2], 'b': [0, 1, 2]})
|
||
|
with catch_warnings(record=True):
|
||
|
df['a'].ix[[0, 1, 2]] = -df['a'].ix[[0, 1, 2]].astype(
|
||
|
'float64') + 0.5
|
||
|
expected = DataFrame({'a': [0.5, -0.5, -1.5], 'b': [0, 1, 2]})
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
# GH 8607
|
||
|
# ix setitem consistency
|
||
|
df = DataFrame({'delta': [1174, 904, 161],
|
||
|
'elapsed': [7673, 9277, 1470],
|
||
|
'timestamp': [1413840976, 1413842580, 1413760580]})
|
||
|
expected = DataFrame({'delta': [1174, 904, 161],
|
||
|
'elapsed': [7673, 9277, 1470],
|
||
|
'timestamp': pd.to_datetime(
|
||
|
[1413840976, 1413842580, 1413760580],
|
||
|
unit='s')
|
||
|
})
|
||
|
|
||
|
df2 = df.copy()
|
||
|
df2['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
|
||
|
tm.assert_frame_equal(df2, expected)
|
||
|
|
||
|
df2 = df.copy()
|
||
|
df2.loc[:, 'timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
|
||
|
tm.assert_frame_equal(df2, expected)
|
||
|
|
||
|
df2 = df.copy()
|
||
|
with catch_warnings(record=True):
|
||
|
df2.ix[:, 2] = pd.to_datetime(df['timestamp'], unit='s')
|
||
|
tm.assert_frame_equal(df2, expected)
|
||
|
|
||
|
def test_ix_loc_consistency(self):
|
||
|
|
||
|
# GH 8613
|
||
|
# some edge cases where ix/loc should return the same
|
||
|
# this is not an exhaustive case
|
||
|
|
||
|
def compare(result, expected):
|
||
|
if is_scalar(expected):
|
||
|
assert result == expected
|
||
|
else:
|
||
|
assert expected.equals(result)
|
||
|
|
||
|
# failure cases for .loc, but these work for .ix
|
||
|
df = DataFrame(np.random.randn(5, 4), columns=list('ABCD'))
|
||
|
for key in [slice(1, 3), tuple([slice(0, 2), slice(0, 2)]),
|
||
|
tuple([slice(0, 2), df.columns[0:2]])]:
|
||
|
|
||
|
for index in [tm.makeStringIndex, tm.makeUnicodeIndex,
|
||
|
tm.makeDateIndex, tm.makePeriodIndex,
|
||
|
tm.makeTimedeltaIndex]:
|
||
|
df.index = index(len(df.index))
|
||
|
with catch_warnings(record=True):
|
||
|
df.ix[key]
|
||
|
|
||
|
pytest.raises(TypeError, lambda: df.loc[key])
|
||
|
|
||
|
df = DataFrame(np.random.randn(5, 4), columns=list('ABCD'),
|
||
|
index=pd.date_range('2012-01-01', periods=5))
|
||
|
|
||
|
for key in ['2012-01-03',
|
||
|
'2012-01-31',
|
||
|
slice('2012-01-03', '2012-01-03'),
|
||
|
slice('2012-01-03', '2012-01-04'),
|
||
|
slice('2012-01-03', '2012-01-06', 2),
|
||
|
slice('2012-01-03', '2012-01-31'),
|
||
|
tuple([[True, True, True, False, True]]), ]:
|
||
|
|
||
|
# getitem
|
||
|
|
||
|
# if the expected raises, then compare the exceptions
|
||
|
try:
|
||
|
with catch_warnings(record=True):
|
||
|
expected = df.ix[key]
|
||
|
except KeyError:
|
||
|
pytest.raises(KeyError, lambda: df.loc[key])
|
||
|
continue
|
||
|
|
||
|
result = df.loc[key]
|
||
|
compare(result, expected)
|
||
|
|
||
|
# setitem
|
||
|
df1 = df.copy()
|
||
|
df2 = df.copy()
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
df1.ix[key] = 10
|
||
|
df2.loc[key] = 10
|
||
|
compare(df2, df1)
|
||
|
|
||
|
# edge cases
|
||
|
s = Series([1, 2, 3, 4], index=list('abde'))
|
||
|
|
||
|
result1 = s['a':'c']
|
||
|
with catch_warnings(record=True):
|
||
|
result2 = s.ix['a':'c']
|
||
|
result3 = s.loc['a':'c']
|
||
|
tm.assert_series_equal(result1, result2)
|
||
|
tm.assert_series_equal(result1, result3)
|
||
|
|
||
|
# now work rather than raising KeyError
|
||
|
s = Series(range(5), [-2, -1, 1, 2, 3])
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
result1 = s.ix[-10:3]
|
||
|
result2 = s.loc[-10:3]
|
||
|
tm.assert_series_equal(result1, result2)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
result1 = s.ix[0:3]
|
||
|
result2 = s.loc[0:3]
|
||
|
tm.assert_series_equal(result1, result2)
|
||
|
|
||
|
def test_ix_weird_slicing(self):
|
||
|
# http://stackoverflow.com/q/17056560/1240268
|
||
|
df = DataFrame({'one': [1, 2, 3, np.nan, np.nan],
|
||
|
'two': [1, 2, 3, 4, 5]})
|
||
|
df.loc[df['one'] > 1, 'two'] = -df['two']
|
||
|
|
||
|
expected = DataFrame({'one': {0: 1.0,
|
||
|
1: 2.0,
|
||
|
2: 3.0,
|
||
|
3: np.nan,
|
||
|
4: np.nan},
|
||
|
'two': {0: 1,
|
||
|
1: -2,
|
||
|
2: -3,
|
||
|
3: 4,
|
||
|
4: 5}})
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_ix_general(self):
|
||
|
|
||
|
# ix general issues
|
||
|
|
||
|
# GH 2817
|
||
|
data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
|
||
|
'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
|
||
|
'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}}
|
||
|
df = DataFrame(data).set_index(keys=['col', 'year'])
|
||
|
key = 4.0, 2012
|
||
|
|
||
|
# emits a PerformanceWarning, ok
|
||
|
with tm.assert_produces_warning(PerformanceWarning):
|
||
|
tm.assert_frame_equal(df.loc[key], df.iloc[2:])
|
||
|
|
||
|
# this is ok
|
||
|
df.sort_index(inplace=True)
|
||
|
res = df.loc[key]
|
||
|
|
||
|
# col has float dtype, result should be Float64Index
|
||
|
index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3],
|
||
|
names=['col', 'year'])
|
||
|
expected = DataFrame({'amount': [222, 333, 444]}, index=index)
|
||
|
tm.assert_frame_equal(res, expected)
|
||
|
|
||
|
def test_ix_assign_column_mixed(self):
|
||
|
# GH #1142
|
||
|
df = DataFrame(tm.getSeriesData())
|
||
|
df['foo'] = 'bar'
|
||
|
|
||
|
orig = df.loc[:, 'B'].copy()
|
||
|
df.loc[:, 'B'] = df.loc[:, 'B'] + 1
|
||
|
tm.assert_series_equal(df.B, orig + 1)
|
||
|
|
||
|
# GH 3668, mixed frame with series value
|
||
|
df = DataFrame({'x': lrange(10), 'y': lrange(10, 20), 'z': 'bar'})
|
||
|
expected = df.copy()
|
||
|
|
||
|
for i in range(5):
|
||
|
indexer = i * 2
|
||
|
v = 1000 + i * 200
|
||
|
expected.loc[indexer, 'y'] = v
|
||
|
assert expected.loc[indexer, 'y'] == v
|
||
|
|
||
|
df.loc[df.x % 2 == 0, 'y'] = df.loc[df.x % 2 == 0, 'y'] * 100
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
# GH 4508, making sure consistency of assignments
|
||
|
df = DataFrame({'a': [1, 2, 3], 'b': [0, 1, 2]})
|
||
|
df.loc[[0, 2, ], 'b'] = [100, -100]
|
||
|
expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]})
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
df = DataFrame({'a': lrange(4)})
|
||
|
df['b'] = np.nan
|
||
|
df.loc[[1, 3], 'b'] = [100, -100]
|
||
|
expected = DataFrame({'a': [0, 1, 2, 3],
|
||
|
'b': [np.nan, 100, np.nan, -100]})
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
# ok, but chained assignments are dangerous
|
||
|
# if we turn off chained assignment it will work
|
||
|
with option_context('chained_assignment', None):
|
||
|
df = DataFrame({'a': lrange(4)})
|
||
|
df['b'] = np.nan
|
||
|
df['b'].loc[[1, 3]] = [100, -100]
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_ix_get_set_consistency(self):
|
||
|
|
||
|
# GH 4544
|
||
|
# ix/loc get/set not consistent when
|
||
|
# a mixed int/string index
|
||
|
df = DataFrame(np.arange(16).reshape((4, 4)),
|
||
|
columns=['a', 'b', 8, 'c'],
|
||
|
index=['e', 7, 'f', 'g'])
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
assert df.ix['e', 8] == 2
|
||
|
assert df.loc['e', 8] == 2
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
df.ix['e', 8] = 42
|
||
|
assert df.ix['e', 8] == 42
|
||
|
assert df.loc['e', 8] == 42
|
||
|
|
||
|
df.loc['e', 8] = 45
|
||
|
with catch_warnings(record=True):
|
||
|
assert df.ix['e', 8] == 45
|
||
|
assert df.loc['e', 8] == 45
|
||
|
|
||
|
def test_ix_slicing_strings(self):
|
||
|
# see gh-3836
|
||
|
data = {'Classification':
|
||
|
['SA EQUITY CFD', 'bbb', 'SA EQUITY', 'SA SSF', 'aaa'],
|
||
|
'Random': [1, 2, 3, 4, 5],
|
||
|
'X': ['correct', 'wrong', 'correct', 'correct', 'wrong']}
|
||
|
df = DataFrame(data)
|
||
|
x = df[~df.Classification.isin(['SA EQUITY CFD', 'SA EQUITY', 'SA SSF'
|
||
|
])]
|
||
|
with catch_warnings(record=True):
|
||
|
df.ix[x.index, 'X'] = df['Classification']
|
||
|
|
||
|
expected = DataFrame({'Classification': {0: 'SA EQUITY CFD',
|
||
|
1: 'bbb',
|
||
|
2: 'SA EQUITY',
|
||
|
3: 'SA SSF',
|
||
|
4: 'aaa'},
|
||
|
'Random': {0: 1,
|
||
|
1: 2,
|
||
|
2: 3,
|
||
|
3: 4,
|
||
|
4: 5},
|
||
|
'X': {0: 'correct',
|
||
|
1: 'bbb',
|
||
|
2: 'correct',
|
||
|
3: 'correct',
|
||
|
4: 'aaa'}}) # bug was 4: 'bbb'
|
||
|
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_ix_setitem_out_of_bounds_axis_0(self):
|
||
|
df = DataFrame(
|
||
|
np.random.randn(2, 5), index=["row%s" % i for i in range(2)],
|
||
|
columns=["col%s" % i for i in range(5)])
|
||
|
with catch_warnings(record=True):
|
||
|
pytest.raises(ValueError, df.ix.__setitem__, (2, 0), 100)
|
||
|
|
||
|
def test_ix_setitem_out_of_bounds_axis_1(self):
|
||
|
df = DataFrame(
|
||
|
np.random.randn(5, 2), index=["row%s" % i for i in range(5)],
|
||
|
columns=["col%s" % i for i in range(2)])
|
||
|
with catch_warnings(record=True):
|
||
|
pytest.raises(ValueError, df.ix.__setitem__, (0, 2), 100)
|
||
|
|
||
|
def test_ix_empty_list_indexer_is_ok(self):
|
||
|
with catch_warnings(record=True):
|
||
|
from pandas.util.testing import makeCustomDataframe as mkdf
|
||
|
df = mkdf(5, 2)
|
||
|
# vertical empty
|
||
|
tm.assert_frame_equal(df.ix[:, []], df.iloc[:, :0],
|
||
|
check_index_type=True,
|
||
|
check_column_type=True)
|
||
|
# horizontal empty
|
||
|
tm.assert_frame_equal(df.ix[[], :], df.iloc[:0, :],
|
||
|
check_index_type=True,
|
||
|
check_column_type=True)
|
||
|
# horizontal empty
|
||
|
tm.assert_frame_equal(df.ix[[]], df.iloc[:0, :],
|
||
|
check_index_type=True,
|
||
|
check_column_type=True)
|
||
|
|
||
|
def test_ix_duplicate_returns_series(self):
|
||
|
df = DataFrame(np.random.randn(3, 3), index=[0.1, 0.2, 0.2],
|
||
|
columns=list('abc'))
|
||
|
with catch_warnings(record=True):
|
||
|
r = df.ix[0.2, 'a']
|
||
|
e = df.loc[0.2, 'a']
|
||
|
tm.assert_series_equal(r, e)
|