668 lines
24 KiB
Python
668 lines
24 KiB
Python
""" test positional based indexing with iloc """
|
|
|
|
import pytest
|
|
|
|
from warnings import catch_warnings
|
|
import numpy as np
|
|
|
|
import pandas as pd
|
|
from pandas.compat import lrange, lmap
|
|
from pandas import Series, DataFrame, date_range, concat, isna
|
|
from pandas.util import testing as tm
|
|
from pandas.tests.indexing.common import Base
|
|
from pandas.api.types import is_scalar
|
|
|
|
|
|
class TestiLoc(Base):
|
|
|
|
def test_iloc_exceeds_bounds(self):
|
|
|
|
# GH6296
|
|
# iloc should allow indexers that exceed the bounds
|
|
df = DataFrame(np.random.random_sample((20, 5)), columns=list('ABCDE'))
|
|
expected = df
|
|
|
|
# lists of positions should raise IndexErrror!
|
|
with tm.assert_raises_regex(IndexError,
|
|
'positional indexers '
|
|
'are out-of-bounds'):
|
|
df.iloc[:, [0, 1, 2, 3, 4, 5]]
|
|
pytest.raises(IndexError, lambda: df.iloc[[1, 30]])
|
|
pytest.raises(IndexError, lambda: df.iloc[[1, -30]])
|
|
pytest.raises(IndexError, lambda: df.iloc[[100]])
|
|
|
|
s = df['A']
|
|
pytest.raises(IndexError, lambda: s.iloc[[100]])
|
|
pytest.raises(IndexError, lambda: s.iloc[[-100]])
|
|
|
|
# still raise on a single indexer
|
|
msg = 'single positional indexer is out-of-bounds'
|
|
with tm.assert_raises_regex(IndexError, msg):
|
|
df.iloc[30]
|
|
pytest.raises(IndexError, lambda: df.iloc[-30])
|
|
|
|
# GH10779
|
|
# single positive/negative indexer exceeding Series bounds should raise
|
|
# an IndexError
|
|
with tm.assert_raises_regex(IndexError, msg):
|
|
s.iloc[30]
|
|
pytest.raises(IndexError, lambda: s.iloc[-30])
|
|
|
|
# slices are ok
|
|
result = df.iloc[:, 4:10] # 0 < start < len < stop
|
|
expected = df.iloc[:, 4:]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = df.iloc[:, -4:-10] # stop < 0 < start < len
|
|
expected = df.iloc[:, :0]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = df.iloc[:, 10:4:-1] # 0 < stop < len < start (down)
|
|
expected = df.iloc[:, :4:-1]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = df.iloc[:, 4:-10:-1] # stop < 0 < start < len (down)
|
|
expected = df.iloc[:, 4::-1]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = df.iloc[:, -10:4] # start < 0 < stop < len
|
|
expected = df.iloc[:, :4]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = df.iloc[:, 10:4] # 0 < stop < len < start
|
|
expected = df.iloc[:, :0]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = df.iloc[:, -10:-11:-1] # stop < start < 0 < len (down)
|
|
expected = df.iloc[:, :0]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = df.iloc[:, 10:11] # 0 < len < start < stop
|
|
expected = df.iloc[:, :0]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
# slice bounds exceeding is ok
|
|
result = s.iloc[18:30]
|
|
expected = s.iloc[18:]
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
result = s.iloc[30:]
|
|
expected = s.iloc[:0]
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
result = s.iloc[30::-1]
|
|
expected = s.iloc[::-1]
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# doc example
|
|
def check(result, expected):
|
|
str(result)
|
|
result.dtypes
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
dfl = DataFrame(np.random.randn(5, 2), columns=list('AB'))
|
|
check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index))
|
|
check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]])
|
|
check(dfl.iloc[4:6], dfl.iloc[[4]])
|
|
|
|
pytest.raises(IndexError, lambda: dfl.iloc[[4, 5, 6]])
|
|
pytest.raises(IndexError, lambda: dfl.iloc[:, 4])
|
|
|
|
def test_iloc_getitem_int(self):
|
|
|
|
# integer
|
|
self.check_result('integer', 'iloc', 2, 'ix',
|
|
{0: 4, 1: 6, 2: 8}, typs=['ints', 'uints'])
|
|
self.check_result('integer', 'iloc', 2, 'indexer', 2,
|
|
typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
|
|
fails=IndexError)
|
|
|
|
def test_iloc_getitem_neg_int(self):
|
|
|
|
# neg integer
|
|
self.check_result('neg int', 'iloc', -1, 'ix',
|
|
{0: 6, 1: 9, 2: 12}, typs=['ints', 'uints'])
|
|
self.check_result('neg int', 'iloc', -1, 'indexer', -1,
|
|
typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
|
|
fails=IndexError)
|
|
|
|
def test_iloc_getitem_list_int(self):
|
|
|
|
# list of ints
|
|
self.check_result('list int', 'iloc', [0, 1, 2], 'ix',
|
|
{0: [0, 2, 4], 1: [0, 3, 6], 2: [0, 4, 8]},
|
|
typs=['ints', 'uints'])
|
|
self.check_result('list int', 'iloc', [2], 'ix',
|
|
{0: [4], 1: [6], 2: [8]}, typs=['ints', 'uints'])
|
|
self.check_result('list int', 'iloc', [0, 1, 2], 'indexer', [0, 1, 2],
|
|
typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
|
|
fails=IndexError)
|
|
|
|
# array of ints (GH5006), make sure that a single indexer is returning
|
|
# the correct type
|
|
self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'ix',
|
|
{0: [0, 2, 4],
|
|
1: [0, 3, 6],
|
|
2: [0, 4, 8]}, typs=['ints', 'uints'])
|
|
self.check_result('array int', 'iloc', np.array([2]), 'ix',
|
|
{0: [4], 1: [6], 2: [8]}, typs=['ints', 'uints'])
|
|
self.check_result('array int', 'iloc', np.array([0, 1, 2]), 'indexer',
|
|
[0, 1, 2],
|
|
typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
|
|
fails=IndexError)
|
|
|
|
def test_iloc_getitem_neg_int_can_reach_first_index(self):
|
|
# GH10547 and GH10779
|
|
# negative integers should be able to reach index 0
|
|
df = DataFrame({'A': [2, 3, 5], 'B': [7, 11, 13]})
|
|
s = df['A']
|
|
|
|
expected = df.iloc[0]
|
|
result = df.iloc[-3]
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
expected = df.iloc[[0]]
|
|
result = df.iloc[[-3]]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
expected = s.iloc[0]
|
|
result = s.iloc[-3]
|
|
assert result == expected
|
|
|
|
expected = s.iloc[[0]]
|
|
result = s.iloc[[-3]]
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
# check the length 1 Series case highlighted in GH10547
|
|
expected = Series(['a'], index=['A'])
|
|
result = expected.iloc[[-1]]
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
def test_iloc_getitem_dups(self):
|
|
|
|
# no dups in panel (bug?)
|
|
self.check_result('list int (dups)', 'iloc', [0, 1, 1, 3], 'ix',
|
|
{0: [0, 2, 2, 6], 1: [0, 3, 3, 9]},
|
|
objs=['series', 'frame'], typs=['ints', 'uints'])
|
|
|
|
# GH 6766
|
|
df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}])
|
|
df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}])
|
|
df = concat([df1, df2], axis=1)
|
|
|
|
# cross-sectional indexing
|
|
result = df.iloc[0, 0]
|
|
assert isna(result)
|
|
|
|
result = df.iloc[0, :]
|
|
expected = Series([np.nan, 1, 3, 3], index=['A', 'B', 'A', 'B'],
|
|
name=0)
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
def test_iloc_getitem_array(self):
|
|
|
|
# array like
|
|
s = Series(index=lrange(1, 4))
|
|
self.check_result('array like', 'iloc', s.index, 'ix',
|
|
{0: [2, 4, 6], 1: [3, 6, 9], 2: [4, 8, 12]},
|
|
typs=['ints', 'uints'])
|
|
|
|
def test_iloc_getitem_bool(self):
|
|
|
|
# boolean indexers
|
|
b = [True, False, True, False, ]
|
|
self.check_result('bool', 'iloc', b, 'ix', b, typs=['ints', 'uints'])
|
|
self.check_result('bool', 'iloc', b, 'ix', b,
|
|
typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
|
|
fails=IndexError)
|
|
|
|
def test_iloc_getitem_slice(self):
|
|
|
|
# slices
|
|
self.check_result('slice', 'iloc', slice(1, 3), 'ix',
|
|
{0: [2, 4], 1: [3, 6], 2: [4, 8]},
|
|
typs=['ints', 'uints'])
|
|
self.check_result('slice', 'iloc', slice(1, 3), 'indexer',
|
|
slice(1, 3),
|
|
typs=['labels', 'mixed', 'ts', 'floats', 'empty'],
|
|
fails=IndexError)
|
|
|
|
def test_iloc_getitem_slice_dups(self):
|
|
|
|
df1 = DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B'])
|
|
df2 = DataFrame(np.random.randint(0, 10, size=20).reshape(10, 2),
|
|
columns=['A', 'C'])
|
|
|
|
# axis=1
|
|
df = concat([df1, df2], axis=1)
|
|
tm.assert_frame_equal(df.iloc[:, :4], df1)
|
|
tm.assert_frame_equal(df.iloc[:, 4:], df2)
|
|
|
|
df = concat([df2, df1], axis=1)
|
|
tm.assert_frame_equal(df.iloc[:, :2], df2)
|
|
tm.assert_frame_equal(df.iloc[:, 2:], df1)
|
|
|
|
exp = concat([df2, df1.iloc[:, [0]]], axis=1)
|
|
tm.assert_frame_equal(df.iloc[:, 0:3], exp)
|
|
|
|
# axis=0
|
|
df = concat([df, df], axis=0)
|
|
tm.assert_frame_equal(df.iloc[0:10, :2], df2)
|
|
tm.assert_frame_equal(df.iloc[0:10, 2:], df1)
|
|
tm.assert_frame_equal(df.iloc[10:, :2], df2)
|
|
tm.assert_frame_equal(df.iloc[10:, 2:], df1)
|
|
|
|
def test_iloc_setitem(self):
|
|
df = self.frame_ints
|
|
|
|
df.iloc[1, 1] = 1
|
|
result = df.iloc[1, 1]
|
|
assert result == 1
|
|
|
|
df.iloc[:, 2:3] = 0
|
|
expected = df.iloc[:, 2:3]
|
|
result = df.iloc[:, 2:3]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
# GH5771
|
|
s = Series(0, index=[4, 5, 6])
|
|
s.iloc[1:2] += 1
|
|
expected = Series([0, 1, 0], index=[4, 5, 6])
|
|
tm.assert_series_equal(s, expected)
|
|
|
|
@pytest.mark.parametrize(
|
|
'data, indexes, values, expected_k', [
|
|
# test without indexer value in first level of MultiIndex
|
|
([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]),
|
|
# test like code sample 1 in the issue
|
|
([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100],
|
|
[755, 1066]),
|
|
# test like code sample 2 in the issue
|
|
([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]),
|
|
# test like code sample 3 in the issue
|
|
([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10],
|
|
[8, 15, 13])
|
|
])
|
|
def test_iloc_setitem_int_multiindex_series(
|
|
self, data, indexes, values, expected_k):
|
|
# GH17148
|
|
df = DataFrame(data=data, columns=['i', 'j', 'k'])
|
|
df = df.set_index(['i', 'j'])
|
|
|
|
series = df.k.copy()
|
|
for i, v in zip(indexes, values):
|
|
series.iloc[i] += v
|
|
|
|
df['k'] = expected_k
|
|
expected = df.k
|
|
tm.assert_series_equal(series, expected)
|
|
|
|
def test_iloc_setitem_list(self):
|
|
|
|
# setitem with an iloc list
|
|
df = DataFrame(np.arange(9).reshape((3, 3)), index=["A", "B", "C"],
|
|
columns=["A", "B", "C"])
|
|
df.iloc[[0, 1], [1, 2]]
|
|
df.iloc[[0, 1], [1, 2]] += 100
|
|
|
|
expected = DataFrame(
|
|
np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)),
|
|
index=["A", "B", "C"], columns=["A", "B", "C"])
|
|
tm.assert_frame_equal(df, expected)
|
|
|
|
def test_iloc_setitem_pandas_object(self):
|
|
# GH 17193, affecting old numpy (1.7 and 1.8)
|
|
s_orig = Series([0, 1, 2, 3])
|
|
expected = Series([0, -1, -2, 3])
|
|
|
|
s = s_orig.copy()
|
|
s.iloc[Series([1, 2])] = [-1, -2]
|
|
tm.assert_series_equal(s, expected)
|
|
|
|
s = s_orig.copy()
|
|
s.iloc[pd.Index([1, 2])] = [-1, -2]
|
|
tm.assert_series_equal(s, expected)
|
|
|
|
def test_iloc_setitem_dups(self):
|
|
|
|
# GH 6766
|
|
# iloc with a mask aligning from another iloc
|
|
df1 = DataFrame([{'A': None, 'B': 1}, {'A': 2, 'B': 2}])
|
|
df2 = DataFrame([{'A': 3, 'B': 3}, {'A': 4, 'B': 4}])
|
|
df = concat([df1, df2], axis=1)
|
|
|
|
expected = df.fillna(3)
|
|
expected['A'] = expected['A'].astype('float64')
|
|
inds = np.isnan(df.iloc[:, 0])
|
|
mask = inds[inds].index
|
|
df.iloc[mask, 0] = df.iloc[mask, 2]
|
|
tm.assert_frame_equal(df, expected)
|
|
|
|
# del a dup column across blocks
|
|
expected = DataFrame({0: [1, 2], 1: [3, 4]})
|
|
expected.columns = ['B', 'B']
|
|
del df['A']
|
|
tm.assert_frame_equal(df, expected)
|
|
|
|
# assign back to self
|
|
df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]]
|
|
tm.assert_frame_equal(df, expected)
|
|
|
|
# reversed x 2
|
|
df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(
|
|
drop=True)
|
|
df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(
|
|
drop=True)
|
|
tm.assert_frame_equal(df, expected)
|
|
|
|
def test_iloc_getitem_frame(self):
|
|
df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2),
|
|
columns=lrange(0, 8, 2))
|
|
|
|
result = df.iloc[2]
|
|
with catch_warnings(record=True):
|
|
exp = df.ix[4]
|
|
tm.assert_series_equal(result, exp)
|
|
|
|
result = df.iloc[2, 2]
|
|
with catch_warnings(record=True):
|
|
exp = df.ix[4, 4]
|
|
assert result == exp
|
|
|
|
# slice
|
|
result = df.iloc[4:8]
|
|
with catch_warnings(record=True):
|
|
expected = df.ix[8:14]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = df.iloc[:, 2:3]
|
|
with catch_warnings(record=True):
|
|
expected = df.ix[:, 4:5]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
# list of integers
|
|
result = df.iloc[[0, 1, 3]]
|
|
with catch_warnings(record=True):
|
|
expected = df.ix[[0, 2, 6]]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
result = df.iloc[[0, 1, 3], [0, 1]]
|
|
with catch_warnings(record=True):
|
|
expected = df.ix[[0, 2, 6], [0, 2]]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
# neg indicies
|
|
result = df.iloc[[-1, 1, 3], [-1, 1]]
|
|
with catch_warnings(record=True):
|
|
expected = df.ix[[18, 2, 6], [6, 2]]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
# dups indicies
|
|
result = df.iloc[[-1, -1, 1, 3], [-1, 1]]
|
|
with catch_warnings(record=True):
|
|
expected = df.ix[[18, 18, 2, 6], [6, 2]]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
# with index-like
|
|
s = Series(index=lrange(1, 5))
|
|
result = df.iloc[s.index]
|
|
with catch_warnings(record=True):
|
|
expected = df.ix[[2, 4, 6, 8]]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_iloc_getitem_labelled_frame(self):
|
|
# try with labelled frame
|
|
df = DataFrame(np.random.randn(10, 4),
|
|
index=list('abcdefghij'), columns=list('ABCD'))
|
|
|
|
result = df.iloc[1, 1]
|
|
exp = df.loc['b', 'B']
|
|
assert result == exp
|
|
|
|
result = df.iloc[:, 2:3]
|
|
expected = df.loc[:, ['C']]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
# negative indexing
|
|
result = df.iloc[-1, -1]
|
|
exp = df.loc['j', 'D']
|
|
assert result == exp
|
|
|
|
# out-of-bounds exception
|
|
pytest.raises(IndexError, df.iloc.__getitem__, tuple([10, 5]))
|
|
|
|
# trying to use a label
|
|
pytest.raises(ValueError, df.iloc.__getitem__, tuple(['j', 'D']))
|
|
|
|
def test_iloc_getitem_doc_issue(self):
|
|
|
|
# multi axis slicing issue with single block
|
|
# surfaced in GH 6059
|
|
|
|
arr = np.random.randn(6, 4)
|
|
index = date_range('20130101', periods=6)
|
|
columns = list('ABCD')
|
|
df = DataFrame(arr, index=index, columns=columns)
|
|
|
|
# defines ref_locs
|
|
df.describe()
|
|
|
|
result = df.iloc[3:5, 0:2]
|
|
str(result)
|
|
result.dtypes
|
|
|
|
expected = DataFrame(arr[3:5, 0:2], index=index[3:5],
|
|
columns=columns[0:2])
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
# for dups
|
|
df.columns = list('aaaa')
|
|
result = df.iloc[3:5, 0:2]
|
|
str(result)
|
|
result.dtypes
|
|
|
|
expected = DataFrame(arr[3:5, 0:2], index=index[3:5],
|
|
columns=list('aa'))
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
# related
|
|
arr = np.random.randn(6, 4)
|
|
index = list(range(0, 12, 2))
|
|
columns = list(range(0, 8, 2))
|
|
df = DataFrame(arr, index=index, columns=columns)
|
|
|
|
df._data.blocks[0].mgr_locs
|
|
result = df.iloc[1:5, 2:4]
|
|
str(result)
|
|
result.dtypes
|
|
expected = DataFrame(arr[1:5, 2:4], index=index[1:5],
|
|
columns=columns[2:4])
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
def test_iloc_setitem_series(self):
|
|
df = DataFrame(np.random.randn(10, 4), index=list('abcdefghij'),
|
|
columns=list('ABCD'))
|
|
|
|
df.iloc[1, 1] = 1
|
|
result = df.iloc[1, 1]
|
|
assert result == 1
|
|
|
|
df.iloc[:, 2:3] = 0
|
|
expected = df.iloc[:, 2:3]
|
|
result = df.iloc[:, 2:3]
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
s = Series(np.random.randn(10), index=lrange(0, 20, 2))
|
|
|
|
s.iloc[1] = 1
|
|
result = s.iloc[1]
|
|
assert result == 1
|
|
|
|
s.iloc[:4] = 0
|
|
expected = s.iloc[:4]
|
|
result = s.iloc[:4]
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
s = Series([-1] * 6)
|
|
s.iloc[0::2] = [0, 2, 4]
|
|
s.iloc[1::2] = [1, 3, 5]
|
|
result = s
|
|
expected = Series([0, 1, 2, 3, 4, 5])
|
|
tm.assert_series_equal(result, expected)
|
|
|
|
def test_iloc_setitem_list_of_lists(self):
|
|
|
|
# GH 7551
|
|
# list-of-list is set incorrectly in mixed vs. single dtyped frames
|
|
df = DataFrame(dict(A=np.arange(5, dtype='int64'),
|
|
B=np.arange(5, 10, dtype='int64')))
|
|
df.iloc[2:4] = [[10, 11], [12, 13]]
|
|
expected = DataFrame(dict(A=[0, 1, 10, 12, 4], B=[5, 6, 11, 13, 9]))
|
|
tm.assert_frame_equal(df, expected)
|
|
|
|
df = DataFrame(
|
|
dict(A=list('abcde'), B=np.arange(5, 10, dtype='int64')))
|
|
df.iloc[2:4] = [['x', 11], ['y', 13]]
|
|
expected = DataFrame(dict(A=['a', 'b', 'x', 'y', 'e'],
|
|
B=[5, 6, 11, 13, 9]))
|
|
tm.assert_frame_equal(df, expected)
|
|
|
|
@pytest.mark.parametrize(
|
|
'indexer', [[0], slice(None, 1, None), np.array([0])])
|
|
@pytest.mark.parametrize(
|
|
'value', [['Z'], np.array(['Z'])])
|
|
def test_iloc_setitem_with_scalar_index(self, indexer, value):
|
|
# GH #19474
|
|
# assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated
|
|
# elementwisely, not using "setter('A', ['Z'])".
|
|
|
|
df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
|
|
df.iloc[0, indexer] = value
|
|
result = df.iloc[0, 0]
|
|
|
|
assert is_scalar(result) and result == 'Z'
|
|
|
|
def test_iloc_mask(self):
|
|
|
|
# GH 3631, iloc with a mask (of a series) should raise
|
|
df = DataFrame(lrange(5), list('ABCDE'), columns=['a'])
|
|
mask = (df.a % 2 == 0)
|
|
pytest.raises(ValueError, df.iloc.__getitem__, tuple([mask]))
|
|
mask.index = lrange(len(mask))
|
|
pytest.raises(NotImplementedError, df.iloc.__getitem__,
|
|
tuple([mask]))
|
|
|
|
# ndarray ok
|
|
result = df.iloc[np.array([True] * len(mask), dtype=bool)]
|
|
tm.assert_frame_equal(result, df)
|
|
|
|
# the possibilities
|
|
locs = np.arange(4)
|
|
nums = 2 ** locs
|
|
reps = lmap(bin, nums)
|
|
df = DataFrame({'locs': locs, 'nums': nums}, reps)
|
|
|
|
expected = {
|
|
(None, ''): '0b1100',
|
|
(None, '.loc'): '0b1100',
|
|
(None, '.iloc'): '0b1100',
|
|
('index', ''): '0b11',
|
|
('index', '.loc'): '0b11',
|
|
('index', '.iloc'): ('iLocation based boolean indexing '
|
|
'cannot use an indexable as a mask'),
|
|
('locs', ''): 'Unalignable boolean Series provided as indexer '
|
|
'(index of the boolean Series and of the indexed '
|
|
'object do not match',
|
|
('locs', '.loc'): 'Unalignable boolean Series provided as indexer '
|
|
'(index of the boolean Series and of the '
|
|
'indexed object do not match',
|
|
('locs', '.iloc'): ('iLocation based boolean indexing on an '
|
|
'integer type is not available'),
|
|
}
|
|
|
|
# UserWarnings from reindex of a boolean mask
|
|
with catch_warnings(record=True):
|
|
result = dict()
|
|
for idx in [None, 'index', 'locs']:
|
|
mask = (df.nums > 2).values
|
|
if idx:
|
|
mask = Series(mask, list(reversed(getattr(df, idx))))
|
|
for method in ['', '.loc', '.iloc']:
|
|
try:
|
|
if method:
|
|
accessor = getattr(df, method[1:])
|
|
else:
|
|
accessor = df
|
|
ans = str(bin(accessor[mask]['nums'].sum()))
|
|
except Exception as e:
|
|
ans = str(e)
|
|
|
|
key = tuple([idx, method])
|
|
r = expected.get(key)
|
|
if r != ans:
|
|
raise AssertionError(
|
|
"[%s] does not match [%s], received [%s]"
|
|
% (key, ans, r))
|
|
|
|
def test_iloc_non_unique_indexing(self):
|
|
|
|
# GH 4017, non-unique indexing (on the axis)
|
|
df = DataFrame({'A': [0.1] * 3000, 'B': [1] * 3000})
|
|
idx = np.array(lrange(30)) * 99
|
|
expected = df.iloc[idx]
|
|
|
|
df3 = concat([df, 2 * df, 3 * df])
|
|
result = df3.iloc[idx]
|
|
|
|
tm.assert_frame_equal(result, expected)
|
|
|
|
df2 = DataFrame({'A': [0.1] * 1000, 'B': [1] * 1000})
|
|
df2 = concat([df2, 2 * df2, 3 * df2])
|
|
|
|
sidx = df2.index.to_series()
|
|
expected = df2.iloc[idx[idx <= sidx.max()]]
|
|
|
|
new_list = []
|
|
for r, s in expected.iterrows():
|
|
new_list.append(s)
|
|
new_list.append(s * 2)
|
|
new_list.append(s * 3)
|
|
|
|
expected = DataFrame(new_list)
|
|
expected = concat([expected, DataFrame(index=idx[idx > sidx.max()])],
|
|
sort=True)
|
|
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
|
result = df2.loc[idx]
|
|
tm.assert_frame_equal(result, expected, check_index_type=False)
|
|
|
|
def test_iloc_empty_list_indexer_is_ok(self):
|
|
from pandas.util.testing import makeCustomDataframe as mkdf
|
|
df = mkdf(5, 2)
|
|
# vertical empty
|
|
tm.assert_frame_equal(df.iloc[:, []], df.iloc[:, :0],
|
|
check_index_type=True, check_column_type=True)
|
|
# horizontal empty
|
|
tm.assert_frame_equal(df.iloc[[], :], df.iloc[:0, :],
|
|
check_index_type=True, check_column_type=True)
|
|
# horizontal empty
|
|
tm.assert_frame_equal(df.iloc[[]], df.iloc[:0, :],
|
|
check_index_type=True,
|
|
check_column_type=True)
|
|
|
|
def test_identity_slice_returns_new_object(self):
|
|
# GH13873
|
|
original_df = DataFrame({'a': [1, 2, 3]})
|
|
sliced_df = original_df.iloc[:]
|
|
assert sliced_df is not original_df
|
|
|
|
# should be a shallow copy
|
|
original_df['a'] = [4, 4, 4]
|
|
assert (sliced_df['a'] == 4).all()
|
|
|
|
original_series = Series([1, 2, 3, 4, 5, 6])
|
|
sliced_series = original_series.iloc[:]
|
|
assert sliced_series is not original_series
|
|
|
|
# should also be a shallow copy
|
|
original_series[:3] = [7, 8, 9]
|
|
assert all(sliced_series[:3] == [7, 8, 9])
|