806 lines
30 KiB
Python
806 lines
30 KiB
Python
|
""" test label based indexing with loc """
|
||
|
|
||
|
import itertools
|
||
|
import pytest
|
||
|
|
||
|
from warnings import catch_warnings
|
||
|
import numpy as np
|
||
|
|
||
|
import pandas as pd
|
||
|
from pandas.compat import lrange, StringIO
|
||
|
from pandas import Series, DataFrame, Timestamp, date_range, MultiIndex, Index
|
||
|
from pandas.util import testing as tm
|
||
|
from pandas.tests.indexing.common import Base
|
||
|
from pandas.api.types import is_scalar
|
||
|
from pandas.compat import PY2
|
||
|
|
||
|
|
||
|
class TestLoc(Base):
|
||
|
|
||
|
def test_loc_getitem_dups(self):
|
||
|
# GH 5678
|
||
|
# repeated gettitems on a dup index returning a ndarray
|
||
|
df = DataFrame(
|
||
|
np.random.random_sample((20, 5)),
|
||
|
index=['ABCDE' [x % 5] for x in range(20)])
|
||
|
expected = df.loc['A', 0]
|
||
|
result = df.loc[:, 0].loc['A']
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
def test_loc_getitem_dups2(self):
|
||
|
|
||
|
# GH4726
|
||
|
# dup indexing with iloc/loc
|
||
|
df = DataFrame([[1, 2, 'foo', 'bar', Timestamp('20130101')]],
|
||
|
columns=['a', 'a', 'a', 'a', 'a'], index=[1])
|
||
|
expected = Series([1, 2, 'foo', 'bar', Timestamp('20130101')],
|
||
|
index=['a', 'a', 'a', 'a', 'a'], name=1)
|
||
|
|
||
|
result = df.iloc[0]
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
result = df.loc[1]
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
def test_loc_setitem_dups(self):
|
||
|
|
||
|
# GH 6541
|
||
|
df_orig = DataFrame(
|
||
|
{'me': list('rttti'),
|
||
|
'foo': list('aaade'),
|
||
|
'bar': np.arange(5, dtype='float64') * 1.34 + 2,
|
||
|
'bar2': np.arange(5, dtype='float64') * -.34 + 2}).set_index('me')
|
||
|
|
||
|
indexer = tuple(['r', ['bar', 'bar2']])
|
||
|
df = df_orig.copy()
|
||
|
df.loc[indexer] *= 2.0
|
||
|
tm.assert_series_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer])
|
||
|
|
||
|
indexer = tuple(['r', 'bar'])
|
||
|
df = df_orig.copy()
|
||
|
df.loc[indexer] *= 2.0
|
||
|
assert df.loc[indexer] == 2.0 * df_orig.loc[indexer]
|
||
|
|
||
|
indexer = tuple(['t', ['bar', 'bar2']])
|
||
|
df = df_orig.copy()
|
||
|
df.loc[indexer] *= 2.0
|
||
|
tm.assert_frame_equal(df.loc[indexer], 2.0 * df_orig.loc[indexer])
|
||
|
|
||
|
def test_loc_setitem_slice(self):
|
||
|
# GH10503
|
||
|
|
||
|
# assigning the same type should not change the type
|
||
|
df1 = DataFrame({'a': [0, 1, 1],
|
||
|
'b': Series([100, 200, 300], dtype='uint32')})
|
||
|
ix = df1['a'] == 1
|
||
|
newb1 = df1.loc[ix, 'b'] + 1
|
||
|
df1.loc[ix, 'b'] = newb1
|
||
|
expected = DataFrame({'a': [0, 1, 1],
|
||
|
'b': Series([100, 201, 301], dtype='uint32')})
|
||
|
tm.assert_frame_equal(df1, expected)
|
||
|
|
||
|
# assigning a new type should get the inferred type
|
||
|
df2 = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
|
||
|
dtype='uint64')
|
||
|
ix = df1['a'] == 1
|
||
|
newb2 = df2.loc[ix, 'b']
|
||
|
df1.loc[ix, 'b'] = newb2
|
||
|
expected = DataFrame({'a': [0, 1, 1], 'b': [100, 200, 300]},
|
||
|
dtype='uint64')
|
||
|
tm.assert_frame_equal(df2, expected)
|
||
|
|
||
|
def test_loc_getitem_int(self):
|
||
|
|
||
|
# int label
|
||
|
self.check_result('int label', 'loc', 2, 'ix', 2,
|
||
|
typs=['ints', 'uints'], axes=0)
|
||
|
self.check_result('int label', 'loc', 3, 'ix', 3,
|
||
|
typs=['ints', 'uints'], axes=1)
|
||
|
self.check_result('int label', 'loc', 4, 'ix', 4,
|
||
|
typs=['ints', 'uints'], axes=2)
|
||
|
self.check_result('int label', 'loc', 2, 'ix', 2,
|
||
|
typs=['label'], fails=KeyError)
|
||
|
|
||
|
def test_loc_getitem_label(self):
|
||
|
|
||
|
# label
|
||
|
self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['labels'],
|
||
|
axes=0)
|
||
|
self.check_result('label', 'loc', 'null', 'ix', 'null', typs=['mixed'],
|
||
|
axes=0)
|
||
|
self.check_result('label', 'loc', 8, 'ix', 8, typs=['mixed'], axes=0)
|
||
|
self.check_result('label', 'loc', Timestamp('20130102'), 'ix', 1,
|
||
|
typs=['ts'], axes=0)
|
||
|
self.check_result('label', 'loc', 'c', 'ix', 'c', typs=['empty'],
|
||
|
fails=KeyError)
|
||
|
|
||
|
def test_loc_getitem_label_out_of_range(self):
|
||
|
|
||
|
# out of range label
|
||
|
self.check_result('label range', 'loc', 'f', 'ix', 'f',
|
||
|
typs=['ints', 'uints', 'labels', 'mixed', 'ts'],
|
||
|
fails=KeyError)
|
||
|
self.check_result('label range', 'loc', 'f', 'ix', 'f',
|
||
|
typs=['floats'], fails=KeyError)
|
||
|
self.check_result('label range', 'loc', 20, 'ix', 20,
|
||
|
typs=['ints', 'uints', 'mixed'], fails=KeyError)
|
||
|
self.check_result('label range', 'loc', 20, 'ix', 20,
|
||
|
typs=['labels'], fails=TypeError)
|
||
|
self.check_result('label range', 'loc', 20, 'ix', 20, typs=['ts'],
|
||
|
axes=0, fails=TypeError)
|
||
|
self.check_result('label range', 'loc', 20, 'ix', 20, typs=['floats'],
|
||
|
axes=0, fails=KeyError)
|
||
|
|
||
|
def test_loc_getitem_label_list(self):
|
||
|
|
||
|
# list of labels
|
||
|
self.check_result('list lbl', 'loc', [0, 2, 4], 'ix', [0, 2, 4],
|
||
|
typs=['ints', 'uints'], axes=0)
|
||
|
self.check_result('list lbl', 'loc', [3, 6, 9], 'ix', [3, 6, 9],
|
||
|
typs=['ints', 'uints'], axes=1)
|
||
|
self.check_result('list lbl', 'loc', [4, 8, 12], 'ix', [4, 8, 12],
|
||
|
typs=['ints', 'uints'], axes=2)
|
||
|
self.check_result('list lbl', 'loc', ['a', 'b', 'd'], 'ix',
|
||
|
['a', 'b', 'd'], typs=['labels'], axes=0)
|
||
|
self.check_result('list lbl', 'loc', ['A', 'B', 'C'], 'ix',
|
||
|
['A', 'B', 'C'], typs=['labels'], axes=1)
|
||
|
self.check_result('list lbl', 'loc', ['Z', 'Y', 'W'], 'ix',
|
||
|
['Z', 'Y', 'W'], typs=['labels'], axes=2)
|
||
|
self.check_result('list lbl', 'loc', [2, 8, 'null'], 'ix',
|
||
|
[2, 8, 'null'], typs=['mixed'], axes=0)
|
||
|
self.check_result('list lbl', 'loc',
|
||
|
[Timestamp('20130102'), Timestamp('20130103')], 'ix',
|
||
|
[Timestamp('20130102'), Timestamp('20130103')],
|
||
|
typs=['ts'], axes=0)
|
||
|
|
||
|
@pytest.mark.skipif(PY2, reason=("Catching warnings unreliable with "
|
||
|
"Python 2 (GH #20770)"))
|
||
|
def test_loc_getitem_label_list_with_missing(self):
|
||
|
self.check_result('list lbl', 'loc', [0, 1, 2], 'indexer', [0, 1, 2],
|
||
|
typs=['empty'], fails=KeyError)
|
||
|
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||
|
self.check_result('list lbl', 'loc', [0, 2, 10], 'ix', [0, 2, 10],
|
||
|
typs=['ints', 'uints', 'floats'],
|
||
|
axes=0, fails=KeyError)
|
||
|
|
||
|
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||
|
self.check_result('list lbl', 'loc', [3, 6, 7], 'ix', [3, 6, 7],
|
||
|
typs=['ints', 'uints', 'floats'],
|
||
|
axes=1, fails=KeyError)
|
||
|
|
||
|
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||
|
self.check_result('list lbl', 'loc', [4, 8, 10], 'ix', [4, 8, 10],
|
||
|
typs=['ints', 'uints', 'floats'],
|
||
|
axes=2, fails=KeyError)
|
||
|
|
||
|
# GH 17758 - MultiIndex and missing keys
|
||
|
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||
|
self.check_result('list lbl', 'loc', [(1, 3), (1, 4), (2, 5)],
|
||
|
'ix', [(1, 3), (1, 4), (2, 5)],
|
||
|
typs=['multi'],
|
||
|
axes=0)
|
||
|
|
||
|
def test_getitem_label_list_with_missing(self):
|
||
|
s = Series(range(3), index=['a', 'b', 'c'])
|
||
|
|
||
|
# consistency
|
||
|
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||
|
s[['a', 'd']]
|
||
|
|
||
|
s = Series(range(3))
|
||
|
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
|
||
|
s[[0, 3]]
|
||
|
|
||
|
def test_loc_getitem_label_list_fails(self):
|
||
|
# fails
|
||
|
self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40],
|
||
|
typs=['ints', 'uints'], axes=1, fails=KeyError)
|
||
|
self.check_result('list lbl', 'loc', [20, 30, 40], 'ix', [20, 30, 40],
|
||
|
typs=['ints', 'uints'], axes=2, fails=KeyError)
|
||
|
|
||
|
def test_loc_getitem_label_array_like(self):
|
||
|
# array like
|
||
|
self.check_result('array like', 'loc', Series(index=[0, 2, 4]).index,
|
||
|
'ix', [0, 2, 4], typs=['ints', 'uints'], axes=0)
|
||
|
self.check_result('array like', 'loc', Series(index=[3, 6, 9]).index,
|
||
|
'ix', [3, 6, 9], typs=['ints', 'uints'], axes=1)
|
||
|
self.check_result('array like', 'loc', Series(index=[4, 8, 12]).index,
|
||
|
'ix', [4, 8, 12], typs=['ints', 'uints'], axes=2)
|
||
|
|
||
|
def test_loc_getitem_bool(self):
|
||
|
# boolean indexers
|
||
|
b = [True, False, True, False]
|
||
|
self.check_result('bool', 'loc', b, 'ix', b,
|
||
|
typs=['ints', 'uints', 'labels',
|
||
|
'mixed', 'ts', 'floats'])
|
||
|
self.check_result('bool', 'loc', b, 'ix', b, typs=['empty'],
|
||
|
fails=KeyError)
|
||
|
|
||
|
def test_loc_getitem_int_slice(self):
|
||
|
|
||
|
# ok
|
||
|
self.check_result('int slice2', 'loc', slice(2, 4), 'ix', [2, 4],
|
||
|
typs=['ints', 'uints'], axes=0)
|
||
|
self.check_result('int slice2', 'loc', slice(3, 6), 'ix', [3, 6],
|
||
|
typs=['ints', 'uints'], axes=1)
|
||
|
self.check_result('int slice2', 'loc', slice(4, 8), 'ix', [4, 8],
|
||
|
typs=['ints', 'uints'], axes=2)
|
||
|
|
||
|
# GH 3053
|
||
|
# loc should treat integer slices like label slices
|
||
|
|
||
|
index = MultiIndex.from_tuples([t for t in itertools.product(
|
||
|
[6, 7, 8], ['a', 'b'])])
|
||
|
df = DataFrame(np.random.randn(6, 6), index, index)
|
||
|
result = df.loc[6:8, :]
|
||
|
expected = df
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
index = MultiIndex.from_tuples([t
|
||
|
for t in itertools.product(
|
||
|
[10, 20, 30], ['a', 'b'])])
|
||
|
df = DataFrame(np.random.randn(6, 6), index, index)
|
||
|
result = df.loc[20:30, :]
|
||
|
expected = df.iloc[2:]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
# doc examples
|
||
|
result = df.loc[10, :]
|
||
|
expected = df.iloc[0:2]
|
||
|
expected.index = ['a', 'b']
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.loc[:, 10]
|
||
|
# expected = df.ix[:,10] (this fails)
|
||
|
expected = df[10]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_loc_to_fail(self):
|
||
|
|
||
|
# GH3449
|
||
|
df = DataFrame(np.random.random((3, 3)),
|
||
|
index=['a', 'b', 'c'],
|
||
|
columns=['e', 'f', 'g'])
|
||
|
|
||
|
# raise a KeyError?
|
||
|
pytest.raises(KeyError, df.loc.__getitem__,
|
||
|
tuple([[1, 2], [1, 2]]))
|
||
|
|
||
|
# GH 7496
|
||
|
# loc should not fallback
|
||
|
|
||
|
s = Series()
|
||
|
s.loc[1] = 1
|
||
|
s.loc['a'] = 2
|
||
|
|
||
|
pytest.raises(KeyError, lambda: s.loc[-1])
|
||
|
pytest.raises(KeyError, lambda: s.loc[[-1, -2]])
|
||
|
|
||
|
pytest.raises(KeyError, lambda: s.loc[['4']])
|
||
|
|
||
|
s.loc[-1] = 3
|
||
|
with tm.assert_produces_warning(FutureWarning,
|
||
|
check_stacklevel=False):
|
||
|
result = s.loc[[-1, -2]]
|
||
|
expected = Series([3, np.nan], index=[-1, -2])
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
s['a'] = 2
|
||
|
pytest.raises(KeyError, lambda: s.loc[[-2]])
|
||
|
|
||
|
del s['a']
|
||
|
|
||
|
def f():
|
||
|
s.loc[[-2]] = 0
|
||
|
|
||
|
pytest.raises(KeyError, f)
|
||
|
|
||
|
# inconsistency between .loc[values] and .loc[values,:]
|
||
|
# GH 7999
|
||
|
df = DataFrame([['a'], ['b']], index=[1, 2], columns=['value'])
|
||
|
|
||
|
def f():
|
||
|
df.loc[[3], :]
|
||
|
|
||
|
pytest.raises(KeyError, f)
|
||
|
|
||
|
def f():
|
||
|
df.loc[[3]]
|
||
|
|
||
|
pytest.raises(KeyError, f)
|
||
|
|
||
|
def test_loc_getitem_list_with_fail(self):
|
||
|
# 15747
|
||
|
# should KeyError if *any* missing labels
|
||
|
|
||
|
s = Series([1, 2, 3])
|
||
|
|
||
|
s.loc[[2]]
|
||
|
|
||
|
with pytest.raises(KeyError):
|
||
|
s.loc[[3]]
|
||
|
|
||
|
# a non-match and a match
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
expected = s.loc[[2, 3]]
|
||
|
result = s.reindex([2, 3])
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
def test_loc_getitem_label_slice(self):
|
||
|
|
||
|
# label slices (with ints)
|
||
|
self.check_result('lab slice', 'loc', slice(1, 3),
|
||
|
'ix', slice(1, 3),
|
||
|
typs=['labels', 'mixed', 'empty', 'ts', 'floats'],
|
||
|
fails=TypeError)
|
||
|
|
||
|
# real label slices
|
||
|
self.check_result('lab slice', 'loc', slice('a', 'c'),
|
||
|
'ix', slice('a', 'c'), typs=['labels'], axes=0)
|
||
|
self.check_result('lab slice', 'loc', slice('A', 'C'),
|
||
|
'ix', slice('A', 'C'), typs=['labels'], axes=1)
|
||
|
self.check_result('lab slice', 'loc', slice('W', 'Z'),
|
||
|
'ix', slice('W', 'Z'), typs=['labels'], axes=2)
|
||
|
|
||
|
self.check_result('ts slice', 'loc', slice('20130102', '20130104'),
|
||
|
'ix', slice('20130102', '20130104'),
|
||
|
typs=['ts'], axes=0)
|
||
|
self.check_result('ts slice', 'loc', slice('20130102', '20130104'),
|
||
|
'ix', slice('20130102', '20130104'),
|
||
|
typs=['ts'], axes=1, fails=TypeError)
|
||
|
self.check_result('ts slice', 'loc', slice('20130102', '20130104'),
|
||
|
'ix', slice('20130102', '20130104'),
|
||
|
typs=['ts'], axes=2, fails=TypeError)
|
||
|
|
||
|
# GH 14316
|
||
|
self.check_result('ts slice rev', 'loc', slice('20130104', '20130102'),
|
||
|
'indexer', [0, 1, 2], typs=['ts_rev'], axes=0)
|
||
|
|
||
|
self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8),
|
||
|
typs=['mixed'], axes=0, fails=TypeError)
|
||
|
self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8),
|
||
|
typs=['mixed'], axes=1, fails=KeyError)
|
||
|
self.check_result('mixed slice', 'loc', slice(2, 8), 'ix', slice(2, 8),
|
||
|
typs=['mixed'], axes=2, fails=KeyError)
|
||
|
|
||
|
self.check_result('mixed slice', 'loc', slice(2, 4, 2), 'ix', slice(
|
||
|
2, 4, 2), typs=['mixed'], axes=0, fails=TypeError)
|
||
|
|
||
|
def test_loc_index(self):
|
||
|
# gh-17131
|
||
|
# a boolean index should index like a boolean numpy array
|
||
|
|
||
|
df = DataFrame(
|
||
|
np.random.random(size=(5, 10)),
|
||
|
index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"])
|
||
|
|
||
|
mask = df.index.map(lambda x: "alpha" in x)
|
||
|
expected = df.loc[np.array(mask)]
|
||
|
|
||
|
result = df.loc[mask]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.loc[mask.values]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_loc_general(self):
|
||
|
|
||
|
df = DataFrame(
|
||
|
np.random.rand(4, 4), columns=['A', 'B', 'C', 'D'],
|
||
|
index=['A', 'B', 'C', 'D'])
|
||
|
|
||
|
# want this to work
|
||
|
result = df.loc[:, "A":"B"].iloc[0:2, :]
|
||
|
assert (result.columns == ['A', 'B']).all()
|
||
|
assert (result.index == ['A', 'B']).all()
|
||
|
|
||
|
# mixed type
|
||
|
result = DataFrame({'a': [Timestamp('20130101')], 'b': [1]}).iloc[0]
|
||
|
expected = Series([Timestamp('20130101'), 1], index=['a', 'b'], name=0)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
assert result.dtype == object
|
||
|
|
||
|
def test_loc_setitem_consistency(self):
|
||
|
# GH 6149
|
||
|
# coerce similarly for setitem and loc when rows have a null-slice
|
||
|
expected = DataFrame({'date': Series(0, index=range(5),
|
||
|
dtype=np.int64),
|
||
|
'val': Series(range(5), dtype=np.int64)})
|
||
|
|
||
|
df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
|
||
|
'val': Series(
|
||
|
range(5), dtype=np.int64)})
|
||
|
df.loc[:, 'date'] = 0
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
|
||
|
'val': Series(range(5), dtype=np.int64)})
|
||
|
df.loc[:, 'date'] = np.array(0, dtype=np.int64)
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
|
||
|
'val': Series(range(5), dtype=np.int64)})
|
||
|
df.loc[:, 'date'] = np.array([0, 0, 0, 0, 0], dtype=np.int64)
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
expected = DataFrame({'date': Series('foo', index=range(5)),
|
||
|
'val': Series(range(5), dtype=np.int64)})
|
||
|
df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
|
||
|
'val': Series(range(5), dtype=np.int64)})
|
||
|
df.loc[:, 'date'] = 'foo'
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
expected = DataFrame({'date': Series(1.0, index=range(5)),
|
||
|
'val': Series(range(5), dtype=np.int64)})
|
||
|
df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
|
||
|
'val': Series(range(5), dtype=np.int64)})
|
||
|
df.loc[:, 'date'] = 1.0
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
# GH 15494
|
||
|
# setting on frame with single row
|
||
|
df = DataFrame({'date': Series([Timestamp('20180101')])})
|
||
|
df.loc[:, 'date'] = 'string'
|
||
|
expected = DataFrame({'date': Series(['string'])})
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_loc_setitem_consistency_empty(self):
|
||
|
# empty (essentially noops)
|
||
|
expected = DataFrame(columns=['x', 'y'])
|
||
|
expected['x'] = expected['x'].astype(np.int64)
|
||
|
df = DataFrame(columns=['x', 'y'])
|
||
|
df.loc[:, 'x'] = 1
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
df = DataFrame(columns=['x', 'y'])
|
||
|
df['x'] = 1
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_loc_setitem_consistency_slice_column_len(self):
|
||
|
# .loc[:,column] setting with slice == len of the column
|
||
|
# GH10408
|
||
|
data = """Level_0,,,Respondent,Respondent,Respondent,OtherCat,OtherCat
|
||
|
Level_1,,,Something,StartDate,EndDate,Yes/No,SomethingElse
|
||
|
Region,Site,RespondentID,,,,,
|
||
|
Region_1,Site_1,3987227376,A,5/25/2015 10:59,5/25/2015 11:22,Yes,
|
||
|
Region_1,Site_1,3980680971,A,5/21/2015 9:40,5/21/2015 9:52,Yes,Yes
|
||
|
Region_1,Site_2,3977723249,A,5/20/2015 8:27,5/20/2015 8:41,Yes,
|
||
|
Region_1,Site_2,3977723089,A,5/20/2015 8:33,5/20/2015 9:09,Yes,No"""
|
||
|
|
||
|
df = pd.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1, 2])
|
||
|
df.loc[:, ('Respondent', 'StartDate')] = pd.to_datetime(df.loc[:, (
|
||
|
'Respondent', 'StartDate')])
|
||
|
df.loc[:, ('Respondent', 'EndDate')] = pd.to_datetime(df.loc[:, (
|
||
|
'Respondent', 'EndDate')])
|
||
|
df.loc[:, ('Respondent', 'Duration')] = df.loc[:, (
|
||
|
'Respondent', 'EndDate')] - df.loc[:, ('Respondent', 'StartDate')]
|
||
|
|
||
|
df.loc[:, ('Respondent', 'Duration')] = df.loc[:, (
|
||
|
'Respondent', 'Duration')].astype('timedelta64[s]')
|
||
|
expected = Series([1380, 720, 840, 2160.], index=df.index,
|
||
|
name=('Respondent', 'Duration'))
|
||
|
tm.assert_series_equal(df[('Respondent', 'Duration')], expected)
|
||
|
|
||
|
def test_loc_setitem_frame(self):
|
||
|
df = self.frame_labels
|
||
|
|
||
|
result = df.iloc[0, 0]
|
||
|
|
||
|
df.loc['a', 'A'] = 1
|
||
|
result = df.loc['a', 'A']
|
||
|
assert result == 1
|
||
|
|
||
|
result = df.iloc[0, 0]
|
||
|
assert result == 1
|
||
|
|
||
|
df.loc[:, 'B':'D'] = 0
|
||
|
expected = df.loc[:, 'B':'D']
|
||
|
result = df.iloc[:, 1:]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
# GH 6254
|
||
|
# setting issue
|
||
|
df = DataFrame(index=[3, 5, 4], columns=['A'])
|
||
|
df.loc[[4, 3, 5], 'A'] = np.array([1, 2, 3], dtype='int64')
|
||
|
expected = DataFrame(dict(A=Series(
|
||
|
[1, 2, 3], index=[4, 3, 5]))).reindex(index=[3, 5, 4])
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
# GH 6252
|
||
|
# setting with an empty frame
|
||
|
keys1 = ['@' + str(i) for i in range(5)]
|
||
|
val1 = np.arange(5, dtype='int64')
|
||
|
|
||
|
keys2 = ['@' + str(i) for i in range(4)]
|
||
|
val2 = np.arange(4, dtype='int64')
|
||
|
|
||
|
index = list(set(keys1).union(keys2))
|
||
|
df = DataFrame(index=index)
|
||
|
df['A'] = np.nan
|
||
|
df.loc[keys1, 'A'] = val1
|
||
|
|
||
|
df['B'] = np.nan
|
||
|
df.loc[keys2, 'B'] = val2
|
||
|
|
||
|
expected = DataFrame(dict(A=Series(val1, index=keys1), B=Series(
|
||
|
val2, index=keys2))).reindex(index=index)
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
# GH 8669
|
||
|
# invalid coercion of nan -> int
|
||
|
df = DataFrame({'A': [1, 2, 3], 'B': np.nan})
|
||
|
df.loc[df.B > df.A, 'B'] = df.A
|
||
|
expected = DataFrame({'A': [1, 2, 3], 'B': np.nan})
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
# GH 6546
|
||
|
# setting with mixed labels
|
||
|
df = DataFrame({1: [1, 2], 2: [3, 4], 'a': ['a', 'b']})
|
||
|
|
||
|
result = df.loc[0, [1, 2]]
|
||
|
expected = Series([1, 3], index=[1, 2], dtype=object, name=0)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
expected = DataFrame({1: [5, 2], 2: [6, 4], 'a': ['a', 'b']})
|
||
|
df.loc[0, [1, 2]] = [5, 6]
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_loc_setitem_frame_multiples(self):
|
||
|
# multiple setting
|
||
|
df = DataFrame({'A': ['foo', 'bar', 'baz'],
|
||
|
'B': Series(
|
||
|
range(3), dtype=np.int64)})
|
||
|
rhs = df.loc[1:2]
|
||
|
rhs.index = df.index[0:2]
|
||
|
df.loc[0:1] = rhs
|
||
|
expected = DataFrame({'A': ['bar', 'baz', 'baz'],
|
||
|
'B': Series(
|
||
|
[1, 2, 2], dtype=np.int64)})
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
# multiple setting with frame on rhs (with M8)
|
||
|
df = DataFrame({'date': date_range('2000-01-01', '2000-01-5'),
|
||
|
'val': Series(
|
||
|
range(5), dtype=np.int64)})
|
||
|
expected = DataFrame({'date': [Timestamp('20000101'), Timestamp(
|
||
|
'20000102'), Timestamp('20000101'), Timestamp('20000102'),
|
||
|
Timestamp('20000103')],
|
||
|
'val': Series(
|
||
|
[0, 1, 0, 1, 2], dtype=np.int64)})
|
||
|
rhs = df.loc[0:2]
|
||
|
rhs.index = df.index[2:5]
|
||
|
df.loc[2:4] = rhs
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
'indexer', [['A'], slice(None, 'A', None), np.array(['A'])])
|
||
|
@pytest.mark.parametrize(
|
||
|
'value', [['Z'], np.array(['Z'])])
|
||
|
def test_loc_setitem_with_scalar_index(self, indexer, value):
|
||
|
# GH #19474
|
||
|
# assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated
|
||
|
# elementwisely, not using "setter('A', ['Z'])".
|
||
|
|
||
|
df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B'])
|
||
|
df.loc[0, indexer] = value
|
||
|
result = df.loc[0, 'A']
|
||
|
|
||
|
assert is_scalar(result) and result == 'Z'
|
||
|
|
||
|
def test_loc_coerceion(self):
|
||
|
|
||
|
# 12411
|
||
|
df = DataFrame({'date': [Timestamp('20130101').tz_localize('UTC'),
|
||
|
pd.NaT]})
|
||
|
expected = df.dtypes
|
||
|
|
||
|
result = df.iloc[[0]]
|
||
|
tm.assert_series_equal(result.dtypes, expected)
|
||
|
|
||
|
result = df.iloc[[1]]
|
||
|
tm.assert_series_equal(result.dtypes, expected)
|
||
|
|
||
|
# 12045
|
||
|
import datetime
|
||
|
df = DataFrame({'date': [datetime.datetime(2012, 1, 1),
|
||
|
datetime.datetime(1012, 1, 2)]})
|
||
|
expected = df.dtypes
|
||
|
|
||
|
result = df.iloc[[0]]
|
||
|
tm.assert_series_equal(result.dtypes, expected)
|
||
|
|
||
|
result = df.iloc[[1]]
|
||
|
tm.assert_series_equal(result.dtypes, expected)
|
||
|
|
||
|
# 11594
|
||
|
df = DataFrame({'text': ['some words'] + [None] * 9})
|
||
|
expected = df.dtypes
|
||
|
|
||
|
result = df.iloc[0:2]
|
||
|
tm.assert_series_equal(result.dtypes, expected)
|
||
|
|
||
|
result = df.iloc[3:]
|
||
|
tm.assert_series_equal(result.dtypes, expected)
|
||
|
|
||
|
def test_loc_non_unique(self):
|
||
|
# GH3659
|
||
|
# non-unique indexer with loc slice
|
||
|
# https://groups.google.com/forum/?fromgroups#!topic/pydata/zTm2No0crYs
|
||
|
|
||
|
# these are going to raise because the we are non monotonic
|
||
|
df = DataFrame({'A': [1, 2, 3, 4, 5, 6],
|
||
|
'B': [3, 4, 5, 6, 7, 8]}, index=[0, 1, 0, 1, 2, 3])
|
||
|
pytest.raises(KeyError, df.loc.__getitem__,
|
||
|
tuple([slice(1, None)]))
|
||
|
pytest.raises(KeyError, df.loc.__getitem__,
|
||
|
tuple([slice(0, None)]))
|
||
|
pytest.raises(KeyError, df.loc.__getitem__, tuple([slice(1, 2)]))
|
||
|
|
||
|
# monotonic are ok
|
||
|
df = DataFrame({'A': [1, 2, 3, 4, 5, 6],
|
||
|
'B': [3, 4, 5, 6, 7, 8]},
|
||
|
index=[0, 1, 0, 1, 2, 3]).sort_index(axis=0)
|
||
|
result = df.loc[1:]
|
||
|
expected = DataFrame({'A': [2, 4, 5, 6], 'B': [4, 6, 7, 8]},
|
||
|
index=[1, 1, 2, 3])
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.loc[0:]
|
||
|
tm.assert_frame_equal(result, df)
|
||
|
|
||
|
result = df.loc[1:2]
|
||
|
expected = DataFrame({'A': [2, 4, 5], 'B': [4, 6, 7]},
|
||
|
index=[1, 1, 2])
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_loc_non_unique_memory_error(self):
|
||
|
|
||
|
# GH 4280
|
||
|
# non_unique index with a large selection triggers a memory error
|
||
|
|
||
|
columns = list('ABCDEFG')
|
||
|
|
||
|
def gen_test(l, l2):
|
||
|
return pd.concat([
|
||
|
DataFrame(np.random.randn(l, len(columns)),
|
||
|
index=lrange(l), columns=columns),
|
||
|
DataFrame(np.ones((l2, len(columns))),
|
||
|
index=[0] * l2, columns=columns)])
|
||
|
|
||
|
def gen_expected(df, mask):
|
||
|
l = len(mask)
|
||
|
return pd.concat([df.take([0]),
|
||
|
DataFrame(np.ones((l, len(columns))),
|
||
|
index=[0] * l,
|
||
|
columns=columns),
|
||
|
df.take(mask[1:])])
|
||
|
|
||
|
df = gen_test(900, 100)
|
||
|
assert not df.index.is_unique
|
||
|
|
||
|
mask = np.arange(100)
|
||
|
result = df.loc[mask]
|
||
|
expected = gen_expected(df, mask)
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
df = gen_test(900000, 100000)
|
||
|
assert not df.index.is_unique
|
||
|
|
||
|
mask = np.arange(100000)
|
||
|
result = df.loc[mask]
|
||
|
expected = gen_expected(df, mask)
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_loc_name(self):
|
||
|
# GH 3880
|
||
|
df = DataFrame([[1, 1], [1, 1]])
|
||
|
df.index.name = 'index_name'
|
||
|
result = df.iloc[[0, 1]].index.name
|
||
|
assert result == 'index_name'
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
result = df.ix[[0, 1]].index.name
|
||
|
assert result == 'index_name'
|
||
|
|
||
|
result = df.loc[[0, 1]].index.name
|
||
|
assert result == 'index_name'
|
||
|
|
||
|
def test_loc_empty_list_indexer_is_ok(self):
|
||
|
from pandas.util.testing import makeCustomDataframe as mkdf
|
||
|
df = mkdf(5, 2)
|
||
|
# vertical empty
|
||
|
tm.assert_frame_equal(df.loc[:, []], df.iloc[:, :0],
|
||
|
check_index_type=True, check_column_type=True)
|
||
|
# horizontal empty
|
||
|
tm.assert_frame_equal(df.loc[[], :], df.iloc[:0, :],
|
||
|
check_index_type=True, check_column_type=True)
|
||
|
# horizontal empty
|
||
|
tm.assert_frame_equal(df.loc[[]], df.iloc[:0, :],
|
||
|
check_index_type=True,
|
||
|
check_column_type=True)
|
||
|
|
||
|
def test_identity_slice_returns_new_object(self):
|
||
|
# GH13873
|
||
|
original_df = DataFrame({'a': [1, 2, 3]})
|
||
|
sliced_df = original_df.loc[:]
|
||
|
assert sliced_df is not original_df
|
||
|
assert original_df[:] is not original_df
|
||
|
|
||
|
# should be a shallow copy
|
||
|
original_df['a'] = [4, 4, 4]
|
||
|
assert (sliced_df['a'] == 4).all()
|
||
|
|
||
|
# These should not return copies
|
||
|
assert original_df is original_df.loc[:, :]
|
||
|
df = DataFrame(np.random.randn(10, 4))
|
||
|
assert df[0] is df.loc[:, 0]
|
||
|
|
||
|
# Same tests for Series
|
||
|
original_series = Series([1, 2, 3, 4, 5, 6])
|
||
|
sliced_series = original_series.loc[:]
|
||
|
assert sliced_series is not original_series
|
||
|
assert original_series[:] is not original_series
|
||
|
|
||
|
original_series[:3] = [7, 8, 9]
|
||
|
assert all(sliced_series[:3] == [7, 8, 9])
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
'indexer_type_1',
|
||
|
(list, tuple, set, slice, np.ndarray, Series, Index))
|
||
|
@pytest.mark.parametrize(
|
||
|
'indexer_type_2',
|
||
|
(list, tuple, set, slice, np.ndarray, Series, Index))
|
||
|
def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
|
||
|
# GH #19686
|
||
|
# .loc should work with nested indexers which can be
|
||
|
# any list-like objects (see `pandas.api.types.is_list_like`) or slices
|
||
|
|
||
|
def convert_nested_indexer(indexer_type, keys):
|
||
|
if indexer_type == np.ndarray:
|
||
|
return np.array(keys)
|
||
|
if indexer_type == slice:
|
||
|
return slice(*keys)
|
||
|
return indexer_type(keys)
|
||
|
|
||
|
a = [10, 20, 30]
|
||
|
b = [1, 2, 3]
|
||
|
index = pd.MultiIndex.from_product([a, b])
|
||
|
df = pd.DataFrame(
|
||
|
np.arange(len(index), dtype='int64'),
|
||
|
index=index, columns=['Data'])
|
||
|
|
||
|
keys = ([10, 20], [2, 3])
|
||
|
types = (indexer_type_1, indexer_type_2)
|
||
|
|
||
|
# check indexers with all the combinations of nested objects
|
||
|
# of all the valid types
|
||
|
indexer = tuple(
|
||
|
convert_nested_indexer(indexer_type, k)
|
||
|
for indexer_type, k in zip(types, keys))
|
||
|
|
||
|
result = df.loc[indexer, 'Data']
|
||
|
expected = pd.Series(
|
||
|
[1, 2, 4, 5], name='Data',
|
||
|
index=pd.MultiIndex.from_product(keys))
|
||
|
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
def test_loc_uint64(self):
|
||
|
# GH20722
|
||
|
# Test whether loc accept uint64 max value as index.
|
||
|
s = pd.Series([1, 2],
|
||
|
index=[np.iinfo('uint64').max - 1,
|
||
|
np.iinfo('uint64').max])
|
||
|
|
||
|
result = s.loc[np.iinfo('uint64').max - 1]
|
||
|
expected = s.iloc[0]
|
||
|
assert result == expected
|
||
|
|
||
|
result = s.loc[[np.iinfo('uint64').max - 1]]
|
||
|
expected = s.iloc[[0]]
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
result = s.loc[[np.iinfo('uint64').max - 1,
|
||
|
np.iinfo('uint64').max]]
|
||
|
tm.assert_series_equal(result, s)
|