laywerrobot/lib/python3.6/site-packages/pandas/tests/frame/test_indexing.py
2020-08-27 21:55:39 +02:00

3496 lines
119 KiB
Python

# -*- coding: utf-8 -*-
from __future__ import print_function
from warnings import catch_warnings
from datetime import datetime, date, timedelta, time
from pandas.compat import map, zip, range, lrange, lzip, long
from pandas import compat
from numpy import nan
from numpy.random import randn
import pytest
import numpy as np
import pandas.core.common as com
from pandas import (DataFrame, Index, Series, notna, isna,
MultiIndex, DatetimeIndex, Timestamp,
date_range, Categorical)
from pandas.core.dtypes.dtypes import CategoricalDtype
import pandas as pd
from pandas._libs.tslib import iNaT
from pandas.tseries.offsets import BDay
from pandas.core.dtypes.common import (
is_float_dtype,
is_integer,
is_scalar)
from pandas.util.testing import (assert_almost_equal,
assert_series_equal,
assert_frame_equal)
from pandas.core.indexing import IndexingError
import pandas.util.testing as tm
from pandas.tests.frame.common import TestData
class TestDataFrameIndexing(TestData):
def test_getitem(self):
# Slicing
sl = self.frame[:20]
assert len(sl.index) == 20
# Column access
for _, series in compat.iteritems(sl):
assert len(series.index) == 20
assert tm.equalContents(series.index, sl.index)
for key, _ in compat.iteritems(self.frame._series):
assert self.frame[key] is not None
assert 'random' not in self.frame
with tm.assert_raises_regex(KeyError, 'random'):
self.frame['random']
df = self.frame.copy()
df['$10'] = randn(len(df))
ad = randn(len(df))
df['@awesome_domain'] = ad
with pytest.raises(KeyError):
df.__getitem__('df["$10"]')
res = df['@awesome_domain']
tm.assert_numpy_array_equal(ad, res.values)
def test_getitem_dupe_cols(self):
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b'])
try:
df[['baf']]
except KeyError:
pass
else:
self.fail("Dataframe failed to raise KeyError")
def test_get(self):
b = self.frame.get('B')
assert_series_equal(b, self.frame['B'])
assert self.frame.get('foo') is None
assert_series_equal(self.frame.get('foo', self.frame['B']),
self.frame['B'])
# None
# GH 5652
for df in [DataFrame(), DataFrame(columns=list('AB')),
DataFrame(columns=list('AB'), index=range(3))]:
result = df.get(None)
assert result is None
def test_getitem_iterator(self):
idx = iter(['A', 'B', 'C'])
result = self.frame.loc[:, idx]
expected = self.frame.loc[:, ['A', 'B', 'C']]
assert_frame_equal(result, expected)
idx = iter(['A', 'B', 'C'])
result = self.frame.loc[:, idx]
expected = self.frame.loc[:, ['A', 'B', 'C']]
assert_frame_equal(result, expected)
def test_getitem_list(self):
self.frame.columns.name = 'foo'
result = self.frame[['B', 'A']]
result2 = self.frame[Index(['B', 'A'])]
expected = self.frame.loc[:, ['B', 'A']]
expected.columns.name = 'foo'
assert_frame_equal(result, expected)
assert_frame_equal(result2, expected)
assert result.columns.name == 'foo'
with tm.assert_raises_regex(KeyError, 'not in index'):
self.frame[['B', 'A', 'food']]
with tm.assert_raises_regex(KeyError, 'not in index'):
self.frame[Index(['B', 'A', 'foo'])]
# tuples
df = DataFrame(randn(8, 3),
columns=Index([('foo', 'bar'), ('baz', 'qux'),
('peek', 'aboo')], name=('sth', 'sth2')))
result = df[[('foo', 'bar'), ('baz', 'qux')]]
expected = df.iloc[:, :2]
assert_frame_equal(result, expected)
assert result.columns.names == ('sth', 'sth2')
def test_getitem_callable(self):
# GH 12533
result = self.frame[lambda x: 'A']
tm.assert_series_equal(result, self.frame.loc[:, 'A'])
result = self.frame[lambda x: ['A', 'B']]
tm.assert_frame_equal(result, self.frame.loc[:, ['A', 'B']])
df = self.frame[:3]
result = df[lambda x: [True, False, True]]
tm.assert_frame_equal(result, self.frame.iloc[[0, 2], :])
def test_setitem_list(self):
self.frame['E'] = 'foo'
data = self.frame[['A', 'B']]
self.frame[['B', 'A']] = data
assert_series_equal(self.frame['B'], data['A'], check_names=False)
assert_series_equal(self.frame['A'], data['B'], check_names=False)
with tm.assert_raises_regex(ValueError,
'Columns must be same length as key'):
data[['A']] = self.frame[['A', 'B']]
with tm.assert_raises_regex(ValueError, 'Length of values '
'does not match '
'length of index'):
data['A'] = range(len(data.index) - 1)
df = DataFrame(0, lrange(3), ['tt1', 'tt2'], dtype=np.int_)
df.loc[1, ['tt1', 'tt2']] = [1, 2]
result = df.loc[df.index[1], ['tt1', 'tt2']]
expected = Series([1, 2], df.columns, dtype=np.int_, name=1)
assert_series_equal(result, expected)
df['tt1'] = df['tt2'] = '0'
df.loc[df.index[1], ['tt1', 'tt2']] = ['1', '2']
result = df.loc[df.index[1], ['tt1', 'tt2']]
expected = Series(['1', '2'], df.columns, name=1)
assert_series_equal(result, expected)
def test_setitem_list_not_dataframe(self):
data = np.random.randn(len(self.frame), 2)
self.frame[['A', 'B']] = data
assert_almost_equal(self.frame[['A', 'B']].values, data)
def test_setitem_list_of_tuples(self):
tuples = lzip(self.frame['A'], self.frame['B'])
self.frame['tuples'] = tuples
result = self.frame['tuples']
expected = Series(tuples, index=self.frame.index, name='tuples')
assert_series_equal(result, expected)
def test_setitem_mulit_index(self):
# GH7655, test that assigning to a sub-frame of a frame
# with multi-index columns aligns both rows and columns
it = ['jim', 'joe', 'jolie'], ['first', 'last'], \
['left', 'center', 'right']
cols = MultiIndex.from_product(it)
index = pd.date_range('20141006', periods=20)
vals = np.random.randint(1, 1000, (len(index), len(cols)))
df = pd.DataFrame(vals, columns=cols, index=index)
i, j = df.index.values.copy(), it[-1][:]
np.random.shuffle(i)
df['jim'] = df['jolie'].loc[i, ::-1]
assert_frame_equal(df['jim'], df['jolie'])
np.random.shuffle(j)
df[('joe', 'first')] = df[('jolie', 'last')].loc[i, j]
assert_frame_equal(df[('joe', 'first')], df[('jolie', 'last')])
np.random.shuffle(j)
df[('joe', 'last')] = df[('jolie', 'first')].loc[i, j]
assert_frame_equal(df[('joe', 'last')], df[('jolie', 'first')])
def test_setitem_callable(self):
# GH 12533
df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]})
df[lambda x: 'A'] = [11, 12, 13, 14]
exp = pd.DataFrame({'A': [11, 12, 13, 14], 'B': [5, 6, 7, 8]})
tm.assert_frame_equal(df, exp)
def test_setitem_other_callable(self):
# GH 13299
inc = lambda x: x + 1
df = pd.DataFrame([[-1, 1], [1, -1]])
df[df > 0] = inc
expected = pd.DataFrame([[-1, inc], [inc, -1]])
tm.assert_frame_equal(df, expected)
def test_getitem_boolean(self):
# boolean indexing
d = self.tsframe.index[10]
indexer = self.tsframe.index > d
indexer_obj = indexer.astype(object)
subindex = self.tsframe.index[indexer]
subframe = self.tsframe[indexer]
tm.assert_index_equal(subindex, subframe.index)
with tm.assert_raises_regex(ValueError, 'Item wrong length'):
self.tsframe[indexer[:-1]]
subframe_obj = self.tsframe[indexer_obj]
assert_frame_equal(subframe_obj, subframe)
with tm.assert_raises_regex(ValueError, 'boolean values only'):
self.tsframe[self.tsframe]
# test that Series work
indexer_obj = Series(indexer_obj, self.tsframe.index)
subframe_obj = self.tsframe[indexer_obj]
assert_frame_equal(subframe_obj, subframe)
# test that Series indexers reindex
# we are producing a warning that since the passed boolean
# key is not the same as the given index, we will reindex
# not sure this is really necessary
with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
indexer_obj = indexer_obj.reindex(self.tsframe.index[::-1])
subframe_obj = self.tsframe[indexer_obj]
assert_frame_equal(subframe_obj, subframe)
# test df[df > 0]
for df in [self.tsframe, self.mixed_frame,
self.mixed_float, self.mixed_int]:
data = df._get_numeric_data()
bif = df[df > 0]
bifw = DataFrame(dict((c, np.where(data[c] > 0, data[c], np.nan))
for c in data.columns),
index=data.index, columns=data.columns)
# add back other columns to compare
for c in df.columns:
if c not in bifw:
bifw[c] = df[c]
bifw = bifw.reindex(columns=df.columns)
assert_frame_equal(bif, bifw, check_dtype=False)
for c in df.columns:
if bif[c].dtype != bifw[c].dtype:
assert bif[c].dtype == df[c].dtype
def test_getitem_boolean_casting(self):
# don't upcast if we don't need to
df = self.tsframe.copy()
df['E'] = 1
df['E'] = df['E'].astype('int32')
df['E1'] = df['E'].copy()
df['F'] = 1
df['F'] = df['F'].astype('int64')
df['F1'] = df['F'].copy()
casted = df[df > 0]
result = casted.get_dtype_counts()
expected = Series({'float64': 4, 'int32': 2, 'int64': 2})
assert_series_equal(result, expected)
# int block splitting
df.loc[df.index[1:3], ['E1', 'F1']] = 0
casted = df[df > 0]
result = casted.get_dtype_counts()
expected = Series({'float64': 6, 'int32': 1, 'int64': 1})
assert_series_equal(result, expected)
# where dtype conversions
# GH 3733
df = DataFrame(data=np.random.randn(100, 50))
df = df.where(df > 0) # create nans
bools = df > 0
mask = isna(df)
expected = bools.astype(float).mask(mask)
result = bools.mask(mask)
assert_frame_equal(result, expected)
def test_getitem_boolean_list(self):
df = DataFrame(np.arange(12).reshape(3, 4))
def _checkit(lst):
result = df[lst]
expected = df.loc[df.index[lst]]
assert_frame_equal(result, expected)
_checkit([True, False, True])
_checkit([True, True, True])
_checkit([False, False, False])
def test_getitem_boolean_iadd(self):
arr = randn(5, 5)
df = DataFrame(arr.copy(), columns=['A', 'B', 'C', 'D', 'E'])
df[df < 0] += 1
arr[arr < 0] += 1
assert_almost_equal(df.values, arr)
def test_boolean_index_empty_corner(self):
# #2096
blah = DataFrame(np.empty([0, 1]), columns=['A'],
index=DatetimeIndex([]))
# both of these should succeed trivially
k = np.array([], bool)
blah[k]
blah[k] = 0
def test_getitem_ix_mixed_integer(self):
df = DataFrame(np.random.randn(4, 3),
index=[1, 10, 'C', 'E'], columns=[1, 2, 3])
result = df.iloc[:-1]
expected = df.loc[df.index[:-1]]
assert_frame_equal(result, expected)
with catch_warnings(record=True):
result = df.ix[[1, 10]]
expected = df.ix[Index([1, 10], dtype=object)]
assert_frame_equal(result, expected)
# 11320
df = pd.DataFrame({"rna": (1.5, 2.2, 3.2, 4.5),
-1000: [11, 21, 36, 40],
0: [10, 22, 43, 34],
1000: [0, 10, 20, 30]},
columns=['rna', -1000, 0, 1000])
result = df[[1000]]
expected = df.iloc[:, [3]]
assert_frame_equal(result, expected)
result = df[[-1000]]
expected = df.iloc[:, [1]]
assert_frame_equal(result, expected)
def test_getitem_setitem_ix_negative_integers(self):
with catch_warnings(record=True):
result = self.frame.ix[:, -1]
assert_series_equal(result, self.frame['D'])
with catch_warnings(record=True):
result = self.frame.ix[:, [-1]]
assert_frame_equal(result, self.frame[['D']])
with catch_warnings(record=True):
result = self.frame.ix[:, [-1, -2]]
assert_frame_equal(result, self.frame[['D', 'C']])
with catch_warnings(record=True):
self.frame.ix[:, [-1]] = 0
assert (self.frame['D'] == 0).all()
df = DataFrame(np.random.randn(8, 4))
# ix does label-based indexing when having an integer index
with pytest.raises(KeyError):
df.ix[[-1]]
with pytest.raises(KeyError):
df.ix[:, [-1]]
# #1942
a = DataFrame(randn(20, 2), index=[chr(x + 65) for x in range(20)])
with catch_warnings(record=True):
a.ix[-1] = a.ix[-2]
with catch_warnings(record=True):
assert_series_equal(a.ix[-1], a.ix[-2], check_names=False)
assert a.ix[-1].name == 'T'
assert a.ix[-2].name == 'S'
def test_getattr(self):
assert_series_equal(self.frame.A, self.frame['A'])
pytest.raises(AttributeError, getattr, self.frame,
'NONEXISTENT_NAME')
def test_setattr_column(self):
df = DataFrame({'foobar': 1}, index=lrange(10))
df.foobar = 5
assert (df.foobar == 5).all()
def test_setitem(self):
# not sure what else to do here
series = self.frame['A'][::2]
self.frame['col5'] = series
assert 'col5' in self.frame
assert len(series) == 15
assert len(self.frame) == 30
exp = np.ravel(np.column_stack((series.values, [np.nan] * 15)))
exp = Series(exp, index=self.frame.index, name='col5')
tm.assert_series_equal(self.frame['col5'], exp)
series = self.frame['A']
self.frame['col6'] = series
tm.assert_series_equal(series, self.frame['col6'], check_names=False)
with pytest.raises(KeyError):
self.frame[randn(len(self.frame) + 1)] = 1
# set ndarray
arr = randn(len(self.frame))
self.frame['col9'] = arr
assert (self.frame['col9'] == arr).all()
self.frame['col7'] = 5
assert((self.frame['col7'] == 5).all())
self.frame['col0'] = 3.14
assert((self.frame['col0'] == 3.14).all())
self.frame['col8'] = 'foo'
assert((self.frame['col8'] == 'foo').all())
# this is partially a view (e.g. some blocks are view)
# so raise/warn
smaller = self.frame[:2]
def f():
smaller['col10'] = ['1', '2']
pytest.raises(com.SettingWithCopyError, f)
assert smaller['col10'].dtype == np.object_
assert (smaller['col10'] == ['1', '2']).all()
# with a dtype
for dtype in ['int32', 'int64', 'float32', 'float64']:
self.frame[dtype] = np.array(arr, dtype=dtype)
assert self.frame[dtype].dtype.name == dtype
# dtype changing GH4204
df = DataFrame([[0, 0]])
df.iloc[0] = np.nan
expected = DataFrame([[np.nan, np.nan]])
assert_frame_equal(df, expected)
df = DataFrame([[0, 0]])
df.loc[0] = np.nan
assert_frame_equal(df, expected)
def test_setitem_tuple(self):
self.frame['A', 'B'] = self.frame['A']
assert_series_equal(self.frame['A', 'B'], self.frame[
'A'], check_names=False)
def test_setitem_always_copy(self):
s = self.frame['A'].copy()
self.frame['E'] = s
self.frame['E'][5:10] = nan
assert notna(s[5:10]).all()
def test_setitem_boolean(self):
df = self.frame.copy()
values = self.frame.values
df[df['A'] > 0] = 4
values[values[:, 0] > 0] = 4
assert_almost_equal(df.values, values)
# test that column reindexing works
series = df['A'] == 4
series = series.reindex(df.index[::-1])
df[series] = 1
values[values[:, 0] == 4] = 1
assert_almost_equal(df.values, values)
df[df > 0] = 5
values[values > 0] = 5
assert_almost_equal(df.values, values)
df[df == 5] = 0
values[values == 5] = 0
assert_almost_equal(df.values, values)
# a df that needs alignment first
df[df[:-1] < 0] = 2
np.putmask(values[:-1], values[:-1] < 0, 2)
assert_almost_equal(df.values, values)
# indexed with same shape but rows-reversed df
df[df[::-1] == 2] = 3
values[values == 2] = 3
assert_almost_equal(df.values, values)
msg = "Must pass DataFrame or 2-d ndarray with boolean values only"
with tm.assert_raises_regex(TypeError, msg):
df[df * 0] = 2
# index with DataFrame
mask = df > np.abs(df)
expected = df.copy()
df[df > np.abs(df)] = nan
expected.values[mask.values] = nan
assert_frame_equal(df, expected)
# set from DataFrame
expected = df.copy()
df[df > np.abs(df)] = df * 2
np.putmask(expected.values, mask.values, df.values * 2)
assert_frame_equal(df, expected)
@pytest.mark.parametrize(
"mask_type",
[lambda df: df > np.abs(df) / 2,
lambda df: (df > np.abs(df) / 2).values],
ids=['dataframe', 'array'])
def test_setitem_boolean_mask(self, mask_type):
# Test for issue #18582
df = self.frame.copy()
mask = mask_type(df)
# index with boolean mask
result = df.copy()
result[mask] = np.nan
expected = df.copy()
expected.values[np.array(mask)] = np.nan
assert_frame_equal(result, expected)
def test_setitem_cast(self):
self.frame['D'] = self.frame['D'].astype('i8')
assert self.frame['D'].dtype == np.int64
# #669, should not cast?
# this is now set to int64, which means a replacement of the column to
# the value dtype (and nothing to do with the existing dtype)
self.frame['B'] = 0
assert self.frame['B'].dtype == np.int64
# cast if pass array of course
self.frame['B'] = np.arange(len(self.frame))
assert issubclass(self.frame['B'].dtype.type, np.integer)
self.frame['foo'] = 'bar'
self.frame['foo'] = 0
assert self.frame['foo'].dtype == np.int64
self.frame['foo'] = 'bar'
self.frame['foo'] = 2.5
assert self.frame['foo'].dtype == np.float64
self.frame['something'] = 0
assert self.frame['something'].dtype == np.int64
self.frame['something'] = 2
assert self.frame['something'].dtype == np.int64
self.frame['something'] = 2.5
assert self.frame['something'].dtype == np.float64
# GH 7704
# dtype conversion on setting
df = DataFrame(np.random.rand(30, 3), columns=tuple('ABC'))
df['event'] = np.nan
df.loc[10, 'event'] = 'foo'
result = df.get_dtype_counts().sort_values()
expected = Series({'float64': 3, 'object': 1}).sort_values()
assert_series_equal(result, expected)
# Test that data type is preserved . #5782
df = DataFrame({'one': np.arange(6, dtype=np.int8)})
df.loc[1, 'one'] = 6
assert df.dtypes.one == np.dtype(np.int8)
df.one = np.int8(7)
assert df.dtypes.one == np.dtype(np.int8)
def test_setitem_boolean_column(self):
expected = self.frame.copy()
mask = self.frame['A'] > 0
self.frame.loc[mask, 'B'] = 0
expected.values[mask.values, 1] = 0
assert_frame_equal(self.frame, expected)
def test_frame_setitem_timestamp(self):
# GH#2155
columns = DatetimeIndex(start='1/1/2012', end='2/1/2012', freq=BDay())
index = lrange(10)
data = DataFrame(columns=columns, index=index)
t = datetime(2012, 11, 1)
ts = Timestamp(t)
data[ts] = np.nan # works, mostly a smoke-test
assert np.isnan(data[ts]).all()
def test_setitem_corner(self):
# corner case
df = DataFrame({'B': [1., 2., 3.],
'C': ['a', 'b', 'c']},
index=np.arange(3))
del df['B']
df['B'] = [1., 2., 3.]
assert 'B' in df
assert len(df.columns) == 2
df['A'] = 'beginning'
df['E'] = 'foo'
df['D'] = 'bar'
df[datetime.now()] = 'date'
df[datetime.now()] = 5.
# what to do when empty frame with index
dm = DataFrame(index=self.frame.index)
dm['A'] = 'foo'
dm['B'] = 'bar'
assert len(dm.columns) == 2
assert dm.values.dtype == np.object_
# upcast
dm['C'] = 1
assert dm['C'].dtype == np.int64
dm['E'] = 1.
assert dm['E'].dtype == np.float64
# set existing column
dm['A'] = 'bar'
assert 'bar' == dm['A'][0]
dm = DataFrame(index=np.arange(3))
dm['A'] = 1
dm['foo'] = 'bar'
del dm['foo']
dm['foo'] = 'bar'
assert dm['foo'].dtype == np.object_
dm['coercable'] = ['1', '2', '3']
assert dm['coercable'].dtype == np.object_
def test_setitem_corner2(self):
data = {"title": ['foobar', 'bar', 'foobar'] + ['foobar'] * 17,
"cruft": np.random.random(20)}
df = DataFrame(data)
ix = df[df['title'] == 'bar'].index
df.loc[ix, ['title']] = 'foobar'
df.loc[ix, ['cruft']] = 0
assert df.loc[1, 'title'] == 'foobar'
assert df.loc[1, 'cruft'] == 0
def test_setitem_ambig(self):
# Difficulties with mixed-type data
from decimal import Decimal
# Created as float type
dm = DataFrame(index=lrange(3), columns=lrange(3))
coercable_series = Series([Decimal(1) for _ in range(3)],
index=lrange(3))
uncoercable_series = Series(['foo', 'bzr', 'baz'], index=lrange(3))
dm[0] = np.ones(3)
assert len(dm.columns) == 3
dm[1] = coercable_series
assert len(dm.columns) == 3
dm[2] = uncoercable_series
assert len(dm.columns) == 3
assert dm[2].dtype == np.object_
def test_setitem_clear_caches(self):
# see gh-304
df = DataFrame({'x': [1.1, 2.1, 3.1, 4.1], 'y': [5.1, 6.1, 7.1, 8.1]},
index=[0, 1, 2, 3])
df.insert(2, 'z', np.nan)
# cache it
foo = df['z']
df.loc[df.index[2:], 'z'] = 42
expected = Series([np.nan, np.nan, 42, 42], index=df.index, name='z')
assert df['z'] is not foo
tm.assert_series_equal(df['z'], expected)
def test_setitem_None(self):
# GH #766
self.frame[None] = self.frame['A']
assert_series_equal(
self.frame.iloc[:, -1], self.frame['A'], check_names=False)
assert_series_equal(self.frame.loc[:, None], self.frame[
'A'], check_names=False)
assert_series_equal(self.frame[None], self.frame[
'A'], check_names=False)
repr(self.frame)
def test_setitem_empty(self):
# GH 9596
df = pd.DataFrame({'a': ['1', '2', '3'],
'b': ['11', '22', '33'],
'c': ['111', '222', '333']})
result = df.copy()
result.loc[result.b.isna(), 'a'] = result.a
assert_frame_equal(result, df)
def test_setitem_empty_frame_with_boolean(self):
# Test for issue #10126
for dtype in ('float', 'int64'):
for df in [
pd.DataFrame(dtype=dtype),
pd.DataFrame(dtype=dtype, index=[1]),
pd.DataFrame(dtype=dtype, columns=['A']),
]:
df2 = df.copy()
df[df > df2] = 47
assert_frame_equal(df, df2)
def test_setitem_scalars_no_index(self):
# GH16823 / 17894
df = DataFrame()
df['foo'] = 1
expected = DataFrame(columns=['foo']).astype(np.int64)
assert_frame_equal(df, expected)
def test_getitem_empty_frame_with_boolean(self):
# Test for issue #11859
df = pd.DataFrame()
df2 = df[df > 0]
assert_frame_equal(df, df2)
def test_delitem_corner(self):
f = self.frame.copy()
del f['D']
assert len(f.columns) == 3
pytest.raises(KeyError, f.__delitem__, 'D')
del f['B']
assert len(f.columns) == 2
def test_getitem_fancy_2d(self):
f = self.frame
with catch_warnings(record=True):
assert_frame_equal(f.ix[:, ['B', 'A']],
f.reindex(columns=['B', 'A']))
subidx = self.frame.index[[5, 4, 1]]
with catch_warnings(record=True):
assert_frame_equal(f.ix[subidx, ['B', 'A']],
f.reindex(index=subidx, columns=['B', 'A']))
# slicing rows, etc.
with catch_warnings(record=True):
assert_frame_equal(f.ix[5:10], f[5:10])
assert_frame_equal(f.ix[5:10, :], f[5:10])
assert_frame_equal(f.ix[:5, ['A', 'B']],
f.reindex(index=f.index[:5],
columns=['A', 'B']))
# slice rows with labels, inclusive!
with catch_warnings(record=True):
expected = f.ix[5:11]
result = f.ix[f.index[5]:f.index[10]]
assert_frame_equal(expected, result)
# slice columns
with catch_warnings(record=True):
assert_frame_equal(f.ix[:, :2], f.reindex(columns=['A', 'B']))
# get view
with catch_warnings(record=True):
exp = f.copy()
f.ix[5:10].values[:] = 5
exp.values[5:10] = 5
assert_frame_equal(f, exp)
with catch_warnings(record=True):
pytest.raises(ValueError, f.ix.__getitem__, f > 0.5)
def test_slice_floats(self):
index = [52195.504153, 52196.303147, 52198.369883]
df = DataFrame(np.random.rand(3, 2), index=index)
s1 = df.loc[52195.1:52196.5]
assert len(s1) == 2
s1 = df.loc[52195.1:52196.6]
assert len(s1) == 2
s1 = df.loc[52195.1:52198.9]
assert len(s1) == 3
def test_getitem_fancy_slice_integers_step(self):
df = DataFrame(np.random.randn(10, 5))
# this is OK
result = df.iloc[:8:2] # noqa
df.iloc[:8:2] = np.nan
assert isna(df.iloc[:8:2]).values.all()
def test_getitem_setitem_integer_slice_keyerrors(self):
df = DataFrame(np.random.randn(10, 5), index=lrange(0, 20, 2))
# this is OK
cp = df.copy()
cp.iloc[4:10] = 0
assert (cp.iloc[4:10] == 0).values.all()
# so is this
cp = df.copy()
cp.iloc[3:11] = 0
assert (cp.iloc[3:11] == 0).values.all()
result = df.iloc[2:6]
result2 = df.loc[3:11]
expected = df.reindex([4, 6, 8, 10])
assert_frame_equal(result, expected)
assert_frame_equal(result2, expected)
# non-monotonic, raise KeyError
df2 = df.iloc[lrange(5) + lrange(5, 10)[::-1]]
pytest.raises(KeyError, df2.loc.__getitem__, slice(3, 11))
pytest.raises(KeyError, df2.loc.__setitem__, slice(3, 11), 0)
def test_setitem_fancy_2d(self):
# case 1
frame = self.frame.copy()
expected = frame.copy()
with catch_warnings(record=True):
frame.ix[:, ['B', 'A']] = 1
expected['B'] = 1.
expected['A'] = 1.
assert_frame_equal(frame, expected)
# case 2
frame = self.frame.copy()
frame2 = self.frame.copy()
expected = frame.copy()
subidx = self.frame.index[[5, 4, 1]]
values = randn(3, 2)
with catch_warnings(record=True):
frame.ix[subidx, ['B', 'A']] = values
frame2.ix[[5, 4, 1], ['B', 'A']] = values
expected['B'].ix[subidx] = values[:, 0]
expected['A'].ix[subidx] = values[:, 1]
assert_frame_equal(frame, expected)
assert_frame_equal(frame2, expected)
# case 3: slicing rows, etc.
frame = self.frame.copy()
with catch_warnings(record=True):
expected1 = self.frame.copy()
frame.ix[5:10] = 1.
expected1.values[5:10] = 1.
assert_frame_equal(frame, expected1)
with catch_warnings(record=True):
expected2 = self.frame.copy()
arr = randn(5, len(frame.columns))
frame.ix[5:10] = arr
expected2.values[5:10] = arr
assert_frame_equal(frame, expected2)
# case 4
with catch_warnings(record=True):
frame = self.frame.copy()
frame.ix[5:10, :] = 1.
assert_frame_equal(frame, expected1)
frame.ix[5:10, :] = arr
assert_frame_equal(frame, expected2)
# case 5
with catch_warnings(record=True):
frame = self.frame.copy()
frame2 = self.frame.copy()
expected = self.frame.copy()
values = randn(5, 2)
frame.ix[:5, ['A', 'B']] = values
expected['A'][:5] = values[:, 0]
expected['B'][:5] = values[:, 1]
assert_frame_equal(frame, expected)
with catch_warnings(record=True):
frame2.ix[:5, [0, 1]] = values
assert_frame_equal(frame2, expected)
# case 6: slice rows with labels, inclusive!
with catch_warnings(record=True):
frame = self.frame.copy()
expected = self.frame.copy()
frame.ix[frame.index[5]:frame.index[10]] = 5.
expected.values[5:11] = 5
assert_frame_equal(frame, expected)
# case 7: slice columns
with catch_warnings(record=True):
frame = self.frame.copy()
frame2 = self.frame.copy()
expected = self.frame.copy()
# slice indices
frame.ix[:, 1:3] = 4.
expected.values[:, 1:3] = 4.
assert_frame_equal(frame, expected)
# slice with labels
frame.ix[:, 'B':'C'] = 4.
assert_frame_equal(frame, expected)
# new corner case of boolean slicing / setting
frame = DataFrame(lzip([2, 3, 9, 6, 7], [np.nan] * 5),
columns=['a', 'b'])
lst = [100]
lst.extend([np.nan] * 4)
expected = DataFrame(lzip([100, 3, 9, 6, 7], lst),
columns=['a', 'b'])
frame[frame['a'] == 2] = 100
assert_frame_equal(frame, expected)
def test_fancy_getitem_slice_mixed(self):
sliced = self.mixed_frame.iloc[:, -3:]
assert sliced['D'].dtype == np.float64
# get view with single block
# setting it triggers setting with copy
sliced = self.frame.iloc[:, -3:]
def f():
sliced['C'] = 4.
pytest.raises(com.SettingWithCopyError, f)
assert (self.frame['C'] == 4).all()
def test_fancy_setitem_int_labels(self):
# integer index defers to label-based indexing
df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2))
with catch_warnings(record=True):
tmp = df.copy()
exp = df.copy()
tmp.ix[[0, 2, 4]] = 5
exp.values[:3] = 5
assert_frame_equal(tmp, exp)
with catch_warnings(record=True):
tmp = df.copy()
exp = df.copy()
tmp.ix[6] = 5
exp.values[3] = 5
assert_frame_equal(tmp, exp)
with catch_warnings(record=True):
tmp = df.copy()
exp = df.copy()
tmp.ix[:, 2] = 5
# tmp correctly sets the dtype
# so match the exp way
exp[2] = 5
assert_frame_equal(tmp, exp)
def test_fancy_getitem_int_labels(self):
df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2))
with catch_warnings(record=True):
result = df.ix[[4, 2, 0], [2, 0]]
expected = df.reindex(index=[4, 2, 0], columns=[2, 0])
assert_frame_equal(result, expected)
with catch_warnings(record=True):
result = df.ix[[4, 2, 0]]
expected = df.reindex(index=[4, 2, 0])
assert_frame_equal(result, expected)
with catch_warnings(record=True):
result = df.ix[4]
expected = df.xs(4)
assert_series_equal(result, expected)
with catch_warnings(record=True):
result = df.ix[:, 3]
expected = df[3]
assert_series_equal(result, expected)
def test_fancy_index_int_labels_exceptions(self):
df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2))
with catch_warnings(record=True):
# labels that aren't contained
pytest.raises(KeyError, df.ix.__setitem__,
([0, 1, 2], [2, 3, 4]), 5)
# try to set indices not contained in frame
pytest.raises(KeyError, self.frame.ix.__setitem__,
['foo', 'bar', 'baz'], 1)
pytest.raises(KeyError, self.frame.ix.__setitem__,
(slice(None, None), ['E']), 1)
# partial setting now allows this GH2578
# pytest.raises(KeyError, self.frame.ix.__setitem__,
# (slice(None, None), 'E'), 1)
def test_setitem_fancy_mixed_2d(self):
with catch_warnings(record=True):
self.mixed_frame.ix[:5, ['C', 'B', 'A']] = 5
result = self.mixed_frame.ix[:5, ['C', 'B', 'A']]
assert (result.values == 5).all()
self.mixed_frame.ix[5] = np.nan
assert isna(self.mixed_frame.ix[5]).all()
self.mixed_frame.ix[5] = self.mixed_frame.ix[6]
assert_series_equal(self.mixed_frame.ix[5], self.mixed_frame.ix[6],
check_names=False)
# #1432
with catch_warnings(record=True):
df = DataFrame({1: [1., 2., 3.],
2: [3, 4, 5]})
assert df._is_mixed_type
df.ix[1] = [5, 10]
expected = DataFrame({1: [1., 5., 3.],
2: [3, 10, 5]})
assert_frame_equal(df, expected)
def test_ix_align(self):
b = Series(randn(10), name=0).sort_values()
df_orig = DataFrame(randn(10, 4))
df = df_orig.copy()
with catch_warnings(record=True):
df.ix[:, 0] = b
assert_series_equal(df.ix[:, 0].reindex(b.index), b)
with catch_warnings(record=True):
dft = df_orig.T
dft.ix[0, :] = b
assert_series_equal(dft.ix[0, :].reindex(b.index), b)
with catch_warnings(record=True):
df = df_orig.copy()
df.ix[:5, 0] = b
s = df.ix[:5, 0]
assert_series_equal(s, b.reindex(s.index))
with catch_warnings(record=True):
dft = df_orig.T
dft.ix[0, :5] = b
s = dft.ix[0, :5]
assert_series_equal(s, b.reindex(s.index))
with catch_warnings(record=True):
df = df_orig.copy()
idx = [0, 1, 3, 5]
df.ix[idx, 0] = b
s = df.ix[idx, 0]
assert_series_equal(s, b.reindex(s.index))
with catch_warnings(record=True):
dft = df_orig.T
dft.ix[0, idx] = b
s = dft.ix[0, idx]
assert_series_equal(s, b.reindex(s.index))
def test_ix_frame_align(self):
b = DataFrame(np.random.randn(3, 4))
df_orig = DataFrame(randn(10, 4))
df = df_orig.copy()
with catch_warnings(record=True):
df.ix[:3] = b
out = b.ix[:3]
assert_frame_equal(out, b)
b.sort_index(inplace=True)
with catch_warnings(record=True):
df = df_orig.copy()
df.ix[[0, 1, 2]] = b
out = df.ix[[0, 1, 2]].reindex(b.index)
assert_frame_equal(out, b)
with catch_warnings(record=True):
df = df_orig.copy()
df.ix[:3] = b
out = df.ix[:3]
assert_frame_equal(out, b.reindex(out.index))
def test_getitem_setitem_non_ix_labels(self):
df = tm.makeTimeDataFrame()
start, end = df.index[[5, 10]]
result = df.loc[start:end]
result2 = df[start:end]
expected = df[5:11]
assert_frame_equal(result, expected)
assert_frame_equal(result2, expected)
result = df.copy()
result.loc[start:end] = 0
result2 = df.copy()
result2[start:end] = 0
expected = df.copy()
expected[5:11] = 0
assert_frame_equal(result, expected)
assert_frame_equal(result2, expected)
def test_ix_multi_take(self):
df = DataFrame(np.random.randn(3, 2))
rs = df.loc[df.index == 0, :]
xp = df.reindex([0])
assert_frame_equal(rs, xp)
""" #1321
df = DataFrame(np.random.randn(3, 2))
rs = df.loc[df.index==0, df.columns==1]
xp = df.reindex([0], [1])
assert_frame_equal(rs, xp)
"""
def test_ix_multi_take_nonint_index(self):
df = DataFrame(np.random.randn(3, 2), index=['x', 'y', 'z'],
columns=['a', 'b'])
with catch_warnings(record=True):
rs = df.ix[[0], [0]]
xp = df.reindex(['x'], columns=['a'])
assert_frame_equal(rs, xp)
def test_ix_multi_take_multiindex(self):
df = DataFrame(np.random.randn(3, 2), index=['x', 'y', 'z'],
columns=[['a', 'b'], ['1', '2']])
with catch_warnings(record=True):
rs = df.ix[[0], [0]]
xp = df.reindex(['x'], columns=[('a', '1')])
assert_frame_equal(rs, xp)
def test_ix_dup(self):
idx = Index(['a', 'a', 'b', 'c', 'd', 'd'])
df = DataFrame(np.random.randn(len(idx), 3), idx)
with catch_warnings(record=True):
sub = df.ix[:'d']
assert_frame_equal(sub, df)
with catch_warnings(record=True):
sub = df.ix['a':'c']
assert_frame_equal(sub, df.ix[0:4])
with catch_warnings(record=True):
sub = df.ix['b':'d']
assert_frame_equal(sub, df.ix[2:])
def test_getitem_fancy_1d(self):
f = self.frame
# return self if no slicing...for now
with catch_warnings(record=True):
assert f.ix[:, :] is f
# low dimensional slice
with catch_warnings(record=True):
xs1 = f.ix[2, ['C', 'B', 'A']]
xs2 = f.xs(f.index[2]).reindex(['C', 'B', 'A'])
tm.assert_series_equal(xs1, xs2)
with catch_warnings(record=True):
ts1 = f.ix[5:10, 2]
ts2 = f[f.columns[2]][5:10]
tm.assert_series_equal(ts1, ts2)
# positional xs
with catch_warnings(record=True):
xs1 = f.ix[0]
xs2 = f.xs(f.index[0])
tm.assert_series_equal(xs1, xs2)
with catch_warnings(record=True):
xs1 = f.ix[f.index[5]]
xs2 = f.xs(f.index[5])
tm.assert_series_equal(xs1, xs2)
# single column
with catch_warnings(record=True):
assert_series_equal(f.ix[:, 'A'], f['A'])
# return view
with catch_warnings(record=True):
exp = f.copy()
exp.values[5] = 4
f.ix[5][:] = 4
tm.assert_frame_equal(exp, f)
with catch_warnings(record=True):
exp.values[:, 1] = 6
f.ix[:, 1][:] = 6
tm.assert_frame_equal(exp, f)
# slice of mixed-frame
with catch_warnings(record=True):
xs = self.mixed_frame.ix[5]
exp = self.mixed_frame.xs(self.mixed_frame.index[5])
tm.assert_series_equal(xs, exp)
def test_setitem_fancy_1d(self):
# case 1: set cross-section for indices
frame = self.frame.copy()
expected = self.frame.copy()
with catch_warnings(record=True):
frame.ix[2, ['C', 'B', 'A']] = [1., 2., 3.]
expected['C'][2] = 1.
expected['B'][2] = 2.
expected['A'][2] = 3.
assert_frame_equal(frame, expected)
with catch_warnings(record=True):
frame2 = self.frame.copy()
frame2.ix[2, [3, 2, 1]] = [1., 2., 3.]
assert_frame_equal(frame, expected)
# case 2, set a section of a column
frame = self.frame.copy()
expected = self.frame.copy()
with catch_warnings(record=True):
vals = randn(5)
expected.values[5:10, 2] = vals
frame.ix[5:10, 2] = vals
assert_frame_equal(frame, expected)
with catch_warnings(record=True):
frame2 = self.frame.copy()
frame2.ix[5:10, 'B'] = vals
assert_frame_equal(frame, expected)
# case 3: full xs
frame = self.frame.copy()
expected = self.frame.copy()
with catch_warnings(record=True):
frame.ix[4] = 5.
expected.values[4] = 5.
assert_frame_equal(frame, expected)
with catch_warnings(record=True):
frame.ix[frame.index[4]] = 6.
expected.values[4] = 6.
assert_frame_equal(frame, expected)
# single column
frame = self.frame.copy()
expected = self.frame.copy()
with catch_warnings(record=True):
frame.ix[:, 'A'] = 7.
expected['A'] = 7.
assert_frame_equal(frame, expected)
def test_getitem_fancy_scalar(self):
f = self.frame
ix = f.loc
# individual value
for col in f.columns:
ts = f[col]
for idx in f.index[::5]:
assert ix[idx, col] == ts[idx]
def test_setitem_fancy_scalar(self):
f = self.frame
expected = self.frame.copy()
ix = f.loc
# individual value
for j, col in enumerate(f.columns):
ts = f[col] # noqa
for idx in f.index[::5]:
i = f.index.get_loc(idx)
val = randn()
expected.values[i, j] = val
ix[idx, col] = val
assert_frame_equal(f, expected)
def test_getitem_fancy_boolean(self):
f = self.frame
ix = f.loc
expected = f.reindex(columns=['B', 'D'])
result = ix[:, [False, True, False, True]]
assert_frame_equal(result, expected)
expected = f.reindex(index=f.index[5:10], columns=['B', 'D'])
result = ix[f.index[5:10], [False, True, False, True]]
assert_frame_equal(result, expected)
boolvec = f.index > f.index[7]
expected = f.reindex(index=f.index[boolvec])
result = ix[boolvec]
assert_frame_equal(result, expected)
result = ix[boolvec, :]
assert_frame_equal(result, expected)
result = ix[boolvec, f.columns[2:]]
expected = f.reindex(index=f.index[boolvec],
columns=['C', 'D'])
assert_frame_equal(result, expected)
def test_setitem_fancy_boolean(self):
# from 2d, set with booleans
frame = self.frame.copy()
expected = self.frame.copy()
mask = frame['A'] > 0
frame.loc[mask] = 0.
expected.values[mask.values] = 0.
assert_frame_equal(frame, expected)
frame = self.frame.copy()
expected = self.frame.copy()
frame.loc[mask, ['A', 'B']] = 0.
expected.values[mask.values, :2] = 0.
assert_frame_equal(frame, expected)
def test_getitem_fancy_ints(self):
result = self.frame.iloc[[1, 4, 7]]
expected = self.frame.loc[self.frame.index[[1, 4, 7]]]
assert_frame_equal(result, expected)
result = self.frame.iloc[:, [2, 0, 1]]
expected = self.frame.loc[:, self.frame.columns[[2, 0, 1]]]
assert_frame_equal(result, expected)
def test_getitem_setitem_fancy_exceptions(self):
ix = self.frame.iloc
with tm.assert_raises_regex(IndexingError, 'Too many indexers'):
ix[:, :, :]
with pytest.raises(IndexingError):
ix[:, :, :] = 1
def test_getitem_setitem_boolean_misaligned(self):
# boolean index misaligned labels
mask = self.frame['A'][::-1] > 1
result = self.frame.loc[mask]
expected = self.frame.loc[mask[::-1]]
assert_frame_equal(result, expected)
cp = self.frame.copy()
expected = self.frame.copy()
cp.loc[mask] = 0
expected.loc[mask] = 0
assert_frame_equal(cp, expected)
def test_getitem_setitem_boolean_multi(self):
df = DataFrame(np.random.randn(3, 2))
# get
k1 = np.array([True, False, True])
k2 = np.array([False, True])
result = df.loc[k1, k2]
expected = df.loc[[0, 2], [1]]
assert_frame_equal(result, expected)
expected = df.copy()
df.loc[np.array([True, False, True]),
np.array([False, True])] = 5
expected.loc[[0, 2], [1]] = 5
assert_frame_equal(df, expected)
def test_getitem_setitem_float_labels(self):
index = Index([1.5, 2, 3, 4, 5])
df = DataFrame(np.random.randn(5, 5), index=index)
result = df.loc[1.5:4]
expected = df.reindex([1.5, 2, 3, 4])
assert_frame_equal(result, expected)
assert len(result) == 4
result = df.loc[4:5]
expected = df.reindex([4, 5]) # reindex with int
assert_frame_equal(result, expected, check_index_type=False)
assert len(result) == 2
result = df.loc[4:5]
expected = df.reindex([4.0, 5.0]) # reindex with float
assert_frame_equal(result, expected)
assert len(result) == 2
# loc_float changes this to work properly
result = df.loc[1:2]
expected = df.iloc[0:2]
assert_frame_equal(result, expected)
df.loc[1:2] = 0
result = df[1:2]
assert (result == 0).all().all()
# #2727
index = Index([1.0, 2.5, 3.5, 4.5, 5.0])
df = DataFrame(np.random.randn(5, 5), index=index)
# positional slicing only via iloc!
pytest.raises(TypeError, lambda: df.iloc[1.0:5])
result = df.iloc[4:5]
expected = df.reindex([5.0])
assert_frame_equal(result, expected)
assert len(result) == 1
cp = df.copy()
def f():
cp.iloc[1.0:5] = 0
pytest.raises(TypeError, f)
def f():
result = cp.iloc[1.0:5] == 0 # noqa
pytest.raises(TypeError, f)
assert result.values.all()
assert (cp.iloc[0:1] == df.iloc[0:1]).values.all()
cp = df.copy()
cp.iloc[4:5] = 0
assert (cp.iloc[4:5] == 0).values.all()
assert (cp.iloc[0:4] == df.iloc[0:4]).values.all()
# float slicing
result = df.loc[1.0:5]
expected = df
assert_frame_equal(result, expected)
assert len(result) == 5
result = df.loc[1.1:5]
expected = df.reindex([2.5, 3.5, 4.5, 5.0])
assert_frame_equal(result, expected)
assert len(result) == 4
result = df.loc[4.51:5]
expected = df.reindex([5.0])
assert_frame_equal(result, expected)
assert len(result) == 1
result = df.loc[1.0:5.0]
expected = df.reindex([1.0, 2.5, 3.5, 4.5, 5.0])
assert_frame_equal(result, expected)
assert len(result) == 5
cp = df.copy()
cp.loc[1.0:5.0] = 0
result = cp.loc[1.0:5.0]
assert (result == 0).values.all()
def test_setitem_single_column_mixed(self):
df = DataFrame(randn(5, 3), index=['a', 'b', 'c', 'd', 'e'],
columns=['foo', 'bar', 'baz'])
df['str'] = 'qux'
df.loc[df.index[::2], 'str'] = nan
expected = np.array([nan, 'qux', nan, 'qux', nan], dtype=object)
assert_almost_equal(df['str'].values, expected)
def test_setitem_single_column_mixed_datetime(self):
df = DataFrame(randn(5, 3), index=['a', 'b', 'c', 'd', 'e'],
columns=['foo', 'bar', 'baz'])
df['timestamp'] = Timestamp('20010102')
# check our dtypes
result = df.get_dtype_counts()
expected = Series({'float64': 3, 'datetime64[ns]': 1})
assert_series_equal(result, expected)
# set an allowable datetime64 type
df.loc['b', 'timestamp'] = iNaT
assert isna(df.loc['b', 'timestamp'])
# allow this syntax
df.loc['c', 'timestamp'] = nan
assert isna(df.loc['c', 'timestamp'])
# allow this syntax
df.loc['d', :] = nan
assert not isna(df.loc['c', :]).all()
# as of GH 3216 this will now work!
# try to set with a list like item
# pytest.raises(
# Exception, df.loc.__setitem__, ('d', 'timestamp'), [nan])
def test_setitem_frame(self):
piece = self.frame.loc[self.frame.index[:2], ['A', 'B']]
self.frame.loc[self.frame.index[-2]:, ['A', 'B']] = piece.values
result = self.frame.loc[self.frame.index[-2:], ['A', 'B']].values
expected = piece.values
assert_almost_equal(result, expected)
# GH 3216
# already aligned
f = self.mixed_frame.copy()
piece = DataFrame([[1., 2.], [3., 4.]],
index=f.index[0:2], columns=['A', 'B'])
key = (slice(None, 2), ['A', 'B'])
f.loc[key] = piece
assert_almost_equal(f.loc[f.index[0:2], ['A', 'B']].values,
piece.values)
# rows unaligned
f = self.mixed_frame.copy()
piece = DataFrame([[1., 2.], [3., 4.], [5., 6.], [7., 8.]],
index=list(f.index[0:2]) + ['foo', 'bar'],
columns=['A', 'B'])
key = (slice(None, 2), ['A', 'B'])
f.loc[key] = piece
assert_almost_equal(f.loc[f.index[0:2:], ['A', 'B']].values,
piece.values[0:2])
# key is unaligned with values
f = self.mixed_frame.copy()
piece = f.loc[f.index[:2], ['A']]
piece.index = f.index[-2:]
key = (slice(-2, None), ['A', 'B'])
f.loc[key] = piece
piece['B'] = np.nan
assert_almost_equal(f.loc[f.index[-2:], ['A', 'B']].values,
piece.values)
# ndarray
f = self.mixed_frame.copy()
piece = self.mixed_frame.loc[f.index[:2], ['A', 'B']]
key = (slice(-2, None), ['A', 'B'])
f.loc[key] = piece.values
assert_almost_equal(f.loc[f.index[-2:], ['A', 'B']].values,
piece.values)
# needs upcasting
df = DataFrame([[1, 2, 'foo'], [3, 4, 'bar']], columns=['A', 'B', 'C'])
df2 = df.copy()
df2.loc[:, ['A', 'B']] = df.loc[:, ['A', 'B']] + 0.5
expected = df.reindex(columns=['A', 'B'])
expected += 0.5
expected['C'] = df['C']
assert_frame_equal(df2, expected)
def test_setitem_frame_align(self):
piece = self.frame.loc[self.frame.index[:2], ['A', 'B']]
piece.index = self.frame.index[-2:]
piece.columns = ['A', 'B']
self.frame.loc[self.frame.index[-2:], ['A', 'B']] = piece
result = self.frame.loc[self.frame.index[-2:], ['A', 'B']].values
expected = piece.values
assert_almost_equal(result, expected)
def test_getitem_setitem_ix_duplicates(self):
# #1201
df = DataFrame(np.random.randn(5, 3),
index=['foo', 'foo', 'bar', 'baz', 'bar'])
result = df.loc['foo']
expected = df[:2]
assert_frame_equal(result, expected)
result = df.loc['bar']
expected = df.iloc[[2, 4]]
assert_frame_equal(result, expected)
result = df.loc['baz']
expected = df.iloc[3]
assert_series_equal(result, expected)
def test_getitem_ix_boolean_duplicates_multiple(self):
# #1201
df = DataFrame(np.random.randn(5, 3),
index=['foo', 'foo', 'bar', 'baz', 'bar'])
result = df.loc[['bar']]
exp = df.iloc[[2, 4]]
assert_frame_equal(result, exp)
result = df.loc[df[1] > 0]
exp = df[df[1] > 0]
assert_frame_equal(result, exp)
result = df.loc[df[0] > 0]
exp = df[df[0] > 0]
assert_frame_equal(result, exp)
def test_getitem_setitem_ix_bool_keyerror(self):
# #2199
df = DataFrame({'a': [1, 2, 3]})
pytest.raises(KeyError, df.loc.__getitem__, False)
pytest.raises(KeyError, df.loc.__getitem__, True)
pytest.raises(KeyError, df.loc.__setitem__, False, 0)
pytest.raises(KeyError, df.loc.__setitem__, True, 0)
def test_getitem_list_duplicates(self):
# #1943
df = DataFrame(np.random.randn(4, 4), columns=list('AABC'))
df.columns.name = 'foo'
result = df[['B', 'C']]
assert result.columns.name == 'foo'
expected = df.iloc[:, 2:]
assert_frame_equal(result, expected)
def test_get_value(self):
for idx in self.frame.index:
for col in self.frame.columns:
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = self.frame.get_value(idx, col)
expected = self.frame[col][idx]
assert result == expected
def test_lookup(self):
def alt(df, rows, cols, dtype):
result = []
for r, c in zip(rows, cols):
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result.append(df.get_value(r, c))
return np.array(result, dtype=dtype)
def testit(df):
rows = list(df.index) * len(df.columns)
cols = list(df.columns) * len(df.index)
result = df.lookup(rows, cols)
expected = alt(df, rows, cols, dtype=np.object_)
tm.assert_almost_equal(result, expected, check_dtype=False)
testit(self.mixed_frame)
testit(self.frame)
df = DataFrame({'label': ['a', 'b', 'a', 'c'],
'mask_a': [True, True, False, True],
'mask_b': [True, False, False, False],
'mask_c': [False, True, False, True]})
df['mask'] = df.lookup(df.index, 'mask_' + df['label'])
exp_mask = alt(df, df.index, 'mask_' + df['label'], dtype=np.bool_)
tm.assert_series_equal(df['mask'], pd.Series(exp_mask, name='mask'))
assert df['mask'].dtype == np.bool_
with pytest.raises(KeyError):
self.frame.lookup(['xyz'], ['A'])
with pytest.raises(KeyError):
self.frame.lookup([self.frame.index[0]], ['xyz'])
with tm.assert_raises_regex(ValueError, 'same size'):
self.frame.lookup(['a', 'b', 'c'], ['a'])
def test_set_value(self):
for idx in self.frame.index:
for col in self.frame.columns:
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
self.frame.set_value(idx, col, 1)
assert self.frame[col][idx] == 1
def test_set_value_resize(self):
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
res = self.frame.set_value('foobar', 'B', 0)
assert res is self.frame
assert res.index[-1] == 'foobar'
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
assert res.get_value('foobar', 'B') == 0
self.frame.loc['foobar', 'qux'] = 0
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
assert self.frame.get_value('foobar', 'qux') == 0
res = self.frame.copy()
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
res3 = res.set_value('foobar', 'baz', 'sam')
assert res3['baz'].dtype == np.object_
res = self.frame.copy()
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
res3 = res.set_value('foobar', 'baz', True)
assert res3['baz'].dtype == np.object_
res = self.frame.copy()
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
res3 = res.set_value('foobar', 'baz', 5)
assert is_float_dtype(res3['baz'])
assert isna(res3['baz'].drop(['foobar'])).all()
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
pytest.raises(ValueError, res3.set_value, 'foobar', 'baz', 'sam')
def test_set_value_with_index_dtype_change(self):
df_orig = DataFrame(randn(3, 3), index=lrange(3), columns=list('ABC'))
# this is actually ambiguous as the 2 is interpreted as a positional
# so column is not created
df = df_orig.copy()
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
df.set_value('C', 2, 1.0)
assert list(df.index) == list(df_orig.index) + ['C']
# assert list(df.columns) == list(df_orig.columns) + [2]
df = df_orig.copy()
df.loc['C', 2] = 1.0
assert list(df.index) == list(df_orig.index) + ['C']
# assert list(df.columns) == list(df_orig.columns) + [2]
# create both new
df = df_orig.copy()
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
df.set_value('C', 'D', 1.0)
assert list(df.index) == list(df_orig.index) + ['C']
assert list(df.columns) == list(df_orig.columns) + ['D']
df = df_orig.copy()
df.loc['C', 'D'] = 1.0
assert list(df.index) == list(df_orig.index) + ['C']
assert list(df.columns) == list(df_orig.columns) + ['D']
def test_get_set_value_no_partial_indexing(self):
# partial w/ MultiIndex raise exception
index = MultiIndex.from_tuples([(0, 1), (0, 2), (1, 1), (1, 2)])
df = DataFrame(index=index, columns=lrange(4))
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
pytest.raises(KeyError, df.get_value, 0, 1)
def test_single_element_ix_dont_upcast(self):
self.frame['E'] = 1
assert issubclass(self.frame['E'].dtype.type, (int, np.integer))
with catch_warnings(record=True):
result = self.frame.ix[self.frame.index[5], 'E']
assert is_integer(result)
result = self.frame.loc[self.frame.index[5], 'E']
assert is_integer(result)
# GH 11617
df = pd.DataFrame(dict(a=[1.23]))
df["b"] = 666
with catch_warnings(record=True):
result = df.ix[0, "b"]
assert is_integer(result)
result = df.loc[0, "b"]
assert is_integer(result)
expected = Series([666], [0], name='b')
with catch_warnings(record=True):
result = df.ix[[0], "b"]
assert_series_equal(result, expected)
result = df.loc[[0], "b"]
assert_series_equal(result, expected)
def test_iloc_row(self):
df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2))
result = df.iloc[1]
exp = df.loc[2]
assert_series_equal(result, exp)
result = df.iloc[2]
exp = df.loc[4]
assert_series_equal(result, exp)
# slice
result = df.iloc[slice(4, 8)]
expected = df.loc[8:14]
assert_frame_equal(result, expected)
# verify slice is view
# setting it makes it raise/warn
def f():
result[2] = 0.
pytest.raises(com.SettingWithCopyError, f)
exp_col = df[2].copy()
exp_col[4:8] = 0.
assert_series_equal(df[2], exp_col)
# list of integers
result = df.iloc[[1, 2, 4, 6]]
expected = df.reindex(df.index[[1, 2, 4, 6]])
assert_frame_equal(result, expected)
def test_iloc_col(self):
df = DataFrame(np.random.randn(4, 10), columns=lrange(0, 20, 2))
result = df.iloc[:, 1]
exp = df.loc[:, 2]
assert_series_equal(result, exp)
result = df.iloc[:, 2]
exp = df.loc[:, 4]
assert_series_equal(result, exp)
# slice
result = df.iloc[:, slice(4, 8)]
expected = df.loc[:, 8:14]
assert_frame_equal(result, expected)
# verify slice is view
# and that we are setting a copy
def f():
result[8] = 0.
pytest.raises(com.SettingWithCopyError, f)
assert (df[8] == 0).all()
# list of integers
result = df.iloc[:, [1, 2, 4, 6]]
expected = df.reindex(columns=df.columns[[1, 2, 4, 6]])
assert_frame_equal(result, expected)
def test_iloc_duplicates(self):
df = DataFrame(np.random.rand(3, 3), columns=list('ABC'),
index=list('aab'))
result = df.iloc[0]
with catch_warnings(record=True):
result2 = df.ix[0]
assert isinstance(result, Series)
assert_almost_equal(result.values, df.values[0])
assert_series_equal(result, result2)
with catch_warnings(record=True):
result = df.T.iloc[:, 0]
result2 = df.T.ix[:, 0]
assert isinstance(result, Series)
assert_almost_equal(result.values, df.values[0])
assert_series_equal(result, result2)
# multiindex
df = DataFrame(np.random.randn(3, 3),
columns=[['i', 'i', 'j'], ['A', 'A', 'B']],
index=[['i', 'i', 'j'], ['X', 'X', 'Y']])
with catch_warnings(record=True):
rs = df.iloc[0]
xp = df.ix[0]
assert_series_equal(rs, xp)
with catch_warnings(record=True):
rs = df.iloc[:, 0]
xp = df.T.ix[0]
assert_series_equal(rs, xp)
with catch_warnings(record=True):
rs = df.iloc[:, [0]]
xp = df.ix[:, [0]]
assert_frame_equal(rs, xp)
# #2259
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1, 1, 2])
result = df.iloc[:, [0]]
expected = df.take([0], axis=1)
assert_frame_equal(result, expected)
def test_loc_duplicates(self):
# gh-17105
# insert a duplicate element to the index
trange = pd.date_range(start=pd.Timestamp(year=2017, month=1, day=1),
end=pd.Timestamp(year=2017, month=1, day=5))
trange = trange.insert(loc=5,
item=pd.Timestamp(year=2017, month=1, day=5))
df = pd.DataFrame(0, index=trange, columns=["A", "B"])
bool_idx = np.array([False, False, False, False, False, True])
# assignment
df.loc[trange[bool_idx], "A"] = 6
expected = pd.DataFrame({'A': [0, 0, 0, 0, 6, 6],
'B': [0, 0, 0, 0, 0, 0]},
index=trange)
tm.assert_frame_equal(df, expected)
# in-place
df = pd.DataFrame(0, index=trange, columns=["A", "B"])
df.loc[trange[bool_idx], "A"] += 6
tm.assert_frame_equal(df, expected)
def test_iloc_sparse_propegate_fill_value(self):
from pandas.core.sparse.api import SparseDataFrame
df = SparseDataFrame({'A': [999, 1]}, default_fill_value=999)
assert len(df['A'].sp_values) == len(df.iloc[:, 0].sp_values)
def test_iat(self):
for i, row in enumerate(self.frame.index):
for j, col in enumerate(self.frame.columns):
result = self.frame.iat[i, j]
expected = self.frame.at[row, col]
assert result == expected
def test_nested_exception(self):
# Ignore the strange way of triggering the problem
# (which may get fixed), it's just a way to trigger
# the issue or reraising an outer exception without
# a named argument
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6],
"c": [7, 8, 9]}).set_index(["a", "b"])
l = list(df.index)
l[0] = ["a", "b"]
df.index = l
try:
repr(df)
except Exception as e:
assert type(e) != UnboundLocalError
def test_reindex_methods(self):
df = pd.DataFrame({'x': list(range(5))})
target = np.array([-0.1, 0.9, 1.1, 1.5])
for method, expected_values in [('nearest', [0, 1, 1, 2]),
('pad', [np.nan, 0, 1, 1]),
('backfill', [0, 1, 2, 2])]:
expected = pd.DataFrame({'x': expected_values}, index=target)
actual = df.reindex(target, method=method)
assert_frame_equal(expected, actual)
actual = df.reindex_like(df, method=method, tolerance=0)
assert_frame_equal(df, actual)
actual = df.reindex_like(df, method=method, tolerance=[0, 0, 0, 0])
assert_frame_equal(df, actual)
actual = df.reindex(target, method=method, tolerance=1)
assert_frame_equal(expected, actual)
actual = df.reindex(target, method=method, tolerance=[1, 1, 1, 1])
assert_frame_equal(expected, actual)
e2 = expected[::-1]
actual = df.reindex(target[::-1], method=method)
assert_frame_equal(e2, actual)
new_order = [3, 0, 2, 1]
e2 = expected.iloc[new_order]
actual = df.reindex(target[new_order], method=method)
assert_frame_equal(e2, actual)
switched_method = ('pad' if method == 'backfill'
else 'backfill' if method == 'pad'
else method)
actual = df[::-1].reindex(target, method=switched_method)
assert_frame_equal(expected, actual)
expected = pd.DataFrame({'x': [0, 1, 1, np.nan]}, index=target)
actual = df.reindex(target, method='nearest', tolerance=0.2)
assert_frame_equal(expected, actual)
expected = pd.DataFrame({'x': [0, np.nan, 1, np.nan]}, index=target)
actual = df.reindex(target, method='nearest',
tolerance=[0.5, 0.01, 0.4, 0.1])
assert_frame_equal(expected, actual)
def test_reindex_frame_add_nat(self):
rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s')
df = DataFrame({'A': np.random.randn(len(rng)), 'B': rng})
result = df.reindex(lrange(15))
assert np.issubdtype(result['B'].dtype, np.dtype('M8[ns]'))
mask = com.isna(result)['B']
assert mask[-5:].all()
assert not mask[:-5].any()
def test_set_dataframe_column_ns_dtype(self):
x = DataFrame([datetime.now(), datetime.now()])
assert x[0].dtype == np.dtype('M8[ns]')
def test_non_monotonic_reindex_methods(self):
dr = pd.date_range('2013-08-01', periods=6, freq='B')
data = np.random.randn(6, 1)
df = pd.DataFrame(data, index=dr, columns=list('A'))
df_rev = pd.DataFrame(data, index=dr[[3, 4, 5] + [0, 1, 2]],
columns=list('A'))
# index is not monotonic increasing or decreasing
pytest.raises(ValueError, df_rev.reindex, df.index, method='pad')
pytest.raises(ValueError, df_rev.reindex, df.index, method='ffill')
pytest.raises(ValueError, df_rev.reindex, df.index, method='bfill')
pytest.raises(ValueError, df_rev.reindex, df.index, method='nearest')
def test_reindex_level(self):
from itertools import permutations
icol = ['jim', 'joe', 'jolie']
def verify_first_level(df, level, idx, check_index_type=True):
f = lambda val: np.nonzero(df[level] == val)[0]
i = np.concatenate(list(map(f, idx)))
left = df.set_index(icol).reindex(idx, level=level)
right = df.iloc[i].set_index(icol)
assert_frame_equal(left, right, check_index_type=check_index_type)
def verify(df, level, idx, indexer, check_index_type=True):
left = df.set_index(icol).reindex(idx, level=level)
right = df.iloc[indexer].set_index(icol)
assert_frame_equal(left, right, check_index_type=check_index_type)
df = pd.DataFrame({'jim': list('B' * 4 + 'A' * 2 + 'C' * 3),
'joe': list('abcdeabcd')[::-1],
'jolie': [10, 20, 30] * 3,
'joline': np.random.randint(0, 1000, 9)})
target = [['C', 'B', 'A'], ['F', 'C', 'A', 'D'], ['A'],
['A', 'B', 'C'], ['C', 'A', 'B'], ['C', 'B'], ['C', 'A'],
['A', 'B'], ['B', 'A', 'C']]
for idx in target:
verify_first_level(df, 'jim', idx)
# reindex by these causes different MultiIndex levels
for idx in [['D', 'F'], ['A', 'C', 'B']]:
verify_first_level(df, 'jim', idx, check_index_type=False)
verify(df, 'joe', list('abcde'), [3, 2, 1, 0, 5, 4, 8, 7, 6])
verify(df, 'joe', list('abcd'), [3, 2, 1, 0, 5, 8, 7, 6])
verify(df, 'joe', list('abc'), [3, 2, 1, 8, 7, 6])
verify(df, 'joe', list('eca'), [1, 3, 4, 6, 8])
verify(df, 'joe', list('edc'), [0, 1, 4, 5, 6])
verify(df, 'joe', list('eadbc'), [3, 0, 2, 1, 4, 5, 8, 7, 6])
verify(df, 'joe', list('edwq'), [0, 4, 5])
verify(df, 'joe', list('wq'), [], check_index_type=False)
df = DataFrame({'jim': ['mid'] * 5 + ['btm'] * 8 + ['top'] * 7,
'joe': ['3rd'] * 2 + ['1st'] * 3 + ['2nd'] * 3 +
['1st'] * 2 + ['3rd'] * 3 + ['1st'] * 2 +
['3rd'] * 3 + ['2nd'] * 2,
# this needs to be jointly unique with jim and joe or
# reindexing will fail ~1.5% of the time, this works
# out to needing unique groups of same size as joe
'jolie': np.concatenate([
np.random.choice(1000, x, replace=False)
for x in [2, 3, 3, 2, 3, 2, 3, 2]]),
'joline': np.random.randn(20).round(3) * 10})
for idx in permutations(df['jim'].unique()):
for i in range(3):
verify_first_level(df, 'jim', idx[:i + 1])
i = [2, 3, 4, 0, 1, 8, 9, 5, 6, 7, 10,
11, 12, 13, 14, 18, 19, 15, 16, 17]
verify(df, 'joe', ['1st', '2nd', '3rd'], i)
i = [0, 1, 2, 3, 4, 10, 11, 12, 5, 6,
7, 8, 9, 15, 16, 17, 18, 19, 13, 14]
verify(df, 'joe', ['3rd', '2nd', '1st'], i)
i = [0, 1, 5, 6, 7, 10, 11, 12, 18, 19, 15, 16, 17]
verify(df, 'joe', ['2nd', '3rd'], i)
i = [0, 1, 2, 3, 4, 10, 11, 12, 8, 9, 15, 16, 17, 13, 14]
verify(df, 'joe', ['3rd', '1st'], i)
def test_getitem_ix_float_duplicates(self):
df = pd.DataFrame(np.random.randn(3, 3),
index=[0.1, 0.2, 0.2], columns=list('abc'))
expect = df.iloc[1:]
assert_frame_equal(df.loc[0.2], expect)
with catch_warnings(record=True):
assert_frame_equal(df.ix[0.2], expect)
expect = df.iloc[1:, 0]
assert_series_equal(df.loc[0.2, 'a'], expect)
df.index = [1, 0.2, 0.2]
expect = df.iloc[1:]
assert_frame_equal(df.loc[0.2], expect)
with catch_warnings(record=True):
assert_frame_equal(df.ix[0.2], expect)
expect = df.iloc[1:, 0]
assert_series_equal(df.loc[0.2, 'a'], expect)
df = pd.DataFrame(np.random.randn(4, 3),
index=[1, 0.2, 0.2, 1], columns=list('abc'))
expect = df.iloc[1:-1]
assert_frame_equal(df.loc[0.2], expect)
with catch_warnings(record=True):
assert_frame_equal(df.ix[0.2], expect)
expect = df.iloc[1:-1, 0]
assert_series_equal(df.loc[0.2, 'a'], expect)
df.index = [0.1, 0.2, 2, 0.2]
expect = df.iloc[[1, -1]]
assert_frame_equal(df.loc[0.2], expect)
with catch_warnings(record=True):
assert_frame_equal(df.ix[0.2], expect)
expect = df.iloc[[1, -1], 0]
assert_series_equal(df.loc[0.2, 'a'], expect)
def test_setitem_with_sparse_value(self):
# GH8131
df = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]})
sp_series = pd.Series([0, 0, 1]).to_sparse(fill_value=0)
df['new_column'] = sp_series
assert_series_equal(df['new_column'], sp_series, check_names=False)
def test_setitem_with_unaligned_sparse_value(self):
df = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]})
sp_series = (pd.Series([0, 0, 1], index=[2, 1, 0])
.to_sparse(fill_value=0))
df['new_column'] = sp_series
exp = pd.Series([1, 0, 0], name='new_column')
assert_series_equal(df['new_column'], exp)
def test_setitem_with_unaligned_tz_aware_datetime_column(self):
# GH 12981
# Assignment of unaligned offset-aware datetime series.
# Make sure timezone isn't lost
column = pd.Series(pd.date_range('2015-01-01', periods=3, tz='utc'),
name='dates')
df = pd.DataFrame({'dates': column})
df['dates'] = column[[1, 0, 2]]
assert_series_equal(df['dates'], column)
df = pd.DataFrame({'dates': column})
df.loc[[0, 1, 2], 'dates'] = column[[1, 0, 2]]
assert_series_equal(df['dates'], column)
def test_setitem_datetime_coercion(self):
# gh-1048
df = pd.DataFrame({'c': [pd.Timestamp('2010-10-01')] * 3})
df.loc[0:1, 'c'] = np.datetime64('2008-08-08')
assert pd.Timestamp('2008-08-08') == df.loc[0, 'c']
assert pd.Timestamp('2008-08-08') == df.loc[1, 'c']
df.loc[2, 'c'] = date(2005, 5, 5)
assert pd.Timestamp('2005-05-05') == df.loc[2, 'c']
def test_setitem_datetimelike_with_inference(self):
# GH 7592
# assignment of timedeltas with NaT
one_hour = timedelta(hours=1)
df = DataFrame(index=date_range('20130101', periods=4))
df['A'] = np.array([1 * one_hour] * 4, dtype='m8[ns]')
df.loc[:, 'B'] = np.array([2 * one_hour] * 4, dtype='m8[ns]')
df.loc[:3, 'C'] = np.array([3 * one_hour] * 3, dtype='m8[ns]')
df.loc[:, 'D'] = np.array([4 * one_hour] * 4, dtype='m8[ns]')
df.loc[df.index[:3], 'E'] = np.array([5 * one_hour] * 3,
dtype='m8[ns]')
df['F'] = np.timedelta64('NaT')
df.loc[df.index[:-1], 'F'] = np.array([6 * one_hour] * 3,
dtype='m8[ns]')
df.loc[df.index[-3]:, 'G'] = date_range('20130101', periods=3)
df['H'] = np.datetime64('NaT')
result = df.dtypes
expected = Series([np.dtype('timedelta64[ns]')] * 6 +
[np.dtype('datetime64[ns]')] * 2,
index=list('ABCDEFGH'))
assert_series_equal(result, expected)
def test_at_time_between_time_datetimeindex(self):
index = date_range("2012-01-01", "2012-01-05", freq='30min')
df = DataFrame(randn(len(index), 5), index=index)
akey = time(12, 0, 0)
bkey = slice(time(13, 0, 0), time(14, 0, 0))
ainds = [24, 72, 120, 168]
binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172]
result = df.at_time(akey)
expected = df.loc[akey]
expected2 = df.iloc[ainds]
assert_frame_equal(result, expected)
assert_frame_equal(result, expected2)
assert len(result) == 4
result = df.between_time(bkey.start, bkey.stop)
expected = df.loc[bkey]
expected2 = df.iloc[binds]
assert_frame_equal(result, expected)
assert_frame_equal(result, expected2)
assert len(result) == 12
result = df.copy()
result.loc[akey] = 0
result = result.loc[akey]
expected = df.loc[akey].copy()
expected.loc[:] = 0
assert_frame_equal(result, expected)
result = df.copy()
result.loc[akey] = 0
result.loc[akey] = df.iloc[ainds]
assert_frame_equal(result, df)
result = df.copy()
result.loc[bkey] = 0
result = result.loc[bkey]
expected = df.loc[bkey].copy()
expected.loc[:] = 0
assert_frame_equal(result, expected)
result = df.copy()
result.loc[bkey] = 0
result.loc[bkey] = df.iloc[binds]
assert_frame_equal(result, df)
def test_xs(self):
idx = self.frame.index[5]
xs = self.frame.xs(idx)
for item, value in compat.iteritems(xs):
if np.isnan(value):
assert np.isnan(self.frame[item][idx])
else:
assert value == self.frame[item][idx]
# mixed-type xs
test_data = {
'A': {'1': 1, '2': 2},
'B': {'1': '1', '2': '2', '3': '3'},
}
frame = DataFrame(test_data)
xs = frame.xs('1')
assert xs.dtype == np.object_
assert xs['A'] == 1
assert xs['B'] == '1'
with pytest.raises(KeyError):
self.tsframe.xs(self.tsframe.index[0] - BDay())
# xs get column
series = self.frame.xs('A', axis=1)
expected = self.frame['A']
assert_series_equal(series, expected)
# view is returned if possible
series = self.frame.xs('A', axis=1)
series[:] = 5
assert (expected == 5).all()
def test_xs_corner(self):
# pathological mixed-type reordering case
df = DataFrame(index=[0])
df['A'] = 1.
df['B'] = 'foo'
df['C'] = 2.
df['D'] = 'bar'
df['E'] = 3.
xs = df.xs(0)
exp = pd.Series([1., 'foo', 2., 'bar', 3.],
index=list('ABCDE'), name=0)
tm.assert_series_equal(xs, exp)
# no columns but Index(dtype=object)
df = DataFrame(index=['a', 'b', 'c'])
result = df.xs('a')
expected = Series([], name='a', index=pd.Index([], dtype=object))
assert_series_equal(result, expected)
def test_xs_duplicates(self):
df = DataFrame(randn(5, 2), index=['b', 'b', 'c', 'b', 'a'])
cross = df.xs('c')
exp = df.iloc[2]
assert_series_equal(cross, exp)
def test_xs_keep_level(self):
df = (DataFrame({'day': {0: 'sat', 1: 'sun'},
'flavour': {0: 'strawberry', 1: 'strawberry'},
'sales': {0: 10, 1: 12},
'year': {0: 2008, 1: 2008}})
.set_index(['year', 'flavour', 'day']))
result = df.xs('sat', level='day', drop_level=False)
expected = df[:1]
assert_frame_equal(result, expected)
result = df.xs([2008, 'sat'], level=['year', 'day'], drop_level=False)
assert_frame_equal(result, expected)
def test_xs_view(self):
# in 0.14 this will return a view if possible a copy otherwise, but
# this is numpy dependent
dm = DataFrame(np.arange(20.).reshape(4, 5),
index=lrange(4), columns=lrange(5))
dm.xs(2)[:] = 10
assert (dm.xs(2) == 10).all()
def test_index_namedtuple(self):
from collections import namedtuple
IndexType = namedtuple("IndexType", ["a", "b"])
idx1 = IndexType("foo", "bar")
idx2 = IndexType("baz", "bof")
index = Index([idx1, idx2],
name="composite_index", tupleize_cols=False)
df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"])
with catch_warnings(record=True):
result = df.ix[IndexType("foo", "bar")]["A"]
assert result == 1
result = df.loc[IndexType("foo", "bar")]["A"]
assert result == 1
def test_boolean_indexing(self):
idx = lrange(3)
cols = ['A', 'B', 'C']
df1 = DataFrame(index=idx, columns=cols,
data=np.array([[0.0, 0.5, 1.0],
[1.5, 2.0, 2.5],
[3.0, 3.5, 4.0]],
dtype=float))
df2 = DataFrame(index=idx, columns=cols,
data=np.ones((len(idx), len(cols))))
expected = DataFrame(index=idx, columns=cols,
data=np.array([[0.0, 0.5, 1.0],
[1.5, 2.0, -1],
[-1, -1, -1]], dtype=float))
df1[df1 > 2.0 * df2] = -1
assert_frame_equal(df1, expected)
with tm.assert_raises_regex(ValueError, 'Item wrong length'):
df1[df1.index[:-1] > 2] = -1
def test_boolean_indexing_mixed(self):
df = DataFrame({
long(0): {35: np.nan, 40: np.nan, 43: np.nan,
49: np.nan, 50: np.nan},
long(1): {35: np.nan,
40: 0.32632316859446198,
43: np.nan,
49: 0.32632316859446198,
50: 0.39114724480578139},
long(2): {35: np.nan, 40: np.nan, 43: 0.29012581014105987,
49: np.nan, 50: np.nan},
long(3): {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan,
50: np.nan},
long(4): {35: 0.34215328467153283, 40: np.nan, 43: np.nan,
49: np.nan, 50: np.nan},
'y': {35: 0, 40: 0, 43: 0, 49: 0, 50: 1}})
# mixed int/float ok
df2 = df.copy()
df2[df2 > 0.3] = 1
expected = df.copy()
expected.loc[40, 1] = 1
expected.loc[49, 1] = 1
expected.loc[50, 1] = 1
expected.loc[35, 4] = 1
assert_frame_equal(df2, expected)
df['foo'] = 'test'
with tm.assert_raises_regex(TypeError, 'boolean setting '
'on mixed-type'):
df[df > 0.3] = 1
def test_where(self):
default_frame = DataFrame(np.random.randn(5, 3),
columns=['A', 'B', 'C'])
def _safe_add(df):
# only add to the numeric items
def is_ok(s):
return (issubclass(s.dtype.type, (np.integer, np.floating)) and
s.dtype != 'uint8')
return DataFrame(dict((c, s + 1) if is_ok(s) else (c, s)
for c, s in compat.iteritems(df)))
def _check_get(df, cond, check_dtypes=True):
other1 = _safe_add(df)
rs = df.where(cond, other1)
rs2 = df.where(cond.values, other1)
for k, v in rs.iteritems():
exp = Series(
np.where(cond[k], df[k], other1[k]), index=v.index)
assert_series_equal(v, exp, check_names=False)
assert_frame_equal(rs, rs2)
# dtypes
if check_dtypes:
assert (rs.dtypes == df.dtypes).all()
# check getting
for df in [default_frame, self.mixed_frame,
self.mixed_float, self.mixed_int]:
cond = df > 0
_check_get(df, cond)
# upcasting case (GH # 2794)
df = DataFrame(dict((c, Series([1] * 3, dtype=c))
for c in ['float32', 'float64',
'int32', 'int64']))
df.iloc[1, :] = 0
result = df.where(df >= 0).get_dtype_counts()
# when we don't preserve boolean casts
#
# expected = Series({ 'float32' : 1, 'float64' : 3 })
expected = Series({'float32': 1, 'float64': 1, 'int32': 1, 'int64': 1})
assert_series_equal(result, expected)
# aligning
def _check_align(df, cond, other, check_dtypes=True):
rs = df.where(cond, other)
for i, k in enumerate(rs.columns):
result = rs[k]
d = df[k].values
c = cond[k].reindex(df[k].index).fillna(False).values
if is_scalar(other):
o = other
else:
if isinstance(other, np.ndarray):
o = Series(other[:, i], index=result.index).values
else:
o = other[k].values
new_values = d if c.all() else np.where(c, d, o)
expected = Series(new_values, index=result.index, name=k)
# since we can't always have the correct numpy dtype
# as numpy doesn't know how to downcast, don't check
assert_series_equal(result, expected, check_dtype=False)
# dtypes
# can't check dtype when other is an ndarray
if check_dtypes and not isinstance(other, np.ndarray):
assert (rs.dtypes == df.dtypes).all()
for df in [self.mixed_frame, self.mixed_float, self.mixed_int]:
# other is a frame
cond = (df > 0)[1:]
_check_align(df, cond, _safe_add(df))
# check other is ndarray
cond = df > 0
_check_align(df, cond, (_safe_add(df).values))
# integers are upcast, so don't check the dtypes
cond = df > 0
check_dtypes = all(not issubclass(s.type, np.integer)
for s in df.dtypes)
_check_align(df, cond, np.nan, check_dtypes=check_dtypes)
# invalid conditions
df = default_frame
err1 = (df + 1).values[0:2, :]
pytest.raises(ValueError, df.where, cond, err1)
err2 = cond.iloc[:2, :].values
other1 = _safe_add(df)
pytest.raises(ValueError, df.where, err2, other1)
pytest.raises(ValueError, df.mask, True)
pytest.raises(ValueError, df.mask, 0)
# where inplace
def _check_set(df, cond, check_dtypes=True):
dfi = df.copy()
econd = cond.reindex_like(df).fillna(True)
expected = dfi.mask(~econd)
dfi.where(cond, np.nan, inplace=True)
assert_frame_equal(dfi, expected)
# dtypes (and confirm upcasts)x
if check_dtypes:
for k, v in compat.iteritems(df.dtypes):
if issubclass(v.type, np.integer) and not cond[k].all():
v = np.dtype('float64')
assert dfi[k].dtype == v
for df in [default_frame, self.mixed_frame, self.mixed_float,
self.mixed_int]:
cond = df > 0
_check_set(df, cond)
cond = df >= 0
_check_set(df, cond)
# aligining
cond = (df >= 0)[1:]
_check_set(df, cond)
# GH 10218
# test DataFrame.where with Series slicing
df = DataFrame({'a': range(3), 'b': range(4, 7)})
result = df.where(df['a'] == 1)
expected = df[df['a'] == 1].reindex(df.index)
assert_frame_equal(result, expected)
def test_where_array_like(self):
# see gh-15414
klasses = [list, tuple, np.array]
df = DataFrame({'a': [1, 2, 3]})
cond = [[False], [True], [True]]
expected = DataFrame({'a': [np.nan, 2, 3]})
for klass in klasses:
result = df.where(klass(cond))
assert_frame_equal(result, expected)
df['b'] = 2
expected['b'] = [2, np.nan, 2]
cond = [[False, True], [True, False], [True, True]]
for klass in klasses:
result = df.where(klass(cond))
assert_frame_equal(result, expected)
def test_where_invalid_input(self):
# see gh-15414: only boolean arrays accepted
df = DataFrame({'a': [1, 2, 3]})
msg = "Boolean array expected for the condition"
conds = [
[[1], [0], [1]],
Series([[2], [5], [7]]),
DataFrame({'a': [2, 5, 7]}),
[["True"], ["False"], ["True"]],
[[Timestamp("2017-01-01")],
[pd.NaT], [Timestamp("2017-01-02")]]
]
for cond in conds:
with tm.assert_raises_regex(ValueError, msg):
df.where(cond)
df['b'] = 2
conds = [
[[0, 1], [1, 0], [1, 1]],
Series([[0, 2], [5, 0], [4, 7]]),
[["False", "True"], ["True", "False"],
["True", "True"]],
DataFrame({'a': [2, 5, 7], 'b': [4, 8, 9]}),
[[pd.NaT, Timestamp("2017-01-01")],
[Timestamp("2017-01-02"), pd.NaT],
[Timestamp("2017-01-03"), Timestamp("2017-01-03")]]
]
for cond in conds:
with tm.assert_raises_regex(ValueError, msg):
df.where(cond)
def test_where_dataframe_col_match(self):
df = DataFrame([[1, 2, 3], [4, 5, 6]])
cond = DataFrame([[True, False, True], [False, False, True]])
result = df.where(cond)
expected = DataFrame([[1.0, np.nan, 3], [np.nan, np.nan, 6]])
tm.assert_frame_equal(result, expected)
# this *does* align, though has no matching columns
cond.columns = ["a", "b", "c"]
result = df.where(cond)
expected = DataFrame(np.nan, index=df.index, columns=df.columns)
tm.assert_frame_equal(result, expected)
def test_where_ndframe_align(self):
msg = "Array conditional must be same shape as self"
df = DataFrame([[1, 2, 3], [4, 5, 6]])
cond = [True]
with tm.assert_raises_regex(ValueError, msg):
df.where(cond)
expected = DataFrame([[1, 2, 3], [np.nan, np.nan, np.nan]])
out = df.where(Series(cond))
tm.assert_frame_equal(out, expected)
cond = np.array([False, True, False, True])
with tm.assert_raises_regex(ValueError, msg):
df.where(cond)
expected = DataFrame([[np.nan, np.nan, np.nan], [4, 5, 6]])
out = df.where(Series(cond))
tm.assert_frame_equal(out, expected)
def test_where_bug(self):
# GH 2793
df = DataFrame({'a': [1.0, 2.0, 3.0, 4.0], 'b': [
4.0, 3.0, 2.0, 1.0]}, dtype='float64')
expected = DataFrame({'a': [np.nan, np.nan, 3.0, 4.0], 'b': [
4.0, 3.0, np.nan, np.nan]}, dtype='float64')
result = df.where(df > 2, np.nan)
assert_frame_equal(result, expected)
result = df.copy()
result.where(result > 2, np.nan, inplace=True)
assert_frame_equal(result, expected)
# mixed
for dtype in ['int16', 'int8', 'int32', 'int64']:
df = DataFrame({'a': np.array([1, 2, 3, 4], dtype=dtype),
'b': np.array([4.0, 3.0, 2.0, 1.0],
dtype='float64')})
expected = DataFrame({'a': [np.nan, np.nan, 3.0, 4.0],
'b': [4.0, 3.0, np.nan, np.nan]},
dtype='float64')
result = df.where(df > 2, np.nan)
assert_frame_equal(result, expected)
result = df.copy()
result.where(result > 2, np.nan, inplace=True)
assert_frame_equal(result, expected)
# transpositional issue
# GH7506
a = DataFrame({0: [1, 2], 1: [3, 4], 2: [5, 6]})
b = DataFrame({0: [np.nan, 8], 1: [9, np.nan], 2: [np.nan, np.nan]})
do_not_replace = b.isna() | (a > b)
expected = a.copy()
expected[~do_not_replace] = b
result = a.where(do_not_replace, b)
assert_frame_equal(result, expected)
a = DataFrame({0: [4, 6], 1: [1, 0]})
b = DataFrame({0: [np.nan, 3], 1: [3, np.nan]})
do_not_replace = b.isna() | (a > b)
expected = a.copy()
expected[~do_not_replace] = b
result = a.where(do_not_replace, b)
assert_frame_equal(result, expected)
def test_where_datetime(self):
# GH 3311
df = DataFrame(dict(A=date_range('20130102', periods=5),
B=date_range('20130104', periods=5),
C=np.random.randn(5)))
stamp = datetime(2013, 1, 3)
result = df[df > stamp]
expected = df.copy()
expected.loc[[0, 1], 'A'] = np.nan
assert_frame_equal(result, expected)
def test_where_none(self):
# GH 4667
# setting with None changes dtype
df = DataFrame({'series': Series(range(10))}).astype(float)
df[df > 7] = None
expected = DataFrame(
{'series': Series([0, 1, 2, 3, 4, 5, 6, 7, np.nan, np.nan])})
assert_frame_equal(df, expected)
# GH 7656
df = DataFrame([{'A': 1, 'B': np.nan, 'C': 'Test'}, {
'A': np.nan, 'B': 'Test', 'C': np.nan}])
expected = df.where(~isna(df), None)
with tm.assert_raises_regex(TypeError, 'boolean setting '
'on mixed-type'):
df.where(~isna(df), None, inplace=True)
def test_where_align(self):
def create():
df = DataFrame(np.random.randn(10, 3))
df.iloc[3:5, 0] = np.nan
df.iloc[4:6, 1] = np.nan
df.iloc[5:8, 2] = np.nan
return df
# series
df = create()
expected = df.fillna(df.mean())
result = df.where(pd.notna(df), df.mean(), axis='columns')
assert_frame_equal(result, expected)
df.where(pd.notna(df), df.mean(), inplace=True, axis='columns')
assert_frame_equal(df, expected)
df = create().fillna(0)
expected = df.apply(lambda x, y: x.where(x > 0, y), y=df[0])
result = df.where(df > 0, df[0], axis='index')
assert_frame_equal(result, expected)
result = df.where(df > 0, df[0], axis='rows')
assert_frame_equal(result, expected)
# frame
df = create()
expected = df.fillna(1)
result = df.where(pd.notna(df), DataFrame(
1, index=df.index, columns=df.columns))
assert_frame_equal(result, expected)
def test_where_complex(self):
# GH 6345
expected = DataFrame(
[[1 + 1j, 2], [np.nan, 4 + 1j]], columns=['a', 'b'])
df = DataFrame([[1 + 1j, 2], [5 + 1j, 4 + 1j]], columns=['a', 'b'])
df[df.abs() >= 5] = np.nan
assert_frame_equal(df, expected)
def test_where_axis(self):
# GH 9736
df = DataFrame(np.random.randn(2, 2))
mask = DataFrame([[False, False], [False, False]])
s = Series([0, 1])
expected = DataFrame([[0, 0], [1, 1]], dtype='float64')
result = df.where(mask, s, axis='index')
assert_frame_equal(result, expected)
result = df.copy()
result.where(mask, s, axis='index', inplace=True)
assert_frame_equal(result, expected)
expected = DataFrame([[0, 1], [0, 1]], dtype='float64')
result = df.where(mask, s, axis='columns')
assert_frame_equal(result, expected)
result = df.copy()
result.where(mask, s, axis='columns', inplace=True)
assert_frame_equal(result, expected)
# Upcast needed
df = DataFrame([[1, 2], [3, 4]], dtype='int64')
mask = DataFrame([[False, False], [False, False]])
s = Series([0, np.nan])
expected = DataFrame([[0, 0], [np.nan, np.nan]], dtype='float64')
result = df.where(mask, s, axis='index')
assert_frame_equal(result, expected)
result = df.copy()
result.where(mask, s, axis='index', inplace=True)
assert_frame_equal(result, expected)
expected = DataFrame([[0, np.nan], [0, np.nan]])
result = df.where(mask, s, axis='columns')
assert_frame_equal(result, expected)
expected = DataFrame({0: np.array([0, 0], dtype='int64'),
1: np.array([np.nan, np.nan], dtype='float64')})
result = df.copy()
result.where(mask, s, axis='columns', inplace=True)
assert_frame_equal(result, expected)
# Multiple dtypes (=> multiple Blocks)
df = pd.concat([
DataFrame(np.random.randn(10, 2)),
DataFrame(np.random.randint(0, 10, size=(10, 2)), dtype='int64')],
ignore_index=True, axis=1)
mask = DataFrame(False, columns=df.columns, index=df.index)
s1 = Series(1, index=df.columns)
s2 = Series(2, index=df.index)
result = df.where(mask, s1, axis='columns')
expected = DataFrame(1.0, columns=df.columns, index=df.index)
expected[2] = expected[2].astype('int64')
expected[3] = expected[3].astype('int64')
assert_frame_equal(result, expected)
result = df.copy()
result.where(mask, s1, axis='columns', inplace=True)
assert_frame_equal(result, expected)
result = df.where(mask, s2, axis='index')
expected = DataFrame(2.0, columns=df.columns, index=df.index)
expected[2] = expected[2].astype('int64')
expected[3] = expected[3].astype('int64')
assert_frame_equal(result, expected)
result = df.copy()
result.where(mask, s2, axis='index', inplace=True)
assert_frame_equal(result, expected)
# DataFrame vs DataFrame
d1 = df.copy().drop(1, axis=0)
expected = df.copy()
expected.loc[1, :] = np.nan
result = df.where(mask, d1)
assert_frame_equal(result, expected)
result = df.where(mask, d1, axis='index')
assert_frame_equal(result, expected)
result = df.copy()
result.where(mask, d1, inplace=True)
assert_frame_equal(result, expected)
result = df.copy()
result.where(mask, d1, inplace=True, axis='index')
assert_frame_equal(result, expected)
d2 = df.copy().drop(1, axis=1)
expected = df.copy()
expected.loc[:, 1] = np.nan
result = df.where(mask, d2)
assert_frame_equal(result, expected)
result = df.where(mask, d2, axis='columns')
assert_frame_equal(result, expected)
result = df.copy()
result.where(mask, d2, inplace=True)
assert_frame_equal(result, expected)
result = df.copy()
result.where(mask, d2, inplace=True, axis='columns')
assert_frame_equal(result, expected)
def test_where_callable(self):
# GH 12533
df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
result = df.where(lambda x: x > 4, lambda x: x + 1)
exp = DataFrame([[2, 3, 4], [5, 5, 6], [7, 8, 9]])
tm.assert_frame_equal(result, exp)
tm.assert_frame_equal(result, df.where(df > 4, df + 1))
# return ndarray and scalar
result = df.where(lambda x: (x % 2 == 0).values, lambda x: 99)
exp = DataFrame([[99, 2, 99], [4, 99, 6], [99, 8, 99]])
tm.assert_frame_equal(result, exp)
tm.assert_frame_equal(result, df.where(df % 2 == 0, 99))
# chain
result = (df + 2).where(lambda x: x > 8, lambda x: x + 10)
exp = DataFrame([[13, 14, 15], [16, 17, 18], [9, 10, 11]])
tm.assert_frame_equal(result, exp)
tm.assert_frame_equal(result,
(df + 2).where((df + 2) > 8, (df + 2) + 10))
def test_mask(self):
df = DataFrame(np.random.randn(5, 3))
cond = df > 0
rs = df.where(cond, np.nan)
assert_frame_equal(rs, df.mask(df <= 0))
assert_frame_equal(rs, df.mask(~cond))
other = DataFrame(np.random.randn(5, 3))
rs = df.where(cond, other)
assert_frame_equal(rs, df.mask(df <= 0, other))
assert_frame_equal(rs, df.mask(~cond, other))
def test_mask_inplace(self):
# GH8801
df = DataFrame(np.random.randn(5, 3))
cond = df > 0
rdf = df.copy()
rdf.where(cond, inplace=True)
assert_frame_equal(rdf, df.where(cond))
assert_frame_equal(rdf, df.mask(~cond))
rdf = df.copy()
rdf.where(cond, -df, inplace=True)
assert_frame_equal(rdf, df.where(cond, -df))
assert_frame_equal(rdf, df.mask(~cond, -df))
def test_mask_edge_case_1xN_frame(self):
# GH4071
df = DataFrame([[1, 2]])
res = df.mask(DataFrame([[True, False]]))
expec = DataFrame([[nan, 2]])
assert_frame_equal(res, expec)
def test_mask_callable(self):
# GH 12533
df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
result = df.mask(lambda x: x > 4, lambda x: x + 1)
exp = DataFrame([[1, 2, 3], [4, 6, 7], [8, 9, 10]])
tm.assert_frame_equal(result, exp)
tm.assert_frame_equal(result, df.mask(df > 4, df + 1))
# return ndarray and scalar
result = df.mask(lambda x: (x % 2 == 0).values, lambda x: 99)
exp = DataFrame([[1, 99, 3], [99, 5, 99], [7, 99, 9]])
tm.assert_frame_equal(result, exp)
tm.assert_frame_equal(result, df.mask(df % 2 == 0, 99))
# chain
result = (df + 2).mask(lambda x: x > 8, lambda x: x + 10)
exp = DataFrame([[3, 4, 5], [6, 7, 8], [19, 20, 21]])
tm.assert_frame_equal(result, exp)
tm.assert_frame_equal(result,
(df + 2).mask((df + 2) > 8, (df + 2) + 10))
def test_head_tail(self):
assert_frame_equal(self.frame.head(), self.frame[:5])
assert_frame_equal(self.frame.tail(), self.frame[-5:])
assert_frame_equal(self.frame.head(0), self.frame[0:0])
assert_frame_equal(self.frame.tail(0), self.frame[0:0])
assert_frame_equal(self.frame.head(-1), self.frame[:-1])
assert_frame_equal(self.frame.tail(-1), self.frame[1:])
assert_frame_equal(self.frame.head(1), self.frame[:1])
assert_frame_equal(self.frame.tail(1), self.frame[-1:])
# with a float index
df = self.frame.copy()
df.index = np.arange(len(self.frame)) + 0.1
assert_frame_equal(df.head(), df.iloc[:5])
assert_frame_equal(df.tail(), df.iloc[-5:])
assert_frame_equal(df.head(0), df[0:0])
assert_frame_equal(df.tail(0), df[0:0])
assert_frame_equal(df.head(-1), df.iloc[:-1])
assert_frame_equal(df.tail(-1), df.iloc[1:])
# test empty dataframe
empty_df = DataFrame()
assert_frame_equal(empty_df.tail(), empty_df)
assert_frame_equal(empty_df.head(), empty_df)
def test_type_error_multiindex(self):
# See gh-12218
df = DataFrame(columns=['i', 'c', 'x', 'y'],
data=[[0, 0, 1, 2], [1, 0, 3, 4],
[0, 1, 1, 2], [1, 1, 3, 4]])
dg = df.pivot_table(index='i', columns='c',
values=['x', 'y'])
with tm.assert_raises_regex(TypeError, "is an invalid key"):
str(dg[:, 0])
index = Index(range(2), name='i')
columns = MultiIndex(levels=[['x', 'y'], [0, 1]],
labels=[[0, 1], [0, 0]],
names=[None, 'c'])
expected = DataFrame([[1, 2], [3, 4]], columns=columns, index=index)
result = dg.loc[:, (slice(None), 0)]
assert_frame_equal(result, expected)
name = ('x', 0)
index = Index(range(2), name='i')
expected = Series([1, 3], index=index, name=name)
result = dg['x', 0]
assert_series_equal(result, expected)
class TestDataFrameIndexingDatetimeWithTZ(TestData):
def setup_method(self, method):
self.idx = Index(date_range('20130101', periods=3, tz='US/Eastern'),
name='foo')
self.dr = date_range('20130110', periods=3)
self.df = DataFrame({'A': self.idx, 'B': self.dr})
def test_setitem(self):
df = self.df
idx = self.idx
# setitem
df['C'] = idx
assert_series_equal(df['C'], Series(idx, name='C'))
df['D'] = 'foo'
df['D'] = idx
assert_series_equal(df['D'], Series(idx, name='D'))
del df['D']
# assert that A & C are not sharing the same base (e.g. they
# are copies)
b1 = df._data.blocks[1]
b2 = df._data.blocks[2]
assert b1.values.equals(b2.values)
assert id(b1.values.values.base) != id(b2.values.values.base)
# with nan
df2 = df.copy()
df2.iloc[1, 1] = pd.NaT
df2.iloc[1, 2] = pd.NaT
result = df2['B']
assert_series_equal(notna(result), Series(
[True, False, True], name='B'))
assert_series_equal(df2.dtypes, df.dtypes)
def test_set_reset(self):
idx = self.idx
# set/reset
df = DataFrame({'A': [0, 1, 2]}, index=idx)
result = df.reset_index()
assert result['foo'].dtype, 'M8[ns, US/Eastern'
df = result.set_index('foo')
tm.assert_index_equal(df.index, idx)
def test_transpose(self):
result = self.df.T
expected = DataFrame(self.df.values.T)
expected.index = ['A', 'B']
assert_frame_equal(result, expected)
class TestDataFrameIndexingUInt64(TestData):
def setup_method(self, method):
self.ir = Index(np.arange(3), dtype=np.uint64)
self.idx = Index([2**63, 2**63 + 5, 2**63 + 10], name='foo')
self.df = DataFrame({'A': self.idx, 'B': self.ir})
def test_setitem(self):
df = self.df
idx = self.idx
# setitem
df['C'] = idx
assert_series_equal(df['C'], Series(idx, name='C'))
df['D'] = 'foo'
df['D'] = idx
assert_series_equal(df['D'], Series(idx, name='D'))
del df['D']
# With NaN: because uint64 has no NaN element,
# the column should be cast to object.
df2 = df.copy()
df2.iloc[1, 1] = pd.NaT
df2.iloc[1, 2] = pd.NaT
result = df2['B']
assert_series_equal(notna(result), Series(
[True, False, True], name='B'))
assert_series_equal(df2.dtypes, Series([np.dtype('uint64'),
np.dtype('O'), np.dtype('O')],
index=['A', 'B', 'C']))
def test_set_reset(self):
idx = self.idx
# set/reset
df = DataFrame({'A': [0, 1, 2]}, index=idx)
result = df.reset_index()
assert result['foo'].dtype == np.dtype('uint64')
df = result.set_index('foo')
tm.assert_index_equal(df.index, idx)
def test_transpose(self):
result = self.df.T
expected = DataFrame(self.df.values.T)
expected.index = ['A', 'B']
assert_frame_equal(result, expected)
class TestDataFrameIndexingCategorical(object):
def test_assignment(self):
# assignment
df = DataFrame({'value': np.array(
np.random.randint(0, 10000, 100), dtype='int32')})
labels = Categorical(["{0} - {1}".format(i, i + 499)
for i in range(0, 10000, 500)])
df = df.sort_values(by=['value'], ascending=True)
s = pd.cut(df.value, range(0, 10500, 500), right=False, labels=labels)
d = s.values
df['D'] = d
str(df)
result = df.dtypes
expected = Series(
[np.dtype('int32'), CategoricalDtype(categories=labels,
ordered=False)],
index=['value', 'D'])
tm.assert_series_equal(result, expected)
df['E'] = s
str(df)
result = df.dtypes
expected = Series([np.dtype('int32'),
CategoricalDtype(categories=labels, ordered=False),
CategoricalDtype(categories=labels, ordered=False)],
index=['value', 'D', 'E'])
tm.assert_series_equal(result, expected)
result1 = df['D']
result2 = df['E']
tm.assert_categorical_equal(result1._data._block.values, d)
# sorting
s.name = 'E'
tm.assert_series_equal(result2.sort_index(), s.sort_index())
cat = Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10])
df = DataFrame(Series(cat))
def test_assigning_ops(self):
# systematically test the assigning operations:
# for all slicing ops:
# for value in categories and value not in categories:
# - assign a single value -> exp_single_cats_value
# - assign a complete row (mixed values) -> exp_single_row
# assign multiple rows (mixed values) (-> array) -> exp_multi_row
# assign a part of a column with dtype == categorical ->
# exp_parts_cats_col
# assign a part of a column with dtype != categorical ->
# exp_parts_cats_col
cats = Categorical(["a", "a", "a", "a", "a", "a", "a"],
categories=["a", "b"])
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
values = [1, 1, 1, 1, 1, 1, 1]
orig = DataFrame({"cats": cats, "values": values}, index=idx)
# the expected values
# changed single row
cats1 = Categorical(["a", "a", "b", "a", "a", "a", "a"],
categories=["a", "b"])
idx1 = Index(["h", "i", "j", "k", "l", "m", "n"])
values1 = [1, 1, 2, 1, 1, 1, 1]
exp_single_row = DataFrame({"cats": cats1,
"values": values1}, index=idx1)
# changed multiple rows
cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"],
categories=["a", "b"])
idx2 = Index(["h", "i", "j", "k", "l", "m", "n"])
values2 = [1, 1, 2, 2, 1, 1, 1]
exp_multi_row = DataFrame({"cats": cats2,
"values": values2}, index=idx2)
# changed part of the cats column
cats3 = Categorical(
["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"])
idx3 = Index(["h", "i", "j", "k", "l", "m", "n"])
values3 = [1, 1, 1, 1, 1, 1, 1]
exp_parts_cats_col = DataFrame({"cats": cats3,
"values": values3}, index=idx3)
# changed single value in cats col
cats4 = Categorical(
["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"])
idx4 = Index(["h", "i", "j", "k", "l", "m", "n"])
values4 = [1, 1, 1, 1, 1, 1, 1]
exp_single_cats_value = DataFrame({"cats": cats4,
"values": values4}, index=idx4)
# iloc
# ###############
# - assign a single value -> exp_single_cats_value
df = orig.copy()
df.iloc[2, 0] = "b"
tm.assert_frame_equal(df, exp_single_cats_value)
df = orig.copy()
df.iloc[df.index == "j", 0] = "b"
tm.assert_frame_equal(df, exp_single_cats_value)
# - assign a single value not in the current categories set
def f():
df = orig.copy()
df.iloc[2, 0] = "c"
pytest.raises(ValueError, f)
# - assign a complete row (mixed values) -> exp_single_row
df = orig.copy()
df.iloc[2, :] = ["b", 2]
tm.assert_frame_equal(df, exp_single_row)
# - assign a complete row (mixed values) not in categories set
def f():
df = orig.copy()
df.iloc[2, :] = ["c", 2]
pytest.raises(ValueError, f)
# - assign multiple rows (mixed values) -> exp_multi_row
df = orig.copy()
df.iloc[2:4, :] = [["b", 2], ["b", 2]]
tm.assert_frame_equal(df, exp_multi_row)
def f():
df = orig.copy()
df.iloc[2:4, :] = [["c", 2], ["c", 2]]
pytest.raises(ValueError, f)
# assign a part of a column with dtype == categorical ->
# exp_parts_cats_col
df = orig.copy()
df.iloc[2:4, 0] = Categorical(["b", "b"], categories=["a", "b"])
tm.assert_frame_equal(df, exp_parts_cats_col)
with pytest.raises(ValueError):
# different categories -> not sure if this should fail or pass
df = orig.copy()
df.iloc[2:4, 0] = Categorical(list('bb'), categories=list('abc'))
with pytest.raises(ValueError):
# different values
df = orig.copy()
df.iloc[2:4, 0] = Categorical(list('cc'), categories=list('abc'))
# assign a part of a column with dtype != categorical ->
# exp_parts_cats_col
df = orig.copy()
df.iloc[2:4, 0] = ["b", "b"]
tm.assert_frame_equal(df, exp_parts_cats_col)
with pytest.raises(ValueError):
df.iloc[2:4, 0] = ["c", "c"]
# loc
# ##############
# - assign a single value -> exp_single_cats_value
df = orig.copy()
df.loc["j", "cats"] = "b"
tm.assert_frame_equal(df, exp_single_cats_value)
df = orig.copy()
df.loc[df.index == "j", "cats"] = "b"
tm.assert_frame_equal(df, exp_single_cats_value)
# - assign a single value not in the current categories set
def f():
df = orig.copy()
df.loc["j", "cats"] = "c"
pytest.raises(ValueError, f)
# - assign a complete row (mixed values) -> exp_single_row
df = orig.copy()
df.loc["j", :] = ["b", 2]
tm.assert_frame_equal(df, exp_single_row)
# - assign a complete row (mixed values) not in categories set
def f():
df = orig.copy()
df.loc["j", :] = ["c", 2]
pytest.raises(ValueError, f)
# - assign multiple rows (mixed values) -> exp_multi_row
df = orig.copy()
df.loc["j":"k", :] = [["b", 2], ["b", 2]]
tm.assert_frame_equal(df, exp_multi_row)
def f():
df = orig.copy()
df.loc["j":"k", :] = [["c", 2], ["c", 2]]
pytest.raises(ValueError, f)
# assign a part of a column with dtype == categorical ->
# exp_parts_cats_col
df = orig.copy()
df.loc["j":"k", "cats"] = Categorical(
["b", "b"], categories=["a", "b"])
tm.assert_frame_equal(df, exp_parts_cats_col)
with pytest.raises(ValueError):
# different categories -> not sure if this should fail or pass
df = orig.copy()
df.loc["j":"k", "cats"] = Categorical(
["b", "b"], categories=["a", "b", "c"])
with pytest.raises(ValueError):
# different values
df = orig.copy()
df.loc["j":"k", "cats"] = Categorical(
["c", "c"], categories=["a", "b", "c"])
# assign a part of a column with dtype != categorical ->
# exp_parts_cats_col
df = orig.copy()
df.loc["j":"k", "cats"] = ["b", "b"]
tm.assert_frame_equal(df, exp_parts_cats_col)
with pytest.raises(ValueError):
df.loc["j":"k", "cats"] = ["c", "c"]
# loc
# ##############
# - assign a single value -> exp_single_cats_value
df = orig.copy()
df.loc["j", df.columns[0]] = "b"
tm.assert_frame_equal(df, exp_single_cats_value)
df = orig.copy()
df.loc[df.index == "j", df.columns[0]] = "b"
tm.assert_frame_equal(df, exp_single_cats_value)
# - assign a single value not in the current categories set
def f():
df = orig.copy()
df.loc["j", df.columns[0]] = "c"
pytest.raises(ValueError, f)
# - assign a complete row (mixed values) -> exp_single_row
df = orig.copy()
df.loc["j", :] = ["b", 2]
tm.assert_frame_equal(df, exp_single_row)
# - assign a complete row (mixed values) not in categories set
def f():
df = orig.copy()
df.loc["j", :] = ["c", 2]
pytest.raises(ValueError, f)
# - assign multiple rows (mixed values) -> exp_multi_row
df = orig.copy()
df.loc["j":"k", :] = [["b", 2], ["b", 2]]
tm.assert_frame_equal(df, exp_multi_row)
def f():
df = orig.copy()
df.loc["j":"k", :] = [["c", 2], ["c", 2]]
pytest.raises(ValueError, f)
# assign a part of a column with dtype == categorical ->
# exp_parts_cats_col
df = orig.copy()
df.loc["j":"k", df.columns[0]] = Categorical(
["b", "b"], categories=["a", "b"])
tm.assert_frame_equal(df, exp_parts_cats_col)
with pytest.raises(ValueError):
# different categories -> not sure if this should fail or pass
df = orig.copy()
df.loc["j":"k", df.columns[0]] = Categorical(
["b", "b"], categories=["a", "b", "c"])
with pytest.raises(ValueError):
# different values
df = orig.copy()
df.loc["j":"k", df.columns[0]] = Categorical(
["c", "c"], categories=["a", "b", "c"])
# assign a part of a column with dtype != categorical ->
# exp_parts_cats_col
df = orig.copy()
df.loc["j":"k", df.columns[0]] = ["b", "b"]
tm.assert_frame_equal(df, exp_parts_cats_col)
with pytest.raises(ValueError):
df.loc["j":"k", df.columns[0]] = ["c", "c"]
# iat
df = orig.copy()
df.iat[2, 0] = "b"
tm.assert_frame_equal(df, exp_single_cats_value)
# - assign a single value not in the current categories set
def f():
df = orig.copy()
df.iat[2, 0] = "c"
pytest.raises(ValueError, f)
# at
# - assign a single value -> exp_single_cats_value
df = orig.copy()
df.at["j", "cats"] = "b"
tm.assert_frame_equal(df, exp_single_cats_value)
# - assign a single value not in the current categories set
def f():
df = orig.copy()
df.at["j", "cats"] = "c"
pytest.raises(ValueError, f)
# fancy indexing
catsf = Categorical(["a", "a", "c", "c", "a", "a", "a"],
categories=["a", "b", "c"])
idxf = Index(["h", "i", "j", "k", "l", "m", "n"])
valuesf = [1, 1, 3, 3, 1, 1, 1]
df = DataFrame({"cats": catsf, "values": valuesf}, index=idxf)
exp_fancy = exp_multi_row.copy()
exp_fancy["cats"].cat.set_categories(["a", "b", "c"], inplace=True)
df[df["cats"] == "c"] = ["b", 2]
# category c is kept in .categories
tm.assert_frame_equal(df, exp_fancy)
# set_value
df = orig.copy()
df.at["j", "cats"] = "b"
tm.assert_frame_equal(df, exp_single_cats_value)
def f():
df = orig.copy()
df.at["j", "cats"] = "c"
pytest.raises(ValueError, f)
# Assigning a Category to parts of a int/... column uses the values of
# the Catgorical
df = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")})
exp = DataFrame({"a": [1, "b", "b", 1, 1], "b": list("aabba")})
df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"])
df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"])
tm.assert_frame_equal(df, exp)
def test_functions_no_warnings(self):
df = DataFrame({'value': np.random.randint(0, 100, 20)})
labels = ["{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)]
with tm.assert_produces_warning(False):
df['group'] = pd.cut(df.value, range(0, 105, 10), right=False,
labels=labels)