3497 lines
119 KiB
Python
3497 lines
119 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
from __future__ import print_function
|
||
|
from warnings import catch_warnings
|
||
|
|
||
|
from datetime import datetime, date, timedelta, time
|
||
|
|
||
|
from pandas.compat import map, zip, range, lrange, lzip, long
|
||
|
from pandas import compat
|
||
|
|
||
|
from numpy import nan
|
||
|
from numpy.random import randn
|
||
|
|
||
|
import pytest
|
||
|
import numpy as np
|
||
|
|
||
|
import pandas.core.common as com
|
||
|
from pandas import (DataFrame, Index, Series, notna, isna,
|
||
|
MultiIndex, DatetimeIndex, Timestamp,
|
||
|
date_range, Categorical)
|
||
|
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||
|
|
||
|
import pandas as pd
|
||
|
|
||
|
from pandas._libs.tslib import iNaT
|
||
|
from pandas.tseries.offsets import BDay
|
||
|
from pandas.core.dtypes.common import (
|
||
|
is_float_dtype,
|
||
|
is_integer,
|
||
|
is_scalar)
|
||
|
from pandas.util.testing import (assert_almost_equal,
|
||
|
assert_series_equal,
|
||
|
assert_frame_equal)
|
||
|
from pandas.core.indexing import IndexingError
|
||
|
|
||
|
import pandas.util.testing as tm
|
||
|
|
||
|
from pandas.tests.frame.common import TestData
|
||
|
|
||
|
|
||
|
class TestDataFrameIndexing(TestData):
|
||
|
|
||
|
def test_getitem(self):
|
||
|
# Slicing
|
||
|
sl = self.frame[:20]
|
||
|
assert len(sl.index) == 20
|
||
|
|
||
|
# Column access
|
||
|
for _, series in compat.iteritems(sl):
|
||
|
assert len(series.index) == 20
|
||
|
assert tm.equalContents(series.index, sl.index)
|
||
|
|
||
|
for key, _ in compat.iteritems(self.frame._series):
|
||
|
assert self.frame[key] is not None
|
||
|
|
||
|
assert 'random' not in self.frame
|
||
|
with tm.assert_raises_regex(KeyError, 'random'):
|
||
|
self.frame['random']
|
||
|
|
||
|
df = self.frame.copy()
|
||
|
df['$10'] = randn(len(df))
|
||
|
|
||
|
ad = randn(len(df))
|
||
|
df['@awesome_domain'] = ad
|
||
|
|
||
|
with pytest.raises(KeyError):
|
||
|
df.__getitem__('df["$10"]')
|
||
|
|
||
|
res = df['@awesome_domain']
|
||
|
tm.assert_numpy_array_equal(ad, res.values)
|
||
|
|
||
|
def test_getitem_dupe_cols(self):
|
||
|
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b'])
|
||
|
try:
|
||
|
df[['baf']]
|
||
|
except KeyError:
|
||
|
pass
|
||
|
else:
|
||
|
self.fail("Dataframe failed to raise KeyError")
|
||
|
|
||
|
def test_get(self):
|
||
|
b = self.frame.get('B')
|
||
|
assert_series_equal(b, self.frame['B'])
|
||
|
|
||
|
assert self.frame.get('foo') is None
|
||
|
assert_series_equal(self.frame.get('foo', self.frame['B']),
|
||
|
self.frame['B'])
|
||
|
# None
|
||
|
# GH 5652
|
||
|
for df in [DataFrame(), DataFrame(columns=list('AB')),
|
||
|
DataFrame(columns=list('AB'), index=range(3))]:
|
||
|
result = df.get(None)
|
||
|
assert result is None
|
||
|
|
||
|
def test_getitem_iterator(self):
|
||
|
idx = iter(['A', 'B', 'C'])
|
||
|
result = self.frame.loc[:, idx]
|
||
|
expected = self.frame.loc[:, ['A', 'B', 'C']]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
idx = iter(['A', 'B', 'C'])
|
||
|
result = self.frame.loc[:, idx]
|
||
|
expected = self.frame.loc[:, ['A', 'B', 'C']]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_getitem_list(self):
|
||
|
self.frame.columns.name = 'foo'
|
||
|
|
||
|
result = self.frame[['B', 'A']]
|
||
|
result2 = self.frame[Index(['B', 'A'])]
|
||
|
|
||
|
expected = self.frame.loc[:, ['B', 'A']]
|
||
|
expected.columns.name = 'foo'
|
||
|
|
||
|
assert_frame_equal(result, expected)
|
||
|
assert_frame_equal(result2, expected)
|
||
|
|
||
|
assert result.columns.name == 'foo'
|
||
|
|
||
|
with tm.assert_raises_regex(KeyError, 'not in index'):
|
||
|
self.frame[['B', 'A', 'food']]
|
||
|
with tm.assert_raises_regex(KeyError, 'not in index'):
|
||
|
self.frame[Index(['B', 'A', 'foo'])]
|
||
|
|
||
|
# tuples
|
||
|
df = DataFrame(randn(8, 3),
|
||
|
columns=Index([('foo', 'bar'), ('baz', 'qux'),
|
||
|
('peek', 'aboo')], name=('sth', 'sth2')))
|
||
|
|
||
|
result = df[[('foo', 'bar'), ('baz', 'qux')]]
|
||
|
expected = df.iloc[:, :2]
|
||
|
assert_frame_equal(result, expected)
|
||
|
assert result.columns.names == ('sth', 'sth2')
|
||
|
|
||
|
def test_getitem_callable(self):
|
||
|
# GH 12533
|
||
|
result = self.frame[lambda x: 'A']
|
||
|
tm.assert_series_equal(result, self.frame.loc[:, 'A'])
|
||
|
|
||
|
result = self.frame[lambda x: ['A', 'B']]
|
||
|
tm.assert_frame_equal(result, self.frame.loc[:, ['A', 'B']])
|
||
|
|
||
|
df = self.frame[:3]
|
||
|
result = df[lambda x: [True, False, True]]
|
||
|
tm.assert_frame_equal(result, self.frame.iloc[[0, 2], :])
|
||
|
|
||
|
def test_setitem_list(self):
|
||
|
|
||
|
self.frame['E'] = 'foo'
|
||
|
data = self.frame[['A', 'B']]
|
||
|
self.frame[['B', 'A']] = data
|
||
|
|
||
|
assert_series_equal(self.frame['B'], data['A'], check_names=False)
|
||
|
assert_series_equal(self.frame['A'], data['B'], check_names=False)
|
||
|
|
||
|
with tm.assert_raises_regex(ValueError,
|
||
|
'Columns must be same length as key'):
|
||
|
data[['A']] = self.frame[['A', 'B']]
|
||
|
|
||
|
with tm.assert_raises_regex(ValueError, 'Length of values '
|
||
|
'does not match '
|
||
|
'length of index'):
|
||
|
data['A'] = range(len(data.index) - 1)
|
||
|
|
||
|
df = DataFrame(0, lrange(3), ['tt1', 'tt2'], dtype=np.int_)
|
||
|
df.loc[1, ['tt1', 'tt2']] = [1, 2]
|
||
|
|
||
|
result = df.loc[df.index[1], ['tt1', 'tt2']]
|
||
|
expected = Series([1, 2], df.columns, dtype=np.int_, name=1)
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
df['tt1'] = df['tt2'] = '0'
|
||
|
df.loc[df.index[1], ['tt1', 'tt2']] = ['1', '2']
|
||
|
result = df.loc[df.index[1], ['tt1', 'tt2']]
|
||
|
expected = Series(['1', '2'], df.columns, name=1)
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_setitem_list_not_dataframe(self):
|
||
|
data = np.random.randn(len(self.frame), 2)
|
||
|
self.frame[['A', 'B']] = data
|
||
|
assert_almost_equal(self.frame[['A', 'B']].values, data)
|
||
|
|
||
|
def test_setitem_list_of_tuples(self):
|
||
|
tuples = lzip(self.frame['A'], self.frame['B'])
|
||
|
self.frame['tuples'] = tuples
|
||
|
|
||
|
result = self.frame['tuples']
|
||
|
expected = Series(tuples, index=self.frame.index, name='tuples')
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_setitem_mulit_index(self):
|
||
|
# GH7655, test that assigning to a sub-frame of a frame
|
||
|
# with multi-index columns aligns both rows and columns
|
||
|
it = ['jim', 'joe', 'jolie'], ['first', 'last'], \
|
||
|
['left', 'center', 'right']
|
||
|
|
||
|
cols = MultiIndex.from_product(it)
|
||
|
index = pd.date_range('20141006', periods=20)
|
||
|
vals = np.random.randint(1, 1000, (len(index), len(cols)))
|
||
|
df = pd.DataFrame(vals, columns=cols, index=index)
|
||
|
|
||
|
i, j = df.index.values.copy(), it[-1][:]
|
||
|
|
||
|
np.random.shuffle(i)
|
||
|
df['jim'] = df['jolie'].loc[i, ::-1]
|
||
|
assert_frame_equal(df['jim'], df['jolie'])
|
||
|
|
||
|
np.random.shuffle(j)
|
||
|
df[('joe', 'first')] = df[('jolie', 'last')].loc[i, j]
|
||
|
assert_frame_equal(df[('joe', 'first')], df[('jolie', 'last')])
|
||
|
|
||
|
np.random.shuffle(j)
|
||
|
df[('joe', 'last')] = df[('jolie', 'first')].loc[i, j]
|
||
|
assert_frame_equal(df[('joe', 'last')], df[('jolie', 'first')])
|
||
|
|
||
|
def test_setitem_callable(self):
|
||
|
# GH 12533
|
||
|
df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]})
|
||
|
df[lambda x: 'A'] = [11, 12, 13, 14]
|
||
|
|
||
|
exp = pd.DataFrame({'A': [11, 12, 13, 14], 'B': [5, 6, 7, 8]})
|
||
|
tm.assert_frame_equal(df, exp)
|
||
|
|
||
|
def test_setitem_other_callable(self):
|
||
|
# GH 13299
|
||
|
inc = lambda x: x + 1
|
||
|
|
||
|
df = pd.DataFrame([[-1, 1], [1, -1]])
|
||
|
df[df > 0] = inc
|
||
|
|
||
|
expected = pd.DataFrame([[-1, inc], [inc, -1]])
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_getitem_boolean(self):
|
||
|
# boolean indexing
|
||
|
d = self.tsframe.index[10]
|
||
|
indexer = self.tsframe.index > d
|
||
|
indexer_obj = indexer.astype(object)
|
||
|
|
||
|
subindex = self.tsframe.index[indexer]
|
||
|
subframe = self.tsframe[indexer]
|
||
|
|
||
|
tm.assert_index_equal(subindex, subframe.index)
|
||
|
with tm.assert_raises_regex(ValueError, 'Item wrong length'):
|
||
|
self.tsframe[indexer[:-1]]
|
||
|
|
||
|
subframe_obj = self.tsframe[indexer_obj]
|
||
|
assert_frame_equal(subframe_obj, subframe)
|
||
|
|
||
|
with tm.assert_raises_regex(ValueError, 'boolean values only'):
|
||
|
self.tsframe[self.tsframe]
|
||
|
|
||
|
# test that Series work
|
||
|
indexer_obj = Series(indexer_obj, self.tsframe.index)
|
||
|
|
||
|
subframe_obj = self.tsframe[indexer_obj]
|
||
|
assert_frame_equal(subframe_obj, subframe)
|
||
|
|
||
|
# test that Series indexers reindex
|
||
|
# we are producing a warning that since the passed boolean
|
||
|
# key is not the same as the given index, we will reindex
|
||
|
# not sure this is really necessary
|
||
|
with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
|
||
|
indexer_obj = indexer_obj.reindex(self.tsframe.index[::-1])
|
||
|
subframe_obj = self.tsframe[indexer_obj]
|
||
|
assert_frame_equal(subframe_obj, subframe)
|
||
|
|
||
|
# test df[df > 0]
|
||
|
for df in [self.tsframe, self.mixed_frame,
|
||
|
self.mixed_float, self.mixed_int]:
|
||
|
|
||
|
data = df._get_numeric_data()
|
||
|
bif = df[df > 0]
|
||
|
bifw = DataFrame(dict((c, np.where(data[c] > 0, data[c], np.nan))
|
||
|
for c in data.columns),
|
||
|
index=data.index, columns=data.columns)
|
||
|
|
||
|
# add back other columns to compare
|
||
|
for c in df.columns:
|
||
|
if c not in bifw:
|
||
|
bifw[c] = df[c]
|
||
|
bifw = bifw.reindex(columns=df.columns)
|
||
|
|
||
|
assert_frame_equal(bif, bifw, check_dtype=False)
|
||
|
for c in df.columns:
|
||
|
if bif[c].dtype != bifw[c].dtype:
|
||
|
assert bif[c].dtype == df[c].dtype
|
||
|
|
||
|
def test_getitem_boolean_casting(self):
|
||
|
|
||
|
# don't upcast if we don't need to
|
||
|
df = self.tsframe.copy()
|
||
|
df['E'] = 1
|
||
|
df['E'] = df['E'].astype('int32')
|
||
|
df['E1'] = df['E'].copy()
|
||
|
df['F'] = 1
|
||
|
df['F'] = df['F'].astype('int64')
|
||
|
df['F1'] = df['F'].copy()
|
||
|
|
||
|
casted = df[df > 0]
|
||
|
result = casted.get_dtype_counts()
|
||
|
expected = Series({'float64': 4, 'int32': 2, 'int64': 2})
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
# int block splitting
|
||
|
df.loc[df.index[1:3], ['E1', 'F1']] = 0
|
||
|
casted = df[df > 0]
|
||
|
result = casted.get_dtype_counts()
|
||
|
expected = Series({'float64': 6, 'int32': 1, 'int64': 1})
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
# where dtype conversions
|
||
|
# GH 3733
|
||
|
df = DataFrame(data=np.random.randn(100, 50))
|
||
|
df = df.where(df > 0) # create nans
|
||
|
bools = df > 0
|
||
|
mask = isna(df)
|
||
|
expected = bools.astype(float).mask(mask)
|
||
|
result = bools.mask(mask)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_getitem_boolean_list(self):
|
||
|
df = DataFrame(np.arange(12).reshape(3, 4))
|
||
|
|
||
|
def _checkit(lst):
|
||
|
result = df[lst]
|
||
|
expected = df.loc[df.index[lst]]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
_checkit([True, False, True])
|
||
|
_checkit([True, True, True])
|
||
|
_checkit([False, False, False])
|
||
|
|
||
|
def test_getitem_boolean_iadd(self):
|
||
|
arr = randn(5, 5)
|
||
|
|
||
|
df = DataFrame(arr.copy(), columns=['A', 'B', 'C', 'D', 'E'])
|
||
|
|
||
|
df[df < 0] += 1
|
||
|
arr[arr < 0] += 1
|
||
|
|
||
|
assert_almost_equal(df.values, arr)
|
||
|
|
||
|
def test_boolean_index_empty_corner(self):
|
||
|
# #2096
|
||
|
blah = DataFrame(np.empty([0, 1]), columns=['A'],
|
||
|
index=DatetimeIndex([]))
|
||
|
|
||
|
# both of these should succeed trivially
|
||
|
k = np.array([], bool)
|
||
|
|
||
|
blah[k]
|
||
|
blah[k] = 0
|
||
|
|
||
|
def test_getitem_ix_mixed_integer(self):
|
||
|
df = DataFrame(np.random.randn(4, 3),
|
||
|
index=[1, 10, 'C', 'E'], columns=[1, 2, 3])
|
||
|
|
||
|
result = df.iloc[:-1]
|
||
|
expected = df.loc[df.index[:-1]]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
result = df.ix[[1, 10]]
|
||
|
expected = df.ix[Index([1, 10], dtype=object)]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
# 11320
|
||
|
df = pd.DataFrame({"rna": (1.5, 2.2, 3.2, 4.5),
|
||
|
-1000: [11, 21, 36, 40],
|
||
|
0: [10, 22, 43, 34],
|
||
|
1000: [0, 10, 20, 30]},
|
||
|
columns=['rna', -1000, 0, 1000])
|
||
|
result = df[[1000]]
|
||
|
expected = df.iloc[:, [3]]
|
||
|
assert_frame_equal(result, expected)
|
||
|
result = df[[-1000]]
|
||
|
expected = df.iloc[:, [1]]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_getitem_setitem_ix_negative_integers(self):
|
||
|
with catch_warnings(record=True):
|
||
|
result = self.frame.ix[:, -1]
|
||
|
assert_series_equal(result, self.frame['D'])
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
result = self.frame.ix[:, [-1]]
|
||
|
assert_frame_equal(result, self.frame[['D']])
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
result = self.frame.ix[:, [-1, -2]]
|
||
|
assert_frame_equal(result, self.frame[['D', 'C']])
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
self.frame.ix[:, [-1]] = 0
|
||
|
assert (self.frame['D'] == 0).all()
|
||
|
|
||
|
df = DataFrame(np.random.randn(8, 4))
|
||
|
# ix does label-based indexing when having an integer index
|
||
|
with pytest.raises(KeyError):
|
||
|
df.ix[[-1]]
|
||
|
|
||
|
with pytest.raises(KeyError):
|
||
|
df.ix[:, [-1]]
|
||
|
|
||
|
# #1942
|
||
|
a = DataFrame(randn(20, 2), index=[chr(x + 65) for x in range(20)])
|
||
|
with catch_warnings(record=True):
|
||
|
a.ix[-1] = a.ix[-2]
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
assert_series_equal(a.ix[-1], a.ix[-2], check_names=False)
|
||
|
assert a.ix[-1].name == 'T'
|
||
|
assert a.ix[-2].name == 'S'
|
||
|
|
||
|
def test_getattr(self):
|
||
|
assert_series_equal(self.frame.A, self.frame['A'])
|
||
|
pytest.raises(AttributeError, getattr, self.frame,
|
||
|
'NONEXISTENT_NAME')
|
||
|
|
||
|
def test_setattr_column(self):
|
||
|
df = DataFrame({'foobar': 1}, index=lrange(10))
|
||
|
|
||
|
df.foobar = 5
|
||
|
assert (df.foobar == 5).all()
|
||
|
|
||
|
def test_setitem(self):
|
||
|
# not sure what else to do here
|
||
|
series = self.frame['A'][::2]
|
||
|
self.frame['col5'] = series
|
||
|
assert 'col5' in self.frame
|
||
|
|
||
|
assert len(series) == 15
|
||
|
assert len(self.frame) == 30
|
||
|
|
||
|
exp = np.ravel(np.column_stack((series.values, [np.nan] * 15)))
|
||
|
exp = Series(exp, index=self.frame.index, name='col5')
|
||
|
tm.assert_series_equal(self.frame['col5'], exp)
|
||
|
|
||
|
series = self.frame['A']
|
||
|
self.frame['col6'] = series
|
||
|
tm.assert_series_equal(series, self.frame['col6'], check_names=False)
|
||
|
|
||
|
with pytest.raises(KeyError):
|
||
|
self.frame[randn(len(self.frame) + 1)] = 1
|
||
|
|
||
|
# set ndarray
|
||
|
arr = randn(len(self.frame))
|
||
|
self.frame['col9'] = arr
|
||
|
assert (self.frame['col9'] == arr).all()
|
||
|
|
||
|
self.frame['col7'] = 5
|
||
|
assert((self.frame['col7'] == 5).all())
|
||
|
|
||
|
self.frame['col0'] = 3.14
|
||
|
assert((self.frame['col0'] == 3.14).all())
|
||
|
|
||
|
self.frame['col8'] = 'foo'
|
||
|
assert((self.frame['col8'] == 'foo').all())
|
||
|
|
||
|
# this is partially a view (e.g. some blocks are view)
|
||
|
# so raise/warn
|
||
|
smaller = self.frame[:2]
|
||
|
|
||
|
def f():
|
||
|
smaller['col10'] = ['1', '2']
|
||
|
pytest.raises(com.SettingWithCopyError, f)
|
||
|
assert smaller['col10'].dtype == np.object_
|
||
|
assert (smaller['col10'] == ['1', '2']).all()
|
||
|
|
||
|
# with a dtype
|
||
|
for dtype in ['int32', 'int64', 'float32', 'float64']:
|
||
|
self.frame[dtype] = np.array(arr, dtype=dtype)
|
||
|
assert self.frame[dtype].dtype.name == dtype
|
||
|
|
||
|
# dtype changing GH4204
|
||
|
df = DataFrame([[0, 0]])
|
||
|
df.iloc[0] = np.nan
|
||
|
expected = DataFrame([[np.nan, np.nan]])
|
||
|
assert_frame_equal(df, expected)
|
||
|
|
||
|
df = DataFrame([[0, 0]])
|
||
|
df.loc[0] = np.nan
|
||
|
assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_setitem_tuple(self):
|
||
|
self.frame['A', 'B'] = self.frame['A']
|
||
|
assert_series_equal(self.frame['A', 'B'], self.frame[
|
||
|
'A'], check_names=False)
|
||
|
|
||
|
def test_setitem_always_copy(self):
|
||
|
s = self.frame['A'].copy()
|
||
|
self.frame['E'] = s
|
||
|
|
||
|
self.frame['E'][5:10] = nan
|
||
|
assert notna(s[5:10]).all()
|
||
|
|
||
|
def test_setitem_boolean(self):
|
||
|
df = self.frame.copy()
|
||
|
values = self.frame.values
|
||
|
|
||
|
df[df['A'] > 0] = 4
|
||
|
values[values[:, 0] > 0] = 4
|
||
|
assert_almost_equal(df.values, values)
|
||
|
|
||
|
# test that column reindexing works
|
||
|
series = df['A'] == 4
|
||
|
series = series.reindex(df.index[::-1])
|
||
|
df[series] = 1
|
||
|
values[values[:, 0] == 4] = 1
|
||
|
assert_almost_equal(df.values, values)
|
||
|
|
||
|
df[df > 0] = 5
|
||
|
values[values > 0] = 5
|
||
|
assert_almost_equal(df.values, values)
|
||
|
|
||
|
df[df == 5] = 0
|
||
|
values[values == 5] = 0
|
||
|
assert_almost_equal(df.values, values)
|
||
|
|
||
|
# a df that needs alignment first
|
||
|
df[df[:-1] < 0] = 2
|
||
|
np.putmask(values[:-1], values[:-1] < 0, 2)
|
||
|
assert_almost_equal(df.values, values)
|
||
|
|
||
|
# indexed with same shape but rows-reversed df
|
||
|
df[df[::-1] == 2] = 3
|
||
|
values[values == 2] = 3
|
||
|
assert_almost_equal(df.values, values)
|
||
|
|
||
|
msg = "Must pass DataFrame or 2-d ndarray with boolean values only"
|
||
|
with tm.assert_raises_regex(TypeError, msg):
|
||
|
df[df * 0] = 2
|
||
|
|
||
|
# index with DataFrame
|
||
|
mask = df > np.abs(df)
|
||
|
expected = df.copy()
|
||
|
df[df > np.abs(df)] = nan
|
||
|
expected.values[mask.values] = nan
|
||
|
assert_frame_equal(df, expected)
|
||
|
|
||
|
# set from DataFrame
|
||
|
expected = df.copy()
|
||
|
df[df > np.abs(df)] = df * 2
|
||
|
np.putmask(expected.values, mask.values, df.values * 2)
|
||
|
assert_frame_equal(df, expected)
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"mask_type",
|
||
|
[lambda df: df > np.abs(df) / 2,
|
||
|
lambda df: (df > np.abs(df) / 2).values],
|
||
|
ids=['dataframe', 'array'])
|
||
|
def test_setitem_boolean_mask(self, mask_type):
|
||
|
|
||
|
# Test for issue #18582
|
||
|
df = self.frame.copy()
|
||
|
mask = mask_type(df)
|
||
|
|
||
|
# index with boolean mask
|
||
|
result = df.copy()
|
||
|
result[mask] = np.nan
|
||
|
|
||
|
expected = df.copy()
|
||
|
expected.values[np.array(mask)] = np.nan
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_setitem_cast(self):
|
||
|
self.frame['D'] = self.frame['D'].astype('i8')
|
||
|
assert self.frame['D'].dtype == np.int64
|
||
|
|
||
|
# #669, should not cast?
|
||
|
# this is now set to int64, which means a replacement of the column to
|
||
|
# the value dtype (and nothing to do with the existing dtype)
|
||
|
self.frame['B'] = 0
|
||
|
assert self.frame['B'].dtype == np.int64
|
||
|
|
||
|
# cast if pass array of course
|
||
|
self.frame['B'] = np.arange(len(self.frame))
|
||
|
assert issubclass(self.frame['B'].dtype.type, np.integer)
|
||
|
|
||
|
self.frame['foo'] = 'bar'
|
||
|
self.frame['foo'] = 0
|
||
|
assert self.frame['foo'].dtype == np.int64
|
||
|
|
||
|
self.frame['foo'] = 'bar'
|
||
|
self.frame['foo'] = 2.5
|
||
|
assert self.frame['foo'].dtype == np.float64
|
||
|
|
||
|
self.frame['something'] = 0
|
||
|
assert self.frame['something'].dtype == np.int64
|
||
|
self.frame['something'] = 2
|
||
|
assert self.frame['something'].dtype == np.int64
|
||
|
self.frame['something'] = 2.5
|
||
|
assert self.frame['something'].dtype == np.float64
|
||
|
|
||
|
# GH 7704
|
||
|
# dtype conversion on setting
|
||
|
df = DataFrame(np.random.rand(30, 3), columns=tuple('ABC'))
|
||
|
df['event'] = np.nan
|
||
|
df.loc[10, 'event'] = 'foo'
|
||
|
result = df.get_dtype_counts().sort_values()
|
||
|
expected = Series({'float64': 3, 'object': 1}).sort_values()
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
# Test that data type is preserved . #5782
|
||
|
df = DataFrame({'one': np.arange(6, dtype=np.int8)})
|
||
|
df.loc[1, 'one'] = 6
|
||
|
assert df.dtypes.one == np.dtype(np.int8)
|
||
|
df.one = np.int8(7)
|
||
|
assert df.dtypes.one == np.dtype(np.int8)
|
||
|
|
||
|
def test_setitem_boolean_column(self):
|
||
|
expected = self.frame.copy()
|
||
|
mask = self.frame['A'] > 0
|
||
|
|
||
|
self.frame.loc[mask, 'B'] = 0
|
||
|
expected.values[mask.values, 1] = 0
|
||
|
|
||
|
assert_frame_equal(self.frame, expected)
|
||
|
|
||
|
def test_frame_setitem_timestamp(self):
|
||
|
# GH#2155
|
||
|
columns = DatetimeIndex(start='1/1/2012', end='2/1/2012', freq=BDay())
|
||
|
index = lrange(10)
|
||
|
data = DataFrame(columns=columns, index=index)
|
||
|
t = datetime(2012, 11, 1)
|
||
|
ts = Timestamp(t)
|
||
|
data[ts] = np.nan # works, mostly a smoke-test
|
||
|
assert np.isnan(data[ts]).all()
|
||
|
|
||
|
def test_setitem_corner(self):
|
||
|
# corner case
|
||
|
df = DataFrame({'B': [1., 2., 3.],
|
||
|
'C': ['a', 'b', 'c']},
|
||
|
index=np.arange(3))
|
||
|
del df['B']
|
||
|
df['B'] = [1., 2., 3.]
|
||
|
assert 'B' in df
|
||
|
assert len(df.columns) == 2
|
||
|
|
||
|
df['A'] = 'beginning'
|
||
|
df['E'] = 'foo'
|
||
|
df['D'] = 'bar'
|
||
|
df[datetime.now()] = 'date'
|
||
|
df[datetime.now()] = 5.
|
||
|
|
||
|
# what to do when empty frame with index
|
||
|
dm = DataFrame(index=self.frame.index)
|
||
|
dm['A'] = 'foo'
|
||
|
dm['B'] = 'bar'
|
||
|
assert len(dm.columns) == 2
|
||
|
assert dm.values.dtype == np.object_
|
||
|
|
||
|
# upcast
|
||
|
dm['C'] = 1
|
||
|
assert dm['C'].dtype == np.int64
|
||
|
|
||
|
dm['E'] = 1.
|
||
|
assert dm['E'].dtype == np.float64
|
||
|
|
||
|
# set existing column
|
||
|
dm['A'] = 'bar'
|
||
|
assert 'bar' == dm['A'][0]
|
||
|
|
||
|
dm = DataFrame(index=np.arange(3))
|
||
|
dm['A'] = 1
|
||
|
dm['foo'] = 'bar'
|
||
|
del dm['foo']
|
||
|
dm['foo'] = 'bar'
|
||
|
assert dm['foo'].dtype == np.object_
|
||
|
|
||
|
dm['coercable'] = ['1', '2', '3']
|
||
|
assert dm['coercable'].dtype == np.object_
|
||
|
|
||
|
def test_setitem_corner2(self):
|
||
|
data = {"title": ['foobar', 'bar', 'foobar'] + ['foobar'] * 17,
|
||
|
"cruft": np.random.random(20)}
|
||
|
|
||
|
df = DataFrame(data)
|
||
|
ix = df[df['title'] == 'bar'].index
|
||
|
|
||
|
df.loc[ix, ['title']] = 'foobar'
|
||
|
df.loc[ix, ['cruft']] = 0
|
||
|
|
||
|
assert df.loc[1, 'title'] == 'foobar'
|
||
|
assert df.loc[1, 'cruft'] == 0
|
||
|
|
||
|
def test_setitem_ambig(self):
|
||
|
# Difficulties with mixed-type data
|
||
|
from decimal import Decimal
|
||
|
|
||
|
# Created as float type
|
||
|
dm = DataFrame(index=lrange(3), columns=lrange(3))
|
||
|
|
||
|
coercable_series = Series([Decimal(1) for _ in range(3)],
|
||
|
index=lrange(3))
|
||
|
uncoercable_series = Series(['foo', 'bzr', 'baz'], index=lrange(3))
|
||
|
|
||
|
dm[0] = np.ones(3)
|
||
|
assert len(dm.columns) == 3
|
||
|
|
||
|
dm[1] = coercable_series
|
||
|
assert len(dm.columns) == 3
|
||
|
|
||
|
dm[2] = uncoercable_series
|
||
|
assert len(dm.columns) == 3
|
||
|
assert dm[2].dtype == np.object_
|
||
|
|
||
|
def test_setitem_clear_caches(self):
|
||
|
# see gh-304
|
||
|
df = DataFrame({'x': [1.1, 2.1, 3.1, 4.1], 'y': [5.1, 6.1, 7.1, 8.1]},
|
||
|
index=[0, 1, 2, 3])
|
||
|
df.insert(2, 'z', np.nan)
|
||
|
|
||
|
# cache it
|
||
|
foo = df['z']
|
||
|
df.loc[df.index[2:], 'z'] = 42
|
||
|
|
||
|
expected = Series([np.nan, np.nan, 42, 42], index=df.index, name='z')
|
||
|
|
||
|
assert df['z'] is not foo
|
||
|
tm.assert_series_equal(df['z'], expected)
|
||
|
|
||
|
def test_setitem_None(self):
|
||
|
# GH #766
|
||
|
self.frame[None] = self.frame['A']
|
||
|
assert_series_equal(
|
||
|
self.frame.iloc[:, -1], self.frame['A'], check_names=False)
|
||
|
assert_series_equal(self.frame.loc[:, None], self.frame[
|
||
|
'A'], check_names=False)
|
||
|
assert_series_equal(self.frame[None], self.frame[
|
||
|
'A'], check_names=False)
|
||
|
repr(self.frame)
|
||
|
|
||
|
def test_setitem_empty(self):
|
||
|
# GH 9596
|
||
|
df = pd.DataFrame({'a': ['1', '2', '3'],
|
||
|
'b': ['11', '22', '33'],
|
||
|
'c': ['111', '222', '333']})
|
||
|
|
||
|
result = df.copy()
|
||
|
result.loc[result.b.isna(), 'a'] = result.a
|
||
|
assert_frame_equal(result, df)
|
||
|
|
||
|
def test_setitem_empty_frame_with_boolean(self):
|
||
|
# Test for issue #10126
|
||
|
|
||
|
for dtype in ('float', 'int64'):
|
||
|
for df in [
|
||
|
pd.DataFrame(dtype=dtype),
|
||
|
pd.DataFrame(dtype=dtype, index=[1]),
|
||
|
pd.DataFrame(dtype=dtype, columns=['A']),
|
||
|
]:
|
||
|
df2 = df.copy()
|
||
|
df[df > df2] = 47
|
||
|
assert_frame_equal(df, df2)
|
||
|
|
||
|
def test_setitem_scalars_no_index(self):
|
||
|
# GH16823 / 17894
|
||
|
df = DataFrame()
|
||
|
df['foo'] = 1
|
||
|
expected = DataFrame(columns=['foo']).astype(np.int64)
|
||
|
assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_getitem_empty_frame_with_boolean(self):
|
||
|
# Test for issue #11859
|
||
|
|
||
|
df = pd.DataFrame()
|
||
|
df2 = df[df > 0]
|
||
|
assert_frame_equal(df, df2)
|
||
|
|
||
|
def test_delitem_corner(self):
|
||
|
f = self.frame.copy()
|
||
|
del f['D']
|
||
|
assert len(f.columns) == 3
|
||
|
pytest.raises(KeyError, f.__delitem__, 'D')
|
||
|
del f['B']
|
||
|
assert len(f.columns) == 2
|
||
|
|
||
|
def test_getitem_fancy_2d(self):
|
||
|
f = self.frame
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
assert_frame_equal(f.ix[:, ['B', 'A']],
|
||
|
f.reindex(columns=['B', 'A']))
|
||
|
|
||
|
subidx = self.frame.index[[5, 4, 1]]
|
||
|
with catch_warnings(record=True):
|
||
|
assert_frame_equal(f.ix[subidx, ['B', 'A']],
|
||
|
f.reindex(index=subidx, columns=['B', 'A']))
|
||
|
|
||
|
# slicing rows, etc.
|
||
|
with catch_warnings(record=True):
|
||
|
assert_frame_equal(f.ix[5:10], f[5:10])
|
||
|
assert_frame_equal(f.ix[5:10, :], f[5:10])
|
||
|
assert_frame_equal(f.ix[:5, ['A', 'B']],
|
||
|
f.reindex(index=f.index[:5],
|
||
|
columns=['A', 'B']))
|
||
|
|
||
|
# slice rows with labels, inclusive!
|
||
|
with catch_warnings(record=True):
|
||
|
expected = f.ix[5:11]
|
||
|
result = f.ix[f.index[5]:f.index[10]]
|
||
|
assert_frame_equal(expected, result)
|
||
|
|
||
|
# slice columns
|
||
|
with catch_warnings(record=True):
|
||
|
assert_frame_equal(f.ix[:, :2], f.reindex(columns=['A', 'B']))
|
||
|
|
||
|
# get view
|
||
|
with catch_warnings(record=True):
|
||
|
exp = f.copy()
|
||
|
f.ix[5:10].values[:] = 5
|
||
|
exp.values[5:10] = 5
|
||
|
assert_frame_equal(f, exp)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
pytest.raises(ValueError, f.ix.__getitem__, f > 0.5)
|
||
|
|
||
|
def test_slice_floats(self):
|
||
|
index = [52195.504153, 52196.303147, 52198.369883]
|
||
|
df = DataFrame(np.random.rand(3, 2), index=index)
|
||
|
|
||
|
s1 = df.loc[52195.1:52196.5]
|
||
|
assert len(s1) == 2
|
||
|
|
||
|
s1 = df.loc[52195.1:52196.6]
|
||
|
assert len(s1) == 2
|
||
|
|
||
|
s1 = df.loc[52195.1:52198.9]
|
||
|
assert len(s1) == 3
|
||
|
|
||
|
def test_getitem_fancy_slice_integers_step(self):
|
||
|
df = DataFrame(np.random.randn(10, 5))
|
||
|
|
||
|
# this is OK
|
||
|
result = df.iloc[:8:2] # noqa
|
||
|
df.iloc[:8:2] = np.nan
|
||
|
assert isna(df.iloc[:8:2]).values.all()
|
||
|
|
||
|
def test_getitem_setitem_integer_slice_keyerrors(self):
|
||
|
df = DataFrame(np.random.randn(10, 5), index=lrange(0, 20, 2))
|
||
|
|
||
|
# this is OK
|
||
|
cp = df.copy()
|
||
|
cp.iloc[4:10] = 0
|
||
|
assert (cp.iloc[4:10] == 0).values.all()
|
||
|
|
||
|
# so is this
|
||
|
cp = df.copy()
|
||
|
cp.iloc[3:11] = 0
|
||
|
assert (cp.iloc[3:11] == 0).values.all()
|
||
|
|
||
|
result = df.iloc[2:6]
|
||
|
result2 = df.loc[3:11]
|
||
|
expected = df.reindex([4, 6, 8, 10])
|
||
|
|
||
|
assert_frame_equal(result, expected)
|
||
|
assert_frame_equal(result2, expected)
|
||
|
|
||
|
# non-monotonic, raise KeyError
|
||
|
df2 = df.iloc[lrange(5) + lrange(5, 10)[::-1]]
|
||
|
pytest.raises(KeyError, df2.loc.__getitem__, slice(3, 11))
|
||
|
pytest.raises(KeyError, df2.loc.__setitem__, slice(3, 11), 0)
|
||
|
|
||
|
def test_setitem_fancy_2d(self):
|
||
|
|
||
|
# case 1
|
||
|
frame = self.frame.copy()
|
||
|
expected = frame.copy()
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
frame.ix[:, ['B', 'A']] = 1
|
||
|
expected['B'] = 1.
|
||
|
expected['A'] = 1.
|
||
|
assert_frame_equal(frame, expected)
|
||
|
|
||
|
# case 2
|
||
|
frame = self.frame.copy()
|
||
|
frame2 = self.frame.copy()
|
||
|
|
||
|
expected = frame.copy()
|
||
|
|
||
|
subidx = self.frame.index[[5, 4, 1]]
|
||
|
values = randn(3, 2)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
frame.ix[subidx, ['B', 'A']] = values
|
||
|
frame2.ix[[5, 4, 1], ['B', 'A']] = values
|
||
|
|
||
|
expected['B'].ix[subidx] = values[:, 0]
|
||
|
expected['A'].ix[subidx] = values[:, 1]
|
||
|
|
||
|
assert_frame_equal(frame, expected)
|
||
|
assert_frame_equal(frame2, expected)
|
||
|
|
||
|
# case 3: slicing rows, etc.
|
||
|
frame = self.frame.copy()
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
expected1 = self.frame.copy()
|
||
|
frame.ix[5:10] = 1.
|
||
|
expected1.values[5:10] = 1.
|
||
|
assert_frame_equal(frame, expected1)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
expected2 = self.frame.copy()
|
||
|
arr = randn(5, len(frame.columns))
|
||
|
frame.ix[5:10] = arr
|
||
|
expected2.values[5:10] = arr
|
||
|
assert_frame_equal(frame, expected2)
|
||
|
|
||
|
# case 4
|
||
|
with catch_warnings(record=True):
|
||
|
frame = self.frame.copy()
|
||
|
frame.ix[5:10, :] = 1.
|
||
|
assert_frame_equal(frame, expected1)
|
||
|
frame.ix[5:10, :] = arr
|
||
|
assert_frame_equal(frame, expected2)
|
||
|
|
||
|
# case 5
|
||
|
with catch_warnings(record=True):
|
||
|
frame = self.frame.copy()
|
||
|
frame2 = self.frame.copy()
|
||
|
|
||
|
expected = self.frame.copy()
|
||
|
values = randn(5, 2)
|
||
|
|
||
|
frame.ix[:5, ['A', 'B']] = values
|
||
|
expected['A'][:5] = values[:, 0]
|
||
|
expected['B'][:5] = values[:, 1]
|
||
|
assert_frame_equal(frame, expected)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
frame2.ix[:5, [0, 1]] = values
|
||
|
assert_frame_equal(frame2, expected)
|
||
|
|
||
|
# case 6: slice rows with labels, inclusive!
|
||
|
with catch_warnings(record=True):
|
||
|
frame = self.frame.copy()
|
||
|
expected = self.frame.copy()
|
||
|
|
||
|
frame.ix[frame.index[5]:frame.index[10]] = 5.
|
||
|
expected.values[5:11] = 5
|
||
|
assert_frame_equal(frame, expected)
|
||
|
|
||
|
# case 7: slice columns
|
||
|
with catch_warnings(record=True):
|
||
|
frame = self.frame.copy()
|
||
|
frame2 = self.frame.copy()
|
||
|
expected = self.frame.copy()
|
||
|
|
||
|
# slice indices
|
||
|
frame.ix[:, 1:3] = 4.
|
||
|
expected.values[:, 1:3] = 4.
|
||
|
assert_frame_equal(frame, expected)
|
||
|
|
||
|
# slice with labels
|
||
|
frame.ix[:, 'B':'C'] = 4.
|
||
|
assert_frame_equal(frame, expected)
|
||
|
|
||
|
# new corner case of boolean slicing / setting
|
||
|
frame = DataFrame(lzip([2, 3, 9, 6, 7], [np.nan] * 5),
|
||
|
columns=['a', 'b'])
|
||
|
lst = [100]
|
||
|
lst.extend([np.nan] * 4)
|
||
|
expected = DataFrame(lzip([100, 3, 9, 6, 7], lst),
|
||
|
columns=['a', 'b'])
|
||
|
frame[frame['a'] == 2] = 100
|
||
|
assert_frame_equal(frame, expected)
|
||
|
|
||
|
def test_fancy_getitem_slice_mixed(self):
|
||
|
sliced = self.mixed_frame.iloc[:, -3:]
|
||
|
assert sliced['D'].dtype == np.float64
|
||
|
|
||
|
# get view with single block
|
||
|
# setting it triggers setting with copy
|
||
|
sliced = self.frame.iloc[:, -3:]
|
||
|
|
||
|
def f():
|
||
|
sliced['C'] = 4.
|
||
|
pytest.raises(com.SettingWithCopyError, f)
|
||
|
assert (self.frame['C'] == 4).all()
|
||
|
|
||
|
def test_fancy_setitem_int_labels(self):
|
||
|
# integer index defers to label-based indexing
|
||
|
|
||
|
df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2))
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
tmp = df.copy()
|
||
|
exp = df.copy()
|
||
|
tmp.ix[[0, 2, 4]] = 5
|
||
|
exp.values[:3] = 5
|
||
|
assert_frame_equal(tmp, exp)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
tmp = df.copy()
|
||
|
exp = df.copy()
|
||
|
tmp.ix[6] = 5
|
||
|
exp.values[3] = 5
|
||
|
assert_frame_equal(tmp, exp)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
tmp = df.copy()
|
||
|
exp = df.copy()
|
||
|
tmp.ix[:, 2] = 5
|
||
|
|
||
|
# tmp correctly sets the dtype
|
||
|
# so match the exp way
|
||
|
exp[2] = 5
|
||
|
assert_frame_equal(tmp, exp)
|
||
|
|
||
|
def test_fancy_getitem_int_labels(self):
|
||
|
df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2))
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
result = df.ix[[4, 2, 0], [2, 0]]
|
||
|
expected = df.reindex(index=[4, 2, 0], columns=[2, 0])
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
result = df.ix[[4, 2, 0]]
|
||
|
expected = df.reindex(index=[4, 2, 0])
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
result = df.ix[4]
|
||
|
expected = df.xs(4)
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
result = df.ix[:, 3]
|
||
|
expected = df[3]
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_fancy_index_int_labels_exceptions(self):
|
||
|
df = DataFrame(np.random.randn(10, 5), index=np.arange(0, 20, 2))
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
|
||
|
# labels that aren't contained
|
||
|
pytest.raises(KeyError, df.ix.__setitem__,
|
||
|
([0, 1, 2], [2, 3, 4]), 5)
|
||
|
|
||
|
# try to set indices not contained in frame
|
||
|
pytest.raises(KeyError, self.frame.ix.__setitem__,
|
||
|
['foo', 'bar', 'baz'], 1)
|
||
|
pytest.raises(KeyError, self.frame.ix.__setitem__,
|
||
|
(slice(None, None), ['E']), 1)
|
||
|
|
||
|
# partial setting now allows this GH2578
|
||
|
# pytest.raises(KeyError, self.frame.ix.__setitem__,
|
||
|
# (slice(None, None), 'E'), 1)
|
||
|
|
||
|
def test_setitem_fancy_mixed_2d(self):
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
self.mixed_frame.ix[:5, ['C', 'B', 'A']] = 5
|
||
|
result = self.mixed_frame.ix[:5, ['C', 'B', 'A']]
|
||
|
assert (result.values == 5).all()
|
||
|
|
||
|
self.mixed_frame.ix[5] = np.nan
|
||
|
assert isna(self.mixed_frame.ix[5]).all()
|
||
|
|
||
|
self.mixed_frame.ix[5] = self.mixed_frame.ix[6]
|
||
|
assert_series_equal(self.mixed_frame.ix[5], self.mixed_frame.ix[6],
|
||
|
check_names=False)
|
||
|
|
||
|
# #1432
|
||
|
with catch_warnings(record=True):
|
||
|
df = DataFrame({1: [1., 2., 3.],
|
||
|
2: [3, 4, 5]})
|
||
|
assert df._is_mixed_type
|
||
|
|
||
|
df.ix[1] = [5, 10]
|
||
|
|
||
|
expected = DataFrame({1: [1., 5., 3.],
|
||
|
2: [3, 10, 5]})
|
||
|
|
||
|
assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_ix_align(self):
|
||
|
b = Series(randn(10), name=0).sort_values()
|
||
|
df_orig = DataFrame(randn(10, 4))
|
||
|
df = df_orig.copy()
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
df.ix[:, 0] = b
|
||
|
assert_series_equal(df.ix[:, 0].reindex(b.index), b)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
dft = df_orig.T
|
||
|
dft.ix[0, :] = b
|
||
|
assert_series_equal(dft.ix[0, :].reindex(b.index), b)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
df = df_orig.copy()
|
||
|
df.ix[:5, 0] = b
|
||
|
s = df.ix[:5, 0]
|
||
|
assert_series_equal(s, b.reindex(s.index))
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
dft = df_orig.T
|
||
|
dft.ix[0, :5] = b
|
||
|
s = dft.ix[0, :5]
|
||
|
assert_series_equal(s, b.reindex(s.index))
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
df = df_orig.copy()
|
||
|
idx = [0, 1, 3, 5]
|
||
|
df.ix[idx, 0] = b
|
||
|
s = df.ix[idx, 0]
|
||
|
assert_series_equal(s, b.reindex(s.index))
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
dft = df_orig.T
|
||
|
dft.ix[0, idx] = b
|
||
|
s = dft.ix[0, idx]
|
||
|
assert_series_equal(s, b.reindex(s.index))
|
||
|
|
||
|
def test_ix_frame_align(self):
|
||
|
b = DataFrame(np.random.randn(3, 4))
|
||
|
df_orig = DataFrame(randn(10, 4))
|
||
|
df = df_orig.copy()
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
df.ix[:3] = b
|
||
|
out = b.ix[:3]
|
||
|
assert_frame_equal(out, b)
|
||
|
|
||
|
b.sort_index(inplace=True)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
df = df_orig.copy()
|
||
|
df.ix[[0, 1, 2]] = b
|
||
|
out = df.ix[[0, 1, 2]].reindex(b.index)
|
||
|
assert_frame_equal(out, b)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
df = df_orig.copy()
|
||
|
df.ix[:3] = b
|
||
|
out = df.ix[:3]
|
||
|
assert_frame_equal(out, b.reindex(out.index))
|
||
|
|
||
|
def test_getitem_setitem_non_ix_labels(self):
|
||
|
df = tm.makeTimeDataFrame()
|
||
|
|
||
|
start, end = df.index[[5, 10]]
|
||
|
|
||
|
result = df.loc[start:end]
|
||
|
result2 = df[start:end]
|
||
|
expected = df[5:11]
|
||
|
assert_frame_equal(result, expected)
|
||
|
assert_frame_equal(result2, expected)
|
||
|
|
||
|
result = df.copy()
|
||
|
result.loc[start:end] = 0
|
||
|
result2 = df.copy()
|
||
|
result2[start:end] = 0
|
||
|
expected = df.copy()
|
||
|
expected[5:11] = 0
|
||
|
assert_frame_equal(result, expected)
|
||
|
assert_frame_equal(result2, expected)
|
||
|
|
||
|
def test_ix_multi_take(self):
|
||
|
df = DataFrame(np.random.randn(3, 2))
|
||
|
rs = df.loc[df.index == 0, :]
|
||
|
xp = df.reindex([0])
|
||
|
assert_frame_equal(rs, xp)
|
||
|
|
||
|
""" #1321
|
||
|
df = DataFrame(np.random.randn(3, 2))
|
||
|
rs = df.loc[df.index==0, df.columns==1]
|
||
|
xp = df.reindex([0], [1])
|
||
|
assert_frame_equal(rs, xp)
|
||
|
"""
|
||
|
|
||
|
def test_ix_multi_take_nonint_index(self):
|
||
|
df = DataFrame(np.random.randn(3, 2), index=['x', 'y', 'z'],
|
||
|
columns=['a', 'b'])
|
||
|
with catch_warnings(record=True):
|
||
|
rs = df.ix[[0], [0]]
|
||
|
xp = df.reindex(['x'], columns=['a'])
|
||
|
assert_frame_equal(rs, xp)
|
||
|
|
||
|
def test_ix_multi_take_multiindex(self):
|
||
|
df = DataFrame(np.random.randn(3, 2), index=['x', 'y', 'z'],
|
||
|
columns=[['a', 'b'], ['1', '2']])
|
||
|
with catch_warnings(record=True):
|
||
|
rs = df.ix[[0], [0]]
|
||
|
xp = df.reindex(['x'], columns=[('a', '1')])
|
||
|
assert_frame_equal(rs, xp)
|
||
|
|
||
|
def test_ix_dup(self):
|
||
|
idx = Index(['a', 'a', 'b', 'c', 'd', 'd'])
|
||
|
df = DataFrame(np.random.randn(len(idx), 3), idx)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
sub = df.ix[:'d']
|
||
|
assert_frame_equal(sub, df)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
sub = df.ix['a':'c']
|
||
|
assert_frame_equal(sub, df.ix[0:4])
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
sub = df.ix['b':'d']
|
||
|
assert_frame_equal(sub, df.ix[2:])
|
||
|
|
||
|
def test_getitem_fancy_1d(self):
|
||
|
f = self.frame
|
||
|
|
||
|
# return self if no slicing...for now
|
||
|
with catch_warnings(record=True):
|
||
|
assert f.ix[:, :] is f
|
||
|
|
||
|
# low dimensional slice
|
||
|
with catch_warnings(record=True):
|
||
|
xs1 = f.ix[2, ['C', 'B', 'A']]
|
||
|
xs2 = f.xs(f.index[2]).reindex(['C', 'B', 'A'])
|
||
|
tm.assert_series_equal(xs1, xs2)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
ts1 = f.ix[5:10, 2]
|
||
|
ts2 = f[f.columns[2]][5:10]
|
||
|
tm.assert_series_equal(ts1, ts2)
|
||
|
|
||
|
# positional xs
|
||
|
with catch_warnings(record=True):
|
||
|
xs1 = f.ix[0]
|
||
|
xs2 = f.xs(f.index[0])
|
||
|
tm.assert_series_equal(xs1, xs2)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
xs1 = f.ix[f.index[5]]
|
||
|
xs2 = f.xs(f.index[5])
|
||
|
tm.assert_series_equal(xs1, xs2)
|
||
|
|
||
|
# single column
|
||
|
with catch_warnings(record=True):
|
||
|
assert_series_equal(f.ix[:, 'A'], f['A'])
|
||
|
|
||
|
# return view
|
||
|
with catch_warnings(record=True):
|
||
|
exp = f.copy()
|
||
|
exp.values[5] = 4
|
||
|
f.ix[5][:] = 4
|
||
|
tm.assert_frame_equal(exp, f)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
exp.values[:, 1] = 6
|
||
|
f.ix[:, 1][:] = 6
|
||
|
tm.assert_frame_equal(exp, f)
|
||
|
|
||
|
# slice of mixed-frame
|
||
|
with catch_warnings(record=True):
|
||
|
xs = self.mixed_frame.ix[5]
|
||
|
exp = self.mixed_frame.xs(self.mixed_frame.index[5])
|
||
|
tm.assert_series_equal(xs, exp)
|
||
|
|
||
|
def test_setitem_fancy_1d(self):
|
||
|
|
||
|
# case 1: set cross-section for indices
|
||
|
frame = self.frame.copy()
|
||
|
expected = self.frame.copy()
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
frame.ix[2, ['C', 'B', 'A']] = [1., 2., 3.]
|
||
|
expected['C'][2] = 1.
|
||
|
expected['B'][2] = 2.
|
||
|
expected['A'][2] = 3.
|
||
|
assert_frame_equal(frame, expected)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
frame2 = self.frame.copy()
|
||
|
frame2.ix[2, [3, 2, 1]] = [1., 2., 3.]
|
||
|
assert_frame_equal(frame, expected)
|
||
|
|
||
|
# case 2, set a section of a column
|
||
|
frame = self.frame.copy()
|
||
|
expected = self.frame.copy()
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
vals = randn(5)
|
||
|
expected.values[5:10, 2] = vals
|
||
|
frame.ix[5:10, 2] = vals
|
||
|
assert_frame_equal(frame, expected)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
frame2 = self.frame.copy()
|
||
|
frame2.ix[5:10, 'B'] = vals
|
||
|
assert_frame_equal(frame, expected)
|
||
|
|
||
|
# case 3: full xs
|
||
|
frame = self.frame.copy()
|
||
|
expected = self.frame.copy()
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
frame.ix[4] = 5.
|
||
|
expected.values[4] = 5.
|
||
|
assert_frame_equal(frame, expected)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
frame.ix[frame.index[4]] = 6.
|
||
|
expected.values[4] = 6.
|
||
|
assert_frame_equal(frame, expected)
|
||
|
|
||
|
# single column
|
||
|
frame = self.frame.copy()
|
||
|
expected = self.frame.copy()
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
frame.ix[:, 'A'] = 7.
|
||
|
expected['A'] = 7.
|
||
|
assert_frame_equal(frame, expected)
|
||
|
|
||
|
def test_getitem_fancy_scalar(self):
|
||
|
f = self.frame
|
||
|
ix = f.loc
|
||
|
|
||
|
# individual value
|
||
|
for col in f.columns:
|
||
|
ts = f[col]
|
||
|
for idx in f.index[::5]:
|
||
|
assert ix[idx, col] == ts[idx]
|
||
|
|
||
|
def test_setitem_fancy_scalar(self):
|
||
|
f = self.frame
|
||
|
expected = self.frame.copy()
|
||
|
ix = f.loc
|
||
|
|
||
|
# individual value
|
||
|
for j, col in enumerate(f.columns):
|
||
|
ts = f[col] # noqa
|
||
|
for idx in f.index[::5]:
|
||
|
i = f.index.get_loc(idx)
|
||
|
val = randn()
|
||
|
expected.values[i, j] = val
|
||
|
|
||
|
ix[idx, col] = val
|
||
|
assert_frame_equal(f, expected)
|
||
|
|
||
|
def test_getitem_fancy_boolean(self):
|
||
|
f = self.frame
|
||
|
ix = f.loc
|
||
|
|
||
|
expected = f.reindex(columns=['B', 'D'])
|
||
|
result = ix[:, [False, True, False, True]]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
expected = f.reindex(index=f.index[5:10], columns=['B', 'D'])
|
||
|
result = ix[f.index[5:10], [False, True, False, True]]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
boolvec = f.index > f.index[7]
|
||
|
expected = f.reindex(index=f.index[boolvec])
|
||
|
result = ix[boolvec]
|
||
|
assert_frame_equal(result, expected)
|
||
|
result = ix[boolvec, :]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = ix[boolvec, f.columns[2:]]
|
||
|
expected = f.reindex(index=f.index[boolvec],
|
||
|
columns=['C', 'D'])
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_setitem_fancy_boolean(self):
|
||
|
# from 2d, set with booleans
|
||
|
frame = self.frame.copy()
|
||
|
expected = self.frame.copy()
|
||
|
|
||
|
mask = frame['A'] > 0
|
||
|
frame.loc[mask] = 0.
|
||
|
expected.values[mask.values] = 0.
|
||
|
assert_frame_equal(frame, expected)
|
||
|
|
||
|
frame = self.frame.copy()
|
||
|
expected = self.frame.copy()
|
||
|
frame.loc[mask, ['A', 'B']] = 0.
|
||
|
expected.values[mask.values, :2] = 0.
|
||
|
assert_frame_equal(frame, expected)
|
||
|
|
||
|
def test_getitem_fancy_ints(self):
|
||
|
result = self.frame.iloc[[1, 4, 7]]
|
||
|
expected = self.frame.loc[self.frame.index[[1, 4, 7]]]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = self.frame.iloc[:, [2, 0, 1]]
|
||
|
expected = self.frame.loc[:, self.frame.columns[[2, 0, 1]]]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_getitem_setitem_fancy_exceptions(self):
|
||
|
ix = self.frame.iloc
|
||
|
with tm.assert_raises_regex(IndexingError, 'Too many indexers'):
|
||
|
ix[:, :, :]
|
||
|
|
||
|
with pytest.raises(IndexingError):
|
||
|
ix[:, :, :] = 1
|
||
|
|
||
|
def test_getitem_setitem_boolean_misaligned(self):
|
||
|
# boolean index misaligned labels
|
||
|
mask = self.frame['A'][::-1] > 1
|
||
|
|
||
|
result = self.frame.loc[mask]
|
||
|
expected = self.frame.loc[mask[::-1]]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
cp = self.frame.copy()
|
||
|
expected = self.frame.copy()
|
||
|
cp.loc[mask] = 0
|
||
|
expected.loc[mask] = 0
|
||
|
assert_frame_equal(cp, expected)
|
||
|
|
||
|
def test_getitem_setitem_boolean_multi(self):
|
||
|
df = DataFrame(np.random.randn(3, 2))
|
||
|
|
||
|
# get
|
||
|
k1 = np.array([True, False, True])
|
||
|
k2 = np.array([False, True])
|
||
|
result = df.loc[k1, k2]
|
||
|
expected = df.loc[[0, 2], [1]]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
expected = df.copy()
|
||
|
df.loc[np.array([True, False, True]),
|
||
|
np.array([False, True])] = 5
|
||
|
expected.loc[[0, 2], [1]] = 5
|
||
|
assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_getitem_setitem_float_labels(self):
|
||
|
index = Index([1.5, 2, 3, 4, 5])
|
||
|
df = DataFrame(np.random.randn(5, 5), index=index)
|
||
|
|
||
|
result = df.loc[1.5:4]
|
||
|
expected = df.reindex([1.5, 2, 3, 4])
|
||
|
assert_frame_equal(result, expected)
|
||
|
assert len(result) == 4
|
||
|
|
||
|
result = df.loc[4:5]
|
||
|
expected = df.reindex([4, 5]) # reindex with int
|
||
|
assert_frame_equal(result, expected, check_index_type=False)
|
||
|
assert len(result) == 2
|
||
|
|
||
|
result = df.loc[4:5]
|
||
|
expected = df.reindex([4.0, 5.0]) # reindex with float
|
||
|
assert_frame_equal(result, expected)
|
||
|
assert len(result) == 2
|
||
|
|
||
|
# loc_float changes this to work properly
|
||
|
result = df.loc[1:2]
|
||
|
expected = df.iloc[0:2]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
df.loc[1:2] = 0
|
||
|
result = df[1:2]
|
||
|
assert (result == 0).all().all()
|
||
|
|
||
|
# #2727
|
||
|
index = Index([1.0, 2.5, 3.5, 4.5, 5.0])
|
||
|
df = DataFrame(np.random.randn(5, 5), index=index)
|
||
|
|
||
|
# positional slicing only via iloc!
|
||
|
pytest.raises(TypeError, lambda: df.iloc[1.0:5])
|
||
|
|
||
|
result = df.iloc[4:5]
|
||
|
expected = df.reindex([5.0])
|
||
|
assert_frame_equal(result, expected)
|
||
|
assert len(result) == 1
|
||
|
|
||
|
cp = df.copy()
|
||
|
|
||
|
def f():
|
||
|
cp.iloc[1.0:5] = 0
|
||
|
pytest.raises(TypeError, f)
|
||
|
|
||
|
def f():
|
||
|
result = cp.iloc[1.0:5] == 0 # noqa
|
||
|
|
||
|
pytest.raises(TypeError, f)
|
||
|
assert result.values.all()
|
||
|
assert (cp.iloc[0:1] == df.iloc[0:1]).values.all()
|
||
|
|
||
|
cp = df.copy()
|
||
|
cp.iloc[4:5] = 0
|
||
|
assert (cp.iloc[4:5] == 0).values.all()
|
||
|
assert (cp.iloc[0:4] == df.iloc[0:4]).values.all()
|
||
|
|
||
|
# float slicing
|
||
|
result = df.loc[1.0:5]
|
||
|
expected = df
|
||
|
assert_frame_equal(result, expected)
|
||
|
assert len(result) == 5
|
||
|
|
||
|
result = df.loc[1.1:5]
|
||
|
expected = df.reindex([2.5, 3.5, 4.5, 5.0])
|
||
|
assert_frame_equal(result, expected)
|
||
|
assert len(result) == 4
|
||
|
|
||
|
result = df.loc[4.51:5]
|
||
|
expected = df.reindex([5.0])
|
||
|
assert_frame_equal(result, expected)
|
||
|
assert len(result) == 1
|
||
|
|
||
|
result = df.loc[1.0:5.0]
|
||
|
expected = df.reindex([1.0, 2.5, 3.5, 4.5, 5.0])
|
||
|
assert_frame_equal(result, expected)
|
||
|
assert len(result) == 5
|
||
|
|
||
|
cp = df.copy()
|
||
|
cp.loc[1.0:5.0] = 0
|
||
|
result = cp.loc[1.0:5.0]
|
||
|
assert (result == 0).values.all()
|
||
|
|
||
|
def test_setitem_single_column_mixed(self):
|
||
|
df = DataFrame(randn(5, 3), index=['a', 'b', 'c', 'd', 'e'],
|
||
|
columns=['foo', 'bar', 'baz'])
|
||
|
df['str'] = 'qux'
|
||
|
df.loc[df.index[::2], 'str'] = nan
|
||
|
expected = np.array([nan, 'qux', nan, 'qux', nan], dtype=object)
|
||
|
assert_almost_equal(df['str'].values, expected)
|
||
|
|
||
|
def test_setitem_single_column_mixed_datetime(self):
|
||
|
df = DataFrame(randn(5, 3), index=['a', 'b', 'c', 'd', 'e'],
|
||
|
columns=['foo', 'bar', 'baz'])
|
||
|
|
||
|
df['timestamp'] = Timestamp('20010102')
|
||
|
|
||
|
# check our dtypes
|
||
|
result = df.get_dtype_counts()
|
||
|
expected = Series({'float64': 3, 'datetime64[ns]': 1})
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
# set an allowable datetime64 type
|
||
|
df.loc['b', 'timestamp'] = iNaT
|
||
|
assert isna(df.loc['b', 'timestamp'])
|
||
|
|
||
|
# allow this syntax
|
||
|
df.loc['c', 'timestamp'] = nan
|
||
|
assert isna(df.loc['c', 'timestamp'])
|
||
|
|
||
|
# allow this syntax
|
||
|
df.loc['d', :] = nan
|
||
|
assert not isna(df.loc['c', :]).all()
|
||
|
|
||
|
# as of GH 3216 this will now work!
|
||
|
# try to set with a list like item
|
||
|
# pytest.raises(
|
||
|
# Exception, df.loc.__setitem__, ('d', 'timestamp'), [nan])
|
||
|
|
||
|
def test_setitem_frame(self):
|
||
|
piece = self.frame.loc[self.frame.index[:2], ['A', 'B']]
|
||
|
self.frame.loc[self.frame.index[-2]:, ['A', 'B']] = piece.values
|
||
|
result = self.frame.loc[self.frame.index[-2:], ['A', 'B']].values
|
||
|
expected = piece.values
|
||
|
assert_almost_equal(result, expected)
|
||
|
|
||
|
# GH 3216
|
||
|
|
||
|
# already aligned
|
||
|
f = self.mixed_frame.copy()
|
||
|
piece = DataFrame([[1., 2.], [3., 4.]],
|
||
|
index=f.index[0:2], columns=['A', 'B'])
|
||
|
key = (slice(None, 2), ['A', 'B'])
|
||
|
f.loc[key] = piece
|
||
|
assert_almost_equal(f.loc[f.index[0:2], ['A', 'B']].values,
|
||
|
piece.values)
|
||
|
|
||
|
# rows unaligned
|
||
|
f = self.mixed_frame.copy()
|
||
|
piece = DataFrame([[1., 2.], [3., 4.], [5., 6.], [7., 8.]],
|
||
|
index=list(f.index[0:2]) + ['foo', 'bar'],
|
||
|
columns=['A', 'B'])
|
||
|
key = (slice(None, 2), ['A', 'B'])
|
||
|
f.loc[key] = piece
|
||
|
assert_almost_equal(f.loc[f.index[0:2:], ['A', 'B']].values,
|
||
|
piece.values[0:2])
|
||
|
|
||
|
# key is unaligned with values
|
||
|
f = self.mixed_frame.copy()
|
||
|
piece = f.loc[f.index[:2], ['A']]
|
||
|
piece.index = f.index[-2:]
|
||
|
key = (slice(-2, None), ['A', 'B'])
|
||
|
f.loc[key] = piece
|
||
|
piece['B'] = np.nan
|
||
|
assert_almost_equal(f.loc[f.index[-2:], ['A', 'B']].values,
|
||
|
piece.values)
|
||
|
|
||
|
# ndarray
|
||
|
f = self.mixed_frame.copy()
|
||
|
piece = self.mixed_frame.loc[f.index[:2], ['A', 'B']]
|
||
|
key = (slice(-2, None), ['A', 'B'])
|
||
|
f.loc[key] = piece.values
|
||
|
assert_almost_equal(f.loc[f.index[-2:], ['A', 'B']].values,
|
||
|
piece.values)
|
||
|
|
||
|
# needs upcasting
|
||
|
df = DataFrame([[1, 2, 'foo'], [3, 4, 'bar']], columns=['A', 'B', 'C'])
|
||
|
df2 = df.copy()
|
||
|
df2.loc[:, ['A', 'B']] = df.loc[:, ['A', 'B']] + 0.5
|
||
|
expected = df.reindex(columns=['A', 'B'])
|
||
|
expected += 0.5
|
||
|
expected['C'] = df['C']
|
||
|
assert_frame_equal(df2, expected)
|
||
|
|
||
|
def test_setitem_frame_align(self):
|
||
|
piece = self.frame.loc[self.frame.index[:2], ['A', 'B']]
|
||
|
piece.index = self.frame.index[-2:]
|
||
|
piece.columns = ['A', 'B']
|
||
|
self.frame.loc[self.frame.index[-2:], ['A', 'B']] = piece
|
||
|
result = self.frame.loc[self.frame.index[-2:], ['A', 'B']].values
|
||
|
expected = piece.values
|
||
|
assert_almost_equal(result, expected)
|
||
|
|
||
|
def test_getitem_setitem_ix_duplicates(self):
|
||
|
# #1201
|
||
|
df = DataFrame(np.random.randn(5, 3),
|
||
|
index=['foo', 'foo', 'bar', 'baz', 'bar'])
|
||
|
|
||
|
result = df.loc['foo']
|
||
|
expected = df[:2]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.loc['bar']
|
||
|
expected = df.iloc[[2, 4]]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.loc['baz']
|
||
|
expected = df.iloc[3]
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_getitem_ix_boolean_duplicates_multiple(self):
|
||
|
# #1201
|
||
|
df = DataFrame(np.random.randn(5, 3),
|
||
|
index=['foo', 'foo', 'bar', 'baz', 'bar'])
|
||
|
|
||
|
result = df.loc[['bar']]
|
||
|
exp = df.iloc[[2, 4]]
|
||
|
assert_frame_equal(result, exp)
|
||
|
|
||
|
result = df.loc[df[1] > 0]
|
||
|
exp = df[df[1] > 0]
|
||
|
assert_frame_equal(result, exp)
|
||
|
|
||
|
result = df.loc[df[0] > 0]
|
||
|
exp = df[df[0] > 0]
|
||
|
assert_frame_equal(result, exp)
|
||
|
|
||
|
def test_getitem_setitem_ix_bool_keyerror(self):
|
||
|
# #2199
|
||
|
df = DataFrame({'a': [1, 2, 3]})
|
||
|
|
||
|
pytest.raises(KeyError, df.loc.__getitem__, False)
|
||
|
pytest.raises(KeyError, df.loc.__getitem__, True)
|
||
|
|
||
|
pytest.raises(KeyError, df.loc.__setitem__, False, 0)
|
||
|
pytest.raises(KeyError, df.loc.__setitem__, True, 0)
|
||
|
|
||
|
def test_getitem_list_duplicates(self):
|
||
|
# #1943
|
||
|
df = DataFrame(np.random.randn(4, 4), columns=list('AABC'))
|
||
|
df.columns.name = 'foo'
|
||
|
|
||
|
result = df[['B', 'C']]
|
||
|
assert result.columns.name == 'foo'
|
||
|
|
||
|
expected = df.iloc[:, 2:]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_get_value(self):
|
||
|
for idx in self.frame.index:
|
||
|
for col in self.frame.columns:
|
||
|
with tm.assert_produces_warning(FutureWarning,
|
||
|
check_stacklevel=False):
|
||
|
result = self.frame.get_value(idx, col)
|
||
|
expected = self.frame[col][idx]
|
||
|
assert result == expected
|
||
|
|
||
|
def test_lookup(self):
|
||
|
def alt(df, rows, cols, dtype):
|
||
|
result = []
|
||
|
for r, c in zip(rows, cols):
|
||
|
with tm.assert_produces_warning(FutureWarning,
|
||
|
check_stacklevel=False):
|
||
|
result.append(df.get_value(r, c))
|
||
|
return np.array(result, dtype=dtype)
|
||
|
|
||
|
def testit(df):
|
||
|
rows = list(df.index) * len(df.columns)
|
||
|
cols = list(df.columns) * len(df.index)
|
||
|
result = df.lookup(rows, cols)
|
||
|
expected = alt(df, rows, cols, dtype=np.object_)
|
||
|
tm.assert_almost_equal(result, expected, check_dtype=False)
|
||
|
|
||
|
testit(self.mixed_frame)
|
||
|
testit(self.frame)
|
||
|
|
||
|
df = DataFrame({'label': ['a', 'b', 'a', 'c'],
|
||
|
'mask_a': [True, True, False, True],
|
||
|
'mask_b': [True, False, False, False],
|
||
|
'mask_c': [False, True, False, True]})
|
||
|
df['mask'] = df.lookup(df.index, 'mask_' + df['label'])
|
||
|
exp_mask = alt(df, df.index, 'mask_' + df['label'], dtype=np.bool_)
|
||
|
tm.assert_series_equal(df['mask'], pd.Series(exp_mask, name='mask'))
|
||
|
assert df['mask'].dtype == np.bool_
|
||
|
|
||
|
with pytest.raises(KeyError):
|
||
|
self.frame.lookup(['xyz'], ['A'])
|
||
|
|
||
|
with pytest.raises(KeyError):
|
||
|
self.frame.lookup([self.frame.index[0]], ['xyz'])
|
||
|
|
||
|
with tm.assert_raises_regex(ValueError, 'same size'):
|
||
|
self.frame.lookup(['a', 'b', 'c'], ['a'])
|
||
|
|
||
|
def test_set_value(self):
|
||
|
for idx in self.frame.index:
|
||
|
for col in self.frame.columns:
|
||
|
with tm.assert_produces_warning(FutureWarning,
|
||
|
check_stacklevel=False):
|
||
|
self.frame.set_value(idx, col, 1)
|
||
|
assert self.frame[col][idx] == 1
|
||
|
|
||
|
def test_set_value_resize(self):
|
||
|
|
||
|
with tm.assert_produces_warning(FutureWarning,
|
||
|
check_stacklevel=False):
|
||
|
res = self.frame.set_value('foobar', 'B', 0)
|
||
|
assert res is self.frame
|
||
|
assert res.index[-1] == 'foobar'
|
||
|
with tm.assert_produces_warning(FutureWarning,
|
||
|
check_stacklevel=False):
|
||
|
assert res.get_value('foobar', 'B') == 0
|
||
|
|
||
|
self.frame.loc['foobar', 'qux'] = 0
|
||
|
with tm.assert_produces_warning(FutureWarning,
|
||
|
check_stacklevel=False):
|
||
|
assert self.frame.get_value('foobar', 'qux') == 0
|
||
|
|
||
|
res = self.frame.copy()
|
||
|
with tm.assert_produces_warning(FutureWarning,
|
||
|
check_stacklevel=False):
|
||
|
res3 = res.set_value('foobar', 'baz', 'sam')
|
||
|
assert res3['baz'].dtype == np.object_
|
||
|
|
||
|
res = self.frame.copy()
|
||
|
with tm.assert_produces_warning(FutureWarning,
|
||
|
check_stacklevel=False):
|
||
|
res3 = res.set_value('foobar', 'baz', True)
|
||
|
assert res3['baz'].dtype == np.object_
|
||
|
|
||
|
res = self.frame.copy()
|
||
|
with tm.assert_produces_warning(FutureWarning,
|
||
|
check_stacklevel=False):
|
||
|
res3 = res.set_value('foobar', 'baz', 5)
|
||
|
assert is_float_dtype(res3['baz'])
|
||
|
assert isna(res3['baz'].drop(['foobar'])).all()
|
||
|
with tm.assert_produces_warning(FutureWarning,
|
||
|
check_stacklevel=False):
|
||
|
pytest.raises(ValueError, res3.set_value, 'foobar', 'baz', 'sam')
|
||
|
|
||
|
def test_set_value_with_index_dtype_change(self):
|
||
|
df_orig = DataFrame(randn(3, 3), index=lrange(3), columns=list('ABC'))
|
||
|
|
||
|
# this is actually ambiguous as the 2 is interpreted as a positional
|
||
|
# so column is not created
|
||
|
df = df_orig.copy()
|
||
|
with tm.assert_produces_warning(FutureWarning,
|
||
|
check_stacklevel=False):
|
||
|
df.set_value('C', 2, 1.0)
|
||
|
assert list(df.index) == list(df_orig.index) + ['C']
|
||
|
# assert list(df.columns) == list(df_orig.columns) + [2]
|
||
|
|
||
|
df = df_orig.copy()
|
||
|
df.loc['C', 2] = 1.0
|
||
|
assert list(df.index) == list(df_orig.index) + ['C']
|
||
|
# assert list(df.columns) == list(df_orig.columns) + [2]
|
||
|
|
||
|
# create both new
|
||
|
df = df_orig.copy()
|
||
|
with tm.assert_produces_warning(FutureWarning,
|
||
|
check_stacklevel=False):
|
||
|
df.set_value('C', 'D', 1.0)
|
||
|
assert list(df.index) == list(df_orig.index) + ['C']
|
||
|
assert list(df.columns) == list(df_orig.columns) + ['D']
|
||
|
|
||
|
df = df_orig.copy()
|
||
|
df.loc['C', 'D'] = 1.0
|
||
|
assert list(df.index) == list(df_orig.index) + ['C']
|
||
|
assert list(df.columns) == list(df_orig.columns) + ['D']
|
||
|
|
||
|
def test_get_set_value_no_partial_indexing(self):
|
||
|
# partial w/ MultiIndex raise exception
|
||
|
index = MultiIndex.from_tuples([(0, 1), (0, 2), (1, 1), (1, 2)])
|
||
|
df = DataFrame(index=index, columns=lrange(4))
|
||
|
with tm.assert_produces_warning(FutureWarning,
|
||
|
check_stacklevel=False):
|
||
|
pytest.raises(KeyError, df.get_value, 0, 1)
|
||
|
|
||
|
def test_single_element_ix_dont_upcast(self):
|
||
|
self.frame['E'] = 1
|
||
|
assert issubclass(self.frame['E'].dtype.type, (int, np.integer))
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
result = self.frame.ix[self.frame.index[5], 'E']
|
||
|
assert is_integer(result)
|
||
|
|
||
|
result = self.frame.loc[self.frame.index[5], 'E']
|
||
|
assert is_integer(result)
|
||
|
|
||
|
# GH 11617
|
||
|
df = pd.DataFrame(dict(a=[1.23]))
|
||
|
df["b"] = 666
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
result = df.ix[0, "b"]
|
||
|
assert is_integer(result)
|
||
|
result = df.loc[0, "b"]
|
||
|
assert is_integer(result)
|
||
|
|
||
|
expected = Series([666], [0], name='b')
|
||
|
with catch_warnings(record=True):
|
||
|
result = df.ix[[0], "b"]
|
||
|
assert_series_equal(result, expected)
|
||
|
result = df.loc[[0], "b"]
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_iloc_row(self):
|
||
|
df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2))
|
||
|
|
||
|
result = df.iloc[1]
|
||
|
exp = df.loc[2]
|
||
|
assert_series_equal(result, exp)
|
||
|
|
||
|
result = df.iloc[2]
|
||
|
exp = df.loc[4]
|
||
|
assert_series_equal(result, exp)
|
||
|
|
||
|
# slice
|
||
|
result = df.iloc[slice(4, 8)]
|
||
|
expected = df.loc[8:14]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
# verify slice is view
|
||
|
# setting it makes it raise/warn
|
||
|
def f():
|
||
|
result[2] = 0.
|
||
|
pytest.raises(com.SettingWithCopyError, f)
|
||
|
exp_col = df[2].copy()
|
||
|
exp_col[4:8] = 0.
|
||
|
assert_series_equal(df[2], exp_col)
|
||
|
|
||
|
# list of integers
|
||
|
result = df.iloc[[1, 2, 4, 6]]
|
||
|
expected = df.reindex(df.index[[1, 2, 4, 6]])
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_iloc_col(self):
|
||
|
|
||
|
df = DataFrame(np.random.randn(4, 10), columns=lrange(0, 20, 2))
|
||
|
|
||
|
result = df.iloc[:, 1]
|
||
|
exp = df.loc[:, 2]
|
||
|
assert_series_equal(result, exp)
|
||
|
|
||
|
result = df.iloc[:, 2]
|
||
|
exp = df.loc[:, 4]
|
||
|
assert_series_equal(result, exp)
|
||
|
|
||
|
# slice
|
||
|
result = df.iloc[:, slice(4, 8)]
|
||
|
expected = df.loc[:, 8:14]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
# verify slice is view
|
||
|
# and that we are setting a copy
|
||
|
def f():
|
||
|
result[8] = 0.
|
||
|
pytest.raises(com.SettingWithCopyError, f)
|
||
|
assert (df[8] == 0).all()
|
||
|
|
||
|
# list of integers
|
||
|
result = df.iloc[:, [1, 2, 4, 6]]
|
||
|
expected = df.reindex(columns=df.columns[[1, 2, 4, 6]])
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_iloc_duplicates(self):
|
||
|
|
||
|
df = DataFrame(np.random.rand(3, 3), columns=list('ABC'),
|
||
|
index=list('aab'))
|
||
|
|
||
|
result = df.iloc[0]
|
||
|
with catch_warnings(record=True):
|
||
|
result2 = df.ix[0]
|
||
|
assert isinstance(result, Series)
|
||
|
assert_almost_equal(result.values, df.values[0])
|
||
|
assert_series_equal(result, result2)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
result = df.T.iloc[:, 0]
|
||
|
result2 = df.T.ix[:, 0]
|
||
|
assert isinstance(result, Series)
|
||
|
assert_almost_equal(result.values, df.values[0])
|
||
|
assert_series_equal(result, result2)
|
||
|
|
||
|
# multiindex
|
||
|
df = DataFrame(np.random.randn(3, 3),
|
||
|
columns=[['i', 'i', 'j'], ['A', 'A', 'B']],
|
||
|
index=[['i', 'i', 'j'], ['X', 'X', 'Y']])
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
rs = df.iloc[0]
|
||
|
xp = df.ix[0]
|
||
|
assert_series_equal(rs, xp)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
rs = df.iloc[:, 0]
|
||
|
xp = df.T.ix[0]
|
||
|
assert_series_equal(rs, xp)
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
rs = df.iloc[:, [0]]
|
||
|
xp = df.ix[:, [0]]
|
||
|
assert_frame_equal(rs, xp)
|
||
|
|
||
|
# #2259
|
||
|
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1, 1, 2])
|
||
|
result = df.iloc[:, [0]]
|
||
|
expected = df.take([0], axis=1)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_loc_duplicates(self):
|
||
|
# gh-17105
|
||
|
|
||
|
# insert a duplicate element to the index
|
||
|
trange = pd.date_range(start=pd.Timestamp(year=2017, month=1, day=1),
|
||
|
end=pd.Timestamp(year=2017, month=1, day=5))
|
||
|
|
||
|
trange = trange.insert(loc=5,
|
||
|
item=pd.Timestamp(year=2017, month=1, day=5))
|
||
|
|
||
|
df = pd.DataFrame(0, index=trange, columns=["A", "B"])
|
||
|
bool_idx = np.array([False, False, False, False, False, True])
|
||
|
|
||
|
# assignment
|
||
|
df.loc[trange[bool_idx], "A"] = 6
|
||
|
|
||
|
expected = pd.DataFrame({'A': [0, 0, 0, 0, 6, 6],
|
||
|
'B': [0, 0, 0, 0, 0, 0]},
|
||
|
index=trange)
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
# in-place
|
||
|
df = pd.DataFrame(0, index=trange, columns=["A", "B"])
|
||
|
df.loc[trange[bool_idx], "A"] += 6
|
||
|
tm.assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_iloc_sparse_propegate_fill_value(self):
|
||
|
from pandas.core.sparse.api import SparseDataFrame
|
||
|
df = SparseDataFrame({'A': [999, 1]}, default_fill_value=999)
|
||
|
assert len(df['A'].sp_values) == len(df.iloc[:, 0].sp_values)
|
||
|
|
||
|
def test_iat(self):
|
||
|
|
||
|
for i, row in enumerate(self.frame.index):
|
||
|
for j, col in enumerate(self.frame.columns):
|
||
|
result = self.frame.iat[i, j]
|
||
|
expected = self.frame.at[row, col]
|
||
|
assert result == expected
|
||
|
|
||
|
def test_nested_exception(self):
|
||
|
# Ignore the strange way of triggering the problem
|
||
|
# (which may get fixed), it's just a way to trigger
|
||
|
# the issue or reraising an outer exception without
|
||
|
# a named argument
|
||
|
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6],
|
||
|
"c": [7, 8, 9]}).set_index(["a", "b"])
|
||
|
l = list(df.index)
|
||
|
l[0] = ["a", "b"]
|
||
|
df.index = l
|
||
|
|
||
|
try:
|
||
|
repr(df)
|
||
|
except Exception as e:
|
||
|
assert type(e) != UnboundLocalError
|
||
|
|
||
|
def test_reindex_methods(self):
|
||
|
df = pd.DataFrame({'x': list(range(5))})
|
||
|
target = np.array([-0.1, 0.9, 1.1, 1.5])
|
||
|
|
||
|
for method, expected_values in [('nearest', [0, 1, 1, 2]),
|
||
|
('pad', [np.nan, 0, 1, 1]),
|
||
|
('backfill', [0, 1, 2, 2])]:
|
||
|
expected = pd.DataFrame({'x': expected_values}, index=target)
|
||
|
actual = df.reindex(target, method=method)
|
||
|
assert_frame_equal(expected, actual)
|
||
|
|
||
|
actual = df.reindex_like(df, method=method, tolerance=0)
|
||
|
assert_frame_equal(df, actual)
|
||
|
actual = df.reindex_like(df, method=method, tolerance=[0, 0, 0, 0])
|
||
|
assert_frame_equal(df, actual)
|
||
|
|
||
|
actual = df.reindex(target, method=method, tolerance=1)
|
||
|
assert_frame_equal(expected, actual)
|
||
|
actual = df.reindex(target, method=method, tolerance=[1, 1, 1, 1])
|
||
|
assert_frame_equal(expected, actual)
|
||
|
|
||
|
e2 = expected[::-1]
|
||
|
actual = df.reindex(target[::-1], method=method)
|
||
|
assert_frame_equal(e2, actual)
|
||
|
|
||
|
new_order = [3, 0, 2, 1]
|
||
|
e2 = expected.iloc[new_order]
|
||
|
actual = df.reindex(target[new_order], method=method)
|
||
|
assert_frame_equal(e2, actual)
|
||
|
|
||
|
switched_method = ('pad' if method == 'backfill'
|
||
|
else 'backfill' if method == 'pad'
|
||
|
else method)
|
||
|
actual = df[::-1].reindex(target, method=switched_method)
|
||
|
assert_frame_equal(expected, actual)
|
||
|
|
||
|
expected = pd.DataFrame({'x': [0, 1, 1, np.nan]}, index=target)
|
||
|
actual = df.reindex(target, method='nearest', tolerance=0.2)
|
||
|
assert_frame_equal(expected, actual)
|
||
|
|
||
|
expected = pd.DataFrame({'x': [0, np.nan, 1, np.nan]}, index=target)
|
||
|
actual = df.reindex(target, method='nearest',
|
||
|
tolerance=[0.5, 0.01, 0.4, 0.1])
|
||
|
assert_frame_equal(expected, actual)
|
||
|
|
||
|
def test_reindex_frame_add_nat(self):
|
||
|
rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s')
|
||
|
df = DataFrame({'A': np.random.randn(len(rng)), 'B': rng})
|
||
|
|
||
|
result = df.reindex(lrange(15))
|
||
|
assert np.issubdtype(result['B'].dtype, np.dtype('M8[ns]'))
|
||
|
|
||
|
mask = com.isna(result)['B']
|
||
|
assert mask[-5:].all()
|
||
|
assert not mask[:-5].any()
|
||
|
|
||
|
def test_set_dataframe_column_ns_dtype(self):
|
||
|
x = DataFrame([datetime.now(), datetime.now()])
|
||
|
assert x[0].dtype == np.dtype('M8[ns]')
|
||
|
|
||
|
def test_non_monotonic_reindex_methods(self):
|
||
|
dr = pd.date_range('2013-08-01', periods=6, freq='B')
|
||
|
data = np.random.randn(6, 1)
|
||
|
df = pd.DataFrame(data, index=dr, columns=list('A'))
|
||
|
df_rev = pd.DataFrame(data, index=dr[[3, 4, 5] + [0, 1, 2]],
|
||
|
columns=list('A'))
|
||
|
# index is not monotonic increasing or decreasing
|
||
|
pytest.raises(ValueError, df_rev.reindex, df.index, method='pad')
|
||
|
pytest.raises(ValueError, df_rev.reindex, df.index, method='ffill')
|
||
|
pytest.raises(ValueError, df_rev.reindex, df.index, method='bfill')
|
||
|
pytest.raises(ValueError, df_rev.reindex, df.index, method='nearest')
|
||
|
|
||
|
def test_reindex_level(self):
|
||
|
from itertools import permutations
|
||
|
icol = ['jim', 'joe', 'jolie']
|
||
|
|
||
|
def verify_first_level(df, level, idx, check_index_type=True):
|
||
|
f = lambda val: np.nonzero(df[level] == val)[0]
|
||
|
i = np.concatenate(list(map(f, idx)))
|
||
|
left = df.set_index(icol).reindex(idx, level=level)
|
||
|
right = df.iloc[i].set_index(icol)
|
||
|
assert_frame_equal(left, right, check_index_type=check_index_type)
|
||
|
|
||
|
def verify(df, level, idx, indexer, check_index_type=True):
|
||
|
left = df.set_index(icol).reindex(idx, level=level)
|
||
|
right = df.iloc[indexer].set_index(icol)
|
||
|
assert_frame_equal(left, right, check_index_type=check_index_type)
|
||
|
|
||
|
df = pd.DataFrame({'jim': list('B' * 4 + 'A' * 2 + 'C' * 3),
|
||
|
'joe': list('abcdeabcd')[::-1],
|
||
|
'jolie': [10, 20, 30] * 3,
|
||
|
'joline': np.random.randint(0, 1000, 9)})
|
||
|
|
||
|
target = [['C', 'B', 'A'], ['F', 'C', 'A', 'D'], ['A'],
|
||
|
['A', 'B', 'C'], ['C', 'A', 'B'], ['C', 'B'], ['C', 'A'],
|
||
|
['A', 'B'], ['B', 'A', 'C']]
|
||
|
|
||
|
for idx in target:
|
||
|
verify_first_level(df, 'jim', idx)
|
||
|
|
||
|
# reindex by these causes different MultiIndex levels
|
||
|
for idx in [['D', 'F'], ['A', 'C', 'B']]:
|
||
|
verify_first_level(df, 'jim', idx, check_index_type=False)
|
||
|
|
||
|
verify(df, 'joe', list('abcde'), [3, 2, 1, 0, 5, 4, 8, 7, 6])
|
||
|
verify(df, 'joe', list('abcd'), [3, 2, 1, 0, 5, 8, 7, 6])
|
||
|
verify(df, 'joe', list('abc'), [3, 2, 1, 8, 7, 6])
|
||
|
verify(df, 'joe', list('eca'), [1, 3, 4, 6, 8])
|
||
|
verify(df, 'joe', list('edc'), [0, 1, 4, 5, 6])
|
||
|
verify(df, 'joe', list('eadbc'), [3, 0, 2, 1, 4, 5, 8, 7, 6])
|
||
|
verify(df, 'joe', list('edwq'), [0, 4, 5])
|
||
|
verify(df, 'joe', list('wq'), [], check_index_type=False)
|
||
|
|
||
|
df = DataFrame({'jim': ['mid'] * 5 + ['btm'] * 8 + ['top'] * 7,
|
||
|
'joe': ['3rd'] * 2 + ['1st'] * 3 + ['2nd'] * 3 +
|
||
|
['1st'] * 2 + ['3rd'] * 3 + ['1st'] * 2 +
|
||
|
['3rd'] * 3 + ['2nd'] * 2,
|
||
|
# this needs to be jointly unique with jim and joe or
|
||
|
# reindexing will fail ~1.5% of the time, this works
|
||
|
# out to needing unique groups of same size as joe
|
||
|
'jolie': np.concatenate([
|
||
|
np.random.choice(1000, x, replace=False)
|
||
|
for x in [2, 3, 3, 2, 3, 2, 3, 2]]),
|
||
|
'joline': np.random.randn(20).round(3) * 10})
|
||
|
|
||
|
for idx in permutations(df['jim'].unique()):
|
||
|
for i in range(3):
|
||
|
verify_first_level(df, 'jim', idx[:i + 1])
|
||
|
|
||
|
i = [2, 3, 4, 0, 1, 8, 9, 5, 6, 7, 10,
|
||
|
11, 12, 13, 14, 18, 19, 15, 16, 17]
|
||
|
verify(df, 'joe', ['1st', '2nd', '3rd'], i)
|
||
|
|
||
|
i = [0, 1, 2, 3, 4, 10, 11, 12, 5, 6,
|
||
|
7, 8, 9, 15, 16, 17, 18, 19, 13, 14]
|
||
|
verify(df, 'joe', ['3rd', '2nd', '1st'], i)
|
||
|
|
||
|
i = [0, 1, 5, 6, 7, 10, 11, 12, 18, 19, 15, 16, 17]
|
||
|
verify(df, 'joe', ['2nd', '3rd'], i)
|
||
|
|
||
|
i = [0, 1, 2, 3, 4, 10, 11, 12, 8, 9, 15, 16, 17, 13, 14]
|
||
|
verify(df, 'joe', ['3rd', '1st'], i)
|
||
|
|
||
|
def test_getitem_ix_float_duplicates(self):
|
||
|
df = pd.DataFrame(np.random.randn(3, 3),
|
||
|
index=[0.1, 0.2, 0.2], columns=list('abc'))
|
||
|
expect = df.iloc[1:]
|
||
|
assert_frame_equal(df.loc[0.2], expect)
|
||
|
with catch_warnings(record=True):
|
||
|
assert_frame_equal(df.ix[0.2], expect)
|
||
|
|
||
|
expect = df.iloc[1:, 0]
|
||
|
assert_series_equal(df.loc[0.2, 'a'], expect)
|
||
|
|
||
|
df.index = [1, 0.2, 0.2]
|
||
|
expect = df.iloc[1:]
|
||
|
assert_frame_equal(df.loc[0.2], expect)
|
||
|
with catch_warnings(record=True):
|
||
|
assert_frame_equal(df.ix[0.2], expect)
|
||
|
|
||
|
expect = df.iloc[1:, 0]
|
||
|
assert_series_equal(df.loc[0.2, 'a'], expect)
|
||
|
|
||
|
df = pd.DataFrame(np.random.randn(4, 3),
|
||
|
index=[1, 0.2, 0.2, 1], columns=list('abc'))
|
||
|
expect = df.iloc[1:-1]
|
||
|
assert_frame_equal(df.loc[0.2], expect)
|
||
|
with catch_warnings(record=True):
|
||
|
assert_frame_equal(df.ix[0.2], expect)
|
||
|
|
||
|
expect = df.iloc[1:-1, 0]
|
||
|
assert_series_equal(df.loc[0.2, 'a'], expect)
|
||
|
|
||
|
df.index = [0.1, 0.2, 2, 0.2]
|
||
|
expect = df.iloc[[1, -1]]
|
||
|
assert_frame_equal(df.loc[0.2], expect)
|
||
|
with catch_warnings(record=True):
|
||
|
assert_frame_equal(df.ix[0.2], expect)
|
||
|
|
||
|
expect = df.iloc[[1, -1], 0]
|
||
|
assert_series_equal(df.loc[0.2, 'a'], expect)
|
||
|
|
||
|
def test_setitem_with_sparse_value(self):
|
||
|
# GH8131
|
||
|
df = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]})
|
||
|
sp_series = pd.Series([0, 0, 1]).to_sparse(fill_value=0)
|
||
|
df['new_column'] = sp_series
|
||
|
assert_series_equal(df['new_column'], sp_series, check_names=False)
|
||
|
|
||
|
def test_setitem_with_unaligned_sparse_value(self):
|
||
|
df = pd.DataFrame({'c_1': ['a', 'b', 'c'], 'n_1': [1., 2., 3.]})
|
||
|
sp_series = (pd.Series([0, 0, 1], index=[2, 1, 0])
|
||
|
.to_sparse(fill_value=0))
|
||
|
df['new_column'] = sp_series
|
||
|
exp = pd.Series([1, 0, 0], name='new_column')
|
||
|
assert_series_equal(df['new_column'], exp)
|
||
|
|
||
|
def test_setitem_with_unaligned_tz_aware_datetime_column(self):
|
||
|
# GH 12981
|
||
|
# Assignment of unaligned offset-aware datetime series.
|
||
|
# Make sure timezone isn't lost
|
||
|
column = pd.Series(pd.date_range('2015-01-01', periods=3, tz='utc'),
|
||
|
name='dates')
|
||
|
df = pd.DataFrame({'dates': column})
|
||
|
df['dates'] = column[[1, 0, 2]]
|
||
|
assert_series_equal(df['dates'], column)
|
||
|
|
||
|
df = pd.DataFrame({'dates': column})
|
||
|
df.loc[[0, 1, 2], 'dates'] = column[[1, 0, 2]]
|
||
|
assert_series_equal(df['dates'], column)
|
||
|
|
||
|
def test_setitem_datetime_coercion(self):
|
||
|
# gh-1048
|
||
|
df = pd.DataFrame({'c': [pd.Timestamp('2010-10-01')] * 3})
|
||
|
df.loc[0:1, 'c'] = np.datetime64('2008-08-08')
|
||
|
assert pd.Timestamp('2008-08-08') == df.loc[0, 'c']
|
||
|
assert pd.Timestamp('2008-08-08') == df.loc[1, 'c']
|
||
|
df.loc[2, 'c'] = date(2005, 5, 5)
|
||
|
assert pd.Timestamp('2005-05-05') == df.loc[2, 'c']
|
||
|
|
||
|
def test_setitem_datetimelike_with_inference(self):
|
||
|
# GH 7592
|
||
|
# assignment of timedeltas with NaT
|
||
|
|
||
|
one_hour = timedelta(hours=1)
|
||
|
df = DataFrame(index=date_range('20130101', periods=4))
|
||
|
df['A'] = np.array([1 * one_hour] * 4, dtype='m8[ns]')
|
||
|
df.loc[:, 'B'] = np.array([2 * one_hour] * 4, dtype='m8[ns]')
|
||
|
df.loc[:3, 'C'] = np.array([3 * one_hour] * 3, dtype='m8[ns]')
|
||
|
df.loc[:, 'D'] = np.array([4 * one_hour] * 4, dtype='m8[ns]')
|
||
|
df.loc[df.index[:3], 'E'] = np.array([5 * one_hour] * 3,
|
||
|
dtype='m8[ns]')
|
||
|
df['F'] = np.timedelta64('NaT')
|
||
|
df.loc[df.index[:-1], 'F'] = np.array([6 * one_hour] * 3,
|
||
|
dtype='m8[ns]')
|
||
|
df.loc[df.index[-3]:, 'G'] = date_range('20130101', periods=3)
|
||
|
df['H'] = np.datetime64('NaT')
|
||
|
result = df.dtypes
|
||
|
expected = Series([np.dtype('timedelta64[ns]')] * 6 +
|
||
|
[np.dtype('datetime64[ns]')] * 2,
|
||
|
index=list('ABCDEFGH'))
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_at_time_between_time_datetimeindex(self):
|
||
|
index = date_range("2012-01-01", "2012-01-05", freq='30min')
|
||
|
df = DataFrame(randn(len(index), 5), index=index)
|
||
|
akey = time(12, 0, 0)
|
||
|
bkey = slice(time(13, 0, 0), time(14, 0, 0))
|
||
|
ainds = [24, 72, 120, 168]
|
||
|
binds = [26, 27, 28, 74, 75, 76, 122, 123, 124, 170, 171, 172]
|
||
|
|
||
|
result = df.at_time(akey)
|
||
|
expected = df.loc[akey]
|
||
|
expected2 = df.iloc[ainds]
|
||
|
assert_frame_equal(result, expected)
|
||
|
assert_frame_equal(result, expected2)
|
||
|
assert len(result) == 4
|
||
|
|
||
|
result = df.between_time(bkey.start, bkey.stop)
|
||
|
expected = df.loc[bkey]
|
||
|
expected2 = df.iloc[binds]
|
||
|
assert_frame_equal(result, expected)
|
||
|
assert_frame_equal(result, expected2)
|
||
|
assert len(result) == 12
|
||
|
|
||
|
result = df.copy()
|
||
|
result.loc[akey] = 0
|
||
|
result = result.loc[akey]
|
||
|
expected = df.loc[akey].copy()
|
||
|
expected.loc[:] = 0
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.copy()
|
||
|
result.loc[akey] = 0
|
||
|
result.loc[akey] = df.iloc[ainds]
|
||
|
assert_frame_equal(result, df)
|
||
|
|
||
|
result = df.copy()
|
||
|
result.loc[bkey] = 0
|
||
|
result = result.loc[bkey]
|
||
|
expected = df.loc[bkey].copy()
|
||
|
expected.loc[:] = 0
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.copy()
|
||
|
result.loc[bkey] = 0
|
||
|
result.loc[bkey] = df.iloc[binds]
|
||
|
assert_frame_equal(result, df)
|
||
|
|
||
|
def test_xs(self):
|
||
|
idx = self.frame.index[5]
|
||
|
xs = self.frame.xs(idx)
|
||
|
for item, value in compat.iteritems(xs):
|
||
|
if np.isnan(value):
|
||
|
assert np.isnan(self.frame[item][idx])
|
||
|
else:
|
||
|
assert value == self.frame[item][idx]
|
||
|
|
||
|
# mixed-type xs
|
||
|
test_data = {
|
||
|
'A': {'1': 1, '2': 2},
|
||
|
'B': {'1': '1', '2': '2', '3': '3'},
|
||
|
}
|
||
|
frame = DataFrame(test_data)
|
||
|
xs = frame.xs('1')
|
||
|
assert xs.dtype == np.object_
|
||
|
assert xs['A'] == 1
|
||
|
assert xs['B'] == '1'
|
||
|
|
||
|
with pytest.raises(KeyError):
|
||
|
self.tsframe.xs(self.tsframe.index[0] - BDay())
|
||
|
|
||
|
# xs get column
|
||
|
series = self.frame.xs('A', axis=1)
|
||
|
expected = self.frame['A']
|
||
|
assert_series_equal(series, expected)
|
||
|
|
||
|
# view is returned if possible
|
||
|
series = self.frame.xs('A', axis=1)
|
||
|
series[:] = 5
|
||
|
assert (expected == 5).all()
|
||
|
|
||
|
def test_xs_corner(self):
|
||
|
# pathological mixed-type reordering case
|
||
|
df = DataFrame(index=[0])
|
||
|
df['A'] = 1.
|
||
|
df['B'] = 'foo'
|
||
|
df['C'] = 2.
|
||
|
df['D'] = 'bar'
|
||
|
df['E'] = 3.
|
||
|
|
||
|
xs = df.xs(0)
|
||
|
exp = pd.Series([1., 'foo', 2., 'bar', 3.],
|
||
|
index=list('ABCDE'), name=0)
|
||
|
tm.assert_series_equal(xs, exp)
|
||
|
|
||
|
# no columns but Index(dtype=object)
|
||
|
df = DataFrame(index=['a', 'b', 'c'])
|
||
|
result = df.xs('a')
|
||
|
expected = Series([], name='a', index=pd.Index([], dtype=object))
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_xs_duplicates(self):
|
||
|
df = DataFrame(randn(5, 2), index=['b', 'b', 'c', 'b', 'a'])
|
||
|
|
||
|
cross = df.xs('c')
|
||
|
exp = df.iloc[2]
|
||
|
assert_series_equal(cross, exp)
|
||
|
|
||
|
def test_xs_keep_level(self):
|
||
|
df = (DataFrame({'day': {0: 'sat', 1: 'sun'},
|
||
|
'flavour': {0: 'strawberry', 1: 'strawberry'},
|
||
|
'sales': {0: 10, 1: 12},
|
||
|
'year': {0: 2008, 1: 2008}})
|
||
|
.set_index(['year', 'flavour', 'day']))
|
||
|
result = df.xs('sat', level='day', drop_level=False)
|
||
|
expected = df[:1]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.xs([2008, 'sat'], level=['year', 'day'], drop_level=False)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_xs_view(self):
|
||
|
# in 0.14 this will return a view if possible a copy otherwise, but
|
||
|
# this is numpy dependent
|
||
|
|
||
|
dm = DataFrame(np.arange(20.).reshape(4, 5),
|
||
|
index=lrange(4), columns=lrange(5))
|
||
|
|
||
|
dm.xs(2)[:] = 10
|
||
|
assert (dm.xs(2) == 10).all()
|
||
|
|
||
|
def test_index_namedtuple(self):
|
||
|
from collections import namedtuple
|
||
|
IndexType = namedtuple("IndexType", ["a", "b"])
|
||
|
idx1 = IndexType("foo", "bar")
|
||
|
idx2 = IndexType("baz", "bof")
|
||
|
index = Index([idx1, idx2],
|
||
|
name="composite_index", tupleize_cols=False)
|
||
|
df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"])
|
||
|
|
||
|
with catch_warnings(record=True):
|
||
|
result = df.ix[IndexType("foo", "bar")]["A"]
|
||
|
assert result == 1
|
||
|
|
||
|
result = df.loc[IndexType("foo", "bar")]["A"]
|
||
|
assert result == 1
|
||
|
|
||
|
def test_boolean_indexing(self):
|
||
|
idx = lrange(3)
|
||
|
cols = ['A', 'B', 'C']
|
||
|
df1 = DataFrame(index=idx, columns=cols,
|
||
|
data=np.array([[0.0, 0.5, 1.0],
|
||
|
[1.5, 2.0, 2.5],
|
||
|
[3.0, 3.5, 4.0]],
|
||
|
dtype=float))
|
||
|
df2 = DataFrame(index=idx, columns=cols,
|
||
|
data=np.ones((len(idx), len(cols))))
|
||
|
|
||
|
expected = DataFrame(index=idx, columns=cols,
|
||
|
data=np.array([[0.0, 0.5, 1.0],
|
||
|
[1.5, 2.0, -1],
|
||
|
[-1, -1, -1]], dtype=float))
|
||
|
|
||
|
df1[df1 > 2.0 * df2] = -1
|
||
|
assert_frame_equal(df1, expected)
|
||
|
with tm.assert_raises_regex(ValueError, 'Item wrong length'):
|
||
|
df1[df1.index[:-1] > 2] = -1
|
||
|
|
||
|
def test_boolean_indexing_mixed(self):
|
||
|
df = DataFrame({
|
||
|
long(0): {35: np.nan, 40: np.nan, 43: np.nan,
|
||
|
49: np.nan, 50: np.nan},
|
||
|
long(1): {35: np.nan,
|
||
|
40: 0.32632316859446198,
|
||
|
43: np.nan,
|
||
|
49: 0.32632316859446198,
|
||
|
50: 0.39114724480578139},
|
||
|
long(2): {35: np.nan, 40: np.nan, 43: 0.29012581014105987,
|
||
|
49: np.nan, 50: np.nan},
|
||
|
long(3): {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan,
|
||
|
50: np.nan},
|
||
|
long(4): {35: 0.34215328467153283, 40: np.nan, 43: np.nan,
|
||
|
49: np.nan, 50: np.nan},
|
||
|
'y': {35: 0, 40: 0, 43: 0, 49: 0, 50: 1}})
|
||
|
|
||
|
# mixed int/float ok
|
||
|
df2 = df.copy()
|
||
|
df2[df2 > 0.3] = 1
|
||
|
expected = df.copy()
|
||
|
expected.loc[40, 1] = 1
|
||
|
expected.loc[49, 1] = 1
|
||
|
expected.loc[50, 1] = 1
|
||
|
expected.loc[35, 4] = 1
|
||
|
assert_frame_equal(df2, expected)
|
||
|
|
||
|
df['foo'] = 'test'
|
||
|
with tm.assert_raises_regex(TypeError, 'boolean setting '
|
||
|
'on mixed-type'):
|
||
|
df[df > 0.3] = 1
|
||
|
|
||
|
def test_where(self):
|
||
|
default_frame = DataFrame(np.random.randn(5, 3),
|
||
|
columns=['A', 'B', 'C'])
|
||
|
|
||
|
def _safe_add(df):
|
||
|
# only add to the numeric items
|
||
|
def is_ok(s):
|
||
|
return (issubclass(s.dtype.type, (np.integer, np.floating)) and
|
||
|
s.dtype != 'uint8')
|
||
|
|
||
|
return DataFrame(dict((c, s + 1) if is_ok(s) else (c, s)
|
||
|
for c, s in compat.iteritems(df)))
|
||
|
|
||
|
def _check_get(df, cond, check_dtypes=True):
|
||
|
other1 = _safe_add(df)
|
||
|
rs = df.where(cond, other1)
|
||
|
rs2 = df.where(cond.values, other1)
|
||
|
for k, v in rs.iteritems():
|
||
|
exp = Series(
|
||
|
np.where(cond[k], df[k], other1[k]), index=v.index)
|
||
|
assert_series_equal(v, exp, check_names=False)
|
||
|
assert_frame_equal(rs, rs2)
|
||
|
|
||
|
# dtypes
|
||
|
if check_dtypes:
|
||
|
assert (rs.dtypes == df.dtypes).all()
|
||
|
|
||
|
# check getting
|
||
|
for df in [default_frame, self.mixed_frame,
|
||
|
self.mixed_float, self.mixed_int]:
|
||
|
cond = df > 0
|
||
|
_check_get(df, cond)
|
||
|
|
||
|
# upcasting case (GH # 2794)
|
||
|
df = DataFrame(dict((c, Series([1] * 3, dtype=c))
|
||
|
for c in ['float32', 'float64',
|
||
|
'int32', 'int64']))
|
||
|
df.iloc[1, :] = 0
|
||
|
result = df.where(df >= 0).get_dtype_counts()
|
||
|
|
||
|
# when we don't preserve boolean casts
|
||
|
#
|
||
|
# expected = Series({ 'float32' : 1, 'float64' : 3 })
|
||
|
|
||
|
expected = Series({'float32': 1, 'float64': 1, 'int32': 1, 'int64': 1})
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
# aligning
|
||
|
def _check_align(df, cond, other, check_dtypes=True):
|
||
|
rs = df.where(cond, other)
|
||
|
for i, k in enumerate(rs.columns):
|
||
|
result = rs[k]
|
||
|
d = df[k].values
|
||
|
c = cond[k].reindex(df[k].index).fillna(False).values
|
||
|
|
||
|
if is_scalar(other):
|
||
|
o = other
|
||
|
else:
|
||
|
if isinstance(other, np.ndarray):
|
||
|
o = Series(other[:, i], index=result.index).values
|
||
|
else:
|
||
|
o = other[k].values
|
||
|
|
||
|
new_values = d if c.all() else np.where(c, d, o)
|
||
|
expected = Series(new_values, index=result.index, name=k)
|
||
|
|
||
|
# since we can't always have the correct numpy dtype
|
||
|
# as numpy doesn't know how to downcast, don't check
|
||
|
assert_series_equal(result, expected, check_dtype=False)
|
||
|
|
||
|
# dtypes
|
||
|
# can't check dtype when other is an ndarray
|
||
|
|
||
|
if check_dtypes and not isinstance(other, np.ndarray):
|
||
|
assert (rs.dtypes == df.dtypes).all()
|
||
|
|
||
|
for df in [self.mixed_frame, self.mixed_float, self.mixed_int]:
|
||
|
|
||
|
# other is a frame
|
||
|
cond = (df > 0)[1:]
|
||
|
_check_align(df, cond, _safe_add(df))
|
||
|
|
||
|
# check other is ndarray
|
||
|
cond = df > 0
|
||
|
_check_align(df, cond, (_safe_add(df).values))
|
||
|
|
||
|
# integers are upcast, so don't check the dtypes
|
||
|
cond = df > 0
|
||
|
check_dtypes = all(not issubclass(s.type, np.integer)
|
||
|
for s in df.dtypes)
|
||
|
_check_align(df, cond, np.nan, check_dtypes=check_dtypes)
|
||
|
|
||
|
# invalid conditions
|
||
|
df = default_frame
|
||
|
err1 = (df + 1).values[0:2, :]
|
||
|
pytest.raises(ValueError, df.where, cond, err1)
|
||
|
|
||
|
err2 = cond.iloc[:2, :].values
|
||
|
other1 = _safe_add(df)
|
||
|
pytest.raises(ValueError, df.where, err2, other1)
|
||
|
|
||
|
pytest.raises(ValueError, df.mask, True)
|
||
|
pytest.raises(ValueError, df.mask, 0)
|
||
|
|
||
|
# where inplace
|
||
|
def _check_set(df, cond, check_dtypes=True):
|
||
|
dfi = df.copy()
|
||
|
econd = cond.reindex_like(df).fillna(True)
|
||
|
expected = dfi.mask(~econd)
|
||
|
|
||
|
dfi.where(cond, np.nan, inplace=True)
|
||
|
assert_frame_equal(dfi, expected)
|
||
|
|
||
|
# dtypes (and confirm upcasts)x
|
||
|
if check_dtypes:
|
||
|
for k, v in compat.iteritems(df.dtypes):
|
||
|
if issubclass(v.type, np.integer) and not cond[k].all():
|
||
|
v = np.dtype('float64')
|
||
|
assert dfi[k].dtype == v
|
||
|
|
||
|
for df in [default_frame, self.mixed_frame, self.mixed_float,
|
||
|
self.mixed_int]:
|
||
|
|
||
|
cond = df > 0
|
||
|
_check_set(df, cond)
|
||
|
|
||
|
cond = df >= 0
|
||
|
_check_set(df, cond)
|
||
|
|
||
|
# aligining
|
||
|
cond = (df >= 0)[1:]
|
||
|
_check_set(df, cond)
|
||
|
|
||
|
# GH 10218
|
||
|
# test DataFrame.where with Series slicing
|
||
|
df = DataFrame({'a': range(3), 'b': range(4, 7)})
|
||
|
result = df.where(df['a'] == 1)
|
||
|
expected = df[df['a'] == 1].reindex(df.index)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_where_array_like(self):
|
||
|
# see gh-15414
|
||
|
klasses = [list, tuple, np.array]
|
||
|
|
||
|
df = DataFrame({'a': [1, 2, 3]})
|
||
|
cond = [[False], [True], [True]]
|
||
|
expected = DataFrame({'a': [np.nan, 2, 3]})
|
||
|
|
||
|
for klass in klasses:
|
||
|
result = df.where(klass(cond))
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
df['b'] = 2
|
||
|
expected['b'] = [2, np.nan, 2]
|
||
|
cond = [[False, True], [True, False], [True, True]]
|
||
|
|
||
|
for klass in klasses:
|
||
|
result = df.where(klass(cond))
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_where_invalid_input(self):
|
||
|
# see gh-15414: only boolean arrays accepted
|
||
|
df = DataFrame({'a': [1, 2, 3]})
|
||
|
msg = "Boolean array expected for the condition"
|
||
|
|
||
|
conds = [
|
||
|
[[1], [0], [1]],
|
||
|
Series([[2], [5], [7]]),
|
||
|
DataFrame({'a': [2, 5, 7]}),
|
||
|
[["True"], ["False"], ["True"]],
|
||
|
[[Timestamp("2017-01-01")],
|
||
|
[pd.NaT], [Timestamp("2017-01-02")]]
|
||
|
]
|
||
|
|
||
|
for cond in conds:
|
||
|
with tm.assert_raises_regex(ValueError, msg):
|
||
|
df.where(cond)
|
||
|
|
||
|
df['b'] = 2
|
||
|
conds = [
|
||
|
[[0, 1], [1, 0], [1, 1]],
|
||
|
Series([[0, 2], [5, 0], [4, 7]]),
|
||
|
[["False", "True"], ["True", "False"],
|
||
|
["True", "True"]],
|
||
|
DataFrame({'a': [2, 5, 7], 'b': [4, 8, 9]}),
|
||
|
[[pd.NaT, Timestamp("2017-01-01")],
|
||
|
[Timestamp("2017-01-02"), pd.NaT],
|
||
|
[Timestamp("2017-01-03"), Timestamp("2017-01-03")]]
|
||
|
]
|
||
|
|
||
|
for cond in conds:
|
||
|
with tm.assert_raises_regex(ValueError, msg):
|
||
|
df.where(cond)
|
||
|
|
||
|
def test_where_dataframe_col_match(self):
|
||
|
df = DataFrame([[1, 2, 3], [4, 5, 6]])
|
||
|
cond = DataFrame([[True, False, True], [False, False, True]])
|
||
|
|
||
|
result = df.where(cond)
|
||
|
expected = DataFrame([[1.0, np.nan, 3], [np.nan, np.nan, 6]])
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
# this *does* align, though has no matching columns
|
||
|
cond.columns = ["a", "b", "c"]
|
||
|
result = df.where(cond)
|
||
|
expected = DataFrame(np.nan, index=df.index, columns=df.columns)
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_where_ndframe_align(self):
|
||
|
msg = "Array conditional must be same shape as self"
|
||
|
df = DataFrame([[1, 2, 3], [4, 5, 6]])
|
||
|
|
||
|
cond = [True]
|
||
|
with tm.assert_raises_regex(ValueError, msg):
|
||
|
df.where(cond)
|
||
|
|
||
|
expected = DataFrame([[1, 2, 3], [np.nan, np.nan, np.nan]])
|
||
|
|
||
|
out = df.where(Series(cond))
|
||
|
tm.assert_frame_equal(out, expected)
|
||
|
|
||
|
cond = np.array([False, True, False, True])
|
||
|
with tm.assert_raises_regex(ValueError, msg):
|
||
|
df.where(cond)
|
||
|
|
||
|
expected = DataFrame([[np.nan, np.nan, np.nan], [4, 5, 6]])
|
||
|
|
||
|
out = df.where(Series(cond))
|
||
|
tm.assert_frame_equal(out, expected)
|
||
|
|
||
|
def test_where_bug(self):
|
||
|
|
||
|
# GH 2793
|
||
|
|
||
|
df = DataFrame({'a': [1.0, 2.0, 3.0, 4.0], 'b': [
|
||
|
4.0, 3.0, 2.0, 1.0]}, dtype='float64')
|
||
|
expected = DataFrame({'a': [np.nan, np.nan, 3.0, 4.0], 'b': [
|
||
|
4.0, 3.0, np.nan, np.nan]}, dtype='float64')
|
||
|
result = df.where(df > 2, np.nan)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.copy()
|
||
|
result.where(result > 2, np.nan, inplace=True)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
# mixed
|
||
|
for dtype in ['int16', 'int8', 'int32', 'int64']:
|
||
|
df = DataFrame({'a': np.array([1, 2, 3, 4], dtype=dtype),
|
||
|
'b': np.array([4.0, 3.0, 2.0, 1.0],
|
||
|
dtype='float64')})
|
||
|
|
||
|
expected = DataFrame({'a': [np.nan, np.nan, 3.0, 4.0],
|
||
|
'b': [4.0, 3.0, np.nan, np.nan]},
|
||
|
dtype='float64')
|
||
|
|
||
|
result = df.where(df > 2, np.nan)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.copy()
|
||
|
result.where(result > 2, np.nan, inplace=True)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
# transpositional issue
|
||
|
# GH7506
|
||
|
a = DataFrame({0: [1, 2], 1: [3, 4], 2: [5, 6]})
|
||
|
b = DataFrame({0: [np.nan, 8], 1: [9, np.nan], 2: [np.nan, np.nan]})
|
||
|
do_not_replace = b.isna() | (a > b)
|
||
|
|
||
|
expected = a.copy()
|
||
|
expected[~do_not_replace] = b
|
||
|
|
||
|
result = a.where(do_not_replace, b)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
a = DataFrame({0: [4, 6], 1: [1, 0]})
|
||
|
b = DataFrame({0: [np.nan, 3], 1: [3, np.nan]})
|
||
|
do_not_replace = b.isna() | (a > b)
|
||
|
|
||
|
expected = a.copy()
|
||
|
expected[~do_not_replace] = b
|
||
|
|
||
|
result = a.where(do_not_replace, b)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_where_datetime(self):
|
||
|
|
||
|
# GH 3311
|
||
|
df = DataFrame(dict(A=date_range('20130102', periods=5),
|
||
|
B=date_range('20130104', periods=5),
|
||
|
C=np.random.randn(5)))
|
||
|
|
||
|
stamp = datetime(2013, 1, 3)
|
||
|
result = df[df > stamp]
|
||
|
expected = df.copy()
|
||
|
expected.loc[[0, 1], 'A'] = np.nan
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_where_none(self):
|
||
|
# GH 4667
|
||
|
# setting with None changes dtype
|
||
|
df = DataFrame({'series': Series(range(10))}).astype(float)
|
||
|
df[df > 7] = None
|
||
|
expected = DataFrame(
|
||
|
{'series': Series([0, 1, 2, 3, 4, 5, 6, 7, np.nan, np.nan])})
|
||
|
assert_frame_equal(df, expected)
|
||
|
|
||
|
# GH 7656
|
||
|
df = DataFrame([{'A': 1, 'B': np.nan, 'C': 'Test'}, {
|
||
|
'A': np.nan, 'B': 'Test', 'C': np.nan}])
|
||
|
expected = df.where(~isna(df), None)
|
||
|
with tm.assert_raises_regex(TypeError, 'boolean setting '
|
||
|
'on mixed-type'):
|
||
|
df.where(~isna(df), None, inplace=True)
|
||
|
|
||
|
def test_where_align(self):
|
||
|
|
||
|
def create():
|
||
|
df = DataFrame(np.random.randn(10, 3))
|
||
|
df.iloc[3:5, 0] = np.nan
|
||
|
df.iloc[4:6, 1] = np.nan
|
||
|
df.iloc[5:8, 2] = np.nan
|
||
|
return df
|
||
|
|
||
|
# series
|
||
|
df = create()
|
||
|
expected = df.fillna(df.mean())
|
||
|
result = df.where(pd.notna(df), df.mean(), axis='columns')
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
df.where(pd.notna(df), df.mean(), inplace=True, axis='columns')
|
||
|
assert_frame_equal(df, expected)
|
||
|
|
||
|
df = create().fillna(0)
|
||
|
expected = df.apply(lambda x, y: x.where(x > 0, y), y=df[0])
|
||
|
result = df.where(df > 0, df[0], axis='index')
|
||
|
assert_frame_equal(result, expected)
|
||
|
result = df.where(df > 0, df[0], axis='rows')
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
# frame
|
||
|
df = create()
|
||
|
expected = df.fillna(1)
|
||
|
result = df.where(pd.notna(df), DataFrame(
|
||
|
1, index=df.index, columns=df.columns))
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_where_complex(self):
|
||
|
# GH 6345
|
||
|
expected = DataFrame(
|
||
|
[[1 + 1j, 2], [np.nan, 4 + 1j]], columns=['a', 'b'])
|
||
|
df = DataFrame([[1 + 1j, 2], [5 + 1j, 4 + 1j]], columns=['a', 'b'])
|
||
|
df[df.abs() >= 5] = np.nan
|
||
|
assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_where_axis(self):
|
||
|
# GH 9736
|
||
|
df = DataFrame(np.random.randn(2, 2))
|
||
|
mask = DataFrame([[False, False], [False, False]])
|
||
|
s = Series([0, 1])
|
||
|
|
||
|
expected = DataFrame([[0, 0], [1, 1]], dtype='float64')
|
||
|
result = df.where(mask, s, axis='index')
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.copy()
|
||
|
result.where(mask, s, axis='index', inplace=True)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
expected = DataFrame([[0, 1], [0, 1]], dtype='float64')
|
||
|
result = df.where(mask, s, axis='columns')
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.copy()
|
||
|
result.where(mask, s, axis='columns', inplace=True)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
# Upcast needed
|
||
|
df = DataFrame([[1, 2], [3, 4]], dtype='int64')
|
||
|
mask = DataFrame([[False, False], [False, False]])
|
||
|
s = Series([0, np.nan])
|
||
|
|
||
|
expected = DataFrame([[0, 0], [np.nan, np.nan]], dtype='float64')
|
||
|
result = df.where(mask, s, axis='index')
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.copy()
|
||
|
result.where(mask, s, axis='index', inplace=True)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
expected = DataFrame([[0, np.nan], [0, np.nan]])
|
||
|
result = df.where(mask, s, axis='columns')
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
expected = DataFrame({0: np.array([0, 0], dtype='int64'),
|
||
|
1: np.array([np.nan, np.nan], dtype='float64')})
|
||
|
result = df.copy()
|
||
|
result.where(mask, s, axis='columns', inplace=True)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
# Multiple dtypes (=> multiple Blocks)
|
||
|
df = pd.concat([
|
||
|
DataFrame(np.random.randn(10, 2)),
|
||
|
DataFrame(np.random.randint(0, 10, size=(10, 2)), dtype='int64')],
|
||
|
ignore_index=True, axis=1)
|
||
|
mask = DataFrame(False, columns=df.columns, index=df.index)
|
||
|
s1 = Series(1, index=df.columns)
|
||
|
s2 = Series(2, index=df.index)
|
||
|
|
||
|
result = df.where(mask, s1, axis='columns')
|
||
|
expected = DataFrame(1.0, columns=df.columns, index=df.index)
|
||
|
expected[2] = expected[2].astype('int64')
|
||
|
expected[3] = expected[3].astype('int64')
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.copy()
|
||
|
result.where(mask, s1, axis='columns', inplace=True)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.where(mask, s2, axis='index')
|
||
|
expected = DataFrame(2.0, columns=df.columns, index=df.index)
|
||
|
expected[2] = expected[2].astype('int64')
|
||
|
expected[3] = expected[3].astype('int64')
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.copy()
|
||
|
result.where(mask, s2, axis='index', inplace=True)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
# DataFrame vs DataFrame
|
||
|
d1 = df.copy().drop(1, axis=0)
|
||
|
expected = df.copy()
|
||
|
expected.loc[1, :] = np.nan
|
||
|
|
||
|
result = df.where(mask, d1)
|
||
|
assert_frame_equal(result, expected)
|
||
|
result = df.where(mask, d1, axis='index')
|
||
|
assert_frame_equal(result, expected)
|
||
|
result = df.copy()
|
||
|
result.where(mask, d1, inplace=True)
|
||
|
assert_frame_equal(result, expected)
|
||
|
result = df.copy()
|
||
|
result.where(mask, d1, inplace=True, axis='index')
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
d2 = df.copy().drop(1, axis=1)
|
||
|
expected = df.copy()
|
||
|
expected.loc[:, 1] = np.nan
|
||
|
|
||
|
result = df.where(mask, d2)
|
||
|
assert_frame_equal(result, expected)
|
||
|
result = df.where(mask, d2, axis='columns')
|
||
|
assert_frame_equal(result, expected)
|
||
|
result = df.copy()
|
||
|
result.where(mask, d2, inplace=True)
|
||
|
assert_frame_equal(result, expected)
|
||
|
result = df.copy()
|
||
|
result.where(mask, d2, inplace=True, axis='columns')
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_where_callable(self):
|
||
|
# GH 12533
|
||
|
df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
|
||
|
result = df.where(lambda x: x > 4, lambda x: x + 1)
|
||
|
exp = DataFrame([[2, 3, 4], [5, 5, 6], [7, 8, 9]])
|
||
|
tm.assert_frame_equal(result, exp)
|
||
|
tm.assert_frame_equal(result, df.where(df > 4, df + 1))
|
||
|
|
||
|
# return ndarray and scalar
|
||
|
result = df.where(lambda x: (x % 2 == 0).values, lambda x: 99)
|
||
|
exp = DataFrame([[99, 2, 99], [4, 99, 6], [99, 8, 99]])
|
||
|
tm.assert_frame_equal(result, exp)
|
||
|
tm.assert_frame_equal(result, df.where(df % 2 == 0, 99))
|
||
|
|
||
|
# chain
|
||
|
result = (df + 2).where(lambda x: x > 8, lambda x: x + 10)
|
||
|
exp = DataFrame([[13, 14, 15], [16, 17, 18], [9, 10, 11]])
|
||
|
tm.assert_frame_equal(result, exp)
|
||
|
tm.assert_frame_equal(result,
|
||
|
(df + 2).where((df + 2) > 8, (df + 2) + 10))
|
||
|
|
||
|
def test_mask(self):
|
||
|
df = DataFrame(np.random.randn(5, 3))
|
||
|
cond = df > 0
|
||
|
|
||
|
rs = df.where(cond, np.nan)
|
||
|
assert_frame_equal(rs, df.mask(df <= 0))
|
||
|
assert_frame_equal(rs, df.mask(~cond))
|
||
|
|
||
|
other = DataFrame(np.random.randn(5, 3))
|
||
|
rs = df.where(cond, other)
|
||
|
assert_frame_equal(rs, df.mask(df <= 0, other))
|
||
|
assert_frame_equal(rs, df.mask(~cond, other))
|
||
|
|
||
|
def test_mask_inplace(self):
|
||
|
# GH8801
|
||
|
df = DataFrame(np.random.randn(5, 3))
|
||
|
cond = df > 0
|
||
|
|
||
|
rdf = df.copy()
|
||
|
|
||
|
rdf.where(cond, inplace=True)
|
||
|
assert_frame_equal(rdf, df.where(cond))
|
||
|
assert_frame_equal(rdf, df.mask(~cond))
|
||
|
|
||
|
rdf = df.copy()
|
||
|
rdf.where(cond, -df, inplace=True)
|
||
|
assert_frame_equal(rdf, df.where(cond, -df))
|
||
|
assert_frame_equal(rdf, df.mask(~cond, -df))
|
||
|
|
||
|
def test_mask_edge_case_1xN_frame(self):
|
||
|
# GH4071
|
||
|
df = DataFrame([[1, 2]])
|
||
|
res = df.mask(DataFrame([[True, False]]))
|
||
|
expec = DataFrame([[nan, 2]])
|
||
|
assert_frame_equal(res, expec)
|
||
|
|
||
|
def test_mask_callable(self):
|
||
|
# GH 12533
|
||
|
df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
|
||
|
result = df.mask(lambda x: x > 4, lambda x: x + 1)
|
||
|
exp = DataFrame([[1, 2, 3], [4, 6, 7], [8, 9, 10]])
|
||
|
tm.assert_frame_equal(result, exp)
|
||
|
tm.assert_frame_equal(result, df.mask(df > 4, df + 1))
|
||
|
|
||
|
# return ndarray and scalar
|
||
|
result = df.mask(lambda x: (x % 2 == 0).values, lambda x: 99)
|
||
|
exp = DataFrame([[1, 99, 3], [99, 5, 99], [7, 99, 9]])
|
||
|
tm.assert_frame_equal(result, exp)
|
||
|
tm.assert_frame_equal(result, df.mask(df % 2 == 0, 99))
|
||
|
|
||
|
# chain
|
||
|
result = (df + 2).mask(lambda x: x > 8, lambda x: x + 10)
|
||
|
exp = DataFrame([[3, 4, 5], [6, 7, 8], [19, 20, 21]])
|
||
|
tm.assert_frame_equal(result, exp)
|
||
|
tm.assert_frame_equal(result,
|
||
|
(df + 2).mask((df + 2) > 8, (df + 2) + 10))
|
||
|
|
||
|
def test_head_tail(self):
|
||
|
assert_frame_equal(self.frame.head(), self.frame[:5])
|
||
|
assert_frame_equal(self.frame.tail(), self.frame[-5:])
|
||
|
|
||
|
assert_frame_equal(self.frame.head(0), self.frame[0:0])
|
||
|
assert_frame_equal(self.frame.tail(0), self.frame[0:0])
|
||
|
|
||
|
assert_frame_equal(self.frame.head(-1), self.frame[:-1])
|
||
|
assert_frame_equal(self.frame.tail(-1), self.frame[1:])
|
||
|
assert_frame_equal(self.frame.head(1), self.frame[:1])
|
||
|
assert_frame_equal(self.frame.tail(1), self.frame[-1:])
|
||
|
# with a float index
|
||
|
df = self.frame.copy()
|
||
|
df.index = np.arange(len(self.frame)) + 0.1
|
||
|
assert_frame_equal(df.head(), df.iloc[:5])
|
||
|
assert_frame_equal(df.tail(), df.iloc[-5:])
|
||
|
assert_frame_equal(df.head(0), df[0:0])
|
||
|
assert_frame_equal(df.tail(0), df[0:0])
|
||
|
assert_frame_equal(df.head(-1), df.iloc[:-1])
|
||
|
assert_frame_equal(df.tail(-1), df.iloc[1:])
|
||
|
# test empty dataframe
|
||
|
empty_df = DataFrame()
|
||
|
assert_frame_equal(empty_df.tail(), empty_df)
|
||
|
assert_frame_equal(empty_df.head(), empty_df)
|
||
|
|
||
|
def test_type_error_multiindex(self):
|
||
|
# See gh-12218
|
||
|
df = DataFrame(columns=['i', 'c', 'x', 'y'],
|
||
|
data=[[0, 0, 1, 2], [1, 0, 3, 4],
|
||
|
[0, 1, 1, 2], [1, 1, 3, 4]])
|
||
|
dg = df.pivot_table(index='i', columns='c',
|
||
|
values=['x', 'y'])
|
||
|
|
||
|
with tm.assert_raises_regex(TypeError, "is an invalid key"):
|
||
|
str(dg[:, 0])
|
||
|
|
||
|
index = Index(range(2), name='i')
|
||
|
columns = MultiIndex(levels=[['x', 'y'], [0, 1]],
|
||
|
labels=[[0, 1], [0, 0]],
|
||
|
names=[None, 'c'])
|
||
|
expected = DataFrame([[1, 2], [3, 4]], columns=columns, index=index)
|
||
|
|
||
|
result = dg.loc[:, (slice(None), 0)]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
name = ('x', 0)
|
||
|
index = Index(range(2), name='i')
|
||
|
expected = Series([1, 3], index=index, name=name)
|
||
|
|
||
|
result = dg['x', 0]
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
|
||
|
class TestDataFrameIndexingDatetimeWithTZ(TestData):
|
||
|
|
||
|
def setup_method(self, method):
|
||
|
self.idx = Index(date_range('20130101', periods=3, tz='US/Eastern'),
|
||
|
name='foo')
|
||
|
self.dr = date_range('20130110', periods=3)
|
||
|
self.df = DataFrame({'A': self.idx, 'B': self.dr})
|
||
|
|
||
|
def test_setitem(self):
|
||
|
|
||
|
df = self.df
|
||
|
idx = self.idx
|
||
|
|
||
|
# setitem
|
||
|
df['C'] = idx
|
||
|
assert_series_equal(df['C'], Series(idx, name='C'))
|
||
|
|
||
|
df['D'] = 'foo'
|
||
|
df['D'] = idx
|
||
|
assert_series_equal(df['D'], Series(idx, name='D'))
|
||
|
del df['D']
|
||
|
|
||
|
# assert that A & C are not sharing the same base (e.g. they
|
||
|
# are copies)
|
||
|
b1 = df._data.blocks[1]
|
||
|
b2 = df._data.blocks[2]
|
||
|
assert b1.values.equals(b2.values)
|
||
|
assert id(b1.values.values.base) != id(b2.values.values.base)
|
||
|
|
||
|
# with nan
|
||
|
df2 = df.copy()
|
||
|
df2.iloc[1, 1] = pd.NaT
|
||
|
df2.iloc[1, 2] = pd.NaT
|
||
|
result = df2['B']
|
||
|
assert_series_equal(notna(result), Series(
|
||
|
[True, False, True], name='B'))
|
||
|
assert_series_equal(df2.dtypes, df.dtypes)
|
||
|
|
||
|
def test_set_reset(self):
|
||
|
|
||
|
idx = self.idx
|
||
|
|
||
|
# set/reset
|
||
|
df = DataFrame({'A': [0, 1, 2]}, index=idx)
|
||
|
result = df.reset_index()
|
||
|
assert result['foo'].dtype, 'M8[ns, US/Eastern'
|
||
|
|
||
|
df = result.set_index('foo')
|
||
|
tm.assert_index_equal(df.index, idx)
|
||
|
|
||
|
def test_transpose(self):
|
||
|
|
||
|
result = self.df.T
|
||
|
expected = DataFrame(self.df.values.T)
|
||
|
expected.index = ['A', 'B']
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
|
||
|
class TestDataFrameIndexingUInt64(TestData):
|
||
|
|
||
|
def setup_method(self, method):
|
||
|
self.ir = Index(np.arange(3), dtype=np.uint64)
|
||
|
self.idx = Index([2**63, 2**63 + 5, 2**63 + 10], name='foo')
|
||
|
|
||
|
self.df = DataFrame({'A': self.idx, 'B': self.ir})
|
||
|
|
||
|
def test_setitem(self):
|
||
|
|
||
|
df = self.df
|
||
|
idx = self.idx
|
||
|
|
||
|
# setitem
|
||
|
df['C'] = idx
|
||
|
assert_series_equal(df['C'], Series(idx, name='C'))
|
||
|
|
||
|
df['D'] = 'foo'
|
||
|
df['D'] = idx
|
||
|
assert_series_equal(df['D'], Series(idx, name='D'))
|
||
|
del df['D']
|
||
|
|
||
|
# With NaN: because uint64 has no NaN element,
|
||
|
# the column should be cast to object.
|
||
|
df2 = df.copy()
|
||
|
df2.iloc[1, 1] = pd.NaT
|
||
|
df2.iloc[1, 2] = pd.NaT
|
||
|
result = df2['B']
|
||
|
assert_series_equal(notna(result), Series(
|
||
|
[True, False, True], name='B'))
|
||
|
assert_series_equal(df2.dtypes, Series([np.dtype('uint64'),
|
||
|
np.dtype('O'), np.dtype('O')],
|
||
|
index=['A', 'B', 'C']))
|
||
|
|
||
|
def test_set_reset(self):
|
||
|
|
||
|
idx = self.idx
|
||
|
|
||
|
# set/reset
|
||
|
df = DataFrame({'A': [0, 1, 2]}, index=idx)
|
||
|
result = df.reset_index()
|
||
|
assert result['foo'].dtype == np.dtype('uint64')
|
||
|
|
||
|
df = result.set_index('foo')
|
||
|
tm.assert_index_equal(df.index, idx)
|
||
|
|
||
|
def test_transpose(self):
|
||
|
|
||
|
result = self.df.T
|
||
|
expected = DataFrame(self.df.values.T)
|
||
|
expected.index = ['A', 'B']
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
|
||
|
class TestDataFrameIndexingCategorical(object):
|
||
|
|
||
|
def test_assignment(self):
|
||
|
# assignment
|
||
|
df = DataFrame({'value': np.array(
|
||
|
np.random.randint(0, 10000, 100), dtype='int32')})
|
||
|
labels = Categorical(["{0} - {1}".format(i, i + 499)
|
||
|
for i in range(0, 10000, 500)])
|
||
|
|
||
|
df = df.sort_values(by=['value'], ascending=True)
|
||
|
s = pd.cut(df.value, range(0, 10500, 500), right=False, labels=labels)
|
||
|
d = s.values
|
||
|
df['D'] = d
|
||
|
str(df)
|
||
|
|
||
|
result = df.dtypes
|
||
|
expected = Series(
|
||
|
[np.dtype('int32'), CategoricalDtype(categories=labels,
|
||
|
ordered=False)],
|
||
|
index=['value', 'D'])
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
df['E'] = s
|
||
|
str(df)
|
||
|
|
||
|
result = df.dtypes
|
||
|
expected = Series([np.dtype('int32'),
|
||
|
CategoricalDtype(categories=labels, ordered=False),
|
||
|
CategoricalDtype(categories=labels, ordered=False)],
|
||
|
index=['value', 'D', 'E'])
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
result1 = df['D']
|
||
|
result2 = df['E']
|
||
|
tm.assert_categorical_equal(result1._data._block.values, d)
|
||
|
|
||
|
# sorting
|
||
|
s.name = 'E'
|
||
|
tm.assert_series_equal(result2.sort_index(), s.sort_index())
|
||
|
|
||
|
cat = Categorical([1, 2, 3, 10], categories=[1, 2, 3, 4, 10])
|
||
|
df = DataFrame(Series(cat))
|
||
|
|
||
|
def test_assigning_ops(self):
|
||
|
# systematically test the assigning operations:
|
||
|
# for all slicing ops:
|
||
|
# for value in categories and value not in categories:
|
||
|
|
||
|
# - assign a single value -> exp_single_cats_value
|
||
|
|
||
|
# - assign a complete row (mixed values) -> exp_single_row
|
||
|
|
||
|
# assign multiple rows (mixed values) (-> array) -> exp_multi_row
|
||
|
|
||
|
# assign a part of a column with dtype == categorical ->
|
||
|
# exp_parts_cats_col
|
||
|
|
||
|
# assign a part of a column with dtype != categorical ->
|
||
|
# exp_parts_cats_col
|
||
|
|
||
|
cats = Categorical(["a", "a", "a", "a", "a", "a", "a"],
|
||
|
categories=["a", "b"])
|
||
|
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
|
||
|
values = [1, 1, 1, 1, 1, 1, 1]
|
||
|
orig = DataFrame({"cats": cats, "values": values}, index=idx)
|
||
|
|
||
|
# the expected values
|
||
|
# changed single row
|
||
|
cats1 = Categorical(["a", "a", "b", "a", "a", "a", "a"],
|
||
|
categories=["a", "b"])
|
||
|
idx1 = Index(["h", "i", "j", "k", "l", "m", "n"])
|
||
|
values1 = [1, 1, 2, 1, 1, 1, 1]
|
||
|
exp_single_row = DataFrame({"cats": cats1,
|
||
|
"values": values1}, index=idx1)
|
||
|
|
||
|
# changed multiple rows
|
||
|
cats2 = Categorical(["a", "a", "b", "b", "a", "a", "a"],
|
||
|
categories=["a", "b"])
|
||
|
idx2 = Index(["h", "i", "j", "k", "l", "m", "n"])
|
||
|
values2 = [1, 1, 2, 2, 1, 1, 1]
|
||
|
exp_multi_row = DataFrame({"cats": cats2,
|
||
|
"values": values2}, index=idx2)
|
||
|
|
||
|
# changed part of the cats column
|
||
|
cats3 = Categorical(
|
||
|
["a", "a", "b", "b", "a", "a", "a"], categories=["a", "b"])
|
||
|
idx3 = Index(["h", "i", "j", "k", "l", "m", "n"])
|
||
|
values3 = [1, 1, 1, 1, 1, 1, 1]
|
||
|
exp_parts_cats_col = DataFrame({"cats": cats3,
|
||
|
"values": values3}, index=idx3)
|
||
|
|
||
|
# changed single value in cats col
|
||
|
cats4 = Categorical(
|
||
|
["a", "a", "b", "a", "a", "a", "a"], categories=["a", "b"])
|
||
|
idx4 = Index(["h", "i", "j", "k", "l", "m", "n"])
|
||
|
values4 = [1, 1, 1, 1, 1, 1, 1]
|
||
|
exp_single_cats_value = DataFrame({"cats": cats4,
|
||
|
"values": values4}, index=idx4)
|
||
|
|
||
|
# iloc
|
||
|
# ###############
|
||
|
# - assign a single value -> exp_single_cats_value
|
||
|
df = orig.copy()
|
||
|
df.iloc[2, 0] = "b"
|
||
|
tm.assert_frame_equal(df, exp_single_cats_value)
|
||
|
|
||
|
df = orig.copy()
|
||
|
df.iloc[df.index == "j", 0] = "b"
|
||
|
tm.assert_frame_equal(df, exp_single_cats_value)
|
||
|
|
||
|
# - assign a single value not in the current categories set
|
||
|
def f():
|
||
|
df = orig.copy()
|
||
|
df.iloc[2, 0] = "c"
|
||
|
|
||
|
pytest.raises(ValueError, f)
|
||
|
|
||
|
# - assign a complete row (mixed values) -> exp_single_row
|
||
|
df = orig.copy()
|
||
|
df.iloc[2, :] = ["b", 2]
|
||
|
tm.assert_frame_equal(df, exp_single_row)
|
||
|
|
||
|
# - assign a complete row (mixed values) not in categories set
|
||
|
def f():
|
||
|
df = orig.copy()
|
||
|
df.iloc[2, :] = ["c", 2]
|
||
|
|
||
|
pytest.raises(ValueError, f)
|
||
|
|
||
|
# - assign multiple rows (mixed values) -> exp_multi_row
|
||
|
df = orig.copy()
|
||
|
df.iloc[2:4, :] = [["b", 2], ["b", 2]]
|
||
|
tm.assert_frame_equal(df, exp_multi_row)
|
||
|
|
||
|
def f():
|
||
|
df = orig.copy()
|
||
|
df.iloc[2:4, :] = [["c", 2], ["c", 2]]
|
||
|
|
||
|
pytest.raises(ValueError, f)
|
||
|
|
||
|
# assign a part of a column with dtype == categorical ->
|
||
|
# exp_parts_cats_col
|
||
|
df = orig.copy()
|
||
|
df.iloc[2:4, 0] = Categorical(["b", "b"], categories=["a", "b"])
|
||
|
tm.assert_frame_equal(df, exp_parts_cats_col)
|
||
|
|
||
|
with pytest.raises(ValueError):
|
||
|
# different categories -> not sure if this should fail or pass
|
||
|
df = orig.copy()
|
||
|
df.iloc[2:4, 0] = Categorical(list('bb'), categories=list('abc'))
|
||
|
|
||
|
with pytest.raises(ValueError):
|
||
|
# different values
|
||
|
df = orig.copy()
|
||
|
df.iloc[2:4, 0] = Categorical(list('cc'), categories=list('abc'))
|
||
|
|
||
|
# assign a part of a column with dtype != categorical ->
|
||
|
# exp_parts_cats_col
|
||
|
df = orig.copy()
|
||
|
df.iloc[2:4, 0] = ["b", "b"]
|
||
|
tm.assert_frame_equal(df, exp_parts_cats_col)
|
||
|
|
||
|
with pytest.raises(ValueError):
|
||
|
df.iloc[2:4, 0] = ["c", "c"]
|
||
|
|
||
|
# loc
|
||
|
# ##############
|
||
|
# - assign a single value -> exp_single_cats_value
|
||
|
df = orig.copy()
|
||
|
df.loc["j", "cats"] = "b"
|
||
|
tm.assert_frame_equal(df, exp_single_cats_value)
|
||
|
|
||
|
df = orig.copy()
|
||
|
df.loc[df.index == "j", "cats"] = "b"
|
||
|
tm.assert_frame_equal(df, exp_single_cats_value)
|
||
|
|
||
|
# - assign a single value not in the current categories set
|
||
|
def f():
|
||
|
df = orig.copy()
|
||
|
df.loc["j", "cats"] = "c"
|
||
|
|
||
|
pytest.raises(ValueError, f)
|
||
|
|
||
|
# - assign a complete row (mixed values) -> exp_single_row
|
||
|
df = orig.copy()
|
||
|
df.loc["j", :] = ["b", 2]
|
||
|
tm.assert_frame_equal(df, exp_single_row)
|
||
|
|
||
|
# - assign a complete row (mixed values) not in categories set
|
||
|
def f():
|
||
|
df = orig.copy()
|
||
|
df.loc["j", :] = ["c", 2]
|
||
|
|
||
|
pytest.raises(ValueError, f)
|
||
|
|
||
|
# - assign multiple rows (mixed values) -> exp_multi_row
|
||
|
df = orig.copy()
|
||
|
df.loc["j":"k", :] = [["b", 2], ["b", 2]]
|
||
|
tm.assert_frame_equal(df, exp_multi_row)
|
||
|
|
||
|
def f():
|
||
|
df = orig.copy()
|
||
|
df.loc["j":"k", :] = [["c", 2], ["c", 2]]
|
||
|
|
||
|
pytest.raises(ValueError, f)
|
||
|
|
||
|
# assign a part of a column with dtype == categorical ->
|
||
|
# exp_parts_cats_col
|
||
|
df = orig.copy()
|
||
|
df.loc["j":"k", "cats"] = Categorical(
|
||
|
["b", "b"], categories=["a", "b"])
|
||
|
tm.assert_frame_equal(df, exp_parts_cats_col)
|
||
|
|
||
|
with pytest.raises(ValueError):
|
||
|
# different categories -> not sure if this should fail or pass
|
||
|
df = orig.copy()
|
||
|
df.loc["j":"k", "cats"] = Categorical(
|
||
|
["b", "b"], categories=["a", "b", "c"])
|
||
|
|
||
|
with pytest.raises(ValueError):
|
||
|
# different values
|
||
|
df = orig.copy()
|
||
|
df.loc["j":"k", "cats"] = Categorical(
|
||
|
["c", "c"], categories=["a", "b", "c"])
|
||
|
|
||
|
# assign a part of a column with dtype != categorical ->
|
||
|
# exp_parts_cats_col
|
||
|
df = orig.copy()
|
||
|
df.loc["j":"k", "cats"] = ["b", "b"]
|
||
|
tm.assert_frame_equal(df, exp_parts_cats_col)
|
||
|
|
||
|
with pytest.raises(ValueError):
|
||
|
df.loc["j":"k", "cats"] = ["c", "c"]
|
||
|
|
||
|
# loc
|
||
|
# ##############
|
||
|
# - assign a single value -> exp_single_cats_value
|
||
|
df = orig.copy()
|
||
|
df.loc["j", df.columns[0]] = "b"
|
||
|
tm.assert_frame_equal(df, exp_single_cats_value)
|
||
|
|
||
|
df = orig.copy()
|
||
|
df.loc[df.index == "j", df.columns[0]] = "b"
|
||
|
tm.assert_frame_equal(df, exp_single_cats_value)
|
||
|
|
||
|
# - assign a single value not in the current categories set
|
||
|
def f():
|
||
|
df = orig.copy()
|
||
|
df.loc["j", df.columns[0]] = "c"
|
||
|
|
||
|
pytest.raises(ValueError, f)
|
||
|
|
||
|
# - assign a complete row (mixed values) -> exp_single_row
|
||
|
df = orig.copy()
|
||
|
df.loc["j", :] = ["b", 2]
|
||
|
tm.assert_frame_equal(df, exp_single_row)
|
||
|
|
||
|
# - assign a complete row (mixed values) not in categories set
|
||
|
def f():
|
||
|
df = orig.copy()
|
||
|
df.loc["j", :] = ["c", 2]
|
||
|
|
||
|
pytest.raises(ValueError, f)
|
||
|
|
||
|
# - assign multiple rows (mixed values) -> exp_multi_row
|
||
|
df = orig.copy()
|
||
|
df.loc["j":"k", :] = [["b", 2], ["b", 2]]
|
||
|
tm.assert_frame_equal(df, exp_multi_row)
|
||
|
|
||
|
def f():
|
||
|
df = orig.copy()
|
||
|
df.loc["j":"k", :] = [["c", 2], ["c", 2]]
|
||
|
|
||
|
pytest.raises(ValueError, f)
|
||
|
|
||
|
# assign a part of a column with dtype == categorical ->
|
||
|
# exp_parts_cats_col
|
||
|
df = orig.copy()
|
||
|
df.loc["j":"k", df.columns[0]] = Categorical(
|
||
|
["b", "b"], categories=["a", "b"])
|
||
|
tm.assert_frame_equal(df, exp_parts_cats_col)
|
||
|
|
||
|
with pytest.raises(ValueError):
|
||
|
# different categories -> not sure if this should fail or pass
|
||
|
df = orig.copy()
|
||
|
df.loc["j":"k", df.columns[0]] = Categorical(
|
||
|
["b", "b"], categories=["a", "b", "c"])
|
||
|
|
||
|
with pytest.raises(ValueError):
|
||
|
# different values
|
||
|
df = orig.copy()
|
||
|
df.loc["j":"k", df.columns[0]] = Categorical(
|
||
|
["c", "c"], categories=["a", "b", "c"])
|
||
|
|
||
|
# assign a part of a column with dtype != categorical ->
|
||
|
# exp_parts_cats_col
|
||
|
df = orig.copy()
|
||
|
df.loc["j":"k", df.columns[0]] = ["b", "b"]
|
||
|
tm.assert_frame_equal(df, exp_parts_cats_col)
|
||
|
|
||
|
with pytest.raises(ValueError):
|
||
|
df.loc["j":"k", df.columns[0]] = ["c", "c"]
|
||
|
|
||
|
# iat
|
||
|
df = orig.copy()
|
||
|
df.iat[2, 0] = "b"
|
||
|
tm.assert_frame_equal(df, exp_single_cats_value)
|
||
|
|
||
|
# - assign a single value not in the current categories set
|
||
|
def f():
|
||
|
df = orig.copy()
|
||
|
df.iat[2, 0] = "c"
|
||
|
|
||
|
pytest.raises(ValueError, f)
|
||
|
|
||
|
# at
|
||
|
# - assign a single value -> exp_single_cats_value
|
||
|
df = orig.copy()
|
||
|
df.at["j", "cats"] = "b"
|
||
|
tm.assert_frame_equal(df, exp_single_cats_value)
|
||
|
|
||
|
# - assign a single value not in the current categories set
|
||
|
def f():
|
||
|
df = orig.copy()
|
||
|
df.at["j", "cats"] = "c"
|
||
|
|
||
|
pytest.raises(ValueError, f)
|
||
|
|
||
|
# fancy indexing
|
||
|
catsf = Categorical(["a", "a", "c", "c", "a", "a", "a"],
|
||
|
categories=["a", "b", "c"])
|
||
|
idxf = Index(["h", "i", "j", "k", "l", "m", "n"])
|
||
|
valuesf = [1, 1, 3, 3, 1, 1, 1]
|
||
|
df = DataFrame({"cats": catsf, "values": valuesf}, index=idxf)
|
||
|
|
||
|
exp_fancy = exp_multi_row.copy()
|
||
|
exp_fancy["cats"].cat.set_categories(["a", "b", "c"], inplace=True)
|
||
|
|
||
|
df[df["cats"] == "c"] = ["b", 2]
|
||
|
# category c is kept in .categories
|
||
|
tm.assert_frame_equal(df, exp_fancy)
|
||
|
|
||
|
# set_value
|
||
|
df = orig.copy()
|
||
|
df.at["j", "cats"] = "b"
|
||
|
tm.assert_frame_equal(df, exp_single_cats_value)
|
||
|
|
||
|
def f():
|
||
|
df = orig.copy()
|
||
|
df.at["j", "cats"] = "c"
|
||
|
|
||
|
pytest.raises(ValueError, f)
|
||
|
|
||
|
# Assigning a Category to parts of a int/... column uses the values of
|
||
|
# the Catgorical
|
||
|
df = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")})
|
||
|
exp = DataFrame({"a": [1, "b", "b", 1, 1], "b": list("aabba")})
|
||
|
df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"])
|
||
|
df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"])
|
||
|
tm.assert_frame_equal(df, exp)
|
||
|
|
||
|
def test_functions_no_warnings(self):
|
||
|
df = DataFrame({'value': np.random.randint(0, 100, 20)})
|
||
|
labels = ["{0} - {1}".format(i, i + 9) for i in range(0, 100, 10)]
|
||
|
with tm.assert_produces_warning(False):
|
||
|
df['group'] = pd.cut(df.value, range(0, 105, 10), right=False,
|
||
|
labels=labels)
|