1167 lines
44 KiB
Python
1167 lines
44 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
from __future__ import print_function
|
||
|
|
||
|
import inspect
|
||
|
import pytest
|
||
|
|
||
|
from datetime import datetime, timedelta
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
from pandas.compat import lrange, PY2
|
||
|
from pandas import (DataFrame, Series, Index, MultiIndex,
|
||
|
RangeIndex, date_range, IntervalIndex,
|
||
|
to_datetime)
|
||
|
from pandas.core.dtypes.common import (
|
||
|
is_object_dtype,
|
||
|
is_categorical_dtype,
|
||
|
is_interval_dtype)
|
||
|
import pandas as pd
|
||
|
|
||
|
from pandas.util.testing import assert_series_equal, assert_frame_equal
|
||
|
|
||
|
import pandas.util.testing as tm
|
||
|
|
||
|
from pandas.tests.frame.common import TestData
|
||
|
|
||
|
|
||
|
class TestDataFrameAlterAxes(TestData):
|
||
|
|
||
|
def test_set_index(self):
|
||
|
idx = Index(np.arange(len(self.mixed_frame)))
|
||
|
|
||
|
# cache it
|
||
|
_ = self.mixed_frame['foo'] # noqa
|
||
|
self.mixed_frame.index = idx
|
||
|
assert self.mixed_frame['foo'].index is idx
|
||
|
with tm.assert_raises_regex(ValueError, 'Length mismatch'):
|
||
|
self.mixed_frame.index = idx[::2]
|
||
|
|
||
|
def test_set_index_cast(self):
|
||
|
|
||
|
# issue casting an index then set_index
|
||
|
df = DataFrame({'A': [1.1, 2.2, 3.3], 'B': [5.0, 6.1, 7.2]},
|
||
|
index=[2010, 2011, 2012])
|
||
|
expected = df.loc[2010]
|
||
|
new_index = df.index.astype(np.int32)
|
||
|
df.index = new_index
|
||
|
result = df.loc[2010]
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_set_index2(self):
|
||
|
df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'],
|
||
|
'B': ['one', 'two', 'three', 'one', 'two'],
|
||
|
'C': ['a', 'b', 'c', 'd', 'e'],
|
||
|
'D': np.random.randn(5),
|
||
|
'E': np.random.randn(5)})
|
||
|
|
||
|
# new object, single-column
|
||
|
result = df.set_index('C')
|
||
|
result_nodrop = df.set_index('C', drop=False)
|
||
|
|
||
|
index = Index(df['C'], name='C')
|
||
|
|
||
|
expected = df.loc[:, ['A', 'B', 'D', 'E']]
|
||
|
expected.index = index
|
||
|
|
||
|
expected_nodrop = df.copy()
|
||
|
expected_nodrop.index = index
|
||
|
|
||
|
assert_frame_equal(result, expected)
|
||
|
assert_frame_equal(result_nodrop, expected_nodrop)
|
||
|
assert result.index.name == index.name
|
||
|
|
||
|
# inplace, single
|
||
|
df2 = df.copy()
|
||
|
|
||
|
df2.set_index('C', inplace=True)
|
||
|
|
||
|
assert_frame_equal(df2, expected)
|
||
|
|
||
|
df3 = df.copy()
|
||
|
df3.set_index('C', drop=False, inplace=True)
|
||
|
|
||
|
assert_frame_equal(df3, expected_nodrop)
|
||
|
|
||
|
# create new object, multi-column
|
||
|
result = df.set_index(['A', 'B'])
|
||
|
result_nodrop = df.set_index(['A', 'B'], drop=False)
|
||
|
|
||
|
index = MultiIndex.from_arrays([df['A'], df['B']], names=['A', 'B'])
|
||
|
|
||
|
expected = df.loc[:, ['C', 'D', 'E']]
|
||
|
expected.index = index
|
||
|
|
||
|
expected_nodrop = df.copy()
|
||
|
expected_nodrop.index = index
|
||
|
|
||
|
assert_frame_equal(result, expected)
|
||
|
assert_frame_equal(result_nodrop, expected_nodrop)
|
||
|
assert result.index.names == index.names
|
||
|
|
||
|
# inplace
|
||
|
df2 = df.copy()
|
||
|
df2.set_index(['A', 'B'], inplace=True)
|
||
|
assert_frame_equal(df2, expected)
|
||
|
|
||
|
df3 = df.copy()
|
||
|
df3.set_index(['A', 'B'], drop=False, inplace=True)
|
||
|
assert_frame_equal(df3, expected_nodrop)
|
||
|
|
||
|
# corner case
|
||
|
with tm.assert_raises_regex(ValueError,
|
||
|
'Index has duplicate keys'):
|
||
|
df.set_index('A', verify_integrity=True)
|
||
|
|
||
|
# append
|
||
|
result = df.set_index(['A', 'B'], append=True)
|
||
|
xp = df.reset_index().set_index(['index', 'A', 'B'])
|
||
|
xp.index.names = [None, 'A', 'B']
|
||
|
assert_frame_equal(result, xp)
|
||
|
|
||
|
# append to existing multiindex
|
||
|
rdf = df.set_index(['A'], append=True)
|
||
|
rdf = rdf.set_index(['B', 'C'], append=True)
|
||
|
expected = df.set_index(['A', 'B', 'C'], append=True)
|
||
|
assert_frame_equal(rdf, expected)
|
||
|
|
||
|
# Series
|
||
|
result = df.set_index(df.C)
|
||
|
assert result.index.name == 'C'
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
'level', ['a', pd.Series(range(0, 8, 2), name='a')])
|
||
|
def test_set_index_duplicate_names(self, level):
|
||
|
# GH18872 - GH19029
|
||
|
df = pd.DataFrame(np.arange(8).reshape(4, 2), columns=['a', 'b'])
|
||
|
|
||
|
# Pass an existing level name:
|
||
|
df.index.name = 'a'
|
||
|
expected = pd.MultiIndex.from_tuples([(0, 0), (1, 2), (2, 4), (3, 6)],
|
||
|
names=['a', 'a'])
|
||
|
result = df.set_index(level, append=True)
|
||
|
tm.assert_index_equal(result.index, expected)
|
||
|
result = df.set_index([level], append=True)
|
||
|
tm.assert_index_equal(result.index, expected)
|
||
|
|
||
|
# Pass twice the same level name (only works with passing actual data)
|
||
|
if isinstance(level, pd.Series):
|
||
|
result = df.set_index([level, level])
|
||
|
expected = pd.MultiIndex.from_tuples(
|
||
|
[(0, 0), (2, 2), (4, 4), (6, 6)], names=['a', 'a'])
|
||
|
tm.assert_index_equal(result.index, expected)
|
||
|
|
||
|
def test_set_index_nonuniq(self):
|
||
|
df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'],
|
||
|
'B': ['one', 'two', 'three', 'one', 'two'],
|
||
|
'C': ['a', 'b', 'c', 'd', 'e'],
|
||
|
'D': np.random.randn(5),
|
||
|
'E': np.random.randn(5)})
|
||
|
with tm.assert_raises_regex(ValueError,
|
||
|
'Index has duplicate keys'):
|
||
|
df.set_index('A', verify_integrity=True, inplace=True)
|
||
|
assert 'A' in df
|
||
|
|
||
|
def test_set_index_bug(self):
|
||
|
# GH1590
|
||
|
df = DataFrame({'val': [0, 1, 2], 'key': ['a', 'b', 'c']})
|
||
|
xp = DataFrame({'val': [1, 2]},
|
||
|
Index(['b', 'c'], name='key'))
|
||
|
|
||
|
df2 = df.loc[df.index.map(lambda indx: indx >= 1)]
|
||
|
rs = df2.set_index('key')
|
||
|
assert_frame_equal(rs, xp)
|
||
|
|
||
|
def test_set_index_pass_arrays(self):
|
||
|
df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
|
||
|
'foo', 'bar', 'foo', 'foo'],
|
||
|
'B': ['one', 'one', 'two', 'three',
|
||
|
'two', 'two', 'one', 'three'],
|
||
|
'C': np.random.randn(8),
|
||
|
'D': np.random.randn(8)})
|
||
|
|
||
|
# multiple columns
|
||
|
result = df.set_index(['A', df['B'].values], drop=False)
|
||
|
expected = df.set_index(['A', 'B'], drop=False)
|
||
|
|
||
|
# TODO should set_index check_names ?
|
||
|
assert_frame_equal(result, expected, check_names=False)
|
||
|
|
||
|
def test_construction_with_categorical_index(self):
|
||
|
|
||
|
ci = tm.makeCategoricalIndex(10)
|
||
|
|
||
|
# with Categorical
|
||
|
df = DataFrame({'A': np.random.randn(10),
|
||
|
'B': ci.values})
|
||
|
idf = df.set_index('B')
|
||
|
str(idf)
|
||
|
tm.assert_index_equal(idf.index, ci, check_names=False)
|
||
|
assert idf.index.name == 'B'
|
||
|
|
||
|
# from a CategoricalIndex
|
||
|
df = DataFrame({'A': np.random.randn(10),
|
||
|
'B': ci})
|
||
|
idf = df.set_index('B')
|
||
|
str(idf)
|
||
|
tm.assert_index_equal(idf.index, ci, check_names=False)
|
||
|
assert idf.index.name == 'B'
|
||
|
|
||
|
idf = df.set_index('B').reset_index().set_index('B')
|
||
|
str(idf)
|
||
|
tm.assert_index_equal(idf.index, ci, check_names=False)
|
||
|
assert idf.index.name == 'B'
|
||
|
|
||
|
new_df = idf.reset_index()
|
||
|
new_df.index = df.B
|
||
|
tm.assert_index_equal(new_df.index, ci, check_names=False)
|
||
|
assert idf.index.name == 'B'
|
||
|
|
||
|
def test_set_index_cast_datetimeindex(self):
|
||
|
df = DataFrame({'A': [datetime(2000, 1, 1) + timedelta(i)
|
||
|
for i in range(1000)],
|
||
|
'B': np.random.randn(1000)})
|
||
|
|
||
|
idf = df.set_index('A')
|
||
|
assert isinstance(idf.index, pd.DatetimeIndex)
|
||
|
|
||
|
# don't cast a DatetimeIndex WITH a tz, leave as object
|
||
|
# GH 6032
|
||
|
i = (pd.DatetimeIndex(
|
||
|
to_datetime(['2013-1-1 13:00',
|
||
|
'2013-1-2 14:00'], errors="raise"))
|
||
|
.tz_localize('US/Pacific'))
|
||
|
df = DataFrame(np.random.randn(2, 1), columns=['A'])
|
||
|
|
||
|
expected = Series(np.array([pd.Timestamp('2013-01-01 13:00:00-0800',
|
||
|
tz='US/Pacific'),
|
||
|
pd.Timestamp('2013-01-02 14:00:00-0800',
|
||
|
tz='US/Pacific')],
|
||
|
dtype="object"))
|
||
|
|
||
|
# convert index to series
|
||
|
result = Series(i)
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
# assignt to frame
|
||
|
df['B'] = i
|
||
|
result = df['B']
|
||
|
assert_series_equal(result, expected, check_names=False)
|
||
|
assert result.name == 'B'
|
||
|
|
||
|
# keep the timezone
|
||
|
result = i.to_series(keep_tz=True)
|
||
|
assert_series_equal(result.reset_index(drop=True), expected)
|
||
|
|
||
|
# convert to utc
|
||
|
df['C'] = i.to_series().reset_index(drop=True)
|
||
|
result = df['C']
|
||
|
comp = pd.DatetimeIndex(expected.values)
|
||
|
comp = comp.tz_localize(None)
|
||
|
tm.assert_numpy_array_equal(result.values, comp.values)
|
||
|
|
||
|
# list of datetimes with a tz
|
||
|
df['D'] = i.to_pydatetime()
|
||
|
result = df['D']
|
||
|
assert_series_equal(result, expected, check_names=False)
|
||
|
assert result.name == 'D'
|
||
|
|
||
|
# GH 6785
|
||
|
# set the index manually
|
||
|
import pytz
|
||
|
df = DataFrame(
|
||
|
[{'ts': datetime(2014, 4, 1, tzinfo=pytz.utc), 'foo': 1}])
|
||
|
expected = df.set_index('ts')
|
||
|
df.index = df['ts']
|
||
|
df.pop('ts')
|
||
|
assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_reset_index_tz(self, tz_aware_fixture):
|
||
|
# GH 3950
|
||
|
# reset_index with single level
|
||
|
tz = tz_aware_fixture
|
||
|
idx = pd.date_range('1/1/2011', periods=5,
|
||
|
freq='D', tz=tz, name='idx')
|
||
|
df = pd.DataFrame(
|
||
|
{'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx)
|
||
|
|
||
|
expected = pd.DataFrame({'idx': [datetime(2011, 1, 1),
|
||
|
datetime(2011, 1, 2),
|
||
|
datetime(2011, 1, 3),
|
||
|
datetime(2011, 1, 4),
|
||
|
datetime(2011, 1, 5)],
|
||
|
'a': range(5),
|
||
|
'b': ['A', 'B', 'C', 'D', 'E']},
|
||
|
columns=['idx', 'a', 'b'])
|
||
|
expected['idx'] = expected['idx'].apply(
|
||
|
lambda d: pd.Timestamp(d, tz=tz))
|
||
|
assert_frame_equal(df.reset_index(), expected)
|
||
|
|
||
|
def test_set_index_timezone(self):
|
||
|
# GH 12358
|
||
|
# tz-aware Series should retain the tz
|
||
|
i = pd.to_datetime(["2014-01-01 10:10:10"],
|
||
|
utc=True).tz_convert('Europe/Rome')
|
||
|
df = DataFrame({'i': i})
|
||
|
assert df.set_index(i).index[0].hour == 11
|
||
|
assert pd.DatetimeIndex(pd.Series(df.i))[0].hour == 11
|
||
|
assert df.set_index(df.i).index[0].hour == 11
|
||
|
|
||
|
def test_set_index_dst(self):
|
||
|
di = pd.date_range('2006-10-29 00:00:00', periods=3,
|
||
|
freq='H', tz='US/Pacific')
|
||
|
|
||
|
df = pd.DataFrame(data={'a': [0, 1, 2], 'b': [3, 4, 5]},
|
||
|
index=di).reset_index()
|
||
|
# single level
|
||
|
res = df.set_index('index')
|
||
|
exp = pd.DataFrame(data={'a': [0, 1, 2], 'b': [3, 4, 5]},
|
||
|
index=pd.Index(di, name='index'))
|
||
|
tm.assert_frame_equal(res, exp)
|
||
|
|
||
|
# GH 12920
|
||
|
res = df.set_index(['index', 'a'])
|
||
|
exp_index = pd.MultiIndex.from_arrays([di, [0, 1, 2]],
|
||
|
names=['index', 'a'])
|
||
|
exp = pd.DataFrame({'b': [3, 4, 5]}, index=exp_index)
|
||
|
tm.assert_frame_equal(res, exp)
|
||
|
|
||
|
def test_reset_index_with_intervals(self):
|
||
|
idx = pd.IntervalIndex.from_breaks(np.arange(11), name='x')
|
||
|
original = pd.DataFrame({'x': idx, 'y': np.arange(10)})[['x', 'y']]
|
||
|
|
||
|
result = original.set_index('x')
|
||
|
expected = pd.DataFrame({'y': np.arange(10)}, index=idx)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result2 = result.reset_index()
|
||
|
assert_frame_equal(result2, original)
|
||
|
|
||
|
def test_set_index_multiindexcolumns(self):
|
||
|
columns = MultiIndex.from_tuples([('foo', 1), ('foo', 2), ('bar', 1)])
|
||
|
df = DataFrame(np.random.randn(3, 3), columns=columns)
|
||
|
rs = df.set_index(df.columns[0])
|
||
|
xp = df.iloc[:, 1:]
|
||
|
xp.index = df.iloc[:, 0].values
|
||
|
xp.index.names = [df.columns[0]]
|
||
|
assert_frame_equal(rs, xp)
|
||
|
|
||
|
def test_set_index_empty_column(self):
|
||
|
# #1971
|
||
|
df = DataFrame([
|
||
|
dict(a=1, p=0),
|
||
|
dict(a=2, m=10),
|
||
|
dict(a=3, m=11, p=20),
|
||
|
dict(a=4, m=12, p=21)
|
||
|
], columns=('a', 'm', 'p', 'x'))
|
||
|
|
||
|
# it works!
|
||
|
result = df.set_index(['a', 'x'])
|
||
|
repr(result)
|
||
|
|
||
|
def test_set_columns(self):
|
||
|
cols = Index(np.arange(len(self.mixed_frame.columns)))
|
||
|
self.mixed_frame.columns = cols
|
||
|
with tm.assert_raises_regex(ValueError, 'Length mismatch'):
|
||
|
self.mixed_frame.columns = cols[::2]
|
||
|
|
||
|
def test_dti_set_index_reindex(self):
|
||
|
# GH 6631
|
||
|
df = DataFrame(np.random.random(6))
|
||
|
idx1 = date_range('2011/01/01', periods=6, freq='M', tz='US/Eastern')
|
||
|
idx2 = date_range('2013', periods=6, freq='A', tz='Asia/Tokyo')
|
||
|
|
||
|
df = df.set_index(idx1)
|
||
|
tm.assert_index_equal(df.index, idx1)
|
||
|
df = df.reindex(idx2)
|
||
|
tm.assert_index_equal(df.index, idx2)
|
||
|
|
||
|
# 11314
|
||
|
# with tz
|
||
|
index = date_range(datetime(2015, 10, 1),
|
||
|
datetime(2015, 10, 1, 23),
|
||
|
freq='H', tz='US/Eastern')
|
||
|
df = DataFrame(np.random.randn(24, 1), columns=['a'], index=index)
|
||
|
new_index = date_range(datetime(2015, 10, 2),
|
||
|
datetime(2015, 10, 2, 23),
|
||
|
freq='H', tz='US/Eastern')
|
||
|
|
||
|
# TODO: unused?
|
||
|
result = df.set_index(new_index) # noqa
|
||
|
|
||
|
assert new_index.freq == index.freq
|
||
|
|
||
|
# Renaming
|
||
|
|
||
|
def test_rename(self):
|
||
|
mapping = {
|
||
|
'A': 'a',
|
||
|
'B': 'b',
|
||
|
'C': 'c',
|
||
|
'D': 'd'
|
||
|
}
|
||
|
|
||
|
renamed = self.frame.rename(columns=mapping)
|
||
|
renamed2 = self.frame.rename(columns=str.lower)
|
||
|
|
||
|
assert_frame_equal(renamed, renamed2)
|
||
|
assert_frame_equal(renamed2.rename(columns=str.upper),
|
||
|
self.frame, check_names=False)
|
||
|
|
||
|
# index
|
||
|
data = {
|
||
|
'A': {'foo': 0, 'bar': 1}
|
||
|
}
|
||
|
|
||
|
# gets sorted alphabetical
|
||
|
df = DataFrame(data)
|
||
|
renamed = df.rename(index={'foo': 'bar', 'bar': 'foo'})
|
||
|
tm.assert_index_equal(renamed.index, pd.Index(['foo', 'bar']))
|
||
|
|
||
|
renamed = df.rename(index=str.upper)
|
||
|
tm.assert_index_equal(renamed.index, pd.Index(['BAR', 'FOO']))
|
||
|
|
||
|
# have to pass something
|
||
|
pytest.raises(TypeError, self.frame.rename)
|
||
|
|
||
|
# partial columns
|
||
|
renamed = self.frame.rename(columns={'C': 'foo', 'D': 'bar'})
|
||
|
tm.assert_index_equal(renamed.columns,
|
||
|
pd.Index(['A', 'B', 'foo', 'bar']))
|
||
|
|
||
|
# other axis
|
||
|
renamed = self.frame.T.rename(index={'C': 'foo', 'D': 'bar'})
|
||
|
tm.assert_index_equal(renamed.index,
|
||
|
pd.Index(['A', 'B', 'foo', 'bar']))
|
||
|
|
||
|
# index with name
|
||
|
index = Index(['foo', 'bar'], name='name')
|
||
|
renamer = DataFrame(data, index=index)
|
||
|
renamed = renamer.rename(index={'foo': 'bar', 'bar': 'foo'})
|
||
|
tm.assert_index_equal(renamed.index,
|
||
|
pd.Index(['bar', 'foo'], name='name'))
|
||
|
assert renamed.index.name == renamer.index.name
|
||
|
|
||
|
def test_rename_axis_inplace(self):
|
||
|
# GH 15704
|
||
|
frame = self.frame.copy()
|
||
|
expected = frame.rename_axis('foo')
|
||
|
result = frame.copy()
|
||
|
no_return = result.rename_axis('foo', inplace=True)
|
||
|
|
||
|
assert no_return is None
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
expected = frame.rename_axis('bar', axis=1)
|
||
|
result = frame.copy()
|
||
|
no_return = result.rename_axis('bar', axis=1, inplace=True)
|
||
|
|
||
|
assert no_return is None
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_rename_axis_warns(self):
|
||
|
# https://github.com/pandas-dev/pandas/issues/17833
|
||
|
df = pd.DataFrame({"A": [1, 2], "B": [1, 2]})
|
||
|
with tm.assert_produces_warning(FutureWarning) as w:
|
||
|
df.rename_axis(id, axis=0)
|
||
|
assert 'rename' in str(w[0].message)
|
||
|
|
||
|
with tm.assert_produces_warning(FutureWarning) as w:
|
||
|
df.rename_axis({0: 10, 1: 20}, axis=0)
|
||
|
assert 'rename' in str(w[0].message)
|
||
|
|
||
|
with tm.assert_produces_warning(FutureWarning) as w:
|
||
|
df.rename_axis(id, axis=1)
|
||
|
assert 'rename' in str(w[0].message)
|
||
|
|
||
|
with tm.assert_produces_warning(FutureWarning) as w:
|
||
|
df['A'].rename_axis(id)
|
||
|
assert 'rename' in str(w[0].message)
|
||
|
|
||
|
def test_rename_multiindex(self):
|
||
|
|
||
|
tuples_index = [('foo1', 'bar1'), ('foo2', 'bar2')]
|
||
|
tuples_columns = [('fizz1', 'buzz1'), ('fizz2', 'buzz2')]
|
||
|
index = MultiIndex.from_tuples(tuples_index, names=['foo', 'bar'])
|
||
|
columns = MultiIndex.from_tuples(
|
||
|
tuples_columns, names=['fizz', 'buzz'])
|
||
|
df = DataFrame([(0, 0), (1, 1)], index=index, columns=columns)
|
||
|
|
||
|
#
|
||
|
# without specifying level -> across all levels
|
||
|
|
||
|
renamed = df.rename(index={'foo1': 'foo3', 'bar2': 'bar3'},
|
||
|
columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'})
|
||
|
new_index = MultiIndex.from_tuples([('foo3', 'bar1'),
|
||
|
('foo2', 'bar3')],
|
||
|
names=['foo', 'bar'])
|
||
|
new_columns = MultiIndex.from_tuples([('fizz3', 'buzz1'),
|
||
|
('fizz2', 'buzz3')],
|
||
|
names=['fizz', 'buzz'])
|
||
|
tm.assert_index_equal(renamed.index, new_index)
|
||
|
tm.assert_index_equal(renamed.columns, new_columns)
|
||
|
assert renamed.index.names == df.index.names
|
||
|
assert renamed.columns.names == df.columns.names
|
||
|
|
||
|
#
|
||
|
# with specifying a level (GH13766)
|
||
|
|
||
|
# dict
|
||
|
new_columns = MultiIndex.from_tuples([('fizz3', 'buzz1'),
|
||
|
('fizz2', 'buzz2')],
|
||
|
names=['fizz', 'buzz'])
|
||
|
renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'},
|
||
|
level=0)
|
||
|
tm.assert_index_equal(renamed.columns, new_columns)
|
||
|
renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'},
|
||
|
level='fizz')
|
||
|
tm.assert_index_equal(renamed.columns, new_columns)
|
||
|
|
||
|
new_columns = MultiIndex.from_tuples([('fizz1', 'buzz1'),
|
||
|
('fizz2', 'buzz3')],
|
||
|
names=['fizz', 'buzz'])
|
||
|
renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'},
|
||
|
level=1)
|
||
|
tm.assert_index_equal(renamed.columns, new_columns)
|
||
|
renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'},
|
||
|
level='buzz')
|
||
|
tm.assert_index_equal(renamed.columns, new_columns)
|
||
|
|
||
|
# function
|
||
|
func = str.upper
|
||
|
new_columns = MultiIndex.from_tuples([('FIZZ1', 'buzz1'),
|
||
|
('FIZZ2', 'buzz2')],
|
||
|
names=['fizz', 'buzz'])
|
||
|
renamed = df.rename(columns=func, level=0)
|
||
|
tm.assert_index_equal(renamed.columns, new_columns)
|
||
|
renamed = df.rename(columns=func, level='fizz')
|
||
|
tm.assert_index_equal(renamed.columns, new_columns)
|
||
|
|
||
|
new_columns = MultiIndex.from_tuples([('fizz1', 'BUZZ1'),
|
||
|
('fizz2', 'BUZZ2')],
|
||
|
names=['fizz', 'buzz'])
|
||
|
renamed = df.rename(columns=func, level=1)
|
||
|
tm.assert_index_equal(renamed.columns, new_columns)
|
||
|
renamed = df.rename(columns=func, level='buzz')
|
||
|
tm.assert_index_equal(renamed.columns, new_columns)
|
||
|
|
||
|
# index
|
||
|
new_index = MultiIndex.from_tuples([('foo3', 'bar1'),
|
||
|
('foo2', 'bar2')],
|
||
|
names=['foo', 'bar'])
|
||
|
renamed = df.rename(index={'foo1': 'foo3', 'bar2': 'bar3'},
|
||
|
level=0)
|
||
|
tm.assert_index_equal(renamed.index, new_index)
|
||
|
|
||
|
def test_rename_nocopy(self):
|
||
|
renamed = self.frame.rename(columns={'C': 'foo'}, copy=False)
|
||
|
renamed['foo'] = 1.
|
||
|
assert (self.frame['C'] == 1.).all()
|
||
|
|
||
|
def test_rename_inplace(self):
|
||
|
self.frame.rename(columns={'C': 'foo'})
|
||
|
assert 'C' in self.frame
|
||
|
assert 'foo' not in self.frame
|
||
|
|
||
|
c_id = id(self.frame['C'])
|
||
|
frame = self.frame.copy()
|
||
|
frame.rename(columns={'C': 'foo'}, inplace=True)
|
||
|
|
||
|
assert 'C' not in frame
|
||
|
assert 'foo' in frame
|
||
|
assert id(frame['foo']) != c_id
|
||
|
|
||
|
def test_rename_bug(self):
|
||
|
# GH 5344
|
||
|
# rename set ref_locs, and set_index was not resetting
|
||
|
df = DataFrame({0: ['foo', 'bar'], 1: ['bah', 'bas'], 2: [1, 2]})
|
||
|
df = df.rename(columns={0: 'a'})
|
||
|
df = df.rename(columns={1: 'b'})
|
||
|
df = df.set_index(['a', 'b'])
|
||
|
df.columns = ['2001-01-01']
|
||
|
expected = DataFrame([[1], [2]],
|
||
|
index=MultiIndex.from_tuples(
|
||
|
[('foo', 'bah'), ('bar', 'bas')],
|
||
|
names=['a', 'b']),
|
||
|
columns=['2001-01-01'])
|
||
|
assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_rename_bug2(self):
|
||
|
# GH 19497
|
||
|
# rename was changing Index to MultiIndex if Index contained tuples
|
||
|
|
||
|
df = DataFrame(data=np.arange(3), index=[(0, 0), (1, 1), (2, 2)],
|
||
|
columns=["a"])
|
||
|
df = df.rename({(1, 1): (5, 4)}, axis="index")
|
||
|
expected = DataFrame(data=np.arange(3), index=[(0, 0), (5, 4), (2, 2)],
|
||
|
columns=["a"])
|
||
|
assert_frame_equal(df, expected)
|
||
|
|
||
|
def test_reorder_levels(self):
|
||
|
index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]],
|
||
|
labels=[[0, 0, 0, 0, 0, 0],
|
||
|
[0, 1, 2, 0, 1, 2],
|
||
|
[0, 1, 0, 1, 0, 1]],
|
||
|
names=['L0', 'L1', 'L2'])
|
||
|
df = DataFrame({'A': np.arange(6), 'B': np.arange(6)}, index=index)
|
||
|
|
||
|
# no change, position
|
||
|
result = df.reorder_levels([0, 1, 2])
|
||
|
assert_frame_equal(df, result)
|
||
|
|
||
|
# no change, labels
|
||
|
result = df.reorder_levels(['L0', 'L1', 'L2'])
|
||
|
assert_frame_equal(df, result)
|
||
|
|
||
|
# rotate, position
|
||
|
result = df.reorder_levels([1, 2, 0])
|
||
|
e_idx = MultiIndex(levels=[['one', 'two', 'three'], [0, 1], ['bar']],
|
||
|
labels=[[0, 1, 2, 0, 1, 2],
|
||
|
[0, 1, 0, 1, 0, 1],
|
||
|
[0, 0, 0, 0, 0, 0]],
|
||
|
names=['L1', 'L2', 'L0'])
|
||
|
expected = DataFrame({'A': np.arange(6), 'B': np.arange(6)},
|
||
|
index=e_idx)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.reorder_levels([0, 0, 0])
|
||
|
e_idx = MultiIndex(levels=[['bar'], ['bar'], ['bar']],
|
||
|
labels=[[0, 0, 0, 0, 0, 0],
|
||
|
[0, 0, 0, 0, 0, 0],
|
||
|
[0, 0, 0, 0, 0, 0]],
|
||
|
names=['L0', 'L0', 'L0'])
|
||
|
expected = DataFrame({'A': np.arange(6), 'B': np.arange(6)},
|
||
|
index=e_idx)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.reorder_levels(['L0', 'L0', 'L0'])
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_reset_index(self):
|
||
|
stacked = self.frame.stack()[::2]
|
||
|
stacked = DataFrame({'foo': stacked, 'bar': stacked})
|
||
|
|
||
|
names = ['first', 'second']
|
||
|
stacked.index.names = names
|
||
|
deleveled = stacked.reset_index()
|
||
|
for i, (lev, lab) in enumerate(zip(stacked.index.levels,
|
||
|
stacked.index.labels)):
|
||
|
values = lev.take(lab)
|
||
|
name = names[i]
|
||
|
tm.assert_index_equal(values, Index(deleveled[name]))
|
||
|
|
||
|
stacked.index.names = [None, None]
|
||
|
deleveled2 = stacked.reset_index()
|
||
|
tm.assert_series_equal(deleveled['first'], deleveled2['level_0'],
|
||
|
check_names=False)
|
||
|
tm.assert_series_equal(deleveled['second'], deleveled2['level_1'],
|
||
|
check_names=False)
|
||
|
|
||
|
# default name assigned
|
||
|
rdf = self.frame.reset_index()
|
||
|
exp = pd.Series(self.frame.index.values, name='index')
|
||
|
tm.assert_series_equal(rdf['index'], exp)
|
||
|
|
||
|
# default name assigned, corner case
|
||
|
df = self.frame.copy()
|
||
|
df['index'] = 'foo'
|
||
|
rdf = df.reset_index()
|
||
|
exp = pd.Series(self.frame.index.values, name='level_0')
|
||
|
tm.assert_series_equal(rdf['level_0'], exp)
|
||
|
|
||
|
# but this is ok
|
||
|
self.frame.index.name = 'index'
|
||
|
deleveled = self.frame.reset_index()
|
||
|
tm.assert_series_equal(deleveled['index'],
|
||
|
pd.Series(self.frame.index))
|
||
|
tm.assert_index_equal(deleveled.index,
|
||
|
pd.Index(np.arange(len(deleveled))))
|
||
|
|
||
|
# preserve column names
|
||
|
self.frame.columns.name = 'columns'
|
||
|
resetted = self.frame.reset_index()
|
||
|
assert resetted.columns.name == 'columns'
|
||
|
|
||
|
# only remove certain columns
|
||
|
frame = self.frame.reset_index().set_index(['index', 'A', 'B'])
|
||
|
rs = frame.reset_index(['A', 'B'])
|
||
|
|
||
|
# TODO should reset_index check_names ?
|
||
|
assert_frame_equal(rs, self.frame, check_names=False)
|
||
|
|
||
|
rs = frame.reset_index(['index', 'A', 'B'])
|
||
|
assert_frame_equal(rs, self.frame.reset_index(), check_names=False)
|
||
|
|
||
|
rs = frame.reset_index(['index', 'A', 'B'])
|
||
|
assert_frame_equal(rs, self.frame.reset_index(), check_names=False)
|
||
|
|
||
|
rs = frame.reset_index('A')
|
||
|
xp = self.frame.reset_index().set_index(['index', 'B'])
|
||
|
assert_frame_equal(rs, xp, check_names=False)
|
||
|
|
||
|
# test resetting in place
|
||
|
df = self.frame.copy()
|
||
|
resetted = self.frame.reset_index()
|
||
|
df.reset_index(inplace=True)
|
||
|
assert_frame_equal(df, resetted, check_names=False)
|
||
|
|
||
|
frame = self.frame.reset_index().set_index(['index', 'A', 'B'])
|
||
|
rs = frame.reset_index('A', drop=True)
|
||
|
xp = self.frame.copy()
|
||
|
del xp['A']
|
||
|
xp = xp.set_index(['B'], append=True)
|
||
|
assert_frame_equal(rs, xp, check_names=False)
|
||
|
|
||
|
def test_reset_index_level(self):
|
||
|
df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]],
|
||
|
columns=['A', 'B', 'C', 'D'])
|
||
|
|
||
|
for levels in ['A', 'B'], [0, 1]:
|
||
|
# With MultiIndex
|
||
|
result = df.set_index(['A', 'B']).reset_index(level=levels[0])
|
||
|
tm.assert_frame_equal(result, df.set_index('B'))
|
||
|
|
||
|
result = df.set_index(['A', 'B']).reset_index(level=levels[:1])
|
||
|
tm.assert_frame_equal(result, df.set_index('B'))
|
||
|
|
||
|
result = df.set_index(['A', 'B']).reset_index(level=levels)
|
||
|
tm.assert_frame_equal(result, df)
|
||
|
|
||
|
result = df.set_index(['A', 'B']).reset_index(level=levels,
|
||
|
drop=True)
|
||
|
tm.assert_frame_equal(result, df[['C', 'D']])
|
||
|
|
||
|
# With single-level Index (GH 16263)
|
||
|
result = df.set_index('A').reset_index(level=levels[0])
|
||
|
tm.assert_frame_equal(result, df)
|
||
|
|
||
|
result = df.set_index('A').reset_index(level=levels[:1])
|
||
|
tm.assert_frame_equal(result, df)
|
||
|
|
||
|
result = df.set_index(['A']).reset_index(level=levels[0],
|
||
|
drop=True)
|
||
|
tm.assert_frame_equal(result, df[['B', 'C', 'D']])
|
||
|
|
||
|
# Missing levels - for both MultiIndex and single-level Index:
|
||
|
for idx_lev in ['A', 'B'], ['A']:
|
||
|
with tm.assert_raises_regex(KeyError, 'Level E '):
|
||
|
df.set_index(idx_lev).reset_index(level=['A', 'E'])
|
||
|
with tm.assert_raises_regex(IndexError, 'Too many levels'):
|
||
|
df.set_index(idx_lev).reset_index(level=[0, 1, 2])
|
||
|
|
||
|
def test_reset_index_right_dtype(self):
|
||
|
time = np.arange(0.0, 10, np.sqrt(2) / 2)
|
||
|
s1 = Series((9.81 * time ** 2) / 2,
|
||
|
index=Index(time, name='time'),
|
||
|
name='speed')
|
||
|
df = DataFrame(s1)
|
||
|
|
||
|
resetted = s1.reset_index()
|
||
|
assert resetted['time'].dtype == np.float64
|
||
|
|
||
|
resetted = df.reset_index()
|
||
|
assert resetted['time'].dtype == np.float64
|
||
|
|
||
|
def test_reset_index_multiindex_col(self):
|
||
|
vals = np.random.randn(3, 3).astype(object)
|
||
|
idx = ['x', 'y', 'z']
|
||
|
full = np.hstack(([[x] for x in idx], vals))
|
||
|
df = DataFrame(vals, Index(idx, name='a'),
|
||
|
columns=[['b', 'b', 'c'], ['mean', 'median', 'mean']])
|
||
|
rs = df.reset_index()
|
||
|
xp = DataFrame(full, columns=[['a', 'b', 'b', 'c'],
|
||
|
['', 'mean', 'median', 'mean']])
|
||
|
assert_frame_equal(rs, xp)
|
||
|
|
||
|
rs = df.reset_index(col_fill=None)
|
||
|
xp = DataFrame(full, columns=[['a', 'b', 'b', 'c'],
|
||
|
['a', 'mean', 'median', 'mean']])
|
||
|
assert_frame_equal(rs, xp)
|
||
|
|
||
|
rs = df.reset_index(col_level=1, col_fill='blah')
|
||
|
xp = DataFrame(full, columns=[['blah', 'b', 'b', 'c'],
|
||
|
['a', 'mean', 'median', 'mean']])
|
||
|
assert_frame_equal(rs, xp)
|
||
|
|
||
|
df = DataFrame(vals,
|
||
|
MultiIndex.from_arrays([[0, 1, 2], ['x', 'y', 'z']],
|
||
|
names=['d', 'a']),
|
||
|
columns=[['b', 'b', 'c'], ['mean', 'median', 'mean']])
|
||
|
rs = df.reset_index('a', )
|
||
|
xp = DataFrame(full, Index([0, 1, 2], name='d'),
|
||
|
columns=[['a', 'b', 'b', 'c'],
|
||
|
['', 'mean', 'median', 'mean']])
|
||
|
assert_frame_equal(rs, xp)
|
||
|
|
||
|
rs = df.reset_index('a', col_fill=None)
|
||
|
xp = DataFrame(full, Index(lrange(3), name='d'),
|
||
|
columns=[['a', 'b', 'b', 'c'],
|
||
|
['a', 'mean', 'median', 'mean']])
|
||
|
assert_frame_equal(rs, xp)
|
||
|
|
||
|
rs = df.reset_index('a', col_fill='blah', col_level=1)
|
||
|
xp = DataFrame(full, Index(lrange(3), name='d'),
|
||
|
columns=[['blah', 'b', 'b', 'c'],
|
||
|
['a', 'mean', 'median', 'mean']])
|
||
|
assert_frame_equal(rs, xp)
|
||
|
|
||
|
def test_reset_index_multiindex_nan(self):
|
||
|
# GH6322, testing reset_index on MultiIndexes
|
||
|
# when we have a nan or all nan
|
||
|
df = pd.DataFrame({'A': ['a', 'b', 'c'],
|
||
|
'B': [0, 1, np.nan],
|
||
|
'C': np.random.rand(3)})
|
||
|
rs = df.set_index(['A', 'B']).reset_index()
|
||
|
assert_frame_equal(rs, df)
|
||
|
|
||
|
df = pd.DataFrame({'A': [np.nan, 'b', 'c'],
|
||
|
'B': [0, 1, 2],
|
||
|
'C': np.random.rand(3)})
|
||
|
rs = df.set_index(['A', 'B']).reset_index()
|
||
|
assert_frame_equal(rs, df)
|
||
|
|
||
|
df = pd.DataFrame({'A': ['a', 'b', 'c'],
|
||
|
'B': [0, 1, 2],
|
||
|
'C': [np.nan, 1.1, 2.2]})
|
||
|
rs = df.set_index(['A', 'B']).reset_index()
|
||
|
assert_frame_equal(rs, df)
|
||
|
|
||
|
df = pd.DataFrame({'A': ['a', 'b', 'c'],
|
||
|
'B': [np.nan, np.nan, np.nan],
|
||
|
'C': np.random.rand(3)})
|
||
|
rs = df.set_index(['A', 'B']).reset_index()
|
||
|
assert_frame_equal(rs, df)
|
||
|
|
||
|
def test_reset_index_with_datetimeindex_cols(self):
|
||
|
# GH5818
|
||
|
#
|
||
|
df = pd.DataFrame([[1, 2], [3, 4]],
|
||
|
columns=pd.date_range('1/1/2013', '1/2/2013'),
|
||
|
index=['A', 'B'])
|
||
|
|
||
|
result = df.reset_index()
|
||
|
expected = pd.DataFrame([['A', 1, 2], ['B', 3, 4]],
|
||
|
columns=['index', datetime(2013, 1, 1),
|
||
|
datetime(2013, 1, 2)])
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_reset_index_range(self):
|
||
|
# GH 12071
|
||
|
df = pd.DataFrame([[0, 0], [1, 1]], columns=['A', 'B'],
|
||
|
index=RangeIndex(stop=2))
|
||
|
result = df.reset_index()
|
||
|
assert isinstance(result.index, RangeIndex)
|
||
|
expected = pd.DataFrame([[0, 0, 0], [1, 1, 1]],
|
||
|
columns=['index', 'A', 'B'],
|
||
|
index=RangeIndex(stop=2))
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_set_index_names(self):
|
||
|
df = pd.util.testing.makeDataFrame()
|
||
|
df.index.name = 'name'
|
||
|
|
||
|
assert df.set_index(df.index).index.names == ['name']
|
||
|
|
||
|
mi = MultiIndex.from_arrays(df[['A', 'B']].T.values, names=['A', 'B'])
|
||
|
mi2 = MultiIndex.from_arrays(df[['A', 'B', 'A', 'B']].T.values,
|
||
|
names=['A', 'B', 'C', 'D'])
|
||
|
|
||
|
df = df.set_index(['A', 'B'])
|
||
|
|
||
|
assert df.set_index(df.index).index.names == ['A', 'B']
|
||
|
|
||
|
# Check that set_index isn't converting a MultiIndex into an Index
|
||
|
assert isinstance(df.set_index(df.index).index, MultiIndex)
|
||
|
|
||
|
# Check actual equality
|
||
|
tm.assert_index_equal(df.set_index(df.index).index, mi)
|
||
|
|
||
|
idx2 = df.index.rename(['C', 'D'])
|
||
|
|
||
|
# Check that [MultiIndex, MultiIndex] yields a MultiIndex rather
|
||
|
# than a pair of tuples
|
||
|
assert isinstance(df.set_index([df.index, idx2]).index, MultiIndex)
|
||
|
|
||
|
# Check equality
|
||
|
tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2)
|
||
|
|
||
|
def test_rename_objects(self):
|
||
|
renamed = self.mixed_frame.rename(columns=str.upper)
|
||
|
|
||
|
assert 'FOO' in renamed
|
||
|
assert 'foo' not in renamed
|
||
|
|
||
|
def test_rename_axis_style(self):
|
||
|
# https://github.com/pandas-dev/pandas/issues/12392
|
||
|
df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['X', 'Y'])
|
||
|
expected = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, index=['X', 'Y'])
|
||
|
|
||
|
result = df.rename(str.lower, axis=1)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.rename(str.lower, axis='columns')
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.rename({"A": 'a', 'B': 'b'}, axis=1)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.rename({"A": 'a', 'B': 'b'}, axis='columns')
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
# Index
|
||
|
expected = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['x', 'y'])
|
||
|
result = df.rename(str.lower, axis=0)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.rename(str.lower, axis='index')
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.rename({'X': 'x', 'Y': 'y'}, axis=0)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.rename({'X': 'x', 'Y': 'y'}, axis='index')
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.rename(mapper=str.lower, axis='index')
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_rename_mapper_multi(self):
|
||
|
df = pd.DataFrame({"A": ['a', 'b'], "B": ['c', 'd'],
|
||
|
'C': [1, 2]}).set_index(["A", "B"])
|
||
|
result = df.rename(str.upper)
|
||
|
expected = df.rename(index=str.upper)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_rename_positional_named(self):
|
||
|
# https://github.com/pandas-dev/pandas/issues/12392
|
||
|
df = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, index=['X', 'Y'])
|
||
|
result = df.rename(str.lower, columns=str.upper)
|
||
|
expected = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['x', 'y'])
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_rename_axis_style_raises(self):
|
||
|
# https://github.com/pandas-dev/pandas/issues/12392
|
||
|
df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['0', '1'])
|
||
|
|
||
|
# Named target and axis
|
||
|
with tm.assert_raises_regex(TypeError, None):
|
||
|
df.rename(index=str.lower, axis=1)
|
||
|
|
||
|
with tm.assert_raises_regex(TypeError, None):
|
||
|
df.rename(index=str.lower, axis='columns')
|
||
|
|
||
|
with tm.assert_raises_regex(TypeError, None):
|
||
|
df.rename(index=str.lower, axis='columns')
|
||
|
|
||
|
with tm.assert_raises_regex(TypeError, None):
|
||
|
df.rename(columns=str.lower, axis='columns')
|
||
|
|
||
|
with tm.assert_raises_regex(TypeError, None):
|
||
|
df.rename(index=str.lower, axis=0)
|
||
|
|
||
|
# Multiple targets and axis
|
||
|
with tm.assert_raises_regex(TypeError, None):
|
||
|
df.rename(str.lower, str.lower, axis='columns')
|
||
|
|
||
|
# Too many targets
|
||
|
with tm.assert_raises_regex(TypeError, None):
|
||
|
df.rename(str.lower, str.lower, str.lower)
|
||
|
|
||
|
# Duplicates
|
||
|
with tm.assert_raises_regex(TypeError, "multiple values"):
|
||
|
df.rename(id, mapper=id)
|
||
|
|
||
|
def test_reindex_api_equivalence(self):
|
||
|
# equivalence of the labels/axis and index/columns API's
|
||
|
df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]],
|
||
|
index=['a', 'b', 'c'],
|
||
|
columns=['d', 'e', 'f'])
|
||
|
|
||
|
res1 = df.reindex(['b', 'a'])
|
||
|
res2 = df.reindex(index=['b', 'a'])
|
||
|
res3 = df.reindex(labels=['b', 'a'])
|
||
|
res4 = df.reindex(labels=['b', 'a'], axis=0)
|
||
|
res5 = df.reindex(['b', 'a'], axis=0)
|
||
|
for res in [res2, res3, res4, res5]:
|
||
|
tm.assert_frame_equal(res1, res)
|
||
|
|
||
|
res1 = df.reindex(columns=['e', 'd'])
|
||
|
res2 = df.reindex(['e', 'd'], axis=1)
|
||
|
res3 = df.reindex(labels=['e', 'd'], axis=1)
|
||
|
for res in [res2, res3]:
|
||
|
tm.assert_frame_equal(res1, res)
|
||
|
|
||
|
res1 = df.reindex(index=['b', 'a'], columns=['e', 'd'])
|
||
|
res2 = df.reindex(columns=['e', 'd'], index=['b', 'a'])
|
||
|
res3 = df.reindex(labels=['b', 'a'], axis=0).reindex(labels=['e', 'd'],
|
||
|
axis=1)
|
||
|
for res in [res2, res3]:
|
||
|
tm.assert_frame_equal(res1, res)
|
||
|
|
||
|
def test_rename_positional(self):
|
||
|
df = pd.DataFrame(columns=['A', 'B'])
|
||
|
with tm.assert_produces_warning(FutureWarning) as rec:
|
||
|
result = df.rename(None, str.lower)
|
||
|
expected = pd.DataFrame(columns=['a', 'b'])
|
||
|
assert_frame_equal(result, expected)
|
||
|
assert len(rec) == 1
|
||
|
message = str(rec[0].message)
|
||
|
assert 'rename' in message
|
||
|
assert 'Use named arguments' in message
|
||
|
|
||
|
def test_assign_columns(self):
|
||
|
self.frame['hi'] = 'there'
|
||
|
|
||
|
frame = self.frame.copy()
|
||
|
frame.columns = ['foo', 'bar', 'baz', 'quux', 'foo2']
|
||
|
assert_series_equal(self.frame['C'], frame['baz'], check_names=False)
|
||
|
assert_series_equal(self.frame['hi'], frame['foo2'], check_names=False)
|
||
|
|
||
|
def test_set_index_preserve_categorical_dtype(self):
|
||
|
# GH13743, GH13854
|
||
|
df = DataFrame({'A': [1, 2, 1, 1, 2],
|
||
|
'B': [10, 16, 22, 28, 34],
|
||
|
'C1': pd.Categorical(list("abaab"),
|
||
|
categories=list("bac"),
|
||
|
ordered=False),
|
||
|
'C2': pd.Categorical(list("abaab"),
|
||
|
categories=list("bac"),
|
||
|
ordered=True)})
|
||
|
for cols in ['C1', 'C2', ['A', 'C1'], ['A', 'C2'], ['C1', 'C2']]:
|
||
|
result = df.set_index(cols).reset_index()
|
||
|
result = result.reindex(columns=df.columns)
|
||
|
tm.assert_frame_equal(result, df)
|
||
|
|
||
|
def test_ambiguous_warns(self):
|
||
|
df = pd.DataFrame({"A": [1, 2]})
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
df.rename(id, id)
|
||
|
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
df.rename({0: 10}, {"A": "B"})
|
||
|
|
||
|
@pytest.mark.skipif(PY2, reason="inspect.signature")
|
||
|
def test_rename_signature(self):
|
||
|
sig = inspect.signature(pd.DataFrame.rename)
|
||
|
parameters = set(sig.parameters)
|
||
|
assert parameters == {"self", "mapper", "index", "columns", "axis",
|
||
|
"inplace", "copy", "level"}
|
||
|
|
||
|
@pytest.mark.skipif(PY2, reason="inspect.signature")
|
||
|
def test_reindex_signature(self):
|
||
|
sig = inspect.signature(pd.DataFrame.reindex)
|
||
|
parameters = set(sig.parameters)
|
||
|
assert parameters == {"self", "labels", "index", "columns", "axis",
|
||
|
"limit", "copy", "level", "method",
|
||
|
"fill_value", "tolerance"}
|
||
|
|
||
|
|
||
|
class TestIntervalIndex(object):
|
||
|
|
||
|
def test_setitem(self):
|
||
|
|
||
|
df = DataFrame({'A': range(10)})
|
||
|
s = pd.cut(df.A, 5)
|
||
|
assert isinstance(s.cat.categories, IntervalIndex)
|
||
|
|
||
|
# B & D end up as Categoricals
|
||
|
# the remainer are converted to in-line objects
|
||
|
# contining an IntervalIndex.values
|
||
|
df['B'] = s
|
||
|
df['C'] = np.array(s)
|
||
|
df['D'] = s.values
|
||
|
df['E'] = np.array(s.values)
|
||
|
|
||
|
assert is_categorical_dtype(df['B'])
|
||
|
assert is_interval_dtype(df['B'].cat.categories)
|
||
|
assert is_categorical_dtype(df['D'])
|
||
|
assert is_interval_dtype(df['D'].cat.categories)
|
||
|
|
||
|
assert is_object_dtype(df['C'])
|
||
|
assert is_object_dtype(df['E'])
|
||
|
|
||
|
# they compare equal as Index
|
||
|
# when converted to numpy objects
|
||
|
c = lambda x: Index(np.array(x))
|
||
|
tm.assert_index_equal(c(df.B), c(df.B), check_names=False)
|
||
|
tm.assert_index_equal(c(df.B), c(df.C), check_names=False)
|
||
|
tm.assert_index_equal(c(df.B), c(df.D), check_names=False)
|
||
|
tm.assert_index_equal(c(df.B), c(df.D), check_names=False)
|
||
|
|
||
|
# B & D are the same Series
|
||
|
tm.assert_series_equal(df['B'], df['B'], check_names=False)
|
||
|
tm.assert_series_equal(df['B'], df['D'], check_names=False)
|
||
|
|
||
|
# C & E are the same Series
|
||
|
tm.assert_series_equal(df['C'], df['C'], check_names=False)
|
||
|
tm.assert_series_equal(df['C'], df['E'], check_names=False)
|
||
|
|
||
|
def test_set_reset_index(self):
|
||
|
|
||
|
df = DataFrame({'A': range(10)})
|
||
|
s = pd.cut(df.A, 5)
|
||
|
df['B'] = s
|
||
|
df = df.set_index('B')
|
||
|
|
||
|
df = df.reset_index()
|
||
|
|
||
|
def test_set_axis_inplace(self):
|
||
|
# GH14636
|
||
|
df = DataFrame({'A': [1.1, 2.2, 3.3],
|
||
|
'B': [5.0, 6.1, 7.2],
|
||
|
'C': [4.4, 5.5, 6.6]},
|
||
|
index=[2010, 2011, 2012])
|
||
|
|
||
|
expected = {0: df.copy(),
|
||
|
1: df.copy()}
|
||
|
expected[0].index = list('abc')
|
||
|
expected[1].columns = list('abc')
|
||
|
expected['index'] = expected[0]
|
||
|
expected['columns'] = expected[1]
|
||
|
|
||
|
for axis in expected:
|
||
|
# inplace=True
|
||
|
# The FutureWarning comes from the fact that we would like to have
|
||
|
# inplace default to False some day
|
||
|
for inplace, warn in (None, FutureWarning), (True, None):
|
||
|
kwargs = {'inplace': inplace}
|
||
|
|
||
|
result = df.copy()
|
||
|
with tm.assert_produces_warning(warn):
|
||
|
result.set_axis(list('abc'), axis=axis, **kwargs)
|
||
|
tm.assert_frame_equal(result, expected[axis])
|
||
|
|
||
|
# inplace=False
|
||
|
result = df.set_axis(list('abc'), axis=axis, inplace=False)
|
||
|
tm.assert_frame_equal(expected[axis], result)
|
||
|
|
||
|
# omitting the "axis" parameter
|
||
|
with tm.assert_produces_warning(None):
|
||
|
result = df.set_axis(list('abc'), inplace=False)
|
||
|
tm.assert_frame_equal(result, expected[0])
|
||
|
|
||
|
# wrong values for the "axis" parameter
|
||
|
for axis in 3, 'foo':
|
||
|
with tm.assert_raises_regex(ValueError, 'No axis named'):
|
||
|
df.set_axis(list('abc'), axis=axis, inplace=False)
|
||
|
|
||
|
def test_set_axis_prior_to_deprecation_signature(self):
|
||
|
df = DataFrame({'A': [1.1, 2.2, 3.3],
|
||
|
'B': [5.0, 6.1, 7.2],
|
||
|
'C': [4.4, 5.5, 6.6]},
|
||
|
index=[2010, 2011, 2012])
|
||
|
|
||
|
expected = {0: df.copy(),
|
||
|
1: df.copy()}
|
||
|
expected[0].index = list('abc')
|
||
|
expected[1].columns = list('abc')
|
||
|
expected['index'] = expected[0]
|
||
|
expected['columns'] = expected[1]
|
||
|
|
||
|
# old signature
|
||
|
for axis in expected:
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
result = df.set_axis(axis, list('abc'), inplace=False)
|
||
|
tm.assert_frame_equal(result, expected[axis])
|