laywerrobot/lib/python3.6/site-packages/pandas/tests/frame/test_alter_axes.py
2020-08-27 21:55:39 +02:00

1166 lines
44 KiB
Python

# -*- coding: utf-8 -*-
from __future__ import print_function
import inspect
import pytest
from datetime import datetime, timedelta
import numpy as np
from pandas.compat import lrange, PY2
from pandas import (DataFrame, Series, Index, MultiIndex,
RangeIndex, date_range, IntervalIndex,
to_datetime)
from pandas.core.dtypes.common import (
is_object_dtype,
is_categorical_dtype,
is_interval_dtype)
import pandas as pd
from pandas.util.testing import assert_series_equal, assert_frame_equal
import pandas.util.testing as tm
from pandas.tests.frame.common import TestData
class TestDataFrameAlterAxes(TestData):
def test_set_index(self):
idx = Index(np.arange(len(self.mixed_frame)))
# cache it
_ = self.mixed_frame['foo'] # noqa
self.mixed_frame.index = idx
assert self.mixed_frame['foo'].index is idx
with tm.assert_raises_regex(ValueError, 'Length mismatch'):
self.mixed_frame.index = idx[::2]
def test_set_index_cast(self):
# issue casting an index then set_index
df = DataFrame({'A': [1.1, 2.2, 3.3], 'B': [5.0, 6.1, 7.2]},
index=[2010, 2011, 2012])
expected = df.loc[2010]
new_index = df.index.astype(np.int32)
df.index = new_index
result = df.loc[2010]
assert_series_equal(result, expected)
def test_set_index2(self):
df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'],
'B': ['one', 'two', 'three', 'one', 'two'],
'C': ['a', 'b', 'c', 'd', 'e'],
'D': np.random.randn(5),
'E': np.random.randn(5)})
# new object, single-column
result = df.set_index('C')
result_nodrop = df.set_index('C', drop=False)
index = Index(df['C'], name='C')
expected = df.loc[:, ['A', 'B', 'D', 'E']]
expected.index = index
expected_nodrop = df.copy()
expected_nodrop.index = index
assert_frame_equal(result, expected)
assert_frame_equal(result_nodrop, expected_nodrop)
assert result.index.name == index.name
# inplace, single
df2 = df.copy()
df2.set_index('C', inplace=True)
assert_frame_equal(df2, expected)
df3 = df.copy()
df3.set_index('C', drop=False, inplace=True)
assert_frame_equal(df3, expected_nodrop)
# create new object, multi-column
result = df.set_index(['A', 'B'])
result_nodrop = df.set_index(['A', 'B'], drop=False)
index = MultiIndex.from_arrays([df['A'], df['B']], names=['A', 'B'])
expected = df.loc[:, ['C', 'D', 'E']]
expected.index = index
expected_nodrop = df.copy()
expected_nodrop.index = index
assert_frame_equal(result, expected)
assert_frame_equal(result_nodrop, expected_nodrop)
assert result.index.names == index.names
# inplace
df2 = df.copy()
df2.set_index(['A', 'B'], inplace=True)
assert_frame_equal(df2, expected)
df3 = df.copy()
df3.set_index(['A', 'B'], drop=False, inplace=True)
assert_frame_equal(df3, expected_nodrop)
# corner case
with tm.assert_raises_regex(ValueError,
'Index has duplicate keys'):
df.set_index('A', verify_integrity=True)
# append
result = df.set_index(['A', 'B'], append=True)
xp = df.reset_index().set_index(['index', 'A', 'B'])
xp.index.names = [None, 'A', 'B']
assert_frame_equal(result, xp)
# append to existing multiindex
rdf = df.set_index(['A'], append=True)
rdf = rdf.set_index(['B', 'C'], append=True)
expected = df.set_index(['A', 'B', 'C'], append=True)
assert_frame_equal(rdf, expected)
# Series
result = df.set_index(df.C)
assert result.index.name == 'C'
@pytest.mark.parametrize(
'level', ['a', pd.Series(range(0, 8, 2), name='a')])
def test_set_index_duplicate_names(self, level):
# GH18872 - GH19029
df = pd.DataFrame(np.arange(8).reshape(4, 2), columns=['a', 'b'])
# Pass an existing level name:
df.index.name = 'a'
expected = pd.MultiIndex.from_tuples([(0, 0), (1, 2), (2, 4), (3, 6)],
names=['a', 'a'])
result = df.set_index(level, append=True)
tm.assert_index_equal(result.index, expected)
result = df.set_index([level], append=True)
tm.assert_index_equal(result.index, expected)
# Pass twice the same level name (only works with passing actual data)
if isinstance(level, pd.Series):
result = df.set_index([level, level])
expected = pd.MultiIndex.from_tuples(
[(0, 0), (2, 2), (4, 4), (6, 6)], names=['a', 'a'])
tm.assert_index_equal(result.index, expected)
def test_set_index_nonuniq(self):
df = DataFrame({'A': ['foo', 'foo', 'foo', 'bar', 'bar'],
'B': ['one', 'two', 'three', 'one', 'two'],
'C': ['a', 'b', 'c', 'd', 'e'],
'D': np.random.randn(5),
'E': np.random.randn(5)})
with tm.assert_raises_regex(ValueError,
'Index has duplicate keys'):
df.set_index('A', verify_integrity=True, inplace=True)
assert 'A' in df
def test_set_index_bug(self):
# GH1590
df = DataFrame({'val': [0, 1, 2], 'key': ['a', 'b', 'c']})
xp = DataFrame({'val': [1, 2]},
Index(['b', 'c'], name='key'))
df2 = df.loc[df.index.map(lambda indx: indx >= 1)]
rs = df2.set_index('key')
assert_frame_equal(rs, xp)
def test_set_index_pass_arrays(self):
df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
'foo', 'bar', 'foo', 'foo'],
'B': ['one', 'one', 'two', 'three',
'two', 'two', 'one', 'three'],
'C': np.random.randn(8),
'D': np.random.randn(8)})
# multiple columns
result = df.set_index(['A', df['B'].values], drop=False)
expected = df.set_index(['A', 'B'], drop=False)
# TODO should set_index check_names ?
assert_frame_equal(result, expected, check_names=False)
def test_construction_with_categorical_index(self):
ci = tm.makeCategoricalIndex(10)
# with Categorical
df = DataFrame({'A': np.random.randn(10),
'B': ci.values})
idf = df.set_index('B')
str(idf)
tm.assert_index_equal(idf.index, ci, check_names=False)
assert idf.index.name == 'B'
# from a CategoricalIndex
df = DataFrame({'A': np.random.randn(10),
'B': ci})
idf = df.set_index('B')
str(idf)
tm.assert_index_equal(idf.index, ci, check_names=False)
assert idf.index.name == 'B'
idf = df.set_index('B').reset_index().set_index('B')
str(idf)
tm.assert_index_equal(idf.index, ci, check_names=False)
assert idf.index.name == 'B'
new_df = idf.reset_index()
new_df.index = df.B
tm.assert_index_equal(new_df.index, ci, check_names=False)
assert idf.index.name == 'B'
def test_set_index_cast_datetimeindex(self):
df = DataFrame({'A': [datetime(2000, 1, 1) + timedelta(i)
for i in range(1000)],
'B': np.random.randn(1000)})
idf = df.set_index('A')
assert isinstance(idf.index, pd.DatetimeIndex)
# don't cast a DatetimeIndex WITH a tz, leave as object
# GH 6032
i = (pd.DatetimeIndex(
to_datetime(['2013-1-1 13:00',
'2013-1-2 14:00'], errors="raise"))
.tz_localize('US/Pacific'))
df = DataFrame(np.random.randn(2, 1), columns=['A'])
expected = Series(np.array([pd.Timestamp('2013-01-01 13:00:00-0800',
tz='US/Pacific'),
pd.Timestamp('2013-01-02 14:00:00-0800',
tz='US/Pacific')],
dtype="object"))
# convert index to series
result = Series(i)
assert_series_equal(result, expected)
# assignt to frame
df['B'] = i
result = df['B']
assert_series_equal(result, expected, check_names=False)
assert result.name == 'B'
# keep the timezone
result = i.to_series(keep_tz=True)
assert_series_equal(result.reset_index(drop=True), expected)
# convert to utc
df['C'] = i.to_series().reset_index(drop=True)
result = df['C']
comp = pd.DatetimeIndex(expected.values)
comp = comp.tz_localize(None)
tm.assert_numpy_array_equal(result.values, comp.values)
# list of datetimes with a tz
df['D'] = i.to_pydatetime()
result = df['D']
assert_series_equal(result, expected, check_names=False)
assert result.name == 'D'
# GH 6785
# set the index manually
import pytz
df = DataFrame(
[{'ts': datetime(2014, 4, 1, tzinfo=pytz.utc), 'foo': 1}])
expected = df.set_index('ts')
df.index = df['ts']
df.pop('ts')
assert_frame_equal(df, expected)
def test_reset_index_tz(self, tz_aware_fixture):
# GH 3950
# reset_index with single level
tz = tz_aware_fixture
idx = pd.date_range('1/1/2011', periods=5,
freq='D', tz=tz, name='idx')
df = pd.DataFrame(
{'a': range(5), 'b': ['A', 'B', 'C', 'D', 'E']}, index=idx)
expected = pd.DataFrame({'idx': [datetime(2011, 1, 1),
datetime(2011, 1, 2),
datetime(2011, 1, 3),
datetime(2011, 1, 4),
datetime(2011, 1, 5)],
'a': range(5),
'b': ['A', 'B', 'C', 'D', 'E']},
columns=['idx', 'a', 'b'])
expected['idx'] = expected['idx'].apply(
lambda d: pd.Timestamp(d, tz=tz))
assert_frame_equal(df.reset_index(), expected)
def test_set_index_timezone(self):
# GH 12358
# tz-aware Series should retain the tz
i = pd.to_datetime(["2014-01-01 10:10:10"],
utc=True).tz_convert('Europe/Rome')
df = DataFrame({'i': i})
assert df.set_index(i).index[0].hour == 11
assert pd.DatetimeIndex(pd.Series(df.i))[0].hour == 11
assert df.set_index(df.i).index[0].hour == 11
def test_set_index_dst(self):
di = pd.date_range('2006-10-29 00:00:00', periods=3,
freq='H', tz='US/Pacific')
df = pd.DataFrame(data={'a': [0, 1, 2], 'b': [3, 4, 5]},
index=di).reset_index()
# single level
res = df.set_index('index')
exp = pd.DataFrame(data={'a': [0, 1, 2], 'b': [3, 4, 5]},
index=pd.Index(di, name='index'))
tm.assert_frame_equal(res, exp)
# GH 12920
res = df.set_index(['index', 'a'])
exp_index = pd.MultiIndex.from_arrays([di, [0, 1, 2]],
names=['index', 'a'])
exp = pd.DataFrame({'b': [3, 4, 5]}, index=exp_index)
tm.assert_frame_equal(res, exp)
def test_reset_index_with_intervals(self):
idx = pd.IntervalIndex.from_breaks(np.arange(11), name='x')
original = pd.DataFrame({'x': idx, 'y': np.arange(10)})[['x', 'y']]
result = original.set_index('x')
expected = pd.DataFrame({'y': np.arange(10)}, index=idx)
assert_frame_equal(result, expected)
result2 = result.reset_index()
assert_frame_equal(result2, original)
def test_set_index_multiindexcolumns(self):
columns = MultiIndex.from_tuples([('foo', 1), ('foo', 2), ('bar', 1)])
df = DataFrame(np.random.randn(3, 3), columns=columns)
rs = df.set_index(df.columns[0])
xp = df.iloc[:, 1:]
xp.index = df.iloc[:, 0].values
xp.index.names = [df.columns[0]]
assert_frame_equal(rs, xp)
def test_set_index_empty_column(self):
# #1971
df = DataFrame([
dict(a=1, p=0),
dict(a=2, m=10),
dict(a=3, m=11, p=20),
dict(a=4, m=12, p=21)
], columns=('a', 'm', 'p', 'x'))
# it works!
result = df.set_index(['a', 'x'])
repr(result)
def test_set_columns(self):
cols = Index(np.arange(len(self.mixed_frame.columns)))
self.mixed_frame.columns = cols
with tm.assert_raises_regex(ValueError, 'Length mismatch'):
self.mixed_frame.columns = cols[::2]
def test_dti_set_index_reindex(self):
# GH 6631
df = DataFrame(np.random.random(6))
idx1 = date_range('2011/01/01', periods=6, freq='M', tz='US/Eastern')
idx2 = date_range('2013', periods=6, freq='A', tz='Asia/Tokyo')
df = df.set_index(idx1)
tm.assert_index_equal(df.index, idx1)
df = df.reindex(idx2)
tm.assert_index_equal(df.index, idx2)
# 11314
# with tz
index = date_range(datetime(2015, 10, 1),
datetime(2015, 10, 1, 23),
freq='H', tz='US/Eastern')
df = DataFrame(np.random.randn(24, 1), columns=['a'], index=index)
new_index = date_range(datetime(2015, 10, 2),
datetime(2015, 10, 2, 23),
freq='H', tz='US/Eastern')
# TODO: unused?
result = df.set_index(new_index) # noqa
assert new_index.freq == index.freq
# Renaming
def test_rename(self):
mapping = {
'A': 'a',
'B': 'b',
'C': 'c',
'D': 'd'
}
renamed = self.frame.rename(columns=mapping)
renamed2 = self.frame.rename(columns=str.lower)
assert_frame_equal(renamed, renamed2)
assert_frame_equal(renamed2.rename(columns=str.upper),
self.frame, check_names=False)
# index
data = {
'A': {'foo': 0, 'bar': 1}
}
# gets sorted alphabetical
df = DataFrame(data)
renamed = df.rename(index={'foo': 'bar', 'bar': 'foo'})
tm.assert_index_equal(renamed.index, pd.Index(['foo', 'bar']))
renamed = df.rename(index=str.upper)
tm.assert_index_equal(renamed.index, pd.Index(['BAR', 'FOO']))
# have to pass something
pytest.raises(TypeError, self.frame.rename)
# partial columns
renamed = self.frame.rename(columns={'C': 'foo', 'D': 'bar'})
tm.assert_index_equal(renamed.columns,
pd.Index(['A', 'B', 'foo', 'bar']))
# other axis
renamed = self.frame.T.rename(index={'C': 'foo', 'D': 'bar'})
tm.assert_index_equal(renamed.index,
pd.Index(['A', 'B', 'foo', 'bar']))
# index with name
index = Index(['foo', 'bar'], name='name')
renamer = DataFrame(data, index=index)
renamed = renamer.rename(index={'foo': 'bar', 'bar': 'foo'})
tm.assert_index_equal(renamed.index,
pd.Index(['bar', 'foo'], name='name'))
assert renamed.index.name == renamer.index.name
def test_rename_axis_inplace(self):
# GH 15704
frame = self.frame.copy()
expected = frame.rename_axis('foo')
result = frame.copy()
no_return = result.rename_axis('foo', inplace=True)
assert no_return is None
assert_frame_equal(result, expected)
expected = frame.rename_axis('bar', axis=1)
result = frame.copy()
no_return = result.rename_axis('bar', axis=1, inplace=True)
assert no_return is None
assert_frame_equal(result, expected)
def test_rename_axis_warns(self):
# https://github.com/pandas-dev/pandas/issues/17833
df = pd.DataFrame({"A": [1, 2], "B": [1, 2]})
with tm.assert_produces_warning(FutureWarning) as w:
df.rename_axis(id, axis=0)
assert 'rename' in str(w[0].message)
with tm.assert_produces_warning(FutureWarning) as w:
df.rename_axis({0: 10, 1: 20}, axis=0)
assert 'rename' in str(w[0].message)
with tm.assert_produces_warning(FutureWarning) as w:
df.rename_axis(id, axis=1)
assert 'rename' in str(w[0].message)
with tm.assert_produces_warning(FutureWarning) as w:
df['A'].rename_axis(id)
assert 'rename' in str(w[0].message)
def test_rename_multiindex(self):
tuples_index = [('foo1', 'bar1'), ('foo2', 'bar2')]
tuples_columns = [('fizz1', 'buzz1'), ('fizz2', 'buzz2')]
index = MultiIndex.from_tuples(tuples_index, names=['foo', 'bar'])
columns = MultiIndex.from_tuples(
tuples_columns, names=['fizz', 'buzz'])
df = DataFrame([(0, 0), (1, 1)], index=index, columns=columns)
#
# without specifying level -> across all levels
renamed = df.rename(index={'foo1': 'foo3', 'bar2': 'bar3'},
columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'})
new_index = MultiIndex.from_tuples([('foo3', 'bar1'),
('foo2', 'bar3')],
names=['foo', 'bar'])
new_columns = MultiIndex.from_tuples([('fizz3', 'buzz1'),
('fizz2', 'buzz3')],
names=['fizz', 'buzz'])
tm.assert_index_equal(renamed.index, new_index)
tm.assert_index_equal(renamed.columns, new_columns)
assert renamed.index.names == df.index.names
assert renamed.columns.names == df.columns.names
#
# with specifying a level (GH13766)
# dict
new_columns = MultiIndex.from_tuples([('fizz3', 'buzz1'),
('fizz2', 'buzz2')],
names=['fizz', 'buzz'])
renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'},
level=0)
tm.assert_index_equal(renamed.columns, new_columns)
renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'},
level='fizz')
tm.assert_index_equal(renamed.columns, new_columns)
new_columns = MultiIndex.from_tuples([('fizz1', 'buzz1'),
('fizz2', 'buzz3')],
names=['fizz', 'buzz'])
renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'},
level=1)
tm.assert_index_equal(renamed.columns, new_columns)
renamed = df.rename(columns={'fizz1': 'fizz3', 'buzz2': 'buzz3'},
level='buzz')
tm.assert_index_equal(renamed.columns, new_columns)
# function
func = str.upper
new_columns = MultiIndex.from_tuples([('FIZZ1', 'buzz1'),
('FIZZ2', 'buzz2')],
names=['fizz', 'buzz'])
renamed = df.rename(columns=func, level=0)
tm.assert_index_equal(renamed.columns, new_columns)
renamed = df.rename(columns=func, level='fizz')
tm.assert_index_equal(renamed.columns, new_columns)
new_columns = MultiIndex.from_tuples([('fizz1', 'BUZZ1'),
('fizz2', 'BUZZ2')],
names=['fizz', 'buzz'])
renamed = df.rename(columns=func, level=1)
tm.assert_index_equal(renamed.columns, new_columns)
renamed = df.rename(columns=func, level='buzz')
tm.assert_index_equal(renamed.columns, new_columns)
# index
new_index = MultiIndex.from_tuples([('foo3', 'bar1'),
('foo2', 'bar2')],
names=['foo', 'bar'])
renamed = df.rename(index={'foo1': 'foo3', 'bar2': 'bar3'},
level=0)
tm.assert_index_equal(renamed.index, new_index)
def test_rename_nocopy(self):
renamed = self.frame.rename(columns={'C': 'foo'}, copy=False)
renamed['foo'] = 1.
assert (self.frame['C'] == 1.).all()
def test_rename_inplace(self):
self.frame.rename(columns={'C': 'foo'})
assert 'C' in self.frame
assert 'foo' not in self.frame
c_id = id(self.frame['C'])
frame = self.frame.copy()
frame.rename(columns={'C': 'foo'}, inplace=True)
assert 'C' not in frame
assert 'foo' in frame
assert id(frame['foo']) != c_id
def test_rename_bug(self):
# GH 5344
# rename set ref_locs, and set_index was not resetting
df = DataFrame({0: ['foo', 'bar'], 1: ['bah', 'bas'], 2: [1, 2]})
df = df.rename(columns={0: 'a'})
df = df.rename(columns={1: 'b'})
df = df.set_index(['a', 'b'])
df.columns = ['2001-01-01']
expected = DataFrame([[1], [2]],
index=MultiIndex.from_tuples(
[('foo', 'bah'), ('bar', 'bas')],
names=['a', 'b']),
columns=['2001-01-01'])
assert_frame_equal(df, expected)
def test_rename_bug2(self):
# GH 19497
# rename was changing Index to MultiIndex if Index contained tuples
df = DataFrame(data=np.arange(3), index=[(0, 0), (1, 1), (2, 2)],
columns=["a"])
df = df.rename({(1, 1): (5, 4)}, axis="index")
expected = DataFrame(data=np.arange(3), index=[(0, 0), (5, 4), (2, 2)],
columns=["a"])
assert_frame_equal(df, expected)
def test_reorder_levels(self):
index = MultiIndex(levels=[['bar'], ['one', 'two', 'three'], [0, 1]],
labels=[[0, 0, 0, 0, 0, 0],
[0, 1, 2, 0, 1, 2],
[0, 1, 0, 1, 0, 1]],
names=['L0', 'L1', 'L2'])
df = DataFrame({'A': np.arange(6), 'B': np.arange(6)}, index=index)
# no change, position
result = df.reorder_levels([0, 1, 2])
assert_frame_equal(df, result)
# no change, labels
result = df.reorder_levels(['L0', 'L1', 'L2'])
assert_frame_equal(df, result)
# rotate, position
result = df.reorder_levels([1, 2, 0])
e_idx = MultiIndex(levels=[['one', 'two', 'three'], [0, 1], ['bar']],
labels=[[0, 1, 2, 0, 1, 2],
[0, 1, 0, 1, 0, 1],
[0, 0, 0, 0, 0, 0]],
names=['L1', 'L2', 'L0'])
expected = DataFrame({'A': np.arange(6), 'B': np.arange(6)},
index=e_idx)
assert_frame_equal(result, expected)
result = df.reorder_levels([0, 0, 0])
e_idx = MultiIndex(levels=[['bar'], ['bar'], ['bar']],
labels=[[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0]],
names=['L0', 'L0', 'L0'])
expected = DataFrame({'A': np.arange(6), 'B': np.arange(6)},
index=e_idx)
assert_frame_equal(result, expected)
result = df.reorder_levels(['L0', 'L0', 'L0'])
assert_frame_equal(result, expected)
def test_reset_index(self):
stacked = self.frame.stack()[::2]
stacked = DataFrame({'foo': stacked, 'bar': stacked})
names = ['first', 'second']
stacked.index.names = names
deleveled = stacked.reset_index()
for i, (lev, lab) in enumerate(zip(stacked.index.levels,
stacked.index.labels)):
values = lev.take(lab)
name = names[i]
tm.assert_index_equal(values, Index(deleveled[name]))
stacked.index.names = [None, None]
deleveled2 = stacked.reset_index()
tm.assert_series_equal(deleveled['first'], deleveled2['level_0'],
check_names=False)
tm.assert_series_equal(deleveled['second'], deleveled2['level_1'],
check_names=False)
# default name assigned
rdf = self.frame.reset_index()
exp = pd.Series(self.frame.index.values, name='index')
tm.assert_series_equal(rdf['index'], exp)
# default name assigned, corner case
df = self.frame.copy()
df['index'] = 'foo'
rdf = df.reset_index()
exp = pd.Series(self.frame.index.values, name='level_0')
tm.assert_series_equal(rdf['level_0'], exp)
# but this is ok
self.frame.index.name = 'index'
deleveled = self.frame.reset_index()
tm.assert_series_equal(deleveled['index'],
pd.Series(self.frame.index))
tm.assert_index_equal(deleveled.index,
pd.Index(np.arange(len(deleveled))))
# preserve column names
self.frame.columns.name = 'columns'
resetted = self.frame.reset_index()
assert resetted.columns.name == 'columns'
# only remove certain columns
frame = self.frame.reset_index().set_index(['index', 'A', 'B'])
rs = frame.reset_index(['A', 'B'])
# TODO should reset_index check_names ?
assert_frame_equal(rs, self.frame, check_names=False)
rs = frame.reset_index(['index', 'A', 'B'])
assert_frame_equal(rs, self.frame.reset_index(), check_names=False)
rs = frame.reset_index(['index', 'A', 'B'])
assert_frame_equal(rs, self.frame.reset_index(), check_names=False)
rs = frame.reset_index('A')
xp = self.frame.reset_index().set_index(['index', 'B'])
assert_frame_equal(rs, xp, check_names=False)
# test resetting in place
df = self.frame.copy()
resetted = self.frame.reset_index()
df.reset_index(inplace=True)
assert_frame_equal(df, resetted, check_names=False)
frame = self.frame.reset_index().set_index(['index', 'A', 'B'])
rs = frame.reset_index('A', drop=True)
xp = self.frame.copy()
del xp['A']
xp = xp.set_index(['B'], append=True)
assert_frame_equal(rs, xp, check_names=False)
def test_reset_index_level(self):
df = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]],
columns=['A', 'B', 'C', 'D'])
for levels in ['A', 'B'], [0, 1]:
# With MultiIndex
result = df.set_index(['A', 'B']).reset_index(level=levels[0])
tm.assert_frame_equal(result, df.set_index('B'))
result = df.set_index(['A', 'B']).reset_index(level=levels[:1])
tm.assert_frame_equal(result, df.set_index('B'))
result = df.set_index(['A', 'B']).reset_index(level=levels)
tm.assert_frame_equal(result, df)
result = df.set_index(['A', 'B']).reset_index(level=levels,
drop=True)
tm.assert_frame_equal(result, df[['C', 'D']])
# With single-level Index (GH 16263)
result = df.set_index('A').reset_index(level=levels[0])
tm.assert_frame_equal(result, df)
result = df.set_index('A').reset_index(level=levels[:1])
tm.assert_frame_equal(result, df)
result = df.set_index(['A']).reset_index(level=levels[0],
drop=True)
tm.assert_frame_equal(result, df[['B', 'C', 'D']])
# Missing levels - for both MultiIndex and single-level Index:
for idx_lev in ['A', 'B'], ['A']:
with tm.assert_raises_regex(KeyError, 'Level E '):
df.set_index(idx_lev).reset_index(level=['A', 'E'])
with tm.assert_raises_regex(IndexError, 'Too many levels'):
df.set_index(idx_lev).reset_index(level=[0, 1, 2])
def test_reset_index_right_dtype(self):
time = np.arange(0.0, 10, np.sqrt(2) / 2)
s1 = Series((9.81 * time ** 2) / 2,
index=Index(time, name='time'),
name='speed')
df = DataFrame(s1)
resetted = s1.reset_index()
assert resetted['time'].dtype == np.float64
resetted = df.reset_index()
assert resetted['time'].dtype == np.float64
def test_reset_index_multiindex_col(self):
vals = np.random.randn(3, 3).astype(object)
idx = ['x', 'y', 'z']
full = np.hstack(([[x] for x in idx], vals))
df = DataFrame(vals, Index(idx, name='a'),
columns=[['b', 'b', 'c'], ['mean', 'median', 'mean']])
rs = df.reset_index()
xp = DataFrame(full, columns=[['a', 'b', 'b', 'c'],
['', 'mean', 'median', 'mean']])
assert_frame_equal(rs, xp)
rs = df.reset_index(col_fill=None)
xp = DataFrame(full, columns=[['a', 'b', 'b', 'c'],
['a', 'mean', 'median', 'mean']])
assert_frame_equal(rs, xp)
rs = df.reset_index(col_level=1, col_fill='blah')
xp = DataFrame(full, columns=[['blah', 'b', 'b', 'c'],
['a', 'mean', 'median', 'mean']])
assert_frame_equal(rs, xp)
df = DataFrame(vals,
MultiIndex.from_arrays([[0, 1, 2], ['x', 'y', 'z']],
names=['d', 'a']),
columns=[['b', 'b', 'c'], ['mean', 'median', 'mean']])
rs = df.reset_index('a', )
xp = DataFrame(full, Index([0, 1, 2], name='d'),
columns=[['a', 'b', 'b', 'c'],
['', 'mean', 'median', 'mean']])
assert_frame_equal(rs, xp)
rs = df.reset_index('a', col_fill=None)
xp = DataFrame(full, Index(lrange(3), name='d'),
columns=[['a', 'b', 'b', 'c'],
['a', 'mean', 'median', 'mean']])
assert_frame_equal(rs, xp)
rs = df.reset_index('a', col_fill='blah', col_level=1)
xp = DataFrame(full, Index(lrange(3), name='d'),
columns=[['blah', 'b', 'b', 'c'],
['a', 'mean', 'median', 'mean']])
assert_frame_equal(rs, xp)
def test_reset_index_multiindex_nan(self):
# GH6322, testing reset_index on MultiIndexes
# when we have a nan or all nan
df = pd.DataFrame({'A': ['a', 'b', 'c'],
'B': [0, 1, np.nan],
'C': np.random.rand(3)})
rs = df.set_index(['A', 'B']).reset_index()
assert_frame_equal(rs, df)
df = pd.DataFrame({'A': [np.nan, 'b', 'c'],
'B': [0, 1, 2],
'C': np.random.rand(3)})
rs = df.set_index(['A', 'B']).reset_index()
assert_frame_equal(rs, df)
df = pd.DataFrame({'A': ['a', 'b', 'c'],
'B': [0, 1, 2],
'C': [np.nan, 1.1, 2.2]})
rs = df.set_index(['A', 'B']).reset_index()
assert_frame_equal(rs, df)
df = pd.DataFrame({'A': ['a', 'b', 'c'],
'B': [np.nan, np.nan, np.nan],
'C': np.random.rand(3)})
rs = df.set_index(['A', 'B']).reset_index()
assert_frame_equal(rs, df)
def test_reset_index_with_datetimeindex_cols(self):
# GH5818
#
df = pd.DataFrame([[1, 2], [3, 4]],
columns=pd.date_range('1/1/2013', '1/2/2013'),
index=['A', 'B'])
result = df.reset_index()
expected = pd.DataFrame([['A', 1, 2], ['B', 3, 4]],
columns=['index', datetime(2013, 1, 1),
datetime(2013, 1, 2)])
assert_frame_equal(result, expected)
def test_reset_index_range(self):
# GH 12071
df = pd.DataFrame([[0, 0], [1, 1]], columns=['A', 'B'],
index=RangeIndex(stop=2))
result = df.reset_index()
assert isinstance(result.index, RangeIndex)
expected = pd.DataFrame([[0, 0, 0], [1, 1, 1]],
columns=['index', 'A', 'B'],
index=RangeIndex(stop=2))
assert_frame_equal(result, expected)
def test_set_index_names(self):
df = pd.util.testing.makeDataFrame()
df.index.name = 'name'
assert df.set_index(df.index).index.names == ['name']
mi = MultiIndex.from_arrays(df[['A', 'B']].T.values, names=['A', 'B'])
mi2 = MultiIndex.from_arrays(df[['A', 'B', 'A', 'B']].T.values,
names=['A', 'B', 'C', 'D'])
df = df.set_index(['A', 'B'])
assert df.set_index(df.index).index.names == ['A', 'B']
# Check that set_index isn't converting a MultiIndex into an Index
assert isinstance(df.set_index(df.index).index, MultiIndex)
# Check actual equality
tm.assert_index_equal(df.set_index(df.index).index, mi)
idx2 = df.index.rename(['C', 'D'])
# Check that [MultiIndex, MultiIndex] yields a MultiIndex rather
# than a pair of tuples
assert isinstance(df.set_index([df.index, idx2]).index, MultiIndex)
# Check equality
tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2)
def test_rename_objects(self):
renamed = self.mixed_frame.rename(columns=str.upper)
assert 'FOO' in renamed
assert 'foo' not in renamed
def test_rename_axis_style(self):
# https://github.com/pandas-dev/pandas/issues/12392
df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['X', 'Y'])
expected = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, index=['X', 'Y'])
result = df.rename(str.lower, axis=1)
assert_frame_equal(result, expected)
result = df.rename(str.lower, axis='columns')
assert_frame_equal(result, expected)
result = df.rename({"A": 'a', 'B': 'b'}, axis=1)
assert_frame_equal(result, expected)
result = df.rename({"A": 'a', 'B': 'b'}, axis='columns')
assert_frame_equal(result, expected)
# Index
expected = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['x', 'y'])
result = df.rename(str.lower, axis=0)
assert_frame_equal(result, expected)
result = df.rename(str.lower, axis='index')
assert_frame_equal(result, expected)
result = df.rename({'X': 'x', 'Y': 'y'}, axis=0)
assert_frame_equal(result, expected)
result = df.rename({'X': 'x', 'Y': 'y'}, axis='index')
assert_frame_equal(result, expected)
result = df.rename(mapper=str.lower, axis='index')
assert_frame_equal(result, expected)
def test_rename_mapper_multi(self):
df = pd.DataFrame({"A": ['a', 'b'], "B": ['c', 'd'],
'C': [1, 2]}).set_index(["A", "B"])
result = df.rename(str.upper)
expected = df.rename(index=str.upper)
assert_frame_equal(result, expected)
def test_rename_positional_named(self):
# https://github.com/pandas-dev/pandas/issues/12392
df = pd.DataFrame({"a": [1, 2], "b": [1, 2]}, index=['X', 'Y'])
result = df.rename(str.lower, columns=str.upper)
expected = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['x', 'y'])
assert_frame_equal(result, expected)
def test_rename_axis_style_raises(self):
# https://github.com/pandas-dev/pandas/issues/12392
df = pd.DataFrame({"A": [1, 2], "B": [1, 2]}, index=['0', '1'])
# Named target and axis
with tm.assert_raises_regex(TypeError, None):
df.rename(index=str.lower, axis=1)
with tm.assert_raises_regex(TypeError, None):
df.rename(index=str.lower, axis='columns')
with tm.assert_raises_regex(TypeError, None):
df.rename(index=str.lower, axis='columns')
with tm.assert_raises_regex(TypeError, None):
df.rename(columns=str.lower, axis='columns')
with tm.assert_raises_regex(TypeError, None):
df.rename(index=str.lower, axis=0)
# Multiple targets and axis
with tm.assert_raises_regex(TypeError, None):
df.rename(str.lower, str.lower, axis='columns')
# Too many targets
with tm.assert_raises_regex(TypeError, None):
df.rename(str.lower, str.lower, str.lower)
# Duplicates
with tm.assert_raises_regex(TypeError, "multiple values"):
df.rename(id, mapper=id)
def test_reindex_api_equivalence(self):
# equivalence of the labels/axis and index/columns API's
df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]],
index=['a', 'b', 'c'],
columns=['d', 'e', 'f'])
res1 = df.reindex(['b', 'a'])
res2 = df.reindex(index=['b', 'a'])
res3 = df.reindex(labels=['b', 'a'])
res4 = df.reindex(labels=['b', 'a'], axis=0)
res5 = df.reindex(['b', 'a'], axis=0)
for res in [res2, res3, res4, res5]:
tm.assert_frame_equal(res1, res)
res1 = df.reindex(columns=['e', 'd'])
res2 = df.reindex(['e', 'd'], axis=1)
res3 = df.reindex(labels=['e', 'd'], axis=1)
for res in [res2, res3]:
tm.assert_frame_equal(res1, res)
res1 = df.reindex(index=['b', 'a'], columns=['e', 'd'])
res2 = df.reindex(columns=['e', 'd'], index=['b', 'a'])
res3 = df.reindex(labels=['b', 'a'], axis=0).reindex(labels=['e', 'd'],
axis=1)
for res in [res2, res3]:
tm.assert_frame_equal(res1, res)
def test_rename_positional(self):
df = pd.DataFrame(columns=['A', 'B'])
with tm.assert_produces_warning(FutureWarning) as rec:
result = df.rename(None, str.lower)
expected = pd.DataFrame(columns=['a', 'b'])
assert_frame_equal(result, expected)
assert len(rec) == 1
message = str(rec[0].message)
assert 'rename' in message
assert 'Use named arguments' in message
def test_assign_columns(self):
self.frame['hi'] = 'there'
frame = self.frame.copy()
frame.columns = ['foo', 'bar', 'baz', 'quux', 'foo2']
assert_series_equal(self.frame['C'], frame['baz'], check_names=False)
assert_series_equal(self.frame['hi'], frame['foo2'], check_names=False)
def test_set_index_preserve_categorical_dtype(self):
# GH13743, GH13854
df = DataFrame({'A': [1, 2, 1, 1, 2],
'B': [10, 16, 22, 28, 34],
'C1': pd.Categorical(list("abaab"),
categories=list("bac"),
ordered=False),
'C2': pd.Categorical(list("abaab"),
categories=list("bac"),
ordered=True)})
for cols in ['C1', 'C2', ['A', 'C1'], ['A', 'C2'], ['C1', 'C2']]:
result = df.set_index(cols).reset_index()
result = result.reindex(columns=df.columns)
tm.assert_frame_equal(result, df)
def test_ambiguous_warns(self):
df = pd.DataFrame({"A": [1, 2]})
with tm.assert_produces_warning(FutureWarning):
df.rename(id, id)
with tm.assert_produces_warning(FutureWarning):
df.rename({0: 10}, {"A": "B"})
@pytest.mark.skipif(PY2, reason="inspect.signature")
def test_rename_signature(self):
sig = inspect.signature(pd.DataFrame.rename)
parameters = set(sig.parameters)
assert parameters == {"self", "mapper", "index", "columns", "axis",
"inplace", "copy", "level"}
@pytest.mark.skipif(PY2, reason="inspect.signature")
def test_reindex_signature(self):
sig = inspect.signature(pd.DataFrame.reindex)
parameters = set(sig.parameters)
assert parameters == {"self", "labels", "index", "columns", "axis",
"limit", "copy", "level", "method",
"fill_value", "tolerance"}
class TestIntervalIndex(object):
def test_setitem(self):
df = DataFrame({'A': range(10)})
s = pd.cut(df.A, 5)
assert isinstance(s.cat.categories, IntervalIndex)
# B & D end up as Categoricals
# the remainer are converted to in-line objects
# contining an IntervalIndex.values
df['B'] = s
df['C'] = np.array(s)
df['D'] = s.values
df['E'] = np.array(s.values)
assert is_categorical_dtype(df['B'])
assert is_interval_dtype(df['B'].cat.categories)
assert is_categorical_dtype(df['D'])
assert is_interval_dtype(df['D'].cat.categories)
assert is_object_dtype(df['C'])
assert is_object_dtype(df['E'])
# they compare equal as Index
# when converted to numpy objects
c = lambda x: Index(np.array(x))
tm.assert_index_equal(c(df.B), c(df.B), check_names=False)
tm.assert_index_equal(c(df.B), c(df.C), check_names=False)
tm.assert_index_equal(c(df.B), c(df.D), check_names=False)
tm.assert_index_equal(c(df.B), c(df.D), check_names=False)
# B & D are the same Series
tm.assert_series_equal(df['B'], df['B'], check_names=False)
tm.assert_series_equal(df['B'], df['D'], check_names=False)
# C & E are the same Series
tm.assert_series_equal(df['C'], df['C'], check_names=False)
tm.assert_series_equal(df['C'], df['E'], check_names=False)
def test_set_reset_index(self):
df = DataFrame({'A': range(10)})
s = pd.cut(df.A, 5)
df['B'] = s
df = df.set_index('B')
df = df.reset_index()
def test_set_axis_inplace(self):
# GH14636
df = DataFrame({'A': [1.1, 2.2, 3.3],
'B': [5.0, 6.1, 7.2],
'C': [4.4, 5.5, 6.6]},
index=[2010, 2011, 2012])
expected = {0: df.copy(),
1: df.copy()}
expected[0].index = list('abc')
expected[1].columns = list('abc')
expected['index'] = expected[0]
expected['columns'] = expected[1]
for axis in expected:
# inplace=True
# The FutureWarning comes from the fact that we would like to have
# inplace default to False some day
for inplace, warn in (None, FutureWarning), (True, None):
kwargs = {'inplace': inplace}
result = df.copy()
with tm.assert_produces_warning(warn):
result.set_axis(list('abc'), axis=axis, **kwargs)
tm.assert_frame_equal(result, expected[axis])
# inplace=False
result = df.set_axis(list('abc'), axis=axis, inplace=False)
tm.assert_frame_equal(expected[axis], result)
# omitting the "axis" parameter
with tm.assert_produces_warning(None):
result = df.set_axis(list('abc'), inplace=False)
tm.assert_frame_equal(result, expected[0])
# wrong values for the "axis" parameter
for axis in 3, 'foo':
with tm.assert_raises_regex(ValueError, 'No axis named'):
df.set_axis(list('abc'), axis=axis, inplace=False)
def test_set_axis_prior_to_deprecation_signature(self):
df = DataFrame({'A': [1.1, 2.2, 3.3],
'B': [5.0, 6.1, 7.2],
'C': [4.4, 5.5, 6.6]},
index=[2010, 2011, 2012])
expected = {0: df.copy(),
1: df.copy()}
expected[0].index = list('abc')
expected[1].columns = list('abc')
expected['index'] = expected[0]
expected['columns'] = expected[1]
# old signature
for axis in expected:
with tm.assert_produces_warning(FutureWarning):
result = df.set_axis(axis, list('abc'), inplace=False)
tm.assert_frame_equal(result, expected[axis])