laywerrobot/lib/python3.6/site-packages/pandas/tests/indexes/test_multi.py
2020-08-27 21:55:39 +02:00

3327 lines
129 KiB
Python

# -*- coding: utf-8 -*-
import re
import warnings
from datetime import timedelta
from itertools import product
import pytest
import numpy as np
import pandas as pd
from pandas import (CategoricalIndex, Categorical, DataFrame, Index,
MultiIndex, compat, date_range, period_range)
from pandas.compat import PY3, long, lrange, lzip, range, u, PYPY
from pandas.errors import PerformanceWarning, UnsortedIndexError
from pandas.core.dtypes.dtypes import CategoricalDtype
from pandas.core.indexes.base import InvalidIndexError
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
from pandas._libs.tslib import Timestamp
import pandas.util.testing as tm
from pandas.util.testing import assert_almost_equal, assert_copy
from .common import Base
class TestMultiIndex(Base):
_holder = MultiIndex
_compat_props = ['shape', 'ndim', 'size']
def setup_method(self, method):
major_axis = Index(['foo', 'bar', 'baz', 'qux'])
minor_axis = Index(['one', 'two'])
major_labels = np.array([0, 0, 1, 2, 3, 3])
minor_labels = np.array([0, 1, 0, 1, 0, 1])
self.index_names = ['first', 'second']
self.indices = dict(index=MultiIndex(levels=[major_axis, minor_axis],
labels=[major_labels, minor_labels
], names=self.index_names,
verify_integrity=False))
self.setup_indices()
def create_index(self):
return self.index
def test_can_hold_identifiers(self):
idx = self.create_index()
key = idx[0]
assert idx._can_hold_identifiers_and_holds_name(key) is True
def test_boolean_context_compat2(self):
# boolean context compat
# GH7897
i1 = MultiIndex.from_tuples([('A', 1), ('A', 2)])
i2 = MultiIndex.from_tuples([('A', 1), ('A', 3)])
common = i1.intersection(i2)
def f():
if common:
pass
tm.assert_raises_regex(ValueError, 'The truth value of a', f)
def test_labels_dtypes(self):
# GH 8456
i = MultiIndex.from_tuples([('A', 1), ('A', 2)])
assert i.labels[0].dtype == 'int8'
assert i.labels[1].dtype == 'int8'
i = MultiIndex.from_product([['a'], range(40)])
assert i.labels[1].dtype == 'int8'
i = MultiIndex.from_product([['a'], range(400)])
assert i.labels[1].dtype == 'int16'
i = MultiIndex.from_product([['a'], range(40000)])
assert i.labels[1].dtype == 'int32'
i = pd.MultiIndex.from_product([['a'], range(1000)])
assert (i.labels[0] >= 0).all()
assert (i.labels[1] >= 0).all()
def test_where(self):
i = MultiIndex.from_tuples([('A', 1), ('A', 2)])
def f():
i.where(True)
pytest.raises(NotImplementedError, f)
def test_where_array_like(self):
i = MultiIndex.from_tuples([('A', 1), ('A', 2)])
klasses = [list, tuple, np.array, pd.Series]
cond = [False, True]
for klass in klasses:
f = lambda: i.where(klass(cond))
pytest.raises(NotImplementedError, f)
def test_repeat(self):
reps = 2
numbers = [1, 2, 3]
names = np.array(['foo', 'bar'])
m = MultiIndex.from_product([
numbers, names], names=names)
expected = MultiIndex.from_product([
numbers, names.repeat(reps)], names=names)
tm.assert_index_equal(m.repeat(reps), expected)
with tm.assert_produces_warning(FutureWarning):
result = m.repeat(n=reps)
tm.assert_index_equal(result, expected)
def test_numpy_repeat(self):
reps = 2
numbers = [1, 2, 3]
names = np.array(['foo', 'bar'])
m = MultiIndex.from_product([
numbers, names], names=names)
expected = MultiIndex.from_product([
numbers, names.repeat(reps)], names=names)
tm.assert_index_equal(np.repeat(m, reps), expected)
msg = "the 'axis' parameter is not supported"
tm.assert_raises_regex(
ValueError, msg, np.repeat, m, reps, axis=1)
def test_set_name_methods(self):
# so long as these are synonyms, we don't need to test set_names
assert self.index.rename == self.index.set_names
new_names = [name + "SUFFIX" for name in self.index_names]
ind = self.index.set_names(new_names)
assert self.index.names == self.index_names
assert ind.names == new_names
with tm.assert_raises_regex(ValueError, "^Length"):
ind.set_names(new_names + new_names)
new_names2 = [name + "SUFFIX2" for name in new_names]
res = ind.set_names(new_names2, inplace=True)
assert res is None
assert ind.names == new_names2
# set names for specific level (# GH7792)
ind = self.index.set_names(new_names[0], level=0)
assert self.index.names == self.index_names
assert ind.names == [new_names[0], self.index_names[1]]
res = ind.set_names(new_names2[0], level=0, inplace=True)
assert res is None
assert ind.names == [new_names2[0], self.index_names[1]]
# set names for multiple levels
ind = self.index.set_names(new_names, level=[0, 1])
assert self.index.names == self.index_names
assert ind.names == new_names
res = ind.set_names(new_names2, level=[0, 1], inplace=True)
assert res is None
assert ind.names == new_names2
@pytest.mark.parametrize('inplace', [True, False])
def test_set_names_with_nlevel_1(self, inplace):
# GH 21149
# Ensure that .set_names for MultiIndex with
# nlevels == 1 does not raise any errors
expected = pd.MultiIndex(levels=[[0, 1]],
labels=[[0, 1]],
names=['first'])
m = pd.MultiIndex.from_product([[0, 1]])
result = m.set_names('first', level=0, inplace=inplace)
if inplace:
result = m
tm.assert_index_equal(result, expected)
def test_set_levels_labels_directly(self):
# setting levels/labels directly raises AttributeError
levels = self.index.levels
new_levels = [[lev + 'a' for lev in level] for level in levels]
labels = self.index.labels
major_labels, minor_labels = labels
major_labels = [(x + 1) % 3 for x in major_labels]
minor_labels = [(x + 1) % 1 for x in minor_labels]
new_labels = [major_labels, minor_labels]
with pytest.raises(AttributeError):
self.index.levels = new_levels
with pytest.raises(AttributeError):
self.index.labels = new_labels
def test_set_levels(self):
# side note - you probably wouldn't want to use levels and labels
# directly like this - but it is possible.
levels = self.index.levels
new_levels = [[lev + 'a' for lev in level] for level in levels]
def assert_matching(actual, expected, check_dtype=False):
# avoid specifying internal representation
# as much as possible
assert len(actual) == len(expected)
for act, exp in zip(actual, expected):
act = np.asarray(act)
exp = np.asarray(exp)
tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype)
# level changing [w/o mutation]
ind2 = self.index.set_levels(new_levels)
assert_matching(ind2.levels, new_levels)
assert_matching(self.index.levels, levels)
# level changing [w/ mutation]
ind2 = self.index.copy()
inplace_return = ind2.set_levels(new_levels, inplace=True)
assert inplace_return is None
assert_matching(ind2.levels, new_levels)
# level changing specific level [w/o mutation]
ind2 = self.index.set_levels(new_levels[0], level=0)
assert_matching(ind2.levels, [new_levels[0], levels[1]])
assert_matching(self.index.levels, levels)
ind2 = self.index.set_levels(new_levels[1], level=1)
assert_matching(ind2.levels, [levels[0], new_levels[1]])
assert_matching(self.index.levels, levels)
# level changing multiple levels [w/o mutation]
ind2 = self.index.set_levels(new_levels, level=[0, 1])
assert_matching(ind2.levels, new_levels)
assert_matching(self.index.levels, levels)
# level changing specific level [w/ mutation]
ind2 = self.index.copy()
inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True)
assert inplace_return is None
assert_matching(ind2.levels, [new_levels[0], levels[1]])
assert_matching(self.index.levels, levels)
ind2 = self.index.copy()
inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True)
assert inplace_return is None
assert_matching(ind2.levels, [levels[0], new_levels[1]])
assert_matching(self.index.levels, levels)
# level changing multiple levels [w/ mutation]
ind2 = self.index.copy()
inplace_return = ind2.set_levels(new_levels, level=[0, 1],
inplace=True)
assert inplace_return is None
assert_matching(ind2.levels, new_levels)
assert_matching(self.index.levels, levels)
# illegal level changing should not change levels
# GH 13754
original_index = self.index.copy()
for inplace in [True, False]:
with tm.assert_raises_regex(ValueError, "^On"):
self.index.set_levels(['c'], level=0, inplace=inplace)
assert_matching(self.index.levels, original_index.levels,
check_dtype=True)
with tm.assert_raises_regex(ValueError, "^On"):
self.index.set_labels([0, 1, 2, 3, 4, 5], level=0,
inplace=inplace)
assert_matching(self.index.labels, original_index.labels,
check_dtype=True)
with tm.assert_raises_regex(TypeError, "^Levels"):
self.index.set_levels('c', level=0, inplace=inplace)
assert_matching(self.index.levels, original_index.levels,
check_dtype=True)
with tm.assert_raises_regex(TypeError, "^Labels"):
self.index.set_labels(1, level=0, inplace=inplace)
assert_matching(self.index.labels, original_index.labels,
check_dtype=True)
def test_set_labels(self):
# side note - you probably wouldn't want to use levels and labels
# directly like this - but it is possible.
labels = self.index.labels
major_labels, minor_labels = labels
major_labels = [(x + 1) % 3 for x in major_labels]
minor_labels = [(x + 1) % 1 for x in minor_labels]
new_labels = [major_labels, minor_labels]
def assert_matching(actual, expected):
# avoid specifying internal representation
# as much as possible
assert len(actual) == len(expected)
for act, exp in zip(actual, expected):
act = np.asarray(act)
exp = np.asarray(exp, dtype=np.int8)
tm.assert_numpy_array_equal(act, exp)
# label changing [w/o mutation]
ind2 = self.index.set_labels(new_labels)
assert_matching(ind2.labels, new_labels)
assert_matching(self.index.labels, labels)
# label changing [w/ mutation]
ind2 = self.index.copy()
inplace_return = ind2.set_labels(new_labels, inplace=True)
assert inplace_return is None
assert_matching(ind2.labels, new_labels)
# label changing specific level [w/o mutation]
ind2 = self.index.set_labels(new_labels[0], level=0)
assert_matching(ind2.labels, [new_labels[0], labels[1]])
assert_matching(self.index.labels, labels)
ind2 = self.index.set_labels(new_labels[1], level=1)
assert_matching(ind2.labels, [labels[0], new_labels[1]])
assert_matching(self.index.labels, labels)
# label changing multiple levels [w/o mutation]
ind2 = self.index.set_labels(new_labels, level=[0, 1])
assert_matching(ind2.labels, new_labels)
assert_matching(self.index.labels, labels)
# label changing specific level [w/ mutation]
ind2 = self.index.copy()
inplace_return = ind2.set_labels(new_labels[0], level=0, inplace=True)
assert inplace_return is None
assert_matching(ind2.labels, [new_labels[0], labels[1]])
assert_matching(self.index.labels, labels)
ind2 = self.index.copy()
inplace_return = ind2.set_labels(new_labels[1], level=1, inplace=True)
assert inplace_return is None
assert_matching(ind2.labels, [labels[0], new_labels[1]])
assert_matching(self.index.labels, labels)
# label changing multiple levels [w/ mutation]
ind2 = self.index.copy()
inplace_return = ind2.set_labels(new_labels, level=[0, 1],
inplace=True)
assert inplace_return is None
assert_matching(ind2.labels, new_labels)
assert_matching(self.index.labels, labels)
# label changing for levels of different magnitude of categories
ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)])
new_labels = range(129, -1, -1)
expected = pd.MultiIndex.from_tuples(
[(0, i) for i in new_labels])
# [w/o mutation]
result = ind.set_labels(labels=new_labels, level=1)
assert result.equals(expected)
# [w/ mutation]
result = ind.copy()
result.set_labels(labels=new_labels, level=1, inplace=True)
assert result.equals(expected)
def test_set_levels_labels_names_bad_input(self):
levels, labels = self.index.levels, self.index.labels
names = self.index.names
with tm.assert_raises_regex(ValueError, 'Length of levels'):
self.index.set_levels([levels[0]])
with tm.assert_raises_regex(ValueError, 'Length of labels'):
self.index.set_labels([labels[0]])
with tm.assert_raises_regex(ValueError, 'Length of names'):
self.index.set_names([names[0]])
# shouldn't scalar data error, instead should demand list-like
with tm.assert_raises_regex(TypeError, 'list of lists-like'):
self.index.set_levels(levels[0])
# shouldn't scalar data error, instead should demand list-like
with tm.assert_raises_regex(TypeError, 'list of lists-like'):
self.index.set_labels(labels[0])
# shouldn't scalar data error, instead should demand list-like
with tm.assert_raises_regex(TypeError, 'list-like'):
self.index.set_names(names[0])
# should have equal lengths
with tm.assert_raises_regex(TypeError, 'list of lists-like'):
self.index.set_levels(levels[0], level=[0, 1])
with tm.assert_raises_regex(TypeError, 'list-like'):
self.index.set_levels(levels, level=0)
# should have equal lengths
with tm.assert_raises_regex(TypeError, 'list of lists-like'):
self.index.set_labels(labels[0], level=[0, 1])
with tm.assert_raises_regex(TypeError, 'list-like'):
self.index.set_labels(labels, level=0)
# should have equal lengths
with tm.assert_raises_regex(ValueError, 'Length of names'):
self.index.set_names(names[0], level=[0, 1])
with tm.assert_raises_regex(TypeError, 'string'):
self.index.set_names(names, level=0)
def test_set_levels_categorical(self):
# GH13854
index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])
for ordered in [False, True]:
cidx = CategoricalIndex(list("bac"), ordered=ordered)
result = index.set_levels(cidx, 0)
expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]],
labels=index.labels)
tm.assert_index_equal(result, expected)
result_lvl = result.get_level_values(0)
expected_lvl = CategoricalIndex(list("bacb"),
categories=cidx.categories,
ordered=cidx.ordered)
tm.assert_index_equal(result_lvl, expected_lvl)
def test_metadata_immutable(self):
levels, labels = self.index.levels, self.index.labels
# shouldn't be able to set at either the top level or base level
mutable_regex = re.compile('does not support mutable operations')
with tm.assert_raises_regex(TypeError, mutable_regex):
levels[0] = levels[0]
with tm.assert_raises_regex(TypeError, mutable_regex):
levels[0][0] = levels[0][0]
# ditto for labels
with tm.assert_raises_regex(TypeError, mutable_regex):
labels[0] = labels[0]
with tm.assert_raises_regex(TypeError, mutable_regex):
labels[0][0] = labels[0][0]
# and for names
names = self.index.names
with tm.assert_raises_regex(TypeError, mutable_regex):
names[0] = names[0]
def test_inplace_mutation_resets_values(self):
levels = [['a', 'b', 'c'], [4]]
levels2 = [[1, 2, 3], ['a']]
labels = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]
mi1 = MultiIndex(levels=levels, labels=labels)
mi2 = MultiIndex(levels=levels2, labels=labels)
vals = mi1.values.copy()
vals2 = mi2.values.copy()
assert mi1._tuples is not None
# Make sure level setting works
new_vals = mi1.set_levels(levels2).values
tm.assert_almost_equal(vals2, new_vals)
# Non-inplace doesn't kill _tuples [implementation detail]
tm.assert_almost_equal(mi1._tuples, vals)
# ...and values is still same too
tm.assert_almost_equal(mi1.values, vals)
# Inplace should kill _tuples
mi1.set_levels(levels2, inplace=True)
tm.assert_almost_equal(mi1.values, vals2)
# Make sure label setting works too
labels2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
exp_values = np.empty((6,), dtype=object)
exp_values[:] = [(long(1), 'a')] * 6
# Must be 1d array of tuples
assert exp_values.shape == (6,)
new_values = mi2.set_labels(labels2).values
# Not inplace shouldn't change
tm.assert_almost_equal(mi2._tuples, vals2)
# Should have correct values
tm.assert_almost_equal(exp_values, new_values)
# ...and again setting inplace should kill _tuples, etc
mi2.set_labels(labels2, inplace=True)
tm.assert_almost_equal(mi2.values, new_values)
def test_copy_in_constructor(self):
levels = np.array(["a", "b", "c"])
labels = np.array([1, 1, 2, 0, 0, 1, 1])
val = labels[0]
mi = MultiIndex(levels=[levels, levels], labels=[labels, labels],
copy=True)
assert mi.labels[0][0] == val
labels[0] = 15
assert mi.labels[0][0] == val
val = levels[0]
levels[0] = "PANDA"
assert mi.levels[0][0] == val
def test_set_value_keeps_names(self):
# motivating example from #3742
lev1 = ['hans', 'hans', 'hans', 'grethe', 'grethe', 'grethe']
lev2 = ['1', '2', '3'] * 2
idx = pd.MultiIndex.from_arrays([lev1, lev2], names=['Name', 'Number'])
df = pd.DataFrame(
np.random.randn(6, 4),
columns=['one', 'two', 'three', 'four'],
index=idx)
df = df.sort_index()
assert df._is_copy is None
assert df.index.names == ('Name', 'Number')
df.at[('grethe', '4'), 'one'] = 99.34
assert df._is_copy is None
assert df.index.names == ('Name', 'Number')
def test_copy_names(self):
# Check that adding a "names" parameter to the copy is honored
# GH14302
multi_idx = pd.Index([(1, 2), (3, 4)], names=['MyName1', 'MyName2'])
multi_idx1 = multi_idx.copy()
assert multi_idx.equals(multi_idx1)
assert multi_idx.names == ['MyName1', 'MyName2']
assert multi_idx1.names == ['MyName1', 'MyName2']
multi_idx2 = multi_idx.copy(names=['NewName1', 'NewName2'])
assert multi_idx.equals(multi_idx2)
assert multi_idx.names == ['MyName1', 'MyName2']
assert multi_idx2.names == ['NewName1', 'NewName2']
multi_idx3 = multi_idx.copy(name=['NewName1', 'NewName2'])
assert multi_idx.equals(multi_idx3)
assert multi_idx.names == ['MyName1', 'MyName2']
assert multi_idx3.names == ['NewName1', 'NewName2']
def test_names(self):
# names are assigned in setup
names = self.index_names
level_names = [level.name for level in self.index.levels]
assert names == level_names
# setting bad names on existing
index = self.index
tm.assert_raises_regex(ValueError, "^Length of names",
setattr, index, "names",
list(index.names) + ["third"])
tm.assert_raises_regex(ValueError, "^Length of names",
setattr, index, "names", [])
# initializing with bad names (should always be equivalent)
major_axis, minor_axis = self.index.levels
major_labels, minor_labels = self.index.labels
tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex,
levels=[major_axis, minor_axis],
labels=[major_labels, minor_labels],
names=['first'])
tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex,
levels=[major_axis, minor_axis],
labels=[major_labels, minor_labels],
names=['first', 'second', 'third'])
# names are assigned
index.names = ["a", "b"]
ind_names = list(index.names)
level_names = [level.name for level in index.levels]
assert ind_names == level_names
def test_astype(self):
expected = self.index.copy()
actual = self.index.astype('O')
assert_copy(actual.levels, expected.levels)
assert_copy(actual.labels, expected.labels)
self.check_level_names(actual, expected.names)
with tm.assert_raises_regex(TypeError, "^Setting.*dtype.*object"):
self.index.astype(np.dtype(int))
@pytest.mark.parametrize('ordered', [True, False])
def test_astype_category(self, ordered):
# GH 18630
msg = '> 1 ndim Categorical are not supported at this time'
with tm.assert_raises_regex(NotImplementedError, msg):
self.index.astype(CategoricalDtype(ordered=ordered))
if ordered is False:
# dtype='category' defaults to ordered=False, so only test once
with tm.assert_raises_regex(NotImplementedError, msg):
self.index.astype('category')
def test_constructor_single_level(self):
result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
labels=[[0, 1, 2, 3]], names=['first'])
assert isinstance(result, MultiIndex)
expected = Index(['foo', 'bar', 'baz', 'qux'], name='first')
tm.assert_index_equal(result.levels[0], expected)
assert result.names == ['first']
def test_constructor_no_levels(self):
tm.assert_raises_regex(ValueError, "non-zero number "
"of levels/labels",
MultiIndex, levels=[], labels=[])
both_re = re.compile('Must pass both levels and labels')
with tm.assert_raises_regex(TypeError, both_re):
MultiIndex(levels=[])
with tm.assert_raises_regex(TypeError, both_re):
MultiIndex(labels=[])
def test_constructor_mismatched_label_levels(self):
labels = [np.array([1]), np.array([2]), np.array([3])]
levels = ["a"]
tm.assert_raises_regex(ValueError, "Length of levels and labels "
"must be the same", MultiIndex,
levels=levels, labels=labels)
length_error = re.compile('>= length of level')
label_error = re.compile(r'Unequal label lengths: \[4, 2\]')
# important to check that it's looking at the right thing.
with tm.assert_raises_regex(ValueError, length_error):
MultiIndex(levels=[['a'], ['b']],
labels=[[0, 1, 2, 3], [0, 3, 4, 1]])
with tm.assert_raises_regex(ValueError, label_error):
MultiIndex(levels=[['a'], ['b']], labels=[[0, 0, 0, 0], [0, 0]])
# external API
with tm.assert_raises_regex(ValueError, length_error):
self.index.copy().set_levels([['a'], ['b']])
with tm.assert_raises_regex(ValueError, label_error):
self.index.copy().set_labels([[0, 0, 0, 0], [0, 0]])
def test_constructor_nonhashable_names(self):
# GH 20527
levels = [[1, 2], [u'one', u'two']]
labels = [[0, 0, 1, 1], [0, 1, 0, 1]]
names = ((['foo'], ['bar']))
message = "MultiIndex.name must be a hashable type"
tm.assert_raises_regex(TypeError, message,
MultiIndex, levels=levels,
labels=labels, names=names)
# With .rename()
mi = MultiIndex(levels=[[1, 2], [u'one', u'two']],
labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=('foo', 'bar'))
renamed = [['foor'], ['barr']]
tm.assert_raises_regex(TypeError, message, mi.rename, names=renamed)
# With .set_names()
tm.assert_raises_regex(TypeError, message, mi.set_names, names=renamed)
@pytest.mark.parametrize('names', [['a', 'b', 'a'], [1, 1, 2],
[1, 'a', 1]])
def test_duplicate_level_names(self, names):
# GH18872, GH19029
mi = pd.MultiIndex.from_product([[0, 1]] * 3, names=names)
assert mi.names == names
# With .rename()
mi = pd.MultiIndex.from_product([[0, 1]] * 3)
mi = mi.rename(names)
assert mi.names == names
# With .rename(., level=)
mi.rename(names[1], level=1, inplace=True)
mi = mi.rename([names[0], names[2]], level=[0, 2])
assert mi.names == names
def test_duplicate_level_names_access_raises(self):
self.index.names = ['foo', 'foo']
tm.assert_raises_regex(KeyError, 'Level foo not found',
self.index._get_level_number, 'foo')
def assert_multiindex_copied(self, copy, original):
# Levels should be (at least, shallow copied)
tm.assert_copy(copy.levels, original.levels)
tm.assert_almost_equal(copy.labels, original.labels)
# Labels doesn't matter which way copied
tm.assert_almost_equal(copy.labels, original.labels)
assert copy.labels is not original.labels
# Names doesn't matter which way copied
assert copy.names == original.names
assert copy.names is not original.names
# Sort order should be copied
assert copy.sortorder == original.sortorder
def test_copy(self):
i_copy = self.index.copy()
self.assert_multiindex_copied(i_copy, self.index)
def test_shallow_copy(self):
i_copy = self.index._shallow_copy()
self.assert_multiindex_copied(i_copy, self.index)
def test_view(self):
i_view = self.index.view()
self.assert_multiindex_copied(i_view, self.index)
def check_level_names(self, index, names):
assert [level.name for level in index.levels] == list(names)
def test_changing_names(self):
# names should be applied to levels
level_names = [level.name for level in self.index.levels]
self.check_level_names(self.index, self.index.names)
view = self.index.view()
copy = self.index.copy()
shallow_copy = self.index._shallow_copy()
# changing names should change level names on object
new_names = [name + "a" for name in self.index.names]
self.index.names = new_names
self.check_level_names(self.index, new_names)
# but not on copies
self.check_level_names(view, level_names)
self.check_level_names(copy, level_names)
self.check_level_names(shallow_copy, level_names)
# and copies shouldn't change original
shallow_copy.names = [name + "c" for name in shallow_copy.names]
self.check_level_names(self.index, new_names)
def test_get_level_number_integer(self):
self.index.names = [1, 0]
assert self.index._get_level_number(1) == 0
assert self.index._get_level_number(0) == 1
pytest.raises(IndexError, self.index._get_level_number, 2)
tm.assert_raises_regex(KeyError, 'Level fourth not found',
self.index._get_level_number, 'fourth')
def test_from_arrays(self):
arrays = []
for lev, lab in zip(self.index.levels, self.index.labels):
arrays.append(np.asarray(lev).take(lab))
# list of arrays as input
result = MultiIndex.from_arrays(arrays, names=self.index.names)
tm.assert_index_equal(result, self.index)
# infer correctly
result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')],
['a', 'b']])
assert result.levels[0].equals(Index([Timestamp('20130101')]))
assert result.levels[1].equals(Index(['a', 'b']))
def test_from_arrays_iterator(self):
# GH 18434
arrays = []
for lev, lab in zip(self.index.levels, self.index.labels):
arrays.append(np.asarray(lev).take(lab))
# iterator as input
result = MultiIndex.from_arrays(iter(arrays), names=self.index.names)
tm.assert_index_equal(result, self.index)
# invalid iterator input
with tm.assert_raises_regex(
TypeError, "Input must be a list / sequence of array-likes."):
MultiIndex.from_arrays(0)
def test_from_arrays_index_series_datetimetz(self):
idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3,
tz='US/Eastern')
idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3,
tz='Asia/Tokyo')
result = pd.MultiIndex.from_arrays([idx1, idx2])
tm.assert_index_equal(result.get_level_values(0), idx1)
tm.assert_index_equal(result.get_level_values(1), idx2)
result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)])
tm.assert_index_equal(result2.get_level_values(0), idx1)
tm.assert_index_equal(result2.get_level_values(1), idx2)
tm.assert_index_equal(result, result2)
def test_from_arrays_index_series_timedelta(self):
idx1 = pd.timedelta_range('1 days', freq='D', periods=3)
idx2 = pd.timedelta_range('2 hours', freq='H', periods=3)
result = pd.MultiIndex.from_arrays([idx1, idx2])
tm.assert_index_equal(result.get_level_values(0), idx1)
tm.assert_index_equal(result.get_level_values(1), idx2)
result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)])
tm.assert_index_equal(result2.get_level_values(0), idx1)
tm.assert_index_equal(result2.get_level_values(1), idx2)
tm.assert_index_equal(result, result2)
def test_from_arrays_index_series_period(self):
idx1 = pd.period_range('2011-01-01', freq='D', periods=3)
idx2 = pd.period_range('2015-01-01', freq='H', periods=3)
result = pd.MultiIndex.from_arrays([idx1, idx2])
tm.assert_index_equal(result.get_level_values(0), idx1)
tm.assert_index_equal(result.get_level_values(1), idx2)
result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)])
tm.assert_index_equal(result2.get_level_values(0), idx1)
tm.assert_index_equal(result2.get_level_values(1), idx2)
tm.assert_index_equal(result, result2)
def test_from_arrays_index_datetimelike_mixed(self):
idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3,
tz='US/Eastern')
idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3)
idx3 = pd.timedelta_range('1 days', freq='D', periods=3)
idx4 = pd.period_range('2011-01-01', freq='D', periods=3)
result = pd.MultiIndex.from_arrays([idx1, idx2, idx3, idx4])
tm.assert_index_equal(result.get_level_values(0), idx1)
tm.assert_index_equal(result.get_level_values(1), idx2)
tm.assert_index_equal(result.get_level_values(2), idx3)
tm.assert_index_equal(result.get_level_values(3), idx4)
result2 = pd.MultiIndex.from_arrays([pd.Series(idx1),
pd.Series(idx2),
pd.Series(idx3),
pd.Series(idx4)])
tm.assert_index_equal(result2.get_level_values(0), idx1)
tm.assert_index_equal(result2.get_level_values(1), idx2)
tm.assert_index_equal(result2.get_level_values(2), idx3)
tm.assert_index_equal(result2.get_level_values(3), idx4)
tm.assert_index_equal(result, result2)
def test_from_arrays_index_series_categorical(self):
# GH13743
idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"),
ordered=False)
idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"),
ordered=True)
result = pd.MultiIndex.from_arrays([idx1, idx2])
tm.assert_index_equal(result.get_level_values(0), idx1)
tm.assert_index_equal(result.get_level_values(1), idx2)
result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)])
tm.assert_index_equal(result2.get_level_values(0), idx1)
tm.assert_index_equal(result2.get_level_values(1), idx2)
result3 = pd.MultiIndex.from_arrays([idx1.values, idx2.values])
tm.assert_index_equal(result3.get_level_values(0), idx1)
tm.assert_index_equal(result3.get_level_values(1), idx2)
def test_from_arrays_empty(self):
# 0 levels
with tm.assert_raises_regex(
ValueError, "Must pass non-zero number of levels/labels"):
MultiIndex.from_arrays(arrays=[])
# 1 level
result = MultiIndex.from_arrays(arrays=[[]], names=['A'])
assert isinstance(result, MultiIndex)
expected = Index([], name='A')
tm.assert_index_equal(result.levels[0], expected)
# N levels
for N in [2, 3]:
arrays = [[]] * N
names = list('ABC')[:N]
result = MultiIndex.from_arrays(arrays=arrays, names=names)
expected = MultiIndex(levels=[[]] * N, labels=[[]] * N,
names=names)
tm.assert_index_equal(result, expected)
def test_from_arrays_invalid_input(self):
invalid_inputs = [1, [1], [1, 2], [[1], 2],
'a', ['a'], ['a', 'b'], [['a'], 'b']]
for i in invalid_inputs:
pytest.raises(TypeError, MultiIndex.from_arrays, arrays=i)
def test_from_arrays_different_lengths(self):
# see gh-13599
idx1 = [1, 2, 3]
idx2 = ['a', 'b']
tm.assert_raises_regex(ValueError, '^all arrays must '
'be same length$',
MultiIndex.from_arrays, [idx1, idx2])
idx1 = []
idx2 = ['a', 'b']
tm.assert_raises_regex(ValueError, '^all arrays must '
'be same length$',
MultiIndex.from_arrays, [idx1, idx2])
idx1 = [1, 2, 3]
idx2 = []
tm.assert_raises_regex(ValueError, '^all arrays must '
'be same length$',
MultiIndex.from_arrays, [idx1, idx2])
def test_from_product(self):
first = ['foo', 'bar', 'buz']
second = ['a', 'b', 'c']
names = ['first', 'second']
result = MultiIndex.from_product([first, second], names=names)
tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'),
('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'),
('buz', 'c')]
expected = MultiIndex.from_tuples(tuples, names=names)
tm.assert_index_equal(result, expected)
def test_from_product_iterator(self):
# GH 18434
first = ['foo', 'bar', 'buz']
second = ['a', 'b', 'c']
names = ['first', 'second']
tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'),
('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'),
('buz', 'c')]
expected = MultiIndex.from_tuples(tuples, names=names)
# iterator as input
result = MultiIndex.from_product(iter([first, second]), names=names)
tm.assert_index_equal(result, expected)
# Invalid non-iterable input
with tm.assert_raises_regex(
TypeError, "Input must be a list / sequence of iterables."):
MultiIndex.from_product(0)
def test_from_product_empty(self):
# 0 levels
with tm.assert_raises_regex(
ValueError, "Must pass non-zero number of levels/labels"):
MultiIndex.from_product([])
# 1 level
result = MultiIndex.from_product([[]], names=['A'])
expected = pd.Index([], name='A')
tm.assert_index_equal(result.levels[0], expected)
# 2 levels
l1 = [[], ['foo', 'bar', 'baz'], []]
l2 = [[], [], ['a', 'b', 'c']]
names = ['A', 'B']
for first, second in zip(l1, l2):
result = MultiIndex.from_product([first, second], names=names)
expected = MultiIndex(levels=[first, second],
labels=[[], []], names=names)
tm.assert_index_equal(result, expected)
# GH12258
names = ['A', 'B', 'C']
for N in range(4):
lvl2 = lrange(N)
result = MultiIndex.from_product([[], lvl2, []], names=names)
expected = MultiIndex(levels=[[], lvl2, []],
labels=[[], [], []], names=names)
tm.assert_index_equal(result, expected)
def test_from_product_invalid_input(self):
invalid_inputs = [1, [1], [1, 2], [[1], 2],
'a', ['a'], ['a', 'b'], [['a'], 'b']]
for i in invalid_inputs:
pytest.raises(TypeError, MultiIndex.from_product, iterables=i)
def test_from_product_datetimeindex(self):
dt_index = date_range('2000-01-01', periods=2)
mi = pd.MultiIndex.from_product([[1, 2], dt_index])
etalon = construct_1d_object_array_from_listlike([(1, pd.Timestamp(
'2000-01-01')), (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp(
'2000-01-01')), (2, pd.Timestamp('2000-01-02'))])
tm.assert_numpy_array_equal(mi.values, etalon)
def test_from_product_index_series_categorical(self):
# GH13743
first = ['foo', 'bar']
for ordered in [False, True]:
idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"),
ordered=ordered)
expected = pd.CategoricalIndex(list("abcaab") + list("abcaab"),
categories=list("bac"),
ordered=ordered)
for arr in [idx, pd.Series(idx), idx.values]:
result = pd.MultiIndex.from_product([first, arr])
tm.assert_index_equal(result.get_level_values(1), expected)
def test_values_boxed(self):
tuples = [(1, pd.Timestamp('2000-01-01')), (2, pd.NaT),
(3, pd.Timestamp('2000-01-03')),
(1, pd.Timestamp('2000-01-04')),
(2, pd.Timestamp('2000-01-02')),
(3, pd.Timestamp('2000-01-03'))]
result = pd.MultiIndex.from_tuples(tuples)
expected = construct_1d_object_array_from_listlike(tuples)
tm.assert_numpy_array_equal(result.values, expected)
# Check that code branches for boxed values produce identical results
tm.assert_numpy_array_equal(result.values[:4], result[:4].values)
def test_values_multiindex_datetimeindex(self):
# Test to ensure we hit the boxing / nobox part of MI.values
ints = np.arange(10 ** 18, 10 ** 18 + 5)
naive = pd.DatetimeIndex(ints)
aware = pd.DatetimeIndex(ints, tz='US/Central')
idx = pd.MultiIndex.from_arrays([naive, aware])
result = idx.values
outer = pd.DatetimeIndex([x[0] for x in result])
tm.assert_index_equal(outer, naive)
inner = pd.DatetimeIndex([x[1] for x in result])
tm.assert_index_equal(inner, aware)
# n_lev > n_lab
result = idx[:2].values
outer = pd.DatetimeIndex([x[0] for x in result])
tm.assert_index_equal(outer, naive[:2])
inner = pd.DatetimeIndex([x[1] for x in result])
tm.assert_index_equal(inner, aware[:2])
def test_values_multiindex_periodindex(self):
# Test to ensure we hit the boxing / nobox part of MI.values
ints = np.arange(2007, 2012)
pidx = pd.PeriodIndex(ints, freq='D')
idx = pd.MultiIndex.from_arrays([ints, pidx])
result = idx.values
outer = pd.Int64Index([x[0] for x in result])
tm.assert_index_equal(outer, pd.Int64Index(ints))
inner = pd.PeriodIndex([x[1] for x in result])
tm.assert_index_equal(inner, pidx)
# n_lev > n_lab
result = idx[:2].values
outer = pd.Int64Index([x[0] for x in result])
tm.assert_index_equal(outer, pd.Int64Index(ints[:2]))
inner = pd.PeriodIndex([x[1] for x in result])
tm.assert_index_equal(inner, pidx[:2])
def test_append(self):
result = self.index[:3].append(self.index[3:])
assert result.equals(self.index)
foos = [self.index[:1], self.index[1:3], self.index[3:]]
result = foos[0].append(foos[1:])
assert result.equals(self.index)
# empty
result = self.index.append([])
assert result.equals(self.index)
def test_append_mixed_dtypes(self):
# GH 13660
dti = date_range('2011-01-01', freq='M', periods=3, )
dti_tz = date_range('2011-01-01', freq='M', periods=3, tz='US/Eastern')
pi = period_range('2011-01', freq='M', periods=3)
mi = MultiIndex.from_arrays([[1, 2, 3],
[1.1, np.nan, 3.3],
['a', 'b', 'c'],
dti, dti_tz, pi])
assert mi.nlevels == 6
res = mi.append(mi)
exp = MultiIndex.from_arrays([[1, 2, 3, 1, 2, 3],
[1.1, np.nan, 3.3, 1.1, np.nan, 3.3],
['a', 'b', 'c', 'a', 'b', 'c'],
dti.append(dti),
dti_tz.append(dti_tz),
pi.append(pi)])
tm.assert_index_equal(res, exp)
other = MultiIndex.from_arrays([['x', 'y', 'z'], ['x', 'y', 'z'],
['x', 'y', 'z'], ['x', 'y', 'z'],
['x', 'y', 'z'], ['x', 'y', 'z']])
res = mi.append(other)
exp = MultiIndex.from_arrays([[1, 2, 3, 'x', 'y', 'z'],
[1.1, np.nan, 3.3, 'x', 'y', 'z'],
['a', 'b', 'c', 'x', 'y', 'z'],
dti.append(pd.Index(['x', 'y', 'z'])),
dti_tz.append(pd.Index(['x', 'y', 'z'])),
pi.append(pd.Index(['x', 'y', 'z']))])
tm.assert_index_equal(res, exp)
def test_get_level_values(self):
result = self.index.get_level_values(0)
expected = Index(['foo', 'foo', 'bar', 'baz', 'qux', 'qux'],
name='first')
tm.assert_index_equal(result, expected)
assert result.name == 'first'
result = self.index.get_level_values('first')
expected = self.index.get_level_values(0)
tm.assert_index_equal(result, expected)
# GH 10460
index = MultiIndex(
levels=[CategoricalIndex(['A', 'B']),
CategoricalIndex([1, 2, 3])],
labels=[np.array([0, 0, 0, 1, 1, 1]),
np.array([0, 1, 2, 0, 1, 2])])
exp = CategoricalIndex(['A', 'A', 'A', 'B', 'B', 'B'])
tm.assert_index_equal(index.get_level_values(0), exp)
exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
tm.assert_index_equal(index.get_level_values(1), exp)
def test_get_level_values_int_with_na(self):
# GH 17924
arrays = [['a', 'b', 'b'], [1, np.nan, 2]]
index = pd.MultiIndex.from_arrays(arrays)
result = index.get_level_values(1)
expected = Index([1, np.nan, 2])
tm.assert_index_equal(result, expected)
arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]]
index = pd.MultiIndex.from_arrays(arrays)
result = index.get_level_values(1)
expected = Index([np.nan, np.nan, 2])
tm.assert_index_equal(result, expected)
def test_get_level_values_na(self):
arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]]
index = pd.MultiIndex.from_arrays(arrays)
result = index.get_level_values(0)
expected = pd.Index([np.nan, np.nan, np.nan])
tm.assert_index_equal(result, expected)
result = index.get_level_values(1)
expected = pd.Index(['a', np.nan, 1])
tm.assert_index_equal(result, expected)
arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])]
index = pd.MultiIndex.from_arrays(arrays)
result = index.get_level_values(1)
expected = pd.DatetimeIndex([0, 1, pd.NaT])
tm.assert_index_equal(result, expected)
arrays = [[], []]
index = pd.MultiIndex.from_arrays(arrays)
result = index.get_level_values(0)
expected = pd.Index([], dtype=object)
tm.assert_index_equal(result, expected)
def test_get_level_values_all_na(self):
# GH 17924 when level entirely consists of nan
arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]]
index = pd.MultiIndex.from_arrays(arrays)
result = index.get_level_values(0)
expected = pd.Index([np.nan, np.nan, np.nan], dtype=np.float64)
tm.assert_index_equal(result, expected)
result = index.get_level_values(1)
expected = pd.Index(['a', np.nan, 1], dtype=object)
tm.assert_index_equal(result, expected)
def test_reorder_levels(self):
# this blows up
tm.assert_raises_regex(IndexError, '^Too many levels',
self.index.reorder_levels, [2, 1, 0])
def test_nlevels(self):
assert self.index.nlevels == 2
def test_iter(self):
result = list(self.index)
expected = [('foo', 'one'), ('foo', 'two'), ('bar', 'one'),
('baz', 'two'), ('qux', 'one'), ('qux', 'two')]
assert result == expected
def test_legacy_pickle(self, datapath):
if PY3:
pytest.skip("testing for legacy pickles not "
"support on py3")
path = datapath('indexes', 'data', 'multiindex_v1.pickle')
obj = pd.read_pickle(path)
obj2 = MultiIndex.from_tuples(obj.values)
assert obj.equals(obj2)
res = obj.get_indexer(obj)
exp = np.arange(len(obj), dtype=np.intp)
assert_almost_equal(res, exp)
res = obj.get_indexer(obj2[::-1])
exp = obj.get_indexer(obj[::-1])
exp2 = obj2.get_indexer(obj2[::-1])
assert_almost_equal(res, exp)
assert_almost_equal(exp, exp2)
def test_legacy_v2_unpickle(self, datapath):
# 0.7.3 -> 0.8.0 format manage
path = datapath('indexes', 'data', 'mindex_073.pickle')
obj = pd.read_pickle(path)
obj2 = MultiIndex.from_tuples(obj.values)
assert obj.equals(obj2)
res = obj.get_indexer(obj)
exp = np.arange(len(obj), dtype=np.intp)
assert_almost_equal(res, exp)
res = obj.get_indexer(obj2[::-1])
exp = obj.get_indexer(obj[::-1])
exp2 = obj2.get_indexer(obj2[::-1])
assert_almost_equal(res, exp)
assert_almost_equal(exp, exp2)
def test_roundtrip_pickle_with_tz(self):
# GH 8367
# round-trip of timezone
index = MultiIndex.from_product(
[[1, 2], ['a', 'b'], date_range('20130101', periods=3,
tz='US/Eastern')
], names=['one', 'two', 'three'])
unpickled = tm.round_trip_pickle(index)
assert index.equal_levels(unpickled)
def test_from_tuples_index_values(self):
result = MultiIndex.from_tuples(self.index)
assert (result.values == self.index.values).all()
def test_contains(self):
assert ('foo', 'two') in self.index
assert ('bar', 'two') not in self.index
assert None not in self.index
def test_contains_top_level(self):
midx = MultiIndex.from_product([['A', 'B'], [1, 2]])
assert 'A' in midx
assert 'A' not in midx._engine
def test_contains_with_nat(self):
# MI with a NaT
mi = MultiIndex(levels=[['C'],
pd.date_range('2012-01-01', periods=5)],
labels=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]],
names=[None, 'B'])
assert ('C', pd.Timestamp('2012-01-01')) in mi
for val in mi.values:
assert val in mi
def test_is_all_dates(self):
assert not self.index.is_all_dates
def test_is_numeric(self):
# MultiIndex is never numeric
assert not self.index.is_numeric()
def test_getitem(self):
# scalar
assert self.index[2] == ('bar', 'one')
# slice
result = self.index[2:5]
expected = self.index[[2, 3, 4]]
assert result.equals(expected)
# boolean
result = self.index[[True, False, True, False, True, True]]
result2 = self.index[np.array([True, False, True, False, True, True])]
expected = self.index[[0, 2, 4, 5]]
assert result.equals(expected)
assert result2.equals(expected)
def test_getitem_group_select(self):
sorted_idx, _ = self.index.sortlevel(0)
assert sorted_idx.get_loc('baz') == slice(3, 4)
assert sorted_idx.get_loc('foo') == slice(0, 2)
def test_get_loc(self):
assert self.index.get_loc(('foo', 'two')) == 1
assert self.index.get_loc(('baz', 'two')) == 3
pytest.raises(KeyError, self.index.get_loc, ('bar', 'two'))
pytest.raises(KeyError, self.index.get_loc, 'quux')
pytest.raises(NotImplementedError, self.index.get_loc, 'foo',
method='nearest')
# 3 levels
index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index(
lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])])
pytest.raises(KeyError, index.get_loc, (1, 1))
assert index.get_loc((2, 0)) == slice(3, 5)
def test_get_loc_duplicates(self):
index = Index([2, 2, 2, 2])
result = index.get_loc(2)
expected = slice(0, 4)
assert result == expected
# pytest.raises(Exception, index.get_loc, 2)
index = Index(['c', 'a', 'a', 'b', 'b'])
rs = index.get_loc('c')
xp = 0
assert rs == xp
def test_get_value_duplicates(self):
index = MultiIndex(levels=[['D', 'B', 'C'],
[0, 26, 27, 37, 57, 67, 75, 82]],
labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2],
[1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
names=['tag', 'day'])
assert index.get_loc('D') == slice(0, 3)
with pytest.raises(KeyError):
index._engine.get_value(np.array([]), 'D')
def test_get_loc_level(self):
index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index(
lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])])
loc, new_index = index.get_loc_level((0, 1))
expected = slice(1, 2)
exp_index = index[expected].droplevel(0).droplevel(0)
assert loc == expected
assert new_index.equals(exp_index)
loc, new_index = index.get_loc_level((0, 1, 0))
expected = 1
assert loc == expected
assert new_index is None
pytest.raises(KeyError, index.get_loc_level, (2, 2))
index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array(
[0, 0, 0, 0]), np.array([0, 1, 2, 3])])
result, new_index = index.get_loc_level((2000, slice(None, None)))
expected = slice(None, None)
assert result == expected
assert new_index.equals(index.droplevel(0))
@pytest.mark.parametrize('level', [0, 1])
@pytest.mark.parametrize('null_val', [np.nan, pd.NaT, None])
def test_get_loc_nan(self, level, null_val):
# GH 18485 : NaN in MultiIndex
levels = [['a', 'b'], ['c', 'd']]
key = ['b', 'd']
levels[level] = np.array([0, null_val], dtype=type(null_val))
key[level] = null_val
idx = MultiIndex.from_product(levels)
assert idx.get_loc(tuple(key)) == 3
def test_get_loc_missing_nan(self):
# GH 8569
idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]])
assert isinstance(idx.get_loc(1), slice)
pytest.raises(KeyError, idx.get_loc, 3)
pytest.raises(KeyError, idx.get_loc, np.nan)
pytest.raises(KeyError, idx.get_loc, [np.nan])
@pytest.mark.parametrize('dtype1', [int, float, bool, str])
@pytest.mark.parametrize('dtype2', [int, float, bool, str])
def test_get_loc_multiple_dtypes(self, dtype1, dtype2):
# GH 18520
levels = [np.array([0, 1]).astype(dtype1),
np.array([0, 1]).astype(dtype2)]
idx = pd.MultiIndex.from_product(levels)
assert idx.get_loc(idx[2]) == 2
@pytest.mark.parametrize('level', [0, 1])
@pytest.mark.parametrize('dtypes', [[int, float], [float, int]])
def test_get_loc_implicit_cast(self, level, dtypes):
# GH 18818, GH 15994 : as flat index, cast int to float and vice-versa
levels = [['a', 'b'], ['c', 'd']]
key = ['b', 'd']
lev_dtype, key_dtype = dtypes
levels[level] = np.array([0, 1], dtype=lev_dtype)
key[level] = key_dtype(1)
idx = MultiIndex.from_product(levels)
assert idx.get_loc(tuple(key)) == 3
def test_get_loc_cast_bool(self):
# GH 19086 : int is casted to bool, but not vice-versa
levels = [[False, True], np.arange(2, dtype='int64')]
idx = MultiIndex.from_product(levels)
assert idx.get_loc((0, 1)) == 1
assert idx.get_loc((1, 0)) == 2
pytest.raises(KeyError, idx.get_loc, (False, True))
pytest.raises(KeyError, idx.get_loc, (True, False))
def test_slice_locs(self):
df = tm.makeTimeDataFrame()
stacked = df.stack()
idx = stacked.index
slob = slice(*idx.slice_locs(df.index[5], df.index[15]))
sliced = stacked[slob]
expected = df[5:16].stack()
tm.assert_almost_equal(sliced.values, expected.values)
slob = slice(*idx.slice_locs(df.index[5] + timedelta(seconds=30),
df.index[15] - timedelta(seconds=30)))
sliced = stacked[slob]
expected = df[6:15].stack()
tm.assert_almost_equal(sliced.values, expected.values)
def test_slice_locs_with_type_mismatch(self):
df = tm.makeTimeDataFrame()
stacked = df.stack()
idx = stacked.index
tm.assert_raises_regex(TypeError, '^Level type mismatch',
idx.slice_locs, (1, 3))
tm.assert_raises_regex(TypeError, '^Level type mismatch',
idx.slice_locs,
df.index[5] + timedelta(
seconds=30), (5, 2))
df = tm.makeCustomDataframe(5, 5)
stacked = df.stack()
idx = stacked.index
with tm.assert_raises_regex(TypeError, '^Level type mismatch'):
idx.slice_locs(timedelta(seconds=30))
# TODO: Try creating a UnicodeDecodeError in exception message
with tm.assert_raises_regex(TypeError, '^Level type mismatch'):
idx.slice_locs(df.index[1], (16, "a"))
def test_slice_locs_not_sorted(self):
index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index(
lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])])
tm.assert_raises_regex(KeyError, "[Kk]ey length.*greater than "
"MultiIndex lexsort depth",
index.slice_locs, (1, 0, 1), (2, 1, 0))
# works
sorted_index, _ = index.sortlevel(0)
# should there be a test case here???
sorted_index.slice_locs((1, 0, 1), (2, 1, 0))
def test_slice_locs_partial(self):
sorted_idx, _ = self.index.sortlevel(0)
result = sorted_idx.slice_locs(('foo', 'two'), ('qux', 'one'))
assert result == (1, 5)
result = sorted_idx.slice_locs(None, ('qux', 'one'))
assert result == (0, 5)
result = sorted_idx.slice_locs(('foo', 'two'), None)
assert result == (1, len(sorted_idx))
result = sorted_idx.slice_locs('bar', 'baz')
assert result == (2, 4)
def test_slice_locs_not_contained(self):
# some searchsorted action
index = MultiIndex(levels=[[0, 2, 4, 6], [0, 2, 4]],
labels=[[0, 0, 0, 1, 1, 2, 3, 3, 3],
[0, 1, 2, 1, 2, 2, 0, 1, 2]], sortorder=0)
result = index.slice_locs((1, 0), (5, 2))
assert result == (3, 6)
result = index.slice_locs(1, 5)
assert result == (3, 6)
result = index.slice_locs((2, 2), (5, 2))
assert result == (3, 6)
result = index.slice_locs(2, 5)
assert result == (3, 6)
result = index.slice_locs((1, 0), (6, 3))
assert result == (3, 8)
result = index.slice_locs(-1, 10)
assert result == (0, len(index))
def test_consistency(self):
# need to construct an overflow
major_axis = lrange(70000)
minor_axis = lrange(10)
major_labels = np.arange(70000)
minor_labels = np.repeat(lrange(10), 7000)
# the fact that is works means it's consistent
index = MultiIndex(levels=[major_axis, minor_axis],
labels=[major_labels, minor_labels])
# inconsistent
major_labels = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3])
minor_labels = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1])
index = MultiIndex(levels=[major_axis, minor_axis],
labels=[major_labels, minor_labels])
assert not index.is_unique
def test_truncate(self):
major_axis = Index(lrange(4))
minor_axis = Index(lrange(2))
major_labels = np.array([0, 0, 1, 2, 3, 3])
minor_labels = np.array([0, 1, 0, 1, 0, 1])
index = MultiIndex(levels=[major_axis, minor_axis],
labels=[major_labels, minor_labels])
result = index.truncate(before=1)
assert 'foo' not in result.levels[0]
assert 1 in result.levels[0]
result = index.truncate(after=1)
assert 2 not in result.levels[0]
assert 1 in result.levels[0]
result = index.truncate(before=1, after=2)
assert len(result.levels[0]) == 2
# after < before
pytest.raises(ValueError, index.truncate, 3, 1)
def test_get_indexer(self):
major_axis = Index(lrange(4))
minor_axis = Index(lrange(2))
major_labels = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp)
minor_labels = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp)
index = MultiIndex(levels=[major_axis, minor_axis],
labels=[major_labels, minor_labels])
idx1 = index[:5]
idx2 = index[[1, 3, 5]]
r1 = idx1.get_indexer(idx2)
assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp))
r1 = idx2.get_indexer(idx1, method='pad')
e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp)
assert_almost_equal(r1, e1)
r2 = idx2.get_indexer(idx1[::-1], method='pad')
assert_almost_equal(r2, e1[::-1])
rffill1 = idx2.get_indexer(idx1, method='ffill')
assert_almost_equal(r1, rffill1)
r1 = idx2.get_indexer(idx1, method='backfill')
e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp)
assert_almost_equal(r1, e1)
r2 = idx2.get_indexer(idx1[::-1], method='backfill')
assert_almost_equal(r2, e1[::-1])
rbfill1 = idx2.get_indexer(idx1, method='bfill')
assert_almost_equal(r1, rbfill1)
# pass non-MultiIndex
r1 = idx1.get_indexer(idx2.values)
rexp1 = idx1.get_indexer(idx2)
assert_almost_equal(r1, rexp1)
r1 = idx1.get_indexer([1, 2, 3])
assert (r1 == [-1, -1, -1]).all()
# create index with duplicates
idx1 = Index(lrange(10) + lrange(10))
idx2 = Index(lrange(20))
msg = "Reindexing only valid with uniquely valued Index objects"
with tm.assert_raises_regex(InvalidIndexError, msg):
idx1.get_indexer(idx2)
def test_get_indexer_nearest(self):
midx = MultiIndex.from_tuples([('a', 1), ('b', 2)])
with pytest.raises(NotImplementedError):
midx.get_indexer(['a'], method='nearest')
with pytest.raises(NotImplementedError):
midx.get_indexer(['a'], method='pad', tolerance=2)
def test_get_indexer_categorical_time(self):
# https://github.com/pandas-dev/pandas/issues/21390
midx = MultiIndex.from_product(
[Categorical(['a', 'b', 'c']),
Categorical(date_range("2012-01-01", periods=3, freq='H'))])
result = midx.get_indexer(midx)
tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp))
def test_hash_collisions(self):
# non-smoke test that we don't get hash collisions
index = MultiIndex.from_product([np.arange(1000), np.arange(1000)],
names=['one', 'two'])
result = index.get_indexer(index.values)
tm.assert_numpy_array_equal(result, np.arange(
len(index), dtype='intp'))
for i in [0, 1, len(index) - 2, len(index) - 1]:
result = index.get_loc(index[i])
assert result == i
def test_format(self):
self.index.format()
self.index[:0].format()
def test_format_integer_names(self):
index = MultiIndex(levels=[[0, 1], [0, 1]],
labels=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1])
index.format(names=True)
def test_format_sparse_display(self):
index = MultiIndex(levels=[[0, 1], [0, 1], [0, 1], [0]],
labels=[[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1],
[0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]])
result = index.format()
assert result[3] == '1 0 0 0'
def test_format_sparse_config(self):
warn_filters = warnings.filters
warnings.filterwarnings('ignore', category=FutureWarning,
module=".*format")
# GH1538
pd.set_option('display.multi_sparse', False)
result = self.index.format()
assert result[1] == 'foo two'
tm.reset_display_options()
warnings.filters = warn_filters
def test_to_frame(self):
tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]
index = MultiIndex.from_tuples(tuples)
result = index.to_frame(index=False)
expected = DataFrame(tuples)
tm.assert_frame_equal(result, expected)
result = index.to_frame()
expected.index = index
tm.assert_frame_equal(result, expected)
tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]
index = MultiIndex.from_tuples(tuples, names=['first', 'second'])
result = index.to_frame(index=False)
expected = DataFrame(tuples)
expected.columns = ['first', 'second']
tm.assert_frame_equal(result, expected)
result = index.to_frame()
expected.index = index
tm.assert_frame_equal(result, expected)
index = MultiIndex.from_product([range(5),
pd.date_range('20130101', periods=3)])
result = index.to_frame(index=False)
expected = DataFrame(
{0: np.repeat(np.arange(5, dtype='int64'), 3),
1: np.tile(pd.date_range('20130101', periods=3), 5)})
tm.assert_frame_equal(result, expected)
index = MultiIndex.from_product([range(5),
pd.date_range('20130101', periods=3)])
result = index.to_frame()
expected.index = index
tm.assert_frame_equal(result, expected)
def test_to_hierarchical(self):
index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (
2, 'two')])
result = index.to_hierarchical(3)
expected = MultiIndex(levels=[[1, 2], ['one', 'two']],
labels=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
[0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]])
tm.assert_index_equal(result, expected)
assert result.names == index.names
# K > 1
result = index.to_hierarchical(3, 2)
expected = MultiIndex(levels=[[1, 2], ['one', 'two']],
labels=[[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]])
tm.assert_index_equal(result, expected)
assert result.names == index.names
# non-sorted
index = MultiIndex.from_tuples([(2, 'c'), (1, 'b'),
(2, 'a'), (2, 'b')],
names=['N1', 'N2'])
result = index.to_hierarchical(2)
expected = MultiIndex.from_tuples([(2, 'c'), (2, 'c'), (1, 'b'),
(1, 'b'),
(2, 'a'), (2, 'a'),
(2, 'b'), (2, 'b')],
names=['N1', 'N2'])
tm.assert_index_equal(result, expected)
assert result.names == index.names
def test_bounds(self):
self.index._bounds
def test_equals_multi(self):
assert self.index.equals(self.index)
assert not self.index.equals(self.index.values)
assert self.index.equals(Index(self.index.values))
assert self.index.equal_levels(self.index)
assert not self.index.equals(self.index[:-1])
assert not self.index.equals(self.index[-1])
# different number of levels
index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index(
lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])])
index2 = MultiIndex(levels=index.levels[:-1], labels=index.labels[:-1])
assert not index.equals(index2)
assert not index.equal_levels(index2)
# levels are different
major_axis = Index(lrange(4))
minor_axis = Index(lrange(2))
major_labels = np.array([0, 0, 1, 2, 2, 3])
minor_labels = np.array([0, 1, 0, 0, 1, 0])
index = MultiIndex(levels=[major_axis, minor_axis],
labels=[major_labels, minor_labels])
assert not self.index.equals(index)
assert not self.index.equal_levels(index)
# some of the labels are different
major_axis = Index(['foo', 'bar', 'baz', 'qux'])
minor_axis = Index(['one', 'two'])
major_labels = np.array([0, 0, 2, 2, 3, 3])
minor_labels = np.array([0, 1, 0, 1, 0, 1])
index = MultiIndex(levels=[major_axis, minor_axis],
labels=[major_labels, minor_labels])
assert not self.index.equals(index)
def test_equals_missing_values(self):
# make sure take is not using -1
i = pd.MultiIndex.from_tuples([(0, pd.NaT),
(0, pd.Timestamp('20130101'))])
result = i[0:1].equals(i[0])
assert not result
result = i[1:2].equals(i[1])
assert not result
def test_identical(self):
mi = self.index.copy()
mi2 = self.index.copy()
assert mi.identical(mi2)
mi = mi.set_names(['new1', 'new2'])
assert mi.equals(mi2)
assert not mi.identical(mi2)
mi2 = mi2.set_names(['new1', 'new2'])
assert mi.identical(mi2)
mi3 = Index(mi.tolist(), names=mi.names)
mi4 = Index(mi.tolist(), names=mi.names, tupleize_cols=False)
assert mi.identical(mi3)
assert not mi.identical(mi4)
assert mi.equals(mi4)
def test_is_(self):
mi = MultiIndex.from_tuples(lzip(range(10), range(10)))
assert mi.is_(mi)
assert mi.is_(mi.view())
assert mi.is_(mi.view().view().view().view())
mi2 = mi.view()
# names are metadata, they don't change id
mi2.names = ["A", "B"]
assert mi2.is_(mi)
assert mi.is_(mi2)
assert mi.is_(mi.set_names(["C", "D"]))
mi2 = mi.view()
mi2.set_names(["E", "F"], inplace=True)
assert mi.is_(mi2)
# levels are inherent properties, they change identity
mi3 = mi2.set_levels([lrange(10), lrange(10)])
assert not mi3.is_(mi2)
# shouldn't change
assert mi2.is_(mi)
mi4 = mi3.view()
# GH 17464 - Remove duplicate MultiIndex levels
mi4.set_levels([lrange(10), lrange(10)], inplace=True)
assert not mi4.is_(mi3)
mi5 = mi.view()
mi5.set_levels(mi5.levels, inplace=True)
assert not mi5.is_(mi)
def test_union(self):
piece1 = self.index[:5][::-1]
piece2 = self.index[3:]
the_union = piece1 | piece2
tups = sorted(self.index.values)
expected = MultiIndex.from_tuples(tups)
assert the_union.equals(expected)
# corner case, pass self or empty thing:
the_union = self.index.union(self.index)
assert the_union is self.index
the_union = self.index.union(self.index[:0])
assert the_union is self.index
# won't work in python 3
# tuples = self.index.values
# result = self.index[:4] | tuples[4:]
# assert result.equals(tuples)
# not valid for python 3
# def test_union_with_regular_index(self):
# other = Index(['A', 'B', 'C'])
# result = other.union(self.index)
# assert ('foo', 'one') in result
# assert 'B' in result
# result2 = self.index.union(other)
# assert result.equals(result2)
def test_intersection(self):
piece1 = self.index[:5][::-1]
piece2 = self.index[3:]
the_int = piece1 & piece2
tups = sorted(self.index[3:5].values)
expected = MultiIndex.from_tuples(tups)
assert the_int.equals(expected)
# corner case, pass self
the_int = self.index.intersection(self.index)
assert the_int is self.index
# empty intersection: disjoint
empty = self.index[:2] & self.index[2:]
expected = self.index[:0]
assert empty.equals(expected)
# can't do in python 3
# tuples = self.index.values
# result = self.index & tuples
# assert result.equals(tuples)
def test_sub(self):
first = self.index
# - now raises (previously was set op difference)
with pytest.raises(TypeError):
first - self.index[-3:]
with pytest.raises(TypeError):
self.index[-3:] - first
with pytest.raises(TypeError):
self.index[-3:] - first.tolist()
with pytest.raises(TypeError):
first.tolist() - self.index[-3:]
def test_difference(self):
first = self.index
result = first.difference(self.index[-3:])
expected = MultiIndex.from_tuples(sorted(self.index[:-3].values),
sortorder=0,
names=self.index.names)
assert isinstance(result, MultiIndex)
assert result.equals(expected)
assert result.names == self.index.names
# empty difference: reflexive
result = self.index.difference(self.index)
expected = self.index[:0]
assert result.equals(expected)
assert result.names == self.index.names
# empty difference: superset
result = self.index[-3:].difference(self.index)
expected = self.index[:0]
assert result.equals(expected)
assert result.names == self.index.names
# empty difference: degenerate
result = self.index[:0].difference(self.index)
expected = self.index[:0]
assert result.equals(expected)
assert result.names == self.index.names
# names not the same
chunklet = self.index[-3:]
chunklet.names = ['foo', 'baz']
result = first.difference(chunklet)
assert result.names == (None, None)
# empty, but non-equal
result = self.index.difference(self.index.sortlevel(1)[0])
assert len(result) == 0
# raise Exception called with non-MultiIndex
result = first.difference(first.values)
assert result.equals(first[:0])
# name from empty array
result = first.difference([])
assert first.equals(result)
assert first.names == result.names
# name from non-empty array
result = first.difference([('foo', 'one')])
expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), (
'foo', 'two'), ('qux', 'one'), ('qux', 'two')])
expected.names = first.names
assert first.names == result.names
tm.assert_raises_regex(TypeError, "other must be a MultiIndex "
"or a list of tuples",
first.difference, [1, 2, 3, 4, 5])
def test_from_tuples(self):
tm.assert_raises_regex(TypeError, 'Cannot infer number of levels '
'from empty list',
MultiIndex.from_tuples, [])
expected = MultiIndex(levels=[[1, 3], [2, 4]],
labels=[[0, 1], [0, 1]],
names=['a', 'b'])
# input tuples
result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b'])
tm.assert_index_equal(result, expected)
def test_from_tuples_iterator(self):
# GH 18434
# input iterator for tuples
expected = MultiIndex(levels=[[1, 3], [2, 4]],
labels=[[0, 1], [0, 1]],
names=['a', 'b'])
result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=['a', 'b'])
tm.assert_index_equal(result, expected)
# input non-iterables
with tm.assert_raises_regex(
TypeError, 'Input must be a list / sequence of tuple-likes.'):
MultiIndex.from_tuples(0)
def test_from_tuples_empty(self):
# GH 16777
result = MultiIndex.from_tuples([], names=['a', 'b'])
expected = MultiIndex.from_arrays(arrays=[[], []],
names=['a', 'b'])
tm.assert_index_equal(result, expected)
def test_argsort(self):
result = self.index.argsort()
expected = self.index.values.argsort()
tm.assert_numpy_array_equal(result, expected)
def test_sortlevel(self):
import random
tuples = list(self.index)
random.shuffle(tuples)
index = MultiIndex.from_tuples(tuples)
sorted_idx, _ = index.sortlevel(0)
expected = MultiIndex.from_tuples(sorted(tuples))
assert sorted_idx.equals(expected)
sorted_idx, _ = index.sortlevel(0, ascending=False)
assert sorted_idx.equals(expected[::-1])
sorted_idx, _ = index.sortlevel(1)
by1 = sorted(tuples, key=lambda x: (x[1], x[0]))
expected = MultiIndex.from_tuples(by1)
assert sorted_idx.equals(expected)
sorted_idx, _ = index.sortlevel(1, ascending=False)
assert sorted_idx.equals(expected[::-1])
def test_sortlevel_not_sort_remaining(self):
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
sorted_idx, _ = mi.sortlevel('A', sort_remaining=False)
assert sorted_idx.equals(mi)
def test_sortlevel_deterministic(self):
tuples = [('bar', 'one'), ('foo', 'two'), ('qux', 'two'),
('foo', 'one'), ('baz', 'two'), ('qux', 'one')]
index = MultiIndex.from_tuples(tuples)
sorted_idx, _ = index.sortlevel(0)
expected = MultiIndex.from_tuples(sorted(tuples))
assert sorted_idx.equals(expected)
sorted_idx, _ = index.sortlevel(0, ascending=False)
assert sorted_idx.equals(expected[::-1])
sorted_idx, _ = index.sortlevel(1)
by1 = sorted(tuples, key=lambda x: (x[1], x[0]))
expected = MultiIndex.from_tuples(by1)
assert sorted_idx.equals(expected)
sorted_idx, _ = index.sortlevel(1, ascending=False)
assert sorted_idx.equals(expected[::-1])
def test_dims(self):
pass
def test_drop(self):
dropped = self.index.drop([('foo', 'two'), ('qux', 'one')])
index = MultiIndex.from_tuples([('foo', 'two'), ('qux', 'one')])
dropped2 = self.index.drop(index)
expected = self.index[[0, 2, 3, 5]]
tm.assert_index_equal(dropped, expected)
tm.assert_index_equal(dropped2, expected)
dropped = self.index.drop(['bar'])
expected = self.index[[0, 1, 3, 4, 5]]
tm.assert_index_equal(dropped, expected)
dropped = self.index.drop('foo')
expected = self.index[[2, 3, 4, 5]]
tm.assert_index_equal(dropped, expected)
index = MultiIndex.from_tuples([('bar', 'two')])
pytest.raises(KeyError, self.index.drop, [('bar', 'two')])
pytest.raises(KeyError, self.index.drop, index)
pytest.raises(KeyError, self.index.drop, ['foo', 'two'])
# partially correct argument
mixed_index = MultiIndex.from_tuples([('qux', 'one'), ('bar', 'two')])
pytest.raises(KeyError, self.index.drop, mixed_index)
# error='ignore'
dropped = self.index.drop(index, errors='ignore')
expected = self.index[[0, 1, 2, 3, 4, 5]]
tm.assert_index_equal(dropped, expected)
dropped = self.index.drop(mixed_index, errors='ignore')
expected = self.index[[0, 1, 2, 3, 5]]
tm.assert_index_equal(dropped, expected)
dropped = self.index.drop(['foo', 'two'], errors='ignore')
expected = self.index[[2, 3, 4, 5]]
tm.assert_index_equal(dropped, expected)
# mixed partial / full drop
dropped = self.index.drop(['foo', ('qux', 'one')])
expected = self.index[[2, 3, 5]]
tm.assert_index_equal(dropped, expected)
# mixed partial / full drop / error='ignore'
mixed_index = ['foo', ('qux', 'one'), 'two']
pytest.raises(KeyError, self.index.drop, mixed_index)
dropped = self.index.drop(mixed_index, errors='ignore')
expected = self.index[[2, 3, 5]]
tm.assert_index_equal(dropped, expected)
def test_droplevel_with_names(self):
index = self.index[self.index.get_loc('foo')]
dropped = index.droplevel(0)
assert dropped.name == 'second'
index = MultiIndex(
levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))],
labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])],
names=['one', 'two', 'three'])
dropped = index.droplevel(0)
assert dropped.names == ('two', 'three')
dropped = index.droplevel('two')
expected = index.droplevel(1)
assert dropped.equals(expected)
def test_droplevel_multiple(self):
index = MultiIndex(
levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))],
labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])],
names=['one', 'two', 'three'])
dropped = index[:2].droplevel(['three', 'one'])
expected = index[:2].droplevel(2).droplevel(0)
assert dropped.equals(expected)
def test_drop_not_lexsorted(self):
# GH 12078
# define the lexsorted version of the multi-index
tuples = [('a', ''), ('b1', 'c1'), ('b2', 'c2')]
lexsorted_mi = MultiIndex.from_tuples(tuples, names=['b', 'c'])
assert lexsorted_mi.is_lexsorted()
# and the not-lexsorted version
df = pd.DataFrame(columns=['a', 'b', 'c', 'd'],
data=[[1, 'b1', 'c1', 3], [1, 'b2', 'c2', 4]])
df = df.pivot_table(index='a', columns=['b', 'c'], values='d')
df = df.reset_index()
not_lexsorted_mi = df.columns
assert not not_lexsorted_mi.is_lexsorted()
# compare the results
tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi)
with tm.assert_produces_warning(PerformanceWarning):
tm.assert_index_equal(lexsorted_mi.drop('a'),
not_lexsorted_mi.drop('a'))
def test_insert(self):
# key contained in all levels
new_index = self.index.insert(0, ('bar', 'two'))
assert new_index.equal_levels(self.index)
assert new_index[0] == ('bar', 'two')
# key not contained in all levels
new_index = self.index.insert(0, ('abc', 'three'))
exp0 = Index(list(self.index.levels[0]) + ['abc'], name='first')
tm.assert_index_equal(new_index.levels[0], exp0)
exp1 = Index(list(self.index.levels[1]) + ['three'], name='second')
tm.assert_index_equal(new_index.levels[1], exp1)
assert new_index[0] == ('abc', 'three')
# key wrong length
msg = "Item must have length equal to number of levels"
with tm.assert_raises_regex(ValueError, msg):
self.index.insert(0, ('foo2',))
left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]],
columns=['1st', '2nd', '3rd'])
left.set_index(['1st', '2nd'], inplace=True)
ts = left['3rd'].copy(deep=True)
left.loc[('b', 'x'), '3rd'] = 2
left.loc[('b', 'a'), '3rd'] = -1
left.loc[('b', 'b'), '3rd'] = 3
left.loc[('a', 'x'), '3rd'] = 4
left.loc[('a', 'w'), '3rd'] = 5
left.loc[('a', 'a'), '3rd'] = 6
ts.loc[('b', 'x')] = 2
ts.loc['b', 'a'] = -1
ts.loc[('b', 'b')] = 3
ts.loc['a', 'x'] = 4
ts.loc[('a', 'w')] = 5
ts.loc['a', 'a'] = 6
right = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1], ['b', 'x', 2],
['b', 'a', -1], ['b', 'b', 3], ['a', 'x', 4],
['a', 'w', 5], ['a', 'a', 6]],
columns=['1st', '2nd', '3rd'])
right.set_index(['1st', '2nd'], inplace=True)
# FIXME data types changes to float because
# of intermediate nan insertion;
tm.assert_frame_equal(left, right, check_dtype=False)
tm.assert_series_equal(ts, right['3rd'])
# GH9250
idx = [('test1', i) for i in range(5)] + \
[('test2', i) for i in range(6)] + \
[('test', 17), ('test', 18)]
left = pd.Series(np.linspace(0, 10, 11),
pd.MultiIndex.from_tuples(idx[:-2]))
left.loc[('test', 17)] = 11
left.loc[('test', 18)] = 12
right = pd.Series(np.linspace(0, 12, 13),
pd.MultiIndex.from_tuples(idx))
tm.assert_series_equal(left, right)
def test_take_preserve_name(self):
taken = self.index.take([3, 0, 1])
assert taken.names == self.index.names
def test_take_fill_value(self):
# GH 12631
vals = [['A', 'B'],
[pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]]
idx = pd.MultiIndex.from_product(vals, names=['str', 'dt'])
result = idx.take(np.array([1, 0, -1]))
exp_vals = [('A', pd.Timestamp('2011-01-02')),
('A', pd.Timestamp('2011-01-01')),
('B', pd.Timestamp('2011-01-02'))]
expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt'])
tm.assert_index_equal(result, expected)
# fill_value
result = idx.take(np.array([1, 0, -1]), fill_value=True)
exp_vals = [('A', pd.Timestamp('2011-01-02')),
('A', pd.Timestamp('2011-01-01')),
(np.nan, pd.NaT)]
expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt'])
tm.assert_index_equal(result, expected)
# allow_fill=False
result = idx.take(np.array([1, 0, -1]), allow_fill=False,
fill_value=True)
exp_vals = [('A', pd.Timestamp('2011-01-02')),
('A', pd.Timestamp('2011-01-01')),
('B', pd.Timestamp('2011-01-02'))]
expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt'])
tm.assert_index_equal(result, expected)
msg = ('When allow_fill=True and fill_value is not None, '
'all indices must be >= -1')
with tm.assert_raises_regex(ValueError, msg):
idx.take(np.array([1, 0, -2]), fill_value=True)
with tm.assert_raises_regex(ValueError, msg):
idx.take(np.array([1, 0, -5]), fill_value=True)
with pytest.raises(IndexError):
idx.take(np.array([1, -5]))
def take_invalid_kwargs(self):
vals = [['A', 'B'],
[pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]]
idx = pd.MultiIndex.from_product(vals, names=['str', 'dt'])
indices = [1, 2]
msg = r"take\(\) got an unexpected keyword argument 'foo'"
tm.assert_raises_regex(TypeError, msg, idx.take,
indices, foo=2)
msg = "the 'out' parameter is not supported"
tm.assert_raises_regex(ValueError, msg, idx.take,
indices, out=indices)
msg = "the 'mode' parameter is not supported"
tm.assert_raises_regex(ValueError, msg, idx.take,
indices, mode='clip')
@pytest.mark.parametrize('other',
[Index(['three', 'one', 'two']),
Index(['one']),
Index(['one', 'three'])])
def test_join_level(self, other, join_type):
join_index, lidx, ridx = other.join(self.index, how=join_type,
level='second',
return_indexers=True)
exp_level = other.join(self.index.levels[1], how=join_type)
assert join_index.levels[0].equals(self.index.levels[0])
assert join_index.levels[1].equals(exp_level)
# pare down levels
mask = np.array(
[x[1] in exp_level for x in self.index], dtype=bool)
exp_values = self.index.values[mask]
tm.assert_numpy_array_equal(join_index.values, exp_values)
if join_type in ('outer', 'inner'):
join_index2, ridx2, lidx2 = \
self.index.join(other, how=join_type, level='second',
return_indexers=True)
assert join_index.equals(join_index2)
tm.assert_numpy_array_equal(lidx, lidx2)
tm.assert_numpy_array_equal(ridx, ridx2)
tm.assert_numpy_array_equal(join_index2.values, exp_values)
def test_join_level_corner_case(self):
# some corner cases
idx = Index(['three', 'one', 'two'])
result = idx.join(self.index, level='second')
assert isinstance(result, MultiIndex)
tm.assert_raises_regex(TypeError, "Join.*MultiIndex.*ambiguous",
self.index.join, self.index, level=1)
def test_join_self(self, join_type):
res = self.index
joined = res.join(res, how=join_type)
assert res is joined
def test_join_multi(self):
# GH 10665
midx = pd.MultiIndex.from_product(
[np.arange(4), np.arange(4)], names=['a', 'b'])
idx = pd.Index([1, 2, 5], name='b')
# inner
jidx, lidx, ridx = midx.join(idx, how='inner', return_indexers=True)
exp_idx = pd.MultiIndex.from_product(
[np.arange(4), [1, 2]], names=['a', 'b'])
exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp)
exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp)
tm.assert_index_equal(jidx, exp_idx)
tm.assert_numpy_array_equal(lidx, exp_lidx)
tm.assert_numpy_array_equal(ridx, exp_ridx)
# flip
jidx, ridx, lidx = idx.join(midx, how='inner', return_indexers=True)
tm.assert_index_equal(jidx, exp_idx)
tm.assert_numpy_array_equal(lidx, exp_lidx)
tm.assert_numpy_array_equal(ridx, exp_ridx)
# keep MultiIndex
jidx, lidx, ridx = midx.join(idx, how='left', return_indexers=True)
exp_ridx = np.array([-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0,
1, -1], dtype=np.intp)
tm.assert_index_equal(jidx, midx)
assert lidx is None
tm.assert_numpy_array_equal(ridx, exp_ridx)
# flip
jidx, ridx, lidx = idx.join(midx, how='right', return_indexers=True)
tm.assert_index_equal(jidx, midx)
assert lidx is None
tm.assert_numpy_array_equal(ridx, exp_ridx)
def test_reindex(self):
result, indexer = self.index.reindex(list(self.index[:4]))
assert isinstance(result, MultiIndex)
self.check_level_names(result, self.index[:4].names)
result, indexer = self.index.reindex(list(self.index))
assert isinstance(result, MultiIndex)
assert indexer is None
self.check_level_names(result, self.index.names)
def test_reindex_level(self):
idx = Index(['one'])
target, indexer = self.index.reindex(idx, level='second')
target2, indexer2 = idx.reindex(self.index, level='second')
exp_index = self.index.join(idx, level='second', how='right')
exp_index2 = self.index.join(idx, level='second', how='left')
assert target.equals(exp_index)
exp_indexer = np.array([0, 2, 4])
tm.assert_numpy_array_equal(indexer, exp_indexer, check_dtype=False)
assert target2.equals(exp_index2)
exp_indexer2 = np.array([0, -1, 0, -1, 0, -1])
tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False)
tm.assert_raises_regex(TypeError, "Fill method not supported",
self.index.reindex, self.index,
method='pad', level='second')
tm.assert_raises_regex(TypeError, "Fill method not supported",
idx.reindex, idx, method='bfill',
level='first')
def test_duplicates(self):
assert not self.index.has_duplicates
assert self.index.append(self.index).has_duplicates
index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[
[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]])
assert index.has_duplicates
# GH 9075
t = [(u('x'), u('out'), u('z'), 5, u('y'), u('in'), u('z'), 169),
(u('x'), u('out'), u('z'), 7, u('y'), u('in'), u('z'), 119),
(u('x'), u('out'), u('z'), 9, u('y'), u('in'), u('z'), 135),
(u('x'), u('out'), u('z'), 13, u('y'), u('in'), u('z'), 145),
(u('x'), u('out'), u('z'), 14, u('y'), u('in'), u('z'), 158),
(u('x'), u('out'), u('z'), 16, u('y'), u('in'), u('z'), 122),
(u('x'), u('out'), u('z'), 17, u('y'), u('in'), u('z'), 160),
(u('x'), u('out'), u('z'), 18, u('y'), u('in'), u('z'), 180),
(u('x'), u('out'), u('z'), 20, u('y'), u('in'), u('z'), 143),
(u('x'), u('out'), u('z'), 21, u('y'), u('in'), u('z'), 128),
(u('x'), u('out'), u('z'), 22, u('y'), u('in'), u('z'), 129),
(u('x'), u('out'), u('z'), 25, u('y'), u('in'), u('z'), 111),
(u('x'), u('out'), u('z'), 28, u('y'), u('in'), u('z'), 114),
(u('x'), u('out'), u('z'), 29, u('y'), u('in'), u('z'), 121),
(u('x'), u('out'), u('z'), 31, u('y'), u('in'), u('z'), 126),
(u('x'), u('out'), u('z'), 32, u('y'), u('in'), u('z'), 155),
(u('x'), u('out'), u('z'), 33, u('y'), u('in'), u('z'), 123),
(u('x'), u('out'), u('z'), 12, u('y'), u('in'), u('z'), 144)]
index = pd.MultiIndex.from_tuples(t)
assert not index.has_duplicates
# handle int64 overflow if possible
def check(nlevels, with_nulls):
labels = np.tile(np.arange(500), 2)
level = np.arange(500)
if with_nulls: # inject some null values
labels[500] = -1 # common nan value
labels = [labels.copy() for i in range(nlevels)]
for i in range(nlevels):
labels[i][500 + i - nlevels // 2] = -1
labels += [np.array([-1, 1]).repeat(500)]
else:
labels = [labels] * nlevels + [np.arange(2).repeat(500)]
levels = [level] * nlevels + [[0, 1]]
# no dups
index = MultiIndex(levels=levels, labels=labels)
assert not index.has_duplicates
# with a dup
if with_nulls:
f = lambda a: np.insert(a, 1000, a[0])
labels = list(map(f, labels))
index = MultiIndex(levels=levels, labels=labels)
else:
values = index.values.tolist()
index = MultiIndex.from_tuples(values + [values[0]])
assert index.has_duplicates
# no overflow
check(4, False)
check(4, True)
# overflow possible
check(8, False)
check(8, True)
# GH 9125
n, k = 200, 5000
levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)]
labels = [np.random.choice(n, k * n) for lev in levels]
mi = MultiIndex(levels=levels, labels=labels)
for keep in ['first', 'last', False]:
left = mi.duplicated(keep=keep)
right = pd._libs.hashtable.duplicated_object(mi.values, keep=keep)
tm.assert_numpy_array_equal(left, right)
# GH5873
for a in [101, 102]:
mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]])
assert not mi.has_duplicates
with warnings.catch_warnings(record=True):
# Deprecated - see GH20239
assert mi.get_duplicates().equals(MultiIndex.from_arrays(
[[], []]))
tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(
2, dtype='bool'))
for n in range(1, 6): # 1st level shape
for m in range(1, 5): # 2nd level shape
# all possible unique combinations, including nan
lab = product(range(-1, n), range(-1, m))
mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]],
labels=np.random.permutation(list(lab)).T)
assert len(mi) == (n + 1) * (m + 1)
assert not mi.has_duplicates
with warnings.catch_warnings(record=True):
# Deprecated - see GH20239
assert mi.get_duplicates().equals(MultiIndex.from_arrays(
[[], []]))
tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(
len(mi), dtype='bool'))
def test_duplicate_meta_data(self):
# GH 10115
index = MultiIndex(
levels=[[0, 1], [0, 1, 2]],
labels=[[0, 0, 0, 0, 1, 1, 1],
[0, 1, 2, 0, 0, 1, 2]])
for idx in [index,
index.set_names([None, None]),
index.set_names([None, 'Num']),
index.set_names(['Upper', 'Num']), ]:
assert idx.has_duplicates
assert idx.drop_duplicates().names == idx.names
def test_get_unique_index(self):
idx = self.index[[0, 1, 0, 1, 1, 0, 0]]
expected = self.index._shallow_copy(idx[[0, 1]])
for dropna in [False, True]:
result = idx._get_unique_index(dropna=dropna)
assert result.unique
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize('names', [None, ['first', 'second']])
def test_unique(self, names):
mi = pd.MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]],
names=names)
res = mi.unique()
exp = pd.MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names)
tm.assert_index_equal(res, exp)
mi = pd.MultiIndex.from_arrays([list('aaaa'), list('abab')],
names=names)
res = mi.unique()
exp = pd.MultiIndex.from_arrays([list('aa'), list('ab')],
names=mi.names)
tm.assert_index_equal(res, exp)
mi = pd.MultiIndex.from_arrays([list('aaaa'), list('aaaa')],
names=names)
res = mi.unique()
exp = pd.MultiIndex.from_arrays([['a'], ['a']], names=mi.names)
tm.assert_index_equal(res, exp)
# GH #20568 - empty MI
mi = pd.MultiIndex.from_arrays([[], []], names=names)
res = mi.unique()
tm.assert_index_equal(mi, res)
@pytest.mark.parametrize('level', [0, 'first', 1, 'second'])
def test_unique_level(self, level):
# GH #17896 - with level= argument
result = self.index.unique(level=level)
expected = self.index.get_level_values(level).unique()
tm.assert_index_equal(result, expected)
# With already unique level
mi = pd.MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]],
names=['first', 'second'])
result = mi.unique(level=level)
expected = mi.get_level_values(level)
tm.assert_index_equal(result, expected)
# With empty MI
mi = pd.MultiIndex.from_arrays([[], []], names=['first', 'second'])
result = mi.unique(level=level)
expected = mi.get_level_values(level)
def test_unique_datetimelike(self):
idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01',
'2015-01-01', 'NaT', 'NaT'])
idx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-02',
'2015-01-02', 'NaT', '2015-01-01'],
tz='Asia/Tokyo')
result = pd.MultiIndex.from_arrays([idx1, idx2]).unique()
eidx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', 'NaT', 'NaT'])
eidx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-02',
'NaT', '2015-01-01'],
tz='Asia/Tokyo')
exp = pd.MultiIndex.from_arrays([eidx1, eidx2])
tm.assert_index_equal(result, exp)
def test_tolist(self):
result = self.index.tolist()
exp = list(self.index.values)
assert result == exp
def test_repr_with_unicode_data(self):
with pd.core.config.option_context("display.encoding", 'UTF-8'):
d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
index = pd.DataFrame(d).set_index(["a", "b"]).index
assert "\\u" not in repr(index) # we don't want unicode-escaped
def test_repr_roundtrip(self):
mi = MultiIndex.from_product([list('ab'), range(3)],
names=['first', 'second'])
str(mi)
if PY3:
tm.assert_index_equal(eval(repr(mi)), mi, exact=True)
else:
result = eval(repr(mi))
# string coerces to unicode
tm.assert_index_equal(result, mi, exact=False)
assert mi.get_level_values('first').inferred_type == 'string'
assert result.get_level_values('first').inferred_type == 'unicode'
mi_u = MultiIndex.from_product(
[list(u'ab'), range(3)], names=['first', 'second'])
result = eval(repr(mi_u))
tm.assert_index_equal(result, mi_u, exact=True)
# formatting
if PY3:
str(mi)
else:
compat.text_type(mi)
# long format
mi = MultiIndex.from_product([list('abcdefg'), range(10)],
names=['first', 'second'])
if PY3:
tm.assert_index_equal(eval(repr(mi)), mi, exact=True)
else:
result = eval(repr(mi))
# string coerces to unicode
tm.assert_index_equal(result, mi, exact=False)
assert mi.get_level_values('first').inferred_type == 'string'
assert result.get_level_values('first').inferred_type == 'unicode'
result = eval(repr(mi_u))
tm.assert_index_equal(result, mi_u, exact=True)
def test_str(self):
# tested elsewhere
pass
def test_unicode_string_with_unicode(self):
d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
idx = pd.DataFrame(d).set_index(["a", "b"]).index
if PY3:
str(idx)
else:
compat.text_type(idx)
def test_bytestring_with_unicode(self):
d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
idx = pd.DataFrame(d).set_index(["a", "b"]).index
if PY3:
bytes(idx)
else:
str(idx)
def test_slice_keep_name(self):
x = MultiIndex.from_tuples([('a', 'b'), (1, 2), ('c', 'd')],
names=['x', 'y'])
assert x[1:].names == x.names
def test_isna_behavior(self):
# should not segfault GH5123
# NOTE: if MI representation changes, may make sense to allow
# isna(MI)
with pytest.raises(NotImplementedError):
pd.isna(self.index)
def test_level_setting_resets_attributes(self):
ind = pd.MultiIndex.from_arrays([
['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]
])
assert ind.is_monotonic
ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True)
# if this fails, probably didn't reset the cache correctly.
assert not ind.is_monotonic
def test_is_monotonic_increasing(self):
i = MultiIndex.from_product([np.arange(10),
np.arange(10)], names=['one', 'two'])
assert i.is_monotonic
assert i._is_strictly_monotonic_increasing
assert Index(i.values).is_monotonic
assert i._is_strictly_monotonic_increasing
i = MultiIndex.from_product([np.arange(10, 0, -1),
np.arange(10)], names=['one', 'two'])
assert not i.is_monotonic
assert not i._is_strictly_monotonic_increasing
assert not Index(i.values).is_monotonic
assert not Index(i.values)._is_strictly_monotonic_increasing
i = MultiIndex.from_product([np.arange(10),
np.arange(10, 0, -1)],
names=['one', 'two'])
assert not i.is_monotonic
assert not i._is_strictly_monotonic_increasing
assert not Index(i.values).is_monotonic
assert not Index(i.values)._is_strictly_monotonic_increasing
i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']])
assert not i.is_monotonic
assert not i._is_strictly_monotonic_increasing
assert not Index(i.values).is_monotonic
assert not Index(i.values)._is_strictly_monotonic_increasing
# string ordering
i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
['one', 'two', 'three']],
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=['first', 'second'])
assert not i.is_monotonic
assert not Index(i.values).is_monotonic
assert not i._is_strictly_monotonic_increasing
assert not Index(i.values)._is_strictly_monotonic_increasing
i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'],
['mom', 'next', 'zenith']],
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=['first', 'second'])
assert i.is_monotonic
assert Index(i.values).is_monotonic
assert i._is_strictly_monotonic_increasing
assert Index(i.values)._is_strictly_monotonic_increasing
# mixed levels, hits the TypeError
i = MultiIndex(
levels=[[1, 2, 3, 4], ['gb00b03mlx29', 'lu0197800237',
'nl0000289783',
'nl0000289965', 'nl0000301109']],
labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
names=['household_id', 'asset_id'])
assert not i.is_monotonic
assert not i._is_strictly_monotonic_increasing
# empty
i = MultiIndex.from_arrays([[], []])
assert i.is_monotonic
assert Index(i.values).is_monotonic
assert i._is_strictly_monotonic_increasing
assert Index(i.values)._is_strictly_monotonic_increasing
def test_is_monotonic_decreasing(self):
i = MultiIndex.from_product([np.arange(9, -1, -1),
np.arange(9, -1, -1)],
names=['one', 'two'])
assert i.is_monotonic_decreasing
assert i._is_strictly_monotonic_decreasing
assert Index(i.values).is_monotonic_decreasing
assert i._is_strictly_monotonic_decreasing
i = MultiIndex.from_product([np.arange(10),
np.arange(10, 0, -1)],
names=['one', 'two'])
assert not i.is_monotonic_decreasing
assert not i._is_strictly_monotonic_decreasing
assert not Index(i.values).is_monotonic_decreasing
assert not Index(i.values)._is_strictly_monotonic_decreasing
i = MultiIndex.from_product([np.arange(10, 0, -1),
np.arange(10)], names=['one', 'two'])
assert not i.is_monotonic_decreasing
assert not i._is_strictly_monotonic_decreasing
assert not Index(i.values).is_monotonic_decreasing
assert not Index(i.values)._is_strictly_monotonic_decreasing
i = MultiIndex.from_product([[2.0, np.nan, 1.0], ['c', 'b', 'a']])
assert not i.is_monotonic_decreasing
assert not i._is_strictly_monotonic_decreasing
assert not Index(i.values).is_monotonic_decreasing
assert not Index(i.values)._is_strictly_monotonic_decreasing
# string ordering
i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'],
['three', 'two', 'one']],
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=['first', 'second'])
assert not i.is_monotonic_decreasing
assert not Index(i.values).is_monotonic_decreasing
assert not i._is_strictly_monotonic_decreasing
assert not Index(i.values)._is_strictly_monotonic_decreasing
i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'],
['zenith', 'next', 'mom']],
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=['first', 'second'])
assert i.is_monotonic_decreasing
assert Index(i.values).is_monotonic_decreasing
assert i._is_strictly_monotonic_decreasing
assert Index(i.values)._is_strictly_monotonic_decreasing
# mixed levels, hits the TypeError
i = MultiIndex(
levels=[[4, 3, 2, 1], ['nl0000301109', 'nl0000289965',
'nl0000289783', 'lu0197800237',
'gb00b03mlx29']],
labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
names=['household_id', 'asset_id'])
assert not i.is_monotonic_decreasing
assert not i._is_strictly_monotonic_decreasing
# empty
i = MultiIndex.from_arrays([[], []])
assert i.is_monotonic_decreasing
assert Index(i.values).is_monotonic_decreasing
assert i._is_strictly_monotonic_decreasing
assert Index(i.values)._is_strictly_monotonic_decreasing
def test_is_strictly_monotonic_increasing(self):
idx = pd.MultiIndex(levels=[['bar', 'baz'], ['mom', 'next']],
labels=[[0, 0, 1, 1], [0, 0, 0, 1]])
assert idx.is_monotonic_increasing
assert not idx._is_strictly_monotonic_increasing
def test_is_strictly_monotonic_decreasing(self):
idx = pd.MultiIndex(levels=[['baz', 'bar'], ['next', 'mom']],
labels=[[0, 0, 1, 1], [0, 0, 0, 1]])
assert idx.is_monotonic_decreasing
assert not idx._is_strictly_monotonic_decreasing
def test_reconstruct_sort(self):
# starts off lexsorted & monotonic
mi = MultiIndex.from_arrays([
['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]
])
assert mi.is_lexsorted()
assert mi.is_monotonic
recons = mi._sort_levels_monotonic()
assert recons.is_lexsorted()
assert recons.is_monotonic
assert mi is recons
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))
# cannot convert to lexsorted
mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'),
('x', 'b'), ('y', 'a'), ('z', 'b')],
names=['one', 'two'])
assert not mi.is_lexsorted()
assert not mi.is_monotonic
recons = mi._sort_levels_monotonic()
assert not recons.is_lexsorted()
assert not recons.is_monotonic
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))
# cannot convert to lexsorted
mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]],
labels=[[0, 1, 0, 2], [2, 0, 0, 1]],
names=['col1', 'col2'])
assert not mi.is_lexsorted()
assert not mi.is_monotonic
recons = mi._sort_levels_monotonic()
assert not recons.is_lexsorted()
assert not recons.is_monotonic
assert mi.equals(recons)
assert Index(mi.values).equals(Index(recons.values))
def test_reconstruct_remove_unused(self):
# xref to GH 2770
df = DataFrame([['deleteMe', 1, 9],
['keepMe', 2, 9],
['keepMeToo', 3, 9]],
columns=['first', 'second', 'third'])
df2 = df.set_index(['first', 'second'], drop=False)
df2 = df2[df2['first'] != 'deleteMe']
# removed levels are there
expected = MultiIndex(levels=[['deleteMe', 'keepMe', 'keepMeToo'],
[1, 2, 3]],
labels=[[1, 2], [1, 2]],
names=['first', 'second'])
result = df2.index
tm.assert_index_equal(result, expected)
expected = MultiIndex(levels=[['keepMe', 'keepMeToo'],
[2, 3]],
labels=[[0, 1], [0, 1]],
names=['first', 'second'])
result = df2.index.remove_unused_levels()
tm.assert_index_equal(result, expected)
# idempotent
result2 = result.remove_unused_levels()
tm.assert_index_equal(result2, expected)
assert result2.is_(result)
@pytest.mark.parametrize('level0', [['a', 'd', 'b'],
['a', 'd', 'b', 'unused']])
@pytest.mark.parametrize('level1', [['w', 'x', 'y', 'z'],
['w', 'x', 'y', 'z', 'unused']])
def test_remove_unused_nan(self, level0, level1):
# GH 18417
mi = pd.MultiIndex(levels=[level0, level1],
labels=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]])
result = mi.remove_unused_levels()
tm.assert_index_equal(result, mi)
for level in 0, 1:
assert('unused' not in result.levels[level])
@pytest.mark.parametrize('first_type,second_type', [
('int64', 'int64'),
('datetime64[D]', 'str')])
def test_remove_unused_levels_large(self, first_type, second_type):
# GH16556
# because tests should be deterministic (and this test in particular
# checks that levels are removed, which is not the case for every
# random input):
rng = np.random.RandomState(4) # seed is arbitrary value that works
size = 1 << 16
df = DataFrame(dict(
first=rng.randint(0, 1 << 13, size).astype(first_type),
second=rng.randint(0, 1 << 10, size).astype(second_type),
third=rng.rand(size)))
df = df.groupby(['first', 'second']).sum()
df = df[df.third < 0.1]
result = df.index.remove_unused_levels()
assert len(result.levels[0]) < len(df.index.levels[0])
assert len(result.levels[1]) < len(df.index.levels[1])
assert result.equals(df.index)
expected = df.reset_index().set_index(['first', 'second']).index
tm.assert_index_equal(result, expected)
def test_isin(self):
values = [('foo', 2), ('bar', 3), ('quux', 4)]
idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange(
4)])
result = idx.isin(values)
expected = np.array([False, False, True, True])
tm.assert_numpy_array_equal(result, expected)
# empty, return dtype bool
idx = MultiIndex.from_arrays([[], []])
result = idx.isin(values)
assert len(result) == 0
assert result.dtype == np.bool_
@pytest.mark.skipif(PYPY, reason="tuples cmp recursively on PyPy")
def test_isin_nan_not_pypy(self):
idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]])
tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]),
np.array([False, False]))
tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]),
np.array([False, False]))
@pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy")
def test_isin_nan_pypy(self):
idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]])
tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]),
np.array([False, True]))
tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]),
np.array([False, True]))
def test_isin_level_kwarg(self):
idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange(
4)])
vals_0 = ['foo', 'bar', 'quux']
vals_1 = [2, 3, 10]
expected = np.array([False, False, True, True])
tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0))
tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=-2))
tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1))
tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1))
pytest.raises(IndexError, idx.isin, vals_0, level=5)
pytest.raises(IndexError, idx.isin, vals_0, level=-5)
pytest.raises(KeyError, idx.isin, vals_0, level=1.0)
pytest.raises(KeyError, idx.isin, vals_1, level=-1.0)
pytest.raises(KeyError, idx.isin, vals_1, level='A')
idx.names = ['A', 'B']
tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level='A'))
tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level='B'))
pytest.raises(KeyError, idx.isin, vals_1, level='C')
def test_reindex_preserves_names_when_target_is_list_or_ndarray(self):
# GH6552
idx = self.index.copy()
target = idx.copy()
idx.names = target.names = [None, None]
other_dtype = pd.MultiIndex.from_product([[1, 2], [3, 4]])
# list & ndarray cases
assert idx.reindex([])[0].names == [None, None]
assert idx.reindex(np.array([]))[0].names == [None, None]
assert idx.reindex(target.tolist())[0].names == [None, None]
assert idx.reindex(target.values)[0].names == [None, None]
assert idx.reindex(other_dtype.tolist())[0].names == [None, None]
assert idx.reindex(other_dtype.values)[0].names == [None, None]
idx.names = ['foo', 'bar']
assert idx.reindex([])[0].names == ['foo', 'bar']
assert idx.reindex(np.array([]))[0].names == ['foo', 'bar']
assert idx.reindex(target.tolist())[0].names == ['foo', 'bar']
assert idx.reindex(target.values)[0].names == ['foo', 'bar']
assert idx.reindex(other_dtype.tolist())[0].names == ['foo', 'bar']
assert idx.reindex(other_dtype.values)[0].names == ['foo', 'bar']
def test_reindex_lvl_preserves_names_when_target_is_list_or_array(self):
# GH7774
idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']],
names=['foo', 'bar'])
assert idx.reindex([], level=0)[0].names == ['foo', 'bar']
assert idx.reindex([], level=1)[0].names == ['foo', 'bar']
def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(self):
# GH7774
idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']])
assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64
assert idx.reindex([], level=1)[0].levels[1].dtype.type == np.object_
def test_groupby(self):
groups = self.index.groupby(np.array([1, 1, 1, 2, 2, 2]))
labels = self.index.get_values().tolist()
exp = {1: labels[:3], 2: labels[3:]}
tm.assert_dict_equal(groups, exp)
# GH5620
groups = self.index.groupby(self.index)
exp = {key: [key] for key in self.index}
tm.assert_dict_equal(groups, exp)
def test_index_name_retained(self):
# GH9857
result = pd.DataFrame({'x': [1, 2, 6],
'y': [2, 2, 8],
'z': [-5, 0, 5]})
result = result.set_index('z')
result.loc[10] = [9, 10]
df_expected = pd.DataFrame({'x': [1, 2, 6, 9],
'y': [2, 2, 8, 10],
'z': [-5, 0, 5, 10]})
df_expected = df_expected.set_index('z')
tm.assert_frame_equal(result, df_expected)
def test_equals_operator(self):
# GH9785
assert (self.index == self.index).all()
def test_large_multiindex_error(self):
# GH12527
df_below_1000000 = pd.DataFrame(
1, index=pd.MultiIndex.from_product([[1, 2], range(499999)]),
columns=['dest'])
with pytest.raises(KeyError):
df_below_1000000.loc[(-1, 0), 'dest']
with pytest.raises(KeyError):
df_below_1000000.loc[(3, 0), 'dest']
df_above_1000000 = pd.DataFrame(
1, index=pd.MultiIndex.from_product([[1, 2], range(500001)]),
columns=['dest'])
with pytest.raises(KeyError):
df_above_1000000.loc[(-1, 0), 'dest']
with pytest.raises(KeyError):
df_above_1000000.loc[(3, 0), 'dest']
def test_partial_string_timestamp_multiindex(self):
# GH10331
dr = pd.date_range('2016-01-01', '2016-01-03', freq='12H')
abc = ['a', 'b', 'c']
ix = pd.MultiIndex.from_product([dr, abc])
df = pd.DataFrame({'c1': range(0, 15)}, index=ix)
idx = pd.IndexSlice
# c1
# 2016-01-01 00:00:00 a 0
# b 1
# c 2
# 2016-01-01 12:00:00 a 3
# b 4
# c 5
# 2016-01-02 00:00:00 a 6
# b 7
# c 8
# 2016-01-02 12:00:00 a 9
# b 10
# c 11
# 2016-01-03 00:00:00 a 12
# b 13
# c 14
# partial string matching on a single index
for df_swap in (df.swaplevel(),
df.swaplevel(0),
df.swaplevel(0, 1)):
df_swap = df_swap.sort_index()
just_a = df_swap.loc['a']
result = just_a.loc['2016-01-01']
expected = df.loc[idx[:, 'a'], :].iloc[0:2]
expected.index = expected.index.droplevel(1)
tm.assert_frame_equal(result, expected)
# indexing with IndexSlice
result = df.loc[idx['2016-01-01':'2016-02-01', :], :]
expected = df
tm.assert_frame_equal(result, expected)
# match on secondary index
result = df_swap.loc[idx[:, '2016-01-01':'2016-01-01'], :]
expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]]
tm.assert_frame_equal(result, expected)
# Even though this syntax works on a single index, this is somewhat
# ambiguous and we don't want to extend this behavior forward to work
# in multi-indexes. This would amount to selecting a scalar from a
# column.
with pytest.raises(KeyError):
df['2016-01-01']
# partial string match on year only
result = df.loc['2016']
expected = df
tm.assert_frame_equal(result, expected)
# partial string match on date
result = df.loc['2016-01-01']
expected = df.iloc[0:6]
tm.assert_frame_equal(result, expected)
# partial string match on date and hour, from middle
result = df.loc['2016-01-02 12']
expected = df.iloc[9:12]
tm.assert_frame_equal(result, expected)
# partial string match on secondary index
result = df_swap.loc[idx[:, '2016-01-02'], :]
expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]]
tm.assert_frame_equal(result, expected)
# tuple selector with partial string match on date
result = df.loc[('2016-01-01', 'a'), :]
expected = df.iloc[[0, 3]]
tm.assert_frame_equal(result, expected)
# Slicing date on first level should break (of course)
with pytest.raises(KeyError):
df_swap.loc['2016-01-01']
# GH12685 (partial string with daily resolution or below)
dr = date_range('2013-01-01', periods=100, freq='D')
ix = MultiIndex.from_product([dr, ['a', 'b']])
df = DataFrame(np.random.randn(200, 1), columns=['A'], index=ix)
result = df.loc[idx['2013-03':'2013-03', :], :]
expected = df.iloc[118:180]
tm.assert_frame_equal(result, expected)
def test_rangeindex_fallback_coercion_bug(self):
# GH 12893
foo = pd.DataFrame(np.arange(100).reshape((10, 10)))
bar = pd.DataFrame(np.arange(100).reshape((10, 10)))
df = pd.concat({'foo': foo.stack(), 'bar': bar.stack()}, axis=1)
df.index.names = ['fizz', 'buzz']
str(df)
expected = pd.DataFrame({'bar': np.arange(100),
'foo': np.arange(100)},
index=pd.MultiIndex.from_product(
[range(10), range(10)],
names=['fizz', 'buzz']))
tm.assert_frame_equal(df, expected, check_like=True)
result = df.index.get_level_values('fizz')
expected = pd.Int64Index(np.arange(10), name='fizz').repeat(10)
tm.assert_index_equal(result, expected)
result = df.index.get_level_values('buzz')
expected = pd.Int64Index(np.tile(np.arange(10), 10), name='buzz')
tm.assert_index_equal(result, expected)
def test_dropna(self):
# GH 6194
idx = pd.MultiIndex.from_arrays([[1, np.nan, 3, np.nan, 5],
[1, 2, np.nan, np.nan, 5],
['a', 'b', 'c', np.nan, 'e']])
exp = pd.MultiIndex.from_arrays([[1, 5],
[1, 5],
['a', 'e']])
tm.assert_index_equal(idx.dropna(), exp)
tm.assert_index_equal(idx.dropna(how='any'), exp)
exp = pd.MultiIndex.from_arrays([[1, np.nan, 3, 5],
[1, 2, np.nan, 5],
['a', 'b', 'c', 'e']])
tm.assert_index_equal(idx.dropna(how='all'), exp)
msg = "invalid how option: xxx"
with tm.assert_raises_regex(ValueError, msg):
idx.dropna(how='xxx')
def test_unsortedindex(self):
# GH 11897
mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'),
('x', 'b'), ('y', 'a'), ('z', 'b')],
names=['one', 'two'])
df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi,
columns=['one', 'two'])
# GH 16734: not sorted, but no real slicing
result = df.loc(axis=0)['z', 'a']
expected = df.iloc[0]
tm.assert_series_equal(result, expected)
with pytest.raises(UnsortedIndexError):
df.loc(axis=0)['z', slice('a')]
df.sort_index(inplace=True)
assert len(df.loc(axis=0)['z', :]) == 2
with pytest.raises(KeyError):
df.loc(axis=0)['q', :]
def test_unsortedindex_doc_examples(self):
# http://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex # noqa
dfm = DataFrame({'jim': [0, 0, 1, 1],
'joe': ['x', 'x', 'z', 'y'],
'jolie': np.random.rand(4)})
dfm = dfm.set_index(['jim', 'joe'])
with tm.assert_produces_warning(PerformanceWarning):
dfm.loc[(1, 'z')]
with pytest.raises(UnsortedIndexError):
dfm.loc[(0, 'y'):(1, 'z')]
assert not dfm.index.is_lexsorted()
assert dfm.index.lexsort_depth == 1
# sort it
dfm = dfm.sort_index()
dfm.loc[(1, 'z')]
dfm.loc[(0, 'y'):(1, 'z')]
assert dfm.index.is_lexsorted()
assert dfm.index.lexsort_depth == 2
def test_tuples_with_name_string(self):
# GH 15110 and GH 14848
li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
with pytest.raises(ValueError):
pd.Index(li, name='abc')
with pytest.raises(ValueError):
pd.Index(li, name='a')
def test_nan_stays_float(self):
# GH 7031
idx0 = pd.MultiIndex(levels=[["A", "B"], []],
labels=[[1, 0], [-1, -1]],
names=[0, 1])
idx1 = pd.MultiIndex(levels=[["C"], ["D"]],
labels=[[0], [0]],
names=[0, 1])
idxm = idx0.join(idx1, how='outer')
assert pd.isna(idx0.get_level_values(1)).all()
# the following failed in 0.14.1
assert pd.isna(idxm.get_level_values(1)[:-1]).all()
df0 = pd.DataFrame([[1, 2]], index=idx0)
df1 = pd.DataFrame([[3, 4]], index=idx1)
dfm = df0 - df1
assert pd.isna(df0.index.get_level_values(1)).all()
# the following failed in 0.14.1
assert pd.isna(dfm.index.get_level_values(1)[:-1]).all()
def test_million_record_attribute_error(self):
# GH 18165
r = list(range(1000000))
df = pd.DataFrame({'a': r, 'b': r},
index=pd.MultiIndex.from_tuples([(x, x) for x in r]))
with tm.assert_raises_regex(AttributeError,
"'Series' object has no attribute 'foo'"):
df['a'].foo()
def test_duplicate_multiindex_labels(self):
# GH 17464
# Make sure that a MultiIndex with duplicate levels throws a ValueError
with pytest.raises(ValueError):
ind = pd.MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)])
# And that using set_levels with duplicate levels fails
ind = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'],
[1, 2, 1, 2, 3]])
with pytest.raises(ValueError):
ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]],
inplace=True)
def test_multiindex_compare(self):
# GH 21149
# Ensure comparison operations for MultiIndex with nlevels == 1
# behave consistently with those for MultiIndex with nlevels > 1
midx = pd.MultiIndex.from_product([[0, 1]])
# Equality self-test: MultiIndex object vs self
expected = pd.Series([True, True])
result = pd.Series(midx == midx)
tm.assert_series_equal(result, expected)
# Greater than comparison: MultiIndex object vs self
expected = pd.Series([False, False])
result = pd.Series(midx > midx)
tm.assert_series_equal(result, expected)