3328 lines
129 KiB
Python
3328 lines
129 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
import re
|
||
|
import warnings
|
||
|
|
||
|
from datetime import timedelta
|
||
|
from itertools import product
|
||
|
|
||
|
import pytest
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
import pandas as pd
|
||
|
|
||
|
from pandas import (CategoricalIndex, Categorical, DataFrame, Index,
|
||
|
MultiIndex, compat, date_range, period_range)
|
||
|
from pandas.compat import PY3, long, lrange, lzip, range, u, PYPY
|
||
|
from pandas.errors import PerformanceWarning, UnsortedIndexError
|
||
|
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||
|
from pandas.core.indexes.base import InvalidIndexError
|
||
|
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
|
||
|
from pandas._libs.tslib import Timestamp
|
||
|
|
||
|
import pandas.util.testing as tm
|
||
|
|
||
|
from pandas.util.testing import assert_almost_equal, assert_copy
|
||
|
|
||
|
from .common import Base
|
||
|
|
||
|
|
||
|
class TestMultiIndex(Base):
|
||
|
_holder = MultiIndex
|
||
|
_compat_props = ['shape', 'ndim', 'size']
|
||
|
|
||
|
def setup_method(self, method):
|
||
|
major_axis = Index(['foo', 'bar', 'baz', 'qux'])
|
||
|
minor_axis = Index(['one', 'two'])
|
||
|
|
||
|
major_labels = np.array([0, 0, 1, 2, 3, 3])
|
||
|
minor_labels = np.array([0, 1, 0, 1, 0, 1])
|
||
|
self.index_names = ['first', 'second']
|
||
|
self.indices = dict(index=MultiIndex(levels=[major_axis, minor_axis],
|
||
|
labels=[major_labels, minor_labels
|
||
|
], names=self.index_names,
|
||
|
verify_integrity=False))
|
||
|
self.setup_indices()
|
||
|
|
||
|
def create_index(self):
|
||
|
return self.index
|
||
|
|
||
|
def test_can_hold_identifiers(self):
|
||
|
idx = self.create_index()
|
||
|
key = idx[0]
|
||
|
assert idx._can_hold_identifiers_and_holds_name(key) is True
|
||
|
|
||
|
def test_boolean_context_compat2(self):
|
||
|
|
||
|
# boolean context compat
|
||
|
# GH7897
|
||
|
i1 = MultiIndex.from_tuples([('A', 1), ('A', 2)])
|
||
|
i2 = MultiIndex.from_tuples([('A', 1), ('A', 3)])
|
||
|
common = i1.intersection(i2)
|
||
|
|
||
|
def f():
|
||
|
if common:
|
||
|
pass
|
||
|
|
||
|
tm.assert_raises_regex(ValueError, 'The truth value of a', f)
|
||
|
|
||
|
def test_labels_dtypes(self):
|
||
|
|
||
|
# GH 8456
|
||
|
i = MultiIndex.from_tuples([('A', 1), ('A', 2)])
|
||
|
assert i.labels[0].dtype == 'int8'
|
||
|
assert i.labels[1].dtype == 'int8'
|
||
|
|
||
|
i = MultiIndex.from_product([['a'], range(40)])
|
||
|
assert i.labels[1].dtype == 'int8'
|
||
|
i = MultiIndex.from_product([['a'], range(400)])
|
||
|
assert i.labels[1].dtype == 'int16'
|
||
|
i = MultiIndex.from_product([['a'], range(40000)])
|
||
|
assert i.labels[1].dtype == 'int32'
|
||
|
|
||
|
i = pd.MultiIndex.from_product([['a'], range(1000)])
|
||
|
assert (i.labels[0] >= 0).all()
|
||
|
assert (i.labels[1] >= 0).all()
|
||
|
|
||
|
def test_where(self):
|
||
|
i = MultiIndex.from_tuples([('A', 1), ('A', 2)])
|
||
|
|
||
|
def f():
|
||
|
i.where(True)
|
||
|
|
||
|
pytest.raises(NotImplementedError, f)
|
||
|
|
||
|
def test_where_array_like(self):
|
||
|
i = MultiIndex.from_tuples([('A', 1), ('A', 2)])
|
||
|
klasses = [list, tuple, np.array, pd.Series]
|
||
|
cond = [False, True]
|
||
|
|
||
|
for klass in klasses:
|
||
|
f = lambda: i.where(klass(cond))
|
||
|
pytest.raises(NotImplementedError, f)
|
||
|
|
||
|
def test_repeat(self):
|
||
|
reps = 2
|
||
|
numbers = [1, 2, 3]
|
||
|
names = np.array(['foo', 'bar'])
|
||
|
|
||
|
m = MultiIndex.from_product([
|
||
|
numbers, names], names=names)
|
||
|
expected = MultiIndex.from_product([
|
||
|
numbers, names.repeat(reps)], names=names)
|
||
|
tm.assert_index_equal(m.repeat(reps), expected)
|
||
|
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
result = m.repeat(n=reps)
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
def test_numpy_repeat(self):
|
||
|
reps = 2
|
||
|
numbers = [1, 2, 3]
|
||
|
names = np.array(['foo', 'bar'])
|
||
|
|
||
|
m = MultiIndex.from_product([
|
||
|
numbers, names], names=names)
|
||
|
expected = MultiIndex.from_product([
|
||
|
numbers, names.repeat(reps)], names=names)
|
||
|
tm.assert_index_equal(np.repeat(m, reps), expected)
|
||
|
|
||
|
msg = "the 'axis' parameter is not supported"
|
||
|
tm.assert_raises_regex(
|
||
|
ValueError, msg, np.repeat, m, reps, axis=1)
|
||
|
|
||
|
def test_set_name_methods(self):
|
||
|
# so long as these are synonyms, we don't need to test set_names
|
||
|
assert self.index.rename == self.index.set_names
|
||
|
new_names = [name + "SUFFIX" for name in self.index_names]
|
||
|
ind = self.index.set_names(new_names)
|
||
|
assert self.index.names == self.index_names
|
||
|
assert ind.names == new_names
|
||
|
with tm.assert_raises_regex(ValueError, "^Length"):
|
||
|
ind.set_names(new_names + new_names)
|
||
|
new_names2 = [name + "SUFFIX2" for name in new_names]
|
||
|
res = ind.set_names(new_names2, inplace=True)
|
||
|
assert res is None
|
||
|
assert ind.names == new_names2
|
||
|
|
||
|
# set names for specific level (# GH7792)
|
||
|
ind = self.index.set_names(new_names[0], level=0)
|
||
|
assert self.index.names == self.index_names
|
||
|
assert ind.names == [new_names[0], self.index_names[1]]
|
||
|
|
||
|
res = ind.set_names(new_names2[0], level=0, inplace=True)
|
||
|
assert res is None
|
||
|
assert ind.names == [new_names2[0], self.index_names[1]]
|
||
|
|
||
|
# set names for multiple levels
|
||
|
ind = self.index.set_names(new_names, level=[0, 1])
|
||
|
assert self.index.names == self.index_names
|
||
|
assert ind.names == new_names
|
||
|
|
||
|
res = ind.set_names(new_names2, level=[0, 1], inplace=True)
|
||
|
assert res is None
|
||
|
assert ind.names == new_names2
|
||
|
|
||
|
@pytest.mark.parametrize('inplace', [True, False])
|
||
|
def test_set_names_with_nlevel_1(self, inplace):
|
||
|
# GH 21149
|
||
|
# Ensure that .set_names for MultiIndex with
|
||
|
# nlevels == 1 does not raise any errors
|
||
|
expected = pd.MultiIndex(levels=[[0, 1]],
|
||
|
labels=[[0, 1]],
|
||
|
names=['first'])
|
||
|
m = pd.MultiIndex.from_product([[0, 1]])
|
||
|
result = m.set_names('first', level=0, inplace=inplace)
|
||
|
|
||
|
if inplace:
|
||
|
result = m
|
||
|
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
def test_set_levels_labels_directly(self):
|
||
|
# setting levels/labels directly raises AttributeError
|
||
|
|
||
|
levels = self.index.levels
|
||
|
new_levels = [[lev + 'a' for lev in level] for level in levels]
|
||
|
|
||
|
labels = self.index.labels
|
||
|
major_labels, minor_labels = labels
|
||
|
major_labels = [(x + 1) % 3 for x in major_labels]
|
||
|
minor_labels = [(x + 1) % 1 for x in minor_labels]
|
||
|
new_labels = [major_labels, minor_labels]
|
||
|
|
||
|
with pytest.raises(AttributeError):
|
||
|
self.index.levels = new_levels
|
||
|
|
||
|
with pytest.raises(AttributeError):
|
||
|
self.index.labels = new_labels
|
||
|
|
||
|
def test_set_levels(self):
|
||
|
# side note - you probably wouldn't want to use levels and labels
|
||
|
# directly like this - but it is possible.
|
||
|
levels = self.index.levels
|
||
|
new_levels = [[lev + 'a' for lev in level] for level in levels]
|
||
|
|
||
|
def assert_matching(actual, expected, check_dtype=False):
|
||
|
# avoid specifying internal representation
|
||
|
# as much as possible
|
||
|
assert len(actual) == len(expected)
|
||
|
for act, exp in zip(actual, expected):
|
||
|
act = np.asarray(act)
|
||
|
exp = np.asarray(exp)
|
||
|
tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype)
|
||
|
|
||
|
# level changing [w/o mutation]
|
||
|
ind2 = self.index.set_levels(new_levels)
|
||
|
assert_matching(ind2.levels, new_levels)
|
||
|
assert_matching(self.index.levels, levels)
|
||
|
|
||
|
# level changing [w/ mutation]
|
||
|
ind2 = self.index.copy()
|
||
|
inplace_return = ind2.set_levels(new_levels, inplace=True)
|
||
|
assert inplace_return is None
|
||
|
assert_matching(ind2.levels, new_levels)
|
||
|
|
||
|
# level changing specific level [w/o mutation]
|
||
|
ind2 = self.index.set_levels(new_levels[0], level=0)
|
||
|
assert_matching(ind2.levels, [new_levels[0], levels[1]])
|
||
|
assert_matching(self.index.levels, levels)
|
||
|
|
||
|
ind2 = self.index.set_levels(new_levels[1], level=1)
|
||
|
assert_matching(ind2.levels, [levels[0], new_levels[1]])
|
||
|
assert_matching(self.index.levels, levels)
|
||
|
|
||
|
# level changing multiple levels [w/o mutation]
|
||
|
ind2 = self.index.set_levels(new_levels, level=[0, 1])
|
||
|
assert_matching(ind2.levels, new_levels)
|
||
|
assert_matching(self.index.levels, levels)
|
||
|
|
||
|
# level changing specific level [w/ mutation]
|
||
|
ind2 = self.index.copy()
|
||
|
inplace_return = ind2.set_levels(new_levels[0], level=0, inplace=True)
|
||
|
assert inplace_return is None
|
||
|
assert_matching(ind2.levels, [new_levels[0], levels[1]])
|
||
|
assert_matching(self.index.levels, levels)
|
||
|
|
||
|
ind2 = self.index.copy()
|
||
|
inplace_return = ind2.set_levels(new_levels[1], level=1, inplace=True)
|
||
|
assert inplace_return is None
|
||
|
assert_matching(ind2.levels, [levels[0], new_levels[1]])
|
||
|
assert_matching(self.index.levels, levels)
|
||
|
|
||
|
# level changing multiple levels [w/ mutation]
|
||
|
ind2 = self.index.copy()
|
||
|
inplace_return = ind2.set_levels(new_levels, level=[0, 1],
|
||
|
inplace=True)
|
||
|
assert inplace_return is None
|
||
|
assert_matching(ind2.levels, new_levels)
|
||
|
assert_matching(self.index.levels, levels)
|
||
|
|
||
|
# illegal level changing should not change levels
|
||
|
# GH 13754
|
||
|
original_index = self.index.copy()
|
||
|
for inplace in [True, False]:
|
||
|
with tm.assert_raises_regex(ValueError, "^On"):
|
||
|
self.index.set_levels(['c'], level=0, inplace=inplace)
|
||
|
assert_matching(self.index.levels, original_index.levels,
|
||
|
check_dtype=True)
|
||
|
|
||
|
with tm.assert_raises_regex(ValueError, "^On"):
|
||
|
self.index.set_labels([0, 1, 2, 3, 4, 5], level=0,
|
||
|
inplace=inplace)
|
||
|
assert_matching(self.index.labels, original_index.labels,
|
||
|
check_dtype=True)
|
||
|
|
||
|
with tm.assert_raises_regex(TypeError, "^Levels"):
|
||
|
self.index.set_levels('c', level=0, inplace=inplace)
|
||
|
assert_matching(self.index.levels, original_index.levels,
|
||
|
check_dtype=True)
|
||
|
|
||
|
with tm.assert_raises_regex(TypeError, "^Labels"):
|
||
|
self.index.set_labels(1, level=0, inplace=inplace)
|
||
|
assert_matching(self.index.labels, original_index.labels,
|
||
|
check_dtype=True)
|
||
|
|
||
|
def test_set_labels(self):
|
||
|
# side note - you probably wouldn't want to use levels and labels
|
||
|
# directly like this - but it is possible.
|
||
|
labels = self.index.labels
|
||
|
major_labels, minor_labels = labels
|
||
|
major_labels = [(x + 1) % 3 for x in major_labels]
|
||
|
minor_labels = [(x + 1) % 1 for x in minor_labels]
|
||
|
new_labels = [major_labels, minor_labels]
|
||
|
|
||
|
def assert_matching(actual, expected):
|
||
|
# avoid specifying internal representation
|
||
|
# as much as possible
|
||
|
assert len(actual) == len(expected)
|
||
|
for act, exp in zip(actual, expected):
|
||
|
act = np.asarray(act)
|
||
|
exp = np.asarray(exp, dtype=np.int8)
|
||
|
tm.assert_numpy_array_equal(act, exp)
|
||
|
|
||
|
# label changing [w/o mutation]
|
||
|
ind2 = self.index.set_labels(new_labels)
|
||
|
assert_matching(ind2.labels, new_labels)
|
||
|
assert_matching(self.index.labels, labels)
|
||
|
|
||
|
# label changing [w/ mutation]
|
||
|
ind2 = self.index.copy()
|
||
|
inplace_return = ind2.set_labels(new_labels, inplace=True)
|
||
|
assert inplace_return is None
|
||
|
assert_matching(ind2.labels, new_labels)
|
||
|
|
||
|
# label changing specific level [w/o mutation]
|
||
|
ind2 = self.index.set_labels(new_labels[0], level=0)
|
||
|
assert_matching(ind2.labels, [new_labels[0], labels[1]])
|
||
|
assert_matching(self.index.labels, labels)
|
||
|
|
||
|
ind2 = self.index.set_labels(new_labels[1], level=1)
|
||
|
assert_matching(ind2.labels, [labels[0], new_labels[1]])
|
||
|
assert_matching(self.index.labels, labels)
|
||
|
|
||
|
# label changing multiple levels [w/o mutation]
|
||
|
ind2 = self.index.set_labels(new_labels, level=[0, 1])
|
||
|
assert_matching(ind2.labels, new_labels)
|
||
|
assert_matching(self.index.labels, labels)
|
||
|
|
||
|
# label changing specific level [w/ mutation]
|
||
|
ind2 = self.index.copy()
|
||
|
inplace_return = ind2.set_labels(new_labels[0], level=0, inplace=True)
|
||
|
assert inplace_return is None
|
||
|
assert_matching(ind2.labels, [new_labels[0], labels[1]])
|
||
|
assert_matching(self.index.labels, labels)
|
||
|
|
||
|
ind2 = self.index.copy()
|
||
|
inplace_return = ind2.set_labels(new_labels[1], level=1, inplace=True)
|
||
|
assert inplace_return is None
|
||
|
assert_matching(ind2.labels, [labels[0], new_labels[1]])
|
||
|
assert_matching(self.index.labels, labels)
|
||
|
|
||
|
# label changing multiple levels [w/ mutation]
|
||
|
ind2 = self.index.copy()
|
||
|
inplace_return = ind2.set_labels(new_labels, level=[0, 1],
|
||
|
inplace=True)
|
||
|
assert inplace_return is None
|
||
|
assert_matching(ind2.labels, new_labels)
|
||
|
assert_matching(self.index.labels, labels)
|
||
|
|
||
|
# label changing for levels of different magnitude of categories
|
||
|
ind = pd.MultiIndex.from_tuples([(0, i) for i in range(130)])
|
||
|
new_labels = range(129, -1, -1)
|
||
|
expected = pd.MultiIndex.from_tuples(
|
||
|
[(0, i) for i in new_labels])
|
||
|
|
||
|
# [w/o mutation]
|
||
|
result = ind.set_labels(labels=new_labels, level=1)
|
||
|
assert result.equals(expected)
|
||
|
|
||
|
# [w/ mutation]
|
||
|
result = ind.copy()
|
||
|
result.set_labels(labels=new_labels, level=1, inplace=True)
|
||
|
assert result.equals(expected)
|
||
|
|
||
|
def test_set_levels_labels_names_bad_input(self):
|
||
|
levels, labels = self.index.levels, self.index.labels
|
||
|
names = self.index.names
|
||
|
|
||
|
with tm.assert_raises_regex(ValueError, 'Length of levels'):
|
||
|
self.index.set_levels([levels[0]])
|
||
|
|
||
|
with tm.assert_raises_regex(ValueError, 'Length of labels'):
|
||
|
self.index.set_labels([labels[0]])
|
||
|
|
||
|
with tm.assert_raises_regex(ValueError, 'Length of names'):
|
||
|
self.index.set_names([names[0]])
|
||
|
|
||
|
# shouldn't scalar data error, instead should demand list-like
|
||
|
with tm.assert_raises_regex(TypeError, 'list of lists-like'):
|
||
|
self.index.set_levels(levels[0])
|
||
|
|
||
|
# shouldn't scalar data error, instead should demand list-like
|
||
|
with tm.assert_raises_regex(TypeError, 'list of lists-like'):
|
||
|
self.index.set_labels(labels[0])
|
||
|
|
||
|
# shouldn't scalar data error, instead should demand list-like
|
||
|
with tm.assert_raises_regex(TypeError, 'list-like'):
|
||
|
self.index.set_names(names[0])
|
||
|
|
||
|
# should have equal lengths
|
||
|
with tm.assert_raises_regex(TypeError, 'list of lists-like'):
|
||
|
self.index.set_levels(levels[0], level=[0, 1])
|
||
|
|
||
|
with tm.assert_raises_regex(TypeError, 'list-like'):
|
||
|
self.index.set_levels(levels, level=0)
|
||
|
|
||
|
# should have equal lengths
|
||
|
with tm.assert_raises_regex(TypeError, 'list of lists-like'):
|
||
|
self.index.set_labels(labels[0], level=[0, 1])
|
||
|
|
||
|
with tm.assert_raises_regex(TypeError, 'list-like'):
|
||
|
self.index.set_labels(labels, level=0)
|
||
|
|
||
|
# should have equal lengths
|
||
|
with tm.assert_raises_regex(ValueError, 'Length of names'):
|
||
|
self.index.set_names(names[0], level=[0, 1])
|
||
|
|
||
|
with tm.assert_raises_regex(TypeError, 'string'):
|
||
|
self.index.set_names(names, level=0)
|
||
|
|
||
|
def test_set_levels_categorical(self):
|
||
|
# GH13854
|
||
|
index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])
|
||
|
for ordered in [False, True]:
|
||
|
cidx = CategoricalIndex(list("bac"), ordered=ordered)
|
||
|
result = index.set_levels(cidx, 0)
|
||
|
expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]],
|
||
|
labels=index.labels)
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
result_lvl = result.get_level_values(0)
|
||
|
expected_lvl = CategoricalIndex(list("bacb"),
|
||
|
categories=cidx.categories,
|
||
|
ordered=cidx.ordered)
|
||
|
tm.assert_index_equal(result_lvl, expected_lvl)
|
||
|
|
||
|
def test_metadata_immutable(self):
|
||
|
levels, labels = self.index.levels, self.index.labels
|
||
|
# shouldn't be able to set at either the top level or base level
|
||
|
mutable_regex = re.compile('does not support mutable operations')
|
||
|
with tm.assert_raises_regex(TypeError, mutable_regex):
|
||
|
levels[0] = levels[0]
|
||
|
with tm.assert_raises_regex(TypeError, mutable_regex):
|
||
|
levels[0][0] = levels[0][0]
|
||
|
# ditto for labels
|
||
|
with tm.assert_raises_regex(TypeError, mutable_regex):
|
||
|
labels[0] = labels[0]
|
||
|
with tm.assert_raises_regex(TypeError, mutable_regex):
|
||
|
labels[0][0] = labels[0][0]
|
||
|
# and for names
|
||
|
names = self.index.names
|
||
|
with tm.assert_raises_regex(TypeError, mutable_regex):
|
||
|
names[0] = names[0]
|
||
|
|
||
|
def test_inplace_mutation_resets_values(self):
|
||
|
levels = [['a', 'b', 'c'], [4]]
|
||
|
levels2 = [[1, 2, 3], ['a']]
|
||
|
labels = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]
|
||
|
|
||
|
mi1 = MultiIndex(levels=levels, labels=labels)
|
||
|
mi2 = MultiIndex(levels=levels2, labels=labels)
|
||
|
vals = mi1.values.copy()
|
||
|
vals2 = mi2.values.copy()
|
||
|
|
||
|
assert mi1._tuples is not None
|
||
|
|
||
|
# Make sure level setting works
|
||
|
new_vals = mi1.set_levels(levels2).values
|
||
|
tm.assert_almost_equal(vals2, new_vals)
|
||
|
|
||
|
# Non-inplace doesn't kill _tuples [implementation detail]
|
||
|
tm.assert_almost_equal(mi1._tuples, vals)
|
||
|
|
||
|
# ...and values is still same too
|
||
|
tm.assert_almost_equal(mi1.values, vals)
|
||
|
|
||
|
# Inplace should kill _tuples
|
||
|
mi1.set_levels(levels2, inplace=True)
|
||
|
tm.assert_almost_equal(mi1.values, vals2)
|
||
|
|
||
|
# Make sure label setting works too
|
||
|
labels2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
|
||
|
exp_values = np.empty((6,), dtype=object)
|
||
|
exp_values[:] = [(long(1), 'a')] * 6
|
||
|
|
||
|
# Must be 1d array of tuples
|
||
|
assert exp_values.shape == (6,)
|
||
|
new_values = mi2.set_labels(labels2).values
|
||
|
|
||
|
# Not inplace shouldn't change
|
||
|
tm.assert_almost_equal(mi2._tuples, vals2)
|
||
|
|
||
|
# Should have correct values
|
||
|
tm.assert_almost_equal(exp_values, new_values)
|
||
|
|
||
|
# ...and again setting inplace should kill _tuples, etc
|
||
|
mi2.set_labels(labels2, inplace=True)
|
||
|
tm.assert_almost_equal(mi2.values, new_values)
|
||
|
|
||
|
def test_copy_in_constructor(self):
|
||
|
levels = np.array(["a", "b", "c"])
|
||
|
labels = np.array([1, 1, 2, 0, 0, 1, 1])
|
||
|
val = labels[0]
|
||
|
mi = MultiIndex(levels=[levels, levels], labels=[labels, labels],
|
||
|
copy=True)
|
||
|
assert mi.labels[0][0] == val
|
||
|
labels[0] = 15
|
||
|
assert mi.labels[0][0] == val
|
||
|
val = levels[0]
|
||
|
levels[0] = "PANDA"
|
||
|
assert mi.levels[0][0] == val
|
||
|
|
||
|
def test_set_value_keeps_names(self):
|
||
|
# motivating example from #3742
|
||
|
lev1 = ['hans', 'hans', 'hans', 'grethe', 'grethe', 'grethe']
|
||
|
lev2 = ['1', '2', '3'] * 2
|
||
|
idx = pd.MultiIndex.from_arrays([lev1, lev2], names=['Name', 'Number'])
|
||
|
df = pd.DataFrame(
|
||
|
np.random.randn(6, 4),
|
||
|
columns=['one', 'two', 'three', 'four'],
|
||
|
index=idx)
|
||
|
df = df.sort_index()
|
||
|
assert df._is_copy is None
|
||
|
assert df.index.names == ('Name', 'Number')
|
||
|
df.at[('grethe', '4'), 'one'] = 99.34
|
||
|
assert df._is_copy is None
|
||
|
assert df.index.names == ('Name', 'Number')
|
||
|
|
||
|
def test_copy_names(self):
|
||
|
# Check that adding a "names" parameter to the copy is honored
|
||
|
# GH14302
|
||
|
multi_idx = pd.Index([(1, 2), (3, 4)], names=['MyName1', 'MyName2'])
|
||
|
multi_idx1 = multi_idx.copy()
|
||
|
|
||
|
assert multi_idx.equals(multi_idx1)
|
||
|
assert multi_idx.names == ['MyName1', 'MyName2']
|
||
|
assert multi_idx1.names == ['MyName1', 'MyName2']
|
||
|
|
||
|
multi_idx2 = multi_idx.copy(names=['NewName1', 'NewName2'])
|
||
|
|
||
|
assert multi_idx.equals(multi_idx2)
|
||
|
assert multi_idx.names == ['MyName1', 'MyName2']
|
||
|
assert multi_idx2.names == ['NewName1', 'NewName2']
|
||
|
|
||
|
multi_idx3 = multi_idx.copy(name=['NewName1', 'NewName2'])
|
||
|
|
||
|
assert multi_idx.equals(multi_idx3)
|
||
|
assert multi_idx.names == ['MyName1', 'MyName2']
|
||
|
assert multi_idx3.names == ['NewName1', 'NewName2']
|
||
|
|
||
|
def test_names(self):
|
||
|
|
||
|
# names are assigned in setup
|
||
|
names = self.index_names
|
||
|
level_names = [level.name for level in self.index.levels]
|
||
|
assert names == level_names
|
||
|
|
||
|
# setting bad names on existing
|
||
|
index = self.index
|
||
|
tm.assert_raises_regex(ValueError, "^Length of names",
|
||
|
setattr, index, "names",
|
||
|
list(index.names) + ["third"])
|
||
|
tm.assert_raises_regex(ValueError, "^Length of names",
|
||
|
setattr, index, "names", [])
|
||
|
|
||
|
# initializing with bad names (should always be equivalent)
|
||
|
major_axis, minor_axis = self.index.levels
|
||
|
major_labels, minor_labels = self.index.labels
|
||
|
tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex,
|
||
|
levels=[major_axis, minor_axis],
|
||
|
labels=[major_labels, minor_labels],
|
||
|
names=['first'])
|
||
|
tm.assert_raises_regex(ValueError, "^Length of names", MultiIndex,
|
||
|
levels=[major_axis, minor_axis],
|
||
|
labels=[major_labels, minor_labels],
|
||
|
names=['first', 'second', 'third'])
|
||
|
|
||
|
# names are assigned
|
||
|
index.names = ["a", "b"]
|
||
|
ind_names = list(index.names)
|
||
|
level_names = [level.name for level in index.levels]
|
||
|
assert ind_names == level_names
|
||
|
|
||
|
def test_astype(self):
|
||
|
expected = self.index.copy()
|
||
|
actual = self.index.astype('O')
|
||
|
assert_copy(actual.levels, expected.levels)
|
||
|
assert_copy(actual.labels, expected.labels)
|
||
|
self.check_level_names(actual, expected.names)
|
||
|
|
||
|
with tm.assert_raises_regex(TypeError, "^Setting.*dtype.*object"):
|
||
|
self.index.astype(np.dtype(int))
|
||
|
|
||
|
@pytest.mark.parametrize('ordered', [True, False])
|
||
|
def test_astype_category(self, ordered):
|
||
|
# GH 18630
|
||
|
msg = '> 1 ndim Categorical are not supported at this time'
|
||
|
with tm.assert_raises_regex(NotImplementedError, msg):
|
||
|
self.index.astype(CategoricalDtype(ordered=ordered))
|
||
|
|
||
|
if ordered is False:
|
||
|
# dtype='category' defaults to ordered=False, so only test once
|
||
|
with tm.assert_raises_regex(NotImplementedError, msg):
|
||
|
self.index.astype('category')
|
||
|
|
||
|
def test_constructor_single_level(self):
|
||
|
result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
|
||
|
labels=[[0, 1, 2, 3]], names=['first'])
|
||
|
assert isinstance(result, MultiIndex)
|
||
|
expected = Index(['foo', 'bar', 'baz', 'qux'], name='first')
|
||
|
tm.assert_index_equal(result.levels[0], expected)
|
||
|
assert result.names == ['first']
|
||
|
|
||
|
def test_constructor_no_levels(self):
|
||
|
tm.assert_raises_regex(ValueError, "non-zero number "
|
||
|
"of levels/labels",
|
||
|
MultiIndex, levels=[], labels=[])
|
||
|
both_re = re.compile('Must pass both levels and labels')
|
||
|
with tm.assert_raises_regex(TypeError, both_re):
|
||
|
MultiIndex(levels=[])
|
||
|
with tm.assert_raises_regex(TypeError, both_re):
|
||
|
MultiIndex(labels=[])
|
||
|
|
||
|
def test_constructor_mismatched_label_levels(self):
|
||
|
labels = [np.array([1]), np.array([2]), np.array([3])]
|
||
|
levels = ["a"]
|
||
|
tm.assert_raises_regex(ValueError, "Length of levels and labels "
|
||
|
"must be the same", MultiIndex,
|
||
|
levels=levels, labels=labels)
|
||
|
length_error = re.compile('>= length of level')
|
||
|
label_error = re.compile(r'Unequal label lengths: \[4, 2\]')
|
||
|
|
||
|
# important to check that it's looking at the right thing.
|
||
|
with tm.assert_raises_regex(ValueError, length_error):
|
||
|
MultiIndex(levels=[['a'], ['b']],
|
||
|
labels=[[0, 1, 2, 3], [0, 3, 4, 1]])
|
||
|
|
||
|
with tm.assert_raises_regex(ValueError, label_error):
|
||
|
MultiIndex(levels=[['a'], ['b']], labels=[[0, 0, 0, 0], [0, 0]])
|
||
|
|
||
|
# external API
|
||
|
with tm.assert_raises_regex(ValueError, length_error):
|
||
|
self.index.copy().set_levels([['a'], ['b']])
|
||
|
|
||
|
with tm.assert_raises_regex(ValueError, label_error):
|
||
|
self.index.copy().set_labels([[0, 0, 0, 0], [0, 0]])
|
||
|
|
||
|
def test_constructor_nonhashable_names(self):
|
||
|
# GH 20527
|
||
|
levels = [[1, 2], [u'one', u'two']]
|
||
|
labels = [[0, 0, 1, 1], [0, 1, 0, 1]]
|
||
|
names = ((['foo'], ['bar']))
|
||
|
message = "MultiIndex.name must be a hashable type"
|
||
|
tm.assert_raises_regex(TypeError, message,
|
||
|
MultiIndex, levels=levels,
|
||
|
labels=labels, names=names)
|
||
|
|
||
|
# With .rename()
|
||
|
mi = MultiIndex(levels=[[1, 2], [u'one', u'two']],
|
||
|
labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
|
||
|
names=('foo', 'bar'))
|
||
|
renamed = [['foor'], ['barr']]
|
||
|
tm.assert_raises_regex(TypeError, message, mi.rename, names=renamed)
|
||
|
# With .set_names()
|
||
|
tm.assert_raises_regex(TypeError, message, mi.set_names, names=renamed)
|
||
|
|
||
|
@pytest.mark.parametrize('names', [['a', 'b', 'a'], [1, 1, 2],
|
||
|
[1, 'a', 1]])
|
||
|
def test_duplicate_level_names(self, names):
|
||
|
# GH18872, GH19029
|
||
|
mi = pd.MultiIndex.from_product([[0, 1]] * 3, names=names)
|
||
|
assert mi.names == names
|
||
|
|
||
|
# With .rename()
|
||
|
mi = pd.MultiIndex.from_product([[0, 1]] * 3)
|
||
|
mi = mi.rename(names)
|
||
|
assert mi.names == names
|
||
|
|
||
|
# With .rename(., level=)
|
||
|
mi.rename(names[1], level=1, inplace=True)
|
||
|
mi = mi.rename([names[0], names[2]], level=[0, 2])
|
||
|
assert mi.names == names
|
||
|
|
||
|
def test_duplicate_level_names_access_raises(self):
|
||
|
self.index.names = ['foo', 'foo']
|
||
|
tm.assert_raises_regex(KeyError, 'Level foo not found',
|
||
|
self.index._get_level_number, 'foo')
|
||
|
|
||
|
def assert_multiindex_copied(self, copy, original):
|
||
|
# Levels should be (at least, shallow copied)
|
||
|
tm.assert_copy(copy.levels, original.levels)
|
||
|
tm.assert_almost_equal(copy.labels, original.labels)
|
||
|
|
||
|
# Labels doesn't matter which way copied
|
||
|
tm.assert_almost_equal(copy.labels, original.labels)
|
||
|
assert copy.labels is not original.labels
|
||
|
|
||
|
# Names doesn't matter which way copied
|
||
|
assert copy.names == original.names
|
||
|
assert copy.names is not original.names
|
||
|
|
||
|
# Sort order should be copied
|
||
|
assert copy.sortorder == original.sortorder
|
||
|
|
||
|
def test_copy(self):
|
||
|
i_copy = self.index.copy()
|
||
|
|
||
|
self.assert_multiindex_copied(i_copy, self.index)
|
||
|
|
||
|
def test_shallow_copy(self):
|
||
|
i_copy = self.index._shallow_copy()
|
||
|
|
||
|
self.assert_multiindex_copied(i_copy, self.index)
|
||
|
|
||
|
def test_view(self):
|
||
|
i_view = self.index.view()
|
||
|
|
||
|
self.assert_multiindex_copied(i_view, self.index)
|
||
|
|
||
|
def check_level_names(self, index, names):
|
||
|
assert [level.name for level in index.levels] == list(names)
|
||
|
|
||
|
def test_changing_names(self):
|
||
|
|
||
|
# names should be applied to levels
|
||
|
level_names = [level.name for level in self.index.levels]
|
||
|
self.check_level_names(self.index, self.index.names)
|
||
|
|
||
|
view = self.index.view()
|
||
|
copy = self.index.copy()
|
||
|
shallow_copy = self.index._shallow_copy()
|
||
|
|
||
|
# changing names should change level names on object
|
||
|
new_names = [name + "a" for name in self.index.names]
|
||
|
self.index.names = new_names
|
||
|
self.check_level_names(self.index, new_names)
|
||
|
|
||
|
# but not on copies
|
||
|
self.check_level_names(view, level_names)
|
||
|
self.check_level_names(copy, level_names)
|
||
|
self.check_level_names(shallow_copy, level_names)
|
||
|
|
||
|
# and copies shouldn't change original
|
||
|
shallow_copy.names = [name + "c" for name in shallow_copy.names]
|
||
|
self.check_level_names(self.index, new_names)
|
||
|
|
||
|
def test_get_level_number_integer(self):
|
||
|
self.index.names = [1, 0]
|
||
|
assert self.index._get_level_number(1) == 0
|
||
|
assert self.index._get_level_number(0) == 1
|
||
|
pytest.raises(IndexError, self.index._get_level_number, 2)
|
||
|
tm.assert_raises_regex(KeyError, 'Level fourth not found',
|
||
|
self.index._get_level_number, 'fourth')
|
||
|
|
||
|
def test_from_arrays(self):
|
||
|
arrays = []
|
||
|
for lev, lab in zip(self.index.levels, self.index.labels):
|
||
|
arrays.append(np.asarray(lev).take(lab))
|
||
|
|
||
|
# list of arrays as input
|
||
|
result = MultiIndex.from_arrays(arrays, names=self.index.names)
|
||
|
tm.assert_index_equal(result, self.index)
|
||
|
|
||
|
# infer correctly
|
||
|
result = MultiIndex.from_arrays([[pd.NaT, Timestamp('20130101')],
|
||
|
['a', 'b']])
|
||
|
assert result.levels[0].equals(Index([Timestamp('20130101')]))
|
||
|
assert result.levels[1].equals(Index(['a', 'b']))
|
||
|
|
||
|
def test_from_arrays_iterator(self):
|
||
|
# GH 18434
|
||
|
arrays = []
|
||
|
for lev, lab in zip(self.index.levels, self.index.labels):
|
||
|
arrays.append(np.asarray(lev).take(lab))
|
||
|
|
||
|
# iterator as input
|
||
|
result = MultiIndex.from_arrays(iter(arrays), names=self.index.names)
|
||
|
tm.assert_index_equal(result, self.index)
|
||
|
|
||
|
# invalid iterator input
|
||
|
with tm.assert_raises_regex(
|
||
|
TypeError, "Input must be a list / sequence of array-likes."):
|
||
|
MultiIndex.from_arrays(0)
|
||
|
|
||
|
def test_from_arrays_index_series_datetimetz(self):
|
||
|
idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3,
|
||
|
tz='US/Eastern')
|
||
|
idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3,
|
||
|
tz='Asia/Tokyo')
|
||
|
result = pd.MultiIndex.from_arrays([idx1, idx2])
|
||
|
tm.assert_index_equal(result.get_level_values(0), idx1)
|
||
|
tm.assert_index_equal(result.get_level_values(1), idx2)
|
||
|
|
||
|
result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)])
|
||
|
tm.assert_index_equal(result2.get_level_values(0), idx1)
|
||
|
tm.assert_index_equal(result2.get_level_values(1), idx2)
|
||
|
|
||
|
tm.assert_index_equal(result, result2)
|
||
|
|
||
|
def test_from_arrays_index_series_timedelta(self):
|
||
|
idx1 = pd.timedelta_range('1 days', freq='D', periods=3)
|
||
|
idx2 = pd.timedelta_range('2 hours', freq='H', periods=3)
|
||
|
result = pd.MultiIndex.from_arrays([idx1, idx2])
|
||
|
tm.assert_index_equal(result.get_level_values(0), idx1)
|
||
|
tm.assert_index_equal(result.get_level_values(1), idx2)
|
||
|
|
||
|
result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)])
|
||
|
tm.assert_index_equal(result2.get_level_values(0), idx1)
|
||
|
tm.assert_index_equal(result2.get_level_values(1), idx2)
|
||
|
|
||
|
tm.assert_index_equal(result, result2)
|
||
|
|
||
|
def test_from_arrays_index_series_period(self):
|
||
|
idx1 = pd.period_range('2011-01-01', freq='D', periods=3)
|
||
|
idx2 = pd.period_range('2015-01-01', freq='H', periods=3)
|
||
|
result = pd.MultiIndex.from_arrays([idx1, idx2])
|
||
|
tm.assert_index_equal(result.get_level_values(0), idx1)
|
||
|
tm.assert_index_equal(result.get_level_values(1), idx2)
|
||
|
|
||
|
result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)])
|
||
|
tm.assert_index_equal(result2.get_level_values(0), idx1)
|
||
|
tm.assert_index_equal(result2.get_level_values(1), idx2)
|
||
|
|
||
|
tm.assert_index_equal(result, result2)
|
||
|
|
||
|
def test_from_arrays_index_datetimelike_mixed(self):
|
||
|
idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3,
|
||
|
tz='US/Eastern')
|
||
|
idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3)
|
||
|
idx3 = pd.timedelta_range('1 days', freq='D', periods=3)
|
||
|
idx4 = pd.period_range('2011-01-01', freq='D', periods=3)
|
||
|
|
||
|
result = pd.MultiIndex.from_arrays([idx1, idx2, idx3, idx4])
|
||
|
tm.assert_index_equal(result.get_level_values(0), idx1)
|
||
|
tm.assert_index_equal(result.get_level_values(1), idx2)
|
||
|
tm.assert_index_equal(result.get_level_values(2), idx3)
|
||
|
tm.assert_index_equal(result.get_level_values(3), idx4)
|
||
|
|
||
|
result2 = pd.MultiIndex.from_arrays([pd.Series(idx1),
|
||
|
pd.Series(idx2),
|
||
|
pd.Series(idx3),
|
||
|
pd.Series(idx4)])
|
||
|
tm.assert_index_equal(result2.get_level_values(0), idx1)
|
||
|
tm.assert_index_equal(result2.get_level_values(1), idx2)
|
||
|
tm.assert_index_equal(result2.get_level_values(2), idx3)
|
||
|
tm.assert_index_equal(result2.get_level_values(3), idx4)
|
||
|
|
||
|
tm.assert_index_equal(result, result2)
|
||
|
|
||
|
def test_from_arrays_index_series_categorical(self):
|
||
|
# GH13743
|
||
|
idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"),
|
||
|
ordered=False)
|
||
|
idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"),
|
||
|
ordered=True)
|
||
|
|
||
|
result = pd.MultiIndex.from_arrays([idx1, idx2])
|
||
|
tm.assert_index_equal(result.get_level_values(0), idx1)
|
||
|
tm.assert_index_equal(result.get_level_values(1), idx2)
|
||
|
|
||
|
result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)])
|
||
|
tm.assert_index_equal(result2.get_level_values(0), idx1)
|
||
|
tm.assert_index_equal(result2.get_level_values(1), idx2)
|
||
|
|
||
|
result3 = pd.MultiIndex.from_arrays([idx1.values, idx2.values])
|
||
|
tm.assert_index_equal(result3.get_level_values(0), idx1)
|
||
|
tm.assert_index_equal(result3.get_level_values(1), idx2)
|
||
|
|
||
|
def test_from_arrays_empty(self):
|
||
|
# 0 levels
|
||
|
with tm.assert_raises_regex(
|
||
|
ValueError, "Must pass non-zero number of levels/labels"):
|
||
|
MultiIndex.from_arrays(arrays=[])
|
||
|
|
||
|
# 1 level
|
||
|
result = MultiIndex.from_arrays(arrays=[[]], names=['A'])
|
||
|
assert isinstance(result, MultiIndex)
|
||
|
expected = Index([], name='A')
|
||
|
tm.assert_index_equal(result.levels[0], expected)
|
||
|
|
||
|
# N levels
|
||
|
for N in [2, 3]:
|
||
|
arrays = [[]] * N
|
||
|
names = list('ABC')[:N]
|
||
|
result = MultiIndex.from_arrays(arrays=arrays, names=names)
|
||
|
expected = MultiIndex(levels=[[]] * N, labels=[[]] * N,
|
||
|
names=names)
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
def test_from_arrays_invalid_input(self):
|
||
|
invalid_inputs = [1, [1], [1, 2], [[1], 2],
|
||
|
'a', ['a'], ['a', 'b'], [['a'], 'b']]
|
||
|
for i in invalid_inputs:
|
||
|
pytest.raises(TypeError, MultiIndex.from_arrays, arrays=i)
|
||
|
|
||
|
def test_from_arrays_different_lengths(self):
|
||
|
# see gh-13599
|
||
|
idx1 = [1, 2, 3]
|
||
|
idx2 = ['a', 'b']
|
||
|
tm.assert_raises_regex(ValueError, '^all arrays must '
|
||
|
'be same length$',
|
||
|
MultiIndex.from_arrays, [idx1, idx2])
|
||
|
|
||
|
idx1 = []
|
||
|
idx2 = ['a', 'b']
|
||
|
tm.assert_raises_regex(ValueError, '^all arrays must '
|
||
|
'be same length$',
|
||
|
MultiIndex.from_arrays, [idx1, idx2])
|
||
|
|
||
|
idx1 = [1, 2, 3]
|
||
|
idx2 = []
|
||
|
tm.assert_raises_regex(ValueError, '^all arrays must '
|
||
|
'be same length$',
|
||
|
MultiIndex.from_arrays, [idx1, idx2])
|
||
|
|
||
|
def test_from_product(self):
|
||
|
|
||
|
first = ['foo', 'bar', 'buz']
|
||
|
second = ['a', 'b', 'c']
|
||
|
names = ['first', 'second']
|
||
|
result = MultiIndex.from_product([first, second], names=names)
|
||
|
|
||
|
tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'),
|
||
|
('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'),
|
||
|
('buz', 'c')]
|
||
|
expected = MultiIndex.from_tuples(tuples, names=names)
|
||
|
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
def test_from_product_iterator(self):
|
||
|
# GH 18434
|
||
|
first = ['foo', 'bar', 'buz']
|
||
|
second = ['a', 'b', 'c']
|
||
|
names = ['first', 'second']
|
||
|
tuples = [('foo', 'a'), ('foo', 'b'), ('foo', 'c'), ('bar', 'a'),
|
||
|
('bar', 'b'), ('bar', 'c'), ('buz', 'a'), ('buz', 'b'),
|
||
|
('buz', 'c')]
|
||
|
expected = MultiIndex.from_tuples(tuples, names=names)
|
||
|
|
||
|
# iterator as input
|
||
|
result = MultiIndex.from_product(iter([first, second]), names=names)
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
# Invalid non-iterable input
|
||
|
with tm.assert_raises_regex(
|
||
|
TypeError, "Input must be a list / sequence of iterables."):
|
||
|
MultiIndex.from_product(0)
|
||
|
|
||
|
def test_from_product_empty(self):
|
||
|
# 0 levels
|
||
|
with tm.assert_raises_regex(
|
||
|
ValueError, "Must pass non-zero number of levels/labels"):
|
||
|
MultiIndex.from_product([])
|
||
|
|
||
|
# 1 level
|
||
|
result = MultiIndex.from_product([[]], names=['A'])
|
||
|
expected = pd.Index([], name='A')
|
||
|
tm.assert_index_equal(result.levels[0], expected)
|
||
|
|
||
|
# 2 levels
|
||
|
l1 = [[], ['foo', 'bar', 'baz'], []]
|
||
|
l2 = [[], [], ['a', 'b', 'c']]
|
||
|
names = ['A', 'B']
|
||
|
for first, second in zip(l1, l2):
|
||
|
result = MultiIndex.from_product([first, second], names=names)
|
||
|
expected = MultiIndex(levels=[first, second],
|
||
|
labels=[[], []], names=names)
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
# GH12258
|
||
|
names = ['A', 'B', 'C']
|
||
|
for N in range(4):
|
||
|
lvl2 = lrange(N)
|
||
|
result = MultiIndex.from_product([[], lvl2, []], names=names)
|
||
|
expected = MultiIndex(levels=[[], lvl2, []],
|
||
|
labels=[[], [], []], names=names)
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
def test_from_product_invalid_input(self):
|
||
|
invalid_inputs = [1, [1], [1, 2], [[1], 2],
|
||
|
'a', ['a'], ['a', 'b'], [['a'], 'b']]
|
||
|
for i in invalid_inputs:
|
||
|
pytest.raises(TypeError, MultiIndex.from_product, iterables=i)
|
||
|
|
||
|
def test_from_product_datetimeindex(self):
|
||
|
dt_index = date_range('2000-01-01', periods=2)
|
||
|
mi = pd.MultiIndex.from_product([[1, 2], dt_index])
|
||
|
etalon = construct_1d_object_array_from_listlike([(1, pd.Timestamp(
|
||
|
'2000-01-01')), (1, pd.Timestamp('2000-01-02')), (2, pd.Timestamp(
|
||
|
'2000-01-01')), (2, pd.Timestamp('2000-01-02'))])
|
||
|
tm.assert_numpy_array_equal(mi.values, etalon)
|
||
|
|
||
|
def test_from_product_index_series_categorical(self):
|
||
|
# GH13743
|
||
|
first = ['foo', 'bar']
|
||
|
for ordered in [False, True]:
|
||
|
idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"),
|
||
|
ordered=ordered)
|
||
|
expected = pd.CategoricalIndex(list("abcaab") + list("abcaab"),
|
||
|
categories=list("bac"),
|
||
|
ordered=ordered)
|
||
|
|
||
|
for arr in [idx, pd.Series(idx), idx.values]:
|
||
|
result = pd.MultiIndex.from_product([first, arr])
|
||
|
tm.assert_index_equal(result.get_level_values(1), expected)
|
||
|
|
||
|
def test_values_boxed(self):
|
||
|
tuples = [(1, pd.Timestamp('2000-01-01')), (2, pd.NaT),
|
||
|
(3, pd.Timestamp('2000-01-03')),
|
||
|
(1, pd.Timestamp('2000-01-04')),
|
||
|
(2, pd.Timestamp('2000-01-02')),
|
||
|
(3, pd.Timestamp('2000-01-03'))]
|
||
|
result = pd.MultiIndex.from_tuples(tuples)
|
||
|
expected = construct_1d_object_array_from_listlike(tuples)
|
||
|
tm.assert_numpy_array_equal(result.values, expected)
|
||
|
# Check that code branches for boxed values produce identical results
|
||
|
tm.assert_numpy_array_equal(result.values[:4], result[:4].values)
|
||
|
|
||
|
def test_values_multiindex_datetimeindex(self):
|
||
|
# Test to ensure we hit the boxing / nobox part of MI.values
|
||
|
ints = np.arange(10 ** 18, 10 ** 18 + 5)
|
||
|
naive = pd.DatetimeIndex(ints)
|
||
|
aware = pd.DatetimeIndex(ints, tz='US/Central')
|
||
|
|
||
|
idx = pd.MultiIndex.from_arrays([naive, aware])
|
||
|
result = idx.values
|
||
|
|
||
|
outer = pd.DatetimeIndex([x[0] for x in result])
|
||
|
tm.assert_index_equal(outer, naive)
|
||
|
|
||
|
inner = pd.DatetimeIndex([x[1] for x in result])
|
||
|
tm.assert_index_equal(inner, aware)
|
||
|
|
||
|
# n_lev > n_lab
|
||
|
result = idx[:2].values
|
||
|
|
||
|
outer = pd.DatetimeIndex([x[0] for x in result])
|
||
|
tm.assert_index_equal(outer, naive[:2])
|
||
|
|
||
|
inner = pd.DatetimeIndex([x[1] for x in result])
|
||
|
tm.assert_index_equal(inner, aware[:2])
|
||
|
|
||
|
def test_values_multiindex_periodindex(self):
|
||
|
# Test to ensure we hit the boxing / nobox part of MI.values
|
||
|
ints = np.arange(2007, 2012)
|
||
|
pidx = pd.PeriodIndex(ints, freq='D')
|
||
|
|
||
|
idx = pd.MultiIndex.from_arrays([ints, pidx])
|
||
|
result = idx.values
|
||
|
|
||
|
outer = pd.Int64Index([x[0] for x in result])
|
||
|
tm.assert_index_equal(outer, pd.Int64Index(ints))
|
||
|
|
||
|
inner = pd.PeriodIndex([x[1] for x in result])
|
||
|
tm.assert_index_equal(inner, pidx)
|
||
|
|
||
|
# n_lev > n_lab
|
||
|
result = idx[:2].values
|
||
|
|
||
|
outer = pd.Int64Index([x[0] for x in result])
|
||
|
tm.assert_index_equal(outer, pd.Int64Index(ints[:2]))
|
||
|
|
||
|
inner = pd.PeriodIndex([x[1] for x in result])
|
||
|
tm.assert_index_equal(inner, pidx[:2])
|
||
|
|
||
|
def test_append(self):
|
||
|
result = self.index[:3].append(self.index[3:])
|
||
|
assert result.equals(self.index)
|
||
|
|
||
|
foos = [self.index[:1], self.index[1:3], self.index[3:]]
|
||
|
result = foos[0].append(foos[1:])
|
||
|
assert result.equals(self.index)
|
||
|
|
||
|
# empty
|
||
|
result = self.index.append([])
|
||
|
assert result.equals(self.index)
|
||
|
|
||
|
def test_append_mixed_dtypes(self):
|
||
|
# GH 13660
|
||
|
dti = date_range('2011-01-01', freq='M', periods=3, )
|
||
|
dti_tz = date_range('2011-01-01', freq='M', periods=3, tz='US/Eastern')
|
||
|
pi = period_range('2011-01', freq='M', periods=3)
|
||
|
|
||
|
mi = MultiIndex.from_arrays([[1, 2, 3],
|
||
|
[1.1, np.nan, 3.3],
|
||
|
['a', 'b', 'c'],
|
||
|
dti, dti_tz, pi])
|
||
|
assert mi.nlevels == 6
|
||
|
|
||
|
res = mi.append(mi)
|
||
|
exp = MultiIndex.from_arrays([[1, 2, 3, 1, 2, 3],
|
||
|
[1.1, np.nan, 3.3, 1.1, np.nan, 3.3],
|
||
|
['a', 'b', 'c', 'a', 'b', 'c'],
|
||
|
dti.append(dti),
|
||
|
dti_tz.append(dti_tz),
|
||
|
pi.append(pi)])
|
||
|
tm.assert_index_equal(res, exp)
|
||
|
|
||
|
other = MultiIndex.from_arrays([['x', 'y', 'z'], ['x', 'y', 'z'],
|
||
|
['x', 'y', 'z'], ['x', 'y', 'z'],
|
||
|
['x', 'y', 'z'], ['x', 'y', 'z']])
|
||
|
|
||
|
res = mi.append(other)
|
||
|
exp = MultiIndex.from_arrays([[1, 2, 3, 'x', 'y', 'z'],
|
||
|
[1.1, np.nan, 3.3, 'x', 'y', 'z'],
|
||
|
['a', 'b', 'c', 'x', 'y', 'z'],
|
||
|
dti.append(pd.Index(['x', 'y', 'z'])),
|
||
|
dti_tz.append(pd.Index(['x', 'y', 'z'])),
|
||
|
pi.append(pd.Index(['x', 'y', 'z']))])
|
||
|
tm.assert_index_equal(res, exp)
|
||
|
|
||
|
def test_get_level_values(self):
|
||
|
result = self.index.get_level_values(0)
|
||
|
expected = Index(['foo', 'foo', 'bar', 'baz', 'qux', 'qux'],
|
||
|
name='first')
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
assert result.name == 'first'
|
||
|
|
||
|
result = self.index.get_level_values('first')
|
||
|
expected = self.index.get_level_values(0)
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
# GH 10460
|
||
|
index = MultiIndex(
|
||
|
levels=[CategoricalIndex(['A', 'B']),
|
||
|
CategoricalIndex([1, 2, 3])],
|
||
|
labels=[np.array([0, 0, 0, 1, 1, 1]),
|
||
|
np.array([0, 1, 2, 0, 1, 2])])
|
||
|
|
||
|
exp = CategoricalIndex(['A', 'A', 'A', 'B', 'B', 'B'])
|
||
|
tm.assert_index_equal(index.get_level_values(0), exp)
|
||
|
exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
|
||
|
tm.assert_index_equal(index.get_level_values(1), exp)
|
||
|
|
||
|
def test_get_level_values_int_with_na(self):
|
||
|
# GH 17924
|
||
|
arrays = [['a', 'b', 'b'], [1, np.nan, 2]]
|
||
|
index = pd.MultiIndex.from_arrays(arrays)
|
||
|
result = index.get_level_values(1)
|
||
|
expected = Index([1, np.nan, 2])
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
arrays = [['a', 'b', 'b'], [np.nan, np.nan, 2]]
|
||
|
index = pd.MultiIndex.from_arrays(arrays)
|
||
|
result = index.get_level_values(1)
|
||
|
expected = Index([np.nan, np.nan, 2])
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
def test_get_level_values_na(self):
|
||
|
arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]]
|
||
|
index = pd.MultiIndex.from_arrays(arrays)
|
||
|
result = index.get_level_values(0)
|
||
|
expected = pd.Index([np.nan, np.nan, np.nan])
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
result = index.get_level_values(1)
|
||
|
expected = pd.Index(['a', np.nan, 1])
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
arrays = [['a', 'b', 'b'], pd.DatetimeIndex([0, 1, pd.NaT])]
|
||
|
index = pd.MultiIndex.from_arrays(arrays)
|
||
|
result = index.get_level_values(1)
|
||
|
expected = pd.DatetimeIndex([0, 1, pd.NaT])
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
arrays = [[], []]
|
||
|
index = pd.MultiIndex.from_arrays(arrays)
|
||
|
result = index.get_level_values(0)
|
||
|
expected = pd.Index([], dtype=object)
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
def test_get_level_values_all_na(self):
|
||
|
# GH 17924 when level entirely consists of nan
|
||
|
arrays = [[np.nan, np.nan, np.nan], ['a', np.nan, 1]]
|
||
|
index = pd.MultiIndex.from_arrays(arrays)
|
||
|
result = index.get_level_values(0)
|
||
|
expected = pd.Index([np.nan, np.nan, np.nan], dtype=np.float64)
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
result = index.get_level_values(1)
|
||
|
expected = pd.Index(['a', np.nan, 1], dtype=object)
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
def test_reorder_levels(self):
|
||
|
# this blows up
|
||
|
tm.assert_raises_regex(IndexError, '^Too many levels',
|
||
|
self.index.reorder_levels, [2, 1, 0])
|
||
|
|
||
|
def test_nlevels(self):
|
||
|
assert self.index.nlevels == 2
|
||
|
|
||
|
def test_iter(self):
|
||
|
result = list(self.index)
|
||
|
expected = [('foo', 'one'), ('foo', 'two'), ('bar', 'one'),
|
||
|
('baz', 'two'), ('qux', 'one'), ('qux', 'two')]
|
||
|
assert result == expected
|
||
|
|
||
|
def test_legacy_pickle(self, datapath):
|
||
|
if PY3:
|
||
|
pytest.skip("testing for legacy pickles not "
|
||
|
"support on py3")
|
||
|
|
||
|
path = datapath('indexes', 'data', 'multiindex_v1.pickle')
|
||
|
obj = pd.read_pickle(path)
|
||
|
|
||
|
obj2 = MultiIndex.from_tuples(obj.values)
|
||
|
assert obj.equals(obj2)
|
||
|
|
||
|
res = obj.get_indexer(obj)
|
||
|
exp = np.arange(len(obj), dtype=np.intp)
|
||
|
assert_almost_equal(res, exp)
|
||
|
|
||
|
res = obj.get_indexer(obj2[::-1])
|
||
|
exp = obj.get_indexer(obj[::-1])
|
||
|
exp2 = obj2.get_indexer(obj2[::-1])
|
||
|
assert_almost_equal(res, exp)
|
||
|
assert_almost_equal(exp, exp2)
|
||
|
|
||
|
def test_legacy_v2_unpickle(self, datapath):
|
||
|
|
||
|
# 0.7.3 -> 0.8.0 format manage
|
||
|
path = datapath('indexes', 'data', 'mindex_073.pickle')
|
||
|
obj = pd.read_pickle(path)
|
||
|
|
||
|
obj2 = MultiIndex.from_tuples(obj.values)
|
||
|
assert obj.equals(obj2)
|
||
|
|
||
|
res = obj.get_indexer(obj)
|
||
|
exp = np.arange(len(obj), dtype=np.intp)
|
||
|
assert_almost_equal(res, exp)
|
||
|
|
||
|
res = obj.get_indexer(obj2[::-1])
|
||
|
exp = obj.get_indexer(obj[::-1])
|
||
|
exp2 = obj2.get_indexer(obj2[::-1])
|
||
|
assert_almost_equal(res, exp)
|
||
|
assert_almost_equal(exp, exp2)
|
||
|
|
||
|
def test_roundtrip_pickle_with_tz(self):
|
||
|
|
||
|
# GH 8367
|
||
|
# round-trip of timezone
|
||
|
index = MultiIndex.from_product(
|
||
|
[[1, 2], ['a', 'b'], date_range('20130101', periods=3,
|
||
|
tz='US/Eastern')
|
||
|
], names=['one', 'two', 'three'])
|
||
|
unpickled = tm.round_trip_pickle(index)
|
||
|
assert index.equal_levels(unpickled)
|
||
|
|
||
|
def test_from_tuples_index_values(self):
|
||
|
result = MultiIndex.from_tuples(self.index)
|
||
|
assert (result.values == self.index.values).all()
|
||
|
|
||
|
def test_contains(self):
|
||
|
assert ('foo', 'two') in self.index
|
||
|
assert ('bar', 'two') not in self.index
|
||
|
assert None not in self.index
|
||
|
|
||
|
def test_contains_top_level(self):
|
||
|
midx = MultiIndex.from_product([['A', 'B'], [1, 2]])
|
||
|
assert 'A' in midx
|
||
|
assert 'A' not in midx._engine
|
||
|
|
||
|
def test_contains_with_nat(self):
|
||
|
# MI with a NaT
|
||
|
mi = MultiIndex(levels=[['C'],
|
||
|
pd.date_range('2012-01-01', periods=5)],
|
||
|
labels=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]],
|
||
|
names=[None, 'B'])
|
||
|
assert ('C', pd.Timestamp('2012-01-01')) in mi
|
||
|
for val in mi.values:
|
||
|
assert val in mi
|
||
|
|
||
|
def test_is_all_dates(self):
|
||
|
assert not self.index.is_all_dates
|
||
|
|
||
|
def test_is_numeric(self):
|
||
|
# MultiIndex is never numeric
|
||
|
assert not self.index.is_numeric()
|
||
|
|
||
|
def test_getitem(self):
|
||
|
# scalar
|
||
|
assert self.index[2] == ('bar', 'one')
|
||
|
|
||
|
# slice
|
||
|
result = self.index[2:5]
|
||
|
expected = self.index[[2, 3, 4]]
|
||
|
assert result.equals(expected)
|
||
|
|
||
|
# boolean
|
||
|
result = self.index[[True, False, True, False, True, True]]
|
||
|
result2 = self.index[np.array([True, False, True, False, True, True])]
|
||
|
expected = self.index[[0, 2, 4, 5]]
|
||
|
assert result.equals(expected)
|
||
|
assert result2.equals(expected)
|
||
|
|
||
|
def test_getitem_group_select(self):
|
||
|
sorted_idx, _ = self.index.sortlevel(0)
|
||
|
assert sorted_idx.get_loc('baz') == slice(3, 4)
|
||
|
assert sorted_idx.get_loc('foo') == slice(0, 2)
|
||
|
|
||
|
def test_get_loc(self):
|
||
|
assert self.index.get_loc(('foo', 'two')) == 1
|
||
|
assert self.index.get_loc(('baz', 'two')) == 3
|
||
|
pytest.raises(KeyError, self.index.get_loc, ('bar', 'two'))
|
||
|
pytest.raises(KeyError, self.index.get_loc, 'quux')
|
||
|
|
||
|
pytest.raises(NotImplementedError, self.index.get_loc, 'foo',
|
||
|
method='nearest')
|
||
|
|
||
|
# 3 levels
|
||
|
index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index(
|
||
|
lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
|
||
|
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])])
|
||
|
pytest.raises(KeyError, index.get_loc, (1, 1))
|
||
|
assert index.get_loc((2, 0)) == slice(3, 5)
|
||
|
|
||
|
def test_get_loc_duplicates(self):
|
||
|
index = Index([2, 2, 2, 2])
|
||
|
result = index.get_loc(2)
|
||
|
expected = slice(0, 4)
|
||
|
assert result == expected
|
||
|
# pytest.raises(Exception, index.get_loc, 2)
|
||
|
|
||
|
index = Index(['c', 'a', 'a', 'b', 'b'])
|
||
|
rs = index.get_loc('c')
|
||
|
xp = 0
|
||
|
assert rs == xp
|
||
|
|
||
|
def test_get_value_duplicates(self):
|
||
|
index = MultiIndex(levels=[['D', 'B', 'C'],
|
||
|
[0, 26, 27, 37, 57, 67, 75, 82]],
|
||
|
labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2],
|
||
|
[1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
|
||
|
names=['tag', 'day'])
|
||
|
|
||
|
assert index.get_loc('D') == slice(0, 3)
|
||
|
with pytest.raises(KeyError):
|
||
|
index._engine.get_value(np.array([]), 'D')
|
||
|
|
||
|
def test_get_loc_level(self):
|
||
|
index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index(
|
||
|
lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
|
||
|
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])])
|
||
|
|
||
|
loc, new_index = index.get_loc_level((0, 1))
|
||
|
expected = slice(1, 2)
|
||
|
exp_index = index[expected].droplevel(0).droplevel(0)
|
||
|
assert loc == expected
|
||
|
assert new_index.equals(exp_index)
|
||
|
|
||
|
loc, new_index = index.get_loc_level((0, 1, 0))
|
||
|
expected = 1
|
||
|
assert loc == expected
|
||
|
assert new_index is None
|
||
|
|
||
|
pytest.raises(KeyError, index.get_loc_level, (2, 2))
|
||
|
|
||
|
index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array(
|
||
|
[0, 0, 0, 0]), np.array([0, 1, 2, 3])])
|
||
|
result, new_index = index.get_loc_level((2000, slice(None, None)))
|
||
|
expected = slice(None, None)
|
||
|
assert result == expected
|
||
|
assert new_index.equals(index.droplevel(0))
|
||
|
|
||
|
@pytest.mark.parametrize('level', [0, 1])
|
||
|
@pytest.mark.parametrize('null_val', [np.nan, pd.NaT, None])
|
||
|
def test_get_loc_nan(self, level, null_val):
|
||
|
# GH 18485 : NaN in MultiIndex
|
||
|
levels = [['a', 'b'], ['c', 'd']]
|
||
|
key = ['b', 'd']
|
||
|
levels[level] = np.array([0, null_val], dtype=type(null_val))
|
||
|
key[level] = null_val
|
||
|
idx = MultiIndex.from_product(levels)
|
||
|
assert idx.get_loc(tuple(key)) == 3
|
||
|
|
||
|
def test_get_loc_missing_nan(self):
|
||
|
# GH 8569
|
||
|
idx = MultiIndex.from_arrays([[1.0, 2.0], [3.0, 4.0]])
|
||
|
assert isinstance(idx.get_loc(1), slice)
|
||
|
pytest.raises(KeyError, idx.get_loc, 3)
|
||
|
pytest.raises(KeyError, idx.get_loc, np.nan)
|
||
|
pytest.raises(KeyError, idx.get_loc, [np.nan])
|
||
|
|
||
|
@pytest.mark.parametrize('dtype1', [int, float, bool, str])
|
||
|
@pytest.mark.parametrize('dtype2', [int, float, bool, str])
|
||
|
def test_get_loc_multiple_dtypes(self, dtype1, dtype2):
|
||
|
# GH 18520
|
||
|
levels = [np.array([0, 1]).astype(dtype1),
|
||
|
np.array([0, 1]).astype(dtype2)]
|
||
|
idx = pd.MultiIndex.from_product(levels)
|
||
|
assert idx.get_loc(idx[2]) == 2
|
||
|
|
||
|
@pytest.mark.parametrize('level', [0, 1])
|
||
|
@pytest.mark.parametrize('dtypes', [[int, float], [float, int]])
|
||
|
def test_get_loc_implicit_cast(self, level, dtypes):
|
||
|
# GH 18818, GH 15994 : as flat index, cast int to float and vice-versa
|
||
|
levels = [['a', 'b'], ['c', 'd']]
|
||
|
key = ['b', 'd']
|
||
|
lev_dtype, key_dtype = dtypes
|
||
|
levels[level] = np.array([0, 1], dtype=lev_dtype)
|
||
|
key[level] = key_dtype(1)
|
||
|
idx = MultiIndex.from_product(levels)
|
||
|
assert idx.get_loc(tuple(key)) == 3
|
||
|
|
||
|
def test_get_loc_cast_bool(self):
|
||
|
# GH 19086 : int is casted to bool, but not vice-versa
|
||
|
levels = [[False, True], np.arange(2, dtype='int64')]
|
||
|
idx = MultiIndex.from_product(levels)
|
||
|
|
||
|
assert idx.get_loc((0, 1)) == 1
|
||
|
assert idx.get_loc((1, 0)) == 2
|
||
|
|
||
|
pytest.raises(KeyError, idx.get_loc, (False, True))
|
||
|
pytest.raises(KeyError, idx.get_loc, (True, False))
|
||
|
|
||
|
def test_slice_locs(self):
|
||
|
df = tm.makeTimeDataFrame()
|
||
|
stacked = df.stack()
|
||
|
idx = stacked.index
|
||
|
|
||
|
slob = slice(*idx.slice_locs(df.index[5], df.index[15]))
|
||
|
sliced = stacked[slob]
|
||
|
expected = df[5:16].stack()
|
||
|
tm.assert_almost_equal(sliced.values, expected.values)
|
||
|
|
||
|
slob = slice(*idx.slice_locs(df.index[5] + timedelta(seconds=30),
|
||
|
df.index[15] - timedelta(seconds=30)))
|
||
|
sliced = stacked[slob]
|
||
|
expected = df[6:15].stack()
|
||
|
tm.assert_almost_equal(sliced.values, expected.values)
|
||
|
|
||
|
def test_slice_locs_with_type_mismatch(self):
|
||
|
df = tm.makeTimeDataFrame()
|
||
|
stacked = df.stack()
|
||
|
idx = stacked.index
|
||
|
tm.assert_raises_regex(TypeError, '^Level type mismatch',
|
||
|
idx.slice_locs, (1, 3))
|
||
|
tm.assert_raises_regex(TypeError, '^Level type mismatch',
|
||
|
idx.slice_locs,
|
||
|
df.index[5] + timedelta(
|
||
|
seconds=30), (5, 2))
|
||
|
df = tm.makeCustomDataframe(5, 5)
|
||
|
stacked = df.stack()
|
||
|
idx = stacked.index
|
||
|
with tm.assert_raises_regex(TypeError, '^Level type mismatch'):
|
||
|
idx.slice_locs(timedelta(seconds=30))
|
||
|
# TODO: Try creating a UnicodeDecodeError in exception message
|
||
|
with tm.assert_raises_regex(TypeError, '^Level type mismatch'):
|
||
|
idx.slice_locs(df.index[1], (16, "a"))
|
||
|
|
||
|
def test_slice_locs_not_sorted(self):
|
||
|
index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index(
|
||
|
lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
|
||
|
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])])
|
||
|
|
||
|
tm.assert_raises_regex(KeyError, "[Kk]ey length.*greater than "
|
||
|
"MultiIndex lexsort depth",
|
||
|
index.slice_locs, (1, 0, 1), (2, 1, 0))
|
||
|
|
||
|
# works
|
||
|
sorted_index, _ = index.sortlevel(0)
|
||
|
# should there be a test case here???
|
||
|
sorted_index.slice_locs((1, 0, 1), (2, 1, 0))
|
||
|
|
||
|
def test_slice_locs_partial(self):
|
||
|
sorted_idx, _ = self.index.sortlevel(0)
|
||
|
|
||
|
result = sorted_idx.slice_locs(('foo', 'two'), ('qux', 'one'))
|
||
|
assert result == (1, 5)
|
||
|
|
||
|
result = sorted_idx.slice_locs(None, ('qux', 'one'))
|
||
|
assert result == (0, 5)
|
||
|
|
||
|
result = sorted_idx.slice_locs(('foo', 'two'), None)
|
||
|
assert result == (1, len(sorted_idx))
|
||
|
|
||
|
result = sorted_idx.slice_locs('bar', 'baz')
|
||
|
assert result == (2, 4)
|
||
|
|
||
|
def test_slice_locs_not_contained(self):
|
||
|
# some searchsorted action
|
||
|
|
||
|
index = MultiIndex(levels=[[0, 2, 4, 6], [0, 2, 4]],
|
||
|
labels=[[0, 0, 0, 1, 1, 2, 3, 3, 3],
|
||
|
[0, 1, 2, 1, 2, 2, 0, 1, 2]], sortorder=0)
|
||
|
|
||
|
result = index.slice_locs((1, 0), (5, 2))
|
||
|
assert result == (3, 6)
|
||
|
|
||
|
result = index.slice_locs(1, 5)
|
||
|
assert result == (3, 6)
|
||
|
|
||
|
result = index.slice_locs((2, 2), (5, 2))
|
||
|
assert result == (3, 6)
|
||
|
|
||
|
result = index.slice_locs(2, 5)
|
||
|
assert result == (3, 6)
|
||
|
|
||
|
result = index.slice_locs((1, 0), (6, 3))
|
||
|
assert result == (3, 8)
|
||
|
|
||
|
result = index.slice_locs(-1, 10)
|
||
|
assert result == (0, len(index))
|
||
|
|
||
|
def test_consistency(self):
|
||
|
# need to construct an overflow
|
||
|
major_axis = lrange(70000)
|
||
|
minor_axis = lrange(10)
|
||
|
|
||
|
major_labels = np.arange(70000)
|
||
|
minor_labels = np.repeat(lrange(10), 7000)
|
||
|
|
||
|
# the fact that is works means it's consistent
|
||
|
index = MultiIndex(levels=[major_axis, minor_axis],
|
||
|
labels=[major_labels, minor_labels])
|
||
|
|
||
|
# inconsistent
|
||
|
major_labels = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3])
|
||
|
minor_labels = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1])
|
||
|
index = MultiIndex(levels=[major_axis, minor_axis],
|
||
|
labels=[major_labels, minor_labels])
|
||
|
|
||
|
assert not index.is_unique
|
||
|
|
||
|
def test_truncate(self):
|
||
|
major_axis = Index(lrange(4))
|
||
|
minor_axis = Index(lrange(2))
|
||
|
|
||
|
major_labels = np.array([0, 0, 1, 2, 3, 3])
|
||
|
minor_labels = np.array([0, 1, 0, 1, 0, 1])
|
||
|
|
||
|
index = MultiIndex(levels=[major_axis, minor_axis],
|
||
|
labels=[major_labels, minor_labels])
|
||
|
|
||
|
result = index.truncate(before=1)
|
||
|
assert 'foo' not in result.levels[0]
|
||
|
assert 1 in result.levels[0]
|
||
|
|
||
|
result = index.truncate(after=1)
|
||
|
assert 2 not in result.levels[0]
|
||
|
assert 1 in result.levels[0]
|
||
|
|
||
|
result = index.truncate(before=1, after=2)
|
||
|
assert len(result.levels[0]) == 2
|
||
|
|
||
|
# after < before
|
||
|
pytest.raises(ValueError, index.truncate, 3, 1)
|
||
|
|
||
|
def test_get_indexer(self):
|
||
|
major_axis = Index(lrange(4))
|
||
|
minor_axis = Index(lrange(2))
|
||
|
|
||
|
major_labels = np.array([0, 0, 1, 2, 2, 3, 3], dtype=np.intp)
|
||
|
minor_labels = np.array([0, 1, 0, 0, 1, 0, 1], dtype=np.intp)
|
||
|
|
||
|
index = MultiIndex(levels=[major_axis, minor_axis],
|
||
|
labels=[major_labels, minor_labels])
|
||
|
idx1 = index[:5]
|
||
|
idx2 = index[[1, 3, 5]]
|
||
|
|
||
|
r1 = idx1.get_indexer(idx2)
|
||
|
assert_almost_equal(r1, np.array([1, 3, -1], dtype=np.intp))
|
||
|
|
||
|
r1 = idx2.get_indexer(idx1, method='pad')
|
||
|
e1 = np.array([-1, 0, 0, 1, 1], dtype=np.intp)
|
||
|
assert_almost_equal(r1, e1)
|
||
|
|
||
|
r2 = idx2.get_indexer(idx1[::-1], method='pad')
|
||
|
assert_almost_equal(r2, e1[::-1])
|
||
|
|
||
|
rffill1 = idx2.get_indexer(idx1, method='ffill')
|
||
|
assert_almost_equal(r1, rffill1)
|
||
|
|
||
|
r1 = idx2.get_indexer(idx1, method='backfill')
|
||
|
e1 = np.array([0, 0, 1, 1, 2], dtype=np.intp)
|
||
|
assert_almost_equal(r1, e1)
|
||
|
|
||
|
r2 = idx2.get_indexer(idx1[::-1], method='backfill')
|
||
|
assert_almost_equal(r2, e1[::-1])
|
||
|
|
||
|
rbfill1 = idx2.get_indexer(idx1, method='bfill')
|
||
|
assert_almost_equal(r1, rbfill1)
|
||
|
|
||
|
# pass non-MultiIndex
|
||
|
r1 = idx1.get_indexer(idx2.values)
|
||
|
rexp1 = idx1.get_indexer(idx2)
|
||
|
assert_almost_equal(r1, rexp1)
|
||
|
|
||
|
r1 = idx1.get_indexer([1, 2, 3])
|
||
|
assert (r1 == [-1, -1, -1]).all()
|
||
|
|
||
|
# create index with duplicates
|
||
|
idx1 = Index(lrange(10) + lrange(10))
|
||
|
idx2 = Index(lrange(20))
|
||
|
|
||
|
msg = "Reindexing only valid with uniquely valued Index objects"
|
||
|
with tm.assert_raises_regex(InvalidIndexError, msg):
|
||
|
idx1.get_indexer(idx2)
|
||
|
|
||
|
def test_get_indexer_nearest(self):
|
||
|
midx = MultiIndex.from_tuples([('a', 1), ('b', 2)])
|
||
|
with pytest.raises(NotImplementedError):
|
||
|
midx.get_indexer(['a'], method='nearest')
|
||
|
with pytest.raises(NotImplementedError):
|
||
|
midx.get_indexer(['a'], method='pad', tolerance=2)
|
||
|
|
||
|
def test_get_indexer_categorical_time(self):
|
||
|
# https://github.com/pandas-dev/pandas/issues/21390
|
||
|
midx = MultiIndex.from_product(
|
||
|
[Categorical(['a', 'b', 'c']),
|
||
|
Categorical(date_range("2012-01-01", periods=3, freq='H'))])
|
||
|
result = midx.get_indexer(midx)
|
||
|
tm.assert_numpy_array_equal(result, np.arange(9, dtype=np.intp))
|
||
|
|
||
|
def test_hash_collisions(self):
|
||
|
# non-smoke test that we don't get hash collisions
|
||
|
|
||
|
index = MultiIndex.from_product([np.arange(1000), np.arange(1000)],
|
||
|
names=['one', 'two'])
|
||
|
result = index.get_indexer(index.values)
|
||
|
tm.assert_numpy_array_equal(result, np.arange(
|
||
|
len(index), dtype='intp'))
|
||
|
|
||
|
for i in [0, 1, len(index) - 2, len(index) - 1]:
|
||
|
result = index.get_loc(index[i])
|
||
|
assert result == i
|
||
|
|
||
|
def test_format(self):
|
||
|
self.index.format()
|
||
|
self.index[:0].format()
|
||
|
|
||
|
def test_format_integer_names(self):
|
||
|
index = MultiIndex(levels=[[0, 1], [0, 1]],
|
||
|
labels=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1])
|
||
|
index.format(names=True)
|
||
|
|
||
|
def test_format_sparse_display(self):
|
||
|
index = MultiIndex(levels=[[0, 1], [0, 1], [0, 1], [0]],
|
||
|
labels=[[0, 0, 0, 1, 1, 1], [0, 0, 1, 0, 0, 1],
|
||
|
[0, 1, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0]])
|
||
|
|
||
|
result = index.format()
|
||
|
assert result[3] == '1 0 0 0'
|
||
|
|
||
|
def test_format_sparse_config(self):
|
||
|
warn_filters = warnings.filters
|
||
|
warnings.filterwarnings('ignore', category=FutureWarning,
|
||
|
module=".*format")
|
||
|
# GH1538
|
||
|
pd.set_option('display.multi_sparse', False)
|
||
|
|
||
|
result = self.index.format()
|
||
|
assert result[1] == 'foo two'
|
||
|
|
||
|
tm.reset_display_options()
|
||
|
|
||
|
warnings.filters = warn_filters
|
||
|
|
||
|
def test_to_frame(self):
|
||
|
tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]
|
||
|
|
||
|
index = MultiIndex.from_tuples(tuples)
|
||
|
result = index.to_frame(index=False)
|
||
|
expected = DataFrame(tuples)
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
result = index.to_frame()
|
||
|
expected.index = index
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
tuples = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]
|
||
|
index = MultiIndex.from_tuples(tuples, names=['first', 'second'])
|
||
|
result = index.to_frame(index=False)
|
||
|
expected = DataFrame(tuples)
|
||
|
expected.columns = ['first', 'second']
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
result = index.to_frame()
|
||
|
expected.index = index
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
index = MultiIndex.from_product([range(5),
|
||
|
pd.date_range('20130101', periods=3)])
|
||
|
result = index.to_frame(index=False)
|
||
|
expected = DataFrame(
|
||
|
{0: np.repeat(np.arange(5, dtype='int64'), 3),
|
||
|
1: np.tile(pd.date_range('20130101', periods=3), 5)})
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
index = MultiIndex.from_product([range(5),
|
||
|
pd.date_range('20130101', periods=3)])
|
||
|
result = index.to_frame()
|
||
|
expected.index = index
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_to_hierarchical(self):
|
||
|
index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (
|
||
|
2, 'two')])
|
||
|
result = index.to_hierarchical(3)
|
||
|
expected = MultiIndex(levels=[[1, 2], ['one', 'two']],
|
||
|
labels=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1],
|
||
|
[0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]])
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
assert result.names == index.names
|
||
|
|
||
|
# K > 1
|
||
|
result = index.to_hierarchical(3, 2)
|
||
|
expected = MultiIndex(levels=[[1, 2], ['one', 'two']],
|
||
|
labels=[[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
|
||
|
[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]])
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
assert result.names == index.names
|
||
|
|
||
|
# non-sorted
|
||
|
index = MultiIndex.from_tuples([(2, 'c'), (1, 'b'),
|
||
|
(2, 'a'), (2, 'b')],
|
||
|
names=['N1', 'N2'])
|
||
|
|
||
|
result = index.to_hierarchical(2)
|
||
|
expected = MultiIndex.from_tuples([(2, 'c'), (2, 'c'), (1, 'b'),
|
||
|
(1, 'b'),
|
||
|
(2, 'a'), (2, 'a'),
|
||
|
(2, 'b'), (2, 'b')],
|
||
|
names=['N1', 'N2'])
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
assert result.names == index.names
|
||
|
|
||
|
def test_bounds(self):
|
||
|
self.index._bounds
|
||
|
|
||
|
def test_equals_multi(self):
|
||
|
assert self.index.equals(self.index)
|
||
|
assert not self.index.equals(self.index.values)
|
||
|
assert self.index.equals(Index(self.index.values))
|
||
|
|
||
|
assert self.index.equal_levels(self.index)
|
||
|
assert not self.index.equals(self.index[:-1])
|
||
|
assert not self.index.equals(self.index[-1])
|
||
|
|
||
|
# different number of levels
|
||
|
index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index(
|
||
|
lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
|
||
|
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])])
|
||
|
|
||
|
index2 = MultiIndex(levels=index.levels[:-1], labels=index.labels[:-1])
|
||
|
assert not index.equals(index2)
|
||
|
assert not index.equal_levels(index2)
|
||
|
|
||
|
# levels are different
|
||
|
major_axis = Index(lrange(4))
|
||
|
minor_axis = Index(lrange(2))
|
||
|
|
||
|
major_labels = np.array([0, 0, 1, 2, 2, 3])
|
||
|
minor_labels = np.array([0, 1, 0, 0, 1, 0])
|
||
|
|
||
|
index = MultiIndex(levels=[major_axis, minor_axis],
|
||
|
labels=[major_labels, minor_labels])
|
||
|
assert not self.index.equals(index)
|
||
|
assert not self.index.equal_levels(index)
|
||
|
|
||
|
# some of the labels are different
|
||
|
major_axis = Index(['foo', 'bar', 'baz', 'qux'])
|
||
|
minor_axis = Index(['one', 'two'])
|
||
|
|
||
|
major_labels = np.array([0, 0, 2, 2, 3, 3])
|
||
|
minor_labels = np.array([0, 1, 0, 1, 0, 1])
|
||
|
|
||
|
index = MultiIndex(levels=[major_axis, minor_axis],
|
||
|
labels=[major_labels, minor_labels])
|
||
|
assert not self.index.equals(index)
|
||
|
|
||
|
def test_equals_missing_values(self):
|
||
|
# make sure take is not using -1
|
||
|
i = pd.MultiIndex.from_tuples([(0, pd.NaT),
|
||
|
(0, pd.Timestamp('20130101'))])
|
||
|
result = i[0:1].equals(i[0])
|
||
|
assert not result
|
||
|
result = i[1:2].equals(i[1])
|
||
|
assert not result
|
||
|
|
||
|
def test_identical(self):
|
||
|
mi = self.index.copy()
|
||
|
mi2 = self.index.copy()
|
||
|
assert mi.identical(mi2)
|
||
|
|
||
|
mi = mi.set_names(['new1', 'new2'])
|
||
|
assert mi.equals(mi2)
|
||
|
assert not mi.identical(mi2)
|
||
|
|
||
|
mi2 = mi2.set_names(['new1', 'new2'])
|
||
|
assert mi.identical(mi2)
|
||
|
|
||
|
mi3 = Index(mi.tolist(), names=mi.names)
|
||
|
mi4 = Index(mi.tolist(), names=mi.names, tupleize_cols=False)
|
||
|
assert mi.identical(mi3)
|
||
|
assert not mi.identical(mi4)
|
||
|
assert mi.equals(mi4)
|
||
|
|
||
|
def test_is_(self):
|
||
|
mi = MultiIndex.from_tuples(lzip(range(10), range(10)))
|
||
|
assert mi.is_(mi)
|
||
|
assert mi.is_(mi.view())
|
||
|
assert mi.is_(mi.view().view().view().view())
|
||
|
mi2 = mi.view()
|
||
|
# names are metadata, they don't change id
|
||
|
mi2.names = ["A", "B"]
|
||
|
assert mi2.is_(mi)
|
||
|
assert mi.is_(mi2)
|
||
|
|
||
|
assert mi.is_(mi.set_names(["C", "D"]))
|
||
|
mi2 = mi.view()
|
||
|
mi2.set_names(["E", "F"], inplace=True)
|
||
|
assert mi.is_(mi2)
|
||
|
# levels are inherent properties, they change identity
|
||
|
mi3 = mi2.set_levels([lrange(10), lrange(10)])
|
||
|
assert not mi3.is_(mi2)
|
||
|
# shouldn't change
|
||
|
assert mi2.is_(mi)
|
||
|
mi4 = mi3.view()
|
||
|
|
||
|
# GH 17464 - Remove duplicate MultiIndex levels
|
||
|
mi4.set_levels([lrange(10), lrange(10)], inplace=True)
|
||
|
assert not mi4.is_(mi3)
|
||
|
mi5 = mi.view()
|
||
|
mi5.set_levels(mi5.levels, inplace=True)
|
||
|
assert not mi5.is_(mi)
|
||
|
|
||
|
def test_union(self):
|
||
|
piece1 = self.index[:5][::-1]
|
||
|
piece2 = self.index[3:]
|
||
|
|
||
|
the_union = piece1 | piece2
|
||
|
|
||
|
tups = sorted(self.index.values)
|
||
|
expected = MultiIndex.from_tuples(tups)
|
||
|
|
||
|
assert the_union.equals(expected)
|
||
|
|
||
|
# corner case, pass self or empty thing:
|
||
|
the_union = self.index.union(self.index)
|
||
|
assert the_union is self.index
|
||
|
|
||
|
the_union = self.index.union(self.index[:0])
|
||
|
assert the_union is self.index
|
||
|
|
||
|
# won't work in python 3
|
||
|
# tuples = self.index.values
|
||
|
# result = self.index[:4] | tuples[4:]
|
||
|
# assert result.equals(tuples)
|
||
|
|
||
|
# not valid for python 3
|
||
|
# def test_union_with_regular_index(self):
|
||
|
# other = Index(['A', 'B', 'C'])
|
||
|
|
||
|
# result = other.union(self.index)
|
||
|
# assert ('foo', 'one') in result
|
||
|
# assert 'B' in result
|
||
|
|
||
|
# result2 = self.index.union(other)
|
||
|
# assert result.equals(result2)
|
||
|
|
||
|
def test_intersection(self):
|
||
|
piece1 = self.index[:5][::-1]
|
||
|
piece2 = self.index[3:]
|
||
|
|
||
|
the_int = piece1 & piece2
|
||
|
tups = sorted(self.index[3:5].values)
|
||
|
expected = MultiIndex.from_tuples(tups)
|
||
|
assert the_int.equals(expected)
|
||
|
|
||
|
# corner case, pass self
|
||
|
the_int = self.index.intersection(self.index)
|
||
|
assert the_int is self.index
|
||
|
|
||
|
# empty intersection: disjoint
|
||
|
empty = self.index[:2] & self.index[2:]
|
||
|
expected = self.index[:0]
|
||
|
assert empty.equals(expected)
|
||
|
|
||
|
# can't do in python 3
|
||
|
# tuples = self.index.values
|
||
|
# result = self.index & tuples
|
||
|
# assert result.equals(tuples)
|
||
|
|
||
|
def test_sub(self):
|
||
|
|
||
|
first = self.index
|
||
|
|
||
|
# - now raises (previously was set op difference)
|
||
|
with pytest.raises(TypeError):
|
||
|
first - self.index[-3:]
|
||
|
with pytest.raises(TypeError):
|
||
|
self.index[-3:] - first
|
||
|
with pytest.raises(TypeError):
|
||
|
self.index[-3:] - first.tolist()
|
||
|
with pytest.raises(TypeError):
|
||
|
first.tolist() - self.index[-3:]
|
||
|
|
||
|
def test_difference(self):
|
||
|
|
||
|
first = self.index
|
||
|
result = first.difference(self.index[-3:])
|
||
|
expected = MultiIndex.from_tuples(sorted(self.index[:-3].values),
|
||
|
sortorder=0,
|
||
|
names=self.index.names)
|
||
|
|
||
|
assert isinstance(result, MultiIndex)
|
||
|
assert result.equals(expected)
|
||
|
assert result.names == self.index.names
|
||
|
|
||
|
# empty difference: reflexive
|
||
|
result = self.index.difference(self.index)
|
||
|
expected = self.index[:0]
|
||
|
assert result.equals(expected)
|
||
|
assert result.names == self.index.names
|
||
|
|
||
|
# empty difference: superset
|
||
|
result = self.index[-3:].difference(self.index)
|
||
|
expected = self.index[:0]
|
||
|
assert result.equals(expected)
|
||
|
assert result.names == self.index.names
|
||
|
|
||
|
# empty difference: degenerate
|
||
|
result = self.index[:0].difference(self.index)
|
||
|
expected = self.index[:0]
|
||
|
assert result.equals(expected)
|
||
|
assert result.names == self.index.names
|
||
|
|
||
|
# names not the same
|
||
|
chunklet = self.index[-3:]
|
||
|
chunklet.names = ['foo', 'baz']
|
||
|
result = first.difference(chunklet)
|
||
|
assert result.names == (None, None)
|
||
|
|
||
|
# empty, but non-equal
|
||
|
result = self.index.difference(self.index.sortlevel(1)[0])
|
||
|
assert len(result) == 0
|
||
|
|
||
|
# raise Exception called with non-MultiIndex
|
||
|
result = first.difference(first.values)
|
||
|
assert result.equals(first[:0])
|
||
|
|
||
|
# name from empty array
|
||
|
result = first.difference([])
|
||
|
assert first.equals(result)
|
||
|
assert first.names == result.names
|
||
|
|
||
|
# name from non-empty array
|
||
|
result = first.difference([('foo', 'one')])
|
||
|
expected = pd.MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), (
|
||
|
'foo', 'two'), ('qux', 'one'), ('qux', 'two')])
|
||
|
expected.names = first.names
|
||
|
assert first.names == result.names
|
||
|
tm.assert_raises_regex(TypeError, "other must be a MultiIndex "
|
||
|
"or a list of tuples",
|
||
|
first.difference, [1, 2, 3, 4, 5])
|
||
|
|
||
|
def test_from_tuples(self):
|
||
|
tm.assert_raises_regex(TypeError, 'Cannot infer number of levels '
|
||
|
'from empty list',
|
||
|
MultiIndex.from_tuples, [])
|
||
|
|
||
|
expected = MultiIndex(levels=[[1, 3], [2, 4]],
|
||
|
labels=[[0, 1], [0, 1]],
|
||
|
names=['a', 'b'])
|
||
|
|
||
|
# input tuples
|
||
|
result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=['a', 'b'])
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
def test_from_tuples_iterator(self):
|
||
|
# GH 18434
|
||
|
# input iterator for tuples
|
||
|
expected = MultiIndex(levels=[[1, 3], [2, 4]],
|
||
|
labels=[[0, 1], [0, 1]],
|
||
|
names=['a', 'b'])
|
||
|
|
||
|
result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=['a', 'b'])
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
# input non-iterables
|
||
|
with tm.assert_raises_regex(
|
||
|
TypeError, 'Input must be a list / sequence of tuple-likes.'):
|
||
|
MultiIndex.from_tuples(0)
|
||
|
|
||
|
def test_from_tuples_empty(self):
|
||
|
# GH 16777
|
||
|
result = MultiIndex.from_tuples([], names=['a', 'b'])
|
||
|
expected = MultiIndex.from_arrays(arrays=[[], []],
|
||
|
names=['a', 'b'])
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
def test_argsort(self):
|
||
|
result = self.index.argsort()
|
||
|
expected = self.index.values.argsort()
|
||
|
tm.assert_numpy_array_equal(result, expected)
|
||
|
|
||
|
def test_sortlevel(self):
|
||
|
import random
|
||
|
|
||
|
tuples = list(self.index)
|
||
|
random.shuffle(tuples)
|
||
|
|
||
|
index = MultiIndex.from_tuples(tuples)
|
||
|
|
||
|
sorted_idx, _ = index.sortlevel(0)
|
||
|
expected = MultiIndex.from_tuples(sorted(tuples))
|
||
|
assert sorted_idx.equals(expected)
|
||
|
|
||
|
sorted_idx, _ = index.sortlevel(0, ascending=False)
|
||
|
assert sorted_idx.equals(expected[::-1])
|
||
|
|
||
|
sorted_idx, _ = index.sortlevel(1)
|
||
|
by1 = sorted(tuples, key=lambda x: (x[1], x[0]))
|
||
|
expected = MultiIndex.from_tuples(by1)
|
||
|
assert sorted_idx.equals(expected)
|
||
|
|
||
|
sorted_idx, _ = index.sortlevel(1, ascending=False)
|
||
|
assert sorted_idx.equals(expected[::-1])
|
||
|
|
||
|
def test_sortlevel_not_sort_remaining(self):
|
||
|
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC'))
|
||
|
sorted_idx, _ = mi.sortlevel('A', sort_remaining=False)
|
||
|
assert sorted_idx.equals(mi)
|
||
|
|
||
|
def test_sortlevel_deterministic(self):
|
||
|
tuples = [('bar', 'one'), ('foo', 'two'), ('qux', 'two'),
|
||
|
('foo', 'one'), ('baz', 'two'), ('qux', 'one')]
|
||
|
|
||
|
index = MultiIndex.from_tuples(tuples)
|
||
|
|
||
|
sorted_idx, _ = index.sortlevel(0)
|
||
|
expected = MultiIndex.from_tuples(sorted(tuples))
|
||
|
assert sorted_idx.equals(expected)
|
||
|
|
||
|
sorted_idx, _ = index.sortlevel(0, ascending=False)
|
||
|
assert sorted_idx.equals(expected[::-1])
|
||
|
|
||
|
sorted_idx, _ = index.sortlevel(1)
|
||
|
by1 = sorted(tuples, key=lambda x: (x[1], x[0]))
|
||
|
expected = MultiIndex.from_tuples(by1)
|
||
|
assert sorted_idx.equals(expected)
|
||
|
|
||
|
sorted_idx, _ = index.sortlevel(1, ascending=False)
|
||
|
assert sorted_idx.equals(expected[::-1])
|
||
|
|
||
|
def test_dims(self):
|
||
|
pass
|
||
|
|
||
|
def test_drop(self):
|
||
|
dropped = self.index.drop([('foo', 'two'), ('qux', 'one')])
|
||
|
|
||
|
index = MultiIndex.from_tuples([('foo', 'two'), ('qux', 'one')])
|
||
|
dropped2 = self.index.drop(index)
|
||
|
|
||
|
expected = self.index[[0, 2, 3, 5]]
|
||
|
tm.assert_index_equal(dropped, expected)
|
||
|
tm.assert_index_equal(dropped2, expected)
|
||
|
|
||
|
dropped = self.index.drop(['bar'])
|
||
|
expected = self.index[[0, 1, 3, 4, 5]]
|
||
|
tm.assert_index_equal(dropped, expected)
|
||
|
|
||
|
dropped = self.index.drop('foo')
|
||
|
expected = self.index[[2, 3, 4, 5]]
|
||
|
tm.assert_index_equal(dropped, expected)
|
||
|
|
||
|
index = MultiIndex.from_tuples([('bar', 'two')])
|
||
|
pytest.raises(KeyError, self.index.drop, [('bar', 'two')])
|
||
|
pytest.raises(KeyError, self.index.drop, index)
|
||
|
pytest.raises(KeyError, self.index.drop, ['foo', 'two'])
|
||
|
|
||
|
# partially correct argument
|
||
|
mixed_index = MultiIndex.from_tuples([('qux', 'one'), ('bar', 'two')])
|
||
|
pytest.raises(KeyError, self.index.drop, mixed_index)
|
||
|
|
||
|
# error='ignore'
|
||
|
dropped = self.index.drop(index, errors='ignore')
|
||
|
expected = self.index[[0, 1, 2, 3, 4, 5]]
|
||
|
tm.assert_index_equal(dropped, expected)
|
||
|
|
||
|
dropped = self.index.drop(mixed_index, errors='ignore')
|
||
|
expected = self.index[[0, 1, 2, 3, 5]]
|
||
|
tm.assert_index_equal(dropped, expected)
|
||
|
|
||
|
dropped = self.index.drop(['foo', 'two'], errors='ignore')
|
||
|
expected = self.index[[2, 3, 4, 5]]
|
||
|
tm.assert_index_equal(dropped, expected)
|
||
|
|
||
|
# mixed partial / full drop
|
||
|
dropped = self.index.drop(['foo', ('qux', 'one')])
|
||
|
expected = self.index[[2, 3, 5]]
|
||
|
tm.assert_index_equal(dropped, expected)
|
||
|
|
||
|
# mixed partial / full drop / error='ignore'
|
||
|
mixed_index = ['foo', ('qux', 'one'), 'two']
|
||
|
pytest.raises(KeyError, self.index.drop, mixed_index)
|
||
|
dropped = self.index.drop(mixed_index, errors='ignore')
|
||
|
expected = self.index[[2, 3, 5]]
|
||
|
tm.assert_index_equal(dropped, expected)
|
||
|
|
||
|
def test_droplevel_with_names(self):
|
||
|
index = self.index[self.index.get_loc('foo')]
|
||
|
dropped = index.droplevel(0)
|
||
|
assert dropped.name == 'second'
|
||
|
|
||
|
index = MultiIndex(
|
||
|
levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))],
|
||
|
labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
|
||
|
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])],
|
||
|
names=['one', 'two', 'three'])
|
||
|
dropped = index.droplevel(0)
|
||
|
assert dropped.names == ('two', 'three')
|
||
|
|
||
|
dropped = index.droplevel('two')
|
||
|
expected = index.droplevel(1)
|
||
|
assert dropped.equals(expected)
|
||
|
|
||
|
def test_droplevel_multiple(self):
|
||
|
index = MultiIndex(
|
||
|
levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))],
|
||
|
labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
|
||
|
[0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])],
|
||
|
names=['one', 'two', 'three'])
|
||
|
|
||
|
dropped = index[:2].droplevel(['three', 'one'])
|
||
|
expected = index[:2].droplevel(2).droplevel(0)
|
||
|
assert dropped.equals(expected)
|
||
|
|
||
|
def test_drop_not_lexsorted(self):
|
||
|
# GH 12078
|
||
|
|
||
|
# define the lexsorted version of the multi-index
|
||
|
tuples = [('a', ''), ('b1', 'c1'), ('b2', 'c2')]
|
||
|
lexsorted_mi = MultiIndex.from_tuples(tuples, names=['b', 'c'])
|
||
|
assert lexsorted_mi.is_lexsorted()
|
||
|
|
||
|
# and the not-lexsorted version
|
||
|
df = pd.DataFrame(columns=['a', 'b', 'c', 'd'],
|
||
|
data=[[1, 'b1', 'c1', 3], [1, 'b2', 'c2', 4]])
|
||
|
df = df.pivot_table(index='a', columns=['b', 'c'], values='d')
|
||
|
df = df.reset_index()
|
||
|
not_lexsorted_mi = df.columns
|
||
|
assert not not_lexsorted_mi.is_lexsorted()
|
||
|
|
||
|
# compare the results
|
||
|
tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi)
|
||
|
with tm.assert_produces_warning(PerformanceWarning):
|
||
|
tm.assert_index_equal(lexsorted_mi.drop('a'),
|
||
|
not_lexsorted_mi.drop('a'))
|
||
|
|
||
|
def test_insert(self):
|
||
|
# key contained in all levels
|
||
|
new_index = self.index.insert(0, ('bar', 'two'))
|
||
|
assert new_index.equal_levels(self.index)
|
||
|
assert new_index[0] == ('bar', 'two')
|
||
|
|
||
|
# key not contained in all levels
|
||
|
new_index = self.index.insert(0, ('abc', 'three'))
|
||
|
|
||
|
exp0 = Index(list(self.index.levels[0]) + ['abc'], name='first')
|
||
|
tm.assert_index_equal(new_index.levels[0], exp0)
|
||
|
|
||
|
exp1 = Index(list(self.index.levels[1]) + ['three'], name='second')
|
||
|
tm.assert_index_equal(new_index.levels[1], exp1)
|
||
|
assert new_index[0] == ('abc', 'three')
|
||
|
|
||
|
# key wrong length
|
||
|
msg = "Item must have length equal to number of levels"
|
||
|
with tm.assert_raises_regex(ValueError, msg):
|
||
|
self.index.insert(0, ('foo2',))
|
||
|
|
||
|
left = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1]],
|
||
|
columns=['1st', '2nd', '3rd'])
|
||
|
left.set_index(['1st', '2nd'], inplace=True)
|
||
|
ts = left['3rd'].copy(deep=True)
|
||
|
|
||
|
left.loc[('b', 'x'), '3rd'] = 2
|
||
|
left.loc[('b', 'a'), '3rd'] = -1
|
||
|
left.loc[('b', 'b'), '3rd'] = 3
|
||
|
left.loc[('a', 'x'), '3rd'] = 4
|
||
|
left.loc[('a', 'w'), '3rd'] = 5
|
||
|
left.loc[('a', 'a'), '3rd'] = 6
|
||
|
|
||
|
ts.loc[('b', 'x')] = 2
|
||
|
ts.loc['b', 'a'] = -1
|
||
|
ts.loc[('b', 'b')] = 3
|
||
|
ts.loc['a', 'x'] = 4
|
||
|
ts.loc[('a', 'w')] = 5
|
||
|
ts.loc['a', 'a'] = 6
|
||
|
|
||
|
right = pd.DataFrame([['a', 'b', 0], ['b', 'd', 1], ['b', 'x', 2],
|
||
|
['b', 'a', -1], ['b', 'b', 3], ['a', 'x', 4],
|
||
|
['a', 'w', 5], ['a', 'a', 6]],
|
||
|
columns=['1st', '2nd', '3rd'])
|
||
|
right.set_index(['1st', '2nd'], inplace=True)
|
||
|
# FIXME data types changes to float because
|
||
|
# of intermediate nan insertion;
|
||
|
tm.assert_frame_equal(left, right, check_dtype=False)
|
||
|
tm.assert_series_equal(ts, right['3rd'])
|
||
|
|
||
|
# GH9250
|
||
|
idx = [('test1', i) for i in range(5)] + \
|
||
|
[('test2', i) for i in range(6)] + \
|
||
|
[('test', 17), ('test', 18)]
|
||
|
|
||
|
left = pd.Series(np.linspace(0, 10, 11),
|
||
|
pd.MultiIndex.from_tuples(idx[:-2]))
|
||
|
|
||
|
left.loc[('test', 17)] = 11
|
||
|
left.loc[('test', 18)] = 12
|
||
|
|
||
|
right = pd.Series(np.linspace(0, 12, 13),
|
||
|
pd.MultiIndex.from_tuples(idx))
|
||
|
|
||
|
tm.assert_series_equal(left, right)
|
||
|
|
||
|
def test_take_preserve_name(self):
|
||
|
taken = self.index.take([3, 0, 1])
|
||
|
assert taken.names == self.index.names
|
||
|
|
||
|
def test_take_fill_value(self):
|
||
|
# GH 12631
|
||
|
vals = [['A', 'B'],
|
||
|
[pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]]
|
||
|
idx = pd.MultiIndex.from_product(vals, names=['str', 'dt'])
|
||
|
|
||
|
result = idx.take(np.array([1, 0, -1]))
|
||
|
exp_vals = [('A', pd.Timestamp('2011-01-02')),
|
||
|
('A', pd.Timestamp('2011-01-01')),
|
||
|
('B', pd.Timestamp('2011-01-02'))]
|
||
|
expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt'])
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
# fill_value
|
||
|
result = idx.take(np.array([1, 0, -1]), fill_value=True)
|
||
|
exp_vals = [('A', pd.Timestamp('2011-01-02')),
|
||
|
('A', pd.Timestamp('2011-01-01')),
|
||
|
(np.nan, pd.NaT)]
|
||
|
expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt'])
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
# allow_fill=False
|
||
|
result = idx.take(np.array([1, 0, -1]), allow_fill=False,
|
||
|
fill_value=True)
|
||
|
exp_vals = [('A', pd.Timestamp('2011-01-02')),
|
||
|
('A', pd.Timestamp('2011-01-01')),
|
||
|
('B', pd.Timestamp('2011-01-02'))]
|
||
|
expected = pd.MultiIndex.from_tuples(exp_vals, names=['str', 'dt'])
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
msg = ('When allow_fill=True and fill_value is not None, '
|
||
|
'all indices must be >= -1')
|
||
|
with tm.assert_raises_regex(ValueError, msg):
|
||
|
idx.take(np.array([1, 0, -2]), fill_value=True)
|
||
|
with tm.assert_raises_regex(ValueError, msg):
|
||
|
idx.take(np.array([1, 0, -5]), fill_value=True)
|
||
|
|
||
|
with pytest.raises(IndexError):
|
||
|
idx.take(np.array([1, -5]))
|
||
|
|
||
|
def take_invalid_kwargs(self):
|
||
|
vals = [['A', 'B'],
|
||
|
[pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]]
|
||
|
idx = pd.MultiIndex.from_product(vals, names=['str', 'dt'])
|
||
|
indices = [1, 2]
|
||
|
|
||
|
msg = r"take\(\) got an unexpected keyword argument 'foo'"
|
||
|
tm.assert_raises_regex(TypeError, msg, idx.take,
|
||
|
indices, foo=2)
|
||
|
|
||
|
msg = "the 'out' parameter is not supported"
|
||
|
tm.assert_raises_regex(ValueError, msg, idx.take,
|
||
|
indices, out=indices)
|
||
|
|
||
|
msg = "the 'mode' parameter is not supported"
|
||
|
tm.assert_raises_regex(ValueError, msg, idx.take,
|
||
|
indices, mode='clip')
|
||
|
|
||
|
@pytest.mark.parametrize('other',
|
||
|
[Index(['three', 'one', 'two']),
|
||
|
Index(['one']),
|
||
|
Index(['one', 'three'])])
|
||
|
def test_join_level(self, other, join_type):
|
||
|
join_index, lidx, ridx = other.join(self.index, how=join_type,
|
||
|
level='second',
|
||
|
return_indexers=True)
|
||
|
|
||
|
exp_level = other.join(self.index.levels[1], how=join_type)
|
||
|
assert join_index.levels[0].equals(self.index.levels[0])
|
||
|
assert join_index.levels[1].equals(exp_level)
|
||
|
|
||
|
# pare down levels
|
||
|
mask = np.array(
|
||
|
[x[1] in exp_level for x in self.index], dtype=bool)
|
||
|
exp_values = self.index.values[mask]
|
||
|
tm.assert_numpy_array_equal(join_index.values, exp_values)
|
||
|
|
||
|
if join_type in ('outer', 'inner'):
|
||
|
join_index2, ridx2, lidx2 = \
|
||
|
self.index.join(other, how=join_type, level='second',
|
||
|
return_indexers=True)
|
||
|
|
||
|
assert join_index.equals(join_index2)
|
||
|
tm.assert_numpy_array_equal(lidx, lidx2)
|
||
|
tm.assert_numpy_array_equal(ridx, ridx2)
|
||
|
tm.assert_numpy_array_equal(join_index2.values, exp_values)
|
||
|
|
||
|
def test_join_level_corner_case(self):
|
||
|
# some corner cases
|
||
|
idx = Index(['three', 'one', 'two'])
|
||
|
result = idx.join(self.index, level='second')
|
||
|
assert isinstance(result, MultiIndex)
|
||
|
|
||
|
tm.assert_raises_regex(TypeError, "Join.*MultiIndex.*ambiguous",
|
||
|
self.index.join, self.index, level=1)
|
||
|
|
||
|
def test_join_self(self, join_type):
|
||
|
res = self.index
|
||
|
joined = res.join(res, how=join_type)
|
||
|
assert res is joined
|
||
|
|
||
|
def test_join_multi(self):
|
||
|
# GH 10665
|
||
|
midx = pd.MultiIndex.from_product(
|
||
|
[np.arange(4), np.arange(4)], names=['a', 'b'])
|
||
|
idx = pd.Index([1, 2, 5], name='b')
|
||
|
|
||
|
# inner
|
||
|
jidx, lidx, ridx = midx.join(idx, how='inner', return_indexers=True)
|
||
|
exp_idx = pd.MultiIndex.from_product(
|
||
|
[np.arange(4), [1, 2]], names=['a', 'b'])
|
||
|
exp_lidx = np.array([1, 2, 5, 6, 9, 10, 13, 14], dtype=np.intp)
|
||
|
exp_ridx = np.array([0, 1, 0, 1, 0, 1, 0, 1], dtype=np.intp)
|
||
|
tm.assert_index_equal(jidx, exp_idx)
|
||
|
tm.assert_numpy_array_equal(lidx, exp_lidx)
|
||
|
tm.assert_numpy_array_equal(ridx, exp_ridx)
|
||
|
# flip
|
||
|
jidx, ridx, lidx = idx.join(midx, how='inner', return_indexers=True)
|
||
|
tm.assert_index_equal(jidx, exp_idx)
|
||
|
tm.assert_numpy_array_equal(lidx, exp_lidx)
|
||
|
tm.assert_numpy_array_equal(ridx, exp_ridx)
|
||
|
|
||
|
# keep MultiIndex
|
||
|
jidx, lidx, ridx = midx.join(idx, how='left', return_indexers=True)
|
||
|
exp_ridx = np.array([-1, 0, 1, -1, -1, 0, 1, -1, -1, 0, 1, -1, -1, 0,
|
||
|
1, -1], dtype=np.intp)
|
||
|
tm.assert_index_equal(jidx, midx)
|
||
|
assert lidx is None
|
||
|
tm.assert_numpy_array_equal(ridx, exp_ridx)
|
||
|
# flip
|
||
|
jidx, ridx, lidx = idx.join(midx, how='right', return_indexers=True)
|
||
|
tm.assert_index_equal(jidx, midx)
|
||
|
assert lidx is None
|
||
|
tm.assert_numpy_array_equal(ridx, exp_ridx)
|
||
|
|
||
|
def test_reindex(self):
|
||
|
result, indexer = self.index.reindex(list(self.index[:4]))
|
||
|
assert isinstance(result, MultiIndex)
|
||
|
self.check_level_names(result, self.index[:4].names)
|
||
|
|
||
|
result, indexer = self.index.reindex(list(self.index))
|
||
|
assert isinstance(result, MultiIndex)
|
||
|
assert indexer is None
|
||
|
self.check_level_names(result, self.index.names)
|
||
|
|
||
|
def test_reindex_level(self):
|
||
|
idx = Index(['one'])
|
||
|
|
||
|
target, indexer = self.index.reindex(idx, level='second')
|
||
|
target2, indexer2 = idx.reindex(self.index, level='second')
|
||
|
|
||
|
exp_index = self.index.join(idx, level='second', how='right')
|
||
|
exp_index2 = self.index.join(idx, level='second', how='left')
|
||
|
|
||
|
assert target.equals(exp_index)
|
||
|
exp_indexer = np.array([0, 2, 4])
|
||
|
tm.assert_numpy_array_equal(indexer, exp_indexer, check_dtype=False)
|
||
|
|
||
|
assert target2.equals(exp_index2)
|
||
|
exp_indexer2 = np.array([0, -1, 0, -1, 0, -1])
|
||
|
tm.assert_numpy_array_equal(indexer2, exp_indexer2, check_dtype=False)
|
||
|
|
||
|
tm.assert_raises_regex(TypeError, "Fill method not supported",
|
||
|
self.index.reindex, self.index,
|
||
|
method='pad', level='second')
|
||
|
|
||
|
tm.assert_raises_regex(TypeError, "Fill method not supported",
|
||
|
idx.reindex, idx, method='bfill',
|
||
|
level='first')
|
||
|
|
||
|
def test_duplicates(self):
|
||
|
assert not self.index.has_duplicates
|
||
|
assert self.index.append(self.index).has_duplicates
|
||
|
|
||
|
index = MultiIndex(levels=[[0, 1], [0, 1, 2]], labels=[
|
||
|
[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]])
|
||
|
assert index.has_duplicates
|
||
|
|
||
|
# GH 9075
|
||
|
t = [(u('x'), u('out'), u('z'), 5, u('y'), u('in'), u('z'), 169),
|
||
|
(u('x'), u('out'), u('z'), 7, u('y'), u('in'), u('z'), 119),
|
||
|
(u('x'), u('out'), u('z'), 9, u('y'), u('in'), u('z'), 135),
|
||
|
(u('x'), u('out'), u('z'), 13, u('y'), u('in'), u('z'), 145),
|
||
|
(u('x'), u('out'), u('z'), 14, u('y'), u('in'), u('z'), 158),
|
||
|
(u('x'), u('out'), u('z'), 16, u('y'), u('in'), u('z'), 122),
|
||
|
(u('x'), u('out'), u('z'), 17, u('y'), u('in'), u('z'), 160),
|
||
|
(u('x'), u('out'), u('z'), 18, u('y'), u('in'), u('z'), 180),
|
||
|
(u('x'), u('out'), u('z'), 20, u('y'), u('in'), u('z'), 143),
|
||
|
(u('x'), u('out'), u('z'), 21, u('y'), u('in'), u('z'), 128),
|
||
|
(u('x'), u('out'), u('z'), 22, u('y'), u('in'), u('z'), 129),
|
||
|
(u('x'), u('out'), u('z'), 25, u('y'), u('in'), u('z'), 111),
|
||
|
(u('x'), u('out'), u('z'), 28, u('y'), u('in'), u('z'), 114),
|
||
|
(u('x'), u('out'), u('z'), 29, u('y'), u('in'), u('z'), 121),
|
||
|
(u('x'), u('out'), u('z'), 31, u('y'), u('in'), u('z'), 126),
|
||
|
(u('x'), u('out'), u('z'), 32, u('y'), u('in'), u('z'), 155),
|
||
|
(u('x'), u('out'), u('z'), 33, u('y'), u('in'), u('z'), 123),
|
||
|
(u('x'), u('out'), u('z'), 12, u('y'), u('in'), u('z'), 144)]
|
||
|
|
||
|
index = pd.MultiIndex.from_tuples(t)
|
||
|
assert not index.has_duplicates
|
||
|
|
||
|
# handle int64 overflow if possible
|
||
|
def check(nlevels, with_nulls):
|
||
|
labels = np.tile(np.arange(500), 2)
|
||
|
level = np.arange(500)
|
||
|
|
||
|
if with_nulls: # inject some null values
|
||
|
labels[500] = -1 # common nan value
|
||
|
labels = [labels.copy() for i in range(nlevels)]
|
||
|
for i in range(nlevels):
|
||
|
labels[i][500 + i - nlevels // 2] = -1
|
||
|
|
||
|
labels += [np.array([-1, 1]).repeat(500)]
|
||
|
else:
|
||
|
labels = [labels] * nlevels + [np.arange(2).repeat(500)]
|
||
|
|
||
|
levels = [level] * nlevels + [[0, 1]]
|
||
|
|
||
|
# no dups
|
||
|
index = MultiIndex(levels=levels, labels=labels)
|
||
|
assert not index.has_duplicates
|
||
|
|
||
|
# with a dup
|
||
|
if with_nulls:
|
||
|
f = lambda a: np.insert(a, 1000, a[0])
|
||
|
labels = list(map(f, labels))
|
||
|
index = MultiIndex(levels=levels, labels=labels)
|
||
|
else:
|
||
|
values = index.values.tolist()
|
||
|
index = MultiIndex.from_tuples(values + [values[0]])
|
||
|
|
||
|
assert index.has_duplicates
|
||
|
|
||
|
# no overflow
|
||
|
check(4, False)
|
||
|
check(4, True)
|
||
|
|
||
|
# overflow possible
|
||
|
check(8, False)
|
||
|
check(8, True)
|
||
|
|
||
|
# GH 9125
|
||
|
n, k = 200, 5000
|
||
|
levels = [np.arange(n), tm.makeStringIndex(n), 1000 + np.arange(n)]
|
||
|
labels = [np.random.choice(n, k * n) for lev in levels]
|
||
|
mi = MultiIndex(levels=levels, labels=labels)
|
||
|
|
||
|
for keep in ['first', 'last', False]:
|
||
|
left = mi.duplicated(keep=keep)
|
||
|
right = pd._libs.hashtable.duplicated_object(mi.values, keep=keep)
|
||
|
tm.assert_numpy_array_equal(left, right)
|
||
|
|
||
|
# GH5873
|
||
|
for a in [101, 102]:
|
||
|
mi = MultiIndex.from_arrays([[101, a], [3.5, np.nan]])
|
||
|
assert not mi.has_duplicates
|
||
|
|
||
|
with warnings.catch_warnings(record=True):
|
||
|
# Deprecated - see GH20239
|
||
|
assert mi.get_duplicates().equals(MultiIndex.from_arrays(
|
||
|
[[], []]))
|
||
|
|
||
|
tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(
|
||
|
2, dtype='bool'))
|
||
|
|
||
|
for n in range(1, 6): # 1st level shape
|
||
|
for m in range(1, 5): # 2nd level shape
|
||
|
# all possible unique combinations, including nan
|
||
|
lab = product(range(-1, n), range(-1, m))
|
||
|
mi = MultiIndex(levels=[list('abcde')[:n], list('WXYZ')[:m]],
|
||
|
labels=np.random.permutation(list(lab)).T)
|
||
|
assert len(mi) == (n + 1) * (m + 1)
|
||
|
assert not mi.has_duplicates
|
||
|
|
||
|
with warnings.catch_warnings(record=True):
|
||
|
# Deprecated - see GH20239
|
||
|
assert mi.get_duplicates().equals(MultiIndex.from_arrays(
|
||
|
[[], []]))
|
||
|
|
||
|
tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(
|
||
|
len(mi), dtype='bool'))
|
||
|
|
||
|
def test_duplicate_meta_data(self):
|
||
|
# GH 10115
|
||
|
index = MultiIndex(
|
||
|
levels=[[0, 1], [0, 1, 2]],
|
||
|
labels=[[0, 0, 0, 0, 1, 1, 1],
|
||
|
[0, 1, 2, 0, 0, 1, 2]])
|
||
|
|
||
|
for idx in [index,
|
||
|
index.set_names([None, None]),
|
||
|
index.set_names([None, 'Num']),
|
||
|
index.set_names(['Upper', 'Num']), ]:
|
||
|
assert idx.has_duplicates
|
||
|
assert idx.drop_duplicates().names == idx.names
|
||
|
|
||
|
def test_get_unique_index(self):
|
||
|
idx = self.index[[0, 1, 0, 1, 1, 0, 0]]
|
||
|
expected = self.index._shallow_copy(idx[[0, 1]])
|
||
|
|
||
|
for dropna in [False, True]:
|
||
|
result = idx._get_unique_index(dropna=dropna)
|
||
|
assert result.unique
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
@pytest.mark.parametrize('names', [None, ['first', 'second']])
|
||
|
def test_unique(self, names):
|
||
|
mi = pd.MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]],
|
||
|
names=names)
|
||
|
|
||
|
res = mi.unique()
|
||
|
exp = pd.MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names)
|
||
|
tm.assert_index_equal(res, exp)
|
||
|
|
||
|
mi = pd.MultiIndex.from_arrays([list('aaaa'), list('abab')],
|
||
|
names=names)
|
||
|
res = mi.unique()
|
||
|
exp = pd.MultiIndex.from_arrays([list('aa'), list('ab')],
|
||
|
names=mi.names)
|
||
|
tm.assert_index_equal(res, exp)
|
||
|
|
||
|
mi = pd.MultiIndex.from_arrays([list('aaaa'), list('aaaa')],
|
||
|
names=names)
|
||
|
res = mi.unique()
|
||
|
exp = pd.MultiIndex.from_arrays([['a'], ['a']], names=mi.names)
|
||
|
tm.assert_index_equal(res, exp)
|
||
|
|
||
|
# GH #20568 - empty MI
|
||
|
mi = pd.MultiIndex.from_arrays([[], []], names=names)
|
||
|
res = mi.unique()
|
||
|
tm.assert_index_equal(mi, res)
|
||
|
|
||
|
@pytest.mark.parametrize('level', [0, 'first', 1, 'second'])
|
||
|
def test_unique_level(self, level):
|
||
|
# GH #17896 - with level= argument
|
||
|
result = self.index.unique(level=level)
|
||
|
expected = self.index.get_level_values(level).unique()
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
# With already unique level
|
||
|
mi = pd.MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]],
|
||
|
names=['first', 'second'])
|
||
|
result = mi.unique(level=level)
|
||
|
expected = mi.get_level_values(level)
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
# With empty MI
|
||
|
mi = pd.MultiIndex.from_arrays([[], []], names=['first', 'second'])
|
||
|
result = mi.unique(level=level)
|
||
|
expected = mi.get_level_values(level)
|
||
|
|
||
|
def test_unique_datetimelike(self):
|
||
|
idx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-01',
|
||
|
'2015-01-01', 'NaT', 'NaT'])
|
||
|
idx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', '2015-01-02',
|
||
|
'2015-01-02', 'NaT', '2015-01-01'],
|
||
|
tz='Asia/Tokyo')
|
||
|
result = pd.MultiIndex.from_arrays([idx1, idx2]).unique()
|
||
|
|
||
|
eidx1 = pd.DatetimeIndex(['2015-01-01', '2015-01-01', 'NaT', 'NaT'])
|
||
|
eidx2 = pd.DatetimeIndex(['2015-01-01', '2015-01-02',
|
||
|
'NaT', '2015-01-01'],
|
||
|
tz='Asia/Tokyo')
|
||
|
exp = pd.MultiIndex.from_arrays([eidx1, eidx2])
|
||
|
tm.assert_index_equal(result, exp)
|
||
|
|
||
|
def test_tolist(self):
|
||
|
result = self.index.tolist()
|
||
|
exp = list(self.index.values)
|
||
|
assert result == exp
|
||
|
|
||
|
def test_repr_with_unicode_data(self):
|
||
|
with pd.core.config.option_context("display.encoding", 'UTF-8'):
|
||
|
d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
|
||
|
index = pd.DataFrame(d).set_index(["a", "b"]).index
|
||
|
assert "\\u" not in repr(index) # we don't want unicode-escaped
|
||
|
|
||
|
def test_repr_roundtrip(self):
|
||
|
|
||
|
mi = MultiIndex.from_product([list('ab'), range(3)],
|
||
|
names=['first', 'second'])
|
||
|
str(mi)
|
||
|
|
||
|
if PY3:
|
||
|
tm.assert_index_equal(eval(repr(mi)), mi, exact=True)
|
||
|
else:
|
||
|
result = eval(repr(mi))
|
||
|
# string coerces to unicode
|
||
|
tm.assert_index_equal(result, mi, exact=False)
|
||
|
assert mi.get_level_values('first').inferred_type == 'string'
|
||
|
assert result.get_level_values('first').inferred_type == 'unicode'
|
||
|
|
||
|
mi_u = MultiIndex.from_product(
|
||
|
[list(u'ab'), range(3)], names=['first', 'second'])
|
||
|
result = eval(repr(mi_u))
|
||
|
tm.assert_index_equal(result, mi_u, exact=True)
|
||
|
|
||
|
# formatting
|
||
|
if PY3:
|
||
|
str(mi)
|
||
|
else:
|
||
|
compat.text_type(mi)
|
||
|
|
||
|
# long format
|
||
|
mi = MultiIndex.from_product([list('abcdefg'), range(10)],
|
||
|
names=['first', 'second'])
|
||
|
|
||
|
if PY3:
|
||
|
tm.assert_index_equal(eval(repr(mi)), mi, exact=True)
|
||
|
else:
|
||
|
result = eval(repr(mi))
|
||
|
# string coerces to unicode
|
||
|
tm.assert_index_equal(result, mi, exact=False)
|
||
|
assert mi.get_level_values('first').inferred_type == 'string'
|
||
|
assert result.get_level_values('first').inferred_type == 'unicode'
|
||
|
|
||
|
result = eval(repr(mi_u))
|
||
|
tm.assert_index_equal(result, mi_u, exact=True)
|
||
|
|
||
|
def test_str(self):
|
||
|
# tested elsewhere
|
||
|
pass
|
||
|
|
||
|
def test_unicode_string_with_unicode(self):
|
||
|
d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
|
||
|
idx = pd.DataFrame(d).set_index(["a", "b"]).index
|
||
|
|
||
|
if PY3:
|
||
|
str(idx)
|
||
|
else:
|
||
|
compat.text_type(idx)
|
||
|
|
||
|
def test_bytestring_with_unicode(self):
|
||
|
d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
|
||
|
idx = pd.DataFrame(d).set_index(["a", "b"]).index
|
||
|
|
||
|
if PY3:
|
||
|
bytes(idx)
|
||
|
else:
|
||
|
str(idx)
|
||
|
|
||
|
def test_slice_keep_name(self):
|
||
|
x = MultiIndex.from_tuples([('a', 'b'), (1, 2), ('c', 'd')],
|
||
|
names=['x', 'y'])
|
||
|
assert x[1:].names == x.names
|
||
|
|
||
|
def test_isna_behavior(self):
|
||
|
# should not segfault GH5123
|
||
|
# NOTE: if MI representation changes, may make sense to allow
|
||
|
# isna(MI)
|
||
|
with pytest.raises(NotImplementedError):
|
||
|
pd.isna(self.index)
|
||
|
|
||
|
def test_level_setting_resets_attributes(self):
|
||
|
ind = pd.MultiIndex.from_arrays([
|
||
|
['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]
|
||
|
])
|
||
|
assert ind.is_monotonic
|
||
|
ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True)
|
||
|
# if this fails, probably didn't reset the cache correctly.
|
||
|
assert not ind.is_monotonic
|
||
|
|
||
|
def test_is_monotonic_increasing(self):
|
||
|
i = MultiIndex.from_product([np.arange(10),
|
||
|
np.arange(10)], names=['one', 'two'])
|
||
|
assert i.is_monotonic
|
||
|
assert i._is_strictly_monotonic_increasing
|
||
|
assert Index(i.values).is_monotonic
|
||
|
assert i._is_strictly_monotonic_increasing
|
||
|
|
||
|
i = MultiIndex.from_product([np.arange(10, 0, -1),
|
||
|
np.arange(10)], names=['one', 'two'])
|
||
|
assert not i.is_monotonic
|
||
|
assert not i._is_strictly_monotonic_increasing
|
||
|
assert not Index(i.values).is_monotonic
|
||
|
assert not Index(i.values)._is_strictly_monotonic_increasing
|
||
|
|
||
|
i = MultiIndex.from_product([np.arange(10),
|
||
|
np.arange(10, 0, -1)],
|
||
|
names=['one', 'two'])
|
||
|
assert not i.is_monotonic
|
||
|
assert not i._is_strictly_monotonic_increasing
|
||
|
assert not Index(i.values).is_monotonic
|
||
|
assert not Index(i.values)._is_strictly_monotonic_increasing
|
||
|
|
||
|
i = MultiIndex.from_product([[1.0, np.nan, 2.0], ['a', 'b', 'c']])
|
||
|
assert not i.is_monotonic
|
||
|
assert not i._is_strictly_monotonic_increasing
|
||
|
assert not Index(i.values).is_monotonic
|
||
|
assert not Index(i.values)._is_strictly_monotonic_increasing
|
||
|
|
||
|
# string ordering
|
||
|
i = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
|
||
|
['one', 'two', 'three']],
|
||
|
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
|
||
|
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
|
||
|
names=['first', 'second'])
|
||
|
assert not i.is_monotonic
|
||
|
assert not Index(i.values).is_monotonic
|
||
|
assert not i._is_strictly_monotonic_increasing
|
||
|
assert not Index(i.values)._is_strictly_monotonic_increasing
|
||
|
|
||
|
i = MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'],
|
||
|
['mom', 'next', 'zenith']],
|
||
|
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
|
||
|
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
|
||
|
names=['first', 'second'])
|
||
|
assert i.is_monotonic
|
||
|
assert Index(i.values).is_monotonic
|
||
|
assert i._is_strictly_monotonic_increasing
|
||
|
assert Index(i.values)._is_strictly_monotonic_increasing
|
||
|
|
||
|
# mixed levels, hits the TypeError
|
||
|
i = MultiIndex(
|
||
|
levels=[[1, 2, 3, 4], ['gb00b03mlx29', 'lu0197800237',
|
||
|
'nl0000289783',
|
||
|
'nl0000289965', 'nl0000301109']],
|
||
|
labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
|
||
|
names=['household_id', 'asset_id'])
|
||
|
|
||
|
assert not i.is_monotonic
|
||
|
assert not i._is_strictly_monotonic_increasing
|
||
|
|
||
|
# empty
|
||
|
i = MultiIndex.from_arrays([[], []])
|
||
|
assert i.is_monotonic
|
||
|
assert Index(i.values).is_monotonic
|
||
|
assert i._is_strictly_monotonic_increasing
|
||
|
assert Index(i.values)._is_strictly_monotonic_increasing
|
||
|
|
||
|
def test_is_monotonic_decreasing(self):
|
||
|
i = MultiIndex.from_product([np.arange(9, -1, -1),
|
||
|
np.arange(9, -1, -1)],
|
||
|
names=['one', 'two'])
|
||
|
assert i.is_monotonic_decreasing
|
||
|
assert i._is_strictly_monotonic_decreasing
|
||
|
assert Index(i.values).is_monotonic_decreasing
|
||
|
assert i._is_strictly_monotonic_decreasing
|
||
|
|
||
|
i = MultiIndex.from_product([np.arange(10),
|
||
|
np.arange(10, 0, -1)],
|
||
|
names=['one', 'two'])
|
||
|
assert not i.is_monotonic_decreasing
|
||
|
assert not i._is_strictly_monotonic_decreasing
|
||
|
assert not Index(i.values).is_monotonic_decreasing
|
||
|
assert not Index(i.values)._is_strictly_monotonic_decreasing
|
||
|
|
||
|
i = MultiIndex.from_product([np.arange(10, 0, -1),
|
||
|
np.arange(10)], names=['one', 'two'])
|
||
|
assert not i.is_monotonic_decreasing
|
||
|
assert not i._is_strictly_monotonic_decreasing
|
||
|
assert not Index(i.values).is_monotonic_decreasing
|
||
|
assert not Index(i.values)._is_strictly_monotonic_decreasing
|
||
|
|
||
|
i = MultiIndex.from_product([[2.0, np.nan, 1.0], ['c', 'b', 'a']])
|
||
|
assert not i.is_monotonic_decreasing
|
||
|
assert not i._is_strictly_monotonic_decreasing
|
||
|
assert not Index(i.values).is_monotonic_decreasing
|
||
|
assert not Index(i.values)._is_strictly_monotonic_decreasing
|
||
|
|
||
|
# string ordering
|
||
|
i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'],
|
||
|
['three', 'two', 'one']],
|
||
|
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
|
||
|
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
|
||
|
names=['first', 'second'])
|
||
|
assert not i.is_monotonic_decreasing
|
||
|
assert not Index(i.values).is_monotonic_decreasing
|
||
|
assert not i._is_strictly_monotonic_decreasing
|
||
|
assert not Index(i.values)._is_strictly_monotonic_decreasing
|
||
|
|
||
|
i = MultiIndex(levels=[['qux', 'foo', 'baz', 'bar'],
|
||
|
['zenith', 'next', 'mom']],
|
||
|
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
|
||
|
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
|
||
|
names=['first', 'second'])
|
||
|
assert i.is_monotonic_decreasing
|
||
|
assert Index(i.values).is_monotonic_decreasing
|
||
|
assert i._is_strictly_monotonic_decreasing
|
||
|
assert Index(i.values)._is_strictly_monotonic_decreasing
|
||
|
|
||
|
# mixed levels, hits the TypeError
|
||
|
i = MultiIndex(
|
||
|
levels=[[4, 3, 2, 1], ['nl0000301109', 'nl0000289965',
|
||
|
'nl0000289783', 'lu0197800237',
|
||
|
'gb00b03mlx29']],
|
||
|
labels=[[0, 1, 1, 2, 2, 2, 3], [4, 2, 0, 0, 1, 3, -1]],
|
||
|
names=['household_id', 'asset_id'])
|
||
|
|
||
|
assert not i.is_monotonic_decreasing
|
||
|
assert not i._is_strictly_monotonic_decreasing
|
||
|
|
||
|
# empty
|
||
|
i = MultiIndex.from_arrays([[], []])
|
||
|
assert i.is_monotonic_decreasing
|
||
|
assert Index(i.values).is_monotonic_decreasing
|
||
|
assert i._is_strictly_monotonic_decreasing
|
||
|
assert Index(i.values)._is_strictly_monotonic_decreasing
|
||
|
|
||
|
def test_is_strictly_monotonic_increasing(self):
|
||
|
idx = pd.MultiIndex(levels=[['bar', 'baz'], ['mom', 'next']],
|
||
|
labels=[[0, 0, 1, 1], [0, 0, 0, 1]])
|
||
|
assert idx.is_monotonic_increasing
|
||
|
assert not idx._is_strictly_monotonic_increasing
|
||
|
|
||
|
def test_is_strictly_monotonic_decreasing(self):
|
||
|
idx = pd.MultiIndex(levels=[['baz', 'bar'], ['next', 'mom']],
|
||
|
labels=[[0, 0, 1, 1], [0, 0, 0, 1]])
|
||
|
assert idx.is_monotonic_decreasing
|
||
|
assert not idx._is_strictly_monotonic_decreasing
|
||
|
|
||
|
def test_reconstruct_sort(self):
|
||
|
|
||
|
# starts off lexsorted & monotonic
|
||
|
mi = MultiIndex.from_arrays([
|
||
|
['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]
|
||
|
])
|
||
|
assert mi.is_lexsorted()
|
||
|
assert mi.is_monotonic
|
||
|
|
||
|
recons = mi._sort_levels_monotonic()
|
||
|
assert recons.is_lexsorted()
|
||
|
assert recons.is_monotonic
|
||
|
assert mi is recons
|
||
|
|
||
|
assert mi.equals(recons)
|
||
|
assert Index(mi.values).equals(Index(recons.values))
|
||
|
|
||
|
# cannot convert to lexsorted
|
||
|
mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'),
|
||
|
('x', 'b'), ('y', 'a'), ('z', 'b')],
|
||
|
names=['one', 'two'])
|
||
|
assert not mi.is_lexsorted()
|
||
|
assert not mi.is_monotonic
|
||
|
|
||
|
recons = mi._sort_levels_monotonic()
|
||
|
assert not recons.is_lexsorted()
|
||
|
assert not recons.is_monotonic
|
||
|
|
||
|
assert mi.equals(recons)
|
||
|
assert Index(mi.values).equals(Index(recons.values))
|
||
|
|
||
|
# cannot convert to lexsorted
|
||
|
mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]],
|
||
|
labels=[[0, 1, 0, 2], [2, 0, 0, 1]],
|
||
|
names=['col1', 'col2'])
|
||
|
assert not mi.is_lexsorted()
|
||
|
assert not mi.is_monotonic
|
||
|
|
||
|
recons = mi._sort_levels_monotonic()
|
||
|
assert not recons.is_lexsorted()
|
||
|
assert not recons.is_monotonic
|
||
|
|
||
|
assert mi.equals(recons)
|
||
|
assert Index(mi.values).equals(Index(recons.values))
|
||
|
|
||
|
def test_reconstruct_remove_unused(self):
|
||
|
# xref to GH 2770
|
||
|
df = DataFrame([['deleteMe', 1, 9],
|
||
|
['keepMe', 2, 9],
|
||
|
['keepMeToo', 3, 9]],
|
||
|
columns=['first', 'second', 'third'])
|
||
|
df2 = df.set_index(['first', 'second'], drop=False)
|
||
|
df2 = df2[df2['first'] != 'deleteMe']
|
||
|
|
||
|
# removed levels are there
|
||
|
expected = MultiIndex(levels=[['deleteMe', 'keepMe', 'keepMeToo'],
|
||
|
[1, 2, 3]],
|
||
|
labels=[[1, 2], [1, 2]],
|
||
|
names=['first', 'second'])
|
||
|
result = df2.index
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
expected = MultiIndex(levels=[['keepMe', 'keepMeToo'],
|
||
|
[2, 3]],
|
||
|
labels=[[0, 1], [0, 1]],
|
||
|
names=['first', 'second'])
|
||
|
result = df2.index.remove_unused_levels()
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
# idempotent
|
||
|
result2 = result.remove_unused_levels()
|
||
|
tm.assert_index_equal(result2, expected)
|
||
|
assert result2.is_(result)
|
||
|
|
||
|
@pytest.mark.parametrize('level0', [['a', 'd', 'b'],
|
||
|
['a', 'd', 'b', 'unused']])
|
||
|
@pytest.mark.parametrize('level1', [['w', 'x', 'y', 'z'],
|
||
|
['w', 'x', 'y', 'z', 'unused']])
|
||
|
def test_remove_unused_nan(self, level0, level1):
|
||
|
# GH 18417
|
||
|
mi = pd.MultiIndex(levels=[level0, level1],
|
||
|
labels=[[0, 2, -1, 1, -1], [0, 1, 2, 3, 2]])
|
||
|
|
||
|
result = mi.remove_unused_levels()
|
||
|
tm.assert_index_equal(result, mi)
|
||
|
for level in 0, 1:
|
||
|
assert('unused' not in result.levels[level])
|
||
|
|
||
|
@pytest.mark.parametrize('first_type,second_type', [
|
||
|
('int64', 'int64'),
|
||
|
('datetime64[D]', 'str')])
|
||
|
def test_remove_unused_levels_large(self, first_type, second_type):
|
||
|
# GH16556
|
||
|
|
||
|
# because tests should be deterministic (and this test in particular
|
||
|
# checks that levels are removed, which is not the case for every
|
||
|
# random input):
|
||
|
rng = np.random.RandomState(4) # seed is arbitrary value that works
|
||
|
|
||
|
size = 1 << 16
|
||
|
df = DataFrame(dict(
|
||
|
first=rng.randint(0, 1 << 13, size).astype(first_type),
|
||
|
second=rng.randint(0, 1 << 10, size).astype(second_type),
|
||
|
third=rng.rand(size)))
|
||
|
df = df.groupby(['first', 'second']).sum()
|
||
|
df = df[df.third < 0.1]
|
||
|
|
||
|
result = df.index.remove_unused_levels()
|
||
|
assert len(result.levels[0]) < len(df.index.levels[0])
|
||
|
assert len(result.levels[1]) < len(df.index.levels[1])
|
||
|
assert result.equals(df.index)
|
||
|
|
||
|
expected = df.reset_index().set_index(['first', 'second']).index
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
def test_isin(self):
|
||
|
values = [('foo', 2), ('bar', 3), ('quux', 4)]
|
||
|
|
||
|
idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange(
|
||
|
4)])
|
||
|
result = idx.isin(values)
|
||
|
expected = np.array([False, False, True, True])
|
||
|
tm.assert_numpy_array_equal(result, expected)
|
||
|
|
||
|
# empty, return dtype bool
|
||
|
idx = MultiIndex.from_arrays([[], []])
|
||
|
result = idx.isin(values)
|
||
|
assert len(result) == 0
|
||
|
assert result.dtype == np.bool_
|
||
|
|
||
|
@pytest.mark.skipif(PYPY, reason="tuples cmp recursively on PyPy")
|
||
|
def test_isin_nan_not_pypy(self):
|
||
|
idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]])
|
||
|
tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]),
|
||
|
np.array([False, False]))
|
||
|
tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]),
|
||
|
np.array([False, False]))
|
||
|
|
||
|
@pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy")
|
||
|
def test_isin_nan_pypy(self):
|
||
|
idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]])
|
||
|
tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]),
|
||
|
np.array([False, True]))
|
||
|
tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]),
|
||
|
np.array([False, True]))
|
||
|
|
||
|
def test_isin_level_kwarg(self):
|
||
|
idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange(
|
||
|
4)])
|
||
|
|
||
|
vals_0 = ['foo', 'bar', 'quux']
|
||
|
vals_1 = [2, 3, 10]
|
||
|
|
||
|
expected = np.array([False, False, True, True])
|
||
|
tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0))
|
||
|
tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=-2))
|
||
|
|
||
|
tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1))
|
||
|
tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1))
|
||
|
|
||
|
pytest.raises(IndexError, idx.isin, vals_0, level=5)
|
||
|
pytest.raises(IndexError, idx.isin, vals_0, level=-5)
|
||
|
|
||
|
pytest.raises(KeyError, idx.isin, vals_0, level=1.0)
|
||
|
pytest.raises(KeyError, idx.isin, vals_1, level=-1.0)
|
||
|
pytest.raises(KeyError, idx.isin, vals_1, level='A')
|
||
|
|
||
|
idx.names = ['A', 'B']
|
||
|
tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level='A'))
|
||
|
tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level='B'))
|
||
|
|
||
|
pytest.raises(KeyError, idx.isin, vals_1, level='C')
|
||
|
|
||
|
def test_reindex_preserves_names_when_target_is_list_or_ndarray(self):
|
||
|
# GH6552
|
||
|
idx = self.index.copy()
|
||
|
target = idx.copy()
|
||
|
idx.names = target.names = [None, None]
|
||
|
|
||
|
other_dtype = pd.MultiIndex.from_product([[1, 2], [3, 4]])
|
||
|
|
||
|
# list & ndarray cases
|
||
|
assert idx.reindex([])[0].names == [None, None]
|
||
|
assert idx.reindex(np.array([]))[0].names == [None, None]
|
||
|
assert idx.reindex(target.tolist())[0].names == [None, None]
|
||
|
assert idx.reindex(target.values)[0].names == [None, None]
|
||
|
assert idx.reindex(other_dtype.tolist())[0].names == [None, None]
|
||
|
assert idx.reindex(other_dtype.values)[0].names == [None, None]
|
||
|
|
||
|
idx.names = ['foo', 'bar']
|
||
|
assert idx.reindex([])[0].names == ['foo', 'bar']
|
||
|
assert idx.reindex(np.array([]))[0].names == ['foo', 'bar']
|
||
|
assert idx.reindex(target.tolist())[0].names == ['foo', 'bar']
|
||
|
assert idx.reindex(target.values)[0].names == ['foo', 'bar']
|
||
|
assert idx.reindex(other_dtype.tolist())[0].names == ['foo', 'bar']
|
||
|
assert idx.reindex(other_dtype.values)[0].names == ['foo', 'bar']
|
||
|
|
||
|
def test_reindex_lvl_preserves_names_when_target_is_list_or_array(self):
|
||
|
# GH7774
|
||
|
idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']],
|
||
|
names=['foo', 'bar'])
|
||
|
assert idx.reindex([], level=0)[0].names == ['foo', 'bar']
|
||
|
assert idx.reindex([], level=1)[0].names == ['foo', 'bar']
|
||
|
|
||
|
def test_reindex_lvl_preserves_type_if_target_is_empty_list_or_array(self):
|
||
|
# GH7774
|
||
|
idx = pd.MultiIndex.from_product([[0, 1], ['a', 'b']])
|
||
|
assert idx.reindex([], level=0)[0].levels[0].dtype.type == np.int64
|
||
|
assert idx.reindex([], level=1)[0].levels[1].dtype.type == np.object_
|
||
|
|
||
|
def test_groupby(self):
|
||
|
groups = self.index.groupby(np.array([1, 1, 1, 2, 2, 2]))
|
||
|
labels = self.index.get_values().tolist()
|
||
|
exp = {1: labels[:3], 2: labels[3:]}
|
||
|
tm.assert_dict_equal(groups, exp)
|
||
|
|
||
|
# GH5620
|
||
|
groups = self.index.groupby(self.index)
|
||
|
exp = {key: [key] for key in self.index}
|
||
|
tm.assert_dict_equal(groups, exp)
|
||
|
|
||
|
def test_index_name_retained(self):
|
||
|
# GH9857
|
||
|
result = pd.DataFrame({'x': [1, 2, 6],
|
||
|
'y': [2, 2, 8],
|
||
|
'z': [-5, 0, 5]})
|
||
|
result = result.set_index('z')
|
||
|
result.loc[10] = [9, 10]
|
||
|
df_expected = pd.DataFrame({'x': [1, 2, 6, 9],
|
||
|
'y': [2, 2, 8, 10],
|
||
|
'z': [-5, 0, 5, 10]})
|
||
|
df_expected = df_expected.set_index('z')
|
||
|
tm.assert_frame_equal(result, df_expected)
|
||
|
|
||
|
def test_equals_operator(self):
|
||
|
# GH9785
|
||
|
assert (self.index == self.index).all()
|
||
|
|
||
|
def test_large_multiindex_error(self):
|
||
|
# GH12527
|
||
|
df_below_1000000 = pd.DataFrame(
|
||
|
1, index=pd.MultiIndex.from_product([[1, 2], range(499999)]),
|
||
|
columns=['dest'])
|
||
|
with pytest.raises(KeyError):
|
||
|
df_below_1000000.loc[(-1, 0), 'dest']
|
||
|
with pytest.raises(KeyError):
|
||
|
df_below_1000000.loc[(3, 0), 'dest']
|
||
|
df_above_1000000 = pd.DataFrame(
|
||
|
1, index=pd.MultiIndex.from_product([[1, 2], range(500001)]),
|
||
|
columns=['dest'])
|
||
|
with pytest.raises(KeyError):
|
||
|
df_above_1000000.loc[(-1, 0), 'dest']
|
||
|
with pytest.raises(KeyError):
|
||
|
df_above_1000000.loc[(3, 0), 'dest']
|
||
|
|
||
|
def test_partial_string_timestamp_multiindex(self):
|
||
|
# GH10331
|
||
|
dr = pd.date_range('2016-01-01', '2016-01-03', freq='12H')
|
||
|
abc = ['a', 'b', 'c']
|
||
|
ix = pd.MultiIndex.from_product([dr, abc])
|
||
|
df = pd.DataFrame({'c1': range(0, 15)}, index=ix)
|
||
|
idx = pd.IndexSlice
|
||
|
|
||
|
# c1
|
||
|
# 2016-01-01 00:00:00 a 0
|
||
|
# b 1
|
||
|
# c 2
|
||
|
# 2016-01-01 12:00:00 a 3
|
||
|
# b 4
|
||
|
# c 5
|
||
|
# 2016-01-02 00:00:00 a 6
|
||
|
# b 7
|
||
|
# c 8
|
||
|
# 2016-01-02 12:00:00 a 9
|
||
|
# b 10
|
||
|
# c 11
|
||
|
# 2016-01-03 00:00:00 a 12
|
||
|
# b 13
|
||
|
# c 14
|
||
|
|
||
|
# partial string matching on a single index
|
||
|
for df_swap in (df.swaplevel(),
|
||
|
df.swaplevel(0),
|
||
|
df.swaplevel(0, 1)):
|
||
|
df_swap = df_swap.sort_index()
|
||
|
just_a = df_swap.loc['a']
|
||
|
result = just_a.loc['2016-01-01']
|
||
|
expected = df.loc[idx[:, 'a'], :].iloc[0:2]
|
||
|
expected.index = expected.index.droplevel(1)
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
# indexing with IndexSlice
|
||
|
result = df.loc[idx['2016-01-01':'2016-02-01', :], :]
|
||
|
expected = df
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
# match on secondary index
|
||
|
result = df_swap.loc[idx[:, '2016-01-01':'2016-01-01'], :]
|
||
|
expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
# Even though this syntax works on a single index, this is somewhat
|
||
|
# ambiguous and we don't want to extend this behavior forward to work
|
||
|
# in multi-indexes. This would amount to selecting a scalar from a
|
||
|
# column.
|
||
|
with pytest.raises(KeyError):
|
||
|
df['2016-01-01']
|
||
|
|
||
|
# partial string match on year only
|
||
|
result = df.loc['2016']
|
||
|
expected = df
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
# partial string match on date
|
||
|
result = df.loc['2016-01-01']
|
||
|
expected = df.iloc[0:6]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
# partial string match on date and hour, from middle
|
||
|
result = df.loc['2016-01-02 12']
|
||
|
expected = df.iloc[9:12]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
# partial string match on secondary index
|
||
|
result = df_swap.loc[idx[:, '2016-01-02'], :]
|
||
|
expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
# tuple selector with partial string match on date
|
||
|
result = df.loc[('2016-01-01', 'a'), :]
|
||
|
expected = df.iloc[[0, 3]]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
# Slicing date on first level should break (of course)
|
||
|
with pytest.raises(KeyError):
|
||
|
df_swap.loc['2016-01-01']
|
||
|
|
||
|
# GH12685 (partial string with daily resolution or below)
|
||
|
dr = date_range('2013-01-01', periods=100, freq='D')
|
||
|
ix = MultiIndex.from_product([dr, ['a', 'b']])
|
||
|
df = DataFrame(np.random.randn(200, 1), columns=['A'], index=ix)
|
||
|
|
||
|
result = df.loc[idx['2013-03':'2013-03', :], :]
|
||
|
expected = df.iloc[118:180]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_rangeindex_fallback_coercion_bug(self):
|
||
|
# GH 12893
|
||
|
foo = pd.DataFrame(np.arange(100).reshape((10, 10)))
|
||
|
bar = pd.DataFrame(np.arange(100).reshape((10, 10)))
|
||
|
df = pd.concat({'foo': foo.stack(), 'bar': bar.stack()}, axis=1)
|
||
|
df.index.names = ['fizz', 'buzz']
|
||
|
|
||
|
str(df)
|
||
|
expected = pd.DataFrame({'bar': np.arange(100),
|
||
|
'foo': np.arange(100)},
|
||
|
index=pd.MultiIndex.from_product(
|
||
|
[range(10), range(10)],
|
||
|
names=['fizz', 'buzz']))
|
||
|
tm.assert_frame_equal(df, expected, check_like=True)
|
||
|
|
||
|
result = df.index.get_level_values('fizz')
|
||
|
expected = pd.Int64Index(np.arange(10), name='fizz').repeat(10)
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
result = df.index.get_level_values('buzz')
|
||
|
expected = pd.Int64Index(np.tile(np.arange(10), 10), name='buzz')
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
|
||
|
def test_dropna(self):
|
||
|
# GH 6194
|
||
|
idx = pd.MultiIndex.from_arrays([[1, np.nan, 3, np.nan, 5],
|
||
|
[1, 2, np.nan, np.nan, 5],
|
||
|
['a', 'b', 'c', np.nan, 'e']])
|
||
|
|
||
|
exp = pd.MultiIndex.from_arrays([[1, 5],
|
||
|
[1, 5],
|
||
|
['a', 'e']])
|
||
|
tm.assert_index_equal(idx.dropna(), exp)
|
||
|
tm.assert_index_equal(idx.dropna(how='any'), exp)
|
||
|
|
||
|
exp = pd.MultiIndex.from_arrays([[1, np.nan, 3, 5],
|
||
|
[1, 2, np.nan, 5],
|
||
|
['a', 'b', 'c', 'e']])
|
||
|
tm.assert_index_equal(idx.dropna(how='all'), exp)
|
||
|
|
||
|
msg = "invalid how option: xxx"
|
||
|
with tm.assert_raises_regex(ValueError, msg):
|
||
|
idx.dropna(how='xxx')
|
||
|
|
||
|
def test_unsortedindex(self):
|
||
|
# GH 11897
|
||
|
mi = pd.MultiIndex.from_tuples([('z', 'a'), ('x', 'a'), ('y', 'b'),
|
||
|
('x', 'b'), ('y', 'a'), ('z', 'b')],
|
||
|
names=['one', 'two'])
|
||
|
df = pd.DataFrame([[i, 10 * i] for i in lrange(6)], index=mi,
|
||
|
columns=['one', 'two'])
|
||
|
|
||
|
# GH 16734: not sorted, but no real slicing
|
||
|
result = df.loc(axis=0)['z', 'a']
|
||
|
expected = df.iloc[0]
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
with pytest.raises(UnsortedIndexError):
|
||
|
df.loc(axis=0)['z', slice('a')]
|
||
|
df.sort_index(inplace=True)
|
||
|
assert len(df.loc(axis=0)['z', :]) == 2
|
||
|
|
||
|
with pytest.raises(KeyError):
|
||
|
df.loc(axis=0)['q', :]
|
||
|
|
||
|
def test_unsortedindex_doc_examples(self):
|
||
|
# http://pandas.pydata.org/pandas-docs/stable/advanced.html#sorting-a-multiindex # noqa
|
||
|
dfm = DataFrame({'jim': [0, 0, 1, 1],
|
||
|
'joe': ['x', 'x', 'z', 'y'],
|
||
|
'jolie': np.random.rand(4)})
|
||
|
|
||
|
dfm = dfm.set_index(['jim', 'joe'])
|
||
|
with tm.assert_produces_warning(PerformanceWarning):
|
||
|
dfm.loc[(1, 'z')]
|
||
|
|
||
|
with pytest.raises(UnsortedIndexError):
|
||
|
dfm.loc[(0, 'y'):(1, 'z')]
|
||
|
|
||
|
assert not dfm.index.is_lexsorted()
|
||
|
assert dfm.index.lexsort_depth == 1
|
||
|
|
||
|
# sort it
|
||
|
dfm = dfm.sort_index()
|
||
|
dfm.loc[(1, 'z')]
|
||
|
dfm.loc[(0, 'y'):(1, 'z')]
|
||
|
|
||
|
assert dfm.index.is_lexsorted()
|
||
|
assert dfm.index.lexsort_depth == 2
|
||
|
|
||
|
def test_tuples_with_name_string(self):
|
||
|
# GH 15110 and GH 14848
|
||
|
|
||
|
li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
|
||
|
with pytest.raises(ValueError):
|
||
|
pd.Index(li, name='abc')
|
||
|
with pytest.raises(ValueError):
|
||
|
pd.Index(li, name='a')
|
||
|
|
||
|
def test_nan_stays_float(self):
|
||
|
|
||
|
# GH 7031
|
||
|
idx0 = pd.MultiIndex(levels=[["A", "B"], []],
|
||
|
labels=[[1, 0], [-1, -1]],
|
||
|
names=[0, 1])
|
||
|
idx1 = pd.MultiIndex(levels=[["C"], ["D"]],
|
||
|
labels=[[0], [0]],
|
||
|
names=[0, 1])
|
||
|
idxm = idx0.join(idx1, how='outer')
|
||
|
assert pd.isna(idx0.get_level_values(1)).all()
|
||
|
# the following failed in 0.14.1
|
||
|
assert pd.isna(idxm.get_level_values(1)[:-1]).all()
|
||
|
|
||
|
df0 = pd.DataFrame([[1, 2]], index=idx0)
|
||
|
df1 = pd.DataFrame([[3, 4]], index=idx1)
|
||
|
dfm = df0 - df1
|
||
|
assert pd.isna(df0.index.get_level_values(1)).all()
|
||
|
# the following failed in 0.14.1
|
||
|
assert pd.isna(dfm.index.get_level_values(1)[:-1]).all()
|
||
|
|
||
|
def test_million_record_attribute_error(self):
|
||
|
# GH 18165
|
||
|
r = list(range(1000000))
|
||
|
df = pd.DataFrame({'a': r, 'b': r},
|
||
|
index=pd.MultiIndex.from_tuples([(x, x) for x in r]))
|
||
|
|
||
|
with tm.assert_raises_regex(AttributeError,
|
||
|
"'Series' object has no attribute 'foo'"):
|
||
|
df['a'].foo()
|
||
|
|
||
|
def test_duplicate_multiindex_labels(self):
|
||
|
# GH 17464
|
||
|
# Make sure that a MultiIndex with duplicate levels throws a ValueError
|
||
|
with pytest.raises(ValueError):
|
||
|
ind = pd.MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)])
|
||
|
|
||
|
# And that using set_levels with duplicate levels fails
|
||
|
ind = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'],
|
||
|
[1, 2, 1, 2, 3]])
|
||
|
with pytest.raises(ValueError):
|
||
|
ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]],
|
||
|
inplace=True)
|
||
|
|
||
|
def test_multiindex_compare(self):
|
||
|
# GH 21149
|
||
|
# Ensure comparison operations for MultiIndex with nlevels == 1
|
||
|
# behave consistently with those for MultiIndex with nlevels > 1
|
||
|
|
||
|
midx = pd.MultiIndex.from_product([[0, 1]])
|
||
|
|
||
|
# Equality self-test: MultiIndex object vs self
|
||
|
expected = pd.Series([True, True])
|
||
|
result = pd.Series(midx == midx)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
# Greater than comparison: MultiIndex object vs self
|
||
|
expected = pd.Series([False, False])
|
||
|
result = pd.Series(midx > midx)
|
||
|
tm.assert_series_equal(result, expected)
|