718 lines
26 KiB
Python
718 lines
26 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
import pytest
|
||
|
|
||
|
import pandas as pd
|
||
|
import pandas.compat as compat
|
||
|
import numpy as np
|
||
|
from pandas import (Series, DataFrame, Timestamp, Categorical,
|
||
|
CategoricalIndex, Interval, Index)
|
||
|
from pandas.util.testing import assert_series_equal, assert_frame_equal
|
||
|
from pandas.util import testing as tm
|
||
|
from pandas.core.dtypes.common import is_categorical_dtype
|
||
|
from pandas.api.types import CategoricalDtype as CDT
|
||
|
from pandas.core.dtypes.dtypes import CategoricalDtype
|
||
|
|
||
|
|
||
|
class TestCategoricalIndex(object):
|
||
|
|
||
|
def setup_method(self, method):
|
||
|
|
||
|
self.df = DataFrame({'A': np.arange(6, dtype='int64'),
|
||
|
'B': Series(list('aabbca')).astype(
|
||
|
CDT(list('cab')))}).set_index('B')
|
||
|
self.df2 = DataFrame({'A': np.arange(6, dtype='int64'),
|
||
|
'B': Series(list('aabbca')).astype(
|
||
|
CDT(list('cabe')))}).set_index('B')
|
||
|
self.df3 = DataFrame({'A': np.arange(6, dtype='int64'),
|
||
|
'B': (Series([1, 1, 2, 1, 3, 2])
|
||
|
.astype(CDT([3, 2, 1], ordered=True)))
|
||
|
}).set_index('B')
|
||
|
self.df4 = DataFrame({'A': np.arange(6, dtype='int64'),
|
||
|
'B': (Series([1, 1, 2, 1, 3, 2])
|
||
|
.astype(CDT([3, 2, 1], ordered=False)))
|
||
|
}).set_index('B')
|
||
|
|
||
|
def test_loc_scalar(self):
|
||
|
result = self.df.loc['a']
|
||
|
expected = (DataFrame({'A': [0, 1, 5],
|
||
|
'B': (Series(list('aaa'))
|
||
|
.astype(CDT(list('cab'))))})
|
||
|
.set_index('B'))
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
df = self.df.copy()
|
||
|
df.loc['a'] = 20
|
||
|
expected = (DataFrame({'A': [20, 20, 2, 3, 4, 20],
|
||
|
'B': (Series(list('aabbca'))
|
||
|
.astype(CDT(list('cab'))))})
|
||
|
.set_index('B'))
|
||
|
assert_frame_equal(df, expected)
|
||
|
|
||
|
# value not in the categories
|
||
|
pytest.raises(KeyError, lambda: df.loc['d'])
|
||
|
|
||
|
def f():
|
||
|
df.loc['d'] = 10
|
||
|
|
||
|
pytest.raises(TypeError, f)
|
||
|
|
||
|
def f():
|
||
|
df.loc['d', 'A'] = 10
|
||
|
|
||
|
pytest.raises(TypeError, f)
|
||
|
|
||
|
def f():
|
||
|
df.loc['d', 'C'] = 10
|
||
|
|
||
|
pytest.raises(TypeError, f)
|
||
|
|
||
|
def test_getitem_scalar(self):
|
||
|
|
||
|
cats = Categorical([Timestamp('12-31-1999'),
|
||
|
Timestamp('12-31-2000')])
|
||
|
|
||
|
s = Series([1, 2], index=cats)
|
||
|
|
||
|
expected = s.iloc[0]
|
||
|
result = s[cats[0]]
|
||
|
assert result == expected
|
||
|
|
||
|
def test_slicing_directly(self):
|
||
|
cat = Categorical(["a", "b", "c", "d", "a", "b", "c"])
|
||
|
sliced = cat[3]
|
||
|
assert sliced == "d"
|
||
|
sliced = cat[3:5]
|
||
|
expected = Categorical(["d", "a"], categories=['a', 'b', 'c', 'd'])
|
||
|
tm.assert_numpy_array_equal(sliced._codes, expected._codes)
|
||
|
tm.assert_index_equal(sliced.categories, expected.categories)
|
||
|
|
||
|
def test_slicing(self):
|
||
|
cat = Series(Categorical([1, 2, 3, 4]))
|
||
|
reversed = cat[::-1]
|
||
|
exp = np.array([4, 3, 2, 1], dtype=np.int64)
|
||
|
tm.assert_numpy_array_equal(reversed.__array__(), exp)
|
||
|
|
||
|
df = DataFrame({'value': (np.arange(100) + 1).astype('int64')})
|
||
|
df['D'] = pd.cut(df.value, bins=[0, 25, 50, 75, 100])
|
||
|
|
||
|
expected = Series([11, Interval(0, 25)], index=['value', 'D'], name=10)
|
||
|
result = df.iloc[10]
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
expected = DataFrame({'value': np.arange(11, 21).astype('int64')},
|
||
|
index=np.arange(10, 20).astype('int64'))
|
||
|
expected['D'] = pd.cut(expected.value, bins=[0, 25, 50, 75, 100])
|
||
|
result = df.iloc[10:20]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
expected = Series([9, Interval(0, 25)], index=['value', 'D'], name=8)
|
||
|
result = df.loc[8]
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
def test_slicing_and_getting_ops(self):
|
||
|
|
||
|
# systematically test the slicing operations:
|
||
|
# for all slicing ops:
|
||
|
# - returning a dataframe
|
||
|
# - returning a column
|
||
|
# - returning a row
|
||
|
# - returning a single value
|
||
|
|
||
|
cats = Categorical(
|
||
|
["a", "c", "b", "c", "c", "c", "c"], categories=["a", "b", "c"])
|
||
|
idx = Index(["h", "i", "j", "k", "l", "m", "n"])
|
||
|
values = [1, 2, 3, 4, 5, 6, 7]
|
||
|
df = DataFrame({"cats": cats, "values": values}, index=idx)
|
||
|
|
||
|
# the expected values
|
||
|
cats2 = Categorical(["b", "c"], categories=["a", "b", "c"])
|
||
|
idx2 = Index(["j", "k"])
|
||
|
values2 = [3, 4]
|
||
|
|
||
|
# 2:4,: | "j":"k",:
|
||
|
exp_df = DataFrame({"cats": cats2, "values": values2}, index=idx2)
|
||
|
|
||
|
# :,"cats" | :,0
|
||
|
exp_col = Series(cats, index=idx, name='cats')
|
||
|
|
||
|
# "j",: | 2,:
|
||
|
exp_row = Series(["b", 3], index=["cats", "values"], dtype="object",
|
||
|
name="j")
|
||
|
|
||
|
# "j","cats | 2,0
|
||
|
exp_val = "b"
|
||
|
|
||
|
# iloc
|
||
|
# frame
|
||
|
res_df = df.iloc[2:4, :]
|
||
|
tm.assert_frame_equal(res_df, exp_df)
|
||
|
assert is_categorical_dtype(res_df["cats"])
|
||
|
|
||
|
# row
|
||
|
res_row = df.iloc[2, :]
|
||
|
tm.assert_series_equal(res_row, exp_row)
|
||
|
assert isinstance(res_row["cats"], compat.string_types)
|
||
|
|
||
|
# col
|
||
|
res_col = df.iloc[:, 0]
|
||
|
tm.assert_series_equal(res_col, exp_col)
|
||
|
assert is_categorical_dtype(res_col)
|
||
|
|
||
|
# single value
|
||
|
res_val = df.iloc[2, 0]
|
||
|
assert res_val == exp_val
|
||
|
|
||
|
# loc
|
||
|
# frame
|
||
|
res_df = df.loc["j":"k", :]
|
||
|
tm.assert_frame_equal(res_df, exp_df)
|
||
|
assert is_categorical_dtype(res_df["cats"])
|
||
|
|
||
|
# row
|
||
|
res_row = df.loc["j", :]
|
||
|
tm.assert_series_equal(res_row, exp_row)
|
||
|
assert isinstance(res_row["cats"], compat.string_types)
|
||
|
|
||
|
# col
|
||
|
res_col = df.loc[:, "cats"]
|
||
|
tm.assert_series_equal(res_col, exp_col)
|
||
|
assert is_categorical_dtype(res_col)
|
||
|
|
||
|
# single value
|
||
|
res_val = df.loc["j", "cats"]
|
||
|
assert res_val == exp_val
|
||
|
|
||
|
# ix
|
||
|
# frame
|
||
|
# res_df = df.loc["j":"k",[0,1]] # doesn't work?
|
||
|
res_df = df.loc["j":"k", :]
|
||
|
tm.assert_frame_equal(res_df, exp_df)
|
||
|
assert is_categorical_dtype(res_df["cats"])
|
||
|
|
||
|
# row
|
||
|
res_row = df.loc["j", :]
|
||
|
tm.assert_series_equal(res_row, exp_row)
|
||
|
assert isinstance(res_row["cats"], compat.string_types)
|
||
|
|
||
|
# col
|
||
|
res_col = df.loc[:, "cats"]
|
||
|
tm.assert_series_equal(res_col, exp_col)
|
||
|
assert is_categorical_dtype(res_col)
|
||
|
|
||
|
# single value
|
||
|
res_val = df.loc["j", df.columns[0]]
|
||
|
assert res_val == exp_val
|
||
|
|
||
|
# iat
|
||
|
res_val = df.iat[2, 0]
|
||
|
assert res_val == exp_val
|
||
|
|
||
|
# at
|
||
|
res_val = df.at["j", "cats"]
|
||
|
assert res_val == exp_val
|
||
|
|
||
|
# fancy indexing
|
||
|
exp_fancy = df.iloc[[2]]
|
||
|
|
||
|
res_fancy = df[df["cats"] == "b"]
|
||
|
tm.assert_frame_equal(res_fancy, exp_fancy)
|
||
|
res_fancy = df[df["values"] == 3]
|
||
|
tm.assert_frame_equal(res_fancy, exp_fancy)
|
||
|
|
||
|
# get_value
|
||
|
res_val = df.at["j", "cats"]
|
||
|
assert res_val == exp_val
|
||
|
|
||
|
# i : int, slice, or sequence of integers
|
||
|
res_row = df.iloc[2]
|
||
|
tm.assert_series_equal(res_row, exp_row)
|
||
|
assert isinstance(res_row["cats"], compat.string_types)
|
||
|
|
||
|
res_df = df.iloc[slice(2, 4)]
|
||
|
tm.assert_frame_equal(res_df, exp_df)
|
||
|
assert is_categorical_dtype(res_df["cats"])
|
||
|
|
||
|
res_df = df.iloc[[2, 3]]
|
||
|
tm.assert_frame_equal(res_df, exp_df)
|
||
|
assert is_categorical_dtype(res_df["cats"])
|
||
|
|
||
|
res_col = df.iloc[:, 0]
|
||
|
tm.assert_series_equal(res_col, exp_col)
|
||
|
assert is_categorical_dtype(res_col)
|
||
|
|
||
|
res_df = df.iloc[:, slice(0, 2)]
|
||
|
tm.assert_frame_equal(res_df, df)
|
||
|
assert is_categorical_dtype(res_df["cats"])
|
||
|
|
||
|
res_df = df.iloc[:, [0, 1]]
|
||
|
tm.assert_frame_equal(res_df, df)
|
||
|
assert is_categorical_dtype(res_df["cats"])
|
||
|
|
||
|
def test_slicing_doc_examples(self):
|
||
|
|
||
|
# GH 7918
|
||
|
cats = Categorical(["a", "b", "b", "b", "c", "c", "c"],
|
||
|
categories=["a", "b", "c"])
|
||
|
idx = Index(["h", "i", "j", "k", "l", "m", "n", ])
|
||
|
values = [1, 2, 2, 2, 3, 4, 5]
|
||
|
df = DataFrame({"cats": cats, "values": values}, index=idx)
|
||
|
|
||
|
result = df.iloc[2:4, :]
|
||
|
expected = DataFrame(
|
||
|
{"cats": Categorical(['b', 'b'], categories=['a', 'b', 'c']),
|
||
|
"values": [2, 2]}, index=['j', 'k'])
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.iloc[2:4, :].dtypes
|
||
|
expected = Series(['category', 'int64'], ['cats', 'values'])
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
result = df.loc["h":"j", "cats"]
|
||
|
expected = Series(Categorical(['a', 'b', 'b'],
|
||
|
categories=['a', 'b', 'c']),
|
||
|
index=['h', 'i', 'j'], name='cats')
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
result = df.loc["h":"j", df.columns[0:1]]
|
||
|
expected = DataFrame({'cats': Categorical(['a', 'b', 'b'],
|
||
|
categories=['a', 'b', 'c'])},
|
||
|
index=['h', 'i', 'j'])
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_getitem_category_type(self):
|
||
|
# GH 14580
|
||
|
# test iloc() on Series with Categorical data
|
||
|
|
||
|
s = Series([1, 2, 3]).astype('category')
|
||
|
|
||
|
# get slice
|
||
|
result = s.iloc[0:2]
|
||
|
expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
# get list of indexes
|
||
|
result = s.iloc[[0, 1]]
|
||
|
expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
# get boolean array
|
||
|
result = s.iloc[[True, False, False]]
|
||
|
expected = Series([1]).astype(CategoricalDtype([1, 2, 3]))
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
def test_loc_listlike(self):
|
||
|
|
||
|
# list of labels
|
||
|
result = self.df.loc[['c', 'a']]
|
||
|
expected = self.df.iloc[[4, 0, 1, 5]]
|
||
|
assert_frame_equal(result, expected, check_index_type=True)
|
||
|
|
||
|
result = self.df2.loc[['a', 'b', 'e']]
|
||
|
exp_index = CategoricalIndex(
|
||
|
list('aaabbe'), categories=list('cabe'), name='B')
|
||
|
expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=exp_index)
|
||
|
assert_frame_equal(result, expected, check_index_type=True)
|
||
|
|
||
|
# element in the categories but not in the values
|
||
|
pytest.raises(KeyError, lambda: self.df2.loc['e'])
|
||
|
|
||
|
# assign is ok
|
||
|
df = self.df2.copy()
|
||
|
df.loc['e'] = 20
|
||
|
result = df.loc[['a', 'b', 'e']]
|
||
|
exp_index = CategoricalIndex(
|
||
|
list('aaabbe'), categories=list('cabe'), name='B')
|
||
|
expected = DataFrame({'A': [0, 1, 5, 2, 3, 20]}, index=exp_index)
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
df = self.df2.copy()
|
||
|
result = df.loc[['a', 'b', 'e']]
|
||
|
exp_index = CategoricalIndex(
|
||
|
list('aaabbe'), categories=list('cabe'), name='B')
|
||
|
expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=exp_index)
|
||
|
assert_frame_equal(result, expected, check_index_type=True)
|
||
|
|
||
|
# not all labels in the categories
|
||
|
with pytest.raises(KeyError):
|
||
|
self.df2.loc[['a', 'd']]
|
||
|
|
||
|
def test_loc_listlike_dtypes(self):
|
||
|
# GH 11586
|
||
|
|
||
|
# unique categories and codes
|
||
|
index = CategoricalIndex(['a', 'b', 'c'])
|
||
|
df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index)
|
||
|
|
||
|
# unique slice
|
||
|
res = df.loc[['a', 'b']]
|
||
|
exp_index = CategoricalIndex(['a', 'b'],
|
||
|
categories=index.categories)
|
||
|
exp = DataFrame({'A': [1, 2], 'B': [4, 5]}, index=exp_index)
|
||
|
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||
|
|
||
|
# duplicated slice
|
||
|
res = df.loc[['a', 'a', 'b']]
|
||
|
|
||
|
exp_index = CategoricalIndex(['a', 'a', 'b'],
|
||
|
categories=index.categories)
|
||
|
exp = DataFrame({'A': [1, 1, 2], 'B': [4, 4, 5]}, index=exp_index)
|
||
|
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||
|
|
||
|
with tm.assert_raises_regex(
|
||
|
KeyError,
|
||
|
'a list-indexer must only include values that are '
|
||
|
'in the categories'):
|
||
|
df.loc[['a', 'x']]
|
||
|
|
||
|
# duplicated categories and codes
|
||
|
index = CategoricalIndex(['a', 'b', 'a'])
|
||
|
df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index)
|
||
|
|
||
|
# unique slice
|
||
|
res = df.loc[['a', 'b']]
|
||
|
exp = DataFrame({'A': [1, 3, 2],
|
||
|
'B': [4, 6, 5]},
|
||
|
index=CategoricalIndex(['a', 'a', 'b']))
|
||
|
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||
|
|
||
|
# duplicated slice
|
||
|
res = df.loc[['a', 'a', 'b']]
|
||
|
exp = DataFrame(
|
||
|
{'A': [1, 3, 1, 3, 2],
|
||
|
'B': [4, 6, 4, 6, 5
|
||
|
]}, index=CategoricalIndex(['a', 'a', 'a', 'a', 'b']))
|
||
|
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||
|
|
||
|
with tm.assert_raises_regex(
|
||
|
KeyError,
|
||
|
'a list-indexer must only include values '
|
||
|
'that are in the categories'):
|
||
|
df.loc[['a', 'x']]
|
||
|
|
||
|
# contains unused category
|
||
|
index = CategoricalIndex(
|
||
|
['a', 'b', 'a', 'c'], categories=list('abcde'))
|
||
|
df = DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, index=index)
|
||
|
|
||
|
res = df.loc[['a', 'b']]
|
||
|
exp = DataFrame({'A': [1, 3, 2], 'B': [5, 7, 6]},
|
||
|
index=CategoricalIndex(['a', 'a', 'b'],
|
||
|
categories=list('abcde')))
|
||
|
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||
|
|
||
|
res = df.loc[['a', 'e']]
|
||
|
exp = DataFrame({'A': [1, 3, np.nan], 'B': [5, 7, np.nan]},
|
||
|
index=CategoricalIndex(['a', 'a', 'e'],
|
||
|
categories=list('abcde')))
|
||
|
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||
|
|
||
|
# duplicated slice
|
||
|
res = df.loc[['a', 'a', 'b']]
|
||
|
exp = DataFrame({'A': [1, 3, 1, 3, 2], 'B': [5, 7, 5, 7, 6]},
|
||
|
index=CategoricalIndex(['a', 'a', 'a', 'a', 'b'],
|
||
|
categories=list('abcde')))
|
||
|
tm.assert_frame_equal(res, exp, check_index_type=True)
|
||
|
|
||
|
with tm.assert_raises_regex(
|
||
|
KeyError,
|
||
|
'a list-indexer must only include values '
|
||
|
'that are in the categories'):
|
||
|
df.loc[['a', 'x']]
|
||
|
|
||
|
def test_get_indexer_array(self):
|
||
|
arr = np.array([Timestamp('1999-12-31 00:00:00'),
|
||
|
Timestamp('2000-12-31 00:00:00')], dtype=object)
|
||
|
cats = [Timestamp('1999-12-31 00:00:00'),
|
||
|
Timestamp('2000-12-31 00:00:00')]
|
||
|
ci = CategoricalIndex(cats,
|
||
|
categories=cats,
|
||
|
ordered=False, dtype='category')
|
||
|
result = ci.get_indexer(arr)
|
||
|
expected = np.array([0, 1], dtype='intp')
|
||
|
tm.assert_numpy_array_equal(result, expected)
|
||
|
|
||
|
def test_get_indexer_same_categories_same_order(self):
|
||
|
ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'])
|
||
|
|
||
|
result = ci.get_indexer(CategoricalIndex(['b', 'b'],
|
||
|
categories=['a', 'b']))
|
||
|
expected = np.array([1, 1], dtype='intp')
|
||
|
tm.assert_numpy_array_equal(result, expected)
|
||
|
|
||
|
def test_get_indexer_same_categories_different_order(self):
|
||
|
# https://github.com/pandas-dev/pandas/issues/19551
|
||
|
ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'])
|
||
|
|
||
|
result = ci.get_indexer(CategoricalIndex(['b', 'b'],
|
||
|
categories=['b', 'a']))
|
||
|
expected = np.array([1, 1], dtype='intp')
|
||
|
tm.assert_numpy_array_equal(result, expected)
|
||
|
|
||
|
def test_getitem_with_listlike(self):
|
||
|
# GH 16115
|
||
|
cats = Categorical([Timestamp('12-31-1999'),
|
||
|
Timestamp('12-31-2000')])
|
||
|
|
||
|
expected = DataFrame([[1, 0], [0, 1]], dtype='uint8',
|
||
|
index=[0, 1], columns=cats)
|
||
|
dummies = pd.get_dummies(cats)
|
||
|
result = dummies[[c for c in dummies.columns]]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_setitem_listlike(self):
|
||
|
|
||
|
# GH 9469
|
||
|
# properly coerce the input indexers
|
||
|
np.random.seed(1)
|
||
|
c = Categorical(np.random.randint(0, 5, size=150000).astype(
|
||
|
np.int8)).add_categories([-1000])
|
||
|
indexer = np.array([100000]).astype(np.int64)
|
||
|
c[indexer] = -1000
|
||
|
|
||
|
# we are asserting the code result here
|
||
|
# which maps to the -1000 category
|
||
|
result = c.codes[np.array([100000]).astype(np.int64)]
|
||
|
tm.assert_numpy_array_equal(result, np.array([5], dtype='int8'))
|
||
|
|
||
|
def test_ix_categorical_index(self):
|
||
|
# GH 12531
|
||
|
df = DataFrame(np.random.randn(3, 3),
|
||
|
index=list('ABC'), columns=list('XYZ'))
|
||
|
cdf = df.copy()
|
||
|
cdf.index = CategoricalIndex(df.index)
|
||
|
cdf.columns = CategoricalIndex(df.columns)
|
||
|
|
||
|
expect = Series(df.loc['A', :], index=cdf.columns, name='A')
|
||
|
assert_series_equal(cdf.loc['A', :], expect)
|
||
|
|
||
|
expect = Series(df.loc[:, 'X'], index=cdf.index, name='X')
|
||
|
assert_series_equal(cdf.loc[:, 'X'], expect)
|
||
|
|
||
|
exp_index = CategoricalIndex(list('AB'), categories=['A', 'B', 'C'])
|
||
|
expect = DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns,
|
||
|
index=exp_index)
|
||
|
assert_frame_equal(cdf.loc[['A', 'B'], :], expect)
|
||
|
|
||
|
exp_columns = CategoricalIndex(list('XY'),
|
||
|
categories=['X', 'Y', 'Z'])
|
||
|
expect = DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index,
|
||
|
columns=exp_columns)
|
||
|
assert_frame_equal(cdf.loc[:, ['X', 'Y']], expect)
|
||
|
|
||
|
# non-unique
|
||
|
df = DataFrame(np.random.randn(3, 3),
|
||
|
index=list('ABA'), columns=list('XYX'))
|
||
|
cdf = df.copy()
|
||
|
cdf.index = CategoricalIndex(df.index)
|
||
|
cdf.columns = CategoricalIndex(df.columns)
|
||
|
|
||
|
exp_index = CategoricalIndex(list('AA'), categories=['A', 'B'])
|
||
|
expect = DataFrame(df.loc['A', :], columns=cdf.columns,
|
||
|
index=exp_index)
|
||
|
assert_frame_equal(cdf.loc['A', :], expect)
|
||
|
|
||
|
exp_columns = CategoricalIndex(list('XX'), categories=['X', 'Y'])
|
||
|
expect = DataFrame(df.loc[:, 'X'], index=cdf.index,
|
||
|
columns=exp_columns)
|
||
|
assert_frame_equal(cdf.loc[:, 'X'], expect)
|
||
|
|
||
|
expect = DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns,
|
||
|
index=CategoricalIndex(list('AAB')))
|
||
|
assert_frame_equal(cdf.loc[['A', 'B'], :], expect)
|
||
|
|
||
|
expect = DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index,
|
||
|
columns=CategoricalIndex(list('XXY')))
|
||
|
assert_frame_equal(cdf.loc[:, ['X', 'Y']], expect)
|
||
|
|
||
|
def test_read_only_source(self):
|
||
|
# GH 10043
|
||
|
rw_array = np.eye(10)
|
||
|
rw_df = DataFrame(rw_array)
|
||
|
|
||
|
ro_array = np.eye(10)
|
||
|
ro_array.setflags(write=False)
|
||
|
ro_df = DataFrame(ro_array)
|
||
|
|
||
|
assert_frame_equal(rw_df.iloc[[1, 2, 3]], ro_df.iloc[[1, 2, 3]])
|
||
|
assert_frame_equal(rw_df.iloc[[1]], ro_df.iloc[[1]])
|
||
|
assert_series_equal(rw_df.iloc[1], ro_df.iloc[1])
|
||
|
assert_frame_equal(rw_df.iloc[1:3], ro_df.iloc[1:3])
|
||
|
|
||
|
assert_frame_equal(rw_df.loc[[1, 2, 3]], ro_df.loc[[1, 2, 3]])
|
||
|
assert_frame_equal(rw_df.loc[[1]], ro_df.loc[[1]])
|
||
|
assert_series_equal(rw_df.loc[1], ro_df.loc[1])
|
||
|
assert_frame_equal(rw_df.loc[1:3], ro_df.loc[1:3])
|
||
|
|
||
|
def test_reindexing(self):
|
||
|
|
||
|
# reindexing
|
||
|
# convert to a regular index
|
||
|
result = self.df2.reindex(['a', 'b', 'e'])
|
||
|
expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan],
|
||
|
'B': Series(list('aaabbe'))}).set_index('B')
|
||
|
assert_frame_equal(result, expected, check_index_type=True)
|
||
|
|
||
|
result = self.df2.reindex(['a', 'b'])
|
||
|
expected = DataFrame({'A': [0, 1, 5, 2, 3],
|
||
|
'B': Series(list('aaabb'))}).set_index('B')
|
||
|
assert_frame_equal(result, expected, check_index_type=True)
|
||
|
|
||
|
result = self.df2.reindex(['e'])
|
||
|
expected = DataFrame({'A': [np.nan],
|
||
|
'B': Series(['e'])}).set_index('B')
|
||
|
assert_frame_equal(result, expected, check_index_type=True)
|
||
|
|
||
|
result = self.df2.reindex(['d'])
|
||
|
expected = DataFrame({'A': [np.nan],
|
||
|
'B': Series(['d'])}).set_index('B')
|
||
|
assert_frame_equal(result, expected, check_index_type=True)
|
||
|
|
||
|
# since we are actually reindexing with a Categorical
|
||
|
# then return a Categorical
|
||
|
cats = list('cabe')
|
||
|
|
||
|
result = self.df2.reindex(Categorical(['a', 'd'], categories=cats))
|
||
|
expected = DataFrame({'A': [0, 1, 5, np.nan],
|
||
|
'B': Series(list('aaad')).astype(
|
||
|
CDT(cats))}).set_index('B')
|
||
|
assert_frame_equal(result, expected, check_index_type=True)
|
||
|
|
||
|
result = self.df2.reindex(Categorical(['a'], categories=cats))
|
||
|
expected = DataFrame({'A': [0, 1, 5],
|
||
|
'B': Series(list('aaa')).astype(
|
||
|
CDT(cats))}).set_index('B')
|
||
|
assert_frame_equal(result, expected, check_index_type=True)
|
||
|
|
||
|
result = self.df2.reindex(['a', 'b', 'e'])
|
||
|
expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan],
|
||
|
'B': Series(list('aaabbe'))}).set_index('B')
|
||
|
assert_frame_equal(result, expected, check_index_type=True)
|
||
|
|
||
|
result = self.df2.reindex(['a', 'b'])
|
||
|
expected = DataFrame({'A': [0, 1, 5, 2, 3],
|
||
|
'B': Series(list('aaabb'))}).set_index('B')
|
||
|
assert_frame_equal(result, expected, check_index_type=True)
|
||
|
|
||
|
result = self.df2.reindex(['e'])
|
||
|
expected = DataFrame({'A': [np.nan],
|
||
|
'B': Series(['e'])}).set_index('B')
|
||
|
assert_frame_equal(result, expected, check_index_type=True)
|
||
|
|
||
|
# give back the type of categorical that we received
|
||
|
result = self.df2.reindex(Categorical(
|
||
|
['a', 'd'], categories=cats, ordered=True))
|
||
|
expected = DataFrame(
|
||
|
{'A': [0, 1, 5, np.nan],
|
||
|
'B': Series(list('aaad')).astype(
|
||
|
CDT(cats, ordered=True))}).set_index('B')
|
||
|
assert_frame_equal(result, expected, check_index_type=True)
|
||
|
|
||
|
result = self.df2.reindex(Categorical(
|
||
|
['a', 'd'], categories=['a', 'd']))
|
||
|
expected = DataFrame({'A': [0, 1, 5, np.nan],
|
||
|
'B': Series(list('aaad')).astype(
|
||
|
CDT(['a', 'd']))}).set_index('B')
|
||
|
assert_frame_equal(result, expected, check_index_type=True)
|
||
|
|
||
|
# passed duplicate indexers are not allowed
|
||
|
pytest.raises(ValueError, lambda: self.df2.reindex(['a', 'a']))
|
||
|
|
||
|
# args NotImplemented ATM
|
||
|
pytest.raises(NotImplementedError,
|
||
|
lambda: self.df2.reindex(['a'], method='ffill'))
|
||
|
pytest.raises(NotImplementedError,
|
||
|
lambda: self.df2.reindex(['a'], level=1))
|
||
|
pytest.raises(NotImplementedError,
|
||
|
lambda: self.df2.reindex(['a'], limit=2))
|
||
|
|
||
|
def test_loc_slice(self):
|
||
|
# slicing
|
||
|
# not implemented ATM
|
||
|
# GH9748
|
||
|
|
||
|
pytest.raises(TypeError, lambda: self.df.loc[1:5])
|
||
|
|
||
|
# result = df.loc[1:5]
|
||
|
# expected = df.iloc[[1,2,3,4]]
|
||
|
# assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_boolean_selection(self):
|
||
|
|
||
|
df3 = self.df3
|
||
|
df4 = self.df4
|
||
|
|
||
|
result = df3[df3.index == 'a']
|
||
|
expected = df3.iloc[[]]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df4[df4.index == 'a']
|
||
|
expected = df4.iloc[[]]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df3[df3.index == 1]
|
||
|
expected = df3.iloc[[0, 1, 3]]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df4[df4.index == 1]
|
||
|
expected = df4.iloc[[0, 1, 3]]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
# since we have an ordered categorical
|
||
|
|
||
|
# CategoricalIndex([1, 1, 2, 1, 3, 2],
|
||
|
# categories=[3, 2, 1],
|
||
|
# ordered=True,
|
||
|
# name=u'B')
|
||
|
result = df3[df3.index < 2]
|
||
|
expected = df3.iloc[[4]]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df3[df3.index > 1]
|
||
|
expected = df3.iloc[[]]
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
# unordered
|
||
|
# cannot be compared
|
||
|
|
||
|
# CategoricalIndex([1, 1, 2, 1, 3, 2],
|
||
|
# categories=[3, 2, 1],
|
||
|
# ordered=False,
|
||
|
# name=u'B')
|
||
|
pytest.raises(TypeError, lambda: df4[df4.index < 2])
|
||
|
pytest.raises(TypeError, lambda: df4[df4.index > 1])
|
||
|
|
||
|
def test_indexing_with_category(self):
|
||
|
|
||
|
# https://github.com/pandas-dev/pandas/issues/12564
|
||
|
# consistent result if comparing as Dataframe
|
||
|
|
||
|
cat = DataFrame({'A': ['foo', 'bar', 'baz']})
|
||
|
exp = DataFrame({'A': [True, False, False]})
|
||
|
|
||
|
res = (cat[['A']] == 'foo')
|
||
|
tm.assert_frame_equal(res, exp)
|
||
|
|
||
|
cat['A'] = cat['A'].astype('category')
|
||
|
|
||
|
res = (cat[['A']] == 'foo')
|
||
|
tm.assert_frame_equal(res, exp)
|
||
|
|
||
|
def test_map_with_dict_or_series(self):
|
||
|
orig_values = ['a', 'B', 1, 'a']
|
||
|
new_values = ['one', 2, 3.0, 'one']
|
||
|
cur_index = pd.CategoricalIndex(orig_values, name='XXX')
|
||
|
expected = pd.CategoricalIndex(new_values,
|
||
|
name='XXX', categories=[3.0, 2, 'one'])
|
||
|
|
||
|
mapper = pd.Series(new_values[:-1], index=orig_values[:-1])
|
||
|
output = cur_index.map(mapper)
|
||
|
# Order of categories in output can be different
|
||
|
tm.assert_index_equal(expected, output)
|
||
|
|
||
|
mapper = {o: n for o, n in
|
||
|
zip(orig_values[:-1], new_values[:-1])}
|
||
|
output = cur_index.map(mapper)
|
||
|
# Order of categories in output can be different
|
||
|
tm.assert_index_equal(expected, output)
|