laywerrobot/lib/python3.6/site-packages/pandas/tests/categorical/test_indexing.py

124 lines
4.5 KiB
Python
Raw Normal View History

2020-08-27 21:55:39 +02:00
# -*- coding: utf-8 -*-
import pytest
import numpy as np
import pandas.util.testing as tm
from pandas import Categorical, Index, CategoricalIndex, PeriodIndex
from pandas.tests.categorical.common import TestCategorical
class TestCategoricalIndexingWithFactor(TestCategorical):
def test_getitem(self):
assert self.factor[0] == 'a'
assert self.factor[-1] == 'c'
subf = self.factor[[0, 1, 2]]
tm.assert_numpy_array_equal(subf._codes,
np.array([0, 1, 1], dtype=np.int8))
subf = self.factor[np.asarray(self.factor) == 'c']
tm.assert_numpy_array_equal(subf._codes,
np.array([2, 2, 2], dtype=np.int8))
def test_setitem(self):
# int/positional
c = self.factor.copy()
c[0] = 'b'
assert c[0] == 'b'
c[-1] = 'a'
assert c[-1] == 'a'
# boolean
c = self.factor.copy()
indexer = np.zeros(len(c), dtype='bool')
indexer[0] = True
indexer[-1] = True
c[indexer] = 'c'
expected = Categorical(['c', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
ordered=True)
tm.assert_categorical_equal(c, expected)
class TestCategoricalIndexing(object):
def test_getitem_listlike(self):
# GH 9469
# properly coerce the input indexers
np.random.seed(1)
c = Categorical(np.random.randint(0, 5, size=150000).astype(np.int8))
result = c.codes[np.array([100000]).astype(np.int64)]
expected = c[np.array([100000]).astype(np.int64)].codes
tm.assert_numpy_array_equal(result, expected)
def test_periodindex(self):
idx1 = PeriodIndex(['2014-01', '2014-01', '2014-02', '2014-02',
'2014-03', '2014-03'], freq='M')
cat1 = Categorical(idx1)
str(cat1)
exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.int8)
exp_idx = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M')
tm.assert_numpy_array_equal(cat1._codes, exp_arr)
tm.assert_index_equal(cat1.categories, exp_idx)
idx2 = PeriodIndex(['2014-03', '2014-03', '2014-02', '2014-01',
'2014-03', '2014-01'], freq='M')
cat2 = Categorical(idx2, ordered=True)
str(cat2)
exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.int8)
exp_idx2 = PeriodIndex(['2014-01', '2014-02', '2014-03'], freq='M')
tm.assert_numpy_array_equal(cat2._codes, exp_arr)
tm.assert_index_equal(cat2.categories, exp_idx2)
idx3 = PeriodIndex(['2013-12', '2013-11', '2013-10', '2013-09',
'2013-08', '2013-07', '2013-05'], freq='M')
cat3 = Categorical(idx3, ordered=True)
exp_arr = np.array([6, 5, 4, 3, 2, 1, 0], dtype=np.int8)
exp_idx = PeriodIndex(['2013-05', '2013-07', '2013-08', '2013-09',
'2013-10', '2013-11', '2013-12'], freq='M')
tm.assert_numpy_array_equal(cat3._codes, exp_arr)
tm.assert_index_equal(cat3.categories, exp_idx)
def test_categories_assigments(self):
s = Categorical(["a", "b", "c", "a"])
exp = np.array([1, 2, 3, 1], dtype=np.int64)
s.categories = [1, 2, 3]
tm.assert_numpy_array_equal(s.__array__(), exp)
tm.assert_index_equal(s.categories, Index([1, 2, 3]))
# lengthen
def f():
s.categories = [1, 2, 3, 4]
pytest.raises(ValueError, f)
# shorten
def f():
s.categories = [1, 2]
pytest.raises(ValueError, f)
# Combinations of sorted/unique:
@pytest.mark.parametrize("idx_values", [[1, 2, 3, 4], [1, 3, 2, 4],
[1, 3, 3, 4], [1, 2, 2, 4]])
# Combinations of missing/unique
@pytest.mark.parametrize("key_values", [[1, 2], [1, 5], [1, 1], [5, 5]])
@pytest.mark.parametrize("key_class", [Categorical, CategoricalIndex])
def test_get_indexer_non_unique(self, idx_values, key_values, key_class):
# GH 21448
key = key_class(key_values, categories=range(1, 5))
# Test for flat index and CategoricalIndex with same/different cats:
for dtype in None, 'category', key.dtype:
idx = Index(idx_values, dtype=dtype)
expected, exp_miss = idx.get_indexer_non_unique(key_values)
result, res_miss = idx.get_indexer_non_unique(key)
tm.assert_numpy_array_equal(expected, result)
tm.assert_numpy_array_equal(exp_miss, res_miss)