114 lines
4 KiB
Python
114 lines
4 KiB
Python
|
import pytest
|
||
|
import numpy as np
|
||
|
|
||
|
import pandas as pd
|
||
|
import pandas.util.testing as tm
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize('ordered', [True, False])
@pytest.mark.parametrize('categories', [
    ['b', 'a', 'c'],
    ['a', 'b', 'c', 'd'],
])
def test_factorize(categories, ordered):
    """Check ``pd.factorize`` on an unsorted Categorical.

    Codes are expected to follow order of first appearance, the missing
    value is expected to map to ``-1``, and the uniques are expected to
    come back as a Categorical carrying the same ``categories`` and
    ``ordered`` flag as the input.
    """
    # Both the input and the expected uniques share the same dtype settings.
    dtype_kwargs = dict(categories=categories, ordered=ordered)

    values = pd.Categorical(['b', 'b', 'a', 'c', None], **dtype_kwargs)
    want_codes = np.array([0, 0, 1, 2, -1], dtype=np.intp)
    want_uniques = pd.Categorical(['b', 'a', 'c'], **dtype_kwargs)

    got_codes, got_uniques = pd.factorize(values)

    tm.assert_numpy_array_equal(got_codes, want_codes)
    tm.assert_categorical_equal(got_uniques, want_uniques)
|
||
|
|
||
|
|
||
|
def test_factorized_sort():
    """Check ``pd.factorize(..., sort=True)`` on an unordered Categorical.

    The uniques are expected in sorted order (``'a'`` before ``'b'``), the
    codes are expected to index into that sorted result, and the missing
    value is expected to map to ``-1``.
    """
    values = pd.Categorical(['b', 'b', None, 'a'])
    want_codes = np.array([1, 1, -1, 0], dtype=np.intp)
    want_uniques = pd.Categorical(['a', 'b'])

    got_codes, got_uniques = pd.factorize(values, sort=True)

    tm.assert_numpy_array_equal(got_codes, want_codes)
    tm.assert_categorical_equal(got_uniques, want_uniques)
|
||
|
|
||
|
|
||
|
def test_factorized_sort_ordered():
    """Check sorted factorization of an ordered Categorical.

    With categories declared as ``['c', 'b', 'a']`` and ``ordered=True``,
    the sorted uniques are expected to follow the category order
    (``'b'`` before ``'a'``) rather than lexical order; the unobserved
    category ``'c'`` is expected to stay in the dtype but not in the values.
    """
    # Input and expected uniques share the same ordered dtype settings.
    dtype_kwargs = dict(categories=['c', 'b', 'a'], ordered=True)

    values = pd.Categorical(['b', 'b', None, 'a'], **dtype_kwargs)
    want_codes = np.array([0, 0, -1, 1], dtype=np.intp)
    want_uniques = pd.Categorical(['b', 'a'], **dtype_kwargs)

    got_codes, got_uniques = pd.factorize(values, sort=True)

    tm.assert_numpy_array_equal(got_codes, want_codes)
    tm.assert_categorical_equal(got_uniques, want_uniques)
|
||
|
|
||
|
|
||
|
def test_isin_cats():
    """Check ``Categorical.isin`` against lists with and without NaN.

    A NaN in the candidate list is expected to match the missing entry of
    the Categorical; a candidate that is not present (``"c"``) is expected
    to match nothing.
    """
    # GH2003
    values = pd.Categorical(["a", "b", np.nan])

    # (candidates passed to isin, expected boolean mask)
    cases = [
        (["a", np.nan], [True, False, True]),
        (["a", "c"], [True, False, False]),
    ]

    for candidates, mask in cases:
        got = values.isin(candidates)
        want = np.array(mask, dtype=bool)
        tm.assert_numpy_array_equal(want, got)
|
||
|
|
||
|
|
||
|
# The empty Series is given an explicit dtype: constructing ``pd.Series()``
# without one emits a DeprecationWarning on pandas 1.x (at collection time,
# since it is evaluated in the decorator) and its default dtype differs
# between pandas versions.
@pytest.mark.parametrize("empty", [[], pd.Series(dtype=object), np.array([])])
def test_isin_empty(empty):
    """Check that ``Categorical.isin`` with an empty container is all-False.

    Parameters
    ----------
    empty : list, Series or ndarray
        An empty list-like passed as the set of candidate values.
    """
    cat = pd.Categorical(["a", "b"])
    expected = np.array([False, False], dtype=bool)

    result = cat.isin(empty)
    tm.assert_numpy_array_equal(expected, result)
|
||
|
|
||
|
|
||
|
class TestTake(object):
|
||
|
# https://github.com/pandas-dev/pandas/issues/20664
|
||
|
|
||
|
def test_take_warns(self):
|
||
|
cat = pd.Categorical(['a', 'b'])
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
cat.take([0, -1])
|
||
|
|
||
|
def test_take_positive_no_warning(self):
|
||
|
cat = pd.Categorical(['a', 'b'])
|
||
|
with tm.assert_produces_warning(None):
|
||
|
cat.take([0, 0])
|
||
|
|
||
|
def test_take_bounds(self, allow_fill):
|
||
|
# https://github.com/pandas-dev/pandas/issues/20664
|
||
|
cat = pd.Categorical(['a', 'b', 'a'])
|
||
|
with pytest.raises(IndexError):
|
||
|
cat.take([4, 5], allow_fill=allow_fill)
|
||
|
|
||
|
def test_take_empty(self, allow_fill):
|
||
|
# https://github.com/pandas-dev/pandas/issues/20664
|
||
|
cat = pd.Categorical([], categories=['a', 'b'])
|
||
|
with pytest.raises(IndexError):
|
||
|
cat.take([0], allow_fill=allow_fill)
|
||
|
|
||
|
def test_positional_take(self, ordered):
|
||
|
cat = pd.Categorical(['a', 'a', 'b', 'b'], categories=['b', 'a'],
|
||
|
ordered=ordered)
|
||
|
result = cat.take([0, 1, 2], allow_fill=False)
|
||
|
expected = pd.Categorical(['a', 'a', 'b'], categories=cat.categories,
|
||
|
ordered=ordered)
|
||
|
tm.assert_categorical_equal(result, expected)
|
||
|
|
||
|
def test_positional_take_unobserved(self, ordered):
|
||
|
cat = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c'],
|
||
|
ordered=ordered)
|
||
|
result = cat.take([1, 0], allow_fill=False)
|
||
|
expected = pd.Categorical(['b', 'a'], categories=cat.categories,
|
||
|
ordered=ordered)
|
||
|
tm.assert_categorical_equal(result, expected)
|