1244 lines
45 KiB
Python
1244 lines
45 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
from __future__ import print_function
|
||
|
|
||
|
import re
|
||
|
import sys
|
||
|
from datetime import datetime, timedelta
|
||
|
import pytest
|
||
|
import numpy as np
|
||
|
|
||
|
import pandas as pd
|
||
|
import pandas.compat as compat
|
||
|
from pandas.core.dtypes.common import (
|
||
|
is_object_dtype, is_datetimetz, is_datetime64_dtype,
|
||
|
needs_i8_conversion)
|
||
|
import pandas.util.testing as tm
|
||
|
from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex,
|
||
|
PeriodIndex, Timedelta, IntervalIndex, Interval,
|
||
|
CategoricalIndex, Timestamp)
|
||
|
from pandas.compat import StringIO, PYPY, long
|
||
|
from pandas.compat.numpy import np_array_datetime64_compat
|
||
|
from pandas.core.accessor import PandasDelegate
|
||
|
from pandas.core.base import PandasObject, NoNewAttributesMixin
|
||
|
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
|
||
|
from pandas._libs.tslib import iNaT
|
||
|
|
||
|
|
||
|
class CheckStringMixin(object):
|
||
|
|
||
|
def test_string_methods_dont_fail(self):
|
||
|
repr(self.container)
|
||
|
str(self.container)
|
||
|
bytes(self.container)
|
||
|
if not compat.PY3:
|
||
|
unicode(self.container) # noqa
|
||
|
|
||
|
def test_tricky_container(self):
|
||
|
if not hasattr(self, 'unicode_container'):
|
||
|
pytest.skip('Need unicode_container to test with this')
|
||
|
repr(self.unicode_container)
|
||
|
str(self.unicode_container)
|
||
|
bytes(self.unicode_container)
|
||
|
if not compat.PY3:
|
||
|
unicode(self.unicode_container) # noqa
|
||
|
|
||
|
|
||
|
class CheckImmutable(object):
|
||
|
mutable_regex = re.compile('does not support mutable operations')
|
||
|
|
||
|
def check_mutable_error(self, *args, **kwargs):
|
||
|
# Pass whatever function you normally would to assert_raises_regex
|
||
|
# (after the Exception kind).
|
||
|
tm.assert_raises_regex(
|
||
|
TypeError, self.mutable_regex, *args, **kwargs)
|
||
|
|
||
|
def test_no_mutable_funcs(self):
|
||
|
def setitem():
|
||
|
self.container[0] = 5
|
||
|
|
||
|
self.check_mutable_error(setitem)
|
||
|
|
||
|
def setslice():
|
||
|
self.container[1:2] = 3
|
||
|
|
||
|
self.check_mutable_error(setslice)
|
||
|
|
||
|
def delitem():
|
||
|
del self.container[0]
|
||
|
|
||
|
self.check_mutable_error(delitem)
|
||
|
|
||
|
def delslice():
|
||
|
del self.container[0:3]
|
||
|
|
||
|
self.check_mutable_error(delslice)
|
||
|
mutable_methods = getattr(self, "mutable_methods", [])
|
||
|
|
||
|
for meth in mutable_methods:
|
||
|
self.check_mutable_error(getattr(self.container, meth))
|
||
|
|
||
|
def test_slicing_maintains_type(self):
|
||
|
result = self.container[1:2]
|
||
|
expected = self.lst[1:2]
|
||
|
self.check_result(result, expected)
|
||
|
|
||
|
def check_result(self, result, expected, klass=None):
|
||
|
klass = klass or self.klass
|
||
|
assert isinstance(result, klass)
|
||
|
assert result == expected
|
||
|
|
||
|
|
||
|
class TestPandasDelegate(object):
|
||
|
|
||
|
class Delegator(object):
|
||
|
_properties = ['foo']
|
||
|
_methods = ['bar']
|
||
|
|
||
|
def _set_foo(self, value):
|
||
|
self.foo = value
|
||
|
|
||
|
def _get_foo(self):
|
||
|
return self.foo
|
||
|
|
||
|
foo = property(_get_foo, _set_foo, doc="foo property")
|
||
|
|
||
|
def bar(self, *args, **kwargs):
|
||
|
""" a test bar method """
|
||
|
pass
|
||
|
|
||
|
class Delegate(PandasDelegate, PandasObject):
|
||
|
|
||
|
def __init__(self, obj):
|
||
|
self.obj = obj
|
||
|
|
||
|
def setup_method(self, method):
|
||
|
pass
|
||
|
|
||
|
def test_invalid_delegation(self):
|
||
|
# these show that in order for the delegation to work
|
||
|
# the _delegate_* methods need to be overridden to not raise
|
||
|
# a TypeError
|
||
|
|
||
|
self.Delegate._add_delegate_accessors(
|
||
|
delegate=self.Delegator,
|
||
|
accessors=self.Delegator._properties,
|
||
|
typ='property'
|
||
|
)
|
||
|
self.Delegate._add_delegate_accessors(
|
||
|
delegate=self.Delegator,
|
||
|
accessors=self.Delegator._methods,
|
||
|
typ='method'
|
||
|
)
|
||
|
|
||
|
delegate = self.Delegate(self.Delegator())
|
||
|
|
||
|
def f():
|
||
|
delegate.foo
|
||
|
|
||
|
pytest.raises(TypeError, f)
|
||
|
|
||
|
def f():
|
||
|
delegate.foo = 5
|
||
|
|
||
|
pytest.raises(TypeError, f)
|
||
|
|
||
|
def f():
|
||
|
delegate.foo()
|
||
|
|
||
|
pytest.raises(TypeError, f)
|
||
|
|
||
|
@pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
|
||
|
def test_memory_usage(self):
|
||
|
# Delegate does not implement memory_usage.
|
||
|
# Check that we fall back to in-built `__sizeof__`
|
||
|
# GH 12924
|
||
|
delegate = self.Delegate(self.Delegator())
|
||
|
sys.getsizeof(delegate)
|
||
|
|
||
|
|
||
|
class Ops(object):
|
||
|
|
||
|
def _allow_na_ops(self, obj):
|
||
|
"""Whether to skip test cases including NaN"""
|
||
|
if (isinstance(obj, Index) and
|
||
|
(obj.is_boolean() or not obj._can_hold_na)):
|
||
|
# don't test boolean / int64 index
|
||
|
return False
|
||
|
return True
|
||
|
|
||
|
def setup_method(self, method):
|
||
|
self.bool_index = tm.makeBoolIndex(10, name='a')
|
||
|
self.int_index = tm.makeIntIndex(10, name='a')
|
||
|
self.float_index = tm.makeFloatIndex(10, name='a')
|
||
|
self.dt_index = tm.makeDateIndex(10, name='a')
|
||
|
self.dt_tz_index = tm.makeDateIndex(10, name='a').tz_localize(
|
||
|
tz='US/Eastern')
|
||
|
self.period_index = tm.makePeriodIndex(10, name='a')
|
||
|
self.string_index = tm.makeStringIndex(10, name='a')
|
||
|
self.unicode_index = tm.makeUnicodeIndex(10, name='a')
|
||
|
|
||
|
arr = np.random.randn(10)
|
||
|
self.int_series = Series(arr, index=self.int_index, name='a')
|
||
|
self.float_series = Series(arr, index=self.float_index, name='a')
|
||
|
self.dt_series = Series(arr, index=self.dt_index, name='a')
|
||
|
self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True)
|
||
|
self.period_series = Series(arr, index=self.period_index, name='a')
|
||
|
self.string_series = Series(arr, index=self.string_index, name='a')
|
||
|
|
||
|
types = ['bool', 'int', 'float', 'dt', 'dt_tz', 'period', 'string',
|
||
|
'unicode']
|
||
|
fmts = ["{0}_{1}".format(t, f)
|
||
|
for t in types for f in ['index', 'series']]
|
||
|
self.objs = [getattr(self, f)
|
||
|
for f in fmts if getattr(self, f, None) is not None]
|
||
|
|
||
|
def check_ops_properties(self, props, filter=None, ignore_failures=False):
|
||
|
for op in props:
|
||
|
for o in self.is_valid_objs:
|
||
|
|
||
|
# if a filter, skip if it doesn't match
|
||
|
if filter is not None:
|
||
|
filt = o.index if isinstance(o, Series) else o
|
||
|
if not filter(filt):
|
||
|
continue
|
||
|
|
||
|
try:
|
||
|
if isinstance(o, Series):
|
||
|
expected = Series(
|
||
|
getattr(o.index, op), index=o.index, name='a')
|
||
|
else:
|
||
|
expected = getattr(o, op)
|
||
|
except (AttributeError):
|
||
|
if ignore_failures:
|
||
|
continue
|
||
|
|
||
|
result = getattr(o, op)
|
||
|
|
||
|
# these couuld be series, arrays or scalars
|
||
|
if isinstance(result, Series) and isinstance(expected, Series):
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
elif isinstance(result, Index) and isinstance(expected, Index):
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
elif isinstance(result, np.ndarray) and isinstance(expected,
|
||
|
np.ndarray):
|
||
|
tm.assert_numpy_array_equal(result, expected)
|
||
|
else:
|
||
|
assert result == expected
|
||
|
|
||
|
# freq raises AttributeError on an Int64Index because its not
|
||
|
# defined we mostly care about Series here anyhow
|
||
|
if not ignore_failures:
|
||
|
for o in self.not_valid_objs:
|
||
|
|
||
|
# an object that is datetimelike will raise a TypeError,
|
||
|
# otherwise an AttributeError
|
||
|
if issubclass(type(o), DatetimeIndexOpsMixin):
|
||
|
pytest.raises(TypeError, lambda: getattr(o, op))
|
||
|
else:
|
||
|
pytest.raises(AttributeError,
|
||
|
lambda: getattr(o, op))
|
||
|
|
||
|
def test_binary_ops_docs(self):
|
||
|
from pandas import DataFrame, Panel
|
||
|
op_map = {'add': '+',
|
||
|
'sub': '-',
|
||
|
'mul': '*',
|
||
|
'mod': '%',
|
||
|
'pow': '**',
|
||
|
'truediv': '/',
|
||
|
'floordiv': '//'}
|
||
|
for op_name in ['add', 'sub', 'mul', 'mod', 'pow', 'truediv',
|
||
|
'floordiv']:
|
||
|
for klass in [Series, DataFrame, Panel]:
|
||
|
operand1 = klass.__name__.lower()
|
||
|
operand2 = 'other'
|
||
|
op = op_map[op_name]
|
||
|
expected_str = ' '.join([operand1, op, operand2])
|
||
|
assert expected_str in getattr(klass, op_name).__doc__
|
||
|
|
||
|
# reverse version of the binary ops
|
||
|
expected_str = ' '.join([operand2, op, operand1])
|
||
|
assert expected_str in getattr(klass, 'r' + op_name).__doc__
|
||
|
|
||
|
|
||
|
class TestIndexOps(Ops):
|
||
|
|
||
|
def setup_method(self, method):
|
||
|
super(TestIndexOps, self).setup_method(method)
|
||
|
self.is_valid_objs = self.objs
|
||
|
self.not_valid_objs = []
|
||
|
|
||
|
def test_none_comparison(self):
|
||
|
|
||
|
# bug brought up by #1079
|
||
|
# changed from TypeError in 0.17.0
|
||
|
for o in self.is_valid_objs:
|
||
|
if isinstance(o, Series):
|
||
|
|
||
|
o[0] = np.nan
|
||
|
|
||
|
# noinspection PyComparisonWithNone
|
||
|
result = o == None # noqa
|
||
|
assert not result.iat[0]
|
||
|
assert not result.iat[1]
|
||
|
|
||
|
# noinspection PyComparisonWithNone
|
||
|
result = o != None # noqa
|
||
|
assert result.iat[0]
|
||
|
assert result.iat[1]
|
||
|
|
||
|
result = None == o # noqa
|
||
|
assert not result.iat[0]
|
||
|
assert not result.iat[1]
|
||
|
|
||
|
# this fails for numpy < 1.9
|
||
|
# and oddly for *some* platforms
|
||
|
# result = None != o # noqa
|
||
|
# assert result.iat[0]
|
||
|
# assert result.iat[1]
|
||
|
if (is_datetime64_dtype(o) or is_datetimetz(o)):
|
||
|
# Following DatetimeIndex (and Timestamp) convention,
|
||
|
# inequality comparisons with Series[datetime64] raise
|
||
|
with pytest.raises(TypeError):
|
||
|
None > o
|
||
|
with pytest.raises(TypeError):
|
||
|
o > None
|
||
|
else:
|
||
|
result = None > o
|
||
|
assert not result.iat[0]
|
||
|
assert not result.iat[1]
|
||
|
|
||
|
result = o < None
|
||
|
assert not result.iat[0]
|
||
|
assert not result.iat[1]
|
||
|
|
||
|
def test_ndarray_compat_properties(self):
|
||
|
|
||
|
for o in self.objs:
|
||
|
# Check that we work.
|
||
|
for p in ['shape', 'dtype', 'T', 'nbytes']:
|
||
|
assert getattr(o, p, None) is not None
|
||
|
|
||
|
# deprecated properties
|
||
|
for p in ['flags', 'strides', 'itemsize']:
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
assert getattr(o, p, None) is not None
|
||
|
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
assert hasattr(o, 'base')
|
||
|
|
||
|
# If we have a datetime-like dtype then needs a view to work
|
||
|
# but the user is responsible for that
|
||
|
try:
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
assert o.data is not None
|
||
|
except ValueError:
|
||
|
pass
|
||
|
|
||
|
with pytest.raises(ValueError):
|
||
|
o.item() # len > 1
|
||
|
|
||
|
assert o.ndim == 1
|
||
|
assert o.size == len(o)
|
||
|
|
||
|
assert Index([1]).item() == 1
|
||
|
assert Series([1]).item() == 1
|
||
|
|
||
|
def test_ops(self):
|
||
|
for op in ['max', 'min']:
|
||
|
for o in self.objs:
|
||
|
result = getattr(o, op)()
|
||
|
if not isinstance(o, PeriodIndex):
|
||
|
expected = getattr(o.values, op)()
|
||
|
else:
|
||
|
expected = pd.Period(
|
||
|
ordinal=getattr(o._ndarray_values, op)(),
|
||
|
freq=o.freq)
|
||
|
try:
|
||
|
assert result == expected
|
||
|
except TypeError:
|
||
|
# comparing tz-aware series with np.array results in
|
||
|
# TypeError
|
||
|
expected = expected.astype('M8[ns]').astype('int64')
|
||
|
assert result.value == expected
|
||
|
|
||
|
def test_nanops(self):
|
||
|
# GH 7261
|
||
|
for op in ['max', 'min']:
|
||
|
for klass in [Index, Series]:
|
||
|
|
||
|
obj = klass([np.nan, 2.0])
|
||
|
assert getattr(obj, op)() == 2.0
|
||
|
|
||
|
obj = klass([np.nan])
|
||
|
assert pd.isna(getattr(obj, op)())
|
||
|
|
||
|
obj = klass([])
|
||
|
assert pd.isna(getattr(obj, op)())
|
||
|
|
||
|
obj = klass([pd.NaT, datetime(2011, 11, 1)])
|
||
|
# check DatetimeIndex monotonic path
|
||
|
assert getattr(obj, op)() == datetime(2011, 11, 1)
|
||
|
|
||
|
obj = klass([pd.NaT, datetime(2011, 11, 1), pd.NaT])
|
||
|
# check DatetimeIndex non-monotonic path
|
||
|
assert getattr(obj, op)(), datetime(2011, 11, 1)
|
||
|
|
||
|
# argmin/max
|
||
|
obj = Index(np.arange(5, dtype='int64'))
|
||
|
assert obj.argmin() == 0
|
||
|
assert obj.argmax() == 4
|
||
|
|
||
|
obj = Index([np.nan, 1, np.nan, 2])
|
||
|
assert obj.argmin() == 1
|
||
|
assert obj.argmax() == 3
|
||
|
|
||
|
obj = Index([np.nan])
|
||
|
assert obj.argmin() == -1
|
||
|
assert obj.argmax() == -1
|
||
|
|
||
|
obj = Index([pd.NaT, datetime(2011, 11, 1), datetime(2011, 11, 2),
|
||
|
pd.NaT])
|
||
|
assert obj.argmin() == 1
|
||
|
assert obj.argmax() == 2
|
||
|
|
||
|
obj = Index([pd.NaT])
|
||
|
assert obj.argmin() == -1
|
||
|
assert obj.argmax() == -1
|
||
|
|
||
|
def test_value_counts_unique_nunique(self):
|
||
|
for orig in self.objs:
|
||
|
o = orig.copy()
|
||
|
klass = type(o)
|
||
|
values = o._values
|
||
|
|
||
|
if isinstance(values, Index):
|
||
|
# reset name not to affect latter process
|
||
|
values.name = None
|
||
|
|
||
|
# create repeated values, 'n'th element is repeated by n+1 times
|
||
|
# skip boolean, because it only has 2 values at most
|
||
|
if isinstance(o, Index) and o.is_boolean():
|
||
|
continue
|
||
|
elif isinstance(o, Index):
|
||
|
expected_index = Index(o[::-1])
|
||
|
expected_index.name = None
|
||
|
o = o.repeat(range(1, len(o) + 1))
|
||
|
o.name = 'a'
|
||
|
else:
|
||
|
expected_index = Index(values[::-1])
|
||
|
idx = o.index.repeat(range(1, len(o) + 1))
|
||
|
rep = np.repeat(values, range(1, len(o) + 1))
|
||
|
o = klass(rep, index=idx, name='a')
|
||
|
|
||
|
# check values has the same dtype as the original
|
||
|
assert o.dtype == orig.dtype
|
||
|
|
||
|
expected_s = Series(range(10, 0, -1), index=expected_index,
|
||
|
dtype='int64', name='a')
|
||
|
|
||
|
result = o.value_counts()
|
||
|
tm.assert_series_equal(result, expected_s)
|
||
|
assert result.index.name is None
|
||
|
assert result.name == 'a'
|
||
|
|
||
|
result = o.unique()
|
||
|
if isinstance(o, Index):
|
||
|
assert isinstance(result, o.__class__)
|
||
|
tm.assert_index_equal(result, orig)
|
||
|
elif is_datetimetz(o):
|
||
|
# datetimetz Series returns array of Timestamp
|
||
|
assert result[0] == orig[0]
|
||
|
for r in result:
|
||
|
assert isinstance(r, Timestamp)
|
||
|
tm.assert_numpy_array_equal(result,
|
||
|
orig._values.astype(object).values)
|
||
|
else:
|
||
|
tm.assert_numpy_array_equal(result, orig.values)
|
||
|
|
||
|
assert o.nunique() == len(np.unique(o.values))
|
||
|
|
||
|
def test_value_counts_unique_nunique_null(self):
|
||
|
|
||
|
for null_obj in [np.nan, None]:
|
||
|
for orig in self.objs:
|
||
|
o = orig.copy()
|
||
|
klass = type(o)
|
||
|
values = o._ndarray_values
|
||
|
|
||
|
if not self._allow_na_ops(o):
|
||
|
continue
|
||
|
|
||
|
# special assign to the numpy array
|
||
|
if is_datetimetz(o):
|
||
|
if isinstance(o, DatetimeIndex):
|
||
|
v = o.asi8
|
||
|
v[0:2] = iNaT
|
||
|
values = o._shallow_copy(v)
|
||
|
else:
|
||
|
o = o.copy()
|
||
|
o[0:2] = iNaT
|
||
|
values = o._values
|
||
|
|
||
|
elif needs_i8_conversion(o):
|
||
|
values[0:2] = iNaT
|
||
|
values = o._shallow_copy(values)
|
||
|
else:
|
||
|
values[0:2] = null_obj
|
||
|
# check values has the same dtype as the original
|
||
|
|
||
|
assert values.dtype == o.dtype
|
||
|
|
||
|
# create repeated values, 'n'th element is repeated by n+1
|
||
|
# times
|
||
|
if isinstance(o, (DatetimeIndex, PeriodIndex)):
|
||
|
expected_index = o.copy()
|
||
|
expected_index.name = None
|
||
|
|
||
|
# attach name to klass
|
||
|
o = klass(values.repeat(range(1, len(o) + 1)))
|
||
|
o.name = 'a'
|
||
|
else:
|
||
|
if is_datetimetz(o):
|
||
|
expected_index = orig._values._shallow_copy(values)
|
||
|
else:
|
||
|
expected_index = Index(values)
|
||
|
expected_index.name = None
|
||
|
o = o.repeat(range(1, len(o) + 1))
|
||
|
o.name = 'a'
|
||
|
|
||
|
# check values has the same dtype as the original
|
||
|
assert o.dtype == orig.dtype
|
||
|
# check values correctly have NaN
|
||
|
nanloc = np.zeros(len(o), dtype=np.bool)
|
||
|
nanloc[:3] = True
|
||
|
if isinstance(o, Index):
|
||
|
tm.assert_numpy_array_equal(pd.isna(o), nanloc)
|
||
|
else:
|
||
|
exp = Series(nanloc, o.index, name='a')
|
||
|
tm.assert_series_equal(pd.isna(o), exp)
|
||
|
|
||
|
expected_s_na = Series(list(range(10, 2, -1)) + [3],
|
||
|
index=expected_index[9:0:-1],
|
||
|
dtype='int64', name='a')
|
||
|
expected_s = Series(list(range(10, 2, -1)),
|
||
|
index=expected_index[9:1:-1],
|
||
|
dtype='int64', name='a')
|
||
|
|
||
|
result_s_na = o.value_counts(dropna=False)
|
||
|
tm.assert_series_equal(result_s_na, expected_s_na)
|
||
|
assert result_s_na.index.name is None
|
||
|
assert result_s_na.name == 'a'
|
||
|
result_s = o.value_counts()
|
||
|
tm.assert_series_equal(o.value_counts(), expected_s)
|
||
|
assert result_s.index.name is None
|
||
|
assert result_s.name == 'a'
|
||
|
|
||
|
result = o.unique()
|
||
|
if isinstance(o, Index):
|
||
|
tm.assert_index_equal(result,
|
||
|
Index(values[1:], name='a'))
|
||
|
elif is_datetimetz(o):
|
||
|
# unable to compare NaT / nan
|
||
|
vals = values[2:].astype(object).values
|
||
|
tm.assert_numpy_array_equal(result[1:], vals)
|
||
|
assert result[0] is pd.NaT
|
||
|
else:
|
||
|
tm.assert_numpy_array_equal(result[1:], values[2:])
|
||
|
|
||
|
assert pd.isna(result[0])
|
||
|
assert result.dtype == orig.dtype
|
||
|
|
||
|
assert o.nunique() == 8
|
||
|
assert o.nunique(dropna=False) == 9
|
||
|
|
||
|
def test_value_counts_inferred(self):
|
||
|
klasses = [Index, Series]
|
||
|
for klass in klasses:
|
||
|
s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
|
||
|
s = klass(s_values)
|
||
|
expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
|
||
|
tm.assert_series_equal(s.value_counts(), expected)
|
||
|
|
||
|
if isinstance(s, Index):
|
||
|
exp = Index(np.unique(np.array(s_values, dtype=np.object_)))
|
||
|
tm.assert_index_equal(s.unique(), exp)
|
||
|
else:
|
||
|
exp = np.unique(np.array(s_values, dtype=np.object_))
|
||
|
tm.assert_numpy_array_equal(s.unique(), exp)
|
||
|
|
||
|
assert s.nunique() == 4
|
||
|
# don't sort, have to sort after the fact as not sorting is
|
||
|
# platform-dep
|
||
|
hist = s.value_counts(sort=False).sort_values()
|
||
|
expected = Series([3, 1, 4, 2], index=list('acbd')).sort_values()
|
||
|
tm.assert_series_equal(hist, expected)
|
||
|
|
||
|
# sort ascending
|
||
|
hist = s.value_counts(ascending=True)
|
||
|
expected = Series([1, 2, 3, 4], index=list('cdab'))
|
||
|
tm.assert_series_equal(hist, expected)
|
||
|
|
||
|
# relative histogram.
|
||
|
hist = s.value_counts(normalize=True)
|
||
|
expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
|
||
|
tm.assert_series_equal(hist, expected)
|
||
|
|
||
|
def test_value_counts_bins(self):
|
||
|
klasses = [Index, Series]
|
||
|
for klass in klasses:
|
||
|
s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
|
||
|
s = klass(s_values)
|
||
|
|
||
|
# bins
|
||
|
pytest.raises(TypeError, lambda bins: s.value_counts(bins=bins), 1)
|
||
|
|
||
|
s1 = Series([1, 1, 2, 3])
|
||
|
res1 = s1.value_counts(bins=1)
|
||
|
exp1 = Series({Interval(0.997, 3.0): 4})
|
||
|
tm.assert_series_equal(res1, exp1)
|
||
|
res1n = s1.value_counts(bins=1, normalize=True)
|
||
|
exp1n = Series({Interval(0.997, 3.0): 1.0})
|
||
|
tm.assert_series_equal(res1n, exp1n)
|
||
|
|
||
|
if isinstance(s1, Index):
|
||
|
tm.assert_index_equal(s1.unique(), Index([1, 2, 3]))
|
||
|
else:
|
||
|
exp = np.array([1, 2, 3], dtype=np.int64)
|
||
|
tm.assert_numpy_array_equal(s1.unique(), exp)
|
||
|
|
||
|
assert s1.nunique() == 3
|
||
|
|
||
|
# these return the same
|
||
|
res4 = s1.value_counts(bins=4, dropna=True)
|
||
|
intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0])
|
||
|
exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2]))
|
||
|
tm.assert_series_equal(res4, exp4)
|
||
|
|
||
|
res4 = s1.value_counts(bins=4, dropna=False)
|
||
|
intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0])
|
||
|
exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2]))
|
||
|
tm.assert_series_equal(res4, exp4)
|
||
|
|
||
|
res4n = s1.value_counts(bins=4, normalize=True)
|
||
|
exp4n = Series([0.5, 0.25, 0.25, 0],
|
||
|
index=intervals.take([0, 3, 1, 2]))
|
||
|
tm.assert_series_equal(res4n, exp4n)
|
||
|
|
||
|
# handle NA's properly
|
||
|
s_values = ['a', 'b', 'b', 'b', np.nan, np.nan,
|
||
|
'd', 'd', 'a', 'a', 'b']
|
||
|
s = klass(s_values)
|
||
|
expected = Series([4, 3, 2], index=['b', 'a', 'd'])
|
||
|
tm.assert_series_equal(s.value_counts(), expected)
|
||
|
|
||
|
if isinstance(s, Index):
|
||
|
exp = Index(['a', 'b', np.nan, 'd'])
|
||
|
tm.assert_index_equal(s.unique(), exp)
|
||
|
else:
|
||
|
exp = np.array(['a', 'b', np.nan, 'd'], dtype=object)
|
||
|
tm.assert_numpy_array_equal(s.unique(), exp)
|
||
|
assert s.nunique() == 3
|
||
|
|
||
|
s = klass({})
|
||
|
expected = Series([], dtype=np.int64)
|
||
|
tm.assert_series_equal(s.value_counts(), expected,
|
||
|
check_index_type=False)
|
||
|
# returned dtype differs depending on original
|
||
|
if isinstance(s, Index):
|
||
|
tm.assert_index_equal(s.unique(), Index([]), exact=False)
|
||
|
else:
|
||
|
tm.assert_numpy_array_equal(s.unique(), np.array([]),
|
||
|
check_dtype=False)
|
||
|
|
||
|
assert s.nunique() == 0
|
||
|
|
||
|
@pytest.mark.parametrize('klass', [Index, Series])
|
||
|
def test_value_counts_datetime64(self, klass):
|
||
|
|
||
|
# GH 3002, datetime64[ns]
|
||
|
# don't test names though
|
||
|
txt = "\n".join(['xxyyzz20100101PIE', 'xxyyzz20100101GUM',
|
||
|
'xxyyzz20100101EGG', 'xxyyww20090101EGG',
|
||
|
'foofoo20080909PIE', 'foofoo20080909GUM'])
|
||
|
f = StringIO(txt)
|
||
|
df = pd.read_fwf(f, widths=[6, 8, 3],
|
||
|
names=["person_id", "dt", "food"],
|
||
|
parse_dates=["dt"])
|
||
|
|
||
|
s = klass(df['dt'].copy())
|
||
|
s.name = None
|
||
|
|
||
|
idx = pd.to_datetime(['2010-01-01 00:00:00Z',
|
||
|
'2008-09-09 00:00:00Z',
|
||
|
'2009-01-01 00:00:00Z'])
|
||
|
expected_s = Series([3, 2, 1], index=idx)
|
||
|
tm.assert_series_equal(s.value_counts(), expected_s)
|
||
|
|
||
|
expected = np_array_datetime64_compat(['2010-01-01 00:00:00Z',
|
||
|
'2009-01-01 00:00:00Z',
|
||
|
'2008-09-09 00:00:00Z'],
|
||
|
dtype='datetime64[ns]')
|
||
|
if isinstance(s, Index):
|
||
|
tm.assert_index_equal(s.unique(), DatetimeIndex(expected))
|
||
|
else:
|
||
|
tm.assert_numpy_array_equal(s.unique(), expected)
|
||
|
|
||
|
assert s.nunique() == 3
|
||
|
|
||
|
# with NaT
|
||
|
s = df['dt'].copy()
|
||
|
s = klass([v for v in s.values] + [pd.NaT])
|
||
|
|
||
|
result = s.value_counts()
|
||
|
assert result.index.dtype == 'datetime64[ns]'
|
||
|
tm.assert_series_equal(result, expected_s)
|
||
|
|
||
|
result = s.value_counts(dropna=False)
|
||
|
expected_s[pd.NaT] = 1
|
||
|
tm.assert_series_equal(result, expected_s)
|
||
|
|
||
|
unique = s.unique()
|
||
|
assert unique.dtype == 'datetime64[ns]'
|
||
|
|
||
|
# numpy_array_equal cannot compare pd.NaT
|
||
|
if isinstance(s, Index):
|
||
|
exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT])
|
||
|
tm.assert_index_equal(unique, exp_idx)
|
||
|
else:
|
||
|
tm.assert_numpy_array_equal(unique[:3], expected)
|
||
|
assert pd.isna(unique[3])
|
||
|
|
||
|
assert s.nunique() == 3
|
||
|
assert s.nunique(dropna=False) == 4
|
||
|
|
||
|
# timedelta64[ns]
|
||
|
td = df.dt - df.dt + timedelta(1)
|
||
|
td = klass(td, name='dt')
|
||
|
|
||
|
result = td.value_counts()
|
||
|
expected_s = Series([6], index=[Timedelta('1day')], name='dt')
|
||
|
tm.assert_series_equal(result, expected_s)
|
||
|
|
||
|
expected = TimedeltaIndex(['1 days'], name='dt')
|
||
|
if isinstance(td, Index):
|
||
|
tm.assert_index_equal(td.unique(), expected)
|
||
|
else:
|
||
|
tm.assert_numpy_array_equal(td.unique(), expected.values)
|
||
|
|
||
|
td2 = timedelta(1) + (df.dt - df.dt)
|
||
|
td2 = klass(td2, name='dt')
|
||
|
result2 = td2.value_counts()
|
||
|
tm.assert_series_equal(result2, expected_s)
|
||
|
|
||
|
def test_factorize(self):
|
||
|
for orig in self.objs:
|
||
|
o = orig.copy()
|
||
|
|
||
|
if isinstance(o, Index) and o.is_boolean():
|
||
|
exp_arr = np.array([0, 1] + [0] * 8, dtype=np.intp)
|
||
|
exp_uniques = o
|
||
|
exp_uniques = Index([False, True])
|
||
|
else:
|
||
|
exp_arr = np.array(range(len(o)), dtype=np.intp)
|
||
|
exp_uniques = o
|
||
|
labels, uniques = o.factorize()
|
||
|
|
||
|
tm.assert_numpy_array_equal(labels, exp_arr)
|
||
|
if isinstance(o, Series):
|
||
|
tm.assert_index_equal(uniques, Index(orig),
|
||
|
check_names=False)
|
||
|
else:
|
||
|
# factorize explicitly resets name
|
||
|
tm.assert_index_equal(uniques, exp_uniques,
|
||
|
check_names=False)
|
||
|
|
||
|
def test_factorize_repeated(self):
|
||
|
for orig in self.objs:
|
||
|
o = orig.copy()
|
||
|
|
||
|
# don't test boolean
|
||
|
if isinstance(o, Index) and o.is_boolean():
|
||
|
continue
|
||
|
|
||
|
# sort by value, and create duplicates
|
||
|
if isinstance(o, Series):
|
||
|
o = o.sort_values()
|
||
|
n = o.iloc[5:].append(o)
|
||
|
else:
|
||
|
indexer = o.argsort()
|
||
|
o = o.take(indexer)
|
||
|
n = o[5:].append(o)
|
||
|
|
||
|
exp_arr = np.array([5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
|
||
|
dtype=np.intp)
|
||
|
labels, uniques = n.factorize(sort=True)
|
||
|
|
||
|
tm.assert_numpy_array_equal(labels, exp_arr)
|
||
|
if isinstance(o, Series):
|
||
|
tm.assert_index_equal(uniques, Index(orig).sort_values(),
|
||
|
check_names=False)
|
||
|
else:
|
||
|
tm.assert_index_equal(uniques, o, check_names=False)
|
||
|
|
||
|
exp_arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4],
|
||
|
np.intp)
|
||
|
labels, uniques = n.factorize(sort=False)
|
||
|
tm.assert_numpy_array_equal(labels, exp_arr)
|
||
|
|
||
|
if isinstance(o, Series):
|
||
|
expected = Index(o.iloc[5:10].append(o.iloc[:5]))
|
||
|
tm.assert_index_equal(uniques, expected, check_names=False)
|
||
|
else:
|
||
|
expected = o[5:10].append(o[:5])
|
||
|
tm.assert_index_equal(uniques, expected, check_names=False)
|
||
|
|
||
|
def test_duplicated_drop_duplicates_index(self):
|
||
|
# GH 4060
|
||
|
for original in self.objs:
|
||
|
if isinstance(original, Index):
|
||
|
|
||
|
# special case
|
||
|
if original.is_boolean():
|
||
|
result = original.drop_duplicates()
|
||
|
expected = Index([False, True], name='a')
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
continue
|
||
|
|
||
|
# original doesn't have duplicates
|
||
|
expected = np.array([False] * len(original), dtype=bool)
|
||
|
duplicated = original.duplicated()
|
||
|
tm.assert_numpy_array_equal(duplicated, expected)
|
||
|
assert duplicated.dtype == bool
|
||
|
result = original.drop_duplicates()
|
||
|
tm.assert_index_equal(result, original)
|
||
|
assert result is not original
|
||
|
|
||
|
# has_duplicates
|
||
|
assert not original.has_duplicates
|
||
|
|
||
|
# create repeated values, 3rd and 5th values are duplicated
|
||
|
idx = original[list(range(len(original))) + [5, 3]]
|
||
|
expected = np.array([False] * len(original) + [True, True],
|
||
|
dtype=bool)
|
||
|
duplicated = idx.duplicated()
|
||
|
tm.assert_numpy_array_equal(duplicated, expected)
|
||
|
assert duplicated.dtype == bool
|
||
|
tm.assert_index_equal(idx.drop_duplicates(), original)
|
||
|
|
||
|
base = [False] * len(idx)
|
||
|
base[3] = True
|
||
|
base[5] = True
|
||
|
expected = np.array(base)
|
||
|
|
||
|
duplicated = idx.duplicated(keep='last')
|
||
|
tm.assert_numpy_array_equal(duplicated, expected)
|
||
|
assert duplicated.dtype == bool
|
||
|
result = idx.drop_duplicates(keep='last')
|
||
|
tm.assert_index_equal(result, idx[~expected])
|
||
|
|
||
|
base = [False] * len(original) + [True, True]
|
||
|
base[3] = True
|
||
|
base[5] = True
|
||
|
expected = np.array(base)
|
||
|
|
||
|
duplicated = idx.duplicated(keep=False)
|
||
|
tm.assert_numpy_array_equal(duplicated, expected)
|
||
|
assert duplicated.dtype == bool
|
||
|
result = idx.drop_duplicates(keep=False)
|
||
|
tm.assert_index_equal(result, idx[~expected])
|
||
|
|
||
|
with tm.assert_raises_regex(
|
||
|
TypeError, r"drop_duplicates\(\) got an unexpected "
|
||
|
"keyword argument"):
|
||
|
idx.drop_duplicates(inplace=True)
|
||
|
|
||
|
else:
|
||
|
expected = Series([False] * len(original),
|
||
|
index=original.index, name='a')
|
||
|
tm.assert_series_equal(original.duplicated(), expected)
|
||
|
result = original.drop_duplicates()
|
||
|
tm.assert_series_equal(result, original)
|
||
|
assert result is not original
|
||
|
|
||
|
idx = original.index[list(range(len(original))) + [5, 3]]
|
||
|
values = original._values[list(range(len(original))) + [5, 3]]
|
||
|
s = Series(values, index=idx, name='a')
|
||
|
|
||
|
expected = Series([False] * len(original) + [True, True],
|
||
|
index=idx, name='a')
|
||
|
tm.assert_series_equal(s.duplicated(), expected)
|
||
|
tm.assert_series_equal(s.drop_duplicates(), original)
|
||
|
|
||
|
base = [False] * len(idx)
|
||
|
base[3] = True
|
||
|
base[5] = True
|
||
|
expected = Series(base, index=idx, name='a')
|
||
|
|
||
|
tm.assert_series_equal(s.duplicated(keep='last'), expected)
|
||
|
tm.assert_series_equal(s.drop_duplicates(keep='last'),
|
||
|
s[~np.array(base)])
|
||
|
|
||
|
base = [False] * len(original) + [True, True]
|
||
|
base[3] = True
|
||
|
base[5] = True
|
||
|
expected = Series(base, index=idx, name='a')
|
||
|
|
||
|
tm.assert_series_equal(s.duplicated(keep=False), expected)
|
||
|
tm.assert_series_equal(s.drop_duplicates(keep=False),
|
||
|
s[~np.array(base)])
|
||
|
|
||
|
s.drop_duplicates(inplace=True)
|
||
|
tm.assert_series_equal(s, original)
|
||
|
|
||
|
def test_drop_duplicates_series_vs_dataframe(self):
|
||
|
# GH 14192
|
||
|
df = pd.DataFrame({'a': [1, 1, 1, 'one', 'one'],
|
||
|
'b': [2, 2, np.nan, np.nan, np.nan],
|
||
|
'c': [3, 3, np.nan, np.nan, 'three'],
|
||
|
'd': [1, 2, 3, 4, 4],
|
||
|
'e': [datetime(2015, 1, 1), datetime(2015, 1, 1),
|
||
|
datetime(2015, 2, 1), pd.NaT, pd.NaT]
|
||
|
})
|
||
|
for column in df.columns:
|
||
|
for keep in ['first', 'last', False]:
|
||
|
dropped_frame = df[[column]].drop_duplicates(keep=keep)
|
||
|
dropped_series = df[column].drop_duplicates(keep=keep)
|
||
|
tm.assert_frame_equal(dropped_frame, dropped_series.to_frame())
|
||
|
|
||
|
def test_fillna(self):
|
||
|
# # GH 11343
|
||
|
# though Index.fillna and Series.fillna has separate impl,
|
||
|
# test here to confirm these works as the same
|
||
|
|
||
|
for orig in self.objs:
|
||
|
|
||
|
o = orig.copy()
|
||
|
values = o.values
|
||
|
|
||
|
# values will not be changed
|
||
|
result = o.fillna(o.astype(object).values[0])
|
||
|
if isinstance(o, Index):
|
||
|
tm.assert_index_equal(o, result)
|
||
|
else:
|
||
|
tm.assert_series_equal(o, result)
|
||
|
# check shallow_copied
|
||
|
assert o is not result
|
||
|
|
||
|
for null_obj in [np.nan, None]:
|
||
|
for orig in self.objs:
|
||
|
o = orig.copy()
|
||
|
klass = type(o)
|
||
|
|
||
|
if not self._allow_na_ops(o):
|
||
|
continue
|
||
|
|
||
|
if needs_i8_conversion(o):
|
||
|
|
||
|
values = o.astype(object).values
|
||
|
fill_value = values[0]
|
||
|
values[0:2] = pd.NaT
|
||
|
else:
|
||
|
values = o.values.copy()
|
||
|
fill_value = o.values[0]
|
||
|
values[0:2] = null_obj
|
||
|
|
||
|
expected = [fill_value] * 2 + list(values[2:])
|
||
|
|
||
|
expected = klass(expected)
|
||
|
o = klass(values)
|
||
|
|
||
|
# check values has the same dtype as the original
|
||
|
assert o.dtype == orig.dtype
|
||
|
|
||
|
result = o.fillna(fill_value)
|
||
|
if isinstance(o, Index):
|
||
|
tm.assert_index_equal(result, expected)
|
||
|
else:
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
# check shallow_copied
|
||
|
assert o is not result
|
||
|
|
||
|
@pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
|
||
|
def test_memory_usage(self):
|
||
|
for o in self.objs:
|
||
|
res = o.memory_usage()
|
||
|
res_deep = o.memory_usage(deep=True)
|
||
|
|
||
|
if (is_object_dtype(o) or (isinstance(o, Series) and
|
||
|
is_object_dtype(o.index))):
|
||
|
# if there are objects, only deep will pick them up
|
||
|
assert res_deep > res
|
||
|
else:
|
||
|
assert res == res_deep
|
||
|
|
||
|
if isinstance(o, Series):
|
||
|
assert ((o.memory_usage(index=False) +
|
||
|
o.index.memory_usage()) ==
|
||
|
o.memory_usage(index=True))
|
||
|
|
||
|
# sys.getsizeof will call the .memory_usage with
|
||
|
# deep=True, and add on some GC overhead
|
||
|
diff = res_deep - sys.getsizeof(o)
|
||
|
assert abs(diff) < 100
|
||
|
|
||
|
def test_searchsorted(self):
|
||
|
# See gh-12238
|
||
|
for o in self.objs:
|
||
|
index = np.searchsorted(o, max(o))
|
||
|
assert 0 <= index <= len(o)
|
||
|
|
||
|
index = np.searchsorted(o, max(o), sorter=range(len(o)))
|
||
|
assert 0 <= index <= len(o)
|
||
|
|
||
|
def test_validate_bool_args(self):
|
||
|
invalid_values = [1, "True", [1, 2, 3], 5.0]
|
||
|
|
||
|
for value in invalid_values:
|
||
|
with pytest.raises(ValueError):
|
||
|
self.int_series.drop_duplicates(inplace=value)
|
||
|
|
||
|
|
||
|
class TestTranspose(Ops):
|
||
|
errmsg = "the 'axes' parameter is not supported"
|
||
|
|
||
|
def test_transpose(self):
|
||
|
for obj in self.objs:
|
||
|
if isinstance(obj, Index):
|
||
|
tm.assert_index_equal(obj.transpose(), obj)
|
||
|
else:
|
||
|
tm.assert_series_equal(obj.transpose(), obj)
|
||
|
|
||
|
def test_transpose_non_default_axes(self):
|
||
|
for obj in self.objs:
|
||
|
tm.assert_raises_regex(ValueError, self.errmsg,
|
||
|
obj.transpose, 1)
|
||
|
tm.assert_raises_regex(ValueError, self.errmsg,
|
||
|
obj.transpose, axes=1)
|
||
|
|
||
|
def test_numpy_transpose(self):
|
||
|
for obj in self.objs:
|
||
|
if isinstance(obj, Index):
|
||
|
tm.assert_index_equal(np.transpose(obj), obj)
|
||
|
else:
|
||
|
tm.assert_series_equal(np.transpose(obj), obj)
|
||
|
|
||
|
tm.assert_raises_regex(ValueError, self.errmsg,
|
||
|
np.transpose, obj, axes=1)
|
||
|
|
||
|
|
||
|
class TestNoNewAttributesMixin(object):
|
||
|
|
||
|
def test_mixin(self):
|
||
|
class T(NoNewAttributesMixin):
|
||
|
pass
|
||
|
|
||
|
t = T()
|
||
|
assert not hasattr(t, "__frozen")
|
||
|
|
||
|
t.a = "test"
|
||
|
assert t.a == "test"
|
||
|
|
||
|
t._freeze()
|
||
|
assert "__frozen" in dir(t)
|
||
|
assert getattr(t, "__frozen")
|
||
|
|
||
|
def f():
|
||
|
t.b = "test"
|
||
|
|
||
|
pytest.raises(AttributeError, f)
|
||
|
assert not hasattr(t, "b")
|
||
|
|
||
|
|
||
|
class TestToIterable(object):
|
||
|
# test that we convert an iterable to python types
|
||
|
|
||
|
dtypes = [
|
||
|
('int8', (int, long)),
|
||
|
('int16', (int, long)),
|
||
|
('int32', (int, long)),
|
||
|
('int64', (int, long)),
|
||
|
('uint8', (int, long)),
|
||
|
('uint16', (int, long)),
|
||
|
('uint32', (int, long)),
|
||
|
('uint64', (int, long)),
|
||
|
('float16', float),
|
||
|
('float32', float),
|
||
|
('float64', float),
|
||
|
('datetime64[ns]', Timestamp),
|
||
|
('datetime64[ns, US/Eastern]', Timestamp),
|
||
|
('timedelta64[ns]', Timedelta)]
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
'dtype, rdtype', dtypes)
|
||
|
@pytest.mark.parametrize(
|
||
|
'method',
|
||
|
[
|
||
|
lambda x: x.tolist(),
|
||
|
lambda x: list(x),
|
||
|
lambda x: list(x.__iter__()),
|
||
|
], ids=['tolist', 'list', 'iter'])
|
||
|
@pytest.mark.parametrize('typ', [Series, Index])
|
||
|
def test_iterable(self, typ, method, dtype, rdtype):
|
||
|
# gh-10904
|
||
|
# gh-13258
|
||
|
# coerce iteration to underlying python / pandas types
|
||
|
s = typ([1], dtype=dtype)
|
||
|
result = method(s)[0]
|
||
|
assert isinstance(result, rdtype)
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
'dtype, rdtype, obj',
|
||
|
[
|
||
|
('object', object, 'a'),
|
||
|
('object', (int, long), 1),
|
||
|
('category', object, 'a'),
|
||
|
('category', (int, long), 1)])
|
||
|
@pytest.mark.parametrize(
|
||
|
'method',
|
||
|
[
|
||
|
lambda x: x.tolist(),
|
||
|
lambda x: list(x),
|
||
|
lambda x: list(x.__iter__()),
|
||
|
], ids=['tolist', 'list', 'iter'])
|
||
|
@pytest.mark.parametrize('typ', [Series, Index])
|
||
|
def test_iterable_object_and_category(self, typ, method,
|
||
|
dtype, rdtype, obj):
|
||
|
# gh-10904
|
||
|
# gh-13258
|
||
|
# coerce iteration to underlying python / pandas types
|
||
|
s = typ([obj], dtype=dtype)
|
||
|
result = method(s)[0]
|
||
|
assert isinstance(result, rdtype)
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
'dtype, rdtype', dtypes)
|
||
|
def test_iterable_items(self, dtype, rdtype):
|
||
|
# gh-13258
|
||
|
# test items / iteritems yields the correct boxed scalars
|
||
|
# this only applies to series
|
||
|
s = Series([1], dtype=dtype)
|
||
|
_, result = list(s.items())[0]
|
||
|
assert isinstance(result, rdtype)
|
||
|
|
||
|
_, result = list(s.iteritems())[0]
|
||
|
assert isinstance(result, rdtype)
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
'dtype, rdtype',
|
||
|
dtypes + [
|
||
|
('object', (int, long)),
|
||
|
('category', (int, long))])
|
||
|
@pytest.mark.parametrize('typ', [Series, Index])
|
||
|
def test_iterable_map(self, typ, dtype, rdtype):
|
||
|
# gh-13236
|
||
|
# coerce iteration to underlying python / pandas types
|
||
|
s = typ([1], dtype=dtype)
|
||
|
result = s.map(type)[0]
|
||
|
if not isinstance(rdtype, tuple):
|
||
|
rdtype = tuple([rdtype])
|
||
|
assert result in rdtype
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
'method',
|
||
|
[
|
||
|
lambda x: x.tolist(),
|
||
|
lambda x: list(x),
|
||
|
lambda x: list(x.__iter__()),
|
||
|
], ids=['tolist', 'list', 'iter'])
|
||
|
def test_categorial_datetimelike(self, method):
|
||
|
i = CategoricalIndex([Timestamp('1999-12-31'),
|
||
|
Timestamp('2000-12-31')])
|
||
|
|
||
|
result = method(i)[0]
|
||
|
assert isinstance(result, Timestamp)
|
||
|
|
||
|
def test_iter_box(self):
|
||
|
vals = [Timestamp('2011-01-01'), Timestamp('2011-01-02')]
|
||
|
s = Series(vals)
|
||
|
assert s.dtype == 'datetime64[ns]'
|
||
|
for res, exp in zip(s, vals):
|
||
|
assert isinstance(res, Timestamp)
|
||
|
assert res.tz is None
|
||
|
assert res == exp
|
||
|
|
||
|
vals = [Timestamp('2011-01-01', tz='US/Eastern'),
|
||
|
Timestamp('2011-01-02', tz='US/Eastern')]
|
||
|
s = Series(vals)
|
||
|
|
||
|
assert s.dtype == 'datetime64[ns, US/Eastern]'
|
||
|
for res, exp in zip(s, vals):
|
||
|
assert isinstance(res, Timestamp)
|
||
|
assert res.tz == exp.tz
|
||
|
assert res == exp
|
||
|
|
||
|
# timedelta
|
||
|
vals = [Timedelta('1 days'), Timedelta('2 days')]
|
||
|
s = Series(vals)
|
||
|
assert s.dtype == 'timedelta64[ns]'
|
||
|
for res, exp in zip(s, vals):
|
||
|
assert isinstance(res, Timedelta)
|
||
|
assert res == exp
|
||
|
|
||
|
# period (object dtype, not boxed)
|
||
|
vals = [pd.Period('2011-01-01', freq='M'),
|
||
|
pd.Period('2011-01-02', freq='M')]
|
||
|
s = Series(vals)
|
||
|
assert s.dtype == 'object'
|
||
|
for res, exp in zip(s, vals):
|
||
|
assert isinstance(res, pd.Period)
|
||
|
assert res.freq == 'M'
|
||
|
assert res == exp
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize('array, expected_type, dtype', [
|
||
|
(np.array([0, 1], dtype=np.int64), np.ndarray, 'int64'),
|
||
|
(np.array(['a', 'b']), np.ndarray, 'object'),
|
||
|
(pd.Categorical(['a', 'b']), pd.Categorical, 'category'),
|
||
|
(pd.DatetimeIndex(['2017', '2018']), np.ndarray, 'datetime64[ns]'),
|
||
|
(pd.DatetimeIndex(['2017', '2018'], tz="US/Central"), pd.DatetimeIndex,
|
||
|
'datetime64[ns, US/Central]'),
|
||
|
(pd.TimedeltaIndex([10**10]), np.ndarray, 'm8[ns]'),
|
||
|
(pd.PeriodIndex([2018, 2019], freq='A'), np.ndarray, 'object'),
|
||
|
(pd.IntervalIndex.from_breaks([0, 1, 2]), np.ndarray, 'object'),
|
||
|
])
|
||
|
def test_values_consistent(array, expected_type, dtype):
|
||
|
l_values = pd.Series(array)._values
|
||
|
r_values = pd.Index(array)._values
|
||
|
assert type(l_values) is expected_type
|
||
|
assert type(l_values) is type(r_values)
|
||
|
|
||
|
if isinstance(l_values, np.ndarray):
|
||
|
tm.assert_numpy_array_equal(l_values, r_values)
|
||
|
elif isinstance(l_values, pd.Index):
|
||
|
tm.assert_index_equal(l_values, r_values)
|
||
|
elif pd.api.types.is_categorical(l_values):
|
||
|
tm.assert_categorical_equal(l_values, r_values)
|
||
|
else:
|
||
|
raise TypeError("Unexpected type {}".format(type(l_values)))
|
||
|
|
||
|
assert l_values.dtype == dtype
|
||
|
assert r_values.dtype == dtype
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize('array, expected', [
|
||
|
(np.array([0, 1], dtype=np.int64), np.array([0, 1], dtype=np.int64)),
|
||
|
(np.array(['0', '1']), np.array(['0', '1'], dtype=object)),
|
||
|
(pd.Categorical(['a', 'a']), np.array([0, 0], dtype='int8')),
|
||
|
(pd.DatetimeIndex(['2017-01-01T00:00:00']),
|
||
|
np.array(['2017-01-01T00:00:00'], dtype='M8[ns]')),
|
||
|
(pd.DatetimeIndex(['2017-01-01T00:00:00'], tz="US/Eastern"),
|
||
|
np.array(['2017-01-01T05:00:00'], dtype='M8[ns]')),
|
||
|
(pd.TimedeltaIndex([10**10]), np.array([10**10], dtype='m8[ns]')),
|
||
|
pytest.param(
|
||
|
pd.PeriodIndex(['2017', '2018'], freq='D'),
|
||
|
np.array([17167, 17532]),
|
||
|
marks=pytest.mark.xfail(reason="PeriodArray Not implemented")
|
||
|
),
|
||
|
])
|
||
|
def test_ndarray_values(array, expected):
|
||
|
l_values = pd.Series(array)._ndarray_values
|
||
|
r_values = pd.Index(array)._ndarray_values
|
||
|
tm.assert_numpy_array_equal(l_values, r_values)
|
||
|
tm.assert_numpy_array_equal(l_values, expected)
|