590 lines
21 KiB
Python
590 lines
21 KiB
Python
|
# coding=utf-8
|
||
|
# pylint: disable-msg=E1101,W0612
|
||
|
|
||
|
import pytest
|
||
|
|
||
|
from collections import Counter, defaultdict, OrderedDict
|
||
|
|
||
|
import numpy as np
|
||
|
import pandas as pd
|
||
|
|
||
|
from pandas import (Index, Series, DataFrame, isna)
|
||
|
from pandas.compat import lrange
|
||
|
from pandas import compat
|
||
|
from pandas.util.testing import assert_series_equal, assert_frame_equal
|
||
|
import pandas.util.testing as tm
|
||
|
|
||
|
from .common import TestData
|
||
|
|
||
|
|
||
|
class TestSeriesApply(TestData):
|
||
|
|
||
|
def test_apply(self):
|
||
|
with np.errstate(all='ignore'):
|
||
|
tm.assert_series_equal(self.ts.apply(np.sqrt), np.sqrt(self.ts))
|
||
|
|
||
|
# element-wise apply
|
||
|
import math
|
||
|
tm.assert_series_equal(self.ts.apply(math.exp), np.exp(self.ts))
|
||
|
|
||
|
# empty series
|
||
|
s = Series(dtype=object, name='foo', index=pd.Index([], name='bar'))
|
||
|
rs = s.apply(lambda x: x)
|
||
|
tm.assert_series_equal(s, rs)
|
||
|
|
||
|
# check all metadata (GH 9322)
|
||
|
assert s is not rs
|
||
|
assert s.index is rs.index
|
||
|
assert s.dtype == rs.dtype
|
||
|
assert s.name == rs.name
|
||
|
|
||
|
# index but no data
|
||
|
s = Series(index=[1, 2, 3])
|
||
|
rs = s.apply(lambda x: x)
|
||
|
tm.assert_series_equal(s, rs)
|
||
|
|
||
|
def test_apply_same_length_inference_bug(self):
|
||
|
s = Series([1, 2])
|
||
|
f = lambda x: (x, x + 1)
|
||
|
|
||
|
result = s.apply(f)
|
||
|
expected = s.map(f)
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
s = Series([1, 2, 3])
|
||
|
result = s.apply(f)
|
||
|
expected = s.map(f)
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_apply_dont_convert_dtype(self):
|
||
|
s = Series(np.random.randn(10))
|
||
|
|
||
|
f = lambda x: x if x > 0 else np.nan
|
||
|
result = s.apply(f, convert_dtype=False)
|
||
|
assert result.dtype == object
|
||
|
|
||
|
def test_with_string_args(self):
|
||
|
|
||
|
for arg in ['sum', 'mean', 'min', 'max', 'std']:
|
||
|
result = self.ts.apply(arg)
|
||
|
expected = getattr(self.ts, arg)()
|
||
|
assert result == expected
|
||
|
|
||
|
def test_apply_args(self):
|
||
|
s = Series(['foo,bar'])
|
||
|
|
||
|
result = s.apply(str.split, args=(',', ))
|
||
|
assert result[0] == ['foo', 'bar']
|
||
|
assert isinstance(result[0], list)
|
||
|
|
||
|
def test_series_map_box_timestamps(self):
|
||
|
# GH#2689, GH#2627
|
||
|
ser = Series(pd.date_range('1/1/2000', periods=10))
|
||
|
|
||
|
def func(x):
|
||
|
return (x.hour, x.day, x.month)
|
||
|
|
||
|
# it works!
|
||
|
ser.map(func)
|
||
|
ser.apply(func)
|
||
|
|
||
|
def test_apply_box(self):
|
||
|
# ufunc will not be boxed. Same test cases as the test_map_box
|
||
|
vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]
|
||
|
s = pd.Series(vals)
|
||
|
assert s.dtype == 'datetime64[ns]'
|
||
|
# boxed value must be Timestamp instance
|
||
|
res = s.apply(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__,
|
||
|
x.day, x.tz))
|
||
|
exp = pd.Series(['Timestamp_1_None', 'Timestamp_2_None'])
|
||
|
tm.assert_series_equal(res, exp)
|
||
|
|
||
|
vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
|
||
|
pd.Timestamp('2011-01-02', tz='US/Eastern')]
|
||
|
s = pd.Series(vals)
|
||
|
assert s.dtype == 'datetime64[ns, US/Eastern]'
|
||
|
res = s.apply(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__,
|
||
|
x.day, x.tz))
|
||
|
exp = pd.Series(['Timestamp_1_US/Eastern', 'Timestamp_2_US/Eastern'])
|
||
|
tm.assert_series_equal(res, exp)
|
||
|
|
||
|
# timedelta
|
||
|
vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')]
|
||
|
s = pd.Series(vals)
|
||
|
assert s.dtype == 'timedelta64[ns]'
|
||
|
res = s.apply(lambda x: '{0}_{1}'.format(x.__class__.__name__, x.days))
|
||
|
exp = pd.Series(['Timedelta_1', 'Timedelta_2'])
|
||
|
tm.assert_series_equal(res, exp)
|
||
|
|
||
|
# period (object dtype, not boxed)
|
||
|
vals = [pd.Period('2011-01-01', freq='M'),
|
||
|
pd.Period('2011-01-02', freq='M')]
|
||
|
s = pd.Series(vals)
|
||
|
assert s.dtype == 'object'
|
||
|
res = s.apply(lambda x: '{0}_{1}'.format(x.__class__.__name__,
|
||
|
x.freqstr))
|
||
|
exp = pd.Series(['Period_M', 'Period_M'])
|
||
|
tm.assert_series_equal(res, exp)
|
||
|
|
||
|
def test_apply_datetimetz(self):
|
||
|
values = pd.date_range('2011-01-01', '2011-01-02',
|
||
|
freq='H').tz_localize('Asia/Tokyo')
|
||
|
s = pd.Series(values, name='XX')
|
||
|
|
||
|
result = s.apply(lambda x: x + pd.offsets.Day())
|
||
|
exp_values = pd.date_range('2011-01-02', '2011-01-03',
|
||
|
freq='H').tz_localize('Asia/Tokyo')
|
||
|
exp = pd.Series(exp_values, name='XX')
|
||
|
tm.assert_series_equal(result, exp)
|
||
|
|
||
|
# change dtype
|
||
|
# GH 14506 : Returned dtype changed from int32 to int64
|
||
|
result = s.apply(lambda x: x.hour)
|
||
|
exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64)
|
||
|
tm.assert_series_equal(result, exp)
|
||
|
|
||
|
# not vectorized
|
||
|
def f(x):
|
||
|
if not isinstance(x, pd.Timestamp):
|
||
|
raise ValueError
|
||
|
return str(x.tz)
|
||
|
|
||
|
result = s.map(f)
|
||
|
exp = pd.Series(['Asia/Tokyo'] * 25, name='XX')
|
||
|
tm.assert_series_equal(result, exp)
|
||
|
|
||
|
def test_apply_dict_depr(self):
|
||
|
|
||
|
tsdf = pd.DataFrame(np.random.randn(10, 3),
|
||
|
columns=['A', 'B', 'C'],
|
||
|
index=pd.date_range('1/1/2000', periods=10))
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
tsdf.A.agg({'foo': ['sum', 'mean']})
|
||
|
|
||
|
|
||
|
class TestSeriesAggregate(TestData):
|
||
|
|
||
|
def test_transform(self):
|
||
|
# transforming functions
|
||
|
|
||
|
with np.errstate(all='ignore'):
|
||
|
|
||
|
f_sqrt = np.sqrt(self.series)
|
||
|
f_abs = np.abs(self.series)
|
||
|
|
||
|
# ufunc
|
||
|
result = self.series.transform(np.sqrt)
|
||
|
expected = f_sqrt.copy()
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
result = self.series.apply(np.sqrt)
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
# list-like
|
||
|
result = self.series.transform([np.sqrt])
|
||
|
expected = f_sqrt.to_frame().copy()
|
||
|
expected.columns = ['sqrt']
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = self.series.transform([np.sqrt])
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = self.series.transform(['sqrt'])
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
# multiple items in list
|
||
|
# these are in the order as if we are applying both functions per
|
||
|
# series and then concatting
|
||
|
expected = pd.concat([f_sqrt, f_abs], axis=1)
|
||
|
expected.columns = ['sqrt', 'absolute']
|
||
|
result = self.series.apply([np.sqrt, np.abs])
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
result = self.series.transform(['sqrt', 'abs'])
|
||
|
expected.columns = ['sqrt', 'abs']
|
||
|
assert_frame_equal(result, expected)
|
||
|
|
||
|
# dict, provide renaming
|
||
|
expected = pd.concat([f_sqrt, f_abs], axis=1)
|
||
|
expected.columns = ['foo', 'bar']
|
||
|
expected = expected.unstack().rename('series')
|
||
|
|
||
|
result = self.series.apply({'foo': np.sqrt, 'bar': np.abs})
|
||
|
assert_series_equal(result.reindex_like(expected), expected)
|
||
|
|
||
|
def test_transform_and_agg_error(self):
|
||
|
# we are trying to transform with an aggregator
|
||
|
def f():
|
||
|
self.series.transform(['min', 'max'])
|
||
|
pytest.raises(ValueError, f)
|
||
|
|
||
|
def f():
|
||
|
with np.errstate(all='ignore'):
|
||
|
self.series.agg(['sqrt', 'max'])
|
||
|
pytest.raises(ValueError, f)
|
||
|
|
||
|
def f():
|
||
|
with np.errstate(all='ignore'):
|
||
|
self.series.transform(['sqrt', 'max'])
|
||
|
pytest.raises(ValueError, f)
|
||
|
|
||
|
def f():
|
||
|
with np.errstate(all='ignore'):
|
||
|
self.series.agg({'foo': np.sqrt, 'bar': 'sum'})
|
||
|
pytest.raises(ValueError, f)
|
||
|
|
||
|
def test_demo(self):
|
||
|
# demonstration tests
|
||
|
s = Series(range(6), dtype='int64', name='series')
|
||
|
|
||
|
result = s.agg(['min', 'max'])
|
||
|
expected = Series([0, 5], index=['min', 'max'], name='series')
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
result = s.agg({'foo': 'min'})
|
||
|
expected = Series([0], index=['foo'], name='series')
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
# nested renaming
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
result = s.agg({'foo': ['min', 'max']})
|
||
|
|
||
|
expected = DataFrame(
|
||
|
{'foo': [0, 5]},
|
||
|
index=['min', 'max']).unstack().rename('series')
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
def test_multiple_aggregators_with_dict_api(self):
|
||
|
|
||
|
s = Series(range(6), dtype='int64', name='series')
|
||
|
# nested renaming
|
||
|
with tm.assert_produces_warning(FutureWarning):
|
||
|
result = s.agg({'foo': ['min', 'max'], 'bar': ['sum', 'mean']})
|
||
|
|
||
|
expected = DataFrame(
|
||
|
{'foo': [5.0, np.nan, 0.0, np.nan],
|
||
|
'bar': [np.nan, 2.5, np.nan, 15.0]},
|
||
|
columns=['foo', 'bar'],
|
||
|
index=['max', 'mean',
|
||
|
'min', 'sum']).unstack().rename('series')
|
||
|
tm.assert_series_equal(result.reindex_like(expected), expected)
|
||
|
|
||
|
def test_agg_apply_evaluate_lambdas_the_same(self):
|
||
|
# test that we are evaluating row-by-row first
|
||
|
# before vectorized evaluation
|
||
|
result = self.series.apply(lambda x: str(x))
|
||
|
expected = self.series.agg(lambda x: str(x))
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
result = self.series.apply(str)
|
||
|
expected = self.series.agg(str)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
def test_with_nested_series(self):
|
||
|
# GH 2316
|
||
|
# .agg with a reducer and a transform, what to do
|
||
|
result = self.ts.apply(lambda x: Series(
|
||
|
[x, x ** 2], index=['x', 'x^2']))
|
||
|
expected = DataFrame({'x': self.ts, 'x^2': self.ts ** 2})
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
result = self.ts.agg(lambda x: Series(
|
||
|
[x, x ** 2], index=['x', 'x^2']))
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_replicate_describe(self):
|
||
|
# this also tests a result set that is all scalars
|
||
|
expected = self.series.describe()
|
||
|
result = self.series.apply(OrderedDict(
|
||
|
[('count', 'count'),
|
||
|
('mean', 'mean'),
|
||
|
('std', 'std'),
|
||
|
('min', 'min'),
|
||
|
('25%', lambda x: x.quantile(0.25)),
|
||
|
('50%', 'median'),
|
||
|
('75%', lambda x: x.quantile(0.75)),
|
||
|
('max', 'max')]))
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_reduce(self):
|
||
|
# reductions with named functions
|
||
|
result = self.series.agg(['sum', 'mean'])
|
||
|
expected = Series([self.series.sum(),
|
||
|
self.series.mean()],
|
||
|
['sum', 'mean'],
|
||
|
name=self.series.name)
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_non_callable_aggregates(self):
|
||
|
# test agg using non-callable series attributes
|
||
|
s = Series([1, 2, None])
|
||
|
|
||
|
# Calling agg w/ just a string arg same as calling s.arg
|
||
|
result = s.agg('size')
|
||
|
expected = s.size
|
||
|
assert result == expected
|
||
|
|
||
|
# test when mixed w/ callable reducers
|
||
|
result = s.agg(['size', 'count', 'mean'])
|
||
|
expected = Series(OrderedDict([('size', 3.0),
|
||
|
('count', 2.0),
|
||
|
('mean', 1.5)]))
|
||
|
assert_series_equal(result[expected.index], expected)
|
||
|
|
||
|
|
||
|
class TestSeriesMap(TestData):
|
||
|
|
||
|
def test_map(self):
|
||
|
index, data = tm.getMixedTypeDict()
|
||
|
|
||
|
source = Series(data['B'], index=data['C'])
|
||
|
target = Series(data['C'][:4], index=data['D'][:4])
|
||
|
|
||
|
merged = target.map(source)
|
||
|
|
||
|
for k, v in compat.iteritems(merged):
|
||
|
assert v == source[target[k]]
|
||
|
|
||
|
# input could be a dict
|
||
|
merged = target.map(source.to_dict())
|
||
|
|
||
|
for k, v in compat.iteritems(merged):
|
||
|
assert v == source[target[k]]
|
||
|
|
||
|
# function
|
||
|
result = self.ts.map(lambda x: x * 2)
|
||
|
tm.assert_series_equal(result, self.ts * 2)
|
||
|
|
||
|
# GH 10324
|
||
|
a = Series([1, 2, 3, 4])
|
||
|
b = Series(["even", "odd", "even", "odd"], dtype="category")
|
||
|
c = Series(["even", "odd", "even", "odd"])
|
||
|
|
||
|
exp = Series(["odd", "even", "odd", np.nan], dtype="category")
|
||
|
tm.assert_series_equal(a.map(b), exp)
|
||
|
exp = Series(["odd", "even", "odd", np.nan])
|
||
|
tm.assert_series_equal(a.map(c), exp)
|
||
|
|
||
|
a = Series(['a', 'b', 'c', 'd'])
|
||
|
b = Series([1, 2, 3, 4],
|
||
|
index=pd.CategoricalIndex(['b', 'c', 'd', 'e']))
|
||
|
c = Series([1, 2, 3, 4], index=Index(['b', 'c', 'd', 'e']))
|
||
|
|
||
|
exp = Series([np.nan, 1, 2, 3])
|
||
|
tm.assert_series_equal(a.map(b), exp)
|
||
|
exp = Series([np.nan, 1, 2, 3])
|
||
|
tm.assert_series_equal(a.map(c), exp)
|
||
|
|
||
|
a = Series(['a', 'b', 'c', 'd'])
|
||
|
b = Series(['B', 'C', 'D', 'E'], dtype='category',
|
||
|
index=pd.CategoricalIndex(['b', 'c', 'd', 'e']))
|
||
|
c = Series(['B', 'C', 'D', 'E'], index=Index(['b', 'c', 'd', 'e']))
|
||
|
|
||
|
exp = Series(pd.Categorical([np.nan, 'B', 'C', 'D'],
|
||
|
categories=['B', 'C', 'D', 'E']))
|
||
|
tm.assert_series_equal(a.map(b), exp)
|
||
|
exp = Series([np.nan, 'B', 'C', 'D'])
|
||
|
tm.assert_series_equal(a.map(c), exp)
|
||
|
|
||
|
@pytest.mark.parametrize("index", tm.all_index_generator(10))
|
||
|
def test_map_empty(self, index):
|
||
|
s = Series(index)
|
||
|
result = s.map({})
|
||
|
|
||
|
expected = pd.Series(np.nan, index=s.index)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
def test_map_compat(self):
|
||
|
# related GH 8024
|
||
|
s = Series([True, True, False], index=[1, 2, 3])
|
||
|
result = s.map({True: 'foo', False: 'bar'})
|
||
|
expected = Series(['foo', 'foo', 'bar'], index=[1, 2, 3])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_map_int(self):
|
||
|
left = Series({'a': 1., 'b': 2., 'c': 3., 'd': 4})
|
||
|
right = Series({1: 11, 2: 22, 3: 33})
|
||
|
|
||
|
assert left.dtype == np.float_
|
||
|
assert issubclass(right.dtype.type, np.integer)
|
||
|
|
||
|
merged = left.map(right)
|
||
|
assert merged.dtype == np.float_
|
||
|
assert isna(merged['d'])
|
||
|
assert not isna(merged['c'])
|
||
|
|
||
|
def test_map_type_inference(self):
|
||
|
s = Series(lrange(3))
|
||
|
s2 = s.map(lambda x: np.where(x == 0, 0, 1))
|
||
|
assert issubclass(s2.dtype.type, np.integer)
|
||
|
|
||
|
def test_map_decimal(self):
|
||
|
from decimal import Decimal
|
||
|
|
||
|
result = self.series.map(lambda x: Decimal(str(x)))
|
||
|
assert result.dtype == np.object_
|
||
|
assert isinstance(result[0], Decimal)
|
||
|
|
||
|
def test_map_na_exclusion(self):
|
||
|
s = Series([1.5, np.nan, 3, np.nan, 5])
|
||
|
|
||
|
result = s.map(lambda x: x * 2, na_action='ignore')
|
||
|
exp = s * 2
|
||
|
assert_series_equal(result, exp)
|
||
|
|
||
|
def test_map_dict_with_tuple_keys(self):
|
||
|
"""
|
||
|
Due to new MultiIndex-ing behaviour in v0.14.0,
|
||
|
dicts with tuple keys passed to map were being
|
||
|
converted to a multi-index, preventing tuple values
|
||
|
from being mapped properly.
|
||
|
"""
|
||
|
# GH 18496
|
||
|
df = pd.DataFrame({'a': [(1, ), (2, ), (3, 4), (5, 6)]})
|
||
|
label_mappings = {(1, ): 'A', (2, ): 'B', (3, 4): 'A', (5, 6): 'B'}
|
||
|
|
||
|
df['labels'] = df['a'].map(label_mappings)
|
||
|
df['expected_labels'] = pd.Series(['A', 'B', 'A', 'B'], index=df.index)
|
||
|
# All labels should be filled now
|
||
|
tm.assert_series_equal(df['labels'], df['expected_labels'],
|
||
|
check_names=False)
|
||
|
|
||
|
def test_map_counter(self):
|
||
|
s = Series(['a', 'b', 'c'], index=[1, 2, 3])
|
||
|
counter = Counter()
|
||
|
counter['b'] = 5
|
||
|
counter['c'] += 1
|
||
|
result = s.map(counter)
|
||
|
expected = Series([0, 5, 1], index=[1, 2, 3])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_map_defaultdict(self):
|
||
|
s = Series([1, 2, 3], index=['a', 'b', 'c'])
|
||
|
default_dict = defaultdict(lambda: 'blank')
|
||
|
default_dict[1] = 'stuff'
|
||
|
result = s.map(default_dict)
|
||
|
expected = Series(['stuff', 'blank', 'blank'], index=['a', 'b', 'c'])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_map_dict_subclass_with_missing(self):
|
||
|
"""
|
||
|
Test Series.map with a dictionary subclass that defines __missing__,
|
||
|
i.e. sets a default value (GH #15999).
|
||
|
"""
|
||
|
class DictWithMissing(dict):
|
||
|
def __missing__(self, key):
|
||
|
return 'missing'
|
||
|
s = Series([1, 2, 3])
|
||
|
dictionary = DictWithMissing({3: 'three'})
|
||
|
result = s.map(dictionary)
|
||
|
expected = Series(['missing', 'missing', 'three'])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_map_dict_subclass_without_missing(self):
|
||
|
class DictWithoutMissing(dict):
|
||
|
pass
|
||
|
s = Series([1, 2, 3])
|
||
|
dictionary = DictWithoutMissing({3: 'three'})
|
||
|
result = s.map(dictionary)
|
||
|
expected = Series([np.nan, np.nan, 'three'])
|
||
|
assert_series_equal(result, expected)
|
||
|
|
||
|
def test_map_box(self):
|
||
|
vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')]
|
||
|
s = pd.Series(vals)
|
||
|
assert s.dtype == 'datetime64[ns]'
|
||
|
# boxed value must be Timestamp instance
|
||
|
res = s.map(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__,
|
||
|
x.day, x.tz))
|
||
|
exp = pd.Series(['Timestamp_1_None', 'Timestamp_2_None'])
|
||
|
tm.assert_series_equal(res, exp)
|
||
|
|
||
|
vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'),
|
||
|
pd.Timestamp('2011-01-02', tz='US/Eastern')]
|
||
|
s = pd.Series(vals)
|
||
|
assert s.dtype == 'datetime64[ns, US/Eastern]'
|
||
|
res = s.map(lambda x: '{0}_{1}_{2}'.format(x.__class__.__name__,
|
||
|
x.day, x.tz))
|
||
|
exp = pd.Series(['Timestamp_1_US/Eastern', 'Timestamp_2_US/Eastern'])
|
||
|
tm.assert_series_equal(res, exp)
|
||
|
|
||
|
# timedelta
|
||
|
vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')]
|
||
|
s = pd.Series(vals)
|
||
|
assert s.dtype == 'timedelta64[ns]'
|
||
|
res = s.map(lambda x: '{0}_{1}'.format(x.__class__.__name__, x.days))
|
||
|
exp = pd.Series(['Timedelta_1', 'Timedelta_2'])
|
||
|
tm.assert_series_equal(res, exp)
|
||
|
|
||
|
# period (object dtype, not boxed)
|
||
|
vals = [pd.Period('2011-01-01', freq='M'),
|
||
|
pd.Period('2011-01-02', freq='M')]
|
||
|
s = pd.Series(vals)
|
||
|
assert s.dtype == 'object'
|
||
|
res = s.map(lambda x: '{0}_{1}'.format(x.__class__.__name__,
|
||
|
x.freqstr))
|
||
|
exp = pd.Series(['Period_M', 'Period_M'])
|
||
|
tm.assert_series_equal(res, exp)
|
||
|
|
||
|
def test_map_categorical(self):
|
||
|
values = pd.Categorical(list('ABBABCD'), categories=list('DCBA'),
|
||
|
ordered=True)
|
||
|
s = pd.Series(values, name='XX', index=list('abcdefg'))
|
||
|
|
||
|
result = s.map(lambda x: x.lower())
|
||
|
exp_values = pd.Categorical(list('abbabcd'), categories=list('dcba'),
|
||
|
ordered=True)
|
||
|
exp = pd.Series(exp_values, name='XX', index=list('abcdefg'))
|
||
|
tm.assert_series_equal(result, exp)
|
||
|
tm.assert_categorical_equal(result.values, exp_values)
|
||
|
|
||
|
result = s.map(lambda x: 'A')
|
||
|
exp = pd.Series(['A'] * 7, name='XX', index=list('abcdefg'))
|
||
|
tm.assert_series_equal(result, exp)
|
||
|
assert result.dtype == np.object
|
||
|
|
||
|
with pytest.raises(NotImplementedError):
|
||
|
s.map(lambda x: x, na_action='ignore')
|
||
|
|
||
|
def test_map_datetimetz(self):
|
||
|
values = pd.date_range('2011-01-01', '2011-01-02',
|
||
|
freq='H').tz_localize('Asia/Tokyo')
|
||
|
s = pd.Series(values, name='XX')
|
||
|
|
||
|
# keep tz
|
||
|
result = s.map(lambda x: x + pd.offsets.Day())
|
||
|
exp_values = pd.date_range('2011-01-02', '2011-01-03',
|
||
|
freq='H').tz_localize('Asia/Tokyo')
|
||
|
exp = pd.Series(exp_values, name='XX')
|
||
|
tm.assert_series_equal(result, exp)
|
||
|
|
||
|
# change dtype
|
||
|
# GH 14506 : Returned dtype changed from int32 to int64
|
||
|
result = s.map(lambda x: x.hour)
|
||
|
exp = pd.Series(list(range(24)) + [0], name='XX', dtype=np.int64)
|
||
|
tm.assert_series_equal(result, exp)
|
||
|
|
||
|
with pytest.raises(NotImplementedError):
|
||
|
s.map(lambda x: x, na_action='ignore')
|
||
|
|
||
|
# not vectorized
|
||
|
def f(x):
|
||
|
if not isinstance(x, pd.Timestamp):
|
||
|
raise ValueError
|
||
|
return str(x.tz)
|
||
|
|
||
|
result = s.map(f)
|
||
|
exp = pd.Series(['Asia/Tokyo'] * 25, name='XX')
|
||
|
tm.assert_series_equal(result, exp)
|
||
|
|
||
|
@pytest.mark.parametrize("vals,mapping,exp", [
|
||
|
(list('abc'), {np.nan: 'not NaN'}, [np.nan] * 3 + ['not NaN']),
|
||
|
(list('abc'), {'a': 'a letter'}, ['a letter'] + [np.nan] * 3),
|
||
|
(list(range(3)), {0: 42}, [42] + [np.nan] * 3)])
|
||
|
def test_map_missing_mixed(self, vals, mapping, exp):
|
||
|
# GH20495
|
||
|
s = pd.Series(vals + [np.nan])
|
||
|
result = s.map(mapping)
|
||
|
|
||
|
tm.assert_series_equal(result, pd.Series(exp))
|