laywerrobot/lib/python3.6/site-packages/pandas/tests/series/test_missing.py
2020-08-27 21:55:39 +02:00

1319 lines
50 KiB
Python

# coding=utf-8
# pylint: disable-msg=E1101,W0612
import pytz
import pytest
from datetime import timedelta, datetime
from distutils.version import LooseVersion
from numpy import nan
import numpy as np
import pandas as pd
from pandas import (Series, DataFrame, isna, date_range,
MultiIndex, Index, Timestamp, NaT, IntervalIndex,
Categorical)
from pandas.compat import range
from pandas._libs.tslib import iNaT
from pandas.core.series import remove_na
from pandas.util.testing import assert_series_equal, assert_frame_equal
import pandas.util.testing as tm
import pandas.util._test_decorators as td
from .common import TestData
try:
import scipy
_is_scipy_ge_0190 = (LooseVersion(scipy.__version__) >=
LooseVersion('0.19.0'))
except:
_is_scipy_ge_0190 = False
def _skip_if_no_pchip():
try:
from scipy.interpolate import pchip_interpolate # noqa
except ImportError:
import pytest
pytest.skip('scipy.interpolate.pchip missing')
def _skip_if_no_akima():
try:
from scipy.interpolate import Akima1DInterpolator # noqa
except ImportError:
import pytest
pytest.skip('scipy.interpolate.Akima1DInterpolator missing')
def _simple_ts(start, end, freq='D'):
rng = date_range(start, end, freq=freq)
return Series(np.random.randn(len(rng)), index=rng)
class TestSeriesMissingData(TestData):
def test_remove_na_deprecation(self):
# see gh-16971
with tm.assert_produces_warning(FutureWarning):
remove_na(Series([]))
def test_timedelta_fillna(self):
# GH 3371
s = Series([Timestamp('20130101'), Timestamp('20130101'),
Timestamp('20130102'), Timestamp('20130103 9:01:01')])
td = s.diff()
# reg fillna
result = td.fillna(0)
expected = Series([timedelta(0), timedelta(0), timedelta(1),
timedelta(days=1, seconds=9 * 3600 + 60 + 1)])
assert_series_equal(result, expected)
# interprested as seconds
result = td.fillna(1)
expected = Series([timedelta(seconds=1), timedelta(0), timedelta(1),
timedelta(days=1, seconds=9 * 3600 + 60 + 1)])
assert_series_equal(result, expected)
result = td.fillna(timedelta(days=1, seconds=1))
expected = Series([timedelta(days=1, seconds=1), timedelta(0),
timedelta(1),
timedelta(days=1, seconds=9 * 3600 + 60 + 1)])
assert_series_equal(result, expected)
result = td.fillna(np.timedelta64(int(1e9)))
expected = Series([timedelta(seconds=1), timedelta(0), timedelta(1),
timedelta(days=1, seconds=9 * 3600 + 60 + 1)])
assert_series_equal(result, expected)
result = td.fillna(NaT)
expected = Series([NaT, timedelta(0), timedelta(1),
timedelta(days=1, seconds=9 * 3600 + 60 + 1)],
dtype='m8[ns]')
assert_series_equal(result, expected)
# ffill
td[2] = np.nan
result = td.ffill()
expected = td.fillna(0)
expected[0] = np.nan
assert_series_equal(result, expected)
# bfill
td[2] = np.nan
result = td.bfill()
expected = td.fillna(0)
expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1)
assert_series_equal(result, expected)
def test_datetime64_fillna(self):
s = Series([Timestamp('20130101'), Timestamp('20130101'), Timestamp(
'20130102'), Timestamp('20130103 9:01:01')])
s[2] = np.nan
# reg fillna
result = s.fillna(Timestamp('20130104'))
expected = Series([Timestamp('20130101'), Timestamp(
'20130101'), Timestamp('20130104'), Timestamp('20130103 9:01:01')])
assert_series_equal(result, expected)
result = s.fillna(NaT)
expected = s
assert_series_equal(result, expected)
# ffill
result = s.ffill()
expected = Series([Timestamp('20130101'), Timestamp(
'20130101'), Timestamp('20130101'), Timestamp('20130103 9:01:01')])
assert_series_equal(result, expected)
# bfill
result = s.bfill()
expected = Series([Timestamp('20130101'), Timestamp('20130101'),
Timestamp('20130103 9:01:01'), Timestamp(
'20130103 9:01:01')])
assert_series_equal(result, expected)
# GH 6587
# make sure that we are treating as integer when filling
# this also tests inference of a datetime-like with NaT's
s = Series([pd.NaT, pd.NaT, '2013-08-05 15:30:00.000001'])
expected = Series(
['2013-08-05 15:30:00.000001', '2013-08-05 15:30:00.000001',
'2013-08-05 15:30:00.000001'], dtype='M8[ns]')
result = s.fillna(method='backfill')
assert_series_equal(result, expected)
def test_datetime64_tz_fillna(self):
for tz in ['US/Eastern', 'Asia/Tokyo']:
# DatetimeBlock
s = Series([Timestamp('2011-01-01 10:00'), pd.NaT,
Timestamp('2011-01-03 10:00'), pd.NaT])
null_loc = pd.Series([False, True, False, True])
result = s.fillna(pd.Timestamp('2011-01-02 10:00'))
expected = Series([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00'),
Timestamp('2011-01-03 10:00'),
Timestamp('2011-01-02 10:00')])
tm.assert_series_equal(expected, result)
# check s is not changed
tm.assert_series_equal(pd.isna(s), null_loc)
result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz))
expected = Series([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00', tz=tz),
Timestamp('2011-01-03 10:00'),
Timestamp('2011-01-02 10:00', tz=tz)])
tm.assert_series_equal(expected, result)
tm.assert_series_equal(pd.isna(s), null_loc)
result = s.fillna('AAA')
expected = Series([Timestamp('2011-01-01 10:00'), 'AAA',
Timestamp('2011-01-03 10:00'), 'AAA'],
dtype=object)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(pd.isna(s), null_loc)
result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
3: pd.Timestamp('2011-01-04 10:00')})
expected = Series([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00', tz=tz),
Timestamp('2011-01-03 10:00'),
Timestamp('2011-01-04 10:00')])
tm.assert_series_equal(expected, result)
tm.assert_series_equal(pd.isna(s), null_loc)
result = s.fillna({1: pd.Timestamp('2011-01-02 10:00'),
3: pd.Timestamp('2011-01-04 10:00')})
expected = Series([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-02 10:00'),
Timestamp('2011-01-03 10:00'),
Timestamp('2011-01-04 10:00')])
tm.assert_series_equal(expected, result)
tm.assert_series_equal(pd.isna(s), null_loc)
# DatetimeBlockTZ
idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT,
'2011-01-03 10:00', pd.NaT], tz=tz)
s = pd.Series(idx)
assert s.dtype == 'datetime64[ns, {0}]'.format(tz)
tm.assert_series_equal(pd.isna(s), null_loc)
result = s.fillna(pd.Timestamp('2011-01-02 10:00'))
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
Timestamp('2011-01-02 10:00'),
Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2011-01-02 10:00')])
tm.assert_series_equal(expected, result)
tm.assert_series_equal(pd.isna(s), null_loc)
result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz))
idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00',
'2011-01-03 10:00', '2011-01-02 10:00'],
tz=tz)
expected = Series(idx)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(pd.isna(s), null_loc)
result = s.fillna(pd.Timestamp('2011-01-02 10:00',
tz=tz).to_pydatetime())
idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00',
'2011-01-03 10:00', '2011-01-02 10:00'],
tz=tz)
expected = Series(idx)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(pd.isna(s), null_loc)
result = s.fillna('AAA')
expected = Series([Timestamp('2011-01-01 10:00', tz=tz), 'AAA',
Timestamp('2011-01-03 10:00', tz=tz), 'AAA'],
dtype=object)
tm.assert_series_equal(expected, result)
tm.assert_series_equal(pd.isna(s), null_loc)
result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
3: pd.Timestamp('2011-01-04 10:00')})
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
Timestamp('2011-01-02 10:00', tz=tz),
Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2011-01-04 10:00')])
tm.assert_series_equal(expected, result)
tm.assert_series_equal(pd.isna(s), null_loc)
result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz),
3: pd.Timestamp('2011-01-04 10:00', tz=tz)})
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
Timestamp('2011-01-02 10:00', tz=tz),
Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2011-01-04 10:00', tz=tz)])
tm.assert_series_equal(expected, result)
tm.assert_series_equal(pd.isna(s), null_loc)
# filling with a naive/other zone, coerce to object
result = s.fillna(Timestamp('20130101'))
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
Timestamp('2013-01-01'),
Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2013-01-01')])
tm.assert_series_equal(expected, result)
tm.assert_series_equal(pd.isna(s), null_loc)
result = s.fillna(Timestamp('20130101', tz='US/Pacific'))
expected = Series([Timestamp('2011-01-01 10:00', tz=tz),
Timestamp('2013-01-01', tz='US/Pacific'),
Timestamp('2011-01-03 10:00', tz=tz),
Timestamp('2013-01-01', tz='US/Pacific')])
tm.assert_series_equal(expected, result)
tm.assert_series_equal(pd.isna(s), null_loc)
# with timezone
# GH 15855
df = pd.Series([pd.Timestamp('2012-11-11 00:00:00+01:00'), pd.NaT])
exp = pd.Series([pd.Timestamp('2012-11-11 00:00:00+01:00'),
pd.Timestamp('2012-11-11 00:00:00+01:00')])
assert_series_equal(df.fillna(method='pad'), exp)
df = pd.Series([pd.NaT, pd.Timestamp('2012-11-11 00:00:00+01:00')])
exp = pd.Series([pd.Timestamp('2012-11-11 00:00:00+01:00'),
pd.Timestamp('2012-11-11 00:00:00+01:00')])
assert_series_equal(df.fillna(method='bfill'), exp)
def test_fillna_consistency(self):
# GH 16402
# fillna with a tz aware to a tz-naive, should result in object
s = Series([Timestamp('20130101'), pd.NaT])
result = s.fillna(Timestamp('20130101', tz='US/Eastern'))
expected = Series([Timestamp('20130101'),
Timestamp('2013-01-01', tz='US/Eastern')],
dtype='object')
assert_series_equal(result, expected)
# where (we ignore the errors=)
result = s.where([True, False],
Timestamp('20130101', tz='US/Eastern'),
errors='ignore')
assert_series_equal(result, expected)
result = s.where([True, False],
Timestamp('20130101', tz='US/Eastern'),
errors='ignore')
assert_series_equal(result, expected)
# with a non-datetime
result = s.fillna('foo')
expected = Series([Timestamp('20130101'),
'foo'])
assert_series_equal(result, expected)
# assignment
s2 = s.copy()
s2[1] = 'foo'
assert_series_equal(s2, expected)
def test_datetime64tz_fillna_round_issue(self):
# GH 14872
data = pd.Series([pd.NaT, pd.NaT,
datetime(2016, 12, 12, 22, 24, 6, 100001,
tzinfo=pytz.utc)])
filled = data.fillna(method='bfill')
expected = pd.Series([datetime(2016, 12, 12, 22, 24, 6,
100001, tzinfo=pytz.utc),
datetime(2016, 12, 12, 22, 24, 6,
100001, tzinfo=pytz.utc),
datetime(2016, 12, 12, 22, 24, 6,
100001, tzinfo=pytz.utc)])
assert_series_equal(filled, expected)
def test_fillna_downcast(self):
# GH 15277
# infer int64 from float64
s = pd.Series([1., np.nan])
result = s.fillna(0, downcast='infer')
expected = pd.Series([1, 0])
assert_series_equal(result, expected)
# infer int64 from float64 when fillna value is a dict
s = pd.Series([1., np.nan])
result = s.fillna({1: 0}, downcast='infer')
expected = pd.Series([1, 0])
assert_series_equal(result, expected)
def test_fillna_int(self):
s = Series(np.random.randint(-100, 100, 50))
s.fillna(method='ffill', inplace=True)
assert_series_equal(s.fillna(method='ffill', inplace=False), s)
def test_fillna_raise(self):
s = Series(np.random.randint(-100, 100, 50))
pytest.raises(TypeError, s.fillna, [1, 2])
pytest.raises(TypeError, s.fillna, (1, 2))
# related GH 9217, make sure limit is an int and greater than 0
s = Series([1, 2, 3, None])
for limit in [-1, 0, 1., 2.]:
for method in ['backfill', 'bfill', 'pad', 'ffill', None]:
with pytest.raises(ValueError):
s.fillna(1, limit=limit, method=method)
def test_categorical_nan_equality(self):
cat = Series(Categorical(["a", "b", "c", np.nan]))
exp = Series([True, True, True, False])
res = (cat == cat)
tm.assert_series_equal(res, exp)
def test_categorical_nan_handling(self):
# NaNs are represented as -1 in labels
s = Series(Categorical(["a", "b", np.nan, "a"]))
tm.assert_index_equal(s.cat.categories, Index(["a", "b"]))
tm.assert_numpy_array_equal(s.values.codes,
np.array([0, 1, -1, 0], dtype=np.int8))
@pytest.mark.parametrize('fill_value, expected_output', [
('a', ['a', 'a', 'b', 'a', 'a']),
({1: 'a', 3: 'b', 4: 'b'}, ['a', 'a', 'b', 'b', 'b']),
({1: 'a'}, ['a', 'a', 'b', np.nan, np.nan]),
({1: 'a', 3: 'b'}, ['a', 'a', 'b', 'b', np.nan]),
(Series('a'), ['a', np.nan, 'b', np.nan, np.nan]),
(Series('a', index=[1]), ['a', 'a', 'b', np.nan, np.nan]),
(Series({1: 'a', 3: 'b'}), ['a', 'a', 'b', 'b', np.nan]),
(Series(['a', 'b'], index=[3, 4]), ['a', np.nan, 'b', 'a', 'b'])
])
def test_fillna_categorical(self, fill_value, expected_output):
# GH 17033
# Test fillna for a Categorical series
data = ['a', np.nan, 'b', np.nan, np.nan]
s = Series(Categorical(data, categories=['a', 'b']))
exp = Series(Categorical(expected_output, categories=['a', 'b']))
tm.assert_series_equal(s.fillna(fill_value), exp)
def test_fillna_categorical_raise(self):
data = ['a', np.nan, 'b', np.nan, np.nan]
s = Series(Categorical(data, categories=['a', 'b']))
with tm.assert_raises_regex(ValueError,
"fill value must be in categories"):
s.fillna('d')
with tm.assert_raises_regex(ValueError,
"fill value must be in categories"):
s.fillna(Series('d'))
with tm.assert_raises_regex(ValueError,
"fill value must be in categories"):
s.fillna({1: 'd', 3: 'a'})
with tm.assert_raises_regex(TypeError,
'"value" parameter must be a scalar or '
'dict, but you passed a "list"'):
s.fillna(['a', 'b'])
with tm.assert_raises_regex(TypeError,
'"value" parameter must be a scalar or '
'dict, but you passed a "tuple"'):
s.fillna(('a', 'b'))
with tm.assert_raises_regex(TypeError,
'"value" parameter must be a scalar, dict '
'or Series, but you passed a "DataFrame"'):
s.fillna(DataFrame({1: ['a'], 3: ['b']}))
def test_fillna_nat(self):
series = Series([0, 1, 2, iNaT], dtype='M8[ns]')
filled = series.fillna(method='pad')
filled2 = series.fillna(value=series.values[2])
expected = series.copy()
expected.values[3] = expected.values[2]
assert_series_equal(filled, expected)
assert_series_equal(filled2, expected)
df = DataFrame({'A': series})
filled = df.fillna(method='pad')
filled2 = df.fillna(value=series.values[2])
expected = DataFrame({'A': expected})
assert_frame_equal(filled, expected)
assert_frame_equal(filled2, expected)
series = Series([iNaT, 0, 1, 2], dtype='M8[ns]')
filled = series.fillna(method='bfill')
filled2 = series.fillna(value=series[1])
expected = series.copy()
expected[0] = expected[1]
assert_series_equal(filled, expected)
assert_series_equal(filled2, expected)
df = DataFrame({'A': series})
filled = df.fillna(method='bfill')
filled2 = df.fillna(value=series[1])
expected = DataFrame({'A': expected})
assert_frame_equal(filled, expected)
assert_frame_equal(filled2, expected)
def test_isna_for_inf(self):
s = Series(['a', np.inf, np.nan, 1.0])
with pd.option_context('mode.use_inf_as_na', True):
r = s.isna()
dr = s.dropna()
e = Series([False, True, True, False])
de = Series(['a', 1.0], index=[0, 3])
tm.assert_series_equal(r, e)
tm.assert_series_equal(dr, de)
@tm.capture_stdout
def test_isnull_for_inf_deprecated(self):
# gh-17115
s = Series(['a', np.inf, np.nan, 1.0])
with pd.option_context('mode.use_inf_as_null', True):
r = s.isna()
dr = s.dropna()
e = Series([False, True, True, False])
de = Series(['a', 1.0], index=[0, 3])
tm.assert_series_equal(r, e)
tm.assert_series_equal(dr, de)
def test_fillna(self):
ts = Series([0., 1., 2., 3., 4.], index=tm.makeDateIndex(5))
tm.assert_series_equal(ts, ts.fillna(method='ffill'))
ts[2] = np.NaN
exp = Series([0., 1., 1., 3., 4.], index=ts.index)
tm.assert_series_equal(ts.fillna(method='ffill'), exp)
exp = Series([0., 1., 3., 3., 4.], index=ts.index)
tm.assert_series_equal(ts.fillna(method='backfill'), exp)
exp = Series([0., 1., 5., 3., 4.], index=ts.index)
tm.assert_series_equal(ts.fillna(value=5), exp)
pytest.raises(ValueError, ts.fillna)
pytest.raises(ValueError, self.ts.fillna, value=0, method='ffill')
# GH 5703
s1 = Series([np.nan])
s2 = Series([1])
result = s1.fillna(s2)
expected = Series([1.])
assert_series_equal(result, expected)
result = s1.fillna({})
assert_series_equal(result, s1)
result = s1.fillna(Series(()))
assert_series_equal(result, s1)
result = s2.fillna(s1)
assert_series_equal(result, s2)
result = s1.fillna({0: 1})
assert_series_equal(result, expected)
result = s1.fillna({1: 1})
assert_series_equal(result, Series([np.nan]))
result = s1.fillna({0: 1, 1: 1})
assert_series_equal(result, expected)
result = s1.fillna(Series({0: 1, 1: 1}))
assert_series_equal(result, expected)
result = s1.fillna(Series({0: 1, 1: 1}, index=[4, 5]))
assert_series_equal(result, s1)
s1 = Series([0, 1, 2], list('abc'))
s2 = Series([0, np.nan, 2], list('bac'))
result = s2.fillna(s1)
expected = Series([0, 0, 2.], list('bac'))
assert_series_equal(result, expected)
# limit
s = Series(np.nan, index=[0, 1, 2])
result = s.fillna(999, limit=1)
expected = Series([999, np.nan, np.nan], index=[0, 1, 2])
assert_series_equal(result, expected)
result = s.fillna(999, limit=2)
expected = Series([999, 999, np.nan], index=[0, 1, 2])
assert_series_equal(result, expected)
# GH 9043
# make sure a string representation of int/float values can be filled
# correctly without raising errors or being converted
vals = ['0', '1.5', '-0.3']
for val in vals:
s = Series([0, 1, np.nan, np.nan, 4], dtype='float64')
result = s.fillna(val)
expected = Series([0, 1, val, val, 4], dtype='object')
assert_series_equal(result, expected)
def test_fillna_bug(self):
x = Series([nan, 1., nan, 3., nan], ['z', 'a', 'b', 'c', 'd'])
filled = x.fillna(method='ffill')
expected = Series([nan, 1., 1., 3., 3.], x.index)
assert_series_equal(filled, expected)
filled = x.fillna(method='bfill')
expected = Series([1., 1., 3., 3., nan], x.index)
assert_series_equal(filled, expected)
def test_fillna_inplace(self):
x = Series([nan, 1., nan, 3., nan], ['z', 'a', 'b', 'c', 'd'])
y = x.copy()
y.fillna(value=0, inplace=True)
expected = x.fillna(value=0)
assert_series_equal(y, expected)
def test_fillna_invalid_method(self):
try:
self.ts.fillna(method='ffil')
except ValueError as inst:
assert 'ffil' in str(inst)
def test_ffill(self):
ts = Series([0., 1., 2., 3., 4.], index=tm.makeDateIndex(5))
ts[2] = np.NaN
assert_series_equal(ts.ffill(), ts.fillna(method='ffill'))
def test_ffill_mixed_dtypes_without_missing_data(self):
# GH14956
series = pd.Series([datetime(2015, 1, 1, tzinfo=pytz.utc), 1])
result = series.ffill()
assert_series_equal(series, result)
def test_bfill(self):
ts = Series([0., 1., 2., 3., 4.], index=tm.makeDateIndex(5))
ts[2] = np.NaN
assert_series_equal(ts.bfill(), ts.fillna(method='bfill'))
def test_timedelta64_nan(self):
td = Series([timedelta(days=i) for i in range(10)])
# nan ops on timedeltas
td1 = td.copy()
td1[0] = np.nan
assert isna(td1[0])
assert td1[0].value == iNaT
td1[0] = td[0]
assert not isna(td1[0])
td1[1] = iNaT
assert isna(td1[1])
assert td1[1].value == iNaT
td1[1] = td[1]
assert not isna(td1[1])
td1[2] = NaT
assert isna(td1[2])
assert td1[2].value == iNaT
td1[2] = td[2]
assert not isna(td1[2])
# boolean setting
# this doesn't work, not sure numpy even supports it
# result = td[(td>np.timedelta64(timedelta(days=3))) &
# td<np.timedelta64(timedelta(days=7)))] = np.nan
# assert isna(result).sum() == 7
# NumPy limitiation =(
# def test_logical_range_select(self):
# np.random.seed(12345)
# selector = -0.5 <= self.ts <= 0.5
# expected = (self.ts >= -0.5) & (self.ts <= 0.5)
# assert_series_equal(selector, expected)
def test_dropna_empty(self):
s = Series([])
assert len(s.dropna()) == 0
s.dropna(inplace=True)
assert len(s) == 0
# invalid axis
pytest.raises(ValueError, s.dropna, axis=1)
def test_datetime64_tz_dropna(self):
# DatetimeBlock
s = Series([Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp(
'2011-01-03 10:00'), pd.NaT])
result = s.dropna()
expected = Series([Timestamp('2011-01-01 10:00'),
Timestamp('2011-01-03 10:00')], index=[0, 2])
tm.assert_series_equal(result, expected)
# DatetimeBlockTZ
idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT,
'2011-01-03 10:00', pd.NaT],
tz='Asia/Tokyo')
s = pd.Series(idx)
assert s.dtype == 'datetime64[ns, Asia/Tokyo]'
result = s.dropna()
expected = Series([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
Timestamp('2011-01-03 10:00', tz='Asia/Tokyo')],
index=[0, 2])
assert result.dtype == 'datetime64[ns, Asia/Tokyo]'
tm.assert_series_equal(result, expected)
def test_dropna_no_nan(self):
for s in [Series([1, 2, 3], name='x'), Series(
[False, True, False], name='x')]:
result = s.dropna()
tm.assert_series_equal(result, s)
assert result is not s
s2 = s.copy()
s2.dropna(inplace=True)
tm.assert_series_equal(s2, s)
def test_dropna_intervals(self):
s = Series([np.nan, 1, 2, 3], IntervalIndex.from_arrays(
[np.nan, 0, 1, 2],
[np.nan, 1, 2, 3]))
result = s.dropna()
expected = s.iloc[1:]
assert_series_equal(result, expected)
def test_valid(self):
ts = self.ts.copy()
ts[::2] = np.NaN
result = ts.dropna()
assert len(result) == ts.count()
tm.assert_series_equal(result, ts[1::2])
tm.assert_series_equal(result, ts[pd.notna(ts)])
def test_isna(self):
ser = Series([0, 5.4, 3, nan, -0.001])
expected = Series([False, False, False, True, False])
tm.assert_series_equal(ser.isna(), expected)
ser = Series(["hi", "", nan])
expected = Series([False, False, True])
tm.assert_series_equal(ser.isna(), expected)
def test_notna(self):
ser = Series([0, 5.4, 3, nan, -0.001])
expected = Series([True, True, True, False, True])
tm.assert_series_equal(ser.notna(), expected)
ser = Series(["hi", "", nan])
expected = Series([True, True, False])
tm.assert_series_equal(ser.notna(), expected)
def test_pad_nan(self):
x = Series([np.nan, 1., np.nan, 3., np.nan], ['z', 'a', 'b', 'c', 'd'],
dtype=float)
x.fillna(method='pad', inplace=True)
expected = Series([np.nan, 1.0, 1.0, 3.0, 3.0],
['z', 'a', 'b', 'c', 'd'], dtype=float)
assert_series_equal(x[1:], expected[1:])
assert np.isnan(x[0]), np.isnan(expected[0])
def test_pad_require_monotonicity(self):
rng = date_range('1/1/2000', '3/1/2000', freq='B')
# neither monotonic increasing or decreasing
rng2 = rng[[1, 0, 2]]
pytest.raises(ValueError, rng2.get_indexer, rng, method='pad')
def test_dropna_preserve_name(self):
self.ts[:5] = np.nan
result = self.ts.dropna()
assert result.name == self.ts.name
name = self.ts.name
ts = self.ts.copy()
ts.dropna(inplace=True)
assert ts.name == name
def test_fill_value_when_combine_const(self):
# GH12723
s = Series([0, 1, np.nan, 3, 4, 5])
exp = s.fillna(0).add(2)
res = s.add(2, fill_value=0)
assert_series_equal(res, exp)
def test_series_fillna_limit(self):
index = np.arange(10)
s = Series(np.random.randn(10), index=index)
result = s[:2].reindex(index)
result = result.fillna(method='pad', limit=5)
expected = s[:2].reindex(index).fillna(method='pad')
expected[-3:] = np.nan
assert_series_equal(result, expected)
result = s[-2:].reindex(index)
result = result.fillna(method='bfill', limit=5)
expected = s[-2:].reindex(index).fillna(method='backfill')
expected[:3] = np.nan
assert_series_equal(result, expected)
def test_sparse_series_fillna_limit(self):
index = np.arange(10)
s = Series(np.random.randn(10), index=index)
ss = s[:2].reindex(index).to_sparse()
result = ss.fillna(method='pad', limit=5)
expected = ss.fillna(method='pad', limit=5)
expected = expected.to_dense()
expected[-3:] = np.nan
expected = expected.to_sparse()
assert_series_equal(result, expected)
ss = s[-2:].reindex(index).to_sparse()
result = ss.fillna(method='backfill', limit=5)
expected = ss.fillna(method='backfill')
expected = expected.to_dense()
expected[:3] = np.nan
expected = expected.to_sparse()
assert_series_equal(result, expected)
def test_sparse_series_pad_backfill_limit(self):
index = np.arange(10)
s = Series(np.random.randn(10), index=index)
s = s.to_sparse()
result = s[:2].reindex(index, method='pad', limit=5)
expected = s[:2].reindex(index).fillna(method='pad')
expected = expected.to_dense()
expected[-3:] = np.nan
expected = expected.to_sparse()
assert_series_equal(result, expected)
result = s[-2:].reindex(index, method='backfill', limit=5)
expected = s[-2:].reindex(index).fillna(method='backfill')
expected = expected.to_dense()
expected[:3] = np.nan
expected = expected.to_sparse()
assert_series_equal(result, expected)
def test_series_pad_backfill_limit(self):
index = np.arange(10)
s = Series(np.random.randn(10), index=index)
result = s[:2].reindex(index, method='pad', limit=5)
expected = s[:2].reindex(index).fillna(method='pad')
expected[-3:] = np.nan
assert_series_equal(result, expected)
result = s[-2:].reindex(index, method='backfill', limit=5)
expected = s[-2:].reindex(index).fillna(method='backfill')
expected[:3] = np.nan
assert_series_equal(result, expected)
class TestSeriesInterpolateData(TestData):
def test_interpolate(self):
ts = Series(np.arange(len(self.ts), dtype=float), self.ts.index)
ts_copy = ts.copy()
ts_copy[5:10] = np.NaN
linear_interp = ts_copy.interpolate(method='linear')
tm.assert_series_equal(linear_interp, ts)
ord_ts = Series([d.toordinal() for d in self.ts.index],
index=self.ts.index).astype(float)
ord_ts_copy = ord_ts.copy()
ord_ts_copy[5:10] = np.NaN
time_interp = ord_ts_copy.interpolate(method='time')
tm.assert_series_equal(time_interp, ord_ts)
# try time interpolation on a non-TimeSeries
# Only raises ValueError if there are NaNs.
non_ts = self.series.copy()
non_ts[0] = np.NaN
pytest.raises(ValueError, non_ts.interpolate, method='time')
@td.skip_if_no_scipy
def test_interpolate_pchip(self):
_skip_if_no_pchip()
ser = Series(np.sort(np.random.uniform(size=100)))
# interpolate at new_index
new_index = ser.index.union(Index([49.25, 49.5, 49.75, 50.25, 50.5,
50.75]))
interp_s = ser.reindex(new_index).interpolate(method='pchip')
# does not blow up, GH5977
interp_s[49:51]
@td.skip_if_no_scipy
def test_interpolate_akima(self):
_skip_if_no_akima()
ser = Series([10, 11, 12, 13])
expected = Series([11.00, 11.25, 11.50, 11.75,
12.00, 12.25, 12.50, 12.75, 13.00],
index=Index([1.0, 1.25, 1.5, 1.75,
2.0, 2.25, 2.5, 2.75, 3.0]))
# interpolate at new_index
new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75]))
interp_s = ser.reindex(new_index).interpolate(method='akima')
assert_series_equal(interp_s[1:3], expected)
@td.skip_if_no_scipy
def test_interpolate_piecewise_polynomial(self):
ser = Series([10, 11, 12, 13])
expected = Series([11.00, 11.25, 11.50, 11.75,
12.00, 12.25, 12.50, 12.75, 13.00],
index=Index([1.0, 1.25, 1.5, 1.75,
2.0, 2.25, 2.5, 2.75, 3.0]))
# interpolate at new_index
new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75]))
interp_s = ser.reindex(new_index).interpolate(
method='piecewise_polynomial')
assert_series_equal(interp_s[1:3], expected)
@td.skip_if_no_scipy
def test_interpolate_from_derivatives(self):
ser = Series([10, 11, 12, 13])
expected = Series([11.00, 11.25, 11.50, 11.75,
12.00, 12.25, 12.50, 12.75, 13.00],
index=Index([1.0, 1.25, 1.5, 1.75,
2.0, 2.25, 2.5, 2.75, 3.0]))
# interpolate at new_index
new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75]))
interp_s = ser.reindex(new_index).interpolate(
method='from_derivatives')
assert_series_equal(interp_s[1:3], expected)
@pytest.mark.parametrize("kwargs", [
{},
pytest.param({'method': 'polynomial', 'order': 1},
marks=td.skip_if_no_scipy)
])
def test_interpolate_corners(self, kwargs):
s = Series([np.nan, np.nan])
assert_series_equal(s.interpolate(**kwargs), s)
s = Series([]).interpolate()
assert_series_equal(s.interpolate(**kwargs), s)
def test_interpolate_index_values(self):
s = Series(np.nan, index=np.sort(np.random.rand(30)))
s[::3] = np.random.randn(10)
vals = s.index.values.astype(float)
result = s.interpolate(method='index')
expected = s.copy()
bad = isna(expected.values)
good = ~bad
expected = Series(np.interp(vals[bad], vals[good],
s.values[good]),
index=s.index[bad])
assert_series_equal(result[bad], expected)
# 'values' is synonymous with 'index' for the method kwarg
other_result = s.interpolate(method='values')
assert_series_equal(other_result, result)
assert_series_equal(other_result[bad], expected)
def test_interpolate_non_ts(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
with pytest.raises(ValueError):
s.interpolate(method='time')
@pytest.mark.parametrize("kwargs", [
{},
pytest.param({'method': 'polynomial', 'order': 1},
marks=td.skip_if_no_scipy)
])
def test_nan_interpolate(self, kwargs):
s = Series([0, 1, np.nan, 3])
result = s.interpolate(**kwargs)
expected = Series([0., 1., 2., 3.])
assert_series_equal(result, expected)
def test_nan_irregular_index(self):
s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9])
result = s.interpolate()
expected = Series([1., 2., 3., 4.], index=[1, 3, 5, 9])
assert_series_equal(result, expected)
def test_nan_str_index(self):
s = Series([0, 1, 2, np.nan], index=list('abcd'))
result = s.interpolate()
expected = Series([0., 1., 2., 2.], index=list('abcd'))
assert_series_equal(result, expected)
@td.skip_if_no_scipy
def test_interp_quad(self):
sq = Series([1, 4, np.nan, 16], index=[1, 2, 3, 4])
result = sq.interpolate(method='quadratic')
expected = Series([1., 4., 9., 16.], index=[1, 2, 3, 4])
assert_series_equal(result, expected)
@td.skip_if_no_scipy
def test_interp_scipy_basic(self):
s = Series([1, 3, np.nan, 12, np.nan, 25])
# slinear
expected = Series([1., 3., 7.5, 12., 18.5, 25.])
result = s.interpolate(method='slinear')
assert_series_equal(result, expected)
result = s.interpolate(method='slinear', downcast='infer')
assert_series_equal(result, expected)
# nearest
expected = Series([1, 3, 3, 12, 12, 25])
result = s.interpolate(method='nearest')
assert_series_equal(result, expected.astype('float'))
result = s.interpolate(method='nearest', downcast='infer')
assert_series_equal(result, expected)
# zero
expected = Series([1, 3, 3, 12, 12, 25])
result = s.interpolate(method='zero')
assert_series_equal(result, expected.astype('float'))
result = s.interpolate(method='zero', downcast='infer')
assert_series_equal(result, expected)
# quadratic
# GH #15662.
# new cubic and quadratic interpolation algorithms from scipy 0.19.0.
# previously `splmake` was used. See scipy/scipy#6710
if _is_scipy_ge_0190:
expected = Series([1, 3., 6.823529, 12., 18.058824, 25.])
else:
expected = Series([1, 3., 6.769231, 12., 18.230769, 25.])
result = s.interpolate(method='quadratic')
assert_series_equal(result, expected)
result = s.interpolate(method='quadratic', downcast='infer')
assert_series_equal(result, expected)
# cubic
expected = Series([1., 3., 6.8, 12., 18.2, 25.])
result = s.interpolate(method='cubic')
assert_series_equal(result, expected)
def test_interp_limit(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
expected = Series([1., 3., 5., 7., np.nan, 11.])
result = s.interpolate(method='linear', limit=2)
assert_series_equal(result, expected)
# GH 9217, make sure limit is an int and greater than 0
methods = ['linear', 'time', 'index', 'values', 'nearest', 'zero',
'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh',
'polynomial', 'spline', 'piecewise_polynomial', None,
'from_derivatives', 'pchip', 'akima']
s = pd.Series([1, 2, np.nan, np.nan, 5])
for limit in [-1, 0, 1., 2.]:
for method in methods:
with pytest.raises(ValueError):
s.interpolate(limit=limit, method=method)
def test_interp_limit_forward(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
# Provide 'forward' (the default) explicitly here.
expected = Series([1., 3., 5., 7., np.nan, 11.])
result = s.interpolate(method='linear', limit=2,
limit_direction='forward')
assert_series_equal(result, expected)
result = s.interpolate(method='linear', limit=2,
limit_direction='FORWARD')
assert_series_equal(result, expected)
def test_interp_unlimited(self):
# these test are for issue #16282 default Limit=None is unlimited
s = Series([np.nan, 1., 3., np.nan, np.nan, np.nan, 11., np.nan])
expected = Series([1., 1., 3., 5., 7., 9., 11., 11.])
result = s.interpolate(method='linear',
limit_direction='both')
assert_series_equal(result, expected)
expected = Series([np.nan, 1., 3., 5., 7., 9., 11., 11.])
result = s.interpolate(method='linear',
limit_direction='forward')
assert_series_equal(result, expected)
expected = Series([1., 1., 3., 5., 7., 9., 11., np.nan])
result = s.interpolate(method='linear',
limit_direction='backward')
assert_series_equal(result, expected)
def test_interp_limit_bad_direction(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
pytest.raises(ValueError, s.interpolate, method='linear', limit=2,
limit_direction='abc')
# raises an error even if no limit is specified.
pytest.raises(ValueError, s.interpolate, method='linear',
limit_direction='abc')
# limit_area introduced GH #16284
def test_interp_limit_area(self):
# These tests are for issue #9218 -- fill NaNs in both directions.
s = Series([nan, nan, 3, nan, nan, nan, 7, nan, nan])
expected = Series([nan, nan, 3., 4., 5., 6., 7., nan, nan])
result = s.interpolate(method='linear', limit_area='inside')
assert_series_equal(result, expected)
expected = Series([nan, nan, 3., 4., nan, nan, 7., nan, nan])
result = s.interpolate(method='linear', limit_area='inside',
limit=1)
expected = Series([nan, nan, 3., 4., nan, 6., 7., nan, nan])
result = s.interpolate(method='linear', limit_area='inside',
limit_direction='both', limit=1)
assert_series_equal(result, expected)
expected = Series([nan, nan, 3., nan, nan, nan, 7., 7., 7.])
result = s.interpolate(method='linear', limit_area='outside')
assert_series_equal(result, expected)
expected = Series([nan, nan, 3., nan, nan, nan, 7., 7., nan])
result = s.interpolate(method='linear', limit_area='outside',
limit=1)
expected = Series([nan, 3., 3., nan, nan, nan, 7., 7., nan])
result = s.interpolate(method='linear', limit_area='outside',
limit_direction='both', limit=1)
assert_series_equal(result, expected)
expected = Series([3., 3., 3., nan, nan, nan, 7., nan, nan])
result = s.interpolate(method='linear', limit_area='outside',
direction='backward')
# raises an error even if limit type is wrong.
pytest.raises(ValueError, s.interpolate, method='linear',
limit_area='abc')
def test_interp_limit_direction(self):
# These tests are for issue #9218 -- fill NaNs in both directions.
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
expected = Series([1., 3., np.nan, 7., 9., 11.])
result = s.interpolate(method='linear', limit=2,
limit_direction='backward')
assert_series_equal(result, expected)
expected = Series([1., 3., 5., np.nan, 9., 11.])
result = s.interpolate(method='linear', limit=1,
limit_direction='both')
assert_series_equal(result, expected)
# Check that this works on a longer series of nans.
s = Series([1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12,
np.nan])
expected = Series([1., 3., 4., 5., 6., 7., 9., 10., 11., 12., 12.])
result = s.interpolate(method='linear', limit=2,
limit_direction='both')
assert_series_equal(result, expected)
expected = Series([1., 3., 4., np.nan, 6., 7., 9., 10., 11., 12., 12.])
result = s.interpolate(method='linear', limit=1,
limit_direction='both')
assert_series_equal(result, expected)
def test_interp_limit_to_ends(self):
# These test are for issue #10420 -- flow back to beginning.
s = Series([np.nan, np.nan, 5, 7, 9, np.nan])
expected = Series([5., 5., 5., 7., 9., np.nan])
result = s.interpolate(method='linear', limit=2,
limit_direction='backward')
assert_series_equal(result, expected)
expected = Series([5., 5., 5., 7., 9., 9.])
result = s.interpolate(method='linear', limit=2,
limit_direction='both')
assert_series_equal(result, expected)
def test_interp_limit_before_ends(self):
# These test are for issue #11115 -- limit ends properly.
s = Series([np.nan, np.nan, 5, 7, np.nan, np.nan])
expected = Series([np.nan, np.nan, 5., 7., 7., np.nan])
result = s.interpolate(method='linear', limit=1,
limit_direction='forward')
assert_series_equal(result, expected)
expected = Series([np.nan, 5., 5., 7., np.nan, np.nan])
result = s.interpolate(method='linear', limit=1,
limit_direction='backward')
assert_series_equal(result, expected)
expected = Series([np.nan, 5., 5., 7., 7., np.nan])
result = s.interpolate(method='linear', limit=1,
limit_direction='both')
assert_series_equal(result, expected)
@td.skip_if_no_scipy
def test_interp_all_good(self):
s = Series([1, 2, 3])
result = s.interpolate(method='polynomial', order=1)
assert_series_equal(result, s)
# non-scipy
result = s.interpolate()
assert_series_equal(result, s)
@pytest.mark.parametrize("check_scipy", [
False,
pytest.param(True, marks=td.skip_if_no_scipy)
])
def test_interp_multiIndex(self, check_scipy):
idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')])
s = Series([1, 2, np.nan], index=idx)
expected = s.copy()
expected.loc[2] = 2
result = s.interpolate()
assert_series_equal(result, expected)
if check_scipy:
with pytest.raises(ValueError):
s.interpolate(method='polynomial', order=1)
@td.skip_if_no_scipy
def test_interp_nonmono_raise(self):
s = Series([1, np.nan, 3], index=[0, 2, 1])
with pytest.raises(ValueError):
s.interpolate(method='krogh')
@td.skip_if_no_scipy
def test_interp_datetime64(self):
df = Series([1, np.nan, 3], index=date_range('1/1/2000', periods=3))
result = df.interpolate(method='nearest')
expected = Series([1., 1., 3.],
index=date_range('1/1/2000', periods=3))
assert_series_equal(result, expected)
def test_interp_limit_no_nans(self):
# GH 7173
s = pd.Series([1., 2., 3.])
result = s.interpolate(limit=1)
expected = s
assert_series_equal(result, expected)
@td.skip_if_no_scipy
@pytest.mark.parametrize("method", ['polynomial', 'spline'])
def test_no_order(self, method):
s = Series([0, 1, np.nan, 3])
with pytest.raises(ValueError):
s.interpolate(method=method)
@td.skip_if_no_scipy
def test_spline(self):
s = Series([1, 2, np.nan, 4, 5, np.nan, 7])
result = s.interpolate(method='spline', order=1)
expected = Series([1., 2., 3., 4., 5., 6., 7.])
assert_series_equal(result, expected)
@td.skip_if_no('scipy', min_version='0.15')
def test_spline_extrapolate(self):
s = Series([1, 2, 3, 4, np.nan, 6, np.nan])
result3 = s.interpolate(method='spline', order=1, ext=3)
expected3 = Series([1., 2., 3., 4., 5., 6., 6.])
assert_series_equal(result3, expected3)
result1 = s.interpolate(method='spline', order=1, ext=0)
expected1 = Series([1., 2., 3., 4., 5., 6., 7.])
assert_series_equal(result1, expected1)
@td.skip_if_no_scipy
def test_spline_smooth(self):
s = Series([1, 2, np.nan, 4, 5.1, np.nan, 7])
assert (s.interpolate(method='spline', order=3, s=0)[5] !=
s.interpolate(method='spline', order=3)[5])
@td.skip_if_no_scipy
def test_spline_interpolation(self):
s = Series(np.arange(10) ** 2)
s[np.random.randint(0, 9, 3)] = np.nan
result1 = s.interpolate(method='spline', order=1)
expected1 = s.interpolate(method='spline', order=1)
assert_series_equal(result1, expected1)
@td.skip_if_no_scipy
def test_spline_error(self):
# see gh-10633
s = pd.Series(np.arange(10) ** 2)
s[np.random.randint(0, 9, 3)] = np.nan
with pytest.raises(ValueError):
s.interpolate(method='spline')
with pytest.raises(ValueError):
s.interpolate(method='spline', order=0)
def test_interp_timedelta64(self):
# GH 6424
df = Series([1, np.nan, 3],
index=pd.to_timedelta([1, 2, 3]))
result = df.interpolate(method='time')
expected = Series([1., 2., 3.],
index=pd.to_timedelta([1, 2, 3]))
assert_series_equal(result, expected)
# test for non uniform spacing
df = Series([1, np.nan, 3],
index=pd.to_timedelta([1, 2, 4]))
result = df.interpolate(method='time')
expected = Series([1., 1.666667, 3.],
index=pd.to_timedelta([1, 2, 4]))
assert_series_equal(result, expected)
def test_series_interpolate_method_values(self):
# #1646
ts = _simple_ts('1/1/2000', '1/20/2000')
ts[::2] = np.nan
result = ts.interpolate(method='values')
exp = ts.interpolate()
assert_series_equal(result, exp)
def test_series_interpolate_intraday(self):
# #1698
index = pd.date_range('1/1/2012', periods=4, freq='12D')
ts = pd.Series([0, 12, 24, 36], index)
new_index = index.append(index + pd.DateOffset(days=1)).sort_values()
exp = ts.reindex(new_index).interpolate(method='time')
index = pd.date_range('1/1/2012', periods=4, freq='12H')
ts = pd.Series([0, 12, 24, 36], index)
new_index = index.append(index + pd.DateOffset(hours=1)).sort_values()
result = ts.reindex(new_index).interpolate(method='time')
tm.assert_numpy_array_equal(result.values, exp.values)