laywerrobot/lib/python3.6/site-packages/pandas/tests/frame/test_missing.py
2020-08-27 21:55:39 +02:00

846 lines
30 KiB
Python

# -*- coding: utf-8 -*-
from __future__ import print_function
import pytest
from distutils.version import LooseVersion
from numpy import nan, random
import numpy as np
import datetime
import dateutil
from pandas.compat import lrange
from pandas import (DataFrame, Series, Timestamp,
date_range, Categorical)
import pandas as pd
from pandas.util.testing import assert_series_equal, assert_frame_equal
import pandas.util.testing as tm
import pandas.util._test_decorators as td
from pandas.tests.frame.common import TestData, _check_mixed_float
try:
import scipy
_is_scipy_ge_0190 = (LooseVersion(scipy.__version__) >=
LooseVersion('0.19.0'))
except:
_is_scipy_ge_0190 = False
def _skip_if_no_pchip():
try:
from scipy.interpolate import pchip_interpolate # noqa
except ImportError:
import pytest
pytest.skip('scipy.interpolate.pchip missing')
class TestDataFrameMissingData(TestData):
def test_dropEmptyRows(self):
N = len(self.frame.index)
mat = random.randn(N)
mat[:5] = nan
frame = DataFrame({'foo': mat}, index=self.frame.index)
original = Series(mat, index=self.frame.index, name='foo')
expected = original.dropna()
inplace_frame1, inplace_frame2 = frame.copy(), frame.copy()
smaller_frame = frame.dropna(how='all')
# check that original was preserved
assert_series_equal(frame['foo'], original)
inplace_frame1.dropna(how='all', inplace=True)
assert_series_equal(smaller_frame['foo'], expected)
assert_series_equal(inplace_frame1['foo'], expected)
smaller_frame = frame.dropna(how='all', subset=['foo'])
inplace_frame2.dropna(how='all', subset=['foo'], inplace=True)
assert_series_equal(smaller_frame['foo'], expected)
assert_series_equal(inplace_frame2['foo'], expected)
def test_dropIncompleteRows(self):
N = len(self.frame.index)
mat = random.randn(N)
mat[:5] = nan
frame = DataFrame({'foo': mat}, index=self.frame.index)
frame['bar'] = 5
original = Series(mat, index=self.frame.index, name='foo')
inp_frame1, inp_frame2 = frame.copy(), frame.copy()
smaller_frame = frame.dropna()
assert_series_equal(frame['foo'], original)
inp_frame1.dropna(inplace=True)
exp = Series(mat[5:], index=self.frame.index[5:], name='foo')
tm.assert_series_equal(smaller_frame['foo'], exp)
tm.assert_series_equal(inp_frame1['foo'], exp)
samesize_frame = frame.dropna(subset=['bar'])
assert_series_equal(frame['foo'], original)
assert (frame['bar'] == 5).all()
inp_frame2.dropna(subset=['bar'], inplace=True)
tm.assert_index_equal(samesize_frame.index, self.frame.index)
tm.assert_index_equal(inp_frame2.index, self.frame.index)
def test_dropna(self):
df = DataFrame(np.random.randn(6, 4))
df[2][:2] = nan
dropped = df.dropna(axis=1)
expected = df.loc[:, [0, 1, 3]]
inp = df.copy()
inp.dropna(axis=1, inplace=True)
assert_frame_equal(dropped, expected)
assert_frame_equal(inp, expected)
dropped = df.dropna(axis=0)
expected = df.loc[lrange(2, 6)]
inp = df.copy()
inp.dropna(axis=0, inplace=True)
assert_frame_equal(dropped, expected)
assert_frame_equal(inp, expected)
# threshold
dropped = df.dropna(axis=1, thresh=5)
expected = df.loc[:, [0, 1, 3]]
inp = df.copy()
inp.dropna(axis=1, thresh=5, inplace=True)
assert_frame_equal(dropped, expected)
assert_frame_equal(inp, expected)
dropped = df.dropna(axis=0, thresh=4)
expected = df.loc[lrange(2, 6)]
inp = df.copy()
inp.dropna(axis=0, thresh=4, inplace=True)
assert_frame_equal(dropped, expected)
assert_frame_equal(inp, expected)
dropped = df.dropna(axis=1, thresh=4)
assert_frame_equal(dropped, df)
dropped = df.dropna(axis=1, thresh=3)
assert_frame_equal(dropped, df)
# subset
dropped = df.dropna(axis=0, subset=[0, 1, 3])
inp = df.copy()
inp.dropna(axis=0, subset=[0, 1, 3], inplace=True)
assert_frame_equal(dropped, df)
assert_frame_equal(inp, df)
# all
dropped = df.dropna(axis=1, how='all')
assert_frame_equal(dropped, df)
df[2] = nan
dropped = df.dropna(axis=1, how='all')
expected = df.loc[:, [0, 1, 3]]
assert_frame_equal(dropped, expected)
# bad input
pytest.raises(ValueError, df.dropna, axis=3)
def test_drop_and_dropna_caching(self):
# tst that cacher updates
original = Series([1, 2, np.nan], name='A')
expected = Series([1, 2], dtype=original.dtype, name='A')
df = pd.DataFrame({'A': original.values.copy()})
df2 = df.copy()
df['A'].dropna()
assert_series_equal(df['A'], original)
df['A'].dropna(inplace=True)
assert_series_equal(df['A'], expected)
df2['A'].drop([1])
assert_series_equal(df2['A'], original)
df2['A'].drop([1], inplace=True)
assert_series_equal(df2['A'], original.drop([1]))
def test_dropna_corner(self):
# bad input
pytest.raises(ValueError, self.frame.dropna, how='foo')
pytest.raises(TypeError, self.frame.dropna, how=None)
# non-existent column - 8303
pytest.raises(KeyError, self.frame.dropna, subset=['A', 'X'])
def test_dropna_multiple_axes(self):
df = DataFrame([[1, np.nan, 2, 3],
[4, np.nan, 5, 6],
[np.nan, np.nan, np.nan, np.nan],
[7, np.nan, 8, 9]])
cp = df.copy()
# GH20987
with tm.assert_produces_warning(FutureWarning):
result = df.dropna(how='all', axis=[0, 1])
with tm.assert_produces_warning(FutureWarning):
result2 = df.dropna(how='all', axis=(0, 1))
expected = df.dropna(how='all').dropna(how='all', axis=1)
assert_frame_equal(result, expected)
assert_frame_equal(result2, expected)
assert_frame_equal(df, cp)
inp = df.copy()
with tm.assert_produces_warning(FutureWarning):
inp.dropna(how='all', axis=(0, 1), inplace=True)
assert_frame_equal(inp, expected)
def test_dropna_tz_aware_datetime(self):
# GH13407
df = DataFrame()
dt1 = datetime.datetime(2015, 1, 1,
tzinfo=dateutil.tz.tzutc())
dt2 = datetime.datetime(2015, 2, 2,
tzinfo=dateutil.tz.tzutc())
df['Time'] = [dt1]
result = df.dropna(axis=0)
expected = DataFrame({'Time': [dt1]})
assert_frame_equal(result, expected)
# Ex2
df = DataFrame({'Time': [dt1, None, np.nan, dt2]})
result = df.dropna(axis=0)
expected = DataFrame([dt1, dt2],
columns=['Time'],
index=[0, 3])
assert_frame_equal(result, expected)
def test_fillna(self):
tf = self.tsframe
tf.loc[tf.index[:5], 'A'] = nan
tf.loc[tf.index[-5:], 'A'] = nan
zero_filled = self.tsframe.fillna(0)
assert (zero_filled.loc[zero_filled.index[:5], 'A'] == 0).all()
padded = self.tsframe.fillna(method='pad')
assert np.isnan(padded.loc[padded.index[:5], 'A']).all()
assert (padded.loc[padded.index[-5:], 'A'] ==
padded.loc[padded.index[-5], 'A']).all()
# mixed type
mf = self.mixed_frame
mf.loc[mf.index[5:20], 'foo'] = nan
mf.loc[mf.index[-10:], 'A'] = nan
result = self.mixed_frame.fillna(value=0)
result = self.mixed_frame.fillna(method='pad')
pytest.raises(ValueError, self.tsframe.fillna)
pytest.raises(ValueError, self.tsframe.fillna, 5, method='ffill')
# mixed numeric (but no float16)
mf = self.mixed_float.reindex(columns=['A', 'B', 'D'])
mf.loc[mf.index[-10:], 'A'] = nan
result = mf.fillna(value=0)
_check_mixed_float(result, dtype=dict(C=None))
result = mf.fillna(method='pad')
_check_mixed_float(result, dtype=dict(C=None))
# empty frame (GH #2778)
df = DataFrame(columns=['x'])
for m in ['pad', 'backfill']:
df.x.fillna(method=m, inplace=True)
df.x.fillna(method=m)
# with different dtype (GH3386)
df = DataFrame([['a', 'a', np.nan, 'a'], [
'b', 'b', np.nan, 'b'], ['c', 'c', np.nan, 'c']])
result = df.fillna({2: 'foo'})
expected = DataFrame([['a', 'a', 'foo', 'a'],
['b', 'b', 'foo', 'b'],
['c', 'c', 'foo', 'c']])
assert_frame_equal(result, expected)
df.fillna({2: 'foo'}, inplace=True)
assert_frame_equal(df, expected)
# limit and value
df = DataFrame(np.random.randn(10, 3))
df.iloc[2:7, 0] = np.nan
df.iloc[3:5, 2] = np.nan
expected = df.copy()
expected.iloc[2, 0] = 999
expected.iloc[3, 2] = 999
result = df.fillna(999, limit=1)
assert_frame_equal(result, expected)
# with datelike
# GH 6344
df = DataFrame({
'Date': [pd.NaT, Timestamp("2014-1-1")],
'Date2': [Timestamp("2013-1-1"), pd.NaT]
})
expected = df.copy()
expected['Date'] = expected['Date'].fillna(
df.loc[df.index[0], 'Date2'])
result = df.fillna(value={'Date': df['Date2']})
assert_frame_equal(result, expected)
# with timezone
# GH 15855
df = pd.DataFrame({'A': [pd.Timestamp('2012-11-11 00:00:00+01:00'),
pd.NaT]})
exp = pd.DataFrame({'A': [pd.Timestamp('2012-11-11 00:00:00+01:00'),
pd.Timestamp('2012-11-11 00:00:00+01:00')]})
assert_frame_equal(df.fillna(method='pad'), exp)
df = pd.DataFrame({'A': [pd.NaT,
pd.Timestamp('2012-11-11 00:00:00+01:00')]})
exp = pd.DataFrame({'A': [pd.Timestamp('2012-11-11 00:00:00+01:00'),
pd.Timestamp('2012-11-11 00:00:00+01:00')]})
assert_frame_equal(df.fillna(method='bfill'), exp)
def test_na_actions_categorical(self):
cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
vals = ["a", "b", np.nan, "d"]
df = DataFrame({"cats": cat, "vals": vals})
cat2 = Categorical([1, 2, 3, 3], categories=[1, 2, 3])
vals2 = ["a", "b", "b", "d"]
df_exp_fill = DataFrame({"cats": cat2, "vals": vals2})
cat3 = Categorical([1, 2, 3], categories=[1, 2, 3])
vals3 = ["a", "b", np.nan]
df_exp_drop_cats = DataFrame({"cats": cat3, "vals": vals3})
cat4 = Categorical([1, 2], categories=[1, 2, 3])
vals4 = ["a", "b"]
df_exp_drop_all = DataFrame({"cats": cat4, "vals": vals4})
# fillna
res = df.fillna(value={"cats": 3, "vals": "b"})
tm.assert_frame_equal(res, df_exp_fill)
with tm.assert_raises_regex(ValueError, "fill value must be "
"in categories"):
df.fillna(value={"cats": 4, "vals": "c"})
res = df.fillna(method='pad')
tm.assert_frame_equal(res, df_exp_fill)
# dropna
res = df.dropna(subset=["cats"])
tm.assert_frame_equal(res, df_exp_drop_cats)
res = df.dropna()
tm.assert_frame_equal(res, df_exp_drop_all)
# make sure that fillna takes missing values into account
c = Categorical([np.nan, "b", np.nan], categories=["a", "b"])
df = pd.DataFrame({"cats": c, "vals": [1, 2, 3]})
cat_exp = Categorical(["a", "b", "a"], categories=["a", "b"])
df_exp = DataFrame({"cats": cat_exp, "vals": [1, 2, 3]})
res = df.fillna("a")
tm.assert_frame_equal(res, df_exp)
def test_fillna_categorical_nan(self):
# GH 14021
# np.nan should always be a valid filler
cat = Categorical([np.nan, 2, np.nan])
val = Categorical([np.nan, np.nan, np.nan])
df = DataFrame({"cats": cat, "vals": val})
res = df.fillna(df.median())
v_exp = [np.nan, np.nan, np.nan]
df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp},
dtype='category')
tm.assert_frame_equal(res, df_exp)
result = df.cats.fillna(np.nan)
tm.assert_series_equal(result, df.cats)
result = df.vals.fillna(np.nan)
tm.assert_series_equal(result, df.vals)
idx = pd.DatetimeIndex(['2011-01-01 09:00', '2016-01-01 23:45',
'2011-01-01 09:00', pd.NaT, pd.NaT])
df = DataFrame({'a': Categorical(idx)})
tm.assert_frame_equal(df.fillna(value=pd.NaT), df)
idx = pd.PeriodIndex(['2011-01', '2011-01', '2011-01',
pd.NaT, pd.NaT], freq='M')
df = DataFrame({'a': Categorical(idx)})
tm.assert_frame_equal(df.fillna(value=pd.NaT), df)
idx = pd.TimedeltaIndex(['1 days', '2 days',
'1 days', pd.NaT, pd.NaT])
df = DataFrame({'a': Categorical(idx)})
tm.assert_frame_equal(df.fillna(value=pd.NaT), df)
def test_fillna_downcast(self):
# GH 15277
# infer int64 from float64
df = pd.DataFrame({'a': [1., np.nan]})
result = df.fillna(0, downcast='infer')
expected = pd.DataFrame({'a': [1, 0]})
assert_frame_equal(result, expected)
# infer int64 from float64 when fillna value is a dict
df = pd.DataFrame({'a': [1., np.nan]})
result = df.fillna({'a': 0}, downcast='infer')
expected = pd.DataFrame({'a': [1, 0]})
assert_frame_equal(result, expected)
def test_fillna_dtype_conversion(self):
# make sure that fillna on an empty frame works
df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
result = df.get_dtype_counts().sort_values()
expected = Series({'object': 5})
assert_series_equal(result, expected)
result = df.fillna(1)
expected = DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
result = result.get_dtype_counts().sort_values()
expected = Series({'int64': 5})
assert_series_equal(result, expected)
# empty block
df = DataFrame(index=lrange(3), columns=['A', 'B'], dtype='float64')
result = df.fillna('nan')
expected = DataFrame('nan', index=lrange(3), columns=['A', 'B'])
assert_frame_equal(result, expected)
# equiv of replace
df = DataFrame(dict(A=[1, np.nan], B=[1., 2.]))
for v in ['', 1, np.nan, 1.0]:
expected = df.replace(np.nan, v)
result = df.fillna(v)
assert_frame_equal(result, expected)
def test_fillna_datetime_columns(self):
# GH 7095
df = pd.DataFrame({'A': [-1, -2, np.nan],
'B': date_range('20130101', periods=3),
'C': ['foo', 'bar', None],
'D': ['foo2', 'bar2', None]},
index=date_range('20130110', periods=3))
result = df.fillna('?')
expected = pd.DataFrame({'A': [-1, -2, '?'],
'B': date_range('20130101', periods=3),
'C': ['foo', 'bar', '?'],
'D': ['foo2', 'bar2', '?']},
index=date_range('20130110', periods=3))
tm.assert_frame_equal(result, expected)
df = pd.DataFrame({'A': [-1, -2, np.nan],
'B': [pd.Timestamp('2013-01-01'),
pd.Timestamp('2013-01-02'), pd.NaT],
'C': ['foo', 'bar', None],
'D': ['foo2', 'bar2', None]},
index=date_range('20130110', periods=3))
result = df.fillna('?')
expected = pd.DataFrame({'A': [-1, -2, '?'],
'B': [pd.Timestamp('2013-01-01'),
pd.Timestamp('2013-01-02'), '?'],
'C': ['foo', 'bar', '?'],
'D': ['foo2', 'bar2', '?']},
index=pd.date_range('20130110', periods=3))
tm.assert_frame_equal(result, expected)
def test_ffill(self):
self.tsframe['A'][:5] = nan
self.tsframe['A'][-5:] = nan
assert_frame_equal(self.tsframe.ffill(),
self.tsframe.fillna(method='ffill'))
def test_bfill(self):
self.tsframe['A'][:5] = nan
self.tsframe['A'][-5:] = nan
assert_frame_equal(self.tsframe.bfill(),
self.tsframe.fillna(method='bfill'))
def test_frame_pad_backfill_limit(self):
index = np.arange(10)
df = DataFrame(np.random.randn(10, 4), index=index)
result = df[:2].reindex(index, method='pad', limit=5)
expected = df[:2].reindex(index).fillna(method='pad')
expected.values[-3:] = np.nan
tm.assert_frame_equal(result, expected)
result = df[-2:].reindex(index, method='backfill', limit=5)
expected = df[-2:].reindex(index).fillna(method='backfill')
expected.values[:3] = np.nan
tm.assert_frame_equal(result, expected)
def test_frame_fillna_limit(self):
index = np.arange(10)
df = DataFrame(np.random.randn(10, 4), index=index)
result = df[:2].reindex(index)
result = result.fillna(method='pad', limit=5)
expected = df[:2].reindex(index).fillna(method='pad')
expected.values[-3:] = np.nan
tm.assert_frame_equal(result, expected)
result = df[-2:].reindex(index)
result = result.fillna(method='backfill', limit=5)
expected = df[-2:].reindex(index).fillna(method='backfill')
expected.values[:3] = np.nan
tm.assert_frame_equal(result, expected)
def test_fillna_skip_certain_blocks(self):
# don't try to fill boolean, int blocks
df = DataFrame(np.random.randn(10, 4).astype(int))
# it works!
df.fillna(np.nan)
def test_fillna_inplace(self):
df = DataFrame(np.random.randn(10, 4))
df[1][:4] = np.nan
df[3][-4:] = np.nan
expected = df.fillna(value=0)
assert expected is not df
df.fillna(value=0, inplace=True)
tm.assert_frame_equal(df, expected)
expected = df.fillna(value={0: 0}, inplace=True)
assert expected is None
df[1][:4] = np.nan
df[3][-4:] = np.nan
expected = df.fillna(method='ffill')
assert expected is not df
df.fillna(method='ffill', inplace=True)
tm.assert_frame_equal(df, expected)
def test_fillna_dict_series(self):
df = DataFrame({'a': [nan, 1, 2, nan, nan],
'b': [1, 2, 3, nan, nan],
'c': [nan, 1, 2, 3, 4]})
result = df.fillna({'a': 0, 'b': 5})
expected = df.copy()
expected['a'] = expected['a'].fillna(0)
expected['b'] = expected['b'].fillna(5)
assert_frame_equal(result, expected)
# it works
result = df.fillna({'a': 0, 'b': 5, 'd': 7})
# Series treated same as dict
result = df.fillna(df.max())
expected = df.fillna(df.max().to_dict())
assert_frame_equal(result, expected)
# disable this for now
with tm.assert_raises_regex(NotImplementedError,
'column by column'):
df.fillna(df.max(1), axis=1)
def test_fillna_dataframe(self):
# GH 8377
df = DataFrame({'a': [nan, 1, 2, nan, nan],
'b': [1, 2, 3, nan, nan],
'c': [nan, 1, 2, 3, 4]},
index=list('VWXYZ'))
# df2 may have different index and columns
df2 = DataFrame({'a': [nan, 10, 20, 30, 40],
'b': [50, 60, 70, 80, 90],
'foo': ['bar'] * 5},
index=list('VWXuZ'))
result = df.fillna(df2)
# only those columns and indices which are shared get filled
expected = DataFrame({'a': [nan, 1, 2, nan, 40],
'b': [1, 2, 3, nan, 90],
'c': [nan, 1, 2, 3, 4]},
index=list('VWXYZ'))
assert_frame_equal(result, expected)
def test_fillna_columns(self):
df = DataFrame(np.random.randn(10, 10))
df.values[:, ::2] = np.nan
result = df.fillna(method='ffill', axis=1)
expected = df.T.fillna(method='pad').T
assert_frame_equal(result, expected)
df.insert(6, 'foo', 5)
result = df.fillna(method='ffill', axis=1)
expected = df.astype(float).fillna(method='ffill', axis=1)
assert_frame_equal(result, expected)
def test_fillna_invalid_method(self):
with tm.assert_raises_regex(ValueError, 'ffil'):
self.frame.fillna(method='ffil')
def test_fillna_invalid_value(self):
# list
pytest.raises(TypeError, self.frame.fillna, [1, 2])
# tuple
pytest.raises(TypeError, self.frame.fillna, (1, 2))
# frame with series
pytest.raises(TypeError, self.frame.iloc[:, 0].fillna, self.frame)
def test_fillna_col_reordering(self):
cols = ["COL." + str(i) for i in range(5, 0, -1)]
data = np.random.rand(20, 5)
df = DataFrame(index=lrange(20), columns=cols, data=data)
filled = df.fillna(method='ffill')
assert df.columns.tolist() == filled.columns.tolist()
def test_fill_corner(self):
mf = self.mixed_frame
mf.loc[mf.index[5:20], 'foo'] = nan
mf.loc[mf.index[-10:], 'A'] = nan
filled = self.mixed_frame.fillna(value=0)
assert (filled.loc[filled.index[5:20], 'foo'] == 0).all()
del self.mixed_frame['foo']
empty_float = self.frame.reindex(columns=[])
# TODO(wesm): unused?
result = empty_float.fillna(value=0) # noqa
def test_fill_value_when_combine_const(self):
# GH12723
dat = np.array([0, 1, np.nan, 3, 4, 5], dtype='float')
df = DataFrame({'foo': dat}, index=range(6))
exp = df.fillna(0).add(2)
res = df.add(2, fill_value=0)
assert_frame_equal(res, exp)
class TestDataFrameInterpolate(TestData):
def test_interp_basic(self):
df = DataFrame({'A': [1, 2, np.nan, 4],
'B': [1, 4, 9, np.nan],
'C': [1, 2, 3, 5],
'D': list('abcd')})
expected = DataFrame({'A': [1., 2., 3., 4.],
'B': [1., 4., 9., 9.],
'C': [1, 2, 3, 5],
'D': list('abcd')})
result = df.interpolate()
assert_frame_equal(result, expected)
result = df.set_index('C').interpolate()
expected = df.set_index('C')
expected.loc[3, 'A'] = 3
expected.loc[5, 'B'] = 9
assert_frame_equal(result, expected)
def test_interp_bad_method(self):
df = DataFrame({'A': [1, 2, np.nan, 4],
'B': [1, 4, 9, np.nan],
'C': [1, 2, 3, 5],
'D': list('abcd')})
with pytest.raises(ValueError):
df.interpolate(method='not_a_method')
def test_interp_combo(self):
df = DataFrame({'A': [1., 2., np.nan, 4.],
'B': [1, 4, 9, np.nan],
'C': [1, 2, 3, 5],
'D': list('abcd')})
result = df['A'].interpolate()
expected = Series([1., 2., 3., 4.], name='A')
assert_series_equal(result, expected)
result = df['A'].interpolate(downcast='infer')
expected = Series([1, 2, 3, 4], name='A')
assert_series_equal(result, expected)
def test_interp_nan_idx(self):
df = DataFrame({'A': [1, 2, np.nan, 4], 'B': [np.nan, 2, 3, 4]})
df = df.set_index('A')
with pytest.raises(NotImplementedError):
df.interpolate(method='values')
@td.skip_if_no_scipy
def test_interp_various(self):
df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7],
'C': [1, 2, 3, 5, 8, 13, 21]})
df = df.set_index('C')
expected = df.copy()
result = df.interpolate(method='polynomial', order=1)
expected.A.loc[3] = 2.66666667
expected.A.loc[13] = 5.76923076
assert_frame_equal(result, expected)
result = df.interpolate(method='cubic')
# GH #15662.
# new cubic and quadratic interpolation algorithms from scipy 0.19.0.
# previously `splmake` was used. See scipy/scipy#6710
if _is_scipy_ge_0190:
expected.A.loc[3] = 2.81547781
expected.A.loc[13] = 5.52964175
else:
expected.A.loc[3] = 2.81621174
expected.A.loc[13] = 5.64146581
assert_frame_equal(result, expected)
result = df.interpolate(method='nearest')
expected.A.loc[3] = 2
expected.A.loc[13] = 5
assert_frame_equal(result, expected, check_dtype=False)
result = df.interpolate(method='quadratic')
if _is_scipy_ge_0190:
expected.A.loc[3] = 2.82150771
expected.A.loc[13] = 6.12648668
else:
expected.A.loc[3] = 2.82533638
expected.A.loc[13] = 6.02817974
assert_frame_equal(result, expected)
result = df.interpolate(method='slinear')
expected.A.loc[3] = 2.66666667
expected.A.loc[13] = 5.76923077
assert_frame_equal(result, expected)
result = df.interpolate(method='zero')
expected.A.loc[3] = 2.
expected.A.loc[13] = 5
assert_frame_equal(result, expected, check_dtype=False)
@td.skip_if_no_scipy
def test_interp_alt_scipy(self):
df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7],
'C': [1, 2, 3, 5, 8, 13, 21]})
result = df.interpolate(method='barycentric')
expected = df.copy()
expected.loc[2, 'A'] = 3
expected.loc[5, 'A'] = 6
assert_frame_equal(result, expected)
result = df.interpolate(method='barycentric', downcast='infer')
assert_frame_equal(result, expected.astype(np.int64))
result = df.interpolate(method='krogh')
expectedk = df.copy()
expectedk['A'] = expected['A']
assert_frame_equal(result, expectedk)
_skip_if_no_pchip()
import scipy
result = df.interpolate(method='pchip')
expected.loc[2, 'A'] = 3
if LooseVersion(scipy.__version__) >= LooseVersion('0.17.0'):
expected.loc[5, 'A'] = 6.0
else:
expected.loc[5, 'A'] = 6.125
assert_frame_equal(result, expected)
def test_interp_rowwise(self):
df = DataFrame({0: [1, 2, np.nan, 4],
1: [2, 3, 4, np.nan],
2: [np.nan, 4, 5, 6],
3: [4, np.nan, 6, 7],
4: [1, 2, 3, 4]})
result = df.interpolate(axis=1)
expected = df.copy()
expected.loc[3, 1] = 5
expected.loc[0, 2] = 3
expected.loc[1, 3] = 3
expected[4] = expected[4].astype(np.float64)
assert_frame_equal(result, expected)
result = df.interpolate(axis=1, method='values')
assert_frame_equal(result, expected)
result = df.interpolate(axis=0)
expected = df.interpolate()
assert_frame_equal(result, expected)
def test_rowwise_alt(self):
df = DataFrame({0: [0, .5, 1., np.nan, 4, 8, np.nan, np.nan, 64],
1: [1, 2, 3, 4, 3, 2, 1, 0, -1]})
df.interpolate(axis=0)
@pytest.mark.parametrize("check_scipy", [
False, pytest.param(True, marks=td.skip_if_no_scipy)
])
def test_interp_leading_nans(self, check_scipy):
df = DataFrame({"A": [np.nan, np.nan, .5, .25, 0],
"B": [np.nan, -3, -3.5, np.nan, -4]})
result = df.interpolate()
expected = df.copy()
expected['B'].loc[3] = -3.75
assert_frame_equal(result, expected)
if check_scipy:
result = df.interpolate(method='polynomial', order=1)
assert_frame_equal(result, expected)
def test_interp_raise_on_only_mixed(self):
df = DataFrame({'A': [1, 2, np.nan, 4],
'B': ['a', 'b', 'c', 'd'],
'C': [np.nan, 2, 5, 7],
'D': [np.nan, np.nan, 9, 9],
'E': [1, 2, 3, 4]})
with pytest.raises(TypeError):
df.interpolate(axis=1)
def test_interp_inplace(self):
df = DataFrame({'a': [1., 2., np.nan, 4.]})
expected = DataFrame({'a': [1., 2., 3., 4.]})
result = df.copy()
result['a'].interpolate(inplace=True)
assert_frame_equal(result, expected)
result = df.copy()
result['a'].interpolate(inplace=True, downcast='infer')
assert_frame_equal(result, expected.astype('int64'))
def test_interp_inplace_row(self):
# GH 10395
result = DataFrame({'a': [1., 2., 3., 4.],
'b': [np.nan, 2., 3., 4.],
'c': [3, 2, 2, 2]})
expected = result.interpolate(method='linear', axis=1, inplace=False)
result.interpolate(method='linear', axis=1, inplace=True)
assert_frame_equal(result, expected)
def test_interp_ignore_all_good(self):
# GH
df = DataFrame({'A': [1, 2, np.nan, 4],
'B': [1, 2, 3, 4],
'C': [1., 2., np.nan, 4.],
'D': [1., 2., 3., 4.]})
expected = DataFrame({'A': np.array(
[1, 2, 3, 4], dtype='float64'),
'B': np.array(
[1, 2, 3, 4], dtype='int64'),
'C': np.array(
[1., 2., 3, 4.], dtype='float64'),
'D': np.array(
[1., 2., 3., 4.], dtype='float64')})
result = df.interpolate(downcast=None)
assert_frame_equal(result, expected)
# all good
result = df[['B', 'D']].interpolate(downcast=None)
assert_frame_equal(result, df[['B', 'D']])