185 lines
5.9 KiB
Python
185 lines
5.9 KiB
Python
|
# coding=utf-8
|
||
|
# pylint: disable-msg=E1101,W0612
|
||
|
|
||
|
import numpy as np
|
||
|
import pandas as pd
|
||
|
|
||
|
from pandas import Index, Series
|
||
|
from pandas.core.indexes.datetimes import Timestamp
|
||
|
from pandas.core.dtypes.common import is_integer
|
||
|
import pandas.util.testing as tm
|
||
|
|
||
|
from .common import TestData
|
||
|
|
||
|
|
||
|
class TestSeriesQuantile(TestData):
|
||
|
|
||
|
def test_quantile(self):
|
||
|
|
||
|
q = self.ts.quantile(0.1)
|
||
|
assert q == np.percentile(self.ts.dropna(), 10)
|
||
|
|
||
|
q = self.ts.quantile(0.9)
|
||
|
assert q == np.percentile(self.ts.dropna(), 90)
|
||
|
|
||
|
# object dtype
|
||
|
q = Series(self.ts, dtype=object).quantile(0.9)
|
||
|
assert q == np.percentile(self.ts.dropna(), 90)
|
||
|
|
||
|
# datetime64[ns] dtype
|
||
|
dts = self.ts.index.to_series()
|
||
|
q = dts.quantile(.2)
|
||
|
assert q == Timestamp('2000-01-10 19:12:00')
|
||
|
|
||
|
# timedelta64[ns] dtype
|
||
|
tds = dts.diff()
|
||
|
q = tds.quantile(.25)
|
||
|
assert q == pd.to_timedelta('24:00:00')
|
||
|
|
||
|
# GH7661
|
||
|
result = Series([np.timedelta64('NaT')]).sum()
|
||
|
assert result == pd.Timedelta(0)
|
||
|
|
||
|
msg = 'percentiles should all be in the interval \\[0, 1\\]'
|
||
|
for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
|
||
|
with tm.assert_raises_regex(ValueError, msg):
|
||
|
self.ts.quantile(invalid)
|
||
|
|
||
|
def test_quantile_multi(self):
|
||
|
|
||
|
qs = [.1, .9]
|
||
|
result = self.ts.quantile(qs)
|
||
|
expected = pd.Series([np.percentile(self.ts.dropna(), 10),
|
||
|
np.percentile(self.ts.dropna(), 90)],
|
||
|
index=qs, name=self.ts.name)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
dts = self.ts.index.to_series()
|
||
|
dts.name = 'xxx'
|
||
|
result = dts.quantile((.2, .2))
|
||
|
expected = Series([Timestamp('2000-01-10 19:12:00'),
|
||
|
Timestamp('2000-01-10 19:12:00')],
|
||
|
index=[.2, .2], name='xxx')
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
result = self.ts.quantile([])
|
||
|
expected = pd.Series([], name=self.ts.name, index=Index(
|
||
|
[], dtype=float))
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
def test_quantile_interpolation(self):
|
||
|
# see gh-10174
|
||
|
|
||
|
# interpolation = linear (default case)
|
||
|
q = self.ts.quantile(0.1, interpolation='linear')
|
||
|
assert q == np.percentile(self.ts.dropna(), 10)
|
||
|
q1 = self.ts.quantile(0.1)
|
||
|
assert q1 == np.percentile(self.ts.dropna(), 10)
|
||
|
|
||
|
# test with and without interpolation keyword
|
||
|
assert q == q1
|
||
|
|
||
|
def test_quantile_interpolation_dtype(self):
|
||
|
# GH #10174
|
||
|
|
||
|
# interpolation = linear (default case)
|
||
|
q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='lower')
|
||
|
assert q == np.percentile(np.array([1, 3, 4]), 50)
|
||
|
assert is_integer(q)
|
||
|
|
||
|
q = pd.Series([1, 3, 4]).quantile(0.5, interpolation='higher')
|
||
|
assert q == np.percentile(np.array([1, 3, 4]), 50)
|
||
|
assert is_integer(q)
|
||
|
|
||
|
def test_quantile_nan(self):
|
||
|
|
||
|
# GH 13098
|
||
|
s = pd.Series([1, 2, 3, 4, np.nan])
|
||
|
result = s.quantile(0.5)
|
||
|
expected = 2.5
|
||
|
assert result == expected
|
||
|
|
||
|
# all nan/empty
|
||
|
cases = [Series([]), Series([np.nan, np.nan])]
|
||
|
|
||
|
for s in cases:
|
||
|
res = s.quantile(0.5)
|
||
|
assert np.isnan(res)
|
||
|
|
||
|
res = s.quantile([0.5])
|
||
|
tm.assert_series_equal(res, pd.Series([np.nan], index=[0.5]))
|
||
|
|
||
|
res = s.quantile([0.2, 0.3])
|
||
|
tm.assert_series_equal(res, pd.Series([np.nan, np.nan],
|
||
|
index=[0.2, 0.3]))
|
||
|
|
||
|
def test_quantile_box(self):
|
||
|
cases = [[pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'),
|
||
|
pd.Timestamp('2011-01-03')],
|
||
|
[pd.Timestamp('2011-01-01', tz='US/Eastern'),
|
||
|
pd.Timestamp('2011-01-02', tz='US/Eastern'),
|
||
|
pd.Timestamp('2011-01-03', tz='US/Eastern')],
|
||
|
[pd.Timedelta('1 days'), pd.Timedelta('2 days'),
|
||
|
pd.Timedelta('3 days')],
|
||
|
# NaT
|
||
|
[pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02'),
|
||
|
pd.Timestamp('2011-01-03'), pd.NaT],
|
||
|
[pd.Timestamp('2011-01-01', tz='US/Eastern'),
|
||
|
pd.Timestamp('2011-01-02', tz='US/Eastern'),
|
||
|
pd.Timestamp('2011-01-03', tz='US/Eastern'), pd.NaT],
|
||
|
[pd.Timedelta('1 days'), pd.Timedelta('2 days'),
|
||
|
pd.Timedelta('3 days'), pd.NaT]]
|
||
|
|
||
|
for case in cases:
|
||
|
s = pd.Series(case, name='XXX')
|
||
|
res = s.quantile(0.5)
|
||
|
assert res == case[1]
|
||
|
|
||
|
res = s.quantile([0.5])
|
||
|
exp = pd.Series([case[1]], index=[0.5], name='XXX')
|
||
|
tm.assert_series_equal(res, exp)
|
||
|
|
||
|
def test_datetime_timedelta_quantiles(self):
|
||
|
# covers #9694
|
||
|
assert pd.isna(Series([], dtype='M8[ns]').quantile(.5))
|
||
|
assert pd.isna(Series([], dtype='m8[ns]').quantile(.5))
|
||
|
|
||
|
def test_quantile_nat(self):
|
||
|
res = Series([pd.NaT, pd.NaT]).quantile(0.5)
|
||
|
assert res is pd.NaT
|
||
|
|
||
|
res = Series([pd.NaT, pd.NaT]).quantile([0.5])
|
||
|
tm.assert_series_equal(res, pd.Series([pd.NaT], index=[0.5]))
|
||
|
|
||
|
def test_quantile_empty(self):
|
||
|
|
||
|
# floats
|
||
|
s = Series([], dtype='float64')
|
||
|
|
||
|
res = s.quantile(0.5)
|
||
|
assert np.isnan(res)
|
||
|
|
||
|
res = s.quantile([0.5])
|
||
|
exp = Series([np.nan], index=[0.5])
|
||
|
tm.assert_series_equal(res, exp)
|
||
|
|
||
|
# int
|
||
|
s = Series([], dtype='int64')
|
||
|
|
||
|
res = s.quantile(0.5)
|
||
|
assert np.isnan(res)
|
||
|
|
||
|
res = s.quantile([0.5])
|
||
|
exp = Series([np.nan], index=[0.5])
|
||
|
tm.assert_series_equal(res, exp)
|
||
|
|
||
|
# datetime
|
||
|
s = Series([], dtype='datetime64[ns]')
|
||
|
|
||
|
res = s.quantile(0.5)
|
||
|
assert res is pd.NaT
|
||
|
|
||
|
res = s.quantile([0.5])
|
||
|
exp = Series([pd.NaT], index=[0.5])
|
||
|
tm.assert_series_equal(res, exp)
|