608 lines
22 KiB
Python
608 lines
22 KiB
Python
|
import os
|
||
|
import pytest
|
||
|
|
||
|
import numpy as np
|
||
|
from pandas.compat import zip
|
||
|
|
||
|
import pandas as pd
|
||
|
from pandas import (DataFrame, Series, isna, to_datetime, DatetimeIndex, Index,
|
||
|
Timestamp, Interval, IntervalIndex, Categorical,
|
||
|
cut, qcut, date_range, NaT, TimedeltaIndex)
|
||
|
from pandas.tseries.offsets import Nano, Day
|
||
|
import pandas.util.testing as tm
|
||
|
from pandas.api.types import CategoricalDtype as CDT
|
||
|
|
||
|
from pandas.core.algorithms import quantile
|
||
|
import pandas.core.reshape.tile as tmod
|
||
|
|
||
|
|
||
|
class TestCut(object):
|
||
|
|
||
|
def test_simple(self):
|
||
|
data = np.ones(5, dtype='int64')
|
||
|
result = cut(data, 4, labels=False)
|
||
|
expected = np.array([1, 1, 1, 1, 1])
|
||
|
tm.assert_numpy_array_equal(result, expected,
|
||
|
check_dtype=False)
|
||
|
|
||
|
def test_bins(self):
|
||
|
data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1])
|
||
|
result, bins = cut(data, 3, retbins=True)
|
||
|
|
||
|
intervals = IntervalIndex.from_breaks(bins.round(3))
|
||
|
intervals = intervals.take([0, 0, 0, 1, 2, 0])
|
||
|
expected = Categorical(intervals, ordered=True)
|
||
|
tm.assert_categorical_equal(result, expected)
|
||
|
tm.assert_almost_equal(bins, np.array([0.1905, 3.36666667,
|
||
|
6.53333333, 9.7]))
|
||
|
|
||
|
def test_right(self):
|
||
|
data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575])
|
||
|
result, bins = cut(data, 4, right=True, retbins=True)
|
||
|
intervals = IntervalIndex.from_breaks(bins.round(3))
|
||
|
expected = Categorical(intervals, ordered=True)
|
||
|
expected = expected.take([0, 0, 0, 2, 3, 0, 0])
|
||
|
tm.assert_categorical_equal(result, expected)
|
||
|
tm.assert_almost_equal(bins, np.array([0.1905, 2.575, 4.95,
|
||
|
7.325, 9.7]))
|
||
|
|
||
|
def test_noright(self):
|
||
|
data = np.array([.2, 1.4, 2.5, 6.2, 9.7, 2.1, 2.575])
|
||
|
result, bins = cut(data, 4, right=False, retbins=True)
|
||
|
intervals = IntervalIndex.from_breaks(bins.round(3), closed='left')
|
||
|
intervals = intervals.take([0, 0, 0, 2, 3, 0, 1])
|
||
|
expected = Categorical(intervals, ordered=True)
|
||
|
tm.assert_categorical_equal(result, expected)
|
||
|
tm.assert_almost_equal(bins, np.array([0.2, 2.575, 4.95,
|
||
|
7.325, 9.7095]))
|
||
|
|
||
|
def test_arraylike(self):
|
||
|
data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1]
|
||
|
result, bins = cut(data, 3, retbins=True)
|
||
|
intervals = IntervalIndex.from_breaks(bins.round(3))
|
||
|
intervals = intervals.take([0, 0, 0, 1, 2, 0])
|
||
|
expected = Categorical(intervals, ordered=True)
|
||
|
tm.assert_categorical_equal(result, expected)
|
||
|
tm.assert_almost_equal(bins, np.array([0.1905, 3.36666667,
|
||
|
6.53333333, 9.7]))
|
||
|
|
||
|
def test_bins_from_intervalindex(self):
|
||
|
c = cut(range(5), 3)
|
||
|
expected = c
|
||
|
result = cut(range(5), bins=expected.categories)
|
||
|
tm.assert_categorical_equal(result, expected)
|
||
|
|
||
|
expected = Categorical.from_codes(np.append(c.codes, -1),
|
||
|
categories=c.categories,
|
||
|
ordered=True)
|
||
|
result = cut(range(6), bins=expected.categories)
|
||
|
tm.assert_categorical_equal(result, expected)
|
||
|
|
||
|
# doc example
|
||
|
# make sure we preserve the bins
|
||
|
ages = np.array([10, 15, 13, 12, 23, 25, 28, 59, 60])
|
||
|
c = cut(ages, bins=[0, 18, 35, 70])
|
||
|
expected = IntervalIndex.from_tuples([(0, 18), (18, 35), (35, 70)])
|
||
|
tm.assert_index_equal(c.categories, expected)
|
||
|
|
||
|
result = cut([25, 20, 50], bins=c.categories)
|
||
|
tm.assert_index_equal(result.categories, expected)
|
||
|
tm.assert_numpy_array_equal(result.codes,
|
||
|
np.array([1, 1, 2], dtype='int8'))
|
||
|
|
||
|
def test_bins_not_monotonic(self):
|
||
|
data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1]
|
||
|
pytest.raises(ValueError, cut, data, [0.1, 1.5, 1, 10])
|
||
|
|
||
|
def test_wrong_num_labels(self):
|
||
|
data = [.2, 1.4, 2.5, 6.2, 9.7, 2.1]
|
||
|
pytest.raises(ValueError, cut, data, [0, 1, 10],
|
||
|
labels=['foo', 'bar', 'baz'])
|
||
|
|
||
|
def test_cut_corner(self):
|
||
|
# h3h
|
||
|
pytest.raises(ValueError, cut, [], 2)
|
||
|
|
||
|
pytest.raises(ValueError, cut, [1, 2, 3], 0.5)
|
||
|
|
||
|
@pytest.mark.parametrize('arg', [2, np.eye(2), DataFrame(np.eye(2))])
|
||
|
@pytest.mark.parametrize('cut_func', [cut, qcut])
|
||
|
def test_cut_not_1d_arg(self, arg, cut_func):
|
||
|
with pytest.raises(ValueError):
|
||
|
cut_func(arg, 2)
|
||
|
|
||
|
def test_cut_out_of_range_more(self):
|
||
|
# #1511
|
||
|
s = Series([0, -1, 0, 1, -3], name='x')
|
||
|
ind = cut(s, [0, 1], labels=False)
|
||
|
exp = Series([np.nan, np.nan, np.nan, 0, np.nan], name='x')
|
||
|
tm.assert_series_equal(ind, exp)
|
||
|
|
||
|
def test_labels(self):
|
||
|
arr = np.tile(np.arange(0, 1.01, 0.1), 4)
|
||
|
|
||
|
result, bins = cut(arr, 4, retbins=True)
|
||
|
ex_levels = IntervalIndex.from_breaks([-1e-3, 0.25, 0.5, 0.75, 1])
|
||
|
tm.assert_index_equal(result.categories, ex_levels)
|
||
|
|
||
|
result, bins = cut(arr, 4, retbins=True, right=False)
|
||
|
ex_levels = IntervalIndex.from_breaks([0, 0.25, 0.5, 0.75, 1 + 1e-3],
|
||
|
closed='left')
|
||
|
tm.assert_index_equal(result.categories, ex_levels)
|
||
|
|
||
|
def test_cut_pass_series_name_to_factor(self):
|
||
|
s = Series(np.random.randn(100), name='foo')
|
||
|
|
||
|
factor = cut(s, 4)
|
||
|
assert factor.name == 'foo'
|
||
|
|
||
|
def test_label_precision(self):
|
||
|
arr = np.arange(0, 0.73, 0.01)
|
||
|
|
||
|
result = cut(arr, 4, precision=2)
|
||
|
ex_levels = IntervalIndex.from_breaks([-0.00072, 0.18, 0.36,
|
||
|
0.54, 0.72])
|
||
|
tm.assert_index_equal(result.categories, ex_levels)
|
||
|
|
||
|
def test_na_handling(self):
|
||
|
arr = np.arange(0, 0.75, 0.01)
|
||
|
arr[::3] = np.nan
|
||
|
|
||
|
result = cut(arr, 4)
|
||
|
|
||
|
result_arr = np.asarray(result)
|
||
|
|
||
|
ex_arr = np.where(isna(arr), np.nan, result_arr)
|
||
|
|
||
|
tm.assert_almost_equal(result_arr, ex_arr)
|
||
|
|
||
|
result = cut(arr, 4, labels=False)
|
||
|
ex_result = np.where(isna(arr), np.nan, result)
|
||
|
tm.assert_almost_equal(result, ex_result)
|
||
|
|
||
|
def test_inf_handling(self):
|
||
|
data = np.arange(6)
|
||
|
data_ser = Series(data, dtype='int64')
|
||
|
|
||
|
bins = [-np.inf, 2, 4, np.inf]
|
||
|
result = cut(data, bins)
|
||
|
result_ser = cut(data_ser, bins)
|
||
|
|
||
|
ex_uniques = IntervalIndex.from_breaks(bins)
|
||
|
tm.assert_index_equal(result.categories, ex_uniques)
|
||
|
assert result[5] == Interval(4, np.inf)
|
||
|
assert result[0] == Interval(-np.inf, 2)
|
||
|
assert result_ser[5] == Interval(4, np.inf)
|
||
|
assert result_ser[0] == Interval(-np.inf, 2)
|
||
|
|
||
|
def test_qcut(self):
|
||
|
arr = np.random.randn(1000)
|
||
|
|
||
|
# We store the bins as Index that have been rounded
|
||
|
# to comparisons are a bit tricky.
|
||
|
labels, bins = qcut(arr, 4, retbins=True)
|
||
|
ex_bins = quantile(arr, [0, .25, .5, .75, 1.])
|
||
|
result = labels.categories.left.values
|
||
|
assert np.allclose(result, ex_bins[:-1], atol=1e-2)
|
||
|
result = labels.categories.right.values
|
||
|
assert np.allclose(result, ex_bins[1:], atol=1e-2)
|
||
|
|
||
|
ex_levels = cut(arr, ex_bins, include_lowest=True)
|
||
|
tm.assert_categorical_equal(labels, ex_levels)
|
||
|
|
||
|
def test_qcut_bounds(self):
|
||
|
arr = np.random.randn(1000)
|
||
|
|
||
|
factor = qcut(arr, 10, labels=False)
|
||
|
assert len(np.unique(factor)) == 10
|
||
|
|
||
|
def test_qcut_specify_quantiles(self):
|
||
|
arr = np.random.randn(100)
|
||
|
|
||
|
factor = qcut(arr, [0, .25, .5, .75, 1.])
|
||
|
expected = qcut(arr, 4)
|
||
|
tm.assert_categorical_equal(factor, expected)
|
||
|
|
||
|
def test_qcut_all_bins_same(self):
|
||
|
tm.assert_raises_regex(ValueError, "edges.*unique", qcut,
|
||
|
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 3)
|
||
|
|
||
|
def test_cut_out_of_bounds(self):
|
||
|
arr = np.random.randn(100)
|
||
|
|
||
|
result = cut(arr, [-1, 0, 1])
|
||
|
|
||
|
mask = isna(result)
|
||
|
ex_mask = (arr < -1) | (arr > 1)
|
||
|
tm.assert_numpy_array_equal(mask, ex_mask)
|
||
|
|
||
|
def test_cut_pass_labels(self):
|
||
|
arr = [50, 5, 10, 15, 20, 30, 70]
|
||
|
bins = [0, 25, 50, 100]
|
||
|
labels = ['Small', 'Medium', 'Large']
|
||
|
|
||
|
result = cut(arr, bins, labels=labels)
|
||
|
exp = Categorical(['Medium'] + 4 * ['Small'] + ['Medium', 'Large'],
|
||
|
categories=labels,
|
||
|
ordered=True)
|
||
|
tm.assert_categorical_equal(result, exp)
|
||
|
|
||
|
result = cut(arr, bins, labels=Categorical.from_codes([0, 1, 2],
|
||
|
labels))
|
||
|
exp = Categorical.from_codes([1] + 4 * [0] + [1, 2], labels)
|
||
|
tm.assert_categorical_equal(result, exp)
|
||
|
|
||
|
# issue 16459
|
||
|
labels = ['Good', 'Medium', 'Bad']
|
||
|
result = cut(arr, 3, labels=labels)
|
||
|
exp = cut(arr, 3, labels=Categorical(labels, categories=labels,
|
||
|
ordered=True))
|
||
|
tm.assert_categorical_equal(result, exp)
|
||
|
|
||
|
def test_qcut_include_lowest(self):
|
||
|
values = np.arange(10)
|
||
|
|
||
|
ii = qcut(values, 4)
|
||
|
|
||
|
ex_levels = IntervalIndex(
|
||
|
[Interval(-0.001, 2.25),
|
||
|
Interval(2.25, 4.5),
|
||
|
Interval(4.5, 6.75),
|
||
|
Interval(6.75, 9)])
|
||
|
tm.assert_index_equal(ii.categories, ex_levels)
|
||
|
|
||
|
def test_qcut_nas(self):
|
||
|
arr = np.random.randn(100)
|
||
|
arr[:20] = np.nan
|
||
|
|
||
|
result = qcut(arr, 4)
|
||
|
assert isna(result[:20]).all()
|
||
|
|
||
|
def test_qcut_index(self):
|
||
|
result = qcut([0, 2], 2)
|
||
|
intervals = [Interval(-0.001, 1), Interval(1, 2)]
|
||
|
expected = Categorical(intervals, ordered=True)
|
||
|
tm.assert_categorical_equal(result, expected)
|
||
|
|
||
|
def test_round_frac(self):
|
||
|
# it works
|
||
|
result = cut(np.arange(11.), 2)
|
||
|
|
||
|
result = cut(np.arange(11.) / 1e10, 2)
|
||
|
|
||
|
# #1979, negative numbers
|
||
|
|
||
|
result = tmod._round_frac(-117.9998, precision=3)
|
||
|
assert result == -118
|
||
|
result = tmod._round_frac(117.9998, precision=3)
|
||
|
assert result == 118
|
||
|
|
||
|
result = tmod._round_frac(117.9998, precision=2)
|
||
|
assert result == 118
|
||
|
result = tmod._round_frac(0.000123456, precision=2)
|
||
|
assert result == 0.00012
|
||
|
|
||
|
def test_qcut_binning_issues(self, datapath):
|
||
|
# #1978, 1979
|
||
|
cut_file = datapath(os.path.join('reshape', 'data', 'cut_data.csv'))
|
||
|
arr = np.loadtxt(cut_file)
|
||
|
|
||
|
result = qcut(arr, 20)
|
||
|
|
||
|
starts = []
|
||
|
ends = []
|
||
|
for lev in np.unique(result):
|
||
|
s = lev.left
|
||
|
e = lev.right
|
||
|
assert s != e
|
||
|
|
||
|
starts.append(float(s))
|
||
|
ends.append(float(e))
|
||
|
|
||
|
for (sp, sn), (ep, en) in zip(zip(starts[:-1], starts[1:]),
|
||
|
zip(ends[:-1], ends[1:])):
|
||
|
assert sp < sn
|
||
|
assert ep < en
|
||
|
assert ep <= sn
|
||
|
|
||
|
def test_cut_return_intervals(self):
|
||
|
s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
|
||
|
res = cut(s, 3)
|
||
|
exp_bins = np.linspace(0, 8, num=4).round(3)
|
||
|
exp_bins[0] -= 0.008
|
||
|
exp = Series(IntervalIndex.from_breaks(exp_bins, closed='right').take(
|
||
|
[0, 0, 0, 1, 1, 1, 2, 2, 2])).astype(CDT(ordered=True))
|
||
|
tm.assert_series_equal(res, exp)
|
||
|
|
||
|
def test_qcut_return_intervals(self):
|
||
|
s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
|
||
|
res = qcut(s, [0, 0.333, 0.666, 1])
|
||
|
exp_levels = np.array([Interval(-0.001, 2.664),
|
||
|
Interval(2.664, 5.328), Interval(5.328, 8)])
|
||
|
exp = Series(exp_levels.take([0, 0, 0, 1, 1, 1, 2, 2, 2])).astype(
|
||
|
CDT(ordered=True))
|
||
|
tm.assert_series_equal(res, exp)
|
||
|
|
||
|
def test_series_retbins(self):
|
||
|
# GH 8589
|
||
|
s = Series(np.arange(4))
|
||
|
result, bins = cut(s, 2, retbins=True)
|
||
|
expected = Series(IntervalIndex.from_breaks(
|
||
|
[-0.003, 1.5, 3], closed='right').repeat(2)).astype(
|
||
|
CDT(ordered=True))
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
result, bins = qcut(s, 2, retbins=True)
|
||
|
expected = Series(IntervalIndex.from_breaks(
|
||
|
[-0.001, 1.5, 3], closed='right').repeat(2)).astype(
|
||
|
CDT(ordered=True))
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
def test_cut_duplicates_bin(self):
|
||
|
# issue 20947
|
||
|
values = Series(np.array([1, 3, 5, 7, 9]),
|
||
|
index=["a", "b", "c", "d", "e"])
|
||
|
bins = [0, 2, 4, 6, 10, 10]
|
||
|
result = cut(values, bins, duplicates='drop')
|
||
|
expected = cut(values, pd.unique(bins))
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
pytest.raises(ValueError, cut, values, bins)
|
||
|
pytest.raises(ValueError, cut, values, bins, duplicates='raise')
|
||
|
|
||
|
# invalid
|
||
|
pytest.raises(ValueError, cut, values, bins, duplicates='foo')
|
||
|
|
||
|
def test_qcut_duplicates_bin(self):
|
||
|
# GH 7751
|
||
|
values = [0, 0, 0, 0, 1, 2, 3]
|
||
|
expected = IntervalIndex([Interval(-0.001, 1), Interval(1, 3)])
|
||
|
|
||
|
result = qcut(values, 3, duplicates='drop')
|
||
|
tm.assert_index_equal(result.categories, expected)
|
||
|
|
||
|
pytest.raises(ValueError, qcut, values, 3)
|
||
|
pytest.raises(ValueError, qcut, values, 3, duplicates='raise')
|
||
|
|
||
|
# invalid
|
||
|
pytest.raises(ValueError, qcut, values, 3, duplicates='foo')
|
||
|
|
||
|
def test_single_quantile(self):
|
||
|
# issue 15431
|
||
|
expected = Series([0, 0])
|
||
|
|
||
|
s = Series([9., 9.])
|
||
|
result = qcut(s, 1, labels=False)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
result = qcut(s, 1)
|
||
|
intervals = IntervalIndex([Interval(8.999, 9.0),
|
||
|
Interval(8.999, 9.0)], closed='right')
|
||
|
expected = Series(intervals).astype(CDT(ordered=True))
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
s = Series([-9., -9.])
|
||
|
expected = Series([0, 0])
|
||
|
result = qcut(s, 1, labels=False)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
result = qcut(s, 1)
|
||
|
intervals = IntervalIndex([Interval(-9.001, -9.0),
|
||
|
Interval(-9.001, -9.0)], closed='right')
|
||
|
expected = Series(intervals).astype(CDT(ordered=True))
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
s = Series([0., 0.])
|
||
|
expected = Series([0, 0])
|
||
|
result = qcut(s, 1, labels=False)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
result = qcut(s, 1)
|
||
|
intervals = IntervalIndex([Interval(-0.001, 0.0),
|
||
|
Interval(-0.001, 0.0)], closed='right')
|
||
|
expected = Series(intervals).astype(CDT(ordered=True))
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
s = Series([9])
|
||
|
expected = Series([0])
|
||
|
result = qcut(s, 1, labels=False)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
result = qcut(s, 1)
|
||
|
intervals = IntervalIndex([Interval(8.999, 9.0)], closed='right')
|
||
|
expected = Series(intervals).astype(CDT(ordered=True))
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
s = Series([-9])
|
||
|
expected = Series([0])
|
||
|
result = qcut(s, 1, labels=False)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
result = qcut(s, 1)
|
||
|
intervals = IntervalIndex([Interval(-9.001, -9.0)], closed='right')
|
||
|
expected = Series(intervals).astype(CDT(ordered=True))
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
s = Series([0])
|
||
|
expected = Series([0])
|
||
|
result = qcut(s, 1, labels=False)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
result = qcut(s, 1)
|
||
|
intervals = IntervalIndex([Interval(-0.001, 0.0)], closed='right')
|
||
|
expected = Series(intervals).astype(CDT(ordered=True))
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
def test_single_bin(self):
|
||
|
# issue 14652
|
||
|
expected = Series([0, 0])
|
||
|
|
||
|
s = Series([9., 9.])
|
||
|
result = cut(s, 1, labels=False)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
s = Series([-9., -9.])
|
||
|
result = cut(s, 1, labels=False)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
expected = Series([0])
|
||
|
|
||
|
s = Series([9])
|
||
|
result = cut(s, 1, labels=False)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
s = Series([-9])
|
||
|
result = cut(s, 1, labels=False)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
# issue 15428
|
||
|
expected = Series([0, 0])
|
||
|
|
||
|
s = Series([0., 0.])
|
||
|
result = cut(s, 1, labels=False)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
expected = Series([0])
|
||
|
|
||
|
s = Series([0])
|
||
|
result = cut(s, 1, labels=False)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"array_1_writeable, array_2_writeable",
|
||
|
[(True, True), (True, False), (False, False)])
|
||
|
def test_cut_read_only(self, array_1_writeable, array_2_writeable):
|
||
|
# issue 18773
|
||
|
array_1 = np.arange(0, 100, 10)
|
||
|
array_1.flags.writeable = array_1_writeable
|
||
|
|
||
|
array_2 = np.arange(0, 100, 10)
|
||
|
array_2.flags.writeable = array_2_writeable
|
||
|
|
||
|
hundred_elements = np.arange(100)
|
||
|
|
||
|
tm.assert_categorical_equal(cut(hundred_elements, array_1),
|
||
|
cut(hundred_elements, array_2))
|
||
|
|
||
|
|
||
|
class TestDatelike(object):
|
||
|
|
||
|
@pytest.mark.parametrize('s', [
|
||
|
Series(DatetimeIndex(['20180101', NaT, '20180103'])),
|
||
|
Series(TimedeltaIndex(['0 days', NaT, '2 days']))],
|
||
|
ids=lambda x: str(x.dtype))
|
||
|
def test_qcut_nat(self, s):
|
||
|
# GH 19768
|
||
|
intervals = IntervalIndex.from_tuples(
|
||
|
[(s[0] - Nano(), s[2] - Day()), np.nan, (s[2] - Day(), s[2])])
|
||
|
expected = Series(Categorical(intervals, ordered=True))
|
||
|
result = qcut(s, 2)
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
def test_datetime_cut(self):
|
||
|
# GH 14714
|
||
|
# testing for time data to be present as series
|
||
|
data = to_datetime(Series(['2013-01-01', '2013-01-02', '2013-01-03']))
|
||
|
|
||
|
result, bins = cut(data, 3, retbins=True)
|
||
|
expected = (
|
||
|
Series(IntervalIndex([
|
||
|
Interval(Timestamp('2012-12-31 23:57:07.200000'),
|
||
|
Timestamp('2013-01-01 16:00:00')),
|
||
|
Interval(Timestamp('2013-01-01 16:00:00'),
|
||
|
Timestamp('2013-01-02 08:00:00')),
|
||
|
Interval(Timestamp('2013-01-02 08:00:00'),
|
||
|
Timestamp('2013-01-03 00:00:00'))]))
|
||
|
.astype(CDT(ordered=True)))
|
||
|
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
# testing for time data to be present as list
|
||
|
data = [np.datetime64('2013-01-01'), np.datetime64('2013-01-02'),
|
||
|
np.datetime64('2013-01-03')]
|
||
|
result, bins = cut(data, 3, retbins=True)
|
||
|
tm.assert_series_equal(Series(result), expected)
|
||
|
|
||
|
# testing for time data to be present as ndarray
|
||
|
data = np.array([np.datetime64('2013-01-01'),
|
||
|
np.datetime64('2013-01-02'),
|
||
|
np.datetime64('2013-01-03')])
|
||
|
result, bins = cut(data, 3, retbins=True)
|
||
|
tm.assert_series_equal(Series(result), expected)
|
||
|
|
||
|
# testing for time data to be present as datetime index
|
||
|
data = DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03'])
|
||
|
result, bins = cut(data, 3, retbins=True)
|
||
|
tm.assert_series_equal(Series(result), expected)
|
||
|
|
||
|
@pytest.mark.parametrize('bins', [
|
||
|
3, [Timestamp('2013-01-01 04:57:07.200000'),
|
||
|
Timestamp('2013-01-01 21:00:00'),
|
||
|
Timestamp('2013-01-02 13:00:00'),
|
||
|
Timestamp('2013-01-03 05:00:00')]])
|
||
|
@pytest.mark.parametrize('box', [list, np.array, Index, Series])
|
||
|
def test_datetimetz_cut(self, bins, box):
|
||
|
# GH 19872
|
||
|
tz = 'US/Eastern'
|
||
|
s = Series(date_range('20130101', periods=3, tz=tz))
|
||
|
if not isinstance(bins, int):
|
||
|
bins = box(bins)
|
||
|
result = cut(s, bins)
|
||
|
expected = (
|
||
|
Series(IntervalIndex([
|
||
|
Interval(Timestamp('2012-12-31 23:57:07.200000', tz=tz),
|
||
|
Timestamp('2013-01-01 16:00:00', tz=tz)),
|
||
|
Interval(Timestamp('2013-01-01 16:00:00', tz=tz),
|
||
|
Timestamp('2013-01-02 08:00:00', tz=tz)),
|
||
|
Interval(Timestamp('2013-01-02 08:00:00', tz=tz),
|
||
|
Timestamp('2013-01-03 00:00:00', tz=tz))]))
|
||
|
.astype(CDT(ordered=True)))
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
@pytest.mark.parametrize('bins', [3, np.linspace(0, 1, 4)])
|
||
|
def test_datetimetz_qcut(self, bins):
|
||
|
# GH 19872
|
||
|
tz = 'US/Eastern'
|
||
|
s = Series(date_range('20130101', periods=3, tz=tz))
|
||
|
result = qcut(s, bins)
|
||
|
expected = (
|
||
|
Series(IntervalIndex([
|
||
|
Interval(Timestamp('2012-12-31 23:59:59.999999999', tz=tz),
|
||
|
Timestamp('2013-01-01 16:00:00', tz=tz)),
|
||
|
Interval(Timestamp('2013-01-01 16:00:00', tz=tz),
|
||
|
Timestamp('2013-01-02 08:00:00', tz=tz)),
|
||
|
Interval(Timestamp('2013-01-02 08:00:00', tz=tz),
|
||
|
Timestamp('2013-01-03 00:00:00', tz=tz))]))
|
||
|
.astype(CDT(ordered=True)))
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
def test_datetime_bin(self):
|
||
|
data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')]
|
||
|
bin_data = ['2012-12-12', '2012-12-14', '2012-12-16']
|
||
|
expected = (
|
||
|
Series(IntervalIndex([
|
||
|
Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])),
|
||
|
Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2]))]))
|
||
|
.astype(CDT(ordered=True)))
|
||
|
|
||
|
for conv in [Timestamp, Timestamp, np.datetime64]:
|
||
|
bins = [conv(v) for v in bin_data]
|
||
|
result = cut(data, bins=bins)
|
||
|
tm.assert_series_equal(Series(result), expected)
|
||
|
|
||
|
bin_pydatetime = [Timestamp(v).to_pydatetime() for v in bin_data]
|
||
|
result = cut(data, bins=bin_pydatetime)
|
||
|
tm.assert_series_equal(Series(result), expected)
|
||
|
|
||
|
bins = to_datetime(bin_data)
|
||
|
result = cut(data, bins=bin_pydatetime)
|
||
|
tm.assert_series_equal(Series(result), expected)
|
||
|
|
||
|
def test_datetime_nan(self):
|
||
|
|
||
|
def f():
|
||
|
cut(date_range('20130101', periods=3), bins=[0, 2, 4])
|
||
|
pytest.raises(ValueError, f)
|
||
|
|
||
|
result = cut(date_range('20130102', periods=5),
|
||
|
bins=date_range('20130101', periods=2))
|
||
|
mask = result.categories.isna()
|
||
|
tm.assert_numpy_array_equal(mask, np.array([False]))
|
||
|
mask = result.isna()
|
||
|
tm.assert_numpy_array_equal(
|
||
|
mask, np.array([False, True, True, True, True]))
|