laywerrobot/lib/python3.6/site-packages/pandas/tests/sparse/test_array.py
2020-08-27 21:55:39 +02:00

935 lines
33 KiB
Python

from pandas.compat import range
import re
import operator
import pytest
import warnings
from numpy import nan
import numpy as np
from pandas.core.sparse.api import SparseArray, SparseSeries
from pandas._libs.sparse import IntIndex
from pandas.util.testing import assert_almost_equal
import pandas.util.testing as tm
class TestSparseArray(object):
def setup_method(self, method):
self.arr_data = np.array([nan, nan, 1, 2, 3, nan, 4, 5, nan, 6])
self.arr = SparseArray(self.arr_data)
self.zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
def test_constructor_dtype(self):
arr = SparseArray([np.nan, 1, 2, np.nan])
assert arr.dtype == np.float64
assert np.isnan(arr.fill_value)
arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0)
assert arr.dtype == np.float64
assert arr.fill_value == 0
arr = SparseArray([0, 1, 2, 4], dtype=np.float64)
assert arr.dtype == np.float64
assert np.isnan(arr.fill_value)
arr = SparseArray([0, 1, 2, 4], dtype=np.int64)
assert arr.dtype == np.int64
assert arr.fill_value == 0
arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64)
assert arr.dtype == np.int64
assert arr.fill_value == 0
arr = SparseArray([0, 1, 2, 4], dtype=None)
assert arr.dtype == np.int64
assert arr.fill_value == 0
arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None)
assert arr.dtype == np.int64
assert arr.fill_value == 0
def test_constructor_object_dtype(self):
# GH 11856
arr = SparseArray(['A', 'A', np.nan, 'B'], dtype=np.object)
assert arr.dtype == np.object
assert np.isnan(arr.fill_value)
arr = SparseArray(['A', 'A', np.nan, 'B'], dtype=np.object,
fill_value='A')
assert arr.dtype == np.object
assert arr.fill_value == 'A'
# GH 17574
data = [False, 0, 100.0, 0.0]
arr = SparseArray(data, dtype=np.object, fill_value=False)
assert arr.dtype == np.object
assert arr.fill_value is False
arr_expected = np.array(data, dtype=np.object)
it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected))
assert np.fromiter(it, dtype=np.bool).all()
def test_constructor_spindex_dtype(self):
arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
tm.assert_sp_array_equal(arr, SparseArray([np.nan, 1, 2, np.nan]))
assert arr.dtype == np.float64
assert np.isnan(arr.fill_value)
arr = SparseArray(data=[1, 2, 3],
sparse_index=IntIndex(4, [1, 2, 3]),
dtype=np.int64, fill_value=0)
exp = SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0)
tm.assert_sp_array_equal(arr, exp)
assert arr.dtype == np.int64
assert arr.fill_value == 0
arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]),
fill_value=0, dtype=np.int64)
exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64)
tm.assert_sp_array_equal(arr, exp)
assert arr.dtype == np.int64
assert arr.fill_value == 0
arr = SparseArray(data=[1, 2, 3],
sparse_index=IntIndex(4, [1, 2, 3]),
dtype=None, fill_value=0)
exp = SparseArray([0, 1, 2, 3], dtype=None)
tm.assert_sp_array_equal(arr, exp)
assert arr.dtype == np.int64
assert arr.fill_value == 0
# scalar input
arr = SparseArray(data=1, sparse_index=IntIndex(1, [0]), dtype=None)
exp = SparseArray([1], dtype=None)
tm.assert_sp_array_equal(arr, exp)
assert arr.dtype == np.int64
assert arr.fill_value == 0
arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]),
fill_value=0, dtype=None)
exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None)
tm.assert_sp_array_equal(arr, exp)
assert arr.dtype == np.int64
assert arr.fill_value == 0
@pytest.mark.parametrize('scalar,dtype', [
(False, bool),
(0.0, 'float64'),
(1, 'int64'),
('z', 'object')])
def test_scalar_with_index_infer_dtype(self, scalar, dtype):
# GH 19163
arr = SparseArray(scalar, index=[1, 2, 3], fill_value=scalar)
exp = SparseArray([scalar, scalar, scalar], fill_value=scalar)
tm.assert_sp_array_equal(arr, exp)
assert arr.dtype == dtype
assert exp.dtype == dtype
def test_sparseseries_roundtrip(self):
# GH 13999
for kind in ['integer', 'block']:
for fill in [1, np.nan, 0]:
arr = SparseArray([np.nan, 1, np.nan, 2, 3], kind=kind,
fill_value=fill)
res = SparseArray(SparseSeries(arr))
tm.assert_sp_array_equal(arr, res)
arr = SparseArray([0, 0, 0, 1, 1, 2], dtype=np.int64,
kind=kind, fill_value=fill)
res = SparseArray(SparseSeries(arr), dtype=np.int64)
tm.assert_sp_array_equal(arr, res)
res = SparseArray(SparseSeries(arr))
tm.assert_sp_array_equal(arr, res)
for fill in [True, False, np.nan]:
arr = SparseArray([True, False, True, True], dtype=np.bool,
kind=kind, fill_value=fill)
res = SparseArray(SparseSeries(arr))
tm.assert_sp_array_equal(arr, res)
res = SparseArray(SparseSeries(arr))
tm.assert_sp_array_equal(arr, res)
def test_get_item(self):
assert np.isnan(self.arr[1])
assert self.arr[2] == 1
assert self.arr[7] == 5
assert self.zarr[0] == 0
assert self.zarr[2] == 1
assert self.zarr[7] == 5
errmsg = re.compile("bounds")
tm.assert_raises_regex(IndexError, errmsg, lambda: self.arr[11])
tm.assert_raises_regex(IndexError, errmsg, lambda: self.arr[-11])
assert self.arr[-1] == self.arr[len(self.arr) - 1]
def test_take(self):
assert np.isnan(self.arr.take(0))
assert np.isscalar(self.arr.take(2))
assert self.arr.take(2) == np.take(self.arr_data, 2)
assert self.arr.take(6) == np.take(self.arr_data, 6)
exp = SparseArray(np.take(self.arr_data, [2, 3]))
tm.assert_sp_array_equal(self.arr.take([2, 3]), exp)
exp = SparseArray(np.take(self.arr_data, [0, 1, 2]))
tm.assert_sp_array_equal(self.arr.take([0, 1, 2]), exp)
def test_take_fill_value(self):
data = np.array([1, np.nan, 0, 3, 0])
sparse = SparseArray(data, fill_value=0)
exp = SparseArray(np.take(data, [0]), fill_value=0)
tm.assert_sp_array_equal(sparse.take([0]), exp)
exp = SparseArray(np.take(data, [1, 3, 4]), fill_value=0)
tm.assert_sp_array_equal(sparse.take([1, 3, 4]), exp)
def test_take_negative(self):
exp = SparseArray(np.take(self.arr_data, [-1]))
tm.assert_sp_array_equal(self.arr.take([-1]), exp)
exp = SparseArray(np.take(self.arr_data, [-4, -3, -2]))
tm.assert_sp_array_equal(self.arr.take([-4, -3, -2]), exp)
def test_bad_take(self):
tm.assert_raises_regex(
IndexError, "bounds", lambda: self.arr.take(11))
pytest.raises(IndexError, lambda: self.arr.take(-11))
def test_take_invalid_kwargs(self):
msg = r"take\(\) got an unexpected keyword argument 'foo'"
tm.assert_raises_regex(TypeError, msg, self.arr.take,
[2, 3], foo=2)
msg = "the 'out' parameter is not supported"
tm.assert_raises_regex(ValueError, msg, self.arr.take,
[2, 3], out=self.arr)
msg = "the 'mode' parameter is not supported"
tm.assert_raises_regex(ValueError, msg, self.arr.take,
[2, 3], mode='clip')
def test_take_filling(self):
# similar tests as GH 12631
sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4])
result = sparse.take(np.array([1, 0, -1]))
expected = SparseArray([np.nan, np.nan, 4])
tm.assert_sp_array_equal(result, expected)
# fill_value
result = sparse.take(np.array([1, 0, -1]), fill_value=True)
expected = SparseArray([np.nan, np.nan, np.nan])
tm.assert_sp_array_equal(result, expected)
# allow_fill=False
result = sparse.take(np.array([1, 0, -1]),
allow_fill=False, fill_value=True)
expected = SparseArray([np.nan, np.nan, 4])
tm.assert_sp_array_equal(result, expected)
msg = ('When allow_fill=True and fill_value is not None, '
'all indices must be >= -1')
with tm.assert_raises_regex(ValueError, msg):
sparse.take(np.array([1, 0, -2]), fill_value=True)
with tm.assert_raises_regex(ValueError, msg):
sparse.take(np.array([1, 0, -5]), fill_value=True)
with pytest.raises(IndexError):
sparse.take(np.array([1, -6]))
with pytest.raises(IndexError):
sparse.take(np.array([1, 5]))
with pytest.raises(IndexError):
sparse.take(np.array([1, 5]), fill_value=True)
def test_take_filling_fill_value(self):
# same tests as GH 12631
sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)
result = sparse.take(np.array([1, 0, -1]))
expected = SparseArray([0, np.nan, 4], fill_value=0)
tm.assert_sp_array_equal(result, expected)
# fill_value
result = sparse.take(np.array([1, 0, -1]), fill_value=True)
expected = SparseArray([0, np.nan, 0], fill_value=0)
tm.assert_sp_array_equal(result, expected)
# allow_fill=False
result = sparse.take(np.array([1, 0, -1]),
allow_fill=False, fill_value=True)
expected = SparseArray([0, np.nan, 4], fill_value=0)
tm.assert_sp_array_equal(result, expected)
msg = ('When allow_fill=True and fill_value is not None, '
'all indices must be >= -1')
with tm.assert_raises_regex(ValueError, msg):
sparse.take(np.array([1, 0, -2]), fill_value=True)
with tm.assert_raises_regex(ValueError, msg):
sparse.take(np.array([1, 0, -5]), fill_value=True)
with pytest.raises(IndexError):
sparse.take(np.array([1, -6]))
with pytest.raises(IndexError):
sparse.take(np.array([1, 5]))
with pytest.raises(IndexError):
sparse.take(np.array([1, 5]), fill_value=True)
def test_take_filling_all_nan(self):
sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan])
result = sparse.take(np.array([1, 0, -1]))
expected = SparseArray([np.nan, np.nan, np.nan])
tm.assert_sp_array_equal(result, expected)
result = sparse.take(np.array([1, 0, -1]), fill_value=True)
expected = SparseArray([np.nan, np.nan, np.nan])
tm.assert_sp_array_equal(result, expected)
with pytest.raises(IndexError):
sparse.take(np.array([1, -6]))
with pytest.raises(IndexError):
sparse.take(np.array([1, 5]))
with pytest.raises(IndexError):
sparse.take(np.array([1, 5]), fill_value=True)
def test_set_item(self):
def setitem():
self.arr[5] = 3
def setslice():
self.arr[1:5] = 2
tm.assert_raises_regex(TypeError, "item assignment", setitem)
tm.assert_raises_regex(TypeError, "item assignment", setslice)
def test_constructor_from_too_large_array(self):
tm.assert_raises_regex(TypeError, "expected dimension <= 1 data",
SparseArray, np.arange(10).reshape((2, 5)))
def test_constructor_from_sparse(self):
res = SparseArray(self.zarr)
assert res.fill_value == 0
assert_almost_equal(res.sp_values, self.zarr.sp_values)
def test_constructor_copy(self):
cp = SparseArray(self.arr, copy=True)
cp.sp_values[:3] = 0
assert not (self.arr.sp_values[:3] == 0).any()
not_copy = SparseArray(self.arr)
not_copy.sp_values[:3] = 0
assert (self.arr.sp_values[:3] == 0).all()
def test_constructor_bool(self):
# GH 10648
data = np.array([False, False, True, True, False, False])
arr = SparseArray(data, fill_value=False, dtype=bool)
assert arr.dtype == bool
tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True]))
tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
tm.assert_numpy_array_equal(arr.sp_index.indices,
np.array([2, 3], np.int32))
for dense in [arr.to_dense(), arr.values]:
assert dense.dtype == bool
tm.assert_numpy_array_equal(dense, data)
def test_constructor_bool_fill_value(self):
arr = SparseArray([True, False, True], dtype=None)
assert arr.dtype == np.bool
assert not arr.fill_value
arr = SparseArray([True, False, True], dtype=np.bool)
assert arr.dtype == np.bool
assert not arr.fill_value
arr = SparseArray([True, False, True], dtype=np.bool, fill_value=True)
assert arr.dtype == np.bool
assert arr.fill_value
def test_constructor_float32(self):
# GH 10648
data = np.array([1., np.nan, 3], dtype=np.float32)
arr = SparseArray(data, dtype=np.float32)
assert arr.dtype == np.float32
tm.assert_numpy_array_equal(arr.sp_values,
np.array([1, 3], dtype=np.float32))
tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
tm.assert_numpy_array_equal(arr.sp_index.indices,
np.array([0, 2], dtype=np.int32))
for dense in [arr.to_dense(), arr.values]:
assert dense.dtype == np.float32
tm.assert_numpy_array_equal(dense, data)
def test_astype(self):
res = self.arr.astype('f8')
res.sp_values[:3] = 27
assert not (self.arr.sp_values[:3] == 27).any()
msg = "unable to coerce current fill_value nan to int64 dtype"
with tm.assert_raises_regex(ValueError, msg):
self.arr.astype('i8')
arr = SparseArray([0, np.nan, 0, 1])
with tm.assert_raises_regex(ValueError, msg):
arr.astype('i8')
arr = SparseArray([0, np.nan, 0, 1], fill_value=0)
msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer'
with tm.assert_raises_regex(ValueError, msg):
arr.astype('i8')
def test_astype_all(self):
vals = np.array([1, 2, 3])
arr = SparseArray(vals, fill_value=1)
types = [np.float64, np.float32, np.int64,
np.int32, np.int16, np.int8]
for typ in types:
res = arr.astype(typ)
assert res.dtype == typ
assert res.sp_values.dtype == typ
tm.assert_numpy_array_equal(res.values, vals.astype(typ))
def test_set_fill_value(self):
arr = SparseArray([1., np.nan, 2.], fill_value=np.nan)
arr.fill_value = 2
assert arr.fill_value == 2
arr = SparseArray([1, 0, 2], fill_value=0, dtype=np.int64)
arr.fill_value = 2
assert arr.fill_value == 2
# coerces to int
msg = "unable to set fill_value 3\\.1 to int64 dtype"
with tm.assert_raises_regex(ValueError, msg):
arr.fill_value = 3.1
msg = "unable to set fill_value nan to int64 dtype"
with tm.assert_raises_regex(ValueError, msg):
arr.fill_value = np.nan
arr = SparseArray([True, False, True], fill_value=False, dtype=np.bool)
arr.fill_value = True
assert arr.fill_value
# coerces to bool
msg = "unable to set fill_value 0 to bool dtype"
with tm.assert_raises_regex(ValueError, msg):
arr.fill_value = 0
msg = "unable to set fill_value nan to bool dtype"
with tm.assert_raises_regex(ValueError, msg):
arr.fill_value = np.nan
# invalid
msg = "fill_value must be a scalar"
for val in [[1, 2, 3], np.array([1, 2]), (1, 2, 3)]:
with tm.assert_raises_regex(ValueError, msg):
arr.fill_value = val
def test_copy_shallow(self):
arr2 = self.arr.copy(deep=False)
def _get_base(values):
base = values.base
while base.base is not None:
base = base.base
return base
assert (_get_base(arr2) is _get_base(self.arr))
def test_values_asarray(self):
assert_almost_equal(self.arr.values, self.arr_data)
assert_almost_equal(self.arr.to_dense(), self.arr_data)
assert_almost_equal(self.arr.sp_values, np.asarray(self.arr))
@pytest.mark.parametrize('data,shape,dtype', [
([0, 0, 0, 0, 0], (5,), None),
([], (0,), None),
([0], (1,), None),
(['A', 'A', np.nan, 'B'], (4,), np.object)
])
def test_shape(self, data, shape, dtype):
# GH 21126
out = SparseArray(data, dtype=dtype)
assert out.shape == shape
def test_to_dense(self):
vals = np.array([1, np.nan, np.nan, 3, np.nan])
res = SparseArray(vals).to_dense()
tm.assert_numpy_array_equal(res, vals)
res = SparseArray(vals, fill_value=0).to_dense()
tm.assert_numpy_array_equal(res, vals)
vals = np.array([1, np.nan, 0, 3, 0])
res = SparseArray(vals).to_dense()
tm.assert_numpy_array_equal(res, vals)
res = SparseArray(vals, fill_value=0).to_dense()
tm.assert_numpy_array_equal(res, vals)
vals = np.array([np.nan, np.nan, np.nan, np.nan, np.nan])
res = SparseArray(vals).to_dense()
tm.assert_numpy_array_equal(res, vals)
res = SparseArray(vals, fill_value=0).to_dense()
tm.assert_numpy_array_equal(res, vals)
# see gh-14647
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
SparseArray(vals).to_dense(fill=2)
def test_getitem(self):
def _checkit(i):
assert_almost_equal(self.arr[i], self.arr.values[i])
for i in range(len(self.arr)):
_checkit(i)
_checkit(-i)
def test_getslice(self):
result = self.arr[:-3]
exp = SparseArray(self.arr.values[:-3])
tm.assert_sp_array_equal(result, exp)
result = self.arr[-4:]
exp = SparseArray(self.arr.values[-4:])
tm.assert_sp_array_equal(result, exp)
# two corner cases from Series
result = self.arr[-12:]
exp = SparseArray(self.arr)
tm.assert_sp_array_equal(result, exp)
result = self.arr[:-12]
exp = SparseArray(self.arr.values[:0])
tm.assert_sp_array_equal(result, exp)
def test_getslice_tuple(self):
dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])
sparse = SparseArray(dense)
res = sparse[4:, ]
exp = SparseArray(dense[4:, ])
tm.assert_sp_array_equal(res, exp)
sparse = SparseArray(dense, fill_value=0)
res = sparse[4:, ]
exp = SparseArray(dense[4:, ], fill_value=0)
tm.assert_sp_array_equal(res, exp)
with pytest.raises(IndexError):
sparse[4:, :]
with pytest.raises(IndexError):
# check numpy compat
dense[4:, :]
def test_binary_operators(self):
data1 = np.random.randn(20)
data2 = np.random.randn(20)
data1[::2] = np.nan
data2[::3] = np.nan
arr1 = SparseArray(data1)
arr2 = SparseArray(data2)
data1[::2] = 3
data2[::3] = 3
farr1 = SparseArray(data1, fill_value=3)
farr2 = SparseArray(data2, fill_value=3)
def _check_op(op, first, second):
res = op(first, second)
exp = SparseArray(op(first.values, second.values),
fill_value=first.fill_value)
assert isinstance(res, SparseArray)
assert_almost_equal(res.values, exp.values)
res2 = op(first, second.values)
assert isinstance(res2, SparseArray)
tm.assert_sp_array_equal(res, res2)
res3 = op(first.values, second)
assert isinstance(res3, SparseArray)
tm.assert_sp_array_equal(res, res3)
res4 = op(first, 4)
assert isinstance(res4, SparseArray)
# ignore this if the actual op raises (e.g. pow)
try:
exp = op(first.values, 4)
exp_fv = op(first.fill_value, 4)
assert_almost_equal(res4.fill_value, exp_fv)
assert_almost_equal(res4.values, exp)
except ValueError:
pass
def _check_inplace_op(op):
tmp = arr1.copy()
pytest.raises(NotImplementedError, op, tmp, arr2)
with np.errstate(all='ignore'):
bin_ops = [operator.add, operator.sub, operator.mul,
operator.truediv, operator.floordiv, operator.pow]
for op in bin_ops:
_check_op(op, arr1, arr2)
_check_op(op, farr1, farr2)
inplace_ops = ['iadd', 'isub', 'imul', 'itruediv', 'ifloordiv',
'ipow']
for op in inplace_ops:
_check_inplace_op(getattr(operator, op))
def test_pickle(self):
def _check_roundtrip(obj):
unpickled = tm.round_trip_pickle(obj)
tm.assert_sp_array_equal(unpickled, obj)
_check_roundtrip(self.arr)
_check_roundtrip(self.zarr)
def test_generator_warnings(self):
sp_arr = SparseArray([1, 2, 3])
with warnings.catch_warnings(record=True) as w:
warnings.filterwarnings(action='always',
category=DeprecationWarning)
warnings.filterwarnings(action='always',
category=PendingDeprecationWarning)
for _ in sp_arr:
pass
assert len(w) == 0
def test_fillna(self):
s = SparseArray([1, np.nan, np.nan, 3, np.nan])
res = s.fillna(-1)
exp = SparseArray([1, -1, -1, 3, -1], fill_value=-1, dtype=np.float64)
tm.assert_sp_array_equal(res, exp)
s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)
res = s.fillna(-1)
exp = SparseArray([1, -1, -1, 3, -1], fill_value=0, dtype=np.float64)
tm.assert_sp_array_equal(res, exp)
s = SparseArray([1, np.nan, 0, 3, 0])
res = s.fillna(-1)
exp = SparseArray([1, -1, 0, 3, 0], fill_value=-1, dtype=np.float64)
tm.assert_sp_array_equal(res, exp)
s = SparseArray([1, np.nan, 0, 3, 0], fill_value=0)
res = s.fillna(-1)
exp = SparseArray([1, -1, 0, 3, 0], fill_value=0, dtype=np.float64)
tm.assert_sp_array_equal(res, exp)
s = SparseArray([np.nan, np.nan, np.nan, np.nan])
res = s.fillna(-1)
exp = SparseArray([-1, -1, -1, -1], fill_value=-1, dtype=np.float64)
tm.assert_sp_array_equal(res, exp)
s = SparseArray([np.nan, np.nan, np.nan, np.nan], fill_value=0)
res = s.fillna(-1)
exp = SparseArray([-1, -1, -1, -1], fill_value=0, dtype=np.float64)
tm.assert_sp_array_equal(res, exp)
# float dtype's fill_value is np.nan, replaced by -1
s = SparseArray([0., 0., 0., 0.])
res = s.fillna(-1)
exp = SparseArray([0., 0., 0., 0.], fill_value=-1)
tm.assert_sp_array_equal(res, exp)
# int dtype shouldn't have missing. No changes.
s = SparseArray([0, 0, 0, 0])
assert s.dtype == np.int64
assert s.fill_value == 0
res = s.fillna(-1)
tm.assert_sp_array_equal(res, s)
s = SparseArray([0, 0, 0, 0], fill_value=0)
assert s.dtype == np.int64
assert s.fill_value == 0
res = s.fillna(-1)
exp = SparseArray([0, 0, 0, 0], fill_value=0)
tm.assert_sp_array_equal(res, exp)
# fill_value can be nan if there is no missing hole.
# only fill_value will be changed
s = SparseArray([0, 0, 0, 0], fill_value=np.nan)
assert s.dtype == np.int64
assert np.isnan(s.fill_value)
res = s.fillna(-1)
exp = SparseArray([0, 0, 0, 0], fill_value=-1)
tm.assert_sp_array_equal(res, exp)
def test_fillna_overlap(self):
s = SparseArray([1, np.nan, np.nan, 3, np.nan])
# filling with existing value doesn't replace existing value with
# fill_value, i.e. existing 3 remains in sp_values
res = s.fillna(3)
exp = np.array([1, 3, 3, 3, 3], dtype=np.float64)
tm.assert_numpy_array_equal(res.to_dense(), exp)
s = SparseArray([1, np.nan, np.nan, 3, np.nan], fill_value=0)
res = s.fillna(3)
exp = SparseArray([1, 3, 3, 3, 3], fill_value=0, dtype=np.float64)
tm.assert_sp_array_equal(res, exp)
class TestSparseArrayAnalytics(object):
@pytest.mark.parametrize('data,pos,neg', [
([True, True, True], True, False),
([1, 2, 1], 1, 0),
([1.0, 2.0, 1.0], 1.0, 0.0)
])
def test_all(self, data, pos, neg):
# GH 17570
out = SparseArray(data).all()
assert out
out = SparseArray(data, fill_value=pos).all()
assert out
data[1] = neg
out = SparseArray(data).all()
assert not out
out = SparseArray(data, fill_value=pos).all()
assert not out
@pytest.mark.parametrize('data,pos,neg', [
([True, True, True], True, False),
([1, 2, 1], 1, 0),
([1.0, 2.0, 1.0], 1.0, 0.0)
])
def test_numpy_all(self, data, pos, neg):
# GH 17570
out = np.all(SparseArray(data))
assert out
out = np.all(SparseArray(data, fill_value=pos))
assert out
data[1] = neg
out = np.all(SparseArray(data))
assert not out
out = np.all(SparseArray(data, fill_value=pos))
assert not out
msg = "the 'out' parameter is not supported"
tm.assert_raises_regex(ValueError, msg, np.all,
SparseArray(data), out=out)
@pytest.mark.parametrize('data,pos,neg', [
([False, True, False], True, False),
([0, 2, 0], 2, 0),
([0.0, 2.0, 0.0], 2.0, 0.0)
])
def test_any(self, data, pos, neg):
# GH 17570
out = SparseArray(data).any()
assert out
out = SparseArray(data, fill_value=pos).any()
assert out
data[1] = neg
out = SparseArray(data).any()
assert not out
out = SparseArray(data, fill_value=pos).any()
assert not out
@pytest.mark.parametrize('data,pos,neg', [
([False, True, False], True, False),
([0, 2, 0], 2, 0),
([0.0, 2.0, 0.0], 2.0, 0.0)
])
def test_numpy_any(self, data, pos, neg):
# GH 17570
out = np.any(SparseArray(data))
assert out
out = np.any(SparseArray(data, fill_value=pos))
assert out
data[1] = neg
out = np.any(SparseArray(data))
assert not out
out = np.any(SparseArray(data, fill_value=pos))
assert not out
msg = "the 'out' parameter is not supported"
tm.assert_raises_regex(ValueError, msg, np.any,
SparseArray(data), out=out)
def test_sum(self):
data = np.arange(10).astype(float)
out = SparseArray(data).sum()
assert out == 45.0
data[5] = np.nan
out = SparseArray(data, fill_value=2).sum()
assert out == 40.0
out = SparseArray(data, fill_value=np.nan).sum()
assert out == 40.0
def test_numpy_sum(self):
data = np.arange(10).astype(float)
out = np.sum(SparseArray(data))
assert out == 45.0
data[5] = np.nan
out = np.sum(SparseArray(data, fill_value=2))
assert out == 40.0
out = np.sum(SparseArray(data, fill_value=np.nan))
assert out == 40.0
msg = "the 'dtype' parameter is not supported"
tm.assert_raises_regex(ValueError, msg, np.sum,
SparseArray(data), dtype=np.int64)
msg = "the 'out' parameter is not supported"
tm.assert_raises_regex(ValueError, msg, np.sum,
SparseArray(data), out=out)
def test_cumsum(self):
non_null_data = np.array([1, 2, 3, 4, 5], dtype=float)
non_null_expected = SparseArray(non_null_data.cumsum())
null_data = np.array([1, 2, np.nan, 4, 5], dtype=float)
null_expected = SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0]))
for data, expected in [
(null_data, null_expected),
(non_null_data, non_null_expected)
]:
out = SparseArray(data).cumsum()
tm.assert_sp_array_equal(out, expected)
out = SparseArray(data, fill_value=np.nan).cumsum()
tm.assert_sp_array_equal(out, expected)
out = SparseArray(data, fill_value=2).cumsum()
tm.assert_sp_array_equal(out, expected)
axis = 1 # SparseArray currently 1-D, so only axis = 0 is valid.
msg = "axis\\(={axis}\\) out of bounds".format(axis=axis)
with tm.assert_raises_regex(ValueError, msg):
SparseArray(data).cumsum(axis=axis)
def test_numpy_cumsum(self):
non_null_data = np.array([1, 2, 3, 4, 5], dtype=float)
non_null_expected = SparseArray(non_null_data.cumsum())
null_data = np.array([1, 2, np.nan, 4, 5], dtype=float)
null_expected = SparseArray(np.array([1.0, 3.0, np.nan, 7.0, 12.0]))
for data, expected in [
(null_data, null_expected),
(non_null_data, non_null_expected)
]:
out = np.cumsum(SparseArray(data))
tm.assert_sp_array_equal(out, expected)
out = np.cumsum(SparseArray(data, fill_value=np.nan))
tm.assert_sp_array_equal(out, expected)
out = np.cumsum(SparseArray(data, fill_value=2))
tm.assert_sp_array_equal(out, expected)
msg = "the 'dtype' parameter is not supported"
tm.assert_raises_regex(ValueError, msg, np.cumsum,
SparseArray(data), dtype=np.int64)
msg = "the 'out' parameter is not supported"
tm.assert_raises_regex(ValueError, msg, np.cumsum,
SparseArray(data), out=out)
def test_mean(self):
data = np.arange(10).astype(float)
out = SparseArray(data).mean()
assert out == 4.5
data[5] = np.nan
out = SparseArray(data).mean()
assert out == 40.0 / 9
def test_numpy_mean(self):
data = np.arange(10).astype(float)
out = np.mean(SparseArray(data))
assert out == 4.5
data[5] = np.nan
out = np.mean(SparseArray(data))
assert out == 40.0 / 9
msg = "the 'dtype' parameter is not supported"
tm.assert_raises_regex(ValueError, msg, np.mean,
SparseArray(data), dtype=np.int64)
msg = "the 'out' parameter is not supported"
tm.assert_raises_regex(ValueError, msg, np.mean,
SparseArray(data), out=out)
def test_ufunc(self):
# GH 13853 make sure ufunc is applied to fill_value
sparse = SparseArray([1, np.nan, 2, np.nan, -2])
result = SparseArray([1, np.nan, 2, np.nan, 2])
tm.assert_sp_array_equal(abs(sparse), result)
tm.assert_sp_array_equal(np.abs(sparse), result)
sparse = SparseArray([1, -1, 2, -2], fill_value=1)
result = SparseArray([1, 2, 2], sparse_index=sparse.sp_index,
fill_value=1)
tm.assert_sp_array_equal(abs(sparse), result)
tm.assert_sp_array_equal(np.abs(sparse), result)
sparse = SparseArray([1, -1, 2, -2], fill_value=-1)
result = SparseArray([1, 2, 2], sparse_index=sparse.sp_index,
fill_value=1)
tm.assert_sp_array_equal(abs(sparse), result)
tm.assert_sp_array_equal(np.abs(sparse), result)
sparse = SparseArray([1, np.nan, 2, np.nan, -2])
result = SparseArray(np.sin([1, np.nan, 2, np.nan, -2]))
tm.assert_sp_array_equal(np.sin(sparse), result)
sparse = SparseArray([1, -1, 2, -2], fill_value=1)
result = SparseArray(np.sin([1, -1, 2, -2]), fill_value=np.sin(1))
tm.assert_sp_array_equal(np.sin(sparse), result)
sparse = SparseArray([1, -1, 0, -2], fill_value=0)
result = SparseArray(np.sin([1, -1, 0, -2]), fill_value=np.sin(0))
tm.assert_sp_array_equal(np.sin(sparse), result)
def test_ufunc_args(self):
# GH 13853 make sure ufunc is applied to fill_value, including its arg
sparse = SparseArray([1, np.nan, 2, np.nan, -2])
result = SparseArray([2, np.nan, 3, np.nan, -1])
tm.assert_sp_array_equal(np.add(sparse, 1), result)
sparse = SparseArray([1, -1, 2, -2], fill_value=1)
result = SparseArray([2, 0, 3, -1], fill_value=2)
tm.assert_sp_array_equal(np.add(sparse, 1), result)
sparse = SparseArray([1, -1, 0, -2], fill_value=0)
result = SparseArray([2, 0, 1, -1], fill_value=1)
tm.assert_sp_array_equal(np.add(sparse, 1), result)