283 lines
7.7 KiB
Python
283 lines
7.7 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
import pytest
|
||
|
import os
|
||
|
import collections
|
||
|
from functools import partial
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
from pandas import Series, DataFrame, Timestamp
|
||
|
from pandas.compat import range, lmap
|
||
|
import pandas.core.common as com
|
||
|
from pandas.core import ops
|
||
|
from pandas.io.common import _get_handle
|
||
|
import pandas.util.testing as tm
|
||
|
|
||
|
|
||
|
def test_mut_exclusive():
    """Two non-None keyword arguments must raise; otherwise the single
    non-None value (or None, when neither is given) is returned."""
    pattern = "mutually exclusive arguments: '[ab]' and '[ab]'"
    with tm.assert_raises_regex(TypeError, pattern):
        com._mut_exclusive(a=1, b=2)

    # exactly one value supplied -> that value wins, regardless of side
    assert com._mut_exclusive(a=None, b=2) == 2
    assert com._mut_exclusive(a=1, b=None) == 1

    # neither supplied -> None
    assert com._mut_exclusive(major=None, major_axis=None) is None
|
||
|
|
||
|
|
||
|
def test_get_callable_name():
    """_get_callable_name resolves a human-readable name for various
    callables: plain functions, lambdas, (nested) partials, and callable
    instances; non-callables yield None.

    Note: ``partial`` is already imported at module level, so the previous
    redundant function-local ``from functools import partial`` is removed.
    """
    getname = com._get_callable_name

    def fn(x):
        return x

    lambda_ = lambda x: x
    # partials (even nested ones) should unwrap to the wrapped function's name
    part1 = partial(fn)
    part2 = partial(part1)

    class somecall(object):

        def __call__(self):
            # never invoked; only the *class name* resolution is under test
            return x  # noqa

    assert getname(fn) == 'fn'
    # lambdas get some non-empty name ('<lambda>'), exact value unasserted
    assert getname(lambda_)
    assert getname(part1) == 'fn'
    assert getname(part2) == 'fn'
    # callable instances resolve to their type's name
    assert getname(somecall()) == 'somecall'
    # non-callables have no name
    assert getname(1) is None
|
||
|
|
||
|
|
||
|
def test_any_none():
    """_any_none is True iff at least one positional argument is None."""
    assert com._any_none(1, 2, 3, None)
    assert not com._any_none(1, 2, 3, 4)
|
||
|
|
||
|
|
||
|
def test_all_not_none():
    """_all_not_none is True iff no positional argument is None."""
    assert com._all_not_none(1, 2, 3, 4)
    assert not com._all_not_none(1, 2, 3, None)
    assert not com._all_not_none(None, None, None, None)
|
||
|
|
||
|
|
||
|
def test_iterpairs():
    """iterpairs yields consecutive overlapping pairs of the input."""
    pairs = list(com.iterpairs([1, 2, 3, 4]))
    assert pairs == [(1, 2), (2, 3), (3, 4)]
|
||
|
|
||
|
|
||
|
def test_split_ranges():
    """split_ranges must emit (start, end) spans covering every truthy
    element of a boolean mask and nothing else."""

    def check(mask):
        n_false = sum(1 for v in mask if not v)

        covered = 0
        for start, end in com.split_ranges(mask):
            covered += end - start
            # every element inside a reported span is truthy
            assert all(mask[start:end])

        # the spans plus the falsy entries account for the whole mask,
        # i.e. the ranges are a complete cover of the truthy positions
        assert covered + n_false == len(mask)

    # exhaustively test every possible mask of length 8 by counting up
    # through all 2**8 bit patterns
    width = 8
    for i in range(2 ** width):
        bits = [(i >> shift) & 1 for shift in range(width - 1, -1, -1)]
        check([b == 1 for b in bits])

    # degenerate base cases
    check([])
    check([0])
    check([1])
|
||
|
|
||
|
|
||
|
def test_map_indices_py():
    """map_indices_py maps each value to its position in the sequence."""
    assert com.map_indices_py([4, 3, 2, 1]) == {4: 0, 3: 1, 2: 2, 1: 3}
|
||
|
|
||
|
|
||
|
def test_union():
    """union of two disjoint lists contains every element of both."""
    left = [1, 2, 3]
    right = [4, 5, 6]
    assert sorted(com.union(left, right)) == left + right
|
||
|
|
||
|
|
||
|
def test_difference():
    """difference returns elements of the first argument absent from
    the second."""
    subset = [1, 2, 3]
    superset = [1, 2, 3, 4, 5, 6]
    assert sorted(com.difference(superset, subset)) == [4, 5, 6]
|
||
|
|
||
|
|
||
|
def test_intersection():
    """intersecting a list with one of its supersets returns the list."""
    subset = [1, 2, 3]
    superset = [1, 2, 3, 4, 5, 6]
    assert sorted(com.intersection(subset, superset)) == subset
|
||
|
|
||
|
|
||
|
def test_groupby():
    """com.groupby buckets values by a key function, preserving the
    original ordering inside each bucket."""
    values = ['foo', 'bar', 'baz', 'baz2', 'qux', 'foo3']
    by_first_letter = {'f': ['foo', 'foo3'],
                       'b': ['bar', 'baz', 'baz2'],
                       'q': ['qux']}

    for key, group in com.groupby(values, lambda x: x[0]):
        assert group == by_first_letter[key]
|
||
|
|
||
|
|
||
|
def test_random_state():
    """_random_state accepts an int seed, a RandomState instance, or
    nothing at all; anything else raises ValueError."""
    from numpy.random import RandomState

    # an int seed produces a RandomState seeded with that value
    assert com._random_state(5).uniform() == RandomState(5).uniform()

    # an existing RandomState instance is used as-is
    seeded = RandomState(10)
    assert com._random_state(seeded).uniform() == RandomState(10).uniform()

    # no argument means the global numpy.random module
    assert com._random_state() is np.random

    # floats and strings are rejected
    for bad_value in ('test', 5.5):
        with pytest.raises(ValueError):
            com._random_state(bad_value)
|
||
|
|
||
|
|
||
|
def test_maybe_match_name():
    """_maybe_match_name keeps a name only when the operands agree on it
    (or only one side carries a name at all)."""
    match = ops._maybe_match_name

    # identical names are preserved
    assert match(Series([1], name='x'), Series([2], name='x')) == 'x'

    # conflicting or partially-missing names collapse to None
    assert match(Series([1], name='x'), Series([2], name='y')) is None
    assert match(Series([1]), Series([2], name='x')) is None
    assert match(Series([1], name='x'), Series([2])) is None

    # a non-Series operand contributes no name, so the named side wins
    assert match(Series([1], name='x'), [2]) == 'x'
    assert match([1], Series([2], name='y')) == 'y'
|
||
|
|
||
|
|
||
|
def test_dict_compat():
    """_dict_compat converts datetime64 keys to Timestamps and leaves
    every other mapping untouched."""
    dt64_keyed = {np.datetime64('1990-03-15'): 1,
                  np.datetime64('2015-03-15'): 2}
    ts_keyed = {Timestamp('1990-3-15'): 1, Timestamp('2015-03-15'): 2}
    plain = {1: 2, 3: 4, 5: 6}

    assert com._dict_compat(dt64_keyed) == ts_keyed
    # already-Timestamp keys and non-datetime keys pass through unchanged
    assert com._dict_compat(ts_keyed) == ts_keyed
    assert com._dict_compat(plain) == plain
|
||
|
|
||
|
|
||
|
def test_standardize_mapping():
    """standardize_mapping validates and normalizes the ``into`` argument
    used by to_dict-style APIs."""
    # an uninitialized defaultdict class is rejected (no default_factory)
    with pytest.raises(TypeError):
        com.standardize_mapping(collections.defaultdict)

    # non-mapping instances and non-mapping classes are rejected
    with pytest.raises(TypeError):
        com.standardize_mapping([])
    with pytest.raises(TypeError):
        com.standardize_mapping(list)

    # mapping instances collapse to their type
    assert com.standardize_mapping({'bad': 'data'}) == dict
    assert com.standardize_mapping({}) == dict

    # an initialized defaultdict is wrapped in a partial that preserves
    # its default_factory
    initialized = collections.defaultdict(list)
    assert isinstance(com.standardize_mapping(initialized), partial)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize('obj', [
    DataFrame(100 * [[0.123456, 0.234567, 0.567567],
                     [12.32112, 123123.2, 321321.2]],
              columns=['X', 'Y', 'Z']),
    Series(100 * [0.123456, 0.234567, 0.567567], name='X')])
@pytest.mark.parametrize('method', ['to_pickle', 'to_json', 'to_csv'])
def test_compression_size(obj, method, compression):
    """Writing with compression must produce a smaller file than writing
    the same object uncompressed."""
    if not compression:
        pytest.skip("only test compression case.")

    with tm.ensure_clean() as path:
        writer = getattr(obj, method)
        writer(path, compression=compression)
        size_compressed = os.path.getsize(path)
        # overwrite the same path without compression for comparison
        writer(path, compression=None)
        size_plain = os.path.getsize(path)
        assert size_plain > size_compressed
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize('obj', [
    DataFrame(100 * [[0.123456, 0.234567, 0.567567],
                     [12.32112, 123123.2, 321321.2]],
              columns=['X', 'Y', 'Z']),
    Series(100 * [0.123456, 0.234567, 0.567567], name='X')])
@pytest.mark.parametrize('method', ['to_csv', 'to_json'])
def test_compression_size_fh(obj, method, compression_only):
    """Writing through a pre-opened compressed handle must yield a smaller
    file than the same write through an uncompressed handle, and the
    handle must be closed by its context manager."""

    def write_and_measure(compression):
        # _get_handle opens the (possibly compression-wrapped) file object
        with tm.ensure_clean() as path:
            handle, _ = _get_handle(path, 'w', compression=compression)
            with handle:
                getattr(obj, method)(handle)
                assert not handle.closed
            # leaving the with-block must close the handle
            assert handle.closed
            return os.path.getsize(path)

    size_compressed = write_and_measure(compression_only)
    size_plain = write_and_measure(None)
    assert size_plain > size_compressed
|
||
|
|
||
|
|
||
|
# GH 21227
def test_compression_warning(compression_only):
    """Passing compression= when writing to an already-open file handle
    should emit a RuntimeWarning that the argument is ignored."""
    frame = DataFrame(100 * [[0.123456, 0.234567, 0.567567],
                             [12.32112, 123123.2, 321321.2]],
                      columns=['X', 'Y', 'Z'])
    with tm.ensure_clean() as path:
        handle, _ = _get_handle(path, 'w', compression=compression_only)
        with tm.assert_produces_warning(RuntimeWarning,
                                        check_stacklevel=False):
            with handle:
                frame.to_csv(handle, compression=compression_only)
|