271 lines
9.5 KiB
Python
271 lines
9.5 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
# pylint: disable-msg=E1101,W0612
|
||
|
|
||
|
from operator import methodcaller
|
||
|
from copy import deepcopy
|
||
|
from distutils.version import LooseVersion
|
||
|
|
||
|
import pytest
|
||
|
import numpy as np
|
||
|
import pandas as pd
|
||
|
|
||
|
from pandas import Series, DataFrame, date_range, MultiIndex
|
||
|
|
||
|
from pandas.compat import range
|
||
|
from pandas.util.testing import (assert_series_equal,
|
||
|
assert_frame_equal,
|
||
|
assert_almost_equal)
|
||
|
|
||
|
import pandas.util.testing as tm
|
||
|
import pandas.util._test_decorators as td
|
||
|
from .test_generic import Generic
|
||
|
|
||
|
try:
|
||
|
import xarray
|
||
|
_XARRAY_INSTALLED = True
|
||
|
except ImportError:
|
||
|
_XARRAY_INSTALLED = False
|
||
|
|
||
|
|
||
|
class TestDataFrame(Generic):
|
||
|
_typ = DataFrame
|
||
|
_comparator = lambda self, x, y: assert_frame_equal(x, y)
|
||
|
|
||
|
def test_rename_mi(self):
|
||
|
df = DataFrame([
|
||
|
11, 21, 31
|
||
|
], index=MultiIndex.from_tuples([("A", x) for x in ["a", "B", "c"]]))
|
||
|
df.rename(str.lower)
|
||
|
|
||
|
def test_set_axis_name(self):
|
||
|
df = pd.DataFrame([[1, 2], [3, 4]])
|
||
|
funcs = ['_set_axis_name', 'rename_axis']
|
||
|
for func in funcs:
|
||
|
result = methodcaller(func, 'foo')(df)
|
||
|
assert df.index.name is None
|
||
|
assert result.index.name == 'foo'
|
||
|
|
||
|
result = methodcaller(func, 'cols', axis=1)(df)
|
||
|
assert df.columns.name is None
|
||
|
assert result.columns.name == 'cols'
|
||
|
|
||
|
def test_set_axis_name_mi(self):
|
||
|
df = DataFrame(
|
||
|
np.empty((3, 3)),
|
||
|
index=MultiIndex.from_tuples([("A", x) for x in list('aBc')]),
|
||
|
columns=MultiIndex.from_tuples([('C', x) for x in list('xyz')])
|
||
|
)
|
||
|
|
||
|
level_names = ['L1', 'L2']
|
||
|
funcs = ['_set_axis_name', 'rename_axis']
|
||
|
for func in funcs:
|
||
|
result = methodcaller(func, level_names)(df)
|
||
|
assert result.index.names == level_names
|
||
|
assert result.columns.names == [None, None]
|
||
|
|
||
|
result = methodcaller(func, level_names, axis=1)(df)
|
||
|
assert result.columns.names == ["L1", "L2"]
|
||
|
assert result.index.names == [None, None]
|
||
|
|
||
|
def test_nonzero_single_element(self):
|
||
|
|
||
|
# allow single item via bool method
|
||
|
df = DataFrame([[True]])
|
||
|
assert df.bool()
|
||
|
|
||
|
df = DataFrame([[False]])
|
||
|
assert not df.bool()
|
||
|
|
||
|
df = DataFrame([[False, False]])
|
||
|
pytest.raises(ValueError, lambda: df.bool())
|
||
|
pytest.raises(ValueError, lambda: bool(df))
|
||
|
|
||
|
def test_get_numeric_data_preserve_dtype(self):
|
||
|
|
||
|
# get the numeric data
|
||
|
o = DataFrame({'A': [1, '2', 3.]})
|
||
|
result = o._get_numeric_data()
|
||
|
expected = DataFrame(index=[0, 1, 2], dtype=object)
|
||
|
self._compare(result, expected)
|
||
|
|
||
|
def test_metadata_propagation_indiv(self):
|
||
|
|
||
|
# groupby
|
||
|
df = DataFrame(
|
||
|
{'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
|
||
|
'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
|
||
|
'C': np.random.randn(8),
|
||
|
'D': np.random.randn(8)})
|
||
|
result = df.groupby('A').sum()
|
||
|
self.check_metadata(df, result)
|
||
|
|
||
|
# resample
|
||
|
df = DataFrame(np.random.randn(1000, 2),
|
||
|
index=date_range('20130101', periods=1000, freq='s'))
|
||
|
result = df.resample('1T')
|
||
|
self.check_metadata(df, result)
|
||
|
|
||
|
# merging with override
|
||
|
# GH 6923
|
||
|
_metadata = DataFrame._metadata
|
||
|
_finalize = DataFrame.__finalize__
|
||
|
|
||
|
np.random.seed(10)
|
||
|
df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=['a', 'b'])
|
||
|
df2 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=['c', 'd'])
|
||
|
DataFrame._metadata = ['filename']
|
||
|
df1.filename = 'fname1.csv'
|
||
|
df2.filename = 'fname2.csv'
|
||
|
|
||
|
def finalize(self, other, method=None, **kwargs):
|
||
|
|
||
|
for name in self._metadata:
|
||
|
if method == 'merge':
|
||
|
left, right = other.left, other.right
|
||
|
value = getattr(left, name, '') + '|' + getattr(right,
|
||
|
name, '')
|
||
|
object.__setattr__(self, name, value)
|
||
|
else:
|
||
|
object.__setattr__(self, name, getattr(other, name, ''))
|
||
|
|
||
|
return self
|
||
|
|
||
|
DataFrame.__finalize__ = finalize
|
||
|
result = df1.merge(df2, left_on=['a'], right_on=['c'], how='inner')
|
||
|
assert result.filename == 'fname1.csv|fname2.csv'
|
||
|
|
||
|
# concat
|
||
|
# GH 6927
|
||
|
DataFrame._metadata = ['filename']
|
||
|
df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=list('ab'))
|
||
|
df1.filename = 'foo'
|
||
|
|
||
|
def finalize(self, other, method=None, **kwargs):
|
||
|
for name in self._metadata:
|
||
|
if method == 'concat':
|
||
|
value = '+'.join([getattr(
|
||
|
o, name) for o in other.objs if getattr(o, name, None)
|
||
|
])
|
||
|
object.__setattr__(self, name, value)
|
||
|
else:
|
||
|
object.__setattr__(self, name, getattr(other, name, None))
|
||
|
|
||
|
return self
|
||
|
|
||
|
DataFrame.__finalize__ = finalize
|
||
|
|
||
|
result = pd.concat([df1, df1])
|
||
|
assert result.filename == 'foo+foo'
|
||
|
|
||
|
# reset
|
||
|
DataFrame._metadata = _metadata
|
||
|
DataFrame.__finalize__ = _finalize
|
||
|
|
||
|
def test_set_attribute(self):
|
||
|
# Test for consistent setattr behavior when an attribute and a column
|
||
|
# have the same name (Issue #8994)
|
||
|
df = DataFrame({'x': [1, 2, 3]})
|
||
|
|
||
|
df.y = 2
|
||
|
df['y'] = [2, 4, 6]
|
||
|
df.y = 5
|
||
|
|
||
|
assert df.y == 5
|
||
|
assert_series_equal(df['y'], Series([2, 4, 6], name='y'))
|
||
|
|
||
|
@pytest.mark.skipif(not _XARRAY_INSTALLED or _XARRAY_INSTALLED and
|
||
|
LooseVersion(xarray.__version__) <
|
||
|
LooseVersion('0.10.0'),
|
||
|
reason='xarray >= 0.10.0 required')
|
||
|
@pytest.mark.parametrize(
|
||
|
"index", ['FloatIndex', 'IntIndex',
|
||
|
'StringIndex', 'UnicodeIndex',
|
||
|
'DateIndex', 'PeriodIndex',
|
||
|
'CategoricalIndex', 'TimedeltaIndex'])
|
||
|
def test_to_xarray_index_types(self, index):
|
||
|
from xarray import Dataset
|
||
|
|
||
|
index = getattr(tm, 'make{}'.format(index))
|
||
|
df = DataFrame({'a': list('abc'),
|
||
|
'b': list(range(1, 4)),
|
||
|
'c': np.arange(3, 6).astype('u1'),
|
||
|
'd': np.arange(4.0, 7.0, dtype='float64'),
|
||
|
'e': [True, False, True],
|
||
|
'f': pd.Categorical(list('abc')),
|
||
|
'g': pd.date_range('20130101', periods=3),
|
||
|
'h': pd.date_range('20130101',
|
||
|
periods=3,
|
||
|
tz='US/Eastern')}
|
||
|
)
|
||
|
|
||
|
df.index = index(3)
|
||
|
df.index.name = 'foo'
|
||
|
df.columns.name = 'bar'
|
||
|
result = df.to_xarray()
|
||
|
assert result.dims['foo'] == 3
|
||
|
assert len(result.coords) == 1
|
||
|
assert len(result.data_vars) == 8
|
||
|
assert_almost_equal(list(result.coords.keys()), ['foo'])
|
||
|
assert isinstance(result, Dataset)
|
||
|
|
||
|
# idempotency
|
||
|
# categoricals are not preserved
|
||
|
# datetimes w/tz are not preserved
|
||
|
# column names are lost
|
||
|
expected = df.copy()
|
||
|
expected['f'] = expected['f'].astype(object)
|
||
|
expected['h'] = expected['h'].astype('datetime64[ns]')
|
||
|
expected.columns.name = None
|
||
|
assert_frame_equal(result.to_dataframe(), expected,
|
||
|
check_index_type=False, check_categorical=False)
|
||
|
|
||
|
@td.skip_if_no('xarray', min_version='0.7.0')
|
||
|
def test_to_xarray(self):
|
||
|
from xarray import Dataset
|
||
|
|
||
|
df = DataFrame({'a': list('abc'),
|
||
|
'b': list(range(1, 4)),
|
||
|
'c': np.arange(3, 6).astype('u1'),
|
||
|
'd': np.arange(4.0, 7.0, dtype='float64'),
|
||
|
'e': [True, False, True],
|
||
|
'f': pd.Categorical(list('abc')),
|
||
|
'g': pd.date_range('20130101', periods=3),
|
||
|
'h': pd.date_range('20130101',
|
||
|
periods=3,
|
||
|
tz='US/Eastern')}
|
||
|
)
|
||
|
|
||
|
df.index.name = 'foo'
|
||
|
result = df[0:0].to_xarray()
|
||
|
assert result.dims['foo'] == 0
|
||
|
assert isinstance(result, Dataset)
|
||
|
|
||
|
# available in 0.7.1
|
||
|
# MultiIndex
|
||
|
df.index = pd.MultiIndex.from_product([['a'], range(3)],
|
||
|
names=['one', 'two'])
|
||
|
result = df.to_xarray()
|
||
|
assert result.dims['one'] == 1
|
||
|
assert result.dims['two'] == 3
|
||
|
assert len(result.coords) == 2
|
||
|
assert len(result.data_vars) == 8
|
||
|
assert_almost_equal(list(result.coords.keys()), ['one', 'two'])
|
||
|
assert isinstance(result, Dataset)
|
||
|
|
||
|
result = result.to_dataframe()
|
||
|
expected = df.copy()
|
||
|
expected['f'] = expected['f'].astype(object)
|
||
|
expected['h'] = expected['h'].astype('datetime64[ns]')
|
||
|
expected.columns.name = None
|
||
|
assert_frame_equal(result,
|
||
|
expected,
|
||
|
check_index_type=False)
|
||
|
|
||
|
def test_deepcopy_empty(self):
|
||
|
# This test covers empty frame copying with non-empty column sets
|
||
|
# as reported in issue GH15370
|
||
|
empty_frame = DataFrame(data=[], index=[], columns=['A'])
|
||
|
empty_frame_copy = deepcopy(empty_frame)
|
||
|
|
||
|
self._compare(empty_frame_copy, empty_frame)
|