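"""
Tests for HDFStore and the PyTables-backed HDF5 IO in pandas.io.pytables.

A minimal sketch of the API under test (illustrative only; names here
are examples, not fixtures from this module):

    >>> df = pd.DataFrame({'a': [1, 2, 3]})  # doctest: +SKIP
    >>> with pd.HDFStore('example.h5', mode='w') as store:  # doctest: +SKIP
    ...     store.put('df', df, format='table')
    ...     store.select('df', where='a > 1')
"""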
import pytest
import os
import tempfile
from contextlib import contextmanager
from warnings import catch_warnings
from distutils.version import LooseVersion

import datetime
from datetime import timedelta

import numpy as np

import pandas as pd
from pandas import (Series, DataFrame, Panel, MultiIndex, Int64Index,
                    RangeIndex, Categorical, bdate_range,
                    date_range, timedelta_range, Index, DatetimeIndex,
                    isna, compat, concat, Timestamp, _np_version_under1p15)

import pandas.util.testing as tm
import pandas.util._test_decorators as td
from pandas.util.testing import (assert_panel_equal,
                                 assert_frame_equal,
                                 assert_series_equal,
                                 set_timezone)

from pandas.compat import (is_platform_windows, is_platform_little_endian,
                           PY35, PY36, BytesIO, text_type,
                           range, lrange, u)
from pandas.io.formats.printing import pprint_thing
from pandas.core.dtypes.common import is_categorical_dtype

tables = pytest.importorskip('tables')
from pandas.io import pytables as pytables  # noqa:E402
from pandas.io.pytables import (TableIterator,  # noqa:E402
                                HDFStore, get_store, Term, read_hdf,
                                PossibleDataLossError, ClosedFileError)
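
# Pick the default compressor used by the tests below: blosc when the
# installed PyTables is new enough to support it, zlib otherwise.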
_default_compressor = ('blosc' if LooseVersion(tables.__version__) >=
                       LooseVersion('2.2') else 'zlib')


# context managers and helpers to ensure file cleanup


def safe_remove(path):
    if path is not None:
        try:
            os.remove(path)
        except:
            pass


def safe_close(store):
    try:
        if store is not None:
            store.close()
    except:
        pass


def create_tempfile(path):
    """ create an unopened named temporary file """
    return os.path.join(tempfile.gettempdir(), path)


@contextmanager
def ensure_clean_store(path, mode='a', complevel=None, complib=None,
                       fletcher32=False):

    try:

        # put in the temporary path if we don't have one already
        if not len(os.path.dirname(path)):
            path = create_tempfile(path)

        store = HDFStore(path, mode=mode, complevel=complevel,
                         complib=complib, fletcher32=fletcher32)
        yield store
    finally:
        safe_close(store)
        if mode == 'w' or mode == 'a':
            safe_remove(path)


@contextmanager
def ensure_clean_path(path):
    """
    return essentially a named temporary file that is not opened
    and deleted on exiting; if path is a list, then create and
    return list of filenames
    """
    try:
        if isinstance(path, list):
            filenames = [create_tempfile(p) for p in path]
            yield filenames
        else:
            filenames = [create_tempfile(path)]
            yield filenames[0]
    finally:
        for f in filenames:
            safe_remove(f)
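
# Example usage of the helpers above (illustrative sketch, not a test;
# `some_frame` is a hypothetical DataFrame):
#
#     with ensure_clean_store('tmp.h5') as store:
#         store.put('df', some_frame)
#     # the store is closed and its backing file removed on exit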
# set these parameters so we don't have file sharing
tables.parameters.MAX_NUMEXPR_THREADS = 1
tables.parameters.MAX_BLOSC_THREADS = 1
tables.parameters.MAX_THREADS = 1


def _maybe_remove(store, key):
    """For tests using tables, try removing the table to be sure there is
    no content from previous tests using the same table name."""
    try:
        store.remove(key)
    except:
        pass


class Base(object):

    @classmethod
    def setup_class(cls):

        # Pytables 3.0.0 deprecates lots of things
        tm.reset_testing_mode()

    @classmethod
    def teardown_class(cls):

        # Pytables 3.0.0 deprecates lots of things
        tm.set_testing_mode()

    def setup_method(self, method):
        self.path = 'tmp.__%s__.h5' % tm.rands(10)

    def teardown_method(self, method):
        pass


@pytest.mark.single
class TestHDFStore(Base):

    def test_factory_fun(self):
        path = create_tempfile(self.path)
        try:
            with catch_warnings(record=True):
                with get_store(path) as tbl:
                    raise ValueError('blah')
        except ValueError:
            pass
        finally:
            safe_remove(path)

        try:
            with catch_warnings(record=True):
                with get_store(path) as tbl:
                    tbl['a'] = tm.makeDataFrame()

            with catch_warnings(record=True):
                with get_store(path) as tbl:
                    assert len(tbl) == 1
                    assert type(tbl['a']) == DataFrame
        finally:
            safe_remove(path)
    def test_context(self):
        path = create_tempfile(self.path)
        try:
            with HDFStore(path) as tbl:
                raise ValueError('blah')
        except ValueError:
            pass
        finally:
            safe_remove(path)

        try:
            with HDFStore(path) as tbl:
                tbl['a'] = tm.makeDataFrame()

            with HDFStore(path) as tbl:
                assert len(tbl) == 1
                assert type(tbl['a']) == DataFrame
        finally:
            safe_remove(path)

    def test_conv_read_write(self):
        path = create_tempfile(self.path)
        try:
            def roundtrip(key, obj, **kwargs):
                obj.to_hdf(path, key, **kwargs)
                return read_hdf(path, key)

            o = tm.makeTimeSeries()
            assert_series_equal(o, roundtrip('series', o))

            o = tm.makeStringSeries()
            assert_series_equal(o, roundtrip('string_series', o))

            o = tm.makeDataFrame()
            assert_frame_equal(o, roundtrip('frame', o))

            with catch_warnings(record=True):

                o = tm.makePanel()
                assert_panel_equal(o, roundtrip('panel', o))

            # table
            df = DataFrame(dict(A=lrange(5), B=lrange(5)))
            df.to_hdf(path, 'table', append=True)
            result = read_hdf(path, 'table', where=['index>2'])
            assert_frame_equal(df[df.index > 2], result)

        finally:
            safe_remove(path)

    def test_long_strings(self):

        # GH6166
        # unconversion of long strings was being chopped in earlier
        # versions of numpy < 1.7.2
        df = DataFrame({'a': tm.rands_array(100, size=10)},
                       index=tm.rands_array(100, size=10))

        with ensure_clean_store(self.path) as store:
            store.append('df', df, data_columns=['a'])

            result = store.select('df')
            assert_frame_equal(df, result)

    def test_api(self):

        # GH4584
        # API issue when to_hdf doesn't accept append AND format args
        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.iloc[:10].to_hdf(path, 'df', append=True, format='table')
            df.iloc[10:].to_hdf(path, 'df', append=True, format='table')
            assert_frame_equal(read_hdf(path, 'df'), df)

            # append to False
            df.iloc[:10].to_hdf(path, 'df', append=False, format='table')
            df.iloc[10:].to_hdf(path, 'df', append=True, format='table')
            assert_frame_equal(read_hdf(path, 'df'), df)

        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.iloc[:10].to_hdf(path, 'df', append=True)
            df.iloc[10:].to_hdf(path, 'df', append=True, format='table')
            assert_frame_equal(read_hdf(path, 'df'), df)

            # append to False
            df.iloc[:10].to_hdf(path, 'df', append=False, format='table')
            df.iloc[10:].to_hdf(path, 'df', append=True)
            assert_frame_equal(read_hdf(path, 'df'), df)

        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.to_hdf(path, 'df', append=False, format='fixed')
            assert_frame_equal(read_hdf(path, 'df'), df)

            df.to_hdf(path, 'df', append=False, format='f')
            assert_frame_equal(read_hdf(path, 'df'), df)

            df.to_hdf(path, 'df', append=False)
            assert_frame_equal(read_hdf(path, 'df'), df)

            df.to_hdf(path, 'df')
            assert_frame_equal(read_hdf(path, 'df'), df)

        with ensure_clean_store(self.path) as store:

            path = store._path
            df = tm.makeDataFrame()

            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=True, format='table')
            store.append('df', df.iloc[10:], append=True, format='table')
            assert_frame_equal(store.select('df'), df)

            # append to False
            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=False, format='table')
            store.append('df', df.iloc[10:], append=True, format='table')
            assert_frame_equal(store.select('df'), df)

            # formats
            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=False, format='table')
            store.append('df', df.iloc[10:], append=True, format='table')
            assert_frame_equal(store.select('df'), df)

            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=False, format='table')
            store.append('df', df.iloc[10:], append=True, format=None)
            assert_frame_equal(store.select('df'), df)

        with ensure_clean_path(self.path) as path:

            # invalid
            df = tm.makeDataFrame()
            pytest.raises(ValueError, df.to_hdf, path,
                          'df', append=True, format='f')
            pytest.raises(ValueError, df.to_hdf, path,
                          'df', append=True, format='fixed')

            pytest.raises(TypeError, df.to_hdf, path,
                          'df', append=True, format='foo')
            pytest.raises(TypeError, df.to_hdf, path,
                          'df', append=False, format='bar')

            # File path doesn't exist
            path = ""
            pytest.raises(compat.FileNotFoundError,
                          read_hdf, path, 'df')
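
    # 'fixed' stores write a fast but non-appendable, non-queryable
    # layout; 'table' stores write a PyTables Table that supports
    # append and where-clause selection. put/to_hdf default to 'fixed'
    # unless the io.hdf.default_format option says otherwise.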
    def test_api_default_format(self):

        # default_format option
        with ensure_clean_store(self.path) as store:
            df = tm.makeDataFrame()

            pd.set_option('io.hdf.default_format', 'fixed')
            _maybe_remove(store, 'df')
            store.put('df', df)
            assert not store.get_storer('df').is_table
            pytest.raises(ValueError, store.append, 'df2', df)

            pd.set_option('io.hdf.default_format', 'table')
            _maybe_remove(store, 'df')
            store.put('df', df)
            assert store.get_storer('df').is_table
            _maybe_remove(store, 'df2')
            store.append('df2', df)
            assert store.get_storer('df').is_table

            pd.set_option('io.hdf.default_format', None)

        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()

            pd.set_option('io.hdf.default_format', 'fixed')
            df.to_hdf(path, 'df')
            with HDFStore(path) as store:
                assert not store.get_storer('df').is_table
            pytest.raises(ValueError, df.to_hdf, path, 'df2', append=True)

            pd.set_option('io.hdf.default_format', 'table')
            df.to_hdf(path, 'df3')
            with HDFStore(path) as store:
                assert store.get_storer('df3').is_table
            df.to_hdf(path, 'df4', append=True)
            with HDFStore(path) as store:
                assert store.get_storer('df4').is_table

            pd.set_option('io.hdf.default_format', None)

    def test_keys(self):

        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            store['b'] = tm.makeStringSeries()
            store['c'] = tm.makeDataFrame()
            with catch_warnings(record=True):
                store['d'] = tm.makePanel()
                store['foo/bar'] = tm.makePanel()
            assert len(store) == 5
            expected = set(['/a', '/b', '/c', '/d', '/foo/bar'])
            assert set(store.keys()) == expected
            assert set(store) == expected

    def test_keys_ignore_hdf_softlink(self):

        # GH 20523
        # Puts a softlink into HDF file and rereads

        with ensure_clean_store(self.path) as store:

            df = DataFrame(dict(A=lrange(5), B=lrange(5)))
            store.put("df", df)

            assert store.keys() == ["/df"]

            store._handle.create_soft_link(store._handle.root, "symlink", "df")

            # Should ignore the softlink
            assert store.keys() == ["/df"]

    def test_iter_empty(self):

        with ensure_clean_store(self.path) as store:
            # GH 12221
            assert list(store) == []

    def test_repr(self):

        with ensure_clean_store(self.path) as store:
            repr(store)
            store.info()
            store['a'] = tm.makeTimeSeries()
            store['b'] = tm.makeStringSeries()
            store['c'] = tm.makeDataFrame()

            with catch_warnings(record=True):
                store['d'] = tm.makePanel()
                store['foo/bar'] = tm.makePanel()
                store.append('e', tm.makePanel())

            df = tm.makeDataFrame()
            df['obj1'] = 'foo'
            df['obj2'] = 'bar'
            df['bool1'] = df['A'] > 0
            df['bool2'] = df['B'] > 0
            df['bool3'] = True
            df['int1'] = 1
            df['int2'] = 2
            df['timestamp1'] = Timestamp('20010102')
            df['timestamp2'] = Timestamp('20010103')
            df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0)
            df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0)
            df.loc[3:6, ['obj1']] = np.nan
            df = df._consolidate()._convert(datetime=True)

            # PerformanceWarning
            with catch_warnings(record=True):
                store['df'] = df

            # make a random group in hdf space
            store._handle.create_group(store._handle.root, 'bah')

            assert store.filename in repr(store)
            assert store.filename in str(store)
            store.info()

        # storers
        with ensure_clean_store(self.path) as store:

            df = tm.makeDataFrame()
            store.append('df', df)

            s = store.get_storer('df')
            repr(s)
            str(s)

    def test_contains(self):

        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            store['b'] = tm.makeDataFrame()
            store['foo/bar'] = tm.makeDataFrame()
            assert 'a' in store
            assert 'b' in store
            assert 'c' not in store
            assert 'foo/bar' in store
            assert '/foo/bar' in store
            assert '/foo/b' not in store
            assert 'bar' not in store

            # gh-2694: tables.NaturalNameWarning
            with catch_warnings(record=True):
                store['node())'] = tm.makeDataFrame()
            assert 'node())' in store

    def test_versioning(self):

        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            store['b'] = tm.makeDataFrame()
            df = tm.makeTimeDataFrame()
            _maybe_remove(store, 'df1')
            store.append('df1', df[:10])
            store.append('df1', df[10:])
            assert store.root.a._v_attrs.pandas_version == '0.15.2'
            assert store.root.b._v_attrs.pandas_version == '0.15.2'
            assert store.root.df1._v_attrs.pandas_version == '0.15.2'

            # write a file and wipe its versioning
            _maybe_remove(store, 'df2')
            store.append('df2', df)

            # this is an error because its table_type is appendable, but no
            # version info
            store.get_node('df2')._v_attrs.pandas_version = None
            pytest.raises(Exception, store.select, 'df2')
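
    # HDFStore open modes follow file-mode semantics: 'r' read-only,
    # 'r+' read/write on an existing file, 'a' read/write (creating the
    # file if needed, the default), 'w' create/truncate.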
    def test_mode(self):

        df = tm.makeTimeDataFrame()

        def check(mode):

            with ensure_clean_path(self.path) as path:

                # constructor
                if mode in ['r', 'r+']:
                    pytest.raises(IOError, HDFStore, path, mode=mode)

                else:
                    store = HDFStore(path, mode=mode)
                    assert store._handle.mode == mode
                    store.close()

            with ensure_clean_path(self.path) as path:

                # context
                if mode in ['r', 'r+']:
                    def f():
                        with HDFStore(path, mode=mode) as store:  # noqa
                            pass
                    pytest.raises(IOError, f)
                else:
                    with HDFStore(path, mode=mode) as store:
                        assert store._handle.mode == mode

            with ensure_clean_path(self.path) as path:

                # conv write
                if mode in ['r', 'r+']:
                    pytest.raises(IOError, df.to_hdf,
                                  path, 'df', mode=mode)
                    df.to_hdf(path, 'df', mode='w')
                else:
                    df.to_hdf(path, 'df', mode=mode)

                # conv read
                if mode in ['w']:
                    pytest.raises(ValueError, read_hdf,
                                  path, 'df', mode=mode)
                else:
                    result = read_hdf(path, 'df', mode=mode)
                    assert_frame_equal(result, df)

        def check_default_mode():

            # read_hdf uses default mode
            with ensure_clean_path(self.path) as path:
                df.to_hdf(path, 'df', mode='w')
                result = read_hdf(path, 'df')
                assert_frame_equal(result, df)

        check('r')
        check('r+')
        check('a')
        check('w')
        check_default_mode()

    def test_reopen_handle(self):

        with ensure_clean_path(self.path) as path:

            store = HDFStore(path, mode='a')
            store['a'] = tm.makeTimeSeries()

            # invalid mode change
            pytest.raises(PossibleDataLossError, store.open, 'w')
            store.close()
            assert not store.is_open

            # truncation ok here
            store.open('w')
            assert store.is_open
            assert len(store) == 0
            store.close()
            assert not store.is_open

            store = HDFStore(path, mode='a')
            store['a'] = tm.makeTimeSeries()

            # reopen as read
            store.open('r')
            assert store.is_open
            assert len(store) == 1
            assert store._mode == 'r'
            store.close()
            assert not store.is_open

            # reopen as append
            store.open('a')
            assert store.is_open
            assert len(store) == 1
            assert store._mode == 'a'
            store.close()
            assert not store.is_open

            # reopen as append (again)
            store.open('a')
            assert store.is_open
            assert len(store) == 1
            assert store._mode == 'a'
            store.close()
            assert not store.is_open

    def test_open_args(self):

        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()

            # create an in memory store
            store = HDFStore(path, mode='a', driver='H5FD_CORE',
                             driver_core_backing_store=0)
            store['df'] = df
            store.append('df2', df)

            tm.assert_frame_equal(store['df'], df)
            tm.assert_frame_equal(store['df2'], df)

            store.close()

            # the file should not have actually been written
            assert not os.path.exists(path)

    def test_flush(self):

        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            store.flush()
            store.flush(fsync=True)

    def test_get(self):

        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            left = store.get('a')
            right = store['a']
            tm.assert_series_equal(left, right)

            left = store.get('/a')
            right = store['/a']
            tm.assert_series_equal(left, right)

            pytest.raises(KeyError, store.get, 'b')

    def test_getattr(self):

        with ensure_clean_store(self.path) as store:

            s = tm.makeTimeSeries()
            store['a'] = s

            # test attribute access
            result = store.a
            tm.assert_series_equal(result, s)
            result = getattr(store, 'a')
            tm.assert_series_equal(result, s)

            df = tm.makeTimeDataFrame()
            store['df'] = df
            result = store.df
            tm.assert_frame_equal(result, df)

            # errors
            pytest.raises(AttributeError, getattr, store, 'd')

            for x in ['mode', 'path', 'handle', 'complib']:
                pytest.raises(AttributeError, getattr, store, x)

            # not stores
            for x in ['mode', 'path', 'handle', 'complib']:
                getattr(store, "_%s" % x)

    def test_put(self):

        with ensure_clean_store(self.path) as store:

            ts = tm.makeTimeSeries()
            df = tm.makeTimeDataFrame()
            store['a'] = ts
            store['b'] = df[:10]
            store['foo/bar/bah'] = df[:10]
            store['foo'] = df[:10]
            store['/foo'] = df[:10]
            store.put('c', df[:10], format='table')

            # not OK, not a table
            pytest.raises(
                ValueError, store.put, 'b', df[10:], append=True)

            # node does not currently exist, test _is_table_type returns False
            # in this case
            # _maybe_remove(store, 'f')
            # pytest.raises(ValueError, store.put, 'f', df[10:],
            #               append=True)

            # can't put to a table (use append instead)
            pytest.raises(ValueError, store.put, 'c', df[10:], append=True)

            # overwrite table
            store.put('c', df[:10], format='table', append=False)
            tm.assert_frame_equal(df[:10], store['c'])

    def test_put_string_index(self):

        with ensure_clean_store(self.path) as store:

            index = Index(
                ["I am a very long string index: %s" % i for i in range(20)])
            s = Series(np.arange(20), index=index)
            df = DataFrame({'A': s, 'B': s})

            store['a'] = s
            tm.assert_series_equal(store['a'], s)

            store['b'] = df
            tm.assert_frame_equal(store['b'], df)

            # mixed length
            index = Index(['abcdefghijklmnopqrstuvwxyz1234567890'] +
                          ["I am a very long string index: %s" % i
                           for i in range(20)])
            s = Series(np.arange(21), index=index)
            df = DataFrame({'A': s, 'B': s})
            store['a'] = s
            tm.assert_series_equal(store['a'], s)

            store['b'] = df
            tm.assert_frame_equal(store['b'], df)

    def test_put_compression(self):

        with ensure_clean_store(self.path) as store:
            df = tm.makeTimeDataFrame()

            store.put('c', df, format='table', complib='zlib')
            tm.assert_frame_equal(store['c'], df)

            # can't compress if format='fixed'
            pytest.raises(ValueError, store.put, 'b', df,
                          format='fixed', complib='zlib')

    @td.skip_if_windows_python_3
    def test_put_compression_blosc(self):
        df = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:

            # can't compress if format='fixed'
            pytest.raises(ValueError, store.put, 'b', df,
                          format='fixed', complib='blosc')

            store.put('c', df, format='table', complib='blosc')
            tm.assert_frame_equal(store['c'], df)

    def test_complibs_default_settings(self):
        # GH15943
        df = tm.makeDataFrame()

        # Set complevel and check if complib is automatically set to
        # default value
        with ensure_clean_path(self.path) as tmpfile:
            df.to_hdf(tmpfile, 'df', complevel=9)
            result = pd.read_hdf(tmpfile, 'df')
            tm.assert_frame_equal(result, df)

            with tables.open_file(tmpfile, mode='r') as h5file:
                for node in h5file.walk_nodes(where='/df', classname='Leaf'):
                    assert node.filters.complevel == 9
                    assert node.filters.complib == 'zlib'

        # Set complib and check to see if compression is disabled
        with ensure_clean_path(self.path) as tmpfile:
            df.to_hdf(tmpfile, 'df', complib='zlib')
            result = pd.read_hdf(tmpfile, 'df')
            tm.assert_frame_equal(result, df)

            with tables.open_file(tmpfile, mode='r') as h5file:
                for node in h5file.walk_nodes(where='/df', classname='Leaf'):
                    assert node.filters.complevel == 0
                    assert node.filters.complib is None

        # Check if not setting complib or complevel results in no compression
        with ensure_clean_path(self.path) as tmpfile:
            df.to_hdf(tmpfile, 'df')
            result = pd.read_hdf(tmpfile, 'df')
            tm.assert_frame_equal(result, df)

            with tables.open_file(tmpfile, mode='r') as h5file:
                for node in h5file.walk_nodes(where='/df', classname='Leaf'):
                    assert node.filters.complevel == 0
                    assert node.filters.complib is None

        # Check if file-defaults can be overridden on a per table basis
        with ensure_clean_path(self.path) as tmpfile:
            store = pd.HDFStore(tmpfile)
            store.append('dfc', df, complevel=9, complib='blosc')
            store.append('df', df)
            store.close()

            with tables.open_file(tmpfile, mode='r') as h5file:
                for node in h5file.walk_nodes(where='/df', classname='Leaf'):
                    assert node.filters.complevel == 0
                    assert node.filters.complib is None
                for node in h5file.walk_nodes(where='/dfc', classname='Leaf'):
                    assert node.filters.complevel == 9
                    assert node.filters.complib == 'blosc'
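
    # Exhaustively round-trip every available complib at every
    # complevel (0-9) and check the filter metadata PyTables records.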
    def test_complibs(self):
        # GH14478
        df = tm.makeDataFrame()

        # Building list of all complibs and complevels tuples
        all_complibs = tables.filters.all_complibs
        # Remove lzo if it's not available on this platform
        if not tables.which_lib_version('lzo'):
            all_complibs.remove('lzo')
        # Remove bzip2 if it's not available on this platform
        if not tables.which_lib_version("bzip2"):
            all_complibs.remove("bzip2")

        all_levels = range(0, 10)
        all_tests = [(lib, lvl) for lib in all_complibs for lvl in all_levels]

        for (lib, lvl) in all_tests:
            with ensure_clean_path(self.path) as tmpfile:
                gname = 'foo'

                # Write and read file to see if data is consistent
                df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl)
                result = pd.read_hdf(tmpfile, gname)
                tm.assert_frame_equal(result, df)

                # Open file and check metadata
                # for correct amount of compression
                h5table = tables.open_file(tmpfile, mode='r')
                for node in h5table.walk_nodes(where='/' + gname,
                                               classname='Leaf'):
                    assert node.filters.complevel == lvl
                    if lvl == 0:
                        assert node.filters.complib is None
                    else:
                        assert node.filters.complib == lib
                h5table.close()

    def test_put_integer(self):
        # non-date, non-string index
        df = DataFrame(np.random.randn(50, 100))
        self._check_roundtrip(df, tm.assert_frame_equal)

    def test_put_mixed_type(self):
        df = tm.makeTimeDataFrame()
        df['obj1'] = 'foo'
        df['obj2'] = 'bar'
        df['bool1'] = df['A'] > 0
        df['bool2'] = df['B'] > 0
        df['bool3'] = True
        df['int1'] = 1
        df['int2'] = 2
        df['timestamp1'] = Timestamp('20010102')
        df['timestamp2'] = Timestamp('20010103')
        df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0)
        df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0)
        df.loc[3:6, ['obj1']] = np.nan
        df = df._consolidate()._convert(datetime=True)

        with ensure_clean_store(self.path) as store:
            _maybe_remove(store, 'df')

            # PerformanceWarning
            with catch_warnings(record=True):
                store.put('df', df)

            expected = store.get('df')
            tm.assert_frame_equal(expected, df)

    def test_append(self):

        with ensure_clean_store(self.path) as store:

            # this is allowed, but almost always you don't want to do it
            # tables.NaturalNameWarning):
            with catch_warnings(record=True):

                df = tm.makeTimeDataFrame()
                _maybe_remove(store, 'df1')
                store.append('df1', df[:10])
                store.append('df1', df[10:])
                tm.assert_frame_equal(store['df1'], df)

                _maybe_remove(store, 'df2')
                store.put('df2', df[:10], format='table')
                store.append('df2', df[10:])
                tm.assert_frame_equal(store['df2'], df)

                _maybe_remove(store, 'df3')
                store.append('/df3', df[:10])
                store.append('/df3', df[10:])
                tm.assert_frame_equal(store['df3'], df)

                # this is allowed, but almost always you don't want to do it
                # tables.NaturalNameWarning
                _maybe_remove(store, '/df3 foo')
                store.append('/df3 foo', df[:10])
                store.append('/df3 foo', df[10:])
                tm.assert_frame_equal(store['df3 foo'], df)

                # panel
                wp = tm.makePanel()
                _maybe_remove(store, 'wp1')
                store.append('wp1', wp.iloc[:, :10, :])
                store.append('wp1', wp.iloc[:, 10:, :])
                assert_panel_equal(store['wp1'], wp)

                # test using a different order of items on the non-index axes
                _maybe_remove(store, 'wp1')
                wp_append1 = wp.iloc[:, :10, :]
                store.append('wp1', wp_append1)
                wp_append2 = wp.iloc[:, 10:, :].reindex(items=wp.items[::-1])
                store.append('wp1', wp_append2)
                assert_panel_equal(store['wp1'], wp)

                # dtype issues - mixed type in a single object column
                df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]])
                df['mixed_column'] = 'testing'
                df.loc[2, 'mixed_column'] = np.nan
                _maybe_remove(store, 'df')
                store.append('df', df)
                tm.assert_frame_equal(store['df'], df)

                # uints - test storage of uints
                uint_data = DataFrame({
                    'u08': Series(np.random.randint(0, high=255, size=5),
                                  dtype=np.uint8),
                    'u16': Series(np.random.randint(0, high=65535, size=5),
                                  dtype=np.uint16),
                    'u32': Series(np.random.randint(0, high=2**30, size=5),
                                  dtype=np.uint32),
                    'u64': Series([2**58, 2**59, 2**60, 2**61, 2**62],
                                  dtype=np.uint64)}, index=np.arange(5))
                _maybe_remove(store, 'uints')
                store.append('uints', uint_data)
                tm.assert_frame_equal(store['uints'], uint_data)

                # uints - test storage of uints in indexable columns
                _maybe_remove(store, 'uints')
                # 64-bit indices not yet supported
                store.append('uints', uint_data, data_columns=[
                             'u08', 'u16', 'u32'])
                tm.assert_frame_equal(store['uints'], uint_data)

    def test_append_series(self):

        with ensure_clean_store(self.path) as store:

            # basic
            ss = tm.makeStringSeries()
            ts = tm.makeTimeSeries()
            ns = Series(np.arange(100))

            store.append('ss', ss)
            result = store['ss']
            tm.assert_series_equal(result, ss)
            assert result.name is None

            store.append('ts', ts)
            result = store['ts']
            tm.assert_series_equal(result, ts)
            assert result.name is None

            ns.name = 'foo'
            store.append('ns', ns)
            result = store['ns']
            tm.assert_series_equal(result, ns)
            assert result.name == ns.name

            # select on the values
            expected = ns[ns > 60]
            result = store.select('ns', 'foo>60')
            tm.assert_series_equal(result, expected)

            # select on the index and values
            expected = ns[(ns > 70) & (ns.index < 90)]
            result = store.select('ns', 'foo>70 and index<90')
            tm.assert_series_equal(result, expected)

            # multi-index
            mi = DataFrame(np.random.randn(5, 1), columns=['A'])
            mi['B'] = np.arange(len(mi))
            mi['C'] = 'foo'
            mi.loc[3:5, 'C'] = 'bar'
            mi.set_index(['C', 'B'], inplace=True)
            s = mi.stack()
            s.index = s.index.droplevel(2)
            store.append('mi', s)
            tm.assert_series_equal(store['mi'], s)

    def test_store_index_types(self):
        # GH5386
        # test storing various index types

        with ensure_clean_store(self.path) as store:

            def check(format, index):
                df = DataFrame(np.random.randn(10, 2), columns=list('AB'))
                df.index = index(len(df))

                _maybe_remove(store, 'df')
                store.put('df', df, format=format)
                assert_frame_equal(df, store['df'])

            for index in [tm.makeFloatIndex, tm.makeStringIndex,
                          tm.makeIntIndex, tm.makeDateIndex]:

                check('table', index)
                check('fixed', index)

            # period index currently broken for table
            # see GH7796 FIXME
            check('fixed', tm.makePeriodIndex)
            # check('table', tm.makePeriodIndex)

            # unicode
            index = tm.makeUnicodeIndex
            if compat.PY3:
                check('table', index)
                check('fixed', index)
            else:

                # only support for fixed types (and they have a perf warning)
                pytest.raises(TypeError, check, 'table', index)

                # PerformanceWarning
                with catch_warnings(record=True):
                    check('fixed', index)

    @pytest.mark.skipif(not is_platform_little_endian(),
                        reason="platform is not little endian")
    def test_encoding(self):

        with ensure_clean_store(self.path) as store:
            df = DataFrame(dict(A='foo', B='bar'), index=range(5))
            df.loc[2, 'A'] = np.nan
            df.loc[3, 'B'] = np.nan
            _maybe_remove(store, 'df')
            store.append('df', df, encoding='ascii')
            tm.assert_frame_equal(store['df'], df)

            expected = df.reindex(columns=['A'])
            result = store.select('df', Term('columns=A', encoding='ascii'))
            tm.assert_frame_equal(result, expected)
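
    # Round-trip non-ascii payloads through a latin-1 encoded table,
    # mixing bytes, unicode and NaN across object and categorical
    # dtypes.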
    def test_latin_encoding(self):

        if compat.PY2:
            tm.assert_raises_regex(
                TypeError, r'\[unicode\] is not implemented as a table column')
            return

        values = [[b'E\xc9, 17', b'', b'a', b'b', b'c'],
                  [b'E\xc9, 17', b'a', b'b', b'c'],
                  [b'EE, 17', b'', b'a', b'b', b'c'],
                  [b'E\xc9, 17', b'\xf8\xfc', b'a', b'b', b'c'],
                  [b'', b'a', b'b', b'c'],
                  [b'\xf8\xfc', b'a', b'b', b'c'],
                  [b'A\xf8\xfc', b'', b'a', b'b', b'c'],
                  [np.nan, b'', b'b', b'c'],
                  [b'A\xf8\xfc', np.nan, b'', b'b', b'c']]

        def _try_decode(x, encoding='latin-1'):
            try:
                return x.decode(encoding)
            except AttributeError:
                return x
        # not sure how to remove latin-1 from code in python 2 and 3
        values = [[_try_decode(x) for x in y] for y in values]

        examples = []
        for dtype in ['category', object]:
            for val in values:
                examples.append(pd.Series(val, dtype=dtype))

        def roundtrip(s, key='data', encoding='latin-1', nan_rep=''):
            with ensure_clean_path(self.path) as store:
                s.to_hdf(store, key, format='table', encoding=encoding,
                         nan_rep=nan_rep)
                retr = read_hdf(store, key)
                s_nan = s.replace(nan_rep, np.nan)
                if is_categorical_dtype(s_nan):
                    assert is_categorical_dtype(retr)
                    assert_series_equal(s_nan, retr, check_dtype=False,
                                        check_categorical=False)
                else:
                    assert_series_equal(s_nan, retr)

        for s in examples:
            roundtrip(s)

        # fails:
        # for x in examples:
        #     roundtrip(s, nan_rep=b'\xf8\xfc')

    def test_append_some_nans(self):

        with ensure_clean_store(self.path) as store:
            df = DataFrame({'A': Series(np.random.randn(20)).astype('int32'),
                            'A1': np.random.randn(20),
                            'A2': np.random.randn(20),
                            'B': 'foo', 'C': 'bar',
                            'D': Timestamp("20010101"),
                            'E': datetime.datetime(2001, 1, 2, 0, 0)},
                           index=np.arange(20))
            # some nans
            _maybe_remove(store, 'df1')
            df.loc[0:15, ['A1', 'B', 'D', 'E']] = np.nan
            store.append('df1', df[:10])
            store.append('df1', df[10:])
            tm.assert_frame_equal(store['df1'], df)

            # first column
            df1 = df.copy()
            df1.loc[:, 'A1'] = np.nan
            _maybe_remove(store, 'df1')
            store.append('df1', df1[:10])
            store.append('df1', df1[10:])
            tm.assert_frame_equal(store['df1'], df1)

            # 2nd column
            df2 = df.copy()
            df2.loc[:, 'A2'] = np.nan
            _maybe_remove(store, 'df2')
            store.append('df2', df2[:10])
            store.append('df2', df2[10:])
            tm.assert_frame_equal(store['df2'], df2)

            # datetimes
            df3 = df.copy()
            df3.loc[:, 'E'] = np.nan
            _maybe_remove(store, 'df3')
            store.append('df3', df3[:10])
            store.append('df3', df3[10:])
            tm.assert_frame_equal(store['df3'], df3)
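
    # append(..., dropna=True) drops rows that are entirely NaN, but a
    # row is only dropped when every column can actually be NaN;
    # constant string or datetime columns keep rows alive, as exercised
    # below.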
    def test_append_all_nans(self):

        with ensure_clean_store(self.path) as store:

            df = DataFrame({'A1': np.random.randn(20),
                            'A2': np.random.randn(20)},
                           index=np.arange(20))
            df.loc[0:15, :] = np.nan

            # nan some entire rows (dropna=True)
            _maybe_remove(store, 'df')
            store.append('df', df[:10], dropna=True)
            store.append('df', df[10:], dropna=True)
            tm.assert_frame_equal(store['df'], df[-4:])

            # nan some entire rows (dropna=False)
            _maybe_remove(store, 'df2')
            store.append('df2', df[:10], dropna=False)
            store.append('df2', df[10:], dropna=False)
            tm.assert_frame_equal(store['df2'], df)

            # tests the option io.hdf.dropna_table
            pd.set_option('io.hdf.dropna_table', False)
            _maybe_remove(store, 'df3')
            store.append('df3', df[:10])
            store.append('df3', df[10:])
            tm.assert_frame_equal(store['df3'], df)

            pd.set_option('io.hdf.dropna_table', True)
            _maybe_remove(store, 'df4')
            store.append('df4', df[:10])
            store.append('df4', df[10:])
            tm.assert_frame_equal(store['df4'], df[-4:])

            # nan some entire rows (strings are still written!)
            df = DataFrame({'A1': np.random.randn(20),
                            'A2': np.random.randn(20),
                            'B': 'foo', 'C': 'bar'},
                           index=np.arange(20))

            df.loc[0:15, :] = np.nan

            _maybe_remove(store, 'df')
            store.append('df', df[:10], dropna=True)
            store.append('df', df[10:], dropna=True)
            tm.assert_frame_equal(store['df'], df)

            _maybe_remove(store, 'df2')
            store.append('df2', df[:10], dropna=False)
            store.append('df2', df[10:], dropna=False)
            tm.assert_frame_equal(store['df2'], df)

            # nan some entire rows (but since we have dates they are still
            # written!)
            df = DataFrame({'A1': np.random.randn(20),
                            'A2': np.random.randn(20),
                            'B': 'foo', 'C': 'bar',
                            'D': Timestamp("20010101"),
                            'E': datetime.datetime(2001, 1, 2, 0, 0)},
                           index=np.arange(20))

            df.loc[0:15, :] = np.nan

            _maybe_remove(store, 'df')
            store.append('df', df[:10], dropna=True)
            store.append('df', df[10:], dropna=True)
            tm.assert_frame_equal(store['df'], df)

            _maybe_remove(store, 'df2')
            store.append('df2', df[:10], dropna=False)
            store.append('df2', df[10:], dropna=False)
            tm.assert_frame_equal(store['df2'], df)

        # Test to make sure defaults are to not drop.
        # Corresponding to Issue 9382
        df_with_missing = DataFrame(
            {'col1': [0, np.nan, 2], 'col2': [1, np.nan, np.nan]})

        with ensure_clean_path(self.path) as path:
            df_with_missing.to_hdf(path, 'df_with_missing', format='table')
            reloaded = read_hdf(path, 'df_with_missing')
            tm.assert_frame_equal(df_with_missing, reloaded)

        matrix = [[[np.nan, np.nan, np.nan], [1, np.nan, np.nan]],
                  [[np.nan, np.nan, np.nan], [np.nan, 5, 6]],
                  [[np.nan, np.nan, np.nan], [np.nan, 3, np.nan]]]

        with catch_warnings(record=True):
            panel_with_missing = Panel(matrix,
                                       items=['Item1', 'Item2', 'Item3'],
                                       major_axis=[1, 2],
                                       minor_axis=['A', 'B', 'C'])

        with ensure_clean_path(self.path) as path:
            panel_with_missing.to_hdf(
                path, 'panel_with_missing', format='table')
            reloaded_panel = read_hdf(path, 'panel_with_missing')
            tm.assert_panel_equal(panel_with_missing, reloaded_panel)

    def test_append_frame_column_oriented(self):

        with ensure_clean_store(self.path) as store:

            # column oriented
            df = tm.makeTimeDataFrame()
            _maybe_remove(store, 'df1')
            store.append('df1', df.iloc[:, :2], axes=['columns'])
            store.append('df1', df.iloc[:, 2:])
            tm.assert_frame_equal(store['df1'], df)

            result = store.select('df1', 'columns=A')
            expected = df.reindex(columns=['A'])
            tm.assert_frame_equal(expected, result)

            # selection on the non-indexable
            result = store.select(
                'df1', ('columns=A', 'index=df.index[0:4]'))
            expected = df.reindex(columns=['A'], index=df.index[0:4])
            tm.assert_frame_equal(expected, result)

            # this isn't supported
            with pytest.raises(TypeError):
                store.select('df1',
                             'columns=A and index>df.index[4]')

    def test_append_with_different_block_ordering(self):

        # GH 4096; using same frames, but different block orderings
        with ensure_clean_store(self.path) as store:

            for i in range(10):

                df = DataFrame(np.random.randn(10, 2), columns=list('AB'))
                df['index'] = range(10)
                df['index'] += i * 10
                df['int64'] = Series([1] * len(df), dtype='int64')
                df['int16'] = Series([1] * len(df), dtype='int16')

                if i % 2 == 0:
                    del df['int64']
                    df['int64'] = Series([1] * len(df), dtype='int64')
                if i % 3 == 0:
                    a = df.pop('A')
                    df['A'] = a

                df.set_index('index', inplace=True)

                store.append('df', df)

        # test a different ordering but with more fields (like an invalid
        # combination)
        with ensure_clean_store(self.path) as store:

            df = DataFrame(np.random.randn(10, 2),
                           columns=list('AB'), dtype='float64')
            df['int64'] = Series([1] * len(df), dtype='int64')
            df['int16'] = Series([1] * len(df), dtype='int16')
            store.append('df', df)

            # store additional fields in different blocks
            df['int16_2'] = Series([1] * len(df), dtype='int16')
            pytest.raises(ValueError, store.append, 'df', df)

            # store multiple additional fields in different blocks
            df['float_3'] = Series([1.] * len(df), dtype='float64')
            pytest.raises(ValueError, store.append, 'df', df)
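
    # min_itemsize fixes the on-disk width of string columns when a
    # table is first created; subsequent appends with longer strings
    # raise rather than silently truncate.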
    def test_append_with_strings(self):

        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):
                wp = tm.makePanel()
                wp2 = wp.rename_axis(
                    {x: "%s_extra" % x for x in wp.minor_axis}, axis=2)

                def check_col(key, name, size):
                    assert getattr(store.get_storer(key)
                                   .table.description, name).itemsize == size

                store.append('s1', wp, min_itemsize=20)
                store.append('s1', wp2)
                expected = concat([wp, wp2], axis=2)
                expected = expected.reindex(
                    minor_axis=sorted(expected.minor_axis))
                assert_panel_equal(store['s1'], expected)
                check_col('s1', 'minor_axis', 20)

                # test dict format
                store.append('s2', wp, min_itemsize={'minor_axis': 20})
                store.append('s2', wp2)
                expected = concat([wp, wp2], axis=2)
                expected = expected.reindex(
                    minor_axis=sorted(expected.minor_axis))
                assert_panel_equal(store['s2'], expected)
                check_col('s2', 'minor_axis', 20)

                # apply the wrong field (similar to #1)
                store.append('s3', wp, min_itemsize={'major_axis': 20})
                pytest.raises(ValueError, store.append, 's3', wp2)

                # test truncation of bigger strings
                store.append('s4', wp)
                pytest.raises(ValueError, store.append, 's4', wp2)

                # avoid truncation on elements
                df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
                store.append('df_big', df)
                tm.assert_frame_equal(store.select('df_big'), df)
                check_col('df_big', 'values_block_1', 15)

                # appending smaller string ok
                df2 = DataFrame([[124, 'asdqy'], [346, 'dggnhefbdfb']])
                store.append('df_big', df2)
                expected = concat([df, df2])
                tm.assert_frame_equal(store.select('df_big'), expected)
                check_col('df_big', 'values_block_1', 15)

                # avoid truncation on elements
                df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
                store.append('df_big2', df, min_itemsize={'values': 50})
                tm.assert_frame_equal(store.select('df_big2'), df)
                check_col('df_big2', 'values_block_1', 50)

                # bigger string on next append
                store.append('df_new', df)
                df_new = DataFrame(
                    [[124, 'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']])
                pytest.raises(ValueError, store.append, 'df_new', df_new)

                # min_itemsize on Series index (GH 11412)
                df = tm.makeMixedDataFrame().set_index('C')
                store.append('ss', df['B'], min_itemsize={'index': 4})
                tm.assert_series_equal(store.select('ss'), df['B'])

                # same as above, with data_columns=True
                store.append('ss2', df['B'], data_columns=True,
                             min_itemsize={'index': 4})
                tm.assert_series_equal(store.select('ss2'), df['B'])

                # min_itemsize in index without appending (GH 10381)
                store.put('ss3', df, format='table',
                          min_itemsize={'index': 6})
                # just make sure there is a longer string:
                df2 = df.copy().reset_index().assign(C='longer').set_index('C')
                store.append('ss3', df2)
                tm.assert_frame_equal(store.select('ss3'),
                                      pd.concat([df, df2]))

                # same as above, with a Series
                store.put('ss4', df['B'], format='table',
                          min_itemsize={'index': 6})
                store.append('ss4', df2['B'])
                tm.assert_series_equal(store.select('ss4'),
                                       pd.concat([df['B'], df2['B']]))

                # with nans
                _maybe_remove(store, 'df')
                df = tm.makeTimeDataFrame()
                df['string'] = 'foo'
                df.loc[1:4, 'string'] = np.nan
                df['string2'] = 'bar'
                df.loc[4:8, 'string2'] = np.nan
                df['string3'] = 'bah'
                df.loc[1:, 'string3'] = np.nan
                store.append('df', df)
                result = store.select('df')
                tm.assert_frame_equal(result, df)

        with ensure_clean_store(self.path) as store:

            def check_col(key, name, size):
                assert getattr(store.get_storer(key)
                               .table.description, name).itemsize == size

            df = DataFrame(dict(A='foo', B='bar'), index=range(10))

            # a min_itemsize that creates a data_column
            _maybe_remove(store, 'df')
            store.append('df', df, min_itemsize={'A': 200})
            check_col('df', 'A', 200)
            assert store.get_storer('df').data_columns == ['A']

            # a min_itemsize that creates a data_column2
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=['B'], min_itemsize={'A': 200})
            check_col('df', 'A', 200)
            assert store.get_storer('df').data_columns == ['B', 'A']

            # a min_itemsize that creates a data_column2
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=[
                         'B'], min_itemsize={'values': 200})
            check_col('df', 'B', 200)
            check_col('df', 'values_block_0', 200)
            assert store.get_storer('df').data_columns == ['B']

            # infer the .typ on subsequent appends
            _maybe_remove(store, 'df')
            store.append('df', df[:5], min_itemsize=200)
            store.append('df', df[5:], min_itemsize=200)
            tm.assert_frame_equal(store['df'], df)

            # invalid min_itemsize keys
            df = DataFrame(['foo', 'foo', 'foo', 'barh',
                            'barh', 'barh'], columns=['A'])
            _maybe_remove(store, 'df')
            pytest.raises(ValueError, store.append, 'df',
                          df, min_itemsize={'foo': 20, 'foobar': 20})

    def test_to_hdf_with_min_itemsize(self):

        with ensure_clean_path(self.path) as path:

            # min_itemsize in index with to_hdf (GH 10381)
            df = tm.makeMixedDataFrame().set_index('C')
            df.to_hdf(path, 'ss3', format='table', min_itemsize={'index': 6})
            # just make sure there is a longer string:
            df2 = df.copy().reset_index().assign(C='longer').set_index('C')
            df2.to_hdf(path, 'ss3', append=True, format='table')
            tm.assert_frame_equal(pd.read_hdf(path, 'ss3'),
                                  pd.concat([df, df2]))

            # same as above, with a Series
            df['B'].to_hdf(path, 'ss4', format='table',
                           min_itemsize={'index': 6})
            df2['B'].to_hdf(path, 'ss4', append=True, format='table')
            tm.assert_series_equal(pd.read_hdf(path, 'ss4'),
                                   pd.concat([df['B'], df2['B']]))

    @pytest.mark.parametrize("format", ['fixed', 'table'])
    def test_to_hdf_errors(self, format):

        data = ['\ud800foo']
        ser = pd.Series(data, index=pd.Index(data))
        with ensure_clean_path(self.path) as path:
            # GH 20835
            ser.to_hdf(path, 'table', format=format, errors='surrogatepass')

            result = pd.read_hdf(path, 'table', errors='surrogatepass')
            tm.assert_series_equal(result, ser)
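
    # Columns named in data_columns are stored as their own table
    # columns rather than packed into value blocks, which makes them
    # usable in `where` selections and individually indexable.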
    def test_append_with_data_columns(self):

        with ensure_clean_store(self.path) as store:
            df = tm.makeTimeDataFrame()
            df.iloc[0, df.columns.get_loc('B')] = 1.
            _maybe_remove(store, 'df')
            store.append('df', df[:2], data_columns=['B'])
            store.append('df', df[2:])
            tm.assert_frame_equal(store['df'], df)

            # check that we have indices created
            assert(store._handle.root.df.table.cols.index.is_indexed is True)
            assert(store._handle.root.df.table.cols.B.is_indexed is True)

            # data column searching
            result = store.select('df', 'B>0')
            expected = df[df.B > 0]
            tm.assert_frame_equal(result, expected)

            # data column searching (with an indexable and a data_columns)
            result = store.select(
                'df', 'B>0 and index>df.index[3]')
            df_new = df.reindex(index=df.index[4:])
            expected = df_new[df_new.B > 0]
            tm.assert_frame_equal(result, expected)

            # data column selection with a string data_column
            df_new = df.copy()
            df_new['string'] = 'foo'
            df_new.loc[1:4, 'string'] = np.nan
            df_new.loc[5:6, 'string'] = 'bar'
            _maybe_remove(store, 'df')
            store.append('df', df_new, data_columns=['string'])
            result = store.select('df', "string='foo'")
            expected = df_new[df_new.string == 'foo']
            tm.assert_frame_equal(result, expected)

        # using min_itemsize and a data column
        def check_col(key, name, size):
            assert getattr(store.get_storer(key)
                           .table.description, name).itemsize == size

        with ensure_clean_store(self.path) as store:
            _maybe_remove(store, 'df')
            store.append('df', df_new, data_columns=['string'],
                         min_itemsize={'string': 30})
            check_col('df', 'string', 30)
            _maybe_remove(store, 'df')
            store.append(
                'df', df_new, data_columns=['string'], min_itemsize=30)
            check_col('df', 'string', 30)
            _maybe_remove(store, 'df')
            store.append('df', df_new, data_columns=['string'],
                         min_itemsize={'values': 30})
            check_col('df', 'string', 30)

        with ensure_clean_store(self.path) as store:
            df_new['string2'] = 'foobarbah'
            df_new['string_block1'] = 'foobarbah1'
            df_new['string_block2'] = 'foobarbah2'
            _maybe_remove(store, 'df')
            store.append('df', df_new, data_columns=['string', 'string2'],
                         min_itemsize={'string': 30, 'string2': 40,
                                       'values': 50})
            check_col('df', 'string', 30)
            check_col('df', 'string2', 40)
            check_col('df', 'values_block_1', 50)

        with ensure_clean_store(self.path) as store:
            # multiple data columns
            df_new = df.copy()
            df_new.iloc[0, df_new.columns.get_loc('A')] = 1.
            df_new.iloc[0, df_new.columns.get_loc('B')] = -1.
            df_new['string'] = 'foo'

            sl = df_new.columns.get_loc('string')
            df_new.iloc[1:4, sl] = np.nan
            df_new.iloc[5:6, sl] = 'bar'

            df_new['string2'] = 'foo'
            sl = df_new.columns.get_loc('string2')
            df_new.iloc[2:5, sl] = np.nan
            df_new.iloc[7:8, sl] = 'bar'
            _maybe_remove(store, 'df')
            store.append(
                'df', df_new, data_columns=['A', 'B', 'string', 'string2'])
            result = store.select('df',
                                  "string='foo' and string2='foo'"
                                  " and A>0 and B<0")
            expected = df_new[(df_new.string == 'foo') & (
                df_new.string2 == 'foo') & (df_new.A > 0) & (df_new.B < 0)]
            tm.assert_frame_equal(result, expected, check_index_type=False)

            # yield an empty frame
            result = store.select('df', "string='foo' and string2='cool'")
            expected = df_new[(df_new.string == 'foo') & (
                df_new.string2 == 'cool')]
            tm.assert_frame_equal(result, expected, check_index_type=False)

        with ensure_clean_store(self.path) as store:
            # doc example
            df_dc = df.copy()
            df_dc['string'] = 'foo'
            df_dc.loc[4:6, 'string'] = np.nan
            df_dc.loc[7:9, 'string'] = 'bar'
            df_dc['string2'] = 'cool'
            df_dc['datetime'] = Timestamp('20010102')
            df_dc = df_dc._convert(datetime=True)
            df_dc.loc[3:5, ['A', 'B', 'datetime']] = np.nan

            _maybe_remove(store, 'df_dc')
            store.append('df_dc', df_dc,
                         data_columns=['B', 'C', 'string',
                                       'string2', 'datetime'])
            result = store.select('df_dc', 'B>0')

            expected = df_dc[df_dc.B > 0]
            tm.assert_frame_equal(result, expected, check_index_type=False)

            result = store.select(
                'df_dc', ['B > 0', 'C > 0', 'string == foo'])
            expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (
                df_dc.string == 'foo')]
            tm.assert_frame_equal(result, expected, check_index_type=False)

        with ensure_clean_store(self.path) as store:
            # doc example part 2
            np.random.seed(1234)
            index = date_range('1/1/2000', periods=8)
            df_dc = DataFrame(np.random.randn(8, 3), index=index,
                              columns=['A', 'B', 'C'])
            df_dc['string'] = 'foo'
            df_dc.loc[4:6, 'string'] = np.nan
            df_dc.loc[7:9, 'string'] = 'bar'
            df_dc.loc[:, ['B', 'C']] = df_dc.loc[:, ['B', 'C']].abs()
            df_dc['string2'] = 'cool'

            # on-disk operations
            store.append('df_dc', df_dc, data_columns=[
                         'B', 'C', 'string', 'string2'])

            result = store.select('df_dc', 'B>0')
            expected = df_dc[df_dc.B > 0]
            tm.assert_frame_equal(result, expected)

            result = store.select(
                'df_dc', ['B > 0', 'C > 0', 'string == "foo"'])
            expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) &
                             (df_dc.string == 'foo')]
            tm.assert_frame_equal(result, expected)

        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):
                # panel
                # GH5717 not handling data_columns
                np.random.seed(1234)
                p = tm.makePanel()

                store.append('p1', p)
                tm.assert_panel_equal(store.select('p1'), p)

                store.append('p2', p, data_columns=True)
                tm.assert_panel_equal(store.select('p2'), p)

                result = store.select('p2', where='ItemA>0')
                expected = p.to_frame()
                expected = expected[expected['ItemA'] > 0]
                tm.assert_frame_equal(result.to_frame(), expected)

                result = store.select(
                    'p2', where='ItemA>0 & minor_axis=["A","B"]')
                expected = p.to_frame()
                expected = expected[expected['ItemA'] > 0]
                expected = expected[expected.reset_index(
                    level=['major']).index.isin(['A', 'B'])]
                tm.assert_frame_equal(result.to_frame(), expected)
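
    # create_table_index builds PyTables column indexes; optlevel (1-9)
    # and kind ('light'/'medium'/'full') trade index build time for
    # query speed.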
|
|
|
|
def test_create_table_index(self):
|
|
|
|
with ensure_clean_store(self.path) as store:
|
|
|
|
with catch_warnings(record=True):
|
|
def col(t, column):
|
|
return getattr(store.get_storer(t).table.cols, column)
|
|
|
|
# index=False
|
|
wp = tm.makePanel()
|
|
store.append('p5', wp, index=False)
|
|
store.create_table_index('p5', columns=['major_axis'])
|
|
assert(col('p5', 'major_axis').is_indexed is True)
|
|
assert(col('p5', 'minor_axis').is_indexed is False)
|
|
|
|
# index=True
|
|
store.append('p5i', wp, index=True)
|
|
assert(col('p5i', 'major_axis').is_indexed is True)
|
|
assert(col('p5i', 'minor_axis').is_indexed is True)
|
|
|
|
# default optlevels
|
|
store.get_storer('p5').create_index()
|
|
assert(col('p5', 'major_axis').index.optlevel == 6)
|
|
assert(col('p5', 'minor_axis').index.kind == 'medium')
|
|
|
|
# let's change the indexing scheme
|
|
store.create_table_index('p5')
|
|
assert(col('p5', 'major_axis').index.optlevel == 6)
|
|
assert(col('p5', 'minor_axis').index.kind == 'medium')
|
|
store.create_table_index('p5', optlevel=9)
|
|
assert(col('p5', 'major_axis').index.optlevel == 9)
|
|
assert(col('p5', 'minor_axis').index.kind == 'medium')
|
|
store.create_table_index('p5', kind='full')
|
|
assert(col('p5', 'major_axis').index.optlevel == 9)
|
|
assert(col('p5', 'minor_axis').index.kind == 'full')
|
|
store.create_table_index('p5', optlevel=1, kind='light')
|
|
assert(col('p5', 'major_axis').index.optlevel == 1)
|
|
assert(col('p5', 'minor_axis').index.kind == 'light')
|
|
|
|
# data columns
|
|
df = tm.makeTimeDataFrame()
|
|
df['string'] = 'foo'
|
|
df['string2'] = 'bar'
|
|
store.append('f', df, data_columns=['string', 'string2'])
|
|
assert(col('f', 'index').is_indexed is True)
|
|
assert(col('f', 'string').is_indexed is True)
|
|
assert(col('f', 'string2').is_indexed is True)
|
|
|
|
# specify index=columns
|
|
store.append(
|
|
'f2', df, index=['string'],
|
|
data_columns=['string', 'string2'])
|
|
assert(col('f2', 'index').is_indexed is False)
|
|
assert(col('f2', 'string').is_indexed is True)
|
|
assert(col('f2', 'string2').is_indexed is False)
|
|
|
|
# try to index a non-table
|
|
_maybe_remove(store, 'f2')
|
|
store.put('f2', df)
|
|
pytest.raises(TypeError, store.create_table_index, 'f2')
|
|
|
|
def test_append_diff_item_order(self):
|
|
|
|
with catch_warnings(record=True):
|
|
        wp = tm.makePanel()
        wp1 = wp.iloc[:, :10, :]
        wp2 = wp.iloc[wp.items.get_indexer(['ItemC', 'ItemB', 'ItemA']),
                      10:, :]

        with ensure_clean_store(self.path) as store:
            store.put('panel', wp1, format='table')
            pytest.raises(ValueError, store.put, 'panel', wp2,
                          append=True)

    def test_append_hierarchical(self):
        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                   ['one', 'two', 'three']],
                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['foo', 'bar'])
        df = DataFrame(np.random.randn(10, 3), index=index,
                       columns=['A', 'B', 'C'])

        with ensure_clean_store(self.path) as store:
            store.append('mi', df)
            result = store.select('mi')
            tm.assert_frame_equal(result, df)

            # GH 3748
            result = store.select('mi', columns=['A', 'B'])
            expected = df.reindex(columns=['A', 'B'])
            tm.assert_frame_equal(result, expected)

        with ensure_clean_path('test.hdf') as path:
            df.to_hdf(path, 'df', format='table')
            result = read_hdf(path, 'df', columns=['A', 'B'])
            expected = df.reindex(columns=['A', 'B'])
            tm.assert_frame_equal(result, expected)

    def test_column_multiindex(self):
        # GH 4710
        # recreate multi-indexes properly

        index = MultiIndex.from_tuples([('A', 'a'), ('A', 'b'),
                                        ('B', 'a'), ('B', 'b')],
                                       names=['first', 'second'])
        df = DataFrame(np.arange(12).reshape(3, 4), columns=index)
        expected = df.copy()
        if isinstance(expected.index, RangeIndex):
            expected.index = Int64Index(expected.index)

        with ensure_clean_store(self.path) as store:

            store.put('df', df)
            tm.assert_frame_equal(store['df'], expected,
                                  check_index_type=True,
                                  check_column_type=True)

            store.put('df1', df, format='table')
            tm.assert_frame_equal(store['df1'], expected,
                                  check_index_type=True,
                                  check_column_type=True)

            pytest.raises(ValueError, store.put, 'df2', df,
                          format='table', data_columns=['A'])
            pytest.raises(ValueError, store.put, 'df3', df,
                          format='table', data_columns=True)

        # appending multi-column on existing table (see GH 6167)
        with ensure_clean_store(self.path) as store:
            store.append('df2', df)
            store.append('df2', df)

            tm.assert_frame_equal(store['df2'], concat((df, df)))

        # non_index_axes name
        df = DataFrame(np.arange(12).reshape(3, 4),
                       columns=Index(list('ABCD'), name='foo'))
        expected = df.copy()
        if isinstance(expected.index, RangeIndex):
            expected.index = Int64Index(expected.index)

        with ensure_clean_store(self.path) as store:

            store.put('df1', df, format='table')
            tm.assert_frame_equal(store['df1'], expected,
                                  check_index_type=True,
                                  check_column_type=True)

    def test_store_multiindex(self):

        # validate multi-index names
        # GH 5527
        with ensure_clean_store(self.path) as store:

            def make_index(names=None):
                return MultiIndex.from_tuples(
                    [(datetime.datetime(2013, 12, d), s, t)
                     for d in range(1, 3)
                     for s in range(2)
                     for t in range(3)],
                    names=names)

            # no names
            _maybe_remove(store, 'df')
            df = DataFrame(np.zeros((12, 2)), columns=[
                           'a', 'b'], index=make_index())
            store.append('df', df)
            tm.assert_frame_equal(store.select('df'), df)

            # partial names
            _maybe_remove(store, 'df')
            df = DataFrame(np.zeros((12, 2)), columns=[
                           'a', 'b'], index=make_index(['date', None, None]))
            store.append('df', df)
            tm.assert_frame_equal(store.select('df'), df)

            # series
            _maybe_remove(store, 's')
            s = Series(np.zeros(12), index=make_index(['date', None, None]))
            store.append('s', s)
            xp = Series(np.zeros(12), index=make_index(
                ['date', 'level_1', 'level_2']))
            tm.assert_series_equal(store.select('s'), xp)

            # level name duplicating a column name
            _maybe_remove(store, 'df')
            df = DataFrame(np.zeros((12, 2)), columns=[
                           'a', 'b'], index=make_index(['date', 'a', 't']))
            pytest.raises(ValueError, store.append, 'df', df)

            # duplicate within the level names
            _maybe_remove(store, 'df')
            df = DataFrame(np.zeros((12, 2)), columns=['a', 'b'],
                           index=make_index(['date', 'date', 'date']))
            pytest.raises(ValueError, store.append, 'df', df)

            # fully named
            _maybe_remove(store, 'df')
            df = DataFrame(np.zeros((12, 2)), columns=[
                           'a', 'b'], index=make_index(['date', 's', 't']))
            store.append('df', df)
            tm.assert_frame_equal(store.select('df'), df)
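
    # A note on the naming rules exercised above: a level name that collides
    # with a column name ('a') or repeats another level name ('date' twice)
    # is rejected with ValueError, since each stored table column needs a
    # unique identifier, while the Series round-trip fills unnamed levels
    # with 'level_1'/'level_2' placeholders on the way back.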

    def test_select_columns_in_where(self):

        # GH 6169
        # recreate multi-indexes when columns is passed
        # in the `where` argument
        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                   ['one', 'two', 'three']],
                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['foo_name', 'bar_name'])

        # With a DataFrame
        df = DataFrame(np.random.randn(10, 3), index=index,
                       columns=['A', 'B', 'C'])

        with ensure_clean_store(self.path) as store:
            store.put('df', df, format='table')
            expected = df[['A']]

            tm.assert_frame_equal(store.select('df', columns=['A']), expected)

            tm.assert_frame_equal(store.select(
                'df', where="columns=['A']"), expected)

        # With a Series
        s = Series(np.random.randn(10), index=index,
                   name='A')
        with ensure_clean_store(self.path) as store:
            store.put('s', s, format='table')
            tm.assert_series_equal(store.select('s', where="columns=['A']"),
                                   s)

    def test_mi_data_columns(self):
        # GH 14435
        idx = pd.MultiIndex.from_arrays([date_range('2000-01-01', periods=5),
                                         range(5)], names=['date', 'id'])
        df = pd.DataFrame({'a': [1.1, 1.2, 1.3, 1.4, 1.5]}, index=idx)

        with ensure_clean_store(self.path) as store:
            store.append('df', df, data_columns=True)

            actual = store.select('df', where='id == 1')
            expected = df.iloc[[1], :]
            tm.assert_frame_equal(actual, expected)
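
    # With data_columns=True the named MultiIndex levels ('date', 'id')
    # become directly queryable alongside the regular columns (GH 14435),
    # which is why the where clause can filter on 'id' above. A sketch of
    # the same idea on the date level (not asserted in this test):
    #
    #   store.select('df', where="date >= '2000-01-03'")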

    def test_pass_spec_to_storer(self):

        df = tm.makeDataFrame()

        with ensure_clean_store(self.path) as store:
            store.put('df', df)
            pytest.raises(TypeError, store.select, 'df', columns=['A'])
            pytest.raises(TypeError, store.select,
                          'df', where=[('columns=A')])

    def test_append_misc(self):

        with ensure_clean_store(self.path) as store:
            df = tm.makeDataFrame()
            store.append('df', df, chunksize=1)
            result = store.select('df')
            tm.assert_frame_equal(result, df)

            store.append('df1', df, expectedrows=10)
            result = store.select('df1')
            tm.assert_frame_equal(result, df)

        # more chunksize in append tests
        def check(obj, comparator):
            for c in [10, 200, 1000]:
                with ensure_clean_store(self.path, mode='w') as store:
                    store.append('obj', obj, chunksize=c)
                    result = store.select('obj')
                    comparator(result, obj)

        df = tm.makeDataFrame()
        df['string'] = 'foo'
        df['float322'] = 1.
        df['float322'] = df['float322'].astype('float32')
        df['bool'] = df['float322'] > 0
        df['time1'] = Timestamp('20130101')
        df['time2'] = Timestamp('20130102')
        check(df, tm.assert_frame_equal)

        with catch_warnings(record=True):
            p = tm.makePanel()
            check(p, assert_panel_equal)

        # empty frame, GH4273
        with ensure_clean_store(self.path) as store:

            # 0 len
            df_empty = DataFrame(columns=list('ABC'))
            store.append('df', df_empty)
            pytest.raises(KeyError, store.select, 'df')

            # repeated append of 0/non-zero frames
            df = DataFrame(np.random.rand(10, 3), columns=list('ABC'))
            store.append('df', df)
            assert_frame_equal(store.select('df'), df)
            store.append('df', df_empty)
            assert_frame_equal(store.select('df'), df)

            # store
            df = DataFrame(columns=list('ABC'))
            store.put('df2', df)
            assert_frame_equal(store.select('df2'), df)

            with catch_warnings(record=True):

                # 0 len
                p_empty = Panel(items=list('ABC'))
                store.append('p', p_empty)
                pytest.raises(KeyError, store.select, 'p')

                # repeated append of 0/non-zero frames
                p = Panel(np.random.randn(3, 4, 5), items=list('ABC'))
                store.append('p', p)
                assert_panel_equal(store.select('p'), p)
                store.append('p', p_empty)
                assert_panel_equal(store.select('p'), p)

                # store
                store.put('p2', p_empty)
                assert_panel_equal(store.select('p2'), p_empty)

    def test_append_raise(self):

        with ensure_clean_store(self.path) as store:

            # test append with invalid input to get good error messages

            # list in column
            df = tm.makeDataFrame()
            df['invalid'] = [['a']] * len(df)
            assert df.dtypes['invalid'] == np.object_
            pytest.raises(TypeError, store.append, 'df', df)

            # multiple invalid columns
            df['invalid2'] = [['a']] * len(df)
            df['invalid3'] = [['a']] * len(df)
            pytest.raises(TypeError, store.append, 'df', df)

            # datetime with embedded nans as object
            df = tm.makeDataFrame()
            s = Series(datetime.datetime(2001, 1, 2), index=df.index)
            s = s.astype(object)
            s[0:5] = np.nan
            df['invalid'] = s
            assert df.dtypes['invalid'] == np.object_
            pytest.raises(TypeError, store.append, 'df', df)

            # directly ndarray
            pytest.raises(TypeError, store.append, 'df', np.arange(10))

            # series directly
            pytest.raises(TypeError, store.append,
                          'df', Series(np.arange(10)))

            # appending an incompatible table
            df = tm.makeDataFrame()
            store.append('df', df)

            df['foo'] = 'foo'
            pytest.raises(ValueError, store.append, 'df', df)

    def test_table_index_incompatible_dtypes(self):
        df1 = DataFrame({'a': [1, 2, 3]})
        df2 = DataFrame({'a': [4, 5, 6]},
                        index=date_range('1/1/2000', periods=3))

        with ensure_clean_store(self.path) as store:
            store.put('frame', df1, format='table')
            pytest.raises(TypeError, store.put, 'frame', df2,
                          format='table', append=True)

    def test_table_values_dtypes_roundtrip(self):

        with ensure_clean_store(self.path) as store:
            df1 = DataFrame({'a': [1, 2, 3]}, dtype='f8')
            store.append('df_f8', df1)
            assert_series_equal(df1.dtypes, store['df_f8'].dtypes)

            df2 = DataFrame({'a': [1, 2, 3]}, dtype='i8')
            store.append('df_i8', df2)
            assert_series_equal(df2.dtypes, store['df_i8'].dtypes)

            # incompatible dtype
            pytest.raises(ValueError, store.append, 'df_i8', df1)

            # check creation/storage/retrieval of float32 (a bit hacky to
            # actually create them though)
            df1 = DataFrame(
                np.array([[1], [2], [3]], dtype='f4'), columns=['A'])
            store.append('df_f4', df1)
            assert_series_equal(df1.dtypes, store['df_f4'].dtypes)
            assert df1.dtypes[0] == 'float32'

            # check with mixed dtypes
            df1 = DataFrame(dict((c, Series(np.random.randn(5), dtype=c))
                                 for c in ['float32', 'float64', 'int32',
                                           'int64', 'int16', 'int8']))
            df1['string'] = 'foo'
            df1['float322'] = 1.
            df1['float322'] = df1['float322'].astype('float32')
            df1['bool'] = df1['float32'] > 0
            df1['time1'] = Timestamp('20130101')
            df1['time2'] = Timestamp('20130102')

            store.append('df_mixed_dtypes1', df1)
            result = store.select('df_mixed_dtypes1').get_dtype_counts()
            expected = Series({'float32': 2, 'float64': 1, 'int32': 1,
                               'bool': 1, 'int16': 1, 'int8': 1,
                               'int64': 1, 'object': 1,
                               'datetime64[ns]': 2})
            result = result.sort_index()
            expected = expected.sort_index()
            tm.assert_series_equal(result, expected)
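
    # The expected dtype counts follow directly from the frame built above:
    # six numeric columns of distinct dtypes plus 'float322' (a second
    # float32), 'string' (object), 'bool', and two datetime64[ns] columns,
    # all of which table format preserves on round-trip.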

    def test_table_mixed_dtypes(self):

        # frame
        df = tm.makeDataFrame()
        df['obj1'] = 'foo'
        df['obj2'] = 'bar'
        df['bool1'] = df['A'] > 0
        df['bool2'] = df['B'] > 0
        df['bool3'] = True
        df['int1'] = 1
        df['int2'] = 2
        df['timestamp1'] = Timestamp('20010102')
        df['timestamp2'] = Timestamp('20010103')
        df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0)
        df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0)
        df.loc[3:6, ['obj1']] = np.nan
        df = df._consolidate()._convert(datetime=True)

        with ensure_clean_store(self.path) as store:
            store.append('df1_mixed', df)
            tm.assert_frame_equal(store.select('df1_mixed'), df)

        with catch_warnings(record=True):

            # panel
            wp = tm.makePanel()
            wp['obj1'] = 'foo'
            wp['obj2'] = 'bar'
            wp['bool1'] = wp['ItemA'] > 0
            wp['bool2'] = wp['ItemB'] > 0
            wp['int1'] = 1
            wp['int2'] = 2
            wp = wp._consolidate()

        with catch_warnings(record=True):

            with ensure_clean_store(self.path) as store:
                store.append('p1_mixed', wp)
                assert_panel_equal(store.select('p1_mixed'), wp)

    def test_unimplemented_dtypes_table_columns(self):

        with ensure_clean_store(self.path) as store:

            l = [('date', datetime.date(2001, 1, 2))]

            # py3 ok for unicode
            if not compat.PY3:
                l.append(('unicode', u('\\u03c3')))

            # currently not supported dtypes ####
            for n, f in l:
                df = tm.makeDataFrame()
                df[n] = f
                pytest.raises(
                    TypeError, store.append, 'df1_%s' % n, df)

        # frame
        df = tm.makeDataFrame()
        df['obj1'] = 'foo'
        df['obj2'] = 'bar'
        df['datetime1'] = datetime.date(2001, 1, 2)
        df = df._consolidate()._convert(datetime=True)

        with ensure_clean_store(self.path) as store:
            # this fails because we have a date in the object block......
            pytest.raises(TypeError, store.append, 'df_unimplemented', df)

    @pytest.mark.skipif(
        not _np_version_under1p15,
        reason=("pytables conda build package needs build "
                "with numpy 1.15: gh-22098"))
    def test_calendar_roundtrip_issue(self):

        # GH 8591
        # doc example from tseries holiday section
        weekmask_egypt = 'Sun Mon Tue Wed Thu'
        holidays = ['2012-05-01',
                    datetime.datetime(2013, 5, 1),
                    np.datetime64('2014-05-01')]
        bday_egypt = pd.offsets.CustomBusinessDay(
            holidays=holidays, weekmask=weekmask_egypt)
        dt = datetime.datetime(2013, 4, 30)
        dts = date_range(dt, periods=5, freq=bday_egypt)

        s = (Series(dts.weekday, dts).map(
            Series('Mon Tue Wed Thu Fri Sat Sun'.split())))

        with ensure_clean_store(self.path) as store:

            store.put('fixed', s)
            result = store.select('fixed')
            assert_series_equal(result, s)

            store.append('table', s)
            result = store.select('table')
            assert_series_equal(result, s)

    def test_roundtrip_tz_aware_index(self):
        # GH 17618
        time = pd.Timestamp('2000-01-01 01:00:00', tz='US/Eastern')
        df = pd.DataFrame(data=[0], index=[time])

        with ensure_clean_store(self.path) as store:
            store.put('frame', df, format='fixed')
            recons = store['frame']
            tm.assert_frame_equal(recons, df)
            assert recons.index[0].value == 946706400000000000

    def test_append_with_timedelta(self):
        # GH 3577
        # append timedelta

        df = DataFrame(dict(A=Timestamp('20130101'), B=[Timestamp(
            '20130101') + timedelta(days=i, seconds=10) for i in range(10)]))
        df['C'] = df['A'] - df['B']
        df.loc[3:5, 'C'] = np.nan

        with ensure_clean_store(self.path) as store:

            # table
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=True)
            result = store.select('df')
            assert_frame_equal(result, df)

            result = store.select('df', where="C<100000")
            assert_frame_equal(result, df)

            result = store.select('df', where="C<pd.Timedelta('-3D')")
            assert_frame_equal(result, df.iloc[3:])

            result = store.select('df', "C<'-3D'")
            assert_frame_equal(result, df.iloc[3:])

            # a bit hacky here as we don't really deal with the NaT properly

            result = store.select('df', "C<'-500000s'")
            result = result.dropna(subset=['C'])
            assert_frame_equal(result, df.iloc[6:])

            result = store.select('df', "C<'-3.5D'")
            result = result.iloc[1:]
            assert_frame_equal(result, df.iloc[4:])

            # fixed
            _maybe_remove(store, 'df2')
            store.put('df2', df)
            result = store.select('df2')
            assert_frame_equal(result, df)
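
    # Timedelta terms accept several spellings, as exercised above: a bare
    # number ("C<100000"; every C here is negative, so all rows match), a
    # pd.Timedelta expression, or a quoted offset string such as '-3D' or
    # '-500000s'. The dropna/iloc trims work around NaT rows, which appear
    # to satisfy any '<' comparison here (see the "hacky" note above).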

    def test_remove(self):

        with ensure_clean_store(self.path) as store:

            ts = tm.makeTimeSeries()
            df = tm.makeDataFrame()
            store['a'] = ts
            store['b'] = df
            _maybe_remove(store, 'a')
            assert len(store) == 1
            tm.assert_frame_equal(df, store['b'])

            _maybe_remove(store, 'b')
            assert len(store) == 0

            # nonexistence
            pytest.raises(KeyError, store.remove, 'a_nonexistent_store')

            # pathing
            store['a'] = ts
            store['b/foo'] = df
            _maybe_remove(store, 'foo')
            _maybe_remove(store, 'b/foo')
            assert len(store) == 1

            store['a'] = ts
            store['b/foo'] = df
            _maybe_remove(store, 'b')
            assert len(store) == 1

            # __delitem__
            store['a'] = ts
            store['b'] = df
            del store['a']
            del store['b']
            assert len(store) == 0

    def test_remove_where(self):

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):

                # non-existence
                crit1 = 'index>foo'
                pytest.raises(KeyError, store.remove, 'a', [crit1])

                # try to remove non-table (with crit)
                # non-table ok (where = None)
                wp = tm.makePanel(30)
                store.put('wp', wp, format='table')
                store.remove('wp', ["minor_axis=['A', 'D']"])
                rs = store.select('wp')
                expected = wp.reindex(minor_axis=['B', 'C'])
                assert_panel_equal(rs, expected)

                # empty where
                _maybe_remove(store, 'wp')
                store.put('wp', wp, format='table')

                # number of rows deleted (the entire table)
                n = store.remove('wp', [])
                assert n == 120

                # non-empty where
                _maybe_remove(store, 'wp')
                store.put('wp', wp, format='table')
                pytest.raises(ValueError, store.remove,
                              'wp', ['foo'])

    def test_remove_startstop(self):
        # GH #4835 and #6177

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):
                wp = tm.makePanel(30)

                # start
                _maybe_remove(store, 'wp1')
                store.put('wp1', wp, format='t')
                n = store.remove('wp1', start=32)
                assert n == 120 - 32
                result = store.select('wp1')
                expected = wp.reindex(major_axis=wp.major_axis[:32 // 4])
                assert_panel_equal(result, expected)

                _maybe_remove(store, 'wp2')
                store.put('wp2', wp, format='t')
                n = store.remove('wp2', start=-32)
                assert n == 32
                result = store.select('wp2')
                expected = wp.reindex(major_axis=wp.major_axis[:-32 // 4])
                assert_panel_equal(result, expected)

                # stop
                _maybe_remove(store, 'wp3')
                store.put('wp3', wp, format='t')
                n = store.remove('wp3', stop=32)
                assert n == 32
                result = store.select('wp3')
                expected = wp.reindex(major_axis=wp.major_axis[32 // 4:])
                assert_panel_equal(result, expected)

                _maybe_remove(store, 'wp4')
                store.put('wp4', wp, format='t')
                n = store.remove('wp4', stop=-32)
                assert n == 120 - 32
                result = store.select('wp4')
                expected = wp.reindex(major_axis=wp.major_axis[-32 // 4:])
                assert_panel_equal(result, expected)

                # start and stop
                _maybe_remove(store, 'wp5')
                store.put('wp5', wp, format='t')
                n = store.remove('wp5', start=16, stop=-16)
                assert n == 120 - 32
                result = store.select('wp5')
                expected = wp.reindex(
                    major_axis=(wp.major_axis[:16 // 4]
                                .union(wp.major_axis[-16 // 4:])))
                assert_panel_equal(result, expected)

                _maybe_remove(store, 'wp6')
                store.put('wp6', wp, format='t')
                n = store.remove('wp6', start=16, stop=16)
                assert n == 0
                result = store.select('wp6')
                expected = wp.reindex(major_axis=wp.major_axis)
                assert_panel_equal(result, expected)

                # with where
                _maybe_remove(store, 'wp7')

                # TODO: unused?
                date = wp.major_axis.take(np.arange(0, 30, 3))  # noqa

                crit = 'major_axis=date'
                store.put('wp7', wp, format='t')
                n = store.remove('wp7', where=[crit], stop=80)
                assert n == 28
                result = store.select('wp7')
                expected = wp.reindex(major_axis=wp.major_axis.difference(
                    wp.major_axis[np.arange(0, 20, 3)]))
                assert_panel_equal(result, expected)
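
    # Row arithmetic for the asserts above: the panel is stored as a long
    # table of 30 major-axis entries x 4 minor-axis entries = 120 rows, so
    # start/stop address *stored rows*, and removing 32 rows corresponds to
    # 32 // 4 = 8 major-axis labels.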

    def test_remove_crit(self):

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):
                wp = tm.makePanel(30)

                # group row removal
                _maybe_remove(store, 'wp3')
                date4 = wp.major_axis.take([0, 1, 2, 4, 5, 6, 8, 9, 10])
                crit4 = 'major_axis=date4'
                store.put('wp3', wp, format='t')
                n = store.remove('wp3', where=[crit4])
                assert n == 36

                result = store.select('wp3')
                expected = wp.reindex(
                    major_axis=wp.major_axis.difference(date4))
                assert_panel_equal(result, expected)

                # upper half
                _maybe_remove(store, 'wp')
                store.put('wp', wp, format='table')
                date = wp.major_axis[len(wp.major_axis) // 2]

                crit1 = 'major_axis>date'
                crit2 = "minor_axis=['A', 'D']"
                n = store.remove('wp', where=[crit1])
                assert n == 56

                n = store.remove('wp', where=[crit2])
                assert n == 32

                result = store['wp']
                expected = wp.truncate(after=date).reindex(minor=['B', 'C'])
                assert_panel_equal(result, expected)

                # individual row elements
                _maybe_remove(store, 'wp2')
                store.put('wp2', wp, format='table')

                date1 = wp.major_axis[1:3]
                crit1 = 'major_axis=date1'
                store.remove('wp2', where=[crit1])
                result = store.select('wp2')
                expected = wp.reindex(
                    major_axis=wp.major_axis.difference(date1))
                assert_panel_equal(result, expected)

                date2 = wp.major_axis[5]
                crit2 = 'major_axis=date2'
                store.remove('wp2', where=[crit2])
                result = store['wp2']
                expected = wp.reindex(
                    major_axis=(wp.major_axis
                                .difference(date1)
                                .difference(Index([date2]))
                                ))
                assert_panel_equal(result, expected)

                date3 = [wp.major_axis[7], wp.major_axis[9]]
                crit3 = 'major_axis=date3'
                store.remove('wp2', where=[crit3])
                result = store['wp2']
                expected = wp.reindex(major_axis=wp.major_axis
                                      .difference(date1)
                                      .difference(Index([date2]))
                                      .difference(Index(date3)))
                assert_panel_equal(result, expected)

                # corners
                _maybe_remove(store, 'wp4')
                store.put('wp4', wp, format='table')
                n = store.remove(
                    'wp4', where="major_axis>wp.major_axis[-1]")
                result = store.select('wp4')
                assert_panel_equal(result, wp)

    def test_invalid_terms(self):

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):

                df = tm.makeTimeDataFrame()
                df['string'] = 'foo'
                df.loc[0:4, 'string'] = 'bar'
                wp = tm.makePanel()

                store.put('df', df, format='table')
                store.put('wp', wp, format='table')

                # some invalid terms
                pytest.raises(ValueError, store.select,
                              'wp', "minor=['A', 'B']")
                pytest.raises(ValueError, store.select,
                              'wp', ["index=['20121114']"])
                pytest.raises(ValueError, store.select, 'wp', [
                    "index=['20121114', '20121114']"])
                pytest.raises(TypeError, Term)

                # more invalid
                pytest.raises(
                    ValueError, store.select, 'df', 'df.index[3]')
                pytest.raises(SyntaxError, store.select, 'df', 'index>')
                pytest.raises(
                    ValueError, store.select, 'wp',
                    "major_axis<'20000108' & minor_axis['A', 'B']")

        # from the docs
        with ensure_clean_path(self.path) as path:
            dfq = DataFrame(np.random.randn(10, 4), columns=list(
                'ABCD'), index=date_range('20130101', periods=10))
            dfq.to_hdf(path, 'dfq', format='table', data_columns=True)

            # check ok
            read_hdf(path, 'dfq',
                     where="index>Timestamp('20130104') & columns=['A', 'B']")
            read_hdf(path, 'dfq', where="A>0 or C>0")

        # catch the invalid reference
        with ensure_clean_path(self.path) as path:
            dfq = DataFrame(np.random.randn(10, 4), columns=list(
                'ABCD'), index=date_range('20130101', periods=10))
            dfq.to_hdf(path, 'dfq', format='table')

            pytest.raises(ValueError, read_hdf, path,
                          'dfq', where="A>0 or C>0")
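
    # The two dfq cases differ only in data_columns: a where term may
    # reference a regular column (here "A>0 or C>0") only if that column
    # was written as a data column; without data_columns=True the identical
    # query raises ValueError because 'A' and 'C' are not queryable.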

    def test_terms(self):

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):

                wp = tm.makePanel()
                wpneg = Panel.fromDict({-1: tm.makeDataFrame(),
                                        0: tm.makeDataFrame(),
                                        1: tm.makeDataFrame()})

                store.put('wp', wp, format='table')
                store.put('wpneg', wpneg, format='table')

                # panel
                result = store.select(
                    'wp',
                    "major_axis<'20000108' and minor_axis=['A', 'B']")
                expected = wp.truncate(
                    after='20000108').reindex(minor=['A', 'B'])
                assert_panel_equal(result, expected)

                # with deprecation
                result = store.select(
                    'wp', where=("major_axis<'20000108' "
                                 "and minor_axis=['A', 'B']"))
                expected = wp.truncate(
                    after='20000108').reindex(minor=['A', 'B'])
                tm.assert_panel_equal(result, expected)

            with catch_warnings(record=True):

                # valid terms
                terms = [('major_axis=20121114'),
                         ('major_axis>20121114'),
                         (("major_axis=['20121114', '20121114']"),),
                         ('major_axis=datetime.datetime(2012, 11, 14)'),
                         'major_axis> 20121114',
                         'major_axis >20121114',
                         'major_axis > 20121114',
                         (("minor_axis=['A', 'B']"),),
                         (("minor_axis=['A', 'B']"),),
                         ((("minor_axis==['A', 'B']"),),),
                         (("items=['ItemA', 'ItemB']"),),
                         ('items=ItemA'),
                         ]

                for t in terms:
                    store.select('wp', t)

                with tm.assert_raises_regex(
                        TypeError, 'Only named functions are supported'):
                    store.select(
                        'wp',
                        'major_axis == (lambda x: x)("20130101")')

            with catch_warnings(record=True):
                # check USub node parsing
                res = store.select('wpneg', 'items == -1')
                expected = Panel({-1: wpneg[-1]})
                tm.assert_panel_equal(res, expected)

                with tm.assert_raises_regex(NotImplementedError,
                                            'Unary addition '
                                            'not supported'):
                    store.select('wpneg', 'items == +1')

    def test_term_compat(self):
        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):
                wp = Panel(np.random.randn(2, 5, 4),
                           items=['Item1', 'Item2'],
                           major_axis=date_range('1/1/2000', periods=5),
                           minor_axis=['A', 'B', 'C', 'D'])
                store.append('wp', wp)

                result = store.select(
                    'wp', where=("major_axis>20000102 "
                                 "and minor_axis=['A', 'B']"))
                expected = wp.loc[:, wp.major_axis >
                                  Timestamp('20000102'), ['A', 'B']]
                assert_panel_equal(result, expected)

                store.remove('wp', 'major_axis>20000103')
                result = store.select('wp')
                expected = wp.loc[:, wp.major_axis <= Timestamp('20000103'),
                                  :]
                assert_panel_equal(result, expected)

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):
                wp = Panel(np.random.randn(2, 5, 4),
                           items=['Item1', 'Item2'],
                           major_axis=date_range('1/1/2000', periods=5),
                           minor_axis=['A', 'B', 'C', 'D'])
                store.append('wp', wp)

                # stringified datetimes
                result = store.select(
                    'wp', 'major_axis>datetime.datetime(2000, 1, 2)')
                expected = wp.loc[:, wp.major_axis > Timestamp('20000102')]
                assert_panel_equal(result, expected)

                result = store.select(
                    'wp', 'major_axis>datetime.datetime(2000, 1, 2)')
                expected = wp.loc[:, wp.major_axis > Timestamp('20000102')]
                assert_panel_equal(result, expected)

                result = store.select(
                    'wp',
                    "major_axis=[datetime.datetime(2000, 1, 2, 0, 0), "
                    "datetime.datetime(2000, 1, 3, 0, 0)]")
                expected = wp.loc[:, [Timestamp('20000102'),
                                      Timestamp('20000103')]]
                assert_panel_equal(result, expected)

                result = store.select(
                    'wp', "minor_axis=['A', 'B']")
                expected = wp.loc[:, :, ['A', 'B']]
                assert_panel_equal(result, expected)

    def test_same_name_scoping(self):

        with ensure_clean_store(self.path) as store:

            import pandas as pd
            df = DataFrame(np.random.randn(20, 2),
                           index=pd.date_range('20130101', periods=20))
            store.put('df', df, format='table')
            expected = df[df.index > pd.Timestamp('20130105')]

            import datetime  # noqa
            result = store.select('df', 'index>datetime.datetime(2013,1,5)')
            assert_frame_equal(result, expected)

            from datetime import datetime  # noqa

            # technically an error, but allow it
            result = store.select('df', 'index>datetime.datetime(2013,1,5)')
            assert_frame_equal(result, expected)

            result = store.select('df', 'index>datetime(2013,1,5)')
            assert_frame_equal(result, expected)
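
    # Name resolution in a where string behaves like a closure: identifiers
    # are looked up in the caller's scope at select time. That is why
    # re-binding the name 'datetime' (module vs. class) above changes what
    # the query refers to, yet both spellings still evaluate to the same
    # timestamp comparison.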

    def test_series(self):

        s = tm.makeStringSeries()
        self._check_roundtrip(s, tm.assert_series_equal)

        ts = tm.makeTimeSeries()
        self._check_roundtrip(ts, tm.assert_series_equal)

        ts2 = Series(ts.index, Index(ts.index, dtype=object))
        self._check_roundtrip(ts2, tm.assert_series_equal)

        ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object),
                                      dtype=object))
        self._check_roundtrip(ts3, tm.assert_series_equal,
                              check_index_type=False)

    def test_sparse_series(self):

        s = tm.makeStringSeries()
        s.iloc[3:5] = np.nan
        ss = s.to_sparse()
        self._check_roundtrip(ss, tm.assert_series_equal,
                              check_series_type=True)

        ss2 = s.to_sparse(kind='integer')
        self._check_roundtrip(ss2, tm.assert_series_equal,
                              check_series_type=True)

        ss3 = s.to_sparse(fill_value=0)
        self._check_roundtrip(ss3, tm.assert_series_equal,
                              check_series_type=True)

    def test_sparse_frame(self):

        s = tm.makeDataFrame()
        s.iloc[3:5, 1:3] = np.nan
        s.iloc[8:10, -2] = np.nan
        ss = s.to_sparse()

        self._check_double_roundtrip(ss, tm.assert_frame_equal,
                                     check_frame_type=True)

        ss2 = s.to_sparse(kind='integer')
        self._check_double_roundtrip(ss2, tm.assert_frame_equal,
                                     check_frame_type=True)

        ss3 = s.to_sparse(fill_value=0)
        self._check_double_roundtrip(ss3, tm.assert_frame_equal,
                                     check_frame_type=True)

    def test_float_index(self):

        # GH #454
        index = np.random.randn(10)
        s = Series(np.random.randn(10), index=index)
        self._check_roundtrip(s, tm.assert_series_equal)

    def test_tuple_index(self):

        # GH #492
        col = np.arange(10)
        idx = [(0., 1.), (2., 3.), (4., 5.)]
        data = np.random.randn(30).reshape((3, 10))
        DF = DataFrame(data, index=idx, columns=col)

        with catch_warnings(record=True):
            self._check_roundtrip(DF, tm.assert_frame_equal)

    def test_index_types(self):

        with catch_warnings(record=True):
            values = np.random.randn(2)

            func = lambda l, r: tm.assert_series_equal(l, r,
                                                       check_dtype=True,
                                                       check_index_type=True,
                                                       check_series_type=True)

        with catch_warnings(record=True):
            ser = Series(values, [0, 'y'])
            self._check_roundtrip(ser, func)

        with catch_warnings(record=True):
            ser = Series(values, [datetime.datetime.today(), 0])
            self._check_roundtrip(ser, func)

        with catch_warnings(record=True):
            ser = Series(values, ['y', 0])
            self._check_roundtrip(ser, func)

        with catch_warnings(record=True):
            ser = Series(values, [datetime.date.today(), 'a'])
            self._check_roundtrip(ser, func)

        with catch_warnings(record=True):

            ser = Series(values, [0, 'y'])
            self._check_roundtrip(ser, func)

            ser = Series(values, [datetime.datetime.today(), 0])
            self._check_roundtrip(ser, func)

            ser = Series(values, ['y', 0])
            self._check_roundtrip(ser, func)

            ser = Series(values, [datetime.date.today(), 'a'])
            self._check_roundtrip(ser, func)

            ser = Series(values, [1.23, 'b'])
            self._check_roundtrip(ser, func)

            ser = Series(values, [1, 1.53])
            self._check_roundtrip(ser, func)

            ser = Series(values, [1, 5])
            self._check_roundtrip(ser, func)

            ser = Series(values, [datetime.datetime(
                2012, 1, 1), datetime.datetime(2012, 1, 2)])
            self._check_roundtrip(ser, func)

    def test_timeseries_preepoch(self):

        dr = bdate_range('1/1/1940', '1/1/1960')
        ts = Series(np.random.randn(len(dr)), index=dr)
        try:
            self._check_roundtrip(ts, tm.assert_series_equal)
        except OverflowError:
            pytest.skip('known failure on some windows platforms')

    @pytest.mark.parametrize("compression", [
        False, pytest.param(True, marks=td.skip_if_windows_python_3)
    ])
    def test_frame(self, compression):

        df = tm.makeDataFrame()

        # put in some random NAs
        df.values[0, 0] = np.nan
        df.values[5, 3] = np.nan

        self._check_roundtrip_table(df, tm.assert_frame_equal,
                                    compression=compression)
        self._check_roundtrip(df, tm.assert_frame_equal,
                              compression=compression)

        tdf = tm.makeTimeDataFrame()
        self._check_roundtrip(tdf, tm.assert_frame_equal,
                              compression=compression)

        with ensure_clean_store(self.path) as store:
            # not consolidated
            df['foo'] = np.random.randn(len(df))
            store['df'] = df
            recons = store['df']
            assert recons._data.is_consolidated()

        # empty
        self._check_roundtrip(df[:0], tm.assert_frame_equal)

    def test_empty_series_frame(self):
        s0 = Series()
        s1 = Series(name='myseries')
        df0 = DataFrame()
        df1 = DataFrame(index=['a', 'b', 'c'])
        df2 = DataFrame(columns=['d', 'e', 'f'])

        self._check_roundtrip(s0, tm.assert_series_equal)
        self._check_roundtrip(s1, tm.assert_series_equal)
        self._check_roundtrip(df0, tm.assert_frame_equal)
        self._check_roundtrip(df1, tm.assert_frame_equal)
        self._check_roundtrip(df2, tm.assert_frame_equal)

    def test_empty_series(self):
        for dtype in [np.int64, np.float64, np.object, 'm8[ns]', 'M8[ns]']:
            s = Series(dtype=dtype)
            self._check_roundtrip(s, tm.assert_series_equal)

    def test_can_serialize_dates(self):

        rng = [x.date() for x in bdate_range('1/1/2000', '1/30/2000')]
        frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

        self._check_roundtrip(frame, tm.assert_frame_equal)

    def test_store_hierarchical(self):
        index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
                                   ['one', 'two', 'three']],
                           labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                                   [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
                           names=['foo', 'bar'])
        frame = DataFrame(np.random.randn(10, 3), index=index,
                          columns=['A', 'B', 'C'])

        self._check_roundtrip(frame, tm.assert_frame_equal)
        self._check_roundtrip(frame.T, tm.assert_frame_equal)
        self._check_roundtrip(frame['A'], tm.assert_series_equal)

        # check that the names are stored
        with ensure_clean_store(self.path) as store:
            store['frame'] = frame
            recons = store['frame']
            tm.assert_frame_equal(recons, frame)

    def test_store_index_name(self):
        df = tm.makeDataFrame()
        df.index.name = 'foo'

        with ensure_clean_store(self.path) as store:
            store['frame'] = df
            recons = store['frame']
            tm.assert_frame_equal(recons, df)

    def test_store_index_name_with_tz(self):
        # GH 13884
        df = pd.DataFrame({'A': [1, 2]})
        df.index = pd.DatetimeIndex([1234567890123456787,
                                     1234567890123456788])
        df.index = df.index.tz_localize('UTC')
        df.index.name = 'foo'

        with ensure_clean_store(self.path) as store:
            store.put('frame', df, format='table')
            recons = store['frame']
            tm.assert_frame_equal(recons, df)

    @pytest.mark.parametrize('table_format', ['table', 'fixed'])
    def test_store_index_name_numpy_str(self, table_format):
        # GH #13492
        idx = pd.Index(pd.to_datetime([datetime.date(2000, 1, 1),
                                       datetime.date(2000, 1, 2)]),
                       name=u('cols\u05d2'))
        idx1 = pd.Index(pd.to_datetime([datetime.date(2010, 1, 1),
                                        datetime.date(2010, 1, 2)]),
                        name=u('rows\u05d0'))
        df = pd.DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1)

        # This used to fail, returning numpy strings instead of python
        # strings.
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format=table_format)
            df2 = read_hdf(path, 'df')

            assert_frame_equal(df, df2, check_names=True)

            assert type(df2.index.name) == text_type
            assert type(df2.columns.name) == text_type

    def test_store_series_name(self):
        df = tm.makeDataFrame()
        series = df['A']

        with ensure_clean_store(self.path) as store:
            store['series'] = series
            recons = store['series']
            tm.assert_series_equal(recons, series)

    @pytest.mark.parametrize("compression", [
        False, pytest.param(True, marks=td.skip_if_windows_python_3)
    ])
    def test_store_mixed(self, compression):

        def _make_one():
            df = tm.makeDataFrame()
            df['obj1'] = 'foo'
            df['obj2'] = 'bar'
            df['bool1'] = df['A'] > 0
            df['bool2'] = df['B'] > 0
            df['int1'] = 1
            df['int2'] = 2
            return df._consolidate()

        df1 = _make_one()
        df2 = _make_one()

        self._check_roundtrip(df1, tm.assert_frame_equal)
        self._check_roundtrip(df2, tm.assert_frame_equal)

        with ensure_clean_store(self.path) as store:
            store['obj'] = df1
            tm.assert_frame_equal(store['obj'], df1)
            store['obj'] = df2
            tm.assert_frame_equal(store['obj'], df2)

        # check that can store Series of all of these types
        self._check_roundtrip(df1['obj1'], tm.assert_series_equal,
                              compression=compression)
        self._check_roundtrip(df1['bool1'], tm.assert_series_equal,
                              compression=compression)
        self._check_roundtrip(df1['int1'], tm.assert_series_equal,
                              compression=compression)

    def test_wide(self):

        with catch_warnings(record=True):
            wp = tm.makePanel()
            self._check_roundtrip(wp, assert_panel_equal)

    def test_select_with_dups(self):

        # single dtypes
        df = DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B'])
        df.index = date_range('20130101 9:30', periods=10, freq='T')

        with ensure_clean_store(self.path) as store:
            store.append('df', df)

            result = store.select('df')
            expected = df
            assert_frame_equal(result, expected, by_blocks=True)

            result = store.select('df', columns=df.columns)
            expected = df
            assert_frame_equal(result, expected, by_blocks=True)

            result = store.select('df', columns=['A'])
            expected = df.loc[:, ['A']]
            assert_frame_equal(result, expected)

        # dups across dtypes
        df = concat([DataFrame(np.random.randn(10, 4),
                               columns=['A', 'A', 'B', 'B']),
                     DataFrame(np.random.randint(0, 10, size=20)
                               .reshape(10, 2),
                               columns=['A', 'C'])],
                    axis=1)
        df.index = date_range('20130101 9:30', periods=10, freq='T')

        with ensure_clean_store(self.path) as store:
            store.append('df', df)

            result = store.select('df')
            expected = df
            assert_frame_equal(result, expected, by_blocks=True)

            result = store.select('df', columns=df.columns)
            expected = df
            assert_frame_equal(result, expected, by_blocks=True)

            expected = df.loc[:, ['A']]
            result = store.select('df', columns=['A'])
            assert_frame_equal(result, expected, by_blocks=True)

            expected = df.loc[:, ['B', 'A']]
            result = store.select('df', columns=['B', 'A'])
            assert_frame_equal(result, expected, by_blocks=True)

        # duplicates on both index and columns
        with ensure_clean_store(self.path) as store:
            store.append('df', df)
            store.append('df', df)

            expected = df.loc[:, ['B', 'A']]
            expected = concat([expected, expected])
            result = store.select('df', columns=['B', 'A'])
            assert_frame_equal(result, expected, by_blocks=True)

    def test_wide_table_dups(self):
        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):

                wp = tm.makePanel()
                store.put('panel', wp, format='table')
                store.put('panel', wp, format='table', append=True)

                recons = store['panel']

                assert_panel_equal(recons, wp)

    def test_long(self):
        def _check(left, right):
            assert_panel_equal(left.to_panel(), right.to_panel())

        with catch_warnings(record=True):
            wp = tm.makePanel()
            self._check_roundtrip(wp.to_frame(), _check)

    def test_longpanel(self):
        pass

    def test_overwrite_node(self):

        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeDataFrame()
            ts = tm.makeTimeSeries()
            store['a'] = ts

            tm.assert_series_equal(store['a'], ts)

    def test_sparse_with_compression(self):

        # GH 2931

        # make sparse dataframe
        arr = np.random.binomial(n=1, p=.01, size=(1000, 10))
        df = DataFrame(arr).to_sparse(fill_value=0)

        # case 1: store uncompressed
        self._check_double_roundtrip(df, tm.assert_frame_equal,
                                     compression=False,
                                     check_frame_type=True)

        # case 2: store compressed (works)
        self._check_double_roundtrip(df, tm.assert_frame_equal,
                                     compression='zlib',
                                     check_frame_type=True)

        # set one series to be completely sparse
        df[0] = np.zeros(1000)

        # case 3: store df with completely sparse series uncompressed
        self._check_double_roundtrip(df, tm.assert_frame_equal,
                                     compression=False,
                                     check_frame_type=True)

        # case 4: store df with completely sparse series compressed
        # (used to fail)
        self._check_double_roundtrip(df, tm.assert_frame_equal,
                                     compression='zlib',
                                     check_frame_type=True)

    def test_select(self):

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):
                wp = tm.makePanel()

                # put/select ok
                _maybe_remove(store, 'wp')
                store.put('wp', wp, format='table')
                store.select('wp')

                # non-table ok (where = None)
                _maybe_remove(store, 'wp')
                store.put('wp2', wp)
                store.select('wp2')

                # selection on the non-indexable with a large number of
                # columns
                wp = Panel(np.random.randn(100, 100, 100),
                           items=['Item%03d' % i for i in range(100)],
                           major_axis=date_range('1/1/2000', periods=100),
                           minor_axis=['E%03d' % i for i in range(100)])

                _maybe_remove(store, 'wp')
                store.append('wp', wp)
                items = ['Item%03d' % i for i in range(80)]
                result = store.select('wp', 'items=items')
                expected = wp.reindex(items=items)
                assert_panel_equal(expected, result)

                # selecting non-table with a where
                # pytest.raises(ValueError, store.select,
                #               'wp2', ('column', ['A', 'D']))

                # select with columns=
                df = tm.makeTimeDataFrame()
                _maybe_remove(store, 'df')
                store.append('df', df)
                result = store.select('df', columns=['A', 'B'])
                expected = df.reindex(columns=['A', 'B'])
                tm.assert_frame_equal(expected, result)

                # equivalently
                result = store.select('df', [("columns=['A', 'B']")])
                expected = df.reindex(columns=['A', 'B'])
                tm.assert_frame_equal(expected, result)

                # with a data column
                _maybe_remove(store, 'df')
                store.append('df', df, data_columns=['A'])
                result = store.select('df', ['A > 0'], columns=['A', 'B'])
                expected = df[df.A > 0].reindex(columns=['A', 'B'])
                tm.assert_frame_equal(expected, result)

                # all columns as data columns
                _maybe_remove(store, 'df')
                store.append('df', df, data_columns=True)
                result = store.select('df', ['A > 0'], columns=['A', 'B'])
                expected = df[df.A > 0].reindex(columns=['A', 'B'])
                tm.assert_frame_equal(expected, result)

                # with a data column, but different columns
                _maybe_remove(store, 'df')
                store.append('df', df, data_columns=['A'])
                result = store.select('df', ['A > 0'], columns=['C', 'D'])
                expected = df[df.A > 0].reindex(columns=['C', 'D'])
                tm.assert_frame_equal(expected, result)

    def test_select_dtypes(self):

        with ensure_clean_store(self.path) as store:
            # with a Timestamp data column (GH #2637)
            df = DataFrame(dict(
                ts=bdate_range('2012-01-01', periods=300),
                A=np.random.randn(300)))
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=['ts', 'A'])

            result = store.select('df', "ts>=Timestamp('2012-02-01')")
            expected = df[df.ts >= Timestamp('2012-02-01')]
            tm.assert_frame_equal(expected, result)

            # bool columns (GH #2849)
            df = DataFrame(np.random.randn(5, 2), columns=['A', 'B'])
            df['object'] = 'foo'
            df.loc[4:5, 'object'] = 'bar'
            df['boolv'] = df['A'] > 0
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=True)

            expected = (df[df.boolv == True]  # noqa
                        .reindex(columns=['A', 'boolv']))
            for v in [True, 'true', 1]:
                result = store.select('df', 'boolv == %s' % str(v),
                                      columns=['A', 'boolv'])
                tm.assert_frame_equal(expected, result)

            expected = (df[df.boolv == False]  # noqa
                        .reindex(columns=['A', 'boolv']))
            for v in [False, 'false', 0]:
                result = store.select(
                    'df', 'boolv == %s' % str(v), columns=['A', 'boolv'])
                tm.assert_frame_equal(expected, result)

            # integer index
            df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
            _maybe_remove(store, 'df_int')
            store.append('df_int', df)
            result = store.select(
                'df_int', "index<10 and columns=['A']")
            expected = df.reindex(index=list(df.index)[0:10], columns=['A'])
            tm.assert_frame_equal(expected, result)

            # float index
            df = DataFrame(dict(A=np.random.rand(
                20), B=np.random.rand(20), index=np.arange(20, dtype='f8')))
            _maybe_remove(store, 'df_float')
            store.append('df_float', df)
            result = store.select(
                'df_float', "index<10.0 and columns=['A']")
            expected = df.reindex(index=list(df.index)[0:10], columns=['A'])
            tm.assert_frame_equal(expected, result)

        with ensure_clean_store(self.path) as store:

            # floats w/o NaN
            df = DataFrame(
                dict(cols=range(11), values=range(11)), dtype='float64')
            df['cols'] = (df['cols'] + 10).apply(str)

            store.append('df1', df, data_columns=True)
            result = store.select(
                'df1', where='values>2.0')
            expected = df[df['values'] > 2.0]
            tm.assert_frame_equal(expected, result)

            # floats with NaN
            df.iloc[0] = np.nan
            expected = df[df['values'] > 2.0]

            store.append('df2', df, data_columns=True, index=False)
            result = store.select(
                'df2', where='values>2.0')
            tm.assert_frame_equal(expected, result)

            # https://github.com/PyTables/PyTables/issues/282
            # bug in selection when 0th row has a np.nan and an index
            # store.append('df3', df, data_columns=True)
            # result = store.select(
            #     'df3', where='values>2.0')
            # tm.assert_frame_equal(expected, result)

            # a float NaN not in the first position is ok too
            df = DataFrame(
                dict(cols=range(11), values=range(11)), dtype='float64')
            df['cols'] = (df['cols'] + 10).apply(str)

            df.iloc[1] = np.nan
            expected = df[df['values'] > 2.0]

            store.append('df4', df, data_columns=True)
            result = store.select(
                'df4', where='values>2.0')
            tm.assert_frame_equal(expected, result)

        # test selection with comparison against numpy scalar
        # GH 11283
        with ensure_clean_store(self.path) as store:
            df = tm.makeDataFrame()

            expected = df[df['A'] > 0]

            store.append('df', df, data_columns=True)
            np_zero = np.float64(0)  # noqa
            result = store.select('df', where=["A>np_zero"])
            tm.assert_frame_equal(expected, result)
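
    # Worth noting from the boolean cases above: the query layer coerces the
    # right-hand side to the data column's kind, so True, 'true', and 1 all
    # select the same rows from a bool column, and a numpy scalar such as
    # np.float64(0) is accepted wherever a Python float would be (GH 11283).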

    def test_select_with_many_inputs(self):

        with ensure_clean_store(self.path) as store:

            df = DataFrame(dict(ts=bdate_range('2012-01-01', periods=300),
                                A=np.random.randn(300),
                                B=range(300),
                                users=['a'] * 50 + ['b'] * 50 + ['c'] * 100 +
                                ['a%03d' % i for i in range(100)]))
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=['ts', 'A', 'B', 'users'])

            # regular select
            result = store.select('df', "ts>=Timestamp('2012-02-01')")
            expected = df[df.ts >= Timestamp('2012-02-01')]
            tm.assert_frame_equal(expected, result)

            # small selector
            result = store.select(
                'df',
                "ts>=Timestamp('2012-02-01') & users=['a','b','c']")
            expected = df[(df.ts >= Timestamp('2012-02-01')) &
                          df.users.isin(['a', 'b', 'c'])]
            tm.assert_frame_equal(expected, result)

            # big selector along the columns
            selector = ['a', 'b', 'c'] + ['a%03d' % i for i in range(60)]
            result = store.select(
                'df',
                "ts>=Timestamp('2012-02-01') and users=selector")
            expected = df[(df.ts >= Timestamp('2012-02-01')) &
                          df.users.isin(selector)]
            tm.assert_frame_equal(expected, result)

            selector = range(100, 200)
            result = store.select('df', 'B=selector')
            expected = df[df.B.isin(selector)]
            tm.assert_frame_equal(expected, result)
            assert len(result) == 100

            # big selector along the index
            selector = Index(df.ts[0:100].values)
            result = store.select('df', 'ts=selector')
            expected = df[df.ts.isin(selector.values)]
            tm.assert_frame_equal(expected, result)
            assert len(result) == 100
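
    # An '=' term against a list-valued Python variable (users=selector,
    # B=selector, ts=selector) is evaluated as set membership, i.e. the
    # equivalent of .isin(selector) on the matching data column or index,
    # which is exactly what each expected frame recomputes in pandas space.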

    def test_select_iterator(self):

        # single table
        with ensure_clean_store(self.path) as store:

            df = tm.makeTimeDataFrame(500)
            _maybe_remove(store, 'df')
            store.append('df', df)

            expected = store.select('df')

            results = [s for s in store.select('df', iterator=True)]
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            results = [s for s in store.select('df', chunksize=100)]
            assert len(results) == 5
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            results = [s for s in store.select('df', chunksize=150)]
            result = concat(results)
            tm.assert_frame_equal(result, expected)

        with ensure_clean_path(self.path) as path:

            df = tm.makeTimeDataFrame(500)
            df.to_hdf(path, 'df_non_table')
            pytest.raises(TypeError, read_hdf, path,
                          'df_non_table', chunksize=100)
            pytest.raises(TypeError, read_hdf, path,
                          'df_non_table', iterator=True)

        with ensure_clean_path(self.path) as path:

            df = tm.makeTimeDataFrame(500)
            df.to_hdf(path, 'df', format='table')

            results = [s for s in read_hdf(path, 'df', chunksize=100)]
            result = concat(results)

            assert len(results) == 5
            tm.assert_frame_equal(result, df)
            tm.assert_frame_equal(result, read_hdf(path, 'df'))

        # multiple

        with ensure_clean_store(self.path) as store:

            df1 = tm.makeTimeDataFrame(500)
            store.append('df1', df1, data_columns=True)
            df2 = tm.makeTimeDataFrame(500).rename(
                columns=lambda x: "%s_2" % x)
            df2['foo'] = 'bar'
            store.append('df2', df2)

            df = concat([df1, df2], axis=1)

            # full selection
            expected = store.select_as_multiple(
                ['df1', 'df2'], selector='df1')
            results = [s for s in store.select_as_multiple(
                ['df1', 'df2'], selector='df1', chunksize=150)]
            result = concat(results)
            tm.assert_frame_equal(expected, result)
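
    # The chunk counts are simple division: 500 rows with chunksize=100
    # yield exactly 5 frames, while chunksize=150 should yield
    # ceil(500 / 150) = 4 with a short final chunk; concat of either
    # sequence reproduces the full table. Iteration also requires
    # format='table' -- a fixed-format node raises TypeError, as shown.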

    def test_select_iterator_complete_8014(self):

        # GH 8014
        # using iterator and where clause
        chunksize = 1e4

        # no iterator
        with ensure_clean_store(self.path) as store:

            expected = tm.makeTimeDataFrame(100064, 'S')
            _maybe_remove(store, 'df')
            store.append('df', expected)

            beg_dt = expected.index[0]
            end_dt = expected.index[-1]

            # select w/o iteration and no where clause works
            result = store.select('df')
            tm.assert_frame_equal(expected, result)

            # select w/o iterator and where clause, single term, begin
            # of range, works
            where = "index >= '%s'" % beg_dt
            result = store.select('df', where=where)
            tm.assert_frame_equal(expected, result)

            # select w/o iterator and where clause, single term, end
            # of range, works
            where = "index <= '%s'" % end_dt
            result = store.select('df', where=where)
            tm.assert_frame_equal(expected, result)

            # select w/o iterator and where clause, inclusive range,
            # works
            where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
            result = store.select('df', where=where)
            tm.assert_frame_equal(expected, result)

        # with iterator, full range
        with ensure_clean_store(self.path) as store:

            expected = tm.makeTimeDataFrame(100064, 'S')
            _maybe_remove(store, 'df')
            store.append('df', expected)

            beg_dt = expected.index[0]
            end_dt = expected.index[-1]

            # select w/iterator and no where clause works
            results = [s for s in store.select('df', chunksize=chunksize)]
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            # select w/iterator and where clause, single term, begin of range
            where = "index >= '%s'" % beg_dt
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            # select w/iterator and where clause, single term, end of range
            where = "index <= '%s'" % end_dt
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            # select w/iterator and where clause, inclusive range
            where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]
            result = concat(results)
            tm.assert_frame_equal(expected, result)

    def test_select_iterator_non_complete_8014(self):

        # GH 8014
        # using iterator and where clause
        chunksize = 1e4

        # with iterator, non complete range
        with ensure_clean_store(self.path) as store:

            expected = tm.makeTimeDataFrame(100064, 'S')
            _maybe_remove(store, 'df')
            store.append('df', expected)

            beg_dt = expected.index[1]
            end_dt = expected.index[-2]

            # select w/iterator and where clause, single term, begin of range
            where = "index >= '%s'" % beg_dt
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]
            result = concat(results)
            rexpected = expected[expected.index >= beg_dt]
            tm.assert_frame_equal(rexpected, result)

            # select w/iterator and where clause, single term, end of range
            where = "index <= '%s'" % end_dt
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]
            result = concat(results)
            rexpected = expected[expected.index <= end_dt]
            tm.assert_frame_equal(rexpected, result)

            # select w/iterator and where clause, inclusive range
            where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]
            result = concat(results)
            rexpected = expected[(expected.index >= beg_dt) &
                                 (expected.index <= end_dt)]
            tm.assert_frame_equal(rexpected, result)

        # with iterator, empty where
        with ensure_clean_store(self.path) as store:

            expected = tm.makeTimeDataFrame(100064, 'S')
            _maybe_remove(store, 'df')
            store.append('df', expected)

            end_dt = expected.index[-1]

            # select w/iterator and a where clause that selects nothing
            where = "index > '%s'" % end_dt
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]
            assert 0 == len(results)

    def test_select_iterator_many_empty_frames(self):

        # GH 8014
        # using iterator and where clause can return many empty
        # frames.
        chunksize = int(1e4)

        # with iterator, range limited to the first chunk
        with ensure_clean_store(self.path) as store:

            expected = tm.makeTimeDataFrame(100000, 'S')
            _maybe_remove(store, 'df')
            store.append('df', expected)

            beg_dt = expected.index[0]
            end_dt = expected.index[chunksize - 1]

            # select w/iterator and where clause, single term, begin of range
            where = "index >= '%s'" % beg_dt
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]
            result = concat(results)
            rexpected = expected[expected.index >= beg_dt]
            tm.assert_frame_equal(rexpected, result)

            # select w/iterator and where clause, single term, end of range
            where = "index <= '%s'" % end_dt
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]

            assert len(results) == 1
            result = concat(results)
            rexpected = expected[expected.index <= end_dt]
            tm.assert_frame_equal(rexpected, result)

            # select w/iterator and where clause, inclusive range
            where = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]

            # should be 1 (was 10 before the GH 8014 fix)
            assert len(results) == 1
            result = concat(results)
            rexpected = expected[(expected.index >= beg_dt) &
                                 (expected.index <= end_dt)]
            tm.assert_frame_equal(rexpected, result)

            # select w/iterator and a where clause which selects *nothing*;
            # to be consistent with Python iteration idiom this should yield
            # no chunks at all, e.g. `for e in []: ...` never executes.

            where = "index <= '%s' & index >= '%s'" % (beg_dt, end_dt)
            results = [s for s in store.select(
                'df', where=where, chunksize=chunksize)]

            # should be []
            assert len(results) == 0

    def test_retain_index_attributes(self):

        # GH 3499, losing frequency info on index recreation
        df = DataFrame(dict(
            A=Series(lrange(3),
                     index=date_range('2000-1-1', periods=3, freq='H'))))

        with ensure_clean_store(self.path) as store:
            _maybe_remove(store, 'data')
            store.put('data', df, format='table')

            result = store.get('data')
            tm.assert_frame_equal(df, result)

            for attr in ['freq', 'tz', 'name']:
                for idx in ['index', 'columns']:
                    assert (getattr(getattr(df, idx), attr, None) ==
                            getattr(getattr(result, idx), attr, None))

            # try to append a table with a different frequency
            with catch_warnings(record=True):
                df2 = DataFrame(dict(
                    A=Series(lrange(3),
                             index=date_range('2002-1-1',
                                              periods=3, freq='D'))))
                store.append('data', df2)

            assert store.get_storer('data').info['index']['freq'] is None

            # this is ok
            _maybe_remove(store, 'df2')
            df2 = DataFrame(dict(
                A=Series(lrange(3),
                         index=[Timestamp('20010101'), Timestamp('20010102'),
                                Timestamp('20020101')])))
            store.append('df2', df2)
            df3 = DataFrame(dict(
                A=Series(lrange(3),
                         index=date_range('2002-1-1', periods=3,
                                          freq='D'))))
            store.append('df2', df3)
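
    # Once rows with a different frequency are appended, no single 'freq'
    # describes the combined index, so the storer records
    # info['index']['freq'] = None rather than a misleading value; starting
    # from an irregular index (df2) and appending regular data (df3) is fine
    # for the same reason -- no frequency was ever claimed.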
def test_retain_index_attributes2(self):
|
|
with ensure_clean_path(self.path) as path:
|
|
|
|
with catch_warnings(record=True):
|
|
|
|
df = DataFrame(dict(
|
|
A=Series(lrange(3),
|
|
index=date_range('2000-1-1',
|
|
periods=3, freq='H'))))
|
|
df.to_hdf(path, 'data', mode='w', append=True)
|
|
df2 = DataFrame(dict(
|
|
A=Series(lrange(3),
|
|
index=date_range('2002-1-1', periods=3,
|
|
freq='D'))))
|
|
df2.to_hdf(path, 'data', append=True)
|
|
|
|
idx = date_range('2000-1-1', periods=3, freq='H')
|
|
idx.name = 'foo'
|
|
df = DataFrame(dict(A=Series(lrange(3), index=idx)))
|
|
df.to_hdf(path, 'data', mode='w', append=True)
|
|
|
|
assert read_hdf(path, 'data').index.name == 'foo'
|
|
|
|
with catch_warnings(record=True):
|
|
|
|
idx2 = date_range('2001-1-1', periods=3, freq='H')
|
|
idx2.name = 'bar'
|
|
df2 = DataFrame(dict(A=Series(lrange(3), index=idx2)))
|
|
df2.to_hdf(path, 'data', append=True)
|
|
|
|
assert read_hdf(path, 'data').index.name is None
|
|
|
|
def test_panel_select(self):
|
|
|
|
with ensure_clean_store(self.path) as store:
|
|
|
|
with catch_warnings(record=True):
|
|
|
|
wp = tm.makePanel()
|
|
|
|
store.put('wp', wp, format='table')
|
|
date = wp.major_axis[len(wp.major_axis) // 2]
|
|
|
|
crit1 = ('major_axis>=date')
|
|
crit2 = ("minor_axis=['A', 'D']")
|
|
|
|
result = store.select('wp', [crit1, crit2])
|
|
expected = wp.truncate(before=date).reindex(minor=['A', 'D'])
|
|
assert_panel_equal(result, expected)
|
|
|
|
result = store.select(
|
|
'wp', ['major_axis>="20000124"',
|
|
("minor_axis=['A', 'B']")])
|
|
expected = wp.truncate(
|
|
before='20000124').reindex(minor=['A', 'B'])
|
|
assert_panel_equal(result, expected)
|
|
|
|
def test_frame_select(self):
|
|
|
|
df = tm.makeTimeDataFrame()
|
|
|
|
with ensure_clean_store(self.path) as store:
|
|
store.put('frame', df, format='table')
|
|
date = df.index[len(df) // 2]
|
|
|
|
crit1 = Term('index>=date')
|
|
assert crit1.env.scope['date'] == date
            crit2 = ("columns=['A', 'D']")
            crit3 = ('columns=A')

            result = store.select('frame', [crit1, crit2])
            expected = df.loc[date:, ['A', 'D']]
            tm.assert_frame_equal(result, expected)

            result = store.select('frame', [crit3])
            expected = df.loc[:, ['A']]
            tm.assert_frame_equal(result, expected)

            # invalid terms
            df = tm.makeTimeDataFrame()
            store.append('df_time', df)
            pytest.raises(
                ValueError, store.select, 'df_time', "index>0")

            # can't select if not written as table
            # store['frame'] = df
            # pytest.raises(ValueError, store.select,
            #               'frame', [crit1, crit2])

    def test_frame_select_complex(self):
        # select via complex criteria

        df = tm.makeTimeDataFrame()
        df['string'] = 'foo'
        df.loc[df.index[0:4], 'string'] = 'bar'

        with ensure_clean_store(self.path) as store:
            store.put('df', df, format='table', data_columns=['string'])

            # empty
            result = store.select('df', 'index>df.index[3] & string="bar"')
            expected = df.loc[(df.index > df.index[3]) & (df.string == 'bar')]
            tm.assert_frame_equal(result, expected)

            result = store.select('df', 'index>df.index[3] & string="foo"')
            expected = df.loc[(df.index > df.index[3]) & (df.string == 'foo')]
            tm.assert_frame_equal(result, expected)

            # or
            result = store.select('df', 'index>df.index[3] | string="bar"')
            expected = df.loc[(df.index > df.index[3]) | (df.string == 'bar')]
            tm.assert_frame_equal(result, expected)

            result = store.select('df', '(index>df.index[3] & '
                                  'index<=df.index[6]) | string="bar"')
            expected = df.loc[((df.index > df.index[3]) & (
                df.index <= df.index[6])) | (df.string == 'bar')]
            tm.assert_frame_equal(result, expected)

            # invert
            result = store.select('df', 'string!="bar"')
            expected = df.loc[df.string != 'bar']
            tm.assert_frame_equal(result, expected)

            # invert not implemented in numexpr :(
            pytest.raises(NotImplementedError,
                          store.select, 'df', '~(string="bar")')

            # invert ok for filters
            result = store.select('df', "~(columns=['A','B'])")
            expected = df.loc[:, df.columns.difference(['A', 'B'])]
            tm.assert_frame_equal(result, expected)

            # in
            result = store.select(
                'df', "index>df.index[3] & columns in ['A','B']")
            expected = df.loc[df.index > df.index[3]].reindex(columns=[
                'A', 'B'])
            tm.assert_frame_equal(result, expected)

    def test_frame_select_complex2(self):

        with ensure_clean_path(['parms.hdf', 'hist.hdf']) as paths:

            pp, hh = paths

            # use non-trivial selection criteria
            parms = DataFrame({'A': [1, 1, 2, 2, 3]})
            parms.to_hdf(pp, 'df', mode='w',
                         format='table', data_columns=['A'])

            selection = read_hdf(pp, 'df', where='A=[2,3]')
            hist = DataFrame(np.random.randn(25, 1),
                             columns=['data'],
                             index=MultiIndex.from_tuples(
                                 [(i, j) for i in range(5)
                                  for j in range(5)],
                                 names=['l1', 'l2']))

            hist.to_hdf(hh, 'df', mode='w', format='table')

            expected = read_hdf(hh, 'df', where='l1=[2, 3, 4]')

            # scope with list-like
            l = selection.index.tolist()  # noqa
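            # names used in a where clause, like `l` here, are looked up in
            # the caller's namespace when the expression is evaluated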
            store = HDFStore(hh)
            result = store.select('df', where='l1=l')
            assert_frame_equal(result, expected)
            store.close()

            result = read_hdf(hh, 'df', where='l1=l')
            assert_frame_equal(result, expected)

            # index
            index = selection.index  # noqa
            result = read_hdf(hh, 'df', where='l1=index')
            assert_frame_equal(result, expected)

            result = read_hdf(hh, 'df', where='l1=selection.index')
            assert_frame_equal(result, expected)

            result = read_hdf(hh, 'df', where='l1=selection.index.tolist()')
            assert_frame_equal(result, expected)

            result = read_hdf(hh, 'df', where='l1=list(selection.index)')
            assert_frame_equal(result, expected)

            # scope with index
            store = HDFStore(hh)

            result = store.select('df', where='l1=index')
            assert_frame_equal(result, expected)

            result = store.select('df', where='l1=selection.index')
            assert_frame_equal(result, expected)

            result = store.select('df', where='l1=selection.index.tolist()')
            assert_frame_equal(result, expected)

            result = store.select('df', where='l1=list(selection.index)')
            assert_frame_equal(result, expected)

            store.close()

    def test_invalid_filtering(self):

        # can't use more than one filter (atm)

        df = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:
            store.put('df', df, format='table')

            # not implemented
            pytest.raises(NotImplementedError, store.select,
                          'df', "columns=['A'] | columns=['B']")

            # in theory we could deal with this
            pytest.raises(NotImplementedError, store.select,
                          'df', "columns=['A','B'] & columns=['C']")

    def test_string_select(self):
        # GH 2973
        with ensure_clean_store(self.path) as store:

            df = tm.makeTimeDataFrame()

            # test string ==/!=
            df['x'] = 'none'
            df.loc[2:7, 'x'] = ''

            store.append('df', df, data_columns=['x'])

            result = store.select('df', 'x=none')
            expected = df[df.x == 'none']
            assert_frame_equal(result, expected)

            try:
                result = store.select('df', 'x!=none')
                expected = df[df.x != 'none']
                assert_frame_equal(result, expected)
            except Exception as detail:
                pprint_thing("[{0}]".format(detail))
                pprint_thing(store)
                pprint_thing(expected)
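
            # once the empty strings are replaced with np.nan, the NaN rows
            # are the only values that differ from 'none'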
            df2 = df.copy()
            df2.loc[df2.x == '', 'x'] = np.nan

            store.append('df2', df2, data_columns=['x'])
            result = store.select('df2', 'x!=none')
            expected = df2[isna(df2.x)]
            assert_frame_equal(result, expected)

            # int ==/!=
            df['int'] = 1
            df.loc[2:7, 'int'] = 2

            store.append('df3', df, data_columns=['int'])

            result = store.select('df3', 'int=2')
            expected = df[df.int == 2]
            assert_frame_equal(result, expected)

            result = store.select('df3', 'int!=2')
            expected = df[df.int != 2]
            assert_frame_equal(result, expected)

    def test_read_column(self):

        df = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:
            _maybe_remove(store, 'df')

            # GH 17912
            # HDFStore.select_column should raise a KeyError
            # exception if the key is not a valid store
            with pytest.raises(KeyError,
                               message='No object named index in the file'):
                store.select_column('df', 'index')

            store.append('df', df)
            # error
            pytest.raises(KeyError, store.select_column, 'df', 'foo')

            def f():
                store.select_column('df', 'index', where=['index>5'])
            pytest.raises(Exception, f)

            # valid
            result = store.select_column('df', 'index')
            tm.assert_almost_equal(result.values, Series(df.index).values)
            assert isinstance(result, Series)

            # not a data indexable column
            pytest.raises(
                ValueError, store.select_column, 'df', 'values_block_0')

            # a data column
            df2 = df.copy()
            df2['string'] = 'foo'
            store.append('df2', df2, data_columns=['string'])
            result = store.select_column('df2', 'string')
            tm.assert_almost_equal(result.values, df2['string'].values)

            # a data column with NaNs, result excludes the NaNs
            df3 = df.copy()
            df3['string'] = 'foo'
            df3.loc[4:6, 'string'] = np.nan
            store.append('df3', df3, data_columns=['string'])
            result = store.select_column('df3', 'string')
            tm.assert_almost_equal(result.values, df3['string'].values)

            # start/stop
            result = store.select_column('df3', 'string', start=2)
            tm.assert_almost_equal(result.values, df3['string'].values[2:])

            result = store.select_column('df3', 'string', start=-2)
            tm.assert_almost_equal(result.values, df3['string'].values[-2:])

            result = store.select_column('df3', 'string', stop=2)
            tm.assert_almost_equal(result.values, df3['string'].values[:2])

            result = store.select_column('df3', 'string', stop=-2)
            tm.assert_almost_equal(result.values, df3['string'].values[:-2])

            result = store.select_column('df3', 'string', start=2, stop=-2)
            tm.assert_almost_equal(result.values, df3['string'].values[2:-2])
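
            # a negative start combined with a positive stop follows Python
            # slice semantics; values[-2:2] selects nothing here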
            result = store.select_column('df3', 'string', start=-2, stop=2)
            tm.assert_almost_equal(result.values, df3['string'].values[-2:2])

            # GH 10392 - make sure column name is preserved
            df4 = DataFrame({'A': np.random.randn(10), 'B': 'foo'})
            store.append('df4', df4, data_columns=True)
            expected = df4['B']
            result = store.select_column('df4', 'B')
            tm.assert_series_equal(result, expected)

    def test_coordinates(self):
        df = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:

            _maybe_remove(store, 'df')
            store.append('df', df)
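
            # select_as_coordinates returns the matching row locations as an
            # Index, which can be passed back to select() as a `where`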
            # all
            c = store.select_as_coordinates('df')
            assert((c.values == np.arange(len(df.index))).all())

            # get coordinates back & test vs frame
            _maybe_remove(store, 'df')

            df = DataFrame(dict(A=lrange(5), B=lrange(5)))
            store.append('df', df)
            c = store.select_as_coordinates('df', ['index<3'])
            assert((c.values == np.arange(3)).all())
            result = store.select('df', where=c)
            expected = df.loc[0:2, :]
            tm.assert_frame_equal(result, expected)

            c = store.select_as_coordinates('df', ['index>=3', 'index<=4'])
            assert((c.values == np.arange(2) + 3).all())
            result = store.select('df', where=c)
            expected = df.loc[3:4, :]
            tm.assert_frame_equal(result, expected)
            assert isinstance(c, Index)

            # multiple tables
            _maybe_remove(store, 'df1')
            _maybe_remove(store, 'df2')
            df1 = tm.makeTimeDataFrame()
            df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
            store.append('df1', df1, data_columns=['A', 'B'])
            store.append('df2', df2)

            c = store.select_as_coordinates('df1', ['A>0', 'B>0'])
            df1_result = store.select('df1', c)
            df2_result = store.select('df2', c)
            result = concat([df1_result, df2_result], axis=1)

            expected = concat([df1, df2], axis=1)
            expected = expected[(expected.A > 0) & (expected.B > 0)]
            tm.assert_frame_equal(result, expected)

        # pass array/mask as the coordinates
        with ensure_clean_store(self.path) as store:

            df = DataFrame(np.random.randn(1000, 2),
                           index=date_range('20000101', periods=1000))
            store.append('df', df)
            c = store.select_column('df', 'index')
            where = c[DatetimeIndex(c).month == 5].index
            expected = df.iloc[where]

            # locations
            result = store.select('df', where=where)
            tm.assert_frame_equal(result, expected)

            # boolean
            result = store.select('df', where=where)
            tm.assert_frame_equal(result, expected)

            # invalid
            pytest.raises(ValueError, store.select, 'df',
                          where=np.arange(len(df), dtype='float64'))
            pytest.raises(ValueError, store.select, 'df',
                          where=np.arange(len(df) + 1))
            pytest.raises(ValueError, store.select, 'df',
                          where=np.arange(len(df)), start=5)
            pytest.raises(ValueError, store.select, 'df',
                          where=np.arange(len(df)), start=5, stop=10)

            # selection with filter
            selection = date_range('20000101', periods=500)
            result = store.select('df', where='index in selection')
            expected = df[df.index.isin(selection)]
            tm.assert_frame_equal(result, expected)

            # list
            df = DataFrame(np.random.randn(10, 2))
            store.append('df2', df)
            result = store.select('df2', where=[0, 3, 5])
            expected = df.iloc[[0, 3, 5]]
            tm.assert_frame_equal(result, expected)

            # boolean
            where = [True] * 10
            where[-2] = False
            result = store.select('df2', where=where)
            expected = df.loc[where]
            tm.assert_frame_equal(result, expected)

            # start/stop
            result = store.select('df2', start=5, stop=10)
            expected = df[5:10]
            tm.assert_frame_equal(result, expected)

    def test_append_to_multiple(self):
        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
        df2['foo'] = 'bar'
        df = concat([df1, df2], axis=1)

        with ensure_clean_store(self.path) as store:

            # exceptions
            pytest.raises(ValueError, store.append_to_multiple,
                          {'df1': ['A', 'B'], 'df2': None}, df,
                          selector='df3')
            pytest.raises(ValueError, store.append_to_multiple,
                          {'df1': None, 'df2': None}, df, selector='df3')
            pytest.raises(
                ValueError, store.append_to_multiple, 'df1', df, 'df1')
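
            # the dict maps each table name to the columns stored there; a
            # value of None takes all remaining columns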
            # regular operation
            store.append_to_multiple(
                {'df1': ['A', 'B'], 'df2': None}, df, selector='df1')
            result = store.select_as_multiple(
                ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1')
            expected = df[(df.A > 0) & (df.B > 0)]
            tm.assert_frame_equal(result, expected)

    def test_append_to_multiple_dropna(self):
        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
        df1.iloc[1, df1.columns.get_indexer(['A', 'B'])] = np.nan
        df = concat([df1, df2], axis=1)

        with ensure_clean_store(self.path) as store:

            # dropna=True should guarantee rows are synchronized
            store.append_to_multiple(
                {'df1': ['A', 'B'], 'df2': None}, df, selector='df1',
                dropna=True)
            result = store.select_as_multiple(['df1', 'df2'])
            expected = df.dropna()
            tm.assert_frame_equal(result, expected)
            tm.assert_index_equal(store.select('df1').index,
                                  store.select('df2').index)

    @pytest.mark.xfail(run=False,
                       reason="append_to_multiple_dropna_false "
                              "is not raising as failed")
    def test_append_to_multiple_dropna_false(self):
        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
        df1.iloc[1, df1.columns.get_indexer(['A', 'B'])] = np.nan
        df = concat([df1, df2], axis=1)

        with ensure_clean_store(self.path) as store:

            # dropna=False shouldn't synchronize row indexes
            store.append_to_multiple(
                {'df1a': ['A', 'B'], 'df2a': None}, df, selector='df1a',
                dropna=False)

            with pytest.raises(ValueError):
                store.select_as_multiple(['df1a', 'df2a'])

            assert not store.select('df1a').index.equals(
                store.select('df2a').index)

    def test_select_as_multiple(self):

        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
        df2['foo'] = 'bar'

        with ensure_clean_store(self.path) as store:

            # no tables stored
            pytest.raises(Exception, store.select_as_multiple,
                          None, where=['A>0', 'B>0'], selector='df1')

            store.append('df1', df1, data_columns=['A', 'B'])
            store.append('df2', df2)

            # exceptions
            pytest.raises(Exception, store.select_as_multiple,
                          None, where=['A>0', 'B>0'], selector='df1')
            pytest.raises(Exception, store.select_as_multiple,
                          [None], where=['A>0', 'B>0'], selector='df1')
            pytest.raises(KeyError, store.select_as_multiple,
                          ['df1', 'df3'], where=['A>0', 'B>0'],
                          selector='df1')
            pytest.raises(KeyError, store.select_as_multiple,
                          ['df3'], where=['A>0', 'B>0'], selector='df1')
            pytest.raises(KeyError, store.select_as_multiple,
                          ['df1', 'df2'], where=['A>0', 'B>0'],
                          selector='df4')

            # default select
            result = store.select('df1', ['A>0', 'B>0'])
            expected = store.select_as_multiple(
                ['df1'], where=['A>0', 'B>0'], selector='df1')
            tm.assert_frame_equal(result, expected)
            expected = store.select_as_multiple(
                'df1', where=['A>0', 'B>0'], selector='df1')
            tm.assert_frame_equal(result, expected)
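
            # the where criteria are evaluated against the selector table
            # only; the matching rows are then read from every listed table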
            # multiple
            result = store.select_as_multiple(
                ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1')
            expected = concat([df1, df2], axis=1)
            expected = expected[(expected.A > 0) & (expected.B > 0)]
            tm.assert_frame_equal(result, expected)

            # multiple (diff selector)
            result = store.select_as_multiple(
                ['df1', 'df2'], where='index>df2.index[4]', selector='df2')
            expected = concat([df1, df2], axis=1)
            expected = expected[5:]
            tm.assert_frame_equal(result, expected)

            # test exception for diff rows
            store.append('df3', tm.makeTimeDataFrame(nper=50))
            pytest.raises(ValueError, store.select_as_multiple,
                          ['df1', 'df3'], where=['A>0', 'B>0'],
                          selector='df1')

    @pytest.mark.skipif(
        LooseVersion(tables.__version__) < LooseVersion('3.1.0'),
        reason=("tables version does not support fix for nan selection "
                "bug: GH 4858"))
    def test_nan_selection_bug_4858(self):

        with ensure_clean_store(self.path) as store:

            df = DataFrame(dict(cols=range(6), values=range(6)),
                           dtype='float64')
            df['cols'] = (df['cols'] + 10).apply(str)
            df.iloc[0] = np.nan

            expected = DataFrame(dict(cols=['13.0', '14.0', '15.0'], values=[
                3., 4., 5.]), index=[3, 4, 5])

            # write w/o the index on that particular column
            store.append('df', df, data_columns=True, index=['cols'])
            result = store.select('df', where='values>2.0')
            assert_frame_equal(result, expected)

    def test_start_stop_table(self):

        with ensure_clean_store(self.path) as store:

            # table
            df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
            store.append('df', df)

            result = store.select(
                'df', "columns=['A']", start=0, stop=5)
            expected = df.loc[0:4, ['A']]
            tm.assert_frame_equal(result, expected)

            # out of range
            result = store.select(
                'df', "columns=['A']", start=30, stop=40)
            assert len(result) == 0
            expected = df.loc[30:40, ['A']]
            tm.assert_frame_equal(result, expected)

    def test_start_stop_multiple(self):

        # GH 16209
        with ensure_clean_store(self.path) as store:

            df = DataFrame({"foo": [1, 2], "bar": [1, 2]})

            store.append_to_multiple({'selector': ['foo'], 'data': None}, df,
                                     selector='selector')
            result = store.select_as_multiple(['selector', 'data'],
                                              selector='selector', start=0,
                                              stop=1)
            expected = df.loc[[0], ['foo', 'bar']]
            tm.assert_frame_equal(result, expected)

    def test_start_stop_fixed(self):

        with ensure_clean_store(self.path) as store:

            # fixed, GH 8287
            df = DataFrame(dict(A=np.random.rand(20),
                                B=np.random.rand(20)),
                           index=pd.date_range('20130101', periods=20))
            store.put('df', df)

            result = store.select(
                'df', start=0, stop=5)
            expected = df.iloc[0:5, :]
            tm.assert_frame_equal(result, expected)

            result = store.select(
                'df', start=5, stop=10)
            expected = df.iloc[5:10, :]
            tm.assert_frame_equal(result, expected)

            # out of range
            result = store.select(
                'df', start=30, stop=40)
            expected = df.iloc[30:40, :]
            tm.assert_frame_equal(result, expected)

            # series
            s = df.A
            store.put('s', s)
            result = store.select('s', start=0, stop=5)
            expected = s.iloc[0:5]
            tm.assert_series_equal(result, expected)

            result = store.select('s', start=5, stop=10)
            expected = s.iloc[5:10]
            tm.assert_series_equal(result, expected)

            # sparse; not implemented
            df = tm.makeDataFrame()
            df.iloc[3:5, 1:3] = np.nan
            df.iloc[8:10, -2] = np.nan
            dfs = df.to_sparse()
            store.put('dfs', dfs)
            with pytest.raises(NotImplementedError):
                store.select('dfs', start=0, stop=5)

    def test_select_filter_corner(self):

        df = DataFrame(np.random.randn(50, 100))
        df.index = ['%.3d' % c for c in df.index]
        df.columns = ['%.3d' % c for c in df.columns]

        with ensure_clean_store(self.path) as store:
            store.put('frame', df, format='table')

            crit = 'columns=df.columns[:75]'
            result = store.select('frame', [crit])
            tm.assert_frame_equal(result, df.loc[:, df.columns[:75]])

            crit = 'columns=df.columns[:75:2]'
            result = store.select('frame', [crit])
            tm.assert_frame_equal(result, df.loc[:, df.columns[:75:2]])

    def test_path_pathlib(self):
        df = tm.makeDataFrame()

        result = tm.round_trip_pathlib(
            lambda p: df.to_hdf(p, 'df'),
            lambda p: pd.read_hdf(p, 'df'))
        tm.assert_frame_equal(df, result)

    @pytest.mark.parametrize('start, stop', [(0, 2), (1, 2), (None, None)])
    def test_contiguous_mixed_data_table(self, start, stop):
        # GH 17021
        # ValueError when reading a contiguous mixed-data table ft. VLArray
        df = DataFrame({'a': Series([20111010, 20111011, 20111012]),
                        'b': Series(['ab', 'cd', 'ab'])})

        with ensure_clean_store(self.path) as store:
            store.append('test_dataset', df)

            result = store.select('test_dataset', start=start, stop=stop)
            assert_frame_equal(df[start:stop], result)

    def test_path_pathlib_hdfstore(self):
        df = tm.makeDataFrame()

        def writer(path):
            with pd.HDFStore(path) as store:
                df.to_hdf(store, 'df')

        def reader(path):
            with pd.HDFStore(path) as store:
                return pd.read_hdf(store, 'df')

        result = tm.round_trip_pathlib(writer, reader)
        tm.assert_frame_equal(df, result)

    def test_pickle_path_localpath(self):
        df = tm.makeDataFrame()
        result = tm.round_trip_localpath(
            lambda p: df.to_hdf(p, 'df'),
            lambda p: pd.read_hdf(p, 'df'))
        tm.assert_frame_equal(df, result)

    def test_path_localpath_hdfstore(self):
        df = tm.makeDataFrame()

        def writer(path):
            with pd.HDFStore(path) as store:
                df.to_hdf(store, 'df')

        def reader(path):
            with pd.HDFStore(path) as store:
                return pd.read_hdf(store, 'df')

        result = tm.round_trip_localpath(writer, reader)
        tm.assert_frame_equal(df, result)
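
    # roundtrip helpers: write `obj` to a fresh store, read it back and
    # compare with `comparator`; the double variant writes the retrieved
    # object out again to catch errors that only appear on a second pass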
    def _check_roundtrip(self, obj, comparator, compression=False, **kwargs):

        options = {}
        if compression:
            options['complib'] = _default_compressor

        with ensure_clean_store(self.path, 'w', **options) as store:
            store['obj'] = obj
            retrieved = store['obj']
            comparator(retrieved, obj, **kwargs)

    def _check_double_roundtrip(self, obj, comparator, compression=False,
                                **kwargs):
        options = {}
        if compression:
            options['complib'] = compression or _default_compressor

        with ensure_clean_store(self.path, 'w', **options) as store:
            store['obj'] = obj
            retrieved = store['obj']
            comparator(retrieved, obj, **kwargs)
            store['obj'] = retrieved
            again = store['obj']
            comparator(again, obj, **kwargs)

    def _check_roundtrip_table(self, obj, comparator, compression=False):
        options = {}
        if compression:
            options['complib'] = _default_compressor

        with ensure_clean_store(self.path, 'w', **options) as store:
            store.put('obj', obj, format='table')
            retrieved = store['obj']

            comparator(retrieved, obj)

    def test_multiple_open_close(self):
        # gh-4409: open & close multiple times

        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.to_hdf(path, 'df', mode='w', format='table')

            # single
            store = HDFStore(path)
            assert 'CLOSED' not in store.info()
            assert store.is_open

            store.close()
            assert 'CLOSED' in store.info()
            assert not store.is_open

        with ensure_clean_path(self.path) as path:
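
            # whether two handles on one file may coexist appears to depend
            # on the PyTables/HDF5 build; under a strict open policy the
            # second open raises, otherwise both handles work independently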
            if pytables._table_file_open_policy_is_strict:

                # multiples
                store1 = HDFStore(path)

                def f():
                    HDFStore(path)
                pytest.raises(ValueError, f)
                store1.close()

            else:

                # multiples
                store1 = HDFStore(path)
                store2 = HDFStore(path)

                assert 'CLOSED' not in store1.info()
                assert 'CLOSED' not in store2.info()
                assert store1.is_open
                assert store2.is_open

                store1.close()
                assert 'CLOSED' in store1.info()
                assert not store1.is_open
                assert 'CLOSED' not in store2.info()
                assert store2.is_open

                store2.close()
                assert 'CLOSED' in store1.info()
                assert 'CLOSED' in store2.info()
                assert not store1.is_open
                assert not store2.is_open

                # nested close
                store = HDFStore(path, mode='w')
                store.append('df', df)

                store2 = HDFStore(path)
                store2.append('df2', df)
                store2.close()
                assert 'CLOSED' in store2.info()
                assert not store2.is_open

                store.close()
                assert 'CLOSED' in store.info()
                assert not store.is_open

                # double closing
                store = HDFStore(path, mode='w')
                store.append('df', df)

                store2 = HDFStore(path)
                store.close()
                assert 'CLOSED' in store.info()
                assert not store.is_open

                store2.close()
                assert 'CLOSED' in store2.info()
                assert not store2.is_open

        # ops on a closed store
        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.to_hdf(path, 'df', mode='w', format='table')

            store = HDFStore(path)
            store.close()

            pytest.raises(ClosedFileError, store.keys)
            pytest.raises(ClosedFileError, lambda: 'df' in store)
            pytest.raises(ClosedFileError, lambda: len(store))
            pytest.raises(ClosedFileError, lambda: store['df'])
            pytest.raises(AttributeError, lambda: store.df)
            pytest.raises(ClosedFileError, store.select, 'df')
            pytest.raises(ClosedFileError, store.get, 'df')
            pytest.raises(ClosedFileError, store.append, 'df2', df)
            pytest.raises(ClosedFileError, store.put, 'df3', df)
            pytest.raises(ClosedFileError, store.get_storer, 'df2')
            pytest.raises(ClosedFileError, store.remove, 'df2')

            def f():
                store.select('df')
            tm.assert_raises_regex(ClosedFileError, 'file is not open', f)

    def test_pytables_native_read(self, datapath):
        with ensure_clean_store(
                datapath('io', 'data', 'legacy_hdf/pytables_native.h5'),
                mode='r') as store:
            d2 = store['detector/readout']
            assert isinstance(d2, DataFrame)

    @pytest.mark.skipif(PY35 and is_platform_windows(),
                        reason="native2 read fails oddly on windows / 3.5")
    def test_pytables_native2_read(self, datapath):
        with ensure_clean_store(
                datapath('io', 'data', 'legacy_hdf', 'pytables_native2.h5'),
                mode='r') as store:
            str(store)
            d1 = store['detector']
            assert isinstance(d1, DataFrame)

    def test_legacy_table_read(self, datapath):
        # legacy table types
        with ensure_clean_store(
                datapath('io', 'data', 'legacy_hdf', 'legacy_table.h5'),
                mode='r') as store:

            with catch_warnings(record=True):
                store.select('df1')
                store.select('df2')
                store.select('wp1')

                # force the frame
                store.select('df2', typ='legacy_frame')

                # old version warning
                pytest.raises(
                    Exception, store.select, 'wp1', 'minor_axis=B')

                df2 = store.select('df2')
                result = store.select('df2', 'index>df2.index[2]')
                expected = df2[df2.index > df2.index[2]]
                assert_frame_equal(expected, result)

    def test_copy(self):

        with catch_warnings(record=True):

            def do_copy(f, new_f=None, keys=None,
                        propindexes=True, **kwargs):
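                # copy the store at `f` to `new_f` and check that keys, row
                # counts and (optionally) the propagated indexes survive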
                try:
                    store = HDFStore(f, 'r')

                    if new_f is None:
                        import tempfile
                        fd, new_f = tempfile.mkstemp()

                    tstore = store.copy(
                        new_f, keys=keys, propindexes=propindexes, **kwargs)

                    # check keys
                    if keys is None:
                        keys = store.keys()
                    assert set(keys) == set(tstore.keys())

                    # check indices & nrows
                    for k in tstore.keys():
                        if tstore.get_storer(k).is_table:
                            new_t = tstore.get_storer(k)
                            orig_t = store.get_storer(k)

                            assert orig_t.nrows == new_t.nrows

                            # check propindexes
                            if propindexes:
                                for a in orig_t.axes:
                                    if a.is_indexed:
                                        assert new_t[a.name].is_indexed

                finally:
                    safe_close(store)
                    safe_close(tstore)
                    try:
                        os.close(fd)
                    except:
                        pass
                    safe_remove(new_f)

            # new table
            df = tm.makeDataFrame()

            try:
                path = create_tempfile(self.path)
                st = HDFStore(path)
                st.append('df', df, data_columns=['A'])
                st.close()
                do_copy(f=path)
                do_copy(f=path, propindexes=False)
            finally:
                safe_remove(path)

    def test_store_datetime_fractional_secs(self):

        with ensure_clean_store(self.path) as store:
            dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456)
            series = Series([0], [dt])
            store['a'] = series
            assert store['a'].index[0] == dt

    def test_tseries_indices_series(self):

        with ensure_clean_store(self.path) as store:
            idx = tm.makeDateIndex(10)
            ser = Series(np.random.randn(len(idx)), idx)
            store['a'] = ser
            result = store['a']

            tm.assert_series_equal(result, ser)
            assert result.index.freq == ser.index.freq
            tm.assert_class_equal(result.index, ser.index, obj="series index")

            idx = tm.makePeriodIndex(10)
            ser = Series(np.random.randn(len(idx)), idx)
            store['a'] = ser
            result = store['a']

            tm.assert_series_equal(result, ser)
            assert result.index.freq == ser.index.freq
            tm.assert_class_equal(result.index, ser.index, obj="series index")

    def test_tseries_indices_frame(self):

        with ensure_clean_store(self.path) as store:
            idx = tm.makeDateIndex(10)
            df = DataFrame(np.random.randn(len(idx), 3), index=idx)
            store['a'] = df
            result = store['a']

            assert_frame_equal(result, df)
            assert result.index.freq == df.index.freq
            tm.assert_class_equal(result.index, df.index,
                                  obj="dataframe index")

            idx = tm.makePeriodIndex(10)
            df = DataFrame(np.random.randn(len(idx), 3), idx)
            store['a'] = df
            result = store['a']

            assert_frame_equal(result, df)
            assert result.index.freq == df.index.freq
            tm.assert_class_equal(result.index, df.index,
                                  obj="dataframe index")

    def test_unicode_index(self):

        unicode_values = [u('\u03c3'), u('\u03c3\u03c3')]

        # PerformanceWarning
        with catch_warnings(record=True):
            s = Series(np.random.randn(len(unicode_values)), unicode_values)
            self._check_roundtrip(s, tm.assert_series_equal)

    def test_unicode_longer_encoded(self):
        # GH 11234
        char = '\u0394'
        df = pd.DataFrame({'A': [char]})
        with ensure_clean_store(self.path) as store:
            store.put('df', df, format='table', encoding='utf-8')
            result = store.get('df')
            tm.assert_frame_equal(result, df)

        df = pd.DataFrame({'A': ['a', char], 'B': ['b', 'b']})
        with ensure_clean_store(self.path) as store:
            store.put('df', df, format='table', encoding='utf-8')
            result = store.get('df')
            tm.assert_frame_equal(result, df)

    def test_store_datetime_mixed(self):

        df = DataFrame(
            {'a': [1, 2, 3], 'b': [1., 2., 3.], 'c': ['a', 'b', 'c']})
        ts = tm.makeTimeSeries()
        df['d'] = ts.index[:3]
        self._check_roundtrip(df, tm.assert_frame_equal)

    # def test_cant_write_multiindex_table(self):
    #     # for now, #1848
    #     df = DataFrame(np.random.randn(10, 4),
    #                    index=[np.arange(5).repeat(2),
    #                           np.tile(np.arange(2), 5)])
    #
    #     pytest.raises(Exception, store.put, 'foo', df, format='table')

    def test_append_with_diff_col_name_types_raises_value_error(self):
        df = DataFrame(np.random.randn(10, 1))
        df2 = DataFrame({'a': np.random.randn(10)})
        df3 = DataFrame({(1, 2): np.random.randn(10)})
        df4 = DataFrame({('1', 2): np.random.randn(10)})
        df5 = DataFrame({('1', 2, object): np.random.randn(10)})

        with ensure_clean_store(self.path) as store:
            name = 'df_%s' % tm.rands(10)
            store.append(name, df)

            for d in (df2, df3, df4, df5):
                with pytest.raises(ValueError):
                    store.append(name, d)

    def test_query_with_nested_special_character(self):
        df = DataFrame({'a': ['a', 'a', 'c', 'b',
                              'test & test', 'c', 'b', 'e'],
                        'b': [1, 2, 3, 4, 5, 6, 7, 8]})
        expected = df[df.a == 'test & test']
        with ensure_clean_store(self.path) as store:
            store.append('test', df, format='table', data_columns=True)
            result = store.select('test', 'a = "test & test"')
            tm.assert_frame_equal(expected, result)

    def test_categorical(self):

        with ensure_clean_store(self.path) as store:

            # Basic
            _maybe_remove(store, 's')
            s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[
                'a', 'b', 'c', 'd'], ordered=False))
            store.append('s', s, format='table')
            result = store.select('s')
            tm.assert_series_equal(s, result)

            _maybe_remove(store, 's_ordered')
            s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[
                'a', 'b', 'c', 'd'], ordered=True))
            store.append('s_ordered', s, format='table')
            result = store.select('s_ordered')
            tm.assert_series_equal(s, result)

            _maybe_remove(store, 'df')

            df = DataFrame({"s": s, "vals": [1, 2, 3, 4, 5, 6]})
            store.append('df', df, format='table')
            result = store.select('df')
            tm.assert_frame_equal(result, df)

            # Dtypes
            s = Series([1, 1, 2, 2, 3, 4, 5]).astype('category')
            store.append('si', s)
            result = store.select('si')
            tm.assert_series_equal(result, s)

            s = Series([1, 1, np.nan, 2, 3, 4, 5]).astype('category')
            store.append('si2', s)
            result = store.select('si2')
            tm.assert_series_equal(result, s)

            # Multiple
            df2 = df.copy()
            df2['s2'] = Series(list('abcdefg')).astype('category')
            store.append('df2', df2)
            result = store.select('df2')
            tm.assert_frame_equal(result, df2)
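
            # the categories themselves live in separate meta nodes under
            # the table's key (e.g. /df2/meta/.../meta)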
            # Make sure the metadata is OK
            info = store.info()
            assert '/df2 ' in info
            # assert '/df2/meta/values_block_0/meta' in info
            assert '/df2/meta/values_block_1/meta' in info

            # unordered
            s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[
                'a', 'b', 'c', 'd'], ordered=False))
            store.append('s2', s, format='table')
            result = store.select('s2')
            tm.assert_series_equal(result, s)

            # Query
            store.append('df3', df, data_columns=['s'])
            expected = df[df.s.isin(['b', 'c'])]
            result = store.select('df3', where=['s in ["b","c"]'])
            tm.assert_frame_equal(result, expected)

            expected = df[df.s.isin(['b', 'c'])]
            result = store.select('df3', where=['s = ["b","c"]'])
            tm.assert_frame_equal(result, expected)

            expected = df[df.s.isin(['d'])]
            result = store.select('df3', where=['s in ["d"]'])
            tm.assert_frame_equal(result, expected)

            expected = df[df.s.isin(['f'])]
            result = store.select('df3', where=['s in ["f"]'])
            tm.assert_frame_equal(result, expected)

            # Appending with same categories is ok
            store.append('df3', df)

            df = concat([df, df])
            expected = df[df.s.isin(['b', 'c'])]
            result = store.select('df3', where=['s in ["b","c"]'])
            tm.assert_frame_equal(result, expected)

            # Appending must have the same categories
            df3 = df.copy()
            df3['s'].cat.remove_unused_categories(inplace=True)

            with pytest.raises(ValueError):
                store.append('df3', df3)

            # Remove, and make sure meta data is removed (it's a recursive
            # removal so should be).
            result = store.select('df3/meta/s/meta')
            assert result is not None
            store.remove('df3')

            with pytest.raises(KeyError):
                store.select('df3/meta/s/meta')

    def test_categorical_conversion(self):

        # GH13322
        # Check that read_hdf with categorical columns doesn't return rows if
        # where criteria isn't met.
        obsids = ['ESP_012345_6789', 'ESP_987654_3210']
        imgids = ['APF00006np', 'APF0001imm']
        data = [4.3, 9.8]

        # Test without categories
        df = DataFrame(dict(obsids=obsids, imgids=imgids, data=data))

        # We are expecting an empty DataFrame matching types of df
        expected = df.iloc[[], :]
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table', data_columns=True)
            result = read_hdf(path, 'df', where='obsids=B')
            tm.assert_frame_equal(result, expected)

        # Test with categories
        df.obsids = df.obsids.astype('category')
        df.imgids = df.imgids.astype('category')

        # We are expecting an empty DataFrame matching types of df
        expected = df.iloc[[], :]
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table', data_columns=True)
            result = read_hdf(path, 'df', where='obsids=B')
            tm.assert_frame_equal(result, expected)

    def test_categorical_nan_only_columns(self):
        # GH18413
        # Check that read_hdf with categorical columns with NaN-only values
        # can be read back.
        df = pd.DataFrame({
            'a': ['a', 'b', 'c', np.nan],
            'b': [np.nan, np.nan, np.nan, np.nan],
            'c': [1, 2, 3, 4],
            'd': pd.Series([None] * 4, dtype=object)
        })
        df['a'] = df.a.astype('category')
        df['b'] = df.b.astype('category')
        df['d'] = df.b.astype('category')
        expected = df
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table', data_columns=True)
            result = read_hdf(path, 'df')
            tm.assert_frame_equal(result, expected)

    def test_duplicate_column_name(self):
        df = DataFrame(columns=["a", "a"], data=[[0, 0]])

        with ensure_clean_path(self.path) as path:
            pytest.raises(ValueError, df.to_hdf,
                          path, 'df', format='fixed')

            df.to_hdf(path, 'df', format='table')
            other = read_hdf(path, 'df')

            tm.assert_frame_equal(df, other)
            assert df.equals(other)
            assert other.equals(df)

    def test_round_trip_equals(self):
        # GH 9330
        df = DataFrame({"B": [1, 2], "A": ["x", "y"]})

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table')
            other = read_hdf(path, 'df')
            tm.assert_frame_equal(df, other)
            assert df.equals(other)
            assert other.equals(df)

    def test_preserve_timedeltaindex_type(self):
        # GH9635
        # Storing TimedeltaIndexed DataFrames in fixed stores did not preserve
        # the type of the index.
        df = DataFrame(np.random.normal(size=(10, 5)))
        df.index = timedelta_range(
            start='0s', periods=10, freq='1s', name='example')

        with ensure_clean_store(self.path) as store:

            store['df'] = df
            assert_frame_equal(store['df'], df)

    def test_columns_multiindex_modified(self):
        # BUG: 7212
        # read_hdf store.select modified the passed columns parameters
        # when multi-indexed.

        df = DataFrame(np.random.rand(4, 5),
                       index=list('abcd'),
                       columns=list('ABCDE'))
        df.index.name = 'letters'
        df = df.set_index(keys='E', append=True)

        data_columns = df.index.names + df.columns.tolist()
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df',
                      mode='a',
                      append=True,
                      data_columns=data_columns,
                      index=False)
            cols2load = list('BCD')
            cols2load_original = list(cols2load)
            df_loaded = read_hdf(path, 'df', columns=cols2load)  # noqa
            assert cols2load_original == cols2load

    def test_to_hdf_with_object_column_names(self):
        # GH9057
        # Writing HDF5 table format should only work for string-like
        # column types

        types_should_fail = [tm.makeIntIndex, tm.makeFloatIndex,
                             tm.makeDateIndex, tm.makeTimedeltaIndex,
                             tm.makePeriodIndex]
        types_should_run = [tm.makeStringIndex, tm.makeCategoricalIndex]

        if compat.PY3:
            types_should_run.append(tm.makeUnicodeIndex)
        else:
            # TODO: Add back to types_should_fail
            # https://github.com/pandas-dev/pandas/issues/20907
            pass

        for index in types_should_fail:
            df = DataFrame(np.random.randn(10, 2), columns=index(2))
            with ensure_clean_path(self.path) as path:
                with catch_warnings(record=True):
                    with tm.assert_raises_regex(
                            ValueError, ("cannot have non-object label "
                                         "DataIndexableCol")):
                        df.to_hdf(path, 'df', format='table',
                                  data_columns=True)

        for index in types_should_run:
            df = DataFrame(np.random.randn(10, 2), columns=index(2))
            with ensure_clean_path(self.path) as path:
                with catch_warnings(record=True):
                    df.to_hdf(path, 'df', format='table', data_columns=True)
                    result = pd.read_hdf(
                        path, 'df', where="index = [{0}]".format(df.index[0]))
                    assert(len(result))

    def test_read_hdf_open_store(self):
        # GH10330
        # No check for non-string path_or_buf, and no test of open store
        df = DataFrame(np.random.rand(4, 5),
                       index=list('abcd'),
                       columns=list('ABCDE'))
        df.index.name = 'letters'
        df = df.set_index(keys='E', append=True)

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', mode='w')
            direct = read_hdf(path, 'df')
            store = HDFStore(path, mode='r')
            indirect = read_hdf(store, 'df')
            tm.assert_frame_equal(direct, indirect)
            assert store.is_open
            store.close()

    def test_read_hdf_iterator(self):
        df = DataFrame(np.random.rand(4, 5),
                       index=list('abcd'),
                       columns=list('ABCDE'))
        df.index.name = 'letters'
        df = df.set_index(keys='E', append=True)

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', mode='w', format='t')
            direct = read_hdf(path, 'df')
            iterator = read_hdf(path, 'df', iterator=True)
            assert isinstance(iterator, TableIterator)
            indirect = next(iterator.__iter__())
            tm.assert_frame_equal(direct, indirect)
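            # the iterator keeps the underlying store open until it is
            # closed explicitly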
            iterator.store.close()

    def test_read_hdf_errors(self):
        df = DataFrame(np.random.rand(4, 5),
                       index=list('abcd'),
                       columns=list('ABCDE'))

        with ensure_clean_path(self.path) as path:
            pytest.raises(IOError, read_hdf, path, 'key')
            df.to_hdf(path, 'df')
            store = HDFStore(path, mode='r')
            store.close()
            pytest.raises(IOError, read_hdf, store, 'df')

    def test_read_hdf_generic_buffer_errors(self):
        pytest.raises(NotImplementedError, read_hdf, BytesIO(b''), 'df')

    def test_invalid_complib(self):
        df = DataFrame(np.random.rand(4, 5),
                       index=list('abcd'),
                       columns=list('ABCDE'))
        with ensure_clean_path(self.path) as path:
            with pytest.raises(ValueError):
                df.to_hdf(path, 'df', complib='foolib')
    # GH10443

    def test_read_nokey(self):
        df = DataFrame(np.random.rand(4, 5),
                       index=list('abcd'),
                       columns=list('ABCDE'))

        # Categorical dtype not supported for "fixed" format. So no need
        # to test with that dtype in the dataframe here.
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', mode='a')
            reread = read_hdf(path)
            assert_frame_equal(df, reread)
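            # a second key makes the file ambiguous, so reading without a
            # key must now raise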
            df.to_hdf(path, 'df2', mode='a')
            pytest.raises(ValueError, read_hdf, path)

    def test_read_nokey_table(self):
        # GH13231
        df = DataFrame({'i': range(5),
                        'c': Series(list('abacd'), dtype='category')})

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', mode='a', format='table')
            reread = read_hdf(path)
            assert_frame_equal(df, reread)
            df.to_hdf(path, 'df2', mode='a', format='table')
            pytest.raises(ValueError, read_hdf, path)

    def test_read_nokey_empty(self):
        with ensure_clean_path(self.path) as path:
            store = HDFStore(path)
            store.close()
            pytest.raises(ValueError, read_hdf, path)

    @td.skip_if_no('pathlib')
    def test_read_from_pathlib_path(self):

        # GH11773
        from pathlib import Path

        expected = DataFrame(np.random.rand(4, 5),
                             index=list('abcd'),
                             columns=list('ABCDE'))
        with ensure_clean_path(self.path) as filename:
            path_obj = Path(filename)

            expected.to_hdf(path_obj, 'df', mode='a')
            actual = read_hdf(path_obj, 'df')

        tm.assert_frame_equal(expected, actual)

    @td.skip_if_no('py.path')
    def test_read_from_py_localpath(self):

        # GH11773
        from py.path import local as LocalPath

        expected = DataFrame(np.random.rand(4, 5),
                             index=list('abcd'),
                             columns=list('ABCDE'))
        with ensure_clean_path(self.path) as filename:
            path_obj = LocalPath(filename)

            expected.to_hdf(path_obj, 'df', mode='a')
            actual = read_hdf(path_obj, 'df')

        tm.assert_frame_equal(expected, actual)

    def test_query_long_float_literal(self):
        # GH 14241
        df = pd.DataFrame({'A': [1000000000.0009,
                                 1000000000.0011,
                                 1000000000.0015]})

        with ensure_clean_store(self.path) as store:
            store.append('test', df, format='table', data_columns=True)
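
            # the literals below differ from the stored values only in the
            # fourth decimal place; the parsed query must keep that precision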
            cutoff = 1000000000.0006
            result = store.select('test', "A < %.4f" % cutoff)
            assert result.empty

            cutoff = 1000000000.0010
            result = store.select('test', "A > %.4f" % cutoff)
            expected = df.loc[[1, 2], :]
            tm.assert_frame_equal(expected, result)

            exact = 1000000000.0011
            result = store.select('test', 'A == %.4f' % exact)
            expected = df.loc[[1], :]
            tm.assert_frame_equal(expected, result)

    def test_query_compare_column_type(self):
        # GH 15492
        df = pd.DataFrame({'date': ['2014-01-01', '2014-01-02'],
                           'real_date': date_range('2014-01-01', periods=2),
                           'float': [1.1, 1.2],
                           'int': [1, 2]},
                          columns=['date', 'real_date', 'float', 'int'])

        with ensure_clean_store(self.path) as store:
            store.append('test', df, format='table', data_columns=True)

            ts = pd.Timestamp('2014-01-01')  # noqa
            result = store.select('test', where='real_date > ts')
            expected = df.loc[[1], :]
            tm.assert_frame_equal(expected, result)
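
            # comparing a non-string value against a string column raises
            # TypeError, while a string that cannot be coerced to the
            # column's type raises ValueError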
            for op in ['<', '>', '==']:
                # non strings to string column always fail
                for v in [2.1, True, pd.Timestamp('2014-01-01'),
                          pd.Timedelta(1, 's')]:
                    query = 'date {op} v'.format(op=op)
                    with pytest.raises(TypeError):
                        result = store.select('test', where=query)

                # strings to other columns must be convertible to type
                v = 'a'
                for col in ['int', 'float', 'real_date']:
                    query = '{col} {op} v'.format(op=op, col=col)
                    with pytest.raises(ValueError):
                        result = store.select('test', where=query)

                for v, col in zip(['1', '1.1', '2014-01-01'],
                                  ['int', 'float', 'real_date']):
                    query = '{col} {op} v'.format(op=op, col=col)
                    result = store.select('test', where=query)

                    if op == '==':
                        expected = df.loc[[0], :]
                    elif op == '>':
                        expected = df.loc[[1], :]
                    else:
                        expected = df.loc[[], :]
                    tm.assert_frame_equal(expected, result)

    @pytest.mark.parametrize('format', ['fixed', 'table'])
    def test_read_hdf_series_mode_r(self, format):
        # GH 16583
        # Tests that reading a Series saved to an HDF file
        # still works if a mode='r' argument is supplied
        series = tm.makeFloatSeries()
        with ensure_clean_path(self.path) as path:
            series.to_hdf(path, key='data', format=format)
            result = pd.read_hdf(path, key='data', mode='r')
        tm.assert_series_equal(result, series)

    @pytest.mark.skipif(not PY36, reason="Need python 3.6")
    def test_fspath(self):
        with tm.ensure_clean('foo.h5') as path:
            with pd.HDFStore(path) as store:
                assert os.fspath(store) == str(path)

    def test_read_py2_hdf_file_in_py3(self, datapath):
        # GH 16781

        # tests reading a PeriodIndex DataFrame written in Python2 in Python3

        # the file was generated in Python 2.7 like so:
        #
        # df = pd.DataFrame([1.,2,3], index=pd.PeriodIndex(
        #     ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))
        # df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p')

        expected = pd.DataFrame([1., 2, 3], index=pd.PeriodIndex(
            ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))

        with ensure_clean_store(
                datapath('io', 'data', 'legacy_hdf',
                         'periodindex_0.20.1_x86_64_darwin_2.7.13.h5'),
                mode='r') as store:
            result = store['p']
            assert_frame_equal(result, expected)


class TestHDFComplexValues(Base):
    # GH10447

    def test_complex_fixed(self):
        df = DataFrame(np.random.rand(4, 5).astype(np.complex64),
                       index=list('abcd'),
                       columns=list('ABCDE'))

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df')
            reread = read_hdf(path, 'df')
            assert_frame_equal(df, reread)

        df = DataFrame(np.random.rand(4, 5).astype(np.complex128),
                       index=list('abcd'),
                       columns=list('ABCDE'))
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df')
            reread = read_hdf(path, 'df')
            assert_frame_equal(df, reread)

    def test_complex_table(self):
        df = DataFrame(np.random.rand(4, 5).astype(np.complex64),
                       index=list('abcd'),
                       columns=list('ABCDE'))

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table')
            reread = read_hdf(path, 'df')
            assert_frame_equal(df, reread)

        df = DataFrame(np.random.rand(4, 5).astype(np.complex128),
                       index=list('abcd'),
                       columns=list('ABCDE'))

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table', mode='w')
            reread = read_hdf(path, 'df')
            assert_frame_equal(df, reread)

    def test_complex_mixed_fixed(self):
        complex64 = np.array([1.0 + 1.0j, 1.0 + 1.0j,
                              1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64)
        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
                              dtype=np.complex128)
        df = DataFrame({'A': [1, 2, 3, 4],
                        'B': ['a', 'b', 'c', 'd'],
                        'C': complex64,
                        'D': complex128,
                        'E': [1.0, 2.0, 3.0, 4.0]},
                       index=list('abcd'))
        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df')
            reread = read_hdf(path, 'df')
            assert_frame_equal(df, reread)

    def test_complex_mixed_table(self):
        complex64 = np.array([1.0 + 1.0j, 1.0 + 1.0j,
                              1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64)
        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
                              dtype=np.complex128)
        df = DataFrame({'A': [1, 2, 3, 4],
                        'B': ['a', 'b', 'c', 'd'],
                        'C': complex64,
                        'D': complex128,
                        'E': [1.0, 2.0, 3.0, 4.0]},
                       index=list('abcd'))

        with ensure_clean_store(self.path) as store:
            store.append('df', df, data_columns=['A', 'B'])
            result = store.select('df', where='A>2')
            assert_frame_equal(df.loc[df.A > 2], result)

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table')
            reread = read_hdf(path, 'df')
            assert_frame_equal(df, reread)

    def test_complex_across_dimensions_fixed(self):
        with catch_warnings(record=True):
            complex128 = np.array(
                [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
            s = Series(complex128, index=list('abcd'))
            df = DataFrame({'A': s, 'B': s})
            p = Panel({'One': df, 'Two': df})

            objs = [s, df, p]
            comps = [tm.assert_series_equal, tm.assert_frame_equal,
                     tm.assert_panel_equal]
            for obj, comp in zip(objs, comps):
                with ensure_clean_path(self.path) as path:
                    obj.to_hdf(path, 'obj', format='fixed')
                    reread = read_hdf(path, 'obj')
                    comp(obj, reread)

    def test_complex_across_dimensions(self):
        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
        s = Series(complex128, index=list('abcd'))
        df = DataFrame({'A': s, 'B': s})

        with catch_warnings(record=True):
            p = Panel({'One': df, 'Two': df})

            objs = [df, p]
            comps = [tm.assert_frame_equal, tm.assert_panel_equal]
            for obj, comp in zip(objs, comps):
                with ensure_clean_path(self.path) as path:
                    obj.to_hdf(path, 'obj', format='table')
                    reread = read_hdf(path, 'obj')
                    comp(obj, reread)
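
    # complex values cannot be used as queryable data columns; appending
    # with data_columns=['C'] is expected to raise below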
    def test_complex_indexing_error(self):
        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j],
                              dtype=np.complex128)
        df = DataFrame({'A': [1, 2, 3, 4],
                        'B': ['a', 'b', 'c', 'd'],
                        'C': complex128},
                       index=list('abcd'))
        with ensure_clean_store(self.path) as store:
            pytest.raises(TypeError, store.append,
                          'df', df, data_columns=['C'])

    def test_complex_series_error(self):
        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
        s = Series(complex128, index=list('abcd'))

        with ensure_clean_path(self.path) as path:
            pytest.raises(TypeError, s.to_hdf, path, 'obj', format='t')

        with ensure_clean_path(self.path) as path:
            s.to_hdf(path, 'obj', format='t', index=False)
            reread = read_hdf(path, 'obj')
            tm.assert_series_equal(s, reread)

    def test_complex_append(self):
        df = DataFrame({'a': np.random.randn(100).astype(np.complex128),
                        'b': np.random.randn(100)})

        with ensure_clean_store(self.path) as store:
            store.append('df', df, data_columns=['b'])
            store.append('df', df)
            result = store.select('df')
            assert_frame_equal(pd.concat([df, df], 0), result)


class TestTimezones(Base):

    def _compare_with_tz(self, a, b):
        tm.assert_frame_equal(a, b)

        # compare the zones on each element
        for c in a.columns:
            for i in a.index:
                a_e = a.loc[i, c]
                b_e = b.loc[i, c]
                if not (a_e == b_e and a_e.tz == b_e.tz):
                    raise AssertionError(
                        "invalid tz comparison [%s] [%s]" % (a_e, b_e))

    def test_append_with_timezones_dateutil(self):

        from datetime import timedelta

        # use maybe_get_tz instead of dateutil.tz.gettz to handle the windows
        # filename issues.
        from pandas._libs.tslibs.timezones import maybe_get_tz
        gettz = lambda x: maybe_get_tz('dateutil/' + x)

        # as columns
        with ensure_clean_store(self.path) as store:

            _maybe_remove(store, 'df_tz')
            df = DataFrame(dict(A=[Timestamp('20130102 2:00:00', tz=gettz(
                'US/Eastern')) + timedelta(hours=1) * i for i in range(5)]))

            store.append('df_tz', df, data_columns=['A'])
            result = store['df_tz']
            self._compare_with_tz(result, df)
            assert_frame_equal(result, df)

            # select with tz aware
            expected = df[df.A >= df.A[3]]
            result = store.select('df_tz', where='A>=df.A[3]')
            self._compare_with_tz(result, expected)

            # ensure we include dates in DST and STD time here.
            _maybe_remove(store, 'df_tz')
            df = DataFrame(dict(A=Timestamp('20130102',
                                            tz=gettz('US/Eastern')),
                                B=Timestamp('20130603',
                                            tz=gettz('US/Eastern'))),
                           index=range(5))
            store.append('df_tz', df)
            result = store['df_tz']
            self._compare_with_tz(result, df)
            assert_frame_equal(result, df)

            df = DataFrame(dict(A=Timestamp('20130102',
                                            tz=gettz('US/Eastern')),
                                B=Timestamp('20130102', tz=gettz('EET'))),
                           index=range(5))
            pytest.raises(ValueError, store.append, 'df_tz', df)
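
            # declaring both columns as data_columns lets each keep its own
            # tz, so the mixed-tz frame can be stored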
            # this is ok
            _maybe_remove(store, 'df_tz')
            store.append('df_tz', df, data_columns=['A', 'B'])
            result = store['df_tz']
            self._compare_with_tz(result, df)
            assert_frame_equal(result, df)

            # can't append with diff timezone
            df = DataFrame(dict(A=Timestamp('20130102',
                                            tz=gettz('US/Eastern')),
                                B=Timestamp('20130102', tz=gettz('CET'))),
                           index=range(5))
            pytest.raises(ValueError, store.append, 'df_tz', df)

        # as index
        with ensure_clean_store(self.path) as store:

            # GH 4098 example
            df = DataFrame(dict(A=Series(lrange(3), index=date_range(
                '2000-1-1', periods=3, freq='H', tz=gettz('US/Eastern')))))

            _maybe_remove(store, 'df')
            store.put('df', df)
            result = store.select('df')
            assert_frame_equal(result, df)

            _maybe_remove(store, 'df')
            store.append('df', df)
            result = store.select('df')
            assert_frame_equal(result, df)

    def test_append_with_timezones_pytz(self):

        from datetime import timedelta

        # as columns
        with ensure_clean_store(self.path) as store:

            _maybe_remove(store, 'df_tz')
            df = DataFrame(dict(A=[Timestamp('20130102 2:00:00',
                                             tz='US/Eastern') +
                                   timedelta(hours=1) * i
                                   for i in range(5)]))
            store.append('df_tz', df, data_columns=['A'])
            result = store['df_tz']
            self._compare_with_tz(result, df)
            assert_frame_equal(result, df)

            # select with tz aware
            self._compare_with_tz(store.select(
                'df_tz', where='A>=df.A[3]'), df[df.A >= df.A[3]])

            _maybe_remove(store, 'df_tz')
            # ensure we include dates in DST and STD time here.
            df = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'),
                                B=Timestamp('20130603', tz='US/Eastern')),
                           index=range(5))
            store.append('df_tz', df)
            result = store['df_tz']
            self._compare_with_tz(result, df)
            assert_frame_equal(result, df)

            df = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'),
                                B=Timestamp('20130102', tz='EET')),
                           index=range(5))
            pytest.raises(ValueError, store.append, 'df_tz', df)

            # this is ok
            _maybe_remove(store, 'df_tz')
            store.append('df_tz', df, data_columns=['A', 'B'])
            result = store['df_tz']
            self._compare_with_tz(result, df)
            assert_frame_equal(result, df)

            # can't append with diff timezone
            df = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'),
                                B=Timestamp('20130102', tz='CET')),
                           index=range(5))
            pytest.raises(ValueError, store.append, 'df_tz', df)

        # as index
        with ensure_clean_store(self.path) as store:

            # GH 4098 example
            df = DataFrame(dict(A=Series(lrange(3), index=date_range(
                '2000-1-1', periods=3, freq='H', tz='US/Eastern'))))

            _maybe_remove(store, 'df')
            store.put('df', df)
            result = store.select('df')
            assert_frame_equal(result, df)

            _maybe_remove(store, 'df')
            store.append('df', df)
            result = store.select('df')
            assert_frame_equal(result, df)
def test_tseries_select_index_column(self):
|
|
# GH7777
|
|
# selecting a UTC datetimeindex column did
|
|
# not preserve UTC tzinfo set before storing
|
|
|
|
# check that no tz still works
|
|
rng = date_range('1/1/2000', '1/30/2000')
|
|
frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
|
|
|
|
with ensure_clean_store(self.path) as store:
|
|
store.append('frame', frame)
|
|
result = store.select_column('frame', 'index')
|
|
assert rng.tz == DatetimeIndex(result.values).tz
|
|
|
|
# check utc
|
|
rng = date_range('1/1/2000', '1/30/2000', tz='UTC')
|
|
frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
|
|
|
|
with ensure_clean_store(self.path) as store:
|
|
store.append('frame', frame)
|
|
result = store.select_column('frame', 'index')
|
|
assert rng.tz == result.dt.tz
|
|
|
|
# double check non-utc
|
|
rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern')
|
|
frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
|
|
|
|
with ensure_clean_store(self.path) as store:
|
|
store.append('frame', frame)
|
|
result = store.select_column('frame', 'index')
|
|
assert rng.tz == result.dt.tz
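
            # select_column reads just the named column (here the stored
            # index) without materializing the whole frame, so it is a
            # cheap way to check which tz actually round-tripped.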

    def test_timezones_fixed(self):
        with ensure_clean_store(self.path) as store:

            # index
            rng = date_range('1/1/2000', '1/30/2000', tz='US/Eastern')
            df = DataFrame(np.random.randn(len(rng), 4), index=rng)
            store['df'] = df
            result = store['df']
            assert_frame_equal(result, df)

            # as data
            # GH11411
            _maybe_remove(store, 'df')
            df = DataFrame({'A': rng,
                            'B': rng.tz_convert('UTC').tz_localize(None),
                            'C': rng.tz_convert('CET'),
                            'D': range(len(rng))}, index=rng)
            store['df'] = df
            result = store['df']
            assert_frame_equal(result, df)
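
            # An added minimal sketch (the key 's' is a hypothetical
            # example): a tz-aware index on a Series should survive the
            # fixed format just as it does on a DataFrame.
            s = Series(np.arange(len(rng)), index=rng)
            store['s'] = s
            assert_series_equal(store['s'], s)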

    def test_fixed_offset_tz(self):
        rng = date_range('1/1/2000 00:00:00-07:00',
                         '1/30/2000 00:00:00-07:00')
        frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

        with ensure_clean_store(self.path) as store:
            store['frame'] = frame
            recons = store['frame']
            tm.assert_index_equal(recons.index, rng)
            assert rng.tz == recons.index.tz

    @td.skip_if_windows
    def test_store_timezone(self):
        # GH2852
        # issue storing datetime.date with a timezone as it resets when
        # read back in a new timezone

        # original method
        with ensure_clean_store(self.path) as store:

            today = datetime.date(2013, 9, 10)
            df = DataFrame([1, 2, 3], index=[today, today, today])
            store['obj1'] = df
            result = store['obj1']
            assert_frame_equal(result, df)

        # with tz setting
        with ensure_clean_store(self.path) as store:

            with set_timezone('EST5EDT'):
                today = datetime.date(2013, 9, 10)
                df = DataFrame([1, 2, 3], index=[today, today, today])
                store['obj1'] = df

            with set_timezone('CST6CDT'):
                result = store['obj1']

            assert_frame_equal(result, df)
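
            # set_timezone swaps the process-level TZ setting (via
            # time.tzset(), hence the Windows skip above); datetime.date
            # values carry no zone, so the frame written under EST5EDT
            # must read back unchanged under CST6CDT.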

    def test_legacy_datetimetz_object(self, datapath):
        # legacy from < 0.17.0
        # GH 8260
        expected = DataFrame(dict(A=Timestamp('20130102', tz='US/Eastern'),
                                  B=Timestamp('20130603', tz='CET')),
                             index=range(5))
        with ensure_clean_store(
                datapath('io', 'data', 'legacy_hdf', 'datetimetz_object.h5'),
                mode='r') as store:
            result = store['df']
            assert_frame_equal(result, expected)

    def test_dst_transitions(self):
        # make sure we are not failing on DST transitions
        with ensure_clean_store(self.path) as store:
            times = pd.date_range("2013-10-26 23:00", "2013-10-27 01:00",
                                  tz="Europe/London",
                                  freq="H",
                                  ambiguous='infer')
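
            # ambiguous='infer' is needed because Europe/London repeats the
            # 01:00 wall time when DST ends on 2013-10-27; date_range infers
            # the intended offsets from the monotonic ordering. The shifted
            # copy below also exercises times off the exact hour boundary.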

            for i in [times, times + pd.Timedelta('10min')]:
                _maybe_remove(store, 'df')
                df = DataFrame({'A': range(len(i)), 'B': i}, index=i)
                store.append('df', df)
                result = store.select('df')
                assert_frame_equal(result, df)