laywerrobot/lib/python3.6/site-packages/pandas/tests/io/test_pytables.py

import pytest
import os
import tempfile
from contextlib import contextmanager
from warnings import catch_warnings
from distutils.version import LooseVersion

import datetime
from datetime import timedelta

import numpy as np

import pandas as pd
from pandas import (Series, DataFrame, Panel, MultiIndex, Int64Index,
                    RangeIndex, Categorical, bdate_range,
                    date_range, timedelta_range, Index, DatetimeIndex,
                    isna, compat, concat, Timestamp, _np_version_under1p15)

import pandas.util.testing as tm
import pandas.util._test_decorators as td
from pandas.util.testing import (assert_panel_equal,
                                 assert_frame_equal,
                                 assert_series_equal,
                                 set_timezone)

from pandas.compat import (is_platform_windows, is_platform_little_endian,
                           PY35, PY36, BytesIO, text_type,
                           range, lrange, u)
from pandas.io.formats.printing import pprint_thing
from pandas.core.dtypes.common import is_categorical_dtype

tables = pytest.importorskip('tables')
from pandas.io import pytables as pytables  # noqa:E402
from pandas.io.pytables import (TableIterator,  # noqa:E402
                                HDFStore, get_store, Term, read_hdf,
                                PossibleDataLossError, ClosedFileError)


# Default compression library for these tests: 'blosc' when the installed
# PyTables reports version 2.2 or later, otherwise 'zlib'.
if LooseVersion(tables.__version__) >= LooseVersion('2.2'):
    _default_compressor = 'blosc'
else:
    _default_compressor = 'zlib'


# contextmanager to ensure the file cleanup


def safe_remove(path):
    """
    Delete the file at ``path`` on a best-effort basis.

    Parameters
    ----------
    path : str or None
        File to delete. ``None`` is a no-op.

    Notes
    -----
    Only ``OSError`` (for example, the file does not exist) is suppressed.
    A bare ``except`` would also swallow ``KeyboardInterrupt`` and
    ``SystemExit``, which must be allowed to propagate.
    """
    if path is None:
        return
    try:
        os.remove(path)
    except OSError:
        pass


def safe_close(store):
    """
    Close ``store`` on a best-effort basis.

    Parameters
    ----------
    store : object with a ``close()`` method, or None
        ``None`` is a no-op.

    Notes
    -----
    Ordinary errors raised by ``close()`` are suppressed so that cleanup
    never masks the outcome of a test. ``KeyboardInterrupt`` and
    ``SystemExit`` are deliberately *not* suppressed.
    """
    if store is None:
        return
    try:
        store.close()
    except Exception:
        pass


def create_tempfile(path):
    """
    Return ``path`` joined onto the system temporary directory.

    Nothing is created or opened on disk; only the file name is built.
    """
    tmp_dir = tempfile.gettempdir()
    return os.path.join(tmp_dir, path)


@contextmanager
def ensure_clean_store(path, mode='a', complevel=None, complib=None,
                       fletcher32=False):
    """
    Context manager yielding an open ``HDFStore`` that is closed on exit.

    Parameters
    ----------
    path : str
        File name. If it has no directory component it is placed in the
        system temporary directory.
    mode : str, default 'a'
        Mode passed to ``HDFStore``. The file is deleted on exit only for
        modes ``'w'`` and ``'a'``.
    complevel, complib, fletcher32
        Forwarded unchanged to ``HDFStore``.

    Yields
    ------
    HDFStore
    """
    # Bind before the ``try`` so the ``finally`` clause cannot hit an unbound
    # name (and mask the real error) when ``HDFStore()`` itself raises.
    store = None
    try:

        # put in the temporary path if we don't have one already
        if not len(os.path.dirname(path)):
            path = create_tempfile(path)

        # forward the caller's ``fletcher32`` instead of a hard-coded False
        store = HDFStore(path, mode=mode, complevel=complevel,
                         complib=complib, fletcher32=fletcher32)
        yield store
    finally:
        safe_close(store)
        if mode == 'w' or mode == 'a':
            safe_remove(path)


@contextmanager
def ensure_clean_path(path):
    """
    Yield the name(s) of unopened temporary file(s) and delete them on exit.

    Parameters
    ----------
    path : str or list of str
        File name(s) to place in the system temporary directory.

    Yields
    ------
    str or list of str
        A single file name for a ``str`` input, a list of file names for a
        ``list`` input.
    """
    # Bind before the ``try`` so the cleanup loop is safe even when building
    # the file names raises.
    filenames = []
    try:
        if isinstance(path, list):
            filenames = [create_tempfile(p) for p in path]
            yield filenames
        else:
            filenames = [create_tempfile(path)]
            yield filenames[0]
    finally:
        for f in filenames:
            safe_remove(f)

# set these parameters so we don't have file sharing
# (each of the PyTables thread-count settings below is pinned to 1)
tables.parameters.MAX_NUMEXPR_THREADS = 1
tables.parameters.MAX_BLOSC_THREADS = 1
tables.parameters.MAX_THREADS = 1


def _maybe_remove(store, key):
    """
    Remove ``key`` from ``store`` if possible, ignoring failures.

    For tests using tables, this makes sure there is no content left over
    from previous tests using the same table name.

    Ordinary errors from ``store.remove`` (such as the key not existing) are
    suppressed; ``KeyboardInterrupt`` and ``SystemExit`` are not.
    """
    try:
        store.remove(key)
    except Exception:
        pass


class Base(object):
    """Common setup/teardown shared by the test classes in this module."""

    @classmethod
    def setup_class(cls):
        # Pytables 3.0.0 deprecates lots of things, so leave testing mode
        # for the duration of the class
        tm.reset_testing_mode()

    @classmethod
    def teardown_class(cls):
        # re-enable testing mode once the class is done
        tm.set_testing_mode()

    def setup_method(self, method):
        # a fresh, randomly named file for every test
        self.path = 'tmp.__{0}__.h5'.format(tm.rands(10))

    def teardown_method(self, method):
        """Nothing to clean up per test."""


@pytest.mark.single
class TestHDFStore(Base):

    def test_factory_fun(self):
        """``get_store`` works as a context manager, both when the body
        raises and when it completes normally."""
        path = create_tempfile(self.path)

        # an exception raised inside the block must propagate
        try:
            with catch_warnings(record=True):
                with get_store(path) as tbl:
                    raise ValueError('blah')
        except ValueError:
            pass
        finally:
            safe_remove(path)

        # write in one session, read back in another
        try:
            with catch_warnings(record=True):
                with get_store(path) as tbl:
                    tbl['a'] = tm.makeDataFrame()

            with catch_warnings(record=True):
                with get_store(path) as tbl:
                    assert len(tbl) == 1
                    assert type(tbl['a']) == DataFrame
        finally:
            # remove the file that was actually written (the temp path),
            # not the bare file name held in ``self.path``
            safe_remove(path)

    def test_context(self):
        """``HDFStore`` itself is usable as a context manager."""
        tmp_path = create_tempfile(self.path)

        # an exception raised inside the block must propagate
        try:
            with HDFStore(tmp_path) as store:
                raise ValueError('blah')
        except ValueError:
            pass
        finally:
            safe_remove(tmp_path)

        # write in one session, read back in another
        try:
            with HDFStore(tmp_path) as store:
                store['a'] = tm.makeDataFrame()

            with HDFStore(tmp_path) as store:
                assert len(store) == 1
                stored = store['a']
                assert type(stored) == DataFrame
        finally:
            safe_remove(tmp_path)

    def test_conv_read_write(self):
        """Round-trip objects through ``to_hdf`` / ``read_hdf``."""
        path = create_tempfile(self.path)

        def roundtrip(key, obj, **kwargs):
            # write ``obj`` under ``key`` and read it straight back
            obj.to_hdf(path, key, **kwargs)
            return read_hdf(path, key)

        try:
            cases = [
                ('series', tm.makeTimeSeries, assert_series_equal),
                ('string_series', tm.makeStringSeries, assert_series_equal),
                ('frame', tm.makeDataFrame, assert_frame_equal),
            ]
            for key, make, check in cases:
                expected = make()
                check(expected, roundtrip(key, expected))

            with catch_warnings(record=True):
                wp = tm.makePanel()
                assert_panel_equal(wp, roundtrip('panel', wp))

            # table
            frame = DataFrame({'A': lrange(5), 'B': lrange(5)})
            frame.to_hdf(path, 'table', append=True)
            selected = read_hdf(path, 'table', where=['index>2'])
            assert_frame_equal(frame[frame.index > 2], selected)

        finally:
            safe_remove(path)

    def test_long_strings(self):
        """GH6166: 100-character strings survive an append/select cycle."""
        # unconversion of long strings was being chopped in earlier
        # versions of numpy < 1.7.2
        values = tm.rands_array(100, size=10)
        labels = tm.rands_array(100, size=10)
        frame = DataFrame({'a': values}, index=labels)

        with ensure_clean_store(self.path) as store:
            store.append('df', frame, data_columns=['a'])
            assert_frame_equal(frame, store.select('df'))

    def test_api(self):
        """Exercise the ``append`` / ``format`` keyword combinations of
        ``DataFrame.to_hdf`` and ``HDFStore.append``, then the combinations
        that must raise."""

        # GH4584
        # API issue when to_hdf doesn't accept append AND format args
        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.iloc[:10].to_hdf(path, 'df', append=True, format='table')
            df.iloc[10:].to_hdf(path, 'df', append=True, format='table')
            assert_frame_equal(read_hdf(path, 'df'), df)

            # append to False
            df.iloc[:10].to_hdf(path, 'df', append=False, format='table')
            df.iloc[10:].to_hdf(path, 'df', append=True, format='table')
            assert_frame_equal(read_hdf(path, 'df'), df)

        # same as above, but with ``format`` omitted on one of the two calls
        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.iloc[:10].to_hdf(path, 'df', append=True)
            df.iloc[10:].to_hdf(path, 'df', append=True, format='table')
            assert_frame_equal(read_hdf(path, 'df'), df)

            # append to False
            df.iloc[:10].to_hdf(path, 'df', append=False, format='table')
            df.iloc[10:].to_hdf(path, 'df', append=True)
            assert_frame_equal(read_hdf(path, 'df'), df)

        # fixed format: long name, short name, and omitted
        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.to_hdf(path, 'df', append=False, format='fixed')
            assert_frame_equal(read_hdf(path, 'df'), df)

            df.to_hdf(path, 'df', append=False, format='f')
            assert_frame_equal(read_hdf(path, 'df'), df)

            df.to_hdf(path, 'df', append=False)
            assert_frame_equal(read_hdf(path, 'df'), df)

            df.to_hdf(path, 'df')
            assert_frame_equal(read_hdf(path, 'df'), df)

        # the same combinations through HDFStore.append
        with ensure_clean_store(self.path) as store:

            path = store._path
            df = tm.makeDataFrame()

            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=True, format='table')
            store.append('df', df.iloc[10:], append=True, format='table')
            assert_frame_equal(store.select('df'), df)

            # append to False
            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=False, format='table')
            store.append('df', df.iloc[10:], append=True, format='table')
            assert_frame_equal(store.select('df'), df)

            # formats
            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=False, format='table')
            store.append('df', df.iloc[10:], append=True, format='table')
            assert_frame_equal(store.select('df'), df)

            _maybe_remove(store, 'df')
            store.append('df', df.iloc[:10], append=False, format='table')
            store.append('df', df.iloc[10:], append=True, format=None)
            assert_frame_equal(store.select('df'), df)

        with ensure_clean_path(self.path) as path:

            # invalid: appending with a fixed format raises ValueError
            df = tm.makeDataFrame()
            pytest.raises(ValueError, df.to_hdf, path,
                          'df', append=True, format='f')
            pytest.raises(ValueError, df.to_hdf, path,
                          'df', append=True, format='fixed')

            # invalid: unknown format names raise TypeError
            pytest.raises(TypeError, df.to_hdf, path,
                          'df', append=True, format='foo')
            pytest.raises(TypeError, df.to_hdf, path,
                          'df', append=False, format='bar')

        # File path doesn't exist
        path = ""
        pytest.raises(compat.FileNotFoundError,
                      read_hdf, path, 'df')

    def test_api_default_format(self):
        """The ``io.hdf.default_format`` option selects the format used by
        ``put`` / ``append`` / ``to_hdf`` when none is given explicitly."""

        # default_format option
        try:
            with ensure_clean_store(self.path) as store:
                df = tm.makeDataFrame()

                pd.set_option('io.hdf.default_format', 'fixed')
                _maybe_remove(store, 'df')
                store.put('df', df)
                assert not store.get_storer('df').is_table
                pytest.raises(ValueError, store.append, 'df2', df)

                pd.set_option('io.hdf.default_format', 'table')
                _maybe_remove(store, 'df')
                store.put('df', df)
                assert store.get_storer('df').is_table
                _maybe_remove(store, 'df2')
                store.append('df2', df)
                # check the node that was just appended, not 'df' again
                assert store.get_storer('df2').is_table

            with ensure_clean_path(self.path) as path:

                df = tm.makeDataFrame()

                pd.set_option('io.hdf.default_format', 'fixed')
                df.to_hdf(path, 'df')
                with HDFStore(path) as store:
                    assert not store.get_storer('df').is_table
                pytest.raises(ValueError, df.to_hdf, path, 'df2',
                              append=True)

                pd.set_option('io.hdf.default_format', 'table')
                df.to_hdf(path, 'df3')
                with HDFStore(path) as store:
                    assert store.get_storer('df3').is_table
                df.to_hdf(path, 'df4', append=True)
                with HDFStore(path) as store:
                    assert store.get_storer('df4').is_table
        finally:
            # always restore the global option, even when an assertion
            # above fails, so later tests are not affected
            pd.set_option('io.hdf.default_format', None)

    def test_keys(self):
        """``len``, ``keys()`` and iteration all report the stored keys."""
        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            store['b'] = tm.makeStringSeries()
            store['c'] = tm.makeDataFrame()
            with catch_warnings(record=True):
                for panel_key in ('d', 'foo/bar'):
                    store[panel_key] = tm.makePanel()

            expected = {'/a', '/b', '/c', '/d', '/foo/bar'}
            assert len(store) == 5
            assert set(store.keys()) == expected
            assert set(store) == expected

    def test_keys_ignore_hdf_softlink(self):
        """GH 20523: a soft link in the HDF file is not reported by
        ``keys()``."""
        with ensure_clean_store(self.path) as store:

            frame = DataFrame({'A': lrange(5), 'B': lrange(5)})
            store.put("df", frame)

            expected_keys = ["/df"]
            assert store.keys() == expected_keys

            # put a soft link named "symlink" under the root, targeting "df"
            handle = store._handle
            handle.create_soft_link(handle.root, "symlink", "df")

            # Should ignore the softlink
            assert store.keys() == expected_keys

    def test_iter_empty(self):
        """GH 12221: iterating an empty store yields nothing."""
        with ensure_clean_store(self.path) as store:
            keys = list(store)
            assert keys == []

    def test_repr(self):
        """Smoke-test ``repr`` / ``str`` / ``info`` on a store holding
        several kinds of objects, and ``repr`` / ``str`` on a storer."""

        with ensure_clean_store(self.path) as store:
            # repr/info must work on an empty store too
            repr(store)
            store.info()
            store['a'] = tm.makeTimeSeries()
            store['b'] = tm.makeStringSeries()
            store['c'] = tm.makeDataFrame()

            with catch_warnings(record=True):
                store['d'] = tm.makePanel()
                store['foo/bar'] = tm.makePanel()
                store.append('e', tm.makePanel())

            # frame with object, bool, int and datetime-like columns
            df = tm.makeDataFrame()
            df['obj1'] = 'foo'
            df['obj2'] = 'bar'
            df['bool1'] = df['A'] > 0
            df['bool2'] = df['B'] > 0
            df['bool3'] = True
            df['int1'] = 1
            df['int2'] = 2
            df['timestamp1'] = Timestamp('20010102')
            df['timestamp2'] = Timestamp('20010103')
            df['datetime1'] = datetime.datetime(2001, 1, 2, 0, 0)
            df['datetime2'] = datetime.datetime(2001, 1, 3, 0, 0)
            df.loc[3:6, ['obj1']] = np.nan
            df = df._consolidate()._convert(datetime=True)

            # PerformanceWarning
            with catch_warnings(record=True):
                store['df'] = df

            # make a random group in hdf space
            store._handle.create_group(store._handle.root, 'bah')

            # the file name must appear in both representations
            assert store.filename in repr(store)
            assert store.filename in str(store)
            store.info()

        # storers
        with ensure_clean_store(self.path) as store:

            df = tm.makeDataFrame()
            store.append('df', df)

            s = store.get_storer('df')
            repr(s)
            str(s)

    def test_contains(self):
        """``in`` works with and without a leading slash and only matches
        full keys."""
        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            store['b'] = tm.makeDataFrame()
            store['foo/bar'] = tm.makeDataFrame()

            for present in ('a', 'b', 'foo/bar', '/foo/bar'):
                assert present in store
            for absent in ('c', '/foo/b', 'bar'):
                assert absent not in store

            # gh-2694: tables.NaturalNameWarning
            odd_key = 'node())'
            with catch_warnings(record=True):
                store[odd_key] = tm.makeDataFrame()
            assert odd_key in store

    def test_versioning(self):
        """Written nodes carry ``pandas_version``; selecting from a table
        whose version was wiped raises."""
        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            store['b'] = tm.makeDataFrame()
            frame = tm.makeTimeDataFrame()
            _maybe_remove(store, 'df1')
            store.append('df1', frame[:10])
            store.append('df1', frame[10:])

            for name in ('a', 'b', 'df1'):
                node = getattr(store.root, name)
                assert node._v_attrs.pandas_version == '0.15.2'

            # write a file and wipe its versioning
            _maybe_remove(store, 'df2')
            store.append('df2', frame)

            # this is an error because its table_type is appendable, but no
            # version info
            store.get_node('df2')._v_attrs.pandas_version = None
            with pytest.raises(Exception):
                store.select('df2')

    def test_mode(self):
        """Check each open mode through the constructor, the context
        manager and the ``to_hdf`` / ``read_hdf`` convenience functions."""
        df = tm.makeTimeDataFrame()

        def check(mode):
            # 'r' and 'r+' need an existing file, which is never there here
            needs_existing = mode in ['r', 'r+']

            # constructor
            with ensure_clean_path(self.path) as path:
                if needs_existing:
                    with pytest.raises(IOError):
                        HDFStore(path, mode=mode)
                else:
                    store = HDFStore(path, mode=mode)
                    assert store._handle.mode == mode
                    store.close()

            # context
            with ensure_clean_path(self.path) as path:
                if needs_existing:
                    with pytest.raises(IOError):
                        with HDFStore(path, mode=mode) as store:  # noqa
                            pass
                else:
                    with HDFStore(path, mode=mode) as store:
                        assert store._handle.mode == mode

            with ensure_clean_path(self.path) as path:

                # conv write
                if needs_existing:
                    with pytest.raises(IOError):
                        df.to_hdf(path, 'df', mode=mode)
                    df.to_hdf(path, 'df', mode='w')
                else:
                    df.to_hdf(path, 'df', mode=mode)

                # conv read
                if mode == 'w':
                    with pytest.raises(ValueError):
                        read_hdf(path, 'df', mode=mode)
                else:
                    result = read_hdf(path, 'df', mode=mode)
                    assert_frame_equal(result, df)

        def check_default_mode():

            # read_hdf uses default mode
            with ensure_clean_path(self.path) as path:
                df.to_hdf(path, 'df', mode='w')
                assert_frame_equal(read_hdf(path, 'df'), df)

        for mode in ('r', 'r+', 'a', 'w'):
            check(mode)
        check_default_mode()

    def test_reopen_handle(self):
        """Re-opening a store in another mode: refused for ``'w'`` while
        open, allowed once closed, and data survives 'r' / 'a' reopens."""
        with ensure_clean_path(self.path) as path:

            store = HDFStore(path, mode='a')
            store['a'] = tm.makeTimeSeries()

            # invalid mode change
            with pytest.raises(PossibleDataLossError):
                store.open('w')
            store.close()
            assert not store.is_open

            # truncation ok here
            store.open('w')
            assert store.is_open
            assert len(store) == 0
            store.close()
            assert not store.is_open

            store = HDFStore(path, mode='a')
            store['a'] = tm.makeTimeSeries()

            # reopen as read, as append, and as append (again)
            for new_mode in ('r', 'a', 'a'):
                store.open(new_mode)
                assert store.is_open
                assert len(store) == 1
                assert store._mode == new_mode
                store.close()
                assert not store.is_open

    def test_open_args(self):
        """Driver keyword arguments are forwarded: a store opened with the
        core driver and no backing store leaves no file behind."""
        with ensure_clean_path(self.path) as path:

            frame = tm.makeDataFrame()

            # create an in memory store
            store = HDFStore(path, mode='a', driver='H5FD_CORE',
                             driver_core_backing_store=0)
            store['df'] = frame
            store.append('df2', frame)

            for key in ('df', 'df2'):
                tm.assert_frame_equal(store[key], frame)

            store.close()

            # the file should not have actually been written
            assert not os.path.exists(path)

    def test_flush(self):
        """``flush`` can be called with and without ``fsync=True``."""
        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()
            for flush_kwargs in ({}, {'fsync': True}):
                store.flush(**flush_kwargs)

    def test_get(self):
        """``get`` and item access agree; a missing key raises KeyError."""
        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeSeries()

            # with and without the leading slash
            for key in ('a', '/a'):
                tm.assert_series_equal(store.get(key), store[key])

            with pytest.raises(KeyError):
                store.get('b')

    def test_getattr(self):
        """Stored keys are reachable as attributes; unknown names and the
        public spelling of internal attributes raise AttributeError."""
        with ensure_clean_store(self.path) as store:

            ts = tm.makeTimeSeries()
            store['a'] = ts

            # test attribute access
            tm.assert_series_equal(store.a, ts)
            tm.assert_series_equal(getattr(store, 'a'), ts)

            frame = tm.makeTimeDataFrame()
            store['df'] = frame
            tm.assert_frame_equal(store.df, frame)

            # errors
            with pytest.raises(AttributeError):
                getattr(store, 'd')

            internal_names = ['mode', 'path', 'handle', 'complib']
            for name in internal_names:
                with pytest.raises(AttributeError):
                    getattr(store, name)

            # not stores
            for name in internal_names:
                getattr(store, "_%s" % name)

    def test_put(self):
        """``put`` stores under plain and nested keys; ``append=True`` is
        rejected by ``put``; a table can be overwritten."""
        with ensure_clean_store(self.path) as store:

            ts = tm.makeTimeSeries()
            df = tm.makeTimeDataFrame()
            store['a'] = ts
            for key in ('b', 'foo/bar/bah', 'foo', '/foo'):
                store[key] = df[:10]
            store.put('c', df[:10], format='table')

            # not OK, not a table
            with pytest.raises(ValueError):
                store.put('b', df[10:], append=True)

            # node does not currently exist, test _is_table_type returns False
            # in this case
            # _maybe_remove(store, 'f')
            # pytest.raises(ValueError, store.put, 'f', df[10:],
            #                   append=True)

            # can't put to a table (use append instead)
            with pytest.raises(ValueError):
                store.put('c', df[10:], append=True)

            # overwrite table
            store.put('c', df[:10], format='table', append=False)
            tm.assert_frame_equal(df[:10], store['c'])

    def test_put_string_index(self):
        """Series and frames with long string indexes round-trip, for
        uniform-length and mixed-length labels."""
        with ensure_clean_store(self.path) as store:

            long_labels = ["I am a very long string index: %s" % i
                           for i in range(20)]

            def check_roundtrip(labels):
                # store a Series and a two-column frame built on ``labels``
                index = Index(labels)
                s = Series(np.arange(len(labels)), index=index)
                df = DataFrame({'A': s, 'B': s})

                store['a'] = s
                tm.assert_series_equal(store['a'], s)

                store['b'] = df
                tm.assert_frame_equal(store['b'], df)

            check_roundtrip(long_labels)

            # mixed length
            check_roundtrip(['abcdefghijklmnopqrstuvwxyz1234567890'] +
                            long_labels)

    def test_put_compression(self):
        """zlib compression is accepted for tables and rejected for the
        fixed format."""
        with ensure_clean_store(self.path) as store:
            frame = tm.makeTimeDataFrame()

            store.put('c', frame, format='table', complib='zlib')
            tm.assert_frame_equal(store['c'], frame)

            # can't compress if format='fixed'
            with pytest.raises(ValueError):
                store.put('b', frame, format='fixed', complib='zlib')

    @td.skip_if_windows_python_3
    def test_put_compression_blosc(self):
        """blosc compression is rejected for the fixed format and accepted
        for tables."""
        frame = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:

            # can't compress if format='fixed'
            with pytest.raises(ValueError):
                store.put('b', frame, format='fixed', complib='blosc')

            store.put('c', frame, format='table', complib='blosc')
            tm.assert_frame_equal(store['c'], frame)

    def test_complibs_default_settings(self):
        """GH15943: check the compression filters written for each
        combination of ``complevel`` / ``complib`` being given or omitted."""
        df = tm.makeDataFrame()

        def check_filters(filename, where, complevel, complib):
            # every leaf under ``where`` must carry the expected filters
            with tables.open_file(filename, mode='r') as h5file:
                for node in h5file.walk_nodes(where=where,
                                              classname='Leaf'):
                    assert node.filters.complevel == complevel
                    if complib is None:
                        assert node.filters.complib is None
                    else:
                        assert node.filters.complib == complib

        # Set complevel and check if complib is automatically set to
        # default value
        with ensure_clean_path(self.path) as tmpfile:
            df.to_hdf(tmpfile, 'df', complevel=9)
            result = pd.read_hdf(tmpfile, 'df')
            tm.assert_frame_equal(result, df)
            check_filters(tmpfile, '/df', 9, 'zlib')

        # Set complib and check to see if compression is disabled
        with ensure_clean_path(self.path) as tmpfile:
            df.to_hdf(tmpfile, 'df', complib='zlib')
            result = pd.read_hdf(tmpfile, 'df')
            tm.assert_frame_equal(result, df)
            check_filters(tmpfile, '/df', 0, None)

        # Check if not setting complib or complevel results in no compression
        with ensure_clean_path(self.path) as tmpfile:
            df.to_hdf(tmpfile, 'df')
            result = pd.read_hdf(tmpfile, 'df')
            tm.assert_frame_equal(result, df)
            check_filters(tmpfile, '/df', 0, None)

        # Check if file-defaults can be overridden on a per table basis
        with ensure_clean_path(self.path) as tmpfile:
            # use the store as a context manager so the handle is released
            # even if one of the appends raises
            with pd.HDFStore(tmpfile) as store:
                store.append('dfc', df, complevel=9, complib='blosc')
                store.append('df', df)

            check_filters(tmpfile, '/df', 0, None)
            check_filters(tmpfile, '/dfc', 9, 'blosc')

    def test_complibs(self):
        """GH14478: every available complib / complevel pair round-trips
        and is recorded in the filters of the written leaves."""
        df = tm.makeDataFrame()

        # Build the list of complibs on a filtered *copy*. Removing entries
        # from ``tables.filters.all_complibs`` itself would change PyTables'
        # module-level list for the rest of the session, and ``list.remove``
        # would raise ValueError the next time this test ran.
        optional_libs = ('lzo', 'bzip2')
        unavailable = [lib for lib in optional_libs
                       if not tables.which_lib_version(lib)]
        all_complibs = [lib for lib in tables.filters.all_complibs
                        if lib not in unavailable]

        all_levels = range(0, 10)
        all_tests = [(lib, lvl) for lib in all_complibs for lvl in all_levels]

        for (lib, lvl) in all_tests:
            with ensure_clean_path(self.path) as tmpfile:
                gname = 'foo'

                # Write and read file to see if data is consistent
                df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl)
                result = pd.read_hdf(tmpfile, gname)
                tm.assert_frame_equal(result, df)

                # Open file and check metadata
                # for correct amount of compression; the context manager
                # closes the handle even when an assertion fails
                with tables.open_file(tmpfile, mode='r') as h5table:
                    for node in h5table.walk_nodes(where='/' + gname,
                                                   classname='Leaf'):
                        assert node.filters.complevel == lvl
                        if lvl == 0:
                            assert node.filters.complib is None
                        else:
                            assert node.filters.complib == lib

    def test_put_integer(self):
        """A frame with a non-date, non-string index round-trips."""
        frame = DataFrame(np.random.randn(50, 100))
        self._check_roundtrip(frame, tm.assert_frame_equal)

    def test_put_mixed_type(self):
        """A frame mixing object, bool, int and datetime-like columns
        survives ``put`` / ``get``."""
        frame = tm.makeTimeDataFrame()

        # (column name, value) pairs, assigned in this order
        extra_columns = [
            ('obj1', 'foo'),
            ('obj2', 'bar'),
            ('bool1', frame['A'] > 0),
            ('bool2', frame['B'] > 0),
            ('bool3', True),
            ('int1', 1),
            ('int2', 2),
            ('timestamp1', Timestamp('20010102')),
            ('timestamp2', Timestamp('20010103')),
            ('datetime1', datetime.datetime(2001, 1, 2, 0, 0)),
            ('datetime2', datetime.datetime(2001, 1, 3, 0, 0)),
        ]
        for name, value in extra_columns:
            frame[name] = value

        frame.loc[3:6, ['obj1']] = np.nan
        frame = frame._consolidate()._convert(datetime=True)

        with ensure_clean_store(self.path) as store:
            _maybe_remove(store, 'df')

            # PerformanceWarning
            with catch_warnings(record=True):
                store.put('df', frame)

            stored = store.get('df')
            tm.assert_frame_equal(stored, frame)

    def test_append(self):
        """Append frames and panels in two pieces and check the stored
        result equals the whole; also covers odd key names, a mixed object
        column and unsigned-integer columns."""

        with ensure_clean_store(self.path) as store:

            # warnings (e.g. tables.NaturalNameWarning for the odd key
            # used below) are recorded rather than emitted
            with catch_warnings(record=True):

                df = tm.makeTimeDataFrame()
                _maybe_remove(store, 'df1')
                store.append('df1', df[:10])
                store.append('df1', df[10:])
                tm.assert_frame_equal(store['df1'], df)

                # start with put(format='table'), then append
                _maybe_remove(store, 'df2')
                store.put('df2', df[:10], format='table')
                store.append('df2', df[10:])
                tm.assert_frame_equal(store['df2'], df)

                # key with a leading slash
                _maybe_remove(store, 'df3')
                store.append('/df3', df[:10])
                store.append('/df3', df[10:])
                tm.assert_frame_equal(store['df3'], df)

                # this is allowed, but you almost always don't want to do it
                # tables.NaturalNameWarning
                _maybe_remove(store, '/df3 foo')
                store.append('/df3 foo', df[:10])
                store.append('/df3 foo', df[10:])
                tm.assert_frame_equal(store['df3 foo'], df)

                # panel
                wp = tm.makePanel()
                _maybe_remove(store, 'wp1')
                store.append('wp1', wp.iloc[:, :10, :])
                store.append('wp1', wp.iloc[:, 10:, :])
                assert_panel_equal(store['wp1'], wp)

                # test using a different order of items on the non-index axes
                _maybe_remove(store, 'wp1')
                wp_append1 = wp.iloc[:, :10, :]
                store.append('wp1', wp_append1)
                wp_append2 = wp.iloc[:, 10:, :].reindex(items=wp.items[::-1])
                store.append('wp1', wp_append2)
                assert_panel_equal(store['wp1'], wp)

                # dtype issues - mixed type in a single object column
                df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]])
                df['mixed_column'] = 'testing'
                df.loc[2, 'mixed_column'] = np.nan
                _maybe_remove(store, 'df')
                store.append('df', df)
                tm.assert_frame_equal(store['df'], df)

                # uints - test storage of uints
                uint_data = DataFrame({
                    'u08': Series(np.random.randint(0, high=255, size=5),
                                  dtype=np.uint8),
                    'u16': Series(np.random.randint(0, high=65535, size=5),
                                  dtype=np.uint16),
                    'u32': Series(np.random.randint(0, high=2**30, size=5),
                                  dtype=np.uint32),
                    'u64': Series([2**58, 2**59, 2**60, 2**61, 2**62],
                                  dtype=np.uint64)}, index=np.arange(5))
                _maybe_remove(store, 'uints')
                store.append('uints', uint_data)
                tm.assert_frame_equal(store['uints'], uint_data)

                # uints - test storage of uints in indexable columns
                _maybe_remove(store, 'uints')
                # 64-bit indices not yet supported
                store.append('uints', uint_data, data_columns=[
                             'u08', 'u16', 'u32'])
                tm.assert_frame_equal(store['uints'], uint_data)

    def test_append_series(self):
        """Append Series as tables, read them back and query them."""

        with ensure_clean_store(self.path) as store:

            # fixtures: two unnamed series plus an integer series
            string_series = tm.makeStringSeries()
            time_series = tm.makeTimeSeries()
            int_series = Series(np.arange(100))

            # these two are expected to come back without a name
            for key, original in (('ss', string_series),
                                  ('ts', time_series)):
                store.append(key, original)
                stored = store[key]
                tm.assert_series_equal(stored, original)
                assert stored.name is None

            # a named series keeps its name
            int_series.name = 'foo'
            store.append('ns', int_series)
            stored = store['ns']
            tm.assert_series_equal(stored, int_series)
            assert stored.name == int_series.name

            # select on the values
            wanted = int_series[int_series > 60]
            selected = store.select('ns', 'foo>60')
            tm.assert_series_equal(selected, wanted)

            # select on the index and values
            keep = (int_series > 70) & (int_series.index < 90)
            wanted = int_series[keep]
            selected = store.select('ns', 'foo>70 and index<90')
            tm.assert_series_equal(selected, wanted)

            # multi-index: build a two-level index, stack the single
            # column and drop the level that stacking added
            frame = DataFrame(np.random.randn(5, 1), columns=['A'])
            frame['B'] = np.arange(len(frame))
            frame['C'] = 'foo'
            frame.loc[3:5, 'C'] = 'bar'
            frame.set_index(['C', 'B'], inplace=True)
            stacked = frame.stack()
            stacked.index = stacked.index.droplevel(2)
            store.append('mi', stacked)
            tm.assert_series_equal(store['mi'], stacked)

    def test_store_index_types(self):
        """Frames indexed by various index types round-trip (GH5386)."""

        with ensure_clean_store(self.path) as store:

            def roundtrip(fmt, make_index):
                # store a small random frame whose index comes from
                # ``make_index`` and compare it with what is read back
                frame = DataFrame(np.random.randn(10, 2),
                                  columns=list('AB'))
                frame.index = make_index(len(frame))

                _maybe_remove(store, 'df')
                store.put('df', frame, format=fmt)
                assert_frame_equal(frame, store['df'])

            makers = (tm.makeFloatIndex, tm.makeStringIndex,
                      tm.makeIntIndex, tm.makeDateIndex)
            for maker in makers:
                roundtrip('table', maker)
                roundtrip('fixed', maker)

            # period index currently broken for table
            # see GH7796 FIXME -- only the fixed format is exercised
            roundtrip('fixed', tm.makePeriodIndex)

            # unicode
            unicode_maker = tm.makeUnicodeIndex
            if not compat.PY3:

                # only support for fixed types (and they have a perf warning)
                with pytest.raises(TypeError):
                    roundtrip('table', unicode_maker)

                # PerformanceWarning
                with catch_warnings(record=True):
                    roundtrip('fixed', unicode_maker)
            else:
                roundtrip('table', unicode_maker)
                roundtrip('fixed', unicode_maker)

    @pytest.mark.skipif(not is_platform_little_endian(),
                        reason="reason platform is not little endian")
    def test_encoding(self):
        """String columns appended with an ascii encoding round-trip."""

        with ensure_clean_store(self.path) as store:
            frame = DataFrame({'A': 'foo', 'B': 'bar'}, index=range(5))
            # put one missing value into each string column
            for row, column in ((2, 'A'), (3, 'B')):
                frame.loc[row, column] = np.nan
            _maybe_remove(store, 'df')
            store.append('df', frame, encoding='ascii')
            tm.assert_frame_equal(store['df'], frame)

            # selecting a single column with an encoded Term
            first_column = frame.reindex(columns=['A'])
            selected = store.select('df',
                                    Term('columns=A', encoding='ascii'))
            tm.assert_frame_equal(selected, first_column)

    def test_latin_encoding(self):
        """Latin-1 strings survive a table-format ``to_hdf`` round trip."""

        if compat.PY2:
            # NOTE(review): no callable is passed and the return value is
            # discarded, so this presumably asserts nothing -- confirm
            tm.assert_raises_regex(
                TypeError, r'\[unicode\] is not implemented as a table column')
            return

        raw_rows = [[b'E\xc9, 17', b'', b'a', b'b', b'c'],
                    [b'E\xc9, 17', b'a', b'b', b'c'],
                    [b'EE, 17', b'', b'a', b'b', b'c'],
                    [b'E\xc9, 17', b'\xf8\xfc', b'a', b'b', b'c'],
                    [b'', b'a', b'b', b'c'],
                    [b'\xf8\xfc', b'a', b'b', b'c'],
                    [b'A\xf8\xfc', b'', b'a', b'b', b'c'],
                    [np.nan, b'', b'b', b'c'],
                    [b'A\xf8\xfc', np.nan, b'', b'b', b'c']]

        def _try_decode(x, encoding='latin-1'):
            # objects without a ``decode`` method are passed through as-is
            try:
                decoded = x.decode(encoding)
            except AttributeError:
                return x
            else:
                return decoded
        # not sure how to remove latin-1 from code in python 2 and 3
        decoded_rows = [[_try_decode(item) for item in row]
                        for row in raw_rows]

        # every row once as a categorical and once as an object series
        examples = [pd.Series(row, dtype=dtype)
                    for dtype in ['category', object]
                    for row in decoded_rows]

        def roundtrip(s, key='data', encoding='latin-1', nan_rep=''):
            with ensure_clean_path(self.path) as path:
                s.to_hdf(path, key, format='table', encoding=encoding,
                         nan_rep=nan_rep)
                retr = read_hdf(path, key)
                # the expected value has ``nan_rep`` replaced by NaN
                s_nan = s.replace(nan_rep, np.nan)
                if not is_categorical_dtype(s_nan):
                    assert_series_equal(s_nan, retr)
                else:
                    assert is_categorical_dtype(retr)
                    assert_series_equal(s_nan, retr, check_dtype=False,
                                        check_categorical=False)

        for example in examples:
            roundtrip(example)

        # noted as failing: the same round trip with
        # nan_rep=b'\xf8\xfc'

    def test_append_some_nans(self):
        """Frames with partly or wholly NaN columns survive a round trip."""

        with ensure_clean_store(self.path) as store:

            def append_halves_and_compare(key, frame):
                # write the frame in two chunks, then read it back
                _maybe_remove(store, key)
                store.append(key, frame[:10])
                store.append(key, frame[10:])
                tm.assert_frame_equal(store[key], frame)

            columns = {'A': Series(np.random.randn(20)).astype('int32'),
                       'A1': np.random.randn(20),
                       'A2': np.random.randn(20),
                       'B': 'foo', 'C': 'bar',
                       'D': Timestamp("20010101"),
                       'E': datetime.datetime(2001, 1, 2, 0, 0)}
            base = DataFrame(columns, index=np.arange(20))

            # some nans
            base.loc[0:15, ['A1', 'B', 'D', 'E']] = np.nan
            append_halves_and_compare('df1', base)

            # in turn, an entirely-NaN first column, 2nd column and
            # datetime column
            for key, column in (('df1', 'A1'), ('df2', 'A2'), ('df3', 'E')):
                blanked = base.copy()
                blanked.loc[:, column] = np.nan
                append_halves_and_compare(key, blanked)

    def test_append_all_nans(self):
        """Check how rows consisting only of NaN are handled on append.

        Exercises the explicit ``dropna`` argument of ``store.append``, the
        ``io.hdf.dropna_table`` option, frames that also carry string or
        datetime columns, and the ``to_hdf`` round trip of a frame and a
        panel that contain all-NaN rows.
        """

        with ensure_clean_store(self.path) as store:

            df = DataFrame({'A1': np.random.randn(20),
                            'A2': np.random.randn(20)},
                           index=np.arange(20))
            df.loc[0:15, :] = np.nan

            # nan some entire rows (dropna=True)
            _maybe_remove(store, 'df')
            store.append('df', df[:10], dropna=True)
            store.append('df', df[10:], dropna=True)
            tm.assert_frame_equal(store['df'], df[-4:])

            # nan some entire rows (dropna=False)
            _maybe_remove(store, 'df2')
            store.append('df2', df[:10], dropna=False)
            store.append('df2', df[10:], dropna=False)
            tm.assert_frame_equal(store['df2'], df)

            # tests the option io.hdf.dropna_table
            # option_context restores the previous value on exit, so the
            # setting cannot leak into tests that run after this one
            with pd.option_context('io.hdf.dropna_table', False):
                _maybe_remove(store, 'df3')
                store.append('df3', df[:10])
                store.append('df3', df[10:])
                tm.assert_frame_equal(store['df3'], df)

            with pd.option_context('io.hdf.dropna_table', True):
                _maybe_remove(store, 'df4')
                store.append('df4', df[:10])
                store.append('df4', df[10:])
                tm.assert_frame_equal(store['df4'], df[-4:])

            # nan some entire rows (string are still written!)
            df = DataFrame({'A1': np.random.randn(20),
                            'A2': np.random.randn(20),
                            'B': 'foo', 'C': 'bar'},
                           index=np.arange(20))

            df.loc[0:15, :] = np.nan

            _maybe_remove(store, 'df')
            store.append('df', df[:10], dropna=True)
            store.append('df', df[10:], dropna=True)
            tm.assert_frame_equal(store['df'], df)

            _maybe_remove(store, 'df2')
            store.append('df2', df[:10], dropna=False)
            store.append('df2', df[10:], dropna=False)
            tm.assert_frame_equal(store['df2'], df)

            # nan some entire rows (but since we have dates they are still
            # written!)
            df = DataFrame({'A1': np.random.randn(20),
                            'A2': np.random.randn(20),
                            'B': 'foo', 'C': 'bar',
                            'D': Timestamp("20010101"),
                            'E': datetime.datetime(2001, 1, 2, 0, 0)},
                           index=np.arange(20))

            df.loc[0:15, :] = np.nan

            _maybe_remove(store, 'df')
            store.append('df', df[:10], dropna=True)
            store.append('df', df[10:], dropna=True)
            tm.assert_frame_equal(store['df'], df)

            _maybe_remove(store, 'df2')
            store.append('df2', df[:10], dropna=False)
            store.append('df2', df[10:], dropna=False)
            tm.assert_frame_equal(store['df2'], df)

        # Test to make sure defaults are to not drop.
        # Corresponding to Issue 9382
        df_with_missing = DataFrame(
            {'col1': [0, np.nan, 2], 'col2': [1, np.nan, np.nan]})

        with ensure_clean_path(self.path) as path:
            df_with_missing.to_hdf(path, 'df_with_missing', format='table')
            reloaded = read_hdf(path, 'df_with_missing')
            tm.assert_frame_equal(df_with_missing, reloaded)

        # the first major-axis row of every item is entirely NaN
        matrix = [[[np.nan, np.nan, np.nan], [1, np.nan, np.nan]],
                  [[np.nan, np.nan, np.nan], [np.nan, 5, 6]],
                  [[np.nan, np.nan, np.nan], [np.nan, 3, np.nan]]]

        with catch_warnings(record=True):
            panel_with_missing = Panel(matrix,
                                       items=['Item1', 'Item2', 'Item3'],
                                       major_axis=[1, 2],
                                       minor_axis=['A', 'B', 'C'])

            with ensure_clean_path(self.path) as path:
                panel_with_missing.to_hdf(
                    path, 'panel_with_missing', format='table')
                reloaded_panel = read_hdf(path, 'panel_with_missing')
                tm.assert_panel_equal(panel_with_missing, reloaded_panel)

    def test_append_frame_column_oriented(self):
        """Append a frame along its columns axis and select from it."""

        with ensure_clean_store(self.path) as store:

            # column oriented
            # NOTE(review): the query strings below mention ``df`` by name,
            # presumably resolved from this scope -- keep the local's name
            df = tm.makeTimeDataFrame()
            _maybe_remove(store, 'df1')
            leading, trailing = df.iloc[:, :2], df.iloc[:, 2:]
            store.append('df1', leading, axes=['columns'])
            store.append('df1', trailing)
            tm.assert_frame_equal(store['df1'], df)

            selected = store.select('df1', 'columns=A')
            wanted = df.reindex(columns=['A'])
            tm.assert_frame_equal(wanted, selected)

            # selection on the non-indexable
            criteria = ('columns=A', 'index=df.index[0:4]')
            selected = store.select('df1', criteria)
            wanted = df.reindex(columns=['A'], index=df.index[0:4])
            tm.assert_frame_equal(wanted, selected)

            # this isn't supported
            with pytest.raises(TypeError):
                store.select('df1', 'columns=A and index>df.index[4]')

    def test_append_with_different_block_ordering(self):
        """Append frames whose columns were added in differing orders."""

        # GH 4096; using same frames, but different block orderings
        with ensure_clean_store(self.path) as store:

            for batch in range(10):

                frame = DataFrame(np.random.randn(10, 2),
                                  columns=list('AB'))
                frame['index'] = range(10)
                frame['index'] += batch * 10
                frame['int64'] = Series([1] * len(frame), dtype='int64')
                frame['int16'] = Series([1] * len(frame), dtype='int16')

                # on every second batch drop and re-add the int64 column
                if not batch % 2:
                    del frame['int64']
                    frame['int64'] = Series([1] * len(frame),
                                            dtype='int64')
                # on every third batch move column A to the end
                if not batch % 3:
                    frame['A'] = frame.pop('A')

                frame.set_index('index', inplace=True)

                store.append('df', frame)

        # test a different ordering but with more fields (like invalid
        # combinations)
        with ensure_clean_store(self.path) as store:

            frame = DataFrame(np.random.randn(10, 2),
                              columns=list('AB'), dtype='float64')
            frame['int64'] = Series([1] * len(frame), dtype='int64')
            frame['int16'] = Series([1] * len(frame), dtype='int16')
            store.append('df', frame)

            # store additional fields in different blocks
            frame['int16_2'] = Series([1] * len(frame), dtype='int16')
            with pytest.raises(ValueError):
                store.append('df', frame)

            # store multiple additional fields in different blocks
            frame['float_3'] = Series([1.] * len(frame), dtype='float64')
            with pytest.raises(ValueError):
                store.append('df', frame)

    def test_append_with_strings(self):
        """Exercise string column sizing (``min_itemsize``) on append.

        The first store checks panels, frames and series whose string
        axes/columns need a minimum or inferred item size; the second
        store checks that ``min_itemsize`` keys create data columns and
        that unknown keys are rejected.
        """

        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):
                wp = tm.makePanel()
                wp2 = wp.rename_axis(
                    {x: "%s_extra" % x for x in wp.minor_axis}, axis=2)

                def check_col(key, name, size):
                    assert getattr(store.get_storer(key)
                                   .table.description, name).itemsize == size

                store.append('s1', wp, min_itemsize=20)
                store.append('s1', wp2)
                expected = concat([wp, wp2], axis=2)
                expected = expected.reindex(
                    minor_axis=sorted(expected.minor_axis))
                assert_panel_equal(store['s1'], expected)
                check_col('s1', 'minor_axis', 20)

                # test dict format
                store.append('s2', wp, min_itemsize={'minor_axis': 20})
                store.append('s2', wp2)
                expected = concat([wp, wp2], axis=2)
                expected = expected.reindex(
                    minor_axis=sorted(expected.minor_axis))
                assert_panel_equal(store['s2'], expected)
                check_col('s2', 'minor_axis', 20)

                # apply the wrong field (similar to #1)
                store.append('s3', wp, min_itemsize={'major_axis': 20})
                pytest.raises(ValueError, store.append, 's3', wp2)

                # test truncation of bigger strings
                store.append('s4', wp)
                pytest.raises(ValueError, store.append, 's4', wp2)

                # avoid truncation on elements
                df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
                store.append('df_big', df)
                tm.assert_frame_equal(store.select('df_big'), df)
                check_col('df_big', 'values_block_1', 15)

                # appending smaller string ok
                df2 = DataFrame([[124, 'asdqy'], [346, 'dggnhefbdfb']])
                store.append('df_big', df2)
                expected = concat([df, df2])
                tm.assert_frame_equal(store.select('df_big'), expected)
                check_col('df_big', 'values_block_1', 15)

                # avoid truncation on elements
                df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']])
                store.append('df_big2', df, min_itemsize={'values': 50})
                tm.assert_frame_equal(store.select('df_big2'), df)
                check_col('df_big2', 'values_block_1', 50)

                # bigger string on next append
                store.append('df_new', df)
                df_new = DataFrame(
                    [[124, 'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']])
                pytest.raises(ValueError, store.append, 'df_new', df_new)

                # min_itemsize on Series index (GH 11412)
                df = tm.makeMixedDataFrame().set_index('C')
                store.append('ss', df['B'], min_itemsize={'index': 4})
                tm.assert_series_equal(store.select('ss'), df['B'])

                # same as above, with data_columns=True
                store.append('ss2', df['B'], data_columns=True,
                             min_itemsize={'index': 4})
                tm.assert_series_equal(store.select('ss2'), df['B'])

                # min_itemsize in index without appending (GH 10381)
                store.put('ss3', df, format='table',
                          min_itemsize={'index': 6})
                # just make sure there is a longer string:
                df2 = df.copy().reset_index().assign(C='longer').set_index('C')
                store.append('ss3', df2)
                tm.assert_frame_equal(store.select('ss3'),
                                      pd.concat([df, df2]))

                # same as above, with a Series
                store.put('ss4', df['B'], format='table',
                          min_itemsize={'index': 6})
                store.append('ss4', df2['B'])
                tm.assert_series_equal(store.select('ss4'),
                                       pd.concat([df['B'], df2['B']]))

                # with nans
                _maybe_remove(store, 'df')
                df = tm.makeTimeDataFrame()
                df['string'] = 'foo'
                df.loc[1:4, 'string'] = np.nan
                df['string2'] = 'bar'
                df.loc[4:8, 'string2'] = np.nan
                df['string3'] = 'bah'
                df.loc[1:, 'string3'] = np.nan
                store.append('df', df)
                result = store.select('df')
                tm.assert_frame_equal(result, df)

        with ensure_clean_store(self.path) as store:

            def check_col(key, name, size):
                # compare with ``==``: ``assert value, size`` would treat
                # ``size`` as the failure message and never check it
                assert getattr(store.get_storer(key)
                               .table.description, name).itemsize == size

            df = DataFrame(dict(A='foo', B='bar'), index=range(10))

            # a min_itemsize that creates a data_column
            _maybe_remove(store, 'df')
            store.append('df', df, min_itemsize={'A': 200})
            check_col('df', 'A', 200)
            assert store.get_storer('df').data_columns == ['A']

            # a min_itemsize that creates a data_column2
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=['B'], min_itemsize={'A': 200})
            check_col('df', 'A', 200)
            assert store.get_storer('df').data_columns == ['B', 'A']

            # a min_itemsize that creates a data_column2
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=[
                         'B'], min_itemsize={'values': 200})
            check_col('df', 'B', 200)
            check_col('df', 'values_block_0', 200)
            assert store.get_storer('df').data_columns == ['B']

            # infer the .typ on subsequent appends
            _maybe_remove(store, 'df')
            store.append('df', df[:5], min_itemsize=200)
            store.append('df', df[5:], min_itemsize=200)
            tm.assert_frame_equal(store['df'], df)

            # invalid min_itemsize keys
            df = DataFrame(['foo', 'foo', 'foo', 'barh',
                            'barh', 'barh'], columns=['A'])
            _maybe_remove(store, 'df')
            pytest.raises(ValueError, store.append, 'df',
                          df, min_itemsize={'foo': 20, 'foobar': 20})

    def test_to_hdf_with_min_itemsize(self):
        """``to_hdf`` accepts ``min_itemsize`` for the index."""

        with ensure_clean_path(self.path) as path:

            # min_itemsize in index with to_hdf (GH 10381)
            short = tm.makeMixedDataFrame().set_index('C')
            short.to_hdf(path, 'ss3', format='table',
                         min_itemsize={'index': 6})
            # just make sure there is a longer string:
            longer = (short.copy()
                      .reset_index()
                      .assign(C='longer')
                      .set_index('C'))
            longer.to_hdf(path, 'ss3', append=True, format='table')
            reloaded = pd.read_hdf(path, 'ss3')
            tm.assert_frame_equal(reloaded, pd.concat([short, longer]))

            # same as above, with a Series
            short_b, longer_b = short['B'], longer['B']
            short_b.to_hdf(path, 'ss4', format='table',
                           min_itemsize={'index': 6})
            longer_b.to_hdf(path, 'ss4', append=True, format='table')
            reloaded = pd.read_hdf(path, 'ss4')
            tm.assert_series_equal(reloaded,
                                   pd.concat([short['B'], longer['B']]))

    @pytest.mark.parametrize("format", ['fixed', 'table'])
    def test_to_hdf_errors(self, format):
        """A lone surrogate round-trips with ``errors='surrogatepass'``."""

        # the same surrogate-containing string is used as value and label
        values = ['\ud800foo']
        original = pd.Series(values, index=pd.Index(values))
        with ensure_clean_path(self.path) as path:
            # GH 20835
            original.to_hdf(path, 'table', format=format,
                            errors='surrogatepass')

            reloaded = pd.read_hdf(path, 'table', errors='surrogatepass')
            tm.assert_series_equal(reloaded, original)

    def test_append_with_data_columns(self):
        """Append frames and a panel with ``data_columns`` and query them.

        Each ``with`` block opens a fresh store and covers one scenario:
        searching on a data column, ``min_itemsize`` combined with data
        columns, several data columns at once, two "doc example"
        scenarios, and finally a panel stored with ``data_columns=True``.
        """

        with ensure_clean_store(self.path) as store:
            df = tm.makeTimeDataFrame()
            df.iloc[0, df.columns.get_loc('B')] = 1.
            _maybe_remove(store, 'df')
            store.append('df', df[:2], data_columns=['B'])
            store.append('df', df[2:])
            tm.assert_frame_equal(store['df'], df)

            # check that we have indices created
            assert(store._handle.root.df.table.cols.index.is_indexed is True)
            assert(store._handle.root.df.table.cols.B.is_indexed is True)

            # data column searching
            result = store.select('df', 'B>0')
            expected = df[df.B > 0]
            tm.assert_frame_equal(result, expected)

            # data column searching (with an indexable and a data_columns)
            # NOTE(review): the query string mentions the local ``df`` by
            # name, presumably resolved from this scope -- do not rename it
            result = store.select(
                'df', 'B>0 and index>df.index[3]')
            df_new = df.reindex(index=df.index[4:])
            expected = df_new[df_new.B > 0]
            tm.assert_frame_equal(result, expected)

            # data column selection with a string data_column
            df_new = df.copy()
            df_new['string'] = 'foo'
            df_new.loc[1:4, 'string'] = np.nan
            df_new.loc[5:6, 'string'] = 'bar'
            _maybe_remove(store, 'df')
            store.append('df', df_new, data_columns=['string'])
            result = store.select('df', "string='foo'")
            expected = df_new[df_new.string == 'foo']
            tm.assert_frame_equal(result, expected)

            # using min_itemsize and a data column
            # (check_col is defined here but only called from the
            # following ``with`` blocks, which rebind ``store``)
            def check_col(key, name, size):
                assert getattr(store.get_storer(key)
                               .table.description, name).itemsize == size

        # df_new is carried over from the block above
        with ensure_clean_store(self.path) as store:
            _maybe_remove(store, 'df')
            store.append('df', df_new, data_columns=['string'],
                         min_itemsize={'string': 30})
            check_col('df', 'string', 30)
            _maybe_remove(store, 'df')
            store.append(
                'df', df_new, data_columns=['string'], min_itemsize=30)
            check_col('df', 'string', 30)
            _maybe_remove(store, 'df')
            store.append('df', df_new, data_columns=['string'],
                         min_itemsize={'values': 30})
            check_col('df', 'string', 30)

        # per-column sizes combined with a 'values' size
        with ensure_clean_store(self.path) as store:
            df_new['string2'] = 'foobarbah'
            df_new['string_block1'] = 'foobarbah1'
            df_new['string_block2'] = 'foobarbah2'
            _maybe_remove(store, 'df')
            store.append('df', df_new, data_columns=['string', 'string2'],
                         min_itemsize={'string': 30, 'string2': 40,
                                       'values': 50})
            check_col('df', 'string', 30)
            check_col('df', 'string2', 40)
            check_col('df', 'values_block_1', 50)

        with ensure_clean_store(self.path) as store:
            # multiple data columns
            df_new = df.copy()
            df_new.iloc[0, df_new.columns.get_loc('A')] = 1.
            df_new.iloc[0, df_new.columns.get_loc('B')] = -1.
            df_new['string'] = 'foo'

            sl = df_new.columns.get_loc('string')
            df_new.iloc[1:4, sl] = np.nan
            df_new.iloc[5:6, sl] = 'bar'

            df_new['string2'] = 'foo'
            sl = df_new.columns.get_loc('string2')
            df_new.iloc[2:5, sl] = np.nan
            df_new.iloc[7:8, sl] = 'bar'
            _maybe_remove(store, 'df')
            store.append(
                'df', df_new, data_columns=['A', 'B', 'string', 'string2'])
            result = store.select('df',
                                  "string='foo' and string2='foo'"
                                  " and A>0 and B<0")
            expected = df_new[(df_new.string == 'foo') & (
                df_new.string2 == 'foo') & (df_new.A > 0) & (df_new.B < 0)]
            tm.assert_frame_equal(result, expected, check_index_type=False)

            # yield an empty frame
            result = store.select('df', "string='foo' and string2='cool'")
            expected = df_new[(df_new.string == 'foo') & (
                df_new.string2 == 'cool')]
            tm.assert_frame_equal(result, expected, check_index_type=False)

        with ensure_clean_store(self.path) as store:
            # doc example
            df_dc = df.copy()
            df_dc['string'] = 'foo'
            df_dc.loc[4:6, 'string'] = np.nan
            df_dc.loc[7:9, 'string'] = 'bar'
            df_dc['string2'] = 'cool'
            df_dc['datetime'] = Timestamp('20010102')
            df_dc = df_dc._convert(datetime=True)
            df_dc.loc[3:5, ['A', 'B', 'datetime']] = np.nan

            _maybe_remove(store, 'df_dc')
            store.append('df_dc', df_dc,
                         data_columns=['B', 'C', 'string',
                                       'string2', 'datetime'])
            result = store.select('df_dc', 'B>0')

            expected = df_dc[df_dc.B > 0]
            tm.assert_frame_equal(result, expected, check_index_type=False)

            # NOTE(review): ``foo`` is unquoted in this query but quoted in
            # the "part 2" query below -- confirm both spellings are meant
            result = store.select(
                'df_dc', ['B > 0', 'C > 0', 'string == foo'])
            expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (
                df_dc.string == 'foo')]
            tm.assert_frame_equal(result, expected, check_index_type=False)

        with ensure_clean_store(self.path) as store:
            # doc example part 2
            # seed the generator so the random frame is reproducible
            np.random.seed(1234)
            index = date_range('1/1/2000', periods=8)
            df_dc = DataFrame(np.random.randn(8, 3), index=index,
                              columns=['A', 'B', 'C'])
            df_dc['string'] = 'foo'
            df_dc.loc[4:6, 'string'] = np.nan
            df_dc.loc[7:9, 'string'] = 'bar'
            df_dc.loc[:, ['B', 'C']] = df_dc.loc[:, ['B', 'C']].abs()
            df_dc['string2'] = 'cool'

            # on-disk operations
            store.append('df_dc', df_dc, data_columns=[
                         'B', 'C', 'string', 'string2'])

            result = store.select('df_dc', 'B>0')
            expected = df_dc[df_dc.B > 0]
            tm.assert_frame_equal(result, expected)

            result = store.select(
                'df_dc', ['B > 0', 'C > 0', 'string == "foo"'])
            expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) &
                             (df_dc.string == 'foo')]
            tm.assert_frame_equal(result, expected)

        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):
                # panel
                # GH5717 not handling data_columns
                np.random.seed(1234)
                p = tm.makePanel()

                store.append('p1', p)
                tm.assert_panel_equal(store.select('p1'), p)

                store.append('p2', p, data_columns=True)
                tm.assert_panel_equal(store.select('p2'), p)

                # results are compared in long (frame) form
                result = store.select('p2', where='ItemA>0')
                expected = p.to_frame()
                expected = expected[expected['ItemA'] > 0]
                tm.assert_frame_equal(result.to_frame(), expected)

                result = store.select(
                    'p2', where='ItemA>0 & minor_axis=["A","B"]')
                expected = p.to_frame()
                expected = expected[expected['ItemA'] > 0]
                expected = expected[expected.reset_index(
                    level=['major']).index.isin(['A', 'B'])]
                tm.assert_frame_equal(result.to_frame(), expected)

    def test_create_table_index(self):
        """Create PyTables column indexes and change their settings."""

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):
                def column_of(key, name):
                    return getattr(store.get_storer(key).table.cols, name)

                def is_indexed(key, name):
                    return column_of(key, name).is_indexed

                def check_p5_index(optlevel, kind):
                    # optlevel is read from the major axis column and
                    # kind from the minor axis column of table 'p5'
                    major = column_of('p5', 'major_axis')
                    minor = column_of('p5', 'minor_axis')
                    assert major.index.optlevel == optlevel
                    assert minor.index.kind == kind

                # index=False
                wp = tm.makePanel()
                store.append('p5', wp, index=False)
                store.create_table_index('p5', columns=['major_axis'])
                assert is_indexed('p5', 'major_axis') is True
                assert is_indexed('p5', 'minor_axis') is False

                # index=True
                store.append('p5i', wp, index=True)
                for axis in ('major_axis', 'minor_axis'):
                    assert is_indexed('p5i', axis) is True

                # default optlevels
                store.get_storer('p5').create_index()
                check_p5_index(6, 'medium')

                # let's change the indexing scheme
                retunes = (({}, 6, 'medium'),
                           ({'optlevel': 9}, 9, 'medium'),
                           ({'kind': 'full'}, 9, 'full'),
                           ({'optlevel': 1, 'kind': 'light'}, 1, 'light'))
                for options, optlevel, kind in retunes:
                    store.create_table_index('p5', **options)
                    check_p5_index(optlevel, kind)

                # data columns
                df = tm.makeTimeDataFrame()
                df['string'] = 'foo'
                df['string2'] = 'bar'
                store.append('f', df, data_columns=['string', 'string2'])
                for name in ('index', 'string', 'string2'):
                    assert is_indexed('f', name) is True

                # specify index=columns
                store.append(
                    'f2', df, index=['string'],
                    data_columns=['string', 'string2'])
                for name, flag in (('index', False),
                                   ('string', True),
                                   ('string2', False)):
                    assert is_indexed('f2', name) is flag

                # try to index a non-table
                _maybe_remove(store, 'f2')
                store.put('f2', df)
                with pytest.raises(TypeError):
                    store.create_table_index('f2')

    def test_append_diff_item_order(self):
        """Appending a panel slice with reordered items must raise."""
        with catch_warnings(record=True):
            panel = tm.makePanel()
            reordered = panel.items.get_indexer(['ItemC', 'ItemB', 'ItemA'])
            head = panel.iloc[:, :10, :]
            tail = panel.iloc[reordered, 10:, :]

            with ensure_clean_store(self.path) as store:
                store.put('panel', head, format='table')

                # the remaining rows carry the items in a different order,
                # so the append is expected to be rejected
                with pytest.raises(ValueError):
                    store.put('panel', tail, append=True)

    def test_append_hierarchical(self):
        """Round-trip a MultiIndex-ed frame, fully and with a column subset."""
        levels = [['foo', 'bar', 'baz', 'qux'],
                  ['one', 'two', 'three']]
        labels = [[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                  [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]]
        mi = MultiIndex(levels=levels, labels=labels, names=['foo', 'bar'])
        frame = DataFrame(np.random.randn(10, 3), index=mi,
                          columns=['A', 'B', 'C'])
        subset = ('A', 'B')

        with ensure_clean_store(self.path) as store:
            store.append('mi', frame)
            tm.assert_frame_equal(store.select('mi'), frame)

            # GH 3748
            selected = store.select('mi', columns=list(subset))
            tm.assert_frame_equal(selected,
                                  frame.reindex(columns=list(subset)))

        # same column selection through the to_hdf / read_hdf entry points
        with ensure_clean_path('test.hdf') as path:
            frame.to_hdf(path, 'df', format='table')
            selected = read_hdf(path, 'df', columns=list(subset))
            tm.assert_frame_equal(selected,
                                  frame.reindex(columns=list(subset)))

    def test_column_multiindex(self):
        """GH 4710: column (multi-)indexes are recreated properly on read."""

        def with_int64_index(frame):
            # comparison frame: a copy whose RangeIndex (if any) is swapped
            # for the equivalent Int64Index
            out = frame.copy()
            if isinstance(out.index, RangeIndex):
                out.index = Int64Index(out.index)
            return out

        # every comparison below checks index and column types strictly
        strict = dict(check_index_type=True, check_column_type=True)

        columns = MultiIndex.from_tuples(
            [('A', 'a'), ('A', 'b'), ('B', 'a'), ('B', 'b')],
            names=['first', 'second'])
        frame = DataFrame(np.arange(12).reshape(3, 4), columns=columns)
        expected = with_int64_index(frame)

        with ensure_clean_store(self.path) as store:
            store.put('df', frame)
            tm.assert_frame_equal(store['df'], expected, **strict)

            store.put('df1', frame, format='table')
            tm.assert_frame_equal(store['df1'], expected, **strict)

            # requesting data_columns with these columns is expected to fail
            with pytest.raises(ValueError):
                store.put('df2', frame, format='table', data_columns=['A'])
            with pytest.raises(ValueError):
                store.put('df3', frame, format='table', data_columns=True)

        # appending multi-column on existing table (see GH 6167)
        with ensure_clean_store(self.path) as store:
            for _ in range(2):
                store.append('df2', frame)

            tm.assert_frame_equal(store['df2'], concat((frame, frame)))

        # non_index_axes name
        frame = DataFrame(np.arange(12).reshape(3, 4),
                          columns=Index(list('ABCD'), name='foo'))
        expected = with_int64_index(frame)

        with ensure_clean_store(self.path) as store:
            store.put('df1', frame, format='table')
            tm.assert_frame_equal(store['df1'], expected, **strict)

    def test_store_multiindex(self):
        """GH 5527: validate multi-index names when appending to a store."""

        def make_index(names=None):
            # 2 dates x 2 x 3 = 12 index tuples
            tuples = [(datetime.datetime(2013, 12, d), s, t)
                      for d in range(1, 3)
                      for s in range(2)
                      for t in range(3)]
            return MultiIndex.from_tuples(tuples, names=names)

        def make_frame(names=None):
            # all-zero two-column frame over the 12-row multi-index
            return DataFrame(np.zeros((12, 2)), columns=['a', 'b'],
                             index=make_index(names))

        with ensure_clean_store(self.path) as store:

            # no names
            _maybe_remove(store, 'df')
            frame = make_frame()
            store.append('df', frame)
            tm.assert_frame_equal(store.select('df'), frame)

            # partial names
            _maybe_remove(store, 'df')
            frame = make_frame(['date', None, None])
            store.append('df', frame)
            tm.assert_frame_equal(store.select('df'), frame)

            # series: the unnamed levels come back as level_1 / level_2
            _maybe_remove(store, 's')
            ser = Series(np.zeros(12), index=make_index(['date', None, None]))
            store.append('s', ser)
            expected = Series(
                np.zeros(12),
                index=make_index(['date', 'level_1', 'level_2']))
            tm.assert_series_equal(store.select('s'), expected)

            # dup with column
            _maybe_remove(store, 'df')
            frame = make_frame(['date', 'a', 't'])
            with pytest.raises(ValueError):
                store.append('df', frame)

            # dup within level
            _maybe_remove(store, 'df')
            frame = make_frame(['date', 'date', 'date'])
            with pytest.raises(ValueError):
                store.append('df', frame)

            # fully named
            _maybe_remove(store, 'df')
            frame = make_frame(['date', 's', 't'])
            store.append('df', frame)
            tm.assert_frame_equal(store.select('df'), frame)

    def test_select_columns_in_where(self):
        """GH 6169: multi-indexes are rebuilt when `columns` is in `where`."""
        levels = [['foo', 'bar', 'baz', 'qux'],
                  ['one', 'two', 'three']]
        labels = [[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                  [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]]
        mi = MultiIndex(levels=levels, labels=labels,
                        names=['foo_name', 'bar_name'])

        # With a DataFrame
        frame = DataFrame(np.random.randn(10, 3), index=mi,
                          columns=['A', 'B', 'C'])

        with ensure_clean_store(self.path) as store:
            store.put('df', frame, format='table')
            only_a = frame[['A']]

            # column selection through the keyword ...
            by_keyword = store.select('df', columns=['A'])
            tm.assert_frame_equal(by_keyword, only_a)

            # ... and through the where expression
            by_where = store.select('df', where="columns=['A']")
            tm.assert_frame_equal(by_where, only_a)

        # With a Series
        ser = Series(np.random.randn(10), index=mi, name='A')
        with ensure_clean_store(self.path) as store:
            store.put('s', ser, format='table')
            by_where = store.select('s', where="columns=['A']")
            tm.assert_series_equal(by_where, ser)

    def test_mi_data_columns(self):
        """GH 14435: query on an index level after data_columns=True."""
        mi = MultiIndex.from_arrays(
            [date_range('2000-01-01', periods=5), range(5)],
            names=['date', 'id'])
        frame = DataFrame({'a': [1.1, 1.2, 1.3, 1.4, 1.5]}, index=mi)

        with ensure_clean_store(self.path) as store:
            store.append('df', frame, data_columns=True)

            # only the row whose 'id' level equals 1 should come back
            selected = store.select('df', where='id == 1')
            tm.assert_frame_equal(selected, frame.iloc[[1], :])

    def test_pass_spec_to_storer(self):
        """Selecting with columns/where on a frame stored via put() raises."""
        frame = tm.makeDataFrame()

        with ensure_clean_store(self.path) as store:
            store.put('df', frame)

            with pytest.raises(TypeError):
                store.select('df', columns=['A'])

            with pytest.raises(TypeError):
                store.select('df', where=[('columns=A')])

    def test_append_misc(self):
        """Exercise chunksize/expectedrows appends and empty-object appends."""

        def roundtrip_chunked(obj, comparator):
            # append with several chunk sizes, re-opening the store with
            # mode='w' for each one, and compare what is read back
            for chunk in [10, 200, 1000]:
                with ensure_clean_store(self.path, mode='w') as store:
                    store.append('obj', obj, chunksize=chunk)
                    comparator(store.select('obj'), obj)

        with ensure_clean_store(self.path) as store:
            frame = tm.makeDataFrame()
            store.append('df', frame, chunksize=1)
            tm.assert_frame_equal(store.select('df'), frame)

            store.append('df1', frame, expectedrows=10)
            tm.assert_frame_equal(store.select('df1'), frame)

        # more chunksize in append tests
        frame = tm.makeDataFrame()
        frame['string'] = 'foo'
        frame['float322'] = 1.
        frame['float322'] = frame['float322'].astype('float32')
        frame['bool'] = frame['float322'] > 0
        frame['time1'] = Timestamp('20130101')
        frame['time2'] = Timestamp('20130102')
        roundtrip_chunked(frame, tm.assert_frame_equal)

        with catch_warnings(record=True):
            panel = tm.makePanel()
            roundtrip_chunked(panel, assert_panel_equal)

        # empty frame, GH4273
        with ensure_clean_store(self.path) as store:

            # 0 len
            empty_frame = DataFrame(columns=list('ABC'))
            store.append('df', empty_frame)
            with pytest.raises(KeyError):
                store.select('df')

            # repeated append of 0/non-zero frames
            frame = DataFrame(np.random.rand(10, 3), columns=list('ABC'))
            store.append('df', frame)
            assert_frame_equal(store.select('df'), frame)
            store.append('df', empty_frame)
            assert_frame_equal(store.select('df'), frame)

            # store
            frame = DataFrame(columns=list('ABC'))
            store.put('df2', frame)
            assert_frame_equal(store.select('df2'), frame)

            with catch_warnings(record=True):

                # 0 len
                empty_panel = Panel(items=list('ABC'))
                store.append('p', empty_panel)
                with pytest.raises(KeyError):
                    store.select('p')

                # repeated append of 0/non-zero frames
                panel = Panel(np.random.randn(3, 4, 5), items=list('ABC'))
                store.append('p', panel)
                assert_panel_equal(store.select('p'), panel)
                store.append('p', empty_panel)
                assert_panel_equal(store.select('p'), panel)

                # store
                store.put('p2', empty_panel)
                assert_panel_equal(store.select('p2'), empty_panel)

    def test_append_raise(self):
        """Appending invalid or incompatible input raises."""
        with ensure_clean_store(self.path) as store:

            # test append with invalid input to get good error messages

            # list in column
            frame = tm.makeDataFrame()
            frame['invalid'] = [['a']] * len(frame)
            assert frame.dtypes['invalid'] == np.object_
            with pytest.raises(TypeError):
                store.append('df', frame)

            # multiple invalid columns
            for extra in ('invalid2', 'invalid3'):
                frame[extra] = [['a']] * len(frame)
            with pytest.raises(TypeError):
                store.append('df', frame)

            # datetime with embedded nans as object
            frame = tm.makeDataFrame()
            stamps = Series(datetime.datetime(2001, 1, 2), index=frame.index)
            stamps = stamps.astype(object)
            stamps[0:5] = np.nan
            frame['invalid'] = stamps
            assert frame.dtypes['invalid'] == np.object_
            with pytest.raises(TypeError):
                store.append('df', frame)

            # directly ndarray
            with pytest.raises(TypeError):
                store.append('df', np.arange(10))

            # series directly
            with pytest.raises(TypeError):
                store.append('df', Series(np.arange(10)))

            # appending an incompatible table
            frame = tm.makeDataFrame()
            store.append('df', frame)

            # the extra column no longer matches what was stored above
            frame['foo'] = 'foo'
            with pytest.raises(ValueError):
                store.append('df', frame)

    def test_table_index_incompatible_dtypes(self):
        """Appending a date-indexed frame to a default-indexed table raises."""
        default_indexed = DataFrame({'a': [1, 2, 3]})
        date_indexed = DataFrame({'a': [4, 5, 6]},
                                 index=date_range('1/1/2000', periods=3))

        with ensure_clean_store(self.path) as store:
            store.put('frame', default_indexed, format='table')

            with pytest.raises(TypeError):
                store.put('frame', date_indexed,
                          format='table', append=True)

    def test_table_values_dtypes_roundtrip(self):
        """Column dtypes survive a table round-trip; mismatched appends fail."""
        with ensure_clean_store(self.path) as store:
            floats = DataFrame({'a': [1, 2, 3]}, dtype='f8')
            store.append('df_f8', floats)
            assert_series_equal(floats.dtypes, store['df_f8'].dtypes)

            ints = DataFrame({'a': [1, 2, 3]}, dtype='i8')
            store.append('df_i8', ints)
            assert_series_equal(ints.dtypes, store['df_i8'].dtypes)

            # incompatible dtype
            with pytest.raises(ValueError):
                store.append('df_i8', floats)

            # check creation/storage/retrieval of float32 (a bit hacky to
            # actually create them though)
            singles = DataFrame(
                np.array([[1], [2], [3]], dtype='f4'), columns=['A'])
            store.append('df_f4', singles)
            assert_series_equal(singles.dtypes, store['df_f4'].dtypes)
            assert singles.dtypes[0] == 'float32'

            # check with mixed dtypes
            dtype_names = ['float32', 'float64', 'int32',
                           'int64', 'int16', 'int8']
            mixed = DataFrame({name: Series(np.random.randn(5), dtype=name)
                               for name in dtype_names})
            mixed['string'] = 'foo'
            mixed['float322'] = 1.
            mixed['float322'] = mixed['float322'].astype('float32')
            mixed['bool'] = mixed['float32'] > 0
            mixed['time1'] = Timestamp('20130101')
            mixed['time2'] = Timestamp('20130102')

            store.append('df_mixed_dtypes1', mixed)
            counts = store.select('df_mixed_dtypes1').get_dtype_counts()
            expected = Series({'float32': 2, 'float64': 1, 'int32': 1,
                               'bool': 1, 'int16': 1, 'int8': 1,
                               'int64': 1, 'object': 1, 'datetime64[ns]': 2})

            # compare independently of the order the counts come back in
            tm.assert_series_equal(counts.sort_index(),
                                   expected.sort_index())

    def test_table_mixed_dtypes(self):
        """Round-trip a frame and a panel that mix several value dtypes."""

        # frame
        frame = tm.makeDataFrame()
        for name, value in [('obj1', 'foo'), ('obj2', 'bar')]:
            frame[name] = value
        frame['bool1'] = frame['A'] > 0
        frame['bool2'] = frame['B'] > 0
        scalar_columns = [
            ('bool3', True),
            ('int1', 1),
            ('int2', 2),
            ('timestamp1', Timestamp('20010102')),
            ('timestamp2', Timestamp('20010103')),
            ('datetime1', datetime.datetime(2001, 1, 2, 0, 0)),
            ('datetime2', datetime.datetime(2001, 1, 3, 0, 0)),
        ]
        for name, value in scalar_columns:
            frame[name] = value
        # blank out part of one object column
        frame.loc[3:6, ['obj1']] = np.nan
        frame = frame._consolidate()._convert(datetime=True)

        with ensure_clean_store(self.path) as store:
            store.append('df1_mixed', frame)
            tm.assert_frame_equal(store.select('df1_mixed'), frame)

        with catch_warnings(record=True):

            # panel
            panel = tm.makePanel()
            for name, value in [('obj1', 'foo'), ('obj2', 'bar')]:
                panel[name] = value
            panel['bool1'] = panel['ItemA'] > 0
            panel['bool2'] = panel['ItemB'] > 0
            for name, value in [('int1', 1), ('int2', 2)]:
                panel[name] = value
            panel = panel._consolidate()

        with catch_warnings(record=True):

            with ensure_clean_store(self.path) as store:
                store.append('p1_mixed', panel)
                assert_panel_equal(store.select('p1_mixed'), panel)

    def test_unimplemented_dtypes_table_columns(self):
        """Appending frames that hold unsupported column values raises."""
        with ensure_clean_store(self.path) as store:

            unsupported = [('date', datetime.date(2001, 1, 2))]

            # py3 ok for unicode
            if not compat.PY3:
                unsupported.append(('unicode', u('\\u03c3')))

            # currently not supported dtypes ####
            for name, value in unsupported:
                frame = tm.makeDataFrame()
                frame[name] = value
                key = 'df1_%s' % name
                with pytest.raises(TypeError):
                    store.append(key, frame)

        # frame
        frame = tm.makeDataFrame()
        frame['obj1'] = 'foo'
        frame['obj2'] = 'bar'
        frame['datetime1'] = datetime.date(2001, 1, 2)
        frame = frame._consolidate()._convert(datetime=True)

        with ensure_clean_store(self.path) as store:
            # this fails because we have a date in the object block......
            with pytest.raises(TypeError):
                store.append('df_unimplemented', frame)

    @pytest.mark.skipif(
        not _np_version_under1p15,
        reason=("pytables conda build package needs build "
                "with numpy 1.15: gh-22098"))
    def test_calendar_roundtrip_issue(self):
        """GH 8591: round-trip a series indexed by a custom business day."""

        # doc example from tseries holiday section
        holidays = ['2012-05-01',
                    datetime.datetime(2013, 5, 1),
                    np.datetime64('2014-05-01')]
        bday_egypt = pd.offsets.CustomBusinessDay(
            holidays=holidays, weekmask='Sun Mon Tue Wed Thu')
        start = datetime.datetime(2013, 4, 30)
        dts = date_range(start, periods=5, freq=bday_egypt)

        # map each weekday number to its three-letter name
        day_names = Series('Mon Tue Wed Thu Fri Sat Sun'.split())
        ser = Series(dts.weekday, dts).map(day_names)

        with ensure_clean_store(self.path) as store:

            store.put('fixed', ser)
            assert_series_equal(store.select('fixed'), ser)

            store.append('table', ser)
            assert_series_equal(store.select('table'), ser)

    def test_roundtrip_tz_aware_index(self):
        """GH 17618: a tz-aware index keeps its value through a round-trip."""
        stamp = pd.Timestamp('2000-01-01 01:00:00', tz='US/Eastern')
        frame = pd.DataFrame(data=[0], index=[stamp])

        with ensure_clean_store(self.path) as store:
            store.put('frame', frame, format='fixed')
            roundtripped = store['frame']

            tm.assert_frame_equal(roundtripped, frame)
            # the underlying integer value of the timestamp must be unchanged
            assert roundtripped.index[0].value == 946706400000000000

    def test_append_with_timedelta(self):
        """GH 3577: timedelta columns can be appended and queried."""
        later = [Timestamp('20130101') + timedelta(days=i, seconds=10)
                 for i in range(10)]
        frame = DataFrame(dict(A=Timestamp('20130101'), B=later))
        frame['C'] = frame['A'] - frame['B']
        frame.loc[3:5, 'C'] = np.nan

        with ensure_clean_store(self.path) as store:

            # table
            _maybe_remove(store, 'df')
            store.append('df', frame, data_columns=True)
            assert_frame_equal(store.select('df'), frame)

            selected = store.select('df', where="C<100000")
            assert_frame_equal(selected, frame)

            selected = store.select('df', where="C<pd.Timedelta('-3D')")
            assert_frame_equal(selected, frame.iloc[3:])

            selected = store.select('df', "C<'-3D'")
            assert_frame_equal(selected, frame.iloc[3:])

            # a bit hacky here as we don't really deal with the NaT properly

            selected = store.select('df', "C<'-500000s'")
            assert_frame_equal(selected.dropna(subset=['C']), frame.iloc[6:])

            selected = store.select('df', "C<'-3.5D'")
            assert_frame_equal(selected.iloc[1:], frame.iloc[4:])

            # fixed
            _maybe_remove(store, 'df2')
            store.put('df2', frame)
            assert_frame_equal(store.select('df2'), frame)

    def test_remove(self):
        """Removing keys (flat, nested and via ``del``) shrinks the store."""
        with ensure_clean_store(self.path) as store:

            series = tm.makeTimeSeries()
            frame = tm.makeDataFrame()

            store['a'] = series
            store['b'] = frame
            _maybe_remove(store, 'a')
            assert len(store) == 1
            tm.assert_frame_equal(frame, store['b'])

            _maybe_remove(store, 'b')
            assert len(store) == 0

            # nonexistence
            with pytest.raises(KeyError):
                store.remove('a_nonexistent_store')

            # pathing
            store['a'] = series
            store['b/foo'] = frame
            for key in ('foo', 'b/foo'):
                _maybe_remove(store, key)
            assert len(store) == 1

            # removing the parent key also leaves a single entry
            store['a'] = series
            store['b/foo'] = frame
            _maybe_remove(store, 'b')
            assert len(store) == 1

            # __delitem__
            store['a'] = series
            store['b'] = frame
            for key in ('a', 'b'):
                del store[key]
            assert len(store) == 0

    def test_remove_where(self):
        # Exercise store.remove() with a `where` criterion: a missing key,
        # a selective removal, an empty criterion and an invalid criterion.

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):

                # non-existence: nothing has been stored under 'a' yet
                crit1 = 'index>foo'
                pytest.raises(KeyError, store.remove, 'a', [crit1])

                # try to remove non-table (with crit)
                # non-table ok (where = None)
                # NOTE(review): the panel below is stored in table format, so
                # this exercises a selective removal on a table.
                wp = tm.makePanel(30)
                store.put('wp', wp, format='table')
                store.remove('wp', ["minor_axis=['A', 'D']"])
                rs = store.select('wp')
                # only the minor_axis labels that were not removed remain
                expected = wp.reindex(minor_axis=['B', 'C'])
                assert_panel_equal(rs, expected)

                # empty where
                _maybe_remove(store, 'wp')
                store.put('wp', wp, format='table')

                # deleted number (entire table)
                # presumably 30 major_axis entries x 4 minor_axis labels
                n = store.remove('wp', [])
                assert n == 120

                # non - empty where
                _maybe_remove(store, 'wp')
                store.put('wp', wp, format='table')
                # 'foo' is not a usable criterion, so a ValueError is expected
                pytest.raises(ValueError, store.remove,
                              'wp', ['foo'])

    def test_remove_startstop(self):
        # GH #4835 and #6177
        # Exercise store.remove() with start/stop row bounds, alone and
        # combined with a `where` criterion.
        # The row counts below are consistent with 4 stored rows per
        # major_axis entry (120 rows in total), hence the `// 4` when the
        # expected major_axis slices are built.

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):
                wp = tm.makePanel(30)

                # start
                _maybe_remove(store, 'wp1')
                store.put('wp1', wp, format='t')
                n = store.remove('wp1', start=32)
                assert n == 120 - 32
                result = store.select('wp1')
                expected = wp.reindex(major_axis=wp.major_axis[:32 // 4])
                assert_panel_equal(result, expected)

                # negative start: counted from the end of the table
                _maybe_remove(store, 'wp2')
                store.put('wp2', wp, format='t')
                n = store.remove('wp2', start=-32)
                assert n == 32
                result = store.select('wp2')
                # -32 // 4 parses as (-32) // 4 == -8
                expected = wp.reindex(major_axis=wp.major_axis[:-32 // 4])
                assert_panel_equal(result, expected)

                # stop
                _maybe_remove(store, 'wp3')
                store.put('wp3', wp, format='t')
                n = store.remove('wp3', stop=32)
                assert n == 32
                result = store.select('wp3')
                expected = wp.reindex(major_axis=wp.major_axis[32 // 4:])
                assert_panel_equal(result, expected)

                # negative stop: counted from the end of the table
                _maybe_remove(store, 'wp4')
                store.put('wp4', wp, format='t')
                n = store.remove('wp4', stop=-32)
                assert n == 120 - 32
                result = store.select('wp4')
                expected = wp.reindex(major_axis=wp.major_axis[-32 // 4:])
                assert_panel_equal(result, expected)

                # start n stop
                _maybe_remove(store, 'wp5')
                store.put('wp5', wp, format='t')
                n = store.remove('wp5', start=16, stop=-16)
                assert n == 120 - 32
                result = store.select('wp5')
                # the first and last 16 rows (4 major_axis entries each) stay
                expected = wp.reindex(
                    major_axis=(wp.major_axis[:16 // 4]
                                .union(wp.major_axis[-16 // 4:])))
                assert_panel_equal(result, expected)

                # start == stop: empty range, nothing is removed
                _maybe_remove(store, 'wp6')
                store.put('wp6', wp, format='t')
                n = store.remove('wp6', start=16, stop=16)
                assert n == 0
                result = store.select('wp6')
                expected = wp.reindex(major_axis=wp.major_axis)
                assert_panel_equal(result, expected)

                # with where
                _maybe_remove(store, 'wp7')

                # NOTE: `date` looks unused to a linter (hence the noqa), but
                # the `crit` string below refers to it by name; the expected
                # count of 28 only adds up if that name is resolved.
                date = wp.major_axis.take(np.arange(0, 30, 3))  # noqa

                crit = 'major_axis=date'
                store.put('wp7', wp, format='t')
                n = store.remove('wp7', where=[crit], stop=80)
                # stop=80 limits the match to the first 20 major_axis
                # entries: 7 of the selected dates x 4 rows each
                assert n == 28
                result = store.select('wp7')
                expected = wp.reindex(major_axis=wp.major_axis.difference(
                    wp.major_axis[np.arange(0, 20, 3)]))
                assert_panel_equal(result, expected)

    def test_remove_crit(self):
        # Exercise store.remove() with string criteria on a table-format
        # panel.  Several criteria name local variables (date4, date, date1,
        # date2, date3, wp) inside the query string, so those locals must
        # keep their names.

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):
                wp = tm.makePanel(30)

                # group row removal
                _maybe_remove(store, 'wp3')
                date4 = wp.major_axis.take([0, 1, 2, 4, 5, 6, 8, 9, 10])
                crit4 = 'major_axis=date4'
                store.put('wp3', wp, format='t')
                n = store.remove('wp3', where=[crit4])
                # 9 selected dates; 36 rows is consistent with 4 rows each
                assert n == 36

                result = store.select('wp3')
                expected = wp.reindex(
                    major_axis=wp.major_axis.difference(date4))
                assert_panel_equal(result, expected)

                # upper half
                _maybe_remove(store, 'wp')
                store.put('wp', wp, format='table')
                date = wp.major_axis[len(wp.major_axis) // 2]

                crit1 = 'major_axis>date'
                crit2 = "minor_axis=['A', 'D']"
                n = store.remove('wp', where=[crit1])
                assert n == 56

                # applied to what is left after the first removal
                n = store.remove('wp', where=[crit2])
                assert n == 32

                result = store['wp']
                expected = wp.truncate(after=date).reindex(minor=['B', 'C'])
                assert_panel_equal(result, expected)

                # individual row elements
                _maybe_remove(store, 'wp2')
                store.put('wp2', wp, format='table')

                # criterion given as a slice of the major axis
                date1 = wp.major_axis[1:3]
                crit1 = 'major_axis=date1'
                store.remove('wp2', where=[crit1])
                result = store.select('wp2')
                expected = wp.reindex(
                    major_axis=wp.major_axis.difference(date1))
                assert_panel_equal(result, expected)

                # criterion given as a single major_axis element; removals
                # on 'wp2' accumulate, so earlier dates stay excluded
                date2 = wp.major_axis[5]
                crit2 = 'major_axis=date2'
                store.remove('wp2', where=[crit2])
                result = store['wp2']
                expected = wp.reindex(
                    major_axis=(wp.major_axis
                                .difference(date1)
                                .difference(Index([date2]))
                                ))
                assert_panel_equal(result, expected)

                # criterion given as a plain list of major_axis elements
                date3 = [wp.major_axis[7], wp.major_axis[9]]
                crit3 = 'major_axis=date3'
                store.remove('wp2', where=[crit3])
                result = store['wp2']
                expected = wp.reindex(major_axis=wp.major_axis
                                      .difference(date1)
                                      .difference(Index(date3)))
                assert_panel_equal(result, expected)

                # corners
                # nothing lies after the last major_axis entry, so the stored
                # panel must be unchanged; the returned count `n` is not
                # checked here
                _maybe_remove(store, 'wp4')
                store.put('wp4', wp, format='table')
                n = store.remove(
                    'wp4', where="major_axis>wp.major_axis[-1]")
                result = store.select('wp4')
                assert_panel_equal(result, wp)

    def test_invalid_terms(self):
        # Malformed or unsupported query terms must raise, while the valid
        # expressions taken from the docs must be accepted.

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):

                df = tm.makeTimeDataFrame()
                df['string'] = 'foo'
                df.loc[0:4, 'string'] = 'bar'
                wp = tm.makePanel()

                store.put('df', df, format='table')
                store.put('wp', wp, format='table')

                # some invalid terms
                pytest.raises(ValueError, store.select,
                              'wp', "minor=['A', 'B']")
                pytest.raises(ValueError, store.select,
                              'wp', ["index=['20121114']"])
                pytest.raises(ValueError, store.select, 'wp', [
                    "index=['20121114', '20121114']"])
                # Term() called without any argument
                pytest.raises(TypeError, Term)

                # more invalid
                pytest.raises(
                    ValueError, store.select, 'df', 'df.index[3]')
                # incomplete comparison
                pytest.raises(SyntaxError, store.select, 'df', 'index>')
                # second operand has no comparison operator
                pytest.raises(
                    ValueError, store.select, 'wp',
                    "major_axis<'20000108' & minor_axis['A', 'B']")

        # from the docs
        with ensure_clean_path(self.path) as path:
            dfq = DataFrame(np.random.randn(10, 4), columns=list(
                'ABCD'), index=date_range('20130101', periods=10))
            # written with data_columns=True; the column queries below are
            # expected to succeed
            dfq.to_hdf(path, 'dfq', format='table', data_columns=True)

            # check ok
            read_hdf(path, 'dfq',
                     where="index>Timestamp('20130104') & columns=['A', 'B']")
            read_hdf(path, 'dfq', where="A>0 or C>0")

        # catch the invalid reference
        with ensure_clean_path(self.path) as path:
            dfq = DataFrame(np.random.randn(10, 4), columns=list(
                'ABCD'), index=date_range('20130101', periods=10))
            # same frame, written without data_columns this time
            dfq.to_hdf(path, 'dfq', format='table')

            # the same column query is now expected to raise
            pytest.raises(ValueError, read_hdf, path,
                          'dfq', where="A>0 or C>0")

    def test_terms(self):
        # Exercise the accepted spellings of query terms against table-format
        # panels, plus two expression forms that must be rejected.

        with ensure_clean_store(self.path) as store:

            with catch_warnings(record=True):

                wp = tm.makePanel()
                # panel keyed by integers, including a negative one, used by
                # the unary-minus check further down
                wpneg = Panel.fromDict({-1: tm.makeDataFrame(),
                                        0: tm.makeDataFrame(),
                                        1: tm.makeDataFrame()})

                store.put('wp', wp, format='table')
                store.put('wpneg', wpneg, format='table')

                # panel
                result = store.select(
                    'wp',
                    "major_axis<'20000108' and minor_axis=['A', 'B']")
                expected = wp.truncate(
                    after='20000108').reindex(minor=['A', 'B'])
                assert_panel_equal(result, expected)

                # with deprecation
                # (same query as above, passed through the `where` keyword)
                result = store.select(
                    'wp', where=("major_axis<'20000108' "
                                 "and minor_axis=['A', 'B']"))
                expected = wp.truncate(
                    after='20000108').reindex(minor=['A', 'B'])
                tm.assert_panel_equal(result, expected)

            with catch_warnings(record=True):

                # valid terms
                # note: ('x') is just the string 'x'; only the entries with a
                # trailing comma are (possibly nested) one-element tuples
                terms = [('major_axis=20121114'),
                         ('major_axis>20121114'),
                         (("major_axis=['20121114', '20121114']"),),
                         ('major_axis=datetime.datetime(2012, 11, 14)'),
                         'major_axis> 20121114',
                         'major_axis >20121114',
                         'major_axis > 20121114',
                         (("minor_axis=['A', 'B']"),),
                         (("minor_axis=['A', 'B']"),),
                         ((("minor_axis==['A', 'B']"),),),
                         (("items=['ItemA', 'ItemB']"),),
                         ('items=ItemA'),
                         ]

                # each term only has to be accepted; results are not checked
                for t in terms:
                    store.select('wp', t)

                # calling a lambda inside the expression is rejected
                with tm.assert_raises_regex(
                        TypeError, 'Only named functions are supported'):
                    store.select(
                        'wp',
                        'major_axis == (lambda x: x)("20130101")')

            with catch_warnings(record=True):
                # check USub node parsing
                res = store.select('wpneg', 'items == -1')
                expected = Panel({-1: wpneg[-1]})
                tm.assert_panel_equal(res, expected)

                # unary plus, unlike unary minus, is rejected
                with tm.assert_raises_regex(NotImplementedError,
                                            'Unary addition '
                                            'not supported'):
                    store.select('wpneg', 'items == +1')

    def test_term_compat(self):
        """Check string ``where`` clauses for select/remove on a Panel table.

        A random 2x5x4 Panel is appended under ``'wp'`` in two fresh stores.
        The first store checks a combined major/minor axis selection and a
        ``remove`` with a where clause; the second checks where strings that
        spell out ``datetime.datetime(...)`` values and a minor-axis list.
        """
        def _build_panel():
            # must be called inside ``catch_warnings`` like the original
            # inline construction was
            return Panel(np.random.randn(2, 5, 4),
                         items=['Item1', 'Item2'],
                         major_axis=date_range('1/1/2000', periods=5),
                         minor_axis=['A', 'B', 'C', 'D'])

        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):
                panel = _build_panel()
                store.append('wp', panel)

                selected = store.select(
                    'wp',
                    where="major_axis>20000102 and minor_axis=['A', 'B']")
                after_jan2 = panel.major_axis > Timestamp('20000102')
                assert_panel_equal(selected,
                                   panel.loc[:, after_jan2, ['A', 'B']])

                # drop everything after Jan 3rd and re-read the whole node
                store.remove('wp', 'major_axis>20000103')
                remaining = store.select('wp')
                up_to_jan3 = panel.major_axis <= Timestamp('20000103')
                assert_panel_equal(remaining, panel.loc[:, up_to_jan3, :])

        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):
                panel = _build_panel()
                store.append('wp', panel)

                # datetimes spelled out inside the where string; the same
                # selection is issued twice
                for _ in range(2):
                    selected = store.select(
                        'wp', 'major_axis>datetime.datetime(2000, 1, 2)')
                    after_jan2 = panel.major_axis > Timestamp('20000102')
                    assert_panel_equal(selected, panel.loc[:, after_jan2])

                # a list of datetimes on the major axis
                selected = store.select(
                    'wp',
                    "major_axis=[datetime.datetime(2000, 1, 2, 0, 0), "
                    "datetime.datetime(2000, 1, 3, 0, 0)]")
                wanted_days = [Timestamp('20000102'), Timestamp('20000103')]
                assert_panel_equal(selected, panel.loc[:, wanted_days])

                # a list of labels on the minor axis
                selected = store.select('wp', "minor_axis=['A', 'B']")
                assert_panel_equal(selected, panel.loc[:, :, ['A', 'B']])

    def test_same_name_scoping(self):
        """Where strings must work whatever ``datetime`` is bound to locally.

        The same selection is run while the local name ``datetime`` refers
        first to the module and then to the ``datetime.datetime`` class; all
        three selects must return the rows after 2013-01-05.
        """

        with ensure_clean_store(self.path) as store:

            # function-local imports are the point of this test: they create
            # *local* bindings that coexist with the where strings below
            import pandas as pd
            df = DataFrame(np.random.randn(20, 2),
                           index=pd.date_range('20130101', periods=20))
            store.put('df', df, format='table')
            expected = df[df.index > pd.Timestamp('20130105')]

            # local ``datetime`` -> the module
            import datetime  # noqa
            result = store.select('df', 'index>datetime.datetime(2013,1,5)')
            assert_frame_equal(result, expected)

            # local ``datetime`` is rebound -> the class
            from datetime import datetime  # noqa

            # technically an error, but allow it
            result = store.select('df', 'index>datetime.datetime(2013,1,5)')
            assert_frame_equal(result, expected)

            # the class called directly
            result = store.select('df', 'index>datetime(2013,1,5)')
            assert_frame_equal(result, expected)

    def test_series(self):
        """Round-trip several Series through the store.

        Covers the outputs of ``tm.makeStringSeries`` and
        ``tm.makeTimeSeries`` plus two variants of the latter that are keyed
        by an object-dtype ``Index``.
        """
        roundtrip = self._check_roundtrip
        same_series = tm.assert_series_equal

        roundtrip(tm.makeStringSeries(), same_series)

        time_series = tm.makeTimeSeries()
        roundtrip(time_series, same_series)

        # the time index as values, keyed by an object Index of itself
        object_index = Index(time_series.index, dtype=object)
        roundtrip(Series(time_series.index, object_index), same_series)

        # original values, keyed by an object Index built from an object
        # ndarray; the index type is not compared for this one
        object_labels = np.asarray(time_series.index, dtype=object)
        roundtrip(Series(time_series.values,
                         Index(object_labels, dtype=object)),
                  same_series, check_index_type=False)

    def test_sparse_series(self):
        """Round-trip sparse Series built with several ``to_sparse`` options."""
        dense = tm.makeStringSeries()
        dense.iloc[3:5] = np.nan

        # default conversion, integer kind, and an explicit fill value
        for sparse_kwargs in ({}, {'kind': 'integer'}, {'fill_value': 0}):
            sparse = dense.to_sparse(**sparse_kwargs)
            self._check_roundtrip(sparse, tm.assert_series_equal,
                                  check_series_type=True)

    def test_sparse_frame(self):
        """Double round-trip sparse frames built with several options."""
        dense = tm.makeDataFrame()
        # blank out two patches so there is something to sparsify
        dense.iloc[3:5, 1:3] = np.nan
        dense.iloc[8:10, -2] = np.nan

        # default conversion, integer kind, and an explicit fill value
        for sparse_kwargs in ({}, {'kind': 'integer'}, {'fill_value': 0}):
            sparse = dense.to_sparse(**sparse_kwargs)
            self._check_double_roundtrip(sparse, tm.assert_frame_equal,
                                         check_frame_type=True)

    def test_float_index(self):
        """Round-trip a Series indexed by random floats (GH #454)."""
        float_labels = np.random.randn(10)
        series = Series(np.random.randn(10), index=float_labels)
        self._check_roundtrip(series, tm.assert_series_equal)

    def test_tuple_index(self):
        """Round-trip a DataFrame whose row labels are tuples (GH #492)."""
        column_labels = np.arange(10)
        row_labels = [(0., 1.), (2., 3.), (4., 5.)]
        values = np.random.randn(30).reshape((3, 10))
        frame = DataFrame(values, index=row_labels, columns=column_labels)

        # warnings raised during the round trip are recorded, not shown
        with catch_warnings(record=True):
            self._check_roundtrip(frame, tm.assert_frame_equal)

    def test_index_types(self):
        """Round-trip two-element Series over many kinds of index.

        Four mixed-type indexes are first checked one at a time, each under
        its own ``catch_warnings`` block; the same four are then checked again
        together with float, integer and datetime indexes inside one block.
        Series equality is strict about dtype, index type and series type.
        """
        with catch_warnings(record=True):
            values = np.random.randn(2)

            def func(left, right):
                return tm.assert_series_equal(left, right,
                                              check_dtype=True,
                                              check_index_type=True,
                                              check_series_type=True)

        def _mixed_indexes():
            # generated lazily so that ``today()`` is evaluated right before
            # the Series using it is built
            yield [0, 'y']
            yield [datetime.datetime.today(), 0]
            yield ['y', 0]
            yield [datetime.date.today(), 'a']

        # one warnings context per mixed index
        for labels in _mixed_indexes():
            with catch_warnings(record=True):
                self._check_roundtrip(Series(values, labels), func)

        # everything again under a single warnings context
        with catch_warnings(record=True):
            for labels in _mixed_indexes():
                self._check_roundtrip(Series(values, labels), func)

            other_indexes = [
                [1.23, 'b'],
                [1, 1.53],
                [1, 5],
                [datetime.datetime(2012, 1, 1),
                 datetime.datetime(2012, 1, 2)],
            ]
            for labels in other_indexes:
                self._check_roundtrip(Series(values, labels), func)

    def test_timeseries_preepoch(self):
        """Round-trip a business-day series spanning 1940 to 1960.

        An ``OverflowError`` during the round trip skips the test instead of
        failing it.
        """
        business_days = bdate_range('1/1/1940', '1/1/1960')
        series = Series(np.random.randn(len(business_days)),
                        index=business_days)
        try:
            self._check_roundtrip(series, tm.assert_series_equal)
        except OverflowError:
            pytest.skip('known failer on some windows platforms')

    @pytest.mark.parametrize("compression", [
        False, pytest.param(True, marks=td.skip_if_windows_python_3)
    ])
    def test_frame(self, compression):
        """Round-trip DataFrames, parametrized over ``compression``."""
        frame = tm.makeDataFrame()

        # poke a couple of NaNs in through the values array
        frame.values[0, 0] = np.nan
        frame.values[5, 3] = np.nan

        options = {'compression': compression}
        self._check_roundtrip_table(frame, tm.assert_frame_equal, **options)
        self._check_roundtrip(frame, tm.assert_frame_equal, **options)

        time_frame = tm.makeTimeDataFrame()
        self._check_roundtrip(time_frame, tm.assert_frame_equal, **options)

        with ensure_clean_store(self.path) as store:
            # a column is added right before storing; what comes back from
            # the store is required to be consolidated
            frame['foo'] = np.random.randn(len(frame))
            store['df'] = frame
            assert store['df']._data.is_consolidated()

        # an empty slice of the (now extended) frame, default compression
        self._check_roundtrip(frame[:0], tm.assert_frame_equal)

    def test_empty_series_frame(self):
        """Round-trip empty Series and DataFrames.

        The frames are empty in both axes, in columns only (index given) and
        in rows only (columns given).
        """
        empty_series = [Series(), Series(name='myseries')]
        empty_frames = [DataFrame(),
                        DataFrame(index=['a', 'b', 'c']),
                        DataFrame(columns=['d', 'e', 'f'])]

        for series in empty_series:
            self._check_roundtrip(series, tm.assert_series_equal)
        for frame in empty_frames:
            self._check_roundtrip(frame, tm.assert_frame_equal)

    def test_empty_series(self):
        """Round-trip empty Series of several dtypes.

        The builtin ``object`` is used rather than the ``np.object`` alias,
        which NumPy deprecated in 1.20 and removed in 1.24.  Both names refer
        to the same type, so the case being covered is unchanged.
        """
        for dtype in [np.int64, np.float64, object, 'm8[ns]', 'M8[ns]']:
            s = Series(dtype=dtype)
            self._check_roundtrip(s, tm.assert_series_equal)

    def test_can_serialize_dates(self):
        """Round-trip a frame indexed by ``datetime.date``-like labels.

        The labels come from calling ``.date()`` on every element of a
        business-day range.
        """
        business_days = bdate_range('1/1/2000', '1/30/2000')
        date_labels = [stamp.date() for stamp in business_days]
        values = np.random.randn(len(date_labels), 4)

        self._check_roundtrip(DataFrame(values, index=date_labels),
                              tm.assert_frame_equal)

    def test_store_hierarchical(self):
        """Round-trip objects carrying a two-level named ``MultiIndex``."""
        level_values = [['foo', 'bar', 'baz', 'qux'],
                        ['one', 'two', 'three']]
        level_codes = [[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                       [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]]
        hier_index = MultiIndex(levels=level_values, labels=level_codes,
                                names=['foo', 'bar'])
        frame = DataFrame(np.random.randn(10, 3), index=hier_index,
                          columns=list('ABC'))

        # the frame, its transpose, and one of its columns
        cases = [(frame, tm.assert_frame_equal),
                 (frame.T, tm.assert_frame_equal),
                 (frame['A'], tm.assert_series_equal)]
        for obj, comparator in cases:
            self._check_roundtrip(obj, comparator)

        # check that the names are stored
        with ensure_clean_store(self.path) as store:
            store['frame'] = frame
            tm.assert_frame_equal(store['frame'], frame)

    def test_store_index_name(self):
        """A frame with a named index reads back equal to the original."""
        frame = tm.makeDataFrame()
        frame.index.name = 'foo'

        with ensure_clean_store(self.path) as store:
            store['frame'] = frame
            tm.assert_frame_equal(store['frame'], frame)

    def test_store_index_name_with_tz(self):
        """A named, UTC-localized index survives a table round trip.

        GH 13884
        """
        frame = pd.DataFrame({'A': [1, 2]})
        nanosecond_stamps = [1234567890123456787, 1234567890123456788]
        frame.index = pd.DatetimeIndex(nanosecond_stamps).tz_localize('UTC')
        frame.index.name = 'foo'

        with ensure_clean_store(self.path) as store:
            store.put('frame', frame, format='table')
            tm.assert_frame_equal(store['frame'], frame)

    @pytest.mark.parametrize('table_format', ['table', 'fixed'])
    def test_store_index_name_numpy_str(self, table_format):
        """Axis names must read back as ``text_type`` (GH #13492).

        Both axes are datetime indexes with non-ASCII names; the frame is
        written with ``to_hdf`` in the parametrized format and read back with
        ``read_hdf``.
        """
        def _named_dates(year, name):
            days = [datetime.date(year, 1, 1), datetime.date(year, 1, 2)]
            return pd.Index(pd.to_datetime(days), name=name)

        column_index = _named_dates(2000, u('cols\u05d2'))
        row_index = _named_dates(2010, u('rows\u05d0'))
        frame = pd.DataFrame(np.arange(4).reshape(2, 2),
                             columns=column_index, index=row_index)

        # This used to fail, returning numpy strings instead of python strings.
        with ensure_clean_path(self.path) as path:
            frame.to_hdf(path, 'df', format=table_format)
            reloaded = read_hdf(path, 'df')

            assert_frame_equal(frame, reloaded, check_names=True)

            # the type is compared exactly, not with isinstance
            for axis_name in (reloaded.index.name, reloaded.columns.name):
                assert type(axis_name) == text_type

    def test_store_series_name(self):
        """A column pulled out of a frame reads back as an equal Series."""
        column = tm.makeDataFrame()['A']

        with ensure_clean_store(self.path) as store:
            store['series'] = column
            tm.assert_series_equal(store['series'], column)

    @pytest.mark.parametrize("compression", [
        False, pytest.param(True, marks=td.skip_if_windows_python_3)
    ])
    def test_store_mixed(self, compression):
        """Round-trip frames extended with object, bool and int columns.

        ``compression`` is only forwarded to the single-column round trips at
        the end; the whole-frame checks use the helper's default.
        """

        def _make_one():
            mixed = tm.makeDataFrame()
            # insertion order matters for the resulting column order
            for column, value in [('obj1', 'foo'), ('obj2', 'bar')]:
                mixed[column] = value
            for column, source in [('bool1', 'A'), ('bool2', 'B')]:
                mixed[column] = mixed[source] > 0
            for column, value in [('int1', 1), ('int2', 2)]:
                mixed[column] = value
            return mixed._consolidate()

        first = _make_one()
        second = _make_one()

        for frame in (first, second):
            self._check_roundtrip(frame, tm.assert_frame_equal)

        # overwrite the same key with each frame in turn
        with ensure_clean_store(self.path) as store:
            for frame in (first, second):
                store['obj'] = frame
                tm.assert_frame_equal(store['obj'], frame)

        # check that can store Series of all of these types
        for column in ('obj1', 'bool1', 'int1'):
            self._check_roundtrip(first[column], tm.assert_series_equal,
                                  compression=compression)

    def test_wide(self):
        """Round-trip the Panel returned by ``tm.makePanel``."""
        with catch_warnings(record=True):
            self._check_roundtrip(tm.makePanel(), assert_panel_equal)

    def test_select_with_dups(self):
        """Select from tables whose frames carry duplicate labels.

        Three scenarios: duplicate column labels within one dtype, duplicate
        column labels across dtypes, and the latter appended twice so that the
        index is duplicated as well.
        """
        def _minute_index():
            return date_range('20130101 9:30', periods=10, freq='T')

        # single dtypes
        frame = DataFrame(np.random.randn(10, 4),
                          columns=['A', 'A', 'B', 'B'])
        frame.index = _minute_index()

        with ensure_clean_store(self.path) as store:
            store.append('df', frame)

            assert_frame_equal(store.select('df'), frame, by_blocks=True)
            assert_frame_equal(store.select('df', columns=frame.columns),
                               frame, by_blocks=True)
            assert_frame_equal(store.select('df', columns=['A']),
                               frame.loc[:, ['A']])

        # dups across dtypes
        float_part = DataFrame(np.random.randn(10, 4),
                               columns=['A', 'A', 'B', 'B'])
        int_part = DataFrame(
            np.random.randint(0, 10, size=20).reshape(10, 2),
            columns=['A', 'C'])
        frame = concat([float_part, int_part], axis=1)
        frame.index = _minute_index()

        with ensure_clean_store(self.path) as store:
            store.append('df', frame)

            assert_frame_equal(store.select('df'), frame, by_blocks=True)
            assert_frame_equal(store.select('df', columns=frame.columns),
                               frame, by_blocks=True)

            for wanted in (['A'], ['B', 'A']):
                subset = frame.loc[:, wanted]
                assert_frame_equal(store.select('df', columns=wanted),
                                   subset, by_blocks=True)

        # duplicates on both index and columns
        with ensure_clean_store(self.path) as store:
            for _ in range(2):
                store.append('df', frame)

            subset = frame.loc[:, ['B', 'A']]
            doubled = concat([subset, subset])
            assert_frame_equal(store.select('df', columns=['B', 'A']),
                               doubled, by_blocks=True)

    def test_wide_table_dups(self):
        """Putting the same Panel twice reads back equal to the Panel.

        The second ``put`` uses ``append=True``.
        """
        with ensure_clean_store(self.path) as store, \
                catch_warnings(record=True):
            panel = tm.makePanel()

            for extra in ({}, {'append': True}):
                store.put('panel', panel, format='table', **extra)

            assert_panel_equal(store['panel'], panel)

    def test_long(self):
        """Round-trip the ``to_frame`` form of a Panel.

        Both sides are converted back with ``to_panel`` before comparing.
        """
        def _same_as_panels(left, right):
            assert_panel_equal(left.to_panel(), right.to_panel())

        with catch_warnings(record=True):
            long_frame = tm.makePanel().to_frame()
            self._check_roundtrip(long_frame, _same_as_panels)

    def test_longpanel(self):
        # Placeholder: the body is empty, so this test checks nothing and
        # always passes.
        pass

    def test_overwrite_node(self):
        """Assigning a Series to a key that held a DataFrame replaces it."""
        with ensure_clean_store(self.path) as store:
            store['a'] = tm.makeTimeDataFrame()

            replacement = tm.makeTimeSeries()
            store['a'] = replacement

            tm.assert_series_equal(store['a'], replacement)

    def test_sparse_with_compression(self):
        """Double round-trip sparse frames with and without zlib (GH 2931).

        All four combinations are expected to succeed: the frame as drawn and
        the frame with one all-zero column, each stored uncompressed and with
        ``compression='zlib'``.
        """
        # make sparse dataframe
        draws = np.random.binomial(n=1, p=.01, size=(1000, 10))
        sparse = DataFrame(draws).to_sparse(fill_value=0)

        def _roundtrip_plain_then_zlib():
            for compression in (False, 'zlib'):
                self._check_double_roundtrip(sparse, tm.assert_frame_equal,
                                             compression=compression,
                                             check_frame_type=True)

        # cases 1 and 2: frame as drawn
        _roundtrip_plain_then_zlib()

        # set one series to be completely sparse
        sparse[0] = np.zeros(1000)

        # cases 3 and 4: frame containing a completely sparse series
        _roundtrip_plain_then_zlib()

    def test_select(self):
        """Basic ``select`` behaviour for Panel and DataFrame nodes.

        Covers plain selects, a where clause that names a long local list,
        column sub-selection by keyword and by where string, and where
        clauses on data columns.
        """
        with ensure_clean_store(self.path) as store:
            with catch_warnings(record=True):
                panel = tm.makePanel()

                # put/select ok
                _maybe_remove(store, 'wp')
                store.put('wp', panel, format='table')
                store.select('wp')

                # non-table ok (where = None)
                _maybe_remove(store, 'wp')
                store.put('wp2', panel)
                store.select('wp2')

                # selection on the non-indexable with a large number of
                # columns
                big_panel = Panel(
                    np.random.randn(100, 100, 100),
                    items=['Item%03d' % i for i in range(100)],
                    major_axis=date_range('1/1/2000', periods=100),
                    minor_axis=['E%03d' % i for i in range(100)])

                _maybe_remove(store, 'wp')
                store.append('wp', big_panel)
                # NB: the where string below refers to this local by name
                items = ['Item%03d' % i for i in range(80)]
                selected = store.select('wp', 'items=items')
                assert_panel_equal(big_panel.reindex(items=items), selected)

                # select with columns=
                frame = tm.makeTimeDataFrame()
                _maybe_remove(store, 'df')
                store.append('df', frame)
                selected = store.select('df', columns=['A', 'B'])
                tm.assert_frame_equal(frame.reindex(columns=['A', 'B']),
                                      selected)

                # equivalently, with the columns named in a where string
                selected = store.select('df', ["columns=['A', 'B']"])
                tm.assert_frame_equal(frame.reindex(columns=['A', 'B']),
                                      selected)

                # where on a data column: 'A' alone as data column, every
                # column as data column, and 'A' as data column while other
                # columns are returned
                cases = [(['A'], ['A', 'B']),
                         (True, ['A', 'B']),
                         (['A'], ['C', 'D'])]
                for as_data_columns, wanted in cases:
                    _maybe_remove(store, 'df')
                    store.append('df', frame, data_columns=as_data_columns)
                    selected = store.select('df', ['A > 0'], columns=wanted)
                    positive = frame[frame.A > 0].reindex(columns=wanted)
                    tm.assert_frame_equal(positive, selected)

    def test_select_dtypes(self):
        """Select on data columns and indexes of various dtypes.

        Covers, in order: a Timestamp data column (GH #2637), a bool data
        column compared against several spellings of true/false (GH #2849),
        an integer index, a float index, float data columns with and without
        NaN, and a comparison against a numpy scalar held in a local variable
        (GH 11283).
        """

        with ensure_clean_store(self.path) as store:
            # with a Timestamp data column (GH #2637)
            df = DataFrame(dict(
                ts=bdate_range('2012-01-01', periods=300),
                A=np.random.randn(300)))
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=['ts', 'A'])

            result = store.select('df', "ts>=Timestamp('2012-02-01')")
            expected = df[df.ts >= Timestamp('2012-02-01')]
            tm.assert_frame_equal(expected, result)

            # bool columns (GH #2849)
            df = DataFrame(np.random.randn(5, 2), columns=['A', 'B'])
            df['object'] = 'foo'
            df.loc[4:5, 'object'] = 'bar'
            df['boolv'] = df['A'] > 0
            _maybe_remove(store, 'df')
            store.append('df', df, data_columns=True)

            # each spelling is interpolated into the where string via str()
            expected = (df[df.boolv == True]  # noqa
                        .reindex(columns=['A', 'boolv']))
            for v in [True, 'true', 1]:
                result = store.select('df', 'boolv == %s' % str(v),
                                      columns=['A', 'boolv'])
                tm.assert_frame_equal(expected, result)

            expected = (df[df.boolv == False]  # noqa
                        .reindex(columns=['A', 'boolv']))
            for v in [False, 'false', 0]:
                result = store.select(
                    'df', 'boolv == %s' % str(v), columns=['A', 'boolv'])
                tm.assert_frame_equal(expected, result)

            # integer index
            df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
            _maybe_remove(store, 'df_int')
            store.append('df_int', df)
            result = store.select(
                'df_int', "index<10 and columns=['A']")
            expected = df.reindex(index=list(df.index)[0:10], columns=['A'])
            tm.assert_frame_equal(expected, result)

            # float index
            # ``index`` has to be handed to the constructor itself: placed
            # inside the data dict it only creates a column called 'index'
            # and the frame keeps its default integer index, so the float
            # index is never exercised.
            df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)),
                           index=np.arange(20, dtype='f8'))
            _maybe_remove(store, 'df_float')
            store.append('df_float', df)
            result = store.select(
                'df_float', "index<10.0 and columns=['A']")
            expected = df.reindex(index=list(df.index)[0:10], columns=['A'])
            tm.assert_frame_equal(expected, result)

        with ensure_clean_store(self.path) as store:

            # floats w/o NaN
            df = DataFrame(
                dict(cols=range(11), values=range(11)), dtype='float64')
            df['cols'] = (df['cols'] + 10).apply(str)

            store.append('df1', df, data_columns=True)
            result = store.select(
                'df1', where='values>2.0')
            expected = df[df['values'] > 2.0]
            tm.assert_frame_equal(expected, result)

            # floats with NaN
            df.iloc[0] = np.nan
            expected = df[df['values'] > 2.0]

            # appended with index=False; see the PyTables issue noted below
            store.append('df2', df, data_columns=True, index=False)
            result = store.select(
                'df2', where='values>2.0')
            tm.assert_frame_equal(expected, result)

            # https://github.com/PyTables/PyTables/issues/282
            # bug in selection when 0th row has a np.nan and an index
            # store.append('df3',df,data_columns=True)
            # result = store.select(
            #    'df3', where='values>2.0')
            # tm.assert_frame_equal(expected, result)

            # not in first position float with NaN ok too
            df = DataFrame(
                dict(cols=range(11), values=range(11)), dtype='float64')
            df['cols'] = (df['cols'] + 10).apply(str)

            df.iloc[1] = np.nan
            expected = df[df['values'] > 2.0]

            store.append('df4', df, data_columns=True)
            result = store.select(
                'df4', where='values>2.0')
            tm.assert_frame_equal(expected, result)

        # test selection with comparison against numpy scalar
        # GH 11283
        with ensure_clean_store(self.path) as store:
            df = tm.makeDataFrame()

            expected = df[df['A'] > 0]

            store.append('df', df, data_columns=True)
            # only referenced by name from the where string, hence the noqa
            np_zero = np.float64(0)  # noqa
            result = store.select('df', where=["A>np_zero"])
            tm.assert_frame_equal(expected, result)

    def test_select_with_many_inputs(self):
        """Select with where clauses that compare against many values.

        The where strings refer to the local variable ``selector`` by name,
        so that name must not change.
        """
        with ensure_clean_store(self.path) as store:

            user_labels = (['a'] * 50 + ['b'] * 50 + ['c'] * 100 +
                           ['a%03d' % i for i in range(100)])
            frame = DataFrame(dict(ts=bdate_range('2012-01-01', periods=300),
                                   A=np.random.randn(300),
                                   B=range(300),
                                   users=user_labels))
            _maybe_remove(store, 'df')
            store.append('df', frame, data_columns=['ts', 'A', 'B', 'users'])

            # regular select
            selected = store.select('df', "ts>=Timestamp('2012-02-01')")
            from_feb = frame.ts >= Timestamp('2012-02-01')
            tm.assert_frame_equal(frame[from_feb], selected)

            # small selector
            selected = store.select(
                'df',
                "ts>=Timestamp('2012-02-01') & users=['a','b','c']")
            from_feb = frame.ts >= Timestamp('2012-02-01')
            named_users = frame.users.isin(['a', 'b', 'c'])
            tm.assert_frame_equal(frame[from_feb & named_users], selected)

            # big selector along the columns
            selector = ['a', 'b', 'c'] + ['a%03d' % i for i in range(60)]
            selected = store.select(
                'df',
                "ts>=Timestamp('2012-02-01') and users=selector")
            from_feb = frame.ts >= Timestamp('2012-02-01')
            listed_users = frame.users.isin(selector)
            tm.assert_frame_equal(frame[from_feb & listed_users], selected)

            # a range of integers as selector
            selector = range(100, 200)
            selected = store.select('df', 'B=selector')
            tm.assert_frame_equal(frame[frame.B.isin(selector)], selected)
            assert len(selected) == 100

            # big selector along the index
            selector = Index(frame.ts[0:100].values)
            selected = store.select('df', 'ts=selector')
            tm.assert_frame_equal(frame[frame.ts.isin(selector.values)],
                                  selected)
            assert len(selected) == 100

    def test_select_iterator(self):
        """Iterate over selections with ``iterator=True`` / ``chunksize``.

        Checks that the chunks concatenate back to the full selection for a
        single table (through both ``HDFStore.select`` and ``read_hdf``), that
        a node written without ``format='table'`` raises ``TypeError`` when
        iteration is requested, and that ``select_as_multiple`` can be
        chunked as well.
        """

        # single table
        with ensure_clean_store(self.path) as store:

            df = tm.makeTimeDataFrame(500)
            _maybe_remove(store, 'df')
            store.append('df', df)

            expected = store.select('df')

            results = [s for s in store.select('df', iterator=True)]
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            # 500 rows in chunks of 100 -> exactly five chunks
            results = [s for s in store.select('df', chunksize=100)]
            assert len(results) == 5
            result = concat(results)
            tm.assert_frame_equal(expected, result)

            # a chunk size that does not divide the row count evenly
            results = [s for s in store.select('df', chunksize=150)]
            result = concat(results)
            tm.assert_frame_equal(result, expected)

        with ensure_clean_path(self.path) as path:

            # written without format='table': iteration must be refused
            df = tm.makeTimeDataFrame(500)
            df.to_hdf(path, 'df_non_table')
            pytest.raises(TypeError, read_hdf, path,
                          'df_non_table', chunksize=100)
            pytest.raises(TypeError, read_hdf, path,
                          'df_non_table', iterator=True)

        with ensure_clean_path(self.path) as path:

            df = tm.makeTimeDataFrame(500)
            df.to_hdf(path, 'df', format='table')

            results = [s for s in read_hdf(path, 'df', chunksize=100)]
            result = concat(results)

            assert len(results) == 5
            tm.assert_frame_equal(result, df)
            tm.assert_frame_equal(result, read_hdf(path, 'df'))

        # multiple

        with ensure_clean_store(self.path) as store:

            df1 = tm.makeTimeDataFrame(500)
            store.append('df1', df1, data_columns=True)
            df2 = tm.makeTimeDataFrame(500).rename(
                columns=lambda x: "%s_2" % x)
            df2['foo'] = 'bar'
            store.append('df2', df2)

            # full selection: chunked and unchunked results must agree
            expected = store.select_as_multiple(
                ['df1', 'df2'], selector='df1')
            results = [s for s in store.select_as_multiple(
                ['df1', 'df2'], selector='df1', chunksize=150)]
            result = concat(results)
            tm.assert_frame_equal(expected, result)

    def test_select_iterator_complete_8014(self):
        """GH 8014: where clauses covering the whole index return all rows.

        Exercised first with plain selects, then with chunked (iterator)
        selects on a freshly written table.
        """
        chunksize = 1e4

        # plain selects, no iterator involved
        with ensure_clean_store(self.path) as store:

            expected = tm.makeTimeDataFrame(100064, 'S')
            _maybe_remove(store, 'df')
            store.append('df', expected)

            first, last = expected.index[0], expected.index[-1]

            # no where clause at all
            tm.assert_frame_equal(expected, store.select('df'))

            full_range_clauses = [
                # single term anchored at the beginning of the range
                "index >= '%s'" % first,
                # single term anchored at the end of the range
                "index <= '%s'" % last,
                # both bounds, inclusive
                "index >= '%s' & index <= '%s'" % (first, last),
            ]
            for where in full_range_clauses:
                result = store.select('df', where=where)
                tm.assert_frame_equal(expected, result)

        # chunked selects over the full range
        with ensure_clean_store(self.path) as store:

            expected = tm.makeTimeDataFrame(100064, 'S')
            _maybe_remove(store, 'df')
            store.append('df', expected)

            first, last = expected.index[0], expected.index[-1]

            # iterator without a where clause
            chunks = list(store.select('df', chunksize=chunksize))
            tm.assert_frame_equal(expected, concat(chunks))

            full_range_clauses = [
                # single term anchored at the beginning of the range
                "index >= '%s'" % first,
                # single term anchored at the end of the range
                "index <= '%s'" % last,
                # both bounds, inclusive
                "index >= '%s' & index <= '%s'" % (first, last),
            ]
            for where in full_range_clauses:
                chunks = list(store.select(
                    'df', where=where, chunksize=chunksize))
                tm.assert_frame_equal(expected, concat(chunks))

    def test_select_iterator_non_complete_8014(self):
        """GH 8014: chunked selects with where clauses that trim the range.

        Each clause is paired with the boolean mask that produces the same
        rows from the in-memory frame.
        """
        chunksize = 1e4

        # iterator, bounds one row inside each end of the index
        with ensure_clean_store(self.path) as store:

            expected = tm.makeTimeDataFrame(100064, 'S')
            _maybe_remove(store, 'df')
            store.append('df', expected)

            beg_dt = expected.index[1]
            end_dt = expected.index[-2]

            from_beg = expected.index >= beg_dt
            upto_end = expected.index <= end_dt

            cases = [
                # single term, begin of range
                ("index >= '%s'" % beg_dt, from_beg),
                # single term, end of range
                ("index <= '%s'" % end_dt, upto_end),
                # inclusive range
                ("index >= '%s' & index <= '%s'" % (beg_dt, end_dt),
                 from_beg & upto_end),
            ]
            for where, mask in cases:
                chunks = list(store.select(
                    'df', where=where, chunksize=chunksize))
                tm.assert_frame_equal(expected[mask], concat(chunks))

        # iterator with a where clause that matches nothing
        with ensure_clean_store(self.path) as store:

            expected = tm.makeTimeDataFrame(100064, 'S')
            _maybe_remove(store, 'df')
            store.append('df', expected)

            end_dt = expected.index[-1]

            # strictly after the last timestamp: no chunks are yielded
            where = "index > '%s'" % end_dt
            chunks = list(store.select(
                'df', where=where, chunksize=chunksize))
            assert len(chunks) == 0

    def test_select_iterator_many_empty_frames(self):
        """GH 8014: a chunked select must not yield empty frames.

        The where clauses restrict the result to the first chunk of the
        table (or to nothing at all), and the number of yielded chunks is
        checked.
        """
        chunksize = int(1e4)

        with ensure_clean_store(self.path) as store:

            expected = tm.makeTimeDataFrame(100000, 'S')
            _maybe_remove(store, 'df')
            store.append('df', expected)

            beg_dt = expected.index[0]
            end_dt = expected.index[chunksize - 1]

            # single term, begin of range: every row qualifies
            chunks = list(store.select(
                'df', where="index >= '%s'" % beg_dt, chunksize=chunksize))
            tm.assert_frame_equal(expected[expected.index >= beg_dt],
                                  concat(chunks))

            # single term, end of range: only the first chunk qualifies
            chunks = list(store.select(
                'df', where="index <= '%s'" % end_dt, chunksize=chunksize))
            assert len(chunks) == 1
            tm.assert_frame_equal(expected[expected.index <= end_dt],
                                  concat(chunks))

            # inclusive range: exactly one chunk is expected
            both = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)
            chunks = list(store.select(
                'df', where=both, chunksize=chunksize))
            assert len(chunks) == 1
            in_range = ((expected.index >= beg_dt) &
                        (expected.index <= end_dt))
            tm.assert_frame_equal(expected[in_range], concat(chunks))

            # A where clause that selects *nothing* should behave like
            # iterating an empty list: no chunks are produced.
            nothing = "index <= '%s' & index >= '%s'" % (beg_dt, end_dt)
            chunks = list(store.select(
                'df', where=nothing, chunksize=chunksize))
            assert len(chunks) == 0

    def test_retain_index_attributes(self):
        """GH 3499: index attributes survive a table round trip.

        Also checks that appending data with a different frequency resets
        the stored ``freq`` to None.
        """

        def frame_with(index):
            # three-row, single-column frame on the given index
            return DataFrame(dict(A=Series(lrange(3), index=index)))

        df = frame_with(date_range('2000-1-1', periods=3, freq='H'))

        with ensure_clean_store(self.path) as store:
            _maybe_remove(store, 'data')
            store.put('data', df, format='table')

            result = store.get('data')
            tm.assert_frame_equal(df, result)

            for attr in ['freq', 'tz', 'name']:
                for axis in ['index', 'columns']:
                    before = getattr(getattr(df, axis), attr, None)
                    after = getattr(getattr(result, axis), attr, None)
                    assert before == after

            # try to append a table with a different frequency
            with catch_warnings(record=True):
                df2 = frame_with(date_range('2002-1-1', periods=3, freq='D'))
                store.append('data', df2)

            assert store.get_storer('data').info['index']['freq'] is None

            # this is ok
            _maybe_remove(store, 'df2')
            irregular = [Timestamp('20010101'), Timestamp('20010102'),
                         Timestamp('20020101')]
            df2 = frame_with(irregular)
            store.append('df2', df2)
            df3 = frame_with(date_range('2002-1-1', periods=3, freq='D'))
            store.append('df2', df3)

    def test_retain_index_attributes2(self):
        """Index name is kept on write and dropped after a conflicting append.

        Uses ``to_hdf`` / ``read_hdf`` on a path rather than an open store.
        """

        def frame_with(index):
            # three-row, single-column frame on the given index
            return DataFrame(dict(A=Series(lrange(3), index=index)))

        with ensure_clean_path(self.path) as path:

            with catch_warnings(record=True):

                hourly = frame_with(
                    date_range('2000-1-1', periods=3, freq='H'))
                hourly.to_hdf(path, 'data', mode='w', append=True)
                daily = frame_with(
                    date_range('2002-1-1', periods=3, freq='D'))
                daily.to_hdf(path, 'data', append=True)

                # rewrite the file with a named index
                named = date_range('2000-1-1', periods=3, freq='H')
                named.name = 'foo'
                frame_with(named).to_hdf(path, 'data', mode='w', append=True)

            assert read_hdf(path, 'data').index.name == 'foo'

            with catch_warnings(record=True):

                # append with a different index name
                renamed = date_range('2001-1-1', periods=3, freq='H')
                renamed.name = 'bar'
                frame_with(renamed).to_hdf(path, 'data', append=True)

            assert read_hdf(path, 'data').index.name is None

    def test_panel_select(self):
        """Select from a table-format Panel with major/minor axis criteria.

        Each selection is compared against the equivalent in-memory
        ``truncate(...).reindex(minor=...)`` on the original panel.
        """

        with ensure_clean_store(self.path) as store:

            # record (and thereby silence) warnings raised inside the block
            with catch_warnings(record=True):

                wp = tm.makePanel()

                store.put('wp', wp, format='table')
                # middle entry of the major axis, used as the lower bound
                date = wp.major_axis[len(wp.major_axis) // 2]

                # NOTE(review): 'date' inside the string appears to be
                # resolved from this function's locals, so the local
                # variable name must stay `date` -- confirm before renaming
                crit1 = ('major_axis>=date')
                crit2 = ("minor_axis=['A', 'D']")

                result = store.select('wp', [crit1, crit2])
                expected = wp.truncate(before=date).reindex(minor=['A', 'D'])
                assert_panel_equal(result, expected)

                # same shape of query, with a literal date string
                result = store.select(
                    'wp', ['major_axis>="20000124"',
                           ("minor_axis=['A', 'B']")])
                expected = wp.truncate(
                    before='20000124').reindex(minor=['A', 'B'])
                assert_panel_equal(result, expected)

    def test_frame_select(self):
        """Select rows/columns from a table-format frame using terms.

        Covers a ``Term`` that captures a local variable, column filters,
        and an invalid comparison that must raise ValueError.
        """

        df = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:
            store.put('frame', df, format='table')
            # middle timestamp of the index, used as the lower bound
            date = df.index[len(df) // 2]

            # the Term picks up the local `date`; the assertion below
            # checks that it landed in the term's scope
            crit1 = Term('index>=date')
            assert crit1.env.scope['date'] == date

            crit2 = ("columns=['A', 'D']")
            crit3 = ('columns=A')

            result = store.select('frame', [crit1, crit2])
            expected = df.loc[date:, ['A', 'D']]
            tm.assert_frame_equal(result, expected)

            result = store.select('frame', [crit3])
            expected = df.loc[:, ['A']]
            tm.assert_frame_equal(result, expected)

            # invalid terms
            df = tm.makeTimeDataFrame()
            store.append('df_time', df)
            # comparing the index of a time frame against 0 must raise
            pytest.raises(
                ValueError, store.select, 'df_time', "index>0")

            # can't select if not written as table
            # store['frame'] = df
            # pytest.raises(ValueError, store.select,
            #                  'frame', [crit1, crit2])

    def test_frame_select_complex(self):
        """Select with compound where expressions (&, |, !=, ~, in).

        The frame has a 'string' data column that is 'bar' for the first
        four rows and 'foo' elsewhere; each query is compared against the
        equivalent boolean indexing on the in-memory frame.
        """
        # select via complex criteria

        df = tm.makeTimeDataFrame()
        df['string'] = 'foo'
        df.loc[df.index[0:4], 'string'] = 'bar'

        with ensure_clean_store(self.path) as store:
            store.put('df', df, format='table', data_columns=['string'])

            # NOTE: the where strings below reference `df` by name, so the
            # local variable must keep that name

            # empty
            result = store.select('df', 'index>df.index[3] & string="bar"')
            expected = df.loc[(df.index > df.index[3]) & (df.string == 'bar')]
            tm.assert_frame_equal(result, expected)

            result = store.select('df', 'index>df.index[3] & string="foo"')
            expected = df.loc[(df.index > df.index[3]) & (df.string == 'foo')]
            tm.assert_frame_equal(result, expected)

            # or
            result = store.select('df', 'index>df.index[3] | string="bar"')
            expected = df.loc[(df.index > df.index[3]) | (df.string == 'bar')]
            tm.assert_frame_equal(result, expected)

            # parenthesised range combined with an or
            result = store.select('df', '(index>df.index[3] & '
                                  'index<=df.index[6]) | string="bar"')
            expected = df.loc[((df.index > df.index[3]) & (
                df.index <= df.index[6])) | (df.string == 'bar')]
            tm.assert_frame_equal(result, expected)

            # invert
            result = store.select('df', 'string!="bar"')
            expected = df.loc[df.string != 'bar']
            tm.assert_frame_equal(result, expected)

            # invert not implemented in numexpr :(
            pytest.raises(NotImplementedError,
                          store.select, 'df', '~(string="bar")')

            # invert ok for filters
            result = store.select('df', "~(columns=['A','B'])")
            expected = df.loc[:, df.columns.difference(['A', 'B'])]
            tm.assert_frame_equal(result, expected)

            # in
            result = store.select(
                'df', "index>df.index[3] & columns in ['A','B']")
            expected = df.loc[df.index > df.index[3]].reindex(columns=[
                                                              'A', 'B'])
            tm.assert_frame_equal(result, expected)

    def test_frame_select_complex2(self):
        """Where expressions may reference local list-likes and indexes.

        A selection read from one file is used, through several spellings
        (local list, local Index, attribute access, call expressions), to
        filter a MultiIndex level of a second file.  Both ``read_hdf`` and
        an open ``HDFStore`` are exercised.
        """

        with ensure_clean_path(['parms.hdf', 'hist.hdf']) as paths:

            pp, hh = paths

            # use non-trivial selection criteria
            parms = DataFrame({'A': [1, 1, 2, 2, 3]})
            parms.to_hdf(pp, 'df', mode='w',
                         format='table', data_columns=['A'])

            selection = read_hdf(pp, 'df', where='A=[2,3]')
            hist = DataFrame(np.random.randn(25, 1),
                             columns=['data'],
                             index=MultiIndex.from_tuples(
                                 [(i, j) for i in range(5)
                                  for j in range(5)],
                                 names=['l1', 'l2']))

            hist.to_hdf(hh, 'df', mode='w', format='table')

            expected = read_hdf(hh, 'df', where='l1=[2, 3, 4]')

            # NOTE: `l`, `index` and `selection` are referenced by name
            # inside the where strings below; do not rename them.

            # scope with list like
            l = selection.index.tolist()  # noqa
            # context manager so the handle is closed even if an
            # assertion fails
            with HDFStore(hh) as store:
                result = store.select('df', where='l1=l')
                assert_frame_equal(result, expected)

            result = read_hdf(hh, 'df', where='l1=l')
            assert_frame_equal(result, expected)

            # index
            index = selection.index  # noqa
            result = read_hdf(hh, 'df', where='l1=index')
            assert_frame_equal(result, expected)

            result = read_hdf(hh, 'df', where='l1=selection.index')
            assert_frame_equal(result, expected)

            result = read_hdf(hh, 'df', where='l1=selection.index.tolist()')
            assert_frame_equal(result, expected)

            result = read_hdf(hh, 'df', where='l1=list(selection.index)')
            assert_frame_equal(result, expected)

            # scope with index
            with HDFStore(hh) as store:

                result = store.select('df', where='l1=index')
                assert_frame_equal(result, expected)

                result = store.select('df', where='l1=selection.index')
                assert_frame_equal(result, expected)

                result = store.select(
                    'df', where='l1=selection.index.tolist()')
                assert_frame_equal(result, expected)

                result = store.select(
                    'df', where='l1=list(selection.index)')
                assert_frame_equal(result, expected)

    def test_invalid_filtering(self):
        """Combining more than one column filter is not supported (atm)."""

        frame = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:
            store.put('df', frame, format='table')

            # or-ing two filters: not implemented
            with pytest.raises(NotImplementedError):
                store.select('df', "columns=['A'] | columns=['B']")

            # and-ing two filters: in theory we could deal with this
            with pytest.raises(NotImplementedError):
                store.select('df', "columns=['A','B'] & columns=['C']")

    def test_string_select(self):
        """GH 2973: ==/!= selection on string and integer data columns.

        Covers a string column containing empty strings, the same column
        with the empty strings replaced by NaN, and an integer column.
        """
        with ensure_clean_store(self.path) as store:

            df = tm.makeTimeDataFrame()

            # test string ==/!=
            df['x'] = 'none'
            df.loc[2:7, 'x'] = ''

            store.append('df', df, data_columns=['x'])

            result = store.select('df', 'x=none')
            expected = df[df.x == 'none']
            assert_frame_equal(result, expected)

            try:
                result = store.select('df', 'x!=none')
                expected = df[df.x != 'none']
                assert_frame_equal(result, expected)
            except Exception as detail:
                # Print diagnostics, then re-raise: swallowing the
                # exception here would hide assertion failures and make
                # this check impossible to fail.
                pprint_thing("[{0}]".format(detail))
                pprint_thing(store)
                pprint_thing(expected)
                raise

            # same data with the empty strings turned into NaN
            df2 = df.copy()
            df2.loc[df2.x == '', 'x'] = np.nan

            store.append('df2', df2, data_columns=['x'])
            result = store.select('df2', 'x!=none')
            expected = df2[isna(df2.x)]
            assert_frame_equal(result, expected)

            # int ==/!=
            df['int'] = 1
            df.loc[2:7, 'int'] = 2

            store.append('df3', df, data_columns=['int'])

            result = store.select('df3', 'int=2')
            expected = df[df.int == 2]
            assert_frame_equal(result, expected)

            result = store.select('df3', 'int!=2')
            expected = df[df.int != 2]
            assert_frame_equal(result, expected)

    def test_read_column(self):
        """Exercise ``HDFStore.select_column``.

        Covers a missing key, an unknown column, an unsupported ``where``,
        the index column, a non-indexable values block, string data
        columns (with and without NaN), start/stop slicing, and
        preservation of the column name (GH 10392).
        """

        df = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:
            _maybe_remove(store, 'df')

            # GH 17912
            # HDFStore.select_column should raise a KeyError
            # exception if the key is not a valid store.
            # `match=` checks the exception text; the former `message=`
            # argument only set the text shown when nothing was raised.
            with pytest.raises(KeyError,
                               match='No object named df in the file'):
                store.select_column('df', 'index')

            store.append('df', df)
            # error: unknown column
            with pytest.raises(KeyError):
                store.select_column('df', 'foo')

            # passing a where clause to select_column is rejected
            with pytest.raises(Exception):
                store.select_column('df', 'index', where=['index>5'])

            # valid
            result = store.select_column('df', 'index')
            tm.assert_almost_equal(result.values, Series(df.index).values)
            assert isinstance(result, Series)

            # not a data indexable column
            with pytest.raises(ValueError):
                store.select_column('df', 'values_block_0')

            # a data column
            df2 = df.copy()
            df2['string'] = 'foo'
            store.append('df2', df2, data_columns=['string'])
            result = store.select_column('df2', 'string')
            tm.assert_almost_equal(result.values, df2['string'].values)

            # a data column with NaNs; the result is compared against the
            # full column, NaN entries included
            df3 = df.copy()
            df3['string'] = 'foo'
            df3.loc[4:6, 'string'] = np.nan
            store.append('df3', df3, data_columns=['string'])
            result = store.select_column('df3', 'string')
            tm.assert_almost_equal(result.values, df3['string'].values)

            # start/stop
            result = store.select_column('df3', 'string', start=2)
            tm.assert_almost_equal(result.values, df3['string'].values[2:])

            result = store.select_column('df3', 'string', start=-2)
            tm.assert_almost_equal(result.values, df3['string'].values[-2:])

            result = store.select_column('df3', 'string', stop=2)
            tm.assert_almost_equal(result.values, df3['string'].values[:2])

            result = store.select_column('df3', 'string', stop=-2)
            tm.assert_almost_equal(result.values, df3['string'].values[:-2])

            result = store.select_column('df3', 'string', start=2, stop=-2)
            tm.assert_almost_equal(result.values, df3['string'].values[2:-2])

            result = store.select_column('df3', 'string', start=-2, stop=2)
            tm.assert_almost_equal(result.values, df3['string'].values[-2:2])

            # GH 10392 - make sure column name is preserved
            df4 = DataFrame({'A': np.random.randn(10), 'B': 'foo'})
            store.append('df4', df4, data_columns=True)
            expected = df4['B']
            result = store.select_column('df4', 'B')
            tm.assert_series_equal(result, expected)

    def test_coordinates(self):
        """Select by coordinates: Index, integer array, list, boolean mask.

        Covers ``select_as_coordinates`` with and without where clauses,
        reuse of one table's coordinates on another table, integer and
        boolean arrays passed as ``where``, invalid coordinate arrays,
        and plain start/stop selection.
        """
        df = tm.makeTimeDataFrame()

        with ensure_clean_store(self.path) as store:

            _maybe_remove(store, 'df')
            store.append('df', df)

            # all
            c = store.select_as_coordinates('df')
            assert((c.values == np.arange(len(df.index))).all())

            # get coordinates back & test vs frame
            _maybe_remove(store, 'df')

            df = DataFrame(dict(A=lrange(5), B=lrange(5)))
            store.append('df', df)
            c = store.select_as_coordinates('df', ['index<3'])
            assert((c.values == np.arange(3)).all())
            result = store.select('df', where=c)
            expected = df.loc[0:2, :]
            tm.assert_frame_equal(result, expected)

            c = store.select_as_coordinates('df', ['index>=3', 'index<=4'])
            assert((c.values == np.arange(2) + 3).all())
            result = store.select('df', where=c)
            expected = df.loc[3:4, :]
            tm.assert_frame_equal(result, expected)
            assert isinstance(c, Index)

            # multiple tables: coordinates from df1 are applied to df2
            _maybe_remove(store, 'df1')
            _maybe_remove(store, 'df2')
            df1 = tm.makeTimeDataFrame()
            df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
            store.append('df1', df1, data_columns=['A', 'B'])
            store.append('df2', df2)

            c = store.select_as_coordinates('df1', ['A>0', 'B>0'])
            df1_result = store.select('df1', c)
            df2_result = store.select('df2', c)
            result = concat([df1_result, df2_result], axis=1)

            expected = concat([df1, df2], axis=1)
            expected = expected[(expected.A > 0) & (expected.B > 0)]
            tm.assert_frame_equal(result, expected)

        # pass array/mask as the coordinates
        with ensure_clean_store(self.path) as store:

            df = DataFrame(np.random.randn(1000, 2),
                           index=date_range('20000101', periods=1000))
            store.append('df', df)
            c = store.select_column('df', 'index')
            # boolean mask over all rows, and the matching row locations
            mask = DatetimeIndex(c).month == 5
            where = c[mask].index
            expected = df.iloc[where]

            # locations
            result = store.select('df', where=where)
            tm.assert_frame_equal(result, expected)

            # boolean: pass the mask itself (previously this repeated the
            # integer-location select above)
            result = store.select('df', where=mask)
            tm.assert_frame_equal(result, expected)

            # invalid
            pytest.raises(ValueError, store.select, 'df',
                          where=np.arange(len(df), dtype='float64'))
            pytest.raises(ValueError, store.select, 'df',
                          where=np.arange(len(df) + 1))
            pytest.raises(ValueError, store.select, 'df',
                          where=np.arange(len(df)), start=5)
            pytest.raises(ValueError, store.select, 'df',
                          where=np.arange(len(df)), start=5, stop=10)

            # selection with filter; `selection` is referenced by name in
            # the where string, so the variable must keep this name
            selection = date_range('20000101', periods=500)
            result = store.select('df', where='index in selection')
            expected = df[df.index.isin(selection)]
            tm.assert_frame_equal(result, expected)

            # list
            df = DataFrame(np.random.randn(10, 2))
            store.append('df2', df)
            result = store.select('df2', where=[0, 3, 5])
            expected = df.iloc[[0, 3, 5]]
            tm.assert_frame_equal(result, expected)

            # boolean
            where = [True] * 10
            where[-2] = False
            result = store.select('df2', where=where)
            expected = df.loc[where]
            tm.assert_frame_equal(result, expected)

            # start/stop
            result = store.select('df2', start=5, stop=10)
            expected = df[5:10]
            tm.assert_frame_equal(result, expected)

    def test_append_to_multiple(self):
        """Split one frame across two tables with ``append_to_multiple``.

        Invalid table mappings / selectors must raise ValueError; a valid
        split must be readable back through ``select_as_multiple``.
        """
        left = tm.makeTimeDataFrame()
        right = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
        right['foo'] = 'bar'
        df = concat([left, right], axis=1)

        with ensure_clean_store(self.path) as store:

            # exceptions
            with pytest.raises(ValueError):
                store.append_to_multiple(
                    {'df1': ['A', 'B'], 'df2': None}, df, selector='df3')
            with pytest.raises(ValueError):
                store.append_to_multiple(
                    {'df1': None, 'df2': None}, df, selector='df3')
            with pytest.raises(ValueError):
                store.append_to_multiple('df1', df, 'df1')

            # regular operation
            store.append_to_multiple(
                {'df1': ['A', 'B'], 'df2': None}, df, selector='df1')
            selected = store.select_as_multiple(
                ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1')
            both_positive = (df.A > 0) & (df.B > 0)
            tm.assert_frame_equal(selected, df[both_positive])

    def test_append_to_multiple_dropna(self):
        """``dropna=True`` keeps the rows of the split tables in sync."""
        left = tm.makeTimeDataFrame()
        right = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
        # blank out A and B in the second row
        ab_positions = left.columns.get_indexer(['A', 'B'])
        left.iloc[1, ab_positions] = np.nan
        combined = concat([left, right], axis=1)

        with ensure_clean_store(self.path) as store:

            # dropna=True should guarantee rows are synchronized
            store.append_to_multiple(
                {'df1': ['A', 'B'], 'df2': None}, combined, selector='df1',
                dropna=True)
            stored = store.select_as_multiple(['df1', 'df2'])
            tm.assert_frame_equal(stored, combined.dropna())
            tm.assert_index_equal(store.select('df1').index,
                                  store.select('df2').index)

    @pytest.mark.xfail(run=False,
                       reason="append_to_multiple_dropna_false "
                       "is not raising as failed")
    def test_append_to_multiple_dropna_false(self):
        """``dropna=False`` leaves the split tables with differing rows.

        Marked xfail with run=False, so this body is currently not run.
        """
        left = tm.makeTimeDataFrame()
        right = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
        # blank out A and B in the second row
        ab_positions = left.columns.get_indexer(['A', 'B'])
        left.iloc[1, ab_positions] = np.nan
        combined = concat([left, right], axis=1)

        with ensure_clean_store(self.path) as store:

            # dropna=False shouldn't synchronize row indexes
            store.append_to_multiple(
                {'df1a': ['A', 'B'], 'df2a': None}, combined,
                selector='df1a', dropna=False)

            with pytest.raises(ValueError):
                store.select_as_multiple(['df1a', 'df2a'])

            selector_index = store.select('df1a').index
            other_index = store.select('df2a').index
            assert not selector_index.equals(other_index)

    def test_select_as_multiple(self):
        """Exercise ``select_as_multiple``: error cases and joined selects.

        Note: ``df2`` is referenced by name inside one of the where
        strings, so that local must keep its name.
        """

        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame().rename(columns=lambda x: "%s_2" % x)
        df2['foo'] = 'bar'

        with ensure_clean_store(self.path) as store:

            # no tables stored
            with pytest.raises(Exception):
                store.select_as_multiple(
                    None, where=['A>0', 'B>0'], selector='df1')

            store.append('df1', df1, data_columns=['A', 'B'])
            store.append('df2', df2)

            # exceptions: bad key arguments
            with pytest.raises(Exception):
                store.select_as_multiple(
                    None, where=['A>0', 'B>0'], selector='df1')
            with pytest.raises(Exception):
                store.select_as_multiple(
                    [None], where=['A>0', 'B>0'], selector='df1')

            # exceptions: unknown tables or selector
            with pytest.raises(KeyError):
                store.select_as_multiple(
                    ['df1', 'df3'], where=['A>0', 'B>0'], selector='df1')
            with pytest.raises(KeyError):
                store.select_as_multiple(
                    ['df3'], where=['A>0', 'B>0'], selector='df1')
            with pytest.raises(KeyError):
                store.select_as_multiple(
                    ['df1', 'df2'], where=['A>0', 'B>0'], selector='df4')

            # default select: a single key, given as a list or as a string
            result = store.select('df1', ['A>0', 'B>0'])
            from_list = store.select_as_multiple(
                ['df1'], where=['A>0', 'B>0'], selector='df1')
            tm.assert_frame_equal(result, from_list)
            from_str = store.select_as_multiple(
                'df1', where=['A>0', 'B>0'], selector='df1')
            tm.assert_frame_equal(result, from_str)

            # multiple
            result = store.select_as_multiple(
                ['df1', 'df2'], where=['A>0', 'B>0'], selector='df1')
            joined = concat([df1, df2], axis=1)
            expected = joined[(joined.A > 0) & (joined.B > 0)]
            tm.assert_frame_equal(result, expected)

            # multiple (diff selector)
            result = store.select_as_multiple(
                ['df1', 'df2'], where='index>df2.index[4]', selector='df2')
            expected = concat([df1, df2], axis=1)[5:]
            tm.assert_frame_equal(result, expected)

            # tables with differing row counts must raise
            store.append('df3', tm.makeTimeDataFrame(nper=50))
            with pytest.raises(ValueError):
                store.select_as_multiple(
                    ['df1', 'df3'], where=['A>0', 'B>0'], selector='df1')

    @pytest.mark.skipif(
        LooseVersion(tables.__version__) < LooseVersion('3.1.0'),
        reason=("tables version does not support fix for nan selection "
                "bug: GH 4858"))
    def test_nan_selection_bug_4858(self):
        """GH 4858: a numeric where clause works when a row is all-NaN."""

        with ensure_clean_store(self.path) as store:

            frame = DataFrame({'cols': range(6), 'values': range(6)},
                              dtype='float64')
            # 'cols' becomes the strings '10.0' .. '15.0'
            frame['cols'] = (frame['cols'] + 10).apply(str)
            # first row is entirely NaN
            frame.iloc[0] = np.nan

            expected = DataFrame({'cols': ['13.0', '14.0', '15.0'],
                                  'values': [3., 4., 5.]},
                                 index=[3, 4, 5])

            # all columns are data columns; only 'cols' is indexed
            store.append('df', frame, data_columns=True, index=['cols'])
            result = store.select('df', where='values>2.0')
            assert_frame_equal(result, expected)

    def test_start_stop_table(self):
        """start/stop on a table-format frame, in and out of range."""

        with ensure_clean_store(self.path) as store:

            # table
            frame = DataFrame({'A': np.random.rand(20),
                               'B': np.random.rand(20)})
            store.append('df', frame)

            head = store.select('df', "columns=['A']", start=0, stop=5)
            tm.assert_frame_equal(head, frame.loc[0:4, ['A']])

            # out of range: empty result, same shape as an empty .loc slice
            beyond = store.select('df', "columns=['A']", start=30, stop=40)
            assert len(beyond) == 0
            tm.assert_frame_equal(beyond, frame.loc[30:40, ['A']])

    def test_start_stop_multiple(self):
        """GH 16209: start/stop are honoured by ``select_as_multiple``."""
        with ensure_clean_store(self.path) as store:

            frame = DataFrame({"foo": [1, 2], "bar": [1, 2]})

            layout = {'selector': ['foo'], 'data': None}
            store.append_to_multiple(layout, frame, selector='selector')

            first_row = store.select_as_multiple(
                ['selector', 'data'], selector='selector', start=0, stop=1)
            tm.assert_frame_equal(first_row,
                                  frame.loc[[0], ['foo', 'bar']])

    def test_start_stop_fixed(self):
        """start/stop slicing when reading objects stored with ``put``."""
        with ensure_clean_store(self.path) as store:

            # fixed, GH 8287
            frame = DataFrame(dict(A=np.random.rand(20),
                                   B=np.random.rand(20)),
                              index=pd.date_range('20130101', periods=20))
            store.put('df', frame)

            # two windows inside the data, then one that is out of range
            for lo, hi in [(0, 5), (5, 10), (30, 40)]:
                result = store.select('df', start=lo, stop=hi)
                tm.assert_frame_equal(result, frame.iloc[lo:hi, :])

            # series
            ser = frame.A
            store.put('s', ser)
            for lo, hi in [(0, 5), (5, 10)]:
                result = store.select('s', start=lo, stop=hi)
                tm.assert_series_equal(result, ser.iloc[lo:hi])

            # sparse; not implemented
            dense = tm.makeDataFrame()
            dense.iloc[3:5, 1:3] = np.nan
            dense.iloc[8:10, -2] = np.nan
            store.put('dfs', dense.to_sparse())
            with pytest.raises(NotImplementedError):
                store.select('dfs', start=0, stop=5)

    def test_select_filter_corner(self):
        """Column filters given as slices of ``df.columns`` are honoured."""
        # NOTE: the where strings below mention ``df`` by name, so this local
        # has to keep that name.
        df = DataFrame(np.random.randn(50, 100))
        df.index = ['%.3d' % pos for pos in df.index]
        df.columns = ['%.3d' % pos for pos in df.columns]

        with ensure_clean_store(self.path) as store:
            store.put('frame', df, format='table')

            for crit, keep in (
                    ('columns=df.columns[:75]', slice(None, 75)),
                    ('columns=df.columns[:75:2]', slice(None, 75, 2))):
                result = store.select('frame', [crit])
                tm.assert_frame_equal(result, df.loc[:, df.columns[keep]])

    def test_path_pathlib(self):
        """Round-trip a frame with to_hdf/read_hdf via round_trip_pathlib."""
        df = tm.makeDataFrame()

        def write(target):
            return df.to_hdf(target, 'df')

        def read(target):
            return pd.read_hdf(target, 'df')

        tm.assert_frame_equal(df, tm.round_trip_pathlib(write, read))

    @pytest.mark.parametrize('start, stop', [(0, 2), (1, 2), (None, None)])
    def test_contiguous_mixed_data_table(self, start, stop):
        """Select a row window from a table mixing int and string columns."""
        # GH 17021
        # ValueError when reading a contiguous mixed-data table ft. VLArray
        numbers = Series([20111010, 20111011, 20111012])
        labels = Series(['ab', 'cd', 'ab'])
        df = DataFrame({'a': numbers, 'b': labels})

        with ensure_clean_store(self.path) as store:
            store.append('test_dataset', df)
            window = store.select('test_dataset', start=start, stop=stop)
            assert_frame_equal(df[start:stop], window)

    def test_path_pathlib_hdfstore(self):
        """Round-trip through an HDFStore opened on the helper's path."""
        df = tm.makeDataFrame()

        def write_via_store(target):
            with pd.HDFStore(target) as handle:
                df.to_hdf(handle, 'df')

        def read_via_store(target):
            with pd.HDFStore(target) as handle:
                return pd.read_hdf(handle, 'df')

        round_tripped = tm.round_trip_pathlib(write_via_store, read_via_store)
        tm.assert_frame_equal(df, round_tripped)

    def test_pickle_path_localpath(self):
        """Round-trip a frame with to_hdf/read_hdf via round_trip_localpath.
        """
        df = tm.makeDataFrame()
        # Use the localpath helper here: the pathlib helper is already
        # exercised by test_path_pathlib, and calling it again would leave
        # the localpath flavour untested.
        result = tm.round_trip_localpath(
            lambda p: df.to_hdf(p, 'df'),
            lambda p: pd.read_hdf(p, 'df'))
        tm.assert_frame_equal(df, result)

    def test_path_localpath_hdfstore(self):
        """Round-trip through an HDFStore using round_trip_localpath."""
        df = tm.makeDataFrame()

        def write_via_store(target):
            with pd.HDFStore(target) as handle:
                df.to_hdf(handle, 'df')

        def read_via_store(target):
            with pd.HDFStore(target) as handle:
                return pd.read_hdf(handle, 'df')

        round_tripped = tm.round_trip_localpath(write_via_store,
                                                read_via_store)
        tm.assert_frame_equal(df, round_tripped)

    def _check_roundtrip(self, obj, comparator, compression=False, **kwargs):
        """Store ``obj`` under the key 'obj', read it back and compare.

        ``comparator`` is invoked as ``comparator(retrieved, obj, **kwargs)``.
        A truthy ``compression`` opens the store with ``_default_compressor``
        as its ``complib``.
        """
        store_kwargs = (
            {'complib': _default_compressor} if compression else {})

        with ensure_clean_store(self.path, 'w', **store_kwargs) as store:
            store['obj'] = obj
            read_back = store['obj']
            comparator(read_back, obj, **kwargs)

    def _check_double_roundtrip(self, obj, comparator, compression=False,
                                **kwargs):
        """Write ``obj``, read it, write the result again and re-read it.

        Both the first and the second read are compared with the original
        via ``comparator(read, obj, **kwargs)``.

        ``compression`` may be a falsy value (no ``complib`` is passed), a
        compressor name (passed through as ``complib``), or ``True`` (use
        ``_default_compressor``, matching the sibling round-trip helpers).
        """
        options = {}
        if compression:
            # ``True`` is a flag rather than a library name; passing it on
            # verbatim would hand a non-string ``complib`` to the store.
            options['complib'] = (_default_compressor
                                  if compression is True else compression)

        with ensure_clean_store(self.path, 'w', **options) as store:
            store['obj'] = obj
            retrieved = store['obj']
            comparator(retrieved, obj, **kwargs)
            # second trip: store what was just read and read it once more
            store['obj'] = retrieved
            again = store['obj']
            comparator(again, obj, **kwargs)

    def _check_roundtrip_table(self, obj, comparator, compression=False):
        """Store ``obj`` in table format, read it back and compare.

        A truthy ``compression`` opens the store with ``_default_compressor``
        as its ``complib``.
        """
        store_kwargs = (
            {'complib': _default_compressor} if compression else {})

        with ensure_clean_store(self.path, 'w', **store_kwargs) as store:
            store.put('obj', obj, format='table')
            read_back = store['obj']
            comparator(read_back, obj)

    def test_multiple_open_close(self):
        """Open and close HDFStore handles in several combinations.

        Covers a single handle, several handles on the same file (the
        branch taken depends on
        ``pytables._table_file_open_policy_is_strict``) and the errors
        raised by operations on a store that has already been closed.
        """
        # gh-4409: open & close multiple times

        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.to_hdf(path, 'df', mode='w', format='table')

            # single
            store = HDFStore(path)
            assert 'CLOSED' not in store.info()
            assert store.is_open

            store.close()
            assert 'CLOSED' in store.info()
            assert not store.is_open

        with ensure_clean_path(self.path) as path:

            if pytables._table_file_open_policy_is_strict:

                # multiples
                # under the strict policy a second handle on the same path
                # is expected to be refused with ValueError
                store1 = HDFStore(path)

                def f():
                    HDFStore(path)
                pytest.raises(ValueError, f)
                store1.close()

            else:

                # multiples
                store1 = HDFStore(path)
                store2 = HDFStore(path)

                assert 'CLOSED' not in store1.info()
                assert 'CLOSED' not in store2.info()
                assert store1.is_open
                assert store2.is_open

                # closing one handle must leave the other one open
                store1.close()
                assert 'CLOSED' in store1.info()
                assert not store1.is_open
                assert 'CLOSED' not in store2.info()
                assert store2.is_open

                store2.close()
                assert 'CLOSED' in store1.info()
                assert 'CLOSED' in store2.info()
                assert not store1.is_open
                assert not store2.is_open

                # nested close
                # (``df`` is the frame created in the first section above)
                store = HDFStore(path, mode='w')
                store.append('df', df)

                store2 = HDFStore(path)
                store2.append('df2', df)
                store2.close()
                assert 'CLOSED' in store2.info()
                assert not store2.is_open

                store.close()
                assert 'CLOSED' in store.info()
                assert not store.is_open

                # double closing
                # here the first-opened handle is closed before the second
                store = HDFStore(path, mode='w')
                store.append('df', df)

                store2 = HDFStore(path)
                store.close()
                assert 'CLOSED' in store.info()
                assert not store.is_open

                store2.close()
                assert 'CLOSED' in store2.info()
                assert not store2.is_open

        # ops on a closed store
        with ensure_clean_path(self.path) as path:

            df = tm.makeDataFrame()
            df.to_hdf(path, 'df', mode='w', format='table')

            store = HDFStore(path)
            store.close()

            pytest.raises(ClosedFileError, store.keys)
            pytest.raises(ClosedFileError, lambda: 'df' in store)
            pytest.raises(ClosedFileError, lambda: len(store))
            pytest.raises(ClosedFileError, lambda: store['df'])
            # attribute-style access is the one case expected to raise
            # AttributeError instead of ClosedFileError
            pytest.raises(AttributeError, lambda: store.df)
            pytest.raises(ClosedFileError, store.select, 'df')
            pytest.raises(ClosedFileError, store.get, 'df')
            pytest.raises(ClosedFileError, store.append, 'df2', df)
            pytest.raises(ClosedFileError, store.put, 'df3', df)
            pytest.raises(ClosedFileError, store.get_storer, 'df2')
            pytest.raises(ClosedFileError, store.remove, 'df2')

            # the error message itself is checked as well
            def f():
                store.select('df')
            tm.assert_raises_regex(ClosedFileError, 'file is not open', f)

    def test_pytables_native_read(self, datapath):
        """Read 'detector/readout' from the pytables_native.h5 data file."""
        # Pass each path component separately, as the neighbouring tests do,
        # instead of embedding a '/' separator inside one component.
        with ensure_clean_store(
                datapath('io', 'data', 'legacy_hdf', 'pytables_native.h5'),
                mode='r') as store:
            d2 = store['detector/readout']
            assert isinstance(d2, DataFrame)

    @pytest.mark.skipif(PY35 and is_platform_windows(),
                        reason="native2 read fails oddly on windows / 3.5")
    def test_pytables_native2_read(self, datapath):
        """Read 'detector' from the pytables_native2.h5 data file."""
        native_file = datapath(
            'io', 'data', 'legacy_hdf', 'pytables_native2.h5')

        with ensure_clean_store(native_file, mode='r') as store:
            # building the string form of the store must not fail either
            str(store)
            detector = store['detector']
            assert isinstance(detector, DataFrame)

    def test_legacy_table_read(self, datapath):
        """Select from the objects stored in the legacy_table.h5 data file."""
        # legacy table types
        legacy_file = datapath('io', 'data', 'legacy_hdf', 'legacy_table.h5')

        with ensure_clean_store(legacy_file, mode='r') as store:
            with catch_warnings(record=True):
                # plain reads of each stored object
                for key in ('df1', 'df2', 'wp1'):
                    store.select(key)

                # force the frame
                store.select('df2', typ='legacy_frame')

                # old version warning
                pytest.raises(
                    Exception, store.select, 'wp1', 'minor_axis=B')

                # ``df2`` is referenced by name inside the where string,
                # so the local has to keep that name
                df2 = store.select('df2')
                result = store.select('df2', 'index>df2.index[2]')
                assert_frame_equal(df2[df2.index > df2.index[2]], result)

    def test_copy(self):
        """HDFStore.copy keeps keys, row counts and (optionally) indexes."""

        with catch_warnings(record=True):

            def do_copy(f, new_f=None, keys=None,
                        propindexes=True, **kwargs):
                """Copy the store at ``f`` to ``new_f`` and verify the copy.

                When ``new_f`` is None a temporary file is created for the
                copy. Both stores are closed and ``new_f`` is removed
                afterwards, whether or not the checks passed.
                """
                # Pre-bind everything the ``finally`` clause touches so a
                # failure while opening or copying surfaces as the real
                # error rather than a NameError raised during cleanup.
                store = tstore = fd = None
                try:
                    store = HDFStore(f, 'r')

                    if new_f is None:
                        fd, new_f = tempfile.mkstemp()

                    tstore = store.copy(
                        new_f, keys=keys, propindexes=propindexes, **kwargs)

                    # check keys
                    if keys is None:
                        keys = store.keys()
                    assert set(keys) == set(tstore.keys())

                    # check indices & nrows
                    for k in tstore.keys():
                        new_t = tstore.get_storer(k)
                        if not new_t.is_table:
                            continue
                        orig_t = store.get_storer(k)

                        assert orig_t.nrows == new_t.nrows

                        # check propindexes
                        if propindexes:
                            for a in orig_t.axes:
                                if a.is_indexed:
                                    assert new_t[a.name].is_indexed

                finally:
                    safe_close(store)
                    safe_close(tstore)
                    # only close the descriptor if mkstemp actually ran
                    if fd is not None:
                        try:
                            os.close(fd)
                        except OSError:
                            pass
                    safe_remove(new_f)

            # new table
            df = tm.makeDataFrame()

            path = create_tempfile(self.path)
            try:
                # the ``with`` block closes the source store even if the
                # append fails
                with HDFStore(path) as st:
                    st.append('df', df, data_columns=['A'])
                do_copy(f=path)
                do_copy(f=path, propindexes=False)
            finally:
                safe_remove(path)

    def test_store_datetime_fractional_secs(self):
        """Microseconds in a datetime index label survive a round trip."""
        stamp = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456)
        series = Series([0], [stamp])

        with ensure_clean_store(self.path) as store:
            store['a'] = series
            assert store['a'].index[0] == stamp

    def test_tseries_indices_series(self):
        """Date- and period-indexed Series keep index class and freq."""
        with ensure_clean_store(self.path) as store:
            # same checks for a DatetimeIndex, then a PeriodIndex
            for make_index in (tm.makeDateIndex, tm.makePeriodIndex):
                idx = make_index(10)
                ser = Series(np.random.randn(len(idx)), idx)
                store['a'] = ser
                result = store['a']

                tm.assert_series_equal(result, ser)
                assert result.index.freq == ser.index.freq
                tm.assert_class_equal(result.index, ser.index,
                                      obj="series index")

    def test_tseries_indices_frame(self):
        """Date- and period-indexed DataFrames keep index class and freq."""
        with ensure_clean_store(self.path) as store:
            # same checks for a DatetimeIndex, then a PeriodIndex
            for make_index in (tm.makeDateIndex, tm.makePeriodIndex):
                idx = make_index(10)
                df = DataFrame(np.random.randn(len(idx), 3), index=idx)
                store['a'] = df
                result = store['a']

                assert_frame_equal(result, df)
                assert result.index.freq == df.index.freq
                tm.assert_class_equal(result.index, df.index,
                                      obj="dataframe index")

    def test_unicode_index(self):
        """A Series indexed by non-ASCII labels round-trips."""
        labels = [u('\u03c3'), u('\u03c3\u03c3')]

        # PerformanceWarning
        with catch_warnings(record=True):
            ser = Series(np.random.randn(len(labels)), labels)
            self._check_roundtrip(ser, tm.assert_series_equal)

    def test_unicode_longer_encoded(self):
        """Table-format round trip of a non-ASCII string with utf-8."""
        # GH 11234
        char = '\u0394'
        frames = (
            pd.DataFrame({'A': [char]}),
            pd.DataFrame({'A': ['a', char], 'B': ['b', 'b']}),
        )

        # each frame is written to, and read from, its own fresh store
        for df in frames:
            with ensure_clean_store(self.path) as store:
                store.put('df', df, format='table', encoding='utf-8')
                result = store.get('df')
                tm.assert_frame_equal(result, df)

    def test_store_datetime_mixed(self):
        """Round-trip a frame with int, float, string and date columns."""
        mixed = DataFrame({'a': [1, 2, 3],
                           'b': [1., 2., 3.],
                           'c': ['a', 'b', 'c']})
        # fourth column: the first three index entries of a time series
        mixed['d'] = tm.makeTimeSeries().index[:3]
        self._check_roundtrip(mixed, tm.assert_frame_equal)

    # def test_cant_write_multiindex_table(self):
    #    # for now, #1848
    #    df = DataFrame(np.random.randn(10, 4),
    #                   index=[np.arange(5).repeat(2),
    #                          np.tile(np.arange(2), 5)])

    #    pytest.raises(Exception, store.put, 'foo', df, format='table')

    def test_append_with_diff_col_name_types_raises_value_error(self):
        """Appending frames with other column labels raises ValueError."""
        base = DataFrame(np.random.randn(10, 1))
        # candidates whose single column label differs from ``base``'s
        mismatched = [
            DataFrame({label: np.random.randn(10)})
            for label in ('a', (1, 2), ('1', 2), ('1', 2, object))
        ]

        with ensure_clean_store(self.path) as store:
            name = 'df_%s' % tm.rands(10)
            store.append(name, base)

            for other in mismatched:
                with pytest.raises(ValueError):
                    store.append(name, other)

    def test_query_with_nested_special_character(self):
        """A quoted query value containing '&' is matched literally."""
        letters = ['a', 'a', 'c', 'b', 'test & test', 'c', 'b', 'e']
        df = DataFrame({'a': letters,
                        'b': [1, 2, 3, 4, 5, 6, 7, 8]})
        expected = df[df.a == 'test & test']

        with ensure_clean_store(self.path) as store:
            store.append('test', df, format='table', data_columns=True)
            result = store.select('test', 'a = "test & test"')

        tm.assert_frame_equal(expected, result)

    def test_categorical(self):
        """Store, read back, query, append to and remove categorical data.

        All steps share one store; later steps reuse keys and locals
        (``s``, ``df``) set by earlier ones, so statement order matters.
        """

        with ensure_clean_store(self.path) as store:

            # Basic
            _maybe_remove(store, 's')
            s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[
                       'a', 'b', 'c', 'd'], ordered=False))
            store.append('s', s, format='table')
            result = store.select('s')
            tm.assert_series_equal(s, result)

            # same data, ordered categorical
            _maybe_remove(store, 's_ordered')
            s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[
                       'a', 'b', 'c', 'd'], ordered=True))
            store.append('s_ordered', s, format='table')
            result = store.select('s_ordered')
            tm.assert_series_equal(s, result)

            _maybe_remove(store, 'df')

            # frame holding the ordered categorical next to an int column
            df = DataFrame({"s": s, "vals": [1, 2, 3, 4, 5, 6]})
            store.append('df', df, format='table')
            result = store.select('df')
            tm.assert_frame_equal(result, df)

            # Dtypes
            s = Series([1, 1, 2, 2, 3, 4, 5]).astype('category')
            store.append('si', s)
            result = store.select('si')
            tm.assert_series_equal(result, s)

            # same, with a NaN among the values
            s = Series([1, 1, np.nan, 2, 3, 4, 5]).astype('category')
            store.append('si2', s)
            result = store.select('si2')
            tm.assert_series_equal(result, s)

            # Multiple
            df2 = df.copy()
            df2['s2'] = Series(list('abcdefg')).astype('category')
            store.append('df2', df2)
            result = store.select('df2')
            tm.assert_frame_equal(result, df2)

            # Make sure the metadata is OK
            info = store.info()
            assert '/df2   ' in info
            # assert '/df2/meta/values_block_0/meta' in info
            assert '/df2/meta/values_block_1/meta' in info

            # unordered
            s = Series(Categorical(['a', 'b', 'b', 'a', 'a', 'c'], categories=[
                       'a', 'b', 'c', 'd'], ordered=False))
            store.append('s2', s, format='table')
            result = store.select('s2')
            tm.assert_series_equal(result, s)

            # Query
            # 's' in the where strings below names the data column of 'df3'
            store.append('df3', df, data_columns=['s'])
            expected = df[df.s.isin(['b', 'c'])]
            result = store.select('df3', where=['s in ["b","c"]'])
            tm.assert_frame_equal(result, expected)

            # '=' with a list is expected to select the same rows as 'in'
            expected = df[df.s.isin(['b', 'c'])]
            result = store.select('df3', where=['s = ["b","c"]'])
            tm.assert_frame_equal(result, expected)

            # 'd' is a declared category that no row uses
            expected = df[df.s.isin(['d'])]
            result = store.select('df3', where=['s in ["d"]'])
            tm.assert_frame_equal(result, expected)

            # 'f' is not among the declared categories at all
            expected = df[df.s.isin(['f'])]
            result = store.select('df3', where=['s in ["f"]'])
            tm.assert_frame_equal(result, expected)

            # Appending with same categories is ok
            store.append('df3', df)

            # 'df3' now holds df twice, so compare against the doubled frame
            df = concat([df, df])
            expected = df[df.s.isin(['b', 'c'])]
            result = store.select('df3', where=['s in ["b","c"]'])
            tm.assert_frame_equal(result, expected)

            # Appending must have the same categories
            df3 = df.copy()
            df3['s'].cat.remove_unused_categories(inplace=True)

            with pytest.raises(ValueError):
                store.append('df3', df3)

            # Remove, and make sure meta data is removed (its a recursive
            # removal so should be).
            result = store.select('df3/meta/s/meta')
            assert result is not None
            store.remove('df3')

            with pytest.raises(KeyError):
                store.select('df3/meta/s/meta')

    def test_categorical_conversion(self):
        """A where clause matching nothing yields an empty, typed frame."""
        # GH13322
        # Check that read_hdf with categorical columns doesn't return rows if
        # where criteria isn't met.
        obsids = ['ESP_012345_6789', 'ESP_987654_3210']
        imgids = ['APF00006np', 'APF0001imm']
        data = [4.3, 9.8]

        df = DataFrame(dict(obsids=obsids, imgids=imgids, data=data))

        # first pass: plain columns; second pass: categorical columns
        for use_categories in (False, True):
            if use_categories:
                df.obsids = df.obsids.astype('category')
                df.imgids = df.imgids.astype('category')

            # We are expecting an empty DataFrame matching types of df
            expected = df.iloc[[], :]
            with ensure_clean_path(self.path) as path:
                df.to_hdf(path, 'df', format='table', data_columns=True)
                result = read_hdf(path, 'df', where='obsids=B')
                tm.assert_frame_equal(result, expected)

    def test_categorical_nan_only_columns(self):
        """Categorical columns holding only NaN can be written and read."""
        # GH18413
        # Check that read_hdf with categorical columns with NaN-only values can
        # be read back.
        nan = np.nan
        df = pd.DataFrame({
            'a': ['a', 'b', 'c', nan],
            'b': [nan, nan, nan, nan],
            'c': [1, 2, 3, 4],
            'd': pd.Series([None] * 4, dtype=object)
        })
        for col in ('a', 'b'):
            df[col] = df[col].astype('category')
        # NOTE(review): 'd' is overwritten with the categorical form of
        # column 'b', not of 'd' itself -- confirm whether that is intended.
        df['d'] = df['b'].astype('category')

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', format='table', data_columns=True)
            result = read_hdf(path, 'df')
            tm.assert_frame_equal(result, df)

    def test_duplicate_column_name(self):
        """Duplicate column labels: fixed format raises, table format works.
        """
        df = DataFrame(columns=["a", "a"], data=[[0, 0]])

        with ensure_clean_path(self.path) as path:
            with pytest.raises(ValueError):
                df.to_hdf(path, 'df', format='fixed')

            df.to_hdf(path, 'df', format='table')
            reread = read_hdf(path, 'df')

            tm.assert_frame_equal(df, reread)
            # equality must hold in both directions
            assert df.equals(reread)
            assert reread.equals(df)

    def test_round_trip_equals(self):
        """GH 9330: a table round trip compares equal via ``equals``."""
        original = DataFrame({"B": [1, 2], "A": ["x", "y"]})

        with ensure_clean_path(self.path) as path:
            original.to_hdf(path, 'df', format='table')
            reread = read_hdf(path, 'df')

            tm.assert_frame_equal(original, reread)
            # equality must hold in both directions
            assert original.equals(reread)
            assert reread.equals(original)

    def test_preserve_timedeltaindex_type(self):
        """A timedelta-indexed frame round-trips through ``store[key]``."""
        # GH9635
        # Storing TimedeltaIndexed DataFrames in fixed stores did not preserve
        # the type of the index.
        frame = DataFrame(np.random.normal(size=(10, 5)))
        frame.index = timedelta_range(
            start='0s', periods=10, freq='1s', name='example')

        with ensure_clean_store(self.path) as store:
            store['df'] = frame
            assert_frame_equal(store['df'], frame)

    def test_columns_multiindex_modified(self):
        """read_hdf must leave the caller's ``columns`` list untouched."""
        # BUG: 7212
        # read_hdf store.select modified the passed columns parameters
        # when multi-indexed.
        df = DataFrame(np.random.rand(4, 5),
                       index=list('abcd'),
                       columns=list('ABCDE'))
        df.index.name = 'letters'
        df = df.set_index(keys='E', append=True)
        data_columns = df.index.names + df.columns.tolist()

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', mode='a', append=True,
                      data_columns=data_columns, index=False)

            requested = list('BCD')
            snapshot = list(requested)
            # only the side effect on ``requested`` matters here
            read_hdf(path, 'df', columns=requested)
            assert snapshot == requested

    def test_to_hdf_with_object_column_names(self):
        """Table format accepts only string-like column label types."""
        # GH9057
        # Writing HDF5 table format should only work for string-like
        # column types
        failing_makers = [tm.makeIntIndex, tm.makeFloatIndex,
                          tm.makeDateIndex, tm.makeTimedeltaIndex,
                          tm.makePeriodIndex]
        passing_makers = [tm.makeStringIndex, tm.makeCategoricalIndex]

        # TODO: on Python 2, add makeUnicodeIndex back to the failing makers
        # https://github.com/pandas-dev/pandas/issues/20907
        if compat.PY3:
            passing_makers.append(tm.makeUnicodeIndex)

        expected_msg = ("cannot have non-object label "
                        "DataIndexableCol")
        for make_index in failing_makers:
            df = DataFrame(np.random.randn(10, 2), columns=make_index(2))
            with ensure_clean_path(self.path) as path, \
                    catch_warnings(record=True), \
                    tm.assert_raises_regex(ValueError, expected_msg):
                df.to_hdf(path, 'df', format='table',
                          data_columns=True)

        for make_index in passing_makers:
            df = DataFrame(np.random.randn(10, 2), columns=make_index(2))
            with ensure_clean_path(self.path) as path, \
                    catch_warnings(record=True):
                df.to_hdf(path, 'df', format='table', data_columns=True)
                # query on the first index label; at least one row must match
                where = "index = [{0}]".format(df.index[0])
                result = pd.read_hdf(path, 'df', where=where)
                assert len(result)

    def test_read_hdf_open_store(self):
        """read_hdf accepts an open HDFStore and leaves it open."""
        # GH10330
        # No check for non-string path_or-buf, and no test of open store
        df = DataFrame(np.random.rand(4, 5),
                       index=list('abcd'),
                       columns=list('ABCDE'))
        df.index.name = 'letters'
        df = df.set_index(keys='E', append=True)

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', mode='w')
            direct = read_hdf(path, 'df')
            # The ``with`` block closes the handle even when an assertion
            # fails, so a failure cannot leave the file open while the
            # temporary path is being cleaned up.
            with HDFStore(path, mode='r') as store:
                indirect = read_hdf(store, 'df')
                tm.assert_frame_equal(direct, indirect)
                assert store.is_open

    def test_read_hdf_iterator(self):
        """read_hdf(iterator=True) returns a TableIterator over the data."""
        df = DataFrame(np.random.rand(4, 5),
                       index=list('abcd'),
                       columns=list('ABCDE'))
        df.index.name = 'letters'
        df = df.set_index(keys='E', append=True)

        with ensure_clean_path(self.path) as path:
            df.to_hdf(path, 'df', mode='w', format='t')
            direct = read_hdf(path, 'df')
            iterator = read_hdf(path, 'df', iterator=True)
            # Close the iterator's store even when a check below fails, so
            # a failure does not leak the open file handle.
            try:
                assert isinstance(iterator, TableIterator)
                indirect = next(iterator.__iter__())
                tm.assert_frame_equal(direct, indirect)
            finally:
                iterator.store.close()

    def test_read_hdf_errors(self):
        """read_hdf raises IOError for an unwritten path or closed store."""
        df = DataFrame(np.random.rand(4, 5),
                       index=list('abcd'),
                       columns=list('ABCDE'))

        with ensure_clean_path(self.path) as path:
            # nothing has been written to ``path`` at this point
            with pytest.raises(IOError):
                read_hdf(path, 'key')

            df.to_hdf(path, 'df')
            closed_store = HDFStore(path, mode='r')
            closed_store.close()
            with pytest.raises(IOError):
                read_hdf(closed_store, 'df')

    def test_read_hdf_generic_buffer_errors(self):
        """read_hdf on an in-memory buffer raises NotImplementedError."""
        empty_buffer = BytesIO(b'')
        with pytest.raises(NotImplementedError):
            read_hdf(empty_buffer, 'df')

    def test_invalid_complib(self):
        """to_hdf raises ValueError for the complib name 'foolib'."""
        frame = DataFrame(np.random.rand(4, 5),
                          index=list('abcd'),
                          columns=list('ABCDE'))

        with ensure_clean_path(self.path) as path, \
                pytest.raises(ValueError):
            frame.to_hdf(path, 'df', complib='foolib')
    # GH10443

    def test_read_nokey(self):
        """read_hdf without a key: works for one object, raises for two."""
        frame = DataFrame(np.random.rand(4, 5),
                          index=list('abcd'),
                          columns=list('ABCDE'))

        # Categorical dtype not supported for "fixed" format. So no need
        # to test with that dtype in the dataframe here.
        with ensure_clean_path(self.path) as path:
            frame.to_hdf(path, 'df', mode='a')
            assert_frame_equal(frame, read_hdf(path))

            # a second key in the file makes the key-less read raise
            frame.to_hdf(path, 'df2', mode='a')
            with pytest.raises(ValueError):
                read_hdf(path)

    def test_read_nokey_table(self):
        """Key-less read_hdf on a table file with a categorical column."""
        # GH13231
        frame = DataFrame({'i': range(5),
                           'c': Series(list('abacd'), dtype='category')})

        with ensure_clean_path(self.path) as path:
            frame.to_hdf(path, 'df', mode='a', format='table')
            assert_frame_equal(frame, read_hdf(path))

            # a second key in the file makes the key-less read raise
            frame.to_hdf(path, 'df2', mode='a', format='table')
            with pytest.raises(ValueError):
                read_hdf(path)

    def test_read_nokey_empty(self):
        """Key-less read_hdf raises ValueError when nothing was stored."""
        with ensure_clean_path(self.path) as path:
            # open and immediately close a store, writing no objects to it
            HDFStore(path).close()
            with pytest.raises(ValueError):
                read_hdf(path)

    @td.skip_if_no('pathlib')
    def test_read_from_pathlib_path(self):
        """GH11773: to_hdf and read_hdf accept a ``pathlib.Path``."""
        from pathlib import Path

        written = DataFrame(np.random.rand(4, 5),
                            index=list('abcd'),
                            columns=list('ABCDE'))

        with ensure_clean_path(self.path) as filename:
            target = Path(filename)
            written.to_hdf(target, 'df', mode='a')
            reread = read_hdf(target, 'df')

        tm.assert_frame_equal(written, reread)

    @td.skip_if_no('py.path')
    def test_read_from_py_localpath(self):
        """GH11773: to_hdf and read_hdf accept a ``py.path.local``."""
        from py.path import local as LocalPath

        written = DataFrame(np.random.rand(4, 5),
                            index=list('abcd'),
                            columns=list('ABCDE'))

        with ensure_clean_path(self.path) as filename:
            target = LocalPath(filename)
            written.to_hdf(target, 'df', mode='a')
            reread = read_hdf(target, 'df')

        tm.assert_frame_equal(written, reread)

    def test_query_long_float_literal(self):
        """GH 14241: long float literals in a where clause keep precision."""
        df = pd.DataFrame({'A': [1000000000.0009,
                                 1000000000.0011,
                                 1000000000.0015]})

        with ensure_clean_store(self.path) as store:
            store.append('test', df, format='table', data_columns=True)

            # cutoff below every stored value: nothing is selected
            below = store.select('test', "A < %.4f" % 1000000000.0006)
            assert below.empty

            # cutoff between the first and second stored values
            above = store.select('test', "A > %.4f" % 1000000000.0010)
            tm.assert_frame_equal(df.loc[[1, 2], :], above)

            # exact match on the second stored value
            equal = store.select('test', 'A == %.4f' % 1000000000.0011)
            tm.assert_frame_equal(df.loc[[1], :], equal)

    def test_query_compare_column_type(self):
        """Compare data columns against values of matching and other types.

        The where strings refer to the locals ``ts`` and ``v`` by name, so
        those names must not be changed without changing the queries too.
        """
        # GH 15492
        df = pd.DataFrame({'date': ['2014-01-01', '2014-01-02'],
                           'real_date': date_range('2014-01-01', periods=2),
                           'float': [1.1, 1.2],
                           'int': [1, 2]},
                          columns=['date', 'real_date', 'float', 'int'])

        with ensure_clean_store(self.path) as store:
            store.append('test', df, format='table', data_columns=True)

            # ``ts`` is only used through the where string below
            ts = pd.Timestamp('2014-01-01')  # noqa
            result = store.select('test', where='real_date > ts')
            expected = df.loc[[1], :]
            tm.assert_frame_equal(expected, result)

            for op in ['<', '>', '==']:
                # non strings to string column always fail
                for v in [2.1, True, pd.Timestamp('2014-01-01'),
                          pd.Timedelta(1, 's')]:
                    query = 'date {op} v'.format(op=op)
                    with pytest.raises(TypeError):
                        result = store.select('test', where=query)

                # strings to other columns must be convertible to type
                v = 'a'
                for col in ['int', 'float', 'real_date']:
                    query = '{col} {op} v'.format(op=op, col=col)
                    with pytest.raises(ValueError):
                        result = store.select('test', where=query)

                # string values that do convert to each column's type; each
                # ``v`` equals the value in row 0 of its column
                for v, col in zip(['1', '1.1', '2014-01-01'],
                                  ['int', 'float', 'real_date']):
                    query = '{col} {op} v'.format(op=op, col=col)
                    result = store.select('test', where=query)

                    # '==' matches row 0, '>' matches row 1, '<' matches none
                    if op == '==':
                        expected = df.loc[[0], :]
                    elif op == '>':
                        expected = df.loc[[1], :]
                    else:
                        expected = df.loc[[], :]
                    tm.assert_frame_equal(expected, result)

    @pytest.mark.parametrize('format', ['fixed', 'table'])
    def test_read_hdf_series_mode_r(self, format):
        """Read a stored Series back while passing ``mode='r'``."""
        # GH 16583
        # Tests that reading a Series saved to an HDF file
        # still works if a mode='r' argument is supplied
        written = tm.makeFloatSeries()

        with ensure_clean_path(self.path) as path:
            written.to_hdf(path, key='data', format=format)
            reread = pd.read_hdf(path, key='data', mode='r')

        tm.assert_series_equal(reread, written)

    @pytest.mark.skipif(not PY36, reason="Need python 3.6")
    def test_fspath(self):
        """os.fspath on an open HDFStore gives back the path it was opened on.
        """
        with tm.ensure_clean('foo.h5') as path, pd.HDFStore(path) as store:
            assert os.fspath(store) == str(path)

    def test_read_py2_hdf_file_in_py3(self, datapath):
        """Read a PeriodIndex frame from a file written under Python 2."""
        # GH 16781

        # tests reading a PeriodIndex DataFrame written in Python2 in Python3

        # the file was generated in Python 2.7 like so:
        #
        # df = pd.DataFrame([1.,2,3], index=pd.PeriodIndex(
        #              ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))
        # df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p')

        periods = pd.PeriodIndex(
            ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B')
        expected = pd.DataFrame([1., 2, 3], index=periods)

        legacy_file = datapath('io', 'data', 'legacy_hdf',
                               'periodindex_0.20.1_x86_64_darwin_2.7.13.h5')
        with ensure_clean_store(legacy_file, mode='r') as store:
            assert_frame_equal(store['p'], expected)


class TestHDFComplexValues(Base):
    # GH10447
    # complex-valued data written to, and read back from, HDF files

    @staticmethod
    def _random_complex_frame(dtype):
        # 4x5 frame of random values cast to the requested complex dtype
        return DataFrame(np.random.rand(4, 5).astype(dtype),
                         index=list('abcd'),
                         columns=list('ABCDE'))

    @staticmethod
    def _mixed_frame():
        # int, string, complex64, complex128 and float columns side by side
        ones = [1.0 + 1.0j] * 4
        return DataFrame({'A': [1, 2, 3, 4],
                          'B': ['a', 'b', 'c', 'd'],
                          'C': np.array(ones, dtype=np.complex64),
                          'D': np.array(ones, dtype=np.complex128),
                          'E': [1.0, 2.0, 3.0, 4.0]},
                         index=list('abcd'))

    def test_complex_fixed(self):
        # round trip through to_hdf (no explicit format) for both
        # complex widths
        for dtype in (np.complex64, np.complex128):
            frame = self._random_complex_frame(dtype)
            with ensure_clean_path(self.path) as path:
                frame.to_hdf(path, 'df')
                assert_frame_equal(frame, read_hdf(path, 'df'))

    def test_complex_table(self):
        # same round trip with format='table'; the complex128 case
        # additionally passes mode='w'
        cases = [(np.complex64, {}),
                 (np.complex128, {'mode': 'w'})]
        for dtype, extra in cases:
            frame = self._random_complex_frame(dtype)
            with ensure_clean_path(self.path) as path:
                frame.to_hdf(path, 'df', format='table', **extra)
                assert_frame_equal(frame, read_hdf(path, 'df'))

    def test_complex_mixed_fixed(self):
        # complex columns next to int/str/float ones, no explicit format
        frame = self._mixed_frame()
        with ensure_clean_path(self.path) as path:
            frame.to_hdf(path, 'df')
            assert_frame_equal(frame, read_hdf(path, 'df'))

    def test_complex_mixed_table(self):
        frame = self._mixed_frame()

        # querying on the non-complex data columns
        with ensure_clean_store(self.path) as store:
            store.append('df', frame, data_columns=['A', 'B'])
            selected = store.select('df', where='A>2')
            assert_frame_equal(frame.loc[frame['A'] > 2], selected)

        # plain table round trip
        with ensure_clean_path(self.path) as path:
            frame.to_hdf(path, 'df', format='table')
            assert_frame_equal(frame, read_hdf(path, 'df'))

    def test_complex_across_dimensions_fixed(self):
        # Series, DataFrame and Panel, each written with format='fixed'
        with catch_warnings(record=True):
            values = np.array([1.0 + 1.0j] * 4)
            ser = Series(values, index=list('abcd'))
            frame = DataFrame({'A': ser, 'B': ser})
            panel = Panel({'One': frame, 'Two': frame})

            checks = [(ser, tm.assert_series_equal),
                      (frame, tm.assert_frame_equal),
                      (panel, tm.assert_panel_equal)]
            for obj, assert_equal in checks:
                with ensure_clean_path(self.path) as path:
                    obj.to_hdf(path, 'obj', format='fixed')
                    assert_equal(obj, read_hdf(path, 'obj'))

    def test_complex_across_dimensions(self):
        # DataFrame and Panel, each written with format='table'
        values = np.array([1.0 + 1.0j] * 4)
        ser = Series(values, index=list('abcd'))
        frame = DataFrame({'A': ser, 'B': ser})

        with catch_warnings(record=True):
            panel = Panel({'One': frame, 'Two': frame})

            checks = [(frame, tm.assert_frame_equal),
                      (panel, tm.assert_panel_equal)]
            for obj, assert_equal in checks:
                with ensure_clean_path(self.path) as path:
                    obj.to_hdf(path, 'obj', format='table')
                    assert_equal(obj, read_hdf(path, 'obj'))

    def test_complex_indexing_error(self):
        # asking for the complex column as a data column must raise
        values = np.array([1.0 + 1.0j] * 4, dtype=np.complex128)
        frame = DataFrame({'A': [1, 2, 3, 4],
                           'B': ['a', 'b', 'c', 'd'],
                           'C': values},
                          index=list('abcd'))
        with ensure_clean_store(self.path) as store:
            with pytest.raises(TypeError):
                store.append('df', frame, data_columns=['C'])

    def test_complex_series_error(self):
        values = np.array([1.0 + 1.0j] * 4)
        ser = Series(values, index=list('abcd'))

        # format='t' without index=False is expected to raise
        with ensure_clean_path(self.path) as path:
            with pytest.raises(TypeError):
                ser.to_hdf(path, 'obj', format='t')

        # with index=False the write succeeds and the data round-trips
        with ensure_clean_path(self.path) as path:
            ser.to_hdf(path, 'obj', format='t', index=False)
            tm.assert_series_equal(ser, read_hdf(path, 'obj'))

    def test_complex_append(self):
        # two appends of the same frame must read back as its
        # concatenation with itself
        frame = DataFrame({'a': np.random.randn(100).astype(np.complex128),
                           'b': np.random.randn(100)})

        with ensure_clean_store(self.path) as store:
            store.append('df', frame, data_columns=['b'])
            store.append('df', frame)
            stored = store.select('df')
            assert_frame_equal(pd.concat([frame, frame], axis=0), stored)


class TestTimezones(Base):

    def _compare_with_tz(self, a, b):
        # the frames have to match as a whole first
        tm.assert_frame_equal(a, b)

        # then every cell is checked again, including its tz attribute
        for col in a.columns:
            for row in a.index:
                left = a.loc[row, col]
                right = b.loc[row, col]
                if left == right and left.tz == right.tz:
                    continue
                raise AssertionError(
                    "invalid tz comparison [%s] [%s]" % (left, right))

    def test_append_with_timezones_dateutil(self):

        from datetime import timedelta

        # go through maybe_get_tz rather than dateutil.tz.gettz so that
        # the windows filename issues are handled.
        from pandas._libs.tslibs.timezones import maybe_get_tz

        def gettz(name):
            return maybe_get_tz('dateutil/' + name)

        # NOTE: the frame under test stays bound to the local name ``df``
        # because the ``where`` string further down mentions it by name.

        # tz-aware values stored as columns
        with ensure_clean_store(self.path) as store:

            _maybe_remove(store, 'df_tz')
            stamps = [Timestamp('20130102 2:00:00', tz=gettz('US/Eastern')) +
                      timedelta(hours=1) * i for i in range(5)]
            df = DataFrame({'A': stamps})

            store.append('df_tz', df, data_columns=['A'])
            stored = store['df_tz']
            self._compare_with_tz(stored, df)
            assert_frame_equal(stored, df)

            # select with a tz-aware bound
            expected = df[df.A >= df.A[3]]
            selected = store.select('df_tz', where='A>=df.A[3]')
            self._compare_with_tz(selected, expected)

            # ensure we include dates in DST and STD time here.
            _maybe_remove(store, 'df_tz')
            df = DataFrame(
                {'A': Timestamp('20130102', tz=gettz('US/Eastern')),
                 'B': Timestamp('20130603', tz=gettz('US/Eastern'))},
                index=range(5))
            store.append('df_tz', df)
            stored = store['df_tz']
            self._compare_with_tz(stored, df)
            assert_frame_equal(stored, df)

            # column B now carries another zone (EET); appending to the
            # existing table is expected to raise
            df = DataFrame(
                {'A': Timestamp('20130102', tz=gettz('US/Eastern')),
                 'B': Timestamp('20130102', tz=gettz('EET'))},
                index=range(5))
            with pytest.raises(ValueError):
                store.append('df_tz', df)

            # this is ok: fresh table, both columns as data columns
            _maybe_remove(store, 'df_tz')
            store.append('df_tz', df, data_columns=['A', 'B'])
            stored = store['df_tz']
            self._compare_with_tz(stored, df)
            assert_frame_equal(stored, df)

            # can't append with diff timezone
            df = DataFrame(
                {'A': Timestamp('20130102', tz=gettz('US/Eastern')),
                 'B': Timestamp('20130102', tz=gettz('CET'))},
                index=range(5))
            with pytest.raises(ValueError):
                store.append('df_tz', df)

        # tz-aware values used as the index
        with ensure_clean_store(self.path) as store:

            # GH 4098 example
            hourly = date_range('2000-1-1', periods=3, freq='H',
                                tz=gettz('US/Eastern'))
            df = DataFrame({'A': Series(lrange(3), index=hourly)})

            # once through put, once through append
            for write in (store.put, store.append):
                _maybe_remove(store, 'df')
                write('df', df)
                assert_frame_equal(store.select('df'), df)

    def test_append_with_timezones_pytz(self):

        from datetime import timedelta

        # NOTE: the frame under test stays bound to the local name ``df``
        # because the ``where`` string further down mentions it by name.

        # tz-aware values stored as columns
        with ensure_clean_store(self.path) as store:

            _maybe_remove(store, 'df_tz')
            stamps = [Timestamp('20130102 2:00:00', tz='US/Eastern') +
                      timedelta(hours=1) * i
                      for i in range(5)]
            df = DataFrame({'A': stamps})
            store.append('df_tz', df, data_columns=['A'])
            stored = store['df_tz']
            self._compare_with_tz(stored, df)
            assert_frame_equal(stored, df)

            # select with a tz-aware bound
            selected = store.select('df_tz', where='A>=df.A[3]')
            expected = df[df.A >= df.A[3]]
            self._compare_with_tz(selected, expected)

            _maybe_remove(store, 'df_tz')
            # ensure we include dates in DST and STD time here.
            df = DataFrame({'A': Timestamp('20130102', tz='US/Eastern'),
                            'B': Timestamp('20130603', tz='US/Eastern')},
                           index=range(5))
            store.append('df_tz', df)
            stored = store['df_tz']
            self._compare_with_tz(stored, df)
            assert_frame_equal(stored, df)

            # column B now carries another zone (EET); appending to the
            # existing table is expected to raise
            df = DataFrame({'A': Timestamp('20130102', tz='US/Eastern'),
                            'B': Timestamp('20130102', tz='EET')},
                           index=range(5))
            with pytest.raises(ValueError):
                store.append('df_tz', df)

            # this is ok: fresh table, both columns as data columns
            _maybe_remove(store, 'df_tz')
            store.append('df_tz', df, data_columns=['A', 'B'])
            stored = store['df_tz']
            self._compare_with_tz(stored, df)
            assert_frame_equal(stored, df)

            # can't append with diff timezone
            df = DataFrame({'A': Timestamp('20130102', tz='US/Eastern'),
                            'B': Timestamp('20130102', tz='CET')},
                           index=range(5))
            with pytest.raises(ValueError):
                store.append('df_tz', df)

        # tz-aware values used as the index
        with ensure_clean_store(self.path) as store:

            # GH 4098 example
            hourly = date_range('2000-1-1', periods=3, freq='H',
                                tz='US/Eastern')
            df = DataFrame({'A': Series(lrange(3), index=hourly)})

            # once through put, once through append
            for write in (store.put, store.append):
                _maybe_remove(store, 'df')
                write('df', df)
                assert_frame_equal(store.select('df'), df)

    def test_tseries_select_index_column(self):
        # GH7777
        # selecting a UTC datetimeindex column did
        # not preserve UTC tzinfo set before storing

        def tz_via_values(column):
            # rebuild an index from the raw values and read its tz
            return DatetimeIndex(column.values).tz

        def tz_via_dt(column):
            return column.dt.tz

        # first check that no tz still works, then utc, then double
        # check a non-utc zone
        cases = [(None, tz_via_values),
                 ('UTC', tz_via_dt),
                 ('US/Eastern', tz_via_dt)]
        for zone, stored_tz in cases:
            rng = date_range('1/1/2000', '1/30/2000', tz=zone)
            frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

            with ensure_clean_store(self.path) as store:
                store.append('frame', frame)
                column = store.select_column('frame', 'index')
                assert rng.tz == stored_tz(column)

    def test_timezones_fixed(self):
        with ensure_clean_store(self.path) as store:

            # tz-aware index
            eastern = date_range('1/1/2000', '1/30/2000', tz='US/Eastern')
            frame = DataFrame(np.random.randn(len(eastern), 4),
                              index=eastern)
            store['df'] = frame
            assert_frame_equal(store['df'], frame)

            # tz-aware and naive datetimes as column data
            # GH11411
            _maybe_remove(store, 'df')
            columns = {'A': eastern,
                       'B': eastern.tz_convert('UTC').tz_localize(None),
                       'C': eastern.tz_convert('CET'),
                       'D': range(len(eastern))}
            frame = DataFrame(columns, index=eastern)
            store['df'] = frame
            assert_frame_equal(store['df'], frame)

    def test_fixed_offset_tz(self):
        # an index built from '-07:00' offset strings must come back
        # with equal values and an equal tz
        rng = date_range('1/1/2000 00:00:00-07:00', '1/30/2000 00:00:00-07:00')
        original = DataFrame(np.random.randn(len(rng), 4), index=rng)

        with ensure_clean_store(self.path) as store:
            store['frame'] = original
            stored_index = store['frame'].index
            tm.assert_index_equal(stored_index, rng)
            assert rng.tz == stored_index.tz

    @td.skip_if_windows
    def test_store_timezone(self):
        # GH2852
        # issue storing datetime.date with a timezone as it resets when read
        # back in a new timezone

        def dated_frame():
            # three rows that all share one datetime.date as index label
            day = datetime.date(2013, 9, 10)
            return DataFrame([1, 2, 3], index=[day] * 3)

        # original method: write and read without touching the timezone
        with ensure_clean_store(self.path) as store:

            df = dated_frame()
            store['obj1'] = df
            assert_frame_equal(store['obj1'], df)

        # with tz setting: write under one timezone, read under another
        with ensure_clean_store(self.path) as store:

            with set_timezone('EST5EDT'):
                df = dated_frame()
                store['obj1'] = df

            with set_timezone('CST6CDT'):
                result = store['obj1']

            assert_frame_equal(result, df)

    def test_legacy_datetimetz_object(self, datapath):
        # legacy from < 0.17.0
        # 8260
        columns = {'A': Timestamp('20130102', tz='US/Eastern'),
                   'B': Timestamp('20130603', tz='CET')}
        expected = DataFrame(columns, index=range(5))

        legacy_file = datapath('io', 'data', 'legacy_hdf',
                               'datetimetz_object.h5')

        # mode='r': the legacy file is only read
        with ensure_clean_store(legacy_file, mode='r') as store:
            assert_frame_equal(store['df'], expected)

    def test_dst_transitions(self):
        # make sure we are not failing on transitions
        with ensure_clean_store(self.path) as store:
            times = pd.date_range("2013-10-26 23:00", "2013-10-27 01:00",
                                  tz="Europe/London",
                                  freq="H",
                                  ambiguous='infer')
            shifted = times + pd.Timedelta('10min')

            # the index is used both as the frame index and as column B
            for idx in (times, shifted):
                _maybe_remove(store, 'df')
                df = DataFrame({'A': range(len(idx)), 'B': idx}, index=idx)
                store.append('df', df)
                assert_frame_equal(store.select('df'), df)