# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Dataset testing operations.
Tests all dataset operations, including creation, with the exception of:
1. Slicing operations for read and write, handled by module test_slicing
2. Type conversion for read and write (currently untested)
"""
from __future__ import absolute_import
import sys
import six
import numpy as np
from ..common import ut, TestCase
from h5py.highlevel import File, Group, Dataset
from h5py._hl.base import is_empty_dataspace
from h5py import h5t
import h5py
class BaseDataset(TestCase):
def setUp(self):
self.f = File(self.mktemp(), 'w')
def tearDown(self):
if self.f:
self.f.close()
class TestRepr(BaseDataset):
"""
Feature: repr(Dataset) behaves sensibly
"""
def test_repr_open(self):
""" repr() works on live and dead datasets """
ds = self.f.create_dataset('foo', (4,))
self.assertIsInstance(repr(ds), six.string_types)
self.f.close()
self.assertIsInstance(repr(ds), six.string_types)
class TestCreateShape(BaseDataset):
"""
Feature: Datasets can be created from a shape only
"""
def test_create_scalar(self):
""" Create a scalar dataset """
dset = self.f.create_dataset('foo', ())
self.assertEqual(dset.shape, ())
def test_create_simple(self):
""" Create a size-1 dataset """
dset = self.f.create_dataset('foo', (1,))
self.assertEqual(dset.shape, (1,))
def test_create_extended(self):
""" Create an extended dataset """
dset = self.f.create_dataset('foo', (63,))
self.assertEqual(dset.shape, (63,))
self.assertEqual(dset.size, 63)
dset = self.f.create_dataset('bar', (6, 10))
self.assertEqual(dset.shape, (6, 10))
self.assertEqual(dset.size, 60)
def test_default_dtype(self):
""" Confirm that the default dtype is float """
dset = self.f.create_dataset('foo', (63,))
self.assertEqual(dset.dtype, np.dtype('=f4'))
def test_missing_shape(self):
""" Missing shape raises TypeError """
with self.assertRaises(TypeError):
self.f.create_dataset('foo')
def test_long_double(self):
""" Confirm that the default dtype is float """
dset = self.f.create_dataset('foo', (63,), dtype=np.longdouble)
self.assertEqual(dset.dtype, np.longdouble)
@ut.skipIf(not hasattr(np, "complex256"), "No support for complex256")
def test_complex256(self):
""" Confirm that the default dtype is float """
dset = self.f.create_dataset('foo', (63,),
dtype=np.dtype('complex256'))
self.assertEqual(dset.dtype, np.dtype('complex256'))
class TestCreateData(BaseDataset):
"""
Feature: Datasets can be created from existing data
"""
def test_create_scalar(self):
""" Create a scalar dataset from existing array """
data = np.ones((), 'f')
dset = self.f.create_dataset('foo', data=data)
self.assertEqual(dset.shape, data.shape)
def test_create_extended(self):
""" Create an extended dataset from existing data """
data = np.ones((63,), 'f')
dset = self.f.create_dataset('foo', data=data)
self.assertEqual(dset.shape, data.shape)
def test_dataset_intermediate_group(self):
""" Create dataset with missing intermediate groups """
ds = self.f.create_dataset("/foo/bar/baz", shape=(10, 10), dtype='<i4')
self.assertIsInstance(ds, h5py.Dataset)
self.assertTrue("/foo/bar/baz" in self.f)
def test_reshape(self):
""" Create from existing data, and make it fit a new shape """
data = np.arange(30, dtype='f')
dset = self.f.create_dataset('foo', shape=(10, 3), data=data)
self.assertEqual(dset.shape, (10, 3))
self.assertArrayEqual(dset[...], data.reshape((10, 3)))
def test_appropriate_low_level_id(self):
" Binding Dataset to a non-DatasetID identifier fails with ValueError "
with self.assertRaises(ValueError):
Dataset(self.f['/'].id)
@ut.expectedFailure
def test_create_bytestring(self):
""" Creating dataset with byte string yields vlen ASCII dataset """
# No real test is implemented yet; force a failure so @ut.expectedFailure records it.
self.assertEqual(True, False)
def test_empty_create_via_None_shape(self):
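# A dataset created with a dtype but neither shape nor data gets a null (empty) dataspace.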
self.f.create_dataset('foo', dtype='f')
self.assertTrue(is_empty_dataspace(self.f['foo'].id))
def test_empty_create_via_Empty_class(self):
self.f.create_dataset('foo', data=h5py.Empty(dtype='f'))
self.assertTrue(is_empty_dataspace(self.f['foo'].id))
class TestCreateRequire(BaseDataset):
"""
Feature: Datasets can be created only if they don't exist in the file
"""
def test_create(self):
""" Create new dataset with no conflicts """
dset = self.f.require_dataset('foo', (10, 3), 'f')
self.assertIsInstance(dset, Dataset)
self.assertEqual(dset.shape, (10, 3))
def test_create_existing(self):
""" require_dataset yields existing dataset """
dset = self.f.require_dataset('foo', (10, 3), 'f')
dset2 = self.f.require_dataset('foo', (10, 3), 'f')
self.assertEqual(dset, dset2)
def test_shape_conflict(self):
""" require_dataset with shape conflict yields TypeError """
self.f.create_dataset('foo', (10, 3), 'f')
with self.assertRaises(TypeError):
self.f.require_dataset('foo', (10, 4), 'f')
def test_type_conflict(self):
""" require_dataset with object type conflict yields TypeError """
self.f.create_group('foo')
with self.assertRaises(TypeError):
self.f.require_dataset('foo', (10, 3), 'f')
def test_dtype_conflict(self):
""" require_dataset with dtype conflict (strict mode) yields TypeError
"""
dset = self.f.create_dataset('foo', (10, 3), 'f')
with self.assertRaises(TypeError):
self.f.require_dataset('foo', (10, 3), 'S10')
def test_dtype_close(self):
""" require_dataset with convertible type succeeds (non-strict mode)
"""
dset = self.f.create_dataset('foo', (10, 3), 'i4')
dset2 = self.f.require_dataset('foo', (10, 3), 'i2', exact=False)
self.assertEqual(dset, dset2)
self.assertEqual(dset2.dtype, np.dtype('i4'))
class TestCreateChunked(BaseDataset):
"""
Feature: Datasets can be created by manually specifying chunks
"""
def test_create_chunks(self):
""" Create via chunks tuple """
dset = self.f.create_dataset('foo', shape=(100,), chunks=(10,))
self.assertEqual(dset.chunks, (10,))
def test_chunks_mismatch(self):
""" Illegal chunk size raises ValueError """
with self.assertRaises(ValueError):
self.f.create_dataset('foo', shape=(100,), chunks=(200,))
def test_chunks_scalar(self):
""" Attempting to create chunked scalar dataset raises TypeError """
with self.assertRaises(TypeError):
self.f.create_dataset('foo', shape=(), chunks=(50,))
def test_auto_chunks(self):
""" Auto-chunking of datasets """
dset = self.f.create_dataset('foo', shape=(20, 100), chunks=True)
self.assertIsInstance(dset.chunks, tuple)
self.assertEqual(len(dset.chunks), 2)
def test_auto_chunks_abuse(self):
""" Auto-chunking with pathologically large element sizes """
dset = self.f.create_dataset('foo', shape=(3,), dtype='S100000000', chunks=True)
self.assertEqual(dset.chunks, (1,))
class TestCreateFillvalue(BaseDataset):
"""
Feature: Datasets can be created with fill value
"""
def test_create_fillval(self):
""" Fill value is reflected in dataset contents """
dset = self.f.create_dataset('foo', (10,), fillvalue=4.0)
self.assertEqual(dset[0], 4.0)
self.assertEqual(dset[7], 4.0)
def test_property(self):
""" Fill value is recoverable via property """
dset = self.f.create_dataset('foo', (10,), fillvalue=3.0)
self.assertEqual(dset.fillvalue, 3.0)
self.assertNotIsInstance(dset.fillvalue, np.ndarray)
def test_property_none(self):
""" .fillvalue property works correctly if not set """
dset = self.f.create_dataset('foo', (10,))
self.assertEqual(dset.fillvalue, 0)
def test_compound(self):
""" Fill value works with compound types """
dt = np.dtype([('a', 'f4'), ('b', 'i8')])
v = np.ones((1,), dtype=dt)[0]
dset = self.f.create_dataset('foo', (10,), dtype=dt, fillvalue=v)
self.assertEqual(dset.fillvalue, v)
self.assertAlmostEqual(dset[4], v)
def test_exc(self):
""" Bogus fill value raises TypeError """
with self.assertRaises(ValueError):
dset = self.f.create_dataset('foo', (10,),
dtype=[('a', 'i'), ('b', 'f')], fillvalue=42)
class TestCreateNamedType(BaseDataset):
"""
Feature: Datasets created from an existing named type
"""
def test_named(self):
""" Named type object works and links the dataset to type """
self.f['type'] = np.dtype('f8')
dset = self.f.create_dataset('x', (100,), dtype=self.f['type'])
self.assertEqual(dset.dtype, np.dtype('f8'))
self.assertEqual(dset.id.get_type(), self.f['type'].id)
self.assertTrue(dset.id.get_type().committed())
@ut.skipIf('gzip' not in h5py.filters.encode, "DEFLATE is not installed")
class TestCreateGzip(BaseDataset):
"""
Feature: Datasets created with gzip compression
"""
def test_gzip(self):
""" Create with explicit gzip options """
dset = self.f.create_dataset('foo', (20, 30), compression='gzip',
compression_opts=9)
self.assertEqual(dset.compression, 'gzip')
self.assertEqual(dset.compression_opts, 9)
def test_gzip_implicit(self):
""" Create with implicit gzip level (level 4) """
dset = self.f.create_dataset('foo', (20, 30), compression='gzip')
self.assertEqual(dset.compression, 'gzip')
self.assertEqual(dset.compression_opts, 4)
def test_gzip_number(self):
""" Create with gzip level by specifying integer """
dset = self.f.create_dataset('foo', (20, 30), compression=7)
self.assertEqual(dset.compression, 'gzip')
self.assertEqual(dset.compression_opts, 7)
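# With the legacy integer-to-gzip-level shortcut disabled, a bare integer compression value is rejected.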
original_compression_vals = h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS
try:
h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = tuple()
with self.assertRaises(ValueError):
dset = self.f.create_dataset('foo', (20, 30), compression=7)
finally:
h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = original_compression_vals
def test_gzip_exc(self):
""" Illegal gzip level (explicit or implicit) raises ValueError """
with self.assertRaises((ValueError, RuntimeError)):
self.f.create_dataset('foo', (20, 30), compression=14)
with self.assertRaises(ValueError):
self.f.create_dataset('foo', (20, 30), compression=-4)
with self.assertRaises(ValueError):
self.f.create_dataset('foo', (20, 30), compression='gzip',
compression_opts=14)
@ut.skipIf('gzip' not in h5py.filters.encode, "DEFLATE is not installed")
class TestCreateCompressionNumber(BaseDataset):
"""
Feature: Datasets created with a compression code
"""
def test_compression_number(self):
""" Create with compression number of gzip (h5py.h5z.FILTER_DEFLATE) and a compression level of 7"""
original_compression_vals = h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS
try:
h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = tuple()
dset = self.f.create_dataset('foo', (20, 30), compression=h5py.h5z.FILTER_DEFLATE, compression_opts=(7,))
finally:
h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = original_compression_vals
self.assertEqual(dset.compression, 'gzip')
self.assertEqual(dset.compression_opts, 7)
def test_compression_number_invalid(self):
""" Create with invalid compression numbers """
with self.assertRaises(ValueError) as e:
self.f.create_dataset('foo', (20, 30), compression=-999)
self.assertIn("Invalid filter", str(e.exception))
with self.assertRaises(ValueError) as e:
self.f.create_dataset('foo', (20, 30), compression=100)
self.assertIn("Unknown compression", str(e.exception))
original_compression_vals = h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS
try:
h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = tuple()
# Using gzip compression requires a compression level specified in compression_opts
with self.assertRaises(IndexError):
self.f.create_dataset('foo', (20, 30), compression=h5py.h5z.FILTER_DEFLATE)
finally:
h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = original_compression_vals
@ut.skipIf('lzf' not in h5py.filters.encode, "LZF is not installed")
class TestCreateLZF(BaseDataset):
"""
Feature: Datasets created with LZF compression
"""
def test_lzf(self):
""" Create with explicit lzf """
dset = self.f.create_dataset('foo', (20, 30), compression='lzf')
self.assertEqual(dset.compression, 'lzf')
self.assertEqual(dset.compression_opts, None)
def test_lzf_exc(self):
""" Giving lzf options raises ValueError """
with self.assertRaises(ValueError):
self.f.create_dataset('foo', (20, 30), compression='lzf',
compression_opts=4)
@ut.skipIf('szip' not in h5py.filters.encode, "SZIP is not installed")
class TestCreateSZIP(BaseDataset):
"""
Feature: Datasets created with SZIP compression
"""
def test_szip(self):
""" Create with explicit szip """
dset = self.f.create_dataset('foo', (20, 30), compression='szip',
compression_opts=('ec', 16))
@ut.skipIf('shuffle' not in h5py.filters.encode, "SHUFFLE is not installed")
class TestCreateShuffle(BaseDataset):
"""
Feature: Datasets can use shuffling filter
"""
def test_shuffle(self):
""" Enable shuffle filter """
dset = self.f.create_dataset('foo', (20, 30), shuffle=True)
self.assertTrue(dset.shuffle)
@ut.skipIf('fletcher32' not in h5py.filters.encode, "FLETCHER32 is not installed")
class TestCreateFletcher32(BaseDataset):
"""
Feature: Datasets can use the fletcher32 filter
"""
def test_fletcher32(self):
""" Enable fletcher32 filter """
dset = self.f.create_dataset('foo', (20, 30), fletcher32=True)
self.assertTrue(dset.fletcher32)
@ut.skipIf('scaleoffset' not in h5py.filters.encode, "SCALEOFFSET is not installed")
class TestCreateScaleOffset(BaseDataset):
"""
Feature: Datasets can use the scale/offset filter
"""
def test_float_fails_without_options(self):
""" Ensure that a scale factor is required for scaleoffset compression of floating point data """
with self.assertRaises(ValueError):
dset = self.f.create_dataset('foo', (20, 30), dtype=float, scaleoffset=True)
def test_float(self):
""" Scaleoffset filter works for floating point data """
scalefac = 4
shape = (100, 300)
range = 20*10**scalefac
testdata = (np.random.rand(*shape)-0.5)*range
dset = self.f.create_dataset('foo', shape, dtype=float, scaleoffset=scalefac)
# Dataset reports that scaleoffset is in use
assert dset.scaleoffset is not None
# Dataset round-trips
dset[...] = testdata
filename = self.f.filename
self.f.close()
self.f = h5py.File(filename, 'r')
readdata = self.f['foo'][...]
# Test that data round-trips to requested precision
self.assertArrayEqual(readdata, testdata, precision=10**(-scalefac))
# Test that the filter is actually active (i.e. compression is lossy)
assert not (readdata == testdata).all()
def test_int(self):
""" Scaleoffset filter works for integer data with default precision """
nbits = 12
shape = (100, 300)
testdata = np.random.randint(0, 2**nbits-1, size=shape)
# Create dataset; note omission of nbits (for library-determined precision)
dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=True)
# Dataset reports scaleoffset enabled
assert dset.scaleoffset is not None
# Data round-trips correctly and identically
dset[...] = testdata
filename = self.f.filename
self.f.close()
self.f = h5py.File(filename, 'r')
readdata = self.f['foo'][...]
self.assertArrayEqual(readdata, testdata)
def test_int_with_minbits(self):
""" Scaleoffset filter works for integer data with specified precision """
nbits = 12
shape = (100, 300)
testdata = np.random.randint(0, 2**nbits, size=shape)
dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=nbits)
# Dataset reports scaleoffset enabled with correct precision
self.assertTrue(dset.scaleoffset == 12)
# Data round-trips correctly
dset[...] = testdata
filename = self.f.filename
self.f.close()
self.f = h5py.File(filename, 'r')
readdata = self.f['foo'][...]
self.assertArrayEqual(readdata, testdata)
def test_int_with_minbits_lossy(self):
""" Scaleoffset filter works for integer data with specified precision """
nbits = 12
shape = (100, 300)
testdata = np.random.randint(0, 2**(nbits+1)-1, size=shape)
dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=nbits)
# Dataset reports scaleoffset enabled with correct precision
self.assertTrue(dset.scaleoffset == 12)
# Data can be written and read
dset[...] = testdata
filename = self.f.filename
self.f.close()
self.f = h5py.File(filename, 'r')
readdata = self.f['foo'][...]
# Compression is lossy
assert not (readdata == testdata).all()
class TestAutoCreate(BaseDataset):
"""
Feature: Datasets auto-created from data produce the correct types
"""
def test_vlen_bytes(self):
""" Assignment of a byte string produces a vlen ascii dataset """
self.f['x'] = b"Hello there"
ds = self.f['x']
tid = ds.id.get_type()
self.assertEqual(type(tid), h5py.h5t.TypeStringID)
self.assertTrue(tid.is_variable_str())
self.assertEqual(tid.get_cset(), h5py.h5t.CSET_ASCII)
def test_vlen_unicode(self):
""" Assignment of a unicode string produces a vlen unicode dataset """
self.f['x'] = u"Hello there" + six.unichr(0x2034)
ds = self.f['x']
tid = ds.id.get_type()
self.assertEqual(type(tid), h5py.h5t.TypeStringID)
self.assertTrue(tid.is_variable_str())
self.assertEqual(tid.get_cset(), h5py.h5t.CSET_UTF8)
def test_string_fixed(self):
""" Assignment of fixed-length byte string produces a fixed-length
ascii dataset """
self.f['x'] = np.string_("Hello there")
ds = self.f['x']
tid = ds.id.get_type()
self.assertEqual(type(tid), h5py.h5t.TypeStringID)
self.assertEqual(tid.get_size(), 11)
self.assertEqual(tid.get_cset(), h5py.h5t.CSET_ASCII)
class TestResize(BaseDataset):
"""
Feature: Datasets created with "maxshape" may be resized
"""
def test_create(self):
""" Create dataset with "maxshape" """
dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60))
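# Resizable datasets require chunked storage, so chunks is set automatically.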
self.assertIsNot(dset.chunks, None)
self.assertEqual(dset.maxshape, (20, 60))
def test_resize(self):
""" Datasets may be resized up to maxshape """
dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60))
self.assertEqual(dset.shape, (20, 30))
dset.resize((20, 50))
self.assertEqual(dset.shape, (20, 50))
dset.resize((20, 60))
self.assertEqual(dset.shape, (20, 60))
def test_resize_over(self):
""" Resizing past maxshape triggers ValueError """
dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60))
with self.assertRaises(ValueError):
dset.resize((20, 70))
def test_resize_nonchunked(self):
""" Resizing non-chunked dataset raises TypeError """
dset = self.f.create_dataset("foo", (20, 30))
with self.assertRaises(TypeError):
dset.resize((20, 60))
def test_resize_axis(self):
""" Resize specified axis """
dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60))
dset.resize(50, axis=1)
self.assertEqual(dset.shape, (20, 50))
def test_axis_exc(self):
""" Illegal axis raises ValueError """
dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60))
with self.assertRaises(ValueError):
dset.resize(50, axis=2)
def test_zero_dim(self):
""" Allow zero-length initial dims for unlimited axes (issue 111) """
dset = self.f.create_dataset('foo', (15, 0), maxshape=(15, None))
self.assertEqual(dset.shape, (15, 0))
self.assertEqual(dset.maxshape, (15, None))
class TestDtype(BaseDataset):
"""
Feature: Dataset dtype is available as .dtype property
"""
def test_dtype(self):
""" Retrieve dtype from dataset """
dset = self.f.create_dataset('foo', (5,), '|S10')
self.assertEqual(dset.dtype, np.dtype('|S10'))
class TestLen(BaseDataset):
"""
Feature: Size of first axis is available via Python's len
"""
def test_len(self):
""" Python len() (under 32 bits) """
dset = self.f.create_dataset('foo', (312, 15))
self.assertEqual(len(dset), 312)
def test_len_big(self):
""" Python len() vs Dataset.len() """
dset = self.f.create_dataset('foo', (2**33, 15))
self.assertEqual(dset.shape, (2**33, 15))
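# On 32-bit Python, len() cannot return values above sys.maxsize, so Dataset.len() is the fallback.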
if sys.maxsize == 2**31-1:
with self.assertRaises(OverflowError):
len(dset)
else:
self.assertEqual(len(dset), 2**33)
self.assertEqual(dset.len(), 2**33)
class TestIter(BaseDataset):
"""
Feature: Iterating over a dataset yields rows
"""
def test_iter(self):
""" Iterating over a dataset yields rows """
data = np.arange(30, dtype='f').reshape((10, 3))
dset = self.f.create_dataset('foo', data=data)
for x, y in zip(dset, data):
self.assertEqual(len(x), 3)
self.assertArrayEqual(x, y)
def test_iter_scalar(self):
""" Iterating over scalar dataset raises TypeError """
dset = self.f.create_dataset('foo', shape=())
with self.assertRaises(TypeError):
[x for x in dset]
class TestStrings(BaseDataset):
"""
Feature: Datasets created with vlen and fixed datatypes correctly
translate to and from HDF5
"""
def test_vlen_bytes(self):
""" Vlen bytes dataset maps to vlen ascii in the file """
dt = h5py.special_dtype(vlen=bytes)
ds = self.f.create_dataset('x', (100,), dtype=dt)
tid = ds.id.get_type()
self.assertEqual(type(tid), h5py.h5t.TypeStringID)
self.assertEqual(tid.get_cset(), h5py.h5t.CSET_ASCII)
def test_vlen_unicode(self):
""" Vlen unicode dataset maps to vlen utf-8 in the file """
dt = h5py.special_dtype(vlen=six.text_type)
ds = self.f.create_dataset('x', (100,), dtype=dt)
tid = ds.id.get_type()
self.assertEqual(type(tid), h5py.h5t.TypeStringID)
self.assertEqual(tid.get_cset(), h5py.h5t.CSET_UTF8)
def test_fixed_bytes(self):
""" Fixed-length bytes dataset maps to fixed-length ascii in the file
"""
dt = np.dtype("|S10")
ds = self.f.create_dataset('x', (100,), dtype=dt)
tid = ds.id.get_type()
self.assertEqual(type(tid), h5py.h5t.TypeStringID)
self.assertFalse(tid.is_variable_str())
self.assertEqual(tid.get_size(), 10)
self.assertEqual(tid.get_cset(), h5py.h5t.CSET_ASCII)
def test_fixed_unicode(self):
""" Fixed-length unicode datasets are unsupported (raise TypeError) """
dt = np.dtype("|U10")
with self.assertRaises(TypeError):
ds = self.f.create_dataset('x', (100,), dtype=dt)
def test_roundtrip_vlen_bytes(self):
""" writing and reading to vlen bytes dataset preserves type and content
"""
dt = h5py.special_dtype(vlen=bytes)
ds = self.f.create_dataset('x', (100,), dtype=dt)
data = b"Hello\xef"
ds[0] = data
out = ds[0]
self.assertEqual(type(out), bytes)
self.assertEqual(out, data)
def test_roundtrip_vlen_unicode(self):
""" Writing and reading to unicode dataset preserves type and content
"""
dt = h5py.special_dtype(vlen=six.text_type)
ds = self.f.create_dataset('x', (100,), dtype=dt)
data = u"Hello" + six.unichr(0x2034)
ds[0] = data
out = ds[0]
self.assertEqual(type(out), six.text_type)
self.assertEqual(out, data)
def test_roundtrip_fixed_bytes(self):
""" Writing to and reading from fixed-length bytes dataset preserves
type and content """
dt = np.dtype("|S10")
ds = self.f.create_dataset('x', (100,), dtype=dt)
data = b"Hello\xef"
ds[0] = data
out = ds[0]
self.assertEqual(type(out), np.string_)
self.assertEqual(out, data)
@ut.expectedFailure
def test_unicode_write_error(self):
""" Writing a non-utf8 byte string to a unicode vlen dataset raises
ValueError """
dt = h5py.special_dtype(vlen=six.text_type)
ds = self.f.create_dataset('x', (100,), dtype=dt)
data = "Hello\xef"
with self.assertRaises(ValueError):
ds[0] = data
def test_unicode_write_bytes(self):
""" Writing valid utf-8 byte strings to a unicode vlen dataset is OK
"""
dt = h5py.special_dtype(vlen=six.text_type)
ds = self.f.create_dataset('x', (100,), dtype=dt)
data = u"Hello there" + six.unichr(0x2034)
ds[0] = data.encode('utf8')
out = ds[0]
self.assertEqual(type(out), six.text_type)
self.assertEqual(out, data)
class TestCompound(BaseDataset):
"""
Feature: Compound types correctly round-trip
"""
def test_rt(self):
""" Compound types are read back in correct order (issue 236)"""
dt = np.dtype( [ ('weight', np.float64),
('cputime', np.float64),
('walltime', np.float64),
('parents_offset', np.uint32),
('n_parents', np.uint32),
('status', np.uint8),
('endpoint_type', np.uint8), ] )
testdata = np.ndarray((16,), dtype=dt)
for key in dt.fields:
testdata[key] = np.random.random((16,))*100
self.f['test'] = testdata
outdata = self.f['test'][...]
self.assertTrue(np.all(outdata == testdata))
self.assertEqual(outdata.dtype, testdata.dtype)
def test_assign(self):
dt = np.dtype( [ ('weight', (np.float64, 3)),
('endpoint_type', np.uint8), ] )
testdata = np.ndarray((16,), dtype=dt)
for key in dt.fields:
testdata[key] = np.random.random(size=testdata[key].shape)*100
ds = self.f.create_dataset('test', (16,), dtype=dt)
for key in dt.fields:
ds[key] = testdata[key]
outdata = self.f['test'][...]
self.assertTrue(np.all(outdata == testdata))
self.assertEqual(outdata.dtype, testdata.dtype)
class TestEnum(BaseDataset):
"""
Feature: Enum datatype info is preserved, read/write as integer
"""
EDICT = {'RED': 0, 'GREEN': 1, 'BLUE': 42}
def test_create(self):
""" Enum datasets can be created and type correctly round-trips """
dt = h5py.special_dtype(enum=('i', self.EDICT))
ds = self.f.create_dataset('x', (100, 100), dtype=dt)
dt2 = ds.dtype
dict2 = h5py.check_dtype(enum=dt2)
self.assertEqual(dict2, self.EDICT)
def test_readwrite(self):
""" Enum datasets can be read/written as integers """
dt = h5py.special_dtype(enum=('i4', self.EDICT))
ds = self.f.create_dataset('x', (100, 100), dtype=dt)
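# Enum data is written and read back as plain integers; the name/value mapping lives in the dtype.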
ds[35, 37] = 42
ds[1, :] = 1
self.assertEqual(ds[35, 37], 42)
self.assertArrayEqual(ds[1, :], np.array((1,)*100, dtype='i4'))
class TestFloats(BaseDataset):
"""
Test support for mini and extended-precision floats
"""
def _exectest(self, dt):
dset = self.f.create_dataset('x', (100,), dtype=dt)
self.assertEqual(dset.dtype, dt)
data = np.ones((100,), dtype=dt)
dset[...] = data
self.assertArrayEqual(dset[...], data)
@ut.skipUnless(hasattr(np, 'float16'), "NumPy float16 support required")
def test_mini(self):
""" Mini-floats round trip """
self._exectest(np.dtype('float16'))
#TODO: move these tests to test_h5t
def test_mini_mapping(self):
""" Test mapping for float16 """
if hasattr(np, 'float16'):
self.assertEqual(h5t.IEEE_F16LE.dtype, np.dtype('<f2'))
else:
self.assertEqual(h5t.IEEE_F16LE.dtype, np.dtype('<f4'))
class TestTrackTimes(BaseDataset):
"""
Feature: track_times
"""
def test_disable_track_times(self):
""" check that when track_times=False, the time stamp=0 (Jan 1, 1970) """
ds = self.f.create_dataset('foo', (4,), track_times=False)
ds_mtime = h5py.h5g.get_objinfo(ds._id).mtime
self.assertEqual(0, ds_mtime)
class TestZeroShape(BaseDataset):
"""
Features of datasets with (0,)-shape axes
"""
def test_array_conversion(self):
""" Empty datasets can be converted to NumPy arrays """
ds = self.f.create_dataset('x', (0,), maxshape=(None,))
self.assertEqual(ds.shape, np.array(ds).shape)
ds = self.f.create_dataset('y', (0, 0), maxshape=(None, None))
self.assertEqual(ds.shape, np.array(ds).shape)
def test_reading(self):
""" Slicing into empty datasets works correctly """
dt = [('a', 'f'), ('b', 'i')]
ds = self.f.create_dataset('x', (0,), dtype=dt, maxshape=(None,))
arr = np.empty((0,), dtype=dt)
self.assertEqual(ds[...].shape, arr.shape)
self.assertEqual(ds[...].dtype, arr.dtype)
self.assertEqual(ds[()].shape, arr.shape)
self.assertEqual(ds[()].dtype, arr.dtype)
class TestRegionRefs(BaseDataset):
"""
Various features of region references
"""
def setUp(self):
BaseDataset.setUp(self)
self.data = np.arange(100*100).reshape((100, 100))
self.dset = self.f.create_dataset('x', data=self.data)
self.dset[...] = self.data
def test_create_ref(self):
""" Region references can be used as slicing arguments """
slic = np.s_[25:35, 10:100:5]
ref = self.dset.regionref[slic]
self.assertArrayEqual(self.dset[ref], self.data[slic])
def test_ref_shape(self):
""" Region reference shape and selection shape """
slic = np.s_[25:35, 10:100:5]
ref = self.dset.regionref[slic]
self.assertEqual(self.dset.regionref.shape(ref), self.dset.shape)
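# The slice 25:35 selects 10 rows and 10:100:5 selects 18 columns, hence the (10, 18) selection shape.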
self.assertEqual(self.dset.regionref.selection(ref), (10, 18))
class TestAstype(BaseDataset):
"""
.astype context manager
"""
def test_astype(self):
dset = self.f.create_dataset('x', (100,), dtype='i2')
dset[...] = np.arange(100)
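# Inside the astype context, reads are converted to 'f8' on the fly; the stored dtype stays 'i2'.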
with dset.astype('f8'):
self.assertEqual(dset[...].dtype, np.dtype('f8'))
self.assertTrue(np.all(dset[...] == np.arange(100)))
class TestScalarCompound(BaseDataset):
"""
Retrieval of a single field from a scalar compound dataset should
strip the field info
"""
def test_scalar_compound(self):
dt = np.dtype([('a', 'i')])
dset = self.f.create_dataset('x', (), dtype=dt)
self.assertEqual(dset['a'].dtype, np.dtype('i'))
class TestVlen(BaseDataset):
def test_int(self):
dt = h5py.special_dtype(vlen=int)
ds = self.f.create_dataset('vlen', (4,), dtype=dt)
ds[0] = np.arange(3)
ds[1] = np.arange(0)
ds[2] = [1, 2, 3]
ds[3] = np.arange(1)
self.assertArrayEqual(ds[0], np.arange(3))
self.assertArrayEqual(ds[1], np.arange(0))
self.assertArrayEqual(ds[2], np.array([1, 2, 3]))
self.assertArrayEqual(ds[1], np.arange(0))
ds[0:2] = np.array([np.arange(5), np.arange(4)])
self.assertArrayEqual(ds[0], np.arange(5))
self.assertArrayEqual(ds[1], np.arange(4))
ds[0:2] = np.array([np.arange(3), np.arange(3)])
self.assertArrayEqual(ds[0], np.arange(3))
self.assertArrayEqual(ds[1], np.arange(3))
def test_reuse_from_other(self):
dt = h5py.special_dtype(vlen=int)
ds = self.f.create_dataset('vlen', (1,), dtype=dt)
self.f.create_dataset('vlen2', (1,), ds[()].dtype)
def test_reuse_struct_from_other(self):
dt = [('a', int), ('b', h5py.special_dtype(vlen=int))]
ds = self.f.create_dataset('vlen', (1,), dtype=dt)
fname = self.f.filename
self.f.close()
self.f = h5py.File(fname)
self.f.create_dataset('vlen2', (1,), self.f['vlen']['b'][()].dtype)
def test_convert(self):
dt = h5py.special_dtype(vlen=int)
ds = self.f.create_dataset('vlen', (3,), dtype=dt)
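# Float values are cast to the vlen base type (int), truncating the fractional parts.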
ds[0] = np.array([1.4, 1.2])
ds[1] = np.array([1.2])
ds[2] = [1.2, 2, 3]
self.assertArrayEqual(ds[0], np.array([1, 1]))
self.assertArrayEqual(ds[1], np.array([1]))
self.assertArrayEqual(ds[2], np.array([1, 2, 3]))
ds[0:2] = np.array([[0.1, 1.1, 2.1, 3.1, 4], np.arange(4)])
self.assertArrayEqual(ds[0], np.arange(5))
self.assertArrayEqual(ds[1], np.arange(4))
ds[0:2] = np.array([np.array([0.1, 1.2, 2.2]),
np.array([0.2, 1.2, 2.2])])
self.assertArrayEqual(ds[0], np.arange(3))
self.assertArrayEqual(ds[1], np.arange(3))
def test_multidim(self):
dt = h5py.special_dtype(vlen=int)
ds = self.f.create_dataset('vlen', (2, 2), dtype=dt)
ds[0, 0] = np.arange(1)
ds[:, :] = np.array([[np.arange(3), np.arange(2)],
[np.arange(1), np.arange(2)]])
ds[:, :] = np.array([[np.arange(2), np.arange(2)],
[np.arange(2), np.arange(2)]])
def _help_float_testing(self, np_dt, dataset_name='vlen'):
"""
Helper for testing various vlen numpy data types.
:param np_dt: Numpy datatype to test
:param dataset_name: String name of the dataset to create for testing.
"""
dt = h5py.special_dtype(vlen=np_dt)
ds = self.f.create_dataset(dataset_name, (5,), dtype=dt)
# Create some arrays, and assign them to the dataset
array_0 = np.array([1., 2., 30.], dtype=np_dt)
array_1 = np.array([100.3, 200.4, 98.1, -10.5, -300.0], dtype=np_dt)
# Test that a numpy array of different type gets cast correctly
array_2 = np.array([1, 2, 8], dtype=np.dtype('int32'))
casted_array_2 = array_2.astype(np_dt)
# Test that we can set a list of floats.
list_3 = [1., 2., 900., 0., -0.5]
list_array_3 = np.array(list_3, dtype=np_dt)
# Test that a list of integers gets casted correctly
list_4 = [-1, -100, 0, 1, 9999, 70]
list_array_4 = np.array(list_4, dtype=np_dt)
ds[0] = array_0
ds[1] = array_1
ds[2] = array_2
ds[3] = list_3
ds[4] = list_4
self.assertArrayEqual(array_0, ds[0])
self.assertArrayEqual(array_1, ds[1])
self.assertArrayEqual(casted_array_2, ds[2])
self.assertArrayEqual(list_array_3, ds[3])
self.assertArrayEqual(list_array_4, ds[4])
# Test that we can reassign arrays in the dataset
list_array_3 = np.array([0.3, 2.2], dtype=np_dt)
ds[0] = list_array_3[:]
self.assertArrayEqual(list_array_3, ds[0])
# Make sure we can close the file.
self.f.flush()
self.f.close()
def test_numpy_float16(self):
np_dt = np.dtype('float16')
self._help_float_testing(np_dt)
def test_numpy_float32(self):
np_dt = np.dtype('float32')
self._help_float_testing(np_dt)
def test_numpy_float64_from_dtype(self):
np_dt = np.dtype('float64')
self._help_float_testing(np_dt)
def test_numpy_float64_2(self):
np_dt = np.float64
self._help_float_testing(np_dt)
class TestLowOpen(BaseDataset):
def test_get_access_list(self):
""" Test H5Dget_access_plist """
ds = self.f.create_dataset('foo', (4,))
p_list = ds.id.get_access_plist()
def test_dapl(self):
""" Test the dapl keyword to h5d.open """
dapl = h5py.h5p.create(h5py.h5p.DATASET_ACCESS)
dset = self.f.create_dataset('x', (100,))
del dset
dsid = h5py.h5d.open(self.f.id, b'x', dapl)
self.assertIsInstance(dsid, h5py.h5d.DatasetID)