# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
#          and contributor agreement.

"""
    Dataset testing operations.

    Tests all dataset operations, including creation, with the exception of:

    1. Slicing operations for read and write, handled by module test_slicing
    2. Type conversion for read and write (currently untested)
"""
|
|
from __future__ import absolute_import

import sys

import six

import numpy as np

from ..common import ut, TestCase
from h5py.highlevel import File, Group, Dataset
from h5py._hl.base import is_empty_dataspace
from h5py import h5t
import h5py
|
|
|
|
class BaseDataset(TestCase):
    def setUp(self):
        self.f = File(self.mktemp(), 'w')

    def tearDown(self):
        if self.f:
            self.f.close()
|
|
|
|
class TestRepr(BaseDataset):
    """
    Feature: repr(Dataset) behaves sensibly
    """

    def test_repr_open(self):
        """ repr() works on live and dead datasets """
        ds = self.f.create_dataset('foo', (4,))
        self.assertIsInstance(repr(ds), six.string_types)
        self.f.close()
        self.assertIsInstance(repr(ds), six.string_types)
|
|
|
|
class TestCreateShape(BaseDataset):

    """
    Feature: Datasets can be created from a shape only
    """

    def test_create_scalar(self):
        """ Create a scalar dataset """
        dset = self.f.create_dataset('foo', ())
        self.assertEqual(dset.shape, ())

    def test_create_simple(self):
        """ Create a size-1 dataset """
        dset = self.f.create_dataset('foo', (1,))
        self.assertEqual(dset.shape, (1,))

    def test_create_extended(self):
        """ Create an extended dataset """
        dset = self.f.create_dataset('foo', (63,))
        self.assertEqual(dset.shape, (63,))
        self.assertEqual(dset.size, 63)
        dset = self.f.create_dataset('bar', (6, 10))
        self.assertEqual(dset.shape, (6, 10))
        self.assertEqual(dset.size, 60)

    def test_default_dtype(self):
        """ Confirm that the default dtype is float """
        dset = self.f.create_dataset('foo', (63,))
        self.assertEqual(dset.dtype, np.dtype('=f4'))

    def test_missing_shape(self):
        """ Missing shape raises TypeError """
        with self.assertRaises(TypeError):
            self.f.create_dataset('foo')

    def test_long_double(self):
        """ Confirm that the long-double dtype is preserved """
        dset = self.f.create_dataset('foo', (63,), dtype=np.longdouble)
        self.assertEqual(dset.dtype, np.longdouble)

    @ut.skipIf(not hasattr(np, "complex256"), "No support for complex256")
    def test_complex256(self):
        """ Confirm that the complex256 dtype is preserved """
        dset = self.f.create_dataset('foo', (63,),
                                     dtype=np.dtype('complex256'))
        self.assertEqual(dset.dtype, np.dtype('complex256'))
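
# --- Usage sketch (not part of the test suite) -----------------------------
# A minimal illustration of shape-only creation, as exercised above.  The
# file name and the in-memory 'core' driver are illustrative assumptions.
def _example_create_from_shape():
    f = h5py.File('example_shape.h5', 'w', driver='core', backing_store=False)
    dset = f.create_dataset('grid', (6, 10), dtype='f4')  # shape + dtype only
    assert dset.shape == (6, 10) and dset.size == 60
    f.close()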
|
|
|
|
class TestCreateData(BaseDataset):

    """
    Feature: Datasets can be created from existing data
    """

    def test_create_scalar(self):
        """ Create a scalar dataset from existing array """
        data = np.ones((), 'f')
        dset = self.f.create_dataset('foo', data=data)
        self.assertEqual(dset.shape, data.shape)

    def test_create_extended(self):
        """ Create an extended dataset from existing data """
        data = np.ones((63,), 'f')
        dset = self.f.create_dataset('foo', data=data)
        self.assertEqual(dset.shape, data.shape)

    def test_dataset_intermediate_group(self):
        """ Create dataset with missing intermediate groups """
        ds = self.f.create_dataset("/foo/bar/baz", shape=(10, 10), dtype='<i4')
        self.assertIsInstance(ds, h5py.Dataset)
        self.assertTrue("/foo/bar/baz" in self.f)

    def test_reshape(self):
        """ Create from existing data, and make it fit a new shape """
        data = np.arange(30, dtype='f')
        dset = self.f.create_dataset('foo', shape=(10, 3), data=data)
        self.assertEqual(dset.shape, (10, 3))
        self.assertArrayEqual(dset[...], data.reshape((10, 3)))

    def test_appropriate_low_level_id(self):
        " Binding Dataset to a non-DatasetID identifier fails with ValueError "
        with self.assertRaises(ValueError):
            Dataset(self.f['/'].id)

    @ut.expectedFailure
    def test_create_bytestring(self):
        """ Creating dataset with byte string yields vlen ASCII dataset """
        # Placeholder: no real assertion has been written yet, so this test
        # is kept failing and marked as an expected failure.
        self.assertEqual(True, False)

    def test_empty_create_via_None_shape(self):
        self.f.create_dataset('foo', dtype='f')
        self.assertTrue(is_empty_dataspace(self.f['foo'].id))

    def test_empty_create_via_Empty_class(self):
        self.f.create_dataset('foo', data=h5py.Empty(dtype='f'))
        self.assertTrue(is_empty_dataspace(self.f['foo'].id))
|
|
|
|
class TestCreateRequire(BaseDataset):

    """
    Feature: Datasets can be created only if they don't exist in the file
    """

    def test_create(self):
        """ Create new dataset with no conflicts """
        dset = self.f.require_dataset('foo', (10, 3), 'f')
        self.assertIsInstance(dset, Dataset)
        self.assertEqual(dset.shape, (10, 3))

    def test_create_existing(self):
        """ require_dataset yields existing dataset """
        dset = self.f.require_dataset('foo', (10, 3), 'f')
        dset2 = self.f.require_dataset('foo', (10, 3), 'f')
        self.assertEqual(dset, dset2)

    def test_shape_conflict(self):
        """ require_dataset with shape conflict yields TypeError """
        self.f.create_dataset('foo', (10, 3), 'f')
        with self.assertRaises(TypeError):
            self.f.require_dataset('foo', (10, 4), 'f')

    def test_type_conflict(self):
        """ require_dataset with object type conflict yields TypeError """
        self.f.create_group('foo')
        with self.assertRaises(TypeError):
            self.f.require_dataset('foo', (10, 3), 'f')

    def test_dtype_conflict(self):
        """ require_dataset with dtype conflict (strict mode) yields TypeError
        """
        dset = self.f.create_dataset('foo', (10, 3), 'f')
        with self.assertRaises(TypeError):
            self.f.require_dataset('foo', (10, 3), 'S10')

    def test_dtype_close(self):
        """ require_dataset with convertible type succeeds (non-strict mode)
        """
        dset = self.f.create_dataset('foo', (10, 3), 'i4')
        dset2 = self.f.require_dataset('foo', (10, 3), 'i2', exact=False)
        self.assertEqual(dset, dset2)
        self.assertEqual(dset2.dtype, np.dtype('i4'))
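
# --- Usage sketch (not part of the test suite) -----------------------------
# A minimal sketch of require_dataset: it reopens a dataset when one with a
# compatible shape/dtype already exists, and creates it otherwise.  The file
# name and in-memory 'core' driver are illustrative assumptions.
def _example_require_dataset():
    f = h5py.File('example_require.h5', 'w', driver='core',
                  backing_store=False)
    first = f.require_dataset('counts', (10, 3), 'i4')  # created
    again = f.require_dataset('counts', (10, 3), 'i4')  # reopened, not remade
    assert first == again
    f.close()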
|
|
|
|
class TestCreateChunked(BaseDataset):

    """
    Feature: Datasets can be created by manually specifying chunks
    """

    def test_create_chunks(self):
        """ Create via chunks tuple """
        dset = self.f.create_dataset('foo', shape=(100,), chunks=(10,))
        self.assertEqual(dset.chunks, (10,))

    def test_chunks_mismatch(self):
        """ Illegal chunk size raises ValueError """
        with self.assertRaises(ValueError):
            self.f.create_dataset('foo', shape=(100,), chunks=(200,))

    def test_chunks_scalar(self):
        """ Attempting to create chunked scalar dataset raises TypeError """
        with self.assertRaises(TypeError):
            self.f.create_dataset('foo', shape=(), chunks=(50,))

    def test_auto_chunks(self):
        """ Auto-chunking of datasets """
        dset = self.f.create_dataset('foo', shape=(20, 100), chunks=True)
        self.assertIsInstance(dset.chunks, tuple)
        self.assertEqual(len(dset.chunks), 2)

    def test_auto_chunks_abuse(self):
        """ Auto-chunking with pathologically large element sizes """
        dset = self.f.create_dataset('foo', shape=(3,), dtype='S100000000',
                                     chunks=True)
        self.assertEqual(dset.chunks, (1,))
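
# --- Usage sketch (not part of the test suite) -----------------------------
# A minimal sketch of chunked storage: chunks may be given explicitly as a
# tuple (no chunk dimension larger than the dataset's), or chosen by the
# library with chunks=True.  In-memory 'core' file assumed for illustration.
def _example_chunking():
    f = h5py.File('example_chunks.h5', 'w', driver='core',
                  backing_store=False)
    explicit = f.create_dataset('a', shape=(100,), chunks=(10,))
    auto = f.create_dataset('b', shape=(20, 100), chunks=True)
    assert explicit.chunks == (10,) and len(auto.chunks) == 2
    f.close()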
|
|
|
|
class TestCreateFillvalue(BaseDataset):

    """
    Feature: Datasets can be created with fill value
    """

    def test_create_fillval(self):
        """ Fill value is reflected in dataset contents """
        dset = self.f.create_dataset('foo', (10,), fillvalue=4.0)
        self.assertEqual(dset[0], 4.0)
        self.assertEqual(dset[7], 4.0)

    def test_property(self):
        """ Fill value is recoverable via property """
        dset = self.f.create_dataset('foo', (10,), fillvalue=3.0)
        self.assertEqual(dset.fillvalue, 3.0)
        self.assertNotIsInstance(dset.fillvalue, np.ndarray)

    def test_property_none(self):
        """ .fillvalue property works correctly if not set """
        dset = self.f.create_dataset('foo', (10,))
        self.assertEqual(dset.fillvalue, 0)

    def test_compound(self):
        """ Fill value works with compound types """
        dt = np.dtype([('a', 'f4'), ('b', 'i8')])
        v = np.ones((1,), dtype=dt)[0]
        dset = self.f.create_dataset('foo', (10,), dtype=dt, fillvalue=v)
        self.assertEqual(dset.fillvalue, v)
        self.assertAlmostEqual(dset[4], v)

    def test_exc(self):
        """ Bogus fill value raises ValueError """
        with self.assertRaises(ValueError):
            dset = self.f.create_dataset('foo', (10,),
                                         dtype=[('a', 'i'), ('b', 'f')],
                                         fillvalue=42)
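
# --- Usage sketch (not part of the test suite) -----------------------------
# A minimal sketch of fill values: unwritten elements read back as the fill
# value, which is also recoverable via the .fillvalue property.
def _example_fillvalue():
    f = h5py.File('example_fill.h5', 'w', driver='core', backing_store=False)
    dset = f.create_dataset('x', (10,), fillvalue=4.0)
    assert dset[7] == 4.0 and dset.fillvalue == 4.0
    f.close()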
|
|
|
|
class TestCreateNamedType(BaseDataset):

    """
    Feature: Datasets created from an existing named type
    """

    def test_named(self):
        """ Named type object works and links the dataset to type """
        self.f['type'] = np.dtype('f8')
        dset = self.f.create_dataset('x', (100,), dtype=self.f['type'])
        self.assertEqual(dset.dtype, np.dtype('f8'))
        self.assertEqual(dset.id.get_type(), self.f['type'].id)
        self.assertTrue(dset.id.get_type().committed())
|
|
|
|
@ut.skipIf('gzip' not in h5py.filters.encode, "DEFLATE is not installed")
class TestCreateGzip(BaseDataset):

    """
    Feature: Datasets created with gzip compression
    """

    def test_gzip(self):
        """ Create with explicit gzip options """
        dset = self.f.create_dataset('foo', (20, 30), compression='gzip',
                                     compression_opts=9)
        self.assertEqual(dset.compression, 'gzip')
        self.assertEqual(dset.compression_opts, 9)

    def test_gzip_implicit(self):
        """ Create with implicit gzip level (level 4) """
        dset = self.f.create_dataset('foo', (20, 30), compression='gzip')
        self.assertEqual(dset.compression, 'gzip')
        self.assertEqual(dset.compression_opts, 4)

    def test_gzip_number(self):
        """ Create with gzip level by specifying integer """
        dset = self.f.create_dataset('foo', (20, 30), compression=7)
        self.assertEqual(dset.compression, 'gzip')
        self.assertEqual(dset.compression_opts, 7)

        original_compression_vals = h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS
        try:
            h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = tuple()
            with self.assertRaises(ValueError):
                # Use 'foo2' here: 'foo' already exists above, and a name
                # conflict would mask the legacy-gzip check being tested
                dset = self.f.create_dataset('foo2', (20, 30), compression=7)
        finally:
            h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = original_compression_vals

    def test_gzip_exc(self):
        """ Illegal gzip level (explicit or implicit) raises ValueError """
        with self.assertRaises((ValueError, RuntimeError)):
            self.f.create_dataset('foo', (20, 30), compression=14)
        with self.assertRaises(ValueError):
            self.f.create_dataset('foo', (20, 30), compression=-4)
        with self.assertRaises(ValueError):
            self.f.create_dataset('foo', (20, 30), compression='gzip',
                                  compression_opts=14)
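
# --- Usage sketch (not part of the test suite) -----------------------------
# A minimal sketch of gzip compression: pass compression='gzip' with an
# optional level 0-9 (default 4); a bare integer 0-9 is a legacy spelling.
def _example_gzip():
    f = h5py.File('example_gzip.h5', 'w', driver='core', backing_store=False)
    dset = f.create_dataset('x', (20, 30), compression='gzip',
                            compression_opts=9)
    assert dset.compression == 'gzip' and dset.compression_opts == 9
    f.close()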
|
|
|
|
@ut.skipIf('gzip' not in h5py.filters.encode, "DEFLATE is not installed")
class TestCreateCompressionNumber(BaseDataset):

    """
    Feature: Datasets created with a compression code
    """

    def test_compression_number(self):
        """ Create with compression number of gzip (h5py.h5z.FILTER_DEFLATE)
        and a compression level of 7 """
        original_compression_vals = h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS
        try:
            h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = tuple()
            dset = self.f.create_dataset('foo', (20, 30),
                                         compression=h5py.h5z.FILTER_DEFLATE,
                                         compression_opts=(7,))
        finally:
            h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = original_compression_vals

        self.assertEqual(dset.compression, 'gzip')
        self.assertEqual(dset.compression_opts, 7)

    def test_compression_number_invalid(self):
        """ Create with invalid compression numbers """
        with self.assertRaises(ValueError) as e:
            self.f.create_dataset('foo', (20, 30), compression=-999)
        self.assertIn("Invalid filter", str(e.exception))

        with self.assertRaises(ValueError) as e:
            self.f.create_dataset('foo', (20, 30), compression=100)
        self.assertIn("Unknown compression", str(e.exception))

        original_compression_vals = h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS
        try:
            h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = tuple()

            # Using gzip compression requires a compression level specified
            # in compression_opts
            with self.assertRaises(IndexError):
                self.f.create_dataset('foo', (20, 30),
                                      compression=h5py.h5z.FILTER_DEFLATE)
        finally:
            h5py._hl.dataset._LEGACY_GZIP_COMPRESSION_VALS = original_compression_vals
|
|
|
|
@ut.skipIf('lzf' not in h5py.filters.encode, "LZF is not installed")
class TestCreateLZF(BaseDataset):

    """
    Feature: Datasets created with LZF compression
    """

    def test_lzf(self):
        """ Create with explicit lzf """
        dset = self.f.create_dataset('foo', (20, 30), compression='lzf')
        self.assertEqual(dset.compression, 'lzf')
        self.assertEqual(dset.compression_opts, None)

    def test_lzf_exc(self):
        """ Giving lzf options raises ValueError """
        with self.assertRaises(ValueError):
            self.f.create_dataset('foo', (20, 30), compression='lzf',
                                  compression_opts=4)
|
|
|
|
@ut.skipIf('szip' not in h5py.filters.encode, "SZIP is not installed")
class TestCreateSZIP(BaseDataset):

    """
    Feature: Datasets created with SZIP compression
    """

    def test_szip(self):
        """ Create with explicit szip """
        dset = self.f.create_dataset('foo', (20, 30), compression='szip',
                                     compression_opts=('ec', 16))
|
|
|
|
@ut.skipIf('shuffle' not in h5py.filters.encode, "SHUFFLE is not installed")
class TestCreateShuffle(BaseDataset):

    """
    Feature: Datasets can use shuffling filter
    """

    def test_shuffle(self):
        """ Enable shuffle filter """
        dset = self.f.create_dataset('foo', (20, 30), shuffle=True)
        self.assertTrue(dset.shuffle)
|
|
|
|
@ut.skipIf('fletcher32' not in h5py.filters.encode, "FLETCHER32 is not installed")
class TestCreateFletcher32(BaseDataset):
    """
    Feature: Datasets can use the fletcher32 filter
    """

    def test_fletcher32(self):
        """ Enable fletcher32 filter """
        dset = self.f.create_dataset('foo', (20, 30), fletcher32=True)
        self.assertTrue(dset.fletcher32)
|
|
|
|
@ut.skipIf('scaleoffset' not in h5py.filters.encode, "SCALEOFFSET is not installed")
class TestCreateScaleOffset(BaseDataset):
    """
    Feature: Datasets can use the scale/offset filter
    """

    def test_float_fails_without_options(self):
        """ Ensure that a scale factor is required for scaleoffset
        compression of floating point data """
        with self.assertRaises(ValueError):
            dset = self.f.create_dataset('foo', (20, 30), dtype=float,
                                         scaleoffset=True)

    def test_float(self):
        """ Scaleoffset filter works for floating point data """
        scalefac = 4
        shape = (100, 300)
        # Renamed from 'range' to avoid shadowing the builtin
        value_range = 20 * 10 ** scalefac
        testdata = (np.random.rand(*shape) - 0.5) * value_range

        dset = self.f.create_dataset('foo', shape, dtype=float,
                                     scaleoffset=scalefac)

        # Dataset reports that scaleoffset is in use
        assert dset.scaleoffset is not None

        # Dataset round-trips
        dset[...] = testdata
        filename = self.f.filename
        self.f.close()
        self.f = h5py.File(filename, 'r')
        readdata = self.f['foo'][...]

        # Test that data round-trips to requested precision
        self.assertArrayEqual(readdata, testdata, precision=10 ** (-scalefac))

        # Test that the filter is actually active (i.e. compression is lossy)
        assert not (readdata == testdata).all()

    def test_int(self):
        """ Scaleoffset filter works for integer data with default precision """
        nbits = 12
        shape = (100, 300)
        testdata = np.random.randint(0, 2 ** nbits - 1, size=shape)

        # Create dataset; note omission of nbits (for library-determined
        # precision)
        dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=True)

        # Dataset reports scaleoffset enabled
        assert dset.scaleoffset is not None

        # Data round-trips correctly and identically
        dset[...] = testdata
        filename = self.f.filename
        self.f.close()
        self.f = h5py.File(filename, 'r')
        readdata = self.f['foo'][...]
        self.assertArrayEqual(readdata, testdata)

    def test_int_with_minbits(self):
        """ Scaleoffset filter works for integer data with specified precision """
        nbits = 12
        shape = (100, 300)
        testdata = np.random.randint(0, 2 ** nbits, size=shape)

        dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=nbits)

        # Dataset reports scaleoffset enabled with correct precision
        self.assertTrue(dset.scaleoffset == 12)

        # Data round-trips correctly
        dset[...] = testdata
        filename = self.f.filename
        self.f.close()
        self.f = h5py.File(filename, 'r')
        readdata = self.f['foo'][...]
        self.assertArrayEqual(readdata, testdata)

    def test_int_with_minbits_lossy(self):
        """ Scaleoffset filter is lossy for integer data exceeding the
        specified precision """
        nbits = 12
        shape = (100, 300)
        testdata = np.random.randint(0, 2 ** (nbits + 1) - 1, size=shape)

        dset = self.f.create_dataset('foo', shape, dtype=int, scaleoffset=nbits)

        # Dataset reports scaleoffset enabled with correct precision
        self.assertTrue(dset.scaleoffset == 12)

        # Data can be written and read
        dset[...] = testdata
        filename = self.f.filename
        self.f.close()
        self.f = h5py.File(filename, 'r')
        readdata = self.f['foo'][...]

        # Compression is lossy
        assert not (readdata == testdata).all()
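
# --- Usage sketch (not part of the test suite) -----------------------------
# A minimal sketch of the scale/offset filter: floating-point data requires
# an explicit decimal-digits scale factor, while integer data may pass
# scaleoffset=True for a library-chosen precision.  The filter is lossy
# beyond the requested precision.  In-memory 'core' file assumed.
def _example_scaleoffset():
    f = h5py.File('example_so.h5', 'w', driver='core', backing_store=False)
    floats = f.create_dataset('floats', (100,), dtype=float, scaleoffset=4)
    ints = f.create_dataset('ints', (100,), dtype=int, scaleoffset=True)
    assert floats.scaleoffset is not None and ints.scaleoffset is not None
    f.close()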
|
|
|
|
class TestAutoCreate(BaseDataset):

    """
    Feature: Datasets auto-created from data produce the correct types
    """

    def test_vlen_bytes(self):
        """ Assignment of a byte string produces a vlen ascii dataset """
        self.f['x'] = b"Hello there"
        ds = self.f['x']
        tid = ds.id.get_type()
        self.assertEqual(type(tid), h5py.h5t.TypeStringID)
        self.assertTrue(tid.is_variable_str())
        self.assertEqual(tid.get_cset(), h5py.h5t.CSET_ASCII)

    def test_vlen_unicode(self):
        """ Assignment of a unicode string produces a vlen unicode dataset """
        self.f['x'] = u"Hello there" + six.unichr(0x2034)
        ds = self.f['x']
        tid = ds.id.get_type()
        self.assertEqual(type(tid), h5py.h5t.TypeStringID)
        self.assertTrue(tid.is_variable_str())
        self.assertEqual(tid.get_cset(), h5py.h5t.CSET_UTF8)

    def test_string_fixed(self):
        """ Assignment of fixed-length byte string produces a fixed-length
        ascii dataset """
        self.f['x'] = np.string_("Hello there")
        ds = self.f['x']
        tid = ds.id.get_type()
        self.assertEqual(type(tid), h5py.h5t.TypeStringID)
        self.assertEqual(tid.get_size(), 11)
        self.assertEqual(tid.get_cset(), h5py.h5t.CSET_ASCII)
|
|
|
|
class TestResize(BaseDataset):

    """
    Feature: Datasets created with "maxshape" may be resized
    """

    def test_create(self):
        """ Create dataset with "maxshape" """
        dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60))
        self.assertIsNot(dset.chunks, None)
        self.assertEqual(dset.maxshape, (20, 60))

    def test_resize(self):
        """ Datasets may be resized up to maxshape """
        dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60))
        self.assertEqual(dset.shape, (20, 30))
        dset.resize((20, 50))
        self.assertEqual(dset.shape, (20, 50))
        dset.resize((20, 60))
        self.assertEqual(dset.shape, (20, 60))

    def test_resize_over(self):
        """ Resizing past maxshape triggers ValueError """
        dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60))
        with self.assertRaises(ValueError):
            dset.resize((20, 70))

    def test_resize_nonchunked(self):
        """ Resizing non-chunked dataset raises TypeError """
        dset = self.f.create_dataset("foo", (20, 30))
        with self.assertRaises(TypeError):
            dset.resize((20, 60))

    def test_resize_axis(self):
        """ Resize specified axis """
        dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60))
        dset.resize(50, axis=1)
        self.assertEqual(dset.shape, (20, 50))

    def test_axis_exc(self):
        """ Illegal axis raises ValueError """
        dset = self.f.create_dataset('foo', (20, 30), maxshape=(20, 60))
        with self.assertRaises(ValueError):
            dset.resize(50, axis=2)

    def test_zero_dim(self):
        """ Allow zero-length initial dims for unlimited axes (issue 111) """
        dset = self.f.create_dataset('foo', (15, 0), maxshape=(15, None))
        self.assertEqual(dset.shape, (15, 0))
        self.assertEqual(dset.maxshape, (15, None))
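
# --- Usage sketch (not part of the test suite) -----------------------------
# A minimal sketch of resizable datasets: maxshape fixes an upper bound per
# axis (None = unlimited), and resize() grows or shrinks within that bound.
def _example_resize():
    f = h5py.File('example_resize.h5', 'w', driver='core',
                  backing_store=False)
    dset = f.create_dataset('log', (0,), maxshape=(None,))  # start empty
    dset.resize((100,))                                     # grow in place
    assert dset.shape == (100,)
    f.close()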
|
|
|
|
class TestDtype(BaseDataset):

    """
    Feature: Dataset dtype is available as .dtype property
    """

    def test_dtype(self):
        """ Retrieve dtype from dataset """
        dset = self.f.create_dataset('foo', (5,), '|S10')
        self.assertEqual(dset.dtype, np.dtype('|S10'))
|
|
|
|
class TestLen(BaseDataset):

    """
    Feature: Size of first axis is available via Python's len
    """

    def test_len(self):
        """ Python len() (under 32 bits) """
        dset = self.f.create_dataset('foo', (312, 15))
        self.assertEqual(len(dset), 312)

    def test_len_big(self):
        """ Python len() vs Dataset.len() """
        dset = self.f.create_dataset('foo', (2**33, 15))
        self.assertEqual(dset.shape, (2**33, 15))
        if sys.maxsize == 2**31 - 1:
            with self.assertRaises(OverflowError):
                len(dset)
        else:
            self.assertEqual(len(dset), 2**33)
        self.assertEqual(dset.len(), 2**33)
|
|
|
|
class TestIter(BaseDataset):

    """
    Feature: Iterating over a dataset yields rows
    """

    def test_iter(self):
        """ Iterating over a dataset yields rows """
        data = np.arange(30, dtype='f').reshape((10, 3))
        dset = self.f.create_dataset('foo', data=data)
        for x, y in zip(dset, data):
            self.assertEqual(len(x), 3)
            self.assertArrayEqual(x, y)

    def test_iter_scalar(self):
        """ Iterating over scalar dataset raises TypeError """
        dset = self.f.create_dataset('foo', shape=())
        with self.assertRaises(TypeError):
            [x for x in dset]
|
|
|
|
class TestStrings(BaseDataset):

    """
    Feature: Datasets created with vlen and fixed datatypes correctly
    translate to and from HDF5
    """

    def test_vlen_bytes(self):
        """ Vlen bytes dataset maps to vlen ascii in the file """
        dt = h5py.special_dtype(vlen=bytes)
        ds = self.f.create_dataset('x', (100,), dtype=dt)
        tid = ds.id.get_type()
        self.assertEqual(type(tid), h5py.h5t.TypeStringID)
        self.assertEqual(tid.get_cset(), h5py.h5t.CSET_ASCII)

    def test_vlen_unicode(self):
        """ Vlen unicode dataset maps to vlen utf-8 in the file """
        dt = h5py.special_dtype(vlen=six.text_type)
        ds = self.f.create_dataset('x', (100,), dtype=dt)
        tid = ds.id.get_type()
        self.assertEqual(type(tid), h5py.h5t.TypeStringID)
        self.assertEqual(tid.get_cset(), h5py.h5t.CSET_UTF8)

    def test_fixed_bytes(self):
        """ Fixed-length bytes dataset maps to fixed-length ascii in the file
        """
        dt = np.dtype("|S10")
        ds = self.f.create_dataset('x', (100,), dtype=dt)
        tid = ds.id.get_type()
        self.assertEqual(type(tid), h5py.h5t.TypeStringID)
        self.assertFalse(tid.is_variable_str())
        self.assertEqual(tid.get_size(), 10)
        self.assertEqual(tid.get_cset(), h5py.h5t.CSET_ASCII)

    def test_fixed_unicode(self):
        """ Fixed-length unicode datasets are unsupported (raise TypeError) """
        dt = np.dtype("|U10")
        with self.assertRaises(TypeError):
            ds = self.f.create_dataset('x', (100,), dtype=dt)

    def test_roundtrip_vlen_bytes(self):
        """ Writing and reading to vlen bytes dataset preserves type and
        content """
        dt = h5py.special_dtype(vlen=bytes)
        ds = self.f.create_dataset('x', (100,), dtype=dt)
        data = b"Hello\xef"
        ds[0] = data
        out = ds[0]
        self.assertEqual(type(out), bytes)
        self.assertEqual(out, data)

    def test_roundtrip_vlen_unicode(self):
        """ Writing and reading to unicode dataset preserves type and content
        """
        dt = h5py.special_dtype(vlen=six.text_type)
        ds = self.f.create_dataset('x', (100,), dtype=dt)
        data = u"Hello" + six.unichr(0x2034)
        ds[0] = data
        out = ds[0]
        self.assertEqual(type(out), six.text_type)
        self.assertEqual(out, data)

    def test_roundtrip_fixed_bytes(self):
        """ Writing to and reading from fixed-length bytes dataset preserves
        type and content """
        dt = np.dtype("|S10")
        ds = self.f.create_dataset('x', (100,), dtype=dt)
        data = b"Hello\xef"
        ds[0] = data
        out = ds[0]
        self.assertEqual(type(out), np.string_)
        self.assertEqual(out, data)

    @ut.expectedFailure
    def test_unicode_write_error(self):
        """ Writing a non-utf8 byte string to a unicode vlen dataset raises
        ValueError """
        dt = h5py.special_dtype(vlen=six.text_type)
        ds = self.f.create_dataset('x', (100,), dtype=dt)
        data = "Hello\xef"
        with self.assertRaises(ValueError):
            ds[0] = data

    def test_unicode_write_bytes(self):
        """ Writing valid utf-8 byte strings to a unicode vlen dataset is OK
        """
        dt = h5py.special_dtype(vlen=six.text_type)
        ds = self.f.create_dataset('x', (100,), dtype=dt)
        data = u"Hello there" + six.unichr(0x2034)
        ds[0] = data.encode('utf8')
        out = ds[0]
        self.assertEqual(type(out), six.text_type)
        self.assertEqual(out, data)
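
# --- Usage sketch (not part of the test suite) -----------------------------
# A minimal sketch of variable-length string dtypes: special_dtype(vlen=...)
# maps bytes to vlen ASCII and text to vlen UTF-8 in the file.
def _example_vlen_strings():
    f = h5py.File('example_str.h5', 'w', driver='core', backing_store=False)
    dt = h5py.special_dtype(vlen=bytes)
    ds = f.create_dataset('names', (3,), dtype=dt)
    ds[0] = b"Hello"                # each element holds its own string
    assert ds[0] == b"Hello"
    f.close()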
|
|
|
|
class TestCompound(BaseDataset):

    """
    Feature: Compound types correctly round-trip
    """

    def test_rt(self):
        """ Compound types are read back in correct order (issue 236) """
        dt = np.dtype([('weight', np.float64),
                       ('cputime', np.float64),
                       ('walltime', np.float64),
                       ('parents_offset', np.uint32),
                       ('n_parents', np.uint32),
                       ('status', np.uint8),
                       ('endpoint_type', np.uint8)])

        testdata = np.ndarray((16,), dtype=dt)
        for key in dt.fields:
            testdata[key] = np.random.random((16,)) * 100

        self.f['test'] = testdata
        outdata = self.f['test'][...]
        self.assertTrue(np.all(outdata == testdata))
        self.assertEqual(outdata.dtype, testdata.dtype)

    def test_assign(self):
        dt = np.dtype([('weight', (np.float64, 3)),
                       ('endpoint_type', np.uint8)])

        testdata = np.ndarray((16,), dtype=dt)
        for key in dt.fields:
            testdata[key] = np.random.random(size=testdata[key].shape) * 100

        ds = self.f.create_dataset('test', (16,), dtype=dt)
        for key in dt.fields:
            ds[key] = testdata[key]

        outdata = self.f['test'][...]

        self.assertTrue(np.all(outdata == testdata))
        self.assertEqual(outdata.dtype, testdata.dtype)
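
# --- Usage sketch (not part of the test suite) -----------------------------
# A minimal sketch of compound (record) types: a NumPy structured dtype maps
# to an HDF5 compound type, and fields can be written by name, as the test
# above does.  File name and 'core' driver are illustrative assumptions.
def _example_compound():
    f = h5py.File('example_cmp.h5', 'w', driver='core', backing_store=False)
    dt = np.dtype([('weight', np.float64), ('status', np.uint8)])
    ds = f.create_dataset('records', (4,), dtype=dt)
    ds['weight'] = np.arange(4, dtype=np.float64)  # per-field assignment
    assert ds['weight'][2] == 2.0
    f.close()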
|
|
class TestEnum(BaseDataset):

    """
    Feature: Enum datatype info is preserved, read/write as integer
    """

    EDICT = {'RED': 0, 'GREEN': 1, 'BLUE': 42}

    def test_create(self):
        """ Enum datasets can be created and type correctly round-trips """
        dt = h5py.special_dtype(enum=('i', self.EDICT))
        ds = self.f.create_dataset('x', (100, 100), dtype=dt)
        dt2 = ds.dtype
        dict2 = h5py.check_dtype(enum=dt2)
        self.assertEqual(dict2, self.EDICT)

    def test_readwrite(self):
        """ Enum datasets can be read/written as integers """
        dt = h5py.special_dtype(enum=('i4', self.EDICT))
        ds = self.f.create_dataset('x', (100, 100), dtype=dt)
        ds[35, 37] = 42
        ds[1, :] = 1
        self.assertEqual(ds[35, 37], 42)
        self.assertArrayEqual(ds[1, :], np.array((1,)*100, dtype='i4'))
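
# --- Usage sketch (not part of the test suite) -----------------------------
# A minimal sketch of enum types: special_dtype(enum=(base, mapping)) builds
# the dtype, values are stored as plain integers, and check_dtype(enum=...)
# recovers the name-to-value mapping.
def _example_enum():
    f = h5py.File('example_enum.h5', 'w', driver='core', backing_store=False)
    colors = {'RED': 0, 'GREEN': 1, 'BLUE': 42}
    dt = h5py.special_dtype(enum=('i4', colors))
    ds = f.create_dataset('pixels', (10,), dtype=dt)
    ds[0] = colors['BLUE']
    assert h5py.check_dtype(enum=ds.dtype) == colors
    f.close()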
|
|
|
|
class TestFloats(BaseDataset):

    """
    Test support for mini and extended-precision floats
    """

    def _exectest(self, dt):
        dset = self.f.create_dataset('x', (100,), dtype=dt)
        self.assertEqual(dset.dtype, dt)
        data = np.ones((100,), dtype=dt)
        dset[...] = data
        self.assertArrayEqual(dset[...], data)

    @ut.skipUnless(hasattr(np, 'float16'), "NumPy float16 support required")
    def test_mini(self):
        """ Mini-floats round trip """
        self._exectest(np.dtype('float16'))

    # TODO: move these tests to test_h5t
    def test_mini_mapping(self):
        """ Test mapping for float16 """
        if hasattr(np, 'float16'):
            self.assertEqual(h5t.IEEE_F16LE.dtype, np.dtype('<f2'))
        else:
            self.assertEqual(h5t.IEEE_F16LE.dtype, np.dtype('<f4'))
|
|
|
|
class TestTrackTimes(BaseDataset):

    """
    Feature: track_times
    """

    def test_disable_track_times(self):
        """ Check that when track_times=False, the time stamp = 0 (Jan 1, 1970) """
        ds = self.f.create_dataset('foo', (4,), track_times=False)
        ds_mtime = h5py.h5g.get_objinfo(ds._id).mtime
        self.assertEqual(0, ds_mtime)
|
|
|
|
class TestZeroShape(BaseDataset):

    """
    Features of datasets with (0,)-shape axes
    """

    def test_array_conversion(self):
        """ Empty datasets can be converted to NumPy arrays """
        ds = self.f.create_dataset('x', (0,), maxshape=(None,))
        self.assertEqual(ds.shape, np.array(ds).shape)

        ds = self.f.create_dataset('y', (0, 0), maxshape=(None, None))
        self.assertEqual(ds.shape, np.array(ds).shape)

    def test_reading(self):
        """ Slicing into empty datasets works correctly """
        dt = [('a', 'f'), ('b', 'i')]
        ds = self.f.create_dataset('x', (0,), dtype=dt, maxshape=(None,))
        arr = np.empty((0,), dtype=dt)

        self.assertEqual(ds[...].shape, arr.shape)
        self.assertEqual(ds[...].dtype, arr.dtype)
        self.assertEqual(ds[()].shape, arr.shape)
        self.assertEqual(ds[()].dtype, arr.dtype)
|
|
|
|
class TestRegionRefs(BaseDataset):

    """
    Various features of region references
    """

    def setUp(self):
        BaseDataset.setUp(self)
        self.data = np.arange(100 * 100).reshape((100, 100))
        self.dset = self.f.create_dataset('x', data=self.data)
        self.dset[...] = self.data

    def test_create_ref(self):
        """ Region references can be used as slicing arguments """
        slic = np.s_[25:35, 10:100:5]
        ref = self.dset.regionref[slic]
        self.assertArrayEqual(self.dset[ref], self.data[slic])

    def test_ref_shape(self):
        """ Region reference shape and selection shape """
        slic = np.s_[25:35, 10:100:5]
        ref = self.dset.regionref[slic]
        self.assertEqual(self.dset.regionref.shape(ref), self.dset.shape)
        self.assertEqual(self.dset.regionref.selection(ref), (10, 18))
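
# --- Usage sketch (not part of the test suite) -----------------------------
# A minimal sketch of region references: dset.regionref[slice] captures a
# selection that can later be used to index the dataset again, yielding the
# same data as the original slice.
def _example_regionref():
    f = h5py.File('example_ref.h5', 'w', driver='core', backing_store=False)
    dset = f.create_dataset('x', data=np.arange(100).reshape(10, 10))
    ref = dset.regionref[2:5, 0:10:2]   # reference to a strided block
    assert dset[ref].shape == (3, 5)
    f.close()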
|
|
|
|
class TestAstype(BaseDataset):

    """
    .astype context manager
    """

    def test_astype(self):
        dset = self.f.create_dataset('x', (100,), dtype='i2')
        dset[...] = np.arange(100)
        with dset.astype('f8'):
            self.assertEqual(dset[...].dtype, np.dtype('f8'))
            self.assertTrue(np.all(dset[...] == np.arange(100)))
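
# --- Usage sketch (not part of the test suite) -----------------------------
# A minimal sketch of the .astype context manager: inside the block, reads
# are converted to the requested type; the stored dtype is unchanged.
def _example_astype():
    f = h5py.File('example_astype.h5', 'w', driver='core',
                  backing_store=False)
    dset = f.create_dataset('x', data=np.arange(10, dtype='i2'))
    with dset.astype('f8'):
        out = dset[...]             # read back as float64
    assert out.dtype == np.dtype('f8') and dset.dtype == np.dtype('i2')
    f.close()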
|
|
|
|
class TestScalarCompound(BaseDataset):

    """
    Retrieval of a single field from a scalar compound dataset should
    strip the field info
    """

    def test_scalar_compound(self):
        dt = np.dtype([('a', 'i')])
        dset = self.f.create_dataset('x', (), dtype=dt)
        self.assertEqual(dset['a'].dtype, np.dtype('i'))
|
|
|
|
class TestVlen(BaseDataset):
    def test_int(self):
        dt = h5py.special_dtype(vlen=int)
        ds = self.f.create_dataset('vlen', (4,), dtype=dt)
        ds[0] = np.arange(3)
        ds[1] = np.arange(0)
        ds[2] = [1, 2, 3]
        ds[3] = np.arange(1)
        self.assertArrayEqual(ds[0], np.arange(3))
        self.assertArrayEqual(ds[1], np.arange(0))
        self.assertArrayEqual(ds[2], np.array([1, 2, 3]))
        self.assertArrayEqual(ds[1], np.arange(0))
        ds[0:2] = np.array([np.arange(5), np.arange(4)])
        self.assertArrayEqual(ds[0], np.arange(5))
        self.assertArrayEqual(ds[1], np.arange(4))
        ds[0:2] = np.array([np.arange(3), np.arange(3)])
        self.assertArrayEqual(ds[0], np.arange(3))
        self.assertArrayEqual(ds[1], np.arange(3))

    def test_reuse_from_other(self):
        dt = h5py.special_dtype(vlen=int)
        ds = self.f.create_dataset('vlen', (1,), dtype=dt)
        self.f.create_dataset('vlen2', (1,), ds[()].dtype)

    def test_reuse_struct_from_other(self):
        dt = [('a', int), ('b', h5py.special_dtype(vlen=int))]
        ds = self.f.create_dataset('vlen', (1,), dtype=dt)
        fname = self.f.filename
        self.f.close()
        self.f = h5py.File(fname)
        self.f.create_dataset('vlen2', (1,), self.f['vlen']['b'][()].dtype)

    def test_convert(self):
        dt = h5py.special_dtype(vlen=int)
        ds = self.f.create_dataset('vlen', (3,), dtype=dt)
        ds[0] = np.array([1.4, 1.2])
        ds[1] = np.array([1.2])
        ds[2] = [1.2, 2, 3]
        self.assertArrayEqual(ds[0], np.array([1, 1]))
        self.assertArrayEqual(ds[1], np.array([1]))
        self.assertArrayEqual(ds[2], np.array([1, 2, 3]))
        ds[0:2] = np.array([[0.1, 1.1, 2.1, 3.1, 4], np.arange(4)])
        self.assertArrayEqual(ds[0], np.arange(5))
        self.assertArrayEqual(ds[1], np.arange(4))
        ds[0:2] = np.array([np.array([0.1, 1.2, 2.2]),
                            np.array([0.2, 1.2, 2.2])])
        self.assertArrayEqual(ds[0], np.arange(3))
        self.assertArrayEqual(ds[1], np.arange(3))

    def test_multidim(self):
        dt = h5py.special_dtype(vlen=int)
        ds = self.f.create_dataset('vlen', (2, 2), dtype=dt)
        ds[0, 0] = np.arange(1)
        ds[:, :] = np.array([[np.arange(3), np.arange(2)],
                             [np.arange(1), np.arange(2)]])
        ds[:, :] = np.array([[np.arange(2), np.arange(2)],
                             [np.arange(2), np.arange(2)]])

    def _help_float_testing(self, np_dt, dataset_name='vlen'):
        """
        Helper for testing various vlen numpy data types.
        :param np_dt: Numpy datatype to test
        :param dataset_name: String name of the dataset to create for testing.
        """
        dt = h5py.special_dtype(vlen=np_dt)
        ds = self.f.create_dataset(dataset_name, (5,), dtype=dt)

        # Create some arrays, and assign them to the dataset
        array_0 = np.array([1., 2., 30.], dtype=np_dt)
        array_1 = np.array([100.3, 200.4, 98.1, -10.5, -300.0], dtype=np_dt)

        # Test that a numpy array of different type gets cast correctly
        array_2 = np.array([1, 2, 8], dtype=np.dtype('int32'))
        casted_array_2 = array_2.astype(np_dt)

        # Test that we can set a list of floats.
        list_3 = [1., 2., 900., 0., -0.5]
        list_array_3 = np.array(list_3, dtype=np_dt)

        # Test that a list of integers gets cast correctly
        list_4 = [-1, -100, 0, 1, 9999, 70]
        list_array_4 = np.array(list_4, dtype=np_dt)

        ds[0] = array_0
        ds[1] = array_1
        ds[2] = array_2
        ds[3] = list_3
        ds[4] = list_4

        self.assertArrayEqual(array_0, ds[0])
        self.assertArrayEqual(array_1, ds[1])
        self.assertArrayEqual(casted_array_2, ds[2])
        self.assertArrayEqual(list_array_3, ds[3])
        self.assertArrayEqual(list_array_4, ds[4])

        # Test that we can reassign arrays in the dataset
        list_array_3 = np.array([0.3, 2.2], dtype=np_dt)

        ds[0] = list_array_3[:]

        self.assertArrayEqual(list_array_3, ds[0])

        # Make sure we can close the file.
        self.f.flush()
        self.f.close()

    def test_numpy_float16(self):
        np_dt = np.dtype('float16')
        self._help_float_testing(np_dt)

    def test_numpy_float32(self):
        np_dt = np.dtype('float32')
        self._help_float_testing(np_dt)

    def test_numpy_float64_from_dtype(self):
        np_dt = np.dtype('float64')
        self._help_float_testing(np_dt)

    def test_numpy_float64_2(self):
        np_dt = np.float64
        self._help_float_testing(np_dt)
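
# --- Usage sketch (not part of the test suite) -----------------------------
# A minimal sketch of variable-length numeric types: each element of the
# dataset stores a 1-D array of its own length.
def _example_vlen_int():
    f = h5py.File('example_vlen.h5', 'w', driver='core', backing_store=False)
    dt = h5py.special_dtype(vlen=int)
    ds = f.create_dataset('ragged', (3,), dtype=dt)
    ds[0] = np.arange(5)            # rows may have different lengths
    ds[1] = [1, 2]
    assert len(ds[0]) == 5 and len(ds[1]) == 2
    f.close()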
|
|
|
|
class TestLowOpen(BaseDataset):

    def test_get_access_list(self):
        """ Test H5Dget_access_plist """
        ds = self.f.create_dataset('foo', (4,))
        p_list = ds.id.get_access_plist()

    def test_dapl(self):
        """ Test the dapl keyword to h5d.open """
        dapl = h5py.h5p.create(h5py.h5p.DATASET_ACCESS)
        dset = self.f.create_dataset('x', (100,))
        del dset
        dsid = h5py.h5d.open(self.f.id, b'x', dapl)
        self.assertIsInstance(dsid, h5py.h5d.DatasetID)