673 lines
22 KiB
Python
673 lines
22 KiB
Python
|
# encoding: utf-8
|
||
|
"""
|
||
|
# hickle_legacy2.py
|
||
|
|
||
|
Created by Danny Price 2016-02-03.
|
||
|
|
||
|
This is a legacy handler, for hickle v2 files.
|
||
|
If V3 reading fails, this will be called as a fail-over.
|
||
|
|
||
|
"""
|
||
|
|
||
|
import os
|
||
|
import numpy as np
|
||
|
import h5py as h5
|
||
|
import re
|
||
|
|
||
|
try:
|
||
|
from exceptions import Exception
|
||
|
from types import NoneType
|
||
|
except ImportError:
|
||
|
pass # above imports will fail in python3
|
||
|
|
||
|
import warnings
|
||
|
__version__ = "2.0.4"
|
||
|
__author__ = "Danny Price"
|
||
|
|
||
|
|
||
|
##################
|
||
|
# Error handling #
|
||
|
##################
|
||
|
|
||
|
class FileError(Exception):
    """Signal that a file argument could not be opened.

    The exception carries a fixed message and its constructor accepts
    no arguments.
    """

    def __init__(self):
        # Nothing to store: the message is produced by __str__.
        pass

    def __str__(self):
        message = ("Cannot open file. Please pass either a filename "
                   "string, a file object, or a h5py.File")
        return message
|
||
|
|
||
|
|
||
|
class ClosedFileError(Exception):
    """Signal that an HDF5 file handle was already closed.

    The exception carries a fixed message and its constructor accepts
    no arguments.
    """

    def __init__(self):
        # Nothing to store: the message is produced by __str__.
        pass

    def __str__(self):
        message = ("HDF5 file has been closed. Please pass either "
                   "a filename string, a file object, or an open h5py.File")
        return message
|
||
|
|
||
|
|
||
|
class NoMatchError(Exception):
    """Signal that an object's type is not understood (or not supported).

    The exception carries a fixed message and its constructor accepts
    no arguments.
    """

    def __init__(self):
        # Nothing to store: the message is produced by __str__.
        pass

    def __str__(self):
        message = ("Error: this type of python object cannot be converted into a "
                   "hickle.")
        return message
|
||
|
|
||
|
|
||
|
class ToDoError(Exception):
    """Signal that a requested feature has not been implemented.

    The exception carries a fixed message and its constructor accepts
    no arguments.
    """

    def __init__(self):
        # Nothing to store: the message is produced by __str__.
        pass

    def __str__(self):
        message = "Error: this functionality hasn't been implemented yet."
        return message
|
||
|
|
||
|
|
||
|
######################
|
||
|
# H5PY file wrappers #
|
||
|
######################
|
||
|
|
||
|
class H5GroupWrapper(h5.Group):
    """ h5py Group subclass that applies a ``track_times`` setting.

    ``track_times`` is a boolean attribute that can be set to False, so
    that two files created at different times will have identical MD5
    hashes. When the attribute is absent it is treated as True.
    """

    def create_dataset(self, *args, **kwargs):
        """ Create a dataset with ``track_times`` set from this group.

        Any ``track_times`` keyword supplied by the caller is overwritten
        by the group's own setting.
        """
        track = getattr(self, 'track_times', True)
        kwargs['track_times'] = track
        return super(H5GroupWrapper, self).create_dataset(*args, **kwargs)

    def create_group(self, *args, **kwargs):
        """ Create a sub-group that inherits this group's ``track_times``.

        The new group is re-classed to H5GroupWrapper so that datasets
        created beneath it use the same setting.
        """
        track = getattr(self, 'track_times', True)
        child = super(H5GroupWrapper, self).create_group(*args, **kwargs)
        child.__class__ = H5GroupWrapper
        child.track_times = track
        return child
|
||
|
|
||
|
|
||
|
class H5FileWrapper(h5.File):
    """ h5py File subclass that applies a ``track_times`` setting.

    ``track_times`` is a boolean attribute that can be set to False, so
    that two files created at different times will have identical MD5
    hashes. When the attribute is absent it is treated as True.
    """

    def create_dataset(self, *args, **kwargs):
        """ Create a dataset with ``track_times`` set from this file.

        Any ``track_times`` keyword supplied by the caller is overwritten
        by the file's own setting.
        """
        track = getattr(self, 'track_times', True)
        kwargs['track_times'] = track
        return super(H5FileWrapper, self).create_dataset(*args, **kwargs)

    def create_group(self, *args, **kwargs):
        """ Create a group that inherits this file's ``track_times``.

        The new group is re-classed to H5GroupWrapper so that datasets
        created beneath it use the same setting.
        """
        track = getattr(self, 'track_times', True)
        child = super(H5FileWrapper, self).create_group(*args, **kwargs)
        child.__class__ = H5GroupWrapper
        child.track_times = track
        return child
|
||
|
|
||
|
|
||
|
def file_opener(f, mode='r', track_times=True):
    """ A file opener helper function with some error handling. This can open
    files through a file object, a h5py file, or just the filename.

    Args:
        f (file, h5py.File, or string): File-identifier, e.g. filename or
            file object. A file object is closed and its name and mode are
            used to reopen it with h5py.
        mode (str): File open mode. Only used if opening by filename string;
            for a file object the object's own mode replaces it.
        track_times (bool): Track time in HDF5; turn off if you want hickling
            at different times to produce identical files (e.g. for MD5 hash
            check).

    Returns:
        The opened h5py file, re-classed to H5FileWrapper and carrying the
        requested ``track_times`` attribute.

    Raises:
        ClosedFileError: if an h5py file is passed whose ``filename``
            cannot be read (ValueError).
        FileError: if ``f`` is none of the supported types.
    """
    # ``file`` and ``unicode`` only exist on Python 2. Resolve them here so
    # the function does not die with NameError on Python 3.
    try:
        file_types = (file,)
        string_types = (str, unicode)
    except NameError:
        import io
        file_types = (io.IOBase,)
        string_types = (str,)

    # Were we handed a file object or just a file name string?
    if isinstance(f, file_types):
        filename, mode = f.name, f.mode
        f.close()
        h5f = h5.File(filename, mode)
    elif isinstance(f, string_types):
        h5f = h5.File(f, mode)
    elif isinstance(f, (H5FileWrapper, h5._hl.files.File)):
        try:
            # Only probing: a ValueError here is treated as "file closed".
            f.filename
        except ValueError:
            raise ClosedFileError()
        h5f = f
    else:
        print(type(f))
        raise FileError()

    # Swap in the wrapper class so create_dataset/create_group honour
    # the track_times flag.
    h5f.__class__ = H5FileWrapper
    h5f.track_times = track_times
    return h5f
|
||
|
|
||
|
|
||
|
###########
|
||
|
# DUMPERS #
|
||
|
###########
|
||
|
|
||
|
def check_is_iterable(py_obj):
    """ Check whether a python object is iterable.

    Note: this treats unicode and string as NON ITERABLE

    Args:
        py_obj: python object to test

    Returns:
        iter_ok (bool): True if item is iterable, False is item is not
    """
    # ``unicode`` only exists on Python 2. On Python 3 fall back to
    # (str, bytes), which covers the same text and byte-string pair.
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str, bytes)

    # Exact type match on purpose: subclasses of the string types are
    # still probed with iter() below.
    if type(py_obj) in string_types:
        return False

    try:
        iter(py_obj)
    except TypeError:
        return False
    return True
|
||
|
|
||
|
|
||
|
def check_iterable_item_type(iter_obj):
    """ Check if all items within an iterable are the same type.

    Args:
        iter_obj: iterable object

    Returns:
        iter_type: type of item contained within the iterable. If
            the iterable has many types, or is empty, a boolean False
            is returned instead.

    References:
        http://stackoverflow.com/questions/13252333/python-check-if-all-elements-of-a-list-are-the-same-type
    """
    iseq = iter(iter_obj)

    try:
        first_type = type(next(iseq))
    except StopIteration:
        # An empty iterable has no item type. Report False rather than
        # letting StopIteration escape to the caller.
        return False

    # The iterator has already consumed the first item, so only the
    # remaining items are compared against it.
    if all(type(x) is first_type for x in iseq):
        return first_type
    return False
|
||
|
|
||
|
|
||
|
def check_is_numpy_array(py_obj):
    """ Tell whether an object is exactly a numpy array or masked array.

    The comparison is on the exact type, so instances of other ndarray
    subclasses are reported as False.

    Args:
        py_obj: python object to check whether it is a numpy array

    Returns:
        is_numpy (bool): True if it is a numpy array (masked or regular),
            else False
    """
    array_types = (np.ndarray, np.ma.MaskedArray)
    return type(py_obj) in array_types
|
||
|
|
||
|
|
||
|
def _dump(py_obj, h_group, call_id=0, **kwargs):
    """ Write one python object beneath an HDF5 group, recursing as needed.

    Numpy arrays, non-iterables, and iterables whose items all share one
    basic python type are written as a single dataset. Any other iterable
    becomes a sub-group whose members are dumped one by one.

    This function is called recursively by the main dump() function.

    Args:
        py_obj: python object to dump.
        h_group (h5.File.group): group to dump data into.
        call_id (int): index to identify object's relative location in the
            iterable.
        **kwargs: forwarded to the dataset-creation helpers.
    """
    # NOTE: ``long`` and ``unicode`` are Python 2 builtins.
    basic_types = set([bool, int, float, long, complex, str, unicode])

    # Numpy arrays are always stored directly as a dataset.
    if check_is_numpy_array(py_obj):
        create_hkl_dataset(py_obj, h_group, call_id, **kwargs)
        return

    # Anything that cannot be iterated is stored as a dataset too.
    if not check_is_iterable(py_obj):
        create_hkl_dataset(py_obj, h_group, call_id, **kwargs)
        return

    # item_type is False when the items do not share a single type.
    item_type = check_iterable_item_type(py_obj)
    if item_type is not False and item_type in basic_types:
        # e.g. a list of ints: treated as one dataset.
        create_hkl_dataset(py_obj, h_group, call_id, **kwargs)
        return

    # Mixed types, or a shared non-basic type (e.g. list of lists):
    # make a group and dump every member into it by position.
    h_subgroup = create_hkl_group(py_obj, h_group, call_id)
    for position, member in enumerate(py_obj):
        _dump(member, h_subgroup, call_id=position, **kwargs)
|
||
|
|
||
|
|
||
|
def dump(py_obj, file_obj, mode='w', track_times=True, path='/', **kwargs):
    """ Write a hickled representation of py_obj to an HDF5 file.

    The file is always closed before this function returns or raises.

    Args:
        py_obj (object): python object to store in a Hickle
        file_obj: file object, filename string, or h5py.File object
            in which to store the object.
        mode (str): optional argument, 'r' (read only), 'w' (write) or
            'a' (append). Ignored if file_obj is a file object.
        track_times (bool): optional argument. If set to False, repeated
            hickling will produce identical files.
        path (str): path within hdf5 file to save data to. Defaults to root /
        **kwargs: forwarded to the dataset-creation helpers, e.g.
            compression (str): None, gzip, lzf (+ szip, if installed).

    Raises:
        NoMatchError: re-raised after the output file has been closed and
            (if possible) removed.
    """
    # Open outside the try block: if opening fails there is nothing to
    # close or clean up.
    h5f = file_opener(file_obj, mode, track_times)
    remove_partial_file = False

    try:
        h5f.attrs["CLASS"] = 'hickle'
        h5f.attrs["VERSION"] = 2
        h5f.attrs["type"] = ['hickle']

        h_root_group = h5f.get(path)

        if h_root_group is None:
            h_root_group = h5f.create_group(path)
            h_root_group.attrs["type"] = ['hickle']

        _dump(py_obj, h_root_group, **kwargs)
    except NoMatchError:
        # The file is unusable; delete it once it has been closed.
        remove_partial_file = True
        raise
    finally:
        # Runs on success and on every exception, so the handle is not
        # leaked when _dump fails with something other than NoMatchError.
        # The filename must be read before closing.
        fname = h5f.filename
        h5f.close()
        if remove_partial_file:
            try:
                os.remove(fname)
            except OSError:
                warnings.warn("Dump failed. Could not remove %s" % fname)
|
||
|
|
||
|
|
||
|
def create_dataset_lookup(py_obj):
    """ Pick the dumper function for a python object by its exact type.

    This is a dictionary-based equivalent of a case statement. The
    lookup uses type(py_obj), so subclasses of the listed types are not
    matched and fall through to no_match.

    Args:
        py_obj: python object to look-up what function to use to dump to disk

    Returns:
        match: function that should be used to dump data to a new dataset
    """
    # NOTE: ``unicode``, ``long`` and ``NoneType`` are Python 2 names here.
    handlers = {
        dict: create_dict_dataset,
        NoneType: create_none_dataset,
    }

    for container_type in (list, tuple, set):
        handlers[container_type] = create_listlike_dataset

    for text_type in (str, unicode):
        handlers[text_type] = create_stringlike_dataset

    for scalar_type in (int, float, long, bool, complex):
        handlers[scalar_type] = create_python_dtype_dataset

    for array_type in (np.ndarray, np.ma.core.MaskedArray):
        handlers[array_type] = create_np_array_dataset

    numpy_scalar_types = (
        np.float16, np.float32, np.float64,
        np.int8, np.int16, np.int32, np.int64,
        np.uint8, np.uint16, np.uint32, np.uint64,
        np.complex64, np.complex128,
    )
    for np_scalar_type in numpy_scalar_types:
        handlers[np_scalar_type] = create_np_dtype_dataset

    return handlers.get(type(py_obj), no_match)
|
||
|
|
||
|
|
||
|
def create_hkl_dataset(py_obj, h_group, call_id=0, **kwargs):
    """ Write a python object into the hickle file using its type's dumper.

    Args:
        py_obj: python object to dump.
        h_group (h5.File.group): group to dump data into.
        call_id (int): index to identify object's relative location in the
            iterable.
        **kwargs: forwarded unchanged to the selected dumper.
    """
    # Select the dumper from the object's type, then let it do the write.
    dumper = create_dataset_lookup(py_obj)
    dumper(py_obj, h_group, call_id, **kwargs)
|
||
|
|
||
|
|
||
|
def create_hkl_group(py_obj, h_group, call_id=0):
    """ Make a sub-group named ``data_<call_id>`` tagged with py_obj's type.

    Args:
        py_obj: python object whose type string is stored in the new
            group's "type" attribute.
        h_group (h5.File.group): group to dump data into.
        call_id (int): index to identify object's relative location in the
            iterable.

    Returns:
        h_subgroup: the newly created group.
    """
    group_name = 'data_%i' % call_id
    h_subgroup = h_group.create_group(group_name)
    type_tag = str(type(py_obj))
    h_subgroup.attrs["type"] = [type_tag]
    return h_subgroup
|
||
|
|
||
|
|
||
|
def create_listlike_dataset(py_obj, h_group, call_id=0, **kwargs):
    """ Dumper for list, set, tuple

    The object is converted to a list before being written. Its original
    type string is stored in the dataset's "type" attribute.

    Args:
        py_obj: python object to dump; should be list-like
        h_group (h5.File.group): group to dump data into.
        call_id (int): index to identify object's relative location in the
            iterable.
        **kwargs: forwarded to h_group.create_dataset.
    """
    type_tag = str(type(py_obj))
    as_list = list(py_obj)
    dataset_name = 'data_%i' % call_id
    dset = h_group.create_dataset(dataset_name, data=as_list, **kwargs)
    dset.attrs["type"] = [type_tag]
|
||
|
|
||
|
|
||
|
def create_np_dtype_dataset(py_obj, h_group, call_id=0, **kwargs):
    """ Dumper for numpy scalar values

    The dataset is tagged 'np_dtype' and the stored dataset's dtype is
    recorded as a string in the "np_dtype" attribute.

    Args:
        py_obj: python object to dump; should be a numpy scalar, e.g.
            np.float16(1)
        h_group (h5.File.group): group to dump data into.
        call_id (int): index to identify object's relative location in the
            iterable.
        **kwargs: forwarded to h_group.create_dataset.
    """
    dataset_name = 'data_%i' % call_id
    dset = h_group.create_dataset(dataset_name, data=py_obj, **kwargs)
    dset.attrs["type"] = ['np_dtype']
    dset.attrs["np_dtype"] = str(dset.dtype)
|
||
|
|
||
|
|
||
|
def create_python_dtype_dataset(py_obj, h_group, call_id=0, **kwargs):
    """ Dumper for built-in python scalars

    The dataset is tagged 'python_dtype' and the python type's string
    form is recorded in the "python_subdtype" attribute.

    Args:
        py_obj: python object to dump; should be a python type (int, float,
            bool etc)
        h_group (h5.File.group): group to dump data into.
        call_id (int): index to identify object's relative location in the
            iterable.
        **kwargs: forwarded to h_group.create_dataset.
    """
    python_type = type(py_obj)
    dataset_name = 'data_%i' % call_id
    dset = h_group.create_dataset(dataset_name, data=py_obj,
                                  dtype=python_type, **kwargs)
    dset.attrs["type"] = ['python_dtype']
    dset.attrs['python_subdtype'] = str(python_type)
|
||
|
|
||
|
|
||
|
def create_dict_dataset(py_obj, h_group, call_id=0, **kwargs):
    """ Dumper for dictionaries: one sub-group per key

    A group ``data_<call_id>`` tagged 'dict' is created. Each key gets its
    own 'dict_item' sub-group, named after the key, and the value is
    dumped into that sub-group.

    Args:
        py_obj: python object to dump; should be dictionary
        h_group (h5.File.group): group to dump data into.
        call_id (int): index to identify object's relative location in the
            iterable.
        **kwargs: forwarded to _dump for every value.
    """
    dict_group_name = 'data_%i' % call_id
    h_dictgroup = h_group.create_group(dict_group_name)
    h_dictgroup.attrs["type"] = ['dict']

    for item_key, item_value in py_obj.items():
        # The key itself is used as the HDF5 group name.
        h_itemgroup = h_dictgroup.create_group(item_key)
        h_itemgroup.attrs["type"] = ['dict_item']
        _dump(item_value, h_itemgroup, call_id=0, **kwargs)
|
||
|
|
||
|
|
||
|
def create_np_array_dataset(py_obj, h_group, call_id=0, **kwargs):
    """ Dumper for numpy arrays, regular or masked

    A regular array is written as ``data_<call_id>`` tagged 'ndarray'.
    A masked array is written as two datasets: the data
    (``data_<call_id>``, tagged 'ndarray_masked_data') and its mask
    (``data_<call_id>_mask``, tagged 'ndarray_masked_mask').

    Args:
        py_obj: python object to dump; should be a numpy array or
            np.ma.array (masked)
        h_group (h5.File.group): group to dump data into.
        call_id (int): index to identify object's relative location in the
            iterable.
        **kwargs: forwarded to h_group.create_dataset.
    """
    is_masked = isinstance(py_obj, np.ma.MaskedArray)

    # The data dataset is written the same way in both cases.
    data_dset = h_group.create_dataset('data_%i' % call_id, data=py_obj, **kwargs)

    if not is_masked:
        data_dset.attrs["type"] = ['ndarray']
        return

    mask_dset = h_group.create_dataset('data_%i_mask' % call_id,
                                       data=py_obj.mask, **kwargs)
    data_dset.attrs["type"] = ['ndarray_masked_data']
    mask_dset.attrs["type"] = ['ndarray_masked_mask']
|
||
|
|
||
|
|
||
|
def create_stringlike_dataset(py_obj, h_group, call_id=0, **kwargs):
    """ Dumper for string-like objects

    A ``str`` is written as a one-element dataset tagged 'string'.
    Anything else is written into a one-element variable-length unicode
    dataset tagged 'unicode'.

    Args:
        py_obj: python object to dump; should be string-like (unicode or
            string)
        h_group (h5.File.group): group to dump data into.
        call_id (int): index to identify object's relative location in the
            iterable.
        **kwargs: forwarded to h_group.create_dataset.
    """
    dataset_name = 'data_%i' % call_id

    if not isinstance(py_obj, str):
        # NOTE: ``unicode`` is a Python 2 builtin.
        vlen_unicode = h5.special_dtype(vlen=unicode)
        dset = h_group.create_dataset(dataset_name, shape=(1, ),
                                      dtype=vlen_unicode, **kwargs)
        dset[0] = py_obj
        dset.attrs['type'] = ['unicode']
        return

    dset = h_group.create_dataset(dataset_name, data=[py_obj], **kwargs)
    dset.attrs["type"] = ['string']
|
||
|
|
||
|
|
||
|
def create_none_dataset(py_obj, h_group, call_id=0, **kwargs):
    """ Dumper for None

    None is written as a placeholder dataset containing ``[0]``, tagged
    'none'. py_obj itself is not written.

    Args:
        py_obj: python object to dump; must be None object
        h_group (h5.File.group): group to dump data into.
        call_id (int): index to identify object's relative location in the
            iterable.
        **kwargs: forwarded to h_group.create_dataset.
    """
    dataset_name = 'data_%i' % call_id
    placeholder = h_group.create_dataset(dataset_name, data=[0], **kwargs)
    placeholder.attrs["type"] = ['none']
|
||
|
|
||
|
|
||
|
def no_match(py_obj, h_group, call_id=0, **kwargs):
    """ Fallback dumper: pickle the object and warn

    Used when no dedicated dumper matches the object's type. The object
    is serialized with dill if it can be imported, otherwise with
    cPickle. The result is stored as a one-element dataset tagged
    'pickle', and a warning is emitted.

    Args:
        py_obj: python object to dump; default if item is not matched.
        h_group (h5.File.group): group to dump data into.
        call_id (int): index to identify object's relative location in the
            iterable.
        **kwargs: accepted for signature compatibility; not forwarded to
            create_dataset.
    """
    try:
        import dill as pickler
    except ImportError:
        import cPickle as pickler

    serialized = pickler.dumps(py_obj)
    dset = h_group.create_dataset('data_%i' % call_id, data=[serialized])
    dset.attrs["type"] = ['pickle']

    warnings.warn("%s type not understood, data have been "
                  "serialized" % type(py_obj))
|
||
|
|
||
|
|
||
|
#############
|
||
|
## LOADERS ##
|
||
|
#############
|
||
|
|
||
|
class PyContainer(list):
    """ A list-based staging container for loaded datasets.

    Building a tree-like structure needs a container with an append()
    method, which python tuples and sets do not provide. Items are
    therefore collected in this list subclass and converted to a list,
    tuple, set or dict afterwards.
    """

    def __init__(self):
        super(PyContainer, self).__init__()
        # Type tag that convert() uses to pick the target type.
        self.container_type = None
        # Name associated with this container; convert() reads it from
        # the child containers when building a dict.
        self.name = None

    def convert(self):
        """ Convert from PyContainer to python core data type.

        Returns: self, either as a list, tuple, set or dict. If the
            container_type is not recognised, self is returned unchanged.
        """
        simple_conversions = (
            ("<type 'list'>", list),
            ("<type 'tuple'>", tuple),
            ("<type 'set'>", set),
        )
        for type_tag, caster in simple_conversions:
            if self.container_type == type_tag:
                return caster(self)

        if self.container_type != "dict":
            return self

        # Each child supplies a key (last component of its name) and a
        # value (its first element).
        dict_keys = [str(child.name.rsplit('/', 1)[-1]) for child in self]
        dict_values = [child[0] for child in self]
        return dict(zip(dict_keys, dict_values))
|
||
|
|
||
|
|
||
|
def load(fileobj, path='/', safe=True):
    """ Load a hickle file and reconstruct a python object

    If the file lacks the 'CLASS' or 'VERSION' root attributes, loading
    is delegated to the older ``hickle_legacy`` module.

    Args:
        fileobj: file object, h5py.File, or filename string
        path (str): path within hdf5 file to load data from. Defaults to
            root /
        safe (bool): Disable automatic depickling of arbitrary python objects.
            DO NOT set this to False unless the file is from a trusted source.
            (see http://www.cs.jhu.edu/~s/musings/pickle.html for an
            explanation). Here it is only forwarded to hickle_legacy.load.

    Returns:
        The reconstructed python object.
    """
    h5f = None
    try:
        h5f = file_opener(fileobj)
        h_root_group = h5f.get(path)

        # Explicit check rather than ``assert``: asserts are stripped under
        # ``python -O``, which would silently disable the legacy fall-back.
        attr_names = h5f.attrs.keys()
        if 'CLASS' not in attr_names or 'VERSION' not in attr_names:
            import hickle_legacy
            return hickle_legacy.load(fileobj, safe)

        py_container = PyContainer()
        py_container.container_type = 'hickle'
        py_container = _load(py_container, h_root_group)
        # First [0] selects the root group's container, the second the
        # single object dumped into it.
        return py_container[0][0]
    finally:
        # h5f stays None if file_opener itself failed.
        if h5f is not None:
            h5f.close()
|
||
|
|
||
|
|
||
|
def load_dataset(h_node):
    """ Load a dataset, converting into its correct python type

    The conversion is selected by the first entry of the dataset's "type"
    attribute. Unrecognised types are printed and the raw data returned.

    Args:
        h_node (h5py dataset): h5py dataset object to read

    Returns:
        data: reconstructed python object from loaded data
    """
    # ``long`` and ``unicode`` only exist on Python 2; fall back to their
    # Python 3 equivalents instead of raising NameError.
    try:
        long_type, text_type = long, unicode
    except NameError:
        long_type, text_type = int, str

    py_type = h_node.attrs["type"][0]

    if h_node.shape == ():
        # Scalar dataset. ``h_node[()]`` replaces ``Dataset.value``,
        # which was removed in h5py 3.
        data = h_node[()]
    else:
        data = h_node[:]

    if py_type == "<type 'list'>":
        return list(data)
    elif py_type == "<type 'tuple'>":
        return tuple(data)
    elif py_type == "<type 'set'>":
        return set(data)
    elif py_type == "np_dtype":
        subtype = h_node.attrs["np_dtype"]
        data = np.array(data, dtype=subtype)
        return data
    elif py_type == 'ndarray':
        return np.array(data)
    elif py_type == 'ndarray_masked_data':
        # The mask is stored as a sibling dataset named "<data name>_mask".
        mask_path = h_node.name + "_mask"
        h_root = h_node.parent
        try:
            mask = h_root.get(mask_path)[:]
        except (IndexError, ValueError):
            # Slicing failed; hand the mask dataset itself to numpy.
            mask = h_root.get(mask_path)
        data = np.ma.array(data, mask=mask)
        return data
    elif py_type == 'python_dtype':
        subtype = h_node.attrs["python_subdtype"]
        type_dict = {
            "<type 'int'>": int,
            "<type 'float'>": float,
            "<type 'long'>": long_type,
            "<type 'bool'>": bool,
            "<type 'complex'>": complex
        }
        tcast = type_dict.get(subtype)
        return tcast(data)
    elif py_type == 'string':
        return str(data[0])
    elif py_type == 'unicode':
        return text_type(data[0])
    elif py_type == 'none':
        return None
    else:
        print(h_node.name, py_type, h_node.attrs.keys())
        return data
|
||
|
|
||
|
|
||
|
def sort_keys(key_list):
    """ Take a list of strings and sort it by integer value within string

    The first run of digits in each key is used as the primary sort value.
    Keys with the same integer are ordered by the key string itself.

    Args:
        key_list (list): List of keys; every key must contain at least one
            digit.

    Returns:
        key_list_sorted (list): List of keys, sorted by integer
    """
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    first_number = re.compile(r'\d+')

    def int_then_key(key):
        return (int(first_number.search(key).group(0)), key)

    return sorted(key_list, key=int_then_key)
|
||
|
|
||
|
|
||
|
def _load(py_container, h_group):
    """ Load a hickle file

    Recursive function to load hdf5 data into a PyContainer(). A file or
    group is loaded into a fresh PyContainer, which is converted and then
    appended to py_container. Anything else is read as a dataset and
    appended directly.

    Args:
        py_container (PyContainer): Python container to load data into
        h_group (h5 group or dataset): h5py object, group or dataset, to
            spider and load all datasets.

    Returns:
        py_container: the container passed in, with one item appended.
    """
    group_like = (H5FileWrapper, h5._hl.group.Group)

    if not isinstance(h_group, group_like):
        # Not a file or group, so it must be a dataset.
        py_container.append(load_dataset(h_group))
        return py_container

    node = PyContainer()
    node.container_type = h_group.attrs['type'][0]
    node.name = h_group.name

    # Dict members are named by key, so they are visited in the order
    # h5py returns them. Everything else is ordered by the integer
    # embedded in each member name.
    if node.container_type == 'dict':
        member_names = h_group.keys()
    else:
        member_names = sort_keys(h_group.keys())

    for member_name in member_names:
        node = _load(node, h_group[member_name])

    py_container.append(node.convert())
    return py_container
|