laywerrobot/lib/python3.6/site-packages/hickle/loaders/load_python3.py
2020-08-27 21:55:39 +02:00

201 lines
6.2 KiB
Python

# encoding: utf-8
"""
# load_python3.py
Handlers for dumping and loading built-in python types.
NB: As these are for built-in types, they are critical to the functioning of hickle.
"""
import six
from hickle.helpers import get_type_and_data
try:
from exceptions import Exception
except ImportError:
pass # above imports will fail in python3
try:
ModuleNotFoundError # This fails on Py3.5 and below
except NameError:
ModuleNotFoundError = ImportError
import h5py as h5
def get_py3_string_type(h_node):
    """Return the recorded Python 3 string type for the items of a dumped list.

    Notes:
        Py3 string handling is a bit funky and doesn't play too nicely with HDF5.
        Metadata is added at dump time to say if the strings in a list started
        off as bytes, string, etc. This helper loads that metadata from the
        ``py3_string_type`` attribute of the node.

    Args:
        h_node: node whose ``attrs`` mapping may hold ``py3_string_type``.

    Returns:
        The first entry of the ``py3_string_type`` attribute, or None when the
        attribute is missing, empty or not indexable.
    """
    try:
        return h_node.attrs["py3_string_type"][0]
    except (KeyError, IndexError, TypeError):
        # Absent or malformed metadata simply means "no special string
        # handling"; anything else (e.g. KeyboardInterrupt) must propagate.
        return None
def create_listlike_dataset(py_obj, h_group, call_id=0, **kwargs):
    """Dumper for list, set, tuple.

    Args:
        py_obj: python object to dump; should be list-like
        h_group (h5.File.group): group to dump data into.
        call_id (int): index to identify object's relative location in the iterable.
        **kwargs: forwarded to ``h_group.create_dataset``.
    """
    dtype = str(type(py_obj))
    obj = list(py_obj)

    # h5py does not handle Py3 'str' objects well, so str items are written as
    # UTF-8 bytes. Only the first element is inspected because this dumper is
    # only called when all elements have the same type. An empty container has
    # no first element and needs no string handling.
    py3_str_type = None
    if obj:
        first_type = type(obj[0])
        if first_type in (str, bytes):
            py3_str_type = bytes(str(first_type), 'ascii')
        if first_type is str:
            obj = [bytes(item, 'utf8') for item in obj]

    d = h_group.create_dataset('data_%i' % call_id, data=obj, **kwargs)
    d.attrs["type"] = [bytes(dtype, 'ascii')]

    # Record the original item type so loading can tell str apart from bytes.
    if py3_str_type is not None:
        d.attrs["py3_string_type"] = [py3_str_type]
def create_python_dtype_dataset(py_obj, h_group, call_id=0, **kwargs):
    """Write a scalar of a built-in python type to the h5py file.

    Args:
        py_obj: python object to dump; should be a python type (int, float, bool etc)
        h_group (h5.File.group): group to dump data into.
        call_id (int): index to identify object's relative location in the iterable.
        **kwargs: accepted for interface compatibility but not forwarded,
            because kwargs such as compression do not work on scalars.
    """
    scalar_type = type(py_obj)
    dataset_name = 'data_%i' % call_id
    dataset = h_group.create_dataset(dataset_name, data=py_obj, dtype=scalar_type)
    dataset.attrs["type"] = [b'python_dtype']
    # The name of the original type is stored next to the generic type tag.
    dataset.attrs['python_subdtype'] = bytes(str(scalar_type), 'ascii')
def create_stringlike_dataset(py_obj, h_group, call_id=0, **kwargs):
    """Write a single bytes or str object to the h5py file.

    Objects that are neither bytes nor str are ignored: nothing is written.

    Args:
        py_obj: python object to dump; should be string-like (bytes or str)
        h_group (h5.File.group): group to dump data into.
        call_id (int): index to identify object's relative location in the iterable.
        **kwargs: forwarded to ``h_group.create_dataset``.
    """
    if isinstance(py_obj, bytes):
        # bytes are stored as a one-element dataset
        bytes_dset = h_group.create_dataset('data_%i' % call_id, data=[py_obj], **kwargs)
        bytes_dset.attrs["type"] = [b'bytes']
        return

    if isinstance(py_obj, str):
        # str goes into a one-element dataset of the variable-length str dtype
        vlen_str = h5.special_dtype(vlen=str)
        str_dset = h_group.create_dataset('data_%i' % call_id, shape=(1, ), dtype=vlen_str, **kwargs)
        str_dset[0] = py_obj
        str_dset.attrs['type'] = [b'string']
def create_none_dataset(py_obj, h_group, call_id=0, **kwargs):
    """Write a placeholder dataset that stands for None.

    Args:
        py_obj: python object to dump; must be None object (its value is not read)
        h_group (h5.File.group): group to dump data into.
        call_id (int): index to identify object's relative location in the iterable.
        **kwargs: forwarded to ``h_group.create_dataset``.
    """
    dataset_name = 'data_%i' % call_id
    # The stored [0] is only a placeholder; the "type" attribute marks the None.
    placeholder = h_group.create_dataset(dataset_name, data=[0], **kwargs)
    placeholder.attrs["type"] = [b'none']
def load_list_dataset(h_node):
    """Load a dumped list-like dataset back into a python list.

    Args:
        h_node: node holding the dumped data and its metadata attributes.

    Returns:
        list: items as builtin bytes or str when the ``py3_string_type``
        metadata says so, otherwise the stored items unchanged.
    """
    _, data = get_type_and_data(h_node)
    py3_str_type = get_py3_string_type(h_node)

    if py3_str_type == b"<class 'bytes'>":
        # Convert the stored (numpy) bytes items straight to builtin bytes.
        # Going through str first would raise UnicodeDecodeError for payloads
        # that are not valid UTF-8.
        return [bytes(item) for item in data]
    if py3_str_type == b"<class 'str'>":
        # str items were written as UTF-8 bytes; decode them back.
        return [str(item, 'utf8') for item in data]
    return list(data)
def load_tuple_dataset(h_node):
    """Load a dumped tuple: read it as a list, then convert to a tuple."""
    return tuple(load_list_dataset(h_node))
def load_set_dataset(h_node):
    """Load a dumped set: read it as a list, then convert to a set."""
    return set(load_list_dataset(h_node))
def load_bytes_dataset(h_node):
    """Load a dumped bytes object from the first element of the dataset."""
    _, stored = get_type_and_data(h_node)
    first_item = stored[0]
    return bytes(first_item)
def load_string_dataset(h_node):
    """Load a dumped str object from the first element of the dataset.

    Args:
        h_node: node holding the dumped string.

    Returns:
        str: the stored text.
    """
    _, data = get_type_and_data(h_node)
    value = data[0]
    if isinstance(value, bytes):
        # Variable-length strings may be read back as bytes (h5py >= 3 does
        # this); str(b'abc') would yield "b'abc'", so decode instead.
        return value.decode('utf8')
    return str(value)
def load_unicode_dataset(h_node):
    """Load a dumped unicode string from the first element of the dataset.

    Args:
        h_node: node holding the dumped string.

    Returns:
        str: the stored text (``str`` is the unicode type on Python 3).
    """
    _, data = get_type_and_data(h_node)
    value = data[0]
    if isinstance(value, bytes):
        # Avoid str(b'abc') == "b'abc'" when the value is read back as bytes.
        return value.decode('utf8')
    # The Python 2 name `unicode` does not exist on Python 3 and raised
    # NameError here; `str` is its Python 3 equivalent.
    return str(value)
def load_none_dataset(h_node):
    """Load a dumped None; the node content is not read."""
    return None
def load_pickled_data(h_node):
    """Load an object that was dumped as a pickle.

    Args:
        h_node: node whose first data element holds the pickled payload.

    Returns:
        The unpickled python object.
    """
    _, data = get_type_and_data(h_node)
    try:
        import cPickle as pickle
    except ImportError:
        # ModuleNotFoundError is a subclass of ImportError, so this also
        # covers interpreters where that name does not exist.
        import pickle
    # SECURITY: pickle.loads can execute arbitrary code while unpickling.
    # Only load files that come from a trusted source.
    return pickle.loads(data[0])
def load_python_dtype_dataset(h_node):
    """Load a dumped python scalar and cast it back to its original type.

    Args:
        h_node: node holding the scalar; its ``python_subdtype`` attribute
            names the original python type.

    Returns:
        The stored data cast to int, float, bool or complex.

    Raises:
        TypeError: if ``python_subdtype`` is not one of the supported types.
    """
    _, data = get_type_and_data(h_node)
    subtype = h_node.attrs["python_subdtype"]
    type_dict = {
        b"<class 'int'>": int,
        b"<class 'float'>": float,
        b"<class 'bool'>": bool,
        b"<class 'complex'>": complex,
    }
    tcast = type_dict.get(subtype)
    if tcast is None:
        # Calling None would also raise TypeError, but with an opaque
        # "'NoneType' object is not callable" message.
        raise TypeError("Unsupported python_subdtype: %r" % (subtype,))
    return tcast(data)
# Dump dispatch table: maps a built-in python type to the function that
# writes objects of that type. bytearray has no entry (it is disabled).
types_dict = {}
types_dict.update(dict.fromkeys((list, tuple, set), create_listlike_dataset))
types_dict.update(dict.fromkeys((bytes, str), create_stringlike_dataset))
types_dict.update(dict.fromkeys((int, float, bool, complex), create_python_dtype_dataset))
types_dict[type(None)] = create_none_dataset
# Load dispatch table: maps the type tag stored with a dataset to the
# function that reads that dataset back into a python object.
hkl_types_dict = dict([
    (b"<class 'list'>", load_list_dataset),
    (b"<class 'tuple'>", load_tuple_dataset),
    (b"<class 'set'>", load_set_dataset),
    (b"bytes", load_bytes_dataset),
    (b"python_dtype", load_python_dtype_dataset),
    (b"string", load_string_dataset),
    (b"pickle", load_pickled_data),
    (b"none", load_none_dataset),
])