You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

241 lines
7.3 KiB

4 years ago
  1. """
  2. #lookup.py
  3. This file contains all the mappings between hickle/HDF5 metadata and python types.
  4. There are four dictionaries and one list that are populated here:
  5. 1) types_dict
  6. types_dict: mapping between python types and dataset creation functions, e.g.
  7. types_dict = {
  8. list: create_listlike_dataset,
  9. int: create_python_dtype_dataset,
  10. np.ndarray: create_np_array_dataset
  11. }
  12. 2) hkl_types_dict
  13. hkl_types_dict: mapping between hickle metadata and dataset loading functions, e.g.
  14. hkl_types_dict = {
  15. "<type 'list'>" : load_list_dataset,
  16. "<type 'tuple'>" : load_tuple_dataset
  17. }
  18. 3) container_types_dict
  19. container_types_dict: mapping required to convert the PyContainer object in hickle.py
  20. back into the required native type. PyContainer is required as
  21. some iterable types are immutable (do not have an append() function).
  22. Here is an example:
  23. container_types_dict = {
  24. "<type 'list'>": list,
  25. "<type 'tuple'>": tuple
  26. }
  27. 4) container_key_types_dict
  28. container_key_types_dict: mapping specifically for converting hickled dict data back into
  29. a dictionary with the same key type. While python dictionary keys
  30. can be any hashable object, in HDF5 a unicode/string is required
  31. for a dataset name. Example:
  32. container_key_types_dict = {
  33. "<type 'str'>": str,
  34. "<type 'unicode'>": unicode
  35. }
  36. 5) types_not_to_sort
  37. types_not_to_sort is a list of hickle type attributes that may be hierarchical,
  38. but don't require sorting by integer index.
  39. ## Extending hickle to add support for other classes and types
  40. The process to add new load/dump capabilities is as follows:
  41. 1) Create a file called load_[newstuff].py in loaders/
  42. 2) In the load_[newstuff].py file, define your create_dataset and load_dataset functions,
  43. along with all required mapping dictionaries.
  44. 3) Add an import call here, and populate the lookup dictionaries with update() calls:
  45. # Add loaders for [newstuff]
  46. try:
  47. from .loaders.load_[newstuff] import types_dict as ns_types_dict
  48. from .loaders.load_[newstuff] import hkl_types_dict as ns_hkl_types_dict
  49. types_dict.update(ns_types_dict)
  50. hkl_types_dict.update(ns_hkl_types_dict)
  51. ... (Add container_types_dict etc if required)
  52. except ImportError:
  53. raise
  54. """
  55. import six
  56. import pkg_resources
  57. def return_first(x):
  58. """ Return first element of a list """
  59. return x[0]
  60. def load_nothing(h_hode):
  61. pass
  62. types_dict = {}
  63. hkl_types_dict = {}
  64. types_not_to_sort = [b'dict', b'csr_matrix', b'csc_matrix', b'bsr_matrix']
  65. container_types_dict = {
  66. b"<type 'list'>": list,
  67. b"<type 'tuple'>": tuple,
  68. b"<type 'set'>": set,
  69. b"<class 'list'>": list,
  70. b"<class 'tuple'>": tuple,
  71. b"<class 'set'>": set,
  72. b"csr_matrix": return_first,
  73. b"csc_matrix": return_first,
  74. b"bsr_matrix": return_first
  75. }
  76. # Technically, any hashable object can be used, for now sticking with built-in types
  77. container_key_types_dict = {
  78. b"<type 'str'>": str,
  79. b"<type 'float'>": float,
  80. b"<type 'bool'>": bool,
  81. b"<type 'int'>": int,
  82. b"<type 'complex'>": complex,
  83. b"<class 'str'>": str,
  84. b"<class 'float'>": float,
  85. b"<class 'bool'>": bool,
  86. b"<class 'int'>": int,
  87. b"<class 'complex'>": complex
  88. }
  89. if six.PY2:
  90. container_key_types_dict[b"<type 'unicode'>"] = unicode
  91. container_key_types_dict[b"<type 'long'>"] = long
  92. # Add loaders for built-in python types
  93. if six.PY2:
  94. from .loaders.load_python import types_dict as py_types_dict
  95. from .loaders.load_python import hkl_types_dict as py_hkl_types_dict
  96. else:
  97. from .loaders.load_python3 import types_dict as py_types_dict
  98. from .loaders.load_python3 import hkl_types_dict as py_hkl_types_dict
  99. types_dict.update(py_types_dict)
  100. hkl_types_dict.update(py_hkl_types_dict)
  101. # Add loaders for numpy types
  102. from .loaders.load_numpy import types_dict as np_types_dict
  103. from .loaders.load_numpy import hkl_types_dict as np_hkl_types_dict
  104. from .loaders.load_numpy import check_is_numpy_array
  105. types_dict.update(np_types_dict)
  106. hkl_types_dict.update(np_hkl_types_dict)
  107. #####################
  108. # ND-ARRAY checking #
  109. #####################
  110. ndarray_like_check_fns = [
  111. check_is_numpy_array
  112. ]
  113. def check_is_ndarray_like(py_obj):
  114. is_ndarray_like = False
  115. for ii, check_fn in enumerate(ndarray_like_check_fns):
  116. is_ndarray_like = check_fn(py_obj)
  117. if is_ndarray_like:
  118. break
  119. return is_ndarray_like
  120. #####################
  121. # loading optional #
  122. #####################
  123. def register_class(myclass_type, hkl_str, dump_function, load_function,
  124. to_sort=True, ndarray_check_fn=None):
  125. """ Register a new hickle class.
  126. Args:
  127. myclass_type type(class): type of class
  128. dump_function (function def): function to write data to HDF5
  129. load_function (function def): function to load data from HDF5
  130. hkl_str (str): String to write to HDF5 file to describe class
  131. to_sort (bool): If the item is iterable, does it require sorting?
  132. ndarray_check_fn (function def): function to use to check if
  133. """
  134. types_dict.update({myclass_type: dump_function})
  135. hkl_types_dict.update({hkl_str: load_function})
  136. if to_sort == False:
  137. types_not_to_sort.append(hkl_str)
  138. if ndarray_check_fn is not None:
  139. ndarray_like_check_fns.append(ndarray_check_fn)
  140. def register_class_list(class_list):
  141. """ Register multiple classes in a list
  142. Args:
  143. class_list (list): A list, where each item is an argument to
  144. the register_class() function.
  145. Notes: This just runs the code:
  146. for item in mylist:
  147. register_class(*item)
  148. """
  149. for class_item in class_list:
  150. register_class(*class_item)
  151. def register_class_exclude(hkl_str_to_ignore):
  152. """ Tell loading funciton to ignore any HDF5 dataset with attribute 'type=XYZ'
  153. Args:
  154. hkl_str_to_ignore (str): attribute type=string to ignore and exclude from loading.
  155. """
  156. hkl_types_dict[hkl_str_to_ignore] = load_nothing
  157. def register_exclude_list(exclude_list):
  158. """ Ignore HDF5 datasets with attribute type='XYZ' from loading
  159. ArgsL
  160. exclude_list (list): List of strings, which correspond to hdf5/hickle
  161. type= attributes not to load.
  162. """
  163. for hkl_str in exclude_list:
  164. register_class_exclude(hkl_str)
  165. ######################
  166. # Scipy sparse array #
  167. ######################
  168. try:
  169. pkg_resources.require('hickle[scipy]')
  170. from .loaders.load_scipy import class_register, exclude_register
  171. register_class_list(class_register)
  172. register_exclude_list(exclude_register)
  173. except pkg_resources.DistributionNotFound:
  174. pass
  175. ##################
  176. # Astropy stuff #
  177. ##################
  178. try:
  179. pkg_resources.require('hickle[astropy]')
  180. from .loaders.load_astropy import class_register
  181. register_class_list(class_register)
  182. except pkg_resources.DistributionNotFound:
  183. pass
  184. ################
  185. # Pandas stuff #
  186. ################
  187. try:
  188. pkg_resources.require('hickle[pandas]')
  189. from .loaders.load_pandas import class_register
  190. register_class_list(class_register)
  191. except pkg_resources.DistributionNotFound:
  192. pass