You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

197 lines
6.4 KiB

4 years ago
  1. """Utilities to manipulate JSON objects."""
  2. # Copyright (c) IPython Development Team.
  3. # Distributed under the terms of the Modified BSD License.
  4. from binascii import b2a_base64
  5. import math
  6. import re
  7. import types
  8. from datetime import datetime
  9. import numbers
  10. from ipython_genutils import py3compat
  11. from ipython_genutils.py3compat import unicode_type, iteritems
  12. from ipython_genutils.encoding import DEFAULT_ENCODING
  13. next_attr_name = '__next__' if py3compat.PY3 else 'next'
  14. #-----------------------------------------------------------------------------
  15. # Globals and constants
  16. #-----------------------------------------------------------------------------
  17. # timestamp formats
  18. ISO8601 = "%Y-%m-%dT%H:%M:%S.%f"
  19. ISO8601_PAT=re.compile(r"^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})(\.\d{1,6})?Z?([\+\-]\d{2}:?\d{2})?$")
  20. # holy crap, strptime is not threadsafe.
  21. # Calling it once at import seems to help.
  22. datetime.strptime("1", "%d")
  23. #-----------------------------------------------------------------------------
  24. # Classes and functions
  25. #-----------------------------------------------------------------------------
  26. # constants for identifying png/jpeg data
  27. PNG = b'\x89PNG\r\n\x1a\n'
  28. # front of PNG base64-encoded
  29. PNG64 = b'iVBORw0KG'
  30. JPEG = b'\xff\xd8'
  31. # front of JPEG base64-encoded
  32. JPEG64 = b'/9'
  33. # constants for identifying gif data
  34. GIF_64 = b'R0lGODdh'
  35. GIF89_64 = b'R0lGODlh'
  36. # front of PDF base64-encoded
  37. PDF64 = b'JVBER'
  38. def encode_images(format_dict):
  39. """b64-encodes images in a displaypub format dict
  40. Perhaps this should be handled in json_clean itself?
  41. Parameters
  42. ----------
  43. format_dict : dict
  44. A dictionary of display data keyed by mime-type
  45. Returns
  46. -------
  47. format_dict : dict
  48. A copy of the same dictionary,
  49. but binary image data ('image/png', 'image/jpeg' or 'application/pdf')
  50. is base64-encoded.
  51. """
  52. # no need for handling of ambiguous bytestrings on Python 3,
  53. # where bytes objects always represent binary data and thus
  54. # base64-encoded.
  55. if py3compat.PY3:
  56. return format_dict
  57. encoded = format_dict.copy()
  58. pngdata = format_dict.get('image/png')
  59. if isinstance(pngdata, bytes):
  60. # make sure we don't double-encode
  61. if not pngdata.startswith(PNG64):
  62. pngdata = b2a_base64(pngdata)
  63. encoded['image/png'] = pngdata.decode('ascii')
  64. jpegdata = format_dict.get('image/jpeg')
  65. if isinstance(jpegdata, bytes):
  66. # make sure we don't double-encode
  67. if not jpegdata.startswith(JPEG64):
  68. jpegdata = b2a_base64(jpegdata)
  69. encoded['image/jpeg'] = jpegdata.decode('ascii')
  70. gifdata = format_dict.get('image/gif')
  71. if isinstance(gifdata, bytes):
  72. # make sure we don't double-encode
  73. if not gifdata.startswith((GIF_64, GIF89_64)):
  74. gifdata = b2a_base64(gifdata)
  75. encoded['image/gif'] = gifdata.decode('ascii')
  76. pdfdata = format_dict.get('application/pdf')
  77. if isinstance(pdfdata, bytes):
  78. # make sure we don't double-encode
  79. if not pdfdata.startswith(PDF64):
  80. pdfdata = b2a_base64(pdfdata)
  81. encoded['application/pdf'] = pdfdata.decode('ascii')
  82. return encoded
  83. def json_clean(obj):
  84. """Clean an object to ensure it's safe to encode in JSON.
  85. Atomic, immutable objects are returned unmodified. Sets and tuples are
  86. converted to lists, lists are copied and dicts are also copied.
  87. Note: dicts whose keys could cause collisions upon encoding (such as a dict
  88. with both the number 1 and the string '1' as keys) will cause a ValueError
  89. to be raised.
  90. Parameters
  91. ----------
  92. obj : any python object
  93. Returns
  94. -------
  95. out : object
  96. A version of the input which will not cause an encoding error when
  97. encoded as JSON. Note that this function does not *encode* its inputs,
  98. it simply sanitizes it so that there will be no encoding errors later.
  99. """
  100. # types that are 'atomic' and ok in json as-is.
  101. atomic_ok = (unicode_type, type(None))
  102. # containers that we need to convert into lists
  103. container_to_list = (tuple, set, types.GeneratorType)
  104. # Since bools are a subtype of Integrals, which are a subtype of Reals,
  105. # we have to check them in that order.
  106. if isinstance(obj, bool):
  107. return obj
  108. if isinstance(obj, numbers.Integral):
  109. # cast int to int, in case subclasses override __str__ (e.g. boost enum, #4598)
  110. return int(obj)
  111. if isinstance(obj, numbers.Real):
  112. # cast out-of-range floats to their reprs
  113. if math.isnan(obj) or math.isinf(obj):
  114. return repr(obj)
  115. return float(obj)
  116. if isinstance(obj, atomic_ok):
  117. return obj
  118. if isinstance(obj, bytes):
  119. if py3compat.PY3:
  120. # unanmbiguous binary data is base64-encoded
  121. # (this probably should have happened upstream)
  122. return b2a_base64(obj).decode('ascii')
  123. else:
  124. # Python 2 bytestr is ambiguous,
  125. # needs special handling for possible binary bytestrings.
  126. # imperfect workaround: if ascii, assume text.
  127. # otherwise assume binary, base64-encode (py3 behavior).
  128. try:
  129. return obj.decode('ascii')
  130. except UnicodeDecodeError:
  131. return b2a_base64(obj).decode('ascii')
  132. if isinstance(obj, container_to_list) or (
  133. hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)):
  134. obj = list(obj)
  135. if isinstance(obj, list):
  136. return [json_clean(x) for x in obj]
  137. if isinstance(obj, dict):
  138. # First, validate that the dict won't lose data in conversion due to
  139. # key collisions after stringification. This can happen with keys like
  140. # True and 'true' or 1 and '1', which collide in JSON.
  141. nkeys = len(obj)
  142. nkeys_collapsed = len(set(map(unicode_type, obj)))
  143. if nkeys != nkeys_collapsed:
  144. raise ValueError('dict cannot be safely converted to JSON: '
  145. 'key collision would lead to dropped values')
  146. # If all OK, proceed by making the new dict that will be json-safe
  147. out = {}
  148. for k,v in iteritems(obj):
  149. out[unicode_type(k)] = json_clean(v)
  150. return out
  151. if isinstance(obj, datetime):
  152. return obj.strftime(ISO8601)
  153. # we don't understand it, it's probably an unserializable object
  154. raise ValueError("Can't clean for JSON: %r" % obj)