You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

249 lines
8.3 KiB

4 years ago
  1. """
  2. SAX-based adapter to copy trees from/to the Python standard library.
  3. Use the `ElementTreeContentHandler` class to build an ElementTree from
  4. SAX events.
  5. Use the `ElementTreeProducer` class or the `saxify()` function to fire
  6. the SAX events of an ElementTree against a SAX ContentHandler.
  7. See https://lxml.de/sax.html
  8. """
  9. from xml.sax.handler import ContentHandler
  10. from lxml import etree
  11. from lxml.etree import ElementTree, SubElement
  12. from lxml.etree import Comment, ProcessingInstruction
  class SaxError(etree.LxmlError):
      """General SAX error.

      Raised by this module when the incoming SAX event stream is
      inconsistent, e.g. when an element is closed that is not the one
      currently open.
      """
  16. def _getNsTag(tag):
  17. if tag[0] == '{':
  18. return tuple(tag[1:].split('}', 1))
  19. else:
  20. return (None, tag)
  class ElementTreeContentHandler(ContentHandler):
      """Build an lxml ElementTree from SAX events.

      Feed SAX events into an instance and read the result from the
      ``etree`` property afterwards.
      """

      def __init__(self, makeelement=None):
          """Set up an empty builder.

          ``makeelement`` is the factory used to create the root element; it
          is called as ``makeelement(tag, attrib, nsmap)`` and defaults to
          ``etree.Element``.
          """
          ContentHandler.__init__(self)
          self._root = None
          # Processing instructions seen before the root element exists.
          self._root_siblings = []
          # Currently open elements, innermost last.
          self._element_stack = []
          self._default_ns = None
          # prefix -> stack of URIs, so nested re-declarations can be undone.
          self._ns_mapping = {None: [None]}
          # Mappings declared since the previous element start; they are
          # handed to the next element as its nsmap and then cleared.
          self._new_mappings = {}
          if makeelement is None:
              makeelement = etree.Element
          self._makeelement = makeelement

      def _get_etree(self):
          "Contains the generated ElementTree after parsing is finished."
          return ElementTree(self._root)

      etree = property(_get_etree, doc=_get_etree.__doc__)

      def setDocumentLocator(self, locator):
          """Ignore the locator; it is not used by this handler."""
          pass

      def startDocument(self):
          """Do nothing; no per-document setup is required."""
          pass

      def endDocument(self):
          """Do nothing; the tree is available through ``etree``."""
          pass

      def startPrefixMapping(self, prefix, uri):
          """Record a namespace declaration for the next element."""
          self._new_mappings[prefix] = uri
          try:
              self._ns_mapping[prefix].append(uri)
          except KeyError:
              self._ns_mapping[prefix] = [uri]
          if prefix is None:
              self._default_ns = uri

      def endPrefixMapping(self, prefix):
          """Undo the most recent declaration of ``prefix``."""
          ns_uri_list = self._ns_mapping[prefix]
          ns_uri_list.pop()
          if prefix is None:
              # Fall back to the enclosing default namespace; the initial
              # [None] entry guarantees the list is never empty here for
              # balanced start/end calls.
              self._default_ns = ns_uri_list[-1]

      def _buildTag(self, ns_name_tuple):
          """Return the ``{uri}local`` tag for a ``(uri, local)`` tuple.

          A name without namespace URI is put into the current default
          namespace, if there is one.
          """
          ns_uri, local_name = ns_name_tuple
          if ns_uri:
              el_tag = "{%s}%s" % ns_name_tuple
          elif self._default_ns:
              el_tag = "{%s}%s" % (self._default_ns, local_name)
          else:
              el_tag = local_name
          return el_tag

      def startElementNS(self, ns_name, qname, attributes=None):
          """Create a new element and make it the current one.

          The first element becomes the root (created via the
          ``makeelement`` factory); later ones are appended to the innermost
          open element.
          """
          el_name = self._buildTag(ns_name)
          if attributes:
              attrs = {}
              for name_tuple, value in attributes.items():
                  if name_tuple[0]:
                      attr_name = "{%s}%s" % name_tuple
                  else:
                      # Attributes never inherit the default namespace.
                      attr_name = name_tuple[1]
                  attrs[attr_name] = value
          else:
              attrs = None

          element_stack = self._element_stack
          if self._root is None:
              element = self._root = \
                  self._makeelement(el_name, attrs, self._new_mappings)
              if self._root_siblings and hasattr(element, 'addprevious'):
                  # Attach the processing instructions that preceded the root.
                  for sibling in self._root_siblings:
                      element.addprevious(sibling)
              del self._root_siblings[:]
          else:
              element = SubElement(element_stack[-1], el_name,
                                   attrs, self._new_mappings)
          element_stack.append(element)

          self._new_mappings.clear()

      def processingInstruction(self, target, data):
          """Add a processing instruction at the current position.

          Before the root element exists, the PI is kept aside and attached
          as a preceding sibling once the root is created.
          """
          pi = ProcessingInstruction(target, data)
          if self._root is None:
              self._root_siblings.append(pi)
          else:
              self._element_stack[-1].append(pi)

      def endElementNS(self, ns_name, qname):
          """Close the current element.

          Raises ``SaxError`` if the closed name does not match the tag of
          the innermost open element.
          """
          element = self._element_stack.pop()
          el_tag = self._buildTag(ns_name)
          if el_tag != element.tag:
              raise SaxError("Unexpected element closed: " + el_tag)

      def startElement(self, name, attributes=None):
          """Non-namespace variant; delegates to ``startElementNS``."""
          if attributes:
              attributes = {(None, k): v for k, v in attributes.items()}
          self.startElementNS((None, name), name, attributes)

      def endElement(self, name):
          """Non-namespace variant; delegates to ``endElementNS``."""
          self.endElementNS((None, name), name)

      def characters(self, data):
          """Append character data to the innermost open element."""
          last_element = self._element_stack[-1]
          try:
              # if there already is a child element, we must append to its tail
              last_element = last_element[-1]
              last_element.tail = (last_element.tail or '') + data
          except IndexError:
              # otherwise: append to the text
              last_element.text = (last_element.text or '') + data

      ignorableWhitespace = characters
  class ElementTreeProducer(object):
      """Produces SAX events for an element and children.
      """

      def __init__(self, element_or_tree, content_handler):
          """Prepare to fire the events of ``element_or_tree``.

          ``element_or_tree`` may be a tree (anything with ``getroot()``) or
          an element; ``content_handler`` receives the SAX events.
          """
          try:
              element = element_or_tree.getroot()
          except AttributeError:
              element = element_or_tree
          self._element = element
          self._content_handler = content_handler
          from xml.sax.xmlreader import AttributesNSImpl as attr_class
          self._attr_class = attr_class
          # Shared instance for elements without attributes.
          self._empty_attributes = attr_class({}, {})

      def saxify(self):
          """Fire the SAX events for the element.

          Processing instructions that directly precede or follow the
          element as siblings are reported as well, in document order.
          """
          self._content_handler.startDocument()

          element = self._element
          if hasattr(element, 'getprevious'):
              siblings = []
              sibling = element.getprevious()
              while getattr(sibling, 'tag', None) is ProcessingInstruction:
                  siblings.append(sibling)
                  sibling = sibling.getprevious()
              # Collected walking backwards, so replay in reverse.
              for sibling in siblings[::-1]:
                  self._recursive_saxify(sibling, {})

          self._recursive_saxify(element, {})

          if hasattr(element, 'getnext'):
              sibling = element.getnext()
              while getattr(sibling, 'tag', None) is ProcessingInstruction:
                  self._recursive_saxify(sibling, {})
                  sibling = sibling.getnext()

          self._content_handler.endDocument()

      def _recursive_saxify(self, element, prefixes):
          """Fire the events for ``element``, its subtree and its tail text.

          ``prefixes`` maps the namespace URIs currently in scope to the
          prefixes generated for them.  It is extended for the duration of
          this element and restored before returning.
          """
          content_handler = self._content_handler
          tag = element.tag
          if tag is Comment or tag is ProcessingInstruction:
              # Comments produce no event of their own; only their tail text
              # is reported.
              if tag is ProcessingInstruction:
                  content_handler.processingInstruction(
                      element.target, element.text)
              if element.tail:
                  content_handler.characters(element.tail)
              return

          new_prefixes = []
          build_qname = self._build_qname
          attribs = element.items()
          if attribs:
              attr_values = {}
              attr_qnames = {}
              for attr_ns_name, value in attribs:
                  attr_ns_tuple = _getNsTag(attr_ns_name)
                  attr_values[attr_ns_tuple] = value
                  attr_qnames[attr_ns_tuple] = build_qname(
                      attr_ns_tuple[0], attr_ns_tuple[1],
                      prefixes, new_prefixes)
              sax_attributes = self._attr_class(attr_values, attr_qnames)
          else:
              sax_attributes = self._empty_attributes

          ns_uri, local_name = _getNsTag(tag)
          qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)

          for prefix, uri in new_prefixes:
              content_handler.startPrefixMapping(prefix, uri)
          content_handler.startElementNS((ns_uri, local_name),
                                         qname, sax_attributes)
          if element.text:
              content_handler.characters(element.text)
          for child in element:
              self._recursive_saxify(child, prefixes)
          content_handler.endElementNS((ns_uri, local_name), qname)
          for prefix, uri in new_prefixes:
              content_handler.endPrefixMapping(prefix)
              # The mapping is out of scope now.  Forget it, so that a later
              # sibling using the same namespace declares it again instead
              # of referring to a prefix that has already been ended.
              del prefixes[uri]
          if element.tail:
              content_handler.characters(element.tail)

      def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
          """Return the qualified name for ``(ns_uri, local_name)``.

          A namespace not yet in ``prefixes`` gets a generated ``nsNN``
          prefix, which is stored in ``prefixes`` and appended to
          ``new_prefixes`` as a ``(prefix, uri)`` pair.
          """
          if ns_uri is None:
              return local_name
          try:
              prefix = prefixes[ns_uri]
          except KeyError:
              prefix = prefixes[ns_uri] = 'ns%02d' % len(prefixes)
              new_prefixes.append((prefix, ns_uri))
          return prefix + ':' + local_name
  def saxify(element_or_tree, content_handler):
      """One-shot helper to generate SAX events from an XML tree and fire
      them against a SAX ContentHandler.

      ``element_or_tree`` is an element or a tree; ``content_handler`` is
      the object that receives the events.
      """
      return ElementTreeProducer(element_or_tree, content_handler).saxify()