You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

249 lines
8.3 KiB

4 years ago
  1. """
  2. SAX-based adapter to copy trees from/to the Python standard library.
  3. Use the `ElementTreeContentHandler` class to build an ElementTree from
  4. SAX events.
  5. Use the `ElementTreeProducer` class or the `saxify()` function to fire
  6. the SAX events of an ElementTree against a SAX ContentHandler.
  7. See http://codespeak.net/lxml/sax.html
  8. """
  9. from xml.sax.handler import ContentHandler
  10. from lxml import etree
  11. from lxml.etree import ElementTree, SubElement
  12. from lxml.etree import Comment, ProcessingInstruction
  13. class SaxError(etree.LxmlError):
  14. """General SAX error.
  15. """
  16. def _getNsTag(tag):
  17. if tag[0] == '{':
  18. return tuple(tag[1:].split('}', 1))
  19. else:
  20. return (None, tag)
  21. class ElementTreeContentHandler(ContentHandler):
  22. """Build an lxml ElementTree from SAX events.
  23. """
  24. def __init__(self, makeelement=None):
  25. ContentHandler.__init__(self)
  26. self._root = None
  27. self._root_siblings = []
  28. self._element_stack = []
  29. self._default_ns = None
  30. self._ns_mapping = { None : [None] }
  31. self._new_mappings = {}
  32. if makeelement is None:
  33. makeelement = etree.Element
  34. self._makeelement = makeelement
  35. def _get_etree(self):
  36. "Contains the generated ElementTree after parsing is finished."
  37. return ElementTree(self._root)
  38. etree = property(_get_etree, doc=_get_etree.__doc__)
  39. def setDocumentLocator(self, locator):
  40. pass
  41. def startDocument(self):
  42. pass
  43. def endDocument(self):
  44. pass
  45. def startPrefixMapping(self, prefix, uri):
  46. self._new_mappings[prefix] = uri
  47. try:
  48. self._ns_mapping[prefix].append(uri)
  49. except KeyError:
  50. self._ns_mapping[prefix] = [uri]
  51. if prefix is None:
  52. self._default_ns = uri
  53. def endPrefixMapping(self, prefix):
  54. ns_uri_list = self._ns_mapping[prefix]
  55. ns_uri_list.pop()
  56. if prefix is None:
  57. self._default_ns = ns_uri_list[-1]
  58. def _buildTag(self, ns_name_tuple):
  59. ns_uri, local_name = ns_name_tuple
  60. if ns_uri:
  61. el_tag = "{%s}%s" % ns_name_tuple
  62. elif self._default_ns:
  63. el_tag = "{%s}%s" % (self._default_ns, local_name)
  64. else:
  65. el_tag = local_name
  66. return el_tag
  67. def startElementNS(self, ns_name, qname, attributes=None):
  68. el_name = self._buildTag(ns_name)
  69. if attributes:
  70. attrs = {}
  71. try:
  72. iter_attributes = attributes.iteritems()
  73. except AttributeError:
  74. iter_attributes = attributes.items()
  75. for name_tuple, value in iter_attributes:
  76. if name_tuple[0]:
  77. attr_name = "{%s}%s" % name_tuple
  78. else:
  79. attr_name = name_tuple[1]
  80. attrs[attr_name] = value
  81. else:
  82. attrs = None
  83. element_stack = self._element_stack
  84. if self._root is None:
  85. element = self._root = \
  86. self._makeelement(el_name, attrs, self._new_mappings)
  87. if self._root_siblings and hasattr(element, 'addprevious'):
  88. for sibling in self._root_siblings:
  89. element.addprevious(sibling)
  90. del self._root_siblings[:]
  91. else:
  92. element = SubElement(element_stack[-1], el_name,
  93. attrs, self._new_mappings)
  94. element_stack.append(element)
  95. self._new_mappings.clear()
  96. def processingInstruction(self, target, data):
  97. pi = ProcessingInstruction(target, data)
  98. if self._root is None:
  99. self._root_siblings.append(pi)
  100. else:
  101. self._element_stack[-1].append(pi)
  102. def endElementNS(self, ns_name, qname):
  103. element = self._element_stack.pop()
  104. el_tag = self._buildTag(ns_name)
  105. if el_tag != element.tag:
  106. raise SaxError("Unexpected element closed: " + el_tag)
  107. def startElement(self, name, attributes=None):
  108. if attributes:
  109. attributes = dict(
  110. [((None, k), v) for k, v in attributes.items()]
  111. )
  112. self.startElementNS((None, name), name, attributes)
  113. def endElement(self, name):
  114. self.endElementNS((None, name), name)
  115. def characters(self, data):
  116. last_element = self._element_stack[-1]
  117. try:
  118. # if there already is a child element, we must append to its tail
  119. last_element = last_element[-1]
  120. last_element.tail = (last_element.tail or '') + data
  121. except IndexError:
  122. # otherwise: append to the text
  123. last_element.text = (last_element.text or '') + data
  124. ignorableWhitespace = characters
  125. class ElementTreeProducer(object):
  126. """Produces SAX events for an element and children.
  127. """
  128. def __init__(self, element_or_tree, content_handler):
  129. try:
  130. element = element_or_tree.getroot()
  131. except AttributeError:
  132. element = element_or_tree
  133. self._element = element
  134. self._content_handler = content_handler
  135. from xml.sax.xmlreader import AttributesNSImpl as attr_class
  136. self._attr_class = attr_class
  137. self._empty_attributes = attr_class({}, {})
  138. def saxify(self):
  139. self._content_handler.startDocument()
  140. element = self._element
  141. if hasattr(element, 'getprevious'):
  142. siblings = []
  143. sibling = element.getprevious()
  144. while getattr(sibling, 'tag', None) is ProcessingInstruction:
  145. siblings.append(sibling)
  146. sibling = sibling.getprevious()
  147. for sibling in siblings[::-1]:
  148. self._recursive_saxify(sibling, {})
  149. self._recursive_saxify(element, {})
  150. if hasattr(element, 'getnext'):
  151. sibling = element.getnext()
  152. while getattr(sibling, 'tag', None) is ProcessingInstruction:
  153. self._recursive_saxify(sibling, {})
  154. sibling = sibling.getnext()
  155. self._content_handler.endDocument()
  156. def _recursive_saxify(self, element, prefixes):
  157. content_handler = self._content_handler
  158. tag = element.tag
  159. if tag is Comment or tag is ProcessingInstruction:
  160. if tag is ProcessingInstruction:
  161. content_handler.processingInstruction(
  162. element.target, element.text)
  163. if element.tail:
  164. content_handler.characters(element.tail)
  165. return
  166. new_prefixes = []
  167. build_qname = self._build_qname
  168. attribs = element.items()
  169. if attribs:
  170. attr_values = {}
  171. attr_qnames = {}
  172. for attr_ns_name, value in attribs:
  173. attr_ns_tuple = _getNsTag(attr_ns_name)
  174. attr_values[attr_ns_tuple] = value
  175. attr_qnames[attr_ns_tuple] = build_qname(
  176. attr_ns_tuple[0], attr_ns_tuple[1], prefixes, new_prefixes)
  177. sax_attributes = self._attr_class(attr_values, attr_qnames)
  178. else:
  179. sax_attributes = self._empty_attributes
  180. ns_uri, local_name = _getNsTag(tag)
  181. qname = build_qname(ns_uri, local_name, prefixes, new_prefixes)
  182. for prefix, uri in new_prefixes:
  183. content_handler.startPrefixMapping(prefix, uri)
  184. content_handler.startElementNS((ns_uri, local_name),
  185. qname, sax_attributes)
  186. if element.text:
  187. content_handler.characters(element.text)
  188. for child in element:
  189. self._recursive_saxify(child, prefixes)
  190. content_handler.endElementNS((ns_uri, local_name), qname)
  191. for prefix, uri in new_prefixes:
  192. content_handler.endPrefixMapping(prefix)
  193. if element.tail:
  194. content_handler.characters(element.tail)
  195. def _build_qname(self, ns_uri, local_name, prefixes, new_prefixes):
  196. if ns_uri is None:
  197. return local_name
  198. try:
  199. prefix = prefixes[ns_uri]
  200. except KeyError:
  201. prefix = prefixes[ns_uri] = 'ns%02d' % len(prefixes)
  202. new_prefixes.append( (prefix, ns_uri) )
  203. return prefix + ':' + local_name
  204. def saxify(element_or_tree, content_handler):
  205. """One-shot helper to generate SAX events from an XML tree and fire
  206. them against a SAX ContentHandler.
  207. """
  208. return ElementTreeProducer(element_or_tree, content_handler).saxify()