You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

100 lines
3.2 KiB

4 years ago
  1. """
  2. Legacy module - don't use in new code!
  3. html5lib now has its own proper implementation.
  4. This module implements a tree builder for html5lib that generates lxml
  5. html element trees. This module uses camelCase as it follows the
  6. html5lib style guide.
  7. """
  8. from html5lib.treebuilders import _base, etree as etree_builders
  9. from lxml import html, etree
  10. class DocumentType(object):
  11. def __init__(self, name, publicId, systemId):
  12. self.name = name
  13. self.publicId = publicId
  14. self.systemId = systemId
  15. class Document(object):
  16. def __init__(self):
  17. self._elementTree = None
  18. self.childNodes = []
  19. def appendChild(self, element):
  20. self._elementTree.getroot().addnext(element._element)
  21. class TreeBuilder(_base.TreeBuilder):
  22. documentClass = Document
  23. doctypeClass = DocumentType
  24. elementClass = None
  25. commentClass = None
  26. fragmentClass = Document
  27. def __init__(self, *args, **kwargs):
  28. html_builder = etree_builders.getETreeModule(html, fullTree=False)
  29. etree_builder = etree_builders.getETreeModule(etree, fullTree=False)
  30. self.elementClass = html_builder.Element
  31. self.commentClass = etree_builder.Comment
  32. _base.TreeBuilder.__init__(self, *args, **kwargs)
  33. def reset(self):
  34. _base.TreeBuilder.reset(self)
  35. self.rootInserted = False
  36. self.initialComments = []
  37. self.doctype = None
  38. def getDocument(self):
  39. return self.document._elementTree
  40. def getFragment(self):
  41. fragment = []
  42. element = self.openElements[0]._element
  43. if element.text:
  44. fragment.append(element.text)
  45. fragment.extend(element.getchildren())
  46. if element.tail:
  47. fragment.append(element.tail)
  48. return fragment
  49. def insertDoctype(self, name, publicId, systemId):
  50. doctype = self.doctypeClass(name, publicId, systemId)
  51. self.doctype = doctype
  52. def insertComment(self, data, parent=None):
  53. if not self.rootInserted:
  54. self.initialComments.append(data)
  55. else:
  56. _base.TreeBuilder.insertComment(self, data, parent)
  57. def insertRoot(self, name):
  58. buf = []
  59. if self.doctype and self.doctype.name:
  60. buf.append('<!DOCTYPE %s' % self.doctype.name)
  61. if self.doctype.publicId is not None or self.doctype.systemId is not None:
  62. buf.append(' PUBLIC "%s" "%s"' % (self.doctype.publicId,
  63. self.doctype.systemId))
  64. buf.append('>')
  65. buf.append('<html></html>')
  66. root = html.fromstring(''.join(buf))
  67. # Append the initial comments:
  68. for comment in self.initialComments:
  69. root.addprevious(etree.Comment(comment))
  70. # Create the root document and add the ElementTree to it
  71. self.document = self.documentClass()
  72. self.document._elementTree = root.getroottree()
  73. # Add the root element to the internal child/open data structures
  74. root_element = self.elementClass(name)
  75. root_element._element = root
  76. self.document.childNodes.append(root_element)
  77. self.openElements.append(root_element)
  78. self.rootInserted = True