You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

153 lines
4.9 KiB

4 years ago
  1. # defusedxml
  2. #
  3. # Copyright (c) 2013 by Christian Heimes <christian@python.org>
  4. # Licensed to PSF under a Contributor Agreement.
  5. # See http://www.python.org/psf/license for licensing details.
  6. """Example code for lxml.etree protection
  7. The code has NO protection against decompression bombs.
  8. """
  9. from __future__ import print_function, absolute_import
  10. import threading
  11. from lxml import etree as _etree
  12. from .common import DTDForbidden, EntitiesForbidden, NotSupportedError
# True when the major version of the installed lxml is 3 or higher.
# check_docinfo() consults this flag before trying to inspect entity
# declarations.
LXML3 = _etree.LXML_VERSION[0] >= 3

# NOTE(review): presumably the name of the library module this module stands
# in for -- confirm against the code that reads __origin__.
__origin__ = "lxml.etree"

# Re-exported unchanged from lxml.etree.
tostring = _etree.tostring
  16. class RestrictedElement(_etree.ElementBase):
  17. """A restricted Element class that filters out instances of some classes
  18. """
  19. __slots__ = ()
  20. # blacklist = (etree._Entity, etree._ProcessingInstruction, etree._Comment)
  21. blacklist = _etree._Entity
  22. def _filter(self, iterator):
  23. blacklist = self.blacklist
  24. for child in iterator:
  25. if isinstance(child, blacklist):
  26. continue
  27. yield child
  28. def __iter__(self):
  29. iterator = super(RestrictedElement, self).__iter__()
  30. return self._filter(iterator)
  31. def iterchildren(self, tag=None, reversed=False):
  32. iterator = super(RestrictedElement, self).iterchildren(
  33. tag=tag, reversed=reversed)
  34. return self._filter(iterator)
  35. def iter(self, tag=None, *tags):
  36. iterator = super(RestrictedElement, self).iter(tag=tag, *tags)
  37. return self._filter(iterator)
  38. def iterdescendants(self, tag=None, *tags):
  39. iterator = super(RestrictedElement,
  40. self).iterdescendants(tag=tag, *tags)
  41. return self._filter(iterator)
  42. def itersiblings(self, tag=None, preceding=False):
  43. iterator = super(RestrictedElement, self).itersiblings(
  44. tag=tag, preceding=preceding)
  45. return self._filter(iterator)
  46. def getchildren(self):
  47. iterator = super(RestrictedElement, self).__iter__()
  48. return list(self._filter(iterator))
  49. def getiterator(self, tag=None):
  50. iterator = super(RestrictedElement, self).getiterator(tag)
  51. return self._filter(iterator)
  52. class GlobalParserTLS(threading.local):
  53. """Thread local context for custom parser instances
  54. """
  55. parser_config = {
  56. 'resolve_entities': False,
  57. # 'remove_comments': True,
  58. # 'remove_pis': True,
  59. }
  60. element_class = RestrictedElement
  61. def createDefaultParser(self):
  62. parser = _etree.XMLParser(**self.parser_config)
  63. element_class = self.element_class
  64. if self.element_class is not None:
  65. lookup = _etree.ElementDefaultClassLookup(element=element_class)
  66. parser.set_element_class_lookup(lookup)
  67. return parser
  68. def setDefaultParser(self, parser):
  69. self._default_parser = parser
  70. def getDefaultParser(self):
  71. parser = getattr(self, "_default_parser", None)
  72. if parser is None:
  73. parser = self.createDefaultParser()
  74. self.setDefaultParser(parser)
  75. return parser
# Single module-wide GlobalParserTLS instance. The class derives from
# threading.local, so the parser it caches is stored per thread.
_parser_tls = GlobalParserTLS()
# Module-level alias of the bound method; parse() and fromstring() call it
# when no parser is passed in.
getDefaultParser = _parser_tls.getDefaultParser
  78. def check_docinfo(elementtree, forbid_dtd=False, forbid_entities=True):
  79. """Check docinfo of an element tree for DTD and entity declarations
  80. The check for entity declarations needs lxml 3 or newer. lxml 2.x does
  81. not support dtd.iterentities().
  82. """
  83. docinfo = elementtree.docinfo
  84. if docinfo.doctype:
  85. if forbid_dtd:
  86. raise DTDForbidden(docinfo.doctype,
  87. docinfo.system_url,
  88. docinfo.public_id)
  89. if forbid_entities and not LXML3:
  90. # lxml < 3 has no iterentities()
  91. raise NotSupportedError("Unable to check for entity declarations "
  92. "in lxml 2.x")
  93. if forbid_entities:
  94. for dtd in docinfo.internalDTD, docinfo.externalDTD:
  95. if dtd is None:
  96. continue
  97. for entity in dtd.iterentities():
  98. raise EntitiesForbidden(entity.name, entity.content, None,
  99. None, None, None)
  100. def parse(source, parser=None, base_url=None, forbid_dtd=False,
  101. forbid_entities=True):
  102. if parser is None:
  103. parser = getDefaultParser()
  104. elementtree = _etree.parse(source, parser, base_url=base_url)
  105. check_docinfo(elementtree, forbid_dtd, forbid_entities)
  106. return elementtree
  107. def fromstring(text, parser=None, base_url=None, forbid_dtd=False,
  108. forbid_entities=True):
  109. if parser is None:
  110. parser = getDefaultParser()
  111. rootelement = _etree.fromstring(text, parser, base_url=base_url)
  112. elementtree = rootelement.getroottree()
  113. check_docinfo(elementtree, forbid_dtd, forbid_entities)
  114. return rootelement
# XML is an alias: it refers to the same function object as fromstring.
XML = fromstring
  116. def iterparse(*args, **kwargs):
  117. raise NotSupportedError("defused lxml.etree.iterparse not available")