|
|
- from __future__ import absolute_import, division, unicode_literals
-
- import re
- from xml.sax.saxutils import escape, unescape
-
- from pip._vendor.six.moves import urllib_parse as urlparse
-
- from . import base
- from ..constants import namespaces, prefixes
-
- __all__ = ["Filter"]
-
-
# Elements that pass through the sanitizer untouched, keyed as
# (namespace URI, local name).  Grouped by vocabulary for readability.
allowed_elements = (
    # HTML
    frozenset((namespaces['html'], tag) for tag in (
        'a', 'abbr', 'acronym', 'address', 'area', 'article', 'aside',
        'audio', 'b', 'big', 'blockquote', 'br', 'button', 'canvas',
        'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'command',
        'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog',
        'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
        'figcaption', 'figure', 'footer', 'font', 'form', 'header',
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input',
        'ins', 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map',
        'menu', 'meter', 'multicol', 'nav', 'nextid', 'ol', 'output',
        'optgroup', 'option', 'p', 'pre', 'progress', 'q', 's', 'samp',
        'section', 'select', 'small', 'sound', 'source', 'spacer', 'span',
        'strike', 'strong', 'sub', 'sup', 'table', 'tbody', 'td',
        'textarea', 'time', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u', 'ul',
        'var', 'video',
    )) |
    # MathML
    frozenset((namespaces['mathml'], tag) for tag in (
        'maction', 'math', 'merror', 'mfrac', 'mi', 'mmultiscripts', 'mn',
        'mo', 'mover', 'mpadded', 'mphantom', 'mprescripts', 'mroot',
        'mrow', 'mspace', 'msqrt', 'mstyle', 'msub', 'msubsup', 'msup',
        'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'none',
    )) |
    # SVG
    frozenset((namespaces['svg'], tag) for tag in (
        'a', 'animate', 'animateColor', 'animateMotion',
        'animateTransform', 'clipPath', 'circle', 'defs', 'desc',
        'ellipse', 'font-face', 'font-face-name', 'font-face-src', 'g',
        'glyph', 'hkern', 'linearGradient', 'line', 'marker', 'metadata',
        'missing-glyph', 'mpath', 'path', 'polygon', 'polyline',
        'radialGradient', 'rect', 'set', 'stop', 'svg', 'switch', 'text',
        'title', 'tspan', 'use',
    ))
)
-
# Attributes that survive sanitization, keyed as (namespace URI or None,
# local name).  Names shared between vocabularies are listed in each group
# for documentation purposes; the resulting frozenset holds each pair once.
allowed_attributes = (
    # HTML attributes
    frozenset((None, attr) for attr in (
        'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align',
        'alt', 'autocomplete', 'autofocus', 'axis', 'background', 'balance',
        'bgcolor', 'bgproperties', 'border', 'bordercolor',
        'bordercolordark', 'bordercolorlight', 'bottompadding',
        'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
        'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
        'cols', 'colspan', 'compact', 'contenteditable', 'controls',
        'coords', 'data', 'datafld', 'datapagesize', 'datasrc', 'datetime',
        'default', 'delay', 'dir', 'disabled', 'draggable', 'dynsrc',
        'enctype', 'end', 'face', 'for', 'form', 'frame', 'galleryimg',
        'gutter', 'headers', 'height', 'hidefocus', 'hidden', 'high',
        'href', 'hreflang', 'hspace', 'icon', 'id', 'inputmode', 'ismap',
        'keytype', 'label', 'leftspacing', 'lang', 'list', 'longdesc',
        'loop', 'loopcount', 'loopend', 'loopstart', 'low', 'lowsrc',
        'max', 'maxlength', 'media', 'method', 'min', 'multiple', 'name',
        'nohref', 'noshade', 'nowrap', 'open', 'optimum', 'pattern',
        'ping', 'point-size', 'poster', 'pqg', 'preload', 'prompt',
        'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
        'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
        'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src',
        'start', 'step', 'style', 'summary', 'suppress', 'tabindex',
        'target', 'template', 'title', 'toppadding', 'type',
        'unselectable', 'usemap', 'urn', 'valign', 'value', 'variable',
        'volume', 'vspace', 'vrml', 'width', 'wrap',
    )) |
    # MathML attributes
    frozenset((None, attr) for attr in (
        'actiontype', 'align', 'columnalign', 'columnlines',
        'columnspacing', 'columnspan', 'depth', 'display', 'displaystyle',
        'equalcolumns', 'equalrows', 'fence', 'fontstyle', 'fontweight',
        'frame', 'height', 'linethickness', 'lspace', 'mathbackground',
        'mathcolor', 'mathvariant', 'maxsize', 'minsize', 'other',
        'rowalign', 'rowlines', 'rowspacing', 'rowspan', 'rspace',
        'scriptlevel', 'selection', 'separator', 'stretchy', 'width',
    )) |
    # SVG attributes
    frozenset((None, attr) for attr in (
        'accent-height', 'accumulate', 'additive', 'alphabetic',
        'arabic-form', 'ascent', 'attributeName', 'attributeType',
        'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
        'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
        'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
        'fill-opacity', 'fill-rule', 'font-family', 'font-size',
        'font-stretch', 'font-style', 'font-variant', 'font-weight',
        'from', 'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits',
        'hanging', 'height', 'horiz-adv-x', 'horiz-origin-x', 'id',
        'ideographic', 'k', 'keyPoints', 'keySplines', 'keyTimes', 'lang',
        'marker-end', 'marker-mid', 'marker-start', 'markerHeight',
        'markerUnits', 'markerWidth', 'mathematical', 'max', 'min', 'name',
        'offset', 'opacity', 'orient', 'origin', 'overline-position',
        'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
        'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
        'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
        'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
        'stop-opacity', 'strikethrough-position',
        'strikethrough-thickness', 'stroke', 'stroke-dasharray',
        'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin',
        'stroke-miterlimit', 'stroke-opacity', 'stroke-width',
        'systemLanguage', 'target', 'text-anchor', 'to', 'transform',
        'type', 'u1', 'u2', 'underline-position', 'underline-thickness',
        'unicode', 'unicode-range', 'units-per-em', 'values', 'version',
        'viewBox', 'visibility', 'width', 'widths', 'x', 'x-height', 'x1',
        'x2', 'y', 'y1', 'y2', 'zoomAndPan',
    )) |
    # XLink attributes (used by both MathML and SVG)
    frozenset((namespaces['xlink'], attr) for attr in (
        'actuate', 'arcrole', 'href', 'role', 'show', 'title', 'type',
    )) |
    # xml:* attributes
    frozenset((namespaces['xml'], attr) for attr in (
        'base', 'lang', 'space',
    ))
)
-
# Attributes whose value is a URI; Filter.allowed_token checks the scheme of
# these values against the allowed protocols.
attr_val_is_uri = (
    frozenset((None, attr) for attr in (
        'href', 'src', 'cite', 'action', 'longdesc', 'poster',
        'background', 'datasrc', 'dynsrc', 'lowsrc', 'ping',
    )) |
    frozenset([
        (namespaces['xlink'], 'href'),
        (namespaces['xml'], 'base'),
    ])
)
-
# SVG presentation attributes whose value may contain a ``url(...)``
# reference; Filter.allowed_token blanks out references that do not start
# with ``#``.
svg_attr_val_allows_ref = frozenset(
    (None, attr) for attr in (
        'clip-path', 'color-profile', 'cursor', 'fill', 'filter', 'marker',
        'marker-start', 'marker-mid', 'marker-end', 'mask', 'stroke',
    )
)
-
# SVG element names (stored as (None, name) pairs) that Filter.allowed_token
# consults before dropping an ``xlink:href`` that is not a local ``#fragment``.
svg_allow_local_href = frozenset(
    (None, element) for element in (
        'altGlyph', 'animate', 'animateColor', 'animateMotion',
        'animateTransform', 'cursor', 'feImage', 'filter',
        'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
        'set', 'use',
    )
)
-
# CSS property names that Filter.sanitize_css keeps in ``style`` attributes.
allowed_css_properties = frozenset([
    'azimuth', 'background-color', 'border-bottom-color', 'border-collapse',
    'border-color', 'border-left-color', 'border-right-color',
    'border-top-color', 'clear', 'color', 'cursor', 'direction', 'display',
    'elevation', 'float', 'font', 'font-family', 'font-size', 'font-style',
    'font-variant', 'font-weight', 'height', 'letter-spacing', 'line-height',
    'overflow', 'pause', 'pause-after', 'pause-before', 'pitch',
    'pitch-range', 'richness', 'speak', 'speak-header', 'speak-numeral',
    'speak-punctuation', 'speech-rate', 'stress', 'text-align',
    'text-decoration', 'text-indent', 'unicode-bidi', 'vertical-align',
    'voice-family', 'volume', 'white-space', 'width',
])
-
# Keywords accepted as values of background/border/margin/padding style
# properties by Filter.sanitize_css.
allowed_css_keywords = frozenset([
    'auto', 'aqua', 'black', 'block', 'blue', 'bold', 'both', 'bottom',
    'brown', 'center', 'collapse', 'dashed', 'dotted', 'fuchsia', 'gray',
    'green', '!important', 'italic', 'left', 'lime', 'maroon', 'medium',
    'none', 'navy', 'normal', 'nowrap', 'olive', 'pointer', 'purple', 'red',
    'right', 'solid', 'silver', 'teal', 'top', 'transparent', 'underline',
    'white', 'yellow',
])
-
# SVG presentation properties that Filter.sanitize_css keeps in ``style``
# attributes.
allowed_svg_properties = frozenset([
    'fill', 'fill-opacity', 'fill-rule',
    'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
    'stroke-opacity',
])
-
# URI schemes permitted in attributes listed in ``attr_val_is_uri``.
allowed_protocols = frozenset([
    'ed2k', 'ftp', 'http', 'https', 'irc', 'mailto', 'news', 'gopher',
    'nntp', 'telnet', 'webcal', 'xmpp', 'callto', 'feed', 'urn', 'aim',
    'rsync', 'tag', 'ssh', 'sftp', 'rtsp', 'afs', 'data',
])
-
# Media types accepted for ``data:`` URIs.
allowed_content_types = frozenset([
    'image/png', 'image/jpeg', 'image/gif', 'image/webp', 'image/bmp',
    'text/plain',
])
-
-
# Pattern applied to the path part of a ``data:`` URI (everything after the
# scheme).  The named group ``content_type`` captures the media type; an
# optional charset and/or base64 marker may follow, in either order, before
# the comma that introduces the payload.
data_content_type = re.compile(
    r'''
    ^
    # Match a content type <application>/<type>
    (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
    # Match any character set and encoding
    (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
      |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
    # Assume the rest is data
    ,.*
    $
    ''',
    flags=re.VERBOSE,
)
-
-
class Filter(base.Filter):
    """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes"""
    def __init__(self,
                 source,
                 allowed_elements=allowed_elements,
                 allowed_attributes=allowed_attributes,
                 allowed_css_properties=allowed_css_properties,
                 allowed_css_keywords=allowed_css_keywords,
                 allowed_svg_properties=allowed_svg_properties,
                 allowed_protocols=allowed_protocols,
                 allowed_content_types=allowed_content_types,
                 attr_val_is_uri=attr_val_is_uri,
                 svg_attr_val_allows_ref=svg_attr_val_allows_ref,
                 svg_allow_local_href=svg_allow_local_href):
        """Creates a Filter

        :arg source: the token stream to sanitize; handed to the base
            filter unchanged

        :arg allowed_elements: set of elements to allow--everything else will
            be escaped

        :arg allowed_attributes: set of attributes to allow in
            elements--everything else will be stripped

        :arg allowed_css_properties: set of CSS properties to allow--everything
            else will be stripped

        :arg allowed_css_keywords: set of CSS keywords to allow--everything
            else will be stripped

        :arg allowed_svg_properties: set of SVG properties to allow--everything
            else will be removed

        :arg allowed_protocols: set of allowed protocols for URIs

        :arg allowed_content_types: set of allowed content types for ``data`` URIs.

        :arg attr_val_is_uri: set of attributes that have URI values--values
            that have a scheme not listed in ``allowed_protocols`` are removed

        :arg svg_attr_val_allows_ref: set of SVG attributes that can have
            references

        :arg svg_allow_local_href: set of SVG elements on which only local
            (``#fragment``) ``xlink:href`` values are kept--any other
            ``xlink:href`` on these elements is removed

        """
        super(Filter, self).__init__(source)
        self.allowed_elements = allowed_elements
        self.allowed_attributes = allowed_attributes
        self.allowed_css_properties = allowed_css_properties
        self.allowed_css_keywords = allowed_css_keywords
        self.allowed_svg_properties = allowed_svg_properties
        self.allowed_protocols = allowed_protocols
        self.allowed_content_types = allowed_content_types
        self.attr_val_is_uri = attr_val_is_uri
        self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
        self.svg_allow_local_href = svg_allow_local_href

    def __iter__(self):
        """Yield sanitized tokens, skipping those the sanitizer drops."""
        for token in base.Filter.__iter__(self):
            token = self.sanitize_token(token)
            # sanitize_token returns None for tokens that must be discarded
            # (currently comments).
            if token:
                yield token

    def sanitize_token(self, token):
        """Sanitize a single token.

        Tag tokens whose element is in ``allowed_elements`` have their
        attributes cleaned (see :meth:`allowed_token`); all other tag tokens
        are turned into character tokens carrying the literal tag text (see
        :meth:`disallowed_token`).  Comment tokens are dropped (``None`` is
        returned) and every other token type is passed through unchanged.

        For example, a ``<script>`` start tag is not in the default
        ``allowed_elements`` and therefore comes out as the text
        ``<script>`` rather than as an element, while
        ``<a href="javascript: sucker();">`` keeps the ``a`` element but
        loses its ``href``.

        :arg token: a token dict with at least a ``"type"`` key

        :returns: the sanitized token, or ``None`` if it is to be dropped

        """
        # accommodate filters which use token_type differently
        token_type = token["type"]
        if token_type in ("StartTag", "EndTag", "EmptyTag"):
            name = token["name"]
            namespace = token["namespace"]
            # Tokens without a namespace are treated as HTML elements.
            if ((namespace, name) in self.allowed_elements or
                (namespace is None and
                 (namespaces["html"], name) in self.allowed_elements)):
                return self.allowed_token(token)
            else:
                return self.disallowed_token(token)
        elif token_type == "Comment":
            # Comments are discarded: falling through returns None.
            pass
        else:
            return token

    def allowed_token(self, token):
        """Clean the attributes of a tag token whose element is allowed.

        Removes attributes that are not in ``allowed_attributes``, removes
        URI-valued attributes with a disallowed scheme or a disallowed
        ``data:`` content type, blanks non-local ``url(...)`` references in
        SVG presentation attributes, removes non-local ``xlink:href`` on the
        elements named in ``svg_allow_local_href`` and sanitizes the
        ``style`` attribute.

        :arg token: the tag token; modified in place

        :returns: the same token

        """
        if "data" in token:
            attrs = token["data"]
            attr_names = set(attrs.keys())

            # Remove forbidden attributes
            for to_remove in (attr_names - self.allowed_attributes):
                del token["data"][to_remove]
                attr_names.remove(to_remove)

            # Remove attributes with disallowed URL values
            for attr in (attr_names & self.attr_val_is_uri):
                assert attr in attrs
                # I don't have a clue where this regexp comes from or why it matches those
                # characters, nor why we call unescape. I just know it's always been here.
                # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
                # this will do is remove *more* than it otherwise would.
                val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
                                       unescape(attrs[attr])).lower()
                # remove replacement characters from unescaped characters
                val_unescaped = val_unescaped.replace("\ufffd", "")
                try:
                    uri = urlparse.urlparse(val_unescaped)
                except ValueError:
                    # Unparseable URI: drop the attribute outright.
                    uri = None
                    del attrs[attr]
                if uri and uri.scheme:
                    if uri.scheme not in self.allowed_protocols:
                        del attrs[attr]
                    # ``elif`` so the attribute is deleted at most once: when
                    # ``data`` is not an allowed protocol the attribute is
                    # already gone and a second ``del`` would raise KeyError.
                    elif uri.scheme == 'data':
                        m = data_content_type.match(uri.path)
                        if not m:
                            del attrs[attr]
                        elif m.group('content_type') not in self.allowed_content_types:
                            del attrs[attr]

            # Blank out url(...) references that do not point at a local
            # ``#fragment``.
            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))

            # ``svg_allow_local_href`` stores (None, name) pairs, so the bare
            # element name has to be wrapped before the membership test;
            # bare names are still honoured for caller-supplied sets.
            name = token["name"]
            xlink_href = (namespaces['xlink'], 'href')
            if (((None, name) in self.svg_allow_local_href or
                 name in self.svg_allow_local_href) and
                    xlink_href in attrs and
                    re.search(r'^\s*[^#\s].*', attrs[xlink_href])):
                del attrs[xlink_href]

            if (None, 'style') in attrs:
                attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
            token["data"] = attrs
        return token

    def disallowed_token(self, token):
        """Turn a tag token for a disallowed element into a character token.

        The token's ``"data"`` becomes the textual form of the tag
        (attribute values passed through ``escape``), its ``"type"`` becomes
        ``"Characters"`` and its ``"name"`` key is removed.

        :arg token: the tag token; modified in place

        :returns: the same token

        """
        token_type = token["type"]
        if token_type == "EndTag":
            token["data"] = "</%s>" % token["name"]
        elif token["data"]:
            assert token_type in ("StartTag", "EmptyTag")
            attrs = []
            for (ns, name), v in token["data"].items():
                attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v)))
            token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
        else:
            token["data"] = "<%s>" % token["name"]
        if token.get("selfClosing"):
            # Replace the trailing ">" with "/>".
            token["data"] = token["data"][:-1] + "/>"

        token["type"] = "Characters"

        del token["name"]
        return token

    def sanitize_css(self, style):
        """Return ``style`` reduced to its allowed declarations.

        ``url(...)`` values are removed first.  If the remaining text does
        not pass the two whole-string sanity patterns, an empty string is
        returned.  Otherwise each ``property: value`` pair is kept when the
        property is in ``allowed_css_properties`` or
        ``allowed_svg_properties``, or when it is a
        background/border/margin/padding property whose value consists only
        of allowed keywords, colours and simple lengths.

        :arg style: the raw value of a ``style`` attribute

        :returns: the sanitized declarations joined by single spaces

        """
        # disallow urls
        style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)

        # gauntlet
        if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
            return ''
        if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
            return ''

        clean = []
        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
            if not value:
                continue
            if prop.lower() in self.allowed_css_properties:
                clean.append(prop + ': ' + value + ';')
            elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
                                                'padding']:
                # Shorthand families: every whitespace-separated part of the
                # value must be an allowed keyword, a colour or a length.
                for keyword in value.split():
                    if keyword not in self.allowed_css_keywords and \
                            not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):  # noqa
                        break
                else:
                    clean.append(prop + ': ' + value + ';')
            elif prop.lower() in self.allowed_svg_properties:
                clean.append(prop + ': ' + value + ';')

        return ' '.join(clean)
|