from __future__ import absolute_import, division, unicode_literals
|
|
|
|
import re
|
|
from xml.sax.saxutils import escape, unescape
|
|
|
|
from pip._vendor.six.moves import urllib_parse as urlparse
|
|
|
|
from . import base
|
|
from ..constants import namespaces, prefixes
|
|
|
|
__all__ = ["Filter"]
|
|
|
|
|
|
# Default element whitelist: (namespace, local name) pairs, grouped here by
# namespace. Elements outside this set are escaped by the filter.
allowed_elements = (
    frozenset((namespaces['html'], tag) for tag in (
        'a', 'abbr', 'acronym', 'address', 'area', 'article', 'aside',
        'audio', 'b', 'big', 'blockquote', 'br', 'button', 'canvas',
        'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'command',
        'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog',
        'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
        'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
        'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
        'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu',
        'meter', 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup',
        'option', 'p', 'pre', 'progress', 'q', 's', 'samp', 'section',
        'select', 'small', 'sound', 'source', 'spacer', 'span', 'strike',
        'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time',
        'tfoot', 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video',
    )) |
    frozenset((namespaces['mathml'], tag) for tag in (
        'maction', 'math', 'merror', 'mfrac', 'mi', 'mmultiscripts', 'mn',
        'mo', 'mover', 'mpadded', 'mphantom', 'mprescripts', 'mroot',
        'mrow', 'mspace', 'msqrt', 'mstyle', 'msub', 'msubsup', 'msup',
        'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'none',
    )) |
    frozenset((namespaces['svg'], tag) for tag in (
        'a', 'animate', 'animateColor', 'animateMotion', 'animateTransform',
        'clipPath', 'circle', 'defs', 'desc', 'ellipse', 'font-face',
        'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
        'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
        'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
        'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use',
    ))
)
|
|
|
|
# Default attribute whitelist: (namespace, local name) pairs. Un-namespaced
# attributes are listed per vocabulary; names that occur in more than one
# vocabulary simply collapse into a single set member.
allowed_attributes = (
    frozenset((None, attr) for attr in (
        # HTML attributes
        'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align',
        'alt', 'autocomplete', 'autofocus', 'axis', 'background', 'balance',
        'bgcolor', 'bgproperties', 'border', 'bordercolor',
        'bordercolordark', 'bordercolorlight', 'bottompadding',
        'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
        'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
        'cols', 'colspan', 'compact', 'contenteditable', 'controls',
        'coords', 'data', 'datafld', 'datapagesize', 'datasrc', 'datetime',
        'default', 'delay', 'dir', 'disabled', 'draggable', 'dynsrc',
        'enctype', 'end', 'face', 'for', 'form', 'frame', 'galleryimg',
        'gutter', 'headers', 'height', 'hidefocus', 'hidden', 'high',
        'href', 'hreflang', 'hspace', 'icon', 'id', 'inputmode', 'ismap',
        'keytype', 'label', 'leftspacing', 'lang', 'list', 'longdesc',
        'loop', 'loopcount', 'loopend', 'loopstart', 'low', 'lowsrc', 'max',
        'maxlength', 'media', 'method', 'min', 'multiple', 'name', 'nohref',
        'noshade', 'nowrap', 'open', 'optimum', 'pattern', 'ping',
        'point-size', 'poster', 'pqg', 'preload', 'prompt', 'radiogroup',
        'readonly', 'rel', 'repeat-max', 'repeat-min', 'replace',
        'required', 'rev', 'rightspacing', 'rows', 'rowspan', 'rules',
        'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
        'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
        'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
        'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
        'width', 'wrap',
        # MathML attributes
        'actiontype', 'align', 'columnalign', 'columnlines',
        'columnspacing', 'columnspan', 'depth', 'display', 'displaystyle',
        'equalcolumns', 'equalrows', 'fence', 'fontstyle', 'fontweight',
        'frame', 'height', 'linethickness', 'lspace', 'mathbackground',
        'mathcolor', 'mathvariant', 'maxsize', 'minsize', 'other',
        'rowalign', 'rowlines', 'rowspacing', 'rowspan', 'rspace',
        'scriptlevel', 'selection', 'separator', 'stretchy', 'width',
        # SVG attributes
        'accent-height', 'accumulate', 'additive', 'alphabetic',
        'arabic-form', 'ascent', 'attributeName', 'attributeType',
        'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
        'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
        'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
        'fill-opacity', 'fill-rule', 'font-family', 'font-size',
        'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
        'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
        'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
        'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
        'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
        'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
        'opacity', 'orient', 'origin', 'overline-position',
        'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
        'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
        'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
        'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
        'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
        'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
        'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
        'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
        'transform', 'type', 'u1', 'u2', 'underline-position',
        'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
        'values', 'version', 'viewBox', 'visibility', 'width', 'widths',
        'x', 'x-height', 'x1', 'x2', 'y', 'y1', 'y2', 'zoomAndPan',
    )) |
    # XLink attributes (used by the MathML and SVG vocabularies)
    frozenset((namespaces['xlink'], attr) for attr in (
        'actuate', 'arcrole', 'href', 'role', 'show', 'title', 'type',
    )) |
    # xml: attributes
    frozenset((namespaces['xml'], attr) for attr in (
        'base', 'lang', 'space',
    ))
)
|
|
|
|
# Attributes whose values are URIs; the filter checks the scheme of these
# values against the allowed protocols.
attr_val_is_uri = (
    frozenset((None, attr) for attr in (
        'href', 'src', 'cite', 'action', 'longdesc', 'poster', 'background',
        'datasrc', 'dynsrc', 'lowsrc', 'ping',
    )) |
    frozenset([
        (namespaces['xlink'], 'href'),
        (namespaces['xml'], 'base'),
    ])
)
|
|
|
|
# Un-namespaced SVG attributes whose values may contain ``url(...)``
# references; the filter blanks out references that are not local ("#...").
svg_attr_val_allows_ref = frozenset((None, attr) for attr in (
    'clip-path', 'color-profile', 'cursor', 'fill', 'filter', 'marker',
    'marker-start', 'marker-mid', 'marker-end', 'mask', 'stroke',
))
|
|
|
|
# SVG elements, stored as (None, local name) pairs, that the filter consults
# when deciding whether an xlink:href value has to be a local reference.
svg_allow_local_href = frozenset((None, tag) for tag in (
    'altGlyph', 'animate', 'animateColor', 'animateMotion',
    'animateTransform', 'cursor', 'feImage', 'filter', 'linearGradient',
    'pattern', 'radialGradient', 'textpath', 'tref', 'set', 'use',
))
|
|
|
|
# CSS properties that are kept as-is inside ``style`` attributes.
allowed_css_properties = frozenset([
    'azimuth', 'background-color', 'border-bottom-color', 'border-collapse',
    'border-color', 'border-left-color', 'border-right-color',
    'border-top-color', 'clear', 'color', 'cursor', 'direction', 'display',
    'elevation', 'float', 'font', 'font-family', 'font-size', 'font-style',
    'font-variant', 'font-weight', 'height', 'letter-spacing', 'line-height',
    'overflow', 'pause', 'pause-after', 'pause-before', 'pitch',
    'pitch-range', 'richness', 'speak', 'speak-header', 'speak-numeral',
    'speak-punctuation', 'speech-rate', 'stress', 'text-align',
    'text-decoration', 'text-indent', 'unicode-bidi', 'vertical-align',
    'voice-family', 'volume', 'white-space', 'width',
])
|
|
|
|
# Keywords accepted as values of background/border/margin/padding-style
# shorthand properties when sanitizing CSS.
allowed_css_keywords = frozenset([
    'auto', 'aqua', 'black', 'block', 'blue', 'bold', 'both', 'bottom',
    'brown', 'center', 'collapse', 'dashed', 'dotted', 'fuchsia', 'gray',
    'green', '!important', 'italic', 'left', 'lime', 'maroon', 'medium',
    'none', 'navy', 'normal', 'nowrap', 'olive', 'pointer', 'purple', 'red',
    'right', 'solid', 'silver', 'teal', 'top', 'transparent', 'underline',
    'white', 'yellow',
])
|
|
|
|
# SVG presentation properties that are kept inside ``style`` attributes.
allowed_svg_properties = frozenset([
    'fill', 'fill-opacity', 'fill-rule',
    'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
    'stroke-opacity',
])
|
|
|
|
# URI schemes that may appear in URI-valued attributes.
allowed_protocols = frozenset([
    'ed2k', 'ftp', 'http', 'https', 'irc', 'mailto', 'news', 'gopher',
    'nntp', 'telnet', 'webcal', 'xmpp', 'callto', 'feed', 'urn', 'aim',
    'rsync', 'tag', 'ssh', 'sftp', 'rtsp', 'afs', 'data',
])
|
|
|
|
# Content types accepted for ``data:`` URIs.
allowed_content_types = frozenset([
    'image/png', 'image/jpeg', 'image/gif', 'image/webp', 'image/bmp',
    'text/plain',
])
|
|
|
|
|
|
# Verbose-mode pattern applied to the path part of a ``data:`` URI; the
# ``content_type`` group captures the media type that precedes the payload.
data_content_type = re.compile(
    r'''
    ^
    # Match a content type <application>/<type>
    (?P<content_type>[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+)
    # Match any character set and encoding
    (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?)
      |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?)
    # Assume the rest is data
    ,.*
    $
    ''',
    flags=re.VERBOSE)
|
|
|
|
|
|
class Filter(base.Filter):
    """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes"""

    def __init__(self,
                 source,
                 allowed_elements=allowed_elements,
                 allowed_attributes=allowed_attributes,
                 allowed_css_properties=allowed_css_properties,
                 allowed_css_keywords=allowed_css_keywords,
                 allowed_svg_properties=allowed_svg_properties,
                 allowed_protocols=allowed_protocols,
                 allowed_content_types=allowed_content_types,
                 attr_val_is_uri=attr_val_is_uri,
                 svg_attr_val_allows_ref=svg_attr_val_allows_ref,
                 svg_allow_local_href=svg_allow_local_href):
        """Creates a Filter

        :arg source: the token stream to filter; passed unchanged to
            ``base.Filter``

        :arg allowed_elements: set of elements to allow--everything else will
            be escaped

        :arg allowed_attributes: set of attributes to allow in
            elements--everything else will be stripped

        :arg allowed_css_properties: set of CSS properties to allow--everything
            else will be stripped

        :arg allowed_css_keywords: set of CSS keywords to allow--everything
            else will be stripped

        :arg allowed_svg_properties: set of SVG properties to allow--everything
            else will be removed

        :arg allowed_protocols: set of allowed protocols for URIs

        :arg allowed_content_types: set of allowed content types for ``data`` URIs.

        :arg attr_val_is_uri: set of attributes that have URI values--values
            that have a scheme not listed in ``allowed_protocols`` are removed

        :arg svg_attr_val_allows_ref: set of SVG attributes that can have
            references

        :arg svg_allow_local_href: set of SVG elements whose ``xlink:href``
            may only be a local ("#...") reference--non-local values are
            removed. Entries may be ``(None, name)`` pairs (as in the default
            set) or bare element names.

        """
        super(Filter, self).__init__(source)
        self.allowed_elements = allowed_elements
        self.allowed_attributes = allowed_attributes
        self.allowed_css_properties = allowed_css_properties
        self.allowed_css_keywords = allowed_css_keywords
        self.allowed_svg_properties = allowed_svg_properties
        self.allowed_protocols = allowed_protocols
        self.allowed_content_types = allowed_content_types
        self.attr_val_is_uri = attr_val_is_uri
        self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
        self.svg_allow_local_href = svg_allow_local_href

    def __iter__(self):
        """Yield sanitized tokens, skipping any that sanitize to a falsy value."""
        for token in base.Filter.__iter__(self):
            token = self.sanitize_token(token)
            if token:
                yield token

    # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
    # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
    # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and
    # ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI
    # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are
    # allowed.
    #
    #   sanitize_html('<script> do_nasty_stuff() </script>')
    #    => &lt;script&gt; do_nasty_stuff() &lt;/script&gt;
    #   sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
    #    => <a>Click here for $100</a>
    def sanitize_token(self, token):
        """Sanitize a single token.

        Tag tokens are dispatched to ``allowed_token`` or
        ``disallowed_token`` depending on the element whitelist, comment
        tokens are dropped (``None`` is returned), and every other token is
        returned unchanged.
        """
        # accommodate filters which use token_type differently
        token_type = token["type"]
        if token_type in ("StartTag", "EndTag", "EmptyTag"):
            name = token["name"]
            namespace = token["namespace"]
            # Elements without a namespace are looked up as HTML elements.
            if ((namespace, name) in self.allowed_elements or
                (namespace is None and
                 (namespaces["html"], name) in self.allowed_elements)):
                return self.allowed_token(token)
            return self.disallowed_token(token)
        elif token_type == "Comment":
            # Comments are removed from the stream.
            return None
        else:
            return token

    def _uri_is_allowed(self, value):
        """Return True if a URI-valued attribute with ``value`` may be kept."""
        # I don't have a clue where this regexp comes from or why it matches those
        # characters, nor why we call unescape. I just know it's always been here.
        # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all
        # this will do is remove *more* than it otherwise would.
        val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '',
                               unescape(value)).lower()
        # remove replacement characters from unescaped characters
        val_unescaped = val_unescaped.replace("\ufffd", "")
        try:
            uri = urlparse.urlparse(val_unescaped)
        except ValueError:
            # Unparseable values are rejected.
            return False
        if not uri.scheme:
            # No scheme (e.g. a relative URL): nothing to check.
            return True
        if uri.scheme not in self.allowed_protocols:
            return False
        if uri.scheme == 'data':
            # data: URIs must also carry a whitelisted content type.
            m = data_content_type.match(uri.path)
            return (m is not None and
                    m.group('content_type') in self.allowed_content_types)
        return True

    def allowed_token(self, token):
        """Strip unsafe attributes from a token for an allowed element.

        Removes attributes that are not whitelisted, URI attributes whose
        value fails the protocol/content-type checks, non-local ``url(...)``
        references in SVG attributes and non-local ``xlink:href`` values on
        the elements in ``svg_allow_local_href``; the ``style`` attribute is
        passed through ``sanitize_css``. The token is modified in place and
        returned.
        """
        if "data" in token:
            attrs = token["data"]
            attr_names = set(attrs.keys())

            # Remove forbidden attributes
            for to_remove in (attr_names - self.allowed_attributes):
                del token["data"][to_remove]
                attr_names.remove(to_remove)

            # Remove attributes with disallowed URL values. Each attribute is
            # deleted at most once, so a caller-supplied allowed_protocols
            # that omits 'data' cannot trigger a second delete (KeyError).
            for attr in (attr_names & self.attr_val_is_uri):
                assert attr in attrs
                if not self._uri_is_allowed(attrs[attr]):
                    del attrs[attr]

            # Blank out url(...) references that do not start with '#'.
            for attr in self.svg_attr_val_allows_ref:
                if attr in attrs:
                    attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
                                         ' ',
                                         unescape(attrs[attr]))

            # svg_allow_local_href holds (None, name) pairs by default, so the
            # element has to be looked up in that form; bare names are also
            # accepted for caller-supplied sets.
            name = token["name"]
            xlink_href = (namespaces['xlink'], 'href')
            if (xlink_href in attrs and
                ((None, name) in self.svg_allow_local_href or
                 name in self.svg_allow_local_href) and
                    re.search(r'^\s*[^#\s].*', attrs[xlink_href])):
                del attrs[xlink_href]

            if (None, 'style') in attrs:
                attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')])
            token["data"] = attrs
        return token

    def disallowed_token(self, token):
        """Turn a tag token for a disallowed element into a Characters token.

        The token's ``data`` becomes the textual form of the tag (including
        its attributes for start/empty tags), its ``type`` becomes
        ``"Characters"`` and its ``name`` key is removed. The token is
        modified in place and returned.
        """
        token_type = token["type"]
        if token_type == "EndTag":
            token["data"] = "</%s>" % token["name"]
        elif token["data"]:
            assert token_type in ("StartTag", "EmptyTag")
            attrs = []
            for (ns, name), v in token["data"].items():
                # Namespaced attributes are written as prefix:name.
                qualified = name if ns is None else "%s:%s" % (prefixes[ns], name)
                attrs.append(' %s="%s"' % (qualified, escape(v)))
            token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
        else:
            token["data"] = "<%s>" % token["name"]
        if token.get("selfClosing"):
            # Replace the trailing '>' with '/>'.
            token["data"] = token["data"][:-1] + "/>"

        token["type"] = "Characters"

        del token["name"]
        return token

    def sanitize_css(self, style):
        """Return ``style`` reduced to the whitelisted CSS declarations.

        ``url(...)`` values are blanked out first. If the remaining text
        fails either syntax check an empty string is returned; otherwise
        only declarations whose property is whitelisted (or, for
        background/border/margin/padding properties, whose value consists
        solely of allowed keywords, colours and lengths) are kept.
        """
        # disallow urls
        style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style)

        # gauntlet
        if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style):
            return ''
        if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style):
            return ''

        clean = []
        for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style):
            if not value:
                continue
            if prop.lower() in self.allowed_css_properties:
                clean.append(prop + ': ' + value + ';')
            elif prop.split('-')[0].lower() in ['background', 'border', 'margin',
                                                'padding']:
                # Shorthand families: keep only if every word of the value is
                # an allowed keyword, a colour or a length.
                for keyword in value.split():
                    if keyword not in self.allowed_css_keywords and \
                            not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword):  # noqa
                        break
                else:
                    clean.append(prop + ': ' + value + ';')
            elif prop.lower() in self.allowed_svg_properties:
                clean.append(prop + ': ' + value + ';')

        return ' '.join(clean)
|