You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

177 lines
5.5 KiB

4 years ago
  1. # -*- coding: utf-8 -*-
  2. from __future__ import unicode_literals
  3. import re
  4. import sys
"""
Python 3 Stuff
=============================================================================
"""
  9. PY3 = sys.version_info[0] == 3
  10. if PY3: # pragma: no cover
  11. string_type = str
  12. text_type = str
  13. int2str = chr
  14. else: # pragma: no cover
  15. string_type = basestring # noqa
  16. text_type = unicode # noqa
  17. int2str = unichr # noqa
"""
Constants you might want to modify
-----------------------------------------------------------------------------
"""
  22. BLOCK_LEVEL_ELEMENTS = re.compile(
  23. r"^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"
  24. r"|script|noscript|form|fieldset|iframe|math"
  25. r"|hr|hr/|style|li|dt|dd|thead|tbody"
  26. r"|tr|th|td|section|footer|header|group|figure"
  27. r"|figcaption|aside|article|canvas|output"
  28. r"|progress|video|nav|main)$",
  29. re.IGNORECASE
  30. )
  31. # Placeholders
  32. STX = '\u0002' # Use STX ("Start of text") for start-of-placeholder
  33. ETX = '\u0003' # Use ETX ("End of text") for end-of-placeholder
  34. INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
  35. INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
  36. INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')
  37. AMP_SUBSTITUTE = STX+"amp"+ETX
  38. HTML_PLACEHOLDER = STX + "wzxhzdk:%s" + ETX
  39. HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
  40. TAG_PLACEHOLDER = STX + "hzzhzkh:%s" + ETX
"""
Constants you probably do not need to change
-----------------------------------------------------------------------------
"""
  45. RTL_BIDI_RANGES = (
  46. ('\u0590', '\u07FF'),
  47. # Hebrew (0590-05FF), Arabic (0600-06FF),
  48. # Syriac (0700-074F), Arabic supplement (0750-077F),
  49. # Thaana (0780-07BF), Nko (07C0-07FF).
  50. ('\u2D30', '\u2D7F') # Tifinagh
  51. )
  52. # Extensions should use "markdown.util.etree" instead of "etree" (or do `from
  53. # markdown.util import etree`). Do not import it by yourself.
  54. try: # pragma: no cover
  55. # Is the C implementation of ElementTree available?
  56. import xml.etree.cElementTree as etree
  57. from xml.etree.ElementTree import Comment
  58. # Serializers (including ours) test with non-c Comment
  59. etree.test_comment = Comment
  60. if etree.VERSION < "1.0.5":
  61. raise RuntimeError("cElementTree version 1.0.5 or higher is required.")
  62. except (ImportError, RuntimeError): # pragma: no cover
  63. # Use the Python implementation of ElementTree?
  64. import xml.etree.ElementTree as etree
  65. if etree.VERSION < "1.1":
  66. raise RuntimeError("ElementTree version 1.1 or higher is required")
"""
AUXILIARY GLOBAL FUNCTIONS
=============================================================================
"""
  71. def isBlockLevel(tag):
  72. """Check if the tag is a block level HTML tag."""
  73. if isinstance(tag, string_type):
  74. return BLOCK_LEVEL_ELEMENTS.match(tag)
  75. # Some ElementTree tags are not strings, so return False.
  76. return False
  77. def parseBoolValue(value, fail_on_errors=True, preserve_none=False):
  78. """Parses a string representing bool value. If parsing was successful,
  79. returns True or False. If preserve_none=True, returns True, False,
  80. or None. If parsing was not successful, raises ValueError, or, if
  81. fail_on_errors=False, returns None."""
  82. if not isinstance(value, string_type):
  83. if preserve_none and value is None:
  84. return value
  85. return bool(value)
  86. elif preserve_none and value.lower() == 'none':
  87. return None
  88. elif value.lower() in ('true', 'yes', 'y', 'on', '1'):
  89. return True
  90. elif value.lower() in ('false', 'no', 'n', 'off', '0', 'none'):
  91. return False
  92. elif fail_on_errors:
  93. raise ValueError('Cannot parse bool value: %r' % value)
"""
MISC AUXILIARY CLASSES
=============================================================================
"""
  98. class AtomicString(text_type):
  99. """A string which should not be further processed."""
  100. pass
  101. class Processor(object):
  102. def __init__(self, markdown_instance=None):
  103. if markdown_instance:
  104. self.markdown = markdown_instance
  105. class HtmlStash(object):
  106. """
  107. This class is used for stashing HTML objects that we extract
  108. in the beginning and replace with place-holders.
  109. """
  110. def __init__(self):
  111. """ Create a HtmlStash. """
  112. self.html_counter = 0 # for counting inline html segments
  113. self.rawHtmlBlocks = []
  114. self.tag_counter = 0
  115. self.tag_data = [] # list of dictionaries in the order tags appear
  116. def store(self, html, safe=False):
  117. """
  118. Saves an HTML segment for later reinsertion. Returns a
  119. placeholder string that needs to be inserted into the
  120. document.
  121. Keyword arguments:
  122. * html: an html segment
  123. * safe: label an html segment as safe for safemode
  124. Returns : a placeholder string
  125. """
  126. self.rawHtmlBlocks.append((html, safe))
  127. placeholder = self.get_placeholder(self.html_counter)
  128. self.html_counter += 1
  129. return placeholder
  130. def reset(self):
  131. self.html_counter = 0
  132. self.rawHtmlBlocks = []
  133. def get_placeholder(self, key):
  134. return HTML_PLACEHOLDER % key
  135. def store_tag(self, tag, attrs, left_index, right_index):
  136. """Store tag data and return a placeholder."""
  137. self.tag_data.append({'tag': tag, 'attrs': attrs,
  138. 'left_index': left_index,
  139. 'right_index': right_index})
  140. placeholder = TAG_PLACEHOLDER % str(self.tag_counter)
  141. self.tag_counter += 1 # equal to the tag's index in self.tag_data
  142. return placeholder