# $Id: __init__.py 8035 2017-02-13 22:01:47Z milde $ # Author: David Goodger # Maintainer: docutils-develop@lists.sourceforge.net # Copyright: This module has been placed in the public domain. """ Simple HyperText Markup Language document tree Writer. The output conforms to the XHTML version 1.0 Transitional DTD (*almost* strict). The output contains a minimum of formatting information. The cascading style sheet "html4css1.css" is required for proper viewing with a modern graphical browser. """ __docformat__ = 'reStructuredText' import os.path import docutils from docutils import frontend, nodes, writers, io from docutils.transforms import writer_aux from docutils.writers import _html_base class Writer(writers._html_base.Writer): supported = ('html', 'html4', 'html4css1', 'xhtml', 'xhtml10') """Formats this writer supports.""" default_stylesheets = ['html4css1.css'] default_stylesheet_dirs = ['.', os.path.abspath(os.path.dirname(__file__)), # for math.css os.path.abspath(os.path.join( os.path.dirname(os.path.dirname(__file__)), 'html5_polyglot')) ] default_template = 'template.txt' default_template_path = os.path.join( os.path.dirname(os.path.abspath(__file__)), default_template) settings_spec = ( 'HTML-Specific Options', None, (('Specify the template file (UTF-8 encoded). Default is "%s".' % default_template_path, ['--template'], {'default': default_template_path, 'metavar': ''}), ('Comma separated list of stylesheet URLs. ' 'Overrides previous --stylesheet and --stylesheet-path settings.', ['--stylesheet'], {'metavar': '', 'overrides': 'stylesheet_path', 'validator': frontend.validate_comma_separated_list}), ('Comma separated list of stylesheet paths. ' 'Relative paths are expanded if a matching file is found in ' 'the --stylesheet-dirs. With --link-stylesheet, ' 'the path is rewritten relative to the output HTML file. 
' 'Default: "%s"' % ','.join(default_stylesheets), ['--stylesheet-path'], {'metavar': '', 'overrides': 'stylesheet', 'validator': frontend.validate_comma_separated_list, 'default': default_stylesheets}), ('Embed the stylesheet(s) in the output HTML file. The stylesheet ' 'files must be accessible during processing. This is the default.', ['--embed-stylesheet'], {'default': 1, 'action': 'store_true', 'validator': frontend.validate_boolean}), ('Link to the stylesheet(s) in the output HTML file. ' 'Default: embed stylesheets.', ['--link-stylesheet'], {'dest': 'embed_stylesheet', 'action': 'store_false'}), ('Comma-separated list of directories where stylesheets are found. ' 'Used by --stylesheet-path when expanding relative path arguments. ' 'Default: "%s"' % default_stylesheet_dirs, ['--stylesheet-dirs'], {'metavar': '', 'validator': frontend.validate_comma_separated_list, 'default': default_stylesheet_dirs}), ('Specify the initial header level. Default is 1 for "

". ' 'Does not affect document title & subtitle (see --no-doc-title).', ['--initial-header-level'], {'choices': '1 2 3 4 5 6'.split(), 'default': '1', 'metavar': ''}), ('Specify the maximum width (in characters) for one-column field ' 'names. Longer field names will span an entire row of the table ' 'used to render the field list. Default is 14 characters. ' 'Use 0 for "no limit".', ['--field-name-limit'], {'default': 14, 'metavar': '', 'validator': frontend.validate_nonnegative_int}), ('Specify the maximum width (in characters) for options in option ' 'lists. Longer options will span an entire row of the table used ' 'to render the option list. Default is 14 characters. ' 'Use 0 for "no limit".', ['--option-limit'], {'default': 14, 'metavar': '', 'validator': frontend.validate_nonnegative_int}), ('Format for footnote references: one of "superscript" or ' '"brackets". Default is "brackets".', ['--footnote-references'], {'choices': ['superscript', 'brackets'], 'default': 'brackets', 'metavar': '', 'overrides': 'trim_footnote_reference_space'}), ('Format for block quote attributions: one of "dash" (em-dash ' 'prefix), "parentheses"/"parens", or "none". Default is "dash".', ['--attribution'], {'choices': ['dash', 'parentheses', 'parens', 'none'], 'default': 'dash', 'metavar': ''}), ('Remove extra vertical whitespace between items of "simple" bullet ' 'lists and enumerated lists. Default: enabled.', ['--compact-lists'], {'default': 1, 'action': 'store_true', 'validator': frontend.validate_boolean}), ('Disable compact simple bullet and enumerated lists.', ['--no-compact-lists'], {'dest': 'compact_lists', 'action': 'store_false'}), ('Remove extra vertical whitespace between items of simple field ' 'lists. 
Default: enabled.', ['--compact-field-lists'], {'default': 1, 'action': 'store_true', 'validator': frontend.validate_boolean}), ('Disable compact simple field lists.', ['--no-compact-field-lists'], {'dest': 'compact_field_lists', 'action': 'store_false'}), ('Added to standard table classes. ' 'Defined styles: "borderless". Default: ""', ['--table-style'], {'default': ''}), ('Math output format, one of "MathML", "HTML", "MathJax" ' 'or "LaTeX". Default: "HTML math.css"', ['--math-output'], {'default': 'HTML math.css'}), ('Omit the XML declaration. Use with caution.', ['--no-xml-declaration'], {'dest': 'xml_declaration', 'default': 1, 'action': 'store_false', 'validator': frontend.validate_boolean}), ('Obfuscate email addresses to confuse harvesters while still ' 'keeping email links usable with standards-compliant browsers.', ['--cloak-email-addresses'], {'action': 'store_true', 'validator': frontend.validate_boolean}),)) config_section = 'html4css1 writer' def __init__(self): self.parts = {} self.translator_class = HTMLTranslator class HTMLTranslator(writers._html_base.HTMLTranslator): """ The html4css1 writer has been optimized to produce visually compact lists (less vertical whitespace). HTML's mixed content models allow list items to contain "
  • body elements

  • " or "
  • just text
  • " or even "
  • text

    and body elements

    combined
  • ", each with different effects. It would be best to stick with strict body elements in list items, but they affect vertical spacing in older browsers (although they really shouldn't). The html5_polyglot writer solves this using CSS2. Here is an outline of the optimization: - Check for and omit

    tags in "simple" lists: list items contain either a single paragraph, a nested simple list, or a paragraph followed by a nested simple list. This means that this list can be compact: - Item 1. - Item 2. But this list cannot be compact: - Item 1. This second paragraph forces space between list items. - Item 2. - In non-list contexts, omit

    tags on a paragraph if that paragraph is the only child of its parent (footnotes & citations are allowed a label first). - Regardless of the above, in definitions, table cells, field bodies, option descriptions, and list items, mark the first child with 'class="first"' and the last child with 'class="last"'. The stylesheet sets the margins (top & bottom respectively) to 0 for these elements. The ``no_compact_lists`` setting (``--no-compact-lists`` command-line option) disables list whitespace optimization. """ # The following definitions are required for display in browsers limited # to CSS1 or backwards compatible behaviour of the writer: doctype = ( '\n') content_type = ('\n') content_type_mathml = ('\n') # encode also non-breaking space special_characters = dict(_html_base.HTMLTranslator.special_characters) special_characters[0xa0] = ' ' # use character reference for dash (not valid in HTML5) attribution_formats = {'dash': ('—', ''), 'parentheses': ('(', ')'), 'parens': ('(', ')'), 'none': ('', '')} # ersatz for first/last pseudo-classes missing in CSS1 def set_first_last(self, node): self.set_class_on_child(node, 'first', 0) self.set_class_on_child(node, 'last', -1) # add newline after opening tag def visit_address(self, node): self.visit_docinfo_item(node, 'address', meta=False) self.body.append(self.starttag(node, 'pre', CLASS='address')) # ersatz for first/last pseudo-classes def visit_admonition(self, node): node['classes'].insert(0, 'admonition') self.body.append(self.starttag(node, 'div')) self.set_first_last(node) # author, authors: use
    instead of paragraphs def visit_author(self, node): if isinstance(node.parent, nodes.authors): if self.author_in_authors: self.body.append('\n
    ') else: self.visit_docinfo_item(node, 'author') def depart_author(self, node): if isinstance(node.parent, nodes.authors): self.author_in_authors = True else: self.depart_docinfo_item() def visit_authors(self, node): self.visit_docinfo_item(node, 'authors') self.author_in_authors = False # initialize def depart_authors(self, node): self.depart_docinfo_item() # use "width" argument insted of "style: 'width'": def visit_colspec(self, node): self.colspecs.append(node) # "stubs" list is an attribute of the tgroup element: node.parent.stubs.append(node.attributes.get('stub')) # def depart_colspec(self, node): # write out when all colspecs are processed if isinstance(node.next_node(descend=False, siblings=True), nodes.colspec): return if 'colwidths-auto' in node.parent.parent['classes'] or ( 'colwidths-auto' in self.settings.table_style and ('colwidths-given' not in node.parent.parent['classes'])): return total_width = sum(node['colwidth'] for node in self.colspecs) self.body.append(self.starttag(node, 'colgroup')) for node in self.colspecs: colwidth = int(node['colwidth'] * 100.0 / total_width + 0.5) self.body.append(self.emptytag(node, 'col', width='%i%%' % colwidth)) self.body.append('\n') # Compact lists: # exclude definition lists and field lists (non-compact by default) def is_compactable(self, node): return ('compact' in node['classes'] or (self.settings.compact_lists and 'open' not in node['classes'] and (self.compact_simple or self.topic_classes == ['contents'] # TODO: self.in_contents or self.check_simple_list(node)))) # citations: Use table for bibliographic references. 
def visit_citation(self, node): self.body.append(self.starttag(node, 'table', CLASS='docutils citation', frame="void", rules="none")) self.body.append('\n' '\n' '') self.footnote_backrefs(node) def depart_citation(self, node): self.body.append('\n' '\n\n') # insert classifier-delimiter (not required with CSS2) def visit_classifier(self, node): self.body.append(' : ') self.body.append(self.starttag(node, 'span', '', CLASS='classifier')) # ersatz for first/last pseudo-classes def visit_definition(self, node): self.body.append('\n') self.body.append(self.starttag(node, 'dd', '')) self.set_first_last(node) # don't add "simple" class value def visit_definition_list(self, node): self.body.append(self.starttag(node, 'dl', CLASS='docutils')) # use a table for description lists def visit_description(self, node): self.body.append(self.starttag(node, 'td', '')) self.set_first_last(node) def depart_description(self, node): self.body.append('') # use table for docinfo def visit_docinfo(self, node): self.context.append(len(self.body)) self.body.append(self.starttag(node, 'table', CLASS='docinfo', frame="void", rules="none")) self.body.append('\n' '\n' '\n') self.in_docinfo = True def depart_docinfo(self, node): self.body.append('\n\n') self.in_docinfo = False start = self.context.pop() self.docinfo = self.body[start:] self.body = [] def visit_docinfo_item(self, node, name, meta=True): if meta: meta_tag = '\n' \ % (name, self.attval(node.astext())) self.add_meta(meta_tag) self.body.append(self.starttag(node, 'tr', '')) self.body.append('%s:\n' % self.language.labels[name]) if len(node): if isinstance(node[0], nodes.Element): node[0]['classes'].append('first') if isinstance(node[-1], nodes.Element): node[-1]['classes'].append('last') def depart_docinfo_item(self): self.body.append('\n') # add newline after opening tag def visit_doctest_block(self, node): self.body.append(self.starttag(node, 'pre', CLASS='doctest-block')) # insert an NBSP into empty cells, ersatz for first/last def 
visit_entry(self, node): writers._html_base.HTMLTranslator.visit_entry(self, node) if len(node) == 0: # empty cell self.body.append(' ') self.set_first_last(node) # ersatz for first/last pseudo-classes def visit_enumerated_list(self, node): """ The 'start' attribute does not conform to HTML 4.01's strict.dtd, but cannot be emulated in CSS1 (HTML 5 reincludes it). """ atts = {} if 'start' in node: atts['start'] = node['start'] if 'enumtype' in node: atts['class'] = node['enumtype'] # @@@ To do: prefix, suffix. How? Change prefix/suffix to a # single "format" attribute? Use CSS2? old_compact_simple = self.compact_simple self.context.append((self.compact_simple, self.compact_p)) self.compact_p = None self.compact_simple = self.is_compactable(node) if self.compact_simple and not old_compact_simple: atts['class'] = (atts.get('class', '') + ' simple').strip() self.body.append(self.starttag(node, 'ol', **atts)) def depart_enumerated_list(self, node): self.compact_simple, self.compact_p = self.context.pop() self.body.append('\n') # use table for field-list: def visit_field(self, node): self.body.append(self.starttag(node, 'tr', '', CLASS='field')) def depart_field(self, node): self.body.append('\n') def visit_field_body(self, node): self.body.append(self.starttag(node, 'td', '', CLASS='field-body')) self.set_class_on_child(node, 'first', 0) field = node.parent if (self.compact_field_list or isinstance(field.parent, nodes.docinfo) or field.parent.index(field) == len(field.parent) - 1): # If we are in a compact list, the docinfo, or if this is # the last field of the field list, do not add vertical # space after last element. 
self.set_class_on_child(node, 'last', -1) def depart_field_body(self, node): self.body.append('\n') def visit_field_list(self, node): self.context.append((self.compact_field_list, self.compact_p)) self.compact_p = None if 'compact' in node['classes']: self.compact_field_list = True elif (self.settings.compact_field_lists and 'open' not in node['classes']): self.compact_field_list = True if self.compact_field_list: for field in node: field_body = field[-1] assert isinstance(field_body, nodes.field_body) children = [n for n in field_body if not isinstance(n, nodes.Invisible)] if not (len(children) == 0 or len(children) == 1 and isinstance(children[0], (nodes.paragraph, nodes.line_block))): self.compact_field_list = False break self.body.append(self.starttag(node, 'table', frame='void', rules='none', CLASS='docutils field-list')) self.body.append('\n' '\n' '\n') def depart_field_list(self, node): self.body.append('\n\n') self.compact_field_list, self.compact_p = self.context.pop() def visit_field_name(self, node): atts = {} if self.in_docinfo: atts['class'] = 'docinfo-name' else: atts['class'] = 'field-name' if ( self.settings.field_name_limit and len(node.astext()) > self.settings.field_name_limit): atts['colspan'] = 2 self.context.append('\n' + self.starttag(node.parent, 'tr', '', CLASS='field') + ' ') else: self.context.append('') self.body.append(self.starttag(node, 'th', '', **atts)) def depart_field_name(self, node): self.body.append(':') self.body.append(self.context.pop()) # use table for footnote text def visit_footnote(self, node): self.body.append(self.starttag(node, 'table', CLASS='docutils footnote', frame="void", rules="none")) self.body.append('\n' '\n' '') self.footnote_backrefs(node) def footnote_backrefs(self, node): backlinks = [] backrefs = node['backrefs'] if self.settings.footnote_backlinks and backrefs: if len(backrefs) == 1: self.context.append('') self.context.append('') self.context.append('' % backrefs[0]) else: # Python 2.4 fails with 
enumerate(backrefs, 1) for (i, backref) in enumerate(backrefs): backlinks.append('%s' % (backref, i+1)) self.context.append('(%s) ' % ', '.join(backlinks)) self.context += ['', ''] else: self.context.append('') self.context += ['', ''] # If the node does not only consist of a label. if len(node) > 1: # If there are preceding backlinks, we do not set class # 'first', because we need to retain the top-margin. if not backlinks: node[1]['classes'].append('first') node[-1]['classes'].append('last') def depart_footnote(self, node): self.body.append('\n' '\n\n') # insert markers in text as pseudo-classes are not supported in CSS1: def visit_footnote_reference(self, node): href = '#' + node['refid'] format = self.settings.footnote_references if format == 'brackets': suffix = '[' self.context.append(']') else: assert format == 'superscript' suffix = '' self.context.append('') self.body.append(self.starttag(node, 'a', suffix, CLASS='footnote-reference', href=href)) def depart_footnote_reference(self, node): self.body.append(self.context.pop() + '') # just pass on generated text def visit_generated(self, node): pass # Image types to place in an element # SVG not supported by IE up to version 8 # (html4css1 strives for IE6 compatibility) object_image_types = {'.svg': 'image/svg+xml', '.swf': 'application/x-shockwave-flash'} # use table for footnote text, # context added in footnote_backrefs. 
def visit_label(self, node): self.body.append(self.starttag(node, 'td', '%s[' % self.context.pop(), CLASS='label')) def depart_label(self, node): self.body.append(']%s%s' % (self.context.pop(), self.context.pop())) # ersatz for first/last pseudo-classes def visit_list_item(self, node): self.body.append(self.starttag(node, 'li', '')) if len(node): node[0]['classes'].append('first') # use (not supported by HTML5), # cater for limited styling options in CSS1 using hard-coded NBSPs def visit_literal(self, node): # special case: "code" role classes = node.get('classes', []) if 'code' in classes: # filter 'code' from class arguments node['classes'] = [cls for cls in classes if cls != 'code'] self.body.append(self.starttag(node, 'code', '')) return self.body.append( self.starttag(node, 'tt', '', CLASS='docutils literal')) text = node.astext() for token in self.words_and_spaces.findall(text): if token.strip(): # Protect text like "--an-option" and the regular expression # ``[+]?(\d+(\.\d*)?|\.\d+)`` from bad line wrapping if self.in_word_wrap_point.search(token): self.body.append('%s' % self.encode(token)) else: self.body.append(self.encode(token)) elif token in ('\n', ' '): # Allow breaks at whitespace: self.body.append(token) else: # Protect runs of multiple spaces; the last space can wrap: self.body.append(' ' * (len(token) - 1) + ' ') self.body.append('') # Content already processed: raise nodes.SkipNode # add newline after opening tag, don't use for code def visit_literal_block(self, node): self.body.append(self.starttag(node, 'pre', CLASS='literal-block')) # add newline def depart_literal_block(self, node): self.body.append('\n\n') # use table for option list def visit_option_group(self, node): atts = {} if ( self.settings.option_limit and len(node.astext()) > self.settings.option_limit): atts['colspan'] = 2 self.context.append('\n ') else: self.context.append('') self.body.append( self.starttag(node, 'td', CLASS='option-group', **atts)) self.body.append('') 
self.context.append(0) # count number of options def depart_option_group(self, node): self.context.pop() self.body.append('\n') self.body.append(self.context.pop()) def visit_option_list(self, node): self.body.append( self.starttag(node, 'table', CLASS='docutils option-list', frame="void", rules="none")) self.body.append('\n' '\n' '\n') def depart_option_list(self, node): self.body.append('\n\n') def visit_option_list_item(self, node): self.body.append(self.starttag(node, 'tr', '')) def depart_option_list_item(self, node): self.body.append('\n') # Omit

    tags to produce visually compact lists (less vertical # whitespace) as CSS styling requires CSS2. def should_be_compact_paragraph(self, node): """ Determine if the

    tags around paragraph ``node`` can be omitted. """ if (isinstance(node.parent, nodes.document) or isinstance(node.parent, nodes.compound)): # Never compact paragraphs in document or compound. return False for key, value in node.attlist(): if (node.is_not_default(key) and not (key == 'classes' and value in ([], ['first'], ['last'], ['first', 'last']))): # Attribute which needs to survive. return False first = isinstance(node.parent[0], nodes.label) # skip label for child in node.parent.children[first:]: # only first paragraph can be compact if isinstance(child, nodes.Invisible): continue if child is node: break return False parent_length = len([n for n in node.parent if not isinstance( n, (nodes.Invisible, nodes.label))]) if ( self.compact_simple or self.compact_field_list or self.compact_p and parent_length == 1): return True return False def visit_paragraph(self, node): if self.should_be_compact_paragraph(node): self.context.append('') else: self.body.append(self.starttag(node, 'p', '')) self.context.append('

    \n') def depart_paragraph(self, node): self.body.append(self.context.pop()) # ersatz for first/last pseudo-classes def visit_sidebar(self, node): self.body.append( self.starttag(node, 'div', CLASS='sidebar')) self.set_first_last(node) self.in_sidebar = True # not allowed in
        def visit_subscript(self, node):
            """Append the opening tag for a subscript ``node`` to ``self.body``.

            A ``span`` with class "subscript" is used when the node's parent
            is a literal block; otherwise a plain ``sub`` element is opened.
            """
            inside_literal_block = isinstance(node.parent, nodes.literal_block)
            if inside_literal_block:
                opening = self.starttag(node, 'span', '', CLASS='subscript')
            else:
                opening = self.starttag(node, 'sub', '')
            self.body.append(opening)
    
        def depart_subscript(self, node):
            """Append the closing tag for a subscript ``node`` to ``self.body``.

            Mirrors `visit_subscript`: a ``span`` was opened when the parent
            is a literal block, a ``sub`` element otherwise, so the matching
            end tag has to be written here to keep the output well-formed.
            """
            if isinstance(node.parent, nodes.literal_block):
                self.body.append('</span>')
            else:
                self.body.append('</sub>')
    
        # Use <h*> for subtitles (deprecated in HTML 5)
        def visit_subtitle(self, node):
            """Open the element for a subtitle and queue its closing markup.

            The element depends on the parent of ``node``:

            - sidebar: a paragraph with class "sidebar-subtitle",
            - document: an ``h2`` with class "subtitle",
            - section: a heading whose level is computed from the current
              section level and the initial header level, wrapping a ``span``
              with class "section-subtitle".

            The matching closing markup is pushed on ``self.context`` and
            written out by `depart_subtitle`.
            """
            parent = node.parent
            if isinstance(parent, nodes.sidebar):
                self.body.append(self.starttag(node, 'p', '',
                                               CLASS='sidebar-subtitle'))
                self.context.append('</p>\n')
            elif isinstance(parent, nodes.document):
                self.body.append(self.starttag(node, 'h2', '', CLASS='subtitle'))
                self.context.append('</h2>\n')
                # Remember where the subtitle content starts in self.body;
                # depart_subtitle slices from this index.
                self.in_document_title = len(self.body)
            elif isinstance(parent, nodes.section):
                tag = 'h%s' % (self.section_level + self.initial_header_level - 1)
                self.body.append(
                    self.starttag(node, tag, '', CLASS='section-subtitle') +
                    self.starttag({}, 'span', '', CLASS='section-subtitle'))
                # Close the inner span first, then the heading element.
                self.context.append('</span></%s>\n' % tag)
            # NOTE(review): for any other parent nothing is pushed on
            # self.context although depart_subtitle always pops -- confirm
            # that subtitles cannot occur elsewhere.

        def depart_subtitle(self, node):
            """Write the queued closing markup and file a document subtitle.

            For a document subtitle (``self.in_document_title`` is non-zero)
            the subtitle content is stored in ``self.subtitle``, the collected
            body is moved to ``self.body_pre_docinfo`` and
            ``self.html_subtitle``, and ``self.body`` is emptied in place.
            """
            self.body.append(self.context.pop())
            if self.in_document_title:
                # Everything after the start tag, without the closing markup.
                self.subtitle = self.body[self.in_document_title:-1]
                self.in_document_title = 0
                self.body_pre_docinfo.extend(self.body)
                self.html_subtitle.extend(self.body)
                del self.body[:]

        # <sup> not allowed in <pre> in HTML 4
        def visit_superscript(self, node):
            """Append the opening tag for a superscript ``node`` to ``self.body``.

            A ``span`` with class "superscript" is used when the node's parent
            is a literal block; otherwise a plain ``sup`` element is opened.
            """
            inside_literal_block = isinstance(node.parent, nodes.literal_block)
            if inside_literal_block:
                opening = self.starttag(node, 'span', '', CLASS='superscript')
            else:
                opening = self.starttag(node, 'sup', '')
            self.body.append(opening)
    
        def depart_superscript(self, node):
            """Append the closing tag for a superscript ``node`` to ``self.body``.

            Mirrors `visit_superscript`: a ``span`` was opened when the parent
            is a literal block, a ``sup`` element otherwise, so the matching
            end tag has to be written here to keep the output well-formed.
            """
            if isinstance(node.parent, nodes.literal_block):
                self.body.append('</span>')
            else:
                self.body.append('</sup>')
    
        # <tt> element deprecated in HTML 5
        def visit_system_message(self, node):
            self.body.append(self.starttag(node, 'div', CLASS='system-message'))
            self.body.append('

    ') backref_text = '' if len(node['backrefs']): backrefs = node['backrefs'] if len(backrefs) == 1: backref_text = ('; backlink' % backrefs[0]) else: i = 1 backlinks = [] for backref in backrefs: backlinks.append('%s' % (backref, i)) i += 1 backref_text = ('; backlinks: %s' % ', '.join(backlinks)) if node.hasattr('line'): line = ', line %s' % node['line'] else: line = '' self.body.append('System Message: %s/%s ' '(%s%s)%s

    \n' % (node['type'], node['level'], self.encode(node['source']), line, backref_text)) # "hard coded" border setting def visit_table(self, node): self.context.append(self.compact_p) self.compact_p = True classes = ['docutils', self.settings.table_style] if 'align' in node: classes.append('align-%s' % node['align']) self.body.append( self.starttag(node, 'table', CLASS=' '.join(classes), border="1")) def depart_table(self, node): self.compact_p = self.context.pop() self.body.append('\n') # hard-coded vertical alignment def visit_tbody(self, node): self.body.append(self.starttag(node, 'tbody', valign='top')) # def depart_tbody(self, node): self.body.append('\n') # hard-coded vertical alignment def visit_thead(self, node): self.body.append(self.starttag(node, 'thead', valign='bottom')) # def depart_thead(self, node): self.body.append('\n') class SimpleListChecker(writers._html_base.SimpleListChecker): """ Raise `nodes.NodeFound` if non-simple list item is encountered. Here "simple" means a list item containing nothing other than a single paragraph, a simple list, or a paragraph followed by a simple list. """ def visit_list_item(self, node): children = [] for child in node.children: if not isinstance(child, nodes.Invisible): children.append(child) if (children and isinstance(children[0], nodes.paragraph) and (isinstance(children[-1], nodes.bullet_list) or isinstance(children[-1], nodes.enumerated_list))): children.pop() if len(children) <= 1: return else: raise nodes.NodeFound # def visit_bullet_list(self, node): # pass # def visit_enumerated_list(self, node): # pass # def visit_paragraph(self, node): # raise nodes.SkipNode def visit_definition_list(self, node): raise nodes.NodeFound def visit_docinfo(self, node): raise nodes.NodeFound def visit_definition_list(self, node): raise nodes.NodeFound