#!/usr/bin/env python
# -*- coding: utf-8 -*-
# :Author: David Goodger, Günter Milde
#          Based on the html4css1 writer by David Goodger.
# :Maintainer: docutils-develop@lists.sourceforge.net
# :Revision: $Revision: 8118 $
# :Date: $Date: 2005-06-28$
# :Copyright: © 2016 David Goodger, Günter Milde
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
#
#    Copying and distribution of this file, with or without modification,
#    are permitted in any medium without royalty provided the copyright
#    notice and this notice are preserved.
#    This file is offered as-is, without any warranty.
#
# .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause

"""common definitions for Docutils HTML writers"""

import sys
import os.path
import re
import urllib.request
import urllib.parse
import urllib.error

try:  # check for the Python Imaging Library
    import PIL.Image
except ImportError:
    try:  # sometimes PIL modules are put in PYTHONPATH's root
        import Image
        class PIL(object): pass  # dummy wrapper
        PIL.Image = Image
    except ImportError:
        PIL = None

import docutils
from docutils import nodes, utils, writers, languages, io
from docutils.utils.error_reporting import SafeString
from docutils.transforms import writer_aux
from docutils.utils.math import (unichar2tex, pick_math_environment,
                                 math2html, latex2mathml, tex2mathml_extern)


class Writer(writers.Writer):
    """Base class for the HTML writers.

    Runs a translator (``self.translator_class``, set by subclasses) over
    the document, copies the translator's part lists onto the writer and
    interpolates them into the template file.
    """

    supported = ('html', 'xhtml')  # update in subclass
    """Formats this writer supports."""

    # default_stylesheets = [] # set in subclass!
    # default_stylesheet_dirs = ['.'] # set in subclass!
    default_template = 'template.txt'
    # default_template_path = ... # set in subclass!
    # settings_spec = ... # set in subclass!

    settings_defaults = {'output_encoding_error_handler': 'xmlcharrefreplace'}

    # config_section = ... # set in subclass!
    config_section_dependencies = ['writers', 'html writers']

    # Names of the translator attributes (lists of strings) that are copied
    # to the writer and made available to the template.
    visitor_attributes = (
        'head_prefix', 'head', 'stylesheet', 'body_prefix',
        'body_pre_docinfo', 'docinfo', 'body', 'body_suffix',
        'title', 'subtitle', 'header', 'footer', 'meta', 'fragment',
        'html_prolog', 'html_head', 'html_title', 'html_subtitle',
        'html_body')

    def get_transforms(self):
        """Return the parent's transforms plus `writer_aux.Admonitions`."""
        return writers.Writer.get_transforms(self) + [writer_aux.Admonitions]

    def translate(self):
        """Walk the document with the translator and build `self.output`."""
        self.visitor = visitor = self.translator_class(self.document)
        self.document.walkabout(visitor)
        for attr in self.visitor_attributes:
            setattr(self, attr, getattr(visitor, attr))
        self.output = self.apply_template()

    def apply_template(self):
        """Return the template file content with all parts interpolated."""
        # Read as bytes and decode explicitly; the context manager makes
        # sure the file is closed even if reading or decoding fails.
        with open(self.document.settings.template, 'rb') as template_file:
            template = str(template_file.read(), 'utf-8')
        subs = self.interpolation_dict()
        return template % subs

    def interpolation_dict(self):
        """Return the mapping used to fill in the template.

        Contains every name in `visitor_attributes` (joined to a string,
        trailing newlines removed) plus ``encoding`` and ``version``.
        """
        subs = {}
        settings = self.document.settings
        for attr in self.visitor_attributes:
            subs[attr] = ''.join(getattr(self, attr)).rstrip('\n')
        subs['encoding'] = settings.output_encoding
        subs['version'] = docutils.__version__
        return subs

    def assemble_parts(self):
        """Add every visitor attribute (joined to a string) to `self.parts`."""
        writers.Writer.assemble_parts(self)
        for part in self.visitor_attributes:
            self.parts[part] = ''.join(getattr(self, part))


class HTMLTranslator(nodes.NodeVisitor):

    """
    Generic Docutils to HTML translator.

    See the `html4css1` and `html5_polyglot` writers for full featured
    HTML writers.

    .. IMPORTANT::
      The `visit_*` and `depart_*` methods use a
      heterogeneous stack, `self.context`.
      When subclassing, make sure to be consistent in its use!

      Examples for robust coding:

      a) Override both `visit_*` and `depart_*` methods, don't call the
         parent functions.

      b) Extend both and unconditionally call the parent functions::

           def visit_example(self, node):
               if foo:
                   self.body.append('<div class="foo">')
               html4css1.HTMLTranslator.visit_example(self, node)

           def depart_example(self, node):
               html4css1.HTMLTranslator.depart_example(self, node)
               if foo:
                   self.body.append('</div>')

      c) Extend both, calling the parent functions under the same
         conditions::

           def visit_example(self, node):
               if foo:
                   self.body.append('<div class="foo">\n')
               else: # call the parent method
                   _html_base.HTMLTranslator.visit_example(self, node)

           def depart_example(self, node):
               if foo:
                   self.body.append('</div>\n')
               else: # call the parent method
                   _html_base.HTMLTranslator.depart_example(self, node)

      d) Extend one method (call the parent), but don't otherwise use the
         `self.context` stack::

           def depart_example(self, node):
               _html_base.HTMLTranslator.depart_example(self, node)
               if foo:
                   # implementation-specific code
                   # that does not use `self.context`
                   self.body.append('</div>\n')

      This way, changes in stack use will not bite you.
    """

    # NOTE(review): the markup inside the string literals of this class was
    # missing from the reviewed copy (all tags/entities stripped) and has
    # been restored from the tag names passed to `starttag`, the format
    # arguments and the comments. Verify against the upstream revision.

    xml_declaration = '<?xml version="1.0" encoding="%s" ?>\n'
    doctype = '<!DOCTYPE html>\n'
    doctype_mathml = doctype

    head_prefix_template = ('<html xmlns="http://www.w3.org/1999/xhtml"'
                            ' xml:lang="%(lang)s" lang="%(lang)s">\n<head>\n')
    content_type = ('<meta charset="%s"/>\n')
    generator = ('<meta name="generator" content="Docutils %s: '
                 'http://docutils.sourceforge.net/" />\n')

    # Template for the MathJax script in the header:
    mathjax_script = '<script type="text/javascript" src="%s"></script>\n'

    mathjax_url = 'file:/usr/share/javascript/mathjax/MathJax.js'
    """
    URL of the MathJax javascript library.

    The MathJax library ought to be installed on the same
    server as the rest of the deployed site files and specified
    in the `math-output` setting appended to "mathjax".
    See `Docutils Configuration`__.

    __ http://docutils.sourceforge.net/docs/user/config.html#math-output

    The fallback tries a local MathJax installation at
    ``/usr/share/javascript/mathjax/MathJax.js``.
    """

    stylesheet_link = '<link rel="stylesheet" href="%s" type="text/css" />\n'
    embedded_stylesheet = '<style type="text/css">\n\n%s\n</style>\n'
    words_and_spaces = re.compile(r'\S+| +|\n')
    # wrap point inside word:
    in_word_wrap_point = re.compile(r'.+\W\W.+|[-?].+', re.U)
    lang_attribute = 'lang'  # name changes to 'xml:lang' in XHTML 1.1

    special_characters = {ord('&'): '&amp;',
                          ord('<'): '&lt;',
                          ord('"'): '&quot;',
                          ord('>'): '&gt;',
                          ord('@'): '&#64;',  # may thwart address harvesters
                          }
    """Character references for characters with a special meaning in HTML."""

    def __init__(self, document):
        """Set up the output part lists and the translator state."""
        nodes.NodeVisitor.__init__(self, document)
        self.settings = settings = document.settings
        lcode = settings.language_code
        self.language = languages.get_language(lcode, document.reporter)
        self.meta = [self.generator % docutils.__version__]
        self.head_prefix = []
        self.html_prolog = []
        if settings.xml_declaration:
            self.head_prefix.append(self.xml_declaration
                                    % settings.output_encoding)
            # self.content_type = ""
            # encoding not interpolated:
            self.html_prolog.append(self.xml_declaration)
        self.head = self.meta[:]
        self.stylesheet = [self.stylesheet_call(path)
                           for path in utils.get_stylesheet_list(settings)]
        self.body_prefix = ['</head>\n<body>\n']
        # document title, subtitle display
        self.body_pre_docinfo = []
        # author, date, etc.
        self.docinfo = []
        self.body = []
        self.fragment = []
        self.body_suffix = ['</body>\n</html>\n']
        self.section_level = 0
        self.initial_header_level = int(settings.initial_header_level)

        # First word of the setting is the output format, the remaining
        # words are format-specific options.
        self.math_output = settings.math_output.split()
        self.math_output_options = self.math_output[1:]
        self.math_output = self.math_output[0].lower()

        self.context = []
        """Heterogeneous stack.

        Used by visit_* and depart_* functions in conjunction with the tree
        traversal. Make sure that the pops correspond to the pushes."""

        self.topic_classes = []  # TODO: replace with self_in_contents
        self.colspecs = []
        self.compact_p = True
        self.compact_simple = False
        self.compact_field_list = False
        self.in_docinfo = False
        self.in_sidebar = False
        self.in_footnote_list = False
        self.title = []
        self.subtitle = []
        self.header = []
        self.footer = []
        self.html_head = [self.content_type]  # charset not interpolated
        self.html_title = []
        self.html_subtitle = []
        self.html_body = []
        self.in_document_title = 0  # len(self.body) or 0
        self.in_mailto = False
        self.author_in_authors = False  # for html4css1
        self.math_header = []

    def astext(self):
        """Return the complete HTML document as one string."""
        return ''.join(self.head_prefix + self.head
                       + self.stylesheet + self.body_prefix
                       + self.body_pre_docinfo + self.docinfo
                       + self.body + self.body_suffix)

    def encode(self, text):
        """Encode special characters in `text` & return."""
        # Use only named entities known in both XML and HTML
        # other characters are automatically encoded "by number" if required.
        # @@@ A codec to do these and all other HTML entities would be nice.
        text = str(text)
        return text.translate(self.special_characters)

    def cloak_mailto(self, uri):
        """Try to hide a mailto: URL from harvesters."""
        # Encode "@" using a URL octet reference (see RFC 1738).
        # Further cloaking with HTML entities will be done in the
        # `attval` function.
        return uri.replace('@', '%40')

    def cloak_email(self, addr):
        """Try to hide the link text of a email link from harvesters."""
        # Surround at-signs and periods with <span> tags.  ("@" has
        # already been encoded to "&#64;" by the `encode` method.)
        addr = addr.replace('&#64;', '<span>&#64;</span>')
        addr = addr.replace('.', '<span>&#46;</span>')
        return addr

    def attval(self, text,
               whitespace=re.compile('[\n\r\t\v\f]')):
        """Cleanse, HTML encode, and return attribute value text."""
        encoded = self.encode(whitespace.sub(' ', text))
        if self.in_mailto and self.settings.cloak_email_addresses:
            # Cloak at-signs ("%40") and periods with HTML entities.
            encoded = encoded.replace('%40', '&#37;&#52;&#48;')
            encoded = encoded.replace('.', '&#46;')
        return encoded

    def stylesheet_call(self, path):
        """Return code to reference or embed stylesheet file `path`"""
        if self.settings.embed_stylesheet:
            try:
                content = io.FileInput(source_path=path,
                                       encoding='utf-8').read()
                self.settings.record_dependencies.add(path)
            except IOError as err:
                msg = "Cannot embed stylesheet '%s': %s." % (
                    path, SafeString(err.strerror))
                self.document.reporter.error(msg)
                return '<--- %s --->\n' % msg
            return self.embedded_stylesheet % content
        # else link to style file:
        if self.settings.stylesheet_path:
            # adapt path relative to output (cf. config.html#stylesheet-path)
            path = utils.relative_path(self.settings._destination, path)
        return self.stylesheet_link % self.encode(path)

    def starttag(self, node, tagname, suffix='\n', empty=False, **attributes):
        """
        Construct and return a start tag given a node (id & class attributes
        are extracted), tag name, and optional attributes.
        """
        tagname = tagname.lower()
        prefix = []
        ids = []
        # attribute names are case-insensitive here ("CLASS" == "class")
        atts = {name.lower(): value for name, value in attributes.items()}
        classes = []
        langs = []
        # unify class arguments and move language specification
        for cls in node.get('classes', []) + atts.pop('class', '').split():
            if cls.startswith('language-'):
                langs.append(cls[9:])
            elif cls.strip() and cls not in classes:
                classes.append(cls)
        if langs:
            # attribute name is 'lang' in XHTML 1.0 but 'xml:lang' in 1.1
            atts[self.lang_attribute] = langs[0]
        if classes:
            atts['class'] = ' '.join(classes)
        assert 'id' not in atts
        ids.extend(node.get('ids', []))
        if 'ids' in atts:
            ids.extend(atts['ids'])
            del atts['ids']
        if ids:
            atts['id'] = ids[0]
            for extra_id in ids[1:]:
                # Add empty "span" elements for additional IDs.  Note
                # that we cannot use empty "a" elements because there
                # may be targets inside of references, but nested "a"
                # elements aren't allowed in XHTML (even if they do
                # not all have a "href" attribute).
                if empty or isinstance(node,
                                       (nodes.bullet_list, nodes.docinfo,
                                        nodes.definition_list,
                                        nodes.enumerated_list,
                                        nodes.field_list,
                                        nodes.option_list, nodes.table)):
                    # Insert target right in front of element.
                    prefix.append('<span id="%s"></span>' % extra_id)
                else:
                    # Non-empty tag.  Place the auxiliary <span> tag
                    # *inside* the element, as the first child.
                    suffix += '<span id="%s"></span>' % extra_id
        parts = [tagname]
        # sorted attributes give reproducible output
        for name, value in sorted(atts.items()):
            # value=None was used for boolean attributes without
            # value, but this isn't supported by XHTML.
            assert value is not None
            if isinstance(value, list):
                values = [str(v) for v in value]
                parts.append('%s="%s"' % (name.lower(),
                                          self.attval(' '.join(values))))
            else:
                parts.append('%s="%s"' % (name.lower(),
                                          self.attval(str(value))))
        if empty:
            infix = ' /'
        else:
            infix = ''
        return ''.join(prefix) + '<%s%s>' % (' '.join(parts), infix) + suffix

    def emptytag(self, node, tagname, suffix='\n', **attributes):
        """Construct and return an XML-compatible empty tag."""
        return self.starttag(node, tagname, suffix, empty=True, **attributes)

    def set_class_on_child(self, node, class_, index=0):
        """
        Set class `class_` on the visible child no. index of `node`.
        Do nothing if node has fewer children than `index`.
        """
        children = [n for n in node if not isinstance(n, nodes.Invisible)]
        try:
            child = children[index]
        except IndexError:
            return
        child['classes'].append(class_)

    def visit_Text(self, node):
        text = node.astext()
        encoded = self.encode(text)
        if self.in_mailto and self.settings.cloak_email_addresses:
            encoded = self.cloak_email(encoded)
        self.body.append(encoded)

    def depart_Text(self, node):
        pass

    def visit_abbreviation(self, node):
        # @@@ implementation incomplete ("title" attribute)
        self.body.append(self.starttag(node, 'abbr', ''))

    def depart_abbreviation(self, node):
        self.body.append('</abbr>')

    def visit_acronym(self, node):
        # @@@ implementation incomplete ("title" attribute)
        self.body.append(self.starttag(node, 'acronym', ''))

    def depart_acronym(self, node):
        self.body.append('</acronym>')

    def visit_address(self, node):
        self.visit_docinfo_item(node, 'address', meta=False)
        self.body.append(self.starttag(node, 'pre',
                                       suffix='', CLASS='address'))

    def depart_address(self, node):
        self.body.append('\n</pre>\n')
        self.depart_docinfo_item()

    def visit_admonition(self, node):
        node['classes'].insert(0, 'admonition')
        self.body.append(self.starttag(node, 'div'))

    def depart_admonition(self, node=None):
        self.body.append('</div>\n')

    # Maps the `attribution` setting to a (prefix, suffix) pair.
    attribution_formats = {'dash': ('\u2014', ''),
                           'parentheses': ('(', ')'),
                           'parens': ('(', ')'),
                           'none': ('', '')}

    def visit_attribution(self, node):
        prefix, suffix = self.attribution_formats[self.settings.attribution]
        # the suffix is emitted by `depart_attribution`
        self.context.append(suffix)
        self.body.append(
            self.starttag(node, 'p', prefix, CLASS='attribution'))

    def depart_attribution(self, node):
        self.body.append(self.context.pop() + '</p>\n')

    def visit_author(self, node):
        # an author inside "authors" shares the docinfo item of its parent
        if not(isinstance(node.parent, nodes.authors)):
            self.visit_docinfo_item(node, 'author')
        self.body.append('<p>')

    def depart_author(self, node):
        self.body.append('</p>')
        if isinstance(node.parent, nodes.authors):
            self.body.append('\n')
        else:
            self.depart_docinfo_item()

    def visit_authors(self, node):
        self.visit_docinfo_item(node, 'authors')

    def depart_authors(self, node):
        self.depart_docinfo_item()

    def visit_block_quote(self, node):
        self.body.append(self.starttag(node, 'blockquote'))

    def depart_block_quote(self, node):
        self.body.append('</blockquote>\n')

    def check_simple_list(self, node):
        """Check for a simple list that can be rendered compactly."""
        visitor = SimpleListChecker(self.document)
        try:
            node.walk(visitor)
        except nodes.NodeFound:
            return False
        else:
            return True

    # Compact lists
    # ------------
    # Include definition lists and field lists (in addition to ordered
    # and unordered lists) in the test if a list is "simple"  (cf. the
    # html4css1.HTMLTranslator docstring and the SimpleListChecker class at
    # the end of this file).

    def is_compactable(self, node):
        """Return True if list `node` may be rendered compactly."""
        # explicit class arguments have precedence
        if 'compact' in node['classes']:
            return True
        if 'open' in node['classes']:
            return False
        # check config setting:
        if (isinstance(node, (nodes.field_list, nodes.definition_list))
            and not self.settings.compact_field_lists):
            return False
        if (isinstance(node, (nodes.enumerated_list, nodes.bullet_list))
            and not self.settings.compact_lists):
            return False
        # more special cases:
        if (self.topic_classes == ['contents']):  # TODO: self.in_contents
            return True
        # check the list items:
        return self.check_simple_list(node)

    def visit_bullet_list(self, node):
        atts = {}
        old_compact_simple = self.compact_simple
        # saved state is restored in `depart_bullet_list`
        self.context.append((self.compact_simple, self.compact_p))
        self.compact_p = None
        self.compact_simple = self.is_compactable(node)
        if self.compact_simple and not old_compact_simple:
            atts['class'] = 'simple'
        self.body.append(self.starttag(node, 'ul', **atts))

    def depart_bullet_list(self, node):
        self.compact_simple, self.compact_p = self.context.pop()
        self.body.append('</ul>\n')

    def visit_caption(self, node):
        self.body.append(self.starttag(node, 'p', '', CLASS='caption'))

    def depart_caption(self, node):
        self.body.append('</p>\n')

    # citations
    # ---------
    # Use definition list instead of table for bibliographic references.
    # Join adjacent citation entries.

    def visit_citation(self, node):
        if not self.in_footnote_list:
            self.body.append('<dl class="citation">\n')
            self.in_footnote_list = True

    def depart_citation(self, node):
        self.body.append('</dd>\n')
        # close the list unless the next sibling is a citation, too
        if not isinstance(node.next_node(descend=False, siblings=True),
                          nodes.citation):
            self.body.append('</dl>\n')
            self.in_footnote_list = False

    def visit_citation_reference(self, node):
        href = '#'
        if 'refid' in node:
            href += node['refid']
        elif 'refname' in node:
            href += self.document.nameids[node['refname']]
        # else: # TODO system message (or already in the transform)?
        # 'Citation reference missing.'
        self.body.append(self.starttag(
            node, 'a', '[', CLASS='citation-reference', href=href))

    def depart_citation_reference(self, node):
        self.body.append(']</a>')

    # classifier
    # ----------
    # don't insert classifier-delimiter here (done by CSS)

    def visit_classifier(self, node):
        self.body.append(self.starttag(node, 'span', '', CLASS='classifier'))

    def depart_classifier(self, node):
        self.body.append('</span>')

    def visit_colspec(self, node):
        self.colspecs.append(node)
        # "stubs" list is an attribute of the tgroup element:
        node.parent.stubs.append(node.attributes.get('stub'))

    def depart_colspec(self, node):
        # write out <colgroup> when all colspecs are processed
        if isinstance(node.next_node(descend=False, siblings=True),
                      nodes.colspec):
            return
        if 'colwidths-auto' in node.parent.parent['classes'] or (
            'colwidths-auto' in self.settings.table_style and
            ('colwidths-given' not in node.parent.parent['classes'])):
            return
        total_width = sum(spec['colwidth'] for spec in self.colspecs)
        self.body.append(self.starttag(node, 'colgroup'))
        for spec in self.colspecs:
            # relative width in percent, rounded to the nearest integer
            colwidth = int(spec['colwidth'] * 100.0 / total_width + 0.5)
            self.body.append(self.emptytag(spec, 'col',
                                           style='width: %i%%' % colwidth))
        self.body.append('</colgroup>\n')

    def visit_comment(self, node,
                      sub=re.compile('-(?=-)').sub):
        """Escape double-dashes in comment text."""
        self.body.append('<!-- %s -->\n' % sub('- ', node.astext()))
        # Content already processed:
        raise nodes.SkipNode

    def visit_compound(self, node):
        self.body.append(self.starttag(node, 'div', CLASS='compound'))
        if len(node) > 1:
            node[0]['classes'].append('compound-first')
            node[-1]['classes'].append('compound-last')
            for child in node[1:-1]:
                child['classes'].append('compound-middle')

    def depart_compound(self, node):
        self.body.append('</div>\n')

    def visit_container(self, node):
        self.body.append(self.starttag(node, 'div',
                                       CLASS='docutils container'))

    def depart_container(self, node):
        self.body.append('</div>\n')

    def visit_contact(self, node):
        self.visit_docinfo_item(node, 'contact', meta=False)

    def depart_contact(self, node):
        self.depart_docinfo_item()

    def visit_copyright(self, node):
        self.visit_docinfo_item(node, 'copyright')

    def depart_copyright(self, node):
        self.depart_docinfo_item()

    def visit_date(self, node):
        self.visit_docinfo_item(node, 'date')

    def depart_date(self, node):
        self.depart_docinfo_item()

    def visit_decoration(self, node):
        pass

    def depart_decoration(self, node):
        pass

    def visit_definition(self, node):
        # close the term (<dt>) that precedes the definition
        self.body.append('</dt>\n')
        self.body.append(self.starttag(node, 'dd', ''))

    def depart_definition(self, node):
        self.body.append('</dd>\n')

    def visit_definition_list(self, node):
        classes = node.setdefault('classes', [])
        if self.is_compactable(node):
            classes.append('simple')
        self.body.append(self.starttag(node, 'dl'))

    def depart_definition_list(self, node):
        self.body.append('</dl>\n')

    def visit_definition_list_item(self, node):
        # pass class arguments, ids and names to definition term:
        node.children[0]['classes'] = (
            node.get('classes', []) + node.children[0].get('classes', []))
        node.children[0]['ids'] = (
            node.get('ids', []) + node.children[0].get('ids', []))
        node.children[0]['names'] = (
            node.get('names', []) + node.children[0].get('names', []))

    def depart_definition_list_item(self, node):
        pass

    def visit_description(self, node):
        self.body.append(self.starttag(node, 'dd', ''))

    def depart_description(self, node):
        self.body.append('</dd>\n')

    def visit_docinfo(self, node):
        classes = 'docinfo'
        if (self.is_compactable(node)):
            classes += ' simple'
        self.body.append(self.starttag(node, 'dl', CLASS=classes))

    def depart_docinfo(self, node):
        self.body.append('</dl>\n')

    def visit_docinfo_item(self, node, name, meta=True):
        """Open a docinfo entry: label term plus start of the definition.

        With `meta` true, also register a meta tag with the item's text.
        """
        if meta:
            meta_tag = '<meta name="%s" content="%s" />\n' \
                       % (name, self.attval(node.astext()))
            self.add_meta(meta_tag)
        self.body.append('<dt class="%s">%s</dt>\n'
                         % (name, self.language.labels[name]))
        self.body.append(self.starttag(node, 'dd', '', CLASS=name))

    def depart_docinfo_item(self):
        self.body.append('</dd>\n')

    def visit_doctest_block(self, node):
        self.body.append(self.starttag(node, 'pre', suffix='',
                                       CLASS='code python doctest'))

    def depart_doctest_block(self, node):
        self.body.append('\n</pre>\n')

    def visit_document(self, node):
        # Fall back to the source file name, then to a fixed string.
        # A missing or None "source" must not raise here.
        title = (node.get('title', '')
                 or os.path.basename(node.get('source') or '')
                 or 'docutils document without title')
        self.head.append('<title>%s</title>\n' % self.encode(title))

    def depart_document(self, node):
        """Complete head, prolog and body parts after the tree walk."""
        self.head_prefix.extend([self.doctype,
                                 self.head_prefix_template %
                                 {'lang': self.settings.language_code}])
        self.html_prolog.append(self.doctype)
        self.meta.insert(0, self.content_type % self.settings.output_encoding)
        self.head.insert(0, self.content_type % self.settings.output_encoding)
        if self.math_header:
            if self.math_output == 'mathjax':
                self.head.extend(self.math_header)
            else:
                self.stylesheet.extend(self.math_header)
        # skip content-type meta tag with interpolated charset value:
        self.html_head.extend(self.head[1:])
        self.body_prefix.append(self.starttag(node, 'div', CLASS='document'))
        self.body_suffix.insert(0, '</div>\n')
        self.fragment.extend(self.body)  # self.fragment is the "naked" body
        self.html_body.extend(self.body_prefix[1:] + self.body_pre_docinfo
                              + self.docinfo + self.body
                              + self.body_suffix[:-1])
        # every push on the context stack must have been popped again
        assert not self.context, 'len(context) = %s' % len(self.context)

    def visit_emphasis(self, node):
        self.body.append(self.starttag(node, 'em', ''))

    def depart_emphasis(self, node):
        self.body.append('</em>')

    def visit_entry(self, node):
        atts = {'class': []}
        if isinstance(node.parent.parent, nodes.thead):
            atts['class'].append('head')
        if node.parent.parent.parent.stubs[node.parent.column]:
            # "stubs" list is an attribute of the tgroup element
            atts['class'].append('stub')
        if atts['class']:
            tagname = 'th'
            atts['class'] = ' '.join(atts['class'])
        else:
            tagname = 'td'
            del atts['class']
        node.parent.column += 1
        if 'morerows' in node:
            atts['rowspan'] = node['morerows'] + 1
        if 'morecols' in node:
            atts['colspan'] = node['morecols'] + 1
            node.parent.column += node['morecols']
        self.body.append(self.starttag(node, tagname, '', **atts))
        # matching end tag, emitted by `depart_entry`
        self.context.append('</%s>\n' % tagname.lower())
        # TODO: why does the html4css1 writer insert an NBSP into empty cells?
        # if len(node) == 0:              # empty cell
        #     self.body.append('&nbsp;') # no-break space

    def depart_entry(self, node):
        self.body.append(self.context.pop())

    def visit_enumerated_list(self, node):
        atts = {}
        if 'start' in node:
            atts['start'] = node['start']
        if 'enumtype' in node:
            atts['class'] = node['enumtype']
        if self.is_compactable(node):
            atts['class'] = (atts.get('class', '') + ' simple').strip()
        self.body.append(self.starttag(node, 'ol', **atts))

    def depart_enumerated_list(self, node):
        self.body.append('</ol>\n')

    def visit_field_list(self, node):
        # Keep simple paragraphs in the field_body to enable CSS
        # rule to start body on new line if the label is too long
        classes = 'field-list'
        if (self.is_compactable(node)):
            classes += ' simple'
        self.body.append(self.starttag(node, 'dl', CLASS=classes))

    def depart_field_list(self, node):
        self.body.append('</dl>\n')

    def visit_field(self, node):
        pass

    def depart_field(self, node):
        pass

    # as field is ignored, pass class arguments to field-name and field-body:

    def visit_field_name(self, node):
        # join with a space so that several classes stay separate values
        self.body.append(self.starttag(node, 'dt', '',
                                       CLASS=' '.join(node.parent['classes'])))

    def depart_field_name(self, node):
        self.body.append('</dt>\n')

    def visit_field_body(self, node):
        # join with a space so that several classes stay separate values
        self.body.append(self.starttag(node, 'dd', '',
                                       CLASS=' '.join(node.parent['classes'])))
        # prevent misalignment of following content if the field is empty:
        if not node.children:
            self.body.append('<p></p>')

    def depart_field_body(self, node):
        self.body.append('</dd>\n')

    def visit_figure(self, node):
        atts = {'class': 'figure'}
        if node.get('width'):
            atts['style'] = 'width: %s' % node['width']
        if node.get('align'):
            atts['class'] += " align-" + node['align']
        self.body.append(self.starttag(node, 'div', **atts))

    def depart_figure(self, node):
        self.body.append('</div>\n')

    # use HTML 5