# -*- coding: utf-8 -*- """ Module for formatting output data in HTML. """ from __future__ import print_function from distutils.version import LooseVersion from textwrap import dedent import pandas.core.common as com from pandas.core.index import MultiIndex from pandas import compat from pandas.compat import (lzip, range, map, zip, u, OrderedDict, unichr) from pandas.core.config import get_option from pandas.io.formats.printing import pprint_thing from pandas.io.formats.format import (get_level_lengths, buffer_put_lines) from pandas.io.formats.format import TableFormatter class HTMLFormatter(TableFormatter): indent_delta = 2 def __init__(self, formatter, classes=None, max_rows=None, max_cols=None, notebook=False, border=None, table_id=None): self.fmt = formatter self.classes = classes self.frame = self.fmt.frame self.columns = self.fmt.tr_frame.columns self.elements = [] self.bold_rows = self.fmt.kwds.get('bold_rows', False) self.escape = self.fmt.kwds.get('escape', True) self.max_rows = max_rows or len(self.fmt.frame) self.max_cols = max_cols or len(self.fmt.columns) self.show_dimensions = self.fmt.show_dimensions self.is_truncated = (self.max_rows < len(self.fmt.frame) or self.max_cols < len(self.fmt.columns)) self.notebook = notebook if border is None: border = get_option('display.html.border') self.border = border self.table_id = table_id def write(self, s, indent=0): rs = pprint_thing(s) self.elements.append(' ' * indent + rs) def write_th(self, s, indent=0, tags=None): if self.fmt.col_space is not None and self.fmt.col_space > 0: tags = (tags or "") tags += ('style="min-width: {colspace};"' .format(colspace=self.fmt.col_space)) return self._write_cell(s, kind='th', indent=indent, tags=tags) def write_td(self, s, indent=0, tags=None): return self._write_cell(s, kind='td', indent=indent, tags=tags) def _write_cell(self, s, kind='td', indent=0, tags=None): if tags is not None: start_tag = '<{kind} {tags}>'.format(kind=kind, tags=tags) else: start_tag = '<{kind}>'.format(kind=kind) if self.escape: # escape & first to prevent double escaping of & esc = OrderedDict([('&', r'&'), ('<', r'<'), ('>', r'>')]) else: esc = {} rs = pprint_thing(s, escape_chars=esc).strip() self.write(u'{start}{rs}' .format(start=start_tag, rs=rs, kind=kind), indent) def write_tr(self, line, indent=0, indent_delta=4, header=False, align=None, tags=None, nindex_levels=0): if tags is None: tags = {} if align is None: self.write('', indent) else: self.write('' .format(align=align), indent) indent += indent_delta for i, s in enumerate(line): val_tag = tags.get(i, None) if header or (self.bold_rows and i < nindex_levels): self.write_th(s, indent, tags=val_tag) else: self.write_td(s, indent, tags=val_tag) indent -= indent_delta self.write('', indent) def write_style(self): # We use the "scoped" attribute here so that the desired # style properties for the data frame are not then applied # throughout the entire notebook. template_first = """\ """ template_select = """\ .dataframe %s { %s: %s; }""" element_props = [('tbody tr th:only-of-type', 'vertical-align', 'middle'), ('tbody tr th', 'vertical-align', 'top')] if isinstance(self.columns, MultiIndex): element_props.append(('thead tr th', 'text-align', 'left')) if all((self.fmt.has_index_names, self.fmt.index, self.fmt.show_index_names)): element_props.append(('thead tr:last-of-type th', 'text-align', 'right')) else: element_props.append(('thead th', 'text-align', 'right')) template_mid = '\n\n'.join(map(lambda t: template_select % t, element_props)) template = dedent('\n'.join((template_first, template_mid, template_last))) if self.notebook: self.write(template) def write_result(self, buf): indent = 0 id_section = "" frame = self.frame _classes = ['dataframe'] # Default class. use_mathjax = get_option("display.html.use_mathjax") if not use_mathjax: _classes.append('tex2jax_ignore') if self.classes is not None: if isinstance(self.classes, str): self.classes = self.classes.split() if not isinstance(self.classes, (list, tuple)): raise AssertionError('classes must be list or tuple, not {typ}' .format(typ=type(self.classes))) _classes.extend(self.classes) if self.notebook: div_style = '' try: import IPython if IPython.__version__ < LooseVersion('3.0.0'): div_style = ' style="max-width:1500px;overflow:auto;"' except (ImportError, AttributeError): pass self.write(''.format(style=div_style)) self.write_style() if self.table_id is not None: id_section = ' id="{table_id}"'.format(table_id=self.table_id) self.write('' .format(border=self.border, cls=' '.join(_classes), id_section=id_section), indent) indent += self.indent_delta indent = self._write_header(indent) indent = self._write_body(indent) self.write('
', indent) if self.should_show_dimensions: by = chr(215) if compat.PY3 else unichr(215) # × self.write(u('

{rows} rows {by} {cols} columns

') .format(rows=len(frame), by=by, cols=len(frame.columns))) if self.notebook: self.write('') buffer_put_lines(buf, self.elements) def _write_header(self, indent): truncate_h = self.fmt.truncate_h row_levels = self.frame.index.nlevels if not self.fmt.header: # write nothing return indent def _column_header(): if self.fmt.index: row = [''] * (self.frame.index.nlevels - 1) else: row = [] if isinstance(self.columns, MultiIndex): if self.fmt.has_column_names and self.fmt.index: row.append(single_column_table(self.columns.names)) else: row.append('') style = "text-align: {just};".format(just=self.fmt.justify) row.extend([single_column_table(c, self.fmt.justify, style) for c in self.columns]) else: if self.fmt.index: row.append(self.columns.name or '') row.extend(self.columns) return row self.write('', indent) row = [] indent += self.indent_delta if isinstance(self.columns, MultiIndex): template = 'colspan="{span:d}" halign="left"' if self.fmt.sparsify: # GH3547 sentinel = com.sentinel_factory() else: sentinel = None levels = self.columns.format(sparsify=sentinel, adjoin=False, names=False) level_lengths = get_level_lengths(levels, sentinel) inner_lvl = len(level_lengths) - 1 for lnum, (records, values) in enumerate(zip(level_lengths, levels)): if truncate_h: # modify the header lines ins_col = self.fmt.tr_col_num if self.fmt.sparsify: recs_new = {} # Increment tags after ... col. for tag, span in list(records.items()): if tag >= ins_col: recs_new[tag + 1] = span elif tag + span > ins_col: recs_new[tag] = span + 1 if lnum == inner_lvl: values = (values[:ins_col] + (u('...'),) + values[ins_col:]) else: # sparse col headers do not receive a ... values = (values[:ins_col] + (values[ins_col - 1], ) + values[ins_col:]) else: recs_new[tag] = span # if ins_col lies between tags, all col headers # get ... if tag + span == ins_col: recs_new[ins_col] = 1 values = (values[:ins_col] + (u('...'),) + values[ins_col:]) records = recs_new inner_lvl = len(level_lengths) - 1 if lnum == inner_lvl: records[ins_col] = 1 else: recs_new = {} for tag, span in list(records.items()): if tag >= ins_col: recs_new[tag + 1] = span else: recs_new[tag] = span recs_new[ins_col] = 1 records = recs_new values = (values[:ins_col] + [u('...')] + values[ins_col:]) name = self.columns.names[lnum] row = [''] * (row_levels - 1) + ['' if name is None else pprint_thing(name)] if row == [""] and self.fmt.index is False: row = [] tags = {} j = len(row) for i, v in enumerate(values): if i in records: if records[i] > 1: tags[j] = template.format(span=records[i]) else: continue j += 1 row.append(v) self.write_tr(row, indent, self.indent_delta, tags=tags, header=True) else: col_row = _column_header() align = self.fmt.justify if truncate_h: ins_col = row_levels + self.fmt.tr_col_num col_row.insert(ins_col, '...') self.write_tr(col_row, indent, self.indent_delta, header=True, align=align) if all((self.fmt.has_index_names, self.fmt.index, self.fmt.show_index_names)): row = ([x if x is not None else '' for x in self.frame.index.names] + [''] * min(len(self.columns), self.max_cols)) if truncate_h: ins_col = row_levels + self.fmt.tr_col_num row.insert(ins_col, '') self.write_tr(row, indent, self.indent_delta, header=True) indent -= self.indent_delta self.write('', indent) return indent def _write_body(self, indent): self.write('', indent) indent += self.indent_delta fmt_values = {} for i in range(min(len(self.columns), self.max_cols)): fmt_values[i] = self.fmt._format_col(i) # write values if self.fmt.index: if isinstance(self.frame.index, MultiIndex): self._write_hierarchical_rows(fmt_values, indent) else: self._write_regular_rows(fmt_values, indent) else: for i in range(min(len(self.frame), self.max_rows)): row = [fmt_values[j][i] for j in range(len(self.columns))] self.write_tr(row, indent, self.indent_delta, tags=None) indent -= self.indent_delta self.write('', indent) indent -= self.indent_delta return indent def _write_regular_rows(self, fmt_values, indent): truncate_h = self.fmt.truncate_h truncate_v = self.fmt.truncate_v ncols = len(self.fmt.tr_frame.columns) nrows = len(self.fmt.tr_frame) fmt = self.fmt._get_formatter('__index__') if fmt is not None: index_values = self.fmt.tr_frame.index.map(fmt) else: index_values = self.fmt.tr_frame.index.format() row = [] for i in range(nrows): if truncate_v and i == (self.fmt.tr_row_num): str_sep_row = ['...' for ele in row] self.write_tr(str_sep_row, indent, self.indent_delta, tags=None, nindex_levels=1) row = [] row.append(index_values[i]) row.extend(fmt_values[j][i] for j in range(ncols)) if truncate_h: dot_col_ix = self.fmt.tr_col_num + 1 row.insert(dot_col_ix, '...') self.write_tr(row, indent, self.indent_delta, tags=None, nindex_levels=1) def _write_hierarchical_rows(self, fmt_values, indent): template = 'rowspan="{span}" valign="top"' truncate_h = self.fmt.truncate_h truncate_v = self.fmt.truncate_v frame = self.fmt.tr_frame ncols = len(frame.columns) nrows = len(frame) row_levels = self.frame.index.nlevels idx_values = frame.index.format(sparsify=False, adjoin=False, names=False) idx_values = lzip(*idx_values) if self.fmt.sparsify: # GH3547 sentinel = com.sentinel_factory() levels = frame.index.format(sparsify=sentinel, adjoin=False, names=False) level_lengths = get_level_lengths(levels, sentinel) inner_lvl = len(level_lengths) - 1 if truncate_v: # Insert ... row and adjust idx_values and # level_lengths to take this into account. ins_row = self.fmt.tr_row_num inserted = False for lnum, records in enumerate(level_lengths): rec_new = {} for tag, span in list(records.items()): if tag >= ins_row: rec_new[tag + 1] = span elif tag + span > ins_row: rec_new[tag] = span + 1 # GH 14882 - Make sure insertion done once if not inserted: dot_row = list(idx_values[ins_row - 1]) dot_row[-1] = u('...') idx_values.insert(ins_row, tuple(dot_row)) inserted = True else: dot_row = list(idx_values[ins_row]) dot_row[inner_lvl - lnum] = u('...') idx_values[ins_row] = tuple(dot_row) else: rec_new[tag] = span # If ins_row lies between tags, all cols idx cols # receive ... if tag + span == ins_row: rec_new[ins_row] = 1 if lnum == 0: idx_values.insert(ins_row, tuple( [u('...')] * len(level_lengths))) # GH 14882 - Place ... in correct level elif inserted: dot_row = list(idx_values[ins_row]) dot_row[inner_lvl - lnum] = u('...') idx_values[ins_row] = tuple(dot_row) level_lengths[lnum] = rec_new level_lengths[inner_lvl][ins_row] = 1 for ix_col in range(len(fmt_values)): fmt_values[ix_col].insert(ins_row, '...') nrows += 1 for i in range(nrows): row = [] tags = {} sparse_offset = 0 j = 0 for records, v in zip(level_lengths, idx_values[i]): if i in records: if records[i] > 1: tags[j] = template.format(span=records[i]) else: sparse_offset += 1 continue j += 1 row.append(v) row.extend(fmt_values[j][i] for j in range(ncols)) if truncate_h: row.insert(row_levels - sparse_offset + self.fmt.tr_col_num, '...') self.write_tr(row, indent, self.indent_delta, tags=tags, nindex_levels=len(levels) - sparse_offset) else: for i in range(len(frame)): idx_values = list(zip(*frame.index.format( sparsify=False, adjoin=False, names=False))) row = [] row.extend(idx_values[i]) row.extend(fmt_values[j][i] for j in range(ncols)) if truncate_h: row.insert(row_levels + self.fmt.tr_col_num, '...') self.write_tr(row, indent, self.indent_delta, tags=None, nindex_levels=frame.index.nlevels) def single_column_table(column, align=None, style=None): table = '{i!s}'.format(i=i)) table += '' return table def single_row_table(row): # pragma: no cover table = '' for i in row: table += (''.format(i=i)) table += '
{i!s}
' return table