""" Module for applying conditional formatting to DataFrames and Series. """ from functools import partial from itertools import product from contextlib import contextmanager from uuid import uuid1 import copy from collections import defaultdict, MutableMapping try: from jinja2 import ( PackageLoader, Environment, ChoiceLoader, FileSystemLoader ) except ImportError: msg = "pandas.Styler requires jinja2. "\ "Please install with `conda install Jinja2`\n"\ "or `pip install Jinja2`" raise ImportError(msg) from pandas.core.dtypes.common import is_float, is_string_like import numpy as np import pandas as pd from pandas.api.types import is_list_like from pandas.compat import range from pandas.core.config import get_option from pandas.core.generic import _shared_docs import pandas.core.common as com from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice from pandas.util._decorators import Appender try: import matplotlib.pyplot as plt from matplotlib import colors has_mpl = True except ImportError: has_mpl = False no_mpl_message = "{0} requires matplotlib." @contextmanager def _mpl(func): if has_mpl: yield plt, colors else: raise ImportError(no_mpl_message.format(func.__name__)) class Styler(object): """ Helps style a DataFrame or Series according to the data with HTML and CSS. Parameters ---------- data: Series or DataFrame precision: int precision to round floats to, defaults to pd.options.display.precision table_styles: list-like, default None list of {selector: (attr, value)} dicts; see Notes uuid: str, default None a unique identifier to avoid CSS collisons; generated automatically caption: str, default None caption to attach to the table Attributes ---------- env : Jinja2 Environment template : Jinja2 Template loader : Jinja2 Loader Notes ----- Most styling will be done by passing style functions into ``Styler.apply`` or ``Styler.applymap``. Style functions should return values with strings containing CSS ``'attr: value'`` that will be applied to the indicated cells. If using in the Jupyter notebook, Styler has defined a ``_repr_html_`` to automatically render itself. Otherwise call Styler.render to get the genterated HTML. CSS classes are attached to the generated HTML * Index and Column names include ``index_name`` and ``level`` where `k` is its level in a MultiIndex * Index label cells include * ``row_heading`` * ``row`` where `n` is the numeric position of the row * ``level`` where `k` is the level in a MultiIndex * Column label cells include * ``col_heading`` * ``col`` where `n` is the numeric position of the column * ``evel`` where `k` is the level in a MultiIndex * Blank cells include ``blank`` * Data cells include ``data`` See Also -------- pandas.DataFrame.style """ loader = PackageLoader("pandas", "io/formats/templates") env = Environment( loader=loader, trim_blocks=True, ) template = env.get_template("html.tpl") def __init__(self, data, precision=None, table_styles=None, uuid=None, caption=None, table_attributes=None): self.ctx = defaultdict(list) self._todo = [] if not isinstance(data, (pd.Series, pd.DataFrame)): raise TypeError("``data`` must be a Series or DataFrame") if data.ndim == 1: data = data.to_frame() if not data.index.is_unique or not data.columns.is_unique: raise ValueError("style is not supported for non-unique indicies.") self.data = data self.index = data.index self.columns = data.columns self.uuid = uuid self.table_styles = table_styles self.caption = caption if precision is None: precision = get_option('display.precision') self.precision = precision self.table_attributes = table_attributes self.hidden_index = False self.hidden_columns = [] # display_funcs maps (row, col) -> formatting function def default_display_func(x): if is_float(x): return '{:>.{precision}g}'.format(x, precision=self.precision) else: return x self._display_funcs = defaultdict(lambda: default_display_func) def _repr_html_(self): """Hooks into Jupyter notebook rich display system.""" return self.render() @Appender(_shared_docs['to_excel'] % dict( axes='index, columns', klass='Styler', axes_single_arg="{0 or 'index', 1 or 'columns'}", optional_by=""" by : str or list of str Name or list of names which refer to the axis items.""", versionadded_to_excel='\n .. versionadded:: 0.20')) def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, startrow=0, startcol=0, engine=None, merge_cells=True, encoding=None, inf_rep='inf', verbose=True, freeze_panes=None): from pandas.io.formats.excel import ExcelFormatter formatter = ExcelFormatter(self, na_rep=na_rep, cols=columns, header=header, float_format=float_format, index=index, index_label=index_label, merge_cells=merge_cells, inf_rep=inf_rep) formatter.write(excel_writer, sheet_name=sheet_name, startrow=startrow, startcol=startcol, freeze_panes=freeze_panes, engine=engine) def _translate(self): """ Convert the DataFrame in `self.data` and the attrs from `_build_styles` into a dictionary of {head, body, uuid, cellstyle} """ table_styles = self.table_styles or [] caption = self.caption ctx = self.ctx precision = self.precision hidden_index = self.hidden_index hidden_columns = self.hidden_columns uuid = self.uuid or str(uuid1()).replace("-", "_") ROW_HEADING_CLASS = "row_heading" COL_HEADING_CLASS = "col_heading" INDEX_NAME_CLASS = "index_name" DATA_CLASS = "data" BLANK_CLASS = "blank" BLANK_VALUE = "" def format_attr(pair): return "{key}={value}".format(**pair) # for sparsifying a MultiIndex idx_lengths = _get_level_lengths(self.index) col_lengths = _get_level_lengths(self.columns, hidden_columns) cell_context = dict() n_rlvls = self.data.index.nlevels n_clvls = self.data.columns.nlevels rlabels = self.data.index.tolist() clabels = self.data.columns.tolist() if n_rlvls == 1: rlabels = [[x] for x in rlabels] if n_clvls == 1: clabels = [[x] for x in clabels] clabels = list(zip(*clabels)) cellstyle = [] head = [] for r in range(n_clvls): # Blank for Index columns... row_es = [{"type": "th", "value": BLANK_VALUE, "display_value": BLANK_VALUE, "is_visible": not hidden_index, "class": " ".join([BLANK_CLASS])}] * (n_rlvls - 1) # ... except maybe the last for columns.names name = self.data.columns.names[r] cs = [BLANK_CLASS if name is None else INDEX_NAME_CLASS, "level{lvl}".format(lvl=r)] name = BLANK_VALUE if name is None else name row_es.append({"type": "th", "value": name, "display_value": name, "class": " ".join(cs), "is_visible": not hidden_index}) if clabels: for c, value in enumerate(clabels[r]): cs = [COL_HEADING_CLASS, "level{lvl}".format(lvl=r), "col{col}".format(col=c)] cs.extend(cell_context.get( "col_headings", {}).get(r, {}).get(c, [])) es = { "type": "th", "value": value, "display_value": value, "class": " ".join(cs), "is_visible": _is_visible(c, r, col_lengths), } colspan = col_lengths.get((r, c), 0) if colspan > 1: es["attributes"] = [ format_attr({"key": "colspan", "value": colspan}) ] row_es.append(es) head.append(row_es) if (self.data.index.names and com._any_not_none(*self.data.index.names) and not hidden_index): index_header_row = [] for c, name in enumerate(self.data.index.names): cs = [INDEX_NAME_CLASS, "level{lvl}".format(lvl=c)] name = '' if name is None else name index_header_row.append({"type": "th", "value": name, "class": " ".join(cs)}) index_header_row.extend( [{"type": "th", "value": BLANK_VALUE, "class": " ".join([BLANK_CLASS]) }] * (len(clabels[0]) - len(hidden_columns))) head.append(index_header_row) body = [] for r, idx in enumerate(self.data.index): row_es = [] for c, value in enumerate(rlabels[r]): rid = [ROW_HEADING_CLASS, "level{lvl}".format(lvl=c), "row{row}".format(row=r)] es = { "type": "th", "is_visible": (_is_visible(r, c, idx_lengths) and not hidden_index), "value": value, "display_value": value, "id": "_".join(rid[1:]), "class": " ".join(rid) } rowspan = idx_lengths.get((c, r), 0) if rowspan > 1: es["attributes"] = [ format_attr({"key": "rowspan", "value": rowspan}) ] row_es.append(es) for c, col in enumerate(self.data.columns): cs = [DATA_CLASS, "row{row}".format(row=r), "col{col}".format(col=c)] cs.extend(cell_context.get("data", {}).get(r, {}).get(c, [])) formatter = self._display_funcs[(r, c)] value = self.data.iloc[r, c] row_es.append({ "type": "td", "value": value, "class": " ".join(cs), "id": "_".join(cs[1:]), "display_value": formatter(value), "is_visible": (c not in hidden_columns) }) props = [] for x in ctx[r, c]: # have to handle empty styles like [''] if x.count(":"): props.append(x.split(":")) else: props.append(['', '']) cellstyle.append({'props': props, 'selector': "row{row}_col{col}" .format(row=r, col=c)}) body.append(row_es) table_attr = self.table_attributes use_mathjax = get_option("display.html.use_mathjax") if not use_mathjax: table_attr = table_attr or '' if 'class="' in table_attr: table_attr = table_attr.replace('class="', 'class="tex2jax_ignore ') else: table_attr += ' class="tex2jax_ignore"' return dict(head=head, cellstyle=cellstyle, body=body, uuid=uuid, precision=precision, table_styles=table_styles, caption=caption, table_attributes=table_attr) def format(self, formatter, subset=None): """ Format the text display value of cells. .. versionadded:: 0.18.0 Parameters ---------- formatter: str, callable, or dict subset: IndexSlice An argument to ``DataFrame.loc`` that restricts which elements ``formatter`` is applied to. Returns ------- self : Styler Notes ----- ``formatter`` is either an ``a`` or a dict ``{column name: a}`` where ``a`` is one of - str: this will be wrapped in: ``a.format(x)`` - callable: called with the value of an individual cell The default display value for numeric values is the "general" (``g``) format with ``pd.options.display.precision`` precision. Examples -------- >>> df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b']) >>> df.style.format("{:.2%}") >>> df['c'] = ['a', 'b', 'c', 'd'] >>> df.style.format({'c': str.upper}) """ if subset is None: row_locs = range(len(self.data)) col_locs = range(len(self.data.columns)) else: subset = _non_reducing_slice(subset) if len(subset) == 1: subset = subset, self.data.columns sub_df = self.data.loc[subset] row_locs = self.data.index.get_indexer_for(sub_df.index) col_locs = self.data.columns.get_indexer_for(sub_df.columns) if isinstance(formatter, MutableMapping): for col, col_formatter in formatter.items(): # formatter must be callable, so '{}' are converted to lambdas col_formatter = _maybe_wrap_formatter(col_formatter) col_num = self.data.columns.get_indexer_for([col])[0] for row_num in row_locs: self._display_funcs[(row_num, col_num)] = col_formatter else: # single scalar to format all cells with locs = product(*(row_locs, col_locs)) for i, j in locs: formatter = _maybe_wrap_formatter(formatter) self._display_funcs[(i, j)] = formatter return self def render(self, **kwargs): """Render the built up styles to HTML Parameters ---------- `**kwargs`: Any additional keyword arguments are passed through to ``self.template.render``. This is useful when you need to provide additional variables for a custom template. .. versionadded:: 0.20 Returns ------- rendered: str the rendered HTML Notes ----- ``Styler`` objects have defined the ``_repr_html_`` method which automatically calls ``self.render()`` when it's the last item in a Notebook cell. When calling ``Styler.render()`` directly, wrap the result in ``IPython.display.HTML`` to view the rendered HTML in the notebook. Pandas uses the following keys in render. Arguments passed in ``**kwargs`` take precedence, so think carefully if you want to override them: * head * cellstyle * body * uuid * precision * table_styles * caption * table_attributes """ self._compute() # TODO: namespace all the pandas keys d = self._translate() # filter out empty styles, every cell will have a class # but the list of props may just be [['', '']]. # so we have the neested anys below trimmed = [x for x in d['cellstyle'] if any(any(y) for y in x['props'])] d['cellstyle'] = trimmed d.update(kwargs) return self.template.render(**d) def _update_ctx(self, attrs): """ update the state of the Styler. Collects a mapping of {index_label: [': ']} attrs: Series or DataFrame should contain strings of ': ;: ' Whitespace shouldn't matter and the final trailing ';' shouldn't matter. """ for row_label, v in attrs.iterrows(): for col_label, col in v.iteritems(): i = self.index.get_indexer([row_label])[0] j = self.columns.get_indexer([col_label])[0] for pair in col.rstrip(";").split(";"): self.ctx[(i, j)].append(pair) def _copy(self, deepcopy=False): styler = Styler(self.data, precision=self.precision, caption=self.caption, uuid=self.uuid, table_styles=self.table_styles) if deepcopy: styler.ctx = copy.deepcopy(self.ctx) styler._todo = copy.deepcopy(self._todo) else: styler.ctx = self.ctx styler._todo = self._todo return styler def __copy__(self): """ Deep copy by default. """ return self._copy(deepcopy=False) def __deepcopy__(self, memo): return self._copy(deepcopy=True) def clear(self): """"Reset" the styler, removing any previously applied styles. Returns None. """ self.ctx.clear() self._todo = [] def _compute(self): """ Execute the style functions built up in `self._todo`. Relies on the conventions that all style functions go through .apply or .applymap. The append styles to apply as tuples of (application method, *args, **kwargs) """ r = self for func, args, kwargs in self._todo: r = func(self)(*args, **kwargs) return r def _apply(self, func, axis=0, subset=None, **kwargs): subset = slice(None) if subset is None else subset subset = _non_reducing_slice(subset) data = self.data.loc[subset] if axis is not None: result = data.apply(func, axis=axis, result_type='expand', **kwargs) result.columns = data.columns else: result = func(data, **kwargs) if not isinstance(result, pd.DataFrame): raise TypeError( "Function {func!r} must return a DataFrame when " "passed to `Styler.apply` with axis=None" .format(func=func)) if not (result.index.equals(data.index) and result.columns.equals(data.columns)): msg = ('Result of {func!r} must have identical index and ' 'columns as the input'.format(func=func)) raise ValueError(msg) result_shape = result.shape expected_shape = self.data.loc[subset].shape if result_shape != expected_shape: msg = ("Function {func!r} returned the wrong shape.\n" "Result has shape: {res}\n" "Expected shape: {expect}".format(func=func, res=result.shape, expect=expected_shape)) raise ValueError(msg) self._update_ctx(result) return self def apply(self, func, axis=0, subset=None, **kwargs): """ Apply a function column-wise, row-wise, or table-wase, updating the HTML representation with the result. Parameters ---------- func : function ``func`` should take a Series or DataFrame (depending on ``axis``), and return an object with the same shape. Must return a DataFrame with identical index and column labels when ``axis=None`` axis : int, str or None apply to each column (``axis=0`` or ``'index'``) or to each row (``axis=1`` or ``'columns'``) or to the entire DataFrame at once with ``axis=None`` subset : IndexSlice a valid indexer to limit ``data`` to *before* applying the function. Consider using a pandas.IndexSlice kwargs : dict pass along to ``func`` Returns ------- self : Styler Notes ----- The output shape of ``func`` should match the input, i.e. if ``x`` is the input row, column, or table (depending on ``axis``), then ``func(x.shape) == x.shape`` should be true. This is similar to ``DataFrame.apply``, except that ``axis=None`` applies the function to the entire DataFrame at once, rather than column-wise or row-wise. Examples -------- >>> def highlight_max(x): ... return ['background-color: yellow' if v == x.max() else '' for v in x] ... >>> df = pd.DataFrame(np.random.randn(5, 2)) >>> df.style.apply(highlight_max) """ self._todo.append((lambda instance: getattr(instance, '_apply'), (func, axis, subset), kwargs)) return self def _applymap(self, func, subset=None, **kwargs): func = partial(func, **kwargs) # applymap doesn't take kwargs? if subset is None: subset = pd.IndexSlice[:] subset = _non_reducing_slice(subset) result = self.data.loc[subset].applymap(func) self._update_ctx(result) return self def applymap(self, func, subset=None, **kwargs): """ Apply a function elementwise, updating the HTML representation with the result. Parameters ---------- func : function ``func`` should take a scalar and return a scalar subset : IndexSlice a valid indexer to limit ``data`` to *before* applying the function. Consider using a pandas.IndexSlice kwargs : dict pass along to ``func`` Returns ------- self : Styler See Also -------- Styler.where """ self._todo.append((lambda instance: getattr(instance, '_applymap'), (func, subset), kwargs)) return self def where(self, cond, value, other=None, subset=None, **kwargs): """ Apply a function elementwise, updating the HTML representation with a style which is selected in accordance with the return value of a function. .. versionadded:: 0.21.0 Parameters ---------- cond : callable ``cond`` should take a scalar and return a boolean value : str applied when ``cond`` returns true other : str applied when ``cond`` returns false subset : IndexSlice a valid indexer to limit ``data`` to *before* applying the function. Consider using a pandas.IndexSlice kwargs : dict pass along to ``cond`` Returns ------- self : Styler See Also -------- Styler.applymap """ if other is None: other = '' return self.applymap(lambda val: value if cond(val) else other, subset=subset, **kwargs) def set_precision(self, precision): """ Set the precision used to render. Parameters ---------- precision: int Returns ------- self : Styler """ self.precision = precision return self def set_table_attributes(self, attributes): """ Set the table attributes. These are the items that show up in the opening ```` tag in addition to to automatic (by default) id. Parameters ---------- attributes : string Returns ------- self : Styler Examples -------- >>> df = pd.DataFrame(np.random.randn(10, 4)) >>> df.style.set_table_attributes('class="pure-table"') # ...

... """ self.table_attributes = attributes return self def export(self): """ Export the styles to applied to the current Styler. Can be applied to a second style with ``Styler.use``. Returns ------- styles: list See Also -------- Styler.use """ return self._todo def use(self, styles): """ Set the styles on the current Styler, possibly using styles from ``Styler.export``. Parameters ---------- styles: list list of style functions Returns ------- self : Styler See Also -------- Styler.export """ self._todo.extend(styles) return self def set_uuid(self, uuid): """ Set the uuid for a Styler. Parameters ---------- uuid: str Returns ------- self : Styler """ self.uuid = uuid return self def set_caption(self, caption): """ Set the caption on a Styler Parameters ---------- caption: str Returns ------- self : Styler """ self.caption = caption return self def set_table_styles(self, table_styles): """ Set the table styles on a Styler. These are placed in a ``