"""Utilities for conversion to writer-agnostic Excel representation """ import re import warnings import itertools import numpy as np from pandas.compat import reduce from pandas.io.formats.css import CSSResolver, CSSWarning from pandas.io.formats.printing import pprint_thing import pandas.core.common as com from pandas.core.dtypes.common import is_float, is_scalar from pandas.core.dtypes import missing from pandas import Index, MultiIndex, PeriodIndex from pandas.io.formats.format import get_level_lengths class ExcelCell(object): __fields__ = ('row', 'col', 'val', 'style', 'mergestart', 'mergeend') __slots__ = __fields__ def __init__(self, row, col, val, style=None, mergestart=None, mergeend=None): self.row = row self.col = col self.val = val self.style = style self.mergestart = mergestart self.mergeend = mergeend header_style = {"font": {"bold": True}, "borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"}, "alignment": {"horizontal": "center", "vertical": "top"}} class CSSToExcelConverter(object): """A callable for converting CSS declarations to ExcelWriter styles Supports parts of CSS 2.2, with minimal CSS 3.0 support (e.g. text-shadow), focusing on font styling, backgrounds, borders and alignment. Operates by first computing CSS styles in a fairly generic way (see :meth:`compute_css`) then determining Excel style properties from CSS properties (see :meth:`build_xlstyle`). Parameters ---------- inherited : str, optional CSS declarations understood to be the containing scope for the CSS processed by :meth:`__call__`. """ # NB: Most of the methods here could be classmethods, as only __init__ # and __call__ make use of instance attributes. We leave them as # instancemethods so that users can easily experiment with extensions # without monkey-patching. def __init__(self, inherited=None): if inherited is not None: inherited = self.compute_css(inherited, self.compute_css.INITIAL_STYLE) self.inherited = inherited compute_css = CSSResolver() def __call__(self, declarations_str): """Convert CSS declarations to ExcelWriter style Parameters ---------- declarations_str : str List of CSS declarations. e.g. "font-weight: bold; background: blue" Returns ------- xlstyle : dict A style as interpreted by ExcelWriter when found in ExcelCell.style. """ # TODO: memoize? properties = self.compute_css(declarations_str, self.inherited) return self.build_xlstyle(properties) def build_xlstyle(self, props): out = { 'alignment': self.build_alignment(props), 'border': self.build_border(props), 'fill': self.build_fill(props), 'font': self.build_font(props), } # TODO: support number format # TODO: handle cell width and height: needs support in pandas.io.excel def remove_none(d): """Remove key where value is None, through nested dicts""" for k, v in list(d.items()): if v is None: del d[k] elif isinstance(v, dict): remove_none(v) if not v: del d[k] remove_none(out) return out VERTICAL_MAP = { 'top': 'top', 'text-top': 'top', 'middle': 'center', 'baseline': 'bottom', 'bottom': 'bottom', 'text-bottom': 'bottom', # OpenXML also has 'justify', 'distributed' } def build_alignment(self, props): # TODO: text-indent, padding-left -> alignment.indent return {'horizontal': props.get('text-align'), 'vertical': self.VERTICAL_MAP.get(props.get('vertical-align')), 'wrap_text': (None if props.get('white-space') is None else props['white-space'] not in ('nowrap', 'pre', 'pre-line')) } def build_border(self, props): return {side: { 'style': self._border_style(props.get('border-{side}-style' .format(side=side)), props.get('border-{side}-width' .format(side=side))), 'color': self.color_to_excel( props.get('border-{side}-color'.format(side=side))), } for side in ['top', 'right', 'bottom', 'left']} def _border_style(self, style, width): # convert styles and widths to openxml, one of: # 'dashDot' # 'dashDotDot' # 'dashed' # 'dotted' # 'double' # 'hair' # 'medium' # 'mediumDashDot' # 'mediumDashDotDot' # 'mediumDashed' # 'slantDashDot' # 'thick' # 'thin' if width is None and style is None: return None if style == 'none' or style == 'hidden': return None if width is None: width = '2pt' width = float(width[:-2]) if width < 1e-5: return None elif width < 1.3: width_name = 'thin' elif width < 2.8: width_name = 'medium' else: width_name = 'thick' if style in (None, 'groove', 'ridge', 'inset', 'outset'): # not handled style = 'solid' if style == 'double': return 'double' if style == 'solid': return width_name if style == 'dotted': if width_name in ('hair', 'thin'): return 'dotted' return 'mediumDashDotDot' if style == 'dashed': if width_name in ('hair', 'thin'): return 'dashed' return 'mediumDashed' def build_fill(self, props): # TODO: perhaps allow for special properties # -excel-pattern-bgcolor and -excel-pattern-type fill_color = props.get('background-color') if fill_color not in (None, 'transparent', 'none'): return { 'fgColor': self.color_to_excel(fill_color), 'patternType': 'solid', } BOLD_MAP = {'bold': True, 'bolder': True, '600': True, '700': True, '800': True, '900': True, 'normal': False, 'lighter': False, '100': False, '200': False, '300': False, '400': False, '500': False} ITALIC_MAP = {'normal': False, 'italic': True, 'oblique': True} def build_font(self, props): size = props.get('font-size') if size is not None: assert size.endswith('pt') size = float(size[:-2]) font_names_tmp = re.findall(r'''(?x) ( "(?:[^"]|\\")+" | '(?:[^']|\\')+' | [^'",]+ )(?=,|\s*$) ''', props.get('font-family', '')) font_names = [] for name in font_names_tmp: if name[:1] == '"': name = name[1:-1].replace('\\"', '"') elif name[:1] == '\'': name = name[1:-1].replace('\\\'', '\'') else: name = name.strip() if name: font_names.append(name) family = None for name in font_names: if name == 'serif': family = 1 # roman break elif name == 'sans-serif': family = 2 # swiss break elif name == 'cursive': family = 4 # script break elif name == 'fantasy': family = 5 # decorative break decoration = props.get('text-decoration') if decoration is not None: decoration = decoration.split() else: decoration = () return { 'name': font_names[0] if font_names else None, 'family': family, 'size': size, 'bold': self.BOLD_MAP.get(props.get('font-weight')), 'italic': self.ITALIC_MAP.get(props.get('font-style')), 'underline': ('single' if 'underline' in decoration else None), 'strike': ('line-through' in decoration) or None, 'color': self.color_to_excel(props.get('color')), # shadow if nonzero digit before shadow color 'shadow': (bool(re.search('^[^#(]*[1-9]', props['text-shadow'])) if 'text-shadow' in props else None), # 'vertAlign':, # 'charset': , # 'scheme': , # 'outline': , # 'condense': , } NAMED_COLORS = { 'maroon': '800000', 'brown': 'A52A2A', 'red': 'FF0000', 'pink': 'FFC0CB', 'orange': 'FFA500', 'yellow': 'FFFF00', 'olive': '808000', 'green': '008000', 'purple': '800080', 'fuchsia': 'FF00FF', 'lime': '00FF00', 'teal': '008080', 'aqua': '00FFFF', 'blue': '0000FF', 'navy': '000080', 'black': '000000', 'gray': '808080', 'grey': '808080', 'silver': 'C0C0C0', 'white': 'FFFFFF', } def color_to_excel(self, val): if val is None: return None if val.startswith('#') and len(val) == 7: return val[1:].upper() if val.startswith('#') and len(val) == 4: return (val[1] * 2 + val[2] * 2 + val[3] * 2).upper() try: return self.NAMED_COLORS[val] except KeyError: warnings.warn('Unhandled color format: {val!r}'.format(val=val), CSSWarning) class ExcelFormatter(object): """ Class for formatting a DataFrame to a list of ExcelCells, Parameters ---------- df : DataFrame or Styler na_rep: na representation float_format : string, default None Format string for floating point numbers cols : sequence, optional Columns to write header : boolean or list of string, default True Write out column names. If a list of string is given it is assumed to be aliases for the column names index : boolean, default True output row names (index) index_label : string or sequence, default None Column label for index column(s) if desired. If None is given, and `header` and `index` are True, then the index names are used. A sequence should be given if the DataFrame uses MultiIndex. merge_cells : boolean, default False Format MultiIndex and Hierarchical Rows as merged cells. inf_rep : string, default `'inf'` representation for np.inf values (which aren't representable in Excel) A `'-'` sign will be added in front of -inf. style_converter : callable, optional This translates Styler styles (CSS) into ExcelWriter styles. Defaults to ``CSSToExcelConverter()``. It should have signature css_declarations string -> excel style. This is only called for body cells. """ def __init__(self, df, na_rep='', float_format=None, cols=None, header=True, index=True, index_label=None, merge_cells=False, inf_rep='inf', style_converter=None): self.rowcounter = 0 self.na_rep = na_rep if hasattr(df, 'render'): self.styler = df df = df.data if style_converter is None: style_converter = CSSToExcelConverter() self.style_converter = style_converter else: self.styler = None self.df = df if cols is not None: # all missing, raise if not len(Index(cols) & df.columns): raise KeyError( "passes columns are not ALL present dataframe") # deprecatedin gh-17295 # 1 missing is ok (for now) if len(Index(cols) & df.columns) != len(cols): warnings.warn( "Not all names specified in 'columns' are found; " "this will raise a KeyError in the future", FutureWarning) self.df = df.reindex(columns=cols) self.columns = self.df.columns self.float_format = float_format self.index = index self.index_label = index_label self.header = header self.merge_cells = merge_cells self.inf_rep = inf_rep def _format_value(self, val): if is_scalar(val) and missing.isna(val): val = self.na_rep elif is_float(val): if missing.isposinf_scalar(val): val = self.inf_rep elif missing.isneginf_scalar(val): val = '-{inf}'.format(inf=self.inf_rep) elif self.float_format is not None: val = float(self.float_format % val) return val def _format_header_mi(self): if self.columns.nlevels > 1: if not self.index: raise NotImplementedError("Writing to Excel with MultiIndex" " columns and no index " "('index'=False) is not yet " "implemented.") has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index)) if not (has_aliases or self.header): return columns = self.columns level_strs = columns.format(sparsify=self.merge_cells, adjoin=False, names=False) level_lengths = get_level_lengths(level_strs) coloffset = 0 lnum = 0 if self.index and isinstance(self.df.index, MultiIndex): coloffset = len(self.df.index[0]) - 1 if self.merge_cells: # Format multi-index as a merged cells. for lnum in range(len(level_lengths)): name = columns.names[lnum] yield ExcelCell(lnum, coloffset, name, header_style) for lnum, (spans, levels, labels) in enumerate(zip( level_lengths, columns.levels, columns.labels)): values = levels.take(labels) for i in spans: if spans[i] > 1: yield ExcelCell(lnum, coloffset + i + 1, values[i], header_style, lnum, coloffset + i + spans[i]) else: yield ExcelCell(lnum, coloffset + i + 1, values[i], header_style) else: # Format in legacy format with dots to indicate levels. for i, values in enumerate(zip(*level_strs)): v = ".".join(map(pprint_thing, values)) yield ExcelCell(lnum, coloffset + i + 1, v, header_style) self.rowcounter = lnum def _format_header_regular(self): has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index)) if has_aliases or self.header: coloffset = 0 if self.index: coloffset = 1 if isinstance(self.df.index, MultiIndex): coloffset = len(self.df.index[0]) colnames = self.columns if has_aliases: if len(self.header) != len(self.columns): raise ValueError('Writing {cols} cols but got {alias} ' 'aliases'.format(cols=len(self.columns), alias=len(self.header))) else: colnames = self.header for colindex, colname in enumerate(colnames): yield ExcelCell(self.rowcounter, colindex + coloffset, colname, header_style) def _format_header(self): if isinstance(self.columns, MultiIndex): gen = self._format_header_mi() else: gen = self._format_header_regular() gen2 = () if self.df.index.names: row = [x if x is not None else '' for x in self.df.index.names] + [''] * len(self.columns) if reduce(lambda x, y: x and y, map(lambda x: x != '', row)): gen2 = (ExcelCell(self.rowcounter, colindex, val, header_style) for colindex, val in enumerate(row)) self.rowcounter += 1 return itertools.chain(gen, gen2) def _format_body(self): if isinstance(self.df.index, MultiIndex): return self._format_hierarchical_rows() else: return self._format_regular_rows() def _format_regular_rows(self): has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index)) if has_aliases or self.header: self.rowcounter += 1 # output index and index_label? if self.index: # check aliases # if list only take first as this is not a MultiIndex if (self.index_label and isinstance(self.index_label, (list, tuple, np.ndarray, Index))): index_label = self.index_label[0] # if string good to go elif self.index_label and isinstance(self.index_label, str): index_label = self.index_label else: index_label = self.df.index.names[0] if isinstance(self.columns, MultiIndex): self.rowcounter += 1 if index_label and self.header is not False: yield ExcelCell(self.rowcounter - 1, 0, index_label, header_style) # write index_values index_values = self.df.index if isinstance(self.df.index, PeriodIndex): index_values = self.df.index.to_timestamp() for idx, idxval in enumerate(index_values): yield ExcelCell(self.rowcounter + idx, 0, idxval, header_style) coloffset = 1 else: coloffset = 0 for cell in self._generate_body(coloffset): yield cell def _format_hierarchical_rows(self): has_aliases = isinstance(self.header, (tuple, list, np.ndarray, Index)) if has_aliases or self.header: self.rowcounter += 1 gcolidx = 0 if self.index: index_labels = self.df.index.names # check for aliases if (self.index_label and isinstance(self.index_label, (list, tuple, np.ndarray, Index))): index_labels = self.index_label # MultiIndex columns require an extra row # with index names (blank if None) for # unambigous round-trip, unless not merging, # in which case the names all go on one row Issue #11328 if isinstance(self.columns, MultiIndex) and self.merge_cells: self.rowcounter += 1 # if index labels are not empty go ahead and dump if com._any_not_none(*index_labels) and self.header is not False: for cidx, name in enumerate(index_labels): yield ExcelCell(self.rowcounter - 1, cidx, name, header_style) if self.merge_cells: # Format hierarchical rows as merged cells. level_strs = self.df.index.format(sparsify=True, adjoin=False, names=False) level_lengths = get_level_lengths(level_strs) for spans, levels, labels in zip(level_lengths, self.df.index.levels, self.df.index.labels): values = levels.take(labels, allow_fill=levels._can_hold_na, fill_value=True) for i in spans: if spans[i] > 1: yield ExcelCell(self.rowcounter + i, gcolidx, values[i], header_style, self.rowcounter + i + spans[i] - 1, gcolidx) else: yield ExcelCell(self.rowcounter + i, gcolidx, values[i], header_style) gcolidx += 1 else: # Format hierarchical rows with non-merged values. for indexcolvals in zip(*self.df.index): for idx, indexcolval in enumerate(indexcolvals): yield ExcelCell(self.rowcounter + idx, gcolidx, indexcolval, header_style) gcolidx += 1 for cell in self._generate_body(gcolidx): yield cell def _generate_body(self, coloffset): if self.styler is None: styles = None else: styles = self.styler._compute().ctx if not styles: styles = None xlstyle = None # Write the body of the frame data series by series. for colidx in range(len(self.columns)): series = self.df.iloc[:, colidx] for i, val in enumerate(series): if styles is not None: xlstyle = self.style_converter(';'.join(styles[i, colidx])) yield ExcelCell(self.rowcounter + i, colidx + coloffset, val, xlstyle) def get_formatted_cells(self): for cell in itertools.chain(self._format_header(), self._format_body()): cell.val = self._format_value(cell.val) yield cell def write(self, writer, sheet_name='Sheet1', startrow=0, startcol=0, freeze_panes=None, engine=None): """ writer : string or ExcelWriter object File path or existing ExcelWriter sheet_name : string, default 'Sheet1' Name of sheet which will contain DataFrame startrow : upper left cell row to dump data frame startcol : upper left cell column to dump data frame freeze_panes : tuple of integer (length 2), default None Specifies the one-based bottommost row and rightmost column that is to be frozen engine : string, default None write engine to use if writer is a path - you can also set this via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and ``io.excel.xlsm.writer``. """ from pandas.io.excel import ExcelWriter from pandas.io.common import _stringify_path if isinstance(writer, ExcelWriter): need_save = False else: writer = ExcelWriter(_stringify_path(writer), engine=engine) need_save = True formatted_cells = self.get_formatted_cells() writer.write_cells(formatted_cells, sheet_name, startrow=startrow, startcol=startcol, freeze_panes=freeze_panes) if need_save: writer.save()