""" Arithmetic operations for PandasObjects This is not a public API. """ # necessary to enforce truediv in Python 2.X from __future__ import division import datetime import operator import textwrap import warnings import numpy as np import pandas as pd from pandas._libs import algos as libalgos, ops as libops from pandas import compat from pandas.util._decorators import Appender from pandas.compat import bind_method import pandas.core.missing as missing import pandas.core.common as com from pandas.errors import NullFrequencyError from pandas.core.dtypes.missing import notna, isna from pandas.core.dtypes.common import ( needs_i8_conversion, is_datetimelike_v_numeric, is_integer_dtype, is_categorical_dtype, is_object_dtype, is_timedelta64_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_bool_dtype, is_list_like, is_scalar, _ensure_object) from pandas.core.dtypes.cast import ( maybe_upcast_putmask, find_common_type, construct_1d_object_array_from_listlike) from pandas.core.dtypes.generic import ( ABCSeries, ABCDataFrame, ABCPanel, ABCIndex, ABCSparseSeries, ABCSparseArray) # ----------------------------------------------------------------------------- # Ops Wrapping Utilities def get_op_result_name(left, right): """ Find the appropriate name to pin to an operation result. This result should always be either an Index or a Series. Parameters ---------- left : {Series, Index} right : object Returns ------- name : object Usually a string """ # `left` is always a pd.Series when called from within ops if isinstance(right, (ABCSeries, pd.Index)): name = _maybe_match_name(left, right) else: name = left.name return name def _maybe_match_name(a, b): """ Try to find a name to attach to the result of an operation between a and b. If only one of these has a `name` attribute, return that name. Otherwise return a consensus name if they match of None if they have different names. Parameters ---------- a : object b : object Returns ------- name : str or None See also -------- pandas.core.common._consensus_name_attr """ a_has = hasattr(a, 'name') b_has = hasattr(b, 'name') if a_has and b_has: if a.name == b.name: return a.name else: # TODO: what if they both have np.nan for their names? return None elif a_has: return a.name elif b_has: return b.name return None # ----------------------------------------------------------------------------- # Reversed Operations not available in the stdlib operator module. # Defining these instead of using lambdas allows us to reference them by name. def radd(left, right): return right + left def rsub(left, right): return right - left def rmul(left, right): return right * left def rdiv(left, right): return right / left def rtruediv(left, right): return right / left def rfloordiv(left, right): return right // left def rmod(left, right): return right % left def rdivmod(left, right): return divmod(right, left) def rpow(left, right): return right ** left def rand_(left, right): return operator.and_(right, left) def ror_(left, right): return operator.or_(right, left) def rxor(left, right): return operator.xor(right, left) # ----------------------------------------------------------------------------- def make_invalid_op(name): """ Return a binary method that always raises a TypeError. 
Parameters ---------- name : str Returns ------- invalid_op : function """ def invalid_op(self, other=None): raise TypeError("cannot perform {name} with this index type: " "{typ}".format(name=name, typ=type(self).__name__)) invalid_op.__name__ = name return invalid_op def _gen_eval_kwargs(name): """ Find the keyword arguments to pass to numexpr for the given operation. Parameters ---------- name : str Returns ------- eval_kwargs : dict Examples -------- >>> _gen_eval_kwargs("__add__") {} >>> _gen_eval_kwargs("rtruediv") {"reversed": True, "truediv": True} """ kwargs = {} # Series and Panel appear to only pass __add__, __radd__, ... # but DataFrame gets both these dunder names _and_ non-dunder names # add, radd, ... name = name.replace('__', '') if name.startswith('r'): if name not in ['radd', 'rand', 'ror', 'rxor']: # Exclude commutative operations kwargs['reversed'] = True if name in ['truediv', 'rtruediv']: kwargs['truediv'] = True if name in ['ne']: kwargs['masker'] = True return kwargs def _gen_fill_zeros(name): """ Find the appropriate fill value to use when filling in undefined values in the results of the given operation caused by operating on (generally dividing by) zero. Parameters ---------- name : str Returns ------- fill_value : {None, np.nan, np.inf} """ name = name.strip('__') if 'div' in name: # truediv, floordiv, div, and reversed variants fill_value = np.inf elif 'mod' in name: # mod, rmod fill_value = np.nan else: fill_value = None return fill_value def _get_frame_op_default_axis(name): """ Only DataFrame cares about default_axis, specifically: special methods have default_axis=None and flex methods have default_axis='columns'. Parameters ---------- name : str Returns ------- default_axis: str or None """ if name.replace('__r', '__') in ['__and__', '__or__', '__xor__']: # bool methods return 'columns' elif name.startswith('__'): # __add__, __mul__, ... return None else: # add, mul, ... return 'columns' def _get_opstr(op, cls): """ Find the operation string, if any, to pass to numexpr for this operation. Parameters ---------- op : binary operator cls : class Returns ------- op_str : string or None """ # numexpr is available for non-sparse classes subtyp = getattr(cls, '_subtyp', '') use_numexpr = 'sparse' not in subtyp if not use_numexpr: # if we're not using numexpr, then don't pass a str_rep return None return {operator.add: '+', radd: '+', operator.mul: '*', rmul: '*', operator.sub: '-', rsub: '-', operator.truediv: '/', rtruediv: '/', operator.floordiv: '//', rfloordiv: '//', operator.mod: None, # TODO: Why None for mod but '%' for rmod? rmod: '%', operator.pow: '**', rpow: '**', operator.eq: '==', operator.ne: '!=', operator.le: '<=', operator.lt: '<', operator.ge: '>=', operator.gt: '>', operator.and_: '&', rand_: '&', operator.or_: '|', ror_: '|', operator.xor: '^', rxor: '^', divmod: None, rdivmod: None}[op] def _get_op_name(op, special): """ Find the name to attach to this method according to conventions for special and non-special methods. Parameters ---------- op : binary operator special : bool Returns ------- op_name : str """ opname = op.__name__.strip('_') if special: opname = '__{opname}__'.format(opname=opname) return opname # ----------------------------------------------------------------------------- # Docstring Generation and Templates _add_example_FRAME = """ >>> a = pd.DataFrame([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd'], ... columns=['one']) >>> a one a 1.0 b 1.0 c 1.0 d NaN >>> b = pd.DataFrame(dict(one=[1, np.nan, 1, np.nan], ... 
two=[np.nan, 2, np.nan, 2]), ... index=['a', 'b', 'd', 'e']) >>> b one two a 1.0 NaN b NaN 2.0 d 1.0 NaN e NaN 2.0 >>> a.add(b, fill_value=0) one two a 2.0 NaN b 1.0 2.0 c 1.0 NaN d 1.0 NaN e NaN 2.0 """ _sub_example_FRAME = """ >>> a = pd.DataFrame([2, 1, 1, np.nan], index=['a', 'b', 'c', 'd'], ... columns=['one']) >>> a one a 2.0 b 1.0 c 1.0 d NaN >>> b = pd.DataFrame(dict(one=[1, np.nan, 1, np.nan], ... two=[3, 2, np.nan, 2]), ... index=['a', 'b', 'd', 'e']) >>> b one two a 1.0 3.0 b NaN 2.0 d 1.0 NaN e NaN 2.0 >>> a.sub(b, fill_value=0) one two a 1.0 -3.0 b 1.0 -2.0 c 1.0 NaN d -1.0 NaN e NaN -2.0 """ _op_descriptions = { # Arithmetic Operators 'add': {'op': '+', 'desc': 'Addition', 'reverse': 'radd', 'df_examples': _add_example_FRAME}, 'sub': {'op': '-', 'desc': 'Subtraction', 'reverse': 'rsub', 'df_examples': _sub_example_FRAME}, 'mul': {'op': '*', 'desc': 'Multiplication', 'reverse': 'rmul', 'df_examples': None}, 'mod': {'op': '%', 'desc': 'Modulo', 'reverse': 'rmod', 'df_examples': None}, 'pow': {'op': '**', 'desc': 'Exponential power', 'reverse': 'rpow', 'df_examples': None}, 'truediv': {'op': '/', 'desc': 'Floating division', 'reverse': 'rtruediv', 'df_examples': None}, 'floordiv': {'op': '//', 'desc': 'Integer division', 'reverse': 'rfloordiv', 'df_examples': None}, 'divmod': {'op': 'divmod', 'desc': 'Integer division and modulo', 'reverse': None, 'df_examples': None}, # Comparison Operators 'eq': {'op': '==', 'desc': 'Equal to', 'reverse': None, 'df_examples': None}, 'ne': {'op': '!=', 'desc': 'Not equal to', 'reverse': None, 'df_examples': None}, 'lt': {'op': '<', 'desc': 'Less than', 'reverse': None, 'df_examples': None}, 'le': {'op': '<=', 'desc': 'Less than or equal to', 'reverse': None, 'df_examples': None}, 'gt': {'op': '>', 'desc': 'Greater than', 'reverse': None, 'df_examples': None}, 'ge': {'op': '>=', 'desc': 'Greater than or equal to', 'reverse': None, 'df_examples': None}} _op_names = list(_op_descriptions.keys()) for key in _op_names: _op_descriptions[key]['reversed'] = False reverse_op = _op_descriptions[key]['reverse'] if reverse_op is not None: _op_descriptions[reverse_op] = _op_descriptions[key].copy() _op_descriptions[reverse_op]['reversed'] = True _op_descriptions[reverse_op]['reverse'] = key _flex_doc_SERIES = """ {desc} of series and other, element-wise (binary operator `{op_name}`). Equivalent to ``{equiv}``, but with support to substitute a fill_value for missing data in one of the inputs. Parameters ---------- other : Series or scalar value fill_value : None or float value, default None (NaN) Fill existing missing (NaN) values, and any new element needed for successful Series alignment, with this value before computation. 
If data in both corresponding Series locations is missing the result will be missing level : int or name Broadcast across a level, matching Index values on the passed MultiIndex level Returns ------- result : Series Examples -------- >>> a = pd.Series([1, 1, 1, np.nan], index=['a', 'b', 'c', 'd']) >>> a a 1.0 b 1.0 c 1.0 d NaN dtype: float64 >>> b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e']) >>> b a 1.0 b NaN d 1.0 e NaN dtype: float64 >>> a.add(b, fill_value=0) a 2.0 b 1.0 c 1.0 d 1.0 e NaN dtype: float64 See also -------- Series.{reverse} """ _arith_doc_FRAME = """ Binary operator %s with support to substitute a fill_value for missing data in one of the inputs Parameters ---------- other : Series, DataFrame, or constant axis : {0, 1, 'index', 'columns'} For Series input, axis to match Series index on fill_value : None or float value, default None Fill existing missing (NaN) values, and any new element needed for successful DataFrame alignment, with this value before computation. If data in both corresponding DataFrame locations is missing the result will be missing level : int or name Broadcast across a level, matching Index values on the passed MultiIndex level Notes ----- Mismatched indices will be unioned together Returns ------- result : DataFrame """ _flex_doc_FRAME = """ {desc} of dataframe and other, element-wise (binary operator `{op_name}`). Equivalent to ``{equiv}``, but with support to substitute a fill_value for missing data in one of the inputs. Parameters ---------- other : Series, DataFrame, or constant axis : {{0, 1, 'index', 'columns'}} For Series input, axis to match Series index on level : int or name Broadcast across a level, matching Index values on the passed MultiIndex level fill_value : None or float value, default None Fill existing missing (NaN) values, and any new element needed for successful DataFrame alignment, with this value before computation. If data in both corresponding DataFrame locations is missing the result will be missing Notes ----- Mismatched indices will be unioned together Returns ------- result : DataFrame Examples -------- {df_examples} See also -------- DataFrame.{reverse} """ _flex_doc_PANEL = """ {desc} of series and other, element-wise (binary operator `{op_name}`). Equivalent to ``{equiv}``. Parameters ---------- other : DataFrame or Panel axis : {{items, major_axis, minor_axis}} Axis to broadcast over Returns ------- Panel See also -------- Panel.{reverse} """ _agg_doc_PANEL = """ Wrapper method for {op_name} Parameters ---------- other : DataFrame or Panel axis : {{items, major_axis, minor_axis}} Axis to broadcast over Returns ------- Panel """ def _make_flex_doc(op_name, typ): """ Make the appropriate substitutions for the given operation and class-typ into either _flex_doc_SERIES or _flex_doc_FRAME to return the docstring to attach to a generated method. Parameters ---------- op_name : str {'__add__', '__sub__', ... 
               '__eq__', '__ne__', ...}
    typ : str {'series', 'dataframe', 'panel'}

    Returns
    -------
    doc : str
    """
    op_name = op_name.replace('__', '')
    op_desc = _op_descriptions[op_name]

    if op_desc['reversed']:
        equiv = 'other ' + op_desc['op'] + ' ' + typ
    else:
        equiv = typ + ' ' + op_desc['op'] + ' other'

    if typ == 'series':
        base_doc = _flex_doc_SERIES
        doc = base_doc.format(desc=op_desc['desc'], op_name=op_name,
                              equiv=equiv, reverse=op_desc['reverse'])
    elif typ == 'dataframe':
        base_doc = _flex_doc_FRAME
        doc = base_doc.format(desc=op_desc['desc'], op_name=op_name,
                              equiv=equiv, reverse=op_desc['reverse'],
                              df_examples=op_desc['df_examples'])
    elif typ == 'panel':
        base_doc = _flex_doc_PANEL
        doc = base_doc.format(desc=op_desc['desc'], op_name=op_name,
                              equiv=equiv, reverse=op_desc['reverse'])
    else:
        raise AssertionError('Invalid typ argument.')
    return doc


# -----------------------------------------------------------------------------
# Masking NA values and fallbacks for operations numpy does not support

def fill_binop(left, right, fill_value):
    """
    If a non-None fill_value is given, replace null entries in left and right
    with this value, but only in positions where _one_ of left/right is null,
    not both.

    Parameters
    ----------
    left : array-like
    right : array-like
    fill_value : object

    Returns
    -------
    left : array-like
    right : array-like

    Notes
    -----
    Makes copies if fill_value is not None
    """
    # TODO: can we make a no-copy implementation?
    if fill_value is not None:
        left_mask = isna(left)
        right_mask = isna(right)
        left = left.copy()
        right = right.copy()

        # one but not both
        mask = left_mask ^ right_mask
        left[left_mask & mask] = fill_value
        right[right_mask & mask] = fill_value
    return left, right


def mask_cmp_op(x, y, op, allowed_types):
    """
    Apply the function `op` to only non-null points in x and y.

    Parameters
    ----------
    x : array-like
    y : array-like
    op : binary operation
    allowed_types : class or tuple of classes

    Returns
    -------
    result : ndarray[bool]
    """
    # TODO: Can we make the allowed_types arg unnecessary?
    xrav = x.ravel()
    result = np.empty(x.size, dtype=bool)
    if isinstance(y, allowed_types):
        yrav = y.ravel()
        mask = notna(xrav) & notna(yrav)
        result[mask] = op(np.array(list(xrav[mask])),
                          np.array(list(yrav[mask])))
    else:
        mask = notna(xrav)
        result[mask] = op(np.array(list(xrav[mask])), y)

    if op == operator.ne:  # pragma: no cover
        np.putmask(result, ~mask, True)
    else:
        np.putmask(result, ~mask, False)
    result = result.reshape(x.shape)
    return result


# -----------------------------------------------------------------------------
# Functions that add arithmetic methods to objects, given arithmetic factory
# methods

def _get_method_wrappers(cls):
    """
    Find the appropriate operation-wrappers to use when defining flex/special
    arithmetic, boolean, and comparison operations with the given class.
Parameters ---------- cls : class Returns ------- arith_flex : function or None comp_flex : function or None arith_special : function comp_special : function bool_special : function Notes ----- None is only returned for SparseArray """ if issubclass(cls, ABCSparseSeries): # Be sure to catch this before ABCSeries and ABCSparseArray, # as they will both come see SparseSeries as a subclass arith_flex = _flex_method_SERIES comp_flex = _flex_method_SERIES arith_special = _arith_method_SPARSE_SERIES comp_special = _arith_method_SPARSE_SERIES bool_special = _bool_method_SERIES # TODO: I don't think the functions defined by bool_method are tested elif issubclass(cls, ABCSeries): # Just Series; SparseSeries is caught above arith_flex = _flex_method_SERIES comp_flex = _flex_method_SERIES arith_special = _arith_method_SERIES comp_special = _comp_method_SERIES bool_special = _bool_method_SERIES elif issubclass(cls, ABCSparseArray): arith_flex = None comp_flex = None arith_special = _arith_method_SPARSE_ARRAY comp_special = _arith_method_SPARSE_ARRAY bool_special = _arith_method_SPARSE_ARRAY elif issubclass(cls, ABCPanel): arith_flex = _flex_method_PANEL comp_flex = _comp_method_PANEL arith_special = _arith_method_PANEL comp_special = _comp_method_PANEL bool_special = _arith_method_PANEL elif issubclass(cls, ABCDataFrame): # Same for DataFrame and SparseDataFrame arith_flex = _arith_method_FRAME comp_flex = _flex_comp_method_FRAME arith_special = _arith_method_FRAME comp_special = _comp_method_FRAME bool_special = _arith_method_FRAME return arith_flex, comp_flex, arith_special, comp_special, bool_special def _create_methods(cls, arith_method, comp_method, bool_method, special=False): # creates actual methods based upon arithmetic, comp and bool method # constructors. have_divmod = issubclass(cls, ABCSeries) # divmod is available for Series and SparseSeries # yapf: disable new_methods = dict( add=arith_method(cls, operator.add, special), radd=arith_method(cls, radd, special), sub=arith_method(cls, operator.sub, special), mul=arith_method(cls, operator.mul, special), truediv=arith_method(cls, operator.truediv, special), floordiv=arith_method(cls, operator.floordiv, special), # Causes a floating point exception in the tests when numexpr enabled, # so for now no speedup mod=arith_method(cls, operator.mod, special), pow=arith_method(cls, operator.pow, special), # not entirely sure why this is necessary, but previously was included # so it's here to maintain compatibility rmul=arith_method(cls, rmul, special), rsub=arith_method(cls, rsub, special), rtruediv=arith_method(cls, rtruediv, special), rfloordiv=arith_method(cls, rfloordiv, special), rpow=arith_method(cls, rpow, special), rmod=arith_method(cls, rmod, special)) # yapf: enable new_methods['div'] = new_methods['truediv'] new_methods['rdiv'] = new_methods['rtruediv'] if have_divmod: # divmod doesn't have an op that is supported by numexpr new_methods['divmod'] = arith_method(cls, divmod, special) new_methods.update(dict( eq=comp_method(cls, operator.eq, special), ne=comp_method(cls, operator.ne, special), lt=comp_method(cls, operator.lt, special), gt=comp_method(cls, operator.gt, special), le=comp_method(cls, operator.le, special), ge=comp_method(cls, operator.ge, special))) if bool_method: new_methods.update( dict(and_=bool_method(cls, operator.and_, special), or_=bool_method(cls, operator.or_, special), # For some reason ``^`` wasn't used in original. 
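                 # ``^`` is wired up below now, together with its reverse, so
                 # classes that pass a bool_method also get __xor__/__rxor__
                 # alongside __and__/__or__.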
xor=bool_method(cls, operator.xor, special), rand_=bool_method(cls, rand_, special), ror_=bool_method(cls, ror_, special), rxor=bool_method(cls, rxor, special))) if special: dunderize = lambda x: '__{name}__'.format(name=x.strip('_')) else: dunderize = lambda x: x new_methods = {dunderize(k): v for k, v in new_methods.items()} return new_methods def add_methods(cls, new_methods): for name, method in new_methods.items(): # For most methods, if we find that the class already has a method # of the same name, it is OK to over-write it. The exception is # inplace methods (__iadd__, __isub__, ...) for SparseArray, which # retain the np.ndarray versions. force = not (issubclass(cls, ABCSparseArray) and name.startswith('__i')) if force or name not in cls.__dict__: bind_method(cls, name, method) # ---------------------------------------------------------------------- # Arithmetic def add_special_arithmetic_methods(cls): """ Adds the full suite of special arithmetic methods (``__add__``, ``__sub__``, etc.) to the class. Parameters ---------- cls : class special methods will be defined and pinned to this class """ _, _, arith_method, comp_method, bool_method = _get_method_wrappers(cls) new_methods = _create_methods(cls, arith_method, comp_method, bool_method, special=True) # inplace operators (I feel like these should get passed an `inplace=True` # or just be removed def _wrap_inplace_method(method): """ return an inplace wrapper for this method """ def f(self, other): result = method(self, other) # this makes sure that we are aligned like the input # we are updating inplace so we want to ignore is_copy self._update_inplace(result.reindex_like(self, copy=False)._data, verify_is_copy=False) return self return f new_methods.update( dict(__iadd__=_wrap_inplace_method(new_methods["__add__"]), __isub__=_wrap_inplace_method(new_methods["__sub__"]), __imul__=_wrap_inplace_method(new_methods["__mul__"]), __itruediv__=_wrap_inplace_method(new_methods["__truediv__"]), __ifloordiv__=_wrap_inplace_method(new_methods["__floordiv__"]), __imod__=_wrap_inplace_method(new_methods["__mod__"]), __ipow__=_wrap_inplace_method(new_methods["__pow__"]))) if not compat.PY3: new_methods["__idiv__"] = _wrap_inplace_method(new_methods["__div__"]) new_methods.update( dict(__iand__=_wrap_inplace_method(new_methods["__and__"]), __ior__=_wrap_inplace_method(new_methods["__or__"]), __ixor__=_wrap_inplace_method(new_methods["__xor__"]))) add_methods(cls, new_methods=new_methods) def add_flex_arithmetic_methods(cls): """ Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``) to the class. 
Parameters ---------- cls : class flex methods will be defined and pinned to this class """ flex_arith_method, flex_comp_method, _, _, _ = _get_method_wrappers(cls) new_methods = _create_methods(cls, flex_arith_method, flex_comp_method, bool_method=None, special=False) new_methods.update(dict(multiply=new_methods['mul'], subtract=new_methods['sub'], divide=new_methods['div'])) # opt out of bool flex methods for now assert not any(kname in new_methods for kname in ('ror_', 'rxor', 'rand_')) add_methods(cls, new_methods=new_methods) # ----------------------------------------------------------------------------- # Series def _align_method_SERIES(left, right, align_asobject=False): """ align lhs and rhs Series """ # ToDo: Different from _align_method_FRAME, list, tuple and ndarray # are not coerced here # because Series has inconsistencies described in #13637 if isinstance(right, ABCSeries): # avoid repeated alignment if not left.index.equals(right.index): if align_asobject: # to keep original value's dtype for bool ops left = left.astype(object) right = right.astype(object) left, right = left.align(right, copy=False) return left, right def _construct_result(left, result, index, name, dtype): """ If the raw op result has a non-None name (e.g. it is an Index object) and the name argument is None, then passing name to the constructor will not be enough; we still need to override the name attribute. """ out = left._constructor(result, index=index, dtype=dtype) out.name = name return out def _construct_divmod_result(left, result, index, name, dtype): """divmod returns a tuple of like indexed series instead of a single series. """ constructor = left._constructor return ( constructor(result[0], index=index, name=name, dtype=dtype), constructor(result[1], index=index, name=name, dtype=dtype), ) def _arith_method_SERIES(cls, op, special): """ Wrapper function for Series arithmetic operations, to avoid code duplication. 
""" str_rep = _get_opstr(op, cls) op_name = _get_op_name(op, special) eval_kwargs = _gen_eval_kwargs(op_name) fill_zeros = _gen_fill_zeros(op_name) construct_result = (_construct_divmod_result if op is divmod else _construct_result) def na_op(x, y): import pandas.core.computation.expressions as expressions try: result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: if isinstance(y, (np.ndarray, ABCSeries, pd.Index)): dtype = find_common_type([x.dtype, y.dtype]) result = np.empty(x.size, dtype=dtype) mask = notna(x) & notna(y) result[mask] = op(x[mask], com._values_from_object(y[mask])) else: assert isinstance(x, np.ndarray) result = np.empty(len(x), dtype=x.dtype) mask = notna(x) result[mask] = op(x[mask], y) result, changed = maybe_upcast_putmask(result, ~mask, np.nan) result = missing.fill_zeros(result, x, y, op_name, fill_zeros) return result def safe_na_op(lvalues, rvalues): try: with np.errstate(all='ignore'): return na_op(lvalues, rvalues) except Exception: if is_object_dtype(lvalues): return libalgos.arrmap_object(lvalues, lambda x: op(x, rvalues)) raise def wrapper(left, right): if isinstance(right, ABCDataFrame): return NotImplemented left, right = _align_method_SERIES(left, right) res_name = get_op_result_name(left, right) if is_datetime64_dtype(left) or is_datetime64tz_dtype(left): result = dispatch_to_index_op(op, left, right, pd.DatetimeIndex) return construct_result(left, result, index=left.index, name=res_name, dtype=result.dtype) elif is_timedelta64_dtype(left): result = dispatch_to_index_op(op, left, right, pd.TimedeltaIndex) return construct_result(left, result, index=left.index, name=res_name, dtype=result.dtype) elif is_categorical_dtype(left): raise TypeError("{typ} cannot perform the operation " "{op}".format(typ=type(left).__name__, op=str_rep)) lvalues = left.values rvalues = right if isinstance(rvalues, ABCSeries): rvalues = rvalues.values result = safe_na_op(lvalues, rvalues) return construct_result(left, result, index=left.index, name=res_name, dtype=None) return wrapper def dispatch_to_index_op(op, left, right, index_class): """ Wrap Series left in the given index_class to delegate the operation op to the index implementation. DatetimeIndex and TimedeltaIndex perform type checking, timezone handling, overflow checks, etc. Parameters ---------- op : binary operator (operator.add, operator.sub, ...) left : Series right : object index_class : DatetimeIndex or TimedeltaIndex Returns ------- result : object, usually DatetimeIndex, TimedeltaIndex, or Series """ left_idx = index_class(left) # avoid accidentally allowing integer add/sub. For datetime64[tz] dtypes, # left_idx may inherit a freq from a cached DatetimeIndex. # See discussion in GH#19147. if getattr(left_idx, 'freq', None) is not None: left_idx = left_idx._shallow_copy(freq=None) try: result = op(left_idx, right) except NullFrequencyError: # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError # on add/sub of integers (or int-like). We re-raise as a TypeError. 
raise TypeError('incompatible type for a datetime/timedelta ' 'operation [{name}]'.format(name=op.__name__)) return result def _comp_method_OBJECT_ARRAY(op, x, y): if isinstance(y, list): y = construct_1d_object_array_from_listlike(y) if isinstance(y, (np.ndarray, ABCSeries, ABCIndex)): if not is_object_dtype(y.dtype): y = y.astype(np.object_) if isinstance(y, (ABCSeries, ABCIndex)): y = y.values result = libops.vec_compare(x, y, op) else: result = libops.scalar_compare(x, y, op) return result def _comp_method_SERIES(cls, op, special): """ Wrapper function for Series arithmetic operations, to avoid code duplication. """ op_name = _get_op_name(op, special) masker = _gen_eval_kwargs(op_name).get('masker', False) def na_op(x, y): # dispatch to the categorical if we have a categorical # in either operand if is_categorical_dtype(y) and not is_scalar(y): # The `not is_scalar(y)` check excludes the string "category" return op(y, x) elif is_object_dtype(x.dtype): result = _comp_method_OBJECT_ARRAY(op, x, y) elif is_datetimelike_v_numeric(x, y): raise TypeError("invalid type comparison") else: # we want to compare like types # we only want to convert to integer like if # we are not NotImplemented, otherwise # we would allow datetime64 (but viewed as i8) against # integer comparisons # we have a datetime/timedelta and may need to convert assert not needs_i8_conversion(x) mask = None if not is_scalar(y) and needs_i8_conversion(y): mask = isna(x) | isna(y) y = y.view('i8') x = x.view('i8') method = getattr(x, op_name, None) if method is not None: with np.errstate(all='ignore'): result = method(y) if result is NotImplemented: raise TypeError("invalid type comparison") else: result = op(x, y) if mask is not None and mask.any(): result[mask] = masker return result def wrapper(self, other, axis=None): # Validate the axis parameter if axis is not None: self._get_axis_number(axis) res_name = get_op_result_name(self, other) if isinstance(other, ABCDataFrame): # pragma: no cover # Defer to DataFrame implementation; fail early return NotImplemented elif isinstance(other, ABCSeries) and not self._indexed_same(other): raise ValueError("Can only compare identically-labeled " "Series objects") elif is_categorical_dtype(self): # Dispatch to Categorical implementation; pd.CategoricalIndex # behavior is non-canonical GH#19513 res_values = dispatch_to_index_op(op, self, other, pd.Categorical) return self._constructor(res_values, index=self.index, name=res_name) if is_datetime64_dtype(self) or is_datetime64tz_dtype(self): # Dispatch to DatetimeIndex to ensure identical # Series/Index behavior if (isinstance(other, datetime.date) and not isinstance(other, datetime.datetime)): # https://github.com/pandas-dev/pandas/issues/21152 # Compatibility for difference between Series comparison w/ # datetime and date msg = ( "Comparing Series of datetimes with 'datetime.date'. " "Currently, the 'datetime.date' is coerced to a " "datetime. In the future pandas will not coerce, " "and {future}. " "To retain the current behavior, " "convert the 'datetime.date' to a datetime with " "'pd.Timestamp'." 
) if op in {operator.lt, operator.le, operator.gt, operator.ge}: future = "a TypeError will be raised" else: future = ( "'the values will not compare equal to the " "'datetime.date'" ) msg = '\n'.join(textwrap.wrap(msg.format(future=future))) warnings.warn(msg, FutureWarning, stacklevel=2) other = pd.Timestamp(other) res_values = dispatch_to_index_op(op, self, other, pd.DatetimeIndex) return self._constructor(res_values, index=self.index, name=res_name) elif is_timedelta64_dtype(self): res_values = dispatch_to_index_op(op, self, other, pd.TimedeltaIndex) return self._constructor(res_values, index=self.index, name=res_name) elif isinstance(other, ABCSeries): # By this point we have checked that self._indexed_same(other) res_values = na_op(self.values, other.values) # rename is needed in case res_name is None and res_values.name # is not. return self._constructor(res_values, index=self.index, name=res_name).rename(res_name) elif isinstance(other, (np.ndarray, pd.Index)): # do not check length of zerodim array # as it will broadcast if other.ndim != 0 and len(self) != len(other): raise ValueError('Lengths must match to compare') res_values = na_op(self.values, np.asarray(other)) result = self._constructor(res_values, index=self.index) # rename is needed in case res_name is None and self.name # is not. return result.__finalize__(self).rename(res_name) elif isinstance(other, pd.Categorical): # ordering of checks matters; by this point we know # that not is_categorical_dtype(self) res_values = op(self.values, other) return self._constructor(res_values, index=self.index, name=res_name) elif is_scalar(other) and isna(other): # numpy does not like comparisons vs None if op is operator.ne: res_values = np.ones(len(self), dtype=bool) else: res_values = np.zeros(len(self), dtype=bool) return self._constructor(res_values, index=self.index, name=res_name, dtype='bool') else: values = self.get_values() if isinstance(other, list): other = np.asarray(other) with np.errstate(all='ignore'): res = na_op(values, other) if is_scalar(res): raise TypeError('Could not compare {typ} type with Series' .format(typ=type(other))) # always return a full value series here res_values = com._values_from_object(res) return self._constructor(res_values, index=self.index, name=res_name, dtype='bool') return wrapper def _bool_method_SERIES(cls, op, special): """ Wrapper function for Series arithmetic operations, to avoid code duplication. """ def na_op(x, y): try: result = op(x, y) except TypeError: if isinstance(y, list): y = construct_1d_object_array_from_listlike(y) if isinstance(y, (np.ndarray, ABCSeries)): if (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)): result = op(x, y) # when would this be hit? 
                else:
                    x = _ensure_object(x)
                    y = _ensure_object(y)
                    result = libops.vec_binop(x, y, op)
            else:
                # let null fall thru
                if not isna(y):
                    y = bool(y)
                try:
                    result = libops.scalar_binop(x, y, op)
                except Exception:
                    raise TypeError("cannot compare a dtyped [{dtype}] array "
                                    "with a scalar of type [{typ}]"
                                    .format(dtype=x.dtype,
                                            typ=type(y).__name__))

        return result

    fill_int = lambda x: x.fillna(0)
    fill_bool = lambda x: x.fillna(False).astype(bool)

    def wrapper(self, other):
        is_self_int_dtype = is_integer_dtype(self.dtype)

        self, other = _align_method_SERIES(self, other, align_asobject=True)

        if isinstance(other, ABCDataFrame):
            # Defer to DataFrame implementation; fail early
            return NotImplemented

        elif isinstance(other, ABCSeries):
            name = get_op_result_name(self, other)
            is_other_int_dtype = is_integer_dtype(other.dtype)
            other = fill_int(other) if is_other_int_dtype else fill_bool(other)

            filler = (fill_int if is_self_int_dtype and is_other_int_dtype
                      else fill_bool)

            res_values = na_op(self.values, other.values)
            unfilled = self._constructor(res_values,
                                         index=self.index, name=name)
            return filler(unfilled)

        else:
            # scalars, list, tuple, np.array
            filler = (fill_int if is_self_int_dtype and
                      is_integer_dtype(np.asarray(other)) else fill_bool)

            res_values = na_op(self.values, other)
            unfilled = self._constructor(res_values, index=self.index)
            return filler(unfilled).__finalize__(self)

    return wrapper


def _flex_method_SERIES(cls, op, special):
    name = _get_op_name(op, special)
    doc = _make_flex_doc(name, 'series')

    @Appender(doc)
    def flex_wrapper(self, other, level=None, fill_value=None, axis=0):
        # validate axis
        if axis is not None:
            self._get_axis_number(axis)
        if isinstance(other, ABCSeries):
            return self._binop(other, op, level=level, fill_value=fill_value)
        elif isinstance(other, (np.ndarray, list, tuple)):
            if len(other) != len(self):
                raise ValueError('Lengths must be equal')
            other = self._constructor(other, self.index)
            return self._binop(other, op, level=level, fill_value=fill_value)
        else:
            if fill_value is not None:
                self = self.fillna(fill_value)
            return self._constructor(op(self, other),
                                     self.index).__finalize__(self)

    flex_wrapper.__name__ = name
    return flex_wrapper


# -----------------------------------------------------------------------------
# DataFrame

def _combine_series_frame(self, other, func, fill_value=None, axis=None,
                          level=None, try_cast=True):
    """
    Apply binary operator `func` to self, other using alignment and fill
    conventions determined by the fill_value, axis, level, and try_cast kwargs.

    Parameters
    ----------
    self : DataFrame
    other : Series
    func : binary operator
    fill_value : object, default None
    axis : {0, 1, 'columns', 'index', None}, default None
    level : int or None, default None
    try_cast : bool, default True

    Returns
    -------
    result : DataFrame
    """
    if fill_value is not None:
        raise NotImplementedError("fill_value {fill} not supported."
.format(fill=fill_value)) if axis is not None: axis = self._get_axis_number(axis) if axis == 0: return self._combine_match_index(other, func, level=level) else: return self._combine_match_columns(other, func, level=level, try_cast=try_cast) else: if not len(other): return self * np.nan if not len(self): # Ambiguous case, use _series so works with DataFrame return self._constructor(data=self._series, index=self.index, columns=self.columns) # default axis is columns return self._combine_match_columns(other, func, level=level, try_cast=try_cast) def _align_method_FRAME(left, right, axis): """ convert rhs to meet lhs dims if input is list, tuple or np.ndarray """ def to_series(right): msg = ('Unable to coerce to Series, length must be {req_len}: ' 'given {given_len}') if axis is not None and left._get_axis_name(axis) == 'index': if len(left.index) != len(right): raise ValueError(msg.format(req_len=len(left.index), given_len=len(right))) right = left._constructor_sliced(right, index=left.index) else: if len(left.columns) != len(right): raise ValueError(msg.format(req_len=len(left.columns), given_len=len(right))) right = left._constructor_sliced(right, index=left.columns) return right if isinstance(right, np.ndarray): if right.ndim == 1: right = to_series(right) elif right.ndim == 2: if left.shape != right.shape: raise ValueError("Unable to coerce to DataFrame, shape " "must be {req_shape}: given {given_shape}" .format(req_shape=left.shape, given_shape=right.shape)) right = left._constructor(right, index=left.index, columns=left.columns) elif right.ndim > 2: raise ValueError('Unable to coerce to Series/DataFrame, dim ' 'must be <= 2: {dim}'.format(dim=right.shape)) elif (is_list_like(right) and not isinstance(right, (ABCSeries, ABCDataFrame))): # GH17901 right = to_series(right) return right def _arith_method_FRAME(cls, op, special): str_rep = _get_opstr(op, cls) op_name = _get_op_name(op, special) eval_kwargs = _gen_eval_kwargs(op_name) fill_zeros = _gen_fill_zeros(op_name) default_axis = _get_frame_op_default_axis(op_name) def na_op(x, y): import pandas.core.computation.expressions as expressions try: result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: xrav = x.ravel() if isinstance(y, (np.ndarray, ABCSeries)): dtype = find_common_type([x.dtype, y.dtype]) result = np.empty(x.size, dtype=dtype) yrav = y.ravel() mask = notna(xrav) & notna(yrav) xrav = xrav[mask] if yrav.shape != mask.shape: # FIXME: GH#5284, GH#5035, GH#19448 # Without specifically raising here we get mismatched # errors in Py3 (TypeError) vs Py2 (ValueError) raise ValueError('Cannot broadcast operands together.') yrav = yrav[mask] if xrav.size: with np.errstate(all='ignore'): result[mask] = op(xrav, yrav) elif isinstance(x, np.ndarray): # mask is only meaningful for x result = np.empty(x.size, dtype=x.dtype) mask = notna(xrav) xrav = xrav[mask] if xrav.size: with np.errstate(all='ignore'): result[mask] = op(xrav, y) else: raise TypeError("cannot perform operation {op} between " "objects of type {x} and {y}" .format(op=op_name, x=type(x), y=type(y))) result, changed = maybe_upcast_putmask(result, ~mask, np.nan) result = result.reshape(x.shape) result = missing.fill_zeros(result, x, y, op_name, fill_zeros) return result if op_name in _op_descriptions: # i.e. 
include "add" but not "__add__" doc = _make_flex_doc(op_name, 'dataframe') else: doc = _arith_doc_FRAME % op_name @Appender(doc) def f(self, other, axis=default_axis, level=None, fill_value=None): other = _align_method_FRAME(self, other, axis) if isinstance(other, ABCDataFrame): # Another DataFrame return self._combine_frame(other, na_op, fill_value, level) elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, na_op, fill_value=fill_value, axis=axis, level=level, try_cast=True) else: if fill_value is not None: self = self.fillna(fill_value) return self._combine_const(other, na_op, try_cast=True) f.__name__ = op_name return f def _flex_comp_method_FRAME(cls, op, special): str_rep = _get_opstr(op, cls) op_name = _get_op_name(op, special) default_axis = _get_frame_op_default_axis(op_name) def na_op(x, y): try: with np.errstate(invalid='ignore'): result = op(x, y) except TypeError: result = mask_cmp_op(x, y, op, (np.ndarray, ABCSeries)) return result @Appender('Wrapper for flexible comparison methods {name}' .format(name=op_name)) def f(self, other, axis=default_axis, level=None): other = _align_method_FRAME(self, other, axis) if isinstance(other, ABCDataFrame): # Another DataFrame if not self._indexed_same(other): self, other = self.align(other, 'outer', level=level, copy=False) return self._compare_frame(other, na_op, str_rep) elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, na_op, fill_value=None, axis=axis, level=level, try_cast=False) else: return self._combine_const(other, na_op, try_cast=False) f.__name__ = op_name return f def _comp_method_FRAME(cls, func, special): str_rep = _get_opstr(func, cls) op_name = _get_op_name(func, special) @Appender('Wrapper for comparison method {name}'.format(name=op_name)) def f(self, other): if isinstance(other, ABCDataFrame): # Another DataFrame if not self._indexed_same(other): raise ValueError('Can only compare identically-labeled ' 'DataFrame objects') return self._compare_frame(other, func, str_rep) elif isinstance(other, ABCSeries): return _combine_series_frame(self, other, func, fill_value=None, axis=None, level=None, try_cast=False) else: # straight boolean comparisons we want to allow all columns # (regardless of dtype to pass thru) See #4537 for discussion. 
res = self._combine_const(other, func, errors='ignore', try_cast=False) return res.fillna(True).astype(bool) f.__name__ = op_name return f # ----------------------------------------------------------------------------- # Panel def _arith_method_PANEL(cls, op, special): # work only for scalars op_name = _get_op_name(op, special) def f(self, other): if not is_scalar(other): raise ValueError('Simple arithmetic with {name} can only be ' 'done with scalar values' .format(name=self._constructor.__name__)) return self._combine(other, op) f.__name__ = op_name return f def _comp_method_PANEL(cls, op, special): str_rep = _get_opstr(op, cls) op_name = _get_op_name(op, special) def na_op(x, y): import pandas.core.computation.expressions as expressions try: result = expressions.evaluate(op, str_rep, x, y) except TypeError: result = mask_cmp_op(x, y, op, np.ndarray) return result @Appender('Wrapper for comparison method {name}'.format(name=op_name)) def f(self, other, axis=None): # Validate the axis parameter if axis is not None: axis = self._get_axis_number(axis) if isinstance(other, self._constructor): return self._compare_constructor(other, na_op, try_cast=False) elif isinstance(other, (self._constructor_sliced, ABCDataFrame, ABCSeries)): raise Exception("input needs alignment for this object [{object}]" .format(object=self._constructor)) else: return self._combine_const(other, na_op, try_cast=False) f.__name__ = op_name return f def _flex_method_PANEL(cls, op, special): str_rep = _get_opstr(op, cls) op_name = _get_op_name(op, special) eval_kwargs = _gen_eval_kwargs(op_name) fill_zeros = _gen_fill_zeros(op_name) def na_op(x, y): import pandas.core.computation.expressions as expressions try: result = expressions.evaluate(op, str_rep, x, y, errors='raise', **eval_kwargs) except TypeError: result = op(x, y) # handles discrepancy between numpy and numexpr on division/mod # by 0 though, given that these are generally (always?) # non-scalars, I'm not sure whether it's worth it at the moment result = missing.fill_zeros(result, x, y, op_name, fill_zeros) return result if op_name in _op_descriptions: doc = _make_flex_doc(op_name, 'panel') else: # doc strings substitors doc = _agg_doc_PANEL.format(op_name=op_name) @Appender(doc) def f(self, other, axis=0): return self._combine(other, na_op, axis=axis) f.__name__ = op_name return f # ----------------------------------------------------------------------------- # Sparse def _cast_sparse_series_op(left, right, opname): """ For SparseSeries operation, coerce to float64 if the result is expected to have NaN or inf values Parameters ---------- left : SparseArray right : SparseArray opname : str Returns ------- left : SparseArray right : SparseArray """ opname = opname.strip('_') if is_integer_dtype(left) and is_integer_dtype(right): # series coerces to float64 if result should have NaN/inf if opname in ('floordiv', 'mod') and (right.values == 0).any(): left = left.astype(np.float64) right = right.astype(np.float64) elif opname in ('rfloordiv', 'rmod') and (left.values == 0).any(): left = left.astype(np.float64) right = right.astype(np.float64) return left, right def _arith_method_SPARSE_SERIES(cls, op, special): """ Wrapper function for Series arithmetic operations, to avoid code duplication. 
""" op_name = _get_op_name(op, special) def wrapper(self, other): if isinstance(other, ABCDataFrame): return NotImplemented elif isinstance(other, ABCSeries): if not isinstance(other, ABCSparseSeries): other = other.to_sparse(fill_value=self.fill_value) return _sparse_series_op(self, other, op, op_name) elif is_scalar(other): with np.errstate(all='ignore'): new_values = op(self.values, other) return self._constructor(new_values, index=self.index, name=self.name) else: # pragma: no cover raise TypeError('operation with {other} not supported' .format(other=type(other))) wrapper.__name__ = op_name return wrapper def _sparse_series_op(left, right, op, name): left, right = left.align(right, join='outer', copy=False) new_index = left.index new_name = get_op_result_name(left, right) from pandas.core.sparse.array import _sparse_array_op lvalues, rvalues = _cast_sparse_series_op(left.values, right.values, name) result = _sparse_array_op(lvalues, rvalues, op, name) return left._constructor(result, index=new_index, name=new_name) def _arith_method_SPARSE_ARRAY(cls, op, special): """ Wrapper function for Series arithmetic operations, to avoid code duplication. """ op_name = _get_op_name(op, special) def wrapper(self, other): from pandas.core.sparse.array import ( SparseArray, _sparse_array_op, _wrap_result, _get_fill) if isinstance(other, np.ndarray): if len(self) != len(other): raise AssertionError("length mismatch: {self} vs. {other}" .format(self=len(self), other=len(other))) if not isinstance(other, SparseArray): dtype = getattr(other, 'dtype', None) other = SparseArray(other, fill_value=self.fill_value, dtype=dtype) return _sparse_array_op(self, other, op, op_name) elif is_scalar(other): with np.errstate(all='ignore'): fill = op(_get_fill(self), np.asarray(other)) result = op(self.sp_values, other) return _wrap_result(op_name, result, self.sp_index, fill) else: # pragma: no cover raise TypeError('operation with {other} not supported' .format(other=type(other))) wrapper.__name__ = op_name return wrapper