laywerrobot/lib/python3.6/site-packages/pandas/core/apply.py

import warnings
import numpy as np
from pandas import compat
from pandas._libs import reduction
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.common import (
    is_extension_type,
    is_sequence)
from pandas.util._decorators import cache_readonly

from pandas.io.formats.printing import pprint_thing


def frame_apply(obj, func, axis=0, broadcast=None,
                raw=False, reduce=None, result_type=None,
                ignore_failures=False,
                args=None, kwds=None):
    """ construct and return a row or column based frame apply object """

    axis = obj._get_axis_number(axis)
    if axis == 0:
        klass = FrameRowApply
    elif axis == 1:
        klass = FrameColumnApply

    return klass(obj, func, broadcast=broadcast,
                 raw=raw, reduce=reduce, result_type=result_type,
                 ignore_failures=ignore_failures,
                 args=args, kwds=kwds)


class FrameApply(object):

    def __init__(self, obj, func, broadcast, raw, reduce, result_type,
                 ignore_failures, args, kwds):
        self.obj = obj
        self.raw = raw
        self.ignore_failures = ignore_failures
        self.args = args or ()
        self.kwds = kwds or {}

        if result_type not in [None, 'reduce', 'broadcast', 'expand']:
            raise ValueError("invalid value for result_type, must be one "
                             "of {None, 'reduce', 'broadcast', 'expand'}")

        if broadcast is not None:
            warnings.warn("The broadcast argument is deprecated and will "
                          "be removed in a future version. You can specify "
                          "result_type='broadcast' to broadcast the result "
                          "to the original dimensions",
                          FutureWarning, stacklevel=4)
            if broadcast:
                result_type = 'broadcast'

        if reduce is not None:
            warnings.warn("The reduce argument is deprecated and will "
                          "be removed in a future version. You can specify "
                          "result_type='reduce' to try to reduce the result "
                          "to the original dimensions",
                          FutureWarning, stacklevel=4)
            if reduce:

                if result_type is not None:
                    raise ValueError(
                        "cannot pass both reduce=True and result_type")

                result_type = 'reduce'

        self.result_type = result_type

        # curry if needed
        if kwds or args and not isinstance(func, np.ufunc):
            def f(x):
                return func(x, *args, **kwds)
        else:
            f = func

        self.f = f

        # results
        self.result = None
        self.res_index = None
        self.res_columns = None

    @property
    def columns(self):
        return self.obj.columns

    @property
    def index(self):
        return self.obj.index

    @cache_readonly
    def values(self):
        return self.obj.values

    @cache_readonly
    def dtypes(self):
        return self.obj.dtypes

    @property
    def agg_axis(self):
        return self.obj._get_agg_axis(self.axis)

    def get_result(self):
        """ compute the results """

        # all empty
        if len(self.columns) == 0 and len(self.index) == 0:
            return self.apply_empty_result()

        # string dispatch
        if isinstance(self.f, compat.string_types):
            # Support for `frame.transform('method')`
            # Some methods (shift, etc.) require the axis argument, others
            # don't, so inspect and insert if nescessary.
            func = getattr(self.obj, self.f)
            sig = compat.signature(func)
            if 'axis' in sig.args:
                self.kwds['axis'] = self.axis
            return func(*self.args, **self.kwds)

        # ufunc
        elif isinstance(self.f, np.ufunc):
            with np.errstate(all='ignore'):
                results = self.f(self.values)
            return self.obj._constructor(data=results, index=self.index,
                                         columns=self.columns, copy=False)

        # broadcasting
        if self.result_type == 'broadcast':
            return self.apply_broadcast()

        # one axis empty
        elif not all(self.obj.shape):
            return self.apply_empty_result()

        # raw
        elif self.raw and not self.obj._is_mixed_type:
            return self.apply_raw()

        return self.apply_standard()

    def apply_empty_result(self):
        """
        we have an empty result; at least 1 axis is 0

        we will try to apply the function to an empty
        series in order to see if this is a reduction function
        """

        # we are not asked to reduce or infer reduction
        # so just return a copy of the existing object
        if self.result_type not in ['reduce', None]:
            return self.obj.copy()

        # we may need to infer
        reduce = self.result_type == 'reduce'

        from pandas import Series
        if not reduce:

            EMPTY_SERIES = Series([])
            try:
                r = self.f(EMPTY_SERIES, *self.args, **self.kwds)
                reduce = not isinstance(r, Series)
            except Exception:
                pass

        if reduce:
            return self.obj._constructor_sliced(np.nan, index=self.agg_axis)
        else:
            return self.obj.copy()

    def apply_raw(self):
        """ apply to the values as a numpy array """

        try:
            result = reduction.reduce(self.values, self.f, axis=self.axis)
        except Exception:
            result = np.apply_along_axis(self.f, self.axis, self.values)

        # TODO: mixed type case
        if result.ndim == 2:
            return self.obj._constructor(result,
                                         index=self.index,
                                         columns=self.columns)
        else:
            return self.obj._constructor_sliced(result,
                                                index=self.agg_axis)

    def apply_broadcast(self, target):
        result_values = np.empty_like(target.values)

        # axis which we want to compare compliance
        result_compare = target.shape[0]

        for i, col in enumerate(target.columns):
            res = self.f(target[col])
            ares = np.asarray(res).ndim

            # must be a scalar or 1d
            if ares > 1:
                raise ValueError("too many dims to broadcast")
            elif ares == 1:

                # must match return dim
                if result_compare != len(res):
                    raise ValueError("cannot broadcast result")

            result_values[:, i] = res

        # we *always* preserve the original index / columns
        result = self.obj._constructor(result_values,
                                       index=target.index,
                                       columns=target.columns)
        return result

    def apply_standard(self):

        # try to reduce first (by default)
        # this only matters if the reduction in values is of different dtype
        # e.g. if we want to apply to a SparseFrame, then can't directly reduce

        # we cannot reduce using non-numpy dtypes,
        # as demonstrated in gh-12244
        if (self.result_type in ['reduce', None] and
                not self.dtypes.apply(is_extension_type).any()):

            # Create a dummy Series from an empty array
            from pandas import Series
            values = self.values
            index = self.obj._get_axis(self.axis)
            labels = self.agg_axis
            empty_arr = np.empty(len(index), dtype=values.dtype)
            dummy = Series(empty_arr, index=index, dtype=values.dtype)

            try:
                result = reduction.reduce(values, self.f,
                                          axis=self.axis,
                                          dummy=dummy,
                                          labels=labels)
                return self.obj._constructor_sliced(result, index=labels)
            except Exception:
                pass

        # compute the result using the series generator
        self.apply_series_generator()

        # wrap results
        return self.wrap_results()

    def apply_series_generator(self):
        series_gen = self.series_generator
        res_index = self.result_index

        i = None
        keys = []
        results = {}
        if self.ignore_failures:
            successes = []
            for i, v in enumerate(series_gen):
                try:
                    results[i] = self.f(v)
                    keys.append(v.name)
                    successes.append(i)
                except Exception:
                    pass

            # so will work with MultiIndex
            if len(successes) < len(res_index):
                res_index = res_index.take(successes)

        else:
            try:
                for i, v in enumerate(series_gen):
                    results[i] = self.f(v)
                    keys.append(v.name)
            except Exception as e:
                if hasattr(e, 'args'):

                    # make sure i is defined
                    if i is not None:
                        k = res_index[i]
                        e.args = e.args + ('occurred at index %s' %
                                           pprint_thing(k), )
                raise

        self.results = results
        self.res_index = res_index
        self.res_columns = self.result_columns

    def wrap_results(self):
        results = self.results

        # see if we can infer the results
        if len(results) > 0 and is_sequence(results[0]):

            return self.wrap_results_for_axis()

        # dict of scalars
        result = self.obj._constructor_sliced(results)
        result.index = self.res_index

        return result


class FrameRowApply(FrameApply):
    axis = 0

    def get_result(self):

        # dispatch to agg
        if isinstance(self.f, (list, dict)):
            return self.obj.aggregate(self.f, axis=self.axis,
                                      *self.args, **self.kwds)

        return super(FrameRowApply, self).get_result()

    def apply_broadcast(self):
        return super(FrameRowApply, self).apply_broadcast(self.obj)

    @property
    def series_generator(self):
        return (self.obj._ixs(i, axis=1)
                for i in range(len(self.columns)))

    @property
    def result_index(self):
        return self.columns

    @property
    def result_columns(self):
        return self.index

    def wrap_results_for_axis(self):
        """ return the results for the rows """

        results = self.results
        result = self.obj._constructor(data=results)

        if not isinstance(results[0], ABCSeries):
            try:
                result.index = self.res_columns
            except ValueError:
                pass

        try:
            result.columns = self.res_index
        except ValueError:
            pass

        return result


class FrameColumnApply(FrameApply):
    axis = 1

    def apply_broadcast(self):
        result = super(FrameColumnApply, self).apply_broadcast(self.obj.T)
        return result.T

    @property
    def series_generator(self):
        constructor = self.obj._constructor_sliced
        return (constructor(arr, index=self.columns, name=name)
                for i, (arr, name) in enumerate(zip(self.values,
                                                    self.index)))

    @property
    def result_index(self):
        return self.index

    @property
    def result_columns(self):
        return self.columns

    def wrap_results_for_axis(self):
        """ return the results for the columns """
        results = self.results

        # we have requested to expand
        if self.result_type == 'expand':
            result = self.infer_to_same_shape()

        # we have a non-series and don't want inference
        elif not isinstance(results[0], ABCSeries):
            from pandas import Series
            result = Series(results)
            result.index = self.res_index

        # we may want to infer results
        else:
            result = self.infer_to_same_shape()

        return result

    def infer_to_same_shape(self):
        """ infer the results to the same shape as the input object """
        results = self.results

        result = self.obj._constructor(data=results)
        result = result.T

        # set the index
        result.index = self.res_index

        # infer dtypes
        result = result.infer_objects()

        return result
first commit 2020-08-27 21:55:39 +02:00			`import warnings`
			`import numpy as np`
			`from pandas import compat`
			`from pandas._libs import reduction`
			`from pandas.core.dtypes.generic import ABCSeries`
			`from pandas.core.dtypes.common import (`
			`is_extension_type,`
			`is_sequence)`
			`from pandas.util._decorators import cache_readonly`

			`from pandas.io.formats.printing import pprint_thing`


			`def frame_apply(obj, func, axis=0, broadcast=None,`
			`raw=False, reduce=None, result_type=None,`
			`ignore_failures=False,`
			`args=None, kwds=None):`
			`""" construct and return a row or column based frame apply object """`

			`axis = obj._get_axis_number(axis)`
			`if axis == 0:`
			`klass = FrameRowApply`
			`elif axis == 1:`
			`klass = FrameColumnApply`

			`return klass(obj, func, broadcast=broadcast,`
			`raw=raw, reduce=reduce, result_type=result_type,`
			`ignore_failures=ignore_failures,`
			`args=args, kwds=kwds)`


			`class FrameApply(object):`

			`def __init__(self, obj, func, broadcast, raw, reduce, result_type,`
			`ignore_failures, args, kwds):`
			`self.obj = obj`
			`self.raw = raw`
			`self.ignore_failures = ignore_failures`
			`self.args = args or ()`
			`self.kwds = kwds or {}`

			`if result_type not in [None, 'reduce', 'broadcast', 'expand']:`
			`raise ValueError("invalid value for result_type, must be one "`
			`"of {None, 'reduce', 'broadcast', 'expand'}")`

			`if broadcast is not None:`
			`warnings.warn("The broadcast argument is deprecated and will "`
			`"be removed in a future version. You can specify "`
			`"result_type='broadcast' to broadcast the result "`
			`"to the original dimensions",`
			`FutureWarning, stacklevel=4)`
			`if broadcast:`
			`result_type = 'broadcast'`

			`if reduce is not None:`
			`warnings.warn("The reduce argument is deprecated and will "`
			`"be removed in a future version. You can specify "`
			`"result_type='reduce' to try to reduce the result "`
			`"to the original dimensions",`
			`FutureWarning, stacklevel=4)`
			`if reduce:`

			`if result_type is not None:`
			`raise ValueError(`
			`"cannot pass both reduce=True and result_type")`

			`result_type = 'reduce'`

			`self.result_type = result_type`

			`# curry if needed`
			`if kwds or args and not isinstance(func, np.ufunc):`
			`def f(x):`
			`return func(x, args, *kwds)`
			`else:`
			`f = func`

			`self.f = f`

			`# results`
			`self.result = None`
			`self.res_index = None`
			`self.res_columns = None`

			`@property`
			`def columns(self):`
			`return self.obj.columns`

			`@property`
			`def index(self):`
			`return self.obj.index`

			`@cache_readonly`
			`def values(self):`
			`return self.obj.values`

			`@cache_readonly`
			`def dtypes(self):`
			`return self.obj.dtypes`

			`@property`
			`def agg_axis(self):`
			`return self.obj._get_agg_axis(self.axis)`

			`def get_result(self):`
			`""" compute the results """`

			`# all empty`
			`if len(self.columns) == 0 and len(self.index) == 0:`
			`return self.apply_empty_result()`

			`# string dispatch`
			`if isinstance(self.f, compat.string_types):`
			# Support for `frame.transform('method')`
			`# Some methods (shift, etc.) require the axis argument, others`
			`# don't, so inspect and insert if nescessary.`
			`func = getattr(self.obj, self.f)`
			`sig = compat.signature(func)`
			`if 'axis' in sig.args:`
			`self.kwds['axis'] = self.axis`
			`return func(self.args, *self.kwds)`

			`# ufunc`
			`elif isinstance(self.f, np.ufunc):`
			`with np.errstate(all='ignore'):`
			`results = self.f(self.values)`
			`return self.obj._constructor(data=results, index=self.index,`
			`columns=self.columns, copy=False)`

			`# broadcasting`
			`if self.result_type == 'broadcast':`
			`return self.apply_broadcast()`

			`# one axis empty`
			`elif not all(self.obj.shape):`
			`return self.apply_empty_result()`

			`# raw`
			`elif self.raw and not self.obj._is_mixed_type:`
			`return self.apply_raw()`

			`return self.apply_standard()`

			`def apply_empty_result(self):`
			`"""`
			`we have an empty result; at least 1 axis is 0`

			`we will try to apply the function to an empty`
			`series in order to see if this is a reduction function`
			`"""`

			`# we are not asked to reduce or infer reduction`
			`# so just return a copy of the existing object`
			`if self.result_type not in ['reduce', None]:`
			`return self.obj.copy()`

			`# we may need to infer`
			`reduce = self.result_type == 'reduce'`

			`from pandas import Series`
			`if not reduce:`

			`EMPTY_SERIES = Series([])`
			`try:`
			`r = self.f(EMPTY_SERIES, self.args, *self.kwds)`
			`reduce = not isinstance(r, Series)`
			`except Exception:`
			`pass`

			`if reduce:`
			`return self.obj._constructor_sliced(np.nan, index=self.agg_axis)`
			`else:`
			`return self.obj.copy()`

			`def apply_raw(self):`
			`""" apply to the values as a numpy array """`

			`try:`
			`result = reduction.reduce(self.values, self.f, axis=self.axis)`
			`except Exception:`
			`result = np.apply_along_axis(self.f, self.axis, self.values)`

			`# TODO: mixed type case`
			`if result.ndim == 2:`
			`return self.obj._constructor(result,`
			`index=self.index,`
			`columns=self.columns)`
			`else:`
			`return self.obj._constructor_sliced(result,`
			`index=self.agg_axis)`

			`def apply_broadcast(self, target):`
			`result_values = np.empty_like(target.values)`

			`# axis which we want to compare compliance`
			`result_compare = target.shape[0]`

			`for i, col in enumerate(target.columns):`
			`res = self.f(target[col])`
			`ares = np.asarray(res).ndim`

			`# must be a scalar or 1d`
			`if ares > 1:`
			`raise ValueError("too many dims to broadcast")`
			`elif ares == 1:`

			`# must match return dim`
			`if result_compare != len(res):`
			`raise ValueError("cannot broadcast result")`

			`result_values[:, i] = res`

			`# we always preserve the original index / columns`
			`result = self.obj._constructor(result_values,`
			`index=target.index,`
			`columns=target.columns)`
			`return result`

			`def apply_standard(self):`

			`# try to reduce first (by default)`
			`# this only matters if the reduction in values is of different dtype`
			`# e.g. if we want to apply to a SparseFrame, then can't directly reduce`

			`# we cannot reduce using non-numpy dtypes,`
			`# as demonstrated in gh-12244`
			`if (self.result_type in ['reduce', None] and`
			`not self.dtypes.apply(is_extension_type).any()):`

			`# Create a dummy Series from an empty array`
			`from pandas import Series`
			`values = self.values`
			`index = self.obj._get_axis(self.axis)`
			`labels = self.agg_axis`
			`empty_arr = np.empty(len(index), dtype=values.dtype)`
			`dummy = Series(empty_arr, index=index, dtype=values.dtype)`

			`try:`
			`result = reduction.reduce(values, self.f,`
			`axis=self.axis,`
			`dummy=dummy,`
			`labels=labels)`
			`return self.obj._constructor_sliced(result, index=labels)`
			`except Exception:`
			`pass`

			`# compute the result using the series generator`
			`self.apply_series_generator()`

			`# wrap results`
			`return self.wrap_results()`

			`def apply_series_generator(self):`
			`series_gen = self.series_generator`
			`res_index = self.result_index`

			`i = None`
			`keys = []`
			`results = {}`
			`if self.ignore_failures:`
			`successes = []`
			`for i, v in enumerate(series_gen):`
			`try:`
			`results[i] = self.f(v)`
			`keys.append(v.name)`
			`successes.append(i)`
			`except Exception:`
			`pass`

			`# so will work with MultiIndex`
			`if len(successes) < len(res_index):`
			`res_index = res_index.take(successes)`

			`else:`
			`try:`
			`for i, v in enumerate(series_gen):`
			`results[i] = self.f(v)`
			`keys.append(v.name)`
			`except Exception as e:`
			`if hasattr(e, 'args'):`

			`# make sure i is defined`
			`if i is not None:`
			`k = res_index[i]`
			`e.args = e.args + ('occurred at index %s' %`
			`pprint_thing(k), )`
			`raise`

			`self.results = results`
			`self.res_index = res_index`
			`self.res_columns = self.result_columns`

			`def wrap_results(self):`
			`results = self.results`

			`# see if we can infer the results`
			`if len(results) > 0 and is_sequence(results[0]):`

			`return self.wrap_results_for_axis()`

			`# dict of scalars`
			`result = self.obj._constructor_sliced(results)`
			`result.index = self.res_index`

			`return result`


			`class FrameRowApply(FrameApply):`
			`axis = 0`

			`def get_result(self):`

			`# dispatch to agg`
			`if isinstance(self.f, (list, dict)):`
			`return self.obj.aggregate(self.f, axis=self.axis,`
			`self.args, *self.kwds)`

			`return super(FrameRowApply, self).get_result()`

			`def apply_broadcast(self):`
			`return super(FrameRowApply, self).apply_broadcast(self.obj)`

			`@property`
			`def series_generator(self):`
			`return (self.obj._ixs(i, axis=1)`
			`for i in range(len(self.columns)))`

			`@property`
			`def result_index(self):`
			`return self.columns`

			`@property`
			`def result_columns(self):`
			`return self.index`

			`def wrap_results_for_axis(self):`
			`""" return the results for the rows """`

			`results = self.results`
			`result = self.obj._constructor(data=results)`

			`if not isinstance(results[0], ABCSeries):`
			`try:`
			`result.index = self.res_columns`
			`except ValueError:`
			`pass`

			`try:`
			`result.columns = self.res_index`
			`except ValueError:`
			`pass`

			`return result`


			`class FrameColumnApply(FrameApply):`
			`axis = 1`

			`def apply_broadcast(self):`
			`result = super(FrameColumnApply, self).apply_broadcast(self.obj.T)`
			`return result.T`

			`@property`
			`def series_generator(self):`
			`constructor = self.obj._constructor_sliced`
			`return (constructor(arr, index=self.columns, name=name)`
			`for i, (arr, name) in enumerate(zip(self.values,`
			`self.index)))`

			`@property`
			`def result_index(self):`
			`return self.index`

			`@property`
			`def result_columns(self):`
			`return self.columns`

			`def wrap_results_for_axis(self):`
			`""" return the results for the columns """`
			`results = self.results`

			`# we have requested to expand`
			`if self.result_type == 'expand':`
			`result = self.infer_to_same_shape()`

			`# we have a non-series and don't want inference`
			`elif not isinstance(results[0], ABCSeries):`
			`from pandas import Series`
			`result = Series(results)`
			`result.index = self.res_index`

			`# we may want to infer results`
			`else:`
			`result = self.infer_to_same_shape()`

			`return result`

			`def infer_to_same_shape(self):`
			`""" infer the results to the same shape as the input object """`
			`results = self.results`

			`result = self.obj._constructor(data=results)`
			`result = result.T`

			`# set the index`
			`result.index = self.res_index`

			`# infer dtypes`
			`result = result.infer_objects()`

			`return result`