411 lines
12 KiB
Python
411 lines
12 KiB
Python
import warnings
|
|
import numpy as np
|
|
from pandas import compat
|
|
from pandas._libs import reduction
|
|
from pandas.core.dtypes.generic import ABCSeries
|
|
from pandas.core.dtypes.common import (
|
|
is_extension_type,
|
|
is_sequence)
|
|
from pandas.util._decorators import cache_readonly
|
|
|
|
from pandas.io.formats.printing import pprint_thing
|
|
|
|
|
|
def frame_apply(obj, func, axis=0, broadcast=None,
|
|
raw=False, reduce=None, result_type=None,
|
|
ignore_failures=False,
|
|
args=None, kwds=None):
|
|
""" construct and return a row or column based frame apply object """
|
|
|
|
axis = obj._get_axis_number(axis)
|
|
if axis == 0:
|
|
klass = FrameRowApply
|
|
elif axis == 1:
|
|
klass = FrameColumnApply
|
|
|
|
return klass(obj, func, broadcast=broadcast,
|
|
raw=raw, reduce=reduce, result_type=result_type,
|
|
ignore_failures=ignore_failures,
|
|
args=args, kwds=kwds)
|
|
|
|
|
|
class FrameApply(object):
|
|
|
|
def __init__(self, obj, func, broadcast, raw, reduce, result_type,
|
|
ignore_failures, args, kwds):
|
|
self.obj = obj
|
|
self.raw = raw
|
|
self.ignore_failures = ignore_failures
|
|
self.args = args or ()
|
|
self.kwds = kwds or {}
|
|
|
|
if result_type not in [None, 'reduce', 'broadcast', 'expand']:
|
|
raise ValueError("invalid value for result_type, must be one "
|
|
"of {None, 'reduce', 'broadcast', 'expand'}")
|
|
|
|
if broadcast is not None:
|
|
warnings.warn("The broadcast argument is deprecated and will "
|
|
"be removed in a future version. You can specify "
|
|
"result_type='broadcast' to broadcast the result "
|
|
"to the original dimensions",
|
|
FutureWarning, stacklevel=4)
|
|
if broadcast:
|
|
result_type = 'broadcast'
|
|
|
|
if reduce is not None:
|
|
warnings.warn("The reduce argument is deprecated and will "
|
|
"be removed in a future version. You can specify "
|
|
"result_type='reduce' to try to reduce the result "
|
|
"to the original dimensions",
|
|
FutureWarning, stacklevel=4)
|
|
if reduce:
|
|
|
|
if result_type is not None:
|
|
raise ValueError(
|
|
"cannot pass both reduce=True and result_type")
|
|
|
|
result_type = 'reduce'
|
|
|
|
self.result_type = result_type
|
|
|
|
# curry if needed
|
|
if kwds or args and not isinstance(func, np.ufunc):
|
|
def f(x):
|
|
return func(x, *args, **kwds)
|
|
else:
|
|
f = func
|
|
|
|
self.f = f
|
|
|
|
# results
|
|
self.result = None
|
|
self.res_index = None
|
|
self.res_columns = None
|
|
|
|
@property
|
|
def columns(self):
|
|
return self.obj.columns
|
|
|
|
@property
|
|
def index(self):
|
|
return self.obj.index
|
|
|
|
@cache_readonly
|
|
def values(self):
|
|
return self.obj.values
|
|
|
|
@cache_readonly
|
|
def dtypes(self):
|
|
return self.obj.dtypes
|
|
|
|
@property
|
|
def agg_axis(self):
|
|
return self.obj._get_agg_axis(self.axis)
|
|
|
|
def get_result(self):
|
|
""" compute the results """
|
|
|
|
# all empty
|
|
if len(self.columns) == 0 and len(self.index) == 0:
|
|
return self.apply_empty_result()
|
|
|
|
# string dispatch
|
|
if isinstance(self.f, compat.string_types):
|
|
# Support for `frame.transform('method')`
|
|
# Some methods (shift, etc.) require the axis argument, others
|
|
# don't, so inspect and insert if nescessary.
|
|
func = getattr(self.obj, self.f)
|
|
sig = compat.signature(func)
|
|
if 'axis' in sig.args:
|
|
self.kwds['axis'] = self.axis
|
|
return func(*self.args, **self.kwds)
|
|
|
|
# ufunc
|
|
elif isinstance(self.f, np.ufunc):
|
|
with np.errstate(all='ignore'):
|
|
results = self.f(self.values)
|
|
return self.obj._constructor(data=results, index=self.index,
|
|
columns=self.columns, copy=False)
|
|
|
|
# broadcasting
|
|
if self.result_type == 'broadcast':
|
|
return self.apply_broadcast()
|
|
|
|
# one axis empty
|
|
elif not all(self.obj.shape):
|
|
return self.apply_empty_result()
|
|
|
|
# raw
|
|
elif self.raw and not self.obj._is_mixed_type:
|
|
return self.apply_raw()
|
|
|
|
return self.apply_standard()
|
|
|
|
def apply_empty_result(self):
|
|
"""
|
|
we have an empty result; at least 1 axis is 0
|
|
|
|
we will try to apply the function to an empty
|
|
series in order to see if this is a reduction function
|
|
"""
|
|
|
|
# we are not asked to reduce or infer reduction
|
|
# so just return a copy of the existing object
|
|
if self.result_type not in ['reduce', None]:
|
|
return self.obj.copy()
|
|
|
|
# we may need to infer
|
|
reduce = self.result_type == 'reduce'
|
|
|
|
from pandas import Series
|
|
if not reduce:
|
|
|
|
EMPTY_SERIES = Series([])
|
|
try:
|
|
r = self.f(EMPTY_SERIES, *self.args, **self.kwds)
|
|
reduce = not isinstance(r, Series)
|
|
except Exception:
|
|
pass
|
|
|
|
if reduce:
|
|
return self.obj._constructor_sliced(np.nan, index=self.agg_axis)
|
|
else:
|
|
return self.obj.copy()
|
|
|
|
def apply_raw(self):
|
|
""" apply to the values as a numpy array """
|
|
|
|
try:
|
|
result = reduction.reduce(self.values, self.f, axis=self.axis)
|
|
except Exception:
|
|
result = np.apply_along_axis(self.f, self.axis, self.values)
|
|
|
|
# TODO: mixed type case
|
|
if result.ndim == 2:
|
|
return self.obj._constructor(result,
|
|
index=self.index,
|
|
columns=self.columns)
|
|
else:
|
|
return self.obj._constructor_sliced(result,
|
|
index=self.agg_axis)
|
|
|
|
def apply_broadcast(self, target):
|
|
result_values = np.empty_like(target.values)
|
|
|
|
# axis which we want to compare compliance
|
|
result_compare = target.shape[0]
|
|
|
|
for i, col in enumerate(target.columns):
|
|
res = self.f(target[col])
|
|
ares = np.asarray(res).ndim
|
|
|
|
# must be a scalar or 1d
|
|
if ares > 1:
|
|
raise ValueError("too many dims to broadcast")
|
|
elif ares == 1:
|
|
|
|
# must match return dim
|
|
if result_compare != len(res):
|
|
raise ValueError("cannot broadcast result")
|
|
|
|
result_values[:, i] = res
|
|
|
|
# we *always* preserve the original index / columns
|
|
result = self.obj._constructor(result_values,
|
|
index=target.index,
|
|
columns=target.columns)
|
|
return result
|
|
|
|
def apply_standard(self):
|
|
|
|
# try to reduce first (by default)
|
|
# this only matters if the reduction in values is of different dtype
|
|
# e.g. if we want to apply to a SparseFrame, then can't directly reduce
|
|
|
|
# we cannot reduce using non-numpy dtypes,
|
|
# as demonstrated in gh-12244
|
|
if (self.result_type in ['reduce', None] and
|
|
not self.dtypes.apply(is_extension_type).any()):
|
|
|
|
# Create a dummy Series from an empty array
|
|
from pandas import Series
|
|
values = self.values
|
|
index = self.obj._get_axis(self.axis)
|
|
labels = self.agg_axis
|
|
empty_arr = np.empty(len(index), dtype=values.dtype)
|
|
dummy = Series(empty_arr, index=index, dtype=values.dtype)
|
|
|
|
try:
|
|
result = reduction.reduce(values, self.f,
|
|
axis=self.axis,
|
|
dummy=dummy,
|
|
labels=labels)
|
|
return self.obj._constructor_sliced(result, index=labels)
|
|
except Exception:
|
|
pass
|
|
|
|
# compute the result using the series generator
|
|
self.apply_series_generator()
|
|
|
|
# wrap results
|
|
return self.wrap_results()
|
|
|
|
def apply_series_generator(self):
|
|
series_gen = self.series_generator
|
|
res_index = self.result_index
|
|
|
|
i = None
|
|
keys = []
|
|
results = {}
|
|
if self.ignore_failures:
|
|
successes = []
|
|
for i, v in enumerate(series_gen):
|
|
try:
|
|
results[i] = self.f(v)
|
|
keys.append(v.name)
|
|
successes.append(i)
|
|
except Exception:
|
|
pass
|
|
|
|
# so will work with MultiIndex
|
|
if len(successes) < len(res_index):
|
|
res_index = res_index.take(successes)
|
|
|
|
else:
|
|
try:
|
|
for i, v in enumerate(series_gen):
|
|
results[i] = self.f(v)
|
|
keys.append(v.name)
|
|
except Exception as e:
|
|
if hasattr(e, 'args'):
|
|
|
|
# make sure i is defined
|
|
if i is not None:
|
|
k = res_index[i]
|
|
e.args = e.args + ('occurred at index %s' %
|
|
pprint_thing(k), )
|
|
raise
|
|
|
|
self.results = results
|
|
self.res_index = res_index
|
|
self.res_columns = self.result_columns
|
|
|
|
def wrap_results(self):
|
|
results = self.results
|
|
|
|
# see if we can infer the results
|
|
if len(results) > 0 and is_sequence(results[0]):
|
|
|
|
return self.wrap_results_for_axis()
|
|
|
|
# dict of scalars
|
|
result = self.obj._constructor_sliced(results)
|
|
result.index = self.res_index
|
|
|
|
return result
|
|
|
|
|
|
class FrameRowApply(FrameApply):
|
|
axis = 0
|
|
|
|
def get_result(self):
|
|
|
|
# dispatch to agg
|
|
if isinstance(self.f, (list, dict)):
|
|
return self.obj.aggregate(self.f, axis=self.axis,
|
|
*self.args, **self.kwds)
|
|
|
|
return super(FrameRowApply, self).get_result()
|
|
|
|
def apply_broadcast(self):
|
|
return super(FrameRowApply, self).apply_broadcast(self.obj)
|
|
|
|
@property
|
|
def series_generator(self):
|
|
return (self.obj._ixs(i, axis=1)
|
|
for i in range(len(self.columns)))
|
|
|
|
@property
|
|
def result_index(self):
|
|
return self.columns
|
|
|
|
@property
|
|
def result_columns(self):
|
|
return self.index
|
|
|
|
def wrap_results_for_axis(self):
|
|
""" return the results for the rows """
|
|
|
|
results = self.results
|
|
result = self.obj._constructor(data=results)
|
|
|
|
if not isinstance(results[0], ABCSeries):
|
|
try:
|
|
result.index = self.res_columns
|
|
except ValueError:
|
|
pass
|
|
|
|
try:
|
|
result.columns = self.res_index
|
|
except ValueError:
|
|
pass
|
|
|
|
return result
|
|
|
|
|
|
class FrameColumnApply(FrameApply):
|
|
axis = 1
|
|
|
|
def apply_broadcast(self):
|
|
result = super(FrameColumnApply, self).apply_broadcast(self.obj.T)
|
|
return result.T
|
|
|
|
@property
|
|
def series_generator(self):
|
|
constructor = self.obj._constructor_sliced
|
|
return (constructor(arr, index=self.columns, name=name)
|
|
for i, (arr, name) in enumerate(zip(self.values,
|
|
self.index)))
|
|
|
|
@property
|
|
def result_index(self):
|
|
return self.index
|
|
|
|
@property
|
|
def result_columns(self):
|
|
return self.columns
|
|
|
|
def wrap_results_for_axis(self):
|
|
""" return the results for the columns """
|
|
results = self.results
|
|
|
|
# we have requested to expand
|
|
if self.result_type == 'expand':
|
|
result = self.infer_to_same_shape()
|
|
|
|
# we have a non-series and don't want inference
|
|
elif not isinstance(results[0], ABCSeries):
|
|
from pandas import Series
|
|
result = Series(results)
|
|
result.index = self.res_index
|
|
|
|
# we may want to infer results
|
|
else:
|
|
result = self.infer_to_same_shape()
|
|
|
|
return result
|
|
|
|
def infer_to_same_shape(self):
|
|
""" infer the results to the same shape as the input object """
|
|
results = self.results
|
|
|
|
result = self.obj._constructor(data=results)
|
|
result = result.T
|
|
|
|
# set the index
|
|
result.index = self.res_index
|
|
|
|
# infer dtypes
|
|
result = result.infer_objects()
|
|
|
|
return result
|