laywerrobot/lib/python3.6/site-packages/pandas/core/common.py

633 lines
16 KiB
Python
Raw Normal View History

2020-08-27 21:55:39 +02:00
"""
Misc tools for implementing data structures
"""
from datetime import datetime, timedelta
from functools import partial
import inspect
import collections
import numpy as np
from pandas._libs import lib, tslib
from pandas import compat
from pandas.compat import long, zip, iteritems, PY36, OrderedDict
from pandas.core.config import get_option
from pandas.core.dtypes.generic import ABCSeries, ABCIndex
from pandas.core.dtypes.common import _NS_DTYPE, is_integer
from pandas.core.dtypes.inference import _iterable_not_string
from pandas.core.dtypes.missing import isna, isnull, notnull # noqa
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
class SettingWithCopyError(ValueError):
pass
class SettingWithCopyWarning(Warning):
pass
def flatten(l):
"""Flatten an arbitrarily nested sequence.
Parameters
----------
l : sequence
The non string sequence to flatten
Notes
-----
This doesn't consider strings sequences.
Returns
-------
flattened : generator
"""
for el in l:
if _iterable_not_string(el):
for s in flatten(el):
yield s
else:
yield el
def _consensus_name_attr(objs):
name = objs[0].name
for obj in objs[1:]:
try:
if obj.name != name:
name = None
except ValueError:
name = None
return name
def _get_info_slice(obj, indexer):
"""Slice the info axis of `obj` with `indexer`."""
if not hasattr(obj, '_info_axis_number'):
msg = 'object of type {typ!r} has no info axis'
raise TypeError(msg.format(typ=type(obj).__name__))
slices = [slice(None)] * obj.ndim
slices[obj._info_axis_number] = indexer
return tuple(slices)
def _maybe_box(indexer, values, obj, key):
# if we have multiples coming back, box em
if isinstance(values, np.ndarray):
return obj[indexer.get_loc(key)]
# return the value
return values
def _maybe_box_datetimelike(value):
# turn a datetime like into a Timestamp/timedelta as needed
if isinstance(value, (np.datetime64, datetime)):
value = tslib.Timestamp(value)
elif isinstance(value, (np.timedelta64, timedelta)):
value = tslib.Timedelta(value)
return value
_values_from_object = lib.values_from_object
def is_bool_indexer(key):
if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)):
if key.dtype == np.object_:
key = np.asarray(_values_from_object(key))
if not lib.is_bool_array(key):
if isna(key).any():
raise ValueError('cannot index with vector containing '
'NA / NaN values')
return False
return True
elif key.dtype == np.bool_:
return True
elif isinstance(key, list):
try:
arr = np.asarray(key)
return arr.dtype == np.bool_ and len(arr) == len(key)
except TypeError: # pragma: no cover
return False
return False
def _default_index(n):
from pandas.core.index import RangeIndex
return RangeIndex(0, n, name=None)
def _mut_exclusive(**kwargs):
item1, item2 = kwargs.items()
label1, val1 = item1
label2, val2 = item2
if val1 is not None and val2 is not None:
msg = 'mutually exclusive arguments: {label1!r} and {label2!r}'
raise TypeError(msg.format(label1=label1, label2=label2))
elif val1 is not None:
return val1
else:
return val2
def _not_none(*args):
"""Returns a generator consisting of the arguments that are not None"""
return (arg for arg in args if arg is not None)
def _any_none(*args):
"""Returns a boolean indicating if any argument is None"""
for arg in args:
if arg is None:
return True
return False
def _all_none(*args):
"""Returns a boolean indicating if all arguments are None"""
for arg in args:
if arg is not None:
return False
return True
def _any_not_none(*args):
"""Returns a boolean indicating if any argument is not None"""
for arg in args:
if arg is not None:
return True
return False
def _all_not_none(*args):
"""Returns a boolean indicating if all arguments are not None"""
for arg in args:
if arg is None:
return False
return True
def _count_not_none(*args):
"""Returns the count of arguments that are not None"""
return sum(x is not None for x in args)
def _try_sort(iterable):
listed = list(iterable)
try:
return sorted(listed)
except Exception:
return listed
def _dict_keys_to_ordered_list(mapping):
# when pandas drops support for Python < 3.6, this function
# can be replaced by a simple list(mapping.keys())
if PY36 or isinstance(mapping, OrderedDict):
keys = list(mapping.keys())
else:
keys = _try_sort(mapping)
return keys
def iterpairs(seq):
"""
Parameters
----------
seq : sequence
Returns
-------
iterator returning overlapping pairs of elements
Examples
--------
>>> list(iterpairs([1, 2, 3, 4]))
[(1, 2), (2, 3), (3, 4)]
"""
# input may not be sliceable
seq_it = iter(seq)
seq_it_next = iter(seq)
next(seq_it_next)
return zip(seq_it, seq_it_next)
def split_ranges(mask):
""" Generates tuples of ranges which cover all True value in mask
>>> list(split_ranges([1,0,0,1,0]))
[(0, 1), (3, 4)]
"""
ranges = [(0, len(mask))]
for pos, val in enumerate(mask):
if not val: # this pos should be omitted, split off the prefix range
r = ranges.pop()
if pos > r[0]: # yield non-zero range
yield (r[0], pos)
if pos + 1 < len(mask): # save the rest for processing
ranges.append((pos + 1, len(mask)))
if ranges:
yield ranges[-1]
def _long_prod(vals):
result = long(1)
for x in vals:
result *= x
return result
class groupby(dict):
"""
A simple groupby different from the one in itertools.
Does not require the sequence elements to be sorted by keys,
however it is slower.
"""
def __init__(self, seq, key=lambda x: x):
for value in seq:
k = key(value)
self.setdefault(k, []).append(value)
try:
__iter__ = dict.iteritems
except AttributeError: # pragma: no cover
# Python 3
def __iter__(self):
return iter(dict.items(self))
def map_indices_py(arr):
"""
Returns a dictionary with (element, index) pairs for each element in the
given array/list
"""
return {x: i for i, x in enumerate(arr)}
def union(*seqs):
result = set([])
for seq in seqs:
if not isinstance(seq, set):
seq = set(seq)
result |= seq
return type(seqs[0])(list(result))
def difference(a, b):
return type(a)(list(set(a) - set(b)))
def intersection(*seqs):
result = set(seqs[0])
for seq in seqs:
if not isinstance(seq, set):
seq = set(seq)
result &= seq
return type(seqs[0])(list(result))
def _asarray_tuplesafe(values, dtype=None):
from pandas.core.index import Index
if not (isinstance(values, (list, tuple)) or hasattr(values, '__array__')):
values = list(values)
elif isinstance(values, Index):
return values.values
if isinstance(values, list) and dtype in [np.object_, object]:
return construct_1d_object_array_from_listlike(values)
result = np.asarray(values, dtype=dtype)
if issubclass(result.dtype.type, compat.string_types):
result = np.asarray(values, dtype=object)
if result.ndim == 2:
# Avoid building an array of arrays:
# TODO: verify whether any path hits this except #18819 (invalid)
values = [tuple(x) for x in values]
result = construct_1d_object_array_from_listlike(values)
return result
def _index_labels_to_array(labels, dtype=None):
"""
Transform label or iterable of labels to array, for use in Index.
Parameters
----------
dtype : dtype
If specified, use as dtype of the resulting array, otherwise infer.
Returns
-------
array
"""
if isinstance(labels, (compat.string_types, tuple)):
labels = [labels]
if not isinstance(labels, (list, np.ndarray)):
try:
labels = list(labels)
except TypeError: # non-iterable
labels = [labels]
labels = _asarray_tuplesafe(labels, dtype=dtype)
return labels
def _maybe_make_list(obj):
if obj is not None and not isinstance(obj, (tuple, list)):
return [obj]
return obj
def is_null_slice(obj):
""" we have a null slice """
return (isinstance(obj, slice) and obj.start is None and
obj.stop is None and obj.step is None)
def is_true_slices(l):
"""
Find non-trivial slices in "l": return a list of booleans with same length.
"""
return [isinstance(k, slice) and not is_null_slice(k) for k in l]
def is_full_slice(obj, l):
""" we have a full length slice """
return (isinstance(obj, slice) and obj.start == 0 and obj.stop == l and
obj.step is None)
def _get_callable_name(obj):
# typical case has name
if hasattr(obj, '__name__'):
return getattr(obj, '__name__')
# some objects don't; could recurse
if isinstance(obj, partial):
return _get_callable_name(obj.func)
# fall back to class name
if hasattr(obj, '__call__'):
return obj.__class__.__name__
# everything failed (probably because the argument
# wasn't actually callable); we return None
# instead of the empty string in this case to allow
# distinguishing between no name and a name of ''
return None
def _apply_if_callable(maybe_callable, obj, **kwargs):
"""
Evaluate possibly callable input using obj and kwargs if it is callable,
otherwise return as it is
Parameters
----------
maybe_callable : possibly a callable
obj : NDFrame
**kwargs
"""
if callable(maybe_callable):
return maybe_callable(obj, **kwargs)
return maybe_callable
def _where_compat(mask, arr1, arr2):
if arr1.dtype == _NS_DTYPE and arr2.dtype == _NS_DTYPE:
new_vals = np.where(mask, arr1.view('i8'), arr2.view('i8'))
return new_vals.view(_NS_DTYPE)
if arr1.dtype == _NS_DTYPE:
arr1 = tslib.ints_to_pydatetime(arr1.view('i8'))
if arr2.dtype == _NS_DTYPE:
arr2 = tslib.ints_to_pydatetime(arr2.view('i8'))
return np.where(mask, arr1, arr2)
def _dict_compat(d):
"""
Helper function to convert datetimelike-keyed dicts to Timestamp-keyed dict
Parameters
----------
d: dict like object
Returns
-------
dict
"""
return dict((_maybe_box_datetimelike(key), value)
for key, value in iteritems(d))
def standardize_mapping(into):
"""
Helper function to standardize a supplied mapping.
.. versionadded:: 0.21.0
Parameters
----------
into : instance or subclass of collections.Mapping
Must be a class, an initialized collections.defaultdict,
or an instance of a collections.Mapping subclass.
Returns
-------
mapping : a collections.Mapping subclass or other constructor
a callable object that can accept an iterator to create
the desired Mapping.
See Also
--------
DataFrame.to_dict
Series.to_dict
"""
if not inspect.isclass(into):
if isinstance(into, collections.defaultdict):
return partial(
collections.defaultdict, into.default_factory)
into = type(into)
if not issubclass(into, collections.Mapping):
raise TypeError('unsupported type: {into}'.format(into=into))
elif into == collections.defaultdict:
raise TypeError(
'to_dict() only accepts initialized defaultdicts')
return into
def sentinel_factory():
class Sentinel(object):
pass
return Sentinel()
# ----------------------------------------------------------------------
# Detect our environment
def in_interactive_session():
""" check if we're running in an interactive shell
returns True if running under python/ipython interactive shell
"""
def check_main():
import __main__ as main
return (not hasattr(main, '__file__') or
get_option('mode.sim_interactive'))
try:
return __IPYTHON__ or check_main() # noqa
except:
return check_main()
def in_qtconsole():
"""
check if we're inside an IPython qtconsole
.. deprecated:: 0.14.1
This is no longer needed, or working, in IPython 3 and above.
"""
try:
ip = get_ipython() # noqa
front_end = (
ip.config.get('KernelApp', {}).get('parent_appname', "") or
ip.config.get('IPKernelApp', {}).get('parent_appname', ""))
if 'qtconsole' in front_end.lower():
return True
except:
return False
return False
def in_ipnb():
"""
check if we're inside an IPython Notebook
.. deprecated:: 0.14.1
This is no longer needed, or working, in IPython 3 and above.
"""
try:
ip = get_ipython() # noqa
front_end = (
ip.config.get('KernelApp', {}).get('parent_appname', "") or
ip.config.get('IPKernelApp', {}).get('parent_appname', ""))
if 'notebook' in front_end.lower():
return True
except:
return False
return False
def in_ipython_frontend():
"""
check if we're inside an an IPython zmq frontend
"""
try:
ip = get_ipython() # noqa
return 'zmq' in str(type(ip)).lower()
except:
pass
return False
def _random_state(state=None):
"""
Helper function for processing random_state arguments.
Parameters
----------
state : int, np.random.RandomState, None.
If receives an int, passes to np.random.RandomState() as seed.
If receives an np.random.RandomState object, just returns object.
If receives `None`, returns np.random.
If receives anything else, raises an informative ValueError.
Default None.
Returns
-------
np.random.RandomState
"""
if is_integer(state):
return np.random.RandomState(state)
elif isinstance(state, np.random.RandomState):
return state
elif state is None:
return np.random
else:
raise ValueError("random_state must be an integer, a numpy "
"RandomState, or None")
def _get_distinct_objs(objs):
"""
Return a list with distinct elements of "objs" (different ids).
Preserves order.
"""
ids = set()
res = []
for obj in objs:
if not id(obj) in ids:
ids.add(id(obj))
res.append(obj)
return res
def _pipe(obj, func, *args, **kwargs):
"""
Apply a function ``func`` to object ``obj`` either by passing obj as the
first argument to the function or, in the case that the func is a tuple,
interpret the first element of the tuple as a function and pass the obj to
that function as a keyword argument whose key is the value of the second
element of the tuple.
Parameters
----------
func : callable or tuple of (callable, string)
Function to apply to this object or, alternatively, a
``(callable, data_keyword)`` tuple where ``data_keyword`` is a
string indicating the keyword of `callable`` that expects the
object.
args : iterable, optional
positional arguments passed into ``func``.
kwargs : dict, optional
a dictionary of keyword arguments passed into ``func``.
Returns
-------
object : the return type of ``func``.
"""
if isinstance(func, tuple):
func, target = func
if target in kwargs:
msg = '%s is both the pipe target and a keyword argument' % target
raise ValueError(msg)
kwargs[target] = obj
return func(*args, **kwargs)
else:
return func(obj, *args, **kwargs)