4012 lines
121 KiB
Python
4012 lines
121 KiB
Python
|
"""
|
||
|
|
||
|
Numerical python functions written for compatibility with MATLAB
|
||
|
commands with the same names.
|
||
|
|
||
|
MATLAB compatible functions
|
||
|
---------------------------
|
||
|
|
||
|
:func:`cohere`
|
||
|
Coherence (normalized cross spectral density)
|
||
|
|
||
|
:func:`csd`
|
||
|
Cross spectral density using Welch's average periodogram
|
||
|
|
||
|
:func:`detrend`
|
||
|
Remove the mean or best fit line from an array
|
||
|
|
||
|
:func:`find`
|
||
|
Return the indices where some condition is true;
|
||
|
numpy.nonzero is similar but more general.
|
||
|
|
||
|
:func:`griddata`
|
||
|
Interpolate irregularly distributed data to a
|
||
|
regular grid.
|
||
|
|
||
|
:func:`prctile`
|
||
|
Find the percentiles of a sequence
|
||
|
|
||
|
:func:`prepca`
|
||
|
Principal Component Analysis
|
||
|
|
||
|
:func:`psd`
|
||
|
Power spectral density using Welch's average periodogram
|
||
|
|
||
|
:func:`rk4`
|
||
|
A 4th order runge kutta integrator for 1D or ND systems
|
||
|
|
||
|
:func:`specgram`
|
||
|
Spectrogram (spectrum over segments of time)
|
||
|
|
||
|
Miscellaneous functions
|
||
|
-----------------------
|
||
|
|
||
|
Functions that don't exist in MATLAB, but are useful anyway:
|
||
|
|
||
|
:func:`cohere_pairs`
|
||
|
Coherence over all pairs. This is not a MATLAB function, but we
|
||
|
compute coherence a lot in my lab, and we compute it for a lot of
|
||
|
pairs. This function is optimized to do this efficiently by
|
||
|
caching the direct FFTs.
|
||
|
|
||
|
:func:`rk4`
|
||
|
A 4th order Runge-Kutta ODE integrator in case you ever find
|
||
|
yourself stranded without scipy (and the far superior
|
||
|
scipy.integrate tools)
|
||
|
|
||
|
:func:`contiguous_regions`
|
||
|
Return the indices of the regions spanned by some logical mask
|
||
|
|
||
|
:func:`cross_from_below`
|
||
|
Return the indices where a 1D array crosses a threshold from below
|
||
|
|
||
|
:func:`cross_from_above`
|
||
|
Return the indices where a 1D array crosses a threshold from above
|
||
|
|
||
|
:func:`complex_spectrum`
|
||
|
Return the complex-valued frequency spectrum of a signal
|
||
|
|
||
|
:func:`magnitude_spectrum`
|
||
|
Return the magnitude of the frequency spectrum of a signal
|
||
|
|
||
|
:func:`angle_spectrum`
|
||
|
Return the angle (wrapped phase) of the frequency spectrum of a signal
|
||
|
|
||
|
:func:`phase_spectrum`
|
||
|
Return the phase (unwrapped angle) of the frequency spectrum of a signal
|
||
|
|
||
|
:func:`detrend_mean`
|
||
|
Remove the mean from a line.
|
||
|
|
||
|
:func:`demean`
|
||
|
Remove the mean from a line. This function is the same as
|
||
|
:func:`detrend_mean` except for the default *axis*.
|
||
|
|
||
|
:func:`detrend_linear`
|
||
|
Remove the best fit line from a line.
|
||
|
|
||
|
:func:`detrend_none`
|
||
|
Return the original line.
|
||
|
|
||
|
:func:`stride_windows`
|
||
|
Get all windows in an array in a memory-efficient manner
|
||
|
|
||
|
:func:`stride_repeat`
|
||
|
Repeat an array in a memory-efficient manner
|
||
|
|
||
|
:func:`apply_window`
|
||
|
Apply a window along a given axis
|
||
|
|
||
|
|
||
|
record array helper functions
|
||
|
-----------------------------
|
||
|
|
||
|
A collection of helper methods for numpyrecord arrays
|
||
|
|
||
|
.. _htmlonly:
|
||
|
|
||
|
See :ref:`misc-examples-index`
|
||
|
|
||
|
:func:`rec2txt`
|
||
|
Pretty print a record array
|
||
|
|
||
|
:func:`rec2csv`
|
||
|
Store record array in CSV file
|
||
|
|
||
|
:func:`csv2rec`
|
||
|
Import record array from CSV file with type inspection
|
||
|
|
||
|
:func:`rec_append_fields`
|
||
|
Adds field(s)/array(s) to record array
|
||
|
|
||
|
:func:`rec_drop_fields`
|
||
|
Drop fields from record array
|
||
|
|
||
|
:func:`rec_join`
|
||
|
Join two record arrays on sequence of fields
|
||
|
|
||
|
:func:`recs_join`
|
||
|
A simple join of multiple recarrays using a single column as a key
|
||
|
|
||
|
:func:`rec_groupby`
|
||
|
Summarize data by groups (similar to SQL GROUP BY)
|
||
|
|
||
|
:func:`rec_summarize`
|
||
|
Helper code to filter rec array fields into new fields
|
||
|
|
||
|
For the rec viewer functions(e rec2csv), there are a bunch of Format
|
||
|
objects you can pass into the functions that will do things like color
|
||
|
negative values red, set percent formatting and scaling, etc.
|
||
|
|
||
|
Example usage::
|
||
|
|
||
|
r = csv2rec('somefile.csv', checkrows=0)
|
||
|
|
||
|
formatd = dict(
|
||
|
weight = FormatFloat(2),
|
||
|
change = FormatPercent(2),
|
||
|
cost = FormatThousands(2),
|
||
|
)
|
||
|
|
||
|
|
||
|
rec2excel(r, 'test.xls', formatd=formatd)
|
||
|
rec2csv(r, 'test.csv', formatd=formatd)
|
||
|
|
||
|
"""
|
||
|
|
||
|
import copy
|
||
|
import csv
|
||
|
import operator
|
||
|
import os
|
||
|
import warnings
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
import matplotlib.cbook as cbook
|
||
|
from matplotlib import docstring
|
||
|
from matplotlib.path import Path
|
||
|
import math
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2", alternative='numpy.logspace or numpy.geomspace')
|
||
|
def logspace(xmin, xmax, N):
|
||
|
'''
|
||
|
Return N values logarithmically spaced between xmin and xmax.
|
||
|
|
||
|
'''
|
||
|
return np.exp(np.linspace(np.log(xmin), np.log(xmax), N))
|
||
|
|
||
|
|
||
|
def window_hanning(x):
|
||
|
'''
|
||
|
Return x times the hanning window of len(x).
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
:func:`window_none`
|
||
|
:func:`window_none` is another window algorithm.
|
||
|
'''
|
||
|
return np.hanning(len(x))*x
|
||
|
|
||
|
|
||
|
def window_none(x):
|
||
|
'''
|
||
|
No window function; simply return x.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
:func:`window_hanning`
|
||
|
:func:`window_hanning` is another window algorithm.
|
||
|
'''
|
||
|
return x
|
||
|
|
||
|
|
||
|
def apply_window(x, window, axis=0, return_window=None):
|
||
|
'''
|
||
|
Apply the given window to the given 1D or 2D array along the given axis.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x : 1D or 2D array or sequence
|
||
|
Array or sequence containing the data.
|
||
|
|
||
|
window : function or array.
|
||
|
Either a function to generate a window or an array with length
|
||
|
*x*.shape[*axis*]
|
||
|
|
||
|
axis : integer
|
||
|
The axis over which to do the repetition.
|
||
|
Must be 0 or 1. The default is 0
|
||
|
|
||
|
return_window : bool
|
||
|
If true, also return the 1D values of the window that was applied
|
||
|
'''
|
||
|
x = np.asarray(x)
|
||
|
|
||
|
if x.ndim < 1 or x.ndim > 2:
|
||
|
raise ValueError('only 1D or 2D arrays can be used')
|
||
|
if axis+1 > x.ndim:
|
||
|
raise ValueError('axis(=%s) out of bounds' % axis)
|
||
|
|
||
|
xshape = list(x.shape)
|
||
|
xshapetarg = xshape.pop(axis)
|
||
|
|
||
|
if cbook.iterable(window):
|
||
|
if len(window) != xshapetarg:
|
||
|
raise ValueError('The len(window) must be the same as the shape '
|
||
|
'of x for the chosen axis')
|
||
|
windowVals = window
|
||
|
else:
|
||
|
windowVals = window(np.ones(xshapetarg, dtype=x.dtype))
|
||
|
|
||
|
if x.ndim == 1:
|
||
|
if return_window:
|
||
|
return windowVals * x, windowVals
|
||
|
else:
|
||
|
return windowVals * x
|
||
|
|
||
|
xshapeother = xshape.pop()
|
||
|
|
||
|
otheraxis = (axis+1) % 2
|
||
|
|
||
|
windowValsRep = stride_repeat(windowVals, xshapeother, axis=otheraxis)
|
||
|
|
||
|
if return_window:
|
||
|
return windowValsRep * x, windowVals
|
||
|
else:
|
||
|
return windowValsRep * x
|
||
|
|
||
|
|
||
|
def detrend(x, key=None, axis=None):
|
||
|
'''
|
||
|
Return x with its trend removed.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x : array or sequence
|
||
|
Array or sequence containing the data.
|
||
|
|
||
|
key : [ 'default' | 'constant' | 'mean' | 'linear' | 'none'] or function
|
||
|
Specifies the detrend algorithm to use. 'default' is 'mean', which is
|
||
|
the same as :func:`detrend_mean`. 'constant' is the same. 'linear' is
|
||
|
the same as :func:`detrend_linear`. 'none' is the same as
|
||
|
:func:`detrend_none`. The default is 'mean'. See the corresponding
|
||
|
functions for more details regarding the algorithms. Can also be a
|
||
|
function that carries out the detrend operation.
|
||
|
|
||
|
axis : integer
|
||
|
The axis along which to do the detrending.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
:func:`detrend_mean`
|
||
|
:func:`detrend_mean` implements the 'mean' algorithm.
|
||
|
|
||
|
:func:`detrend_linear`
|
||
|
:func:`detrend_linear` implements the 'linear' algorithm.
|
||
|
|
||
|
:func:`detrend_none`
|
||
|
:func:`detrend_none` implements the 'none' algorithm.
|
||
|
'''
|
||
|
if key is None or key in ['constant', 'mean', 'default']:
|
||
|
return detrend(x, key=detrend_mean, axis=axis)
|
||
|
elif key == 'linear':
|
||
|
return detrend(x, key=detrend_linear, axis=axis)
|
||
|
elif key == 'none':
|
||
|
return detrend(x, key=detrend_none, axis=axis)
|
||
|
elif isinstance(key, str):
|
||
|
raise ValueError("Unknown value for key %s, must be one of: "
|
||
|
"'default', 'constant', 'mean', "
|
||
|
"'linear', or a function" % key)
|
||
|
|
||
|
if not callable(key):
|
||
|
raise ValueError("Unknown value for key %s, must be one of: "
|
||
|
"'default', 'constant', 'mean', "
|
||
|
"'linear', or a function" % key)
|
||
|
|
||
|
x = np.asarray(x)
|
||
|
|
||
|
if axis is not None and axis+1 > x.ndim:
|
||
|
raise ValueError('axis(=%s) out of bounds' % axis)
|
||
|
|
||
|
if (axis is None and x.ndim == 0) or (not axis and x.ndim == 1):
|
||
|
return key(x)
|
||
|
|
||
|
# try to use the 'axis' argument if the function supports it,
|
||
|
# otherwise use apply_along_axis to do it
|
||
|
try:
|
||
|
return key(x, axis=axis)
|
||
|
except TypeError:
|
||
|
return np.apply_along_axis(key, axis=axis, arr=x)
|
||
|
|
||
|
|
||
|
def demean(x, axis=0):
|
||
|
'''
|
||
|
Return x minus its mean along the specified axis.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x : array or sequence
|
||
|
Array or sequence containing the data
|
||
|
Can have any dimensionality
|
||
|
|
||
|
axis : integer
|
||
|
The axis along which to take the mean. See numpy.mean for a
|
||
|
description of this argument.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
:func:`delinear`
|
||
|
|
||
|
:func:`denone`
|
||
|
:func:`delinear` and :func:`denone` are other detrend algorithms.
|
||
|
|
||
|
:func:`detrend_mean`
|
||
|
This function is the same as :func:`detrend_mean` except for the
|
||
|
default *axis*.
|
||
|
'''
|
||
|
return detrend_mean(x, axis=axis)
|
||
|
|
||
|
|
||
|
def detrend_mean(x, axis=None):
|
||
|
'''
|
||
|
Return x minus the mean(x).
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x : array or sequence
|
||
|
Array or sequence containing the data
|
||
|
Can have any dimensionality
|
||
|
|
||
|
axis : integer
|
||
|
The axis along which to take the mean. See numpy.mean for a
|
||
|
description of this argument.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
:func:`demean`
|
||
|
This function is the same as :func:`demean` except for the default
|
||
|
*axis*.
|
||
|
|
||
|
:func:`detrend_linear`
|
||
|
|
||
|
:func:`detrend_none`
|
||
|
:func:`detrend_linear` and :func:`detrend_none` are other detrend
|
||
|
algorithms.
|
||
|
|
||
|
:func:`detrend`
|
||
|
:func:`detrend` is a wrapper around all the detrend algorithms.
|
||
|
'''
|
||
|
x = np.asarray(x)
|
||
|
|
||
|
if axis is not None and axis+1 > x.ndim:
|
||
|
raise ValueError('axis(=%s) out of bounds' % axis)
|
||
|
|
||
|
return x - x.mean(axis, keepdims=True)
|
||
|
|
||
|
|
||
|
def detrend_none(x, axis=None):
|
||
|
'''
|
||
|
Return x: no detrending.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x : any object
|
||
|
An object containing the data
|
||
|
|
||
|
axis : integer
|
||
|
This parameter is ignored.
|
||
|
It is included for compatibility with detrend_mean
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
:func:`denone`
|
||
|
This function is the same as :func:`denone` except for the default
|
||
|
*axis*, which has no effect.
|
||
|
|
||
|
:func:`detrend_mean`
|
||
|
|
||
|
:func:`detrend_linear`
|
||
|
:func:`detrend_mean` and :func:`detrend_linear` are other detrend
|
||
|
algorithms.
|
||
|
|
||
|
:func:`detrend`
|
||
|
:func:`detrend` is a wrapper around all the detrend algorithms.
|
||
|
'''
|
||
|
return x
|
||
|
|
||
|
|
||
|
def detrend_linear(y):
|
||
|
'''
|
||
|
Return x minus best fit line; 'linear' detrending.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
y : 0-D or 1-D array or sequence
|
||
|
Array or sequence containing the data
|
||
|
|
||
|
axis : integer
|
||
|
The axis along which to take the mean. See numpy.mean for a
|
||
|
description of this argument.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
:func:`delinear`
|
||
|
This function is the same as :func:`delinear` except for the default
|
||
|
*axis*.
|
||
|
|
||
|
:func:`detrend_mean`
|
||
|
|
||
|
:func:`detrend_none`
|
||
|
:func:`detrend_mean` and :func:`detrend_none` are other detrend
|
||
|
algorithms.
|
||
|
|
||
|
:func:`detrend`
|
||
|
:func:`detrend` is a wrapper around all the detrend algorithms.
|
||
|
'''
|
||
|
# This is faster than an algorithm based on linalg.lstsq.
|
||
|
y = np.asarray(y)
|
||
|
|
||
|
if y.ndim > 1:
|
||
|
raise ValueError('y cannot have ndim > 1')
|
||
|
|
||
|
# short-circuit 0-D array.
|
||
|
if not y.ndim:
|
||
|
return np.array(0., dtype=y.dtype)
|
||
|
|
||
|
x = np.arange(y.size, dtype=float)
|
||
|
|
||
|
C = np.cov(x, y, bias=1)
|
||
|
b = C[0, 1]/C[0, 0]
|
||
|
|
||
|
a = y.mean() - b*x.mean()
|
||
|
return y - (b*x + a)
|
||
|
|
||
|
|
||
|
def stride_windows(x, n, noverlap=None, axis=0):
|
||
|
'''
|
||
|
Get all windows of x with length n as a single array,
|
||
|
using strides to avoid data duplication.
|
||
|
|
||
|
.. warning::
|
||
|
|
||
|
It is not safe to write to the output array. Multiple
|
||
|
elements may point to the same piece of memory,
|
||
|
so modifying one value may change others.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x : 1D array or sequence
|
||
|
Array or sequence containing the data.
|
||
|
|
||
|
n : integer
|
||
|
The number of data points in each window.
|
||
|
|
||
|
noverlap : integer
|
||
|
The overlap between adjacent windows.
|
||
|
Default is 0 (no overlap)
|
||
|
|
||
|
axis : integer
|
||
|
The axis along which the windows will run.
|
||
|
|
||
|
References
|
||
|
----------
|
||
|
`stackoverflow: Rolling window for 1D arrays in Numpy?
|
||
|
<http://stackoverflow.com/a/6811241>`_
|
||
|
`stackoverflow: Using strides for an efficient moving average filter
|
||
|
<http://stackoverflow.com/a/4947453>`_
|
||
|
'''
|
||
|
if noverlap is None:
|
||
|
noverlap = 0
|
||
|
|
||
|
if noverlap >= n:
|
||
|
raise ValueError('noverlap must be less than n')
|
||
|
if n < 1:
|
||
|
raise ValueError('n cannot be less than 1')
|
||
|
|
||
|
x = np.asarray(x)
|
||
|
|
||
|
if x.ndim != 1:
|
||
|
raise ValueError('only 1-dimensional arrays can be used')
|
||
|
if n == 1 and noverlap == 0:
|
||
|
if axis == 0:
|
||
|
return x[np.newaxis]
|
||
|
else:
|
||
|
return x[np.newaxis].transpose()
|
||
|
if n > x.size:
|
||
|
raise ValueError('n cannot be greater than the length of x')
|
||
|
|
||
|
# np.lib.stride_tricks.as_strided easily leads to memory corruption for
|
||
|
# non integer shape and strides, i.e. noverlap or n. See #3845.
|
||
|
noverlap = int(noverlap)
|
||
|
n = int(n)
|
||
|
|
||
|
step = n - noverlap
|
||
|
if axis == 0:
|
||
|
shape = (n, (x.shape[-1]-noverlap)//step)
|
||
|
strides = (x.strides[0], step*x.strides[0])
|
||
|
else:
|
||
|
shape = ((x.shape[-1]-noverlap)//step, n)
|
||
|
strides = (step*x.strides[0], x.strides[0])
|
||
|
return np.lib.stride_tricks.as_strided(x, shape=shape, strides=strides)
|
||
|
|
||
|
|
||
|
def stride_repeat(x, n, axis=0):
|
||
|
'''
|
||
|
Repeat the values in an array in a memory-efficient manner. Array x is
|
||
|
stacked vertically n times.
|
||
|
|
||
|
.. warning::
|
||
|
|
||
|
It is not safe to write to the output array. Multiple
|
||
|
elements may point to the same piece of memory, so
|
||
|
modifying one value may change others.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x : 1D array or sequence
|
||
|
Array or sequence containing the data.
|
||
|
|
||
|
n : integer
|
||
|
The number of time to repeat the array.
|
||
|
|
||
|
axis : integer
|
||
|
The axis along which the data will run.
|
||
|
|
||
|
References
|
||
|
----------
|
||
|
`stackoverflow: Repeat NumPy array without replicating data?
|
||
|
<http://stackoverflow.com/a/5568169>`_
|
||
|
'''
|
||
|
if axis not in [0, 1]:
|
||
|
raise ValueError('axis must be 0 or 1')
|
||
|
x = np.asarray(x)
|
||
|
if x.ndim != 1:
|
||
|
raise ValueError('only 1-dimensional arrays can be used')
|
||
|
|
||
|
if n == 1:
|
||
|
if axis == 0:
|
||
|
return np.atleast_2d(x)
|
||
|
else:
|
||
|
return np.atleast_2d(x).T
|
||
|
if n < 1:
|
||
|
raise ValueError('n cannot be less than 1')
|
||
|
|
||
|
# np.lib.stride_tricks.as_strided easily leads to memory corruption for
|
||
|
# non integer shape and strides, i.e. n. See #3845.
|
||
|
n = int(n)
|
||
|
|
||
|
if axis == 0:
|
||
|
shape = (n, x.size)
|
||
|
strides = (0, x.strides[0])
|
||
|
else:
|
||
|
shape = (x.size, n)
|
||
|
strides = (x.strides[0], 0)
|
||
|
|
||
|
return np.lib.stride_tricks.as_strided(x, shape=shape, strides=strides)
|
||
|
|
||
|
|
||
|
def _spectral_helper(x, y=None, NFFT=None, Fs=None, detrend_func=None,
|
||
|
window=None, noverlap=None, pad_to=None,
|
||
|
sides=None, scale_by_freq=None, mode=None):
|
||
|
'''
|
||
|
This is a helper function that implements the commonality between the
|
||
|
psd, csd, spectrogram and complex, magnitude, angle, and phase spectrums.
|
||
|
It is *NOT* meant to be used outside of mlab and may change at any time.
|
||
|
'''
|
||
|
if y is None:
|
||
|
# if y is None use x for y
|
||
|
same_data = True
|
||
|
else:
|
||
|
# The checks for if y is x are so that we can use the same function to
|
||
|
# implement the core of psd(), csd(), and spectrogram() without doing
|
||
|
# extra calculations. We return the unaveraged Pxy, freqs, and t.
|
||
|
same_data = y is x
|
||
|
|
||
|
if Fs is None:
|
||
|
Fs = 2
|
||
|
if noverlap is None:
|
||
|
noverlap = 0
|
||
|
if detrend_func is None:
|
||
|
detrend_func = detrend_none
|
||
|
if window is None:
|
||
|
window = window_hanning
|
||
|
|
||
|
# if NFFT is set to None use the whole signal
|
||
|
if NFFT is None:
|
||
|
NFFT = 256
|
||
|
|
||
|
if mode is None or mode == 'default':
|
||
|
mode = 'psd'
|
||
|
elif mode not in ['psd', 'complex', 'magnitude', 'angle', 'phase']:
|
||
|
raise ValueError("Unknown value for mode %s, must be one of: "
|
||
|
"'default', 'psd', 'complex', "
|
||
|
"'magnitude', 'angle', 'phase'" % mode)
|
||
|
|
||
|
if not same_data and mode != 'psd':
|
||
|
raise ValueError("x and y must be equal if mode is not 'psd'")
|
||
|
|
||
|
# Make sure we're dealing with a numpy array. If y and x were the same
|
||
|
# object to start with, keep them that way
|
||
|
x = np.asarray(x)
|
||
|
if not same_data:
|
||
|
y = np.asarray(y)
|
||
|
|
||
|
if sides is None or sides == 'default':
|
||
|
if np.iscomplexobj(x):
|
||
|
sides = 'twosided'
|
||
|
else:
|
||
|
sides = 'onesided'
|
||
|
elif sides not in ['onesided', 'twosided']:
|
||
|
raise ValueError("Unknown value for sides %s, must be one of: "
|
||
|
"'default', 'onesided', or 'twosided'" % sides)
|
||
|
|
||
|
# zero pad x and y up to NFFT if they are shorter than NFFT
|
||
|
if len(x) < NFFT:
|
||
|
n = len(x)
|
||
|
x = np.resize(x, (NFFT,))
|
||
|
x[n:] = 0
|
||
|
|
||
|
if not same_data and len(y) < NFFT:
|
||
|
n = len(y)
|
||
|
y = np.resize(y, (NFFT,))
|
||
|
y[n:] = 0
|
||
|
|
||
|
if pad_to is None:
|
||
|
pad_to = NFFT
|
||
|
|
||
|
if mode != 'psd':
|
||
|
scale_by_freq = False
|
||
|
elif scale_by_freq is None:
|
||
|
scale_by_freq = True
|
||
|
|
||
|
# For real x, ignore the negative frequencies unless told otherwise
|
||
|
if sides == 'twosided':
|
||
|
numFreqs = pad_to
|
||
|
if pad_to % 2:
|
||
|
freqcenter = (pad_to - 1)//2 + 1
|
||
|
else:
|
||
|
freqcenter = pad_to//2
|
||
|
scaling_factor = 1.
|
||
|
elif sides == 'onesided':
|
||
|
if pad_to % 2:
|
||
|
numFreqs = (pad_to + 1)//2
|
||
|
else:
|
||
|
numFreqs = pad_to//2 + 1
|
||
|
scaling_factor = 2.
|
||
|
|
||
|
result = stride_windows(x, NFFT, noverlap, axis=0)
|
||
|
result = detrend(result, detrend_func, axis=0)
|
||
|
result, windowVals = apply_window(result, window, axis=0,
|
||
|
return_window=True)
|
||
|
result = np.fft.fft(result, n=pad_to, axis=0)[:numFreqs, :]
|
||
|
freqs = np.fft.fftfreq(pad_to, 1/Fs)[:numFreqs]
|
||
|
|
||
|
if not same_data:
|
||
|
# if same_data is False, mode must be 'psd'
|
||
|
resultY = stride_windows(y, NFFT, noverlap)
|
||
|
resultY = detrend(resultY, detrend_func, axis=0)
|
||
|
resultY = apply_window(resultY, window, axis=0)
|
||
|
resultY = np.fft.fft(resultY, n=pad_to, axis=0)[:numFreqs, :]
|
||
|
result = np.conj(result) * resultY
|
||
|
elif mode == 'psd':
|
||
|
result = np.conj(result) * result
|
||
|
elif mode == 'magnitude':
|
||
|
result = np.abs(result) / np.abs(windowVals).sum()
|
||
|
elif mode == 'angle' or mode == 'phase':
|
||
|
# we unwrap the phase later to handle the onesided vs. twosided case
|
||
|
result = np.angle(result)
|
||
|
elif mode == 'complex':
|
||
|
result /= np.abs(windowVals).sum()
|
||
|
|
||
|
if mode == 'psd':
|
||
|
|
||
|
# Also include scaling factors for one-sided densities and dividing by
|
||
|
# the sampling frequency, if desired. Scale everything, except the DC
|
||
|
# component and the NFFT/2 component:
|
||
|
|
||
|
# if we have a even number of frequencies, don't scale NFFT/2
|
||
|
if not NFFT % 2:
|
||
|
slc = slice(1, -1, None)
|
||
|
# if we have an odd number, just don't scale DC
|
||
|
else:
|
||
|
slc = slice(1, None, None)
|
||
|
|
||
|
result[slc] *= scaling_factor
|
||
|
|
||
|
# MATLAB divides by the sampling frequency so that density function
|
||
|
# has units of dB/Hz and can be integrated by the plotted frequency
|
||
|
# values. Perform the same scaling here.
|
||
|
if scale_by_freq:
|
||
|
result /= Fs
|
||
|
# Scale the spectrum by the norm of the window to compensate for
|
||
|
# windowing loss; see Bendat & Piersol Sec 11.5.2.
|
||
|
result /= (np.abs(windowVals)**2).sum()
|
||
|
else:
|
||
|
# In this case, preserve power in the segment, not amplitude
|
||
|
result /= np.abs(windowVals).sum()**2
|
||
|
|
||
|
t = np.arange(NFFT/2, len(x) - NFFT/2 + 1, NFFT - noverlap)/Fs
|
||
|
|
||
|
if sides == 'twosided':
|
||
|
# center the frequency range at zero
|
||
|
freqs = np.concatenate((freqs[freqcenter:], freqs[:freqcenter]))
|
||
|
result = np.concatenate((result[freqcenter:, :],
|
||
|
result[:freqcenter, :]), 0)
|
||
|
elif not pad_to % 2:
|
||
|
# get the last value correctly, it is negative otherwise
|
||
|
freqs[-1] *= -1
|
||
|
|
||
|
# we unwrap the phase here to handle the onesided vs. twosided case
|
||
|
if mode == 'phase':
|
||
|
result = np.unwrap(result, axis=0)
|
||
|
|
||
|
return result, freqs, t
|
||
|
|
||
|
|
||
|
def _single_spectrum_helper(x, mode, Fs=None, window=None, pad_to=None,
|
||
|
sides=None):
|
||
|
'''
|
||
|
This is a helper function that implements the commonality between the
|
||
|
complex, magnitude, angle, and phase spectrums.
|
||
|
It is *NOT* meant to be used outside of mlab and may change at any time.
|
||
|
'''
|
||
|
if mode is None or mode == 'psd' or mode == 'default':
|
||
|
raise ValueError('_single_spectrum_helper does not work with %s mode'
|
||
|
% mode)
|
||
|
|
||
|
if pad_to is None:
|
||
|
pad_to = len(x)
|
||
|
|
||
|
spec, freqs, _ = _spectral_helper(x=x, y=None, NFFT=len(x), Fs=Fs,
|
||
|
detrend_func=detrend_none, window=window,
|
||
|
noverlap=0, pad_to=pad_to,
|
||
|
sides=sides,
|
||
|
scale_by_freq=False,
|
||
|
mode=mode)
|
||
|
if mode != 'complex':
|
||
|
spec = spec.real
|
||
|
|
||
|
if spec.ndim == 2 and spec.shape[1] == 1:
|
||
|
spec = spec[:, 0]
|
||
|
|
||
|
return spec, freqs
|
||
|
|
||
|
|
||
|
# Split out these keyword docs so that they can be used elsewhere
|
||
|
docstring.interpd.update(Spectral=cbook.dedent("""
|
||
|
Fs : scalar
|
||
|
The sampling frequency (samples per time unit). It is used
|
||
|
to calculate the Fourier frequencies, freqs, in cycles per time
|
||
|
unit. The default value is 2.
|
||
|
|
||
|
window : callable or ndarray
|
||
|
A function or a vector of length *NFFT*. To create window
|
||
|
vectors see :func:`window_hanning`, :func:`window_none`,
|
||
|
:func:`numpy.blackman`, :func:`numpy.hamming`,
|
||
|
:func:`numpy.bartlett`, :func:`scipy.signal`,
|
||
|
:func:`scipy.signal.get_window`, etc. The default is
|
||
|
:func:`window_hanning`. If a function is passed as the
|
||
|
argument, it must take a data segment as an argument and
|
||
|
return the windowed version of the segment.
|
||
|
|
||
|
sides : {'default', 'onesided', 'twosided'}
|
||
|
Specifies which sides of the spectrum to return. Default gives the
|
||
|
default behavior, which returns one-sided for real data and both
|
||
|
for complex data. 'onesided' forces the return of a one-sided
|
||
|
spectrum, while 'twosided' forces two-sided.
|
||
|
"""))
|
||
|
|
||
|
|
||
|
docstring.interpd.update(Single_Spectrum=cbook.dedent("""
|
||
|
pad_to : int
|
||
|
The number of points to which the data segment is padded when
|
||
|
performing the FFT. While not increasing the actual resolution of
|
||
|
the spectrum (the minimum distance between resolvable peaks),
|
||
|
this can give more points in the plot, allowing for more
|
||
|
detail. This corresponds to the *n* parameter in the call to fft().
|
||
|
The default is None, which sets *pad_to* equal to the length of the
|
||
|
input signal (i.e. no padding).
|
||
|
"""))
|
||
|
|
||
|
|
||
|
docstring.interpd.update(PSD=cbook.dedent("""
|
||
|
pad_to : int
|
||
|
The number of points to which the data segment is padded when
|
||
|
performing the FFT. This can be different from *NFFT*, which
|
||
|
specifies the number of data points used. While not increasing
|
||
|
the actual resolution of the spectrum (the minimum distance between
|
||
|
resolvable peaks), this can give more points in the plot,
|
||
|
allowing for more detail. This corresponds to the *n* parameter
|
||
|
in the call to fft(). The default is None, which sets *pad_to*
|
||
|
equal to *NFFT*
|
||
|
|
||
|
NFFT : int
|
||
|
The number of data points used in each block for the FFT.
|
||
|
A power 2 is most efficient. The default value is 256.
|
||
|
This should *NOT* be used to get zero padding, or the scaling of the
|
||
|
result will be incorrect. Use *pad_to* for this instead.
|
||
|
|
||
|
detrend : {'default', 'constant', 'mean', 'linear', 'none'} or callable
|
||
|
The function applied to each segment before fft-ing,
|
||
|
designed to remove the mean or linear trend. Unlike in
|
||
|
MATLAB, where the *detrend* parameter is a vector, in
|
||
|
matplotlib is it a function. The :mod:`~matplotlib.mlab`
|
||
|
module defines :func:`~matplotlib.mlab.detrend_none`,
|
||
|
:func:`~matplotlib.mlab.detrend_mean`, and
|
||
|
:func:`~matplotlib.mlab.detrend_linear`, but you can use
|
||
|
a custom function as well. You can also use a string to choose
|
||
|
one of the functions. 'default', 'constant', and 'mean' call
|
||
|
:func:`~matplotlib.mlab.detrend_mean`. 'linear' calls
|
||
|
:func:`~matplotlib.mlab.detrend_linear`. 'none' calls
|
||
|
:func:`~matplotlib.mlab.detrend_none`.
|
||
|
|
||
|
scale_by_freq : bool, optional
|
||
|
Specifies whether the resulting density values should be scaled
|
||
|
by the scaling frequency, which gives density in units of Hz^-1.
|
||
|
This allows for integration over the returned frequency values.
|
||
|
The default is True for MATLAB compatibility.
|
||
|
"""))
|
||
|
|
||
|
|
||
|
@docstring.dedent_interpd
|
||
|
def psd(x, NFFT=None, Fs=None, detrend=None, window=None,
|
||
|
noverlap=None, pad_to=None, sides=None, scale_by_freq=None):
|
||
|
r"""
|
||
|
Compute the power spectral density.
|
||
|
|
||
|
Call signature::
|
||
|
|
||
|
psd(x, NFFT=256, Fs=2, detrend=mlab.detrend_none,
|
||
|
window=mlab.window_hanning, noverlap=0, pad_to=None,
|
||
|
sides='default', scale_by_freq=None)
|
||
|
|
||
|
The power spectral density :math:`P_{xx}` by Welch's average
|
||
|
periodogram method. The vector *x* is divided into *NFFT* length
|
||
|
segments. Each segment is detrended by function *detrend* and
|
||
|
windowed by function *window*. *noverlap* gives the length of
|
||
|
the overlap between segments. The :math:`|\mathrm{fft}(i)|^2`
|
||
|
of each segment :math:`i` are averaged to compute :math:`P_{xx}`.
|
||
|
|
||
|
If len(*x*) < *NFFT*, it will be zero padded to *NFFT*.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x : 1-D array or sequence
|
||
|
Array or sequence containing the data
|
||
|
|
||
|
%(Spectral)s
|
||
|
|
||
|
%(PSD)s
|
||
|
|
||
|
noverlap : integer
|
||
|
The number of points of overlap between segments.
|
||
|
The default value is 0 (no overlap).
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
Pxx : 1-D array
|
||
|
The values for the power spectrum `P_{xx}` (real valued)
|
||
|
|
||
|
freqs : 1-D array
|
||
|
The frequencies corresponding to the elements in *Pxx*
|
||
|
|
||
|
References
|
||
|
----------
|
||
|
Bendat & Piersol -- Random Data: Analysis and Measurement Procedures, John
|
||
|
Wiley & Sons (1986)
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
:func:`specgram`
|
||
|
:func:`specgram` differs in the default overlap; in not returning the
|
||
|
mean of the segment periodograms; and in returning the times of the
|
||
|
segments.
|
||
|
|
||
|
:func:`magnitude_spectrum`
|
||
|
:func:`magnitude_spectrum` returns the magnitude spectrum.
|
||
|
|
||
|
:func:`csd`
|
||
|
:func:`csd` returns the spectral density between two signals.
|
||
|
"""
|
||
|
Pxx, freqs = csd(x=x, y=None, NFFT=NFFT, Fs=Fs, detrend=detrend,
|
||
|
window=window, noverlap=noverlap, pad_to=pad_to,
|
||
|
sides=sides, scale_by_freq=scale_by_freq)
|
||
|
return Pxx.real, freqs
|
||
|
|
||
|
|
||
|
@docstring.dedent_interpd
|
||
|
def csd(x, y, NFFT=None, Fs=None, detrend=None, window=None,
|
||
|
noverlap=None, pad_to=None, sides=None, scale_by_freq=None):
|
||
|
"""
|
||
|
Compute the cross-spectral density.
|
||
|
|
||
|
Call signature::
|
||
|
|
||
|
csd(x, y, NFFT=256, Fs=2, detrend=mlab.detrend_none,
|
||
|
window=mlab.window_hanning, noverlap=0, pad_to=None,
|
||
|
sides='default', scale_by_freq=None)
|
||
|
|
||
|
The cross spectral density :math:`P_{xy}` by Welch's average
|
||
|
periodogram method. The vectors *x* and *y* are divided into
|
||
|
*NFFT* length segments. Each segment is detrended by function
|
||
|
*detrend* and windowed by function *window*. *noverlap* gives
|
||
|
the length of the overlap between segments. The product of
|
||
|
the direct FFTs of *x* and *y* are averaged over each segment
|
||
|
to compute :math:`P_{xy}`, with a scaling to correct for power
|
||
|
loss due to windowing.
|
||
|
|
||
|
If len(*x*) < *NFFT* or len(*y*) < *NFFT*, they will be zero
|
||
|
padded to *NFFT*.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x, y : 1-D arrays or sequences
|
||
|
Arrays or sequences containing the data
|
||
|
|
||
|
%(Spectral)s
|
||
|
|
||
|
%(PSD)s
|
||
|
|
||
|
noverlap : integer
|
||
|
The number of points of overlap between segments.
|
||
|
The default value is 0 (no overlap).
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
Pxy : 1-D array
|
||
|
The values for the cross spectrum `P_{xy}` before scaling (real valued)
|
||
|
|
||
|
freqs : 1-D array
|
||
|
The frequencies corresponding to the elements in *Pxy*
|
||
|
|
||
|
References
|
||
|
----------
|
||
|
Bendat & Piersol -- Random Data: Analysis and Measurement Procedures, John
|
||
|
Wiley & Sons (1986)
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
:func:`psd`
|
||
|
:func:`psd` is the equivalent to setting y=x.
|
||
|
"""
|
||
|
if NFFT is None:
|
||
|
NFFT = 256
|
||
|
Pxy, freqs, _ = _spectral_helper(x=x, y=y, NFFT=NFFT, Fs=Fs,
|
||
|
detrend_func=detrend, window=window,
|
||
|
noverlap=noverlap, pad_to=pad_to,
|
||
|
sides=sides, scale_by_freq=scale_by_freq,
|
||
|
mode='psd')
|
||
|
|
||
|
if Pxy.ndim == 2:
|
||
|
if Pxy.shape[1] > 1:
|
||
|
Pxy = Pxy.mean(axis=1)
|
||
|
else:
|
||
|
Pxy = Pxy[:, 0]
|
||
|
return Pxy, freqs
|
||
|
|
||
|
|
||
|
@docstring.dedent_interpd
|
||
|
def complex_spectrum(x, Fs=None, window=None, pad_to=None,
|
||
|
sides=None):
|
||
|
"""
|
||
|
Compute the complex-valued frequency spectrum of *x*. Data is padded to a
|
||
|
length of *pad_to* and the windowing function *window* is applied to the
|
||
|
signal.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x : 1-D array or sequence
|
||
|
Array or sequence containing the data
|
||
|
|
||
|
%(Spectral)s
|
||
|
|
||
|
%(Single_Spectrum)s
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
spectrum : 1-D array
|
||
|
The values for the complex spectrum (complex valued)
|
||
|
|
||
|
freqs : 1-D array
|
||
|
The frequencies corresponding to the elements in *spectrum*
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
:func:`magnitude_spectrum`
|
||
|
:func:`magnitude_spectrum` returns the absolute value of this function.
|
||
|
|
||
|
:func:`angle_spectrum`
|
||
|
:func:`angle_spectrum` returns the angle of this function.
|
||
|
|
||
|
:func:`phase_spectrum`
|
||
|
:func:`phase_spectrum` returns the phase (unwrapped angle) of this
|
||
|
function.
|
||
|
|
||
|
:func:`specgram`
|
||
|
:func:`specgram` can return the complex spectrum of segments within the
|
||
|
signal.
|
||
|
"""
|
||
|
return _single_spectrum_helper(x=x, Fs=Fs, window=window, pad_to=pad_to,
|
||
|
sides=sides, mode='complex')
|
||
|
|
||
|
|
||
|
@docstring.dedent_interpd
|
||
|
def magnitude_spectrum(x, Fs=None, window=None, pad_to=None,
|
||
|
sides=None):
|
||
|
"""
|
||
|
Compute the magnitude (absolute value) of the frequency spectrum of
|
||
|
*x*. Data is padded to a length of *pad_to* and the windowing function
|
||
|
*window* is applied to the signal.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x : 1-D array or sequence
|
||
|
Array or sequence containing the data
|
||
|
|
||
|
%(Spectral)s
|
||
|
|
||
|
%(Single_Spectrum)s
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
spectrum : 1-D array
|
||
|
The values for the magnitude spectrum (real valued)
|
||
|
|
||
|
freqs : 1-D array
|
||
|
The frequencies corresponding to the elements in *spectrum*
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
:func:`psd`
|
||
|
:func:`psd` returns the power spectral density.
|
||
|
|
||
|
:func:`complex_spectrum`
|
||
|
This function returns the absolute value of :func:`complex_spectrum`.
|
||
|
|
||
|
:func:`angle_spectrum`
|
||
|
:func:`angle_spectrum` returns the angles of the corresponding
|
||
|
frequencies.
|
||
|
|
||
|
:func:`phase_spectrum`
|
||
|
:func:`phase_spectrum` returns the phase (unwrapped angle) of the
|
||
|
corresponding frequencies.
|
||
|
|
||
|
:func:`specgram`
|
||
|
:func:`specgram` can return the magnitude spectrum of segments within
|
||
|
the signal.
|
||
|
"""
|
||
|
return _single_spectrum_helper(x=x, Fs=Fs, window=window, pad_to=pad_to,
|
||
|
sides=sides, mode='magnitude')
|
||
|
|
||
|
|
||
|
@docstring.dedent_interpd
|
||
|
def angle_spectrum(x, Fs=None, window=None, pad_to=None,
|
||
|
sides=None):
|
||
|
"""
|
||
|
Compute the angle of the frequency spectrum (wrapped phase spectrum) of
|
||
|
*x*. Data is padded to a length of *pad_to* and the windowing function
|
||
|
*window* is applied to the signal.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x : 1-D array or sequence
|
||
|
Array or sequence containing the data
|
||
|
|
||
|
%(Spectral)s
|
||
|
|
||
|
%(Single_Spectrum)s
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
spectrum : 1-D array
|
||
|
The values for the angle spectrum in radians (real valued)
|
||
|
|
||
|
freqs : 1-D array
|
||
|
The frequencies corresponding to the elements in *spectrum*
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
:func:`complex_spectrum`
|
||
|
This function returns the angle value of :func:`complex_spectrum`.
|
||
|
|
||
|
:func:`magnitude_spectrum`
|
||
|
:func:`angle_spectrum` returns the magnitudes of the corresponding
|
||
|
frequencies.
|
||
|
|
||
|
:func:`phase_spectrum`
|
||
|
:func:`phase_spectrum` returns the unwrapped version of this function.
|
||
|
|
||
|
:func:`specgram`
|
||
|
:func:`specgram` can return the angle spectrum of segments within the
|
||
|
signal.
|
||
|
"""
|
||
|
return _single_spectrum_helper(x=x, Fs=Fs, window=window, pad_to=pad_to,
|
||
|
sides=sides, mode='angle')
|
||
|
|
||
|
|
||
|
@docstring.dedent_interpd
|
||
|
def phase_spectrum(x, Fs=None, window=None, pad_to=None,
|
||
|
sides=None):
|
||
|
"""
|
||
|
Compute the phase of the frequency spectrum (unwrapped angle spectrum) of
|
||
|
*x*. Data is padded to a length of *pad_to* and the windowing function
|
||
|
*window* is applied to the signal.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x : 1-D array or sequence
|
||
|
Array or sequence containing the data
|
||
|
|
||
|
%(Spectral)s
|
||
|
|
||
|
%(Single_Spectrum)s
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
spectrum : 1-D array
|
||
|
The values for the phase spectrum in radians (real valued)
|
||
|
|
||
|
freqs : 1-D array
|
||
|
The frequencies corresponding to the elements in *spectrum*
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
:func:`complex_spectrum`
|
||
|
This function returns the angle value of :func:`complex_spectrum`.
|
||
|
|
||
|
:func:`magnitude_spectrum`
|
||
|
:func:`magnitude_spectrum` returns the magnitudes of the corresponding
|
||
|
frequencies.
|
||
|
|
||
|
:func:`angle_spectrum`
|
||
|
:func:`angle_spectrum` returns the wrapped version of this function.
|
||
|
|
||
|
:func:`specgram`
|
||
|
:func:`specgram` can return the phase spectrum of segments within the
|
||
|
signal.
|
||
|
"""
|
||
|
return _single_spectrum_helper(x=x, Fs=Fs, window=window, pad_to=pad_to,
|
||
|
sides=sides, mode='phase')
|
||
|
|
||
|
|
||
|
@docstring.dedent_interpd
|
||
|
def specgram(x, NFFT=None, Fs=None, detrend=None, window=None,
|
||
|
noverlap=None, pad_to=None, sides=None, scale_by_freq=None,
|
||
|
mode=None):
|
||
|
"""
|
||
|
Compute a spectrogram.
|
||
|
|
||
|
Compute and plot a spectrogram of data in x. Data are split into
|
||
|
NFFT length segments and the spectrum of each section is
|
||
|
computed. The windowing function window is applied to each
|
||
|
segment, and the amount of overlap of each segment is
|
||
|
specified with noverlap.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x : array_like
|
||
|
1-D array or sequence.
|
||
|
|
||
|
%(Spectral)s
|
||
|
|
||
|
%(PSD)s
|
||
|
|
||
|
noverlap : int, optional
|
||
|
The number of points of overlap between blocks. The default
|
||
|
value is 128.
|
||
|
mode : str, optional
|
||
|
What sort of spectrum to use, default is 'psd'.
|
||
|
'psd'
|
||
|
Returns the power spectral density.
|
||
|
|
||
|
'complex'
|
||
|
Returns the complex-valued frequency spectrum.
|
||
|
|
||
|
'magnitude'
|
||
|
Returns the magnitude spectrum.
|
||
|
|
||
|
'angle'
|
||
|
Returns the phase spectrum without unwrapping.
|
||
|
|
||
|
'phase'
|
||
|
Returns the phase spectrum with unwrapping.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
spectrum : array_like
|
||
|
2-D array, columns are the periodograms of successive segments.
|
||
|
|
||
|
freqs : array_like
|
||
|
1-D array, frequencies corresponding to the rows in *spectrum*.
|
||
|
|
||
|
t : array_like
|
||
|
1-D array, the times corresponding to midpoints of segments
|
||
|
(i.e the columns in *spectrum*).
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
psd : differs in the overlap and in the return values.
|
||
|
complex_spectrum : similar, but with complex valued frequencies.
|
||
|
magnitude_spectrum : similar single segment when mode is 'magnitude'.
|
||
|
angle_spectrum : similar to single segment when mode is 'angle'.
|
||
|
phase_spectrum : similar to single segment when mode is 'phase'.
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
detrend and scale_by_freq only apply when *mode* is set to 'psd'.
|
||
|
|
||
|
"""
|
||
|
if noverlap is None:
|
||
|
noverlap = 128 # default in _spectral_helper() is noverlap = 0
|
||
|
if NFFT is None:
|
||
|
NFFT = 256 # same default as in _spectral_helper()
|
||
|
if len(x) <= NFFT:
|
||
|
warnings.warn("Only one segment is calculated since parameter NFFT " +
|
||
|
"(=%d) >= signal length (=%d)." % (NFFT, len(x)))
|
||
|
|
||
|
spec, freqs, t = _spectral_helper(x=x, y=None, NFFT=NFFT, Fs=Fs,
|
||
|
detrend_func=detrend, window=window,
|
||
|
noverlap=noverlap, pad_to=pad_to,
|
||
|
sides=sides,
|
||
|
scale_by_freq=scale_by_freq,
|
||
|
mode=mode)
|
||
|
|
||
|
if mode != 'complex':
|
||
|
spec = spec.real # Needed since helper implements generically
|
||
|
|
||
|
return spec, freqs, t
|
||
|
|
||
|
|
||
|
_coh_error = """Coherence is calculated by averaging over *NFFT*
|
||
|
length segments. Your signal is too short for your choice of *NFFT*.
|
||
|
"""
|
||
|
|
||
|
|
||
|
@docstring.dedent_interpd
|
||
|
def cohere(x, y, NFFT=256, Fs=2, detrend=detrend_none, window=window_hanning,
|
||
|
noverlap=0, pad_to=None, sides='default', scale_by_freq=None):
|
||
|
"""
|
||
|
The coherence between *x* and *y*. Coherence is the normalized
|
||
|
cross spectral density:
|
||
|
|
||
|
.. math::
|
||
|
|
||
|
C_{xy} = \\frac{|P_{xy}|^2}{P_{xx}P_{yy}}
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x, y
|
||
|
Array or sequence containing the data
|
||
|
|
||
|
%(Spectral)s
|
||
|
|
||
|
%(PSD)s
|
||
|
|
||
|
noverlap : integer
|
||
|
The number of points of overlap between blocks. The default value
|
||
|
is 0 (no overlap).
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
The return value is the tuple (*Cxy*, *f*), where *f* are the
|
||
|
frequencies of the coherence vector. For cohere, scaling the
|
||
|
individual densities by the sampling frequency has no effect,
|
||
|
since the factors cancel out.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
:func:`psd`, :func:`csd` :
|
||
|
For information about the methods used to compute :math:`P_{xy}`,
|
||
|
:math:`P_{xx}` and :math:`P_{yy}`.
|
||
|
"""
|
||
|
|
||
|
if len(x) < 2 * NFFT:
|
||
|
raise ValueError(_coh_error)
|
||
|
Pxx, f = psd(x, NFFT, Fs, detrend, window, noverlap, pad_to, sides,
|
||
|
scale_by_freq)
|
||
|
Pyy, f = psd(y, NFFT, Fs, detrend, window, noverlap, pad_to, sides,
|
||
|
scale_by_freq)
|
||
|
Pxy, f = csd(x, y, NFFT, Fs, detrend, window, noverlap, pad_to, sides,
|
||
|
scale_by_freq)
|
||
|
Cxy = np.abs(Pxy) ** 2 / (Pxx * Pyy)
|
||
|
return Cxy, f
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2')
|
||
|
def donothing_callback(*args):
|
||
|
pass
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2', 'scipy.signal.coherence')
|
||
|
def cohere_pairs(X, ij, NFFT=256, Fs=2, detrend=detrend_none,
|
||
|
window=window_hanning, noverlap=0,
|
||
|
preferSpeedOverMemory=True,
|
||
|
progressCallback=donothing_callback,
|
||
|
returnPxx=False):
|
||
|
|
||
|
"""
|
||
|
Compute the coherence and phase for all pairs *ij*, in *X*.
|
||
|
|
||
|
*X* is a *numSamples* * *numCols* array
|
||
|
|
||
|
*ij* is a list of tuples. Each tuple is a pair of indexes into
|
||
|
the columns of X for which you want to compute coherence. For
|
||
|
example, if *X* has 64 columns, and you want to compute all
|
||
|
nonredundant pairs, define *ij* as::
|
||
|
|
||
|
ij = []
|
||
|
for i in range(64):
|
||
|
for j in range(i+1,64):
|
||
|
ij.append( (i,j) )
|
||
|
|
||
|
*preferSpeedOverMemory* is an optional bool. Defaults to true. If
|
||
|
False, limits the caching by only making one, rather than two,
|
||
|
complex cache arrays. This is useful if memory becomes critical.
|
||
|
Even when *preferSpeedOverMemory* is False, :func:`cohere_pairs`
|
||
|
will still give significant performance gains over calling
|
||
|
:func:`cohere` for each pair, and will use subtantially less
|
||
|
memory than if *preferSpeedOverMemory* is True. In my tests with
|
||
|
a 43000,64 array over all nonredundant pairs,
|
||
|
*preferSpeedOverMemory* = True delivered a 33% performance boost
|
||
|
on a 1.7GHZ Athlon with 512MB RAM compared with
|
||
|
*preferSpeedOverMemory* = False. But both solutions were more
|
||
|
than 10x faster than naively crunching all possible pairs through
|
||
|
:func:`cohere`.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
Cxy : dictionary of (*i*, *j*) tuples -> coherence vector for
|
||
|
that pair. i.e., ``Cxy[(i,j) = cohere(X[:,i], X[:,j])``.
|
||
|
Number of dictionary keys is ``len(ij)``.
|
||
|
|
||
|
Phase : dictionary of phases of the cross spectral density at
|
||
|
each frequency for each pair. Keys are (*i*, *j*).
|
||
|
|
||
|
freqs : vector of frequencies, equal in length to either the
|
||
|
coherence or phase vectors for any (*i*, *j*) key.
|
||
|
|
||
|
e.g., to make a coherence Bode plot::
|
||
|
|
||
|
subplot(211)
|
||
|
plot( freqs, Cxy[(12,19)])
|
||
|
subplot(212)
|
||
|
plot( freqs, Phase[(12,19)])
|
||
|
|
||
|
For a large number of pairs, :func:`cohere_pairs` can be much more
|
||
|
efficient than just calling :func:`cohere` for each pair, because
|
||
|
it caches most of the intensive computations. If :math:`N` is the
|
||
|
number of pairs, this function is :math:`O(N)` for most of the
|
||
|
heavy lifting, whereas calling cohere for each pair is
|
||
|
:math:`O(N^2)`. However, because of the caching, it is also more
|
||
|
memory intensive, making 2 additional complex arrays with
|
||
|
approximately the same number of elements as *X*.
|
||
|
|
||
|
See :file:`test/cohere_pairs_test.py` in the src tree for an
|
||
|
example script that shows that this :func:`cohere_pairs` and
|
||
|
:func:`cohere` give the same results for a given pair.
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
:func:`psd`
|
||
|
For information about the methods used to compute :math:`P_{xy}`,
|
||
|
:math:`P_{xx}` and :math:`P_{yy}`.
|
||
|
"""
|
||
|
numRows, numCols = X.shape
|
||
|
|
||
|
# zero pad if X is too short
|
||
|
if numRows < NFFT:
|
||
|
tmp = X
|
||
|
X = np.zeros((NFFT, numCols), X.dtype)
|
||
|
X[:numRows, :] = tmp
|
||
|
del tmp
|
||
|
|
||
|
numRows, numCols = X.shape
|
||
|
# get all the columns of X that we are interested in by checking
|
||
|
# the ij tuples
|
||
|
allColumns = set()
|
||
|
for i, j in ij:
|
||
|
allColumns.add(i)
|
||
|
allColumns.add(j)
|
||
|
Ncols = len(allColumns)
|
||
|
|
||
|
# for real X, ignore the negative frequencies
|
||
|
if np.iscomplexobj(X):
|
||
|
numFreqs = NFFT
|
||
|
else:
|
||
|
numFreqs = NFFT//2+1
|
||
|
|
||
|
# cache the FFT of every windowed, detrended NFFT length segment
|
||
|
# of every channel. If preferSpeedOverMemory, cache the conjugate
|
||
|
# as well
|
||
|
if cbook.iterable(window):
|
||
|
if len(window) != NFFT:
|
||
|
raise ValueError("The length of the window must be equal to NFFT")
|
||
|
windowVals = window
|
||
|
else:
|
||
|
windowVals = window(np.ones(NFFT, X.dtype))
|
||
|
ind = list(range(0, numRows-NFFT+1, NFFT-noverlap))
|
||
|
numSlices = len(ind)
|
||
|
FFTSlices = {}
|
||
|
FFTConjSlices = {}
|
||
|
Pxx = {}
|
||
|
slices = range(numSlices)
|
||
|
normVal = np.linalg.norm(windowVals)**2
|
||
|
for iCol in allColumns:
|
||
|
progressCallback(i/Ncols, 'Cacheing FFTs')
|
||
|
Slices = np.zeros((numSlices, numFreqs), dtype=np.complex_)
|
||
|
for iSlice in slices:
|
||
|
thisSlice = X[ind[iSlice]:ind[iSlice]+NFFT, iCol]
|
||
|
thisSlice = windowVals*detrend(thisSlice)
|
||
|
Slices[iSlice, :] = np.fft.fft(thisSlice)[:numFreqs]
|
||
|
|
||
|
FFTSlices[iCol] = Slices
|
||
|
if preferSpeedOverMemory:
|
||
|
FFTConjSlices[iCol] = np.conj(Slices)
|
||
|
Pxx[iCol] = np.divide(np.mean(abs(Slices)**2, axis=0), normVal)
|
||
|
del Slices, ind, windowVals
|
||
|
|
||
|
# compute the coherences and phases for all pairs using the
|
||
|
# cached FFTs
|
||
|
Cxy = {}
|
||
|
Phase = {}
|
||
|
count = 0
|
||
|
N = len(ij)
|
||
|
for i, j in ij:
|
||
|
count += 1
|
||
|
if count % 10 == 0:
|
||
|
progressCallback(count/N, 'Computing coherences')
|
||
|
|
||
|
if preferSpeedOverMemory:
|
||
|
Pxy = FFTSlices[i] * FFTConjSlices[j]
|
||
|
else:
|
||
|
Pxy = FFTSlices[i] * np.conj(FFTSlices[j])
|
||
|
if numSlices > 1:
|
||
|
Pxy = np.mean(Pxy, axis=0)
|
||
|
# Pxy = np.divide(Pxy, normVal)
|
||
|
Pxy /= normVal
|
||
|
# Cxy[(i,j)] = np.divide(np.absolute(Pxy)**2, Pxx[i]*Pxx[j])
|
||
|
Cxy[i, j] = abs(Pxy)**2 / (Pxx[i]*Pxx[j])
|
||
|
Phase[i, j] = np.arctan2(Pxy.imag, Pxy.real)
|
||
|
|
||
|
freqs = Fs/NFFT*np.arange(numFreqs)
|
||
|
if returnPxx:
|
||
|
return Cxy, Phase, freqs, Pxx
|
||
|
else:
|
||
|
return Cxy, Phase, freqs
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2', 'scipy.stats.entropy')
|
||
|
def entropy(y, bins):
|
||
|
r"""
|
||
|
Return the entropy of the data in *y* in units of nat.
|
||
|
|
||
|
.. math::
|
||
|
|
||
|
-\sum p_i \ln(p_i)
|
||
|
|
||
|
where :math:`p_i` is the probability of observing *y* in the
|
||
|
:math:`i^{th}` bin of *bins*. *bins* can be a number of bins or a
|
||
|
range of bins; see :func:`numpy.histogram`.
|
||
|
|
||
|
Compare *S* with analytic calculation for a Gaussian::
|
||
|
|
||
|
x = mu + sigma * randn(200000)
|
||
|
Sanalytic = 0.5 * ( 1.0 + log(2*pi*sigma**2.0) )
|
||
|
"""
|
||
|
n, bins = np.histogram(y, bins)
|
||
|
n = n.astype(float)
|
||
|
|
||
|
n = np.take(n, np.nonzero(n)[0]) # get the positive
|
||
|
|
||
|
p = np.divide(n, len(y))
|
||
|
|
||
|
delta = bins[1] - bins[0]
|
||
|
S = -1.0 * np.sum(p * np.log(p)) + np.log(delta)
|
||
|
return S
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2', 'scipy.stats.norm.pdf')
|
||
|
def normpdf(x, *args):
|
||
|
"Return the normal pdf evaluated at *x*; args provides *mu*, *sigma*"
|
||
|
mu, sigma = args
|
||
|
return 1./(np.sqrt(2*np.pi)*sigma)*np.exp(-0.5 * (1./sigma*(x - mu))**2)
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2')
|
||
|
def find(condition):
|
||
|
"Return the indices where ravel(condition) is true"
|
||
|
res, = np.nonzero(np.ravel(condition))
|
||
|
return res
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2')
|
||
|
def longest_contiguous_ones(x):
|
||
|
"""
|
||
|
Return the indices of the longest stretch of contiguous ones in *x*,
|
||
|
assuming *x* is a vector of zeros and ones. If there are two
|
||
|
equally long stretches, pick the first.
|
||
|
"""
|
||
|
x = np.ravel(x)
|
||
|
if len(x) == 0:
|
||
|
return np.array([])
|
||
|
|
||
|
ind = (x == 0).nonzero()[0]
|
||
|
if len(ind) == 0:
|
||
|
return np.arange(len(x))
|
||
|
if len(ind) == len(x):
|
||
|
return np.array([])
|
||
|
|
||
|
y = np.zeros((len(x)+2,), x.dtype)
|
||
|
y[1:-1] = x
|
||
|
dif = np.diff(y)
|
||
|
up = (dif == 1).nonzero()[0]
|
||
|
dn = (dif == -1).nonzero()[0]
|
||
|
i = (dn-up == max(dn - up)).nonzero()[0][0]
|
||
|
ind = np.arange(up[i], dn[i])
|
||
|
|
||
|
return ind
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2')
|
||
|
def longest_ones(x):
|
||
|
'''alias for longest_contiguous_ones'''
|
||
|
return longest_contiguous_ones(x)
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2')
|
||
|
class PCA(object):
|
||
|
def __init__(self, a, standardize=True):
|
||
|
"""
|
||
|
compute the SVD of a and store data for PCA. Use project to
|
||
|
project the data onto a reduced set of dimensions
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
a : np.ndarray
|
||
|
A numobservations x numdims array
|
||
|
standardize : bool
|
||
|
True if input data are to be standardized. If False, only centering
|
||
|
will be carried out.
|
||
|
|
||
|
Attributes
|
||
|
----------
|
||
|
a
|
||
|
A centered unit sigma version of input ``a``.
|
||
|
|
||
|
numrows, numcols
|
||
|
The dimensions of ``a``.
|
||
|
|
||
|
mu
|
||
|
A numdims array of means of ``a``. This is the vector that points
|
||
|
to the origin of PCA space.
|
||
|
|
||
|
sigma
|
||
|
A numdims array of standard deviation of ``a``.
|
||
|
|
||
|
fracs
|
||
|
The proportion of variance of each of the principal components.
|
||
|
|
||
|
s
|
||
|
The actual eigenvalues of the decomposition.
|
||
|
|
||
|
Wt
|
||
|
The weight vector for projecting a numdims point or array into
|
||
|
PCA space.
|
||
|
|
||
|
Y
|
||
|
A projected into PCA space.
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
The factor loadings are in the ``Wt`` factor, i.e., the factor loadings
|
||
|
for the first principal component are given by ``Wt[0]``. This row is
|
||
|
also the first eigenvector.
|
||
|
|
||
|
"""
|
||
|
n, m = a.shape
|
||
|
if n < m:
|
||
|
raise RuntimeError('we assume data in a is organized with '
|
||
|
'numrows>numcols')
|
||
|
|
||
|
self.numrows, self.numcols = n, m
|
||
|
self.mu = a.mean(axis=0)
|
||
|
self.sigma = a.std(axis=0)
|
||
|
self.standardize = standardize
|
||
|
|
||
|
a = self.center(a)
|
||
|
|
||
|
self.a = a
|
||
|
|
||
|
U, s, Vh = np.linalg.svd(a, full_matrices=False)
|
||
|
|
||
|
# Note: .H indicates the conjugate transposed / Hermitian.
|
||
|
|
||
|
# The SVD is commonly written as a = U s V.H.
|
||
|
# If U is a unitary matrix, it means that it satisfies U.H = inv(U).
|
||
|
|
||
|
# The rows of Vh are the eigenvectors of a.H a.
|
||
|
# The columns of U are the eigenvectors of a a.H.
|
||
|
# For row i in Vh and column i in U, the corresponding eigenvalue is
|
||
|
# s[i]**2.
|
||
|
|
||
|
self.Wt = Vh
|
||
|
|
||
|
# save the transposed coordinates
|
||
|
Y = np.dot(Vh, a.T).T
|
||
|
self.Y = Y
|
||
|
|
||
|
# save the eigenvalues
|
||
|
self.s = s**2
|
||
|
|
||
|
# and now the contribution of the individual components
|
||
|
vars = self.s / len(s)
|
||
|
self.fracs = vars/vars.sum()
|
||
|
|
||
|
def project(self, x, minfrac=0.):
|
||
|
'''
|
||
|
project x onto the principle axes, dropping any axes where fraction
|
||
|
of variance<minfrac
|
||
|
'''
|
||
|
x = np.asarray(x)
|
||
|
if x.shape[-1] != self.numcols:
|
||
|
raise ValueError('Expected an array with dims[-1]==%d' %
|
||
|
self.numcols)
|
||
|
Y = np.dot(self.Wt, self.center(x).T).T
|
||
|
mask = self.fracs >= minfrac
|
||
|
if x.ndim == 2:
|
||
|
Yreduced = Y[:, mask]
|
||
|
else:
|
||
|
Yreduced = Y[mask]
|
||
|
return Yreduced
|
||
|
|
||
|
def center(self, x):
|
||
|
'''
|
||
|
center and optionally standardize the data using the mean and sigma
|
||
|
from training set a
|
||
|
'''
|
||
|
if self.standardize:
|
||
|
return (x - self.mu)/self.sigma
|
||
|
else:
|
||
|
return (x - self.mu)
|
||
|
|
||
|
@staticmethod
|
||
|
def _get_colinear():
|
||
|
c0 = np.array([
|
||
|
0.19294738, 0.6202667, 0.45962655, 0.07608613, 0.135818,
|
||
|
0.83580842, 0.07218851, 0.48318321, 0.84472463, 0.18348462,
|
||
|
0.81585306, 0.96923926, 0.12835919, 0.35075355, 0.15807861,
|
||
|
0.837437, 0.10824303, 0.1723387, 0.43926494, 0.83705486])
|
||
|
|
||
|
c1 = np.array([
|
||
|
-1.17705601, -0.513883, -0.26614584, 0.88067144, 1.00474954,
|
||
|
-1.1616545, 0.0266109, 0.38227157, 1.80489433, 0.21472396,
|
||
|
-1.41920399, -2.08158544, -0.10559009, 1.68999268, 0.34847107,
|
||
|
-0.4685737, 1.23980423, -0.14638744, -0.35907697, 0.22442616])
|
||
|
|
||
|
c2 = c0 + 2*c1
|
||
|
c3 = -3*c0 + 4*c1
|
||
|
a = np.array([c3, c0, c1, c2]).T
|
||
|
return a
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2', 'numpy.percentile')
|
||
|
def prctile(x, p=(0.0, 25.0, 50.0, 75.0, 100.0)):
|
||
|
"""
|
||
|
Return the percentiles of *x*. *p* can either be a sequence of
|
||
|
percentile values or a scalar. If *p* is a sequence, the ith
|
||
|
element of the return sequence is the *p*(i)-th percentile of *x*.
|
||
|
If *p* is a scalar, the largest value of *x* less than or equal to
|
||
|
the *p* percentage point in the sequence is returned.
|
||
|
"""
|
||
|
|
||
|
# This implementation derived from scipy.stats.scoreatpercentile
|
||
|
def _interpolate(a, b, fraction):
|
||
|
"""Returns the point at the given fraction between a and b, where
|
||
|
'fraction' must be between 0 and 1.
|
||
|
"""
|
||
|
return a + (b - a) * fraction
|
||
|
|
||
|
per = np.array(p)
|
||
|
values = np.sort(x, axis=None)
|
||
|
|
||
|
idxs = per / 100 * (values.shape[0] - 1)
|
||
|
ai = idxs.astype(int)
|
||
|
bi = ai + 1
|
||
|
frac = idxs % 1
|
||
|
|
||
|
# handle cases where attempting to interpolate past last index
|
||
|
cond = bi >= len(values)
|
||
|
if per.ndim:
|
||
|
ai[cond] -= 1
|
||
|
bi[cond] -= 1
|
||
|
frac[cond] += 1
|
||
|
else:
|
||
|
if cond:
|
||
|
ai -= 1
|
||
|
bi -= 1
|
||
|
frac += 1
|
||
|
|
||
|
return _interpolate(values[ai], values[bi], frac)
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2')
|
||
|
def prctile_rank(x, p):
|
||
|
"""
|
||
|
Return the rank for each element in *x*, return the rank
|
||
|
0..len(*p*). e.g., if *p* = (25, 50, 75), the return value will be a
|
||
|
len(*x*) array with values in [0,1,2,3] where 0 indicates the
|
||
|
value is less than the 25th percentile, 1 indicates the value is
|
||
|
>= the 25th and < 50th percentile, ... and 3 indicates the value
|
||
|
is above the 75th percentile cutoff.
|
||
|
|
||
|
*p* is either an array of percentiles in [0..100] or a scalar which
|
||
|
indicates how many quantiles of data you want ranked.
|
||
|
"""
|
||
|
|
||
|
if not cbook.iterable(p):
|
||
|
p = np.arange(100.0/p, 100.0, 100.0/p)
|
||
|
else:
|
||
|
p = np.asarray(p)
|
||
|
|
||
|
if p.max() <= 1 or p.min() < 0 or p.max() > 100:
|
||
|
raise ValueError('percentiles should be in range 0..100, not 0..1')
|
||
|
|
||
|
ptiles = prctile(x, p)
|
||
|
return np.searchsorted(ptiles, x)
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2')
|
||
|
def center_matrix(M, dim=0):
|
||
|
"""
|
||
|
Return the matrix *M* with each row having zero mean and unit std.
|
||
|
|
||
|
If *dim* = 1 operate on columns instead of rows. (*dim* is
|
||
|
opposite to the numpy axis kwarg.)
|
||
|
"""
|
||
|
M = np.asarray(M, float)
|
||
|
if dim:
|
||
|
M = (M - M.mean(axis=0)) / M.std(axis=0)
|
||
|
else:
|
||
|
M = (M - M.mean(axis=1)[:, np.newaxis])
|
||
|
M = M / M.std(axis=1)[:, np.newaxis]
|
||
|
return M
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2', 'scipy.integrate.ode')
|
||
|
def rk4(derivs, y0, t):
|
||
|
"""
|
||
|
Integrate 1D or ND system of ODEs using 4-th order Runge-Kutta.
|
||
|
This is a toy implementation which may be useful if you find
|
||
|
yourself stranded on a system w/o scipy. Otherwise use
|
||
|
:func:`scipy.integrate`.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
y0
|
||
|
initial state vector
|
||
|
|
||
|
t
|
||
|
sample times
|
||
|
|
||
|
derivs
|
||
|
returns the derivative of the system and has the
|
||
|
signature ``dy = derivs(yi, ti)``
|
||
|
|
||
|
Examples
|
||
|
--------
|
||
|
|
||
|
A 2D system::
|
||
|
|
||
|
def derivs6(x,t):
|
||
|
d1 = x[0] + 2*x[1]
|
||
|
d2 = -3*x[0] + 4*x[1]
|
||
|
return (d1, d2)
|
||
|
dt = 0.0005
|
||
|
t = arange(0.0, 2.0, dt)
|
||
|
y0 = (1,2)
|
||
|
yout = rk4(derivs6, y0, t)
|
||
|
|
||
|
A 1D system::
|
||
|
|
||
|
alpha = 2
|
||
|
def derivs(x,t):
|
||
|
return -alpha*x + exp(-t)
|
||
|
|
||
|
y0 = 1
|
||
|
yout = rk4(derivs, y0, t)
|
||
|
|
||
|
If you have access to scipy, you should probably be using the
|
||
|
scipy.integrate tools rather than this function.
|
||
|
"""
|
||
|
|
||
|
try:
|
||
|
Ny = len(y0)
|
||
|
except TypeError:
|
||
|
yout = np.zeros((len(t),), float)
|
||
|
else:
|
||
|
yout = np.zeros((len(t), Ny), float)
|
||
|
|
||
|
yout[0] = y0
|
||
|
i = 0
|
||
|
|
||
|
for i in np.arange(len(t)-1):
|
||
|
|
||
|
thist = t[i]
|
||
|
dt = t[i+1] - thist
|
||
|
dt2 = dt/2.0
|
||
|
y0 = yout[i]
|
||
|
|
||
|
k1 = np.asarray(derivs(y0, thist))
|
||
|
k2 = np.asarray(derivs(y0 + dt2*k1, thist+dt2))
|
||
|
k3 = np.asarray(derivs(y0 + dt2*k2, thist+dt2))
|
||
|
k4 = np.asarray(derivs(y0 + dt*k3, thist+dt))
|
||
|
yout[i+1] = y0 + dt/6.0*(k1 + 2*k2 + 2*k3 + k4)
|
||
|
return yout
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2')
|
||
|
def bivariate_normal(X, Y, sigmax=1.0, sigmay=1.0,
|
||
|
mux=0.0, muy=0.0, sigmaxy=0.0):
|
||
|
"""
|
||
|
Bivariate Gaussian distribution for equal shape *X*, *Y*.
|
||
|
|
||
|
See `bivariate normal
|
||
|
<http://mathworld.wolfram.com/BivariateNormalDistribution.html>`_
|
||
|
at mathworld.
|
||
|
"""
|
||
|
Xmu = X-mux
|
||
|
Ymu = Y-muy
|
||
|
|
||
|
rho = sigmaxy/(sigmax*sigmay)
|
||
|
z = Xmu**2/sigmax**2 + Ymu**2/sigmay**2 - 2*rho*Xmu*Ymu/(sigmax*sigmay)
|
||
|
denom = 2*np.pi*sigmax*sigmay*np.sqrt(1-rho**2)
|
||
|
return np.exp(-z/(2*(1-rho**2))) / denom
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2')
|
||
|
def get_xyz_where(Z, Cond):
|
||
|
"""
|
||
|
*Z* and *Cond* are *M* x *N* matrices. *Z* are data and *Cond* is
|
||
|
a boolean matrix where some condition is satisfied. Return value
|
||
|
is (*x*, *y*, *z*) where *x* and *y* are the indices into *Z* and
|
||
|
*z* are the values of *Z* at those indices. *x*, *y*, and *z* are
|
||
|
1D arrays.
|
||
|
"""
|
||
|
X, Y = np.indices(Z.shape)
|
||
|
return X[Cond], Y[Cond], Z[Cond]
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2')
|
||
|
def get_sparse_matrix(M, N, frac=0.1):
|
||
|
"""
|
||
|
Return a *M* x *N* sparse matrix with *frac* elements randomly
|
||
|
filled.
|
||
|
"""
|
||
|
data = np.zeros((M, N))*0.
|
||
|
for i in range(int(M*N*frac)):
|
||
|
x = np.random.randint(0, M-1)
|
||
|
y = np.random.randint(0, N-1)
|
||
|
data[x, y] = np.random.rand()
|
||
|
return data
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2', 'numpy.hypot')
|
||
|
def dist(x, y):
|
||
|
"""
|
||
|
Return the distance between two points.
|
||
|
"""
|
||
|
d = x-y
|
||
|
return np.sqrt(np.dot(d, d))
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2')
|
||
|
def dist_point_to_segment(p, s0, s1):
|
||
|
"""
|
||
|
Get the distance of a point to a segment.
|
||
|
|
||
|
*p*, *s0*, *s1* are *xy* sequences
|
||
|
|
||
|
This algorithm from
|
||
|
http://geomalgorithms.com/a02-_lines.html
|
||
|
"""
|
||
|
p = np.asarray(p, float)
|
||
|
s0 = np.asarray(s0, float)
|
||
|
s1 = np.asarray(s1, float)
|
||
|
v = s1 - s0
|
||
|
w = p - s0
|
||
|
|
||
|
c1 = np.dot(w, v)
|
||
|
if c1 <= 0:
|
||
|
return dist(p, s0)
|
||
|
|
||
|
c2 = np.dot(v, v)
|
||
|
if c2 <= c1:
|
||
|
return dist(p, s1)
|
||
|
|
||
|
b = c1 / c2
|
||
|
pb = s0 + b * v
|
||
|
return dist(p, pb)
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2')
|
||
|
def segments_intersect(s1, s2):
|
||
|
"""
|
||
|
Return *True* if *s1* and *s2* intersect.
|
||
|
*s1* and *s2* are defined as::
|
||
|
|
||
|
s1: (x1, y1), (x2, y2)
|
||
|
s2: (x3, y3), (x4, y4)
|
||
|
"""
|
||
|
(x1, y1), (x2, y2) = s1
|
||
|
(x3, y3), (x4, y4) = s2
|
||
|
|
||
|
den = ((y4-y3) * (x2-x1)) - ((x4-x3)*(y2-y1))
|
||
|
|
||
|
n1 = ((x4-x3) * (y1-y3)) - ((y4-y3)*(x1-x3))
|
||
|
n2 = ((x2-x1) * (y1-y3)) - ((y2-y1)*(x1-x3))
|
||
|
|
||
|
if den == 0:
|
||
|
# lines parallel
|
||
|
return False
|
||
|
|
||
|
u1 = n1/den
|
||
|
u2 = n2/den
|
||
|
|
||
|
return 0.0 <= u1 <= 1.0 and 0.0 <= u2 <= 1.0
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2')
|
||
|
def fftsurr(x, detrend=detrend_none, window=window_none):
|
||
|
"""
|
||
|
Compute an FFT phase randomized surrogate of *x*.
|
||
|
"""
|
||
|
if cbook.iterable(window):
|
||
|
x = window*detrend(x)
|
||
|
else:
|
||
|
x = window(detrend(x))
|
||
|
z = np.fft.fft(x)
|
||
|
a = 2.*np.pi*1j
|
||
|
phase = a * np.random.rand(len(x))
|
||
|
z = z*np.exp(phase)
|
||
|
return np.fft.ifft(z).real
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2')
|
||
|
def movavg(x, n):
|
||
|
"""
|
||
|
Compute the len(*n*) moving average of *x*.
|
||
|
"""
|
||
|
w = np.empty((n,), dtype=float)
|
||
|
w[:] = 1.0/n
|
||
|
return np.convolve(x, w, mode='valid')
|
||
|
|
||
|
|
||
|
# the following code was written and submitted by Fernando Perez
|
||
|
# from the ipython numutils package under a BSD license
|
||
|
# begin fperez functions
|
||
|
|
||
|
"""
|
||
|
A set of convenient utilities for numerical work.
|
||
|
|
||
|
Most of this module requires numpy or is meant to be used with it.
|
||
|
|
||
|
Copyright (c) 2001-2004, Fernando Perez. <Fernando.Perez@colorado.edu>
|
||
|
All rights reserved.
|
||
|
|
||
|
This license was generated from the BSD license template as found in:
|
||
|
http://www.opensource.org/licenses/bsd-license.php
|
||
|
|
||
|
Redistribution and use in source and binary forms, with or without
|
||
|
modification, are permitted provided that the following conditions are met:
|
||
|
|
||
|
* Redistributions of source code must retain the above copyright notice,
|
||
|
this list of conditions and the following disclaimer.
|
||
|
|
||
|
* Redistributions in binary form must reproduce the above copyright
|
||
|
notice, this list of conditions and the following disclaimer in the
|
||
|
documentation and/or other materials provided with the distribution.
|
||
|
|
||
|
* Neither the name of the IPython project nor the names of its
|
||
|
contributors may be used to endorse or promote products derived from
|
||
|
this software without specific prior written permission.
|
||
|
|
||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||
|
|
||
|
"""
|
||
|
|
||
|
|
||
|
# *****************************************************************************
|
||
|
# Globals
|
||
|
# ****************************************************************************
|
||
|
# function definitions
|
||
|
exp_safe_MIN = math.log(2.2250738585072014e-308)
|
||
|
exp_safe_MAX = 1.7976931348623157e+308
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2", 'numpy.exp')
|
||
|
def exp_safe(x):
|
||
|
"""
|
||
|
Compute exponentials which safely underflow to zero.
|
||
|
|
||
|
Slow, but convenient to use. Note that numpy provides proper
|
||
|
floating point exception handling with access to the underlying
|
||
|
hardware.
|
||
|
"""
|
||
|
|
||
|
if type(x) is np.ndarray:
|
||
|
return np.exp(np.clip(x, exp_safe_MIN, exp_safe_MAX))
|
||
|
else:
|
||
|
return math.exp(x)
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2", alternative='numpy.array(list(map(...)))')
|
||
|
def amap(fn, *args):
|
||
|
"""
|
||
|
amap(function, sequence[, sequence, ...]) -> array.
|
||
|
|
||
|
Works like :func:`map`, but it returns an array. This is just a
|
||
|
convenient shorthand for ``numpy.array(map(...))``.
|
||
|
"""
|
||
|
return np.array(list(map(fn, *args)))
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def rms_flat(a):
|
||
|
"""
|
||
|
Return the root mean square of all the elements of *a*, flattened out.
|
||
|
"""
|
||
|
return np.sqrt(np.mean(np.abs(a) ** 2))
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2", alternative='numpy.linalg.norm(a, ord=1)')
|
||
|
def l1norm(a):
|
||
|
"""
|
||
|
Return the *l1* norm of *a*, flattened out.
|
||
|
|
||
|
Implemented as a separate function (not a call to :func:`norm` for speed).
|
||
|
"""
|
||
|
return np.sum(np.abs(a))
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2", alternative='numpy.linalg.norm(a, ord=2)')
|
||
|
def l2norm(a):
|
||
|
"""
|
||
|
Return the *l2* norm of *a*, flattened out.
|
||
|
|
||
|
Implemented as a separate function (not a call to :func:`norm` for speed).
|
||
|
"""
|
||
|
return np.sqrt(np.sum(np.abs(a) ** 2))
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2", alternative='numpy.linalg.norm(a.flat, ord=p)')
|
||
|
def norm_flat(a, p=2):
|
||
|
"""
|
||
|
norm(a,p=2) -> l-p norm of a.flat
|
||
|
|
||
|
Return the l-p norm of *a*, considered as a flat array. This is NOT a true
|
||
|
matrix norm, since arrays of arbitrary rank are always flattened.
|
||
|
|
||
|
*p* can be a number or the string 'Infinity' to get the L-infinity norm.
|
||
|
"""
|
||
|
# This function was being masked by a more general norm later in
|
||
|
# the file. We may want to simply delete it.
|
||
|
if p == 'Infinity':
|
||
|
return np.max(np.abs(a))
|
||
|
else:
|
||
|
return np.sum(np.abs(a) ** p) ** (1 / p)
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2", 'numpy.arange')
|
||
|
def frange(xini, xfin=None, delta=None, **kw):
|
||
|
"""
|
||
|
frange([start,] stop[, step, keywords]) -> array of floats
|
||
|
|
||
|
Return a numpy ndarray containing a progression of floats. Similar to
|
||
|
:func:`numpy.arange`, but defaults to a closed interval.
|
||
|
|
||
|
``frange(x0, x1)`` returns ``[x0, x0+1, x0+2, ..., x1]``; *start*
|
||
|
defaults to 0, and the endpoint *is included*. This behavior is
|
||
|
different from that of :func:`range` and
|
||
|
:func:`numpy.arange`. This is deliberate, since :func:`frange`
|
||
|
will probably be more useful for generating lists of points for
|
||
|
function evaluation, and endpoints are often desired in this
|
||
|
use. The usual behavior of :func:`range` can be obtained by
|
||
|
setting the keyword *closed* = 0, in this case, :func:`frange`
|
||
|
basically becomes :func:numpy.arange`.
|
||
|
|
||
|
When *step* is given, it specifies the increment (or
|
||
|
decrement). All arguments can be floating point numbers.
|
||
|
|
||
|
``frange(x0,x1,d)`` returns ``[x0,x0+d,x0+2d,...,xfin]`` where
|
||
|
*xfin* <= *x1*.
|
||
|
|
||
|
:func:`frange` can also be called with the keyword *npts*. This
|
||
|
sets the number of points the list should contain (and overrides
|
||
|
the value *step* might have been given). :func:`numpy.arange`
|
||
|
doesn't offer this option.
|
||
|
|
||
|
Examples::
|
||
|
|
||
|
>>> frange(3)
|
||
|
array([ 0., 1., 2., 3.])
|
||
|
>>> frange(3,closed=0)
|
||
|
array([ 0., 1., 2.])
|
||
|
>>> frange(1,6,2)
|
||
|
array([1, 3, 5]) or 1,3,5,7, depending on floating point vagueries
|
||
|
>>> frange(1,6.5,npts=5)
|
||
|
array([ 1. , 2.375, 3.75 , 5.125, 6.5 ])
|
||
|
"""
|
||
|
|
||
|
# defaults
|
||
|
kw.setdefault('closed', 1)
|
||
|
endpoint = kw['closed'] != 0
|
||
|
|
||
|
# funny logic to allow the *first* argument to be optional (like range())
|
||
|
# This was modified with a simpler version from a similar frange() found
|
||
|
# at http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66472
|
||
|
if xfin is None:
|
||
|
xfin = xini + 0.0
|
||
|
xini = 0.0
|
||
|
|
||
|
if delta is None:
|
||
|
delta = 1.0
|
||
|
|
||
|
# compute # of points, spacing and return final list
|
||
|
try:
|
||
|
npts = kw['npts']
|
||
|
delta = (xfin-xini) / (npts-endpoint)
|
||
|
except KeyError:
|
||
|
npts = int(np.round((xfin-xini)/delta)) + endpoint
|
||
|
# round finds the nearest, so the endpoint can be up to
|
||
|
# delta/2 larger than xfin.
|
||
|
|
||
|
return np.arange(npts)*delta+xini
|
||
|
# end frange()
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2", 'numpy.identity')
|
||
|
def identity(n, rank=2, dtype='l', typecode=None):
|
||
|
"""
|
||
|
Returns the identity matrix of shape (*n*, *n*, ..., *n*) (rank *r*).
|
||
|
|
||
|
For ranks higher than 2, this object is simply a multi-index Kronecker
|
||
|
delta::
|
||
|
|
||
|
/ 1 if i0=i1=...=iR,
|
||
|
id[i0,i1,...,iR] = -|
|
||
|
\\ 0 otherwise.
|
||
|
|
||
|
Optionally a *dtype* (or typecode) may be given (it defaults to 'l').
|
||
|
|
||
|
Since rank defaults to 2, this function behaves in the default case (when
|
||
|
only *n* is given) like ``numpy.identity(n)`` -- but surprisingly, it is
|
||
|
much faster.
|
||
|
"""
|
||
|
if typecode is not None:
|
||
|
dtype = typecode
|
||
|
iden = np.zeros((n,)*rank, dtype)
|
||
|
for i in range(n):
|
||
|
idx = (i,)*rank
|
||
|
iden[idx] = 1
|
||
|
return iden
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def base_repr(number, base=2, padding=0):
|
||
|
"""
|
||
|
Return the representation of a *number* in any given *base*.
|
||
|
"""
|
||
|
chars = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
||
|
if number < base:
|
||
|
return (padding - 1) * chars[0] + chars[int(number)]
|
||
|
max_exponent = int(math.log(number)/math.log(base))
|
||
|
max_power = int(base) ** max_exponent
|
||
|
lead_digit = int(number/max_power)
|
||
|
return (chars[lead_digit] +
|
||
|
base_repr(number - max_power * lead_digit, base,
|
||
|
max(padding - 1, max_exponent)))
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def binary_repr(number, max_length=1025):
|
||
|
"""
|
||
|
Return the binary representation of the input *number* as a
|
||
|
string.
|
||
|
|
||
|
This is more efficient than using :func:`base_repr` with base 2.
|
||
|
|
||
|
Increase the value of max_length for very large numbers. Note that
|
||
|
on 32-bit machines, 2**1023 is the largest integer power of 2
|
||
|
which can be converted to a Python float.
|
||
|
"""
|
||
|
|
||
|
# assert number < 2L << max_length
|
||
|
shifts = map(operator.rshift, max_length * [number],
|
||
|
range(max_length - 1, -1, -1))
|
||
|
digits = list(map(operator.mod, shifts, max_length * [2]))
|
||
|
if not digits.count(1):
|
||
|
return 0
|
||
|
digits = digits[digits.index(1):]
|
||
|
return ''.join(map(repr, digits)).replace('L', '')
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2", 'numpy.log2')
|
||
|
def log2(x, ln2=math.log(2.0)):
|
||
|
"""
|
||
|
Return the log(*x*) in base 2.
|
||
|
|
||
|
This is a _slow_ function but which is guaranteed to return the correct
|
||
|
integer value if the input is an integer exact power of 2.
|
||
|
"""
|
||
|
try:
|
||
|
bin_n = binary_repr(x)[1:]
|
||
|
except (AssertionError, TypeError):
|
||
|
return math.log(x)/ln2
|
||
|
else:
|
||
|
if '1' in bin_n:
|
||
|
return math.log(x)/ln2
|
||
|
else:
|
||
|
return len(bin_n)
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def ispower2(n):
|
||
|
"""
|
||
|
Returns the log base 2 of *n* if *n* is a power of 2, zero otherwise.
|
||
|
|
||
|
Note the potential ambiguity if *n* == 1: 2**0 == 1, interpret accordingly.
|
||
|
"""
|
||
|
|
||
|
bin_n = binary_repr(n)[1:]
|
||
|
if '1' in bin_n:
|
||
|
return 0
|
||
|
else:
|
||
|
return len(bin_n)
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def isvector(X):
|
||
|
"""
|
||
|
Like the MATLAB function with the same name, returns *True*
|
||
|
if the supplied numpy array or matrix *X* looks like a vector,
|
||
|
meaning it has a one non-singleton axis (i.e., it can have
|
||
|
multiple axes, but all must have length 1, except for one of
|
||
|
them).
|
||
|
|
||
|
If you just want to see if the array has 1 axis, use X.ndim == 1.
|
||
|
"""
|
||
|
return np.prod(X.shape) == np.max(X.shape)
|
||
|
|
||
|
# end fperez numutils code
|
||
|
|
||
|
|
||
|
# helpers for loading, saving, manipulating and viewing numpy record arrays
|
||
|
@cbook.deprecated("2.2", 'numpy.isnan')
|
||
|
def safe_isnan(x):
|
||
|
':func:`numpy.isnan` for arbitrary types'
|
||
|
if isinstance(x, str):
|
||
|
return False
|
||
|
try:
|
||
|
b = np.isnan(x)
|
||
|
except NotImplementedError:
|
||
|
return False
|
||
|
except TypeError:
|
||
|
return False
|
||
|
else:
|
||
|
return b
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2", 'numpy.isinf')
|
||
|
def safe_isinf(x):
|
||
|
':func:`numpy.isinf` for arbitrary types'
|
||
|
if isinstance(x, str):
|
||
|
return False
|
||
|
try:
|
||
|
b = np.isinf(x)
|
||
|
except NotImplementedError:
|
||
|
return False
|
||
|
except TypeError:
|
||
|
return False
|
||
|
else:
|
||
|
return b
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def rec_append_fields(rec, names, arrs, dtypes=None):
|
||
|
"""
|
||
|
Return a new record array with field names populated with data
|
||
|
from arrays in *arrs*. If appending a single field, then *names*,
|
||
|
*arrs* and *dtypes* do not have to be lists. They can just be the
|
||
|
values themselves.
|
||
|
"""
|
||
|
if (not isinstance(names, str) and cbook.iterable(names)
|
||
|
and len(names) and isinstance(names[0], str)):
|
||
|
if len(names) != len(arrs):
|
||
|
raise ValueError("number of arrays do not match number of names")
|
||
|
else: # we have only 1 name and 1 array
|
||
|
names = [names]
|
||
|
arrs = [arrs]
|
||
|
arrs = list(map(np.asarray, arrs))
|
||
|
if dtypes is None:
|
||
|
dtypes = [a.dtype for a in arrs]
|
||
|
elif not cbook.iterable(dtypes):
|
||
|
dtypes = [dtypes]
|
||
|
if len(arrs) != len(dtypes):
|
||
|
if len(dtypes) == 1:
|
||
|
dtypes = dtypes * len(arrs)
|
||
|
else:
|
||
|
raise ValueError("dtypes must be None, a single dtype or a list")
|
||
|
old_dtypes = rec.dtype.descr
|
||
|
newdtype = np.dtype(old_dtypes + list(zip(names, dtypes)))
|
||
|
newrec = np.recarray(rec.shape, dtype=newdtype)
|
||
|
for field in rec.dtype.fields:
|
||
|
newrec[field] = rec[field]
|
||
|
for name, arr in zip(names, arrs):
|
||
|
newrec[name] = arr
|
||
|
return newrec
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def rec_drop_fields(rec, names):
|
||
|
"""
|
||
|
Return a new numpy record array with fields in *names* dropped.
|
||
|
"""
|
||
|
|
||
|
names = set(names)
|
||
|
|
||
|
newdtype = np.dtype([(name, rec.dtype[name]) for name in rec.dtype.names
|
||
|
if name not in names])
|
||
|
|
||
|
newrec = np.recarray(rec.shape, dtype=newdtype)
|
||
|
for field in newdtype.names:
|
||
|
newrec[field] = rec[field]
|
||
|
|
||
|
return newrec
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def rec_keep_fields(rec, names):
|
||
|
"""
|
||
|
Return a new numpy record array with only fields listed in names
|
||
|
"""
|
||
|
|
||
|
if isinstance(names, str):
|
||
|
names = names.split(',')
|
||
|
|
||
|
arrays = []
|
||
|
for name in names:
|
||
|
arrays.append(rec[name])
|
||
|
|
||
|
return np.rec.fromarrays(arrays, names=names)
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def rec_groupby(r, groupby, stats):
|
||
|
"""
|
||
|
*r* is a numpy record array
|
||
|
|
||
|
*groupby* is a sequence of record array attribute names that
|
||
|
together form the grouping key. e.g., ('date', 'productcode')
|
||
|
|
||
|
*stats* is a sequence of (*attr*, *func*, *outname*) tuples which
|
||
|
will call ``x = func(attr)`` and assign *x* to the record array
|
||
|
output with attribute *outname*. For example::
|
||
|
|
||
|
stats = ( ('sales', len, 'numsales'), ('sales', np.mean, 'avgsale') )
|
||
|
|
||
|
Return record array has *dtype* names for each attribute name in
|
||
|
the *groupby* argument, with the associated group values, and
|
||
|
for each outname name in the *stats* argument, with the associated
|
||
|
stat summary output.
|
||
|
"""
|
||
|
# build a dictionary from groupby keys-> list of indices into r with
|
||
|
# those keys
|
||
|
rowd = {}
|
||
|
for i, row in enumerate(r):
|
||
|
key = tuple([row[attr] for attr in groupby])
|
||
|
rowd.setdefault(key, []).append(i)
|
||
|
|
||
|
rows = []
|
||
|
# sort the output by groupby keys
|
||
|
for key in sorted(rowd):
|
||
|
row = list(key)
|
||
|
# get the indices for this groupby key
|
||
|
ind = rowd[key]
|
||
|
thisr = r[ind]
|
||
|
# call each stat function for this groupby slice
|
||
|
row.extend([func(thisr[attr]) for attr, func, outname in stats])
|
||
|
rows.append(row)
|
||
|
|
||
|
# build the output record array with groupby and outname attributes
|
||
|
attrs, funcs, outnames = list(zip(*stats))
|
||
|
names = list(groupby)
|
||
|
names.extend(outnames)
|
||
|
return np.rec.fromrecords(rows, names=names)
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def rec_summarize(r, summaryfuncs):
|
||
|
"""
|
||
|
*r* is a numpy record array
|
||
|
|
||
|
*summaryfuncs* is a list of (*attr*, *func*, *outname*) tuples
|
||
|
which will apply *func* to the array *r*[attr] and assign the
|
||
|
output to a new attribute name *outname*. The returned record
|
||
|
array is identical to *r*, with extra arrays for each element in
|
||
|
*summaryfuncs*.
|
||
|
|
||
|
"""
|
||
|
|
||
|
names = list(r.dtype.names)
|
||
|
arrays = [r[name] for name in names]
|
||
|
|
||
|
for attr, func, outname in summaryfuncs:
|
||
|
names.append(outname)
|
||
|
arrays.append(np.asarray(func(r[attr])))
|
||
|
|
||
|
return np.rec.fromarrays(arrays, names=names)
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def rec_join(key, r1, r2, jointype='inner', defaults=None, r1postfix='1',
|
||
|
r2postfix='2'):
|
||
|
"""
|
||
|
Join record arrays *r1* and *r2* on *key*; *key* is a tuple of
|
||
|
field names -- if *key* is a string it is assumed to be a single
|
||
|
attribute name. If *r1* and *r2* have equal values on all the keys
|
||
|
in the *key* tuple, then their fields will be merged into a new
|
||
|
record array containing the intersection of the fields of *r1* and
|
||
|
*r2*.
|
||
|
|
||
|
*r1* (also *r2*) must not have any duplicate keys.
|
||
|
|
||
|
The *jointype* keyword can be 'inner', 'outer', 'leftouter'. To
|
||
|
do a rightouter join just reverse *r1* and *r2*.
|
||
|
|
||
|
The *defaults* keyword is a dictionary filled with
|
||
|
``{column_name:default_value}`` pairs.
|
||
|
|
||
|
The keywords *r1postfix* and *r2postfix* are postfixed to column names
|
||
|
(other than keys) that are both in *r1* and *r2*.
|
||
|
"""
|
||
|
|
||
|
if isinstance(key, str):
|
||
|
key = (key, )
|
||
|
|
||
|
for name in key:
|
||
|
if name not in r1.dtype.names:
|
||
|
raise ValueError('r1 does not have key field %s' % name)
|
||
|
if name not in r2.dtype.names:
|
||
|
raise ValueError('r2 does not have key field %s' % name)
|
||
|
|
||
|
def makekey(row):
|
||
|
return tuple([row[name] for name in key])
|
||
|
|
||
|
r1d = {makekey(row): i for i, row in enumerate(r1)}
|
||
|
r2d = {makekey(row): i for i, row in enumerate(r2)}
|
||
|
|
||
|
r1keys = set(r1d)
|
||
|
r2keys = set(r2d)
|
||
|
|
||
|
common_keys = r1keys & r2keys
|
||
|
|
||
|
r1ind = np.array([r1d[k] for k in common_keys])
|
||
|
r2ind = np.array([r2d[k] for k in common_keys])
|
||
|
|
||
|
common_len = len(common_keys)
|
||
|
left_len = right_len = 0
|
||
|
if jointype == "outer" or jointype == "leftouter":
|
||
|
left_keys = r1keys.difference(r2keys)
|
||
|
left_ind = np.array([r1d[k] for k in left_keys])
|
||
|
left_len = len(left_ind)
|
||
|
if jointype == "outer":
|
||
|
right_keys = r2keys.difference(r1keys)
|
||
|
right_ind = np.array([r2d[k] for k in right_keys])
|
||
|
right_len = len(right_ind)
|
||
|
|
||
|
def key_desc(name):
|
||
|
'''
|
||
|
if name is a string key, use the larger size of r1 or r2 before
|
||
|
merging
|
||
|
'''
|
||
|
dt1 = r1.dtype[name]
|
||
|
if dt1.type != np.string_:
|
||
|
return (name, dt1.descr[0][1])
|
||
|
|
||
|
dt2 = r2.dtype[name]
|
||
|
if dt1 != dt2:
|
||
|
raise ValueError("The '{}' fields in arrays 'r1' and 'r2' must "
|
||
|
"have the same dtype".format(name))
|
||
|
if dt1.num > dt2.num:
|
||
|
return (name, dt1.descr[0][1])
|
||
|
else:
|
||
|
return (name, dt2.descr[0][1])
|
||
|
|
||
|
keydesc = [key_desc(name) for name in key]
|
||
|
|
||
|
def mapped_r1field(name):
|
||
|
"""
|
||
|
The column name in *newrec* that corresponds to the column in *r1*.
|
||
|
"""
|
||
|
if name in key or name not in r2.dtype.names:
|
||
|
return name
|
||
|
else:
|
||
|
return name + r1postfix
|
||
|
|
||
|
def mapped_r2field(name):
|
||
|
"""
|
||
|
The column name in *newrec* that corresponds to the column in *r2*.
|
||
|
"""
|
||
|
if name in key or name not in r1.dtype.names:
|
||
|
return name
|
||
|
else:
|
||
|
return name + r2postfix
|
||
|
|
||
|
r1desc = [(mapped_r1field(desc[0]), desc[1]) for desc in r1.dtype.descr
|
||
|
if desc[0] not in key]
|
||
|
r2desc = [(mapped_r2field(desc[0]), desc[1]) for desc in r2.dtype.descr
|
||
|
if desc[0] not in key]
|
||
|
all_dtypes = keydesc + r1desc + r2desc
|
||
|
newdtype = np.dtype(all_dtypes)
|
||
|
newrec = np.recarray((common_len + left_len + right_len,), dtype=newdtype)
|
||
|
|
||
|
if defaults is not None:
|
||
|
for thiskey in defaults:
|
||
|
if thiskey not in newdtype.names:
|
||
|
warnings.warn('rec_join defaults key="%s" not in new dtype '
|
||
|
'names "%s"' % (thiskey, newdtype.names))
|
||
|
|
||
|
for name in newdtype.names:
|
||
|
dt = newdtype[name]
|
||
|
if dt.kind in ('f', 'i'):
|
||
|
newrec[name] = 0
|
||
|
|
||
|
if jointype != 'inner' and defaults is not None:
|
||
|
# fill in the defaults enmasse
|
||
|
newrec_fields = list(newrec.dtype.fields)
|
||
|
for k, v in defaults.items():
|
||
|
if k in newrec_fields:
|
||
|
newrec[k] = v
|
||
|
|
||
|
for field in r1.dtype.names:
|
||
|
newfield = mapped_r1field(field)
|
||
|
if common_len:
|
||
|
newrec[newfield][:common_len] = r1[field][r1ind]
|
||
|
if (jointype == "outer" or jointype == "leftouter") and left_len:
|
||
|
newrec[newfield][common_len:(common_len+left_len)] = (
|
||
|
r1[field][left_ind]
|
||
|
)
|
||
|
|
||
|
for field in r2.dtype.names:
|
||
|
newfield = mapped_r2field(field)
|
||
|
if field not in key and common_len:
|
||
|
newrec[newfield][:common_len] = r2[field][r2ind]
|
||
|
if jointype == "outer" and right_len:
|
||
|
newrec[newfield][-right_len:] = r2[field][right_ind]
|
||
|
|
||
|
newrec.sort(order=key)
|
||
|
|
||
|
return newrec
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def recs_join(key, name, recs, jointype='outer', missing=0., postfixes=None):
|
||
|
"""
|
||
|
Join a sequence of record arrays on single column key.
|
||
|
|
||
|
This function only joins a single column of the multiple record arrays
|
||
|
|
||
|
*key*
|
||
|
is the column name that acts as a key
|
||
|
|
||
|
*name*
|
||
|
is the name of the column that we want to join
|
||
|
|
||
|
*recs*
|
||
|
is a list of record arrays to join
|
||
|
|
||
|
*jointype*
|
||
|
is a string 'inner' or 'outer'
|
||
|
|
||
|
*missing*
|
||
|
is what any missing field is replaced by
|
||
|
|
||
|
*postfixes*
|
||
|
if not None, a len recs sequence of postfixes
|
||
|
|
||
|
returns a record array with columns [rowkey, name0, name1, ... namen-1].
|
||
|
or if postfixes [PF0, PF1, ..., PFN-1] are supplied,
|
||
|
[rowkey, namePF0, namePF1, ... namePFN-1].
|
||
|
|
||
|
Example::
|
||
|
|
||
|
r = recs_join("date", "close", recs=[r0, r1], missing=0.)
|
||
|
|
||
|
"""
|
||
|
results = []
|
||
|
aligned_iters = cbook.align_iterators(operator.attrgetter(key),
|
||
|
*[iter(r) for r in recs])
|
||
|
|
||
|
def extract(r):
|
||
|
if r is None:
|
||
|
return missing
|
||
|
else:
|
||
|
return r[name]
|
||
|
|
||
|
if jointype == "outer":
|
||
|
for rowkey, row in aligned_iters:
|
||
|
results.append([rowkey] + list(map(extract, row)))
|
||
|
elif jointype == "inner":
|
||
|
for rowkey, row in aligned_iters:
|
||
|
if None not in row: # throw out any Nones
|
||
|
results.append([rowkey] + list(map(extract, row)))
|
||
|
|
||
|
if postfixes is None:
|
||
|
postfixes = ['%d' % i for i in range(len(recs))]
|
||
|
names = ",".join([key] + ["%s%s" % (name, postfix)
|
||
|
for postfix in postfixes])
|
||
|
return np.rec.fromrecords(results, names=names)
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def csv2rec(fname, comments='#', skiprows=0, checkrows=0, delimiter=',',
|
||
|
converterd=None, names=None, missing='', missingd=None,
|
||
|
use_mrecords=False, dayfirst=False, yearfirst=False):
|
||
|
"""
|
||
|
Load data from comma/space/tab delimited file in *fname* into a
|
||
|
numpy record array and return the record array.
|
||
|
|
||
|
If *names* is *None*, a header row is required to automatically
|
||
|
assign the recarray names. The headers will be lower cased,
|
||
|
spaces will be converted to underscores, and illegal attribute
|
||
|
name characters removed. If *names* is not *None*, it is a
|
||
|
sequence of names to use for the column names. In this case, it
|
||
|
is assumed there is no header row.
|
||
|
|
||
|
|
||
|
- *fname*: can be a filename or a file handle. Support for gzipped
|
||
|
files is automatic, if the filename ends in '.gz'
|
||
|
|
||
|
- *comments*: the character used to indicate the start of a comment
|
||
|
in the file, or *None* to switch off the removal of comments
|
||
|
|
||
|
- *skiprows*: is the number of rows from the top to skip
|
||
|
|
||
|
- *checkrows*: is the number of rows to check to validate the column
|
||
|
data type. When set to zero all rows are validated.
|
||
|
|
||
|
- *converterd*: if not *None*, is a dictionary mapping column number or
|
||
|
munged column name to a converter function.
|
||
|
|
||
|
- *names*: if not None, is a list of header names. In this case, no
|
||
|
header will be read from the file
|
||
|
|
||
|
- *missingd* is a dictionary mapping munged column names to field values
|
||
|
which signify that the field does not contain actual data and should
|
||
|
be masked, e.g., '0000-00-00' or 'unused'
|
||
|
|
||
|
- *missing*: a string whose value signals a missing field regardless of
|
||
|
the column it appears in
|
||
|
|
||
|
- *use_mrecords*: if True, return an mrecords.fromrecords record array if
|
||
|
any of the data are missing
|
||
|
|
||
|
- *dayfirst*: default is False so that MM-DD-YY has precedence over
|
||
|
DD-MM-YY. See
|
||
|
http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47
|
||
|
for further information.
|
||
|
|
||
|
- *yearfirst*: default is False so that MM-DD-YY has precedence over
|
||
|
YY-MM-DD. See
|
||
|
http://labix.org/python-dateutil#head-b95ce2094d189a89f80f5ae52a05b4ab7b41af47
|
||
|
for further information.
|
||
|
|
||
|
If no rows are found, *None* is returned
|
||
|
"""
|
||
|
|
||
|
if converterd is None:
|
||
|
converterd = dict()
|
||
|
|
||
|
if missingd is None:
|
||
|
missingd = {}
|
||
|
|
||
|
import dateutil.parser
|
||
|
import datetime
|
||
|
|
||
|
fh = cbook.to_filehandle(fname)
|
||
|
|
||
|
delimiter = str(delimiter)
|
||
|
|
||
|
class FH:
|
||
|
"""
|
||
|
For space-delimited files, we want different behavior than
|
||
|
comma or tab. Generally, we want multiple spaces to be
|
||
|
treated as a single separator, whereas with comma and tab we
|
||
|
want multiple commas to return multiple (empty) fields. The
|
||
|
join/strip trick below effects this.
|
||
|
"""
|
||
|
def __init__(self, fh):
|
||
|
self.fh = fh
|
||
|
|
||
|
def close(self):
|
||
|
self.fh.close()
|
||
|
|
||
|
def seek(self, arg):
|
||
|
self.fh.seek(arg)
|
||
|
|
||
|
def fix(self, s):
|
||
|
return ' '.join(s.split())
|
||
|
|
||
|
def __next__(self):
|
||
|
return self.fix(next(self.fh))
|
||
|
|
||
|
def __iter__(self):
|
||
|
for line in self.fh:
|
||
|
yield self.fix(line)
|
||
|
|
||
|
if delimiter == ' ':
|
||
|
fh = FH(fh)
|
||
|
|
||
|
reader = csv.reader(fh, delimiter=delimiter)
|
||
|
|
||
|
def process_skiprows(reader):
|
||
|
if skiprows:
|
||
|
for i, row in enumerate(reader):
|
||
|
if i >= (skiprows-1):
|
||
|
break
|
||
|
|
||
|
return fh, reader
|
||
|
|
||
|
process_skiprows(reader)
|
||
|
|
||
|
def ismissing(name, val):
|
||
|
"Should the value val in column name be masked?"
|
||
|
return val == missing or val == missingd.get(name) or val == ''
|
||
|
|
||
|
def with_default_value(func, default):
|
||
|
def newfunc(name, val):
|
||
|
if ismissing(name, val):
|
||
|
return default
|
||
|
else:
|
||
|
return func(val)
|
||
|
return newfunc
|
||
|
|
||
|
def mybool(x):
|
||
|
if x == 'True':
|
||
|
return True
|
||
|
elif x == 'False':
|
||
|
return False
|
||
|
else:
|
||
|
raise ValueError('invalid bool')
|
||
|
|
||
|
dateparser = dateutil.parser.parse
|
||
|
|
||
|
def mydateparser(x):
|
||
|
# try and return a datetime object
|
||
|
d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst)
|
||
|
return d
|
||
|
|
||
|
mydateparser = with_default_value(mydateparser, datetime.datetime(1, 1, 1))
|
||
|
|
||
|
myfloat = with_default_value(float, np.nan)
|
||
|
myint = with_default_value(int, -1)
|
||
|
mystr = with_default_value(str, '')
|
||
|
mybool = with_default_value(mybool, None)
|
||
|
|
||
|
def mydate(x):
|
||
|
# try and return a date object
|
||
|
d = dateparser(x, dayfirst=dayfirst, yearfirst=yearfirst)
|
||
|
|
||
|
if d.hour > 0 or d.minute > 0 or d.second > 0:
|
||
|
raise ValueError('not a date')
|
||
|
return d.date()
|
||
|
mydate = with_default_value(mydate, datetime.date(1, 1, 1))
|
||
|
|
||
|
def get_func(name, item, func):
|
||
|
# promote functions in this order
|
||
|
funcs = [mybool, myint, myfloat, mydate, mydateparser, mystr]
|
||
|
for func in funcs[funcs.index(func):]:
|
||
|
try:
|
||
|
func(name, item)
|
||
|
except Exception:
|
||
|
continue
|
||
|
return func
|
||
|
raise ValueError('Could not find a working conversion function')
|
||
|
|
||
|
# map column names that clash with builtins -- TODO - extend this list
|
||
|
itemd = {
|
||
|
'return': 'return_',
|
||
|
'file': 'file_',
|
||
|
'print': 'print_',
|
||
|
}
|
||
|
|
||
|
def get_converters(reader, comments):
|
||
|
|
||
|
converters = None
|
||
|
i = 0
|
||
|
for row in reader:
|
||
|
if (len(row) and comments is not None and
|
||
|
row[0].startswith(comments)):
|
||
|
continue
|
||
|
if i == 0:
|
||
|
converters = [mybool]*len(row)
|
||
|
if checkrows and i > checkrows:
|
||
|
break
|
||
|
i += 1
|
||
|
|
||
|
for j, (name, item) in enumerate(zip(names, row)):
|
||
|
func = converterd.get(j)
|
||
|
if func is None:
|
||
|
func = converterd.get(name)
|
||
|
if func is None:
|
||
|
func = converters[j]
|
||
|
if len(item.strip()):
|
||
|
func = get_func(name, item, func)
|
||
|
else:
|
||
|
# how should we handle custom converters and defaults?
|
||
|
func = with_default_value(func, None)
|
||
|
converters[j] = func
|
||
|
return converters
|
||
|
|
||
|
# Get header and remove invalid characters
|
||
|
needheader = names is None
|
||
|
|
||
|
if needheader:
|
||
|
for row in reader:
|
||
|
if (len(row) and comments is not None and
|
||
|
row[0].startswith(comments)):
|
||
|
continue
|
||
|
headers = row
|
||
|
break
|
||
|
|
||
|
# remove these chars
|
||
|
delete = set(r"""~!@#$%^&*()-=+~\|}[]{';: /?.>,<""")
|
||
|
delete.add('"')
|
||
|
|
||
|
names = []
|
||
|
seen = dict()
|
||
|
for i, item in enumerate(headers):
|
||
|
item = item.strip().lower().replace(' ', '_')
|
||
|
item = ''.join([c for c in item if c not in delete])
|
||
|
if not len(item):
|
||
|
item = 'column%d' % i
|
||
|
|
||
|
item = itemd.get(item, item)
|
||
|
cnt = seen.get(item, 0)
|
||
|
if cnt > 0:
|
||
|
names.append(item + '_%d' % cnt)
|
||
|
else:
|
||
|
names.append(item)
|
||
|
seen[item] = cnt+1
|
||
|
|
||
|
else:
|
||
|
if isinstance(names, str):
|
||
|
names = [n.strip() for n in names.split(',')]
|
||
|
|
||
|
# get the converter functions by inspecting checkrows
|
||
|
converters = get_converters(reader, comments)
|
||
|
if converters is None:
|
||
|
raise ValueError('Could not find any valid data in CSV file')
|
||
|
|
||
|
# reset the reader and start over
|
||
|
fh.seek(0)
|
||
|
reader = csv.reader(fh, delimiter=delimiter)
|
||
|
process_skiprows(reader)
|
||
|
|
||
|
if needheader:
|
||
|
while True:
|
||
|
# skip past any comments and consume one line of column header
|
||
|
row = next(reader)
|
||
|
if (len(row) and comments is not None and
|
||
|
row[0].startswith(comments)):
|
||
|
continue
|
||
|
break
|
||
|
|
||
|
# iterate over the remaining rows and convert the data to date
|
||
|
# objects, ints, or floats as appropriate
|
||
|
rows = []
|
||
|
rowmasks = []
|
||
|
for i, row in enumerate(reader):
|
||
|
if not len(row):
|
||
|
continue
|
||
|
if comments is not None and row[0].startswith(comments):
|
||
|
continue
|
||
|
# Ensure that the row returned always has the same nr of elements
|
||
|
row.extend([''] * (len(converters) - len(row)))
|
||
|
rows.append([func(name, val)
|
||
|
for func, name, val in zip(converters, names, row)])
|
||
|
rowmasks.append([ismissing(name, val)
|
||
|
for name, val in zip(names, row)])
|
||
|
fh.close()
|
||
|
|
||
|
if not len(rows):
|
||
|
return None
|
||
|
|
||
|
if use_mrecords and np.any(rowmasks):
|
||
|
r = np.ma.mrecords.fromrecords(rows, names=names, mask=rowmasks)
|
||
|
else:
|
||
|
r = np.rec.fromrecords(rows, names=names)
|
||
|
return r
|
||
|
|
||
|
|
||
|
# a series of classes for describing the format intentions of various rec views
|
||
|
@cbook.deprecated("2.2")
|
||
|
class FormatObj(object):
|
||
|
def tostr(self, x):
|
||
|
return self.toval(x)
|
||
|
|
||
|
def toval(self, x):
|
||
|
return str(x)
|
||
|
|
||
|
def fromstr(self, s):
|
||
|
return s
|
||
|
|
||
|
def __hash__(self):
|
||
|
"""
|
||
|
override the hash function of any of the formatters, so that we don't
|
||
|
create duplicate excel format styles
|
||
|
"""
|
||
|
return hash(self.__class__)
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
class FormatString(FormatObj):
|
||
|
def tostr(self, x):
|
||
|
val = repr(x)
|
||
|
return val[1:-1]
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
class FormatFormatStr(FormatObj):
|
||
|
def __init__(self, fmt):
|
||
|
self.fmt = fmt
|
||
|
|
||
|
def tostr(self, x):
|
||
|
if x is None:
|
||
|
return 'None'
|
||
|
return self.fmt % self.toval(x)
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
class FormatFloat(FormatFormatStr):
|
||
|
def __init__(self, precision=4, scale=1.):
|
||
|
FormatFormatStr.__init__(self, '%%1.%df' % precision)
|
||
|
self.precision = precision
|
||
|
self.scale = scale
|
||
|
|
||
|
def __hash__(self):
|
||
|
return hash((self.__class__, self.precision, self.scale))
|
||
|
|
||
|
def toval(self, x):
|
||
|
if x is not None:
|
||
|
x = x * self.scale
|
||
|
return x
|
||
|
|
||
|
def fromstr(self, s):
|
||
|
return float(s)/self.scale
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
class FormatInt(FormatObj):
|
||
|
|
||
|
def tostr(self, x):
|
||
|
return '%d' % int(x)
|
||
|
|
||
|
def toval(self, x):
|
||
|
return int(x)
|
||
|
|
||
|
def fromstr(self, s):
|
||
|
return int(s)
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
class FormatBool(FormatObj):
|
||
|
def toval(self, x):
|
||
|
return str(x)
|
||
|
|
||
|
def fromstr(self, s):
|
||
|
return bool(s)
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
class FormatPercent(FormatFloat):
|
||
|
def __init__(self, precision=4):
|
||
|
FormatFloat.__init__(self, precision, scale=100.)
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
class FormatThousands(FormatFloat):
|
||
|
def __init__(self, precision=4):
|
||
|
FormatFloat.__init__(self, precision, scale=1e-3)
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
class FormatMillions(FormatFloat):
|
||
|
def __init__(self, precision=4):
|
||
|
FormatFloat.__init__(self, precision, scale=1e-6)
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2", alternative='date.strftime')
|
||
|
class FormatDate(FormatObj):
|
||
|
def __init__(self, fmt):
|
||
|
self.fmt = fmt
|
||
|
|
||
|
def __hash__(self):
|
||
|
return hash((self.__class__, self.fmt))
|
||
|
|
||
|
def toval(self, x):
|
||
|
if x is None:
|
||
|
return 'None'
|
||
|
return x.strftime(self.fmt)
|
||
|
|
||
|
def fromstr(self, x):
|
||
|
import dateutil.parser
|
||
|
return dateutil.parser.parse(x).date()
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2", alternative='datetime.strftime')
|
||
|
class FormatDatetime(FormatDate):
|
||
|
def __init__(self, fmt='%Y-%m-%d %H:%M:%S'):
|
||
|
FormatDate.__init__(self, fmt)
|
||
|
|
||
|
def fromstr(self, x):
|
||
|
import dateutil.parser
|
||
|
return dateutil.parser.parse(x)
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def get_formatd(r, formatd=None):
|
||
|
'build a formatd guaranteed to have a key for every dtype name'
|
||
|
defaultformatd = {
|
||
|
np.bool_: FormatBool(),
|
||
|
np.int16: FormatInt(),
|
||
|
np.int32: FormatInt(),
|
||
|
np.int64: FormatInt(),
|
||
|
np.float32: FormatFloat(),
|
||
|
np.float64: FormatFloat(),
|
||
|
np.object_: FormatObj(),
|
||
|
np.string_: FormatString()}
|
||
|
|
||
|
if formatd is None:
|
||
|
formatd = dict()
|
||
|
|
||
|
for i, name in enumerate(r.dtype.names):
|
||
|
dt = r.dtype[name]
|
||
|
format = formatd.get(name)
|
||
|
if format is None:
|
||
|
format = defaultformatd.get(dt.type, FormatObj())
|
||
|
formatd[name] = format
|
||
|
return formatd
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def csvformat_factory(format):
|
||
|
format = copy.deepcopy(format)
|
||
|
if isinstance(format, FormatFloat):
|
||
|
format.scale = 1. # override scaling for storage
|
||
|
format.fmt = '%r'
|
||
|
return format
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2", alternative='numpy.recarray.tofile')
|
||
|
def rec2txt(r, header=None, padding=3, precision=3, fields=None):
|
||
|
"""
|
||
|
Returns a textual representation of a record array.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
r: numpy recarray
|
||
|
|
||
|
header: list
|
||
|
column headers
|
||
|
|
||
|
padding:
|
||
|
space between each column
|
||
|
|
||
|
precision: number of decimal places to use for floats.
|
||
|
Set to an integer to apply to all floats. Set to a
|
||
|
list of integers to apply precision individually.
|
||
|
Precision for non-floats is simply ignored.
|
||
|
|
||
|
fields : list
|
||
|
If not None, a list of field names to print. fields
|
||
|
can be a list of strings like ['field1', 'field2'] or a single
|
||
|
comma separated string like 'field1,field2'
|
||
|
|
||
|
Examples
|
||
|
--------
|
||
|
|
||
|
For ``precision=[0,2,3]``, the output is ::
|
||
|
|
||
|
ID Price Return
|
||
|
ABC 12.54 0.234
|
||
|
XYZ 6.32 -0.076
|
||
|
"""
|
||
|
|
||
|
if fields is not None:
|
||
|
r = rec_keep_fields(r, fields)
|
||
|
|
||
|
if cbook.is_numlike(precision):
|
||
|
precision = [precision]*len(r.dtype)
|
||
|
|
||
|
def get_type(item, atype=int):
|
||
|
tdict = {None: int, int: float, float: str}
|
||
|
try:
|
||
|
atype(str(item))
|
||
|
except:
|
||
|
return get_type(item, tdict[atype])
|
||
|
return atype
|
||
|
|
||
|
def get_justify(colname, column, precision):
|
||
|
ntype = column.dtype
|
||
|
|
||
|
if np.issubdtype(ntype, np.character):
|
||
|
fixed_width = int(ntype.str[2:])
|
||
|
length = max(len(colname), fixed_width)
|
||
|
return 0, length+padding, "%s" # left justify
|
||
|
|
||
|
if np.issubdtype(ntype, np.integer):
|
||
|
length = max(len(colname),
|
||
|
np.max(list(map(len, list(map(str, column))))))
|
||
|
return 1, length+padding, "%d" # right justify
|
||
|
|
||
|
if np.issubdtype(ntype, np.floating):
|
||
|
fmt = "%." + str(precision) + "f"
|
||
|
length = max(
|
||
|
len(colname),
|
||
|
np.max(list(map(len, list(map(lambda x: fmt % x, column)))))
|
||
|
)
|
||
|
return 1, length+padding, fmt # right justify
|
||
|
|
||
|
return (0,
|
||
|
max(len(colname),
|
||
|
np.max(list(map(len, list(map(str, column))))))+padding,
|
||
|
"%s")
|
||
|
|
||
|
if header is None:
|
||
|
header = r.dtype.names
|
||
|
|
||
|
justify_pad_prec = [get_justify(header[i], r.__getitem__(colname),
|
||
|
precision[i])
|
||
|
for i, colname in enumerate(r.dtype.names)]
|
||
|
|
||
|
justify_pad_prec_spacer = []
|
||
|
for i in range(len(justify_pad_prec)):
|
||
|
just, pad, prec = justify_pad_prec[i]
|
||
|
if i == 0:
|
||
|
justify_pad_prec_spacer.append((just, pad, prec, 0))
|
||
|
else:
|
||
|
pjust, ppad, pprec = justify_pad_prec[i-1]
|
||
|
if pjust == 0 and just == 1:
|
||
|
justify_pad_prec_spacer.append((just, pad-padding, prec, 0))
|
||
|
elif pjust == 1 and just == 0:
|
||
|
justify_pad_prec_spacer.append((just, pad, prec, padding))
|
||
|
else:
|
||
|
justify_pad_prec_spacer.append((just, pad, prec, 0))
|
||
|
|
||
|
def format(item, just_pad_prec_spacer):
|
||
|
just, pad, prec, spacer = just_pad_prec_spacer
|
||
|
if just == 0:
|
||
|
return spacer*' ' + str(item).ljust(pad)
|
||
|
else:
|
||
|
if get_type(item) == float:
|
||
|
item = (prec % float(item))
|
||
|
elif get_type(item) == int:
|
||
|
item = (prec % int(item))
|
||
|
|
||
|
return item.rjust(pad)
|
||
|
|
||
|
textl = []
|
||
|
textl.append(''.join([format(colitem, justify_pad_prec_spacer[j])
|
||
|
for j, colitem in enumerate(header)]))
|
||
|
for i, row in enumerate(r):
|
||
|
textl.append(''.join([format(colitem, justify_pad_prec_spacer[j])
|
||
|
for j, colitem in enumerate(row)]))
|
||
|
if i == 0:
|
||
|
textl[0] = textl[0].rstrip()
|
||
|
|
||
|
text = os.linesep.join(textl)
|
||
|
return text
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2", alternative='numpy.recarray.tofile')
|
||
|
def rec2csv(r, fname, delimiter=',', formatd=None, missing='',
|
||
|
missingd=None, withheader=True):
|
||
|
"""
|
||
|
Save the data from numpy recarray *r* into a
|
||
|
comma-/space-/tab-delimited file. The record array dtype names
|
||
|
will be used for column headers.
|
||
|
|
||
|
*fname*: can be a filename or a file handle. Support for gzipped
|
||
|
files is automatic, if the filename ends in '.gz'
|
||
|
|
||
|
*withheader*: if withheader is False, do not write the attribute
|
||
|
names in the first row
|
||
|
|
||
|
for formatd type FormatFloat, we override the precision to store
|
||
|
full precision floats in the CSV file
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
:func:`csv2rec`
|
||
|
For information about *missing* and *missingd*, which can be used to
|
||
|
fill in masked values into your CSV file.
|
||
|
"""
|
||
|
|
||
|
delimiter = str(delimiter)
|
||
|
|
||
|
if missingd is None:
|
||
|
missingd = dict()
|
||
|
|
||
|
def with_mask(func):
|
||
|
def newfunc(val, mask, mval):
|
||
|
if mask:
|
||
|
return mval
|
||
|
else:
|
||
|
return func(val)
|
||
|
return newfunc
|
||
|
|
||
|
if r.ndim != 1:
|
||
|
raise ValueError('rec2csv only operates on 1 dimensional recarrays')
|
||
|
|
||
|
formatd = get_formatd(r, formatd)
|
||
|
funcs = []
|
||
|
for i, name in enumerate(r.dtype.names):
|
||
|
funcs.append(with_mask(csvformat_factory(formatd[name]).tostr))
|
||
|
|
||
|
fh, opened = cbook.to_filehandle(fname, 'wb', return_opened=True)
|
||
|
writer = csv.writer(fh, delimiter=delimiter)
|
||
|
header = r.dtype.names
|
||
|
if withheader:
|
||
|
writer.writerow(header)
|
||
|
|
||
|
# Our list of specials for missing values
|
||
|
mvals = []
|
||
|
for name in header:
|
||
|
mvals.append(missingd.get(name, missing))
|
||
|
|
||
|
ismasked = False
|
||
|
if len(r):
|
||
|
row = r[0]
|
||
|
ismasked = hasattr(row, '_fieldmask')
|
||
|
|
||
|
for row in r:
|
||
|
if ismasked:
|
||
|
row, rowmask = row.item(), row._fieldmask.item()
|
||
|
else:
|
||
|
rowmask = [False] * len(row)
|
||
|
writer.writerow([func(val, mask, mval) for func, val, mask, mval
|
||
|
in zip(funcs, row, rowmask, mvals)])
|
||
|
if opened:
|
||
|
fh.close()
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2', alternative='scipy.interpolate.griddata')
|
||
|
def griddata(x, y, z, xi, yi, interp='nn'):
|
||
|
"""
|
||
|
Interpolates from a nonuniformly spaced grid to some other grid.
|
||
|
|
||
|
Fits a surface of the form z = f(`x`, `y`) to the data in the
|
||
|
(usually) nonuniformly spaced vectors (`x`, `y`, `z`), then
|
||
|
interpolates this surface at the points specified by
|
||
|
(`xi`, `yi`) to produce `zi`.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
x, y, z : 1d array_like
|
||
|
Coordinates of grid points to interpolate from.
|
||
|
xi, yi : 1d or 2d array_like
|
||
|
Coordinates of grid points to interpolate to.
|
||
|
interp : string key from {'nn', 'linear'}
|
||
|
Interpolation algorithm, either 'nn' for natural neighbor, or
|
||
|
'linear' for linear interpolation.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
2d float array
|
||
|
Array of values interpolated at (`xi`, `yi`) points. Array
|
||
|
will be masked is any of (`xi`, `yi`) are outside the convex
|
||
|
hull of (`x`, `y`).
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
If `interp` is 'nn' (the default), uses natural neighbor
|
||
|
interpolation based on Delaunay triangulation. This option is
|
||
|
only available if the mpl_toolkits.natgrid module is installed.
|
||
|
This can be downloaded from https://github.com/matplotlib/natgrid.
|
||
|
The (`xi`, `yi`) grid must be regular and monotonically increasing
|
||
|
in this case.
|
||
|
|
||
|
If `interp` is 'linear', linear interpolation is used via
|
||
|
matplotlib.tri.LinearTriInterpolator.
|
||
|
|
||
|
Instead of using `griddata`, more flexible functionality and other
|
||
|
interpolation options are available using a
|
||
|
matplotlib.tri.Triangulation and a matplotlib.tri.TriInterpolator.
|
||
|
"""
|
||
|
# Check input arguments.
|
||
|
x = np.asanyarray(x, dtype=np.float64)
|
||
|
y = np.asanyarray(y, dtype=np.float64)
|
||
|
z = np.asanyarray(z, dtype=np.float64)
|
||
|
if x.shape != y.shape or x.shape != z.shape or x.ndim != 1:
|
||
|
raise ValueError("x, y and z must be equal-length 1-D arrays")
|
||
|
|
||
|
xi = np.asanyarray(xi, dtype=np.float64)
|
||
|
yi = np.asanyarray(yi, dtype=np.float64)
|
||
|
if xi.ndim != yi.ndim:
|
||
|
raise ValueError("xi and yi must be arrays with the same number of "
|
||
|
"dimensions (1 or 2)")
|
||
|
if xi.ndim == 2 and xi.shape != yi.shape:
|
||
|
raise ValueError("if xi and yi are 2D arrays, they must have the same "
|
||
|
"shape")
|
||
|
if xi.ndim == 1:
|
||
|
xi, yi = np.meshgrid(xi, yi)
|
||
|
|
||
|
if interp == 'nn':
|
||
|
use_nn_interpolation = True
|
||
|
elif interp == 'linear':
|
||
|
use_nn_interpolation = False
|
||
|
else:
|
||
|
raise ValueError("interp keyword must be one of 'linear' (for linear "
|
||
|
"interpolation) or 'nn' (for natural neighbor "
|
||
|
"interpolation). Default is 'nn'.")
|
||
|
|
||
|
# Remove masked points.
|
||
|
mask = np.ma.getmask(z)
|
||
|
if mask is not np.ma.nomask:
|
||
|
x = x.compress(~mask)
|
||
|
y = y.compress(~mask)
|
||
|
z = z.compressed()
|
||
|
|
||
|
if use_nn_interpolation:
|
||
|
try:
|
||
|
from mpl_toolkits.natgrid import _natgrid
|
||
|
except ImportError:
|
||
|
raise RuntimeError(
|
||
|
"To use interp='nn' (Natural Neighbor interpolation) in "
|
||
|
"griddata, natgrid must be installed. Either install it "
|
||
|
"from http://github.com/matplotlib/natgrid or use "
|
||
|
"interp='linear' instead.")
|
||
|
|
||
|
if xi.ndim == 2:
|
||
|
# natgrid expects 1D xi and yi arrays.
|
||
|
xi = xi[0, :]
|
||
|
yi = yi[:, 0]
|
||
|
|
||
|
# Override default natgrid internal parameters.
|
||
|
_natgrid.seti(b'ext', 0)
|
||
|
_natgrid.setr(b'nul', np.nan)
|
||
|
|
||
|
if np.min(np.diff(xi)) < 0 or np.min(np.diff(yi)) < 0:
|
||
|
raise ValueError("Output grid defined by xi,yi must be monotone "
|
||
|
"increasing")
|
||
|
|
||
|
# Allocate array for output (buffer will be overwritten by natgridd)
|
||
|
zi = np.empty((yi.shape[0], xi.shape[0]), np.float64)
|
||
|
|
||
|
# Natgrid requires each array to be contiguous rather than e.g. a view
|
||
|
# that is a non-contiguous slice of another array. Use numpy.require
|
||
|
# to deal with this, which will copy if necessary.
|
||
|
x = np.require(x, requirements=['C'])
|
||
|
y = np.require(y, requirements=['C'])
|
||
|
z = np.require(z, requirements=['C'])
|
||
|
xi = np.require(xi, requirements=['C'])
|
||
|
yi = np.require(yi, requirements=['C'])
|
||
|
_natgrid.natgridd(x, y, z, xi, yi, zi)
|
||
|
|
||
|
# Mask points on grid outside convex hull of input data.
|
||
|
if np.any(np.isnan(zi)):
|
||
|
zi = np.ma.masked_where(np.isnan(zi), zi)
|
||
|
return zi
|
||
|
else:
|
||
|
# Linear interpolation performed using a matplotlib.tri.Triangulation
|
||
|
# and a matplotlib.tri.LinearTriInterpolator.
|
||
|
from .tri import Triangulation, LinearTriInterpolator
|
||
|
triang = Triangulation(x, y)
|
||
|
interpolator = LinearTriInterpolator(triang, z)
|
||
|
return interpolator(xi, yi)
|
||
|
|
||
|
|
||
|
##################################################
|
||
|
# Linear interpolation algorithms
|
||
|
##################################################
|
||
|
@cbook.deprecated("2.2", alternative="numpy.interp")
|
||
|
def less_simple_linear_interpolation(x, y, xi, extrap=False):
|
||
|
"""
|
||
|
This function provides simple (but somewhat less so than
|
||
|
:func:`cbook.simple_linear_interpolation`) linear interpolation.
|
||
|
:func:`simple_linear_interpolation` will give a list of point
|
||
|
between a start and an end, while this does true linear
|
||
|
interpolation at an arbitrary set of points.
|
||
|
|
||
|
This is very inefficient linear interpolation meant to be used
|
||
|
only for a small number of points in relatively non-intensive use
|
||
|
cases. For real linear interpolation, use scipy.
|
||
|
"""
|
||
|
x = np.asarray(x)
|
||
|
y = np.asarray(y)
|
||
|
xi = np.atleast_1d(xi)
|
||
|
|
||
|
s = list(y.shape)
|
||
|
s[0] = len(xi)
|
||
|
yi = np.tile(np.nan, s)
|
||
|
|
||
|
for ii, xx in enumerate(xi):
|
||
|
bb = x == xx
|
||
|
if np.any(bb):
|
||
|
jj, = np.nonzero(bb)
|
||
|
yi[ii] = y[jj[0]]
|
||
|
elif xx < x[0]:
|
||
|
if extrap:
|
||
|
yi[ii] = y[0]
|
||
|
elif xx > x[-1]:
|
||
|
if extrap:
|
||
|
yi[ii] = y[-1]
|
||
|
else:
|
||
|
jj, = np.nonzero(x < xx)
|
||
|
jj = max(jj)
|
||
|
|
||
|
yi[ii] = y[jj] + (xx-x[jj])/(x[jj+1]-x[jj]) * (y[jj+1]-y[jj])
|
||
|
|
||
|
return yi
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def slopes(x, y):
|
||
|
"""
|
||
|
:func:`slopes` calculates the slope *y*'(*x*)
|
||
|
|
||
|
The slope is estimated using the slope obtained from that of a
|
||
|
parabola through any three consecutive points.
|
||
|
|
||
|
This method should be superior to that described in the appendix
|
||
|
of A CONSISTENTLY WELL BEHAVED METHOD OF INTERPOLATION by Russel
|
||
|
W. Stineman (Creative Computing July 1980) in at least one aspect:
|
||
|
|
||
|
Circles for interpolation demand a known aspect ratio between
|
||
|
*x*- and *y*-values. For many functions, however, the abscissa
|
||
|
are given in different dimensions, so an aspect ratio is
|
||
|
completely arbitrary.
|
||
|
|
||
|
The parabola method gives very similar results to the circle
|
||
|
method for most regular cases but behaves much better in special
|
||
|
cases.
|
||
|
|
||
|
Norbert Nemec, Institute of Theoretical Physics, University or
|
||
|
Regensburg, April 2006 Norbert.Nemec at physik.uni-regensburg.de
|
||
|
|
||
|
(inspired by a original implementation by Halldor Bjornsson,
|
||
|
Icelandic Meteorological Office, March 2006 halldor at vedur.is)
|
||
|
"""
|
||
|
# Cast key variables as float.
|
||
|
x = np.asarray(x, float)
|
||
|
y = np.asarray(y, float)
|
||
|
|
||
|
yp = np.zeros(y.shape, float)
|
||
|
|
||
|
dx = x[1:] - x[:-1]
|
||
|
dy = y[1:] - y[:-1]
|
||
|
dydx = dy/dx
|
||
|
yp[1:-1] = (dydx[:-1] * dx[1:] + dydx[1:] * dx[:-1])/(dx[1:] + dx[:-1])
|
||
|
yp[0] = 2.0 * dy[0]/dx[0] - yp[1]
|
||
|
yp[-1] = 2.0 * dy[-1]/dx[-1] - yp[-2]
|
||
|
return yp
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def stineman_interp(xi, x, y, yp=None):
|
||
|
"""
|
||
|
Given data vectors *x* and *y*, the slope vector *yp* and a new
|
||
|
abscissa vector *xi*, the function :func:`stineman_interp` uses
|
||
|
Stineman interpolation to calculate a vector *yi* corresponding to
|
||
|
*xi*.
|
||
|
|
||
|
Here's an example that generates a coarse sine curve, then
|
||
|
interpolates over a finer abscissa::
|
||
|
|
||
|
x = linspace(0,2*pi,20); y = sin(x); yp = cos(x)
|
||
|
xi = linspace(0,2*pi,40);
|
||
|
yi = stineman_interp(xi,x,y,yp);
|
||
|
plot(x,y,'o',xi,yi)
|
||
|
|
||
|
The interpolation method is described in the article A
|
||
|
CONSISTENTLY WELL BEHAVED METHOD OF INTERPOLATION by Russell
|
||
|
W. Stineman. The article appeared in the July 1980 issue of
|
||
|
Creative Computing with a note from the editor stating that while
|
||
|
they were:
|
||
|
|
||
|
not an academic journal but once in a while something serious
|
||
|
and original comes in adding that this was
|
||
|
"apparently a real solution" to a well known problem.
|
||
|
|
||
|
For *yp* = *None*, the routine automatically determines the slopes
|
||
|
using the :func:`slopes` routine.
|
||
|
|
||
|
*x* is assumed to be sorted in increasing order.
|
||
|
|
||
|
For values ``xi[j] < x[0]`` or ``xi[j] > x[-1]``, the routine
|
||
|
tries an extrapolation. The relevance of the data obtained from
|
||
|
this, of course, is questionable...
|
||
|
|
||
|
Original implementation by Halldor Bjornsson, Icelandic
|
||
|
Meteorolocial Office, March 2006 halldor at vedur.is
|
||
|
|
||
|
Completely reworked and optimized for Python by Norbert Nemec,
|
||
|
Institute of Theoretical Physics, University or Regensburg, April
|
||
|
2006 Norbert.Nemec at physik.uni-regensburg.de
|
||
|
"""
|
||
|
|
||
|
# Cast key variables as float.
|
||
|
x = np.asarray(x, float)
|
||
|
y = np.asarray(y, float)
|
||
|
if x.shape != y.shape:
|
||
|
raise ValueError("'x' and 'y' must be of same shape")
|
||
|
|
||
|
if yp is None:
|
||
|
yp = slopes(x, y)
|
||
|
else:
|
||
|
yp = np.asarray(yp, float)
|
||
|
|
||
|
xi = np.asarray(xi, float)
|
||
|
yi = np.zeros(xi.shape, float)
|
||
|
|
||
|
# calculate linear slopes
|
||
|
dx = x[1:] - x[:-1]
|
||
|
dy = y[1:] - y[:-1]
|
||
|
s = dy/dx # note length of s is N-1 so last element is #N-2
|
||
|
|
||
|
# find the segment each xi is in
|
||
|
# this line actually is the key to the efficiency of this implementation
|
||
|
idx = np.searchsorted(x[1:-1], xi)
|
||
|
|
||
|
# now we have generally: x[idx[j]] <= xi[j] <= x[idx[j]+1]
|
||
|
# except at the boundaries, where it may be that xi[j] < x[0] or
|
||
|
# xi[j] > x[-1]
|
||
|
|
||
|
# the y-values that would come out from a linear interpolation:
|
||
|
sidx = s.take(idx)
|
||
|
xidx = x.take(idx)
|
||
|
yidx = y.take(idx)
|
||
|
xidxp1 = x.take(idx+1)
|
||
|
yo = yidx + sidx * (xi - xidx)
|
||
|
|
||
|
# the difference that comes when using the slopes given in yp
|
||
|
# using the yp slope of the left point
|
||
|
dy1 = (yp.take(idx) - sidx) * (xi - xidx)
|
||
|
# using the yp slope of the right point
|
||
|
dy2 = (yp.take(idx+1)-sidx) * (xi - xidxp1)
|
||
|
|
||
|
dy1dy2 = dy1*dy2
|
||
|
# The following is optimized for Python. The solution actually
|
||
|
# does more calculations than necessary but exploiting the power
|
||
|
# of numpy, this is far more efficient than coding a loop by hand
|
||
|
# in Python
|
||
|
yi = yo + dy1dy2 * np.choose(np.array(np.sign(dy1dy2), np.int32)+1,
|
||
|
((2*xi-xidx-xidxp1)/((dy1-dy2)*(xidxp1-xidx)),
|
||
|
0.0,
|
||
|
1/(dy1+dy2),))
|
||
|
return yi
|
||
|
|
||
|
|
||
|
class GaussianKDE(object):
|
||
|
"""
|
||
|
Representation of a kernel-density estimate using Gaussian kernels.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
dataset : array_like
|
||
|
Datapoints to estimate from. In case of univariate data this is a 1-D
|
||
|
array, otherwise a 2-D array with shape (# of dims, # of data).
|
||
|
|
||
|
bw_method : str, scalar or callable, optional
|
||
|
The method used to calculate the estimator bandwidth. This can be
|
||
|
'scott', 'silverman', a scalar constant or a callable. If a
|
||
|
scalar, this will be used directly as `kde.factor`. If a
|
||
|
callable, it should take a `GaussianKDE` instance as only
|
||
|
parameter and return a scalar. If None (default), 'scott' is used.
|
||
|
|
||
|
Attributes
|
||
|
----------
|
||
|
dataset : ndarray
|
||
|
The dataset with which `gaussian_kde` was initialized.
|
||
|
|
||
|
dim : int
|
||
|
Number of dimensions.
|
||
|
|
||
|
num_dp : int
|
||
|
Number of datapoints.
|
||
|
|
||
|
factor : float
|
||
|
The bandwidth factor, obtained from `kde.covariance_factor`, with which
|
||
|
the covariance matrix is multiplied.
|
||
|
|
||
|
covariance : ndarray
|
||
|
The covariance matrix of `dataset`, scaled by the calculated bandwidth
|
||
|
(`kde.factor`).
|
||
|
|
||
|
inv_cov : ndarray
|
||
|
The inverse of `covariance`.
|
||
|
|
||
|
Methods
|
||
|
-------
|
||
|
kde.evaluate(points) : ndarray
|
||
|
Evaluate the estimated pdf on a provided set of points.
|
||
|
|
||
|
kde(points) : ndarray
|
||
|
Same as kde.evaluate(points)
|
||
|
|
||
|
"""
|
||
|
|
||
|
# This implementation with minor modification was too good to pass up.
|
||
|
# from scipy: https://github.com/scipy/scipy/blob/master/scipy/stats/kde.py
|
||
|
|
||
|
def __init__(self, dataset, bw_method=None):
|
||
|
self.dataset = np.atleast_2d(dataset)
|
||
|
if not np.array(self.dataset).size > 1:
|
||
|
raise ValueError("`dataset` input should have multiple elements.")
|
||
|
|
||
|
self.dim, self.num_dp = np.array(self.dataset).shape
|
||
|
isString = isinstance(bw_method, str)
|
||
|
|
||
|
if bw_method is None:
|
||
|
pass
|
||
|
elif (isString and bw_method == 'scott'):
|
||
|
self.covariance_factor = self.scotts_factor
|
||
|
elif (isString and bw_method == 'silverman'):
|
||
|
self.covariance_factor = self.silverman_factor
|
||
|
elif (np.isscalar(bw_method) and not isString):
|
||
|
self._bw_method = 'use constant'
|
||
|
self.covariance_factor = lambda: bw_method
|
||
|
elif callable(bw_method):
|
||
|
self._bw_method = bw_method
|
||
|
self.covariance_factor = lambda: self._bw_method(self)
|
||
|
else:
|
||
|
raise ValueError("`bw_method` should be 'scott', 'silverman', a "
|
||
|
"scalar or a callable")
|
||
|
|
||
|
# Computes the covariance matrix for each Gaussian kernel using
|
||
|
# covariance_factor().
|
||
|
|
||
|
self.factor = self.covariance_factor()
|
||
|
# Cache covariance and inverse covariance of the data
|
||
|
if not hasattr(self, '_data_inv_cov'):
|
||
|
self.data_covariance = np.atleast_2d(
|
||
|
np.cov(
|
||
|
self.dataset,
|
||
|
rowvar=1,
|
||
|
bias=False))
|
||
|
self.data_inv_cov = np.linalg.inv(self.data_covariance)
|
||
|
|
||
|
self.covariance = self.data_covariance * self.factor ** 2
|
||
|
self.inv_cov = self.data_inv_cov / self.factor ** 2
|
||
|
self.norm_factor = np.sqrt(
|
||
|
np.linalg.det(
|
||
|
2 * np.pi * self.covariance)) * self.num_dp
|
||
|
|
||
|
def scotts_factor(self):
|
||
|
return np.power(self.num_dp, -1. / (self.dim + 4))
|
||
|
|
||
|
def silverman_factor(self):
|
||
|
return np.power(
|
||
|
self.num_dp * (self.dim + 2.0) / 4.0, -1. / (self.dim + 4))
|
||
|
|
||
|
# Default method to calculate bandwidth, can be overwritten by subclass
|
||
|
covariance_factor = scotts_factor
|
||
|
|
||
|
def evaluate(self, points):
|
||
|
"""Evaluate the estimated pdf on a set of points.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
points : (# of dimensions, # of points)-array
|
||
|
Alternatively, a (# of dimensions,) vector can be passed in and
|
||
|
treated as a single point.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
values : (# of points,)-array
|
||
|
The values at each point.
|
||
|
|
||
|
Raises
|
||
|
------
|
||
|
ValueError : if the dimensionality of the input points is different
|
||
|
than the dimensionality of the KDE.
|
||
|
|
||
|
"""
|
||
|
points = np.atleast_2d(points)
|
||
|
|
||
|
dim, num_m = np.array(points).shape
|
||
|
if dim != self.dim:
|
||
|
raise ValueError("points have dimension {}, dataset has dimension "
|
||
|
"{}".format(dim, self.dim))
|
||
|
|
||
|
result = np.zeros((num_m,), dtype=float)
|
||
|
|
||
|
if num_m >= self.num_dp:
|
||
|
# there are more points than data, so loop over data
|
||
|
for i in range(self.num_dp):
|
||
|
diff = self.dataset[:, i, np.newaxis] - points
|
||
|
tdiff = np.dot(self.inv_cov, diff)
|
||
|
energy = np.sum(diff * tdiff, axis=0) / 2.0
|
||
|
result = result + np.exp(-energy)
|
||
|
else:
|
||
|
# loop over points
|
||
|
for i in range(num_m):
|
||
|
diff = self.dataset - points[:, i, np.newaxis]
|
||
|
tdiff = np.dot(self.inv_cov, diff)
|
||
|
energy = np.sum(diff * tdiff, axis=0) / 2.0
|
||
|
result[i] = np.sum(np.exp(-energy), axis=0)
|
||
|
|
||
|
result = result / self.norm_factor
|
||
|
|
||
|
return result
|
||
|
|
||
|
__call__ = evaluate
|
||
|
|
||
|
|
||
|
##################################################
|
||
|
# Code related to things in and around polygons
|
||
|
##################################################
|
||
|
@cbook.deprecated("2.2")
|
||
|
def inside_poly(points, verts):
|
||
|
"""
|
||
|
*points* is a sequence of *x*, *y* points.
|
||
|
*verts* is a sequence of *x*, *y* vertices of a polygon.
|
||
|
|
||
|
Return value is a sequence of indices into points for the points
|
||
|
that are inside the polygon.
|
||
|
"""
|
||
|
# Make a closed polygon path
|
||
|
poly = Path(verts)
|
||
|
|
||
|
# Check to see which points are contained within the Path
|
||
|
return [idx for idx, p in enumerate(points) if poly.contains_point(p)]
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def poly_below(xmin, xs, ys):
|
||
|
"""
|
||
|
Given a sequence of *xs* and *ys*, return the vertices of a
|
||
|
polygon that has a horizontal base at *xmin* and an upper bound at
|
||
|
the *ys*. *xmin* is a scalar.
|
||
|
|
||
|
Intended for use with :meth:`matplotlib.axes.Axes.fill`, e.g.,::
|
||
|
|
||
|
xv, yv = poly_below(0, x, y)
|
||
|
ax.fill(xv, yv)
|
||
|
"""
|
||
|
if any(isinstance(var, np.ma.MaskedArray) for var in [xs, ys]):
|
||
|
numpy = np.ma
|
||
|
else:
|
||
|
numpy = np
|
||
|
|
||
|
xs = numpy.asarray(xs)
|
||
|
ys = numpy.asarray(ys)
|
||
|
Nx = len(xs)
|
||
|
Ny = len(ys)
|
||
|
if Nx != Ny:
|
||
|
raise ValueError("'xs' and 'ys' must have the same length")
|
||
|
x = xmin*numpy.ones(2*Nx)
|
||
|
y = numpy.ones(2*Nx)
|
||
|
x[:Nx] = xs
|
||
|
y[:Nx] = ys
|
||
|
y[Nx:] = ys[::-1]
|
||
|
return x, y
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def poly_between(x, ylower, yupper):
|
||
|
"""
|
||
|
Given a sequence of *x*, *ylower* and *yupper*, return the polygon
|
||
|
that fills the regions between them. *ylower* or *yupper* can be
|
||
|
scalar or iterable. If they are iterable, they must be equal in
|
||
|
length to *x*.
|
||
|
|
||
|
Return value is *x*, *y* arrays for use with
|
||
|
:meth:`matplotlib.axes.Axes.fill`.
|
||
|
"""
|
||
|
if any(isinstance(var, np.ma.MaskedArray) for var in [ylower, yupper, x]):
|
||
|
numpy = np.ma
|
||
|
else:
|
||
|
numpy = np
|
||
|
|
||
|
Nx = len(x)
|
||
|
if not cbook.iterable(ylower):
|
||
|
ylower = ylower*numpy.ones(Nx)
|
||
|
|
||
|
if not cbook.iterable(yupper):
|
||
|
yupper = yupper*numpy.ones(Nx)
|
||
|
|
||
|
x = numpy.concatenate((x, x[::-1]))
|
||
|
y = numpy.concatenate((yupper, ylower[::-1]))
|
||
|
return x, y
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2')
|
||
|
def is_closed_polygon(X):
|
||
|
"""
|
||
|
Tests whether first and last object in a sequence are the same. These are
|
||
|
presumably coordinates on a polygonal curve, in which case this function
|
||
|
tests if that curve is closed.
|
||
|
"""
|
||
|
return np.all(X[0] == X[-1])
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2", message='Moved to matplotlib.cbook')
|
||
|
def contiguous_regions(mask):
|
||
|
"""
|
||
|
return a list of (ind0, ind1) such that mask[ind0:ind1].all() is
|
||
|
True and we cover all such regions
|
||
|
"""
|
||
|
return cbook.contiguous_regions(mask)
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def cross_from_below(x, threshold):
|
||
|
"""
|
||
|
return the indices into *x* where *x* crosses some threshold from
|
||
|
below, e.g., the i's where::
|
||
|
|
||
|
x[i-1]<threshold and x[i]>=threshold
|
||
|
|
||
|
Example code::
|
||
|
|
||
|
import matplotlib.pyplot as plt
|
||
|
|
||
|
t = np.arange(0.0, 2.0, 0.1)
|
||
|
s = np.sin(2*np.pi*t)
|
||
|
|
||
|
fig, ax = plt.subplots()
|
||
|
ax.plot(t, s, '-o')
|
||
|
ax.axhline(0.5)
|
||
|
ax.axhline(-0.5)
|
||
|
|
||
|
ind = cross_from_below(s, 0.5)
|
||
|
ax.vlines(t[ind], -1, 1)
|
||
|
|
||
|
ind = cross_from_above(s, -0.5)
|
||
|
ax.vlines(t[ind], -1, 1)
|
||
|
|
||
|
plt.show()
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
:func:`cross_from_above` and :func:`contiguous_regions`
|
||
|
|
||
|
"""
|
||
|
x = np.asarray(x)
|
||
|
ind = np.nonzero((x[:-1] < threshold) & (x[1:] >= threshold))[0]
|
||
|
if len(ind):
|
||
|
return ind+1
|
||
|
else:
|
||
|
return ind
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def cross_from_above(x, threshold):
|
||
|
"""
|
||
|
return the indices into *x* where *x* crosses some threshold from
|
||
|
below, e.g., the i's where::
|
||
|
|
||
|
x[i-1]>threshold and x[i]<=threshold
|
||
|
|
||
|
See Also
|
||
|
--------
|
||
|
:func:`cross_from_below` and :func:`contiguous_regions`
|
||
|
|
||
|
"""
|
||
|
x = np.asarray(x)
|
||
|
ind = np.nonzero((x[:-1] >= threshold) & (x[1:] < threshold))[0]
|
||
|
if len(ind):
|
||
|
return ind+1
|
||
|
else:
|
||
|
return ind
|
||
|
|
||
|
|
||
|
##################################################
|
||
|
# Vector and path length geometry calculations
|
||
|
##################################################
|
||
|
@cbook.deprecated('2.2')
|
||
|
def vector_lengths(X, P=2., axis=None):
|
||
|
"""
|
||
|
Finds the length of a set of vectors in *n* dimensions. This is
|
||
|
like the :func:`numpy.norm` function for vectors, but has the ability to
|
||
|
work over a particular axis of the supplied array or matrix.
|
||
|
|
||
|
Computes ``(sum((x_i)^P))^(1/P)`` for each ``{x_i}`` being the
|
||
|
elements of *X* along the given axis. If *axis* is *None*,
|
||
|
compute over all elements of *X*.
|
||
|
"""
|
||
|
X = np.asarray(X)
|
||
|
return (np.sum(X**(P), axis=axis))**(1./P)
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2')
|
||
|
def distances_along_curve(X):
|
||
|
"""
|
||
|
Computes the distance between a set of successive points in *N* dimensions.
|
||
|
|
||
|
Where *X* is an *M* x *N* array or matrix. The distances between
|
||
|
successive rows is computed. Distance is the standard Euclidean
|
||
|
distance.
|
||
|
"""
|
||
|
X = np.diff(X, axis=0)
|
||
|
return vector_lengths(X, axis=1)
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2')
|
||
|
def path_length(X):
|
||
|
"""
|
||
|
Computes the distance travelled along a polygonal curve in *N* dimensions.
|
||
|
|
||
|
Where *X* is an *M* x *N* array or matrix. Returns an array of
|
||
|
length *M* consisting of the distance along the curve at each point
|
||
|
(i.e., the rows of *X*).
|
||
|
"""
|
||
|
X = distances_along_curve(X)
|
||
|
return np.concatenate((np.zeros(1), np.cumsum(X)))
|
||
|
|
||
|
|
||
|
@cbook.deprecated('2.2')
|
||
|
def quad2cubic(q0x, q0y, q1x, q1y, q2x, q2y):
|
||
|
"""
|
||
|
Converts a quadratic Bezier curve to a cubic approximation.
|
||
|
|
||
|
The inputs are the *x* and *y* coordinates of the three control
|
||
|
points of a quadratic curve, and the output is a tuple of *x* and
|
||
|
*y* coordinates of the four control points of the cubic curve.
|
||
|
"""
|
||
|
# TODO: Candidate for deprecation -- no longer used internally
|
||
|
|
||
|
# c0x, c0y = q0x, q0y
|
||
|
c1x, c1y = q0x + 2./3. * (q1x - q0x), q0y + 2./3. * (q1y - q0y)
|
||
|
c2x, c2y = c1x + 1./3. * (q2x - q0x), c1y + 1./3. * (q2y - q0y)
|
||
|
# c3x, c3y = q2x, q2y
|
||
|
return q0x, q0y, c1x, c1y, c2x, c2y, q2x, q2y
|
||
|
|
||
|
|
||
|
@cbook.deprecated("2.2")
|
||
|
def offset_line(y, yerr):
|
||
|
"""
|
||
|
Offsets an array *y* by +/- an error and returns a tuple
|
||
|
(y - err, y + err).
|
||
|
|
||
|
The error term can be:
|
||
|
|
||
|
* A scalar. In this case, the returned tuple is obvious.
|
||
|
* A vector of the same length as *y*. The quantities y +/- err are computed
|
||
|
component-wise.
|
||
|
* A tuple of length 2. In this case, yerr[0] is the error below *y* and
|
||
|
yerr[1] is error above *y*. For example::
|
||
|
|
||
|
import numpy as np
|
||
|
import matplotlib.pyplot as plt
|
||
|
|
||
|
x = np.linspace(0, 2*np.pi, num=100, endpoint=True)
|
||
|
y = np.sin(x)
|
||
|
y_minus, y_plus = mlab.offset_line(y, 0.1)
|
||
|
plt.plot(x, y)
|
||
|
plt.fill_between(x, y_minus, y2=y_plus)
|
||
|
plt.show()
|
||
|
|
||
|
"""
|
||
|
if cbook.is_numlike(yerr) or (cbook.iterable(yerr) and
|
||
|
len(yerr) == len(y)):
|
||
|
ymin = y - yerr
|
||
|
ymax = y + yerr
|
||
|
elif len(yerr) == 2:
|
||
|
ymin, ymax = y - yerr[0], y + yerr[1]
|
||
|
else:
|
||
|
raise ValueError("yerr must be scalar, 1xN or 2xN")
|
||
|
return ymin, ymax
|