646 lines
21 KiB
Python
646 lines
21 KiB
Python
|
# being a bit too dynamic
|
||
|
# pylint: disable=E1101
|
||
|
from __future__ import division
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
from pandas.util._decorators import deprecate_kwarg
|
||
|
from pandas.core.dtypes.missing import notna
|
||
|
from pandas.compat import range, lrange, lmap, zip
|
||
|
from pandas.io.formats.printing import pprint_thing
|
||
|
|
||
|
|
||
|
from pandas.plotting._style import _get_standard_colors
|
||
|
from pandas.plotting._tools import _subplots, _set_ticks_props
|
||
|
|
||
|
|
||
|
def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False,
                   diagonal='hist', marker='.', density_kwds=None,
                   hist_kwds=None, range_padding=0.05, **kwds):
    """
    Draw a matrix of scatter plots.

    Each numeric column of `frame` is plotted against every numeric column.
    Off-diagonal cells are scatter plots; diagonal cells show the
    distribution of a single column.

    Parameters
    ----------
    frame : DataFrame
    alpha : float, optional
        amount of transparency applied
    figsize : (float,float), optional
        a tuple (width, height) in inches
    ax : Matplotlib axis object, optional
    grid : bool, optional
        setting this to True is meant to show the grid.
        NOTE(review): this argument is not read anywhere in the function
        body.
    diagonal : {'hist', 'kde'}
        pick between 'kde' and 'hist' for
        either Kernel Density Estimation or Histogram
        plot in the diagonal ('density' is accepted as an alias of 'kde';
        any other value leaves the diagonal cells empty)
    marker : str, optional
        Matplotlib marker type, default '.'
    hist_kwds : other plotting keyword arguments
        To be passed to hist function
    density_kwds : other plotting keyword arguments
        To be passed to kernel density estimate plot
    range_padding : float, optional
        relative extension of axis range in x and y
        with respect to (x_max - x_min) or (y_max - y_min),
        default 0.05
    kwds : other plotting keyword arguments
        To be passed to scatter function

    Returns
    -------
    axes : 2-D array of matplotlib axes, as returned by ``_subplots``

    Examples
    --------
    >>> df = DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D'])
    >>> scatter_matrix(df, alpha=0.2)
    """

    numeric = frame._get_numeric_data()
    side = numeric.columns.size
    fig, axes = _subplots(naxes=side * side, figsize=figsize, ax=ax,
                          squeeze=False)

    # no gaps between subplots
    fig.subplots_adjust(wspace=0, hspace=0)

    present = notna(numeric)

    marker = _get_marker_compat(marker)

    hist_kwds = hist_kwds or {}
    density_kwds = density_kwds or {}

    # GH 14855
    kwds.setdefault('edgecolors', 'none')

    # Padded (min, max) of the non-missing values of every column; shared
    # by all cells that display that column.
    boundaries_list = []
    for column in numeric.columns:
        observed = numeric[column].values[present[column].values]
        lowest, highest = np.min(observed), np.max(observed)
        padding = (highest - lowest) * range_padding / 2.
        boundaries_list.append((lowest - padding, highest + padding))

    for i, row_name in enumerate(numeric.columns):
        for j, col_name in enumerate(numeric.columns):
            cell = axes[i, j]

            if i != j:
                # Only rows where both columns are present can be plotted.
                both = (present[row_name] & present[col_name]).values

                cell.scatter(numeric[col_name][both],
                             numeric[row_name][both],
                             marker=marker, alpha=alpha, **kwds)

                cell.set_xlim(boundaries_list[j])
                cell.set_ylim(boundaries_list[i])
            else:
                observed = numeric[row_name].values[
                    present[row_name].values]

                # Deal with the diagonal by drawing a histogram there.
                if diagonal == 'hist':
                    cell.hist(observed, **hist_kwds)

                elif diagonal in ('kde', 'density'):
                    from scipy.stats import gaussian_kde
                    estimator = gaussian_kde(observed)
                    support = np.linspace(observed.min(), observed.max(),
                                          1000)
                    cell.plot(support, estimator.evaluate(support),
                              **density_kwds)

                cell.set_xlim(boundaries_list[i])

            cell.set_xlabel(col_name)
            cell.set_ylabel(row_name)

            # Only the left column and the bottom row keep visible axes.
            if j != 0:
                cell.yaxis.set_visible(False)
            if i != side - 1:
                cell.xaxis.set_visible(False)

    if len(numeric.columns) > 1:
        # The top-left cell is a diagonal cell, so its y range is not the
        # data range of the first column. Take the tick locations of its
        # right-hand neighbour, rescale them into the top-left cell's own
        # y range, and label them with the original data values.
        first_low, first_high = boundaries_list[0]
        top_left = axes[0][0]

        locs = axes[0][1].yaxis.get_majorticklocs()
        locs = locs[(first_low <= locs) & (locs <= first_high)]
        relative = (locs - first_low) / (first_high - first_low)

        y_low, y_high = top_left.get_ylim()
        top_left.yaxis.set_ticks(relative * (y_high - y_low) + y_low)

        if np.all(locs == locs.astype(int)):
            # if all ticks are int
            locs = locs.astype(int)
        top_left.yaxis.set_ticklabels(locs)

    _set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0)

    return axes
|
||
|
|
||
|
|
||
|
def _get_marker_compat(marker):
    """
    Return a marker that the installed matplotlib is able to draw.

    Parameters
    ----------
    marker : str
        Requested matplotlib marker.

    Returns
    -------
    marker
        `marker` unchanged, or ``'o'`` when `marker` is not listed in
        ``matplotlib.lines.lineMarkers`` or when ``'.'`` is requested on a
        matplotlib release older than 1.1.
    """
    import re

    import matplotlib as mpl
    import matplotlib.lines as mlines

    # Compare the release numerically. Comparing the version strings
    # directly orders them character by character (e.g. '1.1' < '1.1.0'),
    # which is not version order.
    release = re.match(r'(\d+)\.(\d+)', mpl.__version__)
    predates_1_1 = (release is not None and
                    tuple(int(part) for part in release.groups()) < (1, 1))

    if marker == '.' and predates_1_1:
        return 'o'
    if marker not in mlines.lineMarkers:
        return 'o'
    return marker
|
||
|
|
||
|
|
||
|
def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds):
    """
    Plot a multidimensional dataset in 2D.

    Each Series in the DataFrame is represented as an evenly distributed
    anchor on the unit circle. Every column is first rescaled to [0, 1];
    each data point is then rendered at the average of the anchor positions
    weighted by the point's rescaled value on each Series.

    RadViz allows projecting an N-dimensional data set into a 2D space
    where the influence of each dimension can be interpreted as a balance
    between the influence of all dimensions.

    More info available at the `original article
    <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.135.889>`_
    describing RadViz.

    Parameters
    ----------
    frame : `DataFrame`
        Pandas object holding the data.
    class_column : str
        Column name containing the name of the data point category.
    ax : :class:`matplotlib.axes.Axes`, optional
        A plot instance to which to add the information.
    color : list[str] or tuple[str], optional
        Assign a color to each category. Example: ['blue', 'green'].
    colormap : str or :class:`matplotlib.colors.Colormap`, default None
        Colormap to select colors from. If string, load colormap with that
        name from matplotlib.
    kwds : optional
        Options to pass to matplotlib scatter plotting method.

    Returns
    -------
    axes : :class:`matplotlib.axes.Axes`

    See Also
    --------
    pandas.plotting.andrews_curves : Plot clustering visualization

    Examples
    --------
    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...         'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6,
        ...                         6.7, 4.6],
        ...         'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2,
        ...                        3.3, 3.6],
        ...         'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4,
        ...                         5.7, 1.0],
        ...         'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2,
        ...                        2.1, 0.2],
        ...         'Category': ['virginica', 'virginica', 'setosa',
        ...                      'virginica', 'virginica', 'versicolor',
        ...                      'versicolor', 'setosa', 'virginica',
        ...                      'setosa']
        ...     })
        >>> rad_viz = pd.plotting.radviz(df, 'Category')
    """
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches

    def normalize(series):
        # Min-max rescaling; a constant column makes the denominator zero.
        a = min(series)
        b = max(series)
        return (series - a) / (b - a)

    n = len(frame)
    classes = frame[class_column].drop_duplicates()
    class_col = frame[class_column]
    df = frame.drop(class_column, axis=1).apply(normalize)

    if ax is None:
        # Recent matplotlib releases no longer accept keyword arguments in
        # ``plt.gca``, so the limits are applied explicitly.
        ax = plt.gca()
        ax.set_xlim(-1, 1)
        ax.set_ylim(-1, 1)

    colors = _get_standard_colors(num_colors=len(classes), colormap=colormap,
                                  color_type='random', color=color)

    # Per class: one list of x coordinates and one list of y coordinates.
    to_plot = {kls: [[], []] for kls in classes}

    # One anchor per feature column, evenly spaced on the unit circle.
    m = len(frame.columns) - 1
    s = np.array([(np.cos(t), np.sin(t))
                  for t in [2.0 * np.pi * (i / float(m))
                            for i in range(m)]])

    for i in range(n):
        row = df.iloc[i].values
        # Weighted average of the anchors, weights being the row's values.
        row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1)
        y = (s * row_).sum(axis=0) / row.sum()
        kls = class_col.iat[i]
        to_plot[kls][0].append(y[0])
        to_plot[kls][1].append(y[1])

    for i, kls in enumerate(classes):
        ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i],
                   label=pprint_thing(kls), **kwds)
    ax.legend()

    ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none'))

    for xy, name in zip(s, df.columns):

        ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray'))

        # Push each label away from the circle centre, per quadrant.
        if xy[0] < 0.0:
            dx, ha = -0.025, 'right'
        else:
            dx, ha = 0.025, 'left'
        if xy[1] < 0.0:
            dy, va = -0.025, 'top'
        else:
            dy, va = 0.025, 'bottom'
        ax.text(xy[0] + dx, xy[1] + dy, name,
                ha=ha, va=va, size='small')

    ax.axis('equal')
    return ax
|
||
|
|
||
|
|
||
|
@deprecate_kwarg(old_arg_name='data', new_arg_name='frame')
def andrews_curves(frame, class_column, ax=None, samples=200, color=None,
                   colormap=None, **kwds):
    """
    Generates a matplotlib plot of Andrews curves, for visualising clusters of
    multivariate data.

    Andrews curves have the functional form:

    f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) +
           x_4 sin(2t) + x_5 cos(2t) + ...

    Where x coefficients correspond to the values of each dimension and t is
    linearly spaced between -pi and +pi. Each row of frame then corresponds to
    a single curve.

    Parameters
    ----------
    frame : DataFrame
        Data to be plotted, preferably normalized to (0.0, 1.0)
    class_column : Name of the column containing class names
    ax : matplotlib axes object, default None
    samples : Number of points to plot in each curve
    color : list or tuple, optional
        Colors to use for the different classes
    colormap : str or matplotlib colormap object, default None
        Colormap to select colors from. If string, load colormap with that name
        from matplotlib.
    kwds : keywords
        Options to pass to matplotlib plotting method

    Returns
    -------
    ax : Matplotlib axis object

    """
    from math import sqrt, pi
    import matplotlib.pyplot as plt

    def function(amplitudes):
        def f(t):
            x1 = amplitudes[0]
            result = x1 / sqrt(2.0)

            # Take the rest of the coefficients and resize them
            # appropriately. Take a copy of amplitudes as otherwise numpy
            # deletes the element from amplitudes itself.
            coeffs = np.delete(np.copy(amplitudes), 0)
            # One (sin, cos) coefficient pair per row; an odd count is
            # padded with a zero by the in-place resize.
            coeffs.resize((coeffs.size + 1) // 2, 2)

            # Generate the harmonics and arguments for the sin and cos
            # functions.
            harmonics = np.arange(0, coeffs.shape[0]) + 1
            trig_args = np.outer(harmonics, t)

            result += np.sum(coeffs[:, 0, np.newaxis] * np.sin(trig_args) +
                             coeffs[:, 1, np.newaxis] * np.cos(trig_args),
                             axis=0)
            return result
        return f

    n = len(frame)
    class_col = frame[class_column]
    classes = frame[class_column].drop_duplicates()
    df = frame.drop(class_column, axis=1)
    t = np.linspace(-pi, pi, samples)
    used_legends = set()

    color_values = _get_standard_colors(num_colors=len(classes),
                                        colormap=colormap, color_type='random',
                                        color=color)
    colors = dict(zip(classes, color_values))
    if ax is None:
        # Recent matplotlib releases no longer accept keyword arguments in
        # ``plt.gca``, so the limits are applied explicitly.
        ax = plt.gca()
        ax.set_xlim(-pi, pi)
    for i in range(n):
        row = df.iloc[i].values
        y = function(row)(t)
        kls = class_col.iat[i]
        label = pprint_thing(kls)
        # Only the first curve of each class carries a legend label.
        if label not in used_legends:
            used_legends.add(label)
            ax.plot(t, y, color=colors[kls], label=label, **kwds)
        else:
            ax.plot(t, y, color=colors[kls], **kwds)

    ax.legend(loc='upper right')
    ax.grid()
    return ax
|
||
|
|
||
|
|
||
|
def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds):
    """
    Bootstrap plot on mean, median and mid-range statistics.

    The bootstrap plot is used to estimate the uncertainty of a statistic
    by relying on random sampling [1]_. This function will generate
    bootstrapping plots for mean, median and mid-range statistics
    for the given number of samples of the given size.

    Each sampling is drawn with :func:`random.sample`, i.e. it consists of
    `size` distinct elements of `series`.

    .. [1] "Bootstrapping (statistics)" in
       https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29

    Parameters
    ----------
    series : pandas.Series
        Pandas Series from where to get the samplings for the bootstrapping.
    fig : matplotlib.figure.Figure, default None
        If given, it will use the `fig` reference for plotting instead of
        creating a new one with default parameters.
    size : int, default 50
        Number of data points to consider during each sampling. It must be
        less than or equal to the length of the `series`.
    samples : int, default 500
        Number of times the bootstrap procedure is performed.
    **kwds :
        Options to pass to matplotlib plotting method. The same options are
        forwarded to both the line plots and the histograms.

    Returns
    -------
    fig : matplotlib.figure.Figure
        Matplotlib figure

    Raises
    ------
    ValueError
        Raised by :func:`random.sample` when `size` exceeds the length of
        `series`.

    See Also
    --------
    pandas.DataFrame.plot : Basic plotting for DataFrame objects.
    pandas.Series.plot : Basic plotting for Series objects.

    Examples
    --------

    .. plot::
            :context: close-figs

            >>> import numpy as np
            >>> s = pd.Series(np.random.uniform(size=100))
            >>> fig = pd.plotting.bootstrap_plot(s)
    """
    import random
    import matplotlib.pyplot as plt

    # random.sample(ndarray, int) fails on python 3.3, sigh
    data = list(series.values)
    samplings = [random.sample(data, size) for _ in range(samples)]

    means = np.array([np.mean(sampling) for sampling in samplings])
    medians = np.array([np.median(sampling) for sampling in samplings])
    midranges = np.array([(min(sampling) + max(sampling)) * 0.5
                          for sampling in samplings])
    if fig is None:
        fig = plt.figure()
    x = lrange(samples)
    statistics = [("Mean", means), ("Median", medians),
                  ("Midrange", midranges)]
    axes = []

    # Top row (subplots 1-3): value of each statistic per sampling.
    for position, (_, values) in enumerate(statistics, 1):
        axis = fig.add_subplot(2, 3, position)
        axis.set_xlabel("Sample")
        axes.append(axis)
        axis.plot(x, values, **kwds)

    # Bottom row (subplots 4-6): histogram of each statistic.
    for position, (title, values) in enumerate(statistics, 4):
        axis = fig.add_subplot(2, 3, position)
        axis.set_xlabel(title)
        axes.append(axis)
        axis.hist(values, **kwds)

    for axis in axes:
        plt.setp(axis.get_xticklabels(), fontsize=8)
        plt.setp(axis.get_yticklabels(), fontsize=8)
    return fig
|
||
|
|
||
|
|
||
|
@deprecate_kwarg(old_arg_name='colors', new_arg_name='color')
@deprecate_kwarg(old_arg_name='data', new_arg_name='frame', stacklevel=3)
def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None,
                         use_columns=False, xticks=None, colormap=None,
                         axvlines=True, axvlines_kwds=None, sort_labels=False,
                         **kwds):
    """Parallel coordinates plotting.

    Every row of `frame` is drawn as one line across the plotted columns,
    coloured by the value found in `class_column`.

    Parameters
    ----------
    frame : DataFrame
    class_column : str
        Column name containing class names
    cols : list, optional
        A list of column names to use
    ax : matplotlib.axis, optional
        matplotlib axis object
    color : list or tuple, optional
        Colors to use for the different classes
    use_columns : bool, optional
        If true, columns will be used as xticks
    xticks : list or tuple, optional
        A list of values to use for xticks
    colormap : str or matplotlib colormap, default None
        Colormap to use for line colors.
    axvlines : bool, optional
        If true, vertical lines will be added at each xtick
    axvlines_kwds : keywords, optional
        Options to be passed to axvline method for vertical lines
    sort_labels : bool, False
        Sort class_column labels, useful when assigning colors

        .. versionadded:: 0.20.0

    kwds : keywords
        Options to pass to matplotlib plotting method

    Returns
    -------
    ax : matplotlib axis object

    Raises
    ------
    ValueError
        If `use_columns` is True and the column labels are not numeric, or
        if `xticks` is not numeric or its length differs from the number of
        plotted columns.

    Examples
    --------
    >>> from pandas import read_csv
    >>> from pandas.plotting import parallel_coordinates
    >>> from matplotlib import pyplot as plt
    >>> df = read_csv('https://raw.github.com/pandas-dev/pandas/master'
    ...               '/pandas/tests/data/iris.csv')
    >>> parallel_coordinates(df, 'Name', color=('#556270',
    ...                      '#4ECDC4', '#C7F464'))
    >>> plt.show()
    """
    if axvlines_kwds is None:
        axvlines_kwds = {'linewidth': 1, 'color': 'black'}
    import matplotlib.pyplot as plt

    row_count = len(frame)
    classes = frame[class_column].drop_duplicates()
    class_col = frame[class_column]

    df = frame.drop(class_column, axis=1) if cols is None else frame[cols]

    seen_labels = set()

    ncols = len(df.columns)

    # determine values to use for xticks
    if use_columns is True:
        if not np.all(np.isreal(list(df.columns))):
            raise ValueError('Columns must be numeric to be used as xticks')
        x = df.columns
    elif xticks is None:
        x = lrange(ncols)
    else:
        if not np.all(np.isreal(xticks)):
            raise ValueError('xticks specified must be numeric')
        if len(xticks) != ncols:
            raise ValueError('Length of xticks must match number of columns')
        x = xticks

    if ax is None:
        ax = plt.gca()

    color_values = _get_standard_colors(num_colors=len(classes),
                                        colormap=colormap, color_type='random',
                                        color=color)

    if sort_labels:
        classes = sorted(classes)
        color_values = sorted(color_values)
    colors = dict(zip(classes, color_values))

    for position in range(row_count):
        y = df.iloc[position].values
        kls = class_col.iat[position]
        label = pprint_thing(kls)
        # Only the first line of each class carries a legend label.
        if label in seen_labels:
            ax.plot(x, y, color=colors[kls], **kwds)
        else:
            seen_labels.add(label)
            ax.plot(x, y, color=colors[kls], label=label, **kwds)

    if axvlines:
        for tick in x:
            ax.axvline(tick, **axvlines_kwds)

    ax.set_xticks(x)
    ax.set_xticklabels(df.columns)
    ax.set_xlim(x[0], x[-1])
    ax.legend(loc='upper right')
    ax.grid()
    return ax
|
||
|
|
||
|
|
||
|
def lag_plot(series, lag=1, ax=None, **kwds):
    """Lag plot for time series.

    Scatters each value ``y(t)`` of `series` against ``y(t + lag)``.

    Parameters
    ----------
    series : Time series
    lag : lag of the scatter plot, default 1
    ax : Matplotlib axis object, optional
    kwds : Matplotlib scatter method keyword arguments, optional

    Returns
    -------
    ax : Matplotlib axis object
    """
    import matplotlib.pyplot as plt

    # workaround because `c='b'` is hardcoded in matplotlibs scatter method
    kwds.setdefault('c', plt.rcParams['patch.facecolor'])

    data = series.values
    # ``data[:-0]`` is the empty slice ``data[:0]``, which would not line up
    # with ``data[0:]``; a zero lag pairs every value with itself instead.
    y1 = data[:-lag] if lag else data
    y2 = data[lag:]
    if ax is None:
        ax = plt.gca()
    ax.set_xlabel("y(t)")
    ax.set_ylabel("y(t + {lag})".format(lag=lag))
    ax.scatter(y1, y2, **kwds)
    return ax
|
||
|
|
||
|
|
||
|
def autocorrelation_plot(series, ax=None, **kwds):
    """Autocorrelation plot for time series.

    Plots the sample autocorrelation of `series` for lags ``1`` to
    ``len(series)``. Horizontal reference lines are drawn at
    ``+/- z95 / sqrt(n)`` (solid) and ``+/- z99 / sqrt(n)`` (dashed), the
    95% and 99% confidence bands.

    Parameters
    ----------
    series : Time series
    ax : Matplotlib axis object, optional
    kwds : keywords
        Options to pass to matplotlib plotting method

    Returns
    -------
    ax : Matplotlib axis object
    """
    import matplotlib.pyplot as plt
    n = len(series)
    data = np.asarray(series)
    if ax is None:
        # Recent matplotlib releases no longer accept keyword arguments in
        # ``plt.gca``, so the limits are applied explicitly.
        ax = plt.gca()
        ax.set_xlim(1, n)
        ax.set_ylim(-1.0, 1.0)
    mean = np.mean(data)
    # Lag-0 autocovariance, used to normalise every other lag.
    c0 = np.sum((data - mean) ** 2) / float(n)

    def r(h):
        # Autocorrelation at lag h.
        return ((data[:n - h] - mean) *
                (data[h:] - mean)).sum() / float(n) / c0
    x = np.arange(n) + 1
    y = [r(h) for h in x]
    z95 = 1.959963984540054
    z99 = 2.5758293035489004
    root_n = np.sqrt(n)
    ax.axhline(y=z99 / root_n, linestyle='--', color='grey')
    ax.axhline(y=z95 / root_n, color='grey')
    ax.axhline(y=0.0, color='black')
    ax.axhline(y=-z95 / root_n, color='grey')
    ax.axhline(y=-z99 / root_n, linestyle='--', color='grey')
    ax.set_xlabel("Lag")
    ax.set_ylabel("Autocorrelation")
    ax.plot(x, y, **kwds)
    if 'label' in kwds:
        ax.legend()
    ax.grid()
    return ax
|