# coding: utf-8 import pytest import itertools import string from distutils.version import LooseVersion from pandas import Series, DataFrame, MultiIndex from pandas.compat import range, lzip import pandas.util.testing as tm import pandas.util._test_decorators as td import numpy as np from numpy import random import pandas.plotting as plotting from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works) """ Test cases for .boxplot method """ def _skip_if_mpl_14_or_dev_boxplot(): # GH 8382 # Boxplot failures on 1.4 and 1.4.1 # Don't need try / except since that's done at class level import matplotlib if LooseVersion(matplotlib.__version__) >= LooseVersion('1.4'): pytest.skip("Matplotlib Regression in 1.4 and current dev.") @td.skip_if_no_mpl class TestDataFramePlots(TestPlotBase): @pytest.mark.slow def test_boxplot_legacy1(self): df = DataFrame(np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=['one', 'two', 'three', 'four']) df['indic'] = ['foo', 'bar'] * 3 df['indic2'] = ['foo', 'bar', 'foo'] * 2 _check_plot_works(df.boxplot, return_type='dict') _check_plot_works(df.boxplot, column=[ 'one', 'two'], return_type='dict') # _check_plot_works adds an ax so catch warning. see GH #13188 with tm.assert_produces_warning(UserWarning): _check_plot_works(df.boxplot, column=['one', 'two'], by='indic') _check_plot_works(df.boxplot, column='one', by=['indic', 'indic2']) with tm.assert_produces_warning(UserWarning): _check_plot_works(df.boxplot, by='indic') with tm.assert_produces_warning(UserWarning): _check_plot_works(df.boxplot, by=['indic', 'indic2']) _check_plot_works(plotting._core.boxplot, data=df['one'], return_type='dict') _check_plot_works(df.boxplot, notch=1, return_type='dict') with tm.assert_produces_warning(UserWarning): _check_plot_works(df.boxplot, by='indic', notch=1) @pytest.mark.slow def test_boxplot_legacy2(self): df = DataFrame(np.random.rand(10, 2), columns=['Col1', 'Col2']) df['X'] = Series(['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B']) df['Y'] = Series(['A'] * 10) with tm.assert_produces_warning(UserWarning): _check_plot_works(df.boxplot, by='X') # When ax is supplied and required number of axes is 1, # passed ax should be used: fig, ax = self.plt.subplots() axes = df.boxplot('Col1', by='X', ax=ax) ax_axes = ax.axes if self.mpl_ge_1_5_0 else ax.get_axes() assert ax_axes is axes fig, ax = self.plt.subplots() axes = df.groupby('Y').boxplot(ax=ax, return_type='axes') ax_axes = ax.axes if self.mpl_ge_1_5_0 else ax.get_axes() assert ax_axes is axes['A'] # Multiple columns with an ax argument should use same figure fig, ax = self.plt.subplots() with tm.assert_produces_warning(UserWarning): axes = df.boxplot(column=['Col1', 'Col2'], by='X', ax=ax, return_type='axes') assert axes['Col1'].get_figure() is fig # When by is None, check that all relevant lines are present in the # dict fig, ax = self.plt.subplots() d = df.boxplot(ax=ax, return_type='dict') lines = list(itertools.chain.from_iterable(d.values())) assert len(ax.get_lines()) == len(lines) @pytest.mark.slow def test_boxplot_return_type_none(self): # GH 12216; return_type=None & by=None -> axes result = self.hist_df.boxplot() assert isinstance(result, self.plt.Axes) @pytest.mark.slow def test_boxplot_return_type_legacy(self): # API change in https://github.com/pandas-dev/pandas/pull/7096 import matplotlib as mpl # noqa df = DataFrame(np.random.randn(6, 4), index=list(string.ascii_letters[:6]), columns=['one', 'two', 'three', 'four']) with pytest.raises(ValueError): df.boxplot(return_type='NOTATYPE') result = df.boxplot() self._check_box_return_type(result, 'axes') with tm.assert_produces_warning(False): result = df.boxplot(return_type='dict') self._check_box_return_type(result, 'dict') with tm.assert_produces_warning(False): result = df.boxplot(return_type='axes') self._check_box_return_type(result, 'axes') with tm.assert_produces_warning(False): result = df.boxplot(return_type='both') self._check_box_return_type(result, 'both') @pytest.mark.slow def test_boxplot_axis_limits(self): def _check_ax_limits(col, ax): y_min, y_max = ax.get_ylim() assert y_min <= col.min() assert y_max >= col.max() df = self.hist_df.copy() df['age'] = np.random.randint(1, 20, df.shape[0]) # One full row height_ax, weight_ax = df.boxplot(['height', 'weight'], by='category') _check_ax_limits(df['height'], height_ax) _check_ax_limits(df['weight'], weight_ax) assert weight_ax._sharey == height_ax # Two rows, one partial p = df.boxplot(['height', 'weight', 'age'], by='category') height_ax, weight_ax, age_ax = p[0, 0], p[0, 1], p[1, 0] dummy_ax = p[1, 1] _check_ax_limits(df['height'], height_ax) _check_ax_limits(df['weight'], weight_ax) _check_ax_limits(df['age'], age_ax) assert weight_ax._sharey == height_ax assert age_ax._sharey == height_ax assert dummy_ax._sharey is None @pytest.mark.slow def test_boxplot_empty_column(self): _skip_if_mpl_14_or_dev_boxplot() df = DataFrame(np.random.randn(20, 4)) df.loc[:, 0] = np.nan _check_plot_works(df.boxplot, return_type='axes') @pytest.mark.slow def test_figsize(self): df = DataFrame(np.random.rand(10, 5), columns=['A', 'B', 'C', 'D', 'E']) result = df.boxplot(return_type='axes', figsize=(12, 8)) assert result.figure.bbox_inches.width == 12 assert result.figure.bbox_inches.height == 8 def test_fontsize(self): df = DataFrame({"a": [1, 2, 3, 4, 5, 6]}) self._check_ticks_props(df.boxplot("a", fontsize=16), xlabelsize=16, ylabelsize=16) @td.skip_if_no_mpl class TestDataFrameGroupByPlots(TestPlotBase): @pytest.mark.slow def test_boxplot_legacy1(self): grouped = self.hist_df.groupby(by='gender') with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(grouped.boxplot, return_type='axes') self._check_axes_shape(list(axes.values), axes_num=2, layout=(1, 2)) axes = _check_plot_works(grouped.boxplot, subplots=False, return_type='axes') self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) @pytest.mark.slow def test_boxplot_legacy2(self): tuples = lzip(string.ascii_letters[:10], range(10)) df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) grouped = df.groupby(level=1) with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(grouped.boxplot, return_type='axes') self._check_axes_shape(list(axes.values), axes_num=10, layout=(4, 3)) axes = _check_plot_works(grouped.boxplot, subplots=False, return_type='axes') self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) @pytest.mark.slow def test_boxplot_legacy3(self): tuples = lzip(string.ascii_letters[:10], range(10)) df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) grouped = df.unstack(level=1).groupby(level=0, axis=1) with tm.assert_produces_warning(UserWarning): axes = _check_plot_works(grouped.boxplot, return_type='axes') self._check_axes_shape(list(axes.values), axes_num=3, layout=(2, 2)) axes = _check_plot_works(grouped.boxplot, subplots=False, return_type='axes') self._check_axes_shape(axes, axes_num=1, layout=(1, 1)) @pytest.mark.slow def test_grouped_plot_fignums(self): n = 10 weight = Series(np.random.normal(166, 20, size=n)) height = Series(np.random.normal(60, 10, size=n)) with tm.RNGContext(42): gender = np.random.choice(['male', 'female'], size=n) df = DataFrame({'height': height, 'weight': weight, 'gender': gender}) gb = df.groupby('gender') res = gb.plot() assert len(self.plt.get_fignums()) == 2 assert len(res) == 2 tm.close() res = gb.boxplot(return_type='axes') assert len(self.plt.get_fignums()) == 1 assert len(res) == 2 tm.close() # now works with GH 5610 as gender is excluded res = df.groupby('gender').hist() tm.close() @pytest.mark.slow def test_grouped_box_return_type(self): df = self.hist_df # old style: return_type=None result = df.boxplot(by='gender') assert isinstance(result, np.ndarray) self._check_box_return_type( result, None, expected_keys=['height', 'weight', 'category']) # now for groupby result = df.groupby('gender').boxplot(return_type='dict') self._check_box_return_type( result, 'dict', expected_keys=['Male', 'Female']) columns2 = 'X B C D A G Y N Q O'.split() df2 = DataFrame(random.randn(50, 10), columns=columns2) categories2 = 'A B C D E F G H I J'.split() df2['category'] = categories2 * 5 for t in ['dict', 'axes', 'both']: returned = df.groupby('classroom').boxplot(return_type=t) self._check_box_return_type( returned, t, expected_keys=['A', 'B', 'C']) returned = df.boxplot(by='classroom', return_type=t) self._check_box_return_type( returned, t, expected_keys=['height', 'weight', 'category']) returned = df2.groupby('category').boxplot(return_type=t) self._check_box_return_type(returned, t, expected_keys=categories2) returned = df2.boxplot(by='category', return_type=t) self._check_box_return_type(returned, t, expected_keys=columns2) @pytest.mark.slow def test_grouped_box_layout(self): df = self.hist_df pytest.raises(ValueError, df.boxplot, column=['weight', 'height'], by=df.gender, layout=(1, 1)) pytest.raises(ValueError, df.boxplot, column=['height', 'weight', 'category'], layout=(2, 1), return_type='dict') pytest.raises(ValueError, df.boxplot, column=['weight', 'height'], by=df.gender, layout=(-1, -1)) # _check_plot_works adds an ax so catch warning. see GH #13188 with tm.assert_produces_warning(UserWarning): box = _check_plot_works(df.groupby('gender').boxplot, column='height', return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=2, layout=(1, 2)) with tm.assert_produces_warning(UserWarning): box = _check_plot_works(df.groupby('category').boxplot, column='height', return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(2, 2)) # GH 6769 with tm.assert_produces_warning(UserWarning): box = _check_plot_works(df.groupby('classroom').boxplot, column='height', return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2)) # GH 5897 axes = df.boxplot(column=['height', 'weight', 'category'], by='gender', return_type='axes') self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2)) for ax in [axes['height']]: self._check_visible(ax.get_xticklabels(), visible=False) self._check_visible([ax.xaxis.get_label()], visible=False) for ax in [axes['weight'], axes['category']]: self._check_visible(ax.get_xticklabels()) self._check_visible([ax.xaxis.get_label()]) box = df.groupby('classroom').boxplot( column=['height', 'weight', 'category'], return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(2, 2)) with tm.assert_produces_warning(UserWarning): box = _check_plot_works(df.groupby('category').boxplot, column='height', layout=(3, 2), return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(3, 2)) with tm.assert_produces_warning(UserWarning): box = _check_plot_works(df.groupby('category').boxplot, column='height', layout=(3, -1), return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(3, 2)) box = df.boxplot(column=['height', 'weight', 'category'], by='gender', layout=(4, 1)) self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(4, 1)) box = df.boxplot(column=['height', 'weight', 'category'], by='gender', layout=(-1, 1)) self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(3, 1)) box = df.groupby('classroom').boxplot( column=['height', 'weight', 'category'], layout=(1, 4), return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 4)) box = df.groupby('classroom').boxplot( # noqa column=['height', 'weight', 'category'], layout=(1, -1), return_type='dict') self._check_axes_shape(self.plt.gcf().axes, axes_num=3, layout=(1, 3)) @pytest.mark.slow def test_grouped_box_multiple_axes(self): # GH 6970, GH 7069 df = self.hist_df # check warning to ignore sharex / sharey # this check should be done in the first function which # passes multiple axes to plot, hist or boxplot # location should be changed if other test is added # which has earlier alphabetical order with tm.assert_produces_warning(UserWarning): fig, axes = self.plt.subplots(2, 2) df.groupby('category').boxplot( column='height', return_type='axes', ax=axes) self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(2, 2)) fig, axes = self.plt.subplots(2, 3) with tm.assert_produces_warning(UserWarning): returned = df.boxplot(column=['height', 'weight', 'category'], by='gender', return_type='axes', ax=axes[0]) returned = np.array(list(returned.values)) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) tm.assert_numpy_array_equal(returned, axes[0]) assert returned[0].figure is fig # draw on second row with tm.assert_produces_warning(UserWarning): returned = df.groupby('classroom').boxplot( column=['height', 'weight', 'category'], return_type='axes', ax=axes[1]) returned = np.array(list(returned.values)) self._check_axes_shape(returned, axes_num=3, layout=(1, 3)) tm.assert_numpy_array_equal(returned, axes[1]) assert returned[0].figure is fig with pytest.raises(ValueError): fig, axes = self.plt.subplots(2, 3) # pass different number of axes from required with tm.assert_produces_warning(UserWarning): axes = df.groupby('classroom').boxplot(ax=axes) def test_fontsize(self): df = DataFrame({"a": [1, 2, 3, 4, 5, 6], "b": [0, 0, 0, 1, 1, 1]}) self._check_ticks_props(df.boxplot("a", by="b", fontsize=16), xlabelsize=16, ylabelsize=16)