123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383 |
- import re
- import numpy as np
- import pytest
- import pandas.util._test_decorators as td
- from pandas import DataFrame
- import pandas._testing as tm
- from pandas.tests.plotting.common import (
- TestPlotBase,
- _check_plot_works,
- )
@pytest.fixture
def hist_df():
    """Build the 30-row frame shared by the ``by``-plot tests.

    Columns ``A`` and ``B`` are filled from ``np.random.randn``; ``C`` and
    ``D`` are filled with labels chosen from ``"a"``, ``"b"`` and ``"c"``.
    The global NumPy RNG is seeded with 0 first, so every call produces the
    same frame.
    """
    np.random.seed(0)
    n_rows = 30
    labels = ["a", "b", "c"]
    # Draw order is significant for reproducibility: the numeric block is
    # sampled first, then column C, then column D.
    values = np.random.randn(n_rows, 2)
    frame = DataFrame(values, columns=["A", "B"])
    for name in ("C", "D"):
        frame[name] = np.random.choice(labels, n_rows)
    return frame
@td.skip_if_no_mpl
class TestHistWithBy(TestPlotBase):
    """Tests for ``DataFrame.plot.hist`` when grouping with ``by`` (GH 15079)."""

    # Subplot titles expected when grouping on two label columns at once;
    # declared once and reused by the parametrizations below.
    _pair_titles = [
        "(a, a)",
        "(a, b)",
        "(a, c)",
        "(b, a)",
        "(b, b)",
        "(b, c)",
        "(c, a)",
        "(c, b)",
        "(c, c)",
    ]

    @pytest.mark.slow
    @pytest.mark.parametrize(
        "by, column, titles, legends",
        [
            ("C", "A", ["a", "b", "c"], [["A"]] * 3),
            ("C", ["A", "B"], ["a", "b", "c"], [["A", "B"]] * 3),
            ("C", None, ["a", "b", "c"], [["A", "B"]] * 3),
            (["C", "D"], "A", _pair_titles, [["A"]] * 9),
            (["C", "D"], ["A", "B"], _pair_titles, [["A", "B"]] * 9),
            (["C", "D"], None, _pair_titles, [["A", "B"]] * 9),
        ],
    )
    def test_hist_plot_by_argument(self, by, column, titles, legends, hist_df):
        """Each subplot's title and legend texts match the expected values."""
        # GH 15079
        subplots = _check_plot_works(
            hist_df.plot.hist, column=column, by=by, default_axes=True
        )
        seen_titles = []
        seen_legends = []
        for subplot in subplots:
            seen_titles.append(subplot.get_title())
            seen_legends.append(
                [entry.get_text() for entry in subplot.get_legend().texts]
            )

        assert seen_legends == legends
        assert seen_titles == titles

    @pytest.mark.parametrize(
        "by, column, titles, legends",
        [
            (0, "A", ["a", "b", "c"], [["A"]] * 3),
            (0, None, ["a", "b", "c"], [["A", "B"]] * 3),
            ([0, "D"], "A", _pair_titles, [["A"]] * 9),
        ],
    )
    def test_hist_plot_by_0(self, by, column, titles, legends, hist_df):
        """Same checks as above, with column ``C`` renamed to the integer 0."""
        # GH 15079
        renamed = hist_df.copy().rename(columns={"C": 0})

        subplots = _check_plot_works(
            renamed.plot.hist, default_axes=True, column=column, by=by
        )
        seen_titles = []
        seen_legends = []
        for subplot in subplots:
            seen_titles.append(subplot.get_title())
            seen_legends.append(
                [entry.get_text() for entry in subplot.get_legend().texts]
            )

        assert seen_legends == legends
        assert seen_titles == titles

    @pytest.mark.parametrize(
        "by, column",
        [
            ([], ["A"]),
            ([], ["A", "B"]),
            ((), None),
            ((), ["A", "B"]),
        ],
    )
    def test_hist_plot_empty_list_string_tuple_by(self, by, column, hist_df):
        """An empty list or tuple for ``by`` raises ``ValueError``."""
        # GH 15079
        with pytest.raises(ValueError, match="No group keys passed"):
            _check_plot_works(
                hist_df.plot.hist, default_axes=True, column=column, by=by
            )

    @pytest.mark.slow
    @pytest.mark.parametrize(
        "by, column, layout, axes_num",
        [
            (["C"], "A", (2, 2), 3),
            ("C", "A", (2, 2), 3),
            (["C"], ["A"], (1, 3), 3),
            ("C", None, (3, 1), 3),
            ("C", ["A", "B"], (3, 1), 3),
            (["C", "D"], "A", (9, 1), 9),
            (["C", "D"], "A", (3, 3), 9),
            (["C", "D"], ["A"], (5, 2), 9),
            (["C", "D"], ["A", "B"], (9, 1), 9),
            (["C", "D"], None, (9, 1), 9),
            (["C", "D"], ["A", "B"], (5, 2), 9),
        ],
    )
    def test_hist_plot_layout_with_by(self, by, column, layout, axes_num, hist_df):
        """The returned axes have the requested layout and axes count."""
        # GH 15079
        # _check_plot_works adds an ax so catch warning. see GH #13188
        plot_kwargs = {"column": column, "by": by, "layout": layout}
        with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
            subplots = _check_plot_works(hist_df.plot.hist, **plot_kwargs)
        self._check_axes_shape(subplots, axes_num=axes_num, layout=layout)

    @pytest.mark.parametrize(
        "msg, by, layout",
        [
            ("larger than required size", ["C", "D"], (1, 1)),
            (re.escape("Layout must be a tuple of (rows, columns)"), "C", (1,)),
            ("At least one dimension of layout must be positive", "C", (-1, -1)),
        ],
    )
    def test_hist_plot_invalid_layout_with_by_raises(self, msg, by, layout, hist_df):
        """An invalid ``layout`` raises ``ValueError`` matching ``msg``."""
        # GH 15079, test if error is raised when invalid layout is given
        columns = ["A", "B"]
        with pytest.raises(ValueError, match=msg):
            hist_df.plot.hist(column=columns, by=by, layout=layout)

    @pytest.mark.slow
    def test_axis_share_x_with_by(self, hist_df):
        """With ``sharex=True`` the x axes are joined and the y axes are not."""
        # GH 15079
        first, second, third = hist_df.plot.hist(column="A", by="C", sharex=True)
        # (axis owner, left, right) triples, in the order they are checked
        checks = [
            (first, first, second),
            (second, first, second),
            (third, first, third),
            (third, second, third),
        ]

        # share x
        for owner, left, right in checks:
            assert self.get_x_axis(owner).joined(left, right)

        # don't share y
        for owner, left, right in checks:
            assert not self.get_y_axis(owner).joined(left, right)

    @pytest.mark.slow
    def test_axis_share_y_with_by(self, hist_df):
        """With ``sharey=True`` the y axes are joined and the x axes are not."""
        # GH 15079
        first, second, third = hist_df.plot.hist(column="A", by="C", sharey=True)
        # (axis owner, left, right) triples, in the order they are checked
        checks = [
            (first, first, second),
            (second, first, second),
            (third, first, third),
            (third, second, third),
        ]

        # share y
        for owner, left, right in checks:
            assert self.get_y_axis(owner).joined(left, right)

        # don't share x
        for owner, left, right in checks:
            assert not self.get_x_axis(owner).joined(left, right)

    @pytest.mark.parametrize("figsize", [(12, 8), (20, 10)])
    def test_figure_shape_hist_with_by(self, figsize, hist_df):
        """Three axes are produced and the requested ``figsize`` is applied."""
        # GH 15079
        subplots = hist_df.plot.hist(column="A", by="C", figsize=figsize)
        self._check_axes_shape(subplots, axes_num=3, figsize=figsize)
@td.skip_if_no_mpl
class TestBoxWithBy(TestPlotBase):
    """Tests for ``DataFrame.plot.box`` when grouping with ``by`` (GH 15079)."""

    # x tick labels expected when grouping on two label columns at once;
    # declared once and reused by the parametrizations below.
    _pair_labels = [
        "(a, a)",
        "(a, b)",
        "(a, c)",
        "(b, a)",
        "(b, b)",
        "(b, c)",
        "(c, a)",
        "(c, b)",
        "(c, c)",
    ]

    @pytest.mark.parametrize(
        "by, column, titles, xticklabels",
        [
            ("C", "A", ["A"], [["a", "b", "c"]]),
            (["C", "D"], "A", ["A"], [_pair_labels]),
            ("C", ["A", "B"], ["A", "B"], [["a", "b", "c"]] * 2),
            (["C", "D"], ["A", "B"], ["A", "B"], [_pair_labels] * 2),
            (["C"], None, ["A", "B"], [["a", "b", "c"]] * 2),
        ],
    )
    def test_box_plot_by_argument(self, by, column, titles, xticklabels, hist_df):
        """Each subplot's title and x tick labels match the expected values."""
        # GH 15079
        subplots = _check_plot_works(
            hist_df.plot.box, default_axes=True, column=column, by=by
        )
        seen_titles = []
        seen_ticklabels = []
        for subplot in subplots:
            seen_titles.append(subplot.get_title())
            seen_ticklabels.append(
                [tick.get_text() for tick in subplot.get_xticklabels()]
            )

        assert seen_ticklabels == xticklabels
        assert seen_titles == titles

    @pytest.mark.parametrize(
        "by, column, titles, xticklabels",
        [
            (0, "A", ["A"], [["a", "b", "c"]]),
            ([0, "D"], "A", ["A"], [_pair_labels]),
            (0, None, ["A", "B"], [["a", "b", "c"]] * 2),
        ],
    )
    def test_box_plot_by_0(self, by, column, titles, xticklabels, hist_df):
        """Same checks as above, with column ``C`` renamed to the integer 0."""
        # GH 15079
        renamed = hist_df.copy().rename(columns={"C": 0})

        subplots = _check_plot_works(
            renamed.plot.box, default_axes=True, column=column, by=by
        )
        seen_titles = []
        seen_ticklabels = []
        for subplot in subplots:
            seen_titles.append(subplot.get_title())
            seen_ticklabels.append(
                [tick.get_text() for tick in subplot.get_xticklabels()]
            )

        assert seen_ticklabels == xticklabels
        assert seen_titles == titles

    @pytest.mark.parametrize(
        "by, column",
        [
            ([], ["A"]),
            ((), "A"),
            ([], None),
            ((), ["A", "B"]),
        ],
    )
    def test_box_plot_with_none_empty_list_by(self, by, column, hist_df):
        """An empty list or tuple for ``by`` raises ``ValueError``."""
        # GH 15079
        with pytest.raises(ValueError, match="No group keys passed"):
            _check_plot_works(hist_df.plot.box, default_axes=True, column=column, by=by)

    @pytest.mark.slow
    @pytest.mark.parametrize(
        "by, column, layout, axes_num",
        [
            (["C"], "A", (1, 1), 1),
            ("C", "A", (1, 1), 1),
            ("C", None, (2, 1), 2),
            ("C", ["A", "B"], (1, 2), 2),
            (["C", "D"], "A", (1, 1), 1),
            (["C", "D"], None, (1, 2), 2),
        ],
    )
    def test_box_plot_layout_with_by(self, by, column, layout, axes_num, hist_df):
        """The returned axes have the requested layout and axes count."""
        # GH 15079
        plot_kwargs = {"column": column, "by": by, "layout": layout}
        subplots = _check_plot_works(
            hist_df.plot.box, default_axes=True, **plot_kwargs
        )
        self._check_axes_shape(subplots, axes_num=axes_num, layout=layout)

    @pytest.mark.parametrize(
        "msg, by, layout",
        [
            ("larger than required size", ["C", "D"], (1, 1)),
            (re.escape("Layout must be a tuple of (rows, columns)"), "C", (1,)),
            ("At least one dimension of layout must be positive", "C", (-1, -1)),
        ],
    )
    def test_box_plot_invalid_layout_with_by_raises(self, msg, by, layout, hist_df):
        """An invalid ``layout`` raises ``ValueError`` matching ``msg``."""
        # GH 15079, test if error is raised when invalid layout is given
        columns = ["A", "B"]
        with pytest.raises(ValueError, match=msg):
            hist_df.plot.box(column=columns, by=by, layout=layout)

    # NOTE(review): the name says "hist" but the body exercises plot.box; the
    # name is left untouched here so existing test ids keep working.
    @pytest.mark.parametrize("figsize", [(12, 8), (20, 10)])
    def test_figure_shape_hist_with_by(self, figsize, hist_df):
        """One axes is produced and the requested ``figsize`` is applied."""
        # GH 15079
        subplots = hist_df.plot.box(column="A", by="C", figsize=figsize)
        self._check_axes_shape(subplots, axes_num=1, figsize=figsize)
|