test_boxplot_method.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642
  1. """ Test cases for .boxplot method """
  2. import itertools
  3. import string
  4. import numpy as np
  5. import pytest
  6. import pandas.util._test_decorators as td
  7. from pandas import (
  8. DataFrame,
  9. MultiIndex,
  10. Series,
  11. date_range,
  12. plotting,
  13. timedelta_range,
  14. )
  15. import pandas._testing as tm
  16. from pandas.tests.plotting.common import (
  17. TestPlotBase,
  18. _check_plot_works,
  19. )
  20. from pandas.io.formats.printing import pprint_thing
  21. @td.skip_if_no_mpl
  22. class TestDataFramePlots(TestPlotBase):
  def test_stacked_boxplot_set_axis(self):
      """Check that x tick labels of a stacked bar plot follow ``set_xticks``."""
      # GH2980
      import matplotlib.pyplot as plt

      n_rows = 80
      frame = DataFrame(
          {
              name: np.random.choice([0, 1, 2, 3], n_rows)
              for name in ("Clinical", "Confirmed", "Discarded")
          },
          index=np.arange(0, n_rows),
      )
      ax = frame.plot(kind="bar", stacked=True)

      # Before touching the ticks, every index value is labelled.
      initial_labels = [int(tick.get_text()) for tick in ax.get_xticklabels()]
      assert initial_labels == frame.index.to_list()

      ax.set_xticks(np.arange(0, 80, 10))
      plt.draw()  # Update changes

      redrawn_labels = [int(tick.get_text()) for tick in ax.get_xticklabels()]
      assert redrawn_labels == list(np.arange(0, 80, 10))
  @pytest.mark.slow
  def test_boxplot_legacy1(self):
      """Smoke-test ``DataFrame.boxplot`` with column, ``by`` and ``notch`` options."""
      frame = DataFrame(
          np.random.randn(6, 4),
          index=list(string.ascii_letters[:6]),
          columns=["one", "two", "three", "four"],
      )
      frame["indic"] = ["foo", "bar"] * 3
      frame["indic2"] = ["foo", "bar", "foo"] * 2

      def expect_user_warning():
          # _check_plot_works adds an ax so catch warning. see GH #13188
          return tm.assert_produces_warning(UserWarning, check_stacklevel=False)

      _check_plot_works(frame.boxplot, return_type="dict")
      _check_plot_works(frame.boxplot, column=["one", "two"], return_type="dict")
      with expect_user_warning():
          _check_plot_works(frame.boxplot, column=["one", "two"], by="indic")
      _check_plot_works(frame.boxplot, column="one", by=["indic", "indic2"])
      with expect_user_warning():
          _check_plot_works(frame.boxplot, by="indic")
      with expect_user_warning():
          _check_plot_works(frame.boxplot, by=["indic", "indic2"])
      _check_plot_works(
          plotting._core.boxplot, data=frame["one"], return_type="dict"
      )
      _check_plot_works(frame.boxplot, notch=1, return_type="dict")
      with expect_user_warning():
          _check_plot_works(frame.boxplot, by="indic", notch=1)
  def test_boxplot_legacy2(self):
      """Check how ``boxplot`` uses a caller-supplied ``ax``."""
      frame = DataFrame(np.random.rand(10, 2), columns=["Col1", "Col2"])
      frame["X"] = Series(["A", "A", "A", "A", "A", "B", "B", "B", "B", "B"])
      frame["Y"] = Series(["A"] * 10)
      with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
          _check_plot_works(frame.boxplot, by="X")

      # When ax is supplied and required number of axes is 1,
      # passed ax should be used:
      _, ax = self.plt.subplots()
      single = frame.boxplot("Col1", by="X", ax=ax)
      assert ax.axes is single

      _, ax = self.plt.subplots()
      per_group = frame.groupby("Y").boxplot(ax=ax, return_type="axes")
      assert ax.axes is per_group["A"]

      # Multiple columns with an ax argument should use same figure
      fig, ax = self.plt.subplots()
      with tm.assert_produces_warning(UserWarning):
          per_column = frame.boxplot(
              column=["Col1", "Col2"], by="X", ax=ax, return_type="axes"
          )
      assert per_column["Col1"].get_figure() is fig

      # When by is None, check that all relevant lines are present in the
      # dict
      _, ax = self.plt.subplots()
      artists = frame.boxplot(ax=ax, return_type="dict")
      drawn_lines = list(itertools.chain.from_iterable(artists.values()))
      assert len(ax.get_lines()) == len(drawn_lines)
  def test_boxplot_return_type_none(self, hist_df):
      """``boxplot()`` called with no arguments returns a matplotlib ``Axes``."""
      # GH 12216; return_type=None & by=None -> axes
      assert isinstance(hist_df.boxplot(), self.plt.Axes)
  def test_boxplot_return_type_legacy(self):
      """Check the accepted ``return_type`` values and the rejected one."""
      # API change in https://github.com/pandas-dev/pandas/pull/7096
      frame = DataFrame(
          np.random.randn(6, 4),
          index=list(string.ascii_letters[:6]),
          columns=["one", "two", "three", "four"],
      )

      msg = "return_type must be {'axes', 'dict', 'both'}"
      with pytest.raises(ValueError, match=msg):
          frame.boxplot(return_type="NOT_A_TYPE")

      # Default return type is checked as "axes".
      self._check_box_return_type(frame.boxplot(), "axes")

      # Each explicit return type must plot without emitting any warning.
      for return_type in ("dict", "axes", "both"):
          with tm.assert_produces_warning(False):
              result = frame.boxplot(return_type=return_type)
          self._check_box_return_type(result, return_type)
  def test_boxplot_axis_limits(self, hist_df):
      """Check y-limits cover the data and y-axis sharing between grouped subplots."""

      def _check_ax_limits(col, ax):
          # The visible y-range must enclose every value of the column.
          lower, upper = ax.get_ylim()
          assert lower <= col.min()
          assert upper >= col.max()

      frame = hist_df.copy()
      frame["age"] = np.random.randint(1, 20, frame.shape[0])

      # One full row
      height_ax, weight_ax = frame.boxplot(["height", "weight"], by="category")
      for name, axis in (("height", height_ax), ("weight", weight_ax)):
          _check_ax_limits(frame[name], axis)
      assert weight_ax._sharey == height_ax

      # Two rows, one partial
      grid = frame.boxplot(["height", "weight", "age"], by="category")
      height_ax, weight_ax, age_ax = grid[0, 0], grid[0, 1], grid[1, 0]
      dummy_ax = grid[1, 1]
      for name, axis in (
          ("height", height_ax),
          ("weight", weight_ax),
          ("age", age_ax),
      ):
          _check_ax_limits(frame[name], axis)
      assert weight_ax._sharey == height_ax
      assert age_ax._sharey == height_ax
      assert dummy_ax._sharey is None
  def test_boxplot_empty_column(self):
      """Plotting works when one column consists only of NaN."""
      frame = DataFrame(np.random.randn(20, 4))
      frame.loc[:, 0] = np.nan
      _check_plot_works(frame.boxplot, return_type="axes")
  def test_figsize(self):
      """The ``figsize`` argument sets the figure's width and height in inches."""
      frame = DataFrame(np.random.rand(10, 5), columns=["A", "B", "C", "D", "E"])
      ax = frame.boxplot(return_type="axes", figsize=(12, 8))
      size = ax.figure.bbox_inches
      assert size.width == 12
      assert size.height == 8
  def test_fontsize(self):
      """The ``fontsize`` argument is applied to both x and y tick labels."""
      frame = DataFrame({"a": [1, 2, 3, 4, 5, 6]})
      ax = frame.boxplot("a", fontsize=16)
      self._check_ticks_props(ax, xlabelsize=16, ylabelsize=16)
  def test_boxplot_numeric_data(self):
      """Only the float columns ``b`` and ``c`` get a box on a mixed-dtype frame."""
      # GH 22799
      n_rows = 100
      frame = DataFrame(
          {
              "a": date_range("2012-01-01", periods=n_rows),
              "b": np.random.randn(n_rows),
              "c": np.random.randn(n_rows) + 2,
              "d": date_range("2012-01-01", periods=n_rows).astype(str),
              "e": date_range("2012-01-01", periods=n_rows, tz="UTC"),
              "f": timedelta_range("1 days", periods=n_rows),
          }
      )
      ax = frame.plot(kind="box")
      plotted = [tick.get_text() for tick in ax.get_xticklabels()]
      assert plotted == ["b", "c"]
  @pytest.mark.parametrize(
      "colors_kwd, expected",
      [
          (
              {"boxes": "r", "whiskers": "b", "medians": "g", "caps": "c"},
              {"boxes": "r", "whiskers": "b", "medians": "g", "caps": "c"},
          ),
          ({"boxes": "r"}, {"boxes": "r"}),
          ("r", {"boxes": "r", "whiskers": "r", "medians": "r", "caps": "r"}),
      ],
  )
  def test_color_kwd(self, colors_kwd, expected):
      """The ``color`` keyword (dict or single color) sets the artist colors."""
      # GH: 26214
      frame = DataFrame(np.random.rand(10, 2))
      artists = frame.boxplot(color=colors_kwd, return_type="dict")
      for artist_kind in expected:
          # Only the first artist of each kind is inspected.
          first_artist = artists[artist_kind][0]
          assert first_artist.get_color() == expected[artist_kind]
  @pytest.mark.parametrize(
      "scheme,expected",
      [
          (
              "dark_background",
              {
                  "boxes": "#8dd3c7",
                  "whiskers": "#8dd3c7",
                  "medians": "#bfbbd9",
                  "caps": "#8dd3c7",
              },
          ),
          (
              "default",
              {
                  "boxes": "#1f77b4",
                  "whiskers": "#1f77b4",
                  "medians": "#2ca02c",
                  "caps": "#1f77b4",
              },
          ),
      ],
  )
  def test_colors_in_theme(self, scheme, expected):
      """Box-plot artists take their colors from the active matplotlib style.

      The style is applied with ``plt.style.context`` rather than
      ``plt.style.use`` so the rcParams change is undone when the block
      exits, even if an assertion fails, and cannot leak into other tests.
      """
      # GH: 40769
      import matplotlib.pyplot as plt

      df = DataFrame(np.random.rand(10, 2))
      with plt.style.context(scheme):
          result = df.plot.box(return_type="dict")
          for k, v in expected.items():
              assert result[k][0].get_color() == v
  @pytest.mark.parametrize(
      "dict_colors, msg",
      [({"boxes": "r", "invalid_key": "r"}, "invalid key 'invalid_key'")],
  )
  def test_color_kwd_errors(self, dict_colors, msg):
      """An unknown key in the ``color`` dict raises ``ValueError``."""
      # GH: 26214
      frame = DataFrame(np.random.rand(10, 2))
      with pytest.raises(ValueError, match=msg):
          frame.boxplot(color=dict_colors, return_type="dict")
  @pytest.mark.parametrize(
      "props, expected",
      [
          ("boxprops", "boxes"),
          ("whiskerprops", "whiskers"),
          ("capprops", "caps"),
          ("medianprops", "medians"),
      ],
  )
  def test_specified_props_kwd(self, props, expected):
      """A ``*props`` keyword sets the color of the matching artists."""
      # GH 30346
      frame = DataFrame({name: np.random.random(100) for name in "ABC"})
      artists = frame.boxplot(return_type="dict", **{props: {"color": "C1"}})
      first_artist = artists[expected][0]
      assert first_artist.get_color() == "C1"
  @pytest.mark.parametrize("vert", [True, False])
  def test_plot_xlabel_ylabel(self, vert):
      """``plot(kind="box")`` applies ``xlabel``/``ylabel`` for either orientation."""
      frame = DataFrame(
          {
              "a": np.random.randn(100),
              "b": np.random.randn(100),
              "group": np.random.choice(["group1", "group2"], 100),
          }
      )
      labels = {"xlabel": "x", "ylabel": "y"}
      ax = frame.plot(kind="box", vert=vert, **labels)
      assert ax.get_xlabel() == labels["xlabel"]
      assert ax.get_ylabel() == labels["ylabel"]
  @pytest.mark.parametrize("vert", [True, False])
  def test_boxplot_xlabel_ylabel(self, vert):
      """``DataFrame.boxplot`` applies ``xlabel``/``ylabel`` for either orientation."""
      frame = DataFrame(
          {
              "a": np.random.randn(100),
              "b": np.random.randn(100),
              "group": np.random.choice(["group1", "group2"], 100),
          }
      )
      labels = {"xlabel": "x", "ylabel": "y"}
      ax = frame.boxplot(vert=vert, **labels)
      assert ax.get_xlabel() == labels["xlabel"]
      assert ax.get_ylabel() == labels["ylabel"]
  @pytest.mark.parametrize("vert", [True, False])
  def test_boxplot_group_xlabel_ylabel(self, vert):
      """Check explicit and default axis labels on box plots grouped with ``by``."""
      frame = DataFrame(
          {
              "a": np.random.randn(100),
              "b": np.random.randn(100),
              "group": np.random.choice(["group1", "group2"], 100),
          }
      )

      # Explicit labels are applied to every subplot.
      xlabel, ylabel = "x", "y"
      labelled = frame.boxplot(by="group", vert=vert, xlabel=xlabel, ylabel=ylabel)
      for subplot in labelled:
          assert subplot.get_xlabel() == xlabel
          assert subplot.get_ylabel() == ylabel
      self.plt.close()

      # Without explicit labels the grouping key labels the category axis:
      # x for vertical boxes, y for horizontal ones.
      unlabelled = frame.boxplot(by="group", vert=vert)
      for subplot in unlabelled:
          if vert:
              target_label = subplot.get_xlabel()
          else:
              target_label = subplot.get_ylabel()
          assert target_label == pprint_thing(["group"])
      self.plt.close()
  287. @td.skip_if_no_mpl
  288. class TestDataFrameGroupByPlots(TestPlotBase):
  def test_boxplot_legacy1(self, hist_df):
      """Group-by box plot: one subplot per gender, or one axes if ``subplots=False``."""
      by_gender = hist_df.groupby(by="gender")

      with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
          per_group = _check_plot_works(by_gender.boxplot, return_type="axes")
      self._check_axes_shape(list(per_group.values), axes_num=2, layout=(1, 2))

      combined = _check_plot_works(
          by_gender.boxplot, subplots=False, return_type="axes"
      )
      self._check_axes_shape(combined, axes_num=1, layout=(1, 1))
  @pytest.mark.slow
  def test_boxplot_legacy2(self):
      """Group-by box plot over ten groups taken from a MultiIndex level."""
      index = MultiIndex.from_tuples(zip(string.ascii_letters[:10], range(10)))
      frame = DataFrame(np.random.rand(10, 3), index=index)
      by_level = frame.groupby(level=1)

      with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
          per_group = _check_plot_works(by_level.boxplot, return_type="axes")
      self._check_axes_shape(list(per_group.values), axes_num=10, layout=(4, 3))

      combined = _check_plot_works(
          by_level.boxplot, subplots=False, return_type="axes"
      )
      self._check_axes_shape(combined, axes_num=1, layout=(1, 1))
  def test_boxplot_legacy3(self):
      """Group-by box plot when grouping the columns of an unstacked frame."""
      index = MultiIndex.from_tuples(zip(string.ascii_letters[:10], range(10)))
      frame = DataFrame(np.random.rand(10, 3), index=index)
      by_column_level = frame.unstack(level=1).groupby(level=0, axis=1)

      with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
          per_group = _check_plot_works(by_column_level.boxplot, return_type="axes")
      self._check_axes_shape(list(per_group.values), axes_num=3, layout=(2, 2))

      combined = _check_plot_works(
          by_column_level.boxplot, subplots=False, return_type="axes"
      )
      self._check_axes_shape(combined, axes_num=1, layout=(1, 1))
  def test_grouped_plot_fignums(self):
      """Count the open figures after group-by ``plot`` and ``boxplot`` calls."""
      n_rows = 10
      weight = Series(np.random.normal(166, 20, size=n_rows))
      height = Series(np.random.normal(60, 10, size=n_rows))
      # Fixed seed for the grouping key.
      gender = np.random.RandomState(42).choice(["male", "female"], size=n_rows)
      frame = DataFrame({"height": height, "weight": weight, "gender": gender})
      by_gender = frame.groupby("gender")

      # plot(): two open figures and two results.
      plots = by_gender.plot()
      assert len(self.plt.get_fignums()) == 2
      assert len(plots) == 2
      tm.close()

      # boxplot(): one open figure, two results.
      boxes = by_gender.boxplot(return_type="axes")
      assert len(self.plt.get_fignums()) == 1
      assert len(boxes) == 2
      tm.close()

      # now works with GH 5610 as gender is excluded
      frame.groupby("gender").hist()
      tm.close()
  @pytest.mark.slow
  def test_grouped_box_return_type(self, hist_df):
      """Check return values of grouped box plots for every ``return_type``."""
      frame = hist_df
      hist_columns = ["height", "weight", "category"]

      # old style: return_type=None
      legacy = frame.boxplot(by="gender")
      assert isinstance(legacy, np.ndarray)
      self._check_box_return_type(legacy, None, expected_keys=hist_columns)

      # now for groupby
      grouped = frame.groupby("gender").boxplot(return_type="dict")
      self._check_box_return_type(grouped, "dict", expected_keys=["Male", "Female"])

      wide_columns = "X B C D A G Y N Q O".split()
      wide = DataFrame(np.random.randn(50, 10), columns=wide_columns)
      wide_categories = "A B C D E F G H I J".split()
      wide["category"] = wide_categories * 5

      for return_type in ["dict", "axes", "both"]:
          result = frame.groupby("classroom").boxplot(return_type=return_type)
          self._check_box_return_type(
              result, return_type, expected_keys=["A", "B", "C"]
          )

          result = frame.boxplot(by="classroom", return_type=return_type)
          self._check_box_return_type(
              result, return_type, expected_keys=hist_columns
          )

          result = wide.groupby("category").boxplot(return_type=return_type)
          self._check_box_return_type(
              result, return_type, expected_keys=wide_categories
          )

          result = wide.boxplot(by="category", return_type=return_type)
          self._check_box_return_type(
              result, return_type, expected_keys=wide_columns
          )
  @pytest.mark.slow
  def test_grouped_box_layout(self, hist_df):
      """Check ``layout`` validation and the subplot grid of grouped box plots.

      Covers the three ``ValueError`` cases for invalid layouts, the default
      grids, and explicit layouts including ``-1`` for a dimension left to
      be worked out from the other one.
      """
      df = hist_df

      def warns_user():
          # _check_plot_works adds an ax so catch warning. see GH #13188
          return tm.assert_produces_warning(UserWarning, check_stacklevel=False)

      def assert_grid(axes_num, layout):
          # Always inspects the current figure, i.e. the most recent plot.
          self._check_axes_shape(
              self.plt.gcf().axes, axes_num=axes_num, layout=layout
          )

      msg = "Layout of 1x1 must be larger than required size 2"
      with pytest.raises(ValueError, match=msg):
          df.boxplot(column=["weight", "height"], by=df.gender, layout=(1, 1))

      msg = "The 'layout' keyword is not supported when 'by' is None"
      with pytest.raises(ValueError, match=msg):
          df.boxplot(
              column=["height", "weight", "category"],
              layout=(2, 1),
              return_type="dict",
          )

      msg = "At least one dimension of layout must be positive"
      with pytest.raises(ValueError, match=msg):
          df.boxplot(column=["weight", "height"], by=df.gender, layout=(-1, -1))

      with warns_user():
          _check_plot_works(
              df.groupby("gender").boxplot, column="height", return_type="dict"
          )
      assert_grid(axes_num=2, layout=(1, 2))

      with warns_user():
          _check_plot_works(
              df.groupby("category").boxplot, column="height", return_type="dict"
          )
      assert_grid(axes_num=4, layout=(2, 2))

      # GH 6769
      with warns_user():
          _check_plot_works(
              df.groupby("classroom").boxplot, column="height", return_type="dict"
          )
      assert_grid(axes_num=3, layout=(2, 2))

      # GH 5897
      axes = df.boxplot(
          column=["height", "weight", "category"], by="gender", return_type="axes"
      )
      assert_grid(axes_num=3, layout=(2, 2))
      for ax in [axes["height"]]:
          self._check_visible(ax.get_xticklabels(), visible=False)
          self._check_visible([ax.xaxis.get_label()], visible=False)
      for ax in [axes["weight"], axes["category"]]:
          self._check_visible(ax.get_xticklabels())
          self._check_visible([ax.xaxis.get_label()])

      df.groupby("classroom").boxplot(
          column=["height", "weight", "category"], return_type="dict"
      )
      assert_grid(axes_num=3, layout=(2, 2))

      with warns_user():
          _check_plot_works(
              df.groupby("category").boxplot,
              column="height",
              layout=(3, 2),
              return_type="dict",
          )
      assert_grid(axes_num=4, layout=(3, 2))

      with warns_user():
          _check_plot_works(
              df.groupby("category").boxplot,
              column="height",
              layout=(3, -1),
              return_type="dict",
          )
      assert_grid(axes_num=4, layout=(3, 2))

      df.boxplot(
          column=["height", "weight", "category"], by="gender", layout=(4, 1)
      )
      assert_grid(axes_num=3, layout=(4, 1))

      df.boxplot(
          column=["height", "weight", "category"], by="gender", layout=(-1, 1)
      )
      assert_grid(axes_num=3, layout=(3, 1))

      df.groupby("classroom").boxplot(
          column=["height", "weight", "category"], layout=(1, 4), return_type="dict"
      )
      assert_grid(axes_num=3, layout=(1, 4))

      df.groupby("classroom").boxplot(
          column=["height", "weight", "category"], layout=(1, -1), return_type="dict"
      )
      assert_grid(axes_num=3, layout=(1, 3))
  @pytest.mark.slow
  def test_grouped_box_multiple_axes(self, hist_df):
      """Check grouped box plots drawn onto caller-supplied arrays of axes."""
      # GH 6970, GH 7069
      frame = hist_df
      columns = ["height", "weight", "category"]

      # check warning to ignore sharex / sharey
      # this check should be done in the first function which
      # passes multiple axes to plot, hist or boxplot
      # location should be changed if other test is added
      # which has earlier alphabetical order
      with tm.assert_produces_warning(UserWarning):
          fig, axes = self.plt.subplots(2, 2)
          frame.groupby("category").boxplot(
              column="height", return_type="axes", ax=axes
          )
          self._check_axes_shape(self.plt.gcf().axes, axes_num=4, layout=(2, 2))

      fig, axes = self.plt.subplots(2, 3)

      # draw on first row
      with tm.assert_produces_warning(UserWarning):
          first_row = frame.boxplot(
              column=columns,
              by="gender",
              return_type="axes",
              ax=axes[0],
          )
      first_row = np.array(list(first_row.values))
      self._check_axes_shape(first_row, axes_num=3, layout=(1, 3))
      tm.assert_numpy_array_equal(first_row, axes[0])
      assert first_row[0].figure is fig

      # draw on second row
      with tm.assert_produces_warning(UserWarning):
          second_row = frame.groupby("classroom").boxplot(
              column=columns, return_type="axes", ax=axes[1]
          )
      second_row = np.array(list(second_row.values))
      self._check_axes_shape(second_row, axes_num=3, layout=(1, 3))
      tm.assert_numpy_array_equal(second_row, axes[1])
      assert second_row[0].figure is fig

      msg = "The number of passed axes must be 3, the same as the output plot"
      with pytest.raises(ValueError, match=msg):
          fig, axes = self.plt.subplots(2, 3)
          # pass different number of axes from required
          with tm.assert_produces_warning(UserWarning):
              frame.groupby("classroom").boxplot(ax=axes)
  def test_fontsize(self):
      """The ``fontsize`` argument is applied to tick labels of a ``by`` box plot."""
      frame = DataFrame({"a": [1, 2, 3, 4, 5, 6], "b": [0, 0, 0, 1, 1, 1]})
      plotted = frame.boxplot("a", by="b", fontsize=16)
      self._check_ticks_props(plotted, xlabelsize=16, ylabelsize=16)
  @pytest.mark.parametrize(
      "col, expected_xticklabel",
      [
          ("v", [f"({grp}, v)" for grp in "abcde"]),
          (["v"], [f"({grp}, v)" for grp in "abcde"]),
          ("v1", [f"({grp}, v1)" for grp in "abcde"]),
          (
              ["v", "v1"],
              [f"({grp}, {val})" for grp in "abcde" for val in ("v", "v1")],
          ),
          (
              None,
              [f"({grp}, {val})" for grp in "abcde" for val in ("v", "v1")],
          ),
      ],
  )
  def test_groupby_boxplot_subplots_false(self, col, expected_xticklabel):
      """With ``subplots=False`` tick labels are ``(group, column)`` pairs."""
      # GH 16748
      frame = DataFrame(
          {
              "cat": np.random.choice(list("abcde"), 100),
              "v": np.random.rand(100),
              "v1": np.random.rand(100),
          }
      )
      by_cat = frame.groupby("cat")

      ax = _check_plot_works(
          by_cat.boxplot, subplots=False, column=col, return_type="axes"
      )

      result_xticklabel = [tick.get_text() for tick in ax.get_xticklabels()]
      assert expected_xticklabel == result_xticklabel
  def test_groupby_boxplot_object(self, hist_df):
      """Group-by box plot on an all-object frame raises ``ValueError``."""
      # GH 43480
      by_gender = hist_df.astype("object").groupby("gender")
      msg = "boxplot method requires numerical columns, nothing to plot"
      with pytest.raises(ValueError, match=msg):
          _check_plot_works(by_gender.boxplot, subplots=False)
  def test_boxplot_multiindex_column(self):
      """MultiIndex column keys are rendered as ``(level0, level1)`` tick labels."""
      # GH 16748
      first_level = ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]
      second_level = ["one", "two", "one", "two", "one", "two", "one", "two"]
      columns = MultiIndex.from_tuples(
          list(zip(first_level, second_level)), names=["first", "second"]
      )
      frame = DataFrame(
          np.random.randn(3, 8), index=["A", "B", "C"], columns=columns
      )

      selected = [("bar", "one"), ("bar", "two")]
      ax = _check_plot_works(frame.boxplot, column=selected, return_type="axes")

      expected_xticklabel = ["(bar, one)", "(bar, two)"]
      result_xticklabel = [tick.get_text() for tick in ax.get_xticklabels()]
      assert expected_xticklabel == result_xticklabel