# test_hist_method.py (29 KB)
  1. """ Test cases for .hist method """
  2. import re
  3. import numpy as np
  4. import pytest
  5. import pandas.util._test_decorators as td
  6. from pandas import (
  7. DataFrame,
  8. Index,
  9. Series,
  10. to_datetime,
  11. )
  12. import pandas._testing as tm
  13. from pandas.tests.plotting.common import (
  14. TestPlotBase,
  15. _check_plot_works,
  16. )
  17. @pytest.fixture
  18. def ts():
  19. return tm.makeTimeSeries(name="ts")
  20. @td.skip_if_no_mpl
  21. class TestSeriesPlots(TestPlotBase):
  22. def test_hist_legacy(self, ts):
  23. _check_plot_works(ts.hist)
  24. _check_plot_works(ts.hist, grid=False)
  25. _check_plot_works(ts.hist, figsize=(8, 10))
  26. # _check_plot_works adds an ax so catch warning. see GH #13188
  27. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  28. _check_plot_works(ts.hist, by=ts.index.month)
  29. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  30. _check_plot_works(ts.hist, by=ts.index.month, bins=5)
  31. fig, ax = self.plt.subplots(1, 1)
  32. _check_plot_works(ts.hist, ax=ax, default_axes=True)
  33. _check_plot_works(ts.hist, ax=ax, figure=fig, default_axes=True)
  34. _check_plot_works(ts.hist, figure=fig, default_axes=True)
  35. tm.close()
  36. fig, (ax1, ax2) = self.plt.subplots(1, 2)
  37. _check_plot_works(ts.hist, figure=fig, ax=ax1, default_axes=True)
  38. _check_plot_works(ts.hist, figure=fig, ax=ax2, default_axes=True)
  39. msg = (
  40. "Cannot pass 'figure' when using the 'by' argument, since a new 'Figure' "
  41. "instance will be created"
  42. )
  43. with pytest.raises(ValueError, match=msg):
  44. ts.hist(by=ts.index, figure=fig)
  45. def test_hist_bins_legacy(self):
  46. df = DataFrame(np.random.randn(10, 2))
  47. ax = df.hist(bins=2)[0][0]
  48. assert len(ax.patches) == 2
  49. def test_hist_layout(self, hist_df):
  50. df = hist_df
  51. msg = "The 'layout' keyword is not supported when 'by' is None"
  52. with pytest.raises(ValueError, match=msg):
  53. df.height.hist(layout=(1, 1))
  54. with pytest.raises(ValueError, match=msg):
  55. df.height.hist(layout=[1, 1])
  56. @pytest.mark.slow
  57. def test_hist_layout_with_by(self, hist_df):
  58. df = hist_df
  59. # _check_plot_works adds an `ax` kwarg to the method call
  60. # so we get a warning about an axis being cleared, even
  61. # though we don't explicing pass one, see GH #13188
  62. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  63. axes = _check_plot_works(df.height.hist, by=df.gender, layout=(2, 1))
  64. self._check_axes_shape(axes, axes_num=2, layout=(2, 1))
  65. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  66. axes = _check_plot_works(df.height.hist, by=df.gender, layout=(3, -1))
  67. self._check_axes_shape(axes, axes_num=2, layout=(3, 1))
  68. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  69. axes = _check_plot_works(df.height.hist, by=df.category, layout=(4, 1))
  70. self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
  71. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  72. axes = _check_plot_works(df.height.hist, by=df.category, layout=(2, -1))
  73. self._check_axes_shape(axes, axes_num=4, layout=(2, 2))
  74. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  75. axes = _check_plot_works(df.height.hist, by=df.category, layout=(3, -1))
  76. self._check_axes_shape(axes, axes_num=4, layout=(3, 2))
  77. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  78. axes = _check_plot_works(df.height.hist, by=df.category, layout=(-1, 4))
  79. self._check_axes_shape(axes, axes_num=4, layout=(1, 4))
  80. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  81. axes = _check_plot_works(df.height.hist, by=df.classroom, layout=(2, 2))
  82. self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
  83. axes = df.height.hist(by=df.category, layout=(4, 2), figsize=(12, 7))
  84. self._check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 7))
  85. def test_hist_no_overlap(self):
  86. from matplotlib.pyplot import (
  87. gcf,
  88. subplot,
  89. )
  90. x = Series(np.random.randn(2))
  91. y = Series(np.random.randn(2))
  92. subplot(121)
  93. x.hist()
  94. subplot(122)
  95. y.hist()
  96. fig = gcf()
  97. axes = fig.axes
  98. assert len(axes) == 2
  99. def test_hist_by_no_extra_plots(self, hist_df):
  100. df = hist_df
  101. axes = df.height.hist(by=df.gender) # noqa
  102. assert len(self.plt.get_fignums()) == 1
  103. def test_plot_fails_when_ax_differs_from_figure(self, ts):
  104. from pylab import figure
  105. fig1 = figure()
  106. fig2 = figure()
  107. ax1 = fig1.add_subplot(111)
  108. msg = "passed axis not bound to passed figure"
  109. with pytest.raises(AssertionError, match=msg):
  110. ts.hist(ax=ax1, figure=fig2)
  111. @pytest.mark.parametrize(
  112. "histtype, expected",
  113. [
  114. ("bar", True),
  115. ("barstacked", True),
  116. ("step", False),
  117. ("stepfilled", True),
  118. ],
  119. )
  120. def test_histtype_argument(self, histtype, expected):
  121. # GH23992 Verify functioning of histtype argument
  122. ser = Series(np.random.randint(1, 10))
  123. ax = ser.hist(histtype=histtype)
  124. self._check_patches_all_filled(ax, filled=expected)
  125. @pytest.mark.parametrize(
  126. "by, expected_axes_num, expected_layout", [(None, 1, (1, 1)), ("b", 2, (1, 2))]
  127. )
  128. def test_hist_with_legend(self, by, expected_axes_num, expected_layout):
  129. # GH 6279 - Series histogram can have a legend
  130. index = 15 * ["1"] + 15 * ["2"]
  131. s = Series(np.random.randn(30), index=index, name="a")
  132. s.index.name = "b"
  133. # Use default_axes=True when plotting method generate subplots itself
  134. axes = _check_plot_works(s.hist, default_axes=True, legend=True, by=by)
  135. self._check_axes_shape(axes, axes_num=expected_axes_num, layout=expected_layout)
  136. self._check_legend_labels(axes, "a")
  137. @pytest.mark.parametrize("by", [None, "b"])
  138. def test_hist_with_legend_raises(self, by):
  139. # GH 6279 - Series histogram with legend and label raises
  140. index = 15 * ["1"] + 15 * ["2"]
  141. s = Series(np.random.randn(30), index=index, name="a")
  142. s.index.name = "b"
  143. with pytest.raises(ValueError, match="Cannot use both legend and label"):
  144. s.hist(legend=True, by=by, label="c")
  145. def test_hist_kwargs(self, ts):
  146. _, ax = self.plt.subplots()
  147. ax = ts.plot.hist(bins=5, ax=ax)
  148. assert len(ax.patches) == 5
  149. self._check_text_labels(ax.yaxis.get_label(), "Frequency")
  150. tm.close()
  151. _, ax = self.plt.subplots()
  152. ax = ts.plot.hist(orientation="horizontal", ax=ax)
  153. self._check_text_labels(ax.xaxis.get_label(), "Frequency")
  154. tm.close()
  155. _, ax = self.plt.subplots()
  156. ax = ts.plot.hist(align="left", stacked=True, ax=ax)
  157. tm.close()
  158. @pytest.mark.xfail(reason="Api changed in 3.6.0")
  159. @td.skip_if_no_scipy
  160. def test_hist_kde(self, ts):
  161. _, ax = self.plt.subplots()
  162. ax = ts.plot.hist(logy=True, ax=ax)
  163. self._check_ax_scales(ax, yaxis="log")
  164. xlabels = ax.get_xticklabels()
  165. # ticks are values, thus ticklabels are blank
  166. self._check_text_labels(xlabels, [""] * len(xlabels))
  167. ylabels = ax.get_yticklabels()
  168. self._check_text_labels(ylabels, [""] * len(ylabels))
  169. _check_plot_works(ts.plot.kde)
  170. _check_plot_works(ts.plot.density)
  171. _, ax = self.plt.subplots()
  172. ax = ts.plot.kde(logy=True, ax=ax)
  173. self._check_ax_scales(ax, yaxis="log")
  174. xlabels = ax.get_xticklabels()
  175. self._check_text_labels(xlabels, [""] * len(xlabels))
  176. ylabels = ax.get_yticklabels()
  177. self._check_text_labels(ylabels, [""] * len(ylabels))
  178. @td.skip_if_no_scipy
  179. def test_hist_kde_color(self, ts):
  180. _, ax = self.plt.subplots()
  181. ax = ts.plot.hist(logy=True, bins=10, color="b", ax=ax)
  182. self._check_ax_scales(ax, yaxis="log")
  183. assert len(ax.patches) == 10
  184. self._check_colors(ax.patches, facecolors=["b"] * 10)
  185. _, ax = self.plt.subplots()
  186. ax = ts.plot.kde(logy=True, color="r", ax=ax)
  187. self._check_ax_scales(ax, yaxis="log")
  188. lines = ax.get_lines()
  189. assert len(lines) == 1
  190. self._check_colors(lines, ["r"])
  191. @td.skip_if_no_mpl
  192. class TestDataFramePlots(TestPlotBase):
  193. @pytest.mark.slow
  194. def test_hist_df_legacy(self, hist_df):
  195. from matplotlib.patches import Rectangle
  196. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  197. _check_plot_works(hist_df.hist)
  198. # make sure layout is handled
  199. df = DataFrame(np.random.randn(100, 2))
  200. df[2] = to_datetime(
  201. np.random.randint(
  202. 812419200000000000,
  203. 819331200000000000,
  204. size=100,
  205. dtype=np.int64,
  206. )
  207. )
  208. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  209. axes = _check_plot_works(df.hist, grid=False)
  210. self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
  211. assert not axes[1, 1].get_visible()
  212. _check_plot_works(df[[2]].hist)
  213. df = DataFrame(np.random.randn(100, 1))
  214. _check_plot_works(df.hist)
  215. # make sure layout is handled
  216. df = DataFrame(np.random.randn(100, 5))
  217. df[5] = to_datetime(
  218. np.random.randint(
  219. 812419200000000000,
  220. 819331200000000000,
  221. size=100,
  222. dtype=np.int64,
  223. )
  224. )
  225. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  226. axes = _check_plot_works(df.hist, layout=(4, 2))
  227. self._check_axes_shape(axes, axes_num=6, layout=(4, 2))
  228. # make sure sharex, sharey is handled
  229. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  230. _check_plot_works(df.hist, sharex=True, sharey=True)
  231. # handle figsize arg
  232. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  233. _check_plot_works(df.hist, figsize=(8, 10))
  234. # check bins argument
  235. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  236. _check_plot_works(df.hist, bins=5)
  237. # make sure xlabelsize and xrot are handled
  238. ser = df[0]
  239. xf, yf = 20, 18
  240. xrot, yrot = 30, 40
  241. axes = ser.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot)
  242. self._check_ticks_props(
  243. axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot
  244. )
  245. xf, yf = 20, 18
  246. xrot, yrot = 30, 40
  247. axes = df.hist(xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot)
  248. self._check_ticks_props(
  249. axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot
  250. )
  251. tm.close()
  252. ax = ser.hist(cumulative=True, bins=4, density=True)
  253. # height of last bin (index 5) must be 1.0
  254. rects = [x for x in ax.get_children() if isinstance(x, Rectangle)]
  255. tm.assert_almost_equal(rects[-1].get_height(), 1.0)
  256. tm.close()
  257. ax = ser.hist(log=True)
  258. # scale of y must be 'log'
  259. self._check_ax_scales(ax, yaxis="log")
  260. tm.close()
  261. # propagate attr exception from matplotlib.Axes.hist
  262. with tm.external_error_raised(AttributeError):
  263. ser.hist(foo="bar")
  264. def test_hist_non_numerical_or_datetime_raises(self):
  265. # gh-10444, GH32590
  266. df = DataFrame(
  267. {
  268. "a": np.random.rand(10),
  269. "b": np.random.randint(0, 10, 10),
  270. "c": to_datetime(
  271. np.random.randint(
  272. 1582800000000000000, 1583500000000000000, 10, dtype=np.int64
  273. )
  274. ),
  275. "d": to_datetime(
  276. np.random.randint(
  277. 1582800000000000000, 1583500000000000000, 10, dtype=np.int64
  278. ),
  279. utc=True,
  280. ),
  281. }
  282. )
  283. df_o = df.astype(object)
  284. msg = "hist method requires numerical or datetime columns, nothing to plot."
  285. with pytest.raises(ValueError, match=msg):
  286. df_o.hist()
  287. def test_hist_layout(self):
  288. df = DataFrame(np.random.randn(100, 2))
  289. df[2] = to_datetime(
  290. np.random.randint(
  291. 812419200000000000,
  292. 819331200000000000,
  293. size=100,
  294. dtype=np.int64,
  295. )
  296. )
  297. layout_to_expected_size = (
  298. {"layout": None, "expected_size": (2, 2)}, # default is 2x2
  299. {"layout": (2, 2), "expected_size": (2, 2)},
  300. {"layout": (4, 1), "expected_size": (4, 1)},
  301. {"layout": (1, 4), "expected_size": (1, 4)},
  302. {"layout": (3, 3), "expected_size": (3, 3)},
  303. {"layout": (-1, 4), "expected_size": (1, 4)},
  304. {"layout": (4, -1), "expected_size": (4, 1)},
  305. {"layout": (-1, 2), "expected_size": (2, 2)},
  306. {"layout": (2, -1), "expected_size": (2, 2)},
  307. )
  308. for layout_test in layout_to_expected_size:
  309. axes = df.hist(layout=layout_test["layout"])
  310. expected = layout_test["expected_size"]
  311. self._check_axes_shape(axes, axes_num=3, layout=expected)
  312. # layout too small for all 4 plots
  313. msg = "Layout of 1x1 must be larger than required size 3"
  314. with pytest.raises(ValueError, match=msg):
  315. df.hist(layout=(1, 1))
  316. # invalid format for layout
  317. msg = re.escape("Layout must be a tuple of (rows, columns)")
  318. with pytest.raises(ValueError, match=msg):
  319. df.hist(layout=(1,))
  320. msg = "At least one dimension of layout must be positive"
  321. with pytest.raises(ValueError, match=msg):
  322. df.hist(layout=(-1, -1))
  323. # GH 9351
  324. def test_tight_layout(self):
  325. df = DataFrame(np.random.randn(100, 2))
  326. df[2] = to_datetime(
  327. np.random.randint(
  328. 812419200000000000,
  329. 819331200000000000,
  330. size=100,
  331. dtype=np.int64,
  332. )
  333. )
  334. # Use default_axes=True when plotting method generate subplots itself
  335. _check_plot_works(df.hist, default_axes=True)
  336. self.plt.tight_layout()
  337. tm.close()
  338. def test_hist_subplot_xrot(self):
  339. # GH 30288
  340. df = DataFrame(
  341. {
  342. "length": [1.5, 0.5, 1.2, 0.9, 3],
  343. "animal": ["pig", "rabbit", "pig", "pig", "rabbit"],
  344. }
  345. )
  346. # Use default_axes=True when plotting method generate subplots itself
  347. axes = _check_plot_works(
  348. df.hist,
  349. default_axes=True,
  350. column="length",
  351. by="animal",
  352. bins=5,
  353. xrot=0,
  354. )
  355. self._check_ticks_props(axes, xrot=0)
  356. @pytest.mark.parametrize(
  357. "column, expected",
  358. [
  359. (None, ["width", "length", "height"]),
  360. (["length", "width", "height"], ["length", "width", "height"]),
  361. ],
  362. )
  363. def test_hist_column_order_unchanged(self, column, expected):
  364. # GH29235
  365. df = DataFrame(
  366. {
  367. "width": [0.7, 0.2, 0.15, 0.2, 1.1],
  368. "length": [1.5, 0.5, 1.2, 0.9, 3],
  369. "height": [3, 0.5, 3.4, 2, 1],
  370. },
  371. index=["pig", "rabbit", "duck", "chicken", "horse"],
  372. )
  373. # Use default_axes=True when plotting method generate subplots itself
  374. axes = _check_plot_works(
  375. df.hist,
  376. default_axes=True,
  377. column=column,
  378. layout=(1, 3),
  379. )
  380. result = [axes[0, i].get_title() for i in range(3)]
  381. assert result == expected
  382. @pytest.mark.parametrize(
  383. "histtype, expected",
  384. [
  385. ("bar", True),
  386. ("barstacked", True),
  387. ("step", False),
  388. ("stepfilled", True),
  389. ],
  390. )
  391. def test_histtype_argument(self, histtype, expected):
  392. # GH23992 Verify functioning of histtype argument
  393. df = DataFrame(np.random.randint(1, 10, size=(100, 2)), columns=["a", "b"])
  394. ax = df.hist(histtype=histtype)
  395. self._check_patches_all_filled(ax, filled=expected)
  396. @pytest.mark.parametrize("by", [None, "c"])
  397. @pytest.mark.parametrize("column", [None, "b"])
  398. def test_hist_with_legend(self, by, column):
  399. # GH 6279 - DataFrame histogram can have a legend
  400. expected_axes_num = 1 if by is None and column is not None else 2
  401. expected_layout = (1, expected_axes_num)
  402. expected_labels = column or ["a", "b"]
  403. if by is not None:
  404. expected_labels = [expected_labels] * 2
  405. index = Index(15 * ["1"] + 15 * ["2"], name="c")
  406. df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"])
  407. # Use default_axes=True when plotting method generate subplots itself
  408. axes = _check_plot_works(
  409. df.hist,
  410. default_axes=True,
  411. legend=True,
  412. by=by,
  413. column=column,
  414. )
  415. self._check_axes_shape(axes, axes_num=expected_axes_num, layout=expected_layout)
  416. if by is None and column is None:
  417. axes = axes[0]
  418. for expected_label, ax in zip(expected_labels, axes):
  419. self._check_legend_labels(ax, expected_label)
  420. @pytest.mark.parametrize("by", [None, "c"])
  421. @pytest.mark.parametrize("column", [None, "b"])
  422. def test_hist_with_legend_raises(self, by, column):
  423. # GH 6279 - DataFrame histogram with legend and label raises
  424. index = Index(15 * ["1"] + 15 * ["2"], name="c")
  425. df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"])
  426. with pytest.raises(ValueError, match="Cannot use both legend and label"):
  427. df.hist(legend=True, by=by, column=column, label="d")
  428. def test_hist_df_kwargs(self):
  429. df = DataFrame(np.random.randn(10, 2))
  430. _, ax = self.plt.subplots()
  431. ax = df.plot.hist(bins=5, ax=ax)
  432. assert len(ax.patches) == 10
  433. def test_hist_df_with_nonnumerics(self):
  434. # GH 9853
  435. df = DataFrame(
  436. np.random.RandomState(42).randn(10, 4), columns=["A", "B", "C", "D"]
  437. )
  438. df["E"] = ["x", "y"] * 5
  439. _, ax = self.plt.subplots()
  440. ax = df.plot.hist(bins=5, ax=ax)
  441. assert len(ax.patches) == 20
  442. _, ax = self.plt.subplots()
  443. ax = df.plot.hist(ax=ax) # bins=10
  444. assert len(ax.patches) == 40
  445. def test_hist_secondary_legend(self):
  446. # GH 9610
  447. df = DataFrame(np.random.randn(30, 4), columns=list("abcd"))
  448. # primary -> secondary
  449. _, ax = self.plt.subplots()
  450. ax = df["a"].plot.hist(legend=True, ax=ax)
  451. df["b"].plot.hist(ax=ax, legend=True, secondary_y=True)
  452. # both legends are drawn on left ax
  453. # left and right axis must be visible
  454. self._check_legend_labels(ax, labels=["a", "b (right)"])
  455. assert ax.get_yaxis().get_visible()
  456. assert ax.right_ax.get_yaxis().get_visible()
  457. tm.close()
  458. # secondary -> secondary
  459. _, ax = self.plt.subplots()
  460. ax = df["a"].plot.hist(legend=True, secondary_y=True, ax=ax)
  461. df["b"].plot.hist(ax=ax, legend=True, secondary_y=True)
  462. # both legends are draw on left ax
  463. # left axis must be invisible, right axis must be visible
  464. self._check_legend_labels(ax.left_ax, labels=["a (right)", "b (right)"])
  465. assert not ax.left_ax.get_yaxis().get_visible()
  466. assert ax.get_yaxis().get_visible()
  467. tm.close()
  468. # secondary -> primary
  469. _, ax = self.plt.subplots()
  470. ax = df["a"].plot.hist(legend=True, secondary_y=True, ax=ax)
  471. # right axes is returned
  472. df["b"].plot.hist(ax=ax, legend=True)
  473. # both legends are draw on left ax
  474. # left and right axis must be visible
  475. self._check_legend_labels(ax.left_ax, labels=["a (right)", "b"])
  476. assert ax.left_ax.get_yaxis().get_visible()
  477. assert ax.get_yaxis().get_visible()
  478. tm.close()
  479. @td.skip_if_no_mpl
  480. def test_hist_with_nans_and_weights(self):
  481. # GH 48884
  482. df = DataFrame(
  483. [[np.nan, 0.2, 0.3], [0.4, np.nan, np.nan], [0.7, 0.8, 0.9]],
  484. columns=list("abc"),
  485. )
  486. weights = np.array([0.25, 0.3, 0.45])
  487. no_nan_df = DataFrame([[0.4, 0.2, 0.3], [0.7, 0.8, 0.9]], columns=list("abc"))
  488. no_nan_weights = np.array([[0.3, 0.25, 0.25], [0.45, 0.45, 0.45]])
  489. from matplotlib.patches import Rectangle
  490. _, ax0 = self.plt.subplots()
  491. df.plot.hist(ax=ax0, weights=weights)
  492. rects = [x for x in ax0.get_children() if isinstance(x, Rectangle)]
  493. heights = [rect.get_height() for rect in rects]
  494. _, ax1 = self.plt.subplots()
  495. no_nan_df.plot.hist(ax=ax1, weights=no_nan_weights)
  496. no_nan_rects = [x for x in ax1.get_children() if isinstance(x, Rectangle)]
  497. no_nan_heights = [rect.get_height() for rect in no_nan_rects]
  498. assert all(h0 == h1 for h0, h1 in zip(heights, no_nan_heights))
  499. idxerror_weights = np.array([[0.3, 0.25], [0.45, 0.45]])
  500. msg = "weights must have the same shape as data, or be a single column"
  501. with pytest.raises(ValueError, match=msg):
  502. _, ax2 = self.plt.subplots()
  503. no_nan_df.plot.hist(ax=ax2, weights=idxerror_weights)
  504. @td.skip_if_no_mpl
  505. class TestDataFrameGroupByPlots(TestPlotBase):
  506. def test_grouped_hist_legacy(self):
  507. from matplotlib.patches import Rectangle
  508. from pandas.plotting._matplotlib.hist import _grouped_hist
  509. df = DataFrame(np.random.randn(500, 1), columns=["A"])
  510. df["B"] = to_datetime(
  511. np.random.randint(
  512. 812419200000000000,
  513. 819331200000000000,
  514. size=500,
  515. dtype=np.int64,
  516. )
  517. )
  518. df["C"] = np.random.randint(0, 4, 500)
  519. df["D"] = ["X"] * 500
  520. axes = _grouped_hist(df.A, by=df.C)
  521. self._check_axes_shape(axes, axes_num=4, layout=(2, 2))
  522. tm.close()
  523. axes = df.hist(by=df.C)
  524. self._check_axes_shape(axes, axes_num=4, layout=(2, 2))
  525. tm.close()
  526. # group by a key with single value
  527. axes = df.hist(by="D", rot=30)
  528. self._check_axes_shape(axes, axes_num=1, layout=(1, 1))
  529. self._check_ticks_props(axes, xrot=30)
  530. tm.close()
  531. # make sure kwargs to hist are handled
  532. xf, yf = 20, 18
  533. xrot, yrot = 30, 40
  534. axes = _grouped_hist(
  535. df.A,
  536. by=df.C,
  537. cumulative=True,
  538. bins=4,
  539. xlabelsize=xf,
  540. xrot=xrot,
  541. ylabelsize=yf,
  542. yrot=yrot,
  543. density=True,
  544. )
  545. # height of last bin (index 5) must be 1.0
  546. for ax in axes.ravel():
  547. rects = [x for x in ax.get_children() if isinstance(x, Rectangle)]
  548. height = rects[-1].get_height()
  549. tm.assert_almost_equal(height, 1.0)
  550. self._check_ticks_props(
  551. axes, xlabelsize=xf, xrot=xrot, ylabelsize=yf, yrot=yrot
  552. )
  553. tm.close()
  554. axes = _grouped_hist(df.A, by=df.C, log=True)
  555. # scale of y must be 'log'
  556. self._check_ax_scales(axes, yaxis="log")
  557. tm.close()
  558. # propagate attr exception from matplotlib.Axes.hist
  559. with tm.external_error_raised(AttributeError):
  560. _grouped_hist(df.A, by=df.C, foo="bar")
  561. msg = "Specify figure size by tuple instead"
  562. with pytest.raises(ValueError, match=msg):
  563. df.hist(by="C", figsize="default")
  564. def test_grouped_hist_legacy2(self):
  565. n = 10
  566. weight = Series(np.random.normal(166, 20, size=n))
  567. height = Series(np.random.normal(60, 10, size=n))
  568. gender_int = np.random.RandomState(42).choice([0, 1], size=n)
  569. df_int = DataFrame({"height": height, "weight": weight, "gender": gender_int})
  570. gb = df_int.groupby("gender")
  571. axes = gb.hist()
  572. assert len(axes) == 2
  573. assert len(self.plt.get_fignums()) == 2
  574. tm.close()
  575. @pytest.mark.slow
  576. def test_grouped_hist_layout(self, hist_df):
  577. df = hist_df
  578. msg = "Layout of 1x1 must be larger than required size 2"
  579. with pytest.raises(ValueError, match=msg):
  580. df.hist(column="weight", by=df.gender, layout=(1, 1))
  581. msg = "Layout of 1x3 must be larger than required size 4"
  582. with pytest.raises(ValueError, match=msg):
  583. df.hist(column="height", by=df.category, layout=(1, 3))
  584. msg = "At least one dimension of layout must be positive"
  585. with pytest.raises(ValueError, match=msg):
  586. df.hist(column="height", by=df.category, layout=(-1, -1))
  587. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  588. axes = _check_plot_works(
  589. df.hist, column="height", by=df.gender, layout=(2, 1)
  590. )
  591. self._check_axes_shape(axes, axes_num=2, layout=(2, 1))
  592. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  593. axes = _check_plot_works(
  594. df.hist, column="height", by=df.gender, layout=(2, -1)
  595. )
  596. self._check_axes_shape(axes, axes_num=2, layout=(2, 1))
  597. axes = df.hist(column="height", by=df.category, layout=(4, 1))
  598. self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
  599. axes = df.hist(column="height", by=df.category, layout=(-1, 1))
  600. self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
  601. axes = df.hist(column="height", by=df.category, layout=(4, 2), figsize=(12, 8))
  602. self._check_axes_shape(axes, axes_num=4, layout=(4, 2), figsize=(12, 8))
  603. tm.close()
  604. # GH 6769
  605. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  606. axes = _check_plot_works(
  607. df.hist, column="height", by="classroom", layout=(2, 2)
  608. )
  609. self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
  610. # without column
  611. with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
  612. axes = _check_plot_works(df.hist, by="classroom")
  613. self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
  614. axes = df.hist(by="gender", layout=(3, 5))
  615. self._check_axes_shape(axes, axes_num=2, layout=(3, 5))
  616. axes = df.hist(column=["height", "weight", "category"])
  617. self._check_axes_shape(axes, axes_num=3, layout=(2, 2))
  618. def test_grouped_hist_multiple_axes(self, hist_df):
  619. # GH 6970, GH 7069
  620. df = hist_df
  621. fig, axes = self.plt.subplots(2, 3)
  622. returned = df.hist(column=["height", "weight", "category"], ax=axes[0])
  623. self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
  624. tm.assert_numpy_array_equal(returned, axes[0])
  625. assert returned[0].figure is fig
  626. returned = df.hist(by="classroom", ax=axes[1])
  627. self._check_axes_shape(returned, axes_num=3, layout=(1, 3))
  628. tm.assert_numpy_array_equal(returned, axes[1])
  629. assert returned[0].figure is fig
  630. fig, axes = self.plt.subplots(2, 3)
  631. # pass different number of axes from required
  632. msg = "The number of passed axes must be 1, the same as the output plot"
  633. with pytest.raises(ValueError, match=msg):
  634. axes = df.hist(column="height", ax=axes)
  635. def test_axis_share_x(self, hist_df):
  636. df = hist_df
  637. # GH4089
  638. ax1, ax2 = df.hist(column="height", by=df.gender, sharex=True)
  639. # share x
  640. assert self.get_x_axis(ax1).joined(ax1, ax2)
  641. assert self.get_x_axis(ax2).joined(ax1, ax2)
  642. # don't share y
  643. assert not self.get_y_axis(ax1).joined(ax1, ax2)
  644. assert not self.get_y_axis(ax2).joined(ax1, ax2)
  645. def test_axis_share_y(self, hist_df):
  646. df = hist_df
  647. ax1, ax2 = df.hist(column="height", by=df.gender, sharey=True)
  648. # share y
  649. assert self.get_y_axis(ax1).joined(ax1, ax2)
  650. assert self.get_y_axis(ax2).joined(ax1, ax2)
  651. # don't share x
  652. assert not self.get_x_axis(ax1).joined(ax1, ax2)
  653. assert not self.get_x_axis(ax2).joined(ax1, ax2)
  654. def test_axis_share_xy(self, hist_df):
  655. df = hist_df
  656. ax1, ax2 = df.hist(column="height", by=df.gender, sharex=True, sharey=True)
  657. # share both x and y
  658. assert self.get_x_axis(ax1).joined(ax1, ax2)
  659. assert self.get_x_axis(ax2).joined(ax1, ax2)
  660. assert self.get_y_axis(ax1).joined(ax1, ax2)
  661. assert self.get_y_axis(ax2).joined(ax1, ax2)
  662. @pytest.mark.parametrize(
  663. "histtype, expected",
  664. [
  665. ("bar", True),
  666. ("barstacked", True),
  667. ("step", False),
  668. ("stepfilled", True),
  669. ],
  670. )
  671. def test_histtype_argument(self, histtype, expected):
  672. # GH23992 Verify functioning of histtype argument
  673. df = DataFrame(np.random.randint(1, 10, size=(100, 2)), columns=["a", "b"])
  674. ax = df.hist(by="a", histtype=histtype)
  675. self._check_patches_all_filled(ax, filled=expected)