test_groupby.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. """ Test cases for GroupBy.plot """
  2. import numpy as np
  3. import pytest
  4. import pandas.util._test_decorators as td
  5. from pandas import (
  6. DataFrame,
  7. Index,
  8. Series,
  9. )
  10. import pandas._testing as tm
  11. from pandas.tests.plotting.common import TestPlotBase
  12. @td.skip_if_no_mpl
  13. class TestDataFrameGroupByPlots(TestPlotBase):
  def test_series_groupby_plotting_nominally_works(self):
      # Smoke test: plot() and hist() on a Series grouped by an array of
      # labels should run; nothing is asserted about the output.
      size = 10
      weight = Series(np.random.normal(loc=166, scale=20, size=size))
      height = Series(np.random.normal(loc=60, scale=10, size=size))
      # Only the grouping labels are drawn from a seeded generator.
      labels = np.random.RandomState(42).choice(["male", "female"], size=size)

      weight_groups = weight.groupby(labels)
      weight_groups.plot()
      tm.close()

      height_groups = height.groupby(labels)
      height_groups.hist()
      tm.close()

      # Regression test for GH8733
      height_groups_again = height.groupby(labels)
      height_groups_again.plot(alpha=0.5)
      tm.close()
  def test_plotting_with_float_index_works(self):
      # GH 7025
      # Group a column whose index holds repeated float labels and plot it
      # both directly and through apply().
      float_labels = [1.0, 2.0, 3.0] * 3
      payload = {
          "def": [1, 1, 1, 2, 2, 2, 3, 3, 3],
          "val": np.random.randn(9),
      }
      frame = DataFrame(payload, index=float_labels)

      frame.groupby("def")["val"].plot()
      tm.close()

      frame.groupby("def")["val"].apply(lambda grp: grp.plot())
      tm.close()
  def test_hist_single_row(self):
      # GH10214
      # Series.hist(by=...) should run when every "by" group holds a single
      # row, first with two such groups and then with only one.
      bins = np.arange(80, 102, 1)
      datasets = (
          {"Name": ["AAA", "BBB"], "ByCol": [1, 2], "Mark": [85, 89]},
          {"Name": ["AAA"], "ByCol": [1], "Mark": [85]},
      )
      for data in datasets:
          frame = DataFrame(data)
          frame["Mark"].hist(by=frame["ByCol"], bins=bins)
  def test_plot_submethod_works(self):
      # The .plot accessor on a groupby should expose kind-specific
      # sub-methods: scatter for the frame groupby, line for a single column.
      frame = DataFrame(
          {
              "x": [1, 2, 3, 4, 5],
              "y": [1, 2, 3, 2, 1],
              "z": ["a", "b", "a", "b", "a"],
          }
      )

      frame_plotter = frame.groupby("z").plot
      frame_plotter.scatter("x", "y")
      tm.close()

      column_plotter = frame.groupby("z")["x"].plot
      column_plotter.line()
      tm.close()
  def test_plot_kwargs(self):
      frame = DataFrame(
          {
              "x": [1, 2, 3, 4, 5],
              "y": [1, 2, 3, 2, 1],
              "z": ["a", "b", "a", "b", "a"],
          }
      )

      # check that a scatter plot is effectively plotted: the axes should
      # contain a PathCollection from the scatter plot (GH11805)
      by_kind = frame.groupby("z").plot(kind="scatter", x="x", y="y")
      group_a_axes = by_kind["a"]
      assert len(group_a_axes.collections) == 1

      # Same expectation when going through the .plot.scatter sub-method.
      by_submethod = frame.groupby("z").plot.scatter(x="x", y="y")
      group_a_axes = by_submethod["a"]
      assert len(group_a_axes.collections) == 1
  @pytest.mark.parametrize("column, expected_axes_num", [(None, 2), ("b", 1)])
  def test_groupby_hist_frame_with_legend(self, column, expected_axes_num):
      # GH 6279 - DataFrameGroupBy histogram can have a legend
      layout = (1, expected_axes_num)
      # With no column selected, one legend entry is expected per column.
      # Otherwise the column name itself is zipped against the axes, so a
      # one-character name yields exactly one label.
      labels = column if column else [["a"], ["b"]]

      group_index = Index(["1"] * 15 + ["2"] * 15, name="c")
      frame = DataFrame(
          np.random.randn(30, 2), index=group_index, columns=["a", "b"]
      )
      grouped = frame.groupby("c")

      per_group_axes = grouped.hist(legend=True, column=column)
      for axes in per_group_axes:
          self._check_axes_shape(axes, axes_num=expected_axes_num, layout=layout)
          first_row = axes[0]
          for ax, label in zip(first_row, labels):
              self._check_legend_labels(ax, label)
  @pytest.mark.parametrize("column", [None, "b"])
  def test_groupby_hist_frame_with_legend_raises(self, column):
      # GH 6279 - DataFrameGroupBy histogram with legend and label raises
      group_index = Index(["1"] * 15 + ["2"] * 15, name="c")
      frame = DataFrame(
          np.random.randn(30, 2), index=group_index, columns=["a", "b"]
      )
      grouped = frame.groupby("c")

      # legend=True and an explicit label are expected to be rejected together.
      msg = "Cannot use both legend and label"
      with pytest.raises(ValueError, match=msg):
          grouped.hist(legend=True, column=column, label="d")
  def test_groupby_hist_series_with_legend(self):
      # GH 6279 - SeriesGroupBy histogram can have a legend
      group_index = Index(["1"] * 15 + ["2"] * 15, name="c")
      frame = DataFrame(
          np.random.randn(30, 2), index=group_index, columns=["a", "b"]
      )
      column_groups = frame.groupby("c")["a"]

      # Every returned axes is checked against a 1x1 layout and a legend
      # listing both group keys.
      group_keys = ["1", "2"]
      for ax in column_groups.hist(legend=True):
          self._check_axes_shape(ax, axes_num=1, layout=(1, 1))
          self._check_legend_labels(ax, group_keys)
  def test_groupby_hist_series_with_legend_raises(self):
      # GH 6279 - SeriesGroupBy histogram with legend and label raises
      index = Index(15 * ["1"] + 15 * ["2"], name="c")
      df = DataFrame(np.random.randn(30, 2), index=index, columns=["a", "b"])
      g = df.groupby("c")

      # Select a single column so the call goes through SeriesGroupBy.hist.
      # Calling g.hist(...) directly would exercise DataFrameGroupBy again,
      # repeating test_groupby_hist_frame_with_legend_raises and leaving the
      # Series path untested.
      with pytest.raises(ValueError, match="Cannot use both legend and label"):
          g["a"].hist(legend=True, label="d")