test_multilevel.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas import (
  5. DataFrame,
  6. MultiIndex,
  7. Series,
  8. )
  9. import pandas._testing as tm
  10. class TestMultiLevel:
  11. def test_reindex_level(self, multiindex_year_month_day_dataframe_random_data):
  12. # axis=0
  13. ymd = multiindex_year_month_day_dataframe_random_data
  14. month_sums = ymd.groupby("month").sum()
  15. result = month_sums.reindex(ymd.index, level=1)
  16. expected = ymd.groupby(level="month").transform(np.sum)
  17. tm.assert_frame_equal(result, expected)
  18. # Series
  19. result = month_sums["A"].reindex(ymd.index, level=1)
  20. expected = ymd["A"].groupby(level="month").transform(np.sum)
  21. tm.assert_series_equal(result, expected, check_names=False)
  22. # axis=1
  23. month_sums = ymd.T.groupby("month", axis=1).sum()
  24. result = month_sums.reindex(columns=ymd.index, level=1)
  25. expected = ymd.groupby(level="month").transform(np.sum).T
  26. tm.assert_frame_equal(result, expected)
  27. def test_reindex(self, multiindex_dataframe_random_data):
  28. frame = multiindex_dataframe_random_data
  29. expected = frame.iloc[[0, 3]]
  30. reindexed = frame.loc[[("foo", "one"), ("bar", "one")]]
  31. tm.assert_frame_equal(reindexed, expected)
  32. def test_reindex_preserve_levels(
  33. self, multiindex_year_month_day_dataframe_random_data
  34. ):
  35. ymd = multiindex_year_month_day_dataframe_random_data
  36. new_index = ymd.index[::10]
  37. chunk = ymd.reindex(new_index)
  38. assert chunk.index is new_index
  39. chunk = ymd.loc[new_index]
  40. assert chunk.index.equals(new_index)
  41. ymdT = ymd.T
  42. chunk = ymdT.reindex(columns=new_index)
  43. assert chunk.columns is new_index
  44. chunk = ymdT.loc[:, new_index]
  45. assert chunk.columns.equals(new_index)
  46. def test_groupby_transform(self, multiindex_dataframe_random_data):
  47. frame = multiindex_dataframe_random_data
  48. s = frame["A"]
  49. grouper = s.index.get_level_values(0)
  50. grouped = s.groupby(grouper, group_keys=False)
  51. applied = grouped.apply(lambda x: x * 2)
  52. expected = grouped.transform(lambda x: x * 2)
  53. result = applied.reindex(expected.index)
  54. tm.assert_series_equal(result, expected, check_names=False)
  55. def test_groupby_corner(self):
  56. midx = MultiIndex(
  57. levels=[["foo"], ["bar"], ["baz"]],
  58. codes=[[0], [0], [0]],
  59. names=["one", "two", "three"],
  60. )
  61. df = DataFrame([np.random.rand(4)], columns=["a", "b", "c", "d"], index=midx)
  62. # should work
  63. df.groupby(level="three")
  64. def test_groupby_level_no_obs(self):
  65. # #1697
  66. midx = MultiIndex.from_tuples(
  67. [
  68. ("f1", "s1"),
  69. ("f1", "s2"),
  70. ("f2", "s1"),
  71. ("f2", "s2"),
  72. ("f3", "s1"),
  73. ("f3", "s2"),
  74. ]
  75. )
  76. df = DataFrame([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], columns=midx)
  77. df1 = df.loc(axis=1)[df.columns.map(lambda u: u[0] in ["f2", "f3"])]
  78. grouped = df1.groupby(axis=1, level=0)
  79. result = grouped.sum()
  80. assert (result.columns == ["f2", "f3"]).all()
  81. def test_setitem_with_expansion_multiindex_columns(
  82. self, multiindex_year_month_day_dataframe_random_data
  83. ):
  84. ymd = multiindex_year_month_day_dataframe_random_data
  85. df = ymd[:5].T
  86. df[2000, 1, 10] = df[2000, 1, 7]
  87. assert isinstance(df.columns, MultiIndex)
  88. assert (df[2000, 1, 10] == df[2000, 1, 7]).all()
  89. def test_alignment(self):
  90. x = Series(
  91. data=[1, 2, 3], index=MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3)])
  92. )
  93. y = Series(
  94. data=[4, 5, 6], index=MultiIndex.from_tuples([("Z", 1), ("Z", 2), ("B", 3)])
  95. )
  96. res = x - y
  97. exp_index = x.index.union(y.index)
  98. exp = x.reindex(exp_index) - y.reindex(exp_index)
  99. tm.assert_series_equal(res, exp)
  100. # hit non-monotonic code path
  101. res = x[::-1] - y[::-1]
  102. exp_index = x.index.union(y.index)
  103. exp = x.reindex(exp_index) - y.reindex(exp_index)
  104. tm.assert_series_equal(res, exp)
  105. def test_groupby_multilevel(self, multiindex_year_month_day_dataframe_random_data):
  106. ymd = multiindex_year_month_day_dataframe_random_data
  107. result = ymd.groupby(level=[0, 1]).mean()
  108. k1 = ymd.index.get_level_values(0)
  109. k2 = ymd.index.get_level_values(1)
  110. expected = ymd.groupby([k1, k2]).mean()
  111. # TODO groupby with level_values drops names
  112. tm.assert_frame_equal(result, expected, check_names=False)
  113. assert result.index.names == ymd.index.names[:2]
  114. result2 = ymd.groupby(level=ymd.index.names[:2]).mean()
  115. tm.assert_frame_equal(result, result2)
  116. def test_multilevel_consolidate(self):
  117. index = MultiIndex.from_tuples(
  118. [("foo", "one"), ("foo", "two"), ("bar", "one"), ("bar", "two")]
  119. )
  120. df = DataFrame(np.random.randn(4, 4), index=index, columns=index)
  121. df["Totals", ""] = df.sum(1)
  122. df = df._consolidate()
  123. def test_level_with_tuples(self):
  124. index = MultiIndex(
  125. levels=[[("foo", "bar", 0), ("foo", "baz", 0), ("foo", "qux", 0)], [0, 1]],
  126. codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
  127. )
  128. series = Series(np.random.randn(6), index=index)
  129. frame = DataFrame(np.random.randn(6, 4), index=index)
  130. result = series[("foo", "bar", 0)]
  131. result2 = series.loc[("foo", "bar", 0)]
  132. expected = series[:2]
  133. expected.index = expected.index.droplevel(0)
  134. tm.assert_series_equal(result, expected)
  135. tm.assert_series_equal(result2, expected)
  136. with pytest.raises(KeyError, match=r"^\(\('foo', 'bar', 0\), 2\)$"):
  137. series[("foo", "bar", 0), 2]
  138. result = frame.loc[("foo", "bar", 0)]
  139. result2 = frame.xs(("foo", "bar", 0))
  140. expected = frame[:2]
  141. expected.index = expected.index.droplevel(0)
  142. tm.assert_frame_equal(result, expected)
  143. tm.assert_frame_equal(result2, expected)
  144. index = MultiIndex(
  145. levels=[[("foo", "bar"), ("foo", "baz"), ("foo", "qux")], [0, 1]],
  146. codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
  147. )
  148. series = Series(np.random.randn(6), index=index)
  149. frame = DataFrame(np.random.randn(6, 4), index=index)
  150. result = series[("foo", "bar")]
  151. result2 = series.loc[("foo", "bar")]
  152. expected = series[:2]
  153. expected.index = expected.index.droplevel(0)
  154. tm.assert_series_equal(result, expected)
  155. tm.assert_series_equal(result2, expected)
  156. result = frame.loc[("foo", "bar")]
  157. result2 = frame.xs(("foo", "bar"))
  158. expected = frame[:2]
  159. expected.index = expected.index.droplevel(0)
  160. tm.assert_frame_equal(result, expected)
  161. tm.assert_frame_equal(result2, expected)
  162. def test_reindex_level_partial_selection(self, multiindex_dataframe_random_data):
  163. frame = multiindex_dataframe_random_data
  164. result = frame.reindex(["foo", "qux"], level=0)
  165. expected = frame.iloc[[0, 1, 2, 7, 8, 9]]
  166. tm.assert_frame_equal(result, expected)
  167. result = frame.T.reindex(["foo", "qux"], axis=1, level=0)
  168. tm.assert_frame_equal(result, expected.T)
  169. result = frame.loc[["foo", "qux"]]
  170. tm.assert_frame_equal(result, expected)
  171. result = frame["A"].loc[["foo", "qux"]]
  172. tm.assert_series_equal(result, expected["A"])
  173. result = frame.T.loc[:, ["foo", "qux"]]
  174. tm.assert_frame_equal(result, expected.T)
  175. @pytest.mark.parametrize("d", [4, "d"])
  176. def test_empty_frame_groupby_dtypes_consistency(self, d):
  177. # GH 20888
  178. group_keys = ["a", "b", "c"]
  179. df = DataFrame({"a": [1], "b": [2], "c": [3], "d": [d]})
  180. g = df[df.a == 2].groupby(group_keys)
  181. result = g.first().index
  182. expected = MultiIndex(
  183. levels=[[1], [2], [3]], codes=[[], [], []], names=["a", "b", "c"]
  184. )
  185. tm.assert_index_equal(result, expected)
  186. def test_duplicate_groupby_issues(self):
  187. idx_tp = [
  188. ("600809", "20061231"),
  189. ("600809", "20070331"),
  190. ("600809", "20070630"),
  191. ("600809", "20070331"),
  192. ]
  193. dt = ["demo", "demo", "demo", "demo"]
  194. idx = MultiIndex.from_tuples(idx_tp, names=["STK_ID", "RPT_Date"])
  195. s = Series(dt, index=idx)
  196. result = s.groupby(s.index).first()
  197. assert len(result) == 3
  198. def test_subsets_multiindex_dtype(self):
  199. # GH 20757
  200. data = [["x", 1]]
  201. columns = [("a", "b", np.nan), ("a", "c", 0.0)]
  202. df = DataFrame(data, columns=MultiIndex.from_tuples(columns))
  203. expected = df.dtypes.a.b
  204. result = df.a.b.dtypes
  205. tm.assert_series_equal(result, expected)
  206. class TestSorted:
  207. """everything you wanted to test about sorting"""
  208. def test_sort_non_lexsorted(self):
  209. # degenerate case where we sort but don't
  210. # have a satisfying result :<
  211. # GH 15797
  212. idx = MultiIndex(
  213. [["A", "B", "C"], ["c", "b", "a"]], [[0, 1, 2, 0, 1, 2], [0, 2, 1, 1, 0, 2]]
  214. )
  215. df = DataFrame({"col": range(len(idx))}, index=idx, dtype="int64")
  216. assert df.index.is_monotonic_increasing is False
  217. sorted = df.sort_index()
  218. assert sorted.index.is_monotonic_increasing is True
  219. expected = DataFrame(
  220. {"col": [1, 4, 5, 2]},
  221. index=MultiIndex.from_tuples(
  222. [("B", "a"), ("B", "c"), ("C", "a"), ("C", "b")]
  223. ),
  224. dtype="int64",
  225. )
  226. result = sorted.loc[pd.IndexSlice["B":"C", "a":"c"], :]
  227. tm.assert_frame_equal(result, expected)