test_pivot_multilevel.py 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254
  1. import numpy as np
  2. import pytest
  3. from pandas._libs import lib
  4. import pandas as pd
  5. from pandas import (
  6. Index,
  7. MultiIndex,
  8. )
  9. import pandas._testing as tm
  10. @pytest.mark.parametrize(
  11. "input_index, input_columns, input_values, "
  12. "expected_values, expected_columns, expected_index",
  13. [
  14. (
  15. ["lev4"],
  16. "lev3",
  17. "values",
  18. [
  19. [0.0, np.nan],
  20. [np.nan, 1.0],
  21. [2.0, np.nan],
  22. [np.nan, 3.0],
  23. [4.0, np.nan],
  24. [np.nan, 5.0],
  25. [6.0, np.nan],
  26. [np.nan, 7.0],
  27. ],
  28. Index([1, 2], name="lev3"),
  29. Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"),
  30. ),
  31. (
  32. ["lev4"],
  33. "lev3",
  34. lib.NoDefault,
  35. [
  36. [1.0, np.nan, 1.0, np.nan, 0.0, np.nan],
  37. [np.nan, 1.0, np.nan, 1.0, np.nan, 1.0],
  38. [1.0, np.nan, 2.0, np.nan, 2.0, np.nan],
  39. [np.nan, 1.0, np.nan, 2.0, np.nan, 3.0],
  40. [2.0, np.nan, 1.0, np.nan, 4.0, np.nan],
  41. [np.nan, 2.0, np.nan, 1.0, np.nan, 5.0],
  42. [2.0, np.nan, 2.0, np.nan, 6.0, np.nan],
  43. [np.nan, 2.0, np.nan, 2.0, np.nan, 7.0],
  44. ],
  45. MultiIndex.from_tuples(
  46. [
  47. ("lev1", 1),
  48. ("lev1", 2),
  49. ("lev2", 1),
  50. ("lev2", 2),
  51. ("values", 1),
  52. ("values", 2),
  53. ],
  54. names=[None, "lev3"],
  55. ),
  56. Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"),
  57. ),
  58. (
  59. ["lev1", "lev2"],
  60. "lev3",
  61. "values",
  62. [[0, 1], [2, 3], [4, 5], [6, 7]],
  63. Index([1, 2], name="lev3"),
  64. MultiIndex.from_tuples(
  65. [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
  66. ),
  67. ),
  68. (
  69. ["lev1", "lev2"],
  70. "lev3",
  71. lib.NoDefault,
  72. [[1, 2, 0, 1], [3, 4, 2, 3], [5, 6, 4, 5], [7, 8, 6, 7]],
  73. MultiIndex.from_tuples(
  74. [("lev4", 1), ("lev4", 2), ("values", 1), ("values", 2)],
  75. names=[None, "lev3"],
  76. ),
  77. MultiIndex.from_tuples(
  78. [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
  79. ),
  80. ),
  81. ],
  82. )
  83. def test_pivot_list_like_index(
  84. input_index,
  85. input_columns,
  86. input_values,
  87. expected_values,
  88. expected_columns,
  89. expected_index,
  90. ):
  91. # GH 21425, test when index is given a list
  92. df = pd.DataFrame(
  93. {
  94. "lev1": [1, 1, 1, 1, 2, 2, 2, 2],
  95. "lev2": [1, 1, 2, 2, 1, 1, 2, 2],
  96. "lev3": [1, 2, 1, 2, 1, 2, 1, 2],
  97. "lev4": [1, 2, 3, 4, 5, 6, 7, 8],
  98. "values": [0, 1, 2, 3, 4, 5, 6, 7],
  99. }
  100. )
  101. result = df.pivot(index=input_index, columns=input_columns, values=input_values)
  102. expected = pd.DataFrame(
  103. expected_values, columns=expected_columns, index=expected_index
  104. )
  105. tm.assert_frame_equal(result, expected)
  106. @pytest.mark.parametrize(
  107. "input_index, input_columns, input_values, "
  108. "expected_values, expected_columns, expected_index",
  109. [
  110. (
  111. "lev4",
  112. ["lev3"],
  113. "values",
  114. [
  115. [0.0, np.nan],
  116. [np.nan, 1.0],
  117. [2.0, np.nan],
  118. [np.nan, 3.0],
  119. [4.0, np.nan],
  120. [np.nan, 5.0],
  121. [6.0, np.nan],
  122. [np.nan, 7.0],
  123. ],
  124. Index([1, 2], name="lev3"),
  125. Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"),
  126. ),
  127. (
  128. ["lev1", "lev2"],
  129. ["lev3"],
  130. "values",
  131. [[0, 1], [2, 3], [4, 5], [6, 7]],
  132. Index([1, 2], name="lev3"),
  133. MultiIndex.from_tuples(
  134. [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
  135. ),
  136. ),
  137. (
  138. ["lev1"],
  139. ["lev2", "lev3"],
  140. "values",
  141. [[0, 1, 2, 3], [4, 5, 6, 7]],
  142. MultiIndex.from_tuples(
  143. [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev2", "lev3"]
  144. ),
  145. Index([1, 2], name="lev1"),
  146. ),
  147. (
  148. ["lev1", "lev2"],
  149. ["lev3", "lev4"],
  150. "values",
  151. [
  152. [0.0, 1.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
  153. [np.nan, np.nan, 2.0, 3.0, np.nan, np.nan, np.nan, np.nan],
  154. [np.nan, np.nan, np.nan, np.nan, 4.0, 5.0, np.nan, np.nan],
  155. [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 6.0, 7.0],
  156. ],
  157. MultiIndex.from_tuples(
  158. [(1, 1), (2, 2), (1, 3), (2, 4), (1, 5), (2, 6), (1, 7), (2, 8)],
  159. names=["lev3", "lev4"],
  160. ),
  161. MultiIndex.from_tuples(
  162. [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
  163. ),
  164. ),
  165. ],
  166. )
  167. def test_pivot_list_like_columns(
  168. input_index,
  169. input_columns,
  170. input_values,
  171. expected_values,
  172. expected_columns,
  173. expected_index,
  174. ):
  175. # GH 21425, test when columns is given a list
  176. df = pd.DataFrame(
  177. {
  178. "lev1": [1, 1, 1, 1, 2, 2, 2, 2],
  179. "lev2": [1, 1, 2, 2, 1, 1, 2, 2],
  180. "lev3": [1, 2, 1, 2, 1, 2, 1, 2],
  181. "lev4": [1, 2, 3, 4, 5, 6, 7, 8],
  182. "values": [0, 1, 2, 3, 4, 5, 6, 7],
  183. }
  184. )
  185. result = df.pivot(index=input_index, columns=input_columns, values=input_values)
  186. expected = pd.DataFrame(
  187. expected_values, columns=expected_columns, index=expected_index
  188. )
  189. tm.assert_frame_equal(result, expected)
  190. def test_pivot_multiindexed_rows_and_cols(using_array_manager):
  191. # GH 36360
  192. df = pd.DataFrame(
  193. data=np.arange(12).reshape(4, 3),
  194. columns=MultiIndex.from_tuples(
  195. [(0, 0), (0, 1), (0, 2)], names=["col_L0", "col_L1"]
  196. ),
  197. index=MultiIndex.from_tuples(
  198. [(0, 0, 0), (0, 0, 1), (1, 1, 1), (1, 0, 0)],
  199. names=["idx_L0", "idx_L1", "idx_L2"],
  200. ),
  201. )
  202. res = df.pivot_table(
  203. index=["idx_L0"],
  204. columns=["idx_L1"],
  205. values=[(0, 1)],
  206. aggfunc=lambda col: col.values.sum(),
  207. )
  208. expected = pd.DataFrame(
  209. data=[[5, np.nan], [10, 7.0]],
  210. columns=MultiIndex.from_tuples(
  211. [(0, 1, 0), (0, 1, 1)], names=["col_L0", "col_L1", "idx_L1"]
  212. ),
  213. index=Index([0, 1], dtype="int64", name="idx_L0"),
  214. )
  215. if not using_array_manager:
  216. # BlockManager does not preserve the dtypes
  217. expected = expected.astype("float64")
  218. tm.assert_frame_equal(res, expected)
  219. def test_pivot_df_multiindex_index_none():
  220. # GH 23955
  221. df = pd.DataFrame(
  222. [
  223. ["A", "A1", "label1", 1],
  224. ["A", "A2", "label2", 2],
  225. ["B", "A1", "label1", 3],
  226. ["B", "A2", "label2", 4],
  227. ],
  228. columns=["index_1", "index_2", "label", "value"],
  229. )
  230. df = df.set_index(["index_1", "index_2"])
  231. result = df.pivot(columns="label", values="value")
  232. expected = pd.DataFrame(
  233. [[1.0, np.nan], [np.nan, 2.0], [3.0, np.nan], [np.nan, 4.0]],
  234. index=df.index,
  235. columns=Index(["label1", "label2"], name="label"),
  236. )
  237. tm.assert_frame_equal(result, expected)