# Tests for DataFrame.pivot / DataFrame.pivot_table with list-like and MultiIndex arguments.
import numpy as np
import pytest

from pandas._libs import lib

import pandas as pd
from pandas import (
    Index,
    MultiIndex,
)
import pandas._testing as tm
@pytest.mark.parametrize(
    "input_index, input_columns, input_values, "
    "expected_values, expected_columns, expected_index",
    [
        # One-element list index, explicit values column.
        (
            ["lev4"],
            "lev3",
            "values",
            [
                [0.0, np.nan],
                [np.nan, 1.0],
                [2.0, np.nan],
                [np.nan, 3.0],
                [4.0, np.nan],
                [np.nan, 5.0],
                [6.0, np.nan],
                [np.nan, 7.0],
            ],
            Index([1, 2], name="lev3"),
            Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"),
        ),
        # One-element list index, values not supplied: every remaining
        # column ("lev1", "lev2", "values") is spread over "lev3".
        (
            ["lev4"],
            "lev3",
            lib.NoDefault,
            [
                [1.0, np.nan, 1.0, np.nan, 0.0, np.nan],
                [np.nan, 1.0, np.nan, 1.0, np.nan, 1.0],
                [1.0, np.nan, 2.0, np.nan, 2.0, np.nan],
                [np.nan, 1.0, np.nan, 2.0, np.nan, 3.0],
                [2.0, np.nan, 1.0, np.nan, 4.0, np.nan],
                [np.nan, 2.0, np.nan, 1.0, np.nan, 5.0],
                [2.0, np.nan, 2.0, np.nan, 6.0, np.nan],
                [np.nan, 2.0, np.nan, 2.0, np.nan, 7.0],
            ],
            MultiIndex.from_tuples(
                [
                    ("lev1", 1),
                    ("lev1", 2),
                    ("lev2", 1),
                    ("lev2", 2),
                    ("values", 1),
                    ("values", 2),
                ],
                names=[None, "lev3"],
            ),
            Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"),
        ),
        # Two-column list index, explicit values column.
        (
            ["lev1", "lev2"],
            "lev3",
            "values",
            [[0, 1], [2, 3], [4, 5], [6, 7]],
            Index([1, 2], name="lev3"),
            MultiIndex.from_tuples(
                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
            ),
        ),
        # Two-column list index, values not supplied: the remaining
        # columns ("lev4", "values") are spread over "lev3".
        (
            ["lev1", "lev2"],
            "lev3",
            lib.NoDefault,
            [[1, 2, 0, 1], [3, 4, 2, 3], [5, 6, 4, 5], [7, 8, 6, 7]],
            MultiIndex.from_tuples(
                [("lev4", 1), ("lev4", 2), ("values", 1), ("values", 2)],
                names=[None, "lev3"],
            ),
            MultiIndex.from_tuples(
                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
            ),
        ),
    ],
)
def test_pivot_list_like_index(
    input_index,
    input_columns,
    input_values,
    expected_values,
    expected_columns,
    expected_index,
):
    """
    Check ``DataFrame.pivot`` when ``index`` is given as a list of labels.

    Parameters
    ----------
    input_index, input_columns
        Forwarded to ``DataFrame.pivot`` as ``index`` and ``columns``.
    input_values
        Forwarded as ``values``; ``lib.NoDefault`` marks the cases where
        ``values`` is left unspecified.
    expected_values, expected_columns, expected_index
        Data, columns and index of the frame the pivot must produce.
    """
    # GH 21425, test when index is given a list
    df = pd.DataFrame(
        {
            "lev1": [1, 1, 1, 1, 2, 2, 2, 2],
            "lev2": [1, 1, 2, 2, 1, 1, 2, 2],
            "lev3": [1, 2, 1, 2, 1, 2, 1, 2],
            "lev4": [1, 2, 3, 4, 5, 6, 7, 8],
            "values": [0, 1, 2, 3, 4, 5, 6, 7],
        }
    )

    # NOTE(review): lib.NoDefault is only the "values not supplied" marker of
    # the parametrization. The keyword is left out in that case so the test
    # exercises pivot's own default instead of depending on which private
    # sentinel object the installed pandas compares against.
    pivot_kwargs = {"index": input_index, "columns": input_columns}
    if input_values is not lib.NoDefault:
        pivot_kwargs["values"] = input_values
    result = df.pivot(**pivot_kwargs)

    expected = pd.DataFrame(
        expected_values, columns=expected_columns, index=expected_index
    )
    tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
    "input_index, input_columns, input_values, "
    "expected_values, expected_columns, expected_index",
    [
        # Scalar index, one-element list of columns.
        (
            "lev4",
            ["lev3"],
            "values",
            [
                [0.0, np.nan],
                [np.nan, 1.0],
                [2.0, np.nan],
                [np.nan, 3.0],
                [4.0, np.nan],
                [np.nan, 5.0],
                [6.0, np.nan],
                [np.nan, 7.0],
            ],
            Index([1, 2], name="lev3"),
            Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"),
        ),
        # List index, one-element list of columns.
        (
            ["lev1", "lev2"],
            ["lev3"],
            "values",
            [[0, 1], [2, 3], [4, 5], [6, 7]],
            Index([1, 2], name="lev3"),
            MultiIndex.from_tuples(
                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
            ),
        ),
        # One-element list index, two-element list of columns.
        (
            ["lev1"],
            ["lev2", "lev3"],
            "values",
            [[0, 1, 2, 3], [4, 5, 6, 7]],
            MultiIndex.from_tuples(
                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev2", "lev3"]
            ),
            Index([1, 2], name="lev1"),
        ),
        # Two-element lists for both index and columns.
        (
            ["lev1", "lev2"],
            ["lev3", "lev4"],
            "values",
            [
                [0.0, 1.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
                [np.nan, np.nan, 2.0, 3.0, np.nan, np.nan, np.nan, np.nan],
                [np.nan, np.nan, np.nan, np.nan, 4.0, 5.0, np.nan, np.nan],
                [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 6.0, 7.0],
            ],
            MultiIndex.from_tuples(
                [(1, 1), (2, 2), (1, 3), (2, 4), (1, 5), (2, 6), (1, 7), (2, 8)],
                names=["lev3", "lev4"],
            ),
            MultiIndex.from_tuples(
                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
            ),
        ),
    ],
)
def test_pivot_list_like_columns(
    input_index,
    input_columns,
    input_values,
    expected_values,
    expected_columns,
    expected_index,
):
    """
    Check ``DataFrame.pivot`` when ``columns`` is given as a list of labels.

    Parameters
    ----------
    input_index, input_columns, input_values
        Forwarded to ``DataFrame.pivot`` as ``index``, ``columns`` and
        ``values``.
    expected_values, expected_columns, expected_index
        Data, columns and index of the frame the pivot must produce.
    """
    # GH 21425, test when columns is given a list
    df = pd.DataFrame(
        {
            "lev1": [1, 1, 1, 1, 2, 2, 2, 2],
            "lev2": [1, 1, 2, 2, 1, 1, 2, 2],
            "lev3": [1, 2, 1, 2, 1, 2, 1, 2],
            "lev4": [1, 2, 3, 4, 5, 6, 7, 8],
            "values": [0, 1, 2, 3, 4, 5, 6, 7],
        }
    )

    result = df.pivot(index=input_index, columns=input_columns, values=input_values)
    expected = pd.DataFrame(
        expected_values, columns=expected_columns, index=expected_index
    )
    tm.assert_frame_equal(result, expected)
- def test_pivot_multiindexed_rows_and_cols(using_array_manager):
- # GH 36360
- df = pd.DataFrame(
- data=np.arange(12).reshape(4, 3),
- columns=MultiIndex.from_tuples(
- [(0, 0), (0, 1), (0, 2)], names=["col_L0", "col_L1"]
- ),
- index=MultiIndex.from_tuples(
- [(0, 0, 0), (0, 0, 1), (1, 1, 1), (1, 0, 0)],
- names=["idx_L0", "idx_L1", "idx_L2"],
- ),
- )
- res = df.pivot_table(
- index=["idx_L0"],
- columns=["idx_L1"],
- values=[(0, 1)],
- aggfunc=lambda col: col.values.sum(),
- )
- expected = pd.DataFrame(
- data=[[5, np.nan], [10, 7.0]],
- columns=MultiIndex.from_tuples(
- [(0, 1, 0), (0, 1, 1)], names=["col_L0", "col_L1", "idx_L1"]
- ),
- index=Index([0, 1], dtype="int64", name="idx_L0"),
- )
- if not using_array_manager:
- # BlockManager does not preserve the dtypes
- expected = expected.astype("float64")
- tm.assert_frame_equal(res, expected)
def test_pivot_df_multiindex_index_none():
    """
    Check ``DataFrame.pivot`` without ``index`` on a MultiIndexed frame.

    The expected result reuses the frame's existing two-level row index and
    has one column per distinct ``label`` value.
    """
    # GH 23955
    df = pd.DataFrame(
        [
            ["A", "A1", "label1", 1],
            ["A", "A2", "label2", 2],
            ["B", "A1", "label1", 3],
            ["B", "A2", "label2", 4],
        ],
        columns=["index_1", "index_2", "label", "value"],
    )
    df = df.set_index(["index_1", "index_2"])

    result = df.pivot(columns="label", values="value")

    # Each row carries exactly one label, so the other label's cell is NaN.
    expected = pd.DataFrame(
        [[1.0, np.nan], [np.nan, 2.0], [3.0, np.nan], [np.nan, 4.0]],
        index=df.index,
        columns=Index(["label1", "label2"], name="label"),
    )
    tm.assert_frame_equal(result, expected)
|