123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164 |
- import numpy as np
- import pytest
- import pandas as pd
- from pandas import (
- DataFrame,
- MultiIndex,
- )
- import pandas._testing as tm
- def test_to_numpy(idx):
- result = idx.to_numpy()
- exp = idx.values
- tm.assert_numpy_array_equal(result, exp)
- def test_to_frame():
- tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]
- index = MultiIndex.from_tuples(tuples)
- result = index.to_frame(index=False)
- expected = DataFrame(tuples)
- tm.assert_frame_equal(result, expected)
- result = index.to_frame()
- expected.index = index
- tm.assert_frame_equal(result, expected)
- tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]
- index = MultiIndex.from_tuples(tuples, names=["first", "second"])
- result = index.to_frame(index=False)
- expected = DataFrame(tuples)
- expected.columns = ["first", "second"]
- tm.assert_frame_equal(result, expected)
- result = index.to_frame()
- expected.index = index
- tm.assert_frame_equal(result, expected)
- # See GH-22580
- index = MultiIndex.from_tuples(tuples)
- result = index.to_frame(index=False, name=["first", "second"])
- expected = DataFrame(tuples)
- expected.columns = ["first", "second"]
- tm.assert_frame_equal(result, expected)
- result = index.to_frame(name=["first", "second"])
- expected.index = index
- expected.columns = ["first", "second"]
- tm.assert_frame_equal(result, expected)
- msg = "'name' must be a list / sequence of column names."
- with pytest.raises(TypeError, match=msg):
- index.to_frame(name="first")
- msg = "'name' should have same length as number of levels on index."
- with pytest.raises(ValueError, match=msg):
- index.to_frame(name=["first"])
- # Tests for datetime index
- index = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)])
- result = index.to_frame(index=False)
- expected = DataFrame(
- {
- 0: np.repeat(np.arange(5, dtype="int64"), 3),
- 1: np.tile(pd.date_range("20130101", periods=3), 5),
- }
- )
- tm.assert_frame_equal(result, expected)
- result = index.to_frame()
- expected.index = index
- tm.assert_frame_equal(result, expected)
- # See GH-22580
- result = index.to_frame(index=False, name=["first", "second"])
- expected = DataFrame(
- {
- "first": np.repeat(np.arange(5, dtype="int64"), 3),
- "second": np.tile(pd.date_range("20130101", periods=3), 5),
- }
- )
- tm.assert_frame_equal(result, expected)
- result = index.to_frame(name=["first", "second"])
- expected.index = index
- tm.assert_frame_equal(result, expected)
- def test_to_frame_dtype_fidelity():
- # GH 22420
- mi = MultiIndex.from_arrays(
- [
- pd.date_range("19910905", periods=6, tz="US/Eastern"),
- [1, 1, 1, 2, 2, 2],
- pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
- ["x", "x", "y", "z", "x", "y"],
- ],
- names=["dates", "a", "b", "c"],
- )
- original_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)}
- expected_df = DataFrame(
- {
- "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"),
- "a": [1, 1, 1, 2, 2, 2],
- "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
- "c": ["x", "x", "y", "z", "x", "y"],
- }
- )
- df = mi.to_frame(index=False)
- df_dtypes = df.dtypes.to_dict()
- tm.assert_frame_equal(df, expected_df)
- assert original_dtypes == df_dtypes
- def test_to_frame_resulting_column_order():
- # GH 22420
- expected = ["z", 0, "a"]
- mi = MultiIndex.from_arrays(
- [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]], names=expected
- )
- result = mi.to_frame().columns.tolist()
- assert result == expected
- def test_to_frame_duplicate_labels():
- # GH 45245
- data = [(1, 2), (3, 4)]
- names = ["a", "a"]
- index = MultiIndex.from_tuples(data, names=names)
- with pytest.raises(ValueError, match="Cannot create duplicate column labels"):
- index.to_frame()
- result = index.to_frame(allow_duplicates=True)
- expected = DataFrame(data, index=index, columns=names)
- tm.assert_frame_equal(result, expected)
- names = [None, 0]
- index = MultiIndex.from_tuples(data, names=names)
- with pytest.raises(ValueError, match="Cannot create duplicate column labels"):
- index.to_frame()
- result = index.to_frame(allow_duplicates=True)
- expected = DataFrame(data, index=index, columns=[0, 0])
- tm.assert_frame_equal(result, expected)
- def test_to_flat_index(idx):
- expected = pd.Index(
- (
- ("foo", "one"),
- ("foo", "two"),
- ("bar", "one"),
- ("baz", "two"),
- ("qux", "one"),
- ("qux", "two"),
- ),
- tupleize_cols=False,
- )
- result = idx.to_flat_index()
- tm.assert_index_equal(result, expected)
|