test_conversion.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas import (
  5. DataFrame,
  6. MultiIndex,
  7. )
  8. import pandas._testing as tm
  9. def test_to_numpy(idx):
  10. result = idx.to_numpy()
  11. exp = idx.values
  12. tm.assert_numpy_array_equal(result, exp)
  13. def test_to_frame():
  14. tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]
  15. index = MultiIndex.from_tuples(tuples)
  16. result = index.to_frame(index=False)
  17. expected = DataFrame(tuples)
  18. tm.assert_frame_equal(result, expected)
  19. result = index.to_frame()
  20. expected.index = index
  21. tm.assert_frame_equal(result, expected)
  22. tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]
  23. index = MultiIndex.from_tuples(tuples, names=["first", "second"])
  24. result = index.to_frame(index=False)
  25. expected = DataFrame(tuples)
  26. expected.columns = ["first", "second"]
  27. tm.assert_frame_equal(result, expected)
  28. result = index.to_frame()
  29. expected.index = index
  30. tm.assert_frame_equal(result, expected)
  31. # See GH-22580
  32. index = MultiIndex.from_tuples(tuples)
  33. result = index.to_frame(index=False, name=["first", "second"])
  34. expected = DataFrame(tuples)
  35. expected.columns = ["first", "second"]
  36. tm.assert_frame_equal(result, expected)
  37. result = index.to_frame(name=["first", "second"])
  38. expected.index = index
  39. expected.columns = ["first", "second"]
  40. tm.assert_frame_equal(result, expected)
  41. msg = "'name' must be a list / sequence of column names."
  42. with pytest.raises(TypeError, match=msg):
  43. index.to_frame(name="first")
  44. msg = "'name' should have same length as number of levels on index."
  45. with pytest.raises(ValueError, match=msg):
  46. index.to_frame(name=["first"])
  47. # Tests for datetime index
  48. index = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)])
  49. result = index.to_frame(index=False)
  50. expected = DataFrame(
  51. {
  52. 0: np.repeat(np.arange(5, dtype="int64"), 3),
  53. 1: np.tile(pd.date_range("20130101", periods=3), 5),
  54. }
  55. )
  56. tm.assert_frame_equal(result, expected)
  57. result = index.to_frame()
  58. expected.index = index
  59. tm.assert_frame_equal(result, expected)
  60. # See GH-22580
  61. result = index.to_frame(index=False, name=["first", "second"])
  62. expected = DataFrame(
  63. {
  64. "first": np.repeat(np.arange(5, dtype="int64"), 3),
  65. "second": np.tile(pd.date_range("20130101", periods=3), 5),
  66. }
  67. )
  68. tm.assert_frame_equal(result, expected)
  69. result = index.to_frame(name=["first", "second"])
  70. expected.index = index
  71. tm.assert_frame_equal(result, expected)
  72. def test_to_frame_dtype_fidelity():
  73. # GH 22420
  74. mi = MultiIndex.from_arrays(
  75. [
  76. pd.date_range("19910905", periods=6, tz="US/Eastern"),
  77. [1, 1, 1, 2, 2, 2],
  78. pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
  79. ["x", "x", "y", "z", "x", "y"],
  80. ],
  81. names=["dates", "a", "b", "c"],
  82. )
  83. original_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)}
  84. expected_df = DataFrame(
  85. {
  86. "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"),
  87. "a": [1, 1, 1, 2, 2, 2],
  88. "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
  89. "c": ["x", "x", "y", "z", "x", "y"],
  90. }
  91. )
  92. df = mi.to_frame(index=False)
  93. df_dtypes = df.dtypes.to_dict()
  94. tm.assert_frame_equal(df, expected_df)
  95. assert original_dtypes == df_dtypes
  96. def test_to_frame_resulting_column_order():
  97. # GH 22420
  98. expected = ["z", 0, "a"]
  99. mi = MultiIndex.from_arrays(
  100. [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]], names=expected
  101. )
  102. result = mi.to_frame().columns.tolist()
  103. assert result == expected
  104. def test_to_frame_duplicate_labels():
  105. # GH 45245
  106. data = [(1, 2), (3, 4)]
  107. names = ["a", "a"]
  108. index = MultiIndex.from_tuples(data, names=names)
  109. with pytest.raises(ValueError, match="Cannot create duplicate column labels"):
  110. index.to_frame()
  111. result = index.to_frame(allow_duplicates=True)
  112. expected = DataFrame(data, index=index, columns=names)
  113. tm.assert_frame_equal(result, expected)
  114. names = [None, 0]
  115. index = MultiIndex.from_tuples(data, names=names)
  116. with pytest.raises(ValueError, match="Cannot create duplicate column labels"):
  117. index.to_frame()
  118. result = index.to_frame(allow_duplicates=True)
  119. expected = DataFrame(data, index=index, columns=[0, 0])
  120. tm.assert_frame_equal(result, expected)
  121. def test_to_flat_index(idx):
  122. expected = pd.Index(
  123. (
  124. ("foo", "one"),
  125. ("foo", "two"),
  126. ("bar", "one"),
  127. ("baz", "two"),
  128. ("qux", "one"),
  129. ("qux", "two"),
  130. ),
  131. tupleize_cols=False,
  132. )
  133. result = idx.to_flat_index()
  134. tm.assert_index_equal(result, expected)