# test_reset_index.py

from datetime import datetime

import numpy as np
import pytest

import pandas as pd
from pandas import (
    DataFrame,
    Index,
    MultiIndex,
    RangeIndex,
    Series,
    date_range,
)
import pandas._testing as tm


  14. class TestResetIndex:
  15. def test_reset_index_dti_round_trip(self):
  16. dti = date_range(start="1/1/2001", end="6/1/2001", freq="D")._with_freq(None)
  17. d1 = DataFrame({"v": np.random.rand(len(dti))}, index=dti)
  18. d2 = d1.reset_index()
  19. assert d2.dtypes[0] == np.dtype("M8[ns]")
  20. d3 = d2.set_index("index")
  21. tm.assert_frame_equal(d1, d3, check_names=False)
  22. # GH#2329
  23. stamp = datetime(2012, 11, 22)
  24. df = DataFrame([[stamp, 12.1]], columns=["Date", "Value"])
  25. df = df.set_index("Date")
  26. assert df.index[0] == stamp
  27. assert df.reset_index()["Date"][0] == stamp
  28. def test_reset_index(self):
  29. df = tm.makeDataFrame()[:5]
  30. ser = df.stack()
  31. ser.index.names = ["hash", "category"]
  32. ser.name = "value"
  33. df = ser.reset_index()
  34. assert "value" in df
  35. df = ser.reset_index(name="value2")
  36. assert "value2" in df
  37. # check inplace
  38. s = ser.reset_index(drop=True)
  39. s2 = ser
  40. return_value = s2.reset_index(drop=True, inplace=True)
  41. assert return_value is None
  42. tm.assert_series_equal(s, s2)
  43. # level
  44. index = MultiIndex(
  45. levels=[["bar"], ["one", "two", "three"], [0, 1]],
  46. codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
  47. )
  48. s = Series(np.random.randn(6), index=index)
  49. rs = s.reset_index(level=1)
  50. assert len(rs.columns) == 2
  51. rs = s.reset_index(level=[0, 2], drop=True)
  52. tm.assert_index_equal(rs.index, Index(index.get_level_values(1)))
  53. assert isinstance(rs, Series)
  54. def test_reset_index_name(self):
  55. s = Series([1, 2, 3], index=Index(range(3), name="x"))
  56. assert s.reset_index().index.name is None
  57. assert s.reset_index(drop=True).index.name is None
  58. def test_reset_index_level(self):
  59. df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"])
  60. for levels in ["A", "B"], [0, 1]:
  61. # With MultiIndex
  62. s = df.set_index(["A", "B"])["C"]
  63. result = s.reset_index(level=levels[0])
  64. tm.assert_frame_equal(result, df.set_index("B"))
  65. result = s.reset_index(level=levels[:1])
  66. tm.assert_frame_equal(result, df.set_index("B"))
  67. result = s.reset_index(level=levels)
  68. tm.assert_frame_equal(result, df)
  69. result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True)
  70. tm.assert_frame_equal(result, df[["C"]])
  71. with pytest.raises(KeyError, match="Level E "):
  72. s.reset_index(level=["A", "E"])
  73. # With single-level Index
  74. s = df.set_index("A")["B"]
  75. result = s.reset_index(level=levels[0])
  76. tm.assert_frame_equal(result, df[["A", "B"]])
  77. result = s.reset_index(level=levels[:1])
  78. tm.assert_frame_equal(result, df[["A", "B"]])
  79. result = s.reset_index(level=levels[0], drop=True)
  80. tm.assert_series_equal(result, df["B"])
  81. with pytest.raises(IndexError, match="Too many levels"):
  82. s.reset_index(level=[0, 1, 2])
  83. # Check that .reset_index([],drop=True) doesn't fail
  84. result = Series(range(4)).reset_index([], drop=True)
  85. expected = Series(range(4))
  86. tm.assert_series_equal(result, expected)
  87. def test_reset_index_range(self):
  88. # GH 12071
  89. s = Series(range(2), name="A", dtype="int64")
  90. series_result = s.reset_index()
  91. assert isinstance(series_result.index, RangeIndex)
  92. series_expected = DataFrame(
  93. [[0, 0], [1, 1]], columns=["index", "A"], index=RangeIndex(stop=2)
  94. )
  95. tm.assert_frame_equal(series_result, series_expected)
  96. def test_reset_index_drop_errors(self):
  97. # GH 20925
  98. # KeyError raised for series index when passed level name is missing
  99. s = Series(range(4))
  100. with pytest.raises(KeyError, match="does not match index name"):
  101. s.reset_index("wrong", drop=True)
  102. with pytest.raises(KeyError, match="does not match index name"):
  103. s.reset_index("wrong")
  104. # KeyError raised for series when level to be dropped is missing
  105. s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2))
  106. with pytest.raises(KeyError, match="not found"):
  107. s.reset_index("wrong", drop=True)
  108. def test_reset_index_with_drop(self, series_with_multilevel_index):
  109. ser = series_with_multilevel_index
  110. deleveled = ser.reset_index()
  111. assert isinstance(deleveled, DataFrame)
  112. assert len(deleveled.columns) == len(ser.index.levels) + 1
  113. assert deleveled.index.name == ser.index.name
  114. deleveled = ser.reset_index(drop=True)
  115. assert isinstance(deleveled, Series)
  116. assert deleveled.index.name == ser.index.name
  117. def test_reset_index_inplace_and_drop_ignore_name(self):
  118. # GH#44575
  119. ser = Series(range(2), name="old")
  120. ser.reset_index(name="new", drop=True, inplace=True)
  121. expected = Series(range(2), name="old")
  122. tm.assert_series_equal(ser, expected)
  123. @pytest.mark.parametrize(
  124. "array, dtype",
  125. [
  126. (["a", "b"], object),
  127. (
  128. pd.period_range("12-1-2000", periods=2, freq="Q-DEC"),
  129. pd.PeriodDtype(freq="Q-DEC"),
  130. ),
  131. ],
  132. )
  133. def test_reset_index_dtypes_on_empty_series_with_multiindex(array, dtype):
  134. # GH 19602 - Preserve dtype on empty Series with MultiIndex
  135. idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array])
  136. result = Series(dtype=object, index=idx)[:0].reset_index().dtypes
  137. expected = Series(
  138. {"level_0": np.int64, "level_1": np.float64, "level_2": dtype, 0: object}
  139. )
  140. tm.assert_series_equal(result, expected)
  141. @pytest.mark.parametrize(
  142. "names, expected_names",
  143. [
  144. (["A", "A"], ["A", "A"]),
  145. (["level_1", None], ["level_1", "level_1"]),
  146. ],
  147. )
  148. @pytest.mark.parametrize("allow_duplicates", [False, True])
  149. def test_column_name_duplicates(names, expected_names, allow_duplicates):
  150. # GH#44755 reset_index with duplicate column labels
  151. s = Series([1], index=MultiIndex.from_arrays([[1], [1]], names=names))
  152. if allow_duplicates:
  153. result = s.reset_index(allow_duplicates=True)
  154. expected = DataFrame([[1, 1, 1]], columns=expected_names + [0])
  155. tm.assert_frame_equal(result, expected)
  156. else:
  157. with pytest.raises(ValueError, match="cannot insert"):
  158. s.reset_index()