# test_reset_index.py (27 KB)
  1. from datetime import datetime
  2. from itertools import product
  3. import numpy as np
  4. import pytest
  5. from pandas.core.dtypes.common import (
  6. is_float_dtype,
  7. is_integer_dtype,
  8. )
  9. import pandas as pd
  10. from pandas import (
  11. Categorical,
  12. CategoricalIndex,
  13. DataFrame,
  14. Index,
  15. Interval,
  16. IntervalIndex,
  17. MultiIndex,
  18. RangeIndex,
  19. Series,
  20. Timestamp,
  21. cut,
  22. date_range,
  23. )
  24. import pandas._testing as tm
  25. @pytest.fixture()
  26. def multiindex_df():
  27. levels = [["A", ""], ["B", "b"]]
  28. return DataFrame([[0, 2], [1, 3]], columns=MultiIndex.from_tuples(levels))
  29. class TestResetIndex:
  30. def test_reset_index_empty_rangeindex(self):
  31. # GH#45230
  32. df = DataFrame(
  33. columns=["brand"], dtype=np.int64, index=RangeIndex(0, 0, 1, name="foo")
  34. )
  35. df2 = df.set_index([df.index, "brand"])
  36. result = df2.reset_index([1], drop=True)
  37. tm.assert_frame_equal(result, df[[]], check_index_type=True)
  38. def test_set_reset(self):
  39. idx = Index([2**63, 2**63 + 5, 2**63 + 10], name="foo")
  40. # set/reset
  41. df = DataFrame({"A": [0, 1, 2]}, index=idx)
  42. result = df.reset_index()
  43. assert result["foo"].dtype == np.dtype("uint64")
  44. df = result.set_index("foo")
  45. tm.assert_index_equal(df.index, idx)
  46. def test_set_index_reset_index_dt64tz(self):
  47. idx = Index(date_range("20130101", periods=3, tz="US/Eastern"), name="foo")
  48. # set/reset
  49. df = DataFrame({"A": [0, 1, 2]}, index=idx)
  50. result = df.reset_index()
  51. assert result["foo"].dtype == "datetime64[ns, US/Eastern]"
  52. df = result.set_index("foo")
  53. tm.assert_index_equal(df.index, idx)
  54. def test_reset_index_tz(self, tz_aware_fixture):
  55. # GH 3950
  56. # reset_index with single level
  57. tz = tz_aware_fixture
  58. idx = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx")
  59. df = DataFrame({"a": range(5), "b": ["A", "B", "C", "D", "E"]}, index=idx)
  60. expected = DataFrame(
  61. {
  62. "idx": [
  63. datetime(2011, 1, 1),
  64. datetime(2011, 1, 2),
  65. datetime(2011, 1, 3),
  66. datetime(2011, 1, 4),
  67. datetime(2011, 1, 5),
  68. ],
  69. "a": range(5),
  70. "b": ["A", "B", "C", "D", "E"],
  71. },
  72. columns=["idx", "a", "b"],
  73. )
  74. expected["idx"] = expected["idx"].apply(lambda d: Timestamp(d, tz=tz))
  75. tm.assert_frame_equal(df.reset_index(), expected)
  76. @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
  77. def test_frame_reset_index_tzaware_index(self, tz):
  78. dr = date_range("2012-06-02", periods=10, tz=tz)
  79. df = DataFrame(np.random.randn(len(dr)), dr)
  80. roundtripped = df.reset_index().set_index("index")
  81. xp = df.index.tz
  82. rs = roundtripped.index.tz
  83. assert xp == rs
  84. def test_reset_index_with_intervals(self):
  85. idx = IntervalIndex.from_breaks(np.arange(11), name="x")
  86. original = DataFrame({"x": idx, "y": np.arange(10)})[["x", "y"]]
  87. result = original.set_index("x")
  88. expected = DataFrame({"y": np.arange(10)}, index=idx)
  89. tm.assert_frame_equal(result, expected)
  90. result2 = result.reset_index()
  91. tm.assert_frame_equal(result2, original)
  92. def test_reset_index(self, float_frame):
  93. stacked = float_frame.stack()[::2]
  94. stacked = DataFrame({"foo": stacked, "bar": stacked})
  95. names = ["first", "second"]
  96. stacked.index.names = names
  97. deleveled = stacked.reset_index()
  98. for i, (lev, level_codes) in enumerate(
  99. zip(stacked.index.levels, stacked.index.codes)
  100. ):
  101. values = lev.take(level_codes)
  102. name = names[i]
  103. tm.assert_index_equal(values, Index(deleveled[name]))
  104. stacked.index.names = [None, None]
  105. deleveled2 = stacked.reset_index()
  106. tm.assert_series_equal(
  107. deleveled["first"], deleveled2["level_0"], check_names=False
  108. )
  109. tm.assert_series_equal(
  110. deleveled["second"], deleveled2["level_1"], check_names=False
  111. )
  112. # default name assigned
  113. rdf = float_frame.reset_index()
  114. exp = Series(float_frame.index.values, name="index")
  115. tm.assert_series_equal(rdf["index"], exp)
  116. # default name assigned, corner case
  117. df = float_frame.copy()
  118. df["index"] = "foo"
  119. rdf = df.reset_index()
  120. exp = Series(float_frame.index.values, name="level_0")
  121. tm.assert_series_equal(rdf["level_0"], exp)
  122. # but this is ok
  123. float_frame.index.name = "index"
  124. deleveled = float_frame.reset_index()
  125. tm.assert_series_equal(deleveled["index"], Series(float_frame.index))
  126. tm.assert_index_equal(deleveled.index, Index(range(len(deleveled))), exact=True)
  127. # preserve column names
  128. float_frame.columns.name = "columns"
  129. reset = float_frame.reset_index()
  130. assert reset.columns.name == "columns"
  131. # only remove certain columns
  132. df = float_frame.reset_index().set_index(["index", "A", "B"])
  133. rs = df.reset_index(["A", "B"])
  134. tm.assert_frame_equal(rs, float_frame)
  135. rs = df.reset_index(["index", "A", "B"])
  136. tm.assert_frame_equal(rs, float_frame.reset_index())
  137. rs = df.reset_index(["index", "A", "B"])
  138. tm.assert_frame_equal(rs, float_frame.reset_index())
  139. rs = df.reset_index("A")
  140. xp = float_frame.reset_index().set_index(["index", "B"])
  141. tm.assert_frame_equal(rs, xp)
  142. # test resetting in place
  143. df = float_frame.copy()
  144. reset = float_frame.reset_index()
  145. return_value = df.reset_index(inplace=True)
  146. assert return_value is None
  147. tm.assert_frame_equal(df, reset)
  148. df = float_frame.reset_index().set_index(["index", "A", "B"])
  149. rs = df.reset_index("A", drop=True)
  150. xp = float_frame.copy()
  151. del xp["A"]
  152. xp = xp.set_index(["B"], append=True)
  153. tm.assert_frame_equal(rs, xp)
  154. def test_reset_index_name(self):
  155. df = DataFrame(
  156. [[1, 2, 3, 4], [5, 6, 7, 8]],
  157. columns=["A", "B", "C", "D"],
  158. index=Index(range(2), name="x"),
  159. )
  160. assert df.reset_index().index.name is None
  161. assert df.reset_index(drop=True).index.name is None
  162. return_value = df.reset_index(inplace=True)
  163. assert return_value is None
  164. assert df.index.name is None
  165. @pytest.mark.parametrize("levels", [["A", "B"], [0, 1]])
  166. def test_reset_index_level(self, levels):
  167. df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "C", "D"])
  168. # With MultiIndex
  169. result = df.set_index(["A", "B"]).reset_index(level=levels[0])
  170. tm.assert_frame_equal(result, df.set_index("B"))
  171. result = df.set_index(["A", "B"]).reset_index(level=levels[:1])
  172. tm.assert_frame_equal(result, df.set_index("B"))
  173. result = df.set_index(["A", "B"]).reset_index(level=levels)
  174. tm.assert_frame_equal(result, df)
  175. result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True)
  176. tm.assert_frame_equal(result, df[["C", "D"]])
  177. # With single-level Index (GH 16263)
  178. result = df.set_index("A").reset_index(level=levels[0])
  179. tm.assert_frame_equal(result, df)
  180. result = df.set_index("A").reset_index(level=levels[:1])
  181. tm.assert_frame_equal(result, df)
  182. result = df.set_index(["A"]).reset_index(level=levels[0], drop=True)
  183. tm.assert_frame_equal(result, df[["B", "C", "D"]])
  184. @pytest.mark.parametrize("idx_lev", [["A", "B"], ["A"]])
  185. def test_reset_index_level_missing(self, idx_lev):
  186. # Missing levels - for both MultiIndex and single-level Index:
  187. df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "C", "D"])
  188. with pytest.raises(KeyError, match=r"(L|l)evel \(?E\)?"):
  189. df.set_index(idx_lev).reset_index(level=["A", "E"])
  190. with pytest.raises(IndexError, match="Too many levels"):
  191. df.set_index(idx_lev).reset_index(level=[0, 1, 2])
  192. def test_reset_index_right_dtype(self):
  193. time = np.arange(0.0, 10, np.sqrt(2) / 2)
  194. s1 = Series(
  195. (9.81 * time**2) / 2, index=Index(time, name="time"), name="speed"
  196. )
  197. df = DataFrame(s1)
  198. reset = s1.reset_index()
  199. assert reset["time"].dtype == np.float64
  200. reset = df.reset_index()
  201. assert reset["time"].dtype == np.float64
  202. def test_reset_index_multiindex_col(self):
  203. vals = np.random.randn(3, 3).astype(object)
  204. idx = ["x", "y", "z"]
  205. full = np.hstack(([[x] for x in idx], vals))
  206. df = DataFrame(
  207. vals,
  208. Index(idx, name="a"),
  209. columns=[["b", "b", "c"], ["mean", "median", "mean"]],
  210. )
  211. rs = df.reset_index()
  212. xp = DataFrame(
  213. full, columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]]
  214. )
  215. tm.assert_frame_equal(rs, xp)
  216. rs = df.reset_index(col_fill=None)
  217. xp = DataFrame(
  218. full, columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]]
  219. )
  220. tm.assert_frame_equal(rs, xp)
  221. rs = df.reset_index(col_level=1, col_fill="blah")
  222. xp = DataFrame(
  223. full, columns=[["blah", "b", "b", "c"], ["a", "mean", "median", "mean"]]
  224. )
  225. tm.assert_frame_equal(rs, xp)
  226. df = DataFrame(
  227. vals,
  228. MultiIndex.from_arrays([[0, 1, 2], ["x", "y", "z"]], names=["d", "a"]),
  229. columns=[["b", "b", "c"], ["mean", "median", "mean"]],
  230. )
  231. rs = df.reset_index("a")
  232. xp = DataFrame(
  233. full,
  234. Index([0, 1, 2], name="d"),
  235. columns=[["a", "b", "b", "c"], ["", "mean", "median", "mean"]],
  236. )
  237. tm.assert_frame_equal(rs, xp)
  238. rs = df.reset_index("a", col_fill=None)
  239. xp = DataFrame(
  240. full,
  241. Index(range(3), name="d"),
  242. columns=[["a", "b", "b", "c"], ["a", "mean", "median", "mean"]],
  243. )
  244. tm.assert_frame_equal(rs, xp)
  245. rs = df.reset_index("a", col_fill="blah", col_level=1)
  246. xp = DataFrame(
  247. full,
  248. Index(range(3), name="d"),
  249. columns=[["blah", "b", "b", "c"], ["a", "mean", "median", "mean"]],
  250. )
  251. tm.assert_frame_equal(rs, xp)
  252. def test_reset_index_multiindex_nan(self):
  253. # GH#6322, testing reset_index on MultiIndexes
  254. # when we have a nan or all nan
  255. df = DataFrame(
  256. {"A": ["a", "b", "c"], "B": [0, 1, np.nan], "C": np.random.rand(3)}
  257. )
  258. rs = df.set_index(["A", "B"]).reset_index()
  259. tm.assert_frame_equal(rs, df)
  260. df = DataFrame(
  261. {"A": [np.nan, "b", "c"], "B": [0, 1, 2], "C": np.random.rand(3)}
  262. )
  263. rs = df.set_index(["A", "B"]).reset_index()
  264. tm.assert_frame_equal(rs, df)
  265. df = DataFrame({"A": ["a", "b", "c"], "B": [0, 1, 2], "C": [np.nan, 1.1, 2.2]})
  266. rs = df.set_index(["A", "B"]).reset_index()
  267. tm.assert_frame_equal(rs, df)
  268. df = DataFrame(
  269. {
  270. "A": ["a", "b", "c"],
  271. "B": [np.nan, np.nan, np.nan],
  272. "C": np.random.rand(3),
  273. }
  274. )
  275. rs = df.set_index(["A", "B"]).reset_index()
  276. tm.assert_frame_equal(rs, df)
  277. @pytest.mark.parametrize(
  278. "name",
  279. [
  280. None,
  281. "foo",
  282. 2,
  283. 3.0,
  284. pd.Timedelta(6),
  285. Timestamp("2012-12-30", tz="UTC"),
  286. "2012-12-31",
  287. ],
  288. )
  289. def test_reset_index_with_datetimeindex_cols(self, name):
  290. # GH#5818
  291. df = DataFrame(
  292. [[1, 2], [3, 4]],
  293. columns=date_range("1/1/2013", "1/2/2013"),
  294. index=["A", "B"],
  295. )
  296. df.index.name = name
  297. result = df.reset_index()
  298. item = name if name is not None else "index"
  299. columns = Index([item, datetime(2013, 1, 1), datetime(2013, 1, 2)])
  300. if isinstance(item, str) and item == "2012-12-31":
  301. columns = columns.astype("datetime64[ns]")
  302. else:
  303. assert columns.dtype == object
  304. expected = DataFrame(
  305. [["A", 1, 2], ["B", 3, 4]],
  306. columns=columns,
  307. )
  308. tm.assert_frame_equal(result, expected)
  309. def test_reset_index_range(self):
  310. # GH#12071
  311. df = DataFrame([[0, 0], [1, 1]], columns=["A", "B"], index=RangeIndex(stop=2))
  312. result = df.reset_index()
  313. assert isinstance(result.index, RangeIndex)
  314. expected = DataFrame(
  315. [[0, 0, 0], [1, 1, 1]],
  316. columns=["index", "A", "B"],
  317. index=RangeIndex(stop=2),
  318. )
  319. tm.assert_frame_equal(result, expected)
  320. def test_reset_index_multiindex_columns(self, multiindex_df):
  321. result = multiindex_df[["B"]].rename_axis("A").reset_index()
  322. tm.assert_frame_equal(result, multiindex_df)
  323. # GH#16120: already existing column
  324. msg = r"cannot insert \('A', ''\), already exists"
  325. with pytest.raises(ValueError, match=msg):
  326. multiindex_df.rename_axis("A").reset_index()
  327. # GH#16164: multiindex (tuple) full key
  328. result = multiindex_df.set_index([("A", "")]).reset_index()
  329. tm.assert_frame_equal(result, multiindex_df)
  330. # with additional (unnamed) index level
  331. idx_col = DataFrame(
  332. [[0], [1]], columns=MultiIndex.from_tuples([("level_0", "")])
  333. )
  334. expected = pd.concat([idx_col, multiindex_df[[("B", "b"), ("A", "")]]], axis=1)
  335. result = multiindex_df.set_index([("B", "b")], append=True).reset_index()
  336. tm.assert_frame_equal(result, expected)
  337. # with index name which is a too long tuple...
  338. msg = "Item must have length equal to number of levels."
  339. with pytest.raises(ValueError, match=msg):
  340. multiindex_df.rename_axis([("C", "c", "i")]).reset_index()
  341. # or too short...
  342. levels = [["A", "a", ""], ["B", "b", "i"]]
  343. df2 = DataFrame([[0, 2], [1, 3]], columns=MultiIndex.from_tuples(levels))
  344. idx_col = DataFrame(
  345. [[0], [1]], columns=MultiIndex.from_tuples([("C", "c", "ii")])
  346. )
  347. expected = pd.concat([idx_col, df2], axis=1)
  348. result = df2.rename_axis([("C", "c")]).reset_index(col_fill="ii")
  349. tm.assert_frame_equal(result, expected)
  350. # ... which is incompatible with col_fill=None
  351. with pytest.raises(
  352. ValueError,
  353. match=(
  354. "col_fill=None is incompatible with "
  355. r"incomplete column name \('C', 'c'\)"
  356. ),
  357. ):
  358. df2.rename_axis([("C", "c")]).reset_index(col_fill=None)
  359. # with col_level != 0
  360. result = df2.rename_axis([("c", "ii")]).reset_index(col_level=1, col_fill="C")
  361. tm.assert_frame_equal(result, expected)
  362. @pytest.mark.parametrize("flag", [False, True])
  363. @pytest.mark.parametrize("allow_duplicates", [False, True])
  364. def test_reset_index_duplicate_columns_allow(
  365. self, multiindex_df, flag, allow_duplicates
  366. ):
  367. # GH#44755 reset_index with duplicate column labels
  368. df = multiindex_df.rename_axis("A")
  369. df = df.set_flags(allows_duplicate_labels=flag)
  370. if flag and allow_duplicates:
  371. result = df.reset_index(allow_duplicates=allow_duplicates)
  372. levels = [["A", ""], ["A", ""], ["B", "b"]]
  373. expected = DataFrame(
  374. [[0, 0, 2], [1, 1, 3]], columns=MultiIndex.from_tuples(levels)
  375. )
  376. tm.assert_frame_equal(result, expected)
  377. else:
  378. if not flag and allow_duplicates:
  379. msg = (
  380. "Cannot specify 'allow_duplicates=True' when "
  381. "'self.flags.allows_duplicate_labels' is False"
  382. )
  383. else:
  384. msg = r"cannot insert \('A', ''\), already exists"
  385. with pytest.raises(ValueError, match=msg):
  386. df.reset_index(allow_duplicates=allow_duplicates)
  387. @pytest.mark.parametrize("flag", [False, True])
  388. def test_reset_index_duplicate_columns_default(self, multiindex_df, flag):
  389. df = multiindex_df.rename_axis("A")
  390. df = df.set_flags(allows_duplicate_labels=flag)
  391. msg = r"cannot insert \('A', ''\), already exists"
  392. with pytest.raises(ValueError, match=msg):
  393. df.reset_index()
  394. @pytest.mark.parametrize("allow_duplicates", ["bad value"])
  395. def test_reset_index_allow_duplicates_check(self, multiindex_df, allow_duplicates):
  396. with pytest.raises(ValueError, match="expected type bool"):
  397. multiindex_df.reset_index(allow_duplicates=allow_duplicates)
  398. def test_reset_index_datetime(self, tz_naive_fixture):
  399. # GH#3950
  400. tz = tz_naive_fixture
  401. idx1 = date_range("1/1/2011", periods=5, freq="D", tz=tz, name="idx1")
  402. idx2 = Index(range(5), name="idx2", dtype="int64")
  403. idx = MultiIndex.from_arrays([idx1, idx2])
  404. df = DataFrame(
  405. {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]},
  406. index=idx,
  407. )
  408. expected = DataFrame(
  409. {
  410. "idx1": [
  411. datetime(2011, 1, 1),
  412. datetime(2011, 1, 2),
  413. datetime(2011, 1, 3),
  414. datetime(2011, 1, 4),
  415. datetime(2011, 1, 5),
  416. ],
  417. "idx2": np.arange(5, dtype="int64"),
  418. "a": np.arange(5, dtype="int64"),
  419. "b": ["A", "B", "C", "D", "E"],
  420. },
  421. columns=["idx1", "idx2", "a", "b"],
  422. )
  423. expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz))
  424. tm.assert_frame_equal(df.reset_index(), expected)
  425. idx3 = date_range(
  426. "1/1/2012", periods=5, freq="MS", tz="Europe/Paris", name="idx3"
  427. )
  428. idx = MultiIndex.from_arrays([idx1, idx2, idx3])
  429. df = DataFrame(
  430. {"a": np.arange(5, dtype="int64"), "b": ["A", "B", "C", "D", "E"]},
  431. index=idx,
  432. )
  433. expected = DataFrame(
  434. {
  435. "idx1": [
  436. datetime(2011, 1, 1),
  437. datetime(2011, 1, 2),
  438. datetime(2011, 1, 3),
  439. datetime(2011, 1, 4),
  440. datetime(2011, 1, 5),
  441. ],
  442. "idx2": np.arange(5, dtype="int64"),
  443. "idx3": [
  444. datetime(2012, 1, 1),
  445. datetime(2012, 2, 1),
  446. datetime(2012, 3, 1),
  447. datetime(2012, 4, 1),
  448. datetime(2012, 5, 1),
  449. ],
  450. "a": np.arange(5, dtype="int64"),
  451. "b": ["A", "B", "C", "D", "E"],
  452. },
  453. columns=["idx1", "idx2", "idx3", "a", "b"],
  454. )
  455. expected["idx1"] = expected["idx1"].apply(lambda d: Timestamp(d, tz=tz))
  456. expected["idx3"] = expected["idx3"].apply(
  457. lambda d: Timestamp(d, tz="Europe/Paris")
  458. )
  459. tm.assert_frame_equal(df.reset_index(), expected)
  460. # GH#7793
  461. idx = MultiIndex.from_product(
  462. [["a", "b"], date_range("20130101", periods=3, tz=tz)]
  463. )
  464. df = DataFrame(
  465. np.arange(6, dtype="int64").reshape(6, 1), columns=["a"], index=idx
  466. )
  467. expected = DataFrame(
  468. {
  469. "level_0": "a a a b b b".split(),
  470. "level_1": [
  471. datetime(2013, 1, 1),
  472. datetime(2013, 1, 2),
  473. datetime(2013, 1, 3),
  474. ]
  475. * 2,
  476. "a": np.arange(6, dtype="int64"),
  477. },
  478. columns=["level_0", "level_1", "a"],
  479. )
  480. expected["level_1"] = expected["level_1"].apply(lambda d: Timestamp(d, tz=tz))
  481. result = df.reset_index()
  482. tm.assert_frame_equal(result, expected)
  483. def test_reset_index_period(self):
  484. # GH#7746
  485. idx = MultiIndex.from_product(
  486. [pd.period_range("20130101", periods=3, freq="M"), list("abc")],
  487. names=["month", "feature"],
  488. )
  489. df = DataFrame(
  490. np.arange(9, dtype="int64").reshape(-1, 1), index=idx, columns=["a"]
  491. )
  492. expected = DataFrame(
  493. {
  494. "month": (
  495. [pd.Period("2013-01", freq="M")] * 3
  496. + [pd.Period("2013-02", freq="M")] * 3
  497. + [pd.Period("2013-03", freq="M")] * 3
  498. ),
  499. "feature": ["a", "b", "c"] * 3,
  500. "a": np.arange(9, dtype="int64"),
  501. },
  502. columns=["month", "feature", "a"],
  503. )
  504. result = df.reset_index()
  505. tm.assert_frame_equal(result, expected)
  506. def test_reset_index_delevel_infer_dtype(self):
  507. tuples = list(product(["foo", "bar"], [10, 20], [1.0, 1.1]))
  508. index = MultiIndex.from_tuples(tuples, names=["prm0", "prm1", "prm2"])
  509. df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"], index=index)
  510. deleveled = df.reset_index()
  511. assert is_integer_dtype(deleveled["prm1"])
  512. assert is_float_dtype(deleveled["prm2"])
  513. def test_reset_index_with_drop(
  514. self, multiindex_year_month_day_dataframe_random_data
  515. ):
  516. ymd = multiindex_year_month_day_dataframe_random_data
  517. deleveled = ymd.reset_index(drop=True)
  518. assert len(deleveled.columns) == len(ymd.columns)
  519. assert deleveled.index.name == ymd.index.name
  520. @pytest.mark.parametrize(
  521. "ix_data, exp_data",
  522. [
  523. (
  524. [(pd.NaT, 1), (pd.NaT, 2)],
  525. {"a": [pd.NaT, pd.NaT], "b": [1, 2], "x": [11, 12]},
  526. ),
  527. (
  528. [(pd.NaT, 1), (Timestamp("2020-01-01"), 2)],
  529. {"a": [pd.NaT, Timestamp("2020-01-01")], "b": [1, 2], "x": [11, 12]},
  530. ),
  531. (
  532. [(pd.NaT, 1), (pd.Timedelta(123, "d"), 2)],
  533. {"a": [pd.NaT, pd.Timedelta(123, "d")], "b": [1, 2], "x": [11, 12]},
  534. ),
  535. ],
  536. )
  537. def test_reset_index_nat_multiindex(self, ix_data, exp_data):
  538. # GH#36541: that reset_index() does not raise ValueError
  539. ix = MultiIndex.from_tuples(ix_data, names=["a", "b"])
  540. result = DataFrame({"x": [11, 12]}, index=ix)
  541. result = result.reset_index()
  542. expected = DataFrame(exp_data)
  543. tm.assert_frame_equal(result, expected)
  544. @pytest.mark.parametrize(
  545. "codes", ([[0, 0, 1, 1], [0, 1, 0, 1]], [[0, 0, -1, 1], [0, 1, 0, 1]])
  546. )
  547. def test_rest_index_multiindex_categorical_with_missing_values(self, codes):
  548. # GH#24206
  549. index = MultiIndex(
  550. [CategoricalIndex(["A", "B"]), CategoricalIndex(["a", "b"])], codes
  551. )
  552. data = {"col": range(len(index))}
  553. df = DataFrame(data=data, index=index)
  554. expected = DataFrame(
  555. {
  556. "level_0": Categorical.from_codes(codes[0], categories=["A", "B"]),
  557. "level_1": Categorical.from_codes(codes[1], categories=["a", "b"]),
  558. "col": range(4),
  559. }
  560. )
  561. res = df.reset_index()
  562. tm.assert_frame_equal(res, expected)
  563. # roundtrip
  564. res = expected.set_index(["level_0", "level_1"]).reset_index()
  565. tm.assert_frame_equal(res, expected)
  566. @pytest.mark.parametrize(
  567. "array, dtype",
  568. [
  569. (["a", "b"], object),
  570. (
  571. pd.period_range("12-1-2000", periods=2, freq="Q-DEC"),
  572. pd.PeriodDtype(freq="Q-DEC"),
  573. ),
  574. ],
  575. )
  576. def test_reset_index_dtypes_on_empty_frame_with_multiindex(array, dtype):
  577. # GH 19602 - Preserve dtype on empty DataFrame with MultiIndex
  578. idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array])
  579. result = DataFrame(index=idx)[:0].reset_index().dtypes
  580. expected = Series({"level_0": np.int64, "level_1": np.float64, "level_2": dtype})
  581. tm.assert_series_equal(result, expected)
  582. def test_reset_index_empty_frame_with_datetime64_multiindex():
  583. # https://github.com/pandas-dev/pandas/issues/35606
  584. idx = MultiIndex(
  585. levels=[[Timestamp("2020-07-20 00:00:00")], [3, 4]],
  586. codes=[[], []],
  587. names=["a", "b"],
  588. )
  589. df = DataFrame(index=idx, columns=["c", "d"])
  590. result = df.reset_index()
  591. expected = DataFrame(
  592. columns=list("abcd"), index=RangeIndex(start=0, stop=0, step=1)
  593. )
  594. expected["a"] = expected["a"].astype("datetime64[ns]")
  595. expected["b"] = expected["b"].astype("int64")
  596. tm.assert_frame_equal(result, expected)
  597. def test_reset_index_empty_frame_with_datetime64_multiindex_from_groupby():
  598. # https://github.com/pandas-dev/pandas/issues/35657
  599. df = DataFrame({"c1": [10.0], "c2": ["a"], "c3": pd.to_datetime("2020-01-01")})
  600. df = df.head(0).groupby(["c2", "c3"])[["c1"]].sum()
  601. result = df.reset_index()
  602. expected = DataFrame(
  603. columns=["c2", "c3", "c1"], index=RangeIndex(start=0, stop=0, step=1)
  604. )
  605. expected["c3"] = expected["c3"].astype("datetime64[ns]")
  606. expected["c1"] = expected["c1"].astype("float64")
  607. tm.assert_frame_equal(result, expected)
  608. def test_reset_index_multiindex_nat():
  609. # GH 11479
  610. idx = range(3)
  611. tstamp = date_range("2015-07-01", freq="D", periods=3)
  612. df = DataFrame({"id": idx, "tstamp": tstamp, "a": list("abc")})
  613. df.loc[2, "tstamp"] = pd.NaT
  614. result = df.set_index(["id", "tstamp"]).reset_index("id")
  615. expected = DataFrame(
  616. {"id": range(3), "a": list("abc")},
  617. index=pd.DatetimeIndex(["2015-07-01", "2015-07-02", "NaT"], name="tstamp"),
  618. )
  619. tm.assert_frame_equal(result, expected)
  620. def test_reset_index_interval_columns_object_cast():
  621. # GH 19136
  622. df = DataFrame(
  623. np.eye(2), index=Index([1, 2], name="Year"), columns=cut([1, 2], [0, 1, 2])
  624. )
  625. result = df.reset_index()
  626. expected = DataFrame(
  627. [[1, 1.0, 0.0], [2, 0.0, 1.0]],
  628. columns=Index(["Year", Interval(0, 1), Interval(1, 2)]),
  629. )
  630. tm.assert_frame_equal(result, expected)
  631. def test_reset_index_rename(float_frame):
  632. # GH 6878
  633. result = float_frame.reset_index(names="new_name")
  634. expected = Series(float_frame.index.values, name="new_name")
  635. tm.assert_series_equal(result["new_name"], expected)
  636. result = float_frame.reset_index(names=123)
  637. expected = Series(float_frame.index.values, name=123)
  638. tm.assert_series_equal(result[123], expected)
  639. def test_reset_index_rename_multiindex(float_frame):
  640. # GH 6878
  641. stacked_df = float_frame.stack()[::2]
  642. stacked_df = DataFrame({"foo": stacked_df, "bar": stacked_df})
  643. names = ["first", "second"]
  644. stacked_df.index.names = names
  645. result = stacked_df.reset_index()
  646. expected = stacked_df.reset_index(names=["new_first", "new_second"])
  647. tm.assert_series_equal(result["first"], expected["new_first"], check_names=False)
  648. tm.assert_series_equal(result["second"], expected["new_second"], check_names=False)
  649. def test_errorreset_index_rename(float_frame):
  650. # GH 6878
  651. stacked_df = float_frame.stack()[::2]
  652. stacked_df = DataFrame({"first": stacked_df, "second": stacked_df})
  653. with pytest.raises(
  654. ValueError, match="Index names must be str or 1-dimensional list"
  655. ):
  656. stacked_df.reset_index(names={"first": "new_first", "second": "new_second"})
  657. with pytest.raises(IndexError, match="list index out of range"):
  658. stacked_df.reset_index(names=["new_first"])