test_reindex.py 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256
  1. from datetime import (
  2. datetime,
  3. timedelta,
  4. )
  5. import inspect
  6. import numpy as np
  7. import pytest
  8. from pandas._libs.tslibs.timezones import dateutil_gettz as gettz
  9. import pandas.util._test_decorators as td
  10. import pandas as pd
  11. from pandas import (
  12. Categorical,
  13. CategoricalIndex,
  14. DataFrame,
  15. Index,
  16. MultiIndex,
  17. Series,
  18. date_range,
  19. isna,
  20. )
  21. import pandas._testing as tm
  22. from pandas.api.types import CategoricalDtype as CDT
  23. import pandas.core.common as com
  24. class TestReindexSetIndex:
  25. # Tests that check both reindex and set_index
  26. def test_dti_set_index_reindex_datetimeindex(self):
  27. # GH#6631
  28. df = DataFrame(np.random.random(6))
  29. idx1 = date_range("2011/01/01", periods=6, freq="M", tz="US/Eastern")
  30. idx2 = date_range("2013", periods=6, freq="A", tz="Asia/Tokyo")
  31. df = df.set_index(idx1)
  32. tm.assert_index_equal(df.index, idx1)
  33. df = df.reindex(idx2)
  34. tm.assert_index_equal(df.index, idx2)
  35. def test_dti_set_index_reindex_freq_with_tz(self):
  36. # GH#11314 with tz
  37. index = date_range(
  38. datetime(2015, 10, 1), datetime(2015, 10, 1, 23), freq="H", tz="US/Eastern"
  39. )
  40. df = DataFrame(np.random.randn(24, 1), columns=["a"], index=index)
  41. new_index = date_range(
  42. datetime(2015, 10, 2), datetime(2015, 10, 2, 23), freq="H", tz="US/Eastern"
  43. )
  44. result = df.set_index(new_index)
  45. assert result.index.freq == index.freq
  46. def test_set_reset_index_intervalindex(self):
  47. df = DataFrame({"A": range(10)})
  48. ser = pd.cut(df.A, 5)
  49. df["B"] = ser
  50. df = df.set_index("B")
  51. df = df.reset_index()
  52. def test_setitem_reset_index_dtypes(self):
  53. # GH 22060
  54. df = DataFrame(columns=["a", "b", "c"]).astype(
  55. {"a": "datetime64[ns]", "b": np.int64, "c": np.float64}
  56. )
  57. df1 = df.set_index(["a"])
  58. df1["d"] = []
  59. result = df1.reset_index()
  60. expected = DataFrame(columns=["a", "b", "c", "d"], index=range(0)).astype(
  61. {"a": "datetime64[ns]", "b": np.int64, "c": np.float64, "d": np.float64}
  62. )
  63. tm.assert_frame_equal(result, expected)
  64. df2 = df.set_index(["a", "b"])
  65. df2["d"] = []
  66. result = df2.reset_index()
  67. tm.assert_frame_equal(result, expected)
  68. @pytest.mark.parametrize(
  69. "timezone, year, month, day, hour",
  70. [["America/Chicago", 2013, 11, 3, 1], ["America/Santiago", 2021, 4, 3, 23]],
  71. )
  72. def test_reindex_timestamp_with_fold(self, timezone, year, month, day, hour):
  73. # see gh-40817
  74. test_timezone = gettz(timezone)
  75. transition_1 = pd.Timestamp(
  76. year=year,
  77. month=month,
  78. day=day,
  79. hour=hour,
  80. minute=0,
  81. fold=0,
  82. tzinfo=test_timezone,
  83. )
  84. transition_2 = pd.Timestamp(
  85. year=year,
  86. month=month,
  87. day=day,
  88. hour=hour,
  89. minute=0,
  90. fold=1,
  91. tzinfo=test_timezone,
  92. )
  93. df = (
  94. DataFrame({"index": [transition_1, transition_2], "vals": ["a", "b"]})
  95. .set_index("index")
  96. .reindex(["1", "2"])
  97. )
  98. tm.assert_frame_equal(
  99. df,
  100. DataFrame({"index": ["1", "2"], "vals": [None, None]}).set_index("index"),
  101. )
  102. class TestDataFrameSelectReindex:
  103. # These are specific reindex-based tests; other indexing tests should go in
  104. # test_indexing
  105. def test_reindex_copies(self):
  106. # based on asv time_reindex_axis1
  107. N = 10
  108. df = DataFrame(np.random.randn(N * 10, N))
  109. cols = np.arange(N)
  110. np.random.shuffle(cols)
  111. result = df.reindex(columns=cols, copy=True)
  112. assert not np.shares_memory(result[0]._values, df[0]._values)
  113. # pass both columns and index
  114. result2 = df.reindex(columns=cols, index=df.index, copy=True)
  115. assert not np.shares_memory(result2[0]._values, df[0]._values)
  116. def test_reindex_copies_ea(self, using_copy_on_write):
  117. # https://github.com/pandas-dev/pandas/pull/51197
  118. # also ensure to honor copy keyword for ExtensionDtypes
  119. N = 10
  120. df = DataFrame(np.random.randn(N * 10, N), dtype="Float64")
  121. cols = np.arange(N)
  122. np.random.shuffle(cols)
  123. result = df.reindex(columns=cols, copy=True)
  124. if using_copy_on_write:
  125. assert np.shares_memory(result[0].array._data, df[0].array._data)
  126. else:
  127. assert not np.shares_memory(result[0].array._data, df[0].array._data)
  128. # pass both columns and index
  129. result2 = df.reindex(columns=cols, index=df.index, copy=True)
  130. if using_copy_on_write:
  131. assert np.shares_memory(result2[0].array._data, df[0].array._data)
  132. else:
  133. assert not np.shares_memory(result2[0].array._data, df[0].array._data)
  134. @td.skip_array_manager_not_yet_implemented
  135. def test_reindex_date_fill_value(self):
  136. # passing date to dt64 is deprecated; enforced in 2.0 to cast to object
  137. arr = date_range("2016-01-01", periods=6).values.reshape(3, 2)
  138. df = DataFrame(arr, columns=["A", "B"], index=range(3))
  139. ts = df.iloc[0, 0]
  140. fv = ts.date()
  141. res = df.reindex(index=range(4), columns=["A", "B", "C"], fill_value=fv)
  142. expected = DataFrame(
  143. {"A": df["A"].tolist() + [fv], "B": df["B"].tolist() + [fv], "C": [fv] * 4},
  144. dtype=object,
  145. )
  146. tm.assert_frame_equal(res, expected)
  147. # only reindexing rows
  148. res = df.reindex(index=range(4), fill_value=fv)
  149. tm.assert_frame_equal(res, expected[["A", "B"]])
  150. # same with a datetime-castable str
  151. res = df.reindex(
  152. index=range(4), columns=["A", "B", "C"], fill_value="2016-01-01"
  153. )
  154. expected = DataFrame(
  155. {"A": df["A"].tolist() + [ts], "B": df["B"].tolist() + [ts], "C": [ts] * 4},
  156. )
  157. tm.assert_frame_equal(res, expected)
  158. def test_reindex_with_multi_index(self):
  159. # https://github.com/pandas-dev/pandas/issues/29896
  160. # tests for reindexing a multi-indexed DataFrame with a new MultiIndex
  161. #
  162. # confirms that we can reindex a multi-indexed DataFrame with a new
  163. # MultiIndex object correctly when using no filling, backfilling, and
  164. # padding
  165. #
  166. # The DataFrame, `df`, used in this test is:
  167. # c
  168. # a b
  169. # -1 0 A
  170. # 1 B
  171. # 2 C
  172. # 3 D
  173. # 4 E
  174. # 5 F
  175. # 6 G
  176. # 0 0 A
  177. # 1 B
  178. # 2 C
  179. # 3 D
  180. # 4 E
  181. # 5 F
  182. # 6 G
  183. # 1 0 A
  184. # 1 B
  185. # 2 C
  186. # 3 D
  187. # 4 E
  188. # 5 F
  189. # 6 G
  190. #
  191. # and the other MultiIndex, `new_multi_index`, is:
  192. # 0: 0 0.5
  193. # 1: 2.0
  194. # 2: 5.0
  195. # 3: 5.8
  196. df = DataFrame(
  197. {
  198. "a": [-1] * 7 + [0] * 7 + [1] * 7,
  199. "b": list(range(7)) * 3,
  200. "c": ["A", "B", "C", "D", "E", "F", "G"] * 3,
  201. }
  202. ).set_index(["a", "b"])
  203. new_index = [0.5, 2.0, 5.0, 5.8]
  204. new_multi_index = MultiIndex.from_product([[0], new_index], names=["a", "b"])
  205. # reindexing w/o a `method` value
  206. reindexed = df.reindex(new_multi_index)
  207. expected = DataFrame(
  208. {"a": [0] * 4, "b": new_index, "c": [np.nan, "C", "F", np.nan]}
  209. ).set_index(["a", "b"])
  210. tm.assert_frame_equal(expected, reindexed)
  211. # reindexing with backfilling
  212. expected = DataFrame(
  213. {"a": [0] * 4, "b": new_index, "c": ["B", "C", "F", "G"]}
  214. ).set_index(["a", "b"])
  215. reindexed_with_backfilling = df.reindex(new_multi_index, method="bfill")
  216. tm.assert_frame_equal(expected, reindexed_with_backfilling)
  217. reindexed_with_backfilling = df.reindex(new_multi_index, method="backfill")
  218. tm.assert_frame_equal(expected, reindexed_with_backfilling)
  219. # reindexing with padding
  220. expected = DataFrame(
  221. {"a": [0] * 4, "b": new_index, "c": ["A", "C", "F", "F"]}
  222. ).set_index(["a", "b"])
  223. reindexed_with_padding = df.reindex(new_multi_index, method="pad")
  224. tm.assert_frame_equal(expected, reindexed_with_padding)
  225. reindexed_with_padding = df.reindex(new_multi_index, method="ffill")
  226. tm.assert_frame_equal(expected, reindexed_with_padding)
  227. @pytest.mark.parametrize(
  228. "method,expected_values",
  229. [
  230. ("nearest", [0, 1, 1, 2]),
  231. ("pad", [np.nan, 0, 1, 1]),
  232. ("backfill", [0, 1, 2, 2]),
  233. ],
  234. )
  235. def test_reindex_methods(self, method, expected_values):
  236. df = DataFrame({"x": list(range(5))})
  237. target = np.array([-0.1, 0.9, 1.1, 1.5])
  238. expected = DataFrame({"x": expected_values}, index=target)
  239. actual = df.reindex(target, method=method)
  240. tm.assert_frame_equal(expected, actual)
  241. actual = df.reindex(target, method=method, tolerance=1)
  242. tm.assert_frame_equal(expected, actual)
  243. actual = df.reindex(target, method=method, tolerance=[1, 1, 1, 1])
  244. tm.assert_frame_equal(expected, actual)
  245. e2 = expected[::-1]
  246. actual = df.reindex(target[::-1], method=method)
  247. tm.assert_frame_equal(e2, actual)
  248. new_order = [3, 0, 2, 1]
  249. e2 = expected.iloc[new_order]
  250. actual = df.reindex(target[new_order], method=method)
  251. tm.assert_frame_equal(e2, actual)
  252. switched_method = (
  253. "pad" if method == "backfill" else "backfill" if method == "pad" else method
  254. )
  255. actual = df[::-1].reindex(target, method=switched_method)
  256. tm.assert_frame_equal(expected, actual)
  257. def test_reindex_methods_nearest_special(self):
  258. df = DataFrame({"x": list(range(5))})
  259. target = np.array([-0.1, 0.9, 1.1, 1.5])
  260. expected = DataFrame({"x": [0, 1, 1, np.nan]}, index=target)
  261. actual = df.reindex(target, method="nearest", tolerance=0.2)
  262. tm.assert_frame_equal(expected, actual)
  263. expected = DataFrame({"x": [0, np.nan, 1, np.nan]}, index=target)
  264. actual = df.reindex(target, method="nearest", tolerance=[0.5, 0.01, 0.4, 0.1])
  265. tm.assert_frame_equal(expected, actual)
  266. def test_reindex_nearest_tz(self, tz_aware_fixture):
  267. # GH26683
  268. tz = tz_aware_fixture
  269. idx = date_range("2019-01-01", periods=5, tz=tz)
  270. df = DataFrame({"x": list(range(5))}, index=idx)
  271. expected = df.head(3)
  272. actual = df.reindex(idx[:3], method="nearest")
  273. tm.assert_frame_equal(expected, actual)
  274. def test_reindex_nearest_tz_empty_frame(self):
  275. # https://github.com/pandas-dev/pandas/issues/31964
  276. dti = pd.DatetimeIndex(["2016-06-26 14:27:26+00:00"])
  277. df = DataFrame(index=pd.DatetimeIndex(["2016-07-04 14:00:59+00:00"]))
  278. expected = DataFrame(index=dti)
  279. result = df.reindex(dti, method="nearest")
  280. tm.assert_frame_equal(result, expected)
  281. def test_reindex_frame_add_nat(self):
  282. rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s")
  283. df = DataFrame({"A": np.random.randn(len(rng)), "B": rng})
  284. result = df.reindex(range(15))
  285. assert np.issubdtype(result["B"].dtype, np.dtype("M8[ns]"))
  286. mask = com.isna(result)["B"]
  287. assert mask[-5:].all()
  288. assert not mask[:-5].any()
  289. @pytest.mark.parametrize(
  290. "method, exp_values",
  291. [("ffill", [0, 1, 2, 3]), ("bfill", [1.0, 2.0, 3.0, np.nan])],
  292. )
  293. def test_reindex_frame_tz_ffill_bfill(self, frame_or_series, method, exp_values):
  294. # GH#38566
  295. obj = frame_or_series(
  296. [0, 1, 2, 3],
  297. index=date_range("2020-01-01 00:00:00", periods=4, freq="H", tz="UTC"),
  298. )
  299. new_index = date_range("2020-01-01 00:01:00", periods=4, freq="H", tz="UTC")
  300. result = obj.reindex(new_index, method=method, tolerance=pd.Timedelta("1 hour"))
  301. expected = frame_or_series(exp_values, index=new_index)
  302. tm.assert_equal(result, expected)
  303. def test_reindex_limit(self):
  304. # GH 28631
  305. data = [["A", "A", "A"], ["B", "B", "B"], ["C", "C", "C"], ["D", "D", "D"]]
  306. exp_data = [
  307. ["A", "A", "A"],
  308. ["B", "B", "B"],
  309. ["C", "C", "C"],
  310. ["D", "D", "D"],
  311. ["D", "D", "D"],
  312. [np.nan, np.nan, np.nan],
  313. ]
  314. df = DataFrame(data)
  315. result = df.reindex([0, 1, 2, 3, 4, 5], method="ffill", limit=1)
  316. expected = DataFrame(exp_data)
  317. tm.assert_frame_equal(result, expected)
  318. @pytest.mark.parametrize(
  319. "idx, check_index_type",
  320. [
  321. [["C", "B", "A"], True],
  322. [["F", "C", "A", "D"], True],
  323. [["A"], True],
  324. [["A", "B", "C"], True],
  325. [["C", "A", "B"], True],
  326. [["C", "B"], True],
  327. [["C", "A"], True],
  328. [["A", "B"], True],
  329. [["B", "A", "C"], True],
  330. # reindex by these causes different MultiIndex levels
  331. [["D", "F"], False],
  332. [["A", "C", "B"], False],
  333. ],
  334. )
  335. def test_reindex_level_verify_first_level(self, idx, check_index_type):
  336. df = DataFrame(
  337. {
  338. "jim": list("B" * 4 + "A" * 2 + "C" * 3),
  339. "joe": list("abcdeabcd")[::-1],
  340. "jolie": [10, 20, 30] * 3,
  341. "joline": np.random.randint(0, 1000, 9),
  342. }
  343. )
  344. icol = ["jim", "joe", "jolie"]
  345. def f(val):
  346. return np.nonzero((df["jim"] == val).to_numpy())[0]
  347. i = np.concatenate(list(map(f, idx)))
  348. left = df.set_index(icol).reindex(idx, level="jim")
  349. right = df.iloc[i].set_index(icol)
  350. tm.assert_frame_equal(left, right, check_index_type=check_index_type)
  351. @pytest.mark.parametrize(
  352. "idx",
  353. [
  354. ("mid",),
  355. ("mid", "btm"),
  356. ("mid", "btm", "top"),
  357. ("mid",),
  358. ("mid", "top"),
  359. ("mid", "top", "btm"),
  360. ("btm",),
  361. ("btm", "mid"),
  362. ("btm", "mid", "top"),
  363. ("btm",),
  364. ("btm", "top"),
  365. ("btm", "top", "mid"),
  366. ("top",),
  367. ("top", "mid"),
  368. ("top", "mid", "btm"),
  369. ("top",),
  370. ("top", "btm"),
  371. ("top", "btm", "mid"),
  372. ],
  373. )
  374. def test_reindex_level_verify_first_level_repeats(self, idx):
  375. df = DataFrame(
  376. {
  377. "jim": ["mid"] * 5 + ["btm"] * 8 + ["top"] * 7,
  378. "joe": ["3rd"] * 2
  379. + ["1st"] * 3
  380. + ["2nd"] * 3
  381. + ["1st"] * 2
  382. + ["3rd"] * 3
  383. + ["1st"] * 2
  384. + ["3rd"] * 3
  385. + ["2nd"] * 2,
  386. # this needs to be jointly unique with jim and joe or
  387. # reindexing will fail ~1.5% of the time, this works
  388. # out to needing unique groups of same size as joe
  389. "jolie": np.concatenate(
  390. [
  391. np.random.choice(1000, x, replace=False)
  392. for x in [2, 3, 3, 2, 3, 2, 3, 2]
  393. ]
  394. ),
  395. "joline": np.random.randn(20).round(3) * 10,
  396. }
  397. )
  398. icol = ["jim", "joe", "jolie"]
  399. def f(val):
  400. return np.nonzero((df["jim"] == val).to_numpy())[0]
  401. i = np.concatenate(list(map(f, idx)))
  402. left = df.set_index(icol).reindex(idx, level="jim")
  403. right = df.iloc[i].set_index(icol)
  404. tm.assert_frame_equal(left, right)
  405. @pytest.mark.parametrize(
  406. "idx, indexer",
  407. [
  408. [
  409. ["1st", "2nd", "3rd"],
  410. [2, 3, 4, 0, 1, 8, 9, 5, 6, 7, 10, 11, 12, 13, 14, 18, 19, 15, 16, 17],
  411. ],
  412. [
  413. ["3rd", "2nd", "1st"],
  414. [0, 1, 2, 3, 4, 10, 11, 12, 5, 6, 7, 8, 9, 15, 16, 17, 18, 19, 13, 14],
  415. ],
  416. [["2nd", "3rd"], [0, 1, 5, 6, 7, 10, 11, 12, 18, 19, 15, 16, 17]],
  417. [["3rd", "1st"], [0, 1, 2, 3, 4, 10, 11, 12, 8, 9, 15, 16, 17, 13, 14]],
  418. ],
  419. )
  420. def test_reindex_level_verify_repeats(self, idx, indexer):
  421. df = DataFrame(
  422. {
  423. "jim": ["mid"] * 5 + ["btm"] * 8 + ["top"] * 7,
  424. "joe": ["3rd"] * 2
  425. + ["1st"] * 3
  426. + ["2nd"] * 3
  427. + ["1st"] * 2
  428. + ["3rd"] * 3
  429. + ["1st"] * 2
  430. + ["3rd"] * 3
  431. + ["2nd"] * 2,
  432. # this needs to be jointly unique with jim and joe or
  433. # reindexing will fail ~1.5% of the time, this works
  434. # out to needing unique groups of same size as joe
  435. "jolie": np.concatenate(
  436. [
  437. np.random.choice(1000, x, replace=False)
  438. for x in [2, 3, 3, 2, 3, 2, 3, 2]
  439. ]
  440. ),
  441. "joline": np.random.randn(20).round(3) * 10,
  442. }
  443. )
  444. icol = ["jim", "joe", "jolie"]
  445. left = df.set_index(icol).reindex(idx, level="joe")
  446. right = df.iloc[indexer].set_index(icol)
  447. tm.assert_frame_equal(left, right)
  448. @pytest.mark.parametrize(
  449. "idx, indexer, check_index_type",
  450. [
  451. [list("abcde"), [3, 2, 1, 0, 5, 4, 8, 7, 6], True],
  452. [list("abcd"), [3, 2, 1, 0, 5, 8, 7, 6], True],
  453. [list("abc"), [3, 2, 1, 8, 7, 6], True],
  454. [list("eca"), [1, 3, 4, 6, 8], True],
  455. [list("edc"), [0, 1, 4, 5, 6], True],
  456. [list("eadbc"), [3, 0, 2, 1, 4, 5, 8, 7, 6], True],
  457. [list("edwq"), [0, 4, 5], True],
  458. [list("wq"), [], False],
  459. ],
  460. )
  461. def test_reindex_level_verify(self, idx, indexer, check_index_type):
  462. df = DataFrame(
  463. {
  464. "jim": list("B" * 4 + "A" * 2 + "C" * 3),
  465. "joe": list("abcdeabcd")[::-1],
  466. "jolie": [10, 20, 30] * 3,
  467. "joline": np.random.randint(0, 1000, 9),
  468. }
  469. )
  470. icol = ["jim", "joe", "jolie"]
  471. left = df.set_index(icol).reindex(idx, level="joe")
  472. right = df.iloc[indexer].set_index(icol)
  473. tm.assert_frame_equal(left, right, check_index_type=check_index_type)
  474. def test_non_monotonic_reindex_methods(self):
  475. dr = date_range("2013-08-01", periods=6, freq="B")
  476. data = np.random.randn(6, 1)
  477. df = DataFrame(data, index=dr, columns=list("A"))
  478. df_rev = DataFrame(data, index=dr[[3, 4, 5] + [0, 1, 2]], columns=list("A"))
  479. # index is not monotonic increasing or decreasing
  480. msg = "index must be monotonic increasing or decreasing"
  481. with pytest.raises(ValueError, match=msg):
  482. df_rev.reindex(df.index, method="pad")
  483. with pytest.raises(ValueError, match=msg):
  484. df_rev.reindex(df.index, method="ffill")
  485. with pytest.raises(ValueError, match=msg):
  486. df_rev.reindex(df.index, method="bfill")
  487. with pytest.raises(ValueError, match=msg):
  488. df_rev.reindex(df.index, method="nearest")
  489. def test_reindex_sparse(self):
  490. # https://github.com/pandas-dev/pandas/issues/35286
  491. df = DataFrame(
  492. {"A": [0, 1], "B": pd.array([0, 1], dtype=pd.SparseDtype("int64", 0))}
  493. )
  494. result = df.reindex([0, 2])
  495. expected = DataFrame(
  496. {
  497. "A": [0.0, np.nan],
  498. "B": pd.array([0.0, np.nan], dtype=pd.SparseDtype("float64", 0.0)),
  499. },
  500. index=[0, 2],
  501. )
  502. tm.assert_frame_equal(result, expected)
  503. def test_reindex(self, float_frame):
  504. datetime_series = tm.makeTimeSeries(nper=30)
  505. newFrame = float_frame.reindex(datetime_series.index)
  506. for col in newFrame.columns:
  507. for idx, val in newFrame[col].items():
  508. if idx in float_frame.index:
  509. if np.isnan(val):
  510. assert np.isnan(float_frame[col][idx])
  511. else:
  512. assert val == float_frame[col][idx]
  513. else:
  514. assert np.isnan(val)
  515. for col, series in newFrame.items():
  516. assert tm.equalContents(series.index, newFrame.index)
  517. emptyFrame = float_frame.reindex(Index([]))
  518. assert len(emptyFrame.index) == 0
  519. # Cython code should be unit-tested directly
  520. nonContigFrame = float_frame.reindex(datetime_series.index[::2])
  521. for col in nonContigFrame.columns:
  522. for idx, val in nonContigFrame[col].items():
  523. if idx in float_frame.index:
  524. if np.isnan(val):
  525. assert np.isnan(float_frame[col][idx])
  526. else:
  527. assert val == float_frame[col][idx]
  528. else:
  529. assert np.isnan(val)
  530. for col, series in nonContigFrame.items():
  531. assert tm.equalContents(series.index, nonContigFrame.index)
  532. # corner cases
  533. # Same index, copies values but not index if copy=False
  534. newFrame = float_frame.reindex(float_frame.index, copy=False)
  535. assert newFrame.index is float_frame.index
  536. # length zero
  537. newFrame = float_frame.reindex([])
  538. assert newFrame.empty
  539. assert len(newFrame.columns) == len(float_frame.columns)
  540. # length zero with columns reindexed with non-empty index
  541. newFrame = float_frame.reindex([])
  542. newFrame = newFrame.reindex(float_frame.index)
  543. assert len(newFrame.index) == len(float_frame.index)
  544. assert len(newFrame.columns) == len(float_frame.columns)
  545. # pass non-Index
  546. newFrame = float_frame.reindex(list(datetime_series.index))
  547. expected = datetime_series.index._with_freq(None)
  548. tm.assert_index_equal(newFrame.index, expected)
  549. # copy with no axes
  550. result = float_frame.reindex()
  551. tm.assert_frame_equal(result, float_frame)
  552. assert result is not float_frame
  553. def test_reindex_nan(self):
  554. df = DataFrame(
  555. [[1, 2], [3, 5], [7, 11], [9, 23]],
  556. index=[2, np.nan, 1, 5],
  557. columns=["joe", "jim"],
  558. )
  559. i, j = [np.nan, 5, 5, np.nan, 1, 2, np.nan], [1, 3, 3, 1, 2, 0, 1]
  560. tm.assert_frame_equal(df.reindex(i), df.iloc[j])
  561. df.index = df.index.astype("object")
  562. tm.assert_frame_equal(df.reindex(i), df.iloc[j], check_index_type=False)
  563. # GH10388
  564. df = DataFrame(
  565. {
  566. "other": ["a", "b", np.nan, "c"],
  567. "date": ["2015-03-22", np.nan, "2012-01-08", np.nan],
  568. "amount": [2, 3, 4, 5],
  569. }
  570. )
  571. df["date"] = pd.to_datetime(df.date)
  572. df["delta"] = (pd.to_datetime("2015-06-18") - df["date"]).shift(1)
  573. left = df.set_index(["delta", "other", "date"]).reset_index()
  574. right = df.reindex(columns=["delta", "other", "date", "amount"])
  575. tm.assert_frame_equal(left, right)
  576. def test_reindex_name_remains(self):
  577. s = Series(np.random.rand(10))
  578. df = DataFrame(s, index=np.arange(len(s)))
  579. i = Series(np.arange(10), name="iname")
  580. df = df.reindex(i)
  581. assert df.index.name == "iname"
  582. df = df.reindex(Index(np.arange(10), name="tmpname"))
  583. assert df.index.name == "tmpname"
  584. s = Series(np.random.rand(10))
  585. df = DataFrame(s.T, index=np.arange(len(s)))
  586. i = Series(np.arange(10), name="iname")
  587. df = df.reindex(columns=i)
  588. assert df.columns.name == "iname"
  589. def test_reindex_int(self, int_frame):
  590. smaller = int_frame.reindex(int_frame.index[::2])
  591. assert smaller["A"].dtype == np.int64
  592. bigger = smaller.reindex(int_frame.index)
  593. assert bigger["A"].dtype == np.float64
  594. smaller = int_frame.reindex(columns=["A", "B"])
  595. assert smaller["A"].dtype == np.int64
  596. def test_reindex_columns(self, float_frame):
  597. new_frame = float_frame.reindex(columns=["A", "B", "E"])
  598. tm.assert_series_equal(new_frame["B"], float_frame["B"])
  599. assert np.isnan(new_frame["E"]).all()
  600. assert "C" not in new_frame
  601. # Length zero
  602. new_frame = float_frame.reindex(columns=[])
  603. assert new_frame.empty
  604. def test_reindex_columns_method(self):
  605. # GH 14992, reindexing over columns ignored method
  606. df = DataFrame(
  607. data=[[11, 12, 13], [21, 22, 23], [31, 32, 33]],
  608. index=[1, 2, 4],
  609. columns=[1, 2, 4],
  610. dtype=float,
  611. )
  612. # default method
  613. result = df.reindex(columns=range(6))
  614. expected = DataFrame(
  615. data=[
  616. [np.nan, 11, 12, np.nan, 13, np.nan],
  617. [np.nan, 21, 22, np.nan, 23, np.nan],
  618. [np.nan, 31, 32, np.nan, 33, np.nan],
  619. ],
  620. index=[1, 2, 4],
  621. columns=range(6),
  622. dtype=float,
  623. )
  624. tm.assert_frame_equal(result, expected)
  625. # method='ffill'
  626. result = df.reindex(columns=range(6), method="ffill")
  627. expected = DataFrame(
  628. data=[
  629. [np.nan, 11, 12, 12, 13, 13],
  630. [np.nan, 21, 22, 22, 23, 23],
  631. [np.nan, 31, 32, 32, 33, 33],
  632. ],
  633. index=[1, 2, 4],
  634. columns=range(6),
  635. dtype=float,
  636. )
  637. tm.assert_frame_equal(result, expected)
  638. # method='bfill'
  639. result = df.reindex(columns=range(6), method="bfill")
  640. expected = DataFrame(
  641. data=[
  642. [11, 11, 12, 13, 13, np.nan],
  643. [21, 21, 22, 23, 23, np.nan],
  644. [31, 31, 32, 33, 33, np.nan],
  645. ],
  646. index=[1, 2, 4],
  647. columns=range(6),
  648. dtype=float,
  649. )
  650. tm.assert_frame_equal(result, expected)
  651. def test_reindex_axes(self):
  652. # GH 3317, reindexing by both axes loses freq of the index
  653. df = DataFrame(
  654. np.ones((3, 3)),
  655. index=[datetime(2012, 1, 1), datetime(2012, 1, 2), datetime(2012, 1, 3)],
  656. columns=["a", "b", "c"],
  657. )
  658. time_freq = date_range("2012-01-01", "2012-01-03", freq="d")
  659. some_cols = ["a", "b"]
  660. index_freq = df.reindex(index=time_freq).index.freq
  661. both_freq = df.reindex(index=time_freq, columns=some_cols).index.freq
  662. seq_freq = df.reindex(index=time_freq).reindex(columns=some_cols).index.freq
  663. assert index_freq == both_freq
  664. assert index_freq == seq_freq
  665. def test_reindex_fill_value(self):
  666. df = DataFrame(np.random.randn(10, 4))
  667. # axis=0
  668. result = df.reindex(list(range(15)))
  669. assert np.isnan(result.values[-5:]).all()
  670. result = df.reindex(range(15), fill_value=0)
  671. expected = df.reindex(range(15)).fillna(0)
  672. tm.assert_frame_equal(result, expected)
  673. # axis=1
  674. result = df.reindex(columns=range(5), fill_value=0.0)
  675. expected = df.copy()
  676. expected[4] = 0.0
  677. tm.assert_frame_equal(result, expected)
  678. result = df.reindex(columns=range(5), fill_value=0)
  679. expected = df.copy()
  680. expected[4] = 0
  681. tm.assert_frame_equal(result, expected)
  682. result = df.reindex(columns=range(5), fill_value="foo")
  683. expected = df.copy()
  684. expected[4] = "foo"
  685. tm.assert_frame_equal(result, expected)
  686. # other dtypes
  687. df["foo"] = "foo"
  688. result = df.reindex(range(15), fill_value=0)
  689. expected = df.reindex(range(15)).fillna(0)
  690. tm.assert_frame_equal(result, expected)
  691. def test_reindex_uint_dtypes_fill_value(self, any_unsigned_int_numpy_dtype):
  692. # GH#48184
  693. df = DataFrame({"a": [1, 2], "b": [1, 2]}, dtype=any_unsigned_int_numpy_dtype)
  694. result = df.reindex(columns=list("abcd"), index=[0, 1, 2, 3], fill_value=10)
  695. expected = DataFrame(
  696. {"a": [1, 2, 10, 10], "b": [1, 2, 10, 10], "c": 10, "d": 10},
  697. dtype=any_unsigned_int_numpy_dtype,
  698. )
  699. tm.assert_frame_equal(result, expected)
  700. def test_reindex_single_column_ea_index_and_columns(self, any_numeric_ea_dtype):
  701. # GH#48190
  702. df = DataFrame({"a": [1, 2]}, dtype=any_numeric_ea_dtype)
  703. result = df.reindex(columns=list("ab"), index=[0, 1, 2], fill_value=10)
  704. expected = DataFrame(
  705. {"a": Series([1, 2, 10], dtype=any_numeric_ea_dtype), "b": 10}
  706. )
  707. tm.assert_frame_equal(result, expected)
  708. def test_reindex_dups(self):
  709. # GH4746, reindex on duplicate index error messages
  710. arr = np.random.randn(10)
  711. df = DataFrame(arr, index=[1, 2, 3, 4, 5, 1, 2, 3, 4, 5])
  712. # set index is ok
  713. result = df.copy()
  714. result.index = list(range(len(df)))
  715. expected = DataFrame(arr, index=list(range(len(df))))
  716. tm.assert_frame_equal(result, expected)
  717. # reindex fails
  718. msg = "cannot reindex on an axis with duplicate labels"
  719. with pytest.raises(ValueError, match=msg):
  720. df.reindex(index=list(range(len(df))))
  721. def test_reindex_with_duplicate_columns(self):
  722. # reindex is invalid!
  723. df = DataFrame(
  724. [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"]
  725. )
  726. msg = "cannot reindex on an axis with duplicate labels"
  727. with pytest.raises(ValueError, match=msg):
  728. df.reindex(columns=["bar"])
  729. with pytest.raises(ValueError, match=msg):
  730. df.reindex(columns=["bar", "foo"])
  731. def test_reindex_axis_style(self):
  732. # https://github.com/pandas-dev/pandas/issues/12392
  733. df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
  734. expected = DataFrame(
  735. {"A": [1, 2, np.nan], "B": [4, 5, np.nan]}, index=[0, 1, 3]
  736. )
  737. result = df.reindex([0, 1, 3])
  738. tm.assert_frame_equal(result, expected)
  739. result = df.reindex([0, 1, 3], axis=0)
  740. tm.assert_frame_equal(result, expected)
  741. result = df.reindex([0, 1, 3], axis="index")
  742. tm.assert_frame_equal(result, expected)
  743. def test_reindex_positional_raises(self):
  744. # https://github.com/pandas-dev/pandas/issues/12392
  745. # Enforced in 2.0
  746. df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
  747. msg = r"reindex\(\) takes from 1 to 2 positional arguments but 3 were given"
  748. with pytest.raises(TypeError, match=msg):
  749. df.reindex([0, 1], ["A", "B", "C"])
  750. def test_reindex_axis_style_raises(self):
  751. # https://github.com/pandas-dev/pandas/issues/12392
  752. df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
  753. with pytest.raises(TypeError, match="Cannot specify both 'axis'"):
  754. df.reindex([0, 1], columns=["A"], axis=1)
  755. with pytest.raises(TypeError, match="Cannot specify both 'axis'"):
  756. df.reindex([0, 1], columns=["A"], axis="index")
  757. with pytest.raises(TypeError, match="Cannot specify both 'axis'"):
  758. df.reindex(index=[0, 1], axis="index")
  759. with pytest.raises(TypeError, match="Cannot specify both 'axis'"):
  760. df.reindex(index=[0, 1], axis="columns")
  761. with pytest.raises(TypeError, match="Cannot specify both 'axis'"):
  762. df.reindex(columns=[0, 1], axis="columns")
  763. with pytest.raises(TypeError, match="Cannot specify both 'axis'"):
  764. df.reindex(index=[0, 1], columns=[0, 1], axis="columns")
  765. with pytest.raises(TypeError, match="Cannot specify all"):
  766. df.reindex(labels=[0, 1], index=[0], columns=["A"])
  767. # Mixing styles
  768. with pytest.raises(TypeError, match="Cannot specify both 'axis'"):
  769. df.reindex(index=[0, 1], axis="index")
  770. with pytest.raises(TypeError, match="Cannot specify both 'axis'"):
  771. df.reindex(index=[0, 1], axis="columns")
  772. # Duplicates
  773. with pytest.raises(TypeError, match="multiple values"):
  774. df.reindex([0, 1], labels=[0, 1])
  775. def test_reindex_single_named_indexer(self):
  776. # https://github.com/pandas-dev/pandas/issues/12392
  777. df = DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]})
  778. result = df.reindex([0, 1], columns=["A"])
  779. expected = DataFrame({"A": [1, 2]})
  780. tm.assert_frame_equal(result, expected)
  781. def test_reindex_api_equivalence(self):
  782. # https://github.com/pandas-dev/pandas/issues/12392
  783. # equivalence of the labels/axis and index/columns API's
  784. df = DataFrame(
  785. [[1, 2, 3], [3, 4, 5], [5, 6, 7]],
  786. index=["a", "b", "c"],
  787. columns=["d", "e", "f"],
  788. )
  789. res1 = df.reindex(["b", "a"])
  790. res2 = df.reindex(index=["b", "a"])
  791. res3 = df.reindex(labels=["b", "a"])
  792. res4 = df.reindex(labels=["b", "a"], axis=0)
  793. res5 = df.reindex(["b", "a"], axis=0)
  794. for res in [res2, res3, res4, res5]:
  795. tm.assert_frame_equal(res1, res)
  796. res1 = df.reindex(columns=["e", "d"])
  797. res2 = df.reindex(["e", "d"], axis=1)
  798. res3 = df.reindex(labels=["e", "d"], axis=1)
  799. for res in [res2, res3]:
  800. tm.assert_frame_equal(res1, res)
  801. res1 = df.reindex(index=["b", "a"], columns=["e", "d"])
  802. res2 = df.reindex(columns=["e", "d"], index=["b", "a"])
  803. res3 = df.reindex(labels=["b", "a"], axis=0).reindex(labels=["e", "d"], axis=1)
  804. for res in [res2, res3]:
  805. tm.assert_frame_equal(res1, res)
  806. def test_reindex_boolean(self):
  807. frame = DataFrame(
  808. np.ones((10, 2), dtype=bool), index=np.arange(0, 20, 2), columns=[0, 2]
  809. )
  810. reindexed = frame.reindex(np.arange(10))
  811. assert reindexed.values.dtype == np.object_
  812. assert isna(reindexed[0][1])
  813. reindexed = frame.reindex(columns=range(3))
  814. assert reindexed.values.dtype == np.object_
  815. assert isna(reindexed[1]).all()
  816. def test_reindex_objects(self, float_string_frame):
  817. reindexed = float_string_frame.reindex(columns=["foo", "A", "B"])
  818. assert "foo" in reindexed
  819. reindexed = float_string_frame.reindex(columns=["A", "B"])
  820. assert "foo" not in reindexed
  821. def test_reindex_corner(self, int_frame):
  822. index = Index(["a", "b", "c"])
  823. dm = DataFrame({}).reindex(index=[1, 2, 3])
  824. reindexed = dm.reindex(columns=index)
  825. tm.assert_index_equal(reindexed.columns, index)
  826. # ints are weird
  827. smaller = int_frame.reindex(columns=["A", "B", "E"])
  828. assert smaller["E"].dtype == np.float64
  829. def test_reindex_with_nans(self):
  830. df = DataFrame(
  831. [[1, 2], [3, 4], [np.nan, np.nan], [7, 8], [9, 10]],
  832. columns=["a", "b"],
  833. index=[100.0, 101.0, np.nan, 102.0, 103.0],
  834. )
  835. result = df.reindex(index=[101.0, 102.0, 103.0])
  836. expected = df.iloc[[1, 3, 4]]
  837. tm.assert_frame_equal(result, expected)
  838. result = df.reindex(index=[103.0])
  839. expected = df.iloc[[4]]
  840. tm.assert_frame_equal(result, expected)
  841. result = df.reindex(index=[101.0])
  842. expected = df.iloc[[1]]
  843. tm.assert_frame_equal(result, expected)
  844. def test_reindex_multi(self):
  845. df = DataFrame(np.random.randn(3, 3))
  846. result = df.reindex(index=range(4), columns=range(4))
  847. expected = df.reindex(list(range(4))).reindex(columns=range(4))
  848. tm.assert_frame_equal(result, expected)
  849. df = DataFrame(np.random.randint(0, 10, (3, 3)))
  850. result = df.reindex(index=range(4), columns=range(4))
  851. expected = df.reindex(list(range(4))).reindex(columns=range(4))
  852. tm.assert_frame_equal(result, expected)
  853. df = DataFrame(np.random.randint(0, 10, (3, 3)))
  854. result = df.reindex(index=range(2), columns=range(2))
  855. expected = df.reindex(range(2)).reindex(columns=range(2))
  856. tm.assert_frame_equal(result, expected)
  857. df = DataFrame(np.random.randn(5, 3) + 1j, columns=["a", "b", "c"])
  858. result = df.reindex(index=[0, 1], columns=["a", "b"])
  859. expected = df.reindex([0, 1]).reindex(columns=["a", "b"])
  860. tm.assert_frame_equal(result, expected)
  861. def test_reindex_multi_categorical_time(self):
  862. # https://github.com/pandas-dev/pandas/issues/21390
  863. midx = MultiIndex.from_product(
  864. [
  865. Categorical(["a", "b", "c"]),
  866. Categorical(date_range("2012-01-01", periods=3, freq="H")),
  867. ]
  868. )
  869. df = DataFrame({"a": range(len(midx))}, index=midx)
  870. df2 = df.iloc[[0, 1, 2, 3, 4, 5, 6, 8]]
  871. result = df2.reindex(midx)
  872. expected = DataFrame({"a": [0, 1, 2, 3, 4, 5, 6, np.nan, 8]}, index=midx)
  873. tm.assert_frame_equal(result, expected)
  874. def test_reindex_with_categoricalindex(self):
  875. df = DataFrame(
  876. {
  877. "A": np.arange(3, dtype="int64"),
  878. },
  879. index=CategoricalIndex(list("abc"), dtype=CDT(list("cabe")), name="B"),
  880. )
  881. # reindexing
  882. # convert to a regular index
  883. result = df.reindex(["a", "b", "e"])
  884. expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index(
  885. "B"
  886. )
  887. tm.assert_frame_equal(result, expected, check_index_type=True)
  888. result = df.reindex(["a", "b"])
  889. expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B")
  890. tm.assert_frame_equal(result, expected, check_index_type=True)
  891. result = df.reindex(["e"])
  892. expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B")
  893. tm.assert_frame_equal(result, expected, check_index_type=True)
  894. result = df.reindex(["d"])
  895. expected = DataFrame({"A": [np.nan], "B": Series(["d"])}).set_index("B")
  896. tm.assert_frame_equal(result, expected, check_index_type=True)
  897. # since we are actually reindexing with a Categorical
  898. # then return a Categorical
  899. cats = list("cabe")
  900. result = df.reindex(Categorical(["a", "e"], categories=cats))
  901. expected = DataFrame(
  902. {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats))}
  903. ).set_index("B")
  904. tm.assert_frame_equal(result, expected, check_index_type=True)
  905. result = df.reindex(Categorical(["a"], categories=cats))
  906. expected = DataFrame(
  907. {"A": [0], "B": Series(list("a")).astype(CDT(cats))}
  908. ).set_index("B")
  909. tm.assert_frame_equal(result, expected, check_index_type=True)
  910. result = df.reindex(["a", "b", "e"])
  911. expected = DataFrame({"A": [0, 1, np.nan], "B": Series(list("abe"))}).set_index(
  912. "B"
  913. )
  914. tm.assert_frame_equal(result, expected, check_index_type=True)
  915. result = df.reindex(["a", "b"])
  916. expected = DataFrame({"A": [0, 1], "B": Series(list("ab"))}).set_index("B")
  917. tm.assert_frame_equal(result, expected, check_index_type=True)
  918. result = df.reindex(["e"])
  919. expected = DataFrame({"A": [np.nan], "B": Series(["e"])}).set_index("B")
  920. tm.assert_frame_equal(result, expected, check_index_type=True)
  921. # give back the type of categorical that we received
  922. result = df.reindex(Categorical(["a", "e"], categories=cats, ordered=True))
  923. expected = DataFrame(
  924. {"A": [0, np.nan], "B": Series(list("ae")).astype(CDT(cats, ordered=True))}
  925. ).set_index("B")
  926. tm.assert_frame_equal(result, expected, check_index_type=True)
  927. result = df.reindex(Categorical(["a", "d"], categories=["a", "d"]))
  928. expected = DataFrame(
  929. {"A": [0, np.nan], "B": Series(list("ad")).astype(CDT(["a", "d"]))}
  930. ).set_index("B")
  931. tm.assert_frame_equal(result, expected, check_index_type=True)
  932. df2 = DataFrame(
  933. {
  934. "A": np.arange(6, dtype="int64"),
  935. },
  936. index=CategoricalIndex(list("aabbca"), dtype=CDT(list("cabe")), name="B"),
  937. )
  938. # passed duplicate indexers are not allowed
  939. msg = "cannot reindex on an axis with duplicate labels"
  940. with pytest.raises(ValueError, match=msg):
  941. df2.reindex(["a", "b"])
  942. # args NotImplemented ATM
  943. msg = r"argument {} is not implemented for CategoricalIndex\.reindex"
  944. with pytest.raises(NotImplementedError, match=msg.format("method")):
  945. df.reindex(["a"], method="ffill")
  946. with pytest.raises(NotImplementedError, match=msg.format("level")):
  947. df.reindex(["a"], level=1)
  948. with pytest.raises(NotImplementedError, match=msg.format("limit")):
  949. df.reindex(["a"], limit=2)
  950. def test_reindex_signature(self):
  951. sig = inspect.signature(DataFrame.reindex)
  952. parameters = set(sig.parameters)
  953. assert parameters == {
  954. "self",
  955. "labels",
  956. "index",
  957. "columns",
  958. "axis",
  959. "limit",
  960. "copy",
  961. "level",
  962. "method",
  963. "fill_value",
  964. "tolerance",
  965. }
  966. def test_reindex_multiindex_ffill_added_rows(self):
  967. # GH#23693
  968. # reindex added rows with nan values even when fill method was specified
  969. mi = MultiIndex.from_tuples([("a", "b"), ("d", "e")])
  970. df = DataFrame([[0, 7], [3, 4]], index=mi, columns=["x", "y"])
  971. mi2 = MultiIndex.from_tuples([("a", "b"), ("d", "e"), ("h", "i")])
  972. result = df.reindex(mi2, axis=0, method="ffill")
  973. expected = DataFrame([[0, 7], [3, 4], [3, 4]], index=mi2, columns=["x", "y"])
  974. tm.assert_frame_equal(result, expected)
  975. @pytest.mark.parametrize(
  976. "kwargs",
  977. [
  978. {"method": "pad", "tolerance": timedelta(seconds=9)},
  979. {"method": "backfill", "tolerance": timedelta(seconds=9)},
  980. {"method": "nearest"},
  981. {"method": None},
  982. ],
  983. )
  984. def test_reindex_empty_frame(self, kwargs):
  985. # GH#27315
  986. idx = date_range(start="2020", freq="30s", periods=3)
  987. df = DataFrame([], index=Index([], name="time"), columns=["a"])
  988. result = df.reindex(idx, **kwargs)
  989. expected = DataFrame({"a": [pd.NA] * 3}, index=idx)
  990. tm.assert_frame_equal(result, expected)
  991. @pytest.mark.parametrize(
  992. "src_idx",
  993. [
  994. Index([]),
  995. CategoricalIndex([]),
  996. ],
  997. )
  998. @pytest.mark.parametrize(
  999. "cat_idx",
  1000. [
  1001. # No duplicates
  1002. Index([]),
  1003. CategoricalIndex([]),
  1004. Index(["A", "B"]),
  1005. CategoricalIndex(["A", "B"]),
  1006. # Duplicates: GH#38906
  1007. Index(["A", "A"]),
  1008. CategoricalIndex(["A", "A"]),
  1009. ],
  1010. )
  1011. def test_reindex_empty(self, src_idx, cat_idx):
  1012. df = DataFrame(columns=src_idx, index=["K"], dtype="f8")
  1013. result = df.reindex(columns=cat_idx)
  1014. expected = DataFrame(index=["K"], columns=cat_idx, dtype="f8")
  1015. tm.assert_frame_equal(result, expected)
  1016. @pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]"])
  1017. def test_reindex_datetimelike_to_object(self, dtype):
  1018. # GH#39755 dont cast dt64/td64 to ints
  1019. mi = MultiIndex.from_product([list("ABCDE"), range(2)])
  1020. dti = date_range("2016-01-01", periods=10)
  1021. fv = np.timedelta64("NaT", "ns")
  1022. if dtype == "m8[ns]":
  1023. dti = dti - dti[0]
  1024. fv = np.datetime64("NaT", "ns")
  1025. ser = Series(dti, index=mi)
  1026. ser[::3] = pd.NaT
  1027. df = ser.unstack()
  1028. index = df.index.append(Index([1]))
  1029. columns = df.columns.append(Index(["foo"]))
  1030. res = df.reindex(index=index, columns=columns, fill_value=fv)
  1031. expected = DataFrame(
  1032. {
  1033. 0: df[0].tolist() + [fv],
  1034. 1: df[1].tolist() + [fv],
  1035. "foo": np.array(["NaT"] * 6, dtype=fv.dtype),
  1036. },
  1037. index=index,
  1038. )
  1039. assert (res.dtypes[[0, 1]] == object).all()
  1040. assert res.iloc[0, 0] is pd.NaT
  1041. assert res.iloc[-1, 0] is fv
  1042. assert res.iloc[-1, 1] is fv
  1043. tm.assert_frame_equal(res, expected)
  1044. @pytest.mark.parametrize(
  1045. "index_df,index_res,index_exp",
  1046. [
  1047. (
  1048. CategoricalIndex([], categories=["A"]),
  1049. Index(["A"]),
  1050. Index(["A"]),
  1051. ),
  1052. (
  1053. CategoricalIndex([], categories=["A"]),
  1054. Index(["B"]),
  1055. Index(["B"]),
  1056. ),
  1057. (
  1058. CategoricalIndex([], categories=["A"]),
  1059. CategoricalIndex(["A"]),
  1060. CategoricalIndex(["A"]),
  1061. ),
  1062. (
  1063. CategoricalIndex([], categories=["A"]),
  1064. CategoricalIndex(["B"]),
  1065. CategoricalIndex(["B"]),
  1066. ),
  1067. ],
  1068. )
  1069. def test_reindex_not_category(self, index_df, index_res, index_exp):
  1070. # GH#28690
  1071. df = DataFrame(index=index_df)
  1072. result = df.reindex(index=index_res)
  1073. expected = DataFrame(index=index_exp)
  1074. tm.assert_frame_equal(result, expected)