test_reindex.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. import numpy as np
  2. import pytest
  3. from pandas import (
  4. NA,
  5. Categorical,
  6. Float64Dtype,
  7. Index,
  8. MultiIndex,
  9. NaT,
  10. Period,
  11. PeriodIndex,
  12. RangeIndex,
  13. Series,
  14. Timedelta,
  15. Timestamp,
  16. date_range,
  17. isna,
  18. )
  19. import pandas._testing as tm
  20. def test_reindex(datetime_series, string_series):
  21. identity = string_series.reindex(string_series.index)
  22. # __array_interface__ is not defined for older numpies
  23. # and on some pythons
  24. try:
  25. assert np.may_share_memory(string_series.index, identity.index)
  26. except AttributeError:
  27. pass
  28. assert identity.index.is_(string_series.index)
  29. assert identity.index.identical(string_series.index)
  30. subIndex = string_series.index[10:20]
  31. subSeries = string_series.reindex(subIndex)
  32. for idx, val in subSeries.items():
  33. assert val == string_series[idx]
  34. subIndex2 = datetime_series.index[10:20]
  35. subTS = datetime_series.reindex(subIndex2)
  36. for idx, val in subTS.items():
  37. assert val == datetime_series[idx]
  38. stuffSeries = datetime_series.reindex(subIndex)
  39. assert np.isnan(stuffSeries).all()
  40. # This is extremely important for the Cython code to not screw up
  41. nonContigIndex = datetime_series.index[::2]
  42. subNonContig = datetime_series.reindex(nonContigIndex)
  43. for idx, val in subNonContig.items():
  44. assert val == datetime_series[idx]
  45. # return a copy the same index here
  46. result = datetime_series.reindex()
  47. assert result is not datetime_series
  48. def test_reindex_nan():
  49. ts = Series([2, 3, 5, 7], index=[1, 4, np.nan, 8])
  50. i, j = [np.nan, 1, np.nan, 8, 4, np.nan], [2, 0, 2, 3, 1, 2]
  51. tm.assert_series_equal(ts.reindex(i), ts.iloc[j])
  52. ts.index = ts.index.astype("object")
  53. # reindex coerces index.dtype to float, loc/iloc doesn't
  54. tm.assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False)
  55. def test_reindex_series_add_nat():
  56. rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s")
  57. series = Series(rng)
  58. result = series.reindex(range(15))
  59. assert np.issubdtype(result.dtype, np.dtype("M8[ns]"))
  60. mask = result.isna()
  61. assert mask[-5:].all()
  62. assert not mask[:-5].any()
  63. def test_reindex_with_datetimes():
  64. rng = date_range("1/1/2000", periods=20)
  65. ts = Series(np.random.randn(20), index=rng)
  66. result = ts.reindex(list(ts.index[5:10]))
  67. expected = ts[5:10]
  68. expected.index = expected.index._with_freq(None)
  69. tm.assert_series_equal(result, expected)
  70. result = ts[list(ts.index[5:10])]
  71. tm.assert_series_equal(result, expected)
  72. def test_reindex_corner(datetime_series):
  73. # (don't forget to fix this) I think it's fixed
  74. empty = Series(index=[])
  75. empty.reindex(datetime_series.index, method="pad") # it works
  76. # corner case: pad empty series
  77. reindexed = empty.reindex(datetime_series.index, method="pad")
  78. # pass non-Index
  79. reindexed = datetime_series.reindex(list(datetime_series.index))
  80. datetime_series.index = datetime_series.index._with_freq(None)
  81. tm.assert_series_equal(datetime_series, reindexed)
  82. # bad fill method
  83. ts = datetime_series[::2]
  84. msg = (
  85. r"Invalid fill method\. Expecting pad \(ffill\), backfill "
  86. r"\(bfill\) or nearest\. Got foo"
  87. )
  88. with pytest.raises(ValueError, match=msg):
  89. ts.reindex(datetime_series.index, method="foo")
  90. def test_reindex_pad():
  91. s = Series(np.arange(10), dtype="int64")
  92. s2 = s[::2]
  93. reindexed = s2.reindex(s.index, method="pad")
  94. reindexed2 = s2.reindex(s.index, method="ffill")
  95. tm.assert_series_equal(reindexed, reindexed2)
  96. expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8])
  97. tm.assert_series_equal(reindexed, expected)
  98. # GH4604
  99. s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"])
  100. new_index = ["a", "g", "c", "f"]
  101. expected = Series([1, 1, 3, 3], index=new_index)
  102. # this changes dtype because the ffill happens after
  103. result = s.reindex(new_index).ffill()
  104. tm.assert_series_equal(result, expected.astype("float64"))
  105. result = s.reindex(new_index).ffill(downcast="infer")
  106. tm.assert_series_equal(result, expected)
  107. expected = Series([1, 5, 3, 5], index=new_index)
  108. result = s.reindex(new_index, method="ffill")
  109. tm.assert_series_equal(result, expected)
  110. # inference of new dtype
  111. s = Series([True, False, False, True], index=list("abcd"))
  112. new_index = "agc"
  113. result = s.reindex(list(new_index)).ffill()
  114. expected = Series([True, True, False], index=list(new_index))
  115. tm.assert_series_equal(result, expected)
  116. # GH4618 shifted series downcasting
  117. s = Series(False, index=range(0, 5))
  118. result = s.shift(1).fillna(method="bfill")
  119. expected = Series(False, index=range(0, 5))
  120. tm.assert_series_equal(result, expected)
  121. def test_reindex_nearest():
  122. s = Series(np.arange(10, dtype="int64"))
  123. target = [0.1, 0.9, 1.5, 2.0]
  124. result = s.reindex(target, method="nearest")
  125. expected = Series(np.around(target).astype("int64"), target)
  126. tm.assert_series_equal(expected, result)
  127. result = s.reindex(target, method="nearest", tolerance=0.2)
  128. expected = Series([0, 1, np.nan, 2], target)
  129. tm.assert_series_equal(expected, result)
  130. result = s.reindex(target, method="nearest", tolerance=[0.3, 0.01, 0.4, 3])
  131. expected = Series([0, np.nan, np.nan, 2], target)
  132. tm.assert_series_equal(expected, result)
  133. def test_reindex_int(datetime_series):
  134. ts = datetime_series[::2]
  135. int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index)
  136. # this should work fine
  137. reindexed_int = int_ts.reindex(datetime_series.index)
  138. # if NaNs introduced
  139. assert reindexed_int.dtype == np.float_
  140. # NO NaNs introduced
  141. reindexed_int = int_ts.reindex(int_ts.index[::2])
  142. assert reindexed_int.dtype == np.int_
  143. def test_reindex_bool(datetime_series):
  144. # A series other than float, int, string, or object
  145. ts = datetime_series[::2]
  146. bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
  147. # this should work fine
  148. reindexed_bool = bool_ts.reindex(datetime_series.index)
  149. # if NaNs introduced
  150. assert reindexed_bool.dtype == np.object_
  151. # NO NaNs introduced
  152. reindexed_bool = bool_ts.reindex(bool_ts.index[::2])
  153. assert reindexed_bool.dtype == np.bool_
  154. def test_reindex_bool_pad(datetime_series):
  155. # fail
  156. ts = datetime_series[5:]
  157. bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
  158. filled_bool = bool_ts.reindex(datetime_series.index, method="pad")
  159. assert isna(filled_bool[:5]).all()
  160. def test_reindex_categorical():
  161. index = date_range("20000101", periods=3)
  162. # reindexing to an invalid Categorical
  163. s = Series(["a", "b", "c"], dtype="category")
  164. result = s.reindex(index)
  165. expected = Series(
  166. Categorical(values=[np.nan, np.nan, np.nan], categories=["a", "b", "c"])
  167. )
  168. expected.index = index
  169. tm.assert_series_equal(result, expected)
  170. # partial reindexing
  171. expected = Series(Categorical(values=["b", "c"], categories=["a", "b", "c"]))
  172. expected.index = [1, 2]
  173. result = s.reindex([1, 2])
  174. tm.assert_series_equal(result, expected)
  175. expected = Series(Categorical(values=["c", np.nan], categories=["a", "b", "c"]))
  176. expected.index = [2, 3]
  177. result = s.reindex([2, 3])
  178. tm.assert_series_equal(result, expected)
  179. def test_reindex_astype_order_consistency():
  180. # GH#17444
  181. ser = Series([1, 2, 3], index=[2, 0, 1])
  182. new_index = [0, 1, 2]
  183. temp_dtype = "category"
  184. new_dtype = str
  185. result = ser.reindex(new_index).astype(temp_dtype).astype(new_dtype)
  186. expected = ser.astype(temp_dtype).reindex(new_index).astype(new_dtype)
  187. tm.assert_series_equal(result, expected)
  188. def test_reindex_fill_value():
  189. # -----------------------------------------------------------
  190. # floats
  191. floats = Series([1.0, 2.0, 3.0])
  192. result = floats.reindex([1, 2, 3])
  193. expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3])
  194. tm.assert_series_equal(result, expected)
  195. result = floats.reindex([1, 2, 3], fill_value=0)
  196. expected = Series([2.0, 3.0, 0], index=[1, 2, 3])
  197. tm.assert_series_equal(result, expected)
  198. # -----------------------------------------------------------
  199. # ints
  200. ints = Series([1, 2, 3])
  201. result = ints.reindex([1, 2, 3])
  202. expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3])
  203. tm.assert_series_equal(result, expected)
  204. # don't upcast
  205. result = ints.reindex([1, 2, 3], fill_value=0)
  206. expected = Series([2, 3, 0], index=[1, 2, 3])
  207. assert issubclass(result.dtype.type, np.integer)
  208. tm.assert_series_equal(result, expected)
  209. # -----------------------------------------------------------
  210. # objects
  211. objects = Series([1, 2, 3], dtype=object)
  212. result = objects.reindex([1, 2, 3])
  213. expected = Series([2, 3, np.nan], index=[1, 2, 3], dtype=object)
  214. tm.assert_series_equal(result, expected)
  215. result = objects.reindex([1, 2, 3], fill_value="foo")
  216. expected = Series([2, 3, "foo"], index=[1, 2, 3], dtype=object)
  217. tm.assert_series_equal(result, expected)
  218. # ------------------------------------------------------------
  219. # bools
  220. bools = Series([True, False, True])
  221. result = bools.reindex([1, 2, 3])
  222. expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object)
  223. tm.assert_series_equal(result, expected)
  224. result = bools.reindex([1, 2, 3], fill_value=False)
  225. expected = Series([False, True, False], index=[1, 2, 3])
  226. tm.assert_series_equal(result, expected)
  227. @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
  228. @pytest.mark.parametrize("fill_value", ["string", 0, Timedelta(0)])
  229. def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value, using_array_manager):
  230. # https://github.com/pandas-dev/pandas/issues/42921
  231. if using_array_manager:
  232. pytest.skip("Array manager does not promote dtype, hence we fail")
  233. if dtype == "timedelta64[ns]" and fill_value == Timedelta(0):
  234. # use the scalar that is not compatible with the dtype for this test
  235. fill_value = Timestamp(0)
  236. ser = Series([NaT], dtype=dtype)
  237. result = ser.reindex([0, 1], fill_value=fill_value)
  238. expected = Series([None, fill_value], index=[0, 1], dtype=object)
  239. tm.assert_series_equal(result, expected)
  240. def test_reindex_datetimeindexes_tz_naive_and_aware():
  241. # GH 8306
  242. idx = date_range("20131101", tz="America/Chicago", periods=7)
  243. newidx = date_range("20131103", periods=10, freq="H")
  244. s = Series(range(7), index=idx)
  245. msg = (
  246. r"Cannot compare dtypes datetime64\[ns, America/Chicago\] "
  247. r"and datetime64\[ns\]"
  248. )
  249. with pytest.raises(TypeError, match=msg):
  250. s.reindex(newidx, method="ffill")
  251. def test_reindex_empty_series_tz_dtype():
  252. # GH 20869
  253. result = Series(dtype="datetime64[ns, UTC]").reindex([0, 1])
  254. expected = Series([NaT] * 2, dtype="datetime64[ns, UTC]")
  255. tm.assert_equal(result, expected)
  256. @pytest.mark.parametrize(
  257. "p_values, o_values, values, expected_values",
  258. [
  259. (
  260. [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
  261. [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC"), "All"],
  262. [1.0, 1.0],
  263. [1.0, 1.0, np.nan],
  264. ),
  265. (
  266. [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
  267. [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
  268. [1.0, 1.0],
  269. [1.0, 1.0],
  270. ),
  271. ],
  272. )
  273. def test_reindex_periodindex_with_object(p_values, o_values, values, expected_values):
  274. # GH#28337
  275. period_index = PeriodIndex(p_values)
  276. object_index = Index(o_values)
  277. ser = Series(values, index=period_index)
  278. result = ser.reindex(object_index)
  279. expected = Series(expected_values, index=object_index)
  280. tm.assert_series_equal(result, expected)
  281. def test_reindex_too_many_args():
  282. # GH 40980
  283. ser = Series([1, 2])
  284. msg = r"reindex\(\) takes from 1 to 2 positional arguments but 3 were given"
  285. with pytest.raises(TypeError, match=msg):
  286. ser.reindex([2, 3], False)
  287. def test_reindex_double_index():
  288. # GH 40980
  289. ser = Series([1, 2])
  290. msg = r"reindex\(\) got multiple values for argument 'index'"
  291. with pytest.raises(TypeError, match=msg):
  292. ser.reindex([2, 3], index=[3, 4])
  293. def test_reindex_no_posargs():
  294. # GH 40980
  295. ser = Series([1, 2])
  296. result = ser.reindex(index=[1, 0])
  297. expected = Series([2, 1], index=[1, 0])
  298. tm.assert_series_equal(result, expected)
  299. @pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
  300. def test_reindex_empty_with_level(values):
  301. # GH41170
  302. ser = Series(
  303. range(len(values[0])), index=MultiIndex.from_arrays(values), dtype="object"
  304. )
  305. result = ser.reindex(np.array(["b"]), level=0)
  306. expected = Series(
  307. index=MultiIndex(levels=[["b"], values[1]], codes=[[], []]), dtype="object"
  308. )
  309. tm.assert_series_equal(result, expected)
  310. def test_reindex_missing_category():
  311. # GH#18185
  312. ser = Series([1, 2, 3, 1], dtype="category")
  313. msg = r"Cannot setitem on a Categorical with a new category \(-1\)"
  314. with pytest.raises(TypeError, match=msg):
  315. ser.reindex([1, 2, 3, 4, 5], fill_value=-1)
  316. def test_reindexing_with_float64_NA_log():
  317. # GH 47055
  318. s = Series([1.0, NA], dtype=Float64Dtype())
  319. s_reindex = s.reindex(range(3))
  320. result = s_reindex.values._data
  321. expected = np.array([1, np.NaN, np.NaN])
  322. tm.assert_numpy_array_equal(result, expected)
  323. with tm.assert_produces_warning(None):
  324. result_log = np.log(s_reindex)
  325. expected_log = Series([0, np.NaN, np.NaN], dtype=Float64Dtype())
  326. tm.assert_series_equal(result_log, expected_log)
  327. @pytest.mark.parametrize("dtype", ["timedelta64", "datetime64"])
  328. def test_reindex_expand_nonnano_nat(dtype):
  329. # GH 53497
  330. ser = Series(np.array([1], dtype=f"{dtype}[s]"))
  331. result = ser.reindex(RangeIndex(2))
  332. expected = Series(
  333. np.array([1, getattr(np, dtype)("nat", "s")], dtype=f"{dtype}[s]")
  334. )
  335. tm.assert_series_equal(result, expected)