# test_coercion.py (30 KB)
  1. from __future__ import annotations
  2. from datetime import (
  3. datetime,
  4. timedelta,
  5. )
  6. import itertools
  7. import numpy as np
  8. import pytest
  9. from pandas.compat import (
  10. IS64,
  11. is_platform_windows,
  12. )
  13. import pandas as pd
  14. import pandas._testing as tm
  15. ###############################################################
  16. # Index / Series common tests which may trigger dtype coercions
  17. ###############################################################
  18. @pytest.fixture(autouse=True, scope="class")
  19. def check_comprehensiveness(request):
  20. # Iterate over combination of dtype, method and klass
  21. # and ensure that each are contained within a collected test
  22. cls = request.cls
  23. combos = itertools.product(cls.klasses, cls.dtypes, [cls.method])
  24. def has_test(combo):
  25. klass, dtype, method = combo
  26. cls_funcs = request.node.session.items
  27. return any(
  28. klass in x.name and dtype in x.name and method in x.name for x in cls_funcs
  29. )
  30. opts = request.config.option
  31. if opts.lf or opts.keyword:
  32. # If we are running with "last-failed" or -k foo, we expect to only
  33. # run a subset of tests.
  34. yield
  35. else:
  36. for combo in combos:
  37. if not has_test(combo):
  38. raise AssertionError(
  39. f"test method is not defined: {cls.__name__}, {combo}"
  40. )
  41. yield
  42. class CoercionBase:
  43. klasses = ["index", "series"]
  44. dtypes = [
  45. "object",
  46. "int64",
  47. "float64",
  48. "complex128",
  49. "bool",
  50. "datetime64",
  51. "datetime64tz",
  52. "timedelta64",
  53. "period",
  54. ]
  55. @property
  56. def method(self):
  57. raise NotImplementedError(self)
  58. class TestSetitemCoercion(CoercionBase):
  59. method = "setitem"
  60. # disable comprehensiveness tests, as most of these have been moved to
  61. # tests.series.indexing.test_setitem in SetitemCastingEquivalents subclasses.
  62. klasses: list[str] = []
  63. def test_setitem_series_no_coercion_from_values_list(self):
  64. # GH35865 - int casted to str when internally calling np.array(ser.values)
  65. ser = pd.Series(["a", 1])
  66. ser[:] = list(ser.values)
  67. expected = pd.Series(["a", 1])
  68. tm.assert_series_equal(ser, expected)
  69. def _assert_setitem_index_conversion(
  70. self, original_series, loc_key, expected_index, expected_dtype
  71. ):
  72. """test index's coercion triggered by assign key"""
  73. temp = original_series.copy()
  74. # GH#33469 pre-2.0 with int loc_key and temp.index.dtype == np.float64
  75. # `temp[loc_key] = 5` treated loc_key as positional
  76. temp[loc_key] = 5
  77. exp = pd.Series([1, 2, 3, 4, 5], index=expected_index)
  78. tm.assert_series_equal(temp, exp)
  79. # check dtype explicitly for sure
  80. assert temp.index.dtype == expected_dtype
  81. temp = original_series.copy()
  82. temp.loc[loc_key] = 5
  83. exp = pd.Series([1, 2, 3, 4, 5], index=expected_index)
  84. tm.assert_series_equal(temp, exp)
  85. # check dtype explicitly for sure
  86. assert temp.index.dtype == expected_dtype
  87. @pytest.mark.parametrize(
  88. "val,exp_dtype", [("x", object), (5, IndexError), (1.1, object)]
  89. )
  90. def test_setitem_index_object(self, val, exp_dtype):
  91. obj = pd.Series([1, 2, 3, 4], index=list("abcd"))
  92. assert obj.index.dtype == object
  93. if exp_dtype is IndexError:
  94. temp = obj.copy()
  95. msg = "index 5 is out of bounds for axis 0 with size 4"
  96. with pytest.raises(exp_dtype, match=msg):
  97. temp[5] = 5
  98. else:
  99. exp_index = pd.Index(list("abcd") + [val])
  100. self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
  101. @pytest.mark.parametrize(
  102. "val,exp_dtype", [(5, np.int64), (1.1, np.float64), ("x", object)]
  103. )
  104. def test_setitem_index_int64(self, val, exp_dtype):
  105. obj = pd.Series([1, 2, 3, 4])
  106. assert obj.index.dtype == np.int64
  107. exp_index = pd.Index([0, 1, 2, 3, val])
  108. self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
  109. @pytest.mark.parametrize(
  110. "val,exp_dtype", [(5, np.float64), (5.1, np.float64), ("x", object)]
  111. )
  112. def test_setitem_index_float64(self, val, exp_dtype, request):
  113. obj = pd.Series([1, 2, 3, 4], index=[1.1, 2.1, 3.1, 4.1])
  114. assert obj.index.dtype == np.float64
  115. exp_index = pd.Index([1.1, 2.1, 3.1, 4.1, val])
  116. self._assert_setitem_index_conversion(obj, val, exp_index, exp_dtype)
  117. @pytest.mark.xfail(reason="Test not implemented")
  118. def test_setitem_series_period(self):
  119. raise NotImplementedError
  120. @pytest.mark.xfail(reason="Test not implemented")
  121. def test_setitem_index_complex128(self):
  122. raise NotImplementedError
  123. @pytest.mark.xfail(reason="Test not implemented")
  124. def test_setitem_index_bool(self):
  125. raise NotImplementedError
  126. @pytest.mark.xfail(reason="Test not implemented")
  127. def test_setitem_index_datetime64(self):
  128. raise NotImplementedError
  129. @pytest.mark.xfail(reason="Test not implemented")
  130. def test_setitem_index_datetime64tz(self):
  131. raise NotImplementedError
  132. @pytest.mark.xfail(reason="Test not implemented")
  133. def test_setitem_index_timedelta64(self):
  134. raise NotImplementedError
  135. @pytest.mark.xfail(reason="Test not implemented")
  136. def test_setitem_index_period(self):
  137. raise NotImplementedError
  138. class TestInsertIndexCoercion(CoercionBase):
  139. klasses = ["index"]
  140. method = "insert"
  141. def _assert_insert_conversion(self, original, value, expected, expected_dtype):
  142. """test coercion triggered by insert"""
  143. target = original.copy()
  144. res = target.insert(1, value)
  145. tm.assert_index_equal(res, expected)
  146. assert res.dtype == expected_dtype
  147. @pytest.mark.parametrize(
  148. "insert, coerced_val, coerced_dtype",
  149. [
  150. (1, 1, object),
  151. (1.1, 1.1, object),
  152. (False, False, object),
  153. ("x", "x", object),
  154. ],
  155. )
  156. def test_insert_index_object(self, insert, coerced_val, coerced_dtype):
  157. obj = pd.Index(list("abcd"))
  158. assert obj.dtype == object
  159. exp = pd.Index(["a", coerced_val, "b", "c", "d"])
  160. self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
  161. @pytest.mark.parametrize(
  162. "insert, coerced_val, coerced_dtype",
  163. [
  164. (1, 1, None),
  165. (1.1, 1.1, np.float64),
  166. (False, False, object), # GH#36319
  167. ("x", "x", object),
  168. ],
  169. )
  170. def test_insert_int_index(
  171. self, any_int_numpy_dtype, insert, coerced_val, coerced_dtype
  172. ):
  173. dtype = any_int_numpy_dtype
  174. obj = pd.Index([1, 2, 3, 4], dtype=dtype)
  175. coerced_dtype = coerced_dtype if coerced_dtype is not None else dtype
  176. exp = pd.Index([1, coerced_val, 2, 3, 4], dtype=coerced_dtype)
  177. self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
  178. @pytest.mark.parametrize(
  179. "insert, coerced_val, coerced_dtype",
  180. [
  181. (1, 1.0, None),
  182. (1.1, 1.1, np.float64),
  183. (False, False, object), # GH#36319
  184. ("x", "x", object),
  185. ],
  186. )
  187. def test_insert_float_index(
  188. self, float_numpy_dtype, insert, coerced_val, coerced_dtype
  189. ):
  190. dtype = float_numpy_dtype
  191. obj = pd.Index([1.0, 2.0, 3.0, 4.0], dtype=dtype)
  192. coerced_dtype = coerced_dtype if coerced_dtype is not None else dtype
  193. exp = pd.Index([1.0, coerced_val, 2.0, 3.0, 4.0], dtype=coerced_dtype)
  194. self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
  195. @pytest.mark.parametrize(
  196. "fill_val,exp_dtype",
  197. [
  198. (pd.Timestamp("2012-01-01"), "datetime64[ns]"),
  199. (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"),
  200. ],
  201. ids=["datetime64", "datetime64tz"],
  202. )
  203. @pytest.mark.parametrize(
  204. "insert_value",
  205. [pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), 1],
  206. )
  207. def test_insert_index_datetimes(self, fill_val, exp_dtype, insert_value):
  208. obj = pd.DatetimeIndex(
  209. ["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], tz=fill_val.tz
  210. )
  211. assert obj.dtype == exp_dtype
  212. exp = pd.DatetimeIndex(
  213. ["2011-01-01", fill_val.date(), "2011-01-02", "2011-01-03", "2011-01-04"],
  214. tz=fill_val.tz,
  215. )
  216. self._assert_insert_conversion(obj, fill_val, exp, exp_dtype)
  217. if fill_val.tz:
  218. # mismatched tzawareness
  219. ts = pd.Timestamp("2012-01-01")
  220. result = obj.insert(1, ts)
  221. expected = obj.astype(object).insert(1, ts)
  222. assert expected.dtype == object
  223. tm.assert_index_equal(result, expected)
  224. ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo")
  225. result = obj.insert(1, ts)
  226. # once deprecation is enforced:
  227. expected = obj.insert(1, ts.tz_convert(obj.dtype.tz))
  228. assert expected.dtype == obj.dtype
  229. tm.assert_index_equal(result, expected)
  230. else:
  231. # mismatched tzawareness
  232. ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo")
  233. result = obj.insert(1, ts)
  234. expected = obj.astype(object).insert(1, ts)
  235. assert expected.dtype == object
  236. tm.assert_index_equal(result, expected)
  237. item = 1
  238. result = obj.insert(1, item)
  239. expected = obj.astype(object).insert(1, item)
  240. assert expected[1] == item
  241. assert expected.dtype == object
  242. tm.assert_index_equal(result, expected)
  243. def test_insert_index_timedelta64(self):
  244. obj = pd.TimedeltaIndex(["1 day", "2 day", "3 day", "4 day"])
  245. assert obj.dtype == "timedelta64[ns]"
  246. # timedelta64 + timedelta64 => timedelta64
  247. exp = pd.TimedeltaIndex(["1 day", "10 day", "2 day", "3 day", "4 day"])
  248. self._assert_insert_conversion(
  249. obj, pd.Timedelta("10 day"), exp, "timedelta64[ns]"
  250. )
  251. for item in [pd.Timestamp("2012-01-01"), 1]:
  252. result = obj.insert(1, item)
  253. expected = obj.astype(object).insert(1, item)
  254. assert expected.dtype == object
  255. tm.assert_index_equal(result, expected)
  256. @pytest.mark.parametrize(
  257. "insert, coerced_val, coerced_dtype",
  258. [
  259. (pd.Period("2012-01", freq="M"), "2012-01", "period[M]"),
  260. (pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01"), object),
  261. (1, 1, object),
  262. ("x", "x", object),
  263. ],
  264. )
  265. def test_insert_index_period(self, insert, coerced_val, coerced_dtype):
  266. obj = pd.PeriodIndex(["2011-01", "2011-02", "2011-03", "2011-04"], freq="M")
  267. assert obj.dtype == "period[M]"
  268. data = [
  269. pd.Period("2011-01", freq="M"),
  270. coerced_val,
  271. pd.Period("2011-02", freq="M"),
  272. pd.Period("2011-03", freq="M"),
  273. pd.Period("2011-04", freq="M"),
  274. ]
  275. if isinstance(insert, pd.Period):
  276. exp = pd.PeriodIndex(data, freq="M")
  277. self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
  278. # string that can be parsed to appropriate PeriodDtype
  279. self._assert_insert_conversion(obj, str(insert), exp, coerced_dtype)
  280. else:
  281. result = obj.insert(0, insert)
  282. expected = obj.astype(object).insert(0, insert)
  283. tm.assert_index_equal(result, expected)
  284. # TODO: ATM inserting '2012-01-01 00:00:00' when we have obj.freq=="M"
  285. # casts that string to Period[M], not clear that is desirable
  286. if not isinstance(insert, pd.Timestamp):
  287. # non-castable string
  288. result = obj.insert(0, str(insert))
  289. expected = obj.astype(object).insert(0, str(insert))
  290. tm.assert_index_equal(result, expected)
  291. @pytest.mark.xfail(reason="Test not implemented")
  292. def test_insert_index_complex128(self):
  293. raise NotImplementedError
  294. @pytest.mark.xfail(reason="Test not implemented")
  295. def test_insert_index_bool(self):
  296. raise NotImplementedError
  297. class TestWhereCoercion(CoercionBase):
  298. method = "where"
  299. _cond = np.array([True, False, True, False])
  300. def _assert_where_conversion(
  301. self, original, cond, values, expected, expected_dtype
  302. ):
  303. """test coercion triggered by where"""
  304. target = original.copy()
  305. res = target.where(cond, values)
  306. tm.assert_equal(res, expected)
  307. assert res.dtype == expected_dtype
  308. def _construct_exp(self, obj, klass, fill_val, exp_dtype):
  309. if fill_val is True:
  310. values = klass([True, False, True, True])
  311. elif isinstance(fill_val, (datetime, np.datetime64)):
  312. values = pd.date_range(fill_val, periods=4)
  313. else:
  314. values = klass(x * fill_val for x in [5, 6, 7, 8])
  315. exp = klass([obj[0], values[1], obj[2], values[3]], dtype=exp_dtype)
  316. return values, exp
  317. def _run_test(self, obj, fill_val, klass, exp_dtype):
  318. cond = klass(self._cond)
  319. exp = klass([obj[0], fill_val, obj[2], fill_val], dtype=exp_dtype)
  320. self._assert_where_conversion(obj, cond, fill_val, exp, exp_dtype)
  321. values, exp = self._construct_exp(obj, klass, fill_val, exp_dtype)
  322. self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
  323. @pytest.mark.parametrize(
  324. "fill_val,exp_dtype",
  325. [(1, object), (1.1, object), (1 + 1j, object), (True, object)],
  326. )
  327. def test_where_object(self, index_or_series, fill_val, exp_dtype):
  328. klass = index_or_series
  329. obj = klass(list("abcd"))
  330. assert obj.dtype == object
  331. self._run_test(obj, fill_val, klass, exp_dtype)
  332. @pytest.mark.parametrize(
  333. "fill_val,exp_dtype",
  334. [(1, np.int64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
  335. )
  336. def test_where_int64(self, index_or_series, fill_val, exp_dtype, request):
  337. klass = index_or_series
  338. obj = klass([1, 2, 3, 4])
  339. assert obj.dtype == np.int64
  340. self._run_test(obj, fill_val, klass, exp_dtype)
  341. @pytest.mark.parametrize(
  342. "fill_val, exp_dtype",
  343. [(1, np.float64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
  344. )
  345. def test_where_float64(self, index_or_series, fill_val, exp_dtype, request):
  346. klass = index_or_series
  347. obj = klass([1.1, 2.2, 3.3, 4.4])
  348. assert obj.dtype == np.float64
  349. self._run_test(obj, fill_val, klass, exp_dtype)
  350. @pytest.mark.parametrize(
  351. "fill_val,exp_dtype",
  352. [
  353. (1, np.complex128),
  354. (1.1, np.complex128),
  355. (1 + 1j, np.complex128),
  356. (True, object),
  357. ],
  358. )
  359. def test_where_complex128(self, index_or_series, fill_val, exp_dtype):
  360. klass = index_or_series
  361. obj = klass([1 + 1j, 2 + 2j, 3 + 3j, 4 + 4j], dtype=np.complex128)
  362. assert obj.dtype == np.complex128
  363. self._run_test(obj, fill_val, klass, exp_dtype)
  364. @pytest.mark.parametrize(
  365. "fill_val,exp_dtype",
  366. [(1, object), (1.1, object), (1 + 1j, object), (True, np.bool_)],
  367. )
  368. def test_where_series_bool(self, fill_val, exp_dtype):
  369. klass = pd.Series # TODO: use index_or_series once we have Index[bool]
  370. obj = klass([True, False, True, False])
  371. assert obj.dtype == np.bool_
  372. self._run_test(obj, fill_val, klass, exp_dtype)
  373. @pytest.mark.parametrize(
  374. "fill_val,exp_dtype",
  375. [
  376. (pd.Timestamp("2012-01-01"), "datetime64[ns]"),
  377. (pd.Timestamp("2012-01-01", tz="US/Eastern"), object),
  378. ],
  379. ids=["datetime64", "datetime64tz"],
  380. )
  381. def test_where_datetime64(self, index_or_series, fill_val, exp_dtype):
  382. klass = index_or_series
  383. obj = klass(pd.date_range("2011-01-01", periods=4, freq="D")._with_freq(None))
  384. assert obj.dtype == "datetime64[ns]"
  385. fv = fill_val
  386. # do the check with each of the available datetime scalars
  387. if exp_dtype == "datetime64[ns]":
  388. for scalar in [fv, fv.to_pydatetime(), fv.to_datetime64()]:
  389. self._run_test(obj, scalar, klass, exp_dtype)
  390. else:
  391. for scalar in [fv, fv.to_pydatetime()]:
  392. self._run_test(obj, fill_val, klass, exp_dtype)
  393. @pytest.mark.xfail(reason="Test not implemented")
  394. def test_where_index_complex128(self):
  395. raise NotImplementedError
  396. @pytest.mark.xfail(reason="Test not implemented")
  397. def test_where_index_bool(self):
  398. raise NotImplementedError
  399. @pytest.mark.xfail(reason="Test not implemented")
  400. def test_where_series_timedelta64(self):
  401. raise NotImplementedError
  402. @pytest.mark.xfail(reason="Test not implemented")
  403. def test_where_series_period(self):
  404. raise NotImplementedError
  405. @pytest.mark.parametrize(
  406. "value", [pd.Timedelta(days=9), timedelta(days=9), np.timedelta64(9, "D")]
  407. )
  408. def test_where_index_timedelta64(self, value):
  409. tdi = pd.timedelta_range("1 Day", periods=4)
  410. cond = np.array([True, False, False, True])
  411. expected = pd.TimedeltaIndex(["1 Day", value, value, "4 Days"])
  412. result = tdi.where(cond, value)
  413. tm.assert_index_equal(result, expected)
  414. # wrong-dtyped NaT
  415. dtnat = np.datetime64("NaT", "ns")
  416. expected = pd.Index([tdi[0], dtnat, dtnat, tdi[3]], dtype=object)
  417. assert expected[1] is dtnat
  418. result = tdi.where(cond, dtnat)
  419. tm.assert_index_equal(result, expected)
  420. def test_where_index_period(self):
  421. dti = pd.date_range("2016-01-01", periods=3, freq="QS")
  422. pi = dti.to_period("Q")
  423. cond = np.array([False, True, False])
  424. # Passing a valid scalar
  425. value = pi[-1] + pi.freq * 10
  426. expected = pd.PeriodIndex([value, pi[1], value])
  427. result = pi.where(cond, value)
  428. tm.assert_index_equal(result, expected)
  429. # Case passing ndarray[object] of Periods
  430. other = np.asarray(pi + pi.freq * 10, dtype=object)
  431. result = pi.where(cond, other)
  432. expected = pd.PeriodIndex([other[0], pi[1], other[2]])
  433. tm.assert_index_equal(result, expected)
  434. # Passing a mismatched scalar -> casts to object
  435. td = pd.Timedelta(days=4)
  436. expected = pd.Index([td, pi[1], td], dtype=object)
  437. result = pi.where(cond, td)
  438. tm.assert_index_equal(result, expected)
  439. per = pd.Period("2020-04-21", "D")
  440. expected = pd.Index([per, pi[1], per], dtype=object)
  441. result = pi.where(cond, per)
  442. tm.assert_index_equal(result, expected)
  443. class TestFillnaSeriesCoercion(CoercionBase):
  444. # not indexing, but place here for consistency
  445. method = "fillna"
  446. @pytest.mark.xfail(reason="Test not implemented")
  447. def test_has_comprehensive_tests(self):
  448. raise NotImplementedError
  449. def _assert_fillna_conversion(self, original, value, expected, expected_dtype):
  450. """test coercion triggered by fillna"""
  451. target = original.copy()
  452. res = target.fillna(value)
  453. tm.assert_equal(res, expected)
  454. assert res.dtype == expected_dtype
  455. @pytest.mark.parametrize(
  456. "fill_val, fill_dtype",
  457. [(1, object), (1.1, object), (1 + 1j, object), (True, object)],
  458. )
  459. def test_fillna_object(self, index_or_series, fill_val, fill_dtype):
  460. klass = index_or_series
  461. obj = klass(["a", np.nan, "c", "d"])
  462. assert obj.dtype == object
  463. exp = klass(["a", fill_val, "c", "d"])
  464. self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
  465. @pytest.mark.parametrize(
  466. "fill_val,fill_dtype",
  467. [(1, np.float64), (1.1, np.float64), (1 + 1j, np.complex128), (True, object)],
  468. )
  469. def test_fillna_float64(self, index_or_series, fill_val, fill_dtype):
  470. klass = index_or_series
  471. obj = klass([1.1, np.nan, 3.3, 4.4])
  472. assert obj.dtype == np.float64
  473. exp = klass([1.1, fill_val, 3.3, 4.4])
  474. self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
  475. @pytest.mark.parametrize(
  476. "fill_val,fill_dtype",
  477. [
  478. (1, np.complex128),
  479. (1.1, np.complex128),
  480. (1 + 1j, np.complex128),
  481. (True, object),
  482. ],
  483. )
  484. def test_fillna_complex128(self, index_or_series, fill_val, fill_dtype):
  485. klass = index_or_series
  486. obj = klass([1 + 1j, np.nan, 3 + 3j, 4 + 4j], dtype=np.complex128)
  487. assert obj.dtype == np.complex128
  488. exp = klass([1 + 1j, fill_val, 3 + 3j, 4 + 4j])
  489. self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
  490. @pytest.mark.parametrize(
  491. "fill_val,fill_dtype",
  492. [
  493. (pd.Timestamp("2012-01-01"), "datetime64[ns]"),
  494. (pd.Timestamp("2012-01-01", tz="US/Eastern"), object),
  495. (1, object),
  496. ("x", object),
  497. ],
  498. ids=["datetime64", "datetime64tz", "object", "object"],
  499. )
  500. def test_fillna_datetime(self, index_or_series, fill_val, fill_dtype):
  501. klass = index_or_series
  502. obj = klass(
  503. [
  504. pd.Timestamp("2011-01-01"),
  505. pd.NaT,
  506. pd.Timestamp("2011-01-03"),
  507. pd.Timestamp("2011-01-04"),
  508. ]
  509. )
  510. assert obj.dtype == "datetime64[ns]"
  511. exp = klass(
  512. [
  513. pd.Timestamp("2011-01-01"),
  514. fill_val,
  515. pd.Timestamp("2011-01-03"),
  516. pd.Timestamp("2011-01-04"),
  517. ]
  518. )
  519. self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
  520. @pytest.mark.parametrize(
  521. "fill_val,fill_dtype",
  522. [
  523. (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"),
  524. (pd.Timestamp("2012-01-01"), object),
  525. # pre-2.0 with a mismatched tz we would get object result
  526. (pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), "datetime64[ns, US/Eastern]"),
  527. (1, object),
  528. ("x", object),
  529. ],
  530. )
  531. def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype):
  532. klass = index_or_series
  533. tz = "US/Eastern"
  534. obj = klass(
  535. [
  536. pd.Timestamp("2011-01-01", tz=tz),
  537. pd.NaT,
  538. pd.Timestamp("2011-01-03", tz=tz),
  539. pd.Timestamp("2011-01-04", tz=tz),
  540. ]
  541. )
  542. assert obj.dtype == "datetime64[ns, US/Eastern]"
  543. if getattr(fill_val, "tz", None) is None:
  544. fv = fill_val
  545. else:
  546. fv = fill_val.tz_convert(tz)
  547. exp = klass(
  548. [
  549. pd.Timestamp("2011-01-01", tz=tz),
  550. fv,
  551. pd.Timestamp("2011-01-03", tz=tz),
  552. pd.Timestamp("2011-01-04", tz=tz),
  553. ]
  554. )
  555. self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
  556. @pytest.mark.parametrize(
  557. "fill_val",
  558. [
  559. 1,
  560. 1.1,
  561. 1 + 1j,
  562. True,
  563. pd.Interval(1, 2, closed="left"),
  564. pd.Timestamp("2012-01-01", tz="US/Eastern"),
  565. pd.Timestamp("2012-01-01"),
  566. pd.Timedelta(days=1),
  567. pd.Period("2016-01-01", "D"),
  568. ],
  569. )
  570. def test_fillna_interval(self, index_or_series, fill_val):
  571. ii = pd.interval_range(1.0, 5.0, closed="right").insert(1, np.nan)
  572. assert isinstance(ii.dtype, pd.IntervalDtype)
  573. obj = index_or_series(ii)
  574. exp = index_or_series([ii[0], fill_val, ii[2], ii[3], ii[4]], dtype=object)
  575. fill_dtype = object
  576. self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
  577. @pytest.mark.xfail(reason="Test not implemented")
  578. def test_fillna_series_int64(self):
  579. raise NotImplementedError
  580. @pytest.mark.xfail(reason="Test not implemented")
  581. def test_fillna_index_int64(self):
  582. raise NotImplementedError
  583. @pytest.mark.xfail(reason="Test not implemented")
  584. def test_fillna_series_bool(self):
  585. raise NotImplementedError
  586. @pytest.mark.xfail(reason="Test not implemented")
  587. def test_fillna_index_bool(self):
  588. raise NotImplementedError
  589. @pytest.mark.xfail(reason="Test not implemented")
  590. def test_fillna_series_timedelta64(self):
  591. raise NotImplementedError
  592. @pytest.mark.parametrize(
  593. "fill_val",
  594. [
  595. 1,
  596. 1.1,
  597. 1 + 1j,
  598. True,
  599. pd.Interval(1, 2, closed="left"),
  600. pd.Timestamp("2012-01-01", tz="US/Eastern"),
  601. pd.Timestamp("2012-01-01"),
  602. pd.Timedelta(days=1),
  603. pd.Period("2016-01-01", "W"),
  604. ],
  605. )
  606. def test_fillna_series_period(self, index_or_series, fill_val):
  607. pi = pd.period_range("2016-01-01", periods=4, freq="D").insert(1, pd.NaT)
  608. assert isinstance(pi.dtype, pd.PeriodDtype)
  609. obj = index_or_series(pi)
  610. exp = index_or_series([pi[0], fill_val, pi[2], pi[3], pi[4]], dtype=object)
  611. fill_dtype = object
  612. self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype)
  613. @pytest.mark.xfail(reason="Test not implemented")
  614. def test_fillna_index_timedelta64(self):
  615. raise NotImplementedError
  616. @pytest.mark.xfail(reason="Test not implemented")
  617. def test_fillna_index_period(self):
  618. raise NotImplementedError
  619. class TestReplaceSeriesCoercion(CoercionBase):
  620. klasses = ["series"]
  621. method = "replace"
  622. rep: dict[str, list] = {}
  623. rep["object"] = ["a", "b"]
  624. rep["int64"] = [4, 5]
  625. rep["float64"] = [1.1, 2.2]
  626. rep["complex128"] = [1 + 1j, 2 + 2j]
  627. rep["bool"] = [True, False]
  628. rep["datetime64[ns]"] = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-03")]
  629. for tz in ["UTC", "US/Eastern"]:
  630. # to test tz => different tz replacement
  631. key = f"datetime64[ns, {tz}]"
  632. rep[key] = [
  633. pd.Timestamp("2011-01-01", tz=tz),
  634. pd.Timestamp("2011-01-03", tz=tz),
  635. ]
  636. rep["timedelta64[ns]"] = [pd.Timedelta("1 day"), pd.Timedelta("2 day")]
  637. @pytest.fixture(params=["dict", "series"])
  638. def how(self, request):
  639. return request.param
  640. @pytest.fixture(
  641. params=[
  642. "object",
  643. "int64",
  644. "float64",
  645. "complex128",
  646. "bool",
  647. "datetime64[ns]",
  648. "datetime64[ns, UTC]",
  649. "datetime64[ns, US/Eastern]",
  650. "timedelta64[ns]",
  651. ]
  652. )
  653. def from_key(self, request):
  654. return request.param
  655. @pytest.fixture(
  656. params=[
  657. "object",
  658. "int64",
  659. "float64",
  660. "complex128",
  661. "bool",
  662. "datetime64[ns]",
  663. "datetime64[ns, UTC]",
  664. "datetime64[ns, US/Eastern]",
  665. "timedelta64[ns]",
  666. ],
  667. ids=[
  668. "object",
  669. "int64",
  670. "float64",
  671. "complex128",
  672. "bool",
  673. "datetime64",
  674. "datetime64tz",
  675. "datetime64tz",
  676. "timedelta64",
  677. ],
  678. )
  679. def to_key(self, request):
  680. return request.param
  681. @pytest.fixture
  682. def replacer(self, how, from_key, to_key):
  683. """
  684. Object we will pass to `Series.replace`
  685. """
  686. if how == "dict":
  687. replacer = dict(zip(self.rep[from_key], self.rep[to_key]))
  688. elif how == "series":
  689. replacer = pd.Series(self.rep[to_key], index=self.rep[from_key])
  690. else:
  691. raise ValueError
  692. return replacer
  693. def test_replace_series(self, how, to_key, from_key, replacer):
  694. index = pd.Index([3, 4], name="xxx")
  695. obj = pd.Series(self.rep[from_key], index=index, name="yyy")
  696. assert obj.dtype == from_key
  697. if from_key.startswith("datetime") and to_key.startswith("datetime"):
  698. # tested below
  699. return
  700. elif from_key in ["datetime64[ns, US/Eastern]", "datetime64[ns, UTC]"]:
  701. # tested below
  702. return
  703. result = obj.replace(replacer)
  704. if (from_key == "float64" and to_key in ("int64")) or (
  705. from_key == "complex128" and to_key in ("int64", "float64")
  706. ):
  707. if not IS64 or is_platform_windows():
  708. pytest.skip(f"32-bit platform buggy: {from_key} -> {to_key}")
  709. # Expected: do not downcast by replacement
  710. exp = pd.Series(self.rep[to_key], index=index, name="yyy", dtype=from_key)
  711. else:
  712. exp = pd.Series(self.rep[to_key], index=index, name="yyy")
  713. assert exp.dtype == to_key
  714. tm.assert_series_equal(result, exp)
  715. @pytest.mark.parametrize(
  716. "to_key",
  717. ["timedelta64[ns]", "bool", "object", "complex128", "float64", "int64"],
  718. indirect=True,
  719. )
  720. @pytest.mark.parametrize(
  721. "from_key", ["datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"], indirect=True
  722. )
  723. def test_replace_series_datetime_tz(self, how, to_key, from_key, replacer):
  724. index = pd.Index([3, 4], name="xyz")
  725. obj = pd.Series(self.rep[from_key], index=index, name="yyy")
  726. assert obj.dtype == from_key
  727. result = obj.replace(replacer)
  728. exp = pd.Series(self.rep[to_key], index=index, name="yyy")
  729. assert exp.dtype == to_key
  730. tm.assert_series_equal(result, exp)
  731. @pytest.mark.parametrize(
  732. "to_key",
  733. ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"],
  734. indirect=True,
  735. )
  736. @pytest.mark.parametrize(
  737. "from_key",
  738. ["datetime64[ns]", "datetime64[ns, UTC]", "datetime64[ns, US/Eastern]"],
  739. indirect=True,
  740. )
  741. def test_replace_series_datetime_datetime(self, how, to_key, from_key, replacer):
  742. index = pd.Index([3, 4], name="xyz")
  743. obj = pd.Series(self.rep[from_key], index=index, name="yyy")
  744. assert obj.dtype == from_key
  745. result = obj.replace(replacer)
  746. exp = pd.Series(self.rep[to_key], index=index, name="yyy")
  747. if isinstance(obj.dtype, pd.DatetimeTZDtype) and isinstance(
  748. exp.dtype, pd.DatetimeTZDtype
  749. ):
  750. # with mismatched tzs, we retain the original dtype as of 2.0
  751. exp = exp.astype(obj.dtype)
  752. else:
  753. assert exp.dtype == to_key
  754. tm.assert_series_equal(result, exp)
  755. @pytest.mark.xfail(reason="Test not implemented")
  756. def test_replace_series_period(self):
  757. raise NotImplementedError