test_fillna.py 33 KB


  1. from datetime import (
  2. datetime,
  3. timedelta,
  4. timezone,
  5. )
  6. import numpy as np
  7. import pytest
  8. import pytz
  9. from pandas import (
  10. Categorical,
  11. DataFrame,
  12. DatetimeIndex,
  13. NaT,
  14. Period,
  15. Series,
  16. Timedelta,
  17. Timestamp,
  18. date_range,
  19. isna,
  20. )
  21. import pandas._testing as tm
  22. from pandas.core.arrays import period_array
  23. class TestSeriesFillNA:
  24. def test_fillna_nat(self):
  25. series = Series([0, 1, 2, NaT._value], dtype="M8[ns]")
  26. filled = series.fillna(method="pad")
  27. filled2 = series.fillna(value=series.values[2])
  28. expected = series.copy()
  29. expected.iloc[3] = expected.iloc[2]
  30. tm.assert_series_equal(filled, expected)
  31. tm.assert_series_equal(filled2, expected)
  32. df = DataFrame({"A": series})
  33. filled = df.fillna(method="pad")
  34. filled2 = df.fillna(value=series.values[2])
  35. expected = DataFrame({"A": expected})
  36. tm.assert_frame_equal(filled, expected)
  37. tm.assert_frame_equal(filled2, expected)
  38. series = Series([NaT._value, 0, 1, 2], dtype="M8[ns]")
  39. filled = series.fillna(method="bfill")
  40. filled2 = series.fillna(value=series[1])
  41. expected = series.copy()
  42. expected[0] = expected[1]
  43. tm.assert_series_equal(filled, expected)
  44. tm.assert_series_equal(filled2, expected)
  45. df = DataFrame({"A": series})
  46. filled = df.fillna(method="bfill")
  47. filled2 = df.fillna(value=series[1])
  48. expected = DataFrame({"A": expected})
  49. tm.assert_frame_equal(filled, expected)
  50. tm.assert_frame_equal(filled2, expected)
  def test_fillna_value_or_method(self, datetime_series):
      """Supplying both ``value`` and ``method`` must raise a ValueError."""
      expected_message = "Cannot specify both 'value' and 'method'"
      with pytest.raises(ValueError, match=expected_message):
          datetime_series.fillna(value=0, method="ffill")
  def test_fillna(self):
      """Check ffill, backfill and scalar filling on a float Series with one gap.

      Also verifies that calling ``fillna`` with neither ``value`` nor
      ``method`` raises a ValueError.
      """
      # Use the public ``date_range`` API (business-day frequency) instead of
      # the private ``tm.makeDateIndex`` testing helper.
      ts = Series(
          [0.0, 1.0, 2.0, 3.0, 4.0],
          index=date_range("2000-01-01", periods=5, freq="B"),
      )

      # Nothing is missing yet, so forward-filling changes nothing.
      tm.assert_series_equal(ts, ts.fillna(method="ffill"))

      # Write by position explicitly: an integer key on a DatetimeIndex relies
      # on positional fallback.  ``np.nan`` is the spelling used by the rest of
      # this module; the ``np.NaN`` alias no longer exists in NumPy 2.0.
      ts.iloc[2] = np.nan

      exp = Series([0.0, 1.0, 1.0, 3.0, 4.0], index=ts.index)
      tm.assert_series_equal(ts.fillna(method="ffill"), exp)

      exp = Series([0.0, 1.0, 3.0, 3.0, 4.0], index=ts.index)
      tm.assert_series_equal(ts.fillna(method="backfill"), exp)

      exp = Series([0.0, 1.0, 5.0, 3.0, 4.0], index=ts.index)
      tm.assert_series_equal(ts.fillna(value=5), exp)

      msg = "Must specify a fill 'value' or 'method'"
      with pytest.raises(ValueError, match=msg):
          ts.fillna()
  68. def test_fillna_nonscalar(self):
  69. # GH#5703
  70. s1 = Series([np.nan])
  71. s2 = Series([1])
  72. result = s1.fillna(s2)
  73. expected = Series([1.0])
  74. tm.assert_series_equal(result, expected)
  75. result = s1.fillna({})
  76. tm.assert_series_equal(result, s1)
  77. result = s1.fillna(Series((), dtype=object))
  78. tm.assert_series_equal(result, s1)
  79. result = s2.fillna(s1)
  80. tm.assert_series_equal(result, s2)
  81. result = s1.fillna({0: 1})
  82. tm.assert_series_equal(result, expected)
  83. result = s1.fillna({1: 1})
  84. tm.assert_series_equal(result, Series([np.nan]))
  85. result = s1.fillna({0: 1, 1: 1})
  86. tm.assert_series_equal(result, expected)
  87. result = s1.fillna(Series({0: 1, 1: 1}))
  88. tm.assert_series_equal(result, expected)
  89. result = s1.fillna(Series({0: 1, 1: 1}, index=[4, 5]))
  90. tm.assert_series_equal(result, s1)
  91. def test_fillna_aligns(self):
  92. s1 = Series([0, 1, 2], list("abc"))
  93. s2 = Series([0, np.nan, 2], list("bac"))
  94. result = s2.fillna(s1)
  95. expected = Series([0, 0, 2.0], list("bac"))
  96. tm.assert_series_equal(result, expected)
  97. def test_fillna_limit(self):
  98. ser = Series(np.nan, index=[0, 1, 2])
  99. result = ser.fillna(999, limit=1)
  100. expected = Series([999, np.nan, np.nan], index=[0, 1, 2])
  101. tm.assert_series_equal(result, expected)
  102. result = ser.fillna(999, limit=2)
  103. expected = Series([999, 999, np.nan], index=[0, 1, 2])
  104. tm.assert_series_equal(result, expected)
  105. def test_fillna_dont_cast_strings(self):
  106. # GH#9043
  107. # make sure a string representation of int/float values can be filled
  108. # correctly without raising errors or being converted
  109. vals = ["0", "1.5", "-0.3"]
  110. for val in vals:
  111. ser = Series([0, 1, np.nan, np.nan, 4], dtype="float64")
  112. result = ser.fillna(val)
  113. expected = Series([0, 1, val, val, 4], dtype="object")
  114. tm.assert_series_equal(result, expected)
  115. def test_fillna_consistency(self):
  116. # GH#16402
  117. # fillna with a tz aware to a tz-naive, should result in object
  118. ser = Series([Timestamp("20130101"), NaT])
  119. result = ser.fillna(Timestamp("20130101", tz="US/Eastern"))
  120. expected = Series(
  121. [Timestamp("20130101"), Timestamp("2013-01-01", tz="US/Eastern")],
  122. dtype="object",
  123. )
  124. tm.assert_series_equal(result, expected)
  125. result = ser.where([True, False], Timestamp("20130101", tz="US/Eastern"))
  126. tm.assert_series_equal(result, expected)
  127. result = ser.where([True, False], Timestamp("20130101", tz="US/Eastern"))
  128. tm.assert_series_equal(result, expected)
  129. # with a non-datetime
  130. result = ser.fillna("foo")
  131. expected = Series([Timestamp("20130101"), "foo"])
  132. tm.assert_series_equal(result, expected)
  133. # assignment
  134. ser2 = ser.copy()
  135. ser2[1] = "foo"
  136. tm.assert_series_equal(ser2, expected)
  137. def test_fillna_downcast(self):
  138. # GH#15277
  139. # infer int64 from float64
  140. ser = Series([1.0, np.nan])
  141. result = ser.fillna(0, downcast="infer")
  142. expected = Series([1, 0])
  143. tm.assert_series_equal(result, expected)
  144. # infer int64 from float64 when fillna value is a dict
  145. ser = Series([1.0, np.nan])
  146. result = ser.fillna({1: 0}, downcast="infer")
  147. expected = Series([1, 0])
  148. tm.assert_series_equal(result, expected)
  149. def test_fillna_downcast_infer_objects_to_numeric(self):
  150. # GH#44241 if we have object-dtype, 'downcast="infer"' should
  151. # _actually_ infer
  152. arr = np.arange(5).astype(object)
  153. arr[3] = np.nan
  154. ser = Series(arr)
  155. res = ser.fillna(3, downcast="infer")
  156. expected = Series(np.arange(5), dtype=np.int64)
  157. tm.assert_series_equal(res, expected)
  158. res = ser.ffill(downcast="infer")
  159. expected = Series([0, 1, 2, 2, 4], dtype=np.int64)
  160. tm.assert_series_equal(res, expected)
  161. res = ser.bfill(downcast="infer")
  162. expected = Series([0, 1, 2, 4, 4], dtype=np.int64)
  163. tm.assert_series_equal(res, expected)
  164. # with a non-round float present, we will downcast to float64
  165. ser[2] = 2.5
  166. expected = Series([0, 1, 2.5, 3, 4], dtype=np.float64)
  167. res = ser.fillna(3, downcast="infer")
  168. tm.assert_series_equal(res, expected)
  169. res = ser.ffill(downcast="infer")
  170. expected = Series([0, 1, 2.5, 2.5, 4], dtype=np.float64)
  171. tm.assert_series_equal(res, expected)
  172. res = ser.bfill(downcast="infer")
  173. expected = Series([0, 1, 2.5, 4, 4], dtype=np.float64)
  174. tm.assert_series_equal(res, expected)
  175. def test_timedelta_fillna(self, frame_or_series):
  176. # GH#3371
  177. ser = Series(
  178. [
  179. Timestamp("20130101"),
  180. Timestamp("20130101"),
  181. Timestamp("20130102"),
  182. Timestamp("20130103 9:01:01"),
  183. ]
  184. )
  185. td = ser.diff()
  186. obj = frame_or_series(td)
  187. # reg fillna
  188. result = obj.fillna(Timedelta(seconds=0))
  189. expected = Series(
  190. [
  191. timedelta(0),
  192. timedelta(0),
  193. timedelta(1),
  194. timedelta(days=1, seconds=9 * 3600 + 60 + 1),
  195. ]
  196. )
  197. expected = frame_or_series(expected)
  198. tm.assert_equal(result, expected)
  199. # GH#45746 pre-1.? ints were interpreted as seconds. then that was
  200. # deprecated and changed to raise. In 2.0 it casts to common dtype,
  201. # consistent with every other dtype's behavior
  202. res = obj.fillna(1)
  203. expected = obj.astype(object).fillna(1)
  204. tm.assert_equal(res, expected)
  205. result = obj.fillna(Timedelta(seconds=1))
  206. expected = Series(
  207. [
  208. timedelta(seconds=1),
  209. timedelta(0),
  210. timedelta(1),
  211. timedelta(days=1, seconds=9 * 3600 + 60 + 1),
  212. ]
  213. )
  214. expected = frame_or_series(expected)
  215. tm.assert_equal(result, expected)
  216. result = obj.fillna(timedelta(days=1, seconds=1))
  217. expected = Series(
  218. [
  219. timedelta(days=1, seconds=1),
  220. timedelta(0),
  221. timedelta(1),
  222. timedelta(days=1, seconds=9 * 3600 + 60 + 1),
  223. ]
  224. )
  225. expected = frame_or_series(expected)
  226. tm.assert_equal(result, expected)
  227. result = obj.fillna(np.timedelta64(10**9))
  228. expected = Series(
  229. [
  230. timedelta(seconds=1),
  231. timedelta(0),
  232. timedelta(1),
  233. timedelta(days=1, seconds=9 * 3600 + 60 + 1),
  234. ]
  235. )
  236. expected = frame_or_series(expected)
  237. tm.assert_equal(result, expected)
  238. result = obj.fillna(NaT)
  239. expected = Series(
  240. [
  241. NaT,
  242. timedelta(0),
  243. timedelta(1),
  244. timedelta(days=1, seconds=9 * 3600 + 60 + 1),
  245. ],
  246. dtype="m8[ns]",
  247. )
  248. expected = frame_or_series(expected)
  249. tm.assert_equal(result, expected)
  250. # ffill
  251. td[2] = np.nan
  252. obj = frame_or_series(td)
  253. result = obj.ffill()
  254. expected = td.fillna(Timedelta(seconds=0))
  255. expected[0] = np.nan
  256. expected = frame_or_series(expected)
  257. tm.assert_equal(result, expected)
  258. # bfill
  259. td[2] = np.nan
  260. obj = frame_or_series(td)
  261. result = obj.bfill()
  262. expected = td.fillna(Timedelta(seconds=0))
  263. expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1)
  264. expected = frame_or_series(expected)
  265. tm.assert_equal(result, expected)
  266. def test_datetime64_fillna(self):
  267. ser = Series(
  268. [
  269. Timestamp("20130101"),
  270. Timestamp("20130101"),
  271. Timestamp("20130102"),
  272. Timestamp("20130103 9:01:01"),
  273. ]
  274. )
  275. ser[2] = np.nan
  276. # ffill
  277. result = ser.ffill()
  278. expected = Series(
  279. [
  280. Timestamp("20130101"),
  281. Timestamp("20130101"),
  282. Timestamp("20130101"),
  283. Timestamp("20130103 9:01:01"),
  284. ]
  285. )
  286. tm.assert_series_equal(result, expected)
  287. # bfill
  288. result = ser.bfill()
  289. expected = Series(
  290. [
  291. Timestamp("20130101"),
  292. Timestamp("20130101"),
  293. Timestamp("20130103 9:01:01"),
  294. Timestamp("20130103 9:01:01"),
  295. ]
  296. )
  297. tm.assert_series_equal(result, expected)
  298. def test_datetime64_fillna_backfill(self):
  299. # GH#6587
  300. # make sure that we are treating as integer when filling
  301. ser = Series([NaT, NaT, "2013-08-05 15:30:00.000001"], dtype="M8[ns]")
  302. expected = Series(
  303. [
  304. "2013-08-05 15:30:00.000001",
  305. "2013-08-05 15:30:00.000001",
  306. "2013-08-05 15:30:00.000001",
  307. ],
  308. dtype="M8[ns]",
  309. )
  310. result = ser.fillna(method="backfill")
  311. tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"])
  def test_datetime64_tz_fillna(self, tz):
      """Fill tz-naive and tz-aware datetime Series with a range of values.

      After every fill the NaT positions of the source Series are re-checked,
      so an accidental in-place modification would be detected.
      """
      null_loc = Series([False, True, False, True])

      def check(result, expected, source):
          tm.assert_series_equal(expected, result)
          # check s is not changed
          tm.assert_series_equal(isna(source), null_loc)

      naive1 = Timestamp("2011-01-01 10:00")
      naive2 = Timestamp("2011-01-02 10:00")
      naive3 = Timestamp("2011-01-03 10:00")
      naive4 = Timestamp("2011-01-04 10:00")
      aware1 = Timestamp("2011-01-01 10:00", tz=tz)
      aware2 = Timestamp("2011-01-02 10:00", tz=tz)
      aware3 = Timestamp("2011-01-03 10:00", tz=tz)
      aware4 = Timestamp("2011-01-04 10:00", tz=tz)

      # DatetimeLikeBlock
      ser = Series([naive1, NaT, naive3, NaT])

      check(
          ser.fillna(naive2),
          Series([naive1, naive2, naive3, naive2]),
          ser,
      )
      check(
          ser.fillna(aware2),
          Series([naive1, aware2, naive3, aware2]),
          ser,
      )
      check(
          ser.fillna("AAA"),
          Series([naive1, "AAA", naive3, "AAA"], dtype=object),
          ser,
      )
      check(
          ser.fillna({1: aware2, 3: naive4}),
          Series([naive1, aware2, naive3, naive4]),
          ser,
      )
      check(
          ser.fillna({1: naive2, 3: naive4}),
          Series([naive1, naive2, naive3, naive4]),
          ser,
      )

      # DatetimeTZBlock
      idx = DatetimeIndex(["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz=tz)
      ser = Series(idx)
      assert ser.dtype == f"datetime64[ns, {tz}]"
      tm.assert_series_equal(isna(ser), null_loc)

      check(
          ser.fillna(naive2),
          Series([aware1, naive2, aware3, naive2]),
          ser,
      )

      # A matching-tz Timestamp and its pydatetime form give the same result.
      aware_filled = Series(
          DatetimeIndex(
              [
                  "2011-01-01 10:00",
                  "2011-01-02 10:00",
                  "2011-01-03 10:00",
                  "2011-01-02 10:00",
              ],
              tz=tz,
          )
      )
      check(ser.fillna(aware2), aware_filled, ser)
      check(ser.fillna(aware2.to_pydatetime()), aware_filled, ser)

      check(
          ser.fillna("AAA"),
          Series([aware1, "AAA", aware3, "AAA"], dtype=object),
          ser,
      )
      check(
          ser.fillna({1: aware2, 3: naive4}),
          Series([aware1, aware2, aware3, naive4]),
          ser,
      )
      check(
          ser.fillna({1: aware2, 3: aware4}),
          Series([aware1, aware2, aware3, aware4]),
          ser,
      )

      # filling with a naive/other zone, coerce to object
      check(
          ser.fillna(Timestamp("20130101")),
          Series([aware1, Timestamp("2013-01-01"), aware3, Timestamp("2013-01-01")]),
          ser,
      )

      # pre-2.0 fillna with mixed tzs would cast to object, in 2.0
      # it retains dtype.
      check(
          ser.fillna(Timestamp("20130101", tz="US/Pacific")),
          Series(
              [
                  aware1,
                  Timestamp("2013-01-01", tz="US/Pacific").tz_convert(tz),
                  aware3,
                  Timestamp("2013-01-01", tz="US/Pacific").tz_convert(tz),
              ]
          ),
          ser,
      )
  499. def test_fillna_dt64tz_with_method(self):
  500. # with timezone
  501. # GH#15855
  502. ser = Series([Timestamp("2012-11-11 00:00:00+01:00"), NaT])
  503. exp = Series(
  504. [
  505. Timestamp("2012-11-11 00:00:00+01:00"),
  506. Timestamp("2012-11-11 00:00:00+01:00"),
  507. ]
  508. )
  509. tm.assert_series_equal(ser.fillna(method="pad"), exp)
  510. ser = Series([NaT, Timestamp("2012-11-11 00:00:00+01:00")])
  511. exp = Series(
  512. [
  513. Timestamp("2012-11-11 00:00:00+01:00"),
  514. Timestamp("2012-11-11 00:00:00+01:00"),
  515. ]
  516. )
  517. tm.assert_series_equal(ser.fillna(method="bfill"), exp)
  518. def test_fillna_pytimedelta(self):
  519. # GH#8209
  520. ser = Series([np.nan, Timedelta("1 days")], index=["A", "B"])
  521. result = ser.fillna(timedelta(1))
  522. expected = Series(Timedelta("1 days"), index=["A", "B"])
  523. tm.assert_series_equal(result, expected)
  524. def test_fillna_period(self):
  525. # GH#13737
  526. ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")])
  527. res = ser.fillna(Period("2012-01", freq="M"))
  528. exp = Series([Period("2011-01", freq="M"), Period("2012-01", freq="M")])
  529. tm.assert_series_equal(res, exp)
  530. assert res.dtype == "Period[M]"
  531. def test_fillna_dt64_timestamp(self, frame_or_series):
  532. ser = Series(
  533. [
  534. Timestamp("20130101"),
  535. Timestamp("20130101"),
  536. Timestamp("20130102"),
  537. Timestamp("20130103 9:01:01"),
  538. ]
  539. )
  540. ser[2] = np.nan
  541. obj = frame_or_series(ser)
  542. # reg fillna
  543. result = obj.fillna(Timestamp("20130104"))
  544. expected = Series(
  545. [
  546. Timestamp("20130101"),
  547. Timestamp("20130101"),
  548. Timestamp("20130104"),
  549. Timestamp("20130103 9:01:01"),
  550. ]
  551. )
  552. expected = frame_or_series(expected)
  553. tm.assert_equal(result, expected)
  554. result = obj.fillna(NaT)
  555. expected = obj
  556. tm.assert_equal(result, expected)
  557. def test_fillna_dt64_non_nao(self):
  558. # GH#27419
  559. ser = Series([Timestamp("2010-01-01"), NaT, Timestamp("2000-01-01")])
  560. val = np.datetime64("1975-04-05", "ms")
  561. result = ser.fillna(val)
  562. expected = Series(
  563. [Timestamp("2010-01-01"), Timestamp("1975-04-05"), Timestamp("2000-01-01")]
  564. )
  565. tm.assert_series_equal(result, expected)
  566. def test_fillna_numeric_inplace(self):
  567. x = Series([np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"])
  568. y = x.copy()
  569. return_value = y.fillna(value=0, inplace=True)
  570. assert return_value is None
  571. expected = x.fillna(value=0)
  572. tm.assert_series_equal(y, expected)
  573. # ---------------------------------------------------------------
  574. # CategoricalDtype
  @pytest.mark.parametrize(
      "fill_value, expected_output",
      [
          ("a", ["a", "a", "b", "a", "a"]),
          ({1: "a", 3: "b", 4: "b"}, ["a", "a", "b", "b", "b"]),
          ({1: "a"}, ["a", "a", "b", np.nan, np.nan]),
          ({1: "a", 3: "b"}, ["a", "a", "b", "b", np.nan]),
          (Series("a"), ["a", np.nan, "b", np.nan, np.nan]),
          (Series("a", index=[1]), ["a", "a", "b", np.nan, np.nan]),
          (Series({1: "a", 3: "b"}), ["a", "a", "b", "b", np.nan]),
          (Series(["a", "b"], index=[3, 4]), ["a", np.nan, "b", "a", "b"]),
      ],
  )
  def test_fillna_categorical(self, fill_value, expected_output):
      """Fill a categorical Series with a scalar, dict or Series of known categories."""
      # GH#17033
      # Test fillna for a Categorical series
      categories = ["a", "b"]
      raw = ["a", np.nan, "b", np.nan, np.nan]

      ser = Series(Categorical(raw, categories=categories))
      want = Series(Categorical(expected_output, categories=categories))

      tm.assert_series_equal(ser.fillna(fill_value), want)
  @pytest.mark.parametrize(
      "fill_value, expected_output",
      [
          (Series(["a", "b", "c", "d", "e"]), ["a", "b", "b", "d", "e"]),
          (Series(["b", "d", "a", "d", "a"]), ["a", "d", "b", "d", "a"]),
          (
              Series(
                  Categorical(
                      ["b", "d", "a", "d", "a"], categories=["b", "c", "d", "e", "a"]
                  )
              ),
              ["a", "d", "b", "d", "a"],
          ),
      ],
  )
  def test_fillna_categorical_with_new_categories(self, fill_value, expected_output):
      """Fill from a Series whose values are all declared categories of the target."""
      # GH#26215
      categories = ["a", "b", "c", "d", "e"]
      raw = ["a", np.nan, "b", np.nan, np.nan]

      ser = Series(Categorical(raw, categories=categories))
      want = Series(Categorical(expected_output, categories=categories))

      tm.assert_series_equal(ser.fillna(fill_value), want)
  def test_fillna_categorical_raises(self):
      """Invalid fill values for a categorical Series raise the documented errors."""
      ser = Series(
          Categorical(["a", np.nan, "b", np.nan, np.nan], categories=["a", "b"])
      )
      cat = ser._values

      # "d" is not one of the declared categories.
      new_category_msg = "Cannot setitem on a Categorical with a new category"
      with pytest.raises(TypeError, match=new_category_msg):
          ser.fillna("d")

      length_msg = "Length of 'value' does not match."
      with pytest.raises(ValueError, match=length_msg):
          cat.fillna(Series("d"))

      with pytest.raises(TypeError, match=new_category_msg):
          ser.fillna({1: "d", 3: "a"})

      # List-likes and DataFrames are rejected by type.
      list_msg = '"value" parameter must be a scalar or dict, but you passed a "list"'
      with pytest.raises(TypeError, match=list_msg):
          ser.fillna(["a", "b"])

      tuple_msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"'
      with pytest.raises(TypeError, match=tuple_msg):
          ser.fillna(("a", "b"))

      frame_msg = (
          '"value" parameter must be a scalar, dict '
          'or Series, but you passed a "DataFrame"'
      )
      with pytest.raises(TypeError, match=frame_msg):
          ser.fillna(DataFrame({1: ["a"], 3: ["b"]}))
  @pytest.mark.parametrize("dtype", [float, "float32", "float64"])
  @pytest.mark.parametrize("fill_type", tm.ALL_REAL_NUMPY_DTYPES)
  @pytest.mark.parametrize("scalar", [True, False])
  def test_fillna_float_casting(self, dtype, fill_type, scalar):
      """fillna, masked setitem, mask and where all keep the float dtype."""
      # GH-43424

      def fresh():
          # Each operation starts from an unmodified Series with one NaN.
          return Series([np.nan, 1.2], dtype=dtype)

      fill_values = Series([2, 2], dtype=fill_type)
      if scalar:
          fill_values = fill_values.dtype.type(2)
      expected = Series([2.0, 1.2], dtype=dtype)

      filled = fresh().fillna(fill_values)
      tm.assert_series_equal(filled, expected)

      ser = fresh()
      mask = ser.isna().to_numpy()
      ser[mask] = fill_values
      tm.assert_series_equal(ser, expected)

      ser = fresh()
      ser.mask(mask, fill_values, inplace=True)
      tm.assert_series_equal(ser, expected)

      res = fresh().where(~mask, fill_values)
      tm.assert_series_equal(res, expected)
  664. def test_fillna_f32_upcast_with_dict(self):
  665. # GH-43424
  666. ser = Series([np.nan, 1.2], dtype=np.float32)
  667. result = ser.fillna({0: 1})
  668. expected = Series([1.0, 1.2], dtype=np.float32)
  669. tm.assert_series_equal(result, expected)
  670. # ---------------------------------------------------------------
  671. # Invalid Usages
  def test_fillna_invalid_method(self, datetime_series):
      """An unknown ``method`` raises a ValueError that mentions the bad value."""
      # ``pytest.raises`` fails the test when nothing is raised; a bare
      # try/except would let that case pass without checking anything.
      with pytest.raises(ValueError, match="ffil"):
          datetime_series.fillna(method="ffil")
  def test_fillna_listlike_invalid(self):
      """Lists and tuples are rejected as fill values with a TypeError."""
      ser = Series(np.random.randint(-100, 100, 50))

      list_msg = '"value" parameter must be a scalar or dict, but you passed a "list"'
      with pytest.raises(TypeError, match=list_msg):
          ser.fillna([1, 2])

      tuple_msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"'
      with pytest.raises(TypeError, match=tuple_msg):
          ser.fillna((1, 2))
  def test_fillna_method_and_limit_invalid(self):
      """Every invalid limit/method combination raises one of three ValueErrors."""
      # related GH#9217, make sure limit is an int and greater than 0
      ser = Series([1, 2, 3, None])

      accepted_messages = [
          r"Cannot specify both 'value' and 'method'\.",
          "Limit must be greater than 0",
          "Limit must be an integer",
      ]
      pattern = "|".join(accepted_messages)

      # Same iteration order as a nested loop: limits outer, methods inner.
      combos = [
          (limit, method)
          for limit in [-1, 0, 1.0, 2.0]
          for method in ["backfill", "bfill", "pad", "ffill", None]
      ]
      for limit, method in combos:
          with pytest.raises(ValueError, match=pattern):
              ser.fillna(1, limit=limit, method=method)
  699. def test_fillna_datetime64_with_timezone_tzinfo(self):
  700. # https://github.com/pandas-dev/pandas/issues/38851
  701. # different tzinfos representing UTC treated as equal
  702. ser = Series(date_range("2020", periods=3, tz="UTC"))
  703. expected = ser.copy()
  704. ser[1] = NaT
  705. result = ser.fillna(datetime(2020, 1, 2, tzinfo=timezone.utc))
  706. tm.assert_series_equal(result, expected)
  707. # pre-2.0 we cast to object with mixed tzs, in 2.0 we retain dtype
  708. ts = Timestamp("2000-01-01", tz="US/Pacific")
  709. ser2 = Series(ser._values.tz_convert("dateutil/US/Pacific"))
  710. assert ser2.dtype.kind == "M"
  711. result = ser2.fillna(ts)
  712. expected = Series(
  713. [ser2[0], ts.tz_convert(ser2.dtype.tz), ser2[2]],
  714. dtype=ser2.dtype,
  715. )
  716. tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize(
      "input, input_fillna, expected_data, expected_categories",
      [
          (["A", "B", None, "A"], "B", ["A", "B", "B", "A"], ["A", "B"]),
          (["A", "B", np.nan, "A"], "B", ["A", "B", "B", "A"], ["A", "B"]),
      ],
  )
  def test_fillna_categorical_accept_same_type(
      self, input, input_fillna, expected_data, expected_categories
  ):
      """``Categorical.fillna`` accepts a Series and a Categorical as fill value."""
      # GH32414
      cat = Categorical(input)

      # First fill with a Series, then with the Categorical that produced.
      filled_series = Series(cat).fillna(input_fillna)
      filled_cat = cat.fillna(filled_series)
      result = cat.fillna(filled_cat)

      want = Categorical(expected_data, categories=expected_categories)
      tm.assert_categorical_equal(result, want)
  734. class TestFillnaPad:
  735. def test_fillna_bug(self):
  736. ser = Series([np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"])
  737. filled = ser.fillna(method="ffill")
  738. expected = Series([np.nan, 1.0, 1.0, 3.0, 3.0], ser.index)
  739. tm.assert_series_equal(filled, expected)
  740. filled = ser.fillna(method="bfill")
  741. expected = Series([1.0, 1.0, 3.0, 3.0, np.nan], ser.index)
  742. tm.assert_series_equal(filled, expected)
  743. def test_ffill(self):
  744. ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5))
  745. ts[2] = np.NaN
  746. tm.assert_series_equal(ts.ffill(), ts.fillna(method="ffill"))
  def test_ffill_mixed_dtypes_without_missing_data(self):
      """ffill on a mixed datetime/int Series with no gaps returns it unchanged."""
      # GH#14956
      mixed = Series([datetime(2015, 1, 1, tzinfo=pytz.utc), 1])
      filled = mixed.ffill()
      tm.assert_series_equal(mixed, filled)
  752. def test_bfill(self):
  753. ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5))
  754. ts[2] = np.NaN
  755. tm.assert_series_equal(ts.bfill(), ts.fillna(method="bfill"))
  756. def test_pad_nan(self):
  757. x = Series(
  758. [np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"], dtype=float
  759. )
  760. return_value = x.fillna(method="pad", inplace=True)
  761. assert return_value is None
  762. expected = Series(
  763. [np.nan, 1.0, 1.0, 3.0, 3.0], ["z", "a", "b", "c", "d"], dtype=float
  764. )
  765. tm.assert_series_equal(x[1:], expected[1:])
  766. assert np.isnan(x[0]), np.isnan(expected[0])
  767. def test_series_fillna_limit(self):
  768. index = np.arange(10)
  769. s = Series(np.random.randn(10), index=index)
  770. result = s[:2].reindex(index)
  771. result = result.fillna(method="pad", limit=5)
  772. expected = s[:2].reindex(index).fillna(method="pad")
  773. expected[-3:] = np.nan
  774. tm.assert_series_equal(result, expected)
  775. result = s[-2:].reindex(index)
  776. result = result.fillna(method="bfill", limit=5)
  777. expected = s[-2:].reindex(index).fillna(method="backfill")
  778. expected[:3] = np.nan
  779. tm.assert_series_equal(result, expected)
  780. def test_series_pad_backfill_limit(self):
  781. index = np.arange(10)
  782. s = Series(np.random.randn(10), index=index)
  783. result = s[:2].reindex(index, method="pad", limit=5)
  784. expected = s[:2].reindex(index).fillna(method="pad")
  785. expected[-3:] = np.nan
  786. tm.assert_series_equal(result, expected)
  787. result = s[-2:].reindex(index, method="backfill", limit=5)
  788. expected = s[-2:].reindex(index).fillna(method="backfill")
  789. expected[:3] = np.nan
  790. tm.assert_series_equal(result, expected)
  791. def test_fillna_int(self):
  792. ser = Series(np.random.randint(-100, 100, 50))
  793. return_value = ser.fillna(method="ffill", inplace=True)
  794. assert return_value is None
  795. tm.assert_series_equal(ser.fillna(method="ffill", inplace=False), ser)
  def test_datetime64tz_fillna_round_issue(self):
      """Back-filling a tz-aware datetime keeps its microsecond component."""
      # GH#14872
      data = Series(
          [NaT, NaT, datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc)]
      )

      filled = data.fillna(method="bfill")

      stamp = datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc)
      want = Series([stamp, stamp, stamp])
      tm.assert_series_equal(filled, want)
  810. def test_fillna_parr(self):
  811. # GH-24537
  812. dti = date_range(
  813. Timestamp.max - Timedelta(nanoseconds=10), periods=5, freq="ns"
  814. )
  815. ser = Series(dti.to_period("ns"))
  816. ser[2] = NaT
  817. arr = period_array(
  818. [
  819. Timestamp("2262-04-11 23:47:16.854775797"),
  820. Timestamp("2262-04-11 23:47:16.854775798"),
  821. Timestamp("2262-04-11 23:47:16.854775798"),
  822. Timestamp("2262-04-11 23:47:16.854775800"),
  823. Timestamp("2262-04-11 23:47:16.854775801"),
  824. ],
  825. freq="ns",
  826. )
  827. expected = Series(arr)
  828. filled = ser.fillna(method="pad")
  829. tm.assert_series_equal(filled, expected)
  @pytest.mark.parametrize("func", ["pad", "backfill"])
  def test_pad_backfill_deprecated(self, func):
      """Calling ``Series.pad`` / ``Series.backfill`` emits a FutureWarning."""
      # GH#33396
      ser = Series([1, 2, 3])
      deprecated_method = getattr(ser, func)
      with tm.assert_produces_warning(FutureWarning):
          deprecated_method()