# test_fillna.py (26 KB)
  1. import numpy as np
  2. import pytest
  3. import pandas.util._test_decorators as td
  4. from pandas import (
  5. Categorical,
  6. DataFrame,
  7. DatetimeIndex,
  8. NaT,
  9. PeriodIndex,
  10. Series,
  11. TimedeltaIndex,
  12. Timestamp,
  13. date_range,
  14. to_datetime,
  15. )
  16. import pandas._testing as tm
  17. from pandas.tests.frame.common import _check_mixed_float
  class TestFillNA:
      """Test cases exercising ``fillna`` (and ``ffill``/``bfill``) on DataFrames."""

      def test_fillna_dict_inplace_nonunique_columns(self, using_copy_on_write):
          """Fill in place from a dict when all three columns share the label "A"."""
          df = DataFrame(
              {
                  "A": [np.nan] * 3,
                  "B": [NaT, Timestamp(1), NaT],
                  "C": [np.nan, "foo", 2],
              }
          )
          df.columns = ["A", "A", "A"]
          orig = df[:]

          df.fillna({"A": 2}, inplace=True)
          # The first and third columns can be set inplace, while the second cannot.

          expected = DataFrame(
              {"A": [2.0] * 3, "B": [2, Timestamp(1), 2], "C": [2, "foo", 2]}
          )
          expected.columns = ["A", "A", "A"]
          tm.assert_frame_equal(df, expected)

          # TODO: what's the expected/desired behavior with CoW?
          if not using_copy_on_write:
              assert tm.shares_memory(df.iloc[:, 0], orig.iloc[:, 0])
          assert not tm.shares_memory(df.iloc[:, 1], orig.iloc[:, 1])
          if not using_copy_on_write:
              assert tm.shares_memory(df.iloc[:, 2], orig.iloc[:, 2])

      @td.skip_array_manager_not_yet_implemented
      def test_fillna_on_column_view(self, using_copy_on_write):
          """In-place fill of one column must not split the frame's single block."""
          # GH#46149 avoid unnecessary copies
          arr = np.full((40, 50), np.nan)
          df = DataFrame(arr, copy=False)

          # TODO(CoW): This should raise a chained assignment error
          df[0].fillna(-1, inplace=True)
          if using_copy_on_write:
              assert np.isnan(arr[:, 0]).all()
          else:
              assert (arr[:, 0] == -1).all()

          # i.e. we didn't create a new 49-column block
          assert len(df._mgr.arrays) == 1
          assert np.shares_memory(df.values, arr)

      def test_fillna_datetime(self, datetime_frame):
          """Check scalar fill, "pad" fill and the argument-validation errors."""
          tf = datetime_frame
          tf.loc[tf.index[:5], "A"] = np.nan
          tf.loc[tf.index[-5:], "A"] = np.nan

          zero_filled = datetime_frame.fillna(0)
          assert (zero_filled.loc[zero_filled.index[:5], "A"] == 0).all()

          padded = datetime_frame.fillna(method="pad")
          # The first five rows are expected to stay NaN after padding ...
          assert np.isnan(padded.loc[padded.index[:5], "A"]).all()
          # ... while the last five all carry the same value.
          assert (
              padded.loc[padded.index[-5:], "A"] == padded.loc[padded.index[-5], "A"]
          ).all()

          msg = "Must specify a fill 'value' or 'method'"
          with pytest.raises(ValueError, match=msg):
              datetime_frame.fillna()

          msg = "Cannot specify both 'value' and 'method'"
          with pytest.raises(ValueError, match=msg):
              datetime_frame.fillna(5, method="ffill")

      def test_fillna_mixed_type(self, float_string_frame):
          """Smoke test: value and "pad" fills on ``float_string_frame`` run."""
          mf = float_string_frame
          mf.loc[mf.index[5:20], "foo"] = np.nan
          mf.loc[mf.index[-10:], "A"] = np.nan

          # TODO: make stronger assertion here, GH 25640
          mf.fillna(value=0)
          mf.fillna(method="pad")

      def test_fillna_mixed_float(self, mixed_float_frame):
          """Run ``_check_mixed_float`` on value-filled and pad-filled results."""
          # mixed numeric (but no float16)
          mf = mixed_float_frame.reindex(columns=["A", "B", "D"])
          mf.loc[mf.index[-10:], "A"] = np.nan

          result = mf.fillna(value=0)
          _check_mixed_float(result, dtype={"C": None})

          result = mf.fillna(method="pad")
          _check_mixed_float(result, dtype={"C": None})

      def test_fillna_empty(self):
          """Smoke test: method-based fills on a column of an empty frame run."""
          # empty frame (GH#2778)
          df = DataFrame(columns=["x"])
          for m in ["pad", "backfill"]:
              df.x.fillna(method=m, inplace=True)
              df.x.fillna(method=m)

      def test_fillna_different_dtype(self):
          """Fill an all-NaN column with a string, both as a copy and in place."""
          # with different dtype (GH#3386)
          df = DataFrame(
              [
                  ["a", "a", np.nan, "a"],
                  ["b", "b", np.nan, "b"],
                  ["c", "c", np.nan, "c"],
              ]
          )

          result = df.fillna({2: "foo"})
          expected = DataFrame(
              [
                  ["a", "a", "foo", "a"],
                  ["b", "b", "foo", "b"],
                  ["c", "c", "foo", "c"],
              ]
          )
          tm.assert_frame_equal(result, expected)

          return_value = df.fillna({2: "foo"}, inplace=True)
          tm.assert_frame_equal(df, expected)
          assert return_value is None

      def test_fillna_limit_and_value(self):
          """With ``limit=1`` only the first NaN of each column is replaced."""
          # limit and value
          df = DataFrame(np.random.randn(10, 3))
          df.iloc[2:7, 0] = np.nan
          df.iloc[3:5, 2] = np.nan

          expected = df.copy()
          expected.iloc[2, 0] = 999
          expected.iloc[3, 2] = 999

          result = df.fillna(999, limit=1)
          tm.assert_frame_equal(result, expected)

      def test_fillna_datelike(self):
          """Fill a datetime column using another datetime column as the value."""
          # with datelike
          # GH#6344
          df = DataFrame(
              {
                  "Date": [NaT, Timestamp("2014-1-1")],
                  "Date2": [Timestamp("2013-1-1"), NaT],
              }
          )

          expected = df.copy()
          expected["Date"] = expected["Date"].fillna(df.loc[df.index[0], "Date2"])

          result = df.fillna(value={"Date": df["Date2"]})
          tm.assert_frame_equal(result, expected)

      def test_fillna_tzaware(self):
          """"pad" and "bfill" on a tz-aware column keep the offset-aware value."""
          # with timezone
          # GH#15855
          df = DataFrame({"A": [Timestamp("2012-11-11 00:00:00+01:00"), NaT]})
          exp = DataFrame(
              {
                  "A": [
                      Timestamp("2012-11-11 00:00:00+01:00"),
                      Timestamp("2012-11-11 00:00:00+01:00"),
                  ]
              }
          )
          tm.assert_frame_equal(df.fillna(method="pad"), exp)

          df = DataFrame({"A": [NaT, Timestamp("2012-11-11 00:00:00+01:00")]})
          exp = DataFrame(
              {
                  "A": [
                      Timestamp("2012-11-11 00:00:00+01:00"),
                      Timestamp("2012-11-11 00:00:00+01:00"),
                  ]
              }
          )
          tm.assert_frame_equal(df.fillna(method="bfill"), exp)

      def test_fillna_tzaware_different_column(self):
          """Pad-fill a float column while a tz-aware column sits beside it."""
          # with timezone in another column
          # GH#15522
          df = DataFrame(
              {
                  "A": date_range("20130101", periods=4, tz="US/Eastern"),
                  "B": [1, 2, np.nan, np.nan],
              }
          )
          result = df.fillna(method="pad")
          expected = DataFrame(
              {
                  "A": date_range("20130101", periods=4, tz="US/Eastern"),
                  "B": [1.0, 2.0, 2.0, 2.0],
              }
          )
          tm.assert_frame_equal(result, expected)

      def test_na_actions_categorical(self):
          """``fillna`` and ``dropna`` on a frame holding a Categorical column."""
          cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
          vals = ["a", "b", np.nan, "d"]
          df = DataFrame({"cats": cat, "vals": vals})

          cat2 = Categorical([1, 2, 3, 3], categories=[1, 2, 3])
          vals2 = ["a", "b", "b", "d"]
          df_exp_fill = DataFrame({"cats": cat2, "vals": vals2})

          cat3 = Categorical([1, 2, 3], categories=[1, 2, 3])
          vals3 = ["a", "b", np.nan]
          df_exp_drop_cats = DataFrame({"cats": cat3, "vals": vals3})

          cat4 = Categorical([1, 2], categories=[1, 2, 3])
          vals4 = ["a", "b"]
          df_exp_drop_all = DataFrame({"cats": cat4, "vals": vals4})

          # fillna
          res = df.fillna(value={"cats": 3, "vals": "b"})
          tm.assert_frame_equal(res, df_exp_fill)

          msg = "Cannot setitem on a Categorical with a new category"
          with pytest.raises(TypeError, match=msg):
              df.fillna(value={"cats": 4, "vals": "c"})

          res = df.fillna(method="pad")
          tm.assert_frame_equal(res, df_exp_fill)

          # dropna
          res = df.dropna(subset=["cats"])
          tm.assert_frame_equal(res, df_exp_drop_cats)

          res = df.dropna()
          tm.assert_frame_equal(res, df_exp_drop_all)

          # make sure that fillna takes missing values into account
          c = Categorical([np.nan, "b", np.nan], categories=["a", "b"])
          df = DataFrame({"cats": c, "vals": [1, 2, 3]})

          cat_exp = Categorical(["a", "b", "a"], categories=["a", "b"])
          df_exp = DataFrame({"cats": cat_exp, "vals": [1, 2, 3]})

          res = df.fillna("a")
          tm.assert_frame_equal(res, df_exp)

      def test_fillna_categorical_nan(self):
          """Filling categorical data with NaN/NaT leaves the data unchanged."""
          # GH#14021
          # np.nan should always be a valid filler
          cat = Categorical([np.nan, 2, np.nan])
          val = Categorical([np.nan, np.nan, np.nan])
          df = DataFrame({"cats": cat, "vals": val})

          # GH#32950 df.median() is poorly behaved because there is no
          # Categorical.median
          median = Series({"cats": 2.0, "vals": np.nan})

          res = df.fillna(median)
          v_exp = [np.nan, np.nan, np.nan]
          df_exp = DataFrame({"cats": [2, 2, 2], "vals": v_exp}, dtype="category")
          tm.assert_frame_equal(res, df_exp)

          result = df.cats.fillna(np.nan)
          tm.assert_series_equal(result, df.cats)

          result = df.vals.fillna(np.nan)
          tm.assert_series_equal(result, df.vals)

          idx = DatetimeIndex(
              ["2011-01-01 09:00", "2016-01-01 23:45", "2011-01-01 09:00", NaT, NaT]
          )
          df = DataFrame({"a": Categorical(idx)})
          tm.assert_frame_equal(df.fillna(value=NaT), df)

          idx = PeriodIndex(["2011-01", "2011-01", "2011-01", NaT, NaT], freq="M")
          df = DataFrame({"a": Categorical(idx)})
          tm.assert_frame_equal(df.fillna(value=NaT), df)

          idx = TimedeltaIndex(["1 days", "2 days", "1 days", NaT, NaT])
          df = DataFrame({"a": Categorical(idx)})
          tm.assert_frame_equal(df.fillna(value=NaT), df)

      def test_fillna_downcast(self):
          """``downcast="infer"`` yields integers for scalar and dict fill values."""
          # GH#15277
          # infer int64 from float64
          df = DataFrame({"a": [1.0, np.nan]})
          result = df.fillna(0, downcast="infer")
          expected = DataFrame({"a": [1, 0]})
          tm.assert_frame_equal(result, expected)

          # infer int64 from float64 when fillna value is a dict
          df = DataFrame({"a": [1.0, np.nan]})
          result = df.fillna({"a": 0}, downcast="infer")
          expected = DataFrame({"a": [1, 0]})
          tm.assert_frame_equal(result, expected)

      def test_fillna_downcast_false(self, frame_or_series):
          """``downcast=False`` returns object data equal to the input."""
          # GH#45603 preserve object dtype with downcast=False
          obj = frame_or_series([1, 2, 3], dtype="object")
          result = obj.fillna("", downcast=False)
          tm.assert_equal(result, obj)

      def test_fillna_downcast_noop(self, frame_or_series):
          """``downcast`` is still applied when there is nothing to fill."""
          # GH#45423
          # Two relevant paths:
          #  1) not _can_hold_na (e.g. integer)
          #  2) _can_hold_na + noop + not can_hold_element

          obj = frame_or_series([1, 2, 3], dtype=np.int64)
          res = obj.fillna("foo", downcast=np.dtype(np.int32))
          expected = obj.astype(np.int32)
          tm.assert_equal(res, expected)

          obj2 = obj.astype(np.float64)
          res2 = obj2.fillna("foo", downcast="infer")
          expected2 = obj  # get back int64
          tm.assert_equal(res2, expected2)

          res3 = obj2.fillna("foo", downcast=np.dtype(np.int32))
          tm.assert_equal(res3, expected)

      @pytest.mark.parametrize("columns", [["A", "A", "B"], ["A", "A"]])
      def test_fillna_dictlike_value_duplicate_colnames(self, columns):
          """A dict fill on duplicated column labels fills them without warning."""
          # GH#43476
          df = DataFrame(np.nan, index=[0, 1], columns=columns)
          with tm.assert_produces_warning(None):
              result = df.fillna({"A": 0})

          expected = df.copy()
          expected["A"] = 0.0
          tm.assert_frame_equal(result, expected)

      def test_fillna_dtype_conversion(self):
          """Filling all-missing frames yields frames of the fill value."""
          # make sure that fillna on an empty frame works
          df = DataFrame(index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
          result = df.dtypes
          expected = Series([np.dtype("object")] * 5, index=[1, 2, 3, 4, 5])
          tm.assert_series_equal(result, expected)

          result = df.fillna(1)
          expected = DataFrame(1, index=["A", "B", "C"], columns=[1, 2, 3, 4, 5])
          tm.assert_frame_equal(result, expected)

          # empty block
          df = DataFrame(index=range(3), columns=["A", "B"], dtype="float64")
          result = df.fillna("nan")
          expected = DataFrame("nan", index=range(3), columns=["A", "B"])
          tm.assert_frame_equal(result, expected)

      @pytest.mark.parametrize("val", ["", 1, np.nan, 1.0])
      def test_fillna_dtype_conversion_equiv_replace(self, val):
          """``fillna(val)`` must match ``replace(np.nan, val)``."""
          df = DataFrame({"A": [1, np.nan], "B": [1.0, 2.0]})
          expected = df.replace(np.nan, val)
          result = df.fillna(val)
          tm.assert_frame_equal(result, expected)

      def test_fillna_datetime_columns(self):
          """Fill with "?" on frames that also contain a datetime column."""
          # GH#7095
          df = DataFrame(
              {
                  "A": [-1, -2, np.nan],
                  "B": date_range("20130101", periods=3),
                  "C": ["foo", "bar", None],
                  "D": ["foo2", "bar2", None],
              },
              index=date_range("20130110", periods=3),
          )
          result = df.fillna("?")
          expected = DataFrame(
              {
                  "A": [-1, -2, "?"],
                  "B": date_range("20130101", periods=3),
                  "C": ["foo", "bar", "?"],
                  "D": ["foo2", "bar2", "?"],
              },
              index=date_range("20130110", periods=3),
          )
          tm.assert_frame_equal(result, expected)

          df = DataFrame(
              {
                  "A": [-1, -2, np.nan],
                  "B": [Timestamp("2013-01-01"), Timestamp("2013-01-02"), NaT],
                  "C": ["foo", "bar", None],
                  "D": ["foo2", "bar2", None],
              },
              index=date_range("20130110", periods=3),
          )
          result = df.fillna("?")
          expected = DataFrame(
              {
                  "A": [-1, -2, "?"],
                  "B": [Timestamp("2013-01-01"), Timestamp("2013-01-02"), "?"],
                  "C": ["foo", "bar", "?"],
                  "D": ["foo2", "bar2", "?"],
              },
              index=date_range("20130110", periods=3),
          )
          tm.assert_frame_equal(result, expected)

      def test_ffill(self, datetime_frame):
          """``ffill()`` must match ``fillna(method="ffill")``."""
          datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan
          datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan

          tm.assert_frame_equal(
              datetime_frame.ffill(), datetime_frame.fillna(method="ffill")
          )

      def test_bfill(self, datetime_frame):
          """``bfill()`` must match ``fillna(method="bfill")``."""
          datetime_frame.loc[datetime_frame.index[:5], "A"] = np.nan
          datetime_frame.loc[datetime_frame.index[-5:], "A"] = np.nan

          tm.assert_frame_equal(
              datetime_frame.bfill(), datetime_frame.fillna(method="bfill")
          )

      def test_frame_pad_backfill_limit(self):
          """``reindex(..., method=..., limit=5)`` fills at most five rows."""
          index = np.arange(10)
          df = DataFrame(np.random.randn(10, 4), index=index)

          result = df[:2].reindex(index, method="pad", limit=5)

          expected = df[:2].reindex(index).fillna(method="pad")
          expected.iloc[-3:] = np.nan
          tm.assert_frame_equal(result, expected)

          result = df[-2:].reindex(index, method="backfill", limit=5)

          expected = df[-2:].reindex(index).fillna(method="backfill")
          expected.iloc[:3] = np.nan
          tm.assert_frame_equal(result, expected)

      def test_frame_fillna_limit(self):
          """``fillna(method=..., limit=5)`` fills at most five rows."""
          index = np.arange(10)
          df = DataFrame(np.random.randn(10, 4), index=index)

          result = df[:2].reindex(index)
          result = result.fillna(method="pad", limit=5)

          expected = df[:2].reindex(index).fillna(method="pad")
          expected.iloc[-3:] = np.nan
          tm.assert_frame_equal(result, expected)

          result = df[-2:].reindex(index)
          result = result.fillna(method="backfill", limit=5)

          expected = df[-2:].reindex(index).fillna(method="backfill")
          expected.iloc[:3] = np.nan
          tm.assert_frame_equal(result, expected)

      def test_fillna_skip_certain_blocks(self):
          """Smoke test: ``fillna(np.nan)`` on an integer frame does not raise."""
          # don't try to fill boolean, int blocks

          df = DataFrame(np.random.randn(10, 4).astype(int))

          # it works!
          df.fillna(np.nan)

      @pytest.mark.parametrize("type", [int, float])
      def test_fillna_positive_limit(self, type):
          """A negative ``limit`` raises ``ValueError``."""
          df = DataFrame(np.random.randn(10, 4)).astype(type)

          msg = "Limit must be greater than 0"
          with pytest.raises(ValueError, match=msg):
              df.fillna(0, limit=-5)

      @pytest.mark.parametrize("type", [int, float])
      def test_fillna_integer_limit(self, type):
          """A non-integer ``limit`` raises ``ValueError``."""
          df = DataFrame(np.random.randn(10, 4)).astype(type)

          msg = "Limit must be an integer"
          with pytest.raises(ValueError, match=msg):
              df.fillna(0, limit=0.5)

      def test_fillna_inplace(self):
          """``inplace=True`` mutates the frame and returns ``None``."""
          df = DataFrame(np.random.randn(10, 4))
          df.loc[:4, 1] = np.nan
          df.loc[-4:, 3] = np.nan

          expected = df.fillna(value=0)
          assert expected is not df

          df.fillna(value=0, inplace=True)
          tm.assert_frame_equal(df, expected)

          expected = df.fillna(value={0: 0}, inplace=True)
          assert expected is None

          df.loc[:4, 1] = np.nan
          df.loc[-4:, 3] = np.nan
          expected = df.fillna(method="ffill")
          assert expected is not df

          df.fillna(method="ffill", inplace=True)
          tm.assert_frame_equal(df, expected)

      def test_fillna_dict_series(self):
          """Dict and Series fill values are applied column by column."""
          df = DataFrame(
              {
                  "a": [np.nan, 1, 2, np.nan, np.nan],
                  "b": [1, 2, 3, np.nan, np.nan],
                  "c": [np.nan, 1, 2, 3, 4],
              }
          )

          result = df.fillna({"a": 0, "b": 5})

          expected = df.copy()
          expected["a"] = expected["a"].fillna(0)
          expected["b"] = expected["b"].fillna(5)
          tm.assert_frame_equal(result, expected)

          # it works
          result = df.fillna({"a": 0, "b": 5, "d": 7})

          # Series treated same as dict
          result = df.fillna(df.max())
          expected = df.fillna(df.max().to_dict())
          tm.assert_frame_equal(result, expected)

          # disable this for now
          with pytest.raises(NotImplementedError, match="column by column"):
              df.fillna(df.max(1), axis=1)

      def test_fillna_dataframe(self):
          """A DataFrame fill value only fills shared index/column positions."""
          # GH#8377
          df = DataFrame(
              {
                  "a": [np.nan, 1, 2, np.nan, np.nan],
                  "b": [1, 2, 3, np.nan, np.nan],
                  "c": [np.nan, 1, 2, 3, 4],
              },
              index=list("VWXYZ"),
          )

          # df2 may have different index and columns
          df2 = DataFrame(
              {
                  "a": [np.nan, 10, 20, 30, 40],
                  "b": [50, 60, 70, 80, 90],
                  "foo": ["bar"] * 5,
              },
              index=list("VWXuZ"),
          )

          result = df.fillna(df2)

          # only those columns and indices which are shared get filled
          expected = DataFrame(
              {
                  "a": [np.nan, 1, 2, np.nan, 40],
                  "b": [1, 2, 3, np.nan, 90],
                  "c": [np.nan, 1, 2, 3, 4],
              },
              index=list("VWXYZ"),
          )

          tm.assert_frame_equal(result, expected)

      def test_fillna_columns(self):
          """``axis=1`` forward fill must match filling the transposed frame."""
          arr = np.random.randn(10, 10)
          arr[:, ::2] = np.nan
          df = DataFrame(arr)

          result = df.fillna(method="ffill", axis=1)
          expected = df.T.fillna(method="pad").T
          tm.assert_frame_equal(result, expected)

          df.insert(6, "foo", 5)
          result = df.fillna(method="ffill", axis=1)
          expected = df.astype(float).fillna(method="ffill", axis=1)
          tm.assert_frame_equal(result, expected)

      def test_fillna_invalid_method(self, float_frame):
          """An unknown ``method`` name raises ``ValueError`` mentioning it."""
          with pytest.raises(ValueError, match="ffil"):
              float_frame.fillna(method="ffil")

      def test_fillna_invalid_value(self, float_frame):
          """List, tuple and (for a Series) DataFrame values raise ``TypeError``."""
          # list
          msg = '"value" parameter must be a scalar or dict, but you passed a "{}"'
          with pytest.raises(TypeError, match=msg.format("list")):
              float_frame.fillna([1, 2])
          # tuple
          with pytest.raises(TypeError, match=msg.format("tuple")):
              float_frame.fillna((1, 2))
          # frame with series
          msg = (
              '"value" parameter must be a scalar, dict or Series, but you '
              'passed a "DataFrame"'
          )
          with pytest.raises(TypeError, match=msg):
              float_frame.iloc[:, 0].fillna(float_frame)

      def test_fillna_col_reordering(self):
          """A forward fill keeps the column order of the input frame."""
          cols = ["COL." + str(i) for i in range(5, 0, -1)]
          data = np.random.rand(20, 5)
          df = DataFrame(index=range(20), columns=cols, data=data)
          filled = df.fillna(method="ffill")
          assert df.columns.tolist() == filled.columns.tolist()

      def test_fill_corner(self, float_frame, float_string_frame):
          """Scalar fill on a mixed frame, then on a frame with no columns."""
          mf = float_string_frame
          mf.loc[mf.index[5:20], "foo"] = np.nan
          mf.loc[mf.index[-10:], "A"] = np.nan

          filled = float_string_frame.fillna(value=0)
          assert (filled.loc[filled.index[5:20], "foo"] == 0).all()
          del float_string_frame["foo"]

          empty_float = float_frame.reindex(columns=[])

          # TODO(wesm): unused?
          result = empty_float.fillna(value=0)  # noqa

      def test_fillna_downcast_dict(self):
          """``downcast`` may be given as a per-column dict."""
          # GH#40809
          df = DataFrame({"col1": [1, np.nan]})
          result = df.fillna({"col1": 2}, downcast={"col1": "int64"})
          expected = DataFrame({"col1": [1, 2]})
          tm.assert_frame_equal(result, expected)

      def test_fillna_with_columns_and_limit(self):
          """``axis=1`` value fills respect ``limit`` (checked for 1 and 2)."""
          # GH40989
          df = DataFrame(
              [
                  [np.nan, 2, np.nan, 0],
                  [3, 4, np.nan, 1],
                  [np.nan, np.nan, np.nan, 5],
                  [np.nan, 3, np.nan, 4],
              ],
              columns=list("ABCD"),
          )
          result = df.fillna(axis=1, value=100, limit=1)
          result2 = df.fillna(axis=1, value=100, limit=2)

          expected = DataFrame(
              {
                  "A": Series([100, 3, 100, 100], dtype="float64"),
                  "B": [2, 4, np.nan, 3],
                  "C": [np.nan, 100, np.nan, np.nan],
                  "D": Series([0, 1, 5, 4], dtype="float64"),
              },
              index=[0, 1, 2, 3],
          )
          expected2 = DataFrame(
              {
                  "A": Series([100, 3, 100, 100], dtype="float64"),
                  "B": Series([2, 4, 100, 3], dtype="float64"),
                  "C": [100, 100, np.nan, 100],
                  "D": Series([0, 1, 5, 4], dtype="float64"),
              },
              index=[0, 1, 2, 3],
          )

          tm.assert_frame_equal(result, expected)
          tm.assert_frame_equal(result2, expected2)

      def test_fillna_datetime_inplace(self):
          """In-place fill of datetime columns with NaN leaves the frame as is."""
          # GH#48863
          df = DataFrame(
              {
                  "date1": to_datetime(["2018-05-30", None]),
                  "date2": to_datetime(["2018-09-30", None]),
              }
          )
          expected = df.copy()
          df.fillna(np.nan, inplace=True)
          tm.assert_frame_equal(df, expected)

      def test_fillna_inplace_with_columns_limit_and_value(self):
          """In-place ``axis=1`` fill with a limit matches the copying call."""
          # GH40989
          df = DataFrame(
              [
                  [np.nan, 2, np.nan, 0],
                  [3, 4, np.nan, 1],
                  [np.nan, np.nan, np.nan, 5],
                  [np.nan, 3, np.nan, 4],
              ],
              columns=list("ABCD"),
          )

          expected = df.fillna(axis=1, value=100, limit=1)
          assert expected is not df

          df.fillna(axis=1, value=100, limit=1, inplace=True)
          tm.assert_frame_equal(df, expected)

      @td.skip_array_manager_invalid_test
      @pytest.mark.parametrize("val", [-1, {"x": -1, "y": -1}])
      def test_inplace_dict_update_view(self, val, using_copy_on_write):
          """Check whether a ``df[:]`` slice observes an in-place fill."""
          # GH#47188
          df = DataFrame({"x": [np.nan, 2], "y": [np.nan, 2]})
          df_orig = df.copy()
          result_view = df[:]
          df.fillna(val, inplace=True)
          expected = DataFrame({"x": [-1, 2.0], "y": [-1.0, 2]})
          tm.assert_frame_equal(df, expected)
          if using_copy_on_write:
              tm.assert_frame_equal(result_view, df_orig)
          else:
              tm.assert_frame_equal(result_view, expected)

      def test_single_block_df_with_horizontal_axis(self):
          """``axis=1`` fill with ``limit=1`` on an all-float frame."""
          # GH 47713
          df = DataFrame(
              {
                  "col1": [5, 0, np.nan, 10, np.nan],
                  "col2": [7, np.nan, np.nan, 5, 3],
                  "col3": [12, np.nan, 1, 2, 0],
                  "col4": [np.nan, 1, 1, np.nan, 18],
              }
          )
          result = df.fillna(50, limit=1, axis=1)
          expected = DataFrame(
              [
                  [5.0, 7.0, 12.0, 50.0],
                  [0.0, 50.0, np.nan, 1.0],
                  [50.0, np.nan, 1.0, 1.0],
                  [10.0, 5.0, 2.0, 50.0],
                  [50.0, 3.0, 0.0, 18.0],
              ],
              columns=["col1", "col2", "col3", "col4"],
          )
          tm.assert_frame_equal(result, expected)

      def test_fillna_with_multi_index_frame(self):
          """Dict keys may name a top column level or a full column tuple."""
          # GH 47649
          pdf = DataFrame(
              {
                  ("x", "a"): [np.nan, 2.0, 3.0],
                  ("x", "b"): [1.0, 2.0, np.nan],
                  ("y", "c"): [1.0, 2.0, np.nan],
              }
          )
          expected = DataFrame(
              {
                  ("x", "a"): [-1.0, 2.0, 3.0],
                  ("x", "b"): [1.0, 2.0, -1.0],
                  ("y", "c"): [1.0, 2.0, np.nan],
              }
          )
          tm.assert_frame_equal(pdf.fillna({"x": -1}), expected)
          tm.assert_frame_equal(pdf.fillna({"x": -1, ("x", "b"): -2}), expected)

          # With the tuple key listed first, ("x", "b") gets -2 instead of -1.
          expected = DataFrame(
              {
                  ("x", "a"): [-1.0, 2.0, 3.0],
                  ("x", "b"): [1.0, 2.0, -2.0],
                  ("y", "c"): [1.0, 2.0, np.nan],
              }
          )
          tm.assert_frame_equal(pdf.fillna({("x", "b"): -2, "x": -1}), expected)
  def test_fillna_nonconsolidated_frame():
      """``fillna(0)`` on a pivoted frame must leave no missing values behind."""
      # https://github.com/pandas-dev/pandas/issues/36495
      df = DataFrame(
          [
              [1, 1, 1, 1.0],
              [2, 2, 2, 2.0],
              [3, 3, 3, 3.0],
          ],
          columns=["i1", "i2", "i3", "f1"],
      )
      # The pivot leaves NaN in every cell whose (i1, i2) pair has no row.
      df_nonconsol = df.pivot(index="i1", columns="i2")
      result = df_nonconsol.fillna(0)
      assert result.isna().sum().sum() == 0
  def test_fillna_nones_inplace():
      """In-place dict fill of an all-``None`` frame must not emit a warning."""
      # GH 48480
      df = DataFrame(
          [[None, None], [None, None]],
          columns=["A", "B"],
      )
      # ``False`` asks the helper to check that no warning is produced.
      with tm.assert_produces_warning(False):
          df.fillna(value={"A": 1, "B": 2}, inplace=True)

      expected = DataFrame([[1, 2], [1, 2]], columns=["A", "B"])
      tm.assert_frame_equal(df, expected)
  @pytest.mark.parametrize("func", ["pad", "backfill"])
  def test_pad_backfill_deprecated(func):
      """Calling ``DataFrame.pad`` / ``DataFrame.backfill`` emits a FutureWarning."""
      # GH#33396
      df = DataFrame({"a": [1, 2, 3]})
      with tm.assert_produces_warning(FutureWarning):
          # Look the method up by name so one test body covers both aliases.
          getattr(df, func)()