12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977 |
- from datetime import (
- datetime,
- timedelta,
- timezone,
- )
- import numpy as np
- import pytest
- import pytz
- from pandas import (
- Categorical,
- DataFrame,
- DatetimeIndex,
- NaT,
- Period,
- Series,
- Timedelta,
- Timestamp,
- date_range,
- isna,
- )
- import pandas._testing as tm
- from pandas.core.arrays import period_array
class TestSeriesFillNA:
    """Tests for ``Series.fillna`` (and the frame equivalent where shared).

    Covers value- and method-based filling for numeric, datetime64,
    timezone-aware datetime64, timedelta64, period and categorical data,
    plus the error paths for invalid arguments.
    """

    def test_fillna_nat(self):
        """NaT in ``M8[ns]`` data is filled the same way by method and value."""
        series = Series([0, 1, 2, NaT._value], dtype="M8[ns]")

        filled = series.fillna(method="pad")
        filled2 = series.fillna(value=series.values[2])

        expected = series.copy()
        expected.iloc[3] = expected.iloc[2]

        tm.assert_series_equal(filled, expected)
        tm.assert_series_equal(filled2, expected)

        df = DataFrame({"A": series})
        filled = df.fillna(method="pad")
        filled2 = df.fillna(value=series.values[2])
        expected = DataFrame({"A": expected})
        tm.assert_frame_equal(filled, expected)
        tm.assert_frame_equal(filled2, expected)

        # Same checks with the missing value at the front and a backward fill.
        series = Series([NaT._value, 0, 1, 2], dtype="M8[ns]")

        filled = series.fillna(method="bfill")
        filled2 = series.fillna(value=series[1])

        expected = series.copy()
        expected[0] = expected[1]

        tm.assert_series_equal(filled, expected)
        tm.assert_series_equal(filled2, expected)

        df = DataFrame({"A": series})
        filled = df.fillna(method="bfill")
        filled2 = df.fillna(value=series[1])
        expected = DataFrame({"A": expected})
        tm.assert_frame_equal(filled, expected)
        tm.assert_frame_equal(filled2, expected)

    def test_fillna_value_or_method(self, datetime_series):
        """Passing both ``value`` and ``method`` raises ValueError."""
        msg = "Cannot specify both 'value' and 'method'"
        with pytest.raises(ValueError, match=msg):
            datetime_series.fillna(value=0, method="ffill")

    def test_fillna(self):
        """Basic ffill / backfill / scalar fill on a float series."""
        ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5))

        # Nothing is missing yet, so a forward fill is a no-op.
        tm.assert_series_equal(ts, ts.fillna(method="ffill"))

        # Explicit positional set; ``np.nan`` rather than the ``np.NaN`` alias,
        # which NumPy 2.0 removed.
        ts.iloc[2] = np.nan

        exp = Series([0.0, 1.0, 1.0, 3.0, 4.0], index=ts.index)
        tm.assert_series_equal(ts.fillna(method="ffill"), exp)

        exp = Series([0.0, 1.0, 3.0, 3.0, 4.0], index=ts.index)
        tm.assert_series_equal(ts.fillna(method="backfill"), exp)

        exp = Series([0.0, 1.0, 5.0, 3.0, 4.0], index=ts.index)
        tm.assert_series_equal(ts.fillna(value=5), exp)

        msg = "Must specify a fill 'value' or 'method'"
        with pytest.raises(ValueError, match=msg):
            ts.fillna()

    def test_fillna_nonscalar(self):
        """Filling with a dict or Series only fills matching labels."""
        # GH#5703
        s1 = Series([np.nan])
        s2 = Series([1])
        result = s1.fillna(s2)
        expected = Series([1.0])
        tm.assert_series_equal(result, expected)

        result = s1.fillna({})
        tm.assert_series_equal(result, s1)

        result = s1.fillna(Series((), dtype=object))
        tm.assert_series_equal(result, s1)

        result = s2.fillna(s1)
        tm.assert_series_equal(result, s2)

        result = s1.fillna({0: 1})
        tm.assert_series_equal(result, expected)

        result = s1.fillna({1: 1})
        tm.assert_series_equal(result, Series([np.nan]))

        result = s1.fillna({0: 1, 1: 1})
        tm.assert_series_equal(result, expected)

        result = s1.fillna(Series({0: 1, 1: 1}))
        tm.assert_series_equal(result, expected)

        result = s1.fillna(Series({0: 1, 1: 1}, index=[4, 5]))
        tm.assert_series_equal(result, s1)

    def test_fillna_aligns(self):
        """A Series fill value is matched on index labels, not position."""
        s1 = Series([0, 1, 2], list("abc"))
        s2 = Series([0, np.nan, 2], list("bac"))
        result = s2.fillna(s1)
        expected = Series([0, 0, 2.0], list("bac"))
        tm.assert_series_equal(result, expected)

    def test_fillna_limit(self):
        """``limit`` caps how many missing entries a scalar fill replaces."""
        ser = Series(np.nan, index=[0, 1, 2])
        result = ser.fillna(999, limit=1)
        expected = Series([999, np.nan, np.nan], index=[0, 1, 2])
        tm.assert_series_equal(result, expected)

        result = ser.fillna(999, limit=2)
        expected = Series([999, 999, np.nan], index=[0, 1, 2])
        tm.assert_series_equal(result, expected)

    def test_fillna_dont_cast_strings(self):
        """Numeric-looking strings are inserted as strings (object dtype)."""
        # GH#9043
        # make sure a string representation of int/float values can be filled
        # correctly without raising errors or being converted
        vals = ["0", "1.5", "-0.3"]
        for val in vals:
            ser = Series([0, 1, np.nan, np.nan, 4], dtype="float64")
            result = ser.fillna(val)
            expected = Series([0, 1, val, val, 4], dtype="object")
            tm.assert_series_equal(result, expected)

    def test_fillna_consistency(self):
        """fillna, where and setitem agree when the fill value forces object."""
        # GH#16402
        # fillna with a tz aware to a tz-naive, should result in object
        ser = Series([Timestamp("20130101"), NaT])

        result = ser.fillna(Timestamp("20130101", tz="US/Eastern"))
        expected = Series(
            [Timestamp("20130101"), Timestamp("2013-01-01", tz="US/Eastern")],
            dtype="object",
        )
        tm.assert_series_equal(result, expected)

        # Checked once; a second, byte-identical copy of this check was removed.
        result = ser.where([True, False], Timestamp("20130101", tz="US/Eastern"))
        tm.assert_series_equal(result, expected)

        # with a non-datetime
        result = ser.fillna("foo")
        expected = Series([Timestamp("20130101"), "foo"])
        tm.assert_series_equal(result, expected)

        # assignment
        ser2 = ser.copy()
        ser2[1] = "foo"
        tm.assert_series_equal(ser2, expected)

    def test_fillna_downcast(self):
        """``downcast="infer"`` turns a filled float64 series into int64."""
        # GH#15277
        # infer int64 from float64
        ser = Series([1.0, np.nan])
        result = ser.fillna(0, downcast="infer")
        expected = Series([1, 0])
        tm.assert_series_equal(result, expected)

        # infer int64 from float64 when fillna value is a dict
        ser = Series([1.0, np.nan])
        result = ser.fillna({1: 0}, downcast="infer")
        expected = Series([1, 0])
        tm.assert_series_equal(result, expected)

    def test_fillna_downcast_infer_objects_to_numeric(self):
        """``downcast="infer"`` on object dtype infers int64 or float64."""
        # GH#44241 if we have object-dtype, 'downcast="infer"' should
        #  _actually_ infer

        arr = np.arange(5).astype(object)
        arr[3] = np.nan

        ser = Series(arr)

        res = ser.fillna(3, downcast="infer")
        expected = Series(np.arange(5), dtype=np.int64)
        tm.assert_series_equal(res, expected)

        res = ser.ffill(downcast="infer")
        expected = Series([0, 1, 2, 2, 4], dtype=np.int64)
        tm.assert_series_equal(res, expected)

        res = ser.bfill(downcast="infer")
        expected = Series([0, 1, 2, 4, 4], dtype=np.int64)
        tm.assert_series_equal(res, expected)

        # with a non-round float present, we will downcast to float64
        ser[2] = 2.5

        expected = Series([0, 1, 2.5, 3, 4], dtype=np.float64)
        res = ser.fillna(3, downcast="infer")
        tm.assert_series_equal(res, expected)

        res = ser.ffill(downcast="infer")
        expected = Series([0, 1, 2.5, 2.5, 4], dtype=np.float64)
        tm.assert_series_equal(res, expected)

        res = ser.bfill(downcast="infer")
        expected = Series([0, 1, 2.5, 4, 4], dtype=np.float64)
        tm.assert_series_equal(res, expected)

    def test_timedelta_fillna(self, frame_or_series):
        """Filling timedelta64 data with timedelta-like, int and NaT values."""
        # GH#3371
        ser = Series(
            [
                Timestamp("20130101"),
                Timestamp("20130101"),
                Timestamp("20130102"),
                Timestamp("20130103 9:01:01"),
            ]
        )
        td = ser.diff()
        obj = frame_or_series(td)

        # reg fillna
        result = obj.fillna(Timedelta(seconds=0))
        expected = Series(
            [
                timedelta(0),
                timedelta(0),
                timedelta(1),
                timedelta(days=1, seconds=9 * 3600 + 60 + 1),
            ]
        )
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)

        # GH#45746 pre-1.? ints were interpreted as seconds.  then that was
        #  deprecated and changed to raise. In 2.0 it casts to common dtype,
        #  consistent with every other dtype's behavior
        res = obj.fillna(1)
        expected = obj.astype(object).fillna(1)
        tm.assert_equal(res, expected)

        result = obj.fillna(Timedelta(seconds=1))
        expected = Series(
            [
                timedelta(seconds=1),
                timedelta(0),
                timedelta(1),
                timedelta(days=1, seconds=9 * 3600 + 60 + 1),
            ]
        )
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)

        result = obj.fillna(timedelta(days=1, seconds=1))
        expected = Series(
            [
                timedelta(days=1, seconds=1),
                timedelta(0),
                timedelta(1),
                timedelta(days=1, seconds=9 * 3600 + 60 + 1),
            ]
        )
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)

        result = obj.fillna(np.timedelta64(10**9))
        expected = Series(
            [
                timedelta(seconds=1),
                timedelta(0),
                timedelta(1),
                timedelta(days=1, seconds=9 * 3600 + 60 + 1),
            ]
        )
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)

        result = obj.fillna(NaT)
        expected = Series(
            [
                NaT,
                timedelta(0),
                timedelta(1),
                timedelta(days=1, seconds=9 * 3600 + 60 + 1),
            ],
            dtype="m8[ns]",
        )
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)

        # ffill
        td[2] = np.nan
        obj = frame_or_series(td)
        result = obj.ffill()
        expected = td.fillna(Timedelta(seconds=0))
        expected[0] = np.nan
        expected = frame_or_series(expected)

        tm.assert_equal(result, expected)

        # bfill
        td[2] = np.nan
        obj = frame_or_series(td)
        result = obj.bfill()
        expected = td.fillna(Timedelta(seconds=0))
        expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1)
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)

    def test_datetime64_fillna(self):
        """ffill and bfill on a datetime64 series with one missing entry."""
        ser = Series(
            [
                Timestamp("20130101"),
                Timestamp("20130101"),
                Timestamp("20130102"),
                Timestamp("20130103 9:01:01"),
            ]
        )
        ser[2] = np.nan

        # ffill
        result = ser.ffill()
        expected = Series(
            [
                Timestamp("20130101"),
                Timestamp("20130101"),
                Timestamp("20130101"),
                Timestamp("20130103 9:01:01"),
            ]
        )
        tm.assert_series_equal(result, expected)

        # bfill
        result = ser.bfill()
        expected = Series(
            [
                Timestamp("20130101"),
                Timestamp("20130101"),
                Timestamp("20130103 9:01:01"),
                Timestamp("20130103 9:01:01"),
            ]
        )
        tm.assert_series_equal(result, expected)

    def test_datetime64_fillna_backfill(self):
        """Backfill keeps the microsecond component of the fill value."""
        # GH#6587
        # make sure that we are treating as integer when filling
        ser = Series([NaT, NaT, "2013-08-05 15:30:00.000001"], dtype="M8[ns]")

        expected = Series(
            [
                "2013-08-05 15:30:00.000001",
                "2013-08-05 15:30:00.000001",
                "2013-08-05 15:30:00.000001",
            ],
            dtype="M8[ns]",
        )
        result = ser.fillna(method="backfill")
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"])
    def test_datetime64_tz_fillna(self, tz):
        """Fill naive and tz-aware datetime series with naive/aware/str values.

        After every fill the source series is re-checked against ``null_loc``
        to confirm ``fillna`` did not modify it.
        """
        # DatetimeLikeBlock
        ser = Series(
            [
                Timestamp("2011-01-01 10:00"),
                NaT,
                Timestamp("2011-01-03 10:00"),
                NaT,
            ]
        )
        null_loc = Series([False, True, False, True])

        result = ser.fillna(Timestamp("2011-01-02 10:00"))
        expected = Series(
            [
                Timestamp("2011-01-01 10:00"),
                Timestamp("2011-01-02 10:00"),
                Timestamp("2011-01-03 10:00"),
                Timestamp("2011-01-02 10:00"),
            ]
        )
        tm.assert_series_equal(expected, result)
        # check ser is not changed
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz))
        expected = Series(
            [
                Timestamp("2011-01-01 10:00"),
                Timestamp("2011-01-02 10:00", tz=tz),
                Timestamp("2011-01-03 10:00"),
                Timestamp("2011-01-02 10:00", tz=tz),
            ]
        )
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna("AAA")
        expected = Series(
            [
                Timestamp("2011-01-01 10:00"),
                "AAA",
                Timestamp("2011-01-03 10:00"),
                "AAA",
            ],
            dtype=object,
        )
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna(
            {
                1: Timestamp("2011-01-02 10:00", tz=tz),
                3: Timestamp("2011-01-04 10:00"),
            }
        )
        expected = Series(
            [
                Timestamp("2011-01-01 10:00"),
                Timestamp("2011-01-02 10:00", tz=tz),
                Timestamp("2011-01-03 10:00"),
                Timestamp("2011-01-04 10:00"),
            ]
        )
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna(
            {1: Timestamp("2011-01-02 10:00"), 3: Timestamp("2011-01-04 10:00")}
        )
        expected = Series(
            [
                Timestamp("2011-01-01 10:00"),
                Timestamp("2011-01-02 10:00"),
                Timestamp("2011-01-03 10:00"),
                Timestamp("2011-01-04 10:00"),
            ]
        )
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        # DatetimeTZBlock
        idx = DatetimeIndex(["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz=tz)
        ser = Series(idx)
        assert ser.dtype == f"datetime64[ns, {tz}]"
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna(Timestamp("2011-01-02 10:00"))
        expected = Series(
            [
                Timestamp("2011-01-01 10:00", tz=tz),
                Timestamp("2011-01-02 10:00"),
                Timestamp("2011-01-03 10:00", tz=tz),
                Timestamp("2011-01-02 10:00"),
            ]
        )
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz))
        idx = DatetimeIndex(
            [
                "2011-01-01 10:00",
                "2011-01-02 10:00",
                "2011-01-03 10:00",
                "2011-01-02 10:00",
            ],
            tz=tz,
        )
        expected = Series(idx)
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz).to_pydatetime())
        idx = DatetimeIndex(
            [
                "2011-01-01 10:00",
                "2011-01-02 10:00",
                "2011-01-03 10:00",
                "2011-01-02 10:00",
            ],
            tz=tz,
        )
        expected = Series(idx)
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna("AAA")
        expected = Series(
            [
                Timestamp("2011-01-01 10:00", tz=tz),
                "AAA",
                Timestamp("2011-01-03 10:00", tz=tz),
                "AAA",
            ],
            dtype=object,
        )
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna(
            {
                1: Timestamp("2011-01-02 10:00", tz=tz),
                3: Timestamp("2011-01-04 10:00"),
            }
        )
        expected = Series(
            [
                Timestamp("2011-01-01 10:00", tz=tz),
                Timestamp("2011-01-02 10:00", tz=tz),
                Timestamp("2011-01-03 10:00", tz=tz),
                Timestamp("2011-01-04 10:00"),
            ]
        )
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        result = ser.fillna(
            {
                1: Timestamp("2011-01-02 10:00", tz=tz),
                3: Timestamp("2011-01-04 10:00", tz=tz),
            }
        )
        expected = Series(
            [
                Timestamp("2011-01-01 10:00", tz=tz),
                Timestamp("2011-01-02 10:00", tz=tz),
                Timestamp("2011-01-03 10:00", tz=tz),
                Timestamp("2011-01-04 10:00", tz=tz),
            ]
        )
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        # filling with a naive/other zone, coerce to object
        result = ser.fillna(Timestamp("20130101"))
        expected = Series(
            [
                Timestamp("2011-01-01 10:00", tz=tz),
                Timestamp("2013-01-01"),
                Timestamp("2011-01-03 10:00", tz=tz),
                Timestamp("2013-01-01"),
            ]
        )
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

        # pre-2.0 fillna with mixed tzs would cast to object, in 2.0
        #  it retains dtype.
        result = ser.fillna(Timestamp("20130101", tz="US/Pacific"))
        expected = Series(
            [
                Timestamp("2011-01-01 10:00", tz=tz),
                Timestamp("2013-01-01", tz="US/Pacific").tz_convert(tz),
                Timestamp("2011-01-03 10:00", tz=tz),
                Timestamp("2013-01-01", tz="US/Pacific").tz_convert(tz),
            ]
        )
        tm.assert_series_equal(expected, result)
        tm.assert_series_equal(isna(ser), null_loc)

    def test_fillna_dt64tz_with_method(self):
        """pad / bfill work on a series of fixed-offset timestamps."""
        # with timezone
        # GH#15855
        ser = Series([Timestamp("2012-11-11 00:00:00+01:00"), NaT])
        exp = Series(
            [
                Timestamp("2012-11-11 00:00:00+01:00"),
                Timestamp("2012-11-11 00:00:00+01:00"),
            ]
        )
        tm.assert_series_equal(ser.fillna(method="pad"), exp)

        ser = Series([NaT, Timestamp("2012-11-11 00:00:00+01:00")])
        exp = Series(
            [
                Timestamp("2012-11-11 00:00:00+01:00"),
                Timestamp("2012-11-11 00:00:00+01:00"),
            ]
        )
        tm.assert_series_equal(ser.fillna(method="bfill"), exp)

    def test_fillna_pytimedelta(self):
        """A stdlib ``timedelta`` is accepted as the fill value."""
        # GH#8209
        ser = Series([np.nan, Timedelta("1 days")], index=["A", "B"])

        result = ser.fillna(timedelta(1))
        expected = Series(Timedelta("1 days"), index=["A", "B"])
        tm.assert_series_equal(result, expected)

    def test_fillna_period(self):
        """Filling a period series with a Period keeps ``Period[M]`` dtype."""
        # GH#13737
        ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")])

        res = ser.fillna(Period("2012-01", freq="M"))
        exp = Series([Period("2011-01", freq="M"), Period("2012-01", freq="M")])
        tm.assert_series_equal(res, exp)
        assert res.dtype == "Period[M]"

    def test_fillna_dt64_timestamp(self, frame_or_series):
        """Fill datetime64 data with a Timestamp; filling with NaT is a no-op."""
        ser = Series(
            [
                Timestamp("20130101"),
                Timestamp("20130101"),
                Timestamp("20130102"),
                Timestamp("20130103 9:01:01"),
            ]
        )
        ser[2] = np.nan
        obj = frame_or_series(ser)

        # reg fillna
        result = obj.fillna(Timestamp("20130104"))
        expected = Series(
            [
                Timestamp("20130101"),
                Timestamp("20130101"),
                Timestamp("20130104"),
                Timestamp("20130103 9:01:01"),
            ]
        )
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)

        result = obj.fillna(NaT)
        expected = obj
        tm.assert_equal(result, expected)

    def test_fillna_dt64_non_nao(self):
        """A millisecond-unit ``np.datetime64`` is accepted as the fill value."""
        # GH#27419
        ser = Series([Timestamp("2010-01-01"), NaT, Timestamp("2000-01-01")])
        val = np.datetime64("1975-04-05", "ms")

        result = ser.fillna(val)
        expected = Series(
            [Timestamp("2010-01-01"), Timestamp("1975-04-05"), Timestamp("2000-01-01")]
        )
        tm.assert_series_equal(result, expected)

    def test_fillna_numeric_inplace(self):
        """``inplace=True`` returns None and matches the non-inplace result."""
        x = Series([np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"])
        y = x.copy()

        return_value = y.fillna(value=0, inplace=True)
        assert return_value is None

        expected = x.fillna(value=0)
        tm.assert_series_equal(y, expected)

    # ---------------------------------------------------------------
    # CategoricalDtype

    @pytest.mark.parametrize(
        "fill_value, expected_output",
        [
            ("a", ["a", "a", "b", "a", "a"]),
            ({1: "a", 3: "b", 4: "b"}, ["a", "a", "b", "b", "b"]),
            ({1: "a"}, ["a", "a", "b", np.nan, np.nan]),
            ({1: "a", 3: "b"}, ["a", "a", "b", "b", np.nan]),
            (Series("a"), ["a", np.nan, "b", np.nan, np.nan]),
            (Series("a", index=[1]), ["a", "a", "b", np.nan, np.nan]),
            (Series({1: "a", 3: "b"}), ["a", "a", "b", "b", np.nan]),
            (Series(["a", "b"], index=[3, 4]), ["a", np.nan, "b", "a", "b"]),
        ],
    )
    def test_fillna_categorical(self, fill_value, expected_output):
        """Scalar, dict and Series fills on a categorical series."""
        # GH#17033
        # Test fillna for a Categorical series
        data = ["a", np.nan, "b", np.nan, np.nan]
        ser = Series(Categorical(data, categories=["a", "b"]))
        exp = Series(Categorical(expected_output, categories=["a", "b"]))
        result = ser.fillna(fill_value)
        tm.assert_series_equal(result, exp)

    @pytest.mark.parametrize(
        "fill_value, expected_output",
        [
            (Series(["a", "b", "c", "d", "e"]), ["a", "b", "b", "d", "e"]),
            (Series(["b", "d", "a", "d", "a"]), ["a", "d", "b", "d", "a"]),
            (
                Series(
                    Categorical(
                        ["b", "d", "a", "d", "a"], categories=["b", "c", "d", "e", "a"]
                    )
                ),
                ["a", "d", "b", "d", "a"],
            ),
        ],
    )
    def test_fillna_categorical_with_new_categories(self, fill_value, expected_output):
        """Series fill values drawn from the existing categories are accepted."""
        # GH#26215
        data = ["a", np.nan, "b", np.nan, np.nan]
        ser = Series(Categorical(data, categories=["a", "b", "c", "d", "e"]))
        exp = Series(Categorical(expected_output, categories=["a", "b", "c", "d", "e"]))
        result = ser.fillna(fill_value)
        tm.assert_series_equal(result, exp)

    def test_fillna_categorical_raises(self):
        """Unknown categories and unsupported value types raise."""
        data = ["a", np.nan, "b", np.nan, np.nan]
        ser = Series(Categorical(data, categories=["a", "b"]))
        cat = ser._values

        msg = "Cannot setitem on a Categorical with a new category"
        with pytest.raises(TypeError, match=msg):
            ser.fillna("d")

        msg2 = "Length of 'value' does not match."
        with pytest.raises(ValueError, match=msg2):
            cat.fillna(Series("d"))

        with pytest.raises(TypeError, match=msg):
            ser.fillna({1: "d", 3: "a"})

        msg = '"value" parameter must be a scalar or dict, but you passed a "list"'
        with pytest.raises(TypeError, match=msg):
            ser.fillna(["a", "b"])

        msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"'
        with pytest.raises(TypeError, match=msg):
            ser.fillna(("a", "b"))

        msg = (
            '"value" parameter must be a scalar, dict '
            'or Series, but you passed a "DataFrame"'
        )
        with pytest.raises(TypeError, match=msg):
            ser.fillna(DataFrame({1: ["a"], 3: ["b"]}))

    @pytest.mark.parametrize("dtype", [float, "float32", "float64"])
    @pytest.mark.parametrize("fill_type", tm.ALL_REAL_NUMPY_DTYPES)
    @pytest.mark.parametrize("scalar", [True, False])
    def test_fillna_float_casting(self, dtype, fill_type, scalar):
        """Float dtype is kept by fillna, setitem, mask and where alike."""
        # GH-43424
        ser = Series([np.nan, 1.2], dtype=dtype)
        fill_values = Series([2, 2], dtype=fill_type)
        if scalar:
            fill_values = fill_values.dtype.type(2)

        result = ser.fillna(fill_values)
        expected = Series([2.0, 1.2], dtype=dtype)
        tm.assert_series_equal(result, expected)

        ser = Series([np.nan, 1.2], dtype=dtype)
        mask = ser.isna().to_numpy()
        ser[mask] = fill_values
        tm.assert_series_equal(ser, expected)

        ser = Series([np.nan, 1.2], dtype=dtype)
        ser.mask(mask, fill_values, inplace=True)
        tm.assert_series_equal(ser, expected)

        ser = Series([np.nan, 1.2], dtype=dtype)
        res = ser.where(~mask, fill_values)
        tm.assert_series_equal(res, expected)

    def test_fillna_f32_upcast_with_dict(self):
        """A dict fill on float32 data keeps the float32 dtype."""
        # GH-43424
        ser = Series([np.nan, 1.2], dtype=np.float32)
        result = ser.fillna({0: 1})
        expected = Series([1.0, 1.2], dtype=np.float32)
        tm.assert_series_equal(result, expected)

    # ---------------------------------------------------------------
    # Invalid Usages

    def test_fillna_invalid_method(self, datetime_series):
        """An unrecognised ``method`` raises ValueError mentioning the value."""
        # pytest.raises fails the test when nothing is raised; the earlier
        # try/except form passed silently in that case.
        with pytest.raises(ValueError, match="ffil"):
            datetime_series.fillna(method="ffil")

    def test_fillna_listlike_invalid(self):
        """list and tuple fill values raise TypeError."""
        ser = Series(np.random.randint(-100, 100, 50))
        msg = '"value" parameter must be a scalar or dict, but you passed a "list"'
        with pytest.raises(TypeError, match=msg):
            ser.fillna([1, 2])

        msg = '"value" parameter must be a scalar or dict, but you passed a "tuple"'
        with pytest.raises(TypeError, match=msg):
            ser.fillna((1, 2))

    def test_fillna_method_and_limit_invalid(self):
        """Invalid ``limit`` / ``method`` combinations raise ValueError."""
        # related GH#9217, make sure limit is an int and greater than 0
        ser = Series([1, 2, 3, None])
        # Any one of these messages is accepted for every combination below.
        msg = "|".join(
            [
                r"Cannot specify both 'value' and 'method'\.",
                "Limit must be greater than 0",
                "Limit must be an integer",
            ]
        )
        for limit in [-1, 0, 1.0, 2.0]:
            for method in ["backfill", "bfill", "pad", "ffill", None]:
                with pytest.raises(ValueError, match=msg):
                    ser.fillna(1, limit=limit, method=method)

    def test_fillna_datetime64_with_timezone_tzinfo(self):
        """Fill tz-aware data with values carrying a different tzinfo object."""
        # https://github.com/pandas-dev/pandas/issues/38851
        # different tzinfos representing UTC treated as equal
        ser = Series(date_range("2020", periods=3, tz="UTC"))
        expected = ser.copy()
        ser[1] = NaT
        result = ser.fillna(datetime(2020, 1, 2, tzinfo=timezone.utc))
        tm.assert_series_equal(result, expected)

        # pre-2.0 we cast to object with mixed tzs, in 2.0 we retain dtype
        ts = Timestamp("2000-01-01", tz="US/Pacific")
        ser2 = Series(ser._values.tz_convert("dateutil/US/Pacific"))
        assert ser2.dtype.kind == "M"
        result = ser2.fillna(ts)
        expected = Series(
            [ser2[0], ts.tz_convert(ser2.dtype.tz), ser2[2]],
            dtype=ser2.dtype,
        )
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "input, input_fillna, expected_data, expected_categories",
        [
            (["A", "B", None, "A"], "B", ["A", "B", "B", "A"], ["A", "B"]),
            (["A", "B", np.nan, "A"], "B", ["A", "B", "B", "A"], ["A", "B"]),
        ],
    )
    def test_fillna_categorical_accept_same_type(
        self, input, input_fillna, expected_data, expected_categories
    ):
        """``Categorical.fillna`` accepts categorical-backed fill values."""
        # GH32414
        cat = Categorical(input)
        ser = Series(cat).fillna(input_fillna)
        filled = cat.fillna(ser)
        result = cat.fillna(filled)
        expected = Categorical(expected_data, categories=expected_categories)
        tm.assert_categorical_equal(result, expected)
class TestFillnaPad:
    """Tests for the pad/ffill and backfill/bfill paths of ``Series.fillna``."""

    def test_fillna_bug(self):
        """ffill leaves a leading NaN; bfill leaves a trailing NaN."""
        ser = Series([np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"])
        filled = ser.fillna(method="ffill")
        expected = Series([np.nan, 1.0, 1.0, 3.0, 3.0], ser.index)
        tm.assert_series_equal(filled, expected)

        filled = ser.fillna(method="bfill")
        expected = Series([1.0, 1.0, 3.0, 3.0, np.nan], ser.index)
        tm.assert_series_equal(filled, expected)

    def test_ffill(self):
        """``Series.ffill()`` matches ``fillna(method="ffill")``."""
        ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5))
        # Explicit positional set; ``np.nan`` rather than the ``np.NaN`` alias,
        # which NumPy 2.0 removed.
        ts.iloc[2] = np.nan
        tm.assert_series_equal(ts.ffill(), ts.fillna(method="ffill"))

    def test_ffill_mixed_dtypes_without_missing_data(self):
        """ffill on mixed-type data with nothing missing returns it unchanged."""
        # GH#14956
        series = Series([datetime(2015, 1, 1, tzinfo=pytz.utc), 1])
        result = series.ffill()
        tm.assert_series_equal(series, result)

    def test_bfill(self):
        """``Series.bfill()`` matches ``fillna(method="bfill")``."""
        ts = Series([0.0, 1.0, 2.0, 3.0, 4.0], index=tm.makeDateIndex(5))
        # Explicit positional set; ``np.nan`` rather than the ``np.NaN`` alias,
        # which NumPy 2.0 removed.
        ts.iloc[2] = np.nan
        tm.assert_series_equal(ts.bfill(), ts.fillna(method="bfill"))

    def test_pad_nan(self):
        """In-place pad returns None and cannot fill a leading NaN."""
        x = Series(
            [np.nan, 1.0, np.nan, 3.0, np.nan], ["z", "a", "b", "c", "d"], dtype=float
        )

        return_value = x.fillna(method="pad", inplace=True)
        assert return_value is None

        expected = Series(
            [np.nan, 1.0, 1.0, 3.0, 3.0], ["z", "a", "b", "c", "d"], dtype=float
        )
        tm.assert_series_equal(x[1:], expected[1:])
        # Two separate asserts: in ``assert a, b`` the second expression is
        # only the failure message and is never checked.
        assert np.isnan(x.iloc[0])
        assert np.isnan(expected.iloc[0])

    def test_series_fillna_limit(self):
        """``limit`` bounds how far ``fillna`` pad/bfill propagates."""
        index = np.arange(10)
        s = Series(np.random.randn(10), index=index)

        result = s[:2].reindex(index)
        result = result.fillna(method="pad", limit=5)

        # Unlimited fill, then blank the entries past the limit.
        expected = s[:2].reindex(index).fillna(method="pad")
        expected[-3:] = np.nan
        tm.assert_series_equal(result, expected)

        result = s[-2:].reindex(index)
        result = result.fillna(method="bfill", limit=5)

        expected = s[-2:].reindex(index).fillna(method="backfill")
        expected[:3] = np.nan
        tm.assert_series_equal(result, expected)

    def test_series_pad_backfill_limit(self):
        """``limit`` bounds how far ``reindex`` pad/backfill propagates."""
        index = np.arange(10)
        s = Series(np.random.randn(10), index=index)

        result = s[:2].reindex(index, method="pad", limit=5)

        # Unlimited fill, then blank the entries past the limit.
        expected = s[:2].reindex(index).fillna(method="pad")
        expected[-3:] = np.nan
        tm.assert_series_equal(result, expected)

        result = s[-2:].reindex(index, method="backfill", limit=5)

        expected = s[-2:].reindex(index).fillna(method="backfill")
        expected[:3] = np.nan
        tm.assert_series_equal(result, expected)

    def test_fillna_int(self):
        """In-place ffill on integer data returns None and changes nothing."""
        ser = Series(np.random.randint(-100, 100, 50))
        return_value = ser.fillna(method="ffill", inplace=True)
        assert return_value is None
        tm.assert_series_equal(ser.fillna(method="ffill", inplace=False), ser)

    def test_datetime64tz_fillna_round_issue(self):
        """bfill on tz-aware datetimes keeps the exact microsecond value."""
        # GH#14872

        data = Series(
            [NaT, NaT, datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc)]
        )

        filled = data.fillna(method="bfill")

        expected = Series(
            [
                datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc),
                datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc),
                datetime(2016, 12, 12, 22, 24, 6, 100001, tzinfo=pytz.utc),
            ]
        )

        tm.assert_series_equal(filled, expected)

    def test_fillna_parr(self):
        """pad works on nanosecond periods close to ``Timestamp.max``."""
        # GH-24537
        dti = date_range(
            Timestamp.max - Timedelta(nanoseconds=10), periods=5, freq="ns"
        )
        ser = Series(dti.to_period("ns"))
        ser[2] = NaT
        arr = period_array(
            [
                Timestamp("2262-04-11 23:47:16.854775797"),
                Timestamp("2262-04-11 23:47:16.854775798"),
                Timestamp("2262-04-11 23:47:16.854775798"),
                Timestamp("2262-04-11 23:47:16.854775800"),
                Timestamp("2262-04-11 23:47:16.854775801"),
            ],
            freq="ns",
        )
        expected = Series(arr)

        filled = ser.fillna(method="pad")

        tm.assert_series_equal(filled, expected)

    @pytest.mark.parametrize("func", ["pad", "backfill"])
    def test_pad_backfill_deprecated(self, func):
        """``Series.pad`` and ``Series.backfill`` emit a FutureWarning."""
        # GH#33396
        ser = Series([1, 2, 3])
        with tm.assert_produces_warning(FutureWarning):
            getattr(ser, func)()
|