12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749 |
- import numpy as np
- import pytest
- import pandas as pd
- from pandas import (
- Categorical,
- DataFrame,
- Index,
- Series,
- )
- import pandas._testing as tm
# Sample values for every dtype exercised by the tests below; each container
# holds exactly three elements.
dt_data = [
    pd.Timestamp(day) for day in ("2011-01-01", "2011-01-02", "2011-01-03")
]
tz_data = [
    pd.Timestamp(day, tz="US/Eastern")
    for day in ("2011-01-01", "2011-01-02", "2011-01-03")
]
td_data = [pd.Timedelta(span) for span in ("1 days", "2 days", "3 days")]
period_data = [
    pd.Period(month, freq="M") for month in ("2011-01", "2011-02", "2011-03")
]

# Maps a dtype name to the sample values of that dtype.
data_dict = {
    "bool": [True, False, True],
    "int64": [1, 2, 3],
    "float64": [1.1, np.nan, 3.3],
    "category": Categorical(["X", "Y", "Z"]),
    "object": ["a", "b", "c"],
    "datetime64[ns]": dt_data,
    "datetime64[ns, US/Eastern]": tz_data,
    "timedelta64[ns]": td_data,
    "period[M]": period_data,
}
class TestConcatAppendCommon:
    """
    Test common dtype coercion rules between concat and append.

    The same expectations are checked against ``Index.append``,
    ``Series._append`` / ``DataFrame._append`` and ``pd.concat``.
    """

    @pytest.fixture(params=sorted(data_dict.keys()))
    def item(self, request):
        """Return a ``(dtype_name, values)`` pair for one ``data_dict`` entry."""
        key = request.param
        return key, data_dict[key]

    # Same fixture exposed under a second name so that a single test can
    # request two dtype entries (see test_concatlike_dtypes_coercion).
    item2 = item

    def test_dtypes(self, item, index_or_series):
        """Check that each sample data set is inferred as its intended dtype."""
        # to confirm test case covers intended dtypes
        typ, vals = item
        obj = index_or_series(vals)
        if isinstance(obj, Index):
            assert obj.dtype == typ
        elif isinstance(obj, Series):
            if typ.startswith("period"):
                assert obj.dtype == "Period[M]"
            else:
                assert obj.dtype == typ

    def test_concatlike_same_dtypes(self, item):
        """
        Append/concat data with itself and compare against an object built
        directly from the combined values.

        Also checks name propagation and that non-Index / non-Series inputs
        raise TypeError.
        """
        # GH 13660
        typ1, vals1 = item

        vals2 = vals1
        vals3 = vals1

        if typ1 == "category":
            # Categorical does not support list-style ``+`` concatenation
            exp_data = Categorical(list(vals1) + list(vals2))
            exp_data3 = Categorical(list(vals1) + list(vals2) + list(vals3))
        else:
            exp_data = vals1 + vals2
            exp_data3 = vals1 + vals2 + vals3

        # ----- Index ----- #

        # index.append
        res = Index(vals1).append(Index(vals2))
        exp = Index(exp_data)
        tm.assert_index_equal(res, exp)

        # 3 elements
        res = Index(vals1).append([Index(vals2), Index(vals3)])
        exp = Index(exp_data3)
        tm.assert_index_equal(res, exp)

        # index.append name mismatch
        i1 = Index(vals1, name="x")
        i2 = Index(vals2, name="y")
        res = i1.append(i2)
        exp = Index(exp_data)
        tm.assert_index_equal(res, exp)

        # index.append name match
        i1 = Index(vals1, name="x")
        i2 = Index(vals2, name="x")
        res = i1.append(i2)
        exp = Index(exp_data, name="x")
        tm.assert_index_equal(res, exp)

        # cannot append non-index
        with pytest.raises(TypeError, match="all inputs must be Index"):
            Index(vals1).append(vals2)

        with pytest.raises(TypeError, match="all inputs must be Index"):
            Index(vals1).append([Index(vals2), vals3])

        # ----- Series ----- #

        # series.append
        res = Series(vals1)._append(Series(vals2), ignore_index=True)
        exp = Series(exp_data)
        tm.assert_series_equal(res, exp, check_index_type=True)

        # concat
        res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
        tm.assert_series_equal(res, exp, check_index_type=True)

        # 3 elements
        res = Series(vals1)._append([Series(vals2), Series(vals3)], ignore_index=True)
        exp = Series(exp_data3)
        tm.assert_series_equal(res, exp)

        res = pd.concat(
            [Series(vals1), Series(vals2), Series(vals3)],
            ignore_index=True,
        )
        tm.assert_series_equal(res, exp)

        # name mismatch
        s1 = Series(vals1, name="x")
        s2 = Series(vals2, name="y")
        res = s1._append(s2, ignore_index=True)
        exp = Series(exp_data)
        tm.assert_series_equal(res, exp, check_index_type=True)

        res = pd.concat([s1, s2], ignore_index=True)
        tm.assert_series_equal(res, exp, check_index_type=True)

        # name match
        s1 = Series(vals1, name="x")
        s2 = Series(vals2, name="x")
        res = s1._append(s2, ignore_index=True)
        exp = Series(exp_data, name="x")
        tm.assert_series_equal(res, exp, check_index_type=True)

        res = pd.concat([s1, s2], ignore_index=True)
        tm.assert_series_equal(res, exp, check_index_type=True)

        # cannot append non-Series
        msg = (
            r"cannot concatenate object of type '.+'; "
            "only Series and DataFrame objs are valid"
        )
        with pytest.raises(TypeError, match=msg):
            Series(vals1)._append(vals2)

        with pytest.raises(TypeError, match=msg):
            Series(vals1)._append([Series(vals2), vals3])

        with pytest.raises(TypeError, match=msg):
            pd.concat([Series(vals1), vals2])

        with pytest.raises(TypeError, match=msg):
            pd.concat([Series(vals1), Series(vals2), vals3])

    def test_concatlike_dtypes_coercion(self, item, item2, request):
        """
        Append/concat data of two different dtypes and compare against an
        object built from the combined values with the expected dtype.

        Same-dtype and categorical combinations are skipped; bool combined
        with int64/float64 is marked as an expected failure.
        """
        # GH 13660
        typ1, vals1 = item
        typ2, vals2 = item2

        vals3 = vals2

        # basically infer
        exp_index_dtype = None
        exp_series_dtype = None

        # Report uncovered combinations as skipped rather than returning early,
        # which would show them as passing without asserting anything.
        if typ1 == typ2:
            pytest.skip("same dtype is tested in test_concatlike_same_dtypes")
        elif typ1 == "category" or typ2 == "category":
            # The `vals1 + vals2` below fails bc one of these is a Categorical
            # instead of a list; we have separate dedicated tests for categorical
            pytest.skip("categorical type tested elsewhere")

        # specify expected dtype
        if typ1 == "bool" and typ2 in ("int64", "float64"):
            # series coerces to numeric based on numpy rule
            # index doesn't because bool is object dtype
            exp_series_dtype = typ2
            mark = pytest.mark.xfail(reason="GH#39187 casting to object")
            request.node.add_marker(mark)
        elif typ2 == "bool" and typ1 in ("int64", "float64"):
            exp_series_dtype = typ1
            mark = pytest.mark.xfail(reason="GH#39187 casting to object")
            request.node.add_marker(mark)
        elif (
            typ1 == "datetime64[ns, US/Eastern]"
            or typ2 == "datetime64[ns, US/Eastern]"
            or typ1 == "timedelta64[ns]"
            or typ2 == "timedelta64[ns]"
        ):
            exp_index_dtype = object
            exp_series_dtype = object

        exp_data = vals1 + vals2
        exp_data3 = vals1 + vals2 + vals3

        # ----- Index ----- #

        # index.append
        # GH#39817
        res = Index(vals1).append(Index(vals2))
        exp = Index(exp_data, dtype=exp_index_dtype)
        tm.assert_index_equal(res, exp)

        # 3 elements
        res = Index(vals1).append([Index(vals2), Index(vals3)])
        exp = Index(exp_data3, dtype=exp_index_dtype)
        tm.assert_index_equal(res, exp)

        # ----- Series ----- #

        # series._append
        # GH#39817
        res = Series(vals1)._append(Series(vals2), ignore_index=True)
        exp = Series(exp_data, dtype=exp_series_dtype)
        tm.assert_series_equal(res, exp, check_index_type=True)

        # concat
        # GH#39817
        res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
        tm.assert_series_equal(res, exp, check_index_type=True)

        # 3 elements
        # GH#39817
        res = Series(vals1)._append([Series(vals2), Series(vals3)], ignore_index=True)
        exp = Series(exp_data3, dtype=exp_series_dtype)
        tm.assert_series_equal(res, exp)

        # GH#39817
        res = pd.concat(
            [Series(vals1), Series(vals2), Series(vals3)],
            ignore_index=True,
        )
        tm.assert_series_equal(res, exp)

    def test_concatlike_common_coerce_to_pandas_object(self):
        """Mixing datetime and timedelta data keeps Timestamp/Timedelta scalars."""
        # GH 13626
        # result must be Timestamp/Timedelta, not datetime.datetime/timedelta
        dti = pd.DatetimeIndex(["2011-01-01", "2011-01-02"])
        tdi = pd.TimedeltaIndex(["1 days", "2 days"])

        exp = Index(
            [
                pd.Timestamp("2011-01-01"),
                pd.Timestamp("2011-01-02"),
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
            ]
        )

        res = dti.append(tdi)
        tm.assert_index_equal(res, exp)
        assert isinstance(res[0], pd.Timestamp)
        assert isinstance(res[-1], pd.Timedelta)

        dts = Series(dti)
        tds = Series(tdi)
        res = dts._append(tds)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
        assert isinstance(res.iloc[0], pd.Timestamp)
        assert isinstance(res.iloc[-1], pd.Timedelta)

        res = pd.concat([dts, tds])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
        assert isinstance(res.iloc[0], pd.Timestamp)
        assert isinstance(res.iloc[-1], pd.Timedelta)

    def test_concatlike_datetimetz(self, tz_aware_fixture):
        """Combining tz-aware data sharing one timezone yields tz-aware data."""
        tz = tz_aware_fixture
        # GH 7795
        dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz=tz)

        exp = pd.DatetimeIndex(
            ["2011-01-01", "2011-01-02", "2012-01-01", "2012-01-02"], tz=tz
        )

        res = dti1.append(dti2)
        tm.assert_index_equal(res, exp)

        dts1 = Series(dti1)
        dts2 = Series(dti2)
        res = dts1._append(dts2)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([dts1, dts2])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

    @pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo", "EST5EDT"])
    def test_concatlike_datetimetz_short(self, tz):
        """Frames indexed by same-tz DatetimeIndex combine into a tz-aware index."""
        # GH#7795
        ix1 = pd.date_range(start="2014-07-15", end="2014-07-17", freq="D", tz=tz)
        ix2 = pd.DatetimeIndex(["2014-07-11", "2014-07-21"], tz=tz)
        df1 = DataFrame(0, index=ix1, columns=["A", "B"])
        df2 = DataFrame(0, index=ix2, columns=["A", "B"])

        exp_idx = pd.DatetimeIndex(
            ["2014-07-15", "2014-07-16", "2014-07-17", "2014-07-11", "2014-07-21"],
            tz=tz,
        )
        exp = DataFrame(0, index=exp_idx, columns=["A", "B"])

        tm.assert_frame_equal(df1._append(df2), exp)
        tm.assert_frame_equal(pd.concat([df1, df2]), exp)

    def test_concatlike_datetimetz_to_object(self, tz_aware_fixture):
        """Combining tz-aware data with tz-naive or other-tz data gives object."""
        tz = tz_aware_fixture
        # GH 13660

        # different tz coerces to object
        dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"])

        exp = Index(
            [
                pd.Timestamp("2011-01-01", tz=tz),
                pd.Timestamp("2011-01-02", tz=tz),
                pd.Timestamp("2012-01-01"),
                pd.Timestamp("2012-01-02"),
            ],
            dtype=object,
        )

        res = dti1.append(dti2)
        tm.assert_index_equal(res, exp)

        dts1 = Series(dti1)
        dts2 = Series(dti2)
        res = dts1._append(dts2)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([dts1, dts2])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        # different tz
        dti3 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz="US/Pacific")

        exp = Index(
            [
                pd.Timestamp("2011-01-01", tz=tz),
                pd.Timestamp("2011-01-02", tz=tz),
                pd.Timestamp("2012-01-01", tz="US/Pacific"),
                pd.Timestamp("2012-01-02", tz="US/Pacific"),
            ],
            dtype=object,
        )

        res = dti1.append(dti3)
        tm.assert_index_equal(res, exp)

        dts1 = Series(dti1)
        dts3 = Series(dti3)
        res = dts1._append(dts3)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([dts1, dts3])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

    def test_concatlike_common_period(self):
        """Combining period data of the same frequency yields period data."""
        # GH 13660
        pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
        pi2 = pd.PeriodIndex(["2012-01", "2012-02"], freq="M")

        exp = pd.PeriodIndex(["2011-01", "2011-02", "2012-01", "2012-02"], freq="M")

        res = pi1.append(pi2)
        tm.assert_index_equal(res, exp)

        ps1 = Series(pi1)
        ps2 = Series(pi2)
        res = ps1._append(ps2)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([ps1, ps2])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

    def test_concatlike_common_period_diff_freq_to_object(self):
        """Combining period data of different frequencies gives object dtype."""
        # GH 13221
        pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
        pi2 = pd.PeriodIndex(["2012-01-01", "2012-02-01"], freq="D")

        exp = Index(
            [
                pd.Period("2011-01", freq="M"),
                pd.Period("2011-02", freq="M"),
                pd.Period("2012-01-01", freq="D"),
                pd.Period("2012-02-01", freq="D"),
            ],
            dtype=object,
        )

        res = pi1.append(pi2)
        tm.assert_index_equal(res, exp)

        ps1 = Series(pi1)
        ps2 = Series(pi2)
        res = ps1._append(ps2)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([ps1, ps2])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

    def test_concatlike_common_period_mixed_dt_to_object(self):
        """Combining period and timedelta data gives object dtype, either order."""
        # GH 13221
        # different datetimelike
        pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
        tdi = pd.TimedeltaIndex(["1 days", "2 days"])
        exp = Index(
            [
                pd.Period("2011-01", freq="M"),
                pd.Period("2011-02", freq="M"),
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
            ],
            dtype=object,
        )

        res = pi1.append(tdi)
        tm.assert_index_equal(res, exp)

        ps1 = Series(pi1)
        tds = Series(tdi)
        res = ps1._append(tds)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([ps1, tds])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        # inverse
        exp = Index(
            [
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
                pd.Period("2011-01", freq="M"),
                pd.Period("2011-02", freq="M"),
            ],
            dtype=object,
        )

        res = tdi.append(pi1)
        tm.assert_index_equal(res, exp)

        ps1 = Series(pi1)
        tds = Series(tdi)
        res = tds._append(ps1)
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

        res = pd.concat([tds, ps1])
        tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))

    def test_concat_categorical(self):
        """
        Categorical + categorical: the result stays categorical only when both
        inputs share the same categories.
        """
        # GH 13524

        # same categories -> category
        s1 = Series([1, 2, np.nan], dtype="category")
        s2 = Series([2, 1, 2], dtype="category")

        exp = Series([1, 2, np.nan, 2, 1, 2], dtype="category")
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        # partially different categories => not-category
        s1 = Series([3, 2], dtype="category")
        s2 = Series([2, 1], dtype="category")

        exp = Series([3, 2, 2, 1])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        # completely different categories (same dtype) => not-category
        s1 = Series([10, 11, np.nan], dtype="category")
        s2 = Series([np.nan, 1, 3, 2], dtype="category")

        exp = Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype=np.float64)
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

    def test_union_categorical_same_categories_different_order(self):
        """Same categories in a different order still concat to a categorical."""
        # https://github.com/pandas-dev/pandas/issues/19096
        a = Series(Categorical(["a", "b", "c"], categories=["a", "b", "c"]))
        b = Series(Categorical(["a", "b", "c"], categories=["b", "a", "c"]))
        result = pd.concat([a, b], ignore_index=True)
        expected = Series(
            Categorical(["a", "b", "c", "a", "b", "c"], categories=["a", "b", "c"])
        )
        tm.assert_series_equal(result, expected)

    def test_concat_categorical_coercion(self):
        """Categorical + non-categorical never yields a categorical result."""
        # GH 13524

        # category + not-category => not-category
        s1 = Series([1, 2, np.nan], dtype="category")
        s2 = Series([2, 1, 2])

        exp = Series([1, 2, np.nan, 2, 1, 2], dtype=np.float64)
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        # result shouldn't be affected by 1st elem dtype
        exp = Series([2, 1, 2, 1, 2, np.nan], dtype=np.float64)
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)

        # all values are not in category => not-category
        s1 = Series([3, 2], dtype="category")
        s2 = Series([2, 1])

        exp = Series([3, 2, 2, 1])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        exp = Series([2, 1, 3, 2])
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)

        # completely different categories => not-category
        s1 = Series([10, 11, np.nan], dtype="category")
        s2 = Series([1, 3, 2])

        exp = Series([10, 11, np.nan, 1, 3, 2], dtype=np.float64)
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        exp = Series([1, 3, 2, 10, 11, np.nan], dtype=np.float64)
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)

        # different dtype => not-category
        s1 = Series([10, 11, np.nan], dtype="category")
        s2 = Series(["a", "b", "c"])

        exp = Series([10, 11, np.nan, "a", "b", "c"])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        exp = Series(["a", "b", "c", 10, 11, np.nan])
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)

        # if normal series only contains NaN-likes => not-category
        s1 = Series([10, 11], dtype="category")
        s2 = Series([np.nan, np.nan, np.nan])

        exp = Series([10, 11, np.nan, np.nan, np.nan])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        exp = Series([np.nan, np.nan, np.nan, 10, 11])
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)

    def test_concat_categorical_3elem_coercion(self):
        """Two categoricals plus one plain Series never yield a categorical."""
        # GH 13524

        # mixed dtypes => not-category
        s1 = Series([1, 2, np.nan], dtype="category")
        s2 = Series([2, 1, 2], dtype="category")
        s3 = Series([1, 2, 1, 2, np.nan])

        exp = Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan], dtype="float")
        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
        tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp)

        exp = Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2], dtype="float")
        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp)

        # values are all in either category => not-category
        s1 = Series([4, 5, 6], dtype="category")
        s2 = Series([1, 2, 3], dtype="category")
        s3 = Series([1, 3, 4])

        exp = Series([4, 5, 6, 1, 2, 3, 1, 3, 4])
        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
        tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp)

        exp = Series([1, 3, 4, 4, 5, 6, 1, 2, 3])
        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp)

        # values are not in either category => not-category
        s1 = Series([4, 5, 6], dtype="category")
        s2 = Series([1, 2, 3], dtype="category")
        s3 = Series([10, 11, 12])

        exp = Series([4, 5, 6, 1, 2, 3, 10, 11, 12])
        tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
        tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp)

        exp = Series([10, 11, 12, 4, 5, 6, 1, 2, 3])
        tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp)

    def test_concat_categorical_multi_coercion(self):
        """Six interleaved categorical/plain Series give a non-categorical result."""
        # GH 13524

        s1 = Series([1, 3], dtype="category")
        s2 = Series([3, 4], dtype="category")
        s3 = Series([2, 3])
        s4 = Series([2, 2], dtype="category")
        s5 = Series([1, np.nan])
        s6 = Series([1, 3, 2], dtype="category")

        # mixed dtype, values are all in categories => not-category
        exp = Series([1, 3, 3, 4, 2, 3, 2, 2, 1, np.nan, 1, 3, 2])
        res = pd.concat([s1, s2, s3, s4, s5, s6], ignore_index=True)
        tm.assert_series_equal(res, exp)
        res = s1._append([s2, s3, s4, s5, s6], ignore_index=True)
        tm.assert_series_equal(res, exp)

        exp = Series([1, 3, 2, 1, np.nan, 2, 2, 2, 3, 3, 4, 1, 3])
        res = pd.concat([s6, s5, s4, s3, s2, s1], ignore_index=True)
        tm.assert_series_equal(res, exp)
        res = s6._append([s5, s4, s3, s2, s1], ignore_index=True)
        tm.assert_series_equal(res, exp)

    def test_concat_categorical_ordered(self):
        """Ordered categoricals built from the same values stay ordered."""
        # GH 13524

        s1 = Series(Categorical([1, 2, np.nan], ordered=True))
        s2 = Series(Categorical([2, 1, 2], ordered=True))

        exp = Series(Categorical([1, 2, np.nan, 2, 1, 2], ordered=True))
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        exp = Series(Categorical([1, 2, np.nan, 2, 1, 2, 1, 2, np.nan], ordered=True))
        tm.assert_series_equal(pd.concat([s1, s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s1._append([s2, s1], ignore_index=True), exp)

    def test_concat_categorical_coercion_nan(self):
        """Edge cases where one or both inputs consist only of NaN values."""
        # GH 13524

        # some edge cases
        # category + not-category => not category
        s1 = Series(np.array([np.nan, np.nan], dtype=np.float64), dtype="category")
        s2 = Series([np.nan, 1])

        exp = Series([np.nan, np.nan, np.nan, 1])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        s1 = Series([1, np.nan], dtype="category")
        s2 = Series([np.nan, np.nan])

        exp = Series([1, np.nan, np.nan, np.nan], dtype="float")
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        # mixed dtype, all nan-likes => not-category
        s1 = Series([np.nan, np.nan], dtype="category")
        s2 = Series([np.nan, np.nan])

        exp = Series([np.nan, np.nan, np.nan, np.nan])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)

        # all category nan-likes => category
        s1 = Series([np.nan, np.nan], dtype="category")
        s2 = Series([np.nan, np.nan], dtype="category")

        exp = Series([np.nan, np.nan, np.nan, np.nan], dtype="category")

        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

    def test_concat_categorical_empty(self):
        """Concatenation involving an empty categorical Series."""
        # GH 13524

        s1 = Series([], dtype="category")
        s2 = Series([1, 2], dtype="category")

        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), s2)

        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
        tm.assert_series_equal(s2._append(s1, ignore_index=True), s2)

        s1 = Series([], dtype="category")
        s2 = Series([], dtype="category")

        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), s2)

        s1 = Series([], dtype="category")
        s2 = Series([], dtype="object")

        # different dtype => not-category
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), s2)
        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
        tm.assert_series_equal(s2._append(s1, ignore_index=True), s2)

        s1 = Series([], dtype="category")
        s2 = Series([np.nan, np.nan])

        # empty Series is ignored
        exp = Series([np.nan, np.nan])
        tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
        tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)

        tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
        tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)

    def test_categorical_concat_append(self):
        """DataFrame concat/append with a categorical column."""
        cat = Categorical(["a", "b"], categories=["a", "b"])
        vals = [1, 2]
        df = DataFrame({"cats": cat, "vals": vals})
        cat2 = Categorical(["a", "b", "a", "b"], categories=["a", "b"])
        vals2 = [1, 2, 1, 2]
        exp = DataFrame({"cats": cat2, "vals": vals2}, index=Index([0, 1, 0, 1]))

        tm.assert_frame_equal(pd.concat([df, df]), exp)
        tm.assert_frame_equal(df._append(df), exp)

        # GH 13524 can concat different categories
        cat3 = Categorical(["a", "b"], categories=["a", "b", "c"])
        vals3 = [1, 2]
        df_different_categories = DataFrame({"cats": cat3, "vals": vals3})

        res = pd.concat([df, df_different_categories], ignore_index=True)
        # differing categories => the column is no longer categorical
        exp = DataFrame({"cats": list("abab"), "vals": [1, 2, 1, 2]})
        tm.assert_frame_equal(res, exp)

        res = df._append(df_different_categories, ignore_index=True)
        tm.assert_frame_equal(res, exp)
|