# test_append_common.py (27 KB)
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas import (
  5. Categorical,
  6. DataFrame,
  7. Index,
  8. Series,
  9. )
  10. import pandas._testing as tm
  11. dt_data = [
  12. pd.Timestamp("2011-01-01"),
  13. pd.Timestamp("2011-01-02"),
  14. pd.Timestamp("2011-01-03"),
  15. ]
  16. tz_data = [
  17. pd.Timestamp("2011-01-01", tz="US/Eastern"),
  18. pd.Timestamp("2011-01-02", tz="US/Eastern"),
  19. pd.Timestamp("2011-01-03", tz="US/Eastern"),
  20. ]
  21. td_data = [
  22. pd.Timedelta("1 days"),
  23. pd.Timedelta("2 days"),
  24. pd.Timedelta("3 days"),
  25. ]
  26. period_data = [
  27. pd.Period("2011-01", freq="M"),
  28. pd.Period("2011-02", freq="M"),
  29. pd.Period("2011-03", freq="M"),
  30. ]
  31. data_dict = {
  32. "bool": [True, False, True],
  33. "int64": [1, 2, 3],
  34. "float64": [1.1, np.nan, 3.3],
  35. "category": Categorical(["X", "Y", "Z"]),
  36. "object": ["a", "b", "c"],
  37. "datetime64[ns]": dt_data,
  38. "datetime64[ns, US/Eastern]": tz_data,
  39. "timedelta64[ns]": td_data,
  40. "period[M]": period_data,
  41. }
  42. class TestConcatAppendCommon:
  43. """
  44. Test common dtype coercion rules between concat and append.
  45. """
  46. @pytest.fixture(params=sorted(data_dict.keys()))
  47. def item(self, request):
  48. key = request.param
  49. return key, data_dict[key]
  50. item2 = item
  51. def test_dtypes(self, item, index_or_series):
  52. # to confirm test case covers intended dtypes
  53. typ, vals = item
  54. obj = index_or_series(vals)
  55. if isinstance(obj, Index):
  56. assert obj.dtype == typ
  57. elif isinstance(obj, Series):
  58. if typ.startswith("period"):
  59. assert obj.dtype == "Period[M]"
  60. else:
  61. assert obj.dtype == typ
  62. def test_concatlike_same_dtypes(self, item):
  63. # GH 13660
  64. typ1, vals1 = item
  65. vals2 = vals1
  66. vals3 = vals1
  67. if typ1 == "category":
  68. exp_data = Categorical(list(vals1) + list(vals2))
  69. exp_data3 = Categorical(list(vals1) + list(vals2) + list(vals3))
  70. else:
  71. exp_data = vals1 + vals2
  72. exp_data3 = vals1 + vals2 + vals3
  73. # ----- Index ----- #
  74. # index.append
  75. res = Index(vals1).append(Index(vals2))
  76. exp = Index(exp_data)
  77. tm.assert_index_equal(res, exp)
  78. # 3 elements
  79. res = Index(vals1).append([Index(vals2), Index(vals3)])
  80. exp = Index(exp_data3)
  81. tm.assert_index_equal(res, exp)
  82. # index.append name mismatch
  83. i1 = Index(vals1, name="x")
  84. i2 = Index(vals2, name="y")
  85. res = i1.append(i2)
  86. exp = Index(exp_data)
  87. tm.assert_index_equal(res, exp)
  88. # index.append name match
  89. i1 = Index(vals1, name="x")
  90. i2 = Index(vals2, name="x")
  91. res = i1.append(i2)
  92. exp = Index(exp_data, name="x")
  93. tm.assert_index_equal(res, exp)
  94. # cannot append non-index
  95. with pytest.raises(TypeError, match="all inputs must be Index"):
  96. Index(vals1).append(vals2)
  97. with pytest.raises(TypeError, match="all inputs must be Index"):
  98. Index(vals1).append([Index(vals2), vals3])
  99. # ----- Series ----- #
  100. # series.append
  101. res = Series(vals1)._append(Series(vals2), ignore_index=True)
  102. exp = Series(exp_data)
  103. tm.assert_series_equal(res, exp, check_index_type=True)
  104. # concat
  105. res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
  106. tm.assert_series_equal(res, exp, check_index_type=True)
  107. # 3 elements
  108. res = Series(vals1)._append([Series(vals2), Series(vals3)], ignore_index=True)
  109. exp = Series(exp_data3)
  110. tm.assert_series_equal(res, exp)
  111. res = pd.concat(
  112. [Series(vals1), Series(vals2), Series(vals3)],
  113. ignore_index=True,
  114. )
  115. tm.assert_series_equal(res, exp)
  116. # name mismatch
  117. s1 = Series(vals1, name="x")
  118. s2 = Series(vals2, name="y")
  119. res = s1._append(s2, ignore_index=True)
  120. exp = Series(exp_data)
  121. tm.assert_series_equal(res, exp, check_index_type=True)
  122. res = pd.concat([s1, s2], ignore_index=True)
  123. tm.assert_series_equal(res, exp, check_index_type=True)
  124. # name match
  125. s1 = Series(vals1, name="x")
  126. s2 = Series(vals2, name="x")
  127. res = s1._append(s2, ignore_index=True)
  128. exp = Series(exp_data, name="x")
  129. tm.assert_series_equal(res, exp, check_index_type=True)
  130. res = pd.concat([s1, s2], ignore_index=True)
  131. tm.assert_series_equal(res, exp, check_index_type=True)
  132. # cannot append non-index
  133. msg = (
  134. r"cannot concatenate object of type '.+'; "
  135. "only Series and DataFrame objs are valid"
  136. )
  137. with pytest.raises(TypeError, match=msg):
  138. Series(vals1)._append(vals2)
  139. with pytest.raises(TypeError, match=msg):
  140. Series(vals1)._append([Series(vals2), vals3])
  141. with pytest.raises(TypeError, match=msg):
  142. pd.concat([Series(vals1), vals2])
  143. with pytest.raises(TypeError, match=msg):
  144. pd.concat([Series(vals1), Series(vals2), vals3])
  145. def test_concatlike_dtypes_coercion(self, item, item2, request):
  146. # GH 13660
  147. typ1, vals1 = item
  148. typ2, vals2 = item2
  149. vals3 = vals2
  150. # basically infer
  151. exp_index_dtype = None
  152. exp_series_dtype = None
  153. if typ1 == typ2:
  154. # same dtype is tested in test_concatlike_same_dtypes
  155. return
  156. elif typ1 == "category" or typ2 == "category":
  157. # The `vals1 + vals2` below fails bc one of these is a Categorical
  158. # instead of a list; we have separate dedicated tests for categorical
  159. return
  160. # specify expected dtype
  161. if typ1 == "bool" and typ2 in ("int64", "float64"):
  162. # series coerces to numeric based on numpy rule
  163. # index doesn't because bool is object dtype
  164. exp_series_dtype = typ2
  165. mark = pytest.mark.xfail(reason="GH#39187 casting to object")
  166. request.node.add_marker(mark)
  167. elif typ2 == "bool" and typ1 in ("int64", "float64"):
  168. exp_series_dtype = typ1
  169. mark = pytest.mark.xfail(reason="GH#39187 casting to object")
  170. request.node.add_marker(mark)
  171. elif (
  172. typ1 == "datetime64[ns, US/Eastern]"
  173. or typ2 == "datetime64[ns, US/Eastern]"
  174. or typ1 == "timedelta64[ns]"
  175. or typ2 == "timedelta64[ns]"
  176. ):
  177. exp_index_dtype = object
  178. exp_series_dtype = object
  179. exp_data = vals1 + vals2
  180. exp_data3 = vals1 + vals2 + vals3
  181. # ----- Index ----- #
  182. # index.append
  183. # GH#39817
  184. res = Index(vals1).append(Index(vals2))
  185. exp = Index(exp_data, dtype=exp_index_dtype)
  186. tm.assert_index_equal(res, exp)
  187. # 3 elements
  188. res = Index(vals1).append([Index(vals2), Index(vals3)])
  189. exp = Index(exp_data3, dtype=exp_index_dtype)
  190. tm.assert_index_equal(res, exp)
  191. # ----- Series ----- #
  192. # series._append
  193. # GH#39817
  194. res = Series(vals1)._append(Series(vals2), ignore_index=True)
  195. exp = Series(exp_data, dtype=exp_series_dtype)
  196. tm.assert_series_equal(res, exp, check_index_type=True)
  197. # concat
  198. # GH#39817
  199. res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
  200. tm.assert_series_equal(res, exp, check_index_type=True)
  201. # 3 elements
  202. # GH#39817
  203. res = Series(vals1)._append([Series(vals2), Series(vals3)], ignore_index=True)
  204. exp = Series(exp_data3, dtype=exp_series_dtype)
  205. tm.assert_series_equal(res, exp)
  206. # GH#39817
  207. res = pd.concat(
  208. [Series(vals1), Series(vals2), Series(vals3)],
  209. ignore_index=True,
  210. )
  211. tm.assert_series_equal(res, exp)
  212. def test_concatlike_common_coerce_to_pandas_object(self):
  213. # GH 13626
  214. # result must be Timestamp/Timedelta, not datetime.datetime/timedelta
  215. dti = pd.DatetimeIndex(["2011-01-01", "2011-01-02"])
  216. tdi = pd.TimedeltaIndex(["1 days", "2 days"])
  217. exp = Index(
  218. [
  219. pd.Timestamp("2011-01-01"),
  220. pd.Timestamp("2011-01-02"),
  221. pd.Timedelta("1 days"),
  222. pd.Timedelta("2 days"),
  223. ]
  224. )
  225. res = dti.append(tdi)
  226. tm.assert_index_equal(res, exp)
  227. assert isinstance(res[0], pd.Timestamp)
  228. assert isinstance(res[-1], pd.Timedelta)
  229. dts = Series(dti)
  230. tds = Series(tdi)
  231. res = dts._append(tds)
  232. tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
  233. assert isinstance(res.iloc[0], pd.Timestamp)
  234. assert isinstance(res.iloc[-1], pd.Timedelta)
  235. res = pd.concat([dts, tds])
  236. tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
  237. assert isinstance(res.iloc[0], pd.Timestamp)
  238. assert isinstance(res.iloc[-1], pd.Timedelta)
  239. def test_concatlike_datetimetz(self, tz_aware_fixture):
  240. tz = tz_aware_fixture
  241. # GH 7795
  242. dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
  243. dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz=tz)
  244. exp = pd.DatetimeIndex(
  245. ["2011-01-01", "2011-01-02", "2012-01-01", "2012-01-02"], tz=tz
  246. )
  247. res = dti1.append(dti2)
  248. tm.assert_index_equal(res, exp)
  249. dts1 = Series(dti1)
  250. dts2 = Series(dti2)
  251. res = dts1._append(dts2)
  252. tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
  253. res = pd.concat([dts1, dts2])
  254. tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
  255. @pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo", "EST5EDT"])
  256. def test_concatlike_datetimetz_short(self, tz):
  257. # GH#7795
  258. ix1 = pd.date_range(start="2014-07-15", end="2014-07-17", freq="D", tz=tz)
  259. ix2 = pd.DatetimeIndex(["2014-07-11", "2014-07-21"], tz=tz)
  260. df1 = DataFrame(0, index=ix1, columns=["A", "B"])
  261. df2 = DataFrame(0, index=ix2, columns=["A", "B"])
  262. exp_idx = pd.DatetimeIndex(
  263. ["2014-07-15", "2014-07-16", "2014-07-17", "2014-07-11", "2014-07-21"],
  264. tz=tz,
  265. )
  266. exp = DataFrame(0, index=exp_idx, columns=["A", "B"])
  267. tm.assert_frame_equal(df1._append(df2), exp)
  268. tm.assert_frame_equal(pd.concat([df1, df2]), exp)
  269. def test_concatlike_datetimetz_to_object(self, tz_aware_fixture):
  270. tz = tz_aware_fixture
  271. # GH 13660
  272. # different tz coerces to object
  273. dti1 = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
  274. dti2 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"])
  275. exp = Index(
  276. [
  277. pd.Timestamp("2011-01-01", tz=tz),
  278. pd.Timestamp("2011-01-02", tz=tz),
  279. pd.Timestamp("2012-01-01"),
  280. pd.Timestamp("2012-01-02"),
  281. ],
  282. dtype=object,
  283. )
  284. res = dti1.append(dti2)
  285. tm.assert_index_equal(res, exp)
  286. dts1 = Series(dti1)
  287. dts2 = Series(dti2)
  288. res = dts1._append(dts2)
  289. tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
  290. res = pd.concat([dts1, dts2])
  291. tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
  292. # different tz
  293. dti3 = pd.DatetimeIndex(["2012-01-01", "2012-01-02"], tz="US/Pacific")
  294. exp = Index(
  295. [
  296. pd.Timestamp("2011-01-01", tz=tz),
  297. pd.Timestamp("2011-01-02", tz=tz),
  298. pd.Timestamp("2012-01-01", tz="US/Pacific"),
  299. pd.Timestamp("2012-01-02", tz="US/Pacific"),
  300. ],
  301. dtype=object,
  302. )
  303. res = dti1.append(dti3)
  304. tm.assert_index_equal(res, exp)
  305. dts1 = Series(dti1)
  306. dts3 = Series(dti3)
  307. res = dts1._append(dts3)
  308. tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
  309. res = pd.concat([dts1, dts3])
  310. tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
  311. def test_concatlike_common_period(self):
  312. # GH 13660
  313. pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
  314. pi2 = pd.PeriodIndex(["2012-01", "2012-02"], freq="M")
  315. exp = pd.PeriodIndex(["2011-01", "2011-02", "2012-01", "2012-02"], freq="M")
  316. res = pi1.append(pi2)
  317. tm.assert_index_equal(res, exp)
  318. ps1 = Series(pi1)
  319. ps2 = Series(pi2)
  320. res = ps1._append(ps2)
  321. tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
  322. res = pd.concat([ps1, ps2])
  323. tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
  324. def test_concatlike_common_period_diff_freq_to_object(self):
  325. # GH 13221
  326. pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
  327. pi2 = pd.PeriodIndex(["2012-01-01", "2012-02-01"], freq="D")
  328. exp = Index(
  329. [
  330. pd.Period("2011-01", freq="M"),
  331. pd.Period("2011-02", freq="M"),
  332. pd.Period("2012-01-01", freq="D"),
  333. pd.Period("2012-02-01", freq="D"),
  334. ],
  335. dtype=object,
  336. )
  337. res = pi1.append(pi2)
  338. tm.assert_index_equal(res, exp)
  339. ps1 = Series(pi1)
  340. ps2 = Series(pi2)
  341. res = ps1._append(ps2)
  342. tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
  343. res = pd.concat([ps1, ps2])
  344. tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
  345. def test_concatlike_common_period_mixed_dt_to_object(self):
  346. # GH 13221
  347. # different datetimelike
  348. pi1 = pd.PeriodIndex(["2011-01", "2011-02"], freq="M")
  349. tdi = pd.TimedeltaIndex(["1 days", "2 days"])
  350. exp = Index(
  351. [
  352. pd.Period("2011-01", freq="M"),
  353. pd.Period("2011-02", freq="M"),
  354. pd.Timedelta("1 days"),
  355. pd.Timedelta("2 days"),
  356. ],
  357. dtype=object,
  358. )
  359. res = pi1.append(tdi)
  360. tm.assert_index_equal(res, exp)
  361. ps1 = Series(pi1)
  362. tds = Series(tdi)
  363. res = ps1._append(tds)
  364. tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
  365. res = pd.concat([ps1, tds])
  366. tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
  367. # inverse
  368. exp = Index(
  369. [
  370. pd.Timedelta("1 days"),
  371. pd.Timedelta("2 days"),
  372. pd.Period("2011-01", freq="M"),
  373. pd.Period("2011-02", freq="M"),
  374. ],
  375. dtype=object,
  376. )
  377. res = tdi.append(pi1)
  378. tm.assert_index_equal(res, exp)
  379. ps1 = Series(pi1)
  380. tds = Series(tdi)
  381. res = tds._append(ps1)
  382. tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
  383. res = pd.concat([tds, ps1])
  384. tm.assert_series_equal(res, Series(exp, index=[0, 1, 0, 1]))
  385. def test_concat_categorical(self):
  386. # GH 13524
  387. # same categories -> category
  388. s1 = Series([1, 2, np.nan], dtype="category")
  389. s2 = Series([2, 1, 2], dtype="category")
  390. exp = Series([1, 2, np.nan, 2, 1, 2], dtype="category")
  391. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  392. tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
  393. # partially different categories => not-category
  394. s1 = Series([3, 2], dtype="category")
  395. s2 = Series([2, 1], dtype="category")
  396. exp = Series([3, 2, 2, 1])
  397. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  398. tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
  399. # completely different categories (same dtype) => not-category
  400. s1 = Series([10, 11, np.nan], dtype="category")
  401. s2 = Series([np.nan, 1, 3, 2], dtype="category")
  402. exp = Series([10, 11, np.nan, np.nan, 1, 3, 2], dtype=np.float64)
  403. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  404. tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
  405. def test_union_categorical_same_categories_different_order(self):
  406. # https://github.com/pandas-dev/pandas/issues/19096
  407. a = Series(Categorical(["a", "b", "c"], categories=["a", "b", "c"]))
  408. b = Series(Categorical(["a", "b", "c"], categories=["b", "a", "c"]))
  409. result = pd.concat([a, b], ignore_index=True)
  410. expected = Series(
  411. Categorical(["a", "b", "c", "a", "b", "c"], categories=["a", "b", "c"])
  412. )
  413. tm.assert_series_equal(result, expected)
  414. def test_concat_categorical_coercion(self):
  415. # GH 13524
  416. # category + not-category => not-category
  417. s1 = Series([1, 2, np.nan], dtype="category")
  418. s2 = Series([2, 1, 2])
  419. exp = Series([1, 2, np.nan, 2, 1, 2], dtype=np.float64)
  420. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  421. tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
  422. # result shouldn't be affected by 1st elem dtype
  423. exp = Series([2, 1, 2, 1, 2, np.nan], dtype=np.float64)
  424. tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
  425. tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
  426. # all values are not in category => not-category
  427. s1 = Series([3, 2], dtype="category")
  428. s2 = Series([2, 1])
  429. exp = Series([3, 2, 2, 1])
  430. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  431. tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
  432. exp = Series([2, 1, 3, 2])
  433. tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
  434. tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
  435. # completely different categories => not-category
  436. s1 = Series([10, 11, np.nan], dtype="category")
  437. s2 = Series([1, 3, 2])
  438. exp = Series([10, 11, np.nan, 1, 3, 2], dtype=np.float64)
  439. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  440. tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
  441. exp = Series([1, 3, 2, 10, 11, np.nan], dtype=np.float64)
  442. tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
  443. tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
  444. # different dtype => not-category
  445. s1 = Series([10, 11, np.nan], dtype="category")
  446. s2 = Series(["a", "b", "c"])
  447. exp = Series([10, 11, np.nan, "a", "b", "c"])
  448. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  449. tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
  450. exp = Series(["a", "b", "c", 10, 11, np.nan])
  451. tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
  452. tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
  453. # if normal series only contains NaN-likes => not-category
  454. s1 = Series([10, 11], dtype="category")
  455. s2 = Series([np.nan, np.nan, np.nan])
  456. exp = Series([10, 11, np.nan, np.nan, np.nan])
  457. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  458. tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
  459. exp = Series([np.nan, np.nan, np.nan, 10, 11])
  460. tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
  461. tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
  462. def test_concat_categorical_3elem_coercion(self):
  463. # GH 13524
  464. # mixed dtypes => not-category
  465. s1 = Series([1, 2, np.nan], dtype="category")
  466. s2 = Series([2, 1, 2], dtype="category")
  467. s3 = Series([1, 2, 1, 2, np.nan])
  468. exp = Series([1, 2, np.nan, 2, 1, 2, 1, 2, 1, 2, np.nan], dtype="float")
  469. tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
  470. tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp)
  471. exp = Series([1, 2, 1, 2, np.nan, 1, 2, np.nan, 2, 1, 2], dtype="float")
  472. tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
  473. tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp)
  474. # values are all in either category => not-category
  475. s1 = Series([4, 5, 6], dtype="category")
  476. s2 = Series([1, 2, 3], dtype="category")
  477. s3 = Series([1, 3, 4])
  478. exp = Series([4, 5, 6, 1, 2, 3, 1, 3, 4])
  479. tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
  480. tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp)
  481. exp = Series([1, 3, 4, 4, 5, 6, 1, 2, 3])
  482. tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
  483. tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp)
  484. # values are all in either category => not-category
  485. s1 = Series([4, 5, 6], dtype="category")
  486. s2 = Series([1, 2, 3], dtype="category")
  487. s3 = Series([10, 11, 12])
  488. exp = Series([4, 5, 6, 1, 2, 3, 10, 11, 12])
  489. tm.assert_series_equal(pd.concat([s1, s2, s3], ignore_index=True), exp)
  490. tm.assert_series_equal(s1._append([s2, s3], ignore_index=True), exp)
  491. exp = Series([10, 11, 12, 4, 5, 6, 1, 2, 3])
  492. tm.assert_series_equal(pd.concat([s3, s1, s2], ignore_index=True), exp)
  493. tm.assert_series_equal(s3._append([s1, s2], ignore_index=True), exp)
  494. def test_concat_categorical_multi_coercion(self):
  495. # GH 13524
  496. s1 = Series([1, 3], dtype="category")
  497. s2 = Series([3, 4], dtype="category")
  498. s3 = Series([2, 3])
  499. s4 = Series([2, 2], dtype="category")
  500. s5 = Series([1, np.nan])
  501. s6 = Series([1, 3, 2], dtype="category")
  502. # mixed dtype, values are all in categories => not-category
  503. exp = Series([1, 3, 3, 4, 2, 3, 2, 2, 1, np.nan, 1, 3, 2])
  504. res = pd.concat([s1, s2, s3, s4, s5, s6], ignore_index=True)
  505. tm.assert_series_equal(res, exp)
  506. res = s1._append([s2, s3, s4, s5, s6], ignore_index=True)
  507. tm.assert_series_equal(res, exp)
  508. exp = Series([1, 3, 2, 1, np.nan, 2, 2, 2, 3, 3, 4, 1, 3])
  509. res = pd.concat([s6, s5, s4, s3, s2, s1], ignore_index=True)
  510. tm.assert_series_equal(res, exp)
  511. res = s6._append([s5, s4, s3, s2, s1], ignore_index=True)
  512. tm.assert_series_equal(res, exp)
  513. def test_concat_categorical_ordered(self):
  514. # GH 13524
  515. s1 = Series(Categorical([1, 2, np.nan], ordered=True))
  516. s2 = Series(Categorical([2, 1, 2], ordered=True))
  517. exp = Series(Categorical([1, 2, np.nan, 2, 1, 2], ordered=True))
  518. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  519. tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
  520. exp = Series(Categorical([1, 2, np.nan, 2, 1, 2, 1, 2, np.nan], ordered=True))
  521. tm.assert_series_equal(pd.concat([s1, s2, s1], ignore_index=True), exp)
  522. tm.assert_series_equal(s1._append([s2, s1], ignore_index=True), exp)
  523. def test_concat_categorical_coercion_nan(self):
  524. # GH 13524
  525. # some edge cases
  526. # category + not-category => not category
  527. s1 = Series(np.array([np.nan, np.nan], dtype=np.float64), dtype="category")
  528. s2 = Series([np.nan, 1])
  529. exp = Series([np.nan, np.nan, np.nan, 1])
  530. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  531. tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
  532. s1 = Series([1, np.nan], dtype="category")
  533. s2 = Series([np.nan, np.nan])
  534. exp = Series([1, np.nan, np.nan, np.nan], dtype="float")
  535. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  536. tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
  537. # mixed dtype, all nan-likes => not-category
  538. s1 = Series([np.nan, np.nan], dtype="category")
  539. s2 = Series([np.nan, np.nan])
  540. exp = Series([np.nan, np.nan, np.nan, np.nan])
  541. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  542. tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
  543. tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
  544. tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
  545. # all category nan-likes => category
  546. s1 = Series([np.nan, np.nan], dtype="category")
  547. s2 = Series([np.nan, np.nan], dtype="category")
  548. exp = Series([np.nan, np.nan, np.nan, np.nan], dtype="category")
  549. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  550. tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
  551. def test_concat_categorical_empty(self):
  552. # GH 13524
  553. s1 = Series([], dtype="category")
  554. s2 = Series([1, 2], dtype="category")
  555. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
  556. tm.assert_series_equal(s1._append(s2, ignore_index=True), s2)
  557. tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
  558. tm.assert_series_equal(s2._append(s1, ignore_index=True), s2)
  559. s1 = Series([], dtype="category")
  560. s2 = Series([], dtype="category")
  561. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
  562. tm.assert_series_equal(s1._append(s2, ignore_index=True), s2)
  563. s1 = Series([], dtype="category")
  564. s2 = Series([], dtype="object")
  565. # different dtype => not-category
  566. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)
  567. tm.assert_series_equal(s1._append(s2, ignore_index=True), s2)
  568. tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), s2)
  569. tm.assert_series_equal(s2._append(s1, ignore_index=True), s2)
  570. s1 = Series([], dtype="category")
  571. s2 = Series([np.nan, np.nan])
  572. # empty Series is ignored
  573. exp = Series([np.nan, np.nan])
  574. tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), exp)
  575. tm.assert_series_equal(s1._append(s2, ignore_index=True), exp)
  576. tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
  577. tm.assert_series_equal(s2._append(s1, ignore_index=True), exp)
  578. def test_categorical_concat_append(self):
  579. cat = Categorical(["a", "b"], categories=["a", "b"])
  580. vals = [1, 2]
  581. df = DataFrame({"cats": cat, "vals": vals})
  582. cat2 = Categorical(["a", "b", "a", "b"], categories=["a", "b"])
  583. vals2 = [1, 2, 1, 2]
  584. exp = DataFrame({"cats": cat2, "vals": vals2}, index=Index([0, 1, 0, 1]))
  585. tm.assert_frame_equal(pd.concat([df, df]), exp)
  586. tm.assert_frame_equal(df._append(df), exp)
  587. # GH 13524 can concat different categories
  588. cat3 = Categorical(["a", "b"], categories=["a", "b", "c"])
  589. vals3 = [1, 2]
  590. df_different_categories = DataFrame({"cats": cat3, "vals": vals3})
  591. res = pd.concat([df, df_different_categories], ignore_index=True)
  592. exp = DataFrame({"cats": list("abab"), "vals": [1, 2, 1, 2]})
  593. tm.assert_frame_equal(res, exp)
  594. res = df._append(df_different_categories, ignore_index=True)
  595. tm.assert_frame_equal(res, exp)