test_empty.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas import (
  5. DataFrame,
  6. RangeIndex,
  7. Series,
  8. concat,
  9. date_range,
  10. )
  11. import pandas._testing as tm
  12. class TestEmptyConcat:
  13. def test_handle_empty_objects(self, sort):
  14. df = DataFrame(np.random.randn(10, 4), columns=list("abcd"))
  15. dfcopy = df[:5].copy()
  16. dfcopy["foo"] = "bar"
  17. empty = df[5:5]
  18. frames = [dfcopy, empty, empty, df[5:]]
  19. concatted = concat(frames, axis=0, sort=sort)
  20. expected = df.reindex(columns=["a", "b", "c", "d", "foo"])
  21. expected["foo"] = expected["foo"].astype("O")
  22. expected.loc[0:4, "foo"] = "bar"
  23. tm.assert_frame_equal(concatted, expected)
  24. # empty as first element with time series
  25. # GH3259
  26. df = DataFrame(
  27. {"A": range(10000)}, index=date_range("20130101", periods=10000, freq="s")
  28. )
  29. empty = DataFrame()
  30. result = concat([df, empty], axis=1)
  31. tm.assert_frame_equal(result, df)
  32. result = concat([empty, df], axis=1)
  33. tm.assert_frame_equal(result, df)
  34. result = concat([df, empty])
  35. tm.assert_frame_equal(result, df)
  36. result = concat([empty, df])
  37. tm.assert_frame_equal(result, df)
  38. def test_concat_empty_series(self):
  39. # GH 11082
  40. s1 = Series([1, 2, 3], name="x")
  41. s2 = Series(name="y", dtype="float64")
  42. res = concat([s1, s2], axis=1)
  43. exp = DataFrame(
  44. {"x": [1, 2, 3], "y": [np.nan, np.nan, np.nan]},
  45. index=RangeIndex(3),
  46. )
  47. tm.assert_frame_equal(res, exp)
  48. s1 = Series([1, 2, 3], name="x")
  49. s2 = Series(name="y", dtype="float64")
  50. res = concat([s1, s2], axis=0)
  51. # name will be reset
  52. exp = Series([1, 2, 3])
  53. tm.assert_series_equal(res, exp)
  54. # empty Series with no name
  55. s1 = Series([1, 2, 3], name="x")
  56. s2 = Series(name=None, dtype="float64")
  57. res = concat([s1, s2], axis=1)
  58. exp = DataFrame(
  59. {"x": [1, 2, 3], 0: [np.nan, np.nan, np.nan]},
  60. columns=["x", 0],
  61. index=RangeIndex(3),
  62. )
  63. tm.assert_frame_equal(res, exp)
  64. @pytest.mark.parametrize("tz", [None, "UTC"])
  65. @pytest.mark.parametrize("values", [[], [1, 2, 3]])
  66. def test_concat_empty_series_timelike(self, tz, values):
  67. # GH 18447
  68. first = Series([], dtype="M8[ns]").dt.tz_localize(tz)
  69. dtype = None if values else np.float64
  70. second = Series(values, dtype=dtype)
  71. expected = DataFrame(
  72. {
  73. 0: Series([pd.NaT] * len(values), dtype="M8[ns]").dt.tz_localize(tz),
  74. 1: values,
  75. }
  76. )
  77. result = concat([first, second], axis=1)
  78. tm.assert_frame_equal(result, expected)
  79. @pytest.mark.parametrize(
  80. "left,right,expected",
  81. [
  82. # booleans
  83. (np.bool_, np.int32, np.object_), # changed from int32 in 2.0 GH#39817
  84. (np.bool_, np.float32, np.object_),
  85. # datetime-like
  86. ("m8[ns]", np.bool_, np.object_),
  87. ("m8[ns]", np.int64, np.object_),
  88. ("M8[ns]", np.bool_, np.object_),
  89. ("M8[ns]", np.int64, np.object_),
  90. # categorical
  91. ("category", "category", "category"),
  92. ("category", "object", "object"),
  93. ],
  94. )
  95. def test_concat_empty_series_dtypes(self, left, right, expected):
  96. # GH#39817, GH#45101
  97. result = concat([Series(dtype=left), Series(dtype=right)])
  98. assert result.dtype == expected
  99. @pytest.mark.parametrize(
  100. "dtype", ["float64", "int8", "uint8", "bool", "m8[ns]", "M8[ns]"]
  101. )
  102. def test_concat_empty_series_dtypes_match_roundtrips(self, dtype):
  103. dtype = np.dtype(dtype)
  104. result = concat([Series(dtype=dtype)])
  105. assert result.dtype == dtype
  106. result = concat([Series(dtype=dtype), Series(dtype=dtype)])
  107. assert result.dtype == dtype
  108. @pytest.mark.parametrize("dtype", ["float64", "int8", "uint8", "m8[ns]", "M8[ns]"])
  109. @pytest.mark.parametrize(
  110. "dtype2",
  111. ["float64", "int8", "uint8", "m8[ns]", "M8[ns]"],
  112. )
  113. def test_concat_empty_series_dtypes_roundtrips(self, dtype, dtype2):
  114. # round-tripping with self & like self
  115. if dtype == dtype2:
  116. return
  117. def int_result_type(dtype, dtype2):
  118. typs = {dtype.kind, dtype2.kind}
  119. if not len(typs - {"i", "u", "b"}) and (
  120. dtype.kind == "i" or dtype2.kind == "i"
  121. ):
  122. return "i"
  123. elif not len(typs - {"u", "b"}) and (
  124. dtype.kind == "u" or dtype2.kind == "u"
  125. ):
  126. return "u"
  127. return None
  128. def float_result_type(dtype, dtype2):
  129. typs = {dtype.kind, dtype2.kind}
  130. if not len(typs - {"f", "i", "u"}) and (
  131. dtype.kind == "f" or dtype2.kind == "f"
  132. ):
  133. return "f"
  134. return None
  135. def get_result_type(dtype, dtype2):
  136. result = float_result_type(dtype, dtype2)
  137. if result is not None:
  138. return result
  139. result = int_result_type(dtype, dtype2)
  140. if result is not None:
  141. return result
  142. return "O"
  143. dtype = np.dtype(dtype)
  144. dtype2 = np.dtype(dtype2)
  145. expected = get_result_type(dtype, dtype2)
  146. result = concat([Series(dtype=dtype), Series(dtype=dtype2)]).dtype
  147. assert result.kind == expected
  148. def test_concat_empty_series_dtypes_triple(self):
  149. assert (
  150. concat(
  151. [Series(dtype="M8[ns]"), Series(dtype=np.bool_), Series(dtype=np.int64)]
  152. ).dtype
  153. == np.object_
  154. )
  155. def test_concat_empty_series_dtype_category_with_array(self):
  156. # GH#18515
  157. assert (
  158. concat(
  159. [Series(np.array([]), dtype="category"), Series(dtype="float64")]
  160. ).dtype
  161. == "float64"
  162. )
  163. def test_concat_empty_series_dtypes_sparse(self):
  164. result = concat(
  165. [
  166. Series(dtype="float64").astype("Sparse"),
  167. Series(dtype="float64").astype("Sparse"),
  168. ]
  169. )
  170. assert result.dtype == "Sparse[float64]"
  171. result = concat(
  172. [Series(dtype="float64").astype("Sparse"), Series(dtype="float64")]
  173. )
  174. expected = pd.SparseDtype(np.float64)
  175. assert result.dtype == expected
  176. result = concat(
  177. [Series(dtype="float64").astype("Sparse"), Series(dtype="object")]
  178. )
  179. expected = pd.SparseDtype("object")
  180. assert result.dtype == expected
  181. def test_concat_empty_df_object_dtype(self):
  182. # GH 9149
  183. df_1 = DataFrame({"Row": [0, 1, 1], "EmptyCol": np.nan, "NumberCol": [1, 2, 3]})
  184. df_2 = DataFrame(columns=df_1.columns)
  185. result = concat([df_1, df_2], axis=0)
  186. expected = df_1.astype(object)
  187. tm.assert_frame_equal(result, expected)
  188. def test_concat_empty_dataframe_dtypes(self):
  189. df = DataFrame(columns=list("abc"))
  190. df["a"] = df["a"].astype(np.bool_)
  191. df["b"] = df["b"].astype(np.int32)
  192. df["c"] = df["c"].astype(np.float64)
  193. result = concat([df, df])
  194. assert result["a"].dtype == np.bool_
  195. assert result["b"].dtype == np.int32
  196. assert result["c"].dtype == np.float64
  197. result = concat([df, df.astype(np.float64)])
  198. assert result["a"].dtype == np.object_
  199. assert result["b"].dtype == np.float64
  200. assert result["c"].dtype == np.float64
  201. def test_concat_inner_join_empty(self):
  202. # GH 15328
  203. df_empty = DataFrame()
  204. df_a = DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64")
  205. df_expected = DataFrame({"a": []}, index=RangeIndex(0), dtype="int64")
  206. for how, expected in [("inner", df_expected), ("outer", df_a)]:
  207. result = concat([df_a, df_empty], axis=1, join=how)
  208. tm.assert_frame_equal(result, expected)
  209. def test_empty_dtype_coerce(self):
  210. # xref to #12411
  211. # xref to #12045
  212. # xref to #11594
  213. # see below
  214. # 10571
  215. df1 = DataFrame(data=[[1, None], [2, None]], columns=["a", "b"])
  216. df2 = DataFrame(data=[[3, None], [4, None]], columns=["a", "b"])
  217. result = concat([df1, df2])
  218. expected = df1.dtypes
  219. tm.assert_series_equal(result.dtypes, expected)
  220. def test_concat_empty_dataframe(self):
  221. # 39037
  222. df1 = DataFrame(columns=["a", "b"])
  223. df2 = DataFrame(columns=["b", "c"])
  224. result = concat([df1, df2, df1])
  225. expected = DataFrame(columns=["a", "b", "c"])
  226. tm.assert_frame_equal(result, expected)
  227. df3 = DataFrame(columns=["a", "b"])
  228. df4 = DataFrame(columns=["b"])
  229. result = concat([df3, df4])
  230. expected = DataFrame(columns=["a", "b"])
  231. tm.assert_frame_equal(result, expected)
  232. def test_concat_empty_dataframe_different_dtypes(self):
  233. # 39037
  234. df1 = DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
  235. df2 = DataFrame({"a": [1, 2, 3]})
  236. result = concat([df1[:0], df2[:0]])
  237. assert result["a"].dtype == np.int64
  238. assert result["b"].dtype == np.object_
  239. def test_concat_to_empty_ea(self):
  240. """48510 `concat` to an empty EA should maintain type EA dtype."""
  241. df_empty = DataFrame({"a": pd.array([], dtype=pd.Int64Dtype())})
  242. df_new = DataFrame({"a": pd.array([1, 2, 3], dtype=pd.Int64Dtype())})
  243. expected = df_new.copy()
  244. result = concat([df_empty, df_new])
  245. tm.assert_frame_equal(result, expected)