# test_merge_ordered.py (6.3 KB)

  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas import (
  5. DataFrame,
  6. merge_ordered,
  7. )
  8. import pandas._testing as tm
  9. @pytest.fixture
  10. def left():
  11. return DataFrame({"key": ["a", "c", "e"], "lvalue": [1, 2.0, 3]})
  12. @pytest.fixture
  13. def right():
  14. return DataFrame({"key": ["b", "c", "d", "f"], "rvalue": [1, 2, 3.0, 4]})
  15. class TestMergeOrdered:
  16. def test_basic(self, left, right):
  17. result = merge_ordered(left, right, on="key")
  18. expected = DataFrame(
  19. {
  20. "key": ["a", "b", "c", "d", "e", "f"],
  21. "lvalue": [1, np.nan, 2, np.nan, 3, np.nan],
  22. "rvalue": [np.nan, 1, 2, 3, np.nan, 4],
  23. }
  24. )
  25. tm.assert_frame_equal(result, expected)
  26. def test_ffill(self, left, right):
  27. result = merge_ordered(left, right, on="key", fill_method="ffill")
  28. expected = DataFrame(
  29. {
  30. "key": ["a", "b", "c", "d", "e", "f"],
  31. "lvalue": [1.0, 1, 2, 2, 3, 3.0],
  32. "rvalue": [np.nan, 1, 2, 3, 3, 4],
  33. }
  34. )
  35. tm.assert_frame_equal(result, expected)
  36. def test_multigroup(self, left, right):
  37. left = pd.concat([left, left], ignore_index=True)
  38. left["group"] = ["a"] * 3 + ["b"] * 3
  39. result = merge_ordered(
  40. left, right, on="key", left_by="group", fill_method="ffill"
  41. )
  42. expected = DataFrame(
  43. {
  44. "key": ["a", "b", "c", "d", "e", "f"] * 2,
  45. "lvalue": [1.0, 1, 2, 2, 3, 3.0] * 2,
  46. "rvalue": [np.nan, 1, 2, 3, 3, 4] * 2,
  47. }
  48. )
  49. expected["group"] = ["a"] * 6 + ["b"] * 6
  50. tm.assert_frame_equal(result, expected.loc[:, result.columns])
  51. result2 = merge_ordered(
  52. right, left, on="key", right_by="group", fill_method="ffill"
  53. )
  54. tm.assert_frame_equal(result, result2.loc[:, result.columns])
  55. result = merge_ordered(left, right, on="key", left_by="group")
  56. assert result["group"].notna().all()
  57. def test_merge_type(self, left, right):
  58. class NotADataFrame(DataFrame):
  59. @property
  60. def _constructor(self):
  61. return NotADataFrame
  62. nad = NotADataFrame(left)
  63. result = nad.merge(right, on="key")
  64. assert isinstance(result, NotADataFrame)
  65. @pytest.mark.parametrize(
  66. "df_seq, pattern",
  67. [
  68. ((), "[Nn]o objects"),
  69. ([], "[Nn]o objects"),
  70. ({}, "[Nn]o objects"),
  71. ([None], "objects.*None"),
  72. ([None, None], "objects.*None"),
  73. ],
  74. )
  75. def test_empty_sequence_concat(self, df_seq, pattern):
  76. # GH 9157
  77. with pytest.raises(ValueError, match=pattern):
  78. pd.concat(df_seq)
  79. @pytest.mark.parametrize(
  80. "arg", [[DataFrame()], [None, DataFrame()], [DataFrame(), None]]
  81. )
  82. def test_empty_sequence_concat_ok(self, arg):
  83. pd.concat(arg)
  84. def test_doc_example(self):
  85. left = DataFrame(
  86. {
  87. "group": list("aaabbb"),
  88. "key": ["a", "c", "e", "a", "c", "e"],
  89. "lvalue": [1, 2, 3] * 2,
  90. }
  91. )
  92. right = DataFrame({"key": ["b", "c", "d"], "rvalue": [1, 2, 3]})
  93. result = merge_ordered(left, right, fill_method="ffill", left_by="group")
  94. expected = DataFrame(
  95. {
  96. "group": list("aaaaabbbbb"),
  97. "key": ["a", "b", "c", "d", "e"] * 2,
  98. "lvalue": [1, 1, 2, 2, 3] * 2,
  99. "rvalue": [np.nan, 1, 2, 3, 3] * 2,
  100. }
  101. )
  102. tm.assert_frame_equal(result, expected)
  103. @pytest.mark.parametrize(
  104. "left, right, on, left_by, right_by, expected",
  105. [
  106. (
  107. DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}),
  108. DataFrame({"T": [2], "E": [1]}),
  109. ["T"],
  110. ["G", "H"],
  111. None,
  112. DataFrame(
  113. {
  114. "G": ["g"] * 3,
  115. "H": ["h"] * 3,
  116. "T": [1, 2, 3],
  117. "E": [np.nan, 1.0, np.nan],
  118. }
  119. ),
  120. ),
  121. (
  122. DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}),
  123. DataFrame({"T": [2], "E": [1]}),
  124. "T",
  125. ["G", "H"],
  126. None,
  127. DataFrame(
  128. {
  129. "G": ["g"] * 3,
  130. "H": ["h"] * 3,
  131. "T": [1, 2, 3],
  132. "E": [np.nan, 1.0, np.nan],
  133. }
  134. ),
  135. ),
  136. (
  137. DataFrame({"T": [2], "E": [1]}),
  138. DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}),
  139. ["T"],
  140. None,
  141. ["G", "H"],
  142. DataFrame(
  143. {
  144. "T": [1, 2, 3],
  145. "E": [np.nan, 1.0, np.nan],
  146. "G": ["g"] * 3,
  147. "H": ["h"] * 3,
  148. }
  149. ),
  150. ),
  151. ],
  152. )
  153. def test_list_type_by(self, left, right, on, left_by, right_by, expected):
  154. # GH 35269
  155. result = merge_ordered(
  156. left=left,
  157. right=right,
  158. on=on,
  159. left_by=left_by,
  160. right_by=right_by,
  161. )
  162. tm.assert_frame_equal(result, expected)
  163. def test_left_by_length_equals_to_right_shape0(self):
  164. # GH 38166
  165. left = DataFrame([["g", "h", 1], ["g", "h", 3]], columns=list("GHE"))
  166. right = DataFrame([[2, 1]], columns=list("ET"))
  167. result = merge_ordered(left, right, on="E", left_by=["G", "H"])
  168. expected = DataFrame(
  169. {"G": ["g"] * 3, "H": ["h"] * 3, "E": [1, 2, 3], "T": [np.nan, 1.0, np.nan]}
  170. )
  171. tm.assert_frame_equal(result, expected)
  172. def test_elements_not_in_by_but_in_df(self):
  173. # GH 38167
  174. left = DataFrame([["g", "h", 1], ["g", "h", 3]], columns=list("GHE"))
  175. right = DataFrame([[2, 1]], columns=list("ET"))
  176. msg = r"\{'h'\} not found in left columns"
  177. with pytest.raises(KeyError, match=msg):
  178. merge_ordered(left, right, on="E", left_by=["G", "h"])