123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208 |
- import numpy as np
- import pytest
- import pandas as pd
- from pandas import (
- DataFrame,
- merge_ordered,
- )
- import pandas._testing as tm
- @pytest.fixture
- def left():
- return DataFrame({"key": ["a", "c", "e"], "lvalue": [1, 2.0, 3]})
- @pytest.fixture
- def right():
- return DataFrame({"key": ["b", "c", "d", "f"], "rvalue": [1, 2, 3.0, 4]})
- class TestMergeOrdered:
- def test_basic(self, left, right):
- result = merge_ordered(left, right, on="key")
- expected = DataFrame(
- {
- "key": ["a", "b", "c", "d", "e", "f"],
- "lvalue": [1, np.nan, 2, np.nan, 3, np.nan],
- "rvalue": [np.nan, 1, 2, 3, np.nan, 4],
- }
- )
- tm.assert_frame_equal(result, expected)
- def test_ffill(self, left, right):
- result = merge_ordered(left, right, on="key", fill_method="ffill")
- expected = DataFrame(
- {
- "key": ["a", "b", "c", "d", "e", "f"],
- "lvalue": [1.0, 1, 2, 2, 3, 3.0],
- "rvalue": [np.nan, 1, 2, 3, 3, 4],
- }
- )
- tm.assert_frame_equal(result, expected)
- def test_multigroup(self, left, right):
- left = pd.concat([left, left], ignore_index=True)
- left["group"] = ["a"] * 3 + ["b"] * 3
- result = merge_ordered(
- left, right, on="key", left_by="group", fill_method="ffill"
- )
- expected = DataFrame(
- {
- "key": ["a", "b", "c", "d", "e", "f"] * 2,
- "lvalue": [1.0, 1, 2, 2, 3, 3.0] * 2,
- "rvalue": [np.nan, 1, 2, 3, 3, 4] * 2,
- }
- )
- expected["group"] = ["a"] * 6 + ["b"] * 6
- tm.assert_frame_equal(result, expected.loc[:, result.columns])
- result2 = merge_ordered(
- right, left, on="key", right_by="group", fill_method="ffill"
- )
- tm.assert_frame_equal(result, result2.loc[:, result.columns])
- result = merge_ordered(left, right, on="key", left_by="group")
- assert result["group"].notna().all()
- def test_merge_type(self, left, right):
- class NotADataFrame(DataFrame):
- @property
- def _constructor(self):
- return NotADataFrame
- nad = NotADataFrame(left)
- result = nad.merge(right, on="key")
- assert isinstance(result, NotADataFrame)
- @pytest.mark.parametrize(
- "df_seq, pattern",
- [
- ((), "[Nn]o objects"),
- ([], "[Nn]o objects"),
- ({}, "[Nn]o objects"),
- ([None], "objects.*None"),
- ([None, None], "objects.*None"),
- ],
- )
- def test_empty_sequence_concat(self, df_seq, pattern):
- # GH 9157
- with pytest.raises(ValueError, match=pattern):
- pd.concat(df_seq)
- @pytest.mark.parametrize(
- "arg", [[DataFrame()], [None, DataFrame()], [DataFrame(), None]]
- )
- def test_empty_sequence_concat_ok(self, arg):
- pd.concat(arg)
- def test_doc_example(self):
- left = DataFrame(
- {
- "group": list("aaabbb"),
- "key": ["a", "c", "e", "a", "c", "e"],
- "lvalue": [1, 2, 3] * 2,
- }
- )
- right = DataFrame({"key": ["b", "c", "d"], "rvalue": [1, 2, 3]})
- result = merge_ordered(left, right, fill_method="ffill", left_by="group")
- expected = DataFrame(
- {
- "group": list("aaaaabbbbb"),
- "key": ["a", "b", "c", "d", "e"] * 2,
- "lvalue": [1, 1, 2, 2, 3] * 2,
- "rvalue": [np.nan, 1, 2, 3, 3] * 2,
- }
- )
- tm.assert_frame_equal(result, expected)
- @pytest.mark.parametrize(
- "left, right, on, left_by, right_by, expected",
- [
- (
- DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}),
- DataFrame({"T": [2], "E": [1]}),
- ["T"],
- ["G", "H"],
- None,
- DataFrame(
- {
- "G": ["g"] * 3,
- "H": ["h"] * 3,
- "T": [1, 2, 3],
- "E": [np.nan, 1.0, np.nan],
- }
- ),
- ),
- (
- DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}),
- DataFrame({"T": [2], "E": [1]}),
- "T",
- ["G", "H"],
- None,
- DataFrame(
- {
- "G": ["g"] * 3,
- "H": ["h"] * 3,
- "T": [1, 2, 3],
- "E": [np.nan, 1.0, np.nan],
- }
- ),
- ),
- (
- DataFrame({"T": [2], "E": [1]}),
- DataFrame({"G": ["g", "g"], "H": ["h", "h"], "T": [1, 3]}),
- ["T"],
- None,
- ["G", "H"],
- DataFrame(
- {
- "T": [1, 2, 3],
- "E": [np.nan, 1.0, np.nan],
- "G": ["g"] * 3,
- "H": ["h"] * 3,
- }
- ),
- ),
- ],
- )
- def test_list_type_by(self, left, right, on, left_by, right_by, expected):
- # GH 35269
- result = merge_ordered(
- left=left,
- right=right,
- on=on,
- left_by=left_by,
- right_by=right_by,
- )
- tm.assert_frame_equal(result, expected)
- def test_left_by_length_equals_to_right_shape0(self):
- # GH 38166
- left = DataFrame([["g", "h", 1], ["g", "h", 3]], columns=list("GHE"))
- right = DataFrame([[2, 1]], columns=list("ET"))
- result = merge_ordered(left, right, on="E", left_by=["G", "H"])
- expected = DataFrame(
- {"G": ["g"] * 3, "H": ["h"] * 3, "E": [1, 2, 3], "T": [np.nan, 1.0, np.nan]}
- )
- tm.assert_frame_equal(result, expected)
- def test_elements_not_in_by_but_in_df(self):
- # GH 38167
- left = DataFrame([["g", "h", 1], ["g", "h", 3]], columns=list("GHE"))
- right = DataFrame([[2, 1]], columns=list("ET"))
- msg = r"\{'h'\} not found in left columns"
- with pytest.raises(KeyError, match=msg):
- merge_ordered(left, right, on="E", left_by=["G", "h"])
|