SRI-DYZBC2
/
Vehicle-cpp


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905
							import numpy as np
import pytest

import pandas as pd
from pandas import (
    DataFrame,
    Index,
    MultiIndex,
    RangeIndex,
    Series,
    Timestamp,
)
import pandas._testing as tm
from pandas.core.reshape.concat import concat
from pandas.core.reshape.merge import merge


@pytest.fixture
def left():
    """left dataframe (not multi-indexed) for multi-index join tests"""
    # a little relevant example with NAs
    key1 = ["bar", "bar", "bar", "foo", "foo", "baz", "baz", "qux", "qux", "snap"]
    key2 = ["two", "one", "three", "one", "two", "one", "two", "two", "three", "one"]

    data = np.random.randn(len(key1))
    return DataFrame({"key1": key1, "key2": key2, "data": data})


@pytest.fixture
def right(multiindex_dataframe_random_data):
    """right dataframe (multi-indexed) for multi-index join tests"""
    df = multiindex_dataframe_random_data
    df.index.names = ["key1", "key2"]

    df.columns = ["j_one", "j_two", "j_three"]
    return df


@pytest.fixture
def left_multi():
    return DataFrame(
        {
            "Origin": ["A", "A", "B", "B", "C"],
            "Destination": ["A", "B", "A", "C", "A"],
            "Period": ["AM", "AM", "IP", "AM", "OP"],
            "TripPurp": ["hbw", "nhb", "hbo", "nhb", "hbw"],
            "Trips": [1987, 3647, 2470, 4296, 4444],
        },
        columns=["Origin", "Destination", "Period", "TripPurp", "Trips"],
    ).set_index(["Origin", "Destination", "Period", "TripPurp"])


@pytest.fixture
def right_multi():
    return DataFrame(
        {
            "Origin": ["A", "A", "B", "B", "C", "C", "E"],
            "Destination": ["A", "B", "A", "B", "A", "B", "F"],
            "Period": ["AM", "AM", "IP", "AM", "OP", "IP", "AM"],
            "LinkType": ["a", "b", "c", "b", "a", "b", "a"],
            "Distance": [100, 80, 90, 80, 75, 35, 55],
        },
        columns=["Origin", "Destination", "Period", "LinkType", "Distance"],
    ).set_index(["Origin", "Destination", "Period", "LinkType"])


@pytest.fixture
def on_cols_multi():
    return ["Origin", "Destination", "Period"]


@pytest.fixture
def idx_cols_multi():
    return ["Origin", "Destination", "Period", "TripPurp", "LinkType"]


class TestMergeMulti:
    def test_merge_on_multikey(self, left, right, join_type):
        on_cols = ["key1", "key2"]
        result = left.join(right, on=on_cols, how=join_type).reset_index(drop=True)

        expected = merge(left, right.reset_index(), on=on_cols, how=join_type)

        tm.assert_frame_equal(result, expected)

        result = left.join(right, on=on_cols, how=join_type, sort=True).reset_index(
            drop=True
        )

        expected = merge(
            left, right.reset_index(), on=on_cols, how=join_type, sort=True
        )

        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("sort", [False, True])
    def test_left_join_multi_index(self, sort):
        icols = ["1st", "2nd", "3rd"]

        def bind_cols(df):
            iord = lambda a: 0 if a != a else ord(a)
            f = lambda ts: ts.map(iord) - ord("a")
            return f(df["1st"]) + f(df["3rd"]) * 1e2 + df["2nd"].fillna(0) * 1e4

        def run_asserts(left, right, sort):
            res = left.join(right, on=icols, how="left", sort=sort)

            assert len(left) < len(res) + 1
            assert not res["4th"].isna().any()
            assert not res["5th"].isna().any()

            tm.assert_series_equal(res["4th"], -res["5th"], check_names=False)
            result = bind_cols(res.iloc[:, :-2])
            tm.assert_series_equal(res["4th"], result, check_names=False)
            assert result.name is None

            if sort:
                tm.assert_frame_equal(res, res.sort_values(icols, kind="mergesort"))

            out = merge(left, right.reset_index(), on=icols, sort=sort, how="left")

            res.index = RangeIndex(len(res))
            tm.assert_frame_equal(out, res)

        lc = list(map(chr, np.arange(ord("a"), ord("z") + 1)))
        left = DataFrame(np.random.choice(lc, (5000, 2)), columns=["1st", "3rd"])
        # Explicit cast to float to avoid implicit cast when setting nan
        left.insert(1, "2nd", np.random.randint(0, 1000, len(left)).astype("float"))

        i = np.random.permutation(len(left))
        right = left.iloc[i].copy()

        left["4th"] = bind_cols(left)
        right["5th"] = -bind_cols(right)
        right.set_index(icols, inplace=True)

        run_asserts(left, right, sort)

        # inject some nulls
        left.loc[1::23, "1st"] = np.nan
        left.loc[2::37, "2nd"] = np.nan
        left.loc[3::43, "3rd"] = np.nan
        left["4th"] = bind_cols(left)

        i = np.random.permutation(len(left))
        right = left.iloc[i, :-1]
        right["5th"] = -bind_cols(right)
        right.set_index(icols, inplace=True)

        run_asserts(left, right, sort)

    @pytest.mark.parametrize("sort", [False, True])
    def test_merge_right_vs_left(self, left, right, sort):
        # compare left vs right merge with multikey
        on_cols = ["key1", "key2"]
        merged_left_right = left.merge(
            right, left_on=on_cols, right_index=True, how="left", sort=sort
        )

        merge_right_left = right.merge(
            left, right_on=on_cols, left_index=True, how="right", sort=sort
        )

        # Reorder columns
        merge_right_left = merge_right_left[merged_left_right.columns]

        tm.assert_frame_equal(merged_left_right, merge_right_left)

    def test_merge_multiple_cols_with_mixed_cols_index(self):
        # GH29522
        s = Series(
            range(6),
            MultiIndex.from_product([["A", "B"], [1, 2, 3]], names=["lev1", "lev2"]),
            name="Amount",
        )
        df = DataFrame({"lev1": list("AAABBB"), "lev2": [1, 2, 3, 1, 2, 3], "col": 0})
        result = merge(df, s.reset_index(), on=["lev1", "lev2"])
        expected = DataFrame(
            {
                "lev1": list("AAABBB"),
                "lev2": [1, 2, 3, 1, 2, 3],
                "col": [0] * 6,
                "Amount": range(6),
            }
        )
        tm.assert_frame_equal(result, expected)

    def test_compress_group_combinations(self):
        # ~ 40000000 possible unique groups
        key1 = tm.rands_array(10, 10000)
        key1 = np.tile(key1, 2)
        key2 = key1[::-1]

        df = DataFrame({"key1": key1, "key2": key2, "value1": np.random.randn(20000)})

        df2 = DataFrame(
            {"key1": key1[::2], "key2": key2[::2], "value2": np.random.randn(10000)}
        )

        # just to hit the label compression code path
        merge(df, df2, how="outer")

    def test_left_join_index_preserve_order(self):
        on_cols = ["k1", "k2"]
        left = DataFrame(
            {
                "k1": [0, 1, 2] * 8,
                "k2": ["foo", "bar"] * 12,
                "v": np.array(np.arange(24), dtype=np.int64),
            }
        )

        index = MultiIndex.from_tuples([(2, "bar"), (1, "foo")])
        right = DataFrame({"v2": [5, 7]}, index=index)

        result = left.join(right, on=on_cols)

        expected = left.copy()
        expected["v2"] = np.nan
        expected.loc[(expected.k1 == 2) & (expected.k2 == "bar"), "v2"] = 5
        expected.loc[(expected.k1 == 1) & (expected.k2 == "foo"), "v2"] = 7

        tm.assert_frame_equal(result, expected)

        result.sort_values(on_cols, kind="mergesort", inplace=True)
        expected = left.join(right, on=on_cols, sort=True)

        tm.assert_frame_equal(result, expected)

        # test join with multi dtypes blocks
        left = DataFrame(
            {
                "k1": [0, 1, 2] * 8,
                "k2": ["foo", "bar"] * 12,
                "k3": np.array([0, 1, 2] * 8, dtype=np.float32),
                "v": np.array(np.arange(24), dtype=np.int32),
            }
        )

        index = MultiIndex.from_tuples([(2, "bar"), (1, "foo")])
        right = DataFrame({"v2": [5, 7]}, index=index)

        result = left.join(right, on=on_cols)

        expected = left.copy()
        expected["v2"] = np.nan
        expected.loc[(expected.k1 == 2) & (expected.k2 == "bar"), "v2"] = 5
        expected.loc[(expected.k1 == 1) & (expected.k2 == "foo"), "v2"] = 7

        tm.assert_frame_equal(result, expected)

        result = result.sort_values(on_cols, kind="mergesort")
        expected = left.join(right, on=on_cols, sort=True)

        tm.assert_frame_equal(result, expected)

    def test_left_join_index_multi_match_multiindex(self):
        left = DataFrame(
            [
                ["X", "Y", "C", "a"],
                ["W", "Y", "C", "e"],
                ["V", "Q", "A", "h"],
                ["V", "R", "D", "i"],
                ["X", "Y", "D", "b"],
                ["X", "Y", "A", "c"],
                ["W", "Q", "B", "f"],
                ["W", "R", "C", "g"],
                ["V", "Y", "C", "j"],
                ["X", "Y", "B", "d"],
            ],
            columns=["cola", "colb", "colc", "tag"],
            index=[3, 2, 0, 1, 7, 6, 4, 5, 9, 8],
        )

        right = DataFrame(
            [
                ["W", "R", "C", 0],
                ["W", "Q", "B", 3],
                ["W", "Q", "B", 8],
                ["X", "Y", "A", 1],
                ["X", "Y", "A", 4],
                ["X", "Y", "B", 5],
                ["X", "Y", "C", 6],
                ["X", "Y", "C", 9],
                ["X", "Q", "C", -6],
                ["X", "R", "C", -9],
                ["V", "Y", "C", 7],
                ["V", "R", "D", 2],
                ["V", "R", "D", -1],
                ["V", "Q", "A", -3],
            ],
            columns=["col1", "col2", "col3", "val"],
        ).set_index(["col1", "col2", "col3"])

        result = left.join(right, on=["cola", "colb", "colc"], how="left")

        expected = DataFrame(
            [
                ["X", "Y", "C", "a", 6],
                ["X", "Y", "C", "a", 9],
                ["W", "Y", "C", "e", np.nan],
                ["V", "Q", "A", "h", -3],
                ["V", "R", "D", "i", 2],
                ["V", "R", "D", "i", -1],
                ["X", "Y", "D", "b", np.nan],
                ["X", "Y", "A", "c", 1],
                ["X", "Y", "A", "c", 4],
                ["W", "Q", "B", "f", 3],
                ["W", "Q", "B", "f", 8],
                ["W", "R", "C", "g", 0],
                ["V", "Y", "C", "j", 7],
                ["X", "Y", "B", "d", 5],
            ],
            columns=["cola", "colb", "colc", "tag", "val"],
            index=[3, 3, 2, 0, 1, 1, 7, 6, 6, 4, 4, 5, 9, 8],
        )

        tm.assert_frame_equal(result, expected)

        result = left.join(right, on=["cola", "colb", "colc"], how="left", sort=True)

        expected = expected.sort_values(["cola", "colb", "colc"], kind="mergesort")

        tm.assert_frame_equal(result, expected)

    def test_left_join_index_multi_match(self):
        left = DataFrame(
            [["c", 0], ["b", 1], ["a", 2], ["b", 3]],
            columns=["tag", "val"],
            index=[2, 0, 1, 3],
        )

        right = DataFrame(
            [
                ["a", "v"],
                ["c", "w"],
                ["c", "x"],
                ["d", "y"],
                ["a", "z"],
                ["c", "r"],
                ["e", "q"],
                ["c", "s"],
            ],
            columns=["tag", "char"],
        ).set_index("tag")

        result = left.join(right, on="tag", how="left")

        expected = DataFrame(
            [
                ["c", 0, "w"],
                ["c", 0, "x"],
                ["c", 0, "r"],
                ["c", 0, "s"],
                ["b", 1, np.nan],
                ["a", 2, "v"],
                ["a", 2, "z"],
                ["b", 3, np.nan],
            ],
            columns=["tag", "val", "char"],
            index=[2, 2, 2, 2, 0, 1, 1, 3],
        )

        tm.assert_frame_equal(result, expected)

        result = left.join(right, on="tag", how="left", sort=True)
        expected2 = expected.sort_values("tag", kind="mergesort")

        tm.assert_frame_equal(result, expected2)

        # GH7331 - maintain left frame order in left merge
        result = merge(left, right.reset_index(), how="left", on="tag")
        expected.index = RangeIndex(len(expected))
        tm.assert_frame_equal(result, expected)

    def test_left_merge_na_buglet(self):
        left = DataFrame(
            {
                "id": list("abcde"),
                "v1": np.random.randn(5),
                "v2": np.random.randn(5),
                "dummy": list("abcde"),
                "v3": np.random.randn(5),
            },
            columns=["id", "v1", "v2", "dummy", "v3"],
        )
        right = DataFrame(
            {
                "id": ["a", "b", np.nan, np.nan, np.nan],
                "sv3": [1.234, 5.678, np.nan, np.nan, np.nan],
            }
        )

        result = merge(left, right, on="id", how="left")

        rdf = right.drop(["id"], axis=1)
        expected = left.join(rdf)
        tm.assert_frame_equal(result, expected)

    def test_merge_na_keys(self):
        data = [
            [1950, "A", 1.5],
            [1950, "B", 1.5],
            [1955, "B", 1.5],
            [1960, "B", np.nan],
            [1970, "B", 4.0],
            [1950, "C", 4.0],
            [1960, "C", np.nan],
            [1965, "C", 3.0],
            [1970, "C", 4.0],
        ]

        frame = DataFrame(data, columns=["year", "panel", "data"])

        other_data = [
            [1960, "A", np.nan],
            [1970, "A", np.nan],
            [1955, "A", np.nan],
            [1965, "A", np.nan],
            [1965, "B", np.nan],
            [1955, "C", np.nan],
        ]
        other = DataFrame(other_data, columns=["year", "panel", "data"])

        result = frame.merge(other, how="outer")

        expected = frame.fillna(-999).merge(other.fillna(-999), how="outer")
        expected = expected.replace(-999, np.nan)

        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("klass", [None, np.asarray, Series, Index])
    def test_merge_datetime_index(self, klass):
        # see gh-19038
        df = DataFrame(
            [1, 2, 3], ["2016-01-01", "2017-01-01", "2018-01-01"], columns=["a"]
        )
        df.index = pd.to_datetime(df.index)
        on_vector = df.index.year

        if klass is not None:
            on_vector = klass(on_vector)

        exp_years = np.array([2016, 2017, 2018], dtype=np.int32)
        expected = DataFrame({"a": [1, 2, 3], "key_1": exp_years})

        result = df.merge(df, on=["a", on_vector], how="inner")
        tm.assert_frame_equal(result, expected)

        expected = DataFrame({"key_0": exp_years, "a_x": [1, 2, 3], "a_y": [1, 2, 3]})

        result = df.merge(df, on=[df.index.year], how="inner")
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("merge_type", ["left", "right"])
    def test_merge_datetime_multi_index_empty_df(self, merge_type):
        # see gh-36895

        left = DataFrame(
            data={
                "data": [1.5, 1.5],
            },
            index=MultiIndex.from_tuples(
                [[Timestamp("1950-01-01"), "A"], [Timestamp("1950-01-02"), "B"]],
                names=["date", "panel"],
            ),
        )

        right = DataFrame(
            index=MultiIndex.from_tuples([], names=["date", "panel"]), columns=["state"]
        )

        expected_index = MultiIndex.from_tuples(
            [[Timestamp("1950-01-01"), "A"], [Timestamp("1950-01-02"), "B"]],
            names=["date", "panel"],
        )

        if merge_type == "left":
            expected = DataFrame(
                data={
                    "data": [1.5, 1.5],
                    "state": [None, None],
                },
                index=expected_index,
            )
            results_merge = left.merge(right, how="left", on=["date", "panel"])
            results_join = left.join(right, how="left")
        else:
            expected = DataFrame(
                data={
                    "state": [None, None],
                    "data": [1.5, 1.5],
                },
                index=expected_index,
            )
            results_merge = right.merge(left, how="right", on=["date", "panel"])
            results_join = right.join(left, how="right")

        tm.assert_frame_equal(results_merge, expected)
        tm.assert_frame_equal(results_join, expected)

    @pytest.fixture
    def household(self):
        household = DataFrame(
            {
                "household_id": [1, 2, 3],
                "male": [0, 1, 0],
                "wealth": [196087.3, 316478.7, 294750],
            },
            columns=["household_id", "male", "wealth"],
        ).set_index("household_id")
        return household

    @pytest.fixture
    def portfolio(self):
        portfolio = DataFrame(
            {
                "household_id": [1, 2, 2, 3, 3, 3, 4],
                "asset_id": [
                    "nl0000301109",
                    "nl0000289783",
                    "gb00b03mlx29",
                    "gb00b03mlx29",
                    "lu0197800237",
                    "nl0000289965",
                    np.nan,
                ],
                "name": [
                    "ABN Amro",
                    "Robeco",
                    "Royal Dutch Shell",
                    "Royal Dutch Shell",
                    "AAB Eastern Europe Equity Fund",
                    "Postbank BioTech Fonds",
                    np.nan,
                ],
                "share": [1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0],
            },
            columns=["household_id", "asset_id", "name", "share"],
        ).set_index(["household_id", "asset_id"])
        return portfolio

    @pytest.fixture
    def expected(self):
        expected = (
            DataFrame(
                {
                    "male": [0, 1, 1, 0, 0, 0],
                    "wealth": [
                        196087.3,
                        316478.7,
                        316478.7,
                        294750.0,
                        294750.0,
                        294750.0,
                    ],
                    "name": [
                        "ABN Amro",
                        "Robeco",
                        "Royal Dutch Shell",
                        "Royal Dutch Shell",
                        "AAB Eastern Europe Equity Fund",
                        "Postbank BioTech Fonds",
                    ],
                    "share": [1.00, 0.40, 0.60, 0.15, 0.60, 0.25],
                    "household_id": [1, 2, 2, 3, 3, 3],
                    "asset_id": [
                        "nl0000301109",
                        "nl0000289783",
                        "gb00b03mlx29",
                        "gb00b03mlx29",
                        "lu0197800237",
                        "nl0000289965",
                    ],
                }
            )
            .set_index(["household_id", "asset_id"])
            .reindex(columns=["male", "wealth", "name", "share"])
        )
        return expected

    def test_join_multi_levels(self, portfolio, household, expected):
        portfolio = portfolio.copy()
        household = household.copy()

        # GH 3662
        # merge multi-levels
        result = household.join(portfolio, how="inner")
        tm.assert_frame_equal(result, expected)

    def test_join_multi_levels_merge_equivalence(self, portfolio, household, expected):
        portfolio = portfolio.copy()
        household = household.copy()

        # equivalency
        result = merge(
            household.reset_index(),
            portfolio.reset_index(),
            on=["household_id"],
            how="inner",
        ).set_index(["household_id", "asset_id"])
        tm.assert_frame_equal(result, expected)

    def test_join_multi_levels_outer(self, portfolio, household, expected):
        portfolio = portfolio.copy()
        household = household.copy()

        result = household.join(portfolio, how="outer")
        expected = concat(
            [
                expected,
                (
                    DataFrame(
                        {"share": [1.00]},
                        index=MultiIndex.from_tuples(
                            [(4, np.nan)], names=["household_id", "asset_id"]
                        ),
                    )
                ),
            ],
            axis=0,
            sort=True,
        ).reindex(columns=expected.columns)
        tm.assert_frame_equal(result, expected)

    def test_join_multi_levels_invalid(self, portfolio, household):
        portfolio = portfolio.copy()
        household = household.copy()

        # invalid cases
        household.index.name = "foo"

        with pytest.raises(
            ValueError, match="cannot join with no overlapping index names"
        ):
            household.join(portfolio, how="inner")

        portfolio2 = portfolio.copy()
        portfolio2.index.set_names(["household_id", "foo"])

        with pytest.raises(ValueError, match="columns overlap but no suffix specified"):
            portfolio2.join(portfolio, how="inner")

    def test_join_multi_levels2(self):
        # some more advanced merges
        # GH6360
        household = DataFrame(
            {
                "household_id": [1, 2, 2, 3, 3, 3, 4],
                "asset_id": [
                    "nl0000301109",
                    "nl0000301109",
                    "gb00b03mlx29",
                    "gb00b03mlx29",
                    "lu0197800237",
                    "nl0000289965",
                    np.nan,
                ],
                "share": [1.0, 0.4, 0.6, 0.15, 0.6, 0.25, 1.0],
            },
            columns=["household_id", "asset_id", "share"],
        ).set_index(["household_id", "asset_id"])

        log_return = DataFrame(
            {
                "asset_id": [
                    "gb00b03mlx29",
                    "gb00b03mlx29",
                    "gb00b03mlx29",
                    "lu0197800237",
                    "lu0197800237",
                ],
                "t": [233, 234, 235, 180, 181],
                "log_return": [
                    0.09604978,
                    -0.06524096,
                    0.03532373,
                    0.03025441,
                    0.036997,
                ],
            }
        ).set_index(["asset_id", "t"])

        expected = (
            DataFrame(
                {
                    "household_id": [2, 2, 2, 3, 3, 3, 3, 3],
                    "asset_id": [
                        "gb00b03mlx29",
                        "gb00b03mlx29",
                        "gb00b03mlx29",
                        "gb00b03mlx29",
                        "gb00b03mlx29",
                        "gb00b03mlx29",
                        "lu0197800237",
                        "lu0197800237",
                    ],
                    "t": [233, 234, 235, 233, 234, 235, 180, 181],
                    "share": [0.6, 0.6, 0.6, 0.15, 0.15, 0.15, 0.6, 0.6],
                    "log_return": [
                        0.09604978,
                        -0.06524096,
                        0.03532373,
                        0.09604978,
                        -0.06524096,
                        0.03532373,
                        0.03025441,
                        0.036997,
                    ],
                }
            )
            .set_index(["household_id", "asset_id", "t"])
            .reindex(columns=["share", "log_return"])
        )

        # this is the equivalency
        result = merge(
            household.reset_index(),
            log_return.reset_index(),
            on=["asset_id"],
            how="inner",
        ).set_index(["household_id", "asset_id", "t"])
        tm.assert_frame_equal(result, expected)

        expected = (
            DataFrame(
                {
                    "household_id": [1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4],
                    "asset_id": [
                        "nl0000301109",
                        "nl0000301109",
                        "gb00b03mlx29",
                        "gb00b03mlx29",
                        "gb00b03mlx29",
                        "gb00b03mlx29",
                        "gb00b03mlx29",
                        "gb00b03mlx29",
                        "lu0197800237",
                        "lu0197800237",
                        "nl0000289965",
                        None,
                    ],
                    "t": [
                        None,
                        None,
                        233,
                        234,
                        235,
                        233,
                        234,
                        235,
                        180,
                        181,
                        None,
                        None,
                    ],
                    "share": [
                        1.0,
                        0.4,
                        0.6,
                        0.6,
                        0.6,
                        0.15,
                        0.15,
                        0.15,
                        0.6,
                        0.6,
                        0.25,
                        1.0,
                    ],
                    "log_return": [
                        None,
                        None,
                        0.09604978,
                        -0.06524096,
                        0.03532373,
                        0.09604978,
                        -0.06524096,
                        0.03532373,
                        0.03025441,
                        0.036997,
                        None,
                        None,
                    ],
                }
            )
            .set_index(["household_id", "asset_id", "t"])
            .reindex(columns=["share", "log_return"])
        )

        result = merge(
            household.reset_index(),
            log_return.reset_index(),
            on=["asset_id"],
            how="outer",
        ).set_index(["household_id", "asset_id", "t"])

        tm.assert_frame_equal(result, expected)


class TestJoinMultiMulti:
    def test_join_multi_multi(
        self, left_multi, right_multi, join_type, on_cols_multi, idx_cols_multi
    ):
        # Multi-index join tests
        expected = (
            merge(
                left_multi.reset_index(),
                right_multi.reset_index(),
                how=join_type,
                on=on_cols_multi,
            )
            .set_index(idx_cols_multi)
            .sort_index()
        )

        result = left_multi.join(right_multi, how=join_type).sort_index()
        tm.assert_frame_equal(result, expected)

    def test_join_multi_empty_frames(
        self, left_multi, right_multi, join_type, on_cols_multi, idx_cols_multi
    ):
        left_multi = left_multi.drop(columns=left_multi.columns)
        right_multi = right_multi.drop(columns=right_multi.columns)

        expected = (
            merge(
                left_multi.reset_index(),
                right_multi.reset_index(),
                how=join_type,
                on=on_cols_multi,
            )
            .set_index(idx_cols_multi)
            .sort_index()
        )

        result = left_multi.join(right_multi, how=join_type).sort_index()
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("box", [None, np.asarray, Series, Index])
    def test_merge_datetime_index(self, box):
        # see gh-19038
        df = DataFrame(
            [1, 2, 3], ["2016-01-01", "2017-01-01", "2018-01-01"], columns=["a"]
        )
        df.index = pd.to_datetime(df.index)
        on_vector = df.index.year

        if box is not None:
            on_vector = box(on_vector)

        exp_years = np.array([2016, 2017, 2018], dtype=np.int32)
        expected = DataFrame({"a": [1, 2, 3], "key_1": exp_years})

        result = df.merge(df, on=["a", on_vector], how="inner")
        tm.assert_frame_equal(result, expected)

        expected = DataFrame({"key_0": exp_years, "a_x": [1, 2, 3], "a_y": [1, 2, 3]})

        result = df.merge(df, on=[df.index.year], how="inner")
        tm.assert_frame_equal(result, expected)

    def test_single_common_level(self):
        index_left = MultiIndex.from_tuples(
            [("K0", "X0"), ("K0", "X1"), ("K1", "X2")], names=["key", "X"]
        )

        left = DataFrame(
            {"A": ["A0", "A1", "A2"], "B": ["B0", "B1", "B2"]}, index=index_left
        )

        index_right = MultiIndex.from_tuples(
            [("K0", "Y0"), ("K1", "Y1"), ("K2", "Y2"), ("K2", "Y3")], names=["key", "Y"]
        )

        right = DataFrame(
            {"C": ["C0", "C1", "C2", "C3"], "D": ["D0", "D1", "D2", "D3"]},
            index=index_right,
        )

        result = left.join(right)
        expected = merge(
            left.reset_index(), right.reset_index(), on=["key"], how="inner"
        ).set_index(["key", "X", "Y"])

        tm.assert_frame_equal(result, expected)

    def test_join_multi_wrong_order(self):
        # GH 25760
        # GH 28956

        midx1 = MultiIndex.from_product([[1, 2], [3, 4]], names=["a", "b"])
        midx3 = MultiIndex.from_tuples([(4, 1), (3, 2), (3, 1)], names=["b", "a"])

        left = DataFrame(index=midx1, data={"x": [10, 20, 30, 40]})
        right = DataFrame(index=midx3, data={"y": ["foo", "bar", "fing"]})

        result = left.join(right)

        expected = DataFrame(
            index=midx1,
            data={"x": [10, 20, 30, 40], "y": ["fing", "foo", "bar", np.nan]},
        )

        tm.assert_frame_equal(result, expected)