# test_combine_first.py (18 KB)
  1. from datetime import datetime
  2. import numpy as np
  3. import pytest
  4. from pandas.core.dtypes.cast import find_common_type
  5. from pandas.core.dtypes.common import is_dtype_equal
  6. import pandas as pd
  7. from pandas import (
  8. DataFrame,
  9. Index,
  10. MultiIndex,
  11. Series,
  12. )
  13. import pandas._testing as tm
  14. class TestDataFrameCombineFirst:
  15. def test_combine_first_mixed(self):
  16. a = Series(["a", "b"], index=range(2))
  17. b = Series(range(2), index=range(2))
  18. f = DataFrame({"A": a, "B": b})
  19. a = Series(["a", "b"], index=range(5, 7))
  20. b = Series(range(2), index=range(5, 7))
  21. g = DataFrame({"A": a, "B": b})
  22. exp = DataFrame({"A": list("abab"), "B": [0, 1, 0, 1]}, index=[0, 1, 5, 6])
  23. combined = f.combine_first(g)
  24. tm.assert_frame_equal(combined, exp)
  25. def test_combine_first(self, float_frame):
  26. # disjoint
  27. head, tail = float_frame[:5], float_frame[5:]
  28. combined = head.combine_first(tail)
  29. reordered_frame = float_frame.reindex(combined.index)
  30. tm.assert_frame_equal(combined, reordered_frame)
  31. assert tm.equalContents(combined.columns, float_frame.columns)
  32. tm.assert_series_equal(combined["A"], reordered_frame["A"])
  33. # same index
  34. fcopy = float_frame.copy()
  35. fcopy["A"] = 1
  36. del fcopy["C"]
  37. fcopy2 = float_frame.copy()
  38. fcopy2["B"] = 0
  39. del fcopy2["D"]
  40. combined = fcopy.combine_first(fcopy2)
  41. assert (combined["A"] == 1).all()
  42. tm.assert_series_equal(combined["B"], fcopy["B"])
  43. tm.assert_series_equal(combined["C"], fcopy2["C"])
  44. tm.assert_series_equal(combined["D"], fcopy["D"])
  45. # overlap
  46. head, tail = reordered_frame[:10].copy(), reordered_frame
  47. head["A"] = 1
  48. combined = head.combine_first(tail)
  49. assert (combined["A"][:10] == 1).all()
  50. # reverse overlap
  51. tail.iloc[:10, tail.columns.get_loc("A")] = 0
  52. combined = tail.combine_first(head)
  53. assert (combined["A"][:10] == 0).all()
  54. # no overlap
  55. f = float_frame[:10]
  56. g = float_frame[10:]
  57. combined = f.combine_first(g)
  58. tm.assert_series_equal(combined["A"].reindex(f.index), f["A"])
  59. tm.assert_series_equal(combined["A"].reindex(g.index), g["A"])
  60. # corner cases
  61. comb = float_frame.combine_first(DataFrame())
  62. tm.assert_frame_equal(comb, float_frame)
  63. comb = DataFrame().combine_first(float_frame)
  64. tm.assert_frame_equal(comb, float_frame)
  65. comb = float_frame.combine_first(DataFrame(index=["faz", "boo"]))
  66. assert "faz" in comb.index
  67. # #2525
  68. df = DataFrame({"a": [1]}, index=[datetime(2012, 1, 1)])
  69. df2 = DataFrame(columns=["b"])
  70. result = df.combine_first(df2)
  71. assert "b" in result
  72. def test_combine_first_mixed_bug(self):
  73. idx = Index(["a", "b", "c", "e"])
  74. ser1 = Series([5.0, -9.0, 4.0, 100.0], index=idx)
  75. ser2 = Series(["a", "b", "c", "e"], index=idx)
  76. ser3 = Series([12, 4, 5, 97], index=idx)
  77. frame1 = DataFrame({"col0": ser1, "col2": ser2, "col3": ser3})
  78. idx = Index(["a", "b", "c", "f"])
  79. ser1 = Series([5.0, -9.0, 4.0, 100.0], index=idx)
  80. ser2 = Series(["a", "b", "c", "f"], index=idx)
  81. ser3 = Series([12, 4, 5, 97], index=idx)
  82. frame2 = DataFrame({"col1": ser1, "col2": ser2, "col5": ser3})
  83. combined = frame1.combine_first(frame2)
  84. assert len(combined.columns) == 5
  85. def test_combine_first_same_as_in_update(self):
  86. # gh 3016 (same as in update)
  87. df = DataFrame(
  88. [[1.0, 2.0, False, True], [4.0, 5.0, True, False]],
  89. columns=["A", "B", "bool1", "bool2"],
  90. )
  91. other = DataFrame([[45, 45]], index=[0], columns=["A", "B"])
  92. result = df.combine_first(other)
  93. tm.assert_frame_equal(result, df)
  94. df.loc[0, "A"] = np.nan
  95. result = df.combine_first(other)
  96. df.loc[0, "A"] = 45
  97. tm.assert_frame_equal(result, df)
  98. def test_combine_first_doc_example(self):
  99. # doc example
  100. df1 = DataFrame(
  101. {"A": [1.0, np.nan, 3.0, 5.0, np.nan], "B": [np.nan, 2.0, 3.0, np.nan, 6.0]}
  102. )
  103. df2 = DataFrame(
  104. {
  105. "A": [5.0, 2.0, 4.0, np.nan, 3.0, 7.0],
  106. "B": [np.nan, np.nan, 3.0, 4.0, 6.0, 8.0],
  107. }
  108. )
  109. result = df1.combine_first(df2)
  110. expected = DataFrame({"A": [1, 2, 3, 5, 3, 7.0], "B": [np.nan, 2, 3, 4, 6, 8]})
  111. tm.assert_frame_equal(result, expected)
  112. def test_combine_first_return_obj_type_with_bools(self):
  113. # GH3552
  114. df1 = DataFrame(
  115. [[np.nan, 3.0, True], [-4.6, np.nan, True], [np.nan, 7.0, False]]
  116. )
  117. df2 = DataFrame([[-42.6, np.nan, True], [-5.0, 1.6, False]], index=[1, 2])
  118. expected = Series([True, True, False], name=2, dtype=bool)
  119. result_12 = df1.combine_first(df2)[2]
  120. tm.assert_series_equal(result_12, expected)
  121. result_21 = df2.combine_first(df1)[2]
  122. tm.assert_series_equal(result_21, expected)
  123. @pytest.mark.parametrize(
  124. "data1, data2, data_expected",
  125. (
  126. (
  127. [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
  128. [pd.NaT, pd.NaT, pd.NaT],
  129. [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
  130. ),
  131. (
  132. [pd.NaT, pd.NaT, pd.NaT],
  133. [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
  134. [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
  135. ),
  136. (
  137. [datetime(2000, 1, 2), pd.NaT, pd.NaT],
  138. [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
  139. [datetime(2000, 1, 2), datetime(2000, 1, 2), datetime(2000, 1, 3)],
  140. ),
  141. (
  142. [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
  143. [datetime(2000, 1, 2), pd.NaT, pd.NaT],
  144. [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
  145. ),
  146. ),
  147. )
  148. def test_combine_first_convert_datatime_correctly(
  149. self, data1, data2, data_expected
  150. ):
  151. # GH 3593
  152. df1, df2 = DataFrame({"a": data1}), DataFrame({"a": data2})
  153. result = df1.combine_first(df2)
  154. expected = DataFrame({"a": data_expected})
  155. tm.assert_frame_equal(result, expected)
  156. def test_combine_first_align_nan(self):
  157. # GH 7509 (not fixed)
  158. dfa = DataFrame([[pd.Timestamp("2011-01-01"), 2]], columns=["a", "b"])
  159. dfb = DataFrame([[4], [5]], columns=["b"])
  160. assert dfa["a"].dtype == "datetime64[ns]"
  161. assert dfa["b"].dtype == "int64"
  162. res = dfa.combine_first(dfb)
  163. exp = DataFrame(
  164. {"a": [pd.Timestamp("2011-01-01"), pd.NaT], "b": [2, 5]},
  165. columns=["a", "b"],
  166. )
  167. tm.assert_frame_equal(res, exp)
  168. assert res["a"].dtype == "datetime64[ns]"
  169. # TODO: this must be int64
  170. assert res["b"].dtype == "int64"
  171. res = dfa.iloc[:0].combine_first(dfb)
  172. exp = DataFrame({"a": [np.nan, np.nan], "b": [4, 5]}, columns=["a", "b"])
  173. tm.assert_frame_equal(res, exp)
  174. # TODO: this must be datetime64
  175. assert res["a"].dtype == "float64"
  176. # TODO: this must be int64
  177. assert res["b"].dtype == "int64"
  178. def test_combine_first_timezone(self):
  179. # see gh-7630
  180. data1 = pd.to_datetime("20100101 01:01").tz_localize("UTC")
  181. df1 = DataFrame(
  182. columns=["UTCdatetime", "abc"],
  183. data=data1,
  184. index=pd.date_range("20140627", periods=1),
  185. )
  186. data2 = pd.to_datetime("20121212 12:12").tz_localize("UTC")
  187. df2 = DataFrame(
  188. columns=["UTCdatetime", "xyz"],
  189. data=data2,
  190. index=pd.date_range("20140628", periods=1),
  191. )
  192. res = df2[["UTCdatetime"]].combine_first(df1)
  193. exp = DataFrame(
  194. {
  195. "UTCdatetime": [
  196. pd.Timestamp("2010-01-01 01:01", tz="UTC"),
  197. pd.Timestamp("2012-12-12 12:12", tz="UTC"),
  198. ],
  199. "abc": [pd.Timestamp("2010-01-01 01:01:00", tz="UTC"), pd.NaT],
  200. },
  201. columns=["UTCdatetime", "abc"],
  202. index=pd.date_range("20140627", periods=2, freq="D"),
  203. )
  204. assert res["UTCdatetime"].dtype == "datetime64[ns, UTC]"
  205. assert res["abc"].dtype == "datetime64[ns, UTC]"
  206. tm.assert_frame_equal(res, exp)
  207. # see gh-10567
  208. dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="UTC")
  209. df1 = DataFrame({"DATE": dts1})
  210. dts2 = pd.date_range("2015-01-03", "2015-01-05", tz="UTC")
  211. df2 = DataFrame({"DATE": dts2})
  212. res = df1.combine_first(df2)
  213. tm.assert_frame_equal(res, df1)
  214. assert res["DATE"].dtype == "datetime64[ns, UTC]"
  215. dts1 = pd.DatetimeIndex(
  216. ["2011-01-01", "NaT", "2011-01-03", "2011-01-04"], tz="US/Eastern"
  217. )
  218. df1 = DataFrame({"DATE": dts1}, index=[1, 3, 5, 7])
  219. dts2 = pd.DatetimeIndex(
  220. ["2012-01-01", "2012-01-02", "2012-01-03"], tz="US/Eastern"
  221. )
  222. df2 = DataFrame({"DATE": dts2}, index=[2, 4, 5])
  223. res = df1.combine_first(df2)
  224. exp_dts = pd.DatetimeIndex(
  225. [
  226. "2011-01-01",
  227. "2012-01-01",
  228. "NaT",
  229. "2012-01-02",
  230. "2011-01-03",
  231. "2011-01-04",
  232. ],
  233. tz="US/Eastern",
  234. )
  235. exp = DataFrame({"DATE": exp_dts}, index=[1, 2, 3, 4, 5, 7])
  236. tm.assert_frame_equal(res, exp)
  237. # different tz
  238. dts1 = pd.date_range("2015-01-01", "2015-01-05", tz="US/Eastern")
  239. df1 = DataFrame({"DATE": dts1})
  240. dts2 = pd.date_range("2015-01-03", "2015-01-05")
  241. df2 = DataFrame({"DATE": dts2})
  242. # if df1 doesn't have NaN, keep its dtype
  243. res = df1.combine_first(df2)
  244. tm.assert_frame_equal(res, df1)
  245. assert res["DATE"].dtype == "datetime64[ns, US/Eastern]"
  246. dts1 = pd.date_range("2015-01-01", "2015-01-02", tz="US/Eastern")
  247. df1 = DataFrame({"DATE": dts1})
  248. dts2 = pd.date_range("2015-01-01", "2015-01-03")
  249. df2 = DataFrame({"DATE": dts2})
  250. res = df1.combine_first(df2)
  251. exp_dts = [
  252. pd.Timestamp("2015-01-01", tz="US/Eastern"),
  253. pd.Timestamp("2015-01-02", tz="US/Eastern"),
  254. pd.Timestamp("2015-01-03"),
  255. ]
  256. exp = DataFrame({"DATE": exp_dts})
  257. tm.assert_frame_equal(res, exp)
  258. assert res["DATE"].dtype == "object"
  259. def test_combine_first_timedelta(self):
  260. data1 = pd.TimedeltaIndex(["1 day", "NaT", "3 day", "4day"])
  261. df1 = DataFrame({"TD": data1}, index=[1, 3, 5, 7])
  262. data2 = pd.TimedeltaIndex(["10 day", "11 day", "12 day"])
  263. df2 = DataFrame({"TD": data2}, index=[2, 4, 5])
  264. res = df1.combine_first(df2)
  265. exp_dts = pd.TimedeltaIndex(
  266. ["1 day", "10 day", "NaT", "11 day", "3 day", "4 day"]
  267. )
  268. exp = DataFrame({"TD": exp_dts}, index=[1, 2, 3, 4, 5, 7])
  269. tm.assert_frame_equal(res, exp)
  270. assert res["TD"].dtype == "timedelta64[ns]"
  271. def test_combine_first_period(self):
  272. data1 = pd.PeriodIndex(["2011-01", "NaT", "2011-03", "2011-04"], freq="M")
  273. df1 = DataFrame({"P": data1}, index=[1, 3, 5, 7])
  274. data2 = pd.PeriodIndex(["2012-01-01", "2012-02", "2012-03"], freq="M")
  275. df2 = DataFrame({"P": data2}, index=[2, 4, 5])
  276. res = df1.combine_first(df2)
  277. exp_dts = pd.PeriodIndex(
  278. ["2011-01", "2012-01", "NaT", "2012-02", "2011-03", "2011-04"], freq="M"
  279. )
  280. exp = DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7])
  281. tm.assert_frame_equal(res, exp)
  282. assert res["P"].dtype == data1.dtype
  283. # different freq
  284. dts2 = pd.PeriodIndex(["2012-01-01", "2012-01-02", "2012-01-03"], freq="D")
  285. df2 = DataFrame({"P": dts2}, index=[2, 4, 5])
  286. res = df1.combine_first(df2)
  287. exp_dts = [
  288. pd.Period("2011-01", freq="M"),
  289. pd.Period("2012-01-01", freq="D"),
  290. pd.NaT,
  291. pd.Period("2012-01-02", freq="D"),
  292. pd.Period("2011-03", freq="M"),
  293. pd.Period("2011-04", freq="M"),
  294. ]
  295. exp = DataFrame({"P": exp_dts}, index=[1, 2, 3, 4, 5, 7])
  296. tm.assert_frame_equal(res, exp)
  297. assert res["P"].dtype == "object"
  298. def test_combine_first_int(self):
  299. # GH14687 - integer series that do no align exactly
  300. df1 = DataFrame({"a": [0, 1, 3, 5]}, dtype="int64")
  301. df2 = DataFrame({"a": [1, 4]}, dtype="int64")
  302. result_12 = df1.combine_first(df2)
  303. expected_12 = DataFrame({"a": [0, 1, 3, 5]})
  304. tm.assert_frame_equal(result_12, expected_12)
  305. result_21 = df2.combine_first(df1)
  306. expected_21 = DataFrame({"a": [1, 4, 3, 5]})
  307. tm.assert_frame_equal(result_21, expected_21)
  308. @pytest.mark.parametrize("val", [1, 1.0])
  309. def test_combine_first_with_asymmetric_other(self, val):
  310. # see gh-20699
  311. df1 = DataFrame({"isNum": [val]})
  312. df2 = DataFrame({"isBool": [True]})
  313. res = df1.combine_first(df2)
  314. exp = DataFrame({"isBool": [True], "isNum": [val]})
  315. tm.assert_frame_equal(res, exp)
  316. def test_combine_first_string_dtype_only_na(self, nullable_string_dtype):
  317. # GH: 37519
  318. df = DataFrame(
  319. {"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype
  320. )
  321. df2 = DataFrame({"a": ["85"], "b": [pd.NA]}, dtype=nullable_string_dtype)
  322. df.set_index(["a", "b"], inplace=True)
  323. df2.set_index(["a", "b"], inplace=True)
  324. result = df.combine_first(df2)
  325. expected = DataFrame(
  326. {"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype
  327. ).set_index(["a", "b"])
  328. tm.assert_frame_equal(result, expected)
  329. @pytest.mark.parametrize(
  330. "scalar1, scalar2",
  331. [
  332. (datetime(2020, 1, 1), datetime(2020, 1, 2)),
  333. (pd.Period("2020-01-01", "D"), pd.Period("2020-01-02", "D")),
  334. (pd.Timedelta("89 days"), pd.Timedelta("60 min")),
  335. (pd.Interval(left=0, right=1), pd.Interval(left=2, right=3, closed="left")),
  336. ],
  337. )
  338. def test_combine_first_timestamp_bug(scalar1, scalar2, nulls_fixture):
  339. # GH28481
  340. na_value = nulls_fixture
  341. frame = DataFrame([[na_value, na_value]], columns=["a", "b"])
  342. other = DataFrame([[scalar1, scalar2]], columns=["b", "c"])
  343. common_dtype = find_common_type([frame.dtypes["b"], other.dtypes["b"]])
  344. if is_dtype_equal(common_dtype, "object") or frame.dtypes["b"] == other.dtypes["b"]:
  345. val = scalar1
  346. else:
  347. val = na_value
  348. result = frame.combine_first(other)
  349. expected = DataFrame([[na_value, val, scalar2]], columns=["a", "b", "c"])
  350. expected["b"] = expected["b"].astype(common_dtype)
  351. tm.assert_frame_equal(result, expected)
  352. def test_combine_first_timestamp_bug_NaT():
  353. # GH28481
  354. frame = DataFrame([[pd.NaT, pd.NaT]], columns=["a", "b"])
  355. other = DataFrame(
  356. [[datetime(2020, 1, 1), datetime(2020, 1, 2)]], columns=["b", "c"]
  357. )
  358. result = frame.combine_first(other)
  359. expected = DataFrame(
  360. [[pd.NaT, datetime(2020, 1, 1), datetime(2020, 1, 2)]], columns=["a", "b", "c"]
  361. )
  362. tm.assert_frame_equal(result, expected)
  363. def test_combine_first_with_nan_multiindex():
  364. # gh-36562
  365. mi1 = MultiIndex.from_arrays(
  366. [["b", "b", "c", "a", "b", np.nan], [1, 2, 3, 4, 5, 6]], names=["a", "b"]
  367. )
  368. df = DataFrame({"c": [1, 1, 1, 1, 1, 1]}, index=mi1)
  369. mi2 = MultiIndex.from_arrays(
  370. [["a", "b", "c", "a", "b", "d"], [1, 1, 1, 1, 1, 1]], names=["a", "b"]
  371. )
  372. s = Series([1, 2, 3, 4, 5, 6], index=mi2)
  373. res = df.combine_first(DataFrame({"d": s}))
  374. mi_expected = MultiIndex.from_arrays(
  375. [
  376. ["a", "a", "a", "b", "b", "b", "b", "c", "c", "d", np.nan],
  377. [1, 1, 4, 1, 1, 2, 5, 1, 3, 1, 6],
  378. ],
  379. names=["a", "b"],
  380. )
  381. expected = DataFrame(
  382. {
  383. "c": [np.nan, np.nan, 1, 1, 1, 1, 1, np.nan, 1, np.nan, 1],
  384. "d": [1.0, 4.0, np.nan, 2.0, 5.0, np.nan, np.nan, 3.0, np.nan, 6.0, np.nan],
  385. },
  386. index=mi_expected,
  387. )
  388. tm.assert_frame_equal(res, expected)
  389. def test_combine_preserve_dtypes():
  390. # GH7509
  391. a_column = Series(["a", "b"], index=range(2))
  392. b_column = Series(range(2), index=range(2))
  393. df1 = DataFrame({"A": a_column, "B": b_column})
  394. c_column = Series(["a", "b"], index=range(5, 7))
  395. b_column = Series(range(-1, 1), index=range(5, 7))
  396. df2 = DataFrame({"B": b_column, "C": c_column})
  397. expected = DataFrame(
  398. {
  399. "A": ["a", "b", np.nan, np.nan],
  400. "B": [0, 1, -1, 0],
  401. "C": [np.nan, np.nan, "a", "b"],
  402. },
  403. index=[0, 1, 5, 6],
  404. )
  405. combined = df1.combine_first(df2)
  406. tm.assert_frame_equal(combined, expected)
  407. def test_combine_first_duplicates_rows_for_nan_index_values():
  408. # GH39881
  409. df1 = DataFrame(
  410. {"x": [9, 10, 11]},
  411. index=MultiIndex.from_arrays([[1, 2, 3], [np.nan, 5, 6]], names=["a", "b"]),
  412. )
  413. df2 = DataFrame(
  414. {"y": [12, 13, 14]},
  415. index=MultiIndex.from_arrays([[1, 2, 4], [np.nan, 5, 7]], names=["a", "b"]),
  416. )
  417. expected = DataFrame(
  418. {
  419. "x": [9.0, 10.0, 11.0, np.nan],
  420. "y": [12.0, 13.0, np.nan, 14.0],
  421. },
  422. index=MultiIndex.from_arrays(
  423. [[1, 2, 3, 4], [np.nan, 5.0, 6.0, 7.0]], names=["a", "b"]
  424. ),
  425. )
  426. combined = df1.combine_first(df2)
  427. tm.assert_frame_equal(combined, expected)
  428. def test_combine_first_int64_not_cast_to_float64():
  429. # GH 28613
  430. df_1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
  431. df_2 = DataFrame({"A": [1, 20, 30], "B": [40, 50, 60], "C": [12, 34, 65]})
  432. result = df_1.combine_first(df_2)
  433. expected = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [12, 34, 65]})
  434. tm.assert_frame_equal(result, expected)
  435. def test_midx_losing_dtype():
  436. # GH#49830
  437. midx = MultiIndex.from_arrays([[0, 0], [np.nan, np.nan]])
  438. midx2 = MultiIndex.from_arrays([[1, 1], [np.nan, np.nan]])
  439. df1 = DataFrame({"a": [None, 4]}, index=midx)
  440. df2 = DataFrame({"a": [3, 3]}, index=midx2)
  441. result = df1.combine_first(df2)
  442. expected_midx = MultiIndex.from_arrays(
  443. [[0, 0, 1, 1], [np.nan, np.nan, np.nan, np.nan]]
  444. )
  445. expected = DataFrame({"a": [np.nan, 4, 3, 3]}, index=expected_midx)
  446. tm.assert_frame_equal(result, expected)