12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860 |
- from datetime import (
- date,
- datetime,
- )
- import itertools
- import numpy as np
- import pytest
- from pandas.compat import pa_version_under7p0
- from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
- import pandas as pd
- from pandas import (
- Index,
- MultiIndex,
- Series,
- Timestamp,
- date_range,
- )
- import pandas._testing as tm
def test_constructor_single_level():
    """A MultiIndex built from a single level keeps its type, level and name."""
    labels = ["foo", "bar", "baz", "qux"]
    mi = MultiIndex(levels=[labels], codes=[[0, 1, 2, 3]], names=["first"])

    assert isinstance(mi, MultiIndex)
    tm.assert_index_equal(mi.levels[0], Index(labels, name="first"))
    assert mi.names == ["first"]
def test_constructor_no_levels():
    """Empty or missing levels/codes are rejected by the constructor."""
    with pytest.raises(ValueError, match="non-zero number of levels/codes"):
        MultiIndex(levels=[], codes=[])

    # Passing only one of ``levels`` / ``codes`` is a TypeError.
    both_required = "Must pass both levels and codes"
    for only_one in ({"levels": []}, {"codes": []}):
        with pytest.raises(TypeError, match=both_required):
            MultiIndex(**only_one)
def test_constructor_nonhashable_names():
    """Unhashable names raise in the constructor, rename() and set_names()."""
    # GH 20527
    err = r"MultiIndex\.name must be a hashable type"
    level_values = [[1, 2], ["one", "two"]]
    level_codes = [[0, 0, 1, 1], [0, 1, 0, 1]]

    with pytest.raises(TypeError, match=err):
        MultiIndex(levels=level_values, codes=level_codes, names=(["foo"], ["bar"]))

    valid = MultiIndex(
        levels=[[1, 2], ["one", "two"]],
        codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
        names=("foo", "bar"),
    )
    unhashable = [["foor"], ["barr"]]

    # With .rename()
    with pytest.raises(TypeError, match=err):
        valid.rename(names=unhashable)

    # With .set_names()
    with pytest.raises(TypeError, match=err):
        valid.set_names(names=unhashable)
def test_constructor_mismatched_codes_levels(idx):
    """Inconsistent levels/codes raise unless integrity checks are disabled."""
    with pytest.raises(
        ValueError, match="Length of levels and codes must be the same"
    ):
        MultiIndex(
            levels=["a"], codes=[np.array([1]), np.array([2]), np.array([3])]
        )

    code_too_large = (
        r"On level 0, code max \(3\) >= length of level \(1\)\. "
        "NOTE: this index is in an inconsistent state"
    )
    ragged_codes = r"Unequal code lengths: \[4, 2\]"
    code_below_minus_one = r"On level 0, code value \(-2\) < -1"
    one_item_levels = [["a"], ["b"]]

    # important to check that it's looking at the right thing.
    with pytest.raises(ValueError, match=code_too_large):
        MultiIndex(levels=one_item_levels, codes=[[0, 1, 2, 3], [0, 3, 4, 1]])
    with pytest.raises(ValueError, match=ragged_codes):
        MultiIndex(levels=one_item_levels, codes=[[0, 0, 0, 0], [0, 0]])

    # external API
    with pytest.raises(ValueError, match=code_too_large):
        idx.copy().set_levels(one_item_levels)
    with pytest.raises(ValueError, match=ragged_codes):
        idx.copy().set_codes([[0, 0, 0, 0], [0, 0]])

    # test set_codes with verify_integrity=False
    # the setting should not raise any value error
    idx.copy().set_codes(codes=[[0, 0, 0, 0], [0, 0]], verify_integrity=False)

    # code value smaller than -1
    with pytest.raises(ValueError, match=code_below_minus_one):
        MultiIndex(levels=one_item_levels, codes=[[0, -2], [0, 0]])
def test_na_levels():
    """Codes that point at missing level entries are re-assigned to -1."""
    # GH26408: covers NaN, NaT and None appearing among the level values
    leading_na = [np.nan, None, pd.NaT, 128, 2]
    observed = MultiIndex(levels=[leading_na], codes=[[0, -1, 1, 2, 3, 4]])
    wanted = MultiIndex(levels=[leading_na], codes=[[-1, -1, -1, -1, 3, 4]])
    tm.assert_index_equal(observed, wanted)

    scattered_na = [np.nan, "s", pd.NaT, 128, None]
    observed = MultiIndex(levels=[scattered_na], codes=[[0, -1, 1, 2, 3, 4]])
    wanted = MultiIndex(levels=[scattered_na], codes=[[-1, -1, 1, -1, 3, -1]])
    tm.assert_index_equal(observed, wanted)

    # verify set_levels and set_codes
    via_set_levels = MultiIndex(
        levels=[[1, 2, 3, 4, 5]], codes=[[0, -1, 1, 2, 3, 4]]
    ).set_levels([scattered_na])
    tm.assert_index_equal(via_set_levels, wanted)

    via_set_codes = MultiIndex(
        levels=[scattered_na], codes=[[1, 2, 2, 2, 2, 2]]
    ).set_codes([[0, -1, 1, 2, 3, 4]])
    tm.assert_index_equal(via_set_codes, wanted)
def test_copy_in_constructor():
    """With copy=True, later mutation of the input arrays is not reflected."""
    level_arr = np.array(["a", "b", "c"])
    code_arr = np.array([1, 1, 2, 0, 0, 1, 1])
    first_code = code_arr[0]

    mi = MultiIndex(
        levels=[level_arr, level_arr], codes=[code_arr, code_arr], copy=True
    )
    assert mi.codes[0][0] == first_code

    # Overwrite the source codes; the index must still report the old value.
    code_arr[0] = 15
    assert mi.codes[0][0] == first_code

    # Same check for the source level values.
    first_level = level_arr[0]
    level_arr[0] = "PANDA"
    assert mi.levels[0][0] == first_level
- # ----------------------------------------------------------------------------
- # from_arrays
- # ----------------------------------------------------------------------------
- def test_from_arrays(idx):
- arrays = [
- np.asarray(lev).take(level_codes)
- for lev, level_codes in zip(idx.levels, idx.codes)
- ]
- # list of arrays as input
- result = MultiIndex.from_arrays(arrays, names=idx.names)
- tm.assert_index_equal(result, idx)
- # infer correctly
- result = MultiIndex.from_arrays([[pd.NaT, Timestamp("20130101")], ["a", "b"]])
- assert result.levels[0].equals(Index([Timestamp("20130101")]))
- assert result.levels[1].equals(Index(["a", "b"]))
def test_from_arrays_iterator(idx):
    """from_arrays consumes an iterator of arrays and rejects a bare integer."""
    # GH 18434
    level_values = [
        np.asarray(level).take(codes) for level, codes in zip(idx.levels, idx.codes)
    ]

    # iterator as input
    rebuilt = MultiIndex.from_arrays(iter(level_values), names=idx.names)
    tm.assert_index_equal(rebuilt, idx)

    # invalid iterator input
    with pytest.raises(
        TypeError, match="Input must be a list / sequence of array-likes."
    ):
        MultiIndex.from_arrays(0)
- def test_from_arrays_tuples(idx):
- arrays = tuple(
- tuple(np.asarray(lev).take(level_codes))
- for lev, level_codes in zip(idx.levels, idx.codes)
- )
- # tuple of tuples as input
- result = MultiIndex.from_arrays(arrays, names=idx.names)
- tm.assert_index_equal(result, idx)
@pytest.mark.parametrize(
    ("idx1", "idx2"),
    [
        (
            pd.period_range("2011-01-01", freq="D", periods=3),
            pd.period_range("2015-01-01", freq="H", periods=3),
        ),
        (
            date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
            date_range("2015-01-01 10:00", freq="H", periods=3, tz="Asia/Tokyo"),
        ),
        (
            pd.timedelta_range("1 days", freq="D", periods=3),
            pd.timedelta_range("2 hours", freq="H", periods=3),
        ),
    ],
)
def test_from_arrays_index_series_period_datetimetz_and_timedelta(idx1, idx2):
    """Level values match the inputs whether passed as Index or as Series."""
    sources = (idx1, idx2)

    from_indexes = MultiIndex.from_arrays([idx1, idx2])
    for level, source in enumerate(sources):
        tm.assert_index_equal(from_indexes.get_level_values(level), source)

    from_series = MultiIndex.from_arrays([Series(idx1), Series(idx2)])
    for level, source in enumerate(sources):
        tm.assert_index_equal(from_series.get_level_values(level), source)

    tm.assert_index_equal(from_indexes, from_series)
def test_from_arrays_index_datetimelike_mixed():
    """Mixed datetime-like levels round-trip through from_arrays."""
    idx1 = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern")
    idx2 = date_range("2015-01-01 10:00", freq="H", periods=3)
    idx3 = pd.timedelta_range("1 days", freq="D", periods=3)
    idx4 = pd.period_range("2011-01-01", freq="D", periods=3)
    sources = (idx1, idx2, idx3, idx4)

    from_indexes = MultiIndex.from_arrays([idx1, idx2, idx3, idx4])
    for level, source in enumerate(sources):
        tm.assert_index_equal(from_indexes.get_level_values(level), source)

    from_series = MultiIndex.from_arrays(
        [Series(idx1), Series(idx2), Series(idx3), Series(idx4)]
    )
    for level, source in enumerate(sources):
        tm.assert_index_equal(from_series.get_level_values(level), source)

    tm.assert_index_equal(from_indexes, from_series)
def test_from_arrays_index_series_categorical():
    """Categorical level values survive as Index, Series or raw values."""
    # GH13743
    letters = list("abcaab")
    categories = list("bac")
    idx1 = pd.CategoricalIndex(letters, categories=categories, ordered=False)
    idx2 = pd.CategoricalIndex(letters, categories=categories, ordered=True)

    # Same check for each way of handing the categoricals to from_arrays.
    for wrap in (lambda x: x, Series, lambda x: x.values):
        built = MultiIndex.from_arrays([wrap(idx1), wrap(idx2)])
        tm.assert_index_equal(built.get_level_values(0), idx1)
        tm.assert_index_equal(built.get_level_values(1), idx2)
def test_from_arrays_empty():
    """from_arrays with zero, one, or several empty arrays."""
    # 0 levels
    with pytest.raises(
        ValueError, match="Must pass non-zero number of levels/codes"
    ):
        MultiIndex.from_arrays(arrays=[])

    # 1 level
    single = MultiIndex.from_arrays(arrays=[[]], names=["A"])
    assert isinstance(single, MultiIndex)
    tm.assert_index_equal(single.levels[0], Index([], name="A"))
    assert single.names == ["A"]

    # N levels
    for n_levels in (2, 3):
        level_names = list("ABC")[:n_levels]
        built = MultiIndex.from_arrays(arrays=[[]] * n_levels, names=level_names)
        wanted = MultiIndex(
            levels=[[]] * n_levels, codes=[[]] * n_levels, names=level_names
        )
        tm.assert_index_equal(built, wanted)
@pytest.mark.parametrize(
    "invalid_sequence_of_arrays",
    [
        # scalars and lists whose elements are not all array-likes
        1,
        [1],
        [1, 2],
        [[1], 2],
        [1, [2]],
        "a",
        ["a"],
        ["a", "b"],
        [["a"], "b"],
        # tuple counterparts (the bare string "a" is already covered above,
        # so it is not repeated here)
        (1,),
        (1, 2),
        ([1], 2),
        (1, [2]),
        ("a",),
        ("a", "b"),
        (["a"], "b"),
        # mixed list / tuple nesting
        [(1,), 2],
        [1, (2,)],
        [("a",), "b"],
        ((1,), 2),
        (1, (2,)),
        (("a",), "b"),
    ],
)
def test_from_arrays_invalid_input(invalid_sequence_of_arrays):
    """from_arrays raises TypeError for input that is not a sequence of arrays."""
    expected_msg = "Input must be a list / sequence of array-likes"
    with pytest.raises(TypeError, match=expected_msg):
        MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays)
@pytest.mark.parametrize(
    "idx1, idx2", [([1, 2, 3], ["a", "b"]), ([], ["a", "b"]), ([1, 2, 3], [])]
)
def test_from_arrays_different_lengths(idx1, idx2):
    """Arrays of unequal length are rejected with a ValueError."""
    # see gh-13599
    with pytest.raises(ValueError, match="^all arrays must be same length$"):
        MultiIndex.from_arrays([idx1, idx2])
def test_from_arrays_respects_none_names():
    """Explicit names=None is honoured even when the input Series are named."""
    # GH27292
    numbers = Series([1, 2, 3], name="foo")
    letters = Series(["a", "b", "c"], name="bar")

    wanted = MultiIndex(
        levels=[[1, 2, 3], ["a", "b", "c"]], codes=[[0, 1, 2], [0, 1, 2]], names=None
    )
    tm.assert_index_equal(
        MultiIndex.from_arrays([numbers, letters], names=None), wanted
    )
- # ----------------------------------------------------------------------------
- # from_tuples
- # ----------------------------------------------------------------------------
def test_from_tuples():
    """from_tuples rejects an unnamed empty list and builds from tuples."""
    empty_err = "Cannot infer number of levels from empty list"
    with pytest.raises(TypeError, match=empty_err):
        MultiIndex.from_tuples([])

    wanted = MultiIndex(
        levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"]
    )

    # input tuples
    built = MultiIndex.from_tuples(((1, 2), (3, 4)), names=["a", "b"])
    tm.assert_index_equal(built, wanted)
def test_from_tuples_iterator():
    """from_tuples consumes an iterator of tuples and rejects a bare integer."""
    # GH 18434
    # input iterator for tuples
    wanted = MultiIndex(
        levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"]
    )
    pairs = zip([1, 3], [2, 4])
    tm.assert_index_equal(MultiIndex.from_tuples(pairs, names=["a", "b"]), wanted)

    # input non-iterables
    with pytest.raises(
        TypeError, match="Input must be a list / sequence of tuple-likes."
    ):
        MultiIndex.from_tuples(0)
def test_from_tuples_empty():
    """An empty tuple list with names equals from_arrays on empty arrays."""
    # GH 16777
    level_names = ["a", "b"]
    from_tuples = MultiIndex.from_tuples([], names=level_names)
    from_arrays = MultiIndex.from_arrays(arrays=[[], []], names=level_names)
    tm.assert_index_equal(from_tuples, from_arrays)
- def test_from_tuples_index_values(idx):
- result = MultiIndex.from_tuples(idx)
- assert (result.values == idx.values).all()
def test_tuples_with_name_string():
    """Index() over tuples raises when ``name`` is a plain string."""
    # GH 15110 and GH 14848
    tuples = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
    for scalar_name in ("abc", "a"):
        with pytest.raises(
            ValueError, match="Names should be list-like for a MultiIndex"
        ):
            Index(tuples, name=scalar_name)
def test_from_tuples_with_tuple_label():
    """A tuple can itself be a label inside one level of from_tuples."""
    # GH 15457
    frame = pd.DataFrame([[2, 1, 2], [4, (1, 2), 3]], columns=["a", "b", "c"])
    expected = frame.set_index(["a", "b"])

    mi = MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=("a", "b"))
    result = pd.DataFrame([2, 3], columns=["c"], index=mi)

    tm.assert_frame_equal(expected, result)
- # ----------------------------------------------------------------------------
- # from_product
- # ----------------------------------------------------------------------------
def test_from_product_empty_zero_levels():
    """from_product with no iterables at all raises ValueError."""
    # 0 levels
    with pytest.raises(
        ValueError, match="Must pass non-zero number of levels/codes"
    ):
        MultiIndex.from_product([])
def test_from_product_empty_one_level():
    """A single empty iterable yields one empty, named level."""
    mi = MultiIndex.from_product([[]], names=["A"])
    tm.assert_index_equal(mi.levels[0], Index([], name="A"))
    assert mi.names == ["A"]
@pytest.mark.parametrize(
    "first, second", [([], []), (["foo", "bar", "baz"], []), ([], ["a", "b", "c"])]
)
def test_from_product_empty_two_levels(first, second):
    """An empty factor gives an empty product that still keeps both levels."""
    level_names = ["A", "B"]
    built = MultiIndex.from_product([first, second], names=level_names)
    wanted = MultiIndex(levels=[first, second], codes=[[], []], names=level_names)
    tm.assert_index_equal(built, wanted)
@pytest.mark.parametrize("N", list(range(4)))
def test_from_product_empty_three_levels(N):
    """The middle level is kept even though the outer factors are empty."""
    # GH12258
    level_names = ["A", "B", "C"]
    middle = list(range(N))
    built = MultiIndex.from_product([[], middle, []], names=level_names)
    wanted = MultiIndex(
        levels=[[], middle, []], codes=[[], [], []], names=level_names
    )
    tm.assert_index_equal(built, wanted)
@pytest.mark.parametrize(
    "invalid_input", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]]
)
def test_from_product_invalid_input(invalid_input):
    """from_product raises TypeError when input is not a sequence of iterables."""
    accepted_messages = (
        r"Input must be a list / sequence of iterables|Input must be list-like"
    )
    with pytest.raises(TypeError, match=accepted_messages):
        MultiIndex.from_product(iterables=invalid_input)
def test_from_product_datetimeindex():
    """Product of ints and a DatetimeIndex has (int, Timestamp) tuple values."""
    stamps = date_range("2000-01-01", periods=2)
    mi = MultiIndex.from_product([[1, 2], stamps])

    expected_pairs = [
        (1, Timestamp("2000-01-01")),
        (1, Timestamp("2000-01-02")),
        (2, Timestamp("2000-01-01")),
        (2, Timestamp("2000-01-02")),
    ]
    etalon = construct_1d_object_array_from_listlike(expected_pairs)
    tm.assert_numpy_array_equal(mi.values, etalon)
def test_from_product_rangeindex():
    """The first level compares exactly equal to the range-backed input Index."""
    # RangeIndex is preserved by factorize, so preserved in levels
    range_level = Index(range(5))
    mi = MultiIndex.from_product([range_level, ["a", "b"]])
    tm.assert_index_equal(mi._levels[0], range_level, exact=True)
@pytest.mark.parametrize("ordered", [False, True])
@pytest.mark.parametrize("f", [lambda x: x, lambda x: Series(x), lambda x: x.values])
def test_from_product_index_series_categorical(ordered, f):
    """The categorical factor is tiled once per value of the first factor."""
    # GH13743
    letters = list("abcaab")
    categories = list("bac")
    cat_idx = pd.CategoricalIndex(letters, categories=categories, ordered=ordered)
    wanted = pd.CategoricalIndex(
        letters + letters, categories=categories, ordered=ordered
    )

    mi = MultiIndex.from_product([["foo", "bar"], f(cat_idx)])
    tm.assert_index_equal(mi.get_level_values(1), wanted)
def test_from_product():
    """from_product equals from_tuples on the explicitly listed pairs."""
    level_names = ["first", "second"]
    built = MultiIndex.from_product(
        [["foo", "bar", "buz"], ["a", "b", "c"]], names=level_names
    )

    all_pairs = [
        ("foo", "a"),
        ("foo", "b"),
        ("foo", "c"),
        ("bar", "a"),
        ("bar", "b"),
        ("bar", "c"),
        ("buz", "a"),
        ("buz", "b"),
        ("buz", "c"),
    ]
    wanted = MultiIndex.from_tuples(all_pairs, names=level_names)
    tm.assert_index_equal(built, wanted)
def test_from_product_iterator():
    """from_product consumes an iterator of iterables; an integer is rejected."""
    # GH 18434
    outer = ["foo", "bar", "buz"]
    inner = ["a", "b", "c"]
    level_names = ["first", "second"]
    all_pairs = [
        ("foo", "a"),
        ("foo", "b"),
        ("foo", "c"),
        ("bar", "a"),
        ("bar", "b"),
        ("bar", "c"),
        ("buz", "a"),
        ("buz", "b"),
        ("buz", "c"),
    ]
    wanted = MultiIndex.from_tuples(all_pairs, names=level_names)

    # iterator as input
    built = MultiIndex.from_product(iter([outer, inner]), names=level_names)
    tm.assert_index_equal(built, wanted)

    # Invalid non-iterable input
    with pytest.raises(
        TypeError, match="Input must be a list / sequence of iterables."
    ):
        MultiIndex.from_product(0)
@pytest.mark.parametrize(
    "a, b, expected_names",
    [
        (
            Series([1, 2, 3], name="foo"),
            Series(["a", "b"], name="bar"),
            ["foo", "bar"],
        ),
        (Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]),
        ([1, 2, 3], ["a", "b"], None),
    ],
)
def test_from_product_infer_names(a, b, expected_names):
    """Without ``names``, from_product takes level names from named inputs."""
    # GH27292
    wanted = MultiIndex(
        levels=[[1, 2, 3], ["a", "b"]],
        codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
        names=expected_names,
    )
    tm.assert_index_equal(MultiIndex.from_product([a, b]), wanted)
def test_from_product_respects_none_names():
    """Explicit names=None is honoured even when the input Series are named."""
    # GH27292
    numbers = Series([1, 2, 3], name="foo")
    letters = Series(["a", "b"], name="bar")

    wanted = MultiIndex(
        levels=[[1, 2, 3], ["a", "b"]],
        codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
        names=None,
    )
    built = MultiIndex.from_product([numbers, letters], names=None)
    tm.assert_index_equal(built, wanted)
def test_from_product_readonly():
    """A read-only input array gives the same result as a writable one."""
    # GH#15286 passing read-only array to from_product
    numbers = np.array(range(3))
    letters = ["a", "b"]
    wanted = MultiIndex.from_product([numbers, letters])

    # Freeze the array, then build the product again from the same inputs.
    numbers.setflags(write=False)
    tm.assert_index_equal(MultiIndex.from_product([numbers, letters]), wanted)
- def test_create_index_existing_name(idx):
- # GH11193, when an existing index is passed, and a new name is not
- # specified, the new index should inherit the previous object name
- index = idx
- index.names = ["foo", "bar"]
- result = Index(index)
- expected = Index(
- Index(
- [
- ("foo", "one"),
- ("foo", "two"),
- ("bar", "one"),
- ("baz", "two"),
- ("qux", "one"),
- ("qux", "two"),
- ],
- dtype="object",
- )
- )
- tm.assert_index_equal(result, expected)
- result = Index(index, name="A")
- expected = Index(
- Index(
- [
- ("foo", "one"),
- ("foo", "two"),
- ("bar", "one"),
- ("baz", "two"),
- ("qux", "one"),
- ("qux", "two"),
- ],
- dtype="object",
- ),
- name="A",
- )
- tm.assert_index_equal(result, expected)
- # ----------------------------------------------------------------------------
- # from_frame
- # ----------------------------------------------------------------------------
def test_from_frame():
    """from_frame turns each row into an entry and column labels into names."""
    # GH 22420
    rows = [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]]
    frame = pd.DataFrame(rows, columns=["L1", "L2"])

    wanted = MultiIndex.from_tuples(
        [("a", "a"), ("a", "b"), ("b", "a"), ("b", "b")], names=["L1", "L2"]
    )
    tm.assert_index_equal(wanted, MultiIndex.from_frame(frame))
@pytest.mark.skipif(pa_version_under7p0, reason="minimum pyarrow not installed")
def test_from_frame_missing_values_multiIndex():
    """from_frame on masked columns with missing values matches from_arrays."""
    # GH 39984
    import pyarrow as pa

    def arrow_floats():
        # Fresh Float64 array built from an arrow array holding NaN and null.
        return pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None]))

    frame = pd.DataFrame(
        {
            "a": Series([1, 2, None], dtype="Int64"),
            "b": arrow_floats(),
        }
    )
    built = MultiIndex.from_frame(frame)

    wanted = MultiIndex.from_arrays(
        [Series([1, 2, None]).astype("Int64"), arrow_floats()],
        names=["a", "b"],
    )
    tm.assert_index_equal(built, wanted)
@pytest.mark.parametrize(
    "non_frame",
    [
        Series([1, 2, 3, 4]),
        [1, 2, 3, 4],
        [[1, 2], [3, 4], [5, 6]],
        Index([1, 2, 3, 4]),
        np.array([[1, 2], [3, 4], [5, 6]]),
        27,
    ],
)
def test_from_frame_error(non_frame):
    """from_frame raises TypeError for anything that is not a DataFrame."""
    # GH 22420
    expected_msg = "Input must be a DataFrame"
    with pytest.raises(TypeError, match=expected_msg):
        MultiIndex.from_frame(non_frame)
def test_from_frame_dtype_fidelity():
    """Each level produced by from_frame has the dtype of its source column."""
    # GH 22420

    def make_columns():
        # Built fresh on every call so the frame and the expected index do
        # not share array objects.
        return {
            "dates": date_range("19910905", periods=6, tz="US/Eastern"),
            "a": [1, 1, 1, 2, 2, 2],
            "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            "c": ["x", "x", "y", "z", "x", "y"],
        }

    frame = pd.DataFrame(make_columns())
    column_dtypes = frame.dtypes.to_dict()

    expected_mi = MultiIndex.from_arrays(
        list(make_columns().values()), names=["dates", "a", "b", "c"]
    )
    mi = MultiIndex.from_frame(frame)
    level_dtypes = dict(zip(mi.names, (level.dtype for level in mi.levels)))

    tm.assert_index_equal(expected_mi, mi)
    assert column_dtypes == level_dtypes
@pytest.mark.parametrize(
    "names_in,names_out", [(None, [("L1", "x"), ("L2", "y")]), (["x", "y"], ["x", "y"])]
)
def test_from_frame_valid_names(names_in, names_out):
    """Names default to the column labels and can be overridden via ``names``."""
    # GH 22420
    rows = [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]]
    column_index = MultiIndex.from_tuples([("L1", "x"), ("L2", "y")])
    frame = pd.DataFrame(rows, columns=column_index)

    assert MultiIndex.from_frame(frame, names=names_in).names == names_out
@pytest.mark.parametrize(
    "names,expected_error_msg",
    [
        ("bad_input", "Names should be list-like for a MultiIndex"),
        (["a", "b", "c"], "Length of names must match number of levels in MultiIndex"),
    ],
)
def test_from_frame_invalid_names(names, expected_error_msg):
    """A scalar ``names`` or one of the wrong length raises ValueError."""
    # GH 22420
    rows = [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]]
    column_index = MultiIndex.from_tuples([("L1", "x"), ("L2", "y")])
    frame = pd.DataFrame(rows, columns=column_index)

    with pytest.raises(ValueError, match=expected_error_msg):
        MultiIndex.from_frame(frame, names=names)
def test_index_equal_empty_iterable():
    """Empty indexes from the constructor and from_arrays compare equal."""
    # #16844
    level_names = ["a", "b"]
    from_constructor = MultiIndex(levels=[[], []], codes=[[], []], names=level_names)
    from_arrays = MultiIndex.from_arrays(arrays=[[], []], names=level_names)
    tm.assert_index_equal(from_constructor, from_arrays)
def test_raise_invalid_sortorder():
    """The constructor raises when an incorrect sortorder is given."""
    # GH#28518
    levels = [[0, 1], [0, 1, 2]]

    # Correct sortorder
    MultiIndex(
        levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
    )

    # Each case: codes, the claimed sortorder, and the expected error pattern.
    bad_cases = (
        (
            [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]],
            2,
            r".* sortorder 2 with lexsort_depth 1.*",
        ),
        (
            [[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]],
            1,
            r".* sortorder 1 with lexsort_depth 0.*",
        ),
    )
    for codes, claimed, pattern in bad_cases:
        with pytest.raises(ValueError, match=pattern):
            MultiIndex(levels=levels, codes=codes, sortorder=claimed)
def test_datetimeindex():
    """Datetime-like inputs give DatetimeIndex levels; plain dates do not."""
    stamps = ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"]
    idx1 = pd.DatetimeIndex(stamps * 2, tz="Asia/Tokyo")
    idx2 = date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern")
    idx = MultiIndex.from_arrays([idx1, idx2])

    expected1 = pd.DatetimeIndex(stamps, tz="Asia/Tokyo")
    tm.assert_index_equal(idx.levels[0], expected1)
    tm.assert_index_equal(idx.levels[1], idx2)

    # from datetime combos
    # GH 7888
    date1 = np.datetime64("today")
    date2 = datetime.today()
    date3 = Timestamp.today()
    datetime_likes = [date1, date2, date3]

    for d1, d2 in itertools.product(datetime_likes, repeat=2):
        index = MultiIndex.from_product([[d1], [d2]])
        for level in index.levels:
            assert isinstance(level, pd.DatetimeIndex)

    # but NOT date objects, matching Index behavior
    date4 = date.today()
    index = MultiIndex.from_product([[date4], [date2]])
    assert not isinstance(index.levels[0], pd.DatetimeIndex)
    assert isinstance(index.levels[1], pd.DatetimeIndex)
def test_constructor_with_tz():
    """tz-aware inputs keep their names and values, as Index or as Series."""
    index = pd.DatetimeIndex(
        ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"
    )
    columns = pd.DatetimeIndex(
        ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo"
    )

    # First pass the indexes as they are, then wrapped in Series.
    for wrap in (lambda x: x, Series):
        built = MultiIndex.from_arrays([wrap(index), wrap(columns)])
        assert built.names == ["dt1", "dt2"]
        tm.assert_index_equal(built.levels[0], index)
        tm.assert_index_equal(built.levels[1], columns)
def test_multiindex_inference_consistency():
    """date objects stay object dtype in Index and in every MultiIndex builder."""
    # check that inference behavior matches the base class
    today = date.today()
    pair = [today, today]

    assert Index(pair).dtype == object

    # Built lazily so each MultiIndex is created just before it is checked.
    builders = (
        lambda: MultiIndex.from_arrays([pair]),
        lambda: MultiIndex.from_product([pair]),
        lambda: MultiIndex.from_tuples([(x,) for x in pair]),
    )
    for build in builders:
        first_level = build().levels[0]
        assert first_level.dtype == object
def test_dtype_representation():
    """``dtypes`` is a Series indexed by the (tuple) level names."""
    # GH#46900
    tuple_names = [("a", "b"), ("c", "d")]
    pmidx = MultiIndex.from_arrays([[1], ["a"]], names=tuple_names)

    wanted = Series(["int64", "object"], index=MultiIndex.from_tuples(tuple_names))
    tm.assert_series_equal(pmidx.dtypes, wanted)
|