123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698 |
- """
- See also: test_reindex.py:TestReindexSetIndex
- """
- from datetime import (
- datetime,
- timedelta,
- )
- import numpy as np
- import pytest
- from pandas import (
- Categorical,
- DataFrame,
- DatetimeIndex,
- Index,
- MultiIndex,
- Series,
- date_range,
- period_range,
- to_datetime,
- )
- import pandas._testing as tm
- class TestSetIndex:
- def test_set_index_multiindex(self):
- # segfault in GH#3308
- d = {"t1": [2, 2.5, 3], "t2": [4, 5, 6]}
- df = DataFrame(d)
- tuples = [(0, 1), (0, 2), (1, 2)]
- df["tuples"] = tuples
- index = MultiIndex.from_tuples(df["tuples"])
- # it works!
- df.set_index(index)
- def test_set_index_empty_column(self):
- # GH#1971
- df = DataFrame(
- [
- {"a": 1, "p": 0},
- {"a": 2, "m": 10},
- {"a": 3, "m": 11, "p": 20},
- {"a": 4, "m": 12, "p": 21},
- ],
- columns=["a", "m", "p", "x"],
- )
- result = df.set_index(["a", "x"])
- expected = df[["m", "p"]]
- expected.index = MultiIndex.from_arrays([df["a"], df["x"]], names=["a", "x"])
- tm.assert_frame_equal(result, expected)
- def test_set_index_empty_dataframe(self):
- # GH#38419
- df1 = DataFrame(
- {"a": Series(dtype="datetime64[ns]"), "b": Series(dtype="int64"), "c": []}
- )
- df2 = df1.set_index(["a", "b"])
- result = df2.index.to_frame().dtypes
- expected = df1[["a", "b"]].dtypes
- tm.assert_series_equal(result, expected)
- def test_set_index_multiindexcolumns(self):
- columns = MultiIndex.from_tuples([("foo", 1), ("foo", 2), ("bar", 1)])
- df = DataFrame(np.random.randn(3, 3), columns=columns)
- result = df.set_index(df.columns[0])
- expected = df.iloc[:, 1:]
- expected.index = df.iloc[:, 0].values
- expected.index.names = [df.columns[0]]
- tm.assert_frame_equal(result, expected)
- def test_set_index_timezone(self):
- # GH#12358
- # tz-aware Series should retain the tz
- idx = DatetimeIndex(["2014-01-01 10:10:10"], tz="UTC").tz_convert("Europe/Rome")
- df = DataFrame({"A": idx})
- assert df.set_index(idx).index[0].hour == 11
- assert DatetimeIndex(Series(df.A))[0].hour == 11
- assert df.set_index(df.A).index[0].hour == 11
- def test_set_index_cast_datetimeindex(self):
- df = DataFrame(
- {
- "A": [datetime(2000, 1, 1) + timedelta(i) for i in range(1000)],
- "B": np.random.randn(1000),
- }
- )
- idf = df.set_index("A")
- assert isinstance(idf.index, DatetimeIndex)
- def test_set_index_dst(self):
- di = date_range("2006-10-29 00:00:00", periods=3, freq="H", tz="US/Pacific")
- df = DataFrame(data={"a": [0, 1, 2], "b": [3, 4, 5]}, index=di).reset_index()
- # single level
- res = df.set_index("index")
- exp = DataFrame(
- data={"a": [0, 1, 2], "b": [3, 4, 5]},
- index=Index(di, name="index"),
- )
- exp.index = exp.index._with_freq(None)
- tm.assert_frame_equal(res, exp)
- # GH#12920
- res = df.set_index(["index", "a"])
- exp_index = MultiIndex.from_arrays([di, [0, 1, 2]], names=["index", "a"])
- exp = DataFrame({"b": [3, 4, 5]}, index=exp_index)
- tm.assert_frame_equal(res, exp)
- def test_set_index(self, float_string_frame):
- df = float_string_frame
- idx = Index(np.arange(len(df))[::-1])
- df = df.set_index(idx)
- tm.assert_index_equal(df.index, idx)
- with pytest.raises(ValueError, match="Length mismatch"):
- df.set_index(idx[::2])
- def test_set_index_names(self):
- df = tm.makeDataFrame()
- df.index.name = "name"
- assert df.set_index(df.index).index.names == ["name"]
- mi = MultiIndex.from_arrays(df[["A", "B"]].T.values, names=["A", "B"])
- mi2 = MultiIndex.from_arrays(
- df[["A", "B", "A", "B"]].T.values, names=["A", "B", "C", "D"]
- )
- df = df.set_index(["A", "B"])
- assert df.set_index(df.index).index.names == ["A", "B"]
- # Check that set_index isn't converting a MultiIndex into an Index
- assert isinstance(df.set_index(df.index).index, MultiIndex)
- # Check actual equality
- tm.assert_index_equal(df.set_index(df.index).index, mi)
- idx2 = df.index.rename(["C", "D"])
- # Check that [MultiIndex, MultiIndex] yields a MultiIndex rather
- # than a pair of tuples
- assert isinstance(df.set_index([df.index, idx2]).index, MultiIndex)
- # Check equality
- tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2)
- # A has duplicate values, C does not
- @pytest.mark.parametrize("keys", ["A", "C", ["A", "B"], ("tuple", "as", "label")])
- @pytest.mark.parametrize("inplace", [True, False])
- @pytest.mark.parametrize("drop", [True, False])
- def test_set_index_drop_inplace(self, frame_of_index_cols, drop, inplace, keys):
- df = frame_of_index_cols
- if isinstance(keys, list):
- idx = MultiIndex.from_arrays([df[x] for x in keys], names=keys)
- else:
- idx = Index(df[keys], name=keys)
- expected = df.drop(keys, axis=1) if drop else df
- expected.index = idx
- if inplace:
- result = df.copy()
- return_value = result.set_index(keys, drop=drop, inplace=True)
- assert return_value is None
- else:
- result = df.set_index(keys, drop=drop)
- tm.assert_frame_equal(result, expected)
- # A has duplicate values, C does not
- @pytest.mark.parametrize("keys", ["A", "C", ["A", "B"], ("tuple", "as", "label")])
- @pytest.mark.parametrize("drop", [True, False])
- def test_set_index_append(self, frame_of_index_cols, drop, keys):
- df = frame_of_index_cols
- keys = keys if isinstance(keys, list) else [keys]
- idx = MultiIndex.from_arrays(
- [df.index] + [df[x] for x in keys], names=[None] + keys
- )
- expected = df.drop(keys, axis=1) if drop else df.copy()
- expected.index = idx
- result = df.set_index(keys, drop=drop, append=True)
- tm.assert_frame_equal(result, expected)
- # A has duplicate values, C does not
- @pytest.mark.parametrize("keys", ["A", "C", ["A", "B"], ("tuple", "as", "label")])
- @pytest.mark.parametrize("drop", [True, False])
- def test_set_index_append_to_multiindex(self, frame_of_index_cols, drop, keys):
- # append to existing multiindex
- df = frame_of_index_cols.set_index(["D"], drop=drop, append=True)
- keys = keys if isinstance(keys, list) else [keys]
- expected = frame_of_index_cols.set_index(["D"] + keys, drop=drop, append=True)
- result = df.set_index(keys, drop=drop, append=True)
- tm.assert_frame_equal(result, expected)
- def test_set_index_after_mutation(self):
- # GH#1590
- df = DataFrame({"val": [0, 1, 2], "key": ["a", "b", "c"]})
- expected = DataFrame({"val": [1, 2]}, Index(["b", "c"], name="key"))
- df2 = df.loc[df.index.map(lambda indx: indx >= 1)]
- result = df2.set_index("key")
- tm.assert_frame_equal(result, expected)
- # MultiIndex constructor does not work directly on Series -> lambda
- # Add list-of-list constructor because list is ambiguous -> lambda
- # also test index name if append=True (name is duplicate here for B)
- @pytest.mark.parametrize(
- "box",
- [
- Series,
- Index,
- np.array,
- list,
- lambda x: [list(x)],
- lambda x: MultiIndex.from_arrays([x]),
- ],
- )
- @pytest.mark.parametrize(
- "append, index_name", [(True, None), (True, "B"), (True, "test"), (False, None)]
- )
- @pytest.mark.parametrize("drop", [True, False])
- def test_set_index_pass_single_array(
- self, frame_of_index_cols, drop, append, index_name, box
- ):
- df = frame_of_index_cols
- df.index.name = index_name
- key = box(df["B"])
- if box == list:
- # list of strings gets interpreted as list of keys
- msg = "['one', 'two', 'three', 'one', 'two']"
- with pytest.raises(KeyError, match=msg):
- df.set_index(key, drop=drop, append=append)
- else:
- # np.array/list-of-list "forget" the name of B
- name_mi = getattr(key, "names", None)
- name = [getattr(key, "name", None)] if name_mi is None else name_mi
- result = df.set_index(key, drop=drop, append=append)
- # only valid column keys are dropped
- # since B is always passed as array above, nothing is dropped
- expected = df.set_index(["B"], drop=False, append=append)
- expected.index.names = [index_name] + name if append else name
- tm.assert_frame_equal(result, expected)
- # MultiIndex constructor does not work directly on Series -> lambda
- # also test index name if append=True (name is duplicate here for A & B)
- @pytest.mark.parametrize(
- "box", [Series, Index, np.array, list, lambda x: MultiIndex.from_arrays([x])]
- )
- @pytest.mark.parametrize(
- "append, index_name",
- [(True, None), (True, "A"), (True, "B"), (True, "test"), (False, None)],
- )
- @pytest.mark.parametrize("drop", [True, False])
- def test_set_index_pass_arrays(
- self, frame_of_index_cols, drop, append, index_name, box
- ):
- df = frame_of_index_cols
- df.index.name = index_name
- keys = ["A", box(df["B"])]
- # np.array/list "forget" the name of B
- names = ["A", None if box in [np.array, list, tuple, iter] else "B"]
- result = df.set_index(keys, drop=drop, append=append)
- # only valid column keys are dropped
- # since B is always passed as array above, only A is dropped, if at all
- expected = df.set_index(["A", "B"], drop=False, append=append)
- expected = expected.drop("A", axis=1) if drop else expected
- expected.index.names = [index_name] + names if append else names
- tm.assert_frame_equal(result, expected)
- # MultiIndex constructor does not work directly on Series -> lambda
- # We also emulate a "constructor" for the label -> lambda
- # also test index name if append=True (name is duplicate here for A)
- @pytest.mark.parametrize(
- "box2",
- [
- Series,
- Index,
- np.array,
- list,
- iter,
- lambda x: MultiIndex.from_arrays([x]),
- lambda x: x.name,
- ],
- )
- @pytest.mark.parametrize(
- "box1",
- [
- Series,
- Index,
- np.array,
- list,
- iter,
- lambda x: MultiIndex.from_arrays([x]),
- lambda x: x.name,
- ],
- )
- @pytest.mark.parametrize(
- "append, index_name", [(True, None), (True, "A"), (True, "test"), (False, None)]
- )
- @pytest.mark.parametrize("drop", [True, False])
- def test_set_index_pass_arrays_duplicate(
- self, frame_of_index_cols, drop, append, index_name, box1, box2
- ):
- df = frame_of_index_cols
- df.index.name = index_name
- keys = [box1(df["A"]), box2(df["A"])]
- result = df.set_index(keys, drop=drop, append=append)
- # if either box is iter, it has been consumed; re-read
- keys = [box1(df["A"]), box2(df["A"])]
- # need to adapt first drop for case that both keys are 'A' --
- # cannot drop the same column twice;
- # plain == would give ambiguous Boolean error for containers
- first_drop = (
- False
- if (
- isinstance(keys[0], str)
- and keys[0] == "A"
- and isinstance(keys[1], str)
- and keys[1] == "A"
- )
- else drop
- )
- # to test against already-tested behaviour, we add sequentially,
- # hence second append always True; must wrap keys in list, otherwise
- # box = list would be interpreted as keys
- expected = df.set_index([keys[0]], drop=first_drop, append=append)
- expected = expected.set_index([keys[1]], drop=drop, append=True)
- tm.assert_frame_equal(result, expected)
- @pytest.mark.parametrize("append", [True, False])
- @pytest.mark.parametrize("drop", [True, False])
- def test_set_index_pass_multiindex(self, frame_of_index_cols, drop, append):
- df = frame_of_index_cols
- keys = MultiIndex.from_arrays([df["A"], df["B"]], names=["A", "B"])
- result = df.set_index(keys, drop=drop, append=append)
- # setting with a MultiIndex will never drop columns
- expected = df.set_index(["A", "B"], drop=False, append=append)
- tm.assert_frame_equal(result, expected)
- def test_construction_with_categorical_index(self):
- ci = tm.makeCategoricalIndex(10)
- ci.name = "B"
- # with Categorical
- df = DataFrame({"A": np.random.randn(10), "B": ci.values})
- idf = df.set_index("B")
- tm.assert_index_equal(idf.index, ci)
- # from a CategoricalIndex
- df = DataFrame({"A": np.random.randn(10), "B": ci})
- idf = df.set_index("B")
- tm.assert_index_equal(idf.index, ci)
- # round-trip
- idf = idf.reset_index().set_index("B")
- tm.assert_index_equal(idf.index, ci)
- def test_set_index_preserve_categorical_dtype(self):
- # GH#13743, GH#13854
- df = DataFrame(
- {
- "A": [1, 2, 1, 1, 2],
- "B": [10, 16, 22, 28, 34],
- "C1": Categorical(list("abaab"), categories=list("bac"), ordered=False),
- "C2": Categorical(list("abaab"), categories=list("bac"), ordered=True),
- }
- )
- for cols in ["C1", "C2", ["A", "C1"], ["A", "C2"], ["C1", "C2"]]:
- result = df.set_index(cols).reset_index()
- result = result.reindex(columns=df.columns)
- tm.assert_frame_equal(result, df)
- def test_set_index_datetime(self):
- # GH#3950
- df = DataFrame(
- {
- "label": ["a", "a", "a", "b", "b", "b"],
- "datetime": [
- "2011-07-19 07:00:00",
- "2011-07-19 08:00:00",
- "2011-07-19 09:00:00",
- "2011-07-19 07:00:00",
- "2011-07-19 08:00:00",
- "2011-07-19 09:00:00",
- ],
- "value": range(6),
- }
- )
- df.index = to_datetime(df.pop("datetime"), utc=True)
- df.index = df.index.tz_convert("US/Pacific")
- expected = DatetimeIndex(
- ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"],
- name="datetime",
- )
- expected = expected.tz_localize("UTC").tz_convert("US/Pacific")
- df = df.set_index("label", append=True)
- tm.assert_index_equal(df.index.levels[0], expected)
- tm.assert_index_equal(df.index.levels[1], Index(["a", "b"], name="label"))
- assert df.index.names == ["datetime", "label"]
- df = df.swaplevel(0, 1)
- tm.assert_index_equal(df.index.levels[0], Index(["a", "b"], name="label"))
- tm.assert_index_equal(df.index.levels[1], expected)
- assert df.index.names == ["label", "datetime"]
- df = DataFrame(np.random.random(6))
- idx1 = DatetimeIndex(
- [
- "2011-07-19 07:00:00",
- "2011-07-19 08:00:00",
- "2011-07-19 09:00:00",
- "2011-07-19 07:00:00",
- "2011-07-19 08:00:00",
- "2011-07-19 09:00:00",
- ],
- tz="US/Eastern",
- )
- idx2 = DatetimeIndex(
- [
- "2012-04-01 09:00",
- "2012-04-01 09:00",
- "2012-04-01 09:00",
- "2012-04-02 09:00",
- "2012-04-02 09:00",
- "2012-04-02 09:00",
- ],
- tz="US/Eastern",
- )
- idx3 = date_range("2011-01-01 09:00", periods=6, tz="Asia/Tokyo")
- idx3 = idx3._with_freq(None)
- df = df.set_index(idx1)
- df = df.set_index(idx2, append=True)
- df = df.set_index(idx3, append=True)
- expected1 = DatetimeIndex(
- ["2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"],
- tz="US/Eastern",
- )
- expected2 = DatetimeIndex(
- ["2012-04-01 09:00", "2012-04-02 09:00"], tz="US/Eastern"
- )
- tm.assert_index_equal(df.index.levels[0], expected1)
- tm.assert_index_equal(df.index.levels[1], expected2)
- tm.assert_index_equal(df.index.levels[2], idx3)
- # GH#7092
- tm.assert_index_equal(df.index.get_level_values(0), idx1)
- tm.assert_index_equal(df.index.get_level_values(1), idx2)
- tm.assert_index_equal(df.index.get_level_values(2), idx3)
- def test_set_index_period(self):
- # GH#6631
- df = DataFrame(np.random.random(6))
- idx1 = period_range("2011-01-01", periods=3, freq="M")
- idx1 = idx1.append(idx1)
- idx2 = period_range("2013-01-01 09:00", periods=2, freq="H")
- idx2 = idx2.append(idx2).append(idx2)
- idx3 = period_range("2005", periods=6, freq="A")
- df = df.set_index(idx1)
- df = df.set_index(idx2, append=True)
- df = df.set_index(idx3, append=True)
- expected1 = period_range("2011-01-01", periods=3, freq="M")
- expected2 = period_range("2013-01-01 09:00", periods=2, freq="H")
- tm.assert_index_equal(df.index.levels[0], expected1)
- tm.assert_index_equal(df.index.levels[1], expected2)
- tm.assert_index_equal(df.index.levels[2], idx3)
- tm.assert_index_equal(df.index.get_level_values(0), idx1)
- tm.assert_index_equal(df.index.get_level_values(1), idx2)
- tm.assert_index_equal(df.index.get_level_values(2), idx3)
- class TestSetIndexInvalid:
- def test_set_index_verify_integrity(self, frame_of_index_cols):
- df = frame_of_index_cols
- with pytest.raises(ValueError, match="Index has duplicate keys"):
- df.set_index("A", verify_integrity=True)
- # with MultiIndex
- with pytest.raises(ValueError, match="Index has duplicate keys"):
- df.set_index([df["A"], df["A"]], verify_integrity=True)
- @pytest.mark.parametrize("append", [True, False])
- @pytest.mark.parametrize("drop", [True, False])
- def test_set_index_raise_keys(self, frame_of_index_cols, drop, append):
- df = frame_of_index_cols
- with pytest.raises(KeyError, match="['foo', 'bar', 'baz']"):
- # column names are A-E, as well as one tuple
- df.set_index(["foo", "bar", "baz"], drop=drop, append=append)
- # non-existent key in list with arrays
- with pytest.raises(KeyError, match="X"):
- df.set_index([df["A"], df["B"], "X"], drop=drop, append=append)
- msg = "[('foo', 'foo', 'foo', 'bar', 'bar')]"
- # tuples always raise KeyError
- with pytest.raises(KeyError, match=msg):
- df.set_index(tuple(df["A"]), drop=drop, append=append)
- # also within a list
- with pytest.raises(KeyError, match=msg):
- df.set_index(["A", df["A"], tuple(df["A"])], drop=drop, append=append)
- @pytest.mark.parametrize("append", [True, False])
- @pytest.mark.parametrize("drop", [True, False])
- @pytest.mark.parametrize("box", [set], ids=["set"])
- def test_set_index_raise_on_type(self, frame_of_index_cols, box, drop, append):
- df = frame_of_index_cols
- msg = 'The parameter "keys" may be a column key, .*'
- # forbidden type, e.g. set
- with pytest.raises(TypeError, match=msg):
- df.set_index(box(df["A"]), drop=drop, append=append)
- # forbidden type in list, e.g. set
- with pytest.raises(TypeError, match=msg):
- df.set_index(["A", df["A"], box(df["A"])], drop=drop, append=append)
- # MultiIndex constructor does not work directly on Series -> lambda
- @pytest.mark.parametrize(
- "box",
- [Series, Index, np.array, iter, lambda x: MultiIndex.from_arrays([x])],
- ids=["Series", "Index", "np.array", "iter", "MultiIndex"],
- )
- @pytest.mark.parametrize("length", [4, 6], ids=["too_short", "too_long"])
- @pytest.mark.parametrize("append", [True, False])
- @pytest.mark.parametrize("drop", [True, False])
- def test_set_index_raise_on_len(
- self, frame_of_index_cols, box, length, drop, append
- ):
- # GH 24984
- df = frame_of_index_cols # has length 5
- values = np.random.randint(0, 10, (length,))
- msg = "Length mismatch: Expected 5 rows, received array of length.*"
- # wrong length directly
- with pytest.raises(ValueError, match=msg):
- df.set_index(box(values), drop=drop, append=append)
- # wrong length in list
- with pytest.raises(ValueError, match=msg):
- df.set_index(["A", df.A, box(values)], drop=drop, append=append)
- class TestSetIndexCustomLabelType:
- def test_set_index_custom_label_type(self):
- # GH#24969
- class Thing:
- def __init__(self, name, color) -> None:
- self.name = name
- self.color = color
- def __str__(self) -> str:
- return f"<Thing {repr(self.name)}>"
- # necessary for pretty KeyError
- __repr__ = __str__
- thing1 = Thing("One", "red")
- thing2 = Thing("Two", "blue")
- df = DataFrame({thing1: [0, 1], thing2: [2, 3]})
- expected = DataFrame({thing1: [0, 1]}, index=Index([2, 3], name=thing2))
- # use custom label directly
- result = df.set_index(thing2)
- tm.assert_frame_equal(result, expected)
- # custom label wrapped in list
- result = df.set_index([thing2])
- tm.assert_frame_equal(result, expected)
- # missing key
- thing3 = Thing("Three", "pink")
- msg = "<Thing 'Three'>"
- with pytest.raises(KeyError, match=msg):
- # missing label directly
- df.set_index(thing3)
- with pytest.raises(KeyError, match=msg):
- # missing label in list
- df.set_index([thing3])
- def test_set_index_custom_label_hashable_iterable(self):
- # GH#24969
- # actual example discussed in GH 24984 was e.g. for shapely.geometry
- # objects (e.g. a collection of Points) that can be both hashable and
- # iterable; using frozenset as a stand-in for testing here
- class Thing(frozenset):
- # need to stabilize repr for KeyError (due to random order in sets)
- def __repr__(self) -> str:
- tmp = sorted(self)
- joined_reprs = ", ".join(map(repr, tmp))
- # double curly brace prints one brace in format string
- return f"frozenset({{{joined_reprs}}})"
- thing1 = Thing(["One", "red"])
- thing2 = Thing(["Two", "blue"])
- df = DataFrame({thing1: [0, 1], thing2: [2, 3]})
- expected = DataFrame({thing1: [0, 1]}, index=Index([2, 3], name=thing2))
- # use custom label directly
- result = df.set_index(thing2)
- tm.assert_frame_equal(result, expected)
- # custom label wrapped in list
- result = df.set_index([thing2])
- tm.assert_frame_equal(result, expected)
- # missing key
- thing3 = Thing(["Three", "pink"])
- msg = r"frozenset\(\{'Three', 'pink'\}\)"
- with pytest.raises(KeyError, match=msg):
- # missing label directly
- df.set_index(thing3)
- with pytest.raises(KeyError, match=msg):
- # missing label in list
- df.set_index([thing3])
- def test_set_index_custom_label_type_raises(self):
- # GH#24969
- # purposefully inherit from something unhashable
- class Thing(set):
- def __init__(self, name, color) -> None:
- self.name = name
- self.color = color
- def __str__(self) -> str:
- return f"<Thing {repr(self.name)}>"
- thing1 = Thing("One", "red")
- thing2 = Thing("Two", "blue")
- df = DataFrame([[0, 2], [1, 3]], columns=[thing1, thing2])
- msg = 'The parameter "keys" may be a column key, .*'
- with pytest.raises(TypeError, match=msg):
- # use custom label directly
- df.set_index(thing2)
- with pytest.raises(TypeError, match=msg):
- # custom label wrapped in list
- df.set_index([thing2])
- def test_set_index_periodindex(self):
- # GH#6631
- df = DataFrame(np.random.random(6))
- idx1 = period_range("2011/01/01", periods=6, freq="M")
- idx2 = period_range("2013", periods=6, freq="A")
- df = df.set_index(idx1)
- tm.assert_index_equal(df.index, idx1)
- df = df.set_index(idx2)
- tm.assert_index_equal(df.index, idx2)
|