123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417 |
from collections import ChainMap
import inspect

import numpy as np
import pytest

from pandas import (
    DataFrame,
    Index,
    MultiIndex,
    merge,
)
import pandas._testing as tm


- class TestRename:
- def test_rename_signature(self):
- sig = inspect.signature(DataFrame.rename)
- parameters = set(sig.parameters)
- assert parameters == {
- "self",
- "mapper",
- "index",
- "columns",
- "axis",
- "inplace",
- "copy",
- "level",
- "errors",
- }
- def test_rename_mi(self, frame_or_series):
- obj = frame_or_series(
- [11, 21, 31],
- index=MultiIndex.from_tuples([("A", x) for x in ["a", "B", "c"]]),
- )
- obj.rename(str.lower)
- def test_rename(self, float_frame):
- mapping = {"A": "a", "B": "b", "C": "c", "D": "d"}
- renamed = float_frame.rename(columns=mapping)
- renamed2 = float_frame.rename(columns=str.lower)
- tm.assert_frame_equal(renamed, renamed2)
- tm.assert_frame_equal(
- renamed2.rename(columns=str.upper), float_frame, check_names=False
- )
- # index
- data = {"A": {"foo": 0, "bar": 1}}
- # gets sorted alphabetical
- df = DataFrame(data)
- renamed = df.rename(index={"foo": "bar", "bar": "foo"})
- tm.assert_index_equal(renamed.index, Index(["foo", "bar"]))
- renamed = df.rename(index=str.upper)
- tm.assert_index_equal(renamed.index, Index(["BAR", "FOO"]))
- # have to pass something
- with pytest.raises(TypeError, match="must pass an index to rename"):
- float_frame.rename()
- # partial columns
- renamed = float_frame.rename(columns={"C": "foo", "D": "bar"})
- tm.assert_index_equal(renamed.columns, Index(["A", "B", "foo", "bar"]))
- # other axis
- renamed = float_frame.T.rename(index={"C": "foo", "D": "bar"})
- tm.assert_index_equal(renamed.index, Index(["A", "B", "foo", "bar"]))
- # index with name
- index = Index(["foo", "bar"], name="name")
- renamer = DataFrame(data, index=index)
- renamed = renamer.rename(index={"foo": "bar", "bar": "foo"})
- tm.assert_index_equal(renamed.index, Index(["bar", "foo"], name="name"))
- assert renamed.index.name == renamer.index.name
- @pytest.mark.parametrize(
- "args,kwargs",
- [
- ((ChainMap({"A": "a"}, {"B": "b"}),), {"axis": "columns"}),
- ((), {"columns": ChainMap({"A": "a"}, {"B": "b"})}),
- ],
- )
- def test_rename_chainmap(self, args, kwargs):
- # see gh-23859
- colAData = range(1, 11)
- colBdata = np.random.randn(10)
- df = DataFrame({"A": colAData, "B": colBdata})
- result = df.rename(*args, **kwargs)
- expected = DataFrame({"a": colAData, "b": colBdata})
- tm.assert_frame_equal(result, expected)
- def test_rename_multiindex(self):
- tuples_index = [("foo1", "bar1"), ("foo2", "bar2")]
- tuples_columns = [("fizz1", "buzz1"), ("fizz2", "buzz2")]
- index = MultiIndex.from_tuples(tuples_index, names=["foo", "bar"])
- columns = MultiIndex.from_tuples(tuples_columns, names=["fizz", "buzz"])
- df = DataFrame([(0, 0), (1, 1)], index=index, columns=columns)
- #
- # without specifying level -> across all levels
- renamed = df.rename(
- index={"foo1": "foo3", "bar2": "bar3"},
- columns={"fizz1": "fizz3", "buzz2": "buzz3"},
- )
- new_index = MultiIndex.from_tuples(
- [("foo3", "bar1"), ("foo2", "bar3")], names=["foo", "bar"]
- )
- new_columns = MultiIndex.from_tuples(
- [("fizz3", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"]
- )
- tm.assert_index_equal(renamed.index, new_index)
- tm.assert_index_equal(renamed.columns, new_columns)
- assert renamed.index.names == df.index.names
- assert renamed.columns.names == df.columns.names
- #
- # with specifying a level (GH13766)
- # dict
- new_columns = MultiIndex.from_tuples(
- [("fizz3", "buzz1"), ("fizz2", "buzz2")], names=["fizz", "buzz"]
- )
- renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=0)
- tm.assert_index_equal(renamed.columns, new_columns)
- renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="fizz")
- tm.assert_index_equal(renamed.columns, new_columns)
- new_columns = MultiIndex.from_tuples(
- [("fizz1", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"]
- )
- renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=1)
- tm.assert_index_equal(renamed.columns, new_columns)
- renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="buzz")
- tm.assert_index_equal(renamed.columns, new_columns)
- # function
- func = str.upper
- new_columns = MultiIndex.from_tuples(
- [("FIZZ1", "buzz1"), ("FIZZ2", "buzz2")], names=["fizz", "buzz"]
- )
- renamed = df.rename(columns=func, level=0)
- tm.assert_index_equal(renamed.columns, new_columns)
- renamed = df.rename(columns=func, level="fizz")
- tm.assert_index_equal(renamed.columns, new_columns)
- new_columns = MultiIndex.from_tuples(
- [("fizz1", "BUZZ1"), ("fizz2", "BUZZ2")], names=["fizz", "buzz"]
- )
- renamed = df.rename(columns=func, level=1)
- tm.assert_index_equal(renamed.columns, new_columns)
- renamed = df.rename(columns=func, level="buzz")
- tm.assert_index_equal(renamed.columns, new_columns)
- # index
- new_index = MultiIndex.from_tuples(
- [("foo3", "bar1"), ("foo2", "bar2")], names=["foo", "bar"]
- )
- renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0)
- tm.assert_index_equal(renamed.index, new_index)
- def test_rename_nocopy(self, float_frame, using_copy_on_write):
- renamed = float_frame.rename(columns={"C": "foo"}, copy=False)
- assert np.shares_memory(renamed["foo"]._values, float_frame["C"]._values)
- renamed.loc[:, "foo"] = 1.0
- if using_copy_on_write:
- assert not (float_frame["C"] == 1.0).all()
- else:
- assert (float_frame["C"] == 1.0).all()
- def test_rename_inplace(self, float_frame):
- float_frame.rename(columns={"C": "foo"})
- assert "C" in float_frame
- assert "foo" not in float_frame
- c_values = float_frame["C"]
- float_frame = float_frame.copy()
- return_value = float_frame.rename(columns={"C": "foo"}, inplace=True)
- assert return_value is None
- assert "C" not in float_frame
- assert "foo" in float_frame
- # GH 44153
- # Used to be id(float_frame["foo"]) != c_id, but flaky in the CI
- assert float_frame["foo"] is not c_values
- def test_rename_bug(self):
- # GH 5344
- # rename set ref_locs, and set_index was not resetting
- df = DataFrame({0: ["foo", "bar"], 1: ["bah", "bas"], 2: [1, 2]})
- df = df.rename(columns={0: "a"})
- df = df.rename(columns={1: "b"})
- df = df.set_index(["a", "b"])
- df.columns = ["2001-01-01"]
- expected = DataFrame(
- [[1], [2]],
- index=MultiIndex.from_tuples(
- [("foo", "bah"), ("bar", "bas")], names=["a", "b"]
- ),
- columns=["2001-01-01"],
- )
- tm.assert_frame_equal(df, expected)
- def test_rename_bug2(self):
- # GH 19497
- # rename was changing Index to MultiIndex if Index contained tuples
- df = DataFrame(data=np.arange(3), index=[(0, 0), (1, 1), (2, 2)], columns=["a"])
- df = df.rename({(1, 1): (5, 4)}, axis="index")
- expected = DataFrame(
- data=np.arange(3), index=[(0, 0), (5, 4), (2, 2)], columns=["a"]
- )
- tm.assert_frame_equal(df, expected)
- def test_rename_errors_raises(self):
- df = DataFrame(columns=["A", "B", "C", "D"])
- with pytest.raises(KeyError, match="'E'] not found in axis"):
- df.rename(columns={"A": "a", "E": "e"}, errors="raise")
- @pytest.mark.parametrize(
- "mapper, errors, expected_columns",
- [
- ({"A": "a", "E": "e"}, "ignore", ["a", "B", "C", "D"]),
- ({"A": "a"}, "raise", ["a", "B", "C", "D"]),
- (str.lower, "raise", ["a", "b", "c", "d"]),
- ],
- )
- def test_rename_errors(self, mapper, errors, expected_columns):
- # GH 13473
- # rename now works with errors parameter
- df = DataFrame(columns=["A", "B", "C", "D"])
- result = df.rename(columns=mapper, errors=errors)
- expected = DataFrame(columns=expected_columns)
- tm.assert_frame_equal(result, expected)
- def test_rename_objects(self, float_string_frame):
- renamed = float_string_frame.rename(columns=str.upper)
- assert "FOO" in renamed
- assert "foo" not in renamed
- def test_rename_axis_style(self):
- # https://github.com/pandas-dev/pandas/issues/12392
- df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["X", "Y"])
- expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"])
- result = df.rename(str.lower, axis=1)
- tm.assert_frame_equal(result, expected)
- result = df.rename(str.lower, axis="columns")
- tm.assert_frame_equal(result, expected)
- result = df.rename({"A": "a", "B": "b"}, axis=1)
- tm.assert_frame_equal(result, expected)
- result = df.rename({"A": "a", "B": "b"}, axis="columns")
- tm.assert_frame_equal(result, expected)
- # Index
- expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"])
- result = df.rename(str.lower, axis=0)
- tm.assert_frame_equal(result, expected)
- result = df.rename(str.lower, axis="index")
- tm.assert_frame_equal(result, expected)
- result = df.rename({"X": "x", "Y": "y"}, axis=0)
- tm.assert_frame_equal(result, expected)
- result = df.rename({"X": "x", "Y": "y"}, axis="index")
- tm.assert_frame_equal(result, expected)
- result = df.rename(mapper=str.lower, axis="index")
- tm.assert_frame_equal(result, expected)
- def test_rename_mapper_multi(self):
- df = DataFrame({"A": ["a", "b"], "B": ["c", "d"], "C": [1, 2]}).set_index(
- ["A", "B"]
- )
- result = df.rename(str.upper)
- expected = df.rename(index=str.upper)
- tm.assert_frame_equal(result, expected)
- def test_rename_positional_named(self):
- # https://github.com/pandas-dev/pandas/issues/12392
- df = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"])
- result = df.rename(index=str.lower, columns=str.upper)
- expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"])
- tm.assert_frame_equal(result, expected)
- def test_rename_axis_style_raises(self):
- # see gh-12392
- df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["0", "1"])
- # Named target and axis
- over_spec_msg = "Cannot specify both 'axis' and any of 'index' or 'columns'"
- with pytest.raises(TypeError, match=over_spec_msg):
- df.rename(index=str.lower, axis=1)
- with pytest.raises(TypeError, match=over_spec_msg):
- df.rename(index=str.lower, axis="columns")
- with pytest.raises(TypeError, match=over_spec_msg):
- df.rename(columns=str.lower, axis="columns")
- with pytest.raises(TypeError, match=over_spec_msg):
- df.rename(index=str.lower, axis=0)
- # Multiple targets and axis
- with pytest.raises(TypeError, match=over_spec_msg):
- df.rename(str.lower, index=str.lower, axis="columns")
- # Too many targets
- over_spec_msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'"
- with pytest.raises(TypeError, match=over_spec_msg):
- df.rename(str.lower, index=str.lower, columns=str.lower)
- # Duplicates
- with pytest.raises(TypeError, match="multiple values"):
- df.rename(id, mapper=id)
- def test_rename_positional_raises(self):
- # GH 29136
- df = DataFrame(columns=["A", "B"])
- msg = r"rename\(\) takes from 1 to 2 positional arguments"
- with pytest.raises(TypeError, match=msg):
- df.rename(None, str.lower)
- def test_rename_no_mappings_raises(self):
- # GH 29136
- df = DataFrame([[1]])
- msg = "must pass an index to rename"
- with pytest.raises(TypeError, match=msg):
- df.rename()
- with pytest.raises(TypeError, match=msg):
- df.rename(None, index=None)
- with pytest.raises(TypeError, match=msg):
- df.rename(None, columns=None)
- with pytest.raises(TypeError, match=msg):
- df.rename(None, columns=None, index=None)
- def test_rename_mapper_and_positional_arguments_raises(self):
- # GH 29136
- df = DataFrame([[1]])
- msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'"
- with pytest.raises(TypeError, match=msg):
- df.rename({}, index={})
- with pytest.raises(TypeError, match=msg):
- df.rename({}, columns={})
- with pytest.raises(TypeError, match=msg):
- df.rename({}, columns={}, index={})
- def test_rename_with_duplicate_columns(self):
- # GH#4403
- df4 = DataFrame(
- {"RT": [0.0454], "TClose": [22.02], "TExg": [0.0422]},
- index=MultiIndex.from_tuples(
- [(600809, 20130331)], names=["STK_ID", "RPT_Date"]
- ),
- )
- df5 = DataFrame(
- {
- "RPT_Date": [20120930, 20121231, 20130331],
- "STK_ID": [600809] * 3,
- "STK_Name": ["饡驦", "饡驦", "饡驦"],
- "TClose": [38.05, 41.66, 30.01],
- },
- index=MultiIndex.from_tuples(
- [(600809, 20120930), (600809, 20121231), (600809, 20130331)],
- names=["STK_ID", "RPT_Date"],
- ),
- )
- # TODO: can we construct this without merge?
- k = merge(df4, df5, how="inner", left_index=True, right_index=True)
- result = k.rename(columns={"TClose_x": "TClose", "TClose_y": "QT_Close"})
- str(result)
- result.dtypes
- expected = DataFrame(
- [[0.0454, 22.02, 0.0422, 20130331, 600809, "饡驦", 30.01]],
- columns=[
- "RT",
- "TClose",
- "TExg",
- "RPT_Date",
- "STK_ID",
- "STK_Name",
- "QT_Close",
- ],
- ).set_index(["STK_ID", "RPT_Date"], drop=False)
- tm.assert_frame_equal(result, expected)
- def test_rename_boolean_index(self):
- df = DataFrame(np.arange(15).reshape(3, 5), columns=[False, True, 2, 3, 4])
- mapper = {0: "foo", 1: "bar", 2: "bah"}
- res = df.rename(index=mapper)
- exp = DataFrame(
- np.arange(15).reshape(3, 5),
- columns=[False, True, 2, 3, 4],
- index=["foo", "bar", "bah"],
- )
- tm.assert_frame_equal(res, exp)
|