test_rename.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. from collections import ChainMap
  2. import inspect
  3. import numpy as np
  4. import pytest
  5. from pandas import (
  6. DataFrame,
  7. Index,
  8. MultiIndex,
  9. merge,
  10. )
  11. import pandas._testing as tm
  12. class TestRename:
  13. def test_rename_signature(self):
  14. sig = inspect.signature(DataFrame.rename)
  15. parameters = set(sig.parameters)
  16. assert parameters == {
  17. "self",
  18. "mapper",
  19. "index",
  20. "columns",
  21. "axis",
  22. "inplace",
  23. "copy",
  24. "level",
  25. "errors",
  26. }
  27. def test_rename_mi(self, frame_or_series):
  28. obj = frame_or_series(
  29. [11, 21, 31],
  30. index=MultiIndex.from_tuples([("A", x) for x in ["a", "B", "c"]]),
  31. )
  32. obj.rename(str.lower)
  def test_rename(self, float_frame):
      """Rename columns and index labels through dict and callable mappers."""
      lower_map = {"A": "a", "B": "b", "C": "c", "D": "d"}

      via_dict = float_frame.rename(columns=lower_map)
      via_func = float_frame.rename(columns=str.lower)
      tm.assert_frame_equal(via_dict, via_func)

      # Upper-casing the lowered columns must give back the original frame.
      round_trip = via_func.rename(columns=str.upper)
      tm.assert_frame_equal(round_trip, float_frame, check_names=False)

      # index
      data = {"A": {"foo": 0, "bar": 1}}
      swap_labels = {"foo": "bar", "bar": "foo"}

      # gets sorted alphabetical
      df = DataFrame(data)
      swapped = df.rename(index=swap_labels)
      tm.assert_index_equal(swapped.index, Index(["foo", "bar"]))

      uppercased = df.rename(index=str.upper)
      tm.assert_index_equal(uppercased.index, Index(["BAR", "FOO"]))

      # have to pass something
      with pytest.raises(TypeError, match="must pass an index to rename"):
          float_frame.rename()

      partial_map = {"C": "foo", "D": "bar"}
      partial_expected = Index(["A", "B", "foo", "bar"])

      # partial columns
      partly_renamed = float_frame.rename(columns=partial_map)
      tm.assert_index_equal(partly_renamed.columns, partial_expected)

      # other axis
      transposed = float_frame.T.rename(index=partial_map)
      tm.assert_index_equal(transposed.index, partial_expected)

      # index with name
      named_index = Index(["foo", "bar"], name="name")
      renamer = DataFrame(data, index=named_index)
      renamed = renamer.rename(index=swap_labels)
      tm.assert_index_equal(renamed.index, Index(["bar", "foo"], name="name"))
      assert renamed.index.name == renamer.index.name
  @pytest.mark.parametrize(
      "args,kwargs",
      [
          ((ChainMap({"A": "a"}, {"B": "b"}),), {"axis": "columns"}),
          ((), {"columns": ChainMap({"A": "a"}, {"B": "b"})}),
      ],
  )
  def test_rename_chainmap(self, args, kwargs):
      """A ``ChainMap`` mapper renames columns, given positionally or as ``columns``."""
      # see gh-23859
      int_values = range(1, 11)
      float_values = np.random.randn(10)

      expected = DataFrame({"a": int_values, "b": float_values})
      source = DataFrame({"A": int_values, "B": float_values})

      result = source.rename(*args, **kwargs)
      tm.assert_frame_equal(result, expected)
  79. def test_rename_multiindex(self):
  80. tuples_index = [("foo1", "bar1"), ("foo2", "bar2")]
  81. tuples_columns = [("fizz1", "buzz1"), ("fizz2", "buzz2")]
  82. index = MultiIndex.from_tuples(tuples_index, names=["foo", "bar"])
  83. columns = MultiIndex.from_tuples(tuples_columns, names=["fizz", "buzz"])
  84. df = DataFrame([(0, 0), (1, 1)], index=index, columns=columns)
  85. #
  86. # without specifying level -> across all levels
  87. renamed = df.rename(
  88. index={"foo1": "foo3", "bar2": "bar3"},
  89. columns={"fizz1": "fizz3", "buzz2": "buzz3"},
  90. )
  91. new_index = MultiIndex.from_tuples(
  92. [("foo3", "bar1"), ("foo2", "bar3")], names=["foo", "bar"]
  93. )
  94. new_columns = MultiIndex.from_tuples(
  95. [("fizz3", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"]
  96. )
  97. tm.assert_index_equal(renamed.index, new_index)
  98. tm.assert_index_equal(renamed.columns, new_columns)
  99. assert renamed.index.names == df.index.names
  100. assert renamed.columns.names == df.columns.names
  101. #
  102. # with specifying a level (GH13766)
  103. # dict
  104. new_columns = MultiIndex.from_tuples(
  105. [("fizz3", "buzz1"), ("fizz2", "buzz2")], names=["fizz", "buzz"]
  106. )
  107. renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=0)
  108. tm.assert_index_equal(renamed.columns, new_columns)
  109. renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="fizz")
  110. tm.assert_index_equal(renamed.columns, new_columns)
  111. new_columns = MultiIndex.from_tuples(
  112. [("fizz1", "buzz1"), ("fizz2", "buzz3")], names=["fizz", "buzz"]
  113. )
  114. renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level=1)
  115. tm.assert_index_equal(renamed.columns, new_columns)
  116. renamed = df.rename(columns={"fizz1": "fizz3", "buzz2": "buzz3"}, level="buzz")
  117. tm.assert_index_equal(renamed.columns, new_columns)
  118. # function
  119. func = str.upper
  120. new_columns = MultiIndex.from_tuples(
  121. [("FIZZ1", "buzz1"), ("FIZZ2", "buzz2")], names=["fizz", "buzz"]
  122. )
  123. renamed = df.rename(columns=func, level=0)
  124. tm.assert_index_equal(renamed.columns, new_columns)
  125. renamed = df.rename(columns=func, level="fizz")
  126. tm.assert_index_equal(renamed.columns, new_columns)
  127. new_columns = MultiIndex.from_tuples(
  128. [("fizz1", "BUZZ1"), ("fizz2", "BUZZ2")], names=["fizz", "buzz"]
  129. )
  130. renamed = df.rename(columns=func, level=1)
  131. tm.assert_index_equal(renamed.columns, new_columns)
  132. renamed = df.rename(columns=func, level="buzz")
  133. tm.assert_index_equal(renamed.columns, new_columns)
  134. # index
  135. new_index = MultiIndex.from_tuples(
  136. [("foo3", "bar1"), ("foo2", "bar2")], names=["foo", "bar"]
  137. )
  138. renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0)
  139. tm.assert_index_equal(renamed.index, new_index)
  140. def test_rename_nocopy(self, float_frame, using_copy_on_write):
  141. renamed = float_frame.rename(columns={"C": "foo"}, copy=False)
  142. assert np.shares_memory(renamed["foo"]._values, float_frame["C"]._values)
  143. renamed.loc[:, "foo"] = 1.0
  144. if using_copy_on_write:
  145. assert not (float_frame["C"] == 1.0).all()
  146. else:
  147. assert (float_frame["C"] == 1.0).all()
  148. def test_rename_inplace(self, float_frame):
  149. float_frame.rename(columns={"C": "foo"})
  150. assert "C" in float_frame
  151. assert "foo" not in float_frame
  152. c_values = float_frame["C"]
  153. float_frame = float_frame.copy()
  154. return_value = float_frame.rename(columns={"C": "foo"}, inplace=True)
  155. assert return_value is None
  156. assert "C" not in float_frame
  157. assert "foo" in float_frame
  158. # GH 44153
  159. # Used to be id(float_frame["foo"]) != c_id, but flaky in the CI
  160. assert float_frame["foo"] is not c_values
  161. def test_rename_bug(self):
  162. # GH 5344
  163. # rename set ref_locs, and set_index was not resetting
  164. df = DataFrame({0: ["foo", "bar"], 1: ["bah", "bas"], 2: [1, 2]})
  165. df = df.rename(columns={0: "a"})
  166. df = df.rename(columns={1: "b"})
  167. df = df.set_index(["a", "b"])
  168. df.columns = ["2001-01-01"]
  169. expected = DataFrame(
  170. [[1], [2]],
  171. index=MultiIndex.from_tuples(
  172. [("foo", "bah"), ("bar", "bas")], names=["a", "b"]
  173. ),
  174. columns=["2001-01-01"],
  175. )
  176. tm.assert_frame_equal(df, expected)
  177. def test_rename_bug2(self):
  178. # GH 19497
  179. # rename was changing Index to MultiIndex if Index contained tuples
  180. df = DataFrame(data=np.arange(3), index=[(0, 0), (1, 1), (2, 2)], columns=["a"])
  181. df = df.rename({(1, 1): (5, 4)}, axis="index")
  182. expected = DataFrame(
  183. data=np.arange(3), index=[(0, 0), (5, 4), (2, 2)], columns=["a"]
  184. )
  185. tm.assert_frame_equal(df, expected)
  def test_rename_errors_raises(self):
      """``errors="raise"`` raises ``KeyError`` when a mapped label is absent."""
      frame = DataFrame(columns=["A", "B", "C", "D"])
      mapping_with_unknown = {"A": "a", "E": "e"}
      with pytest.raises(KeyError, match="'E'] not found in axis"):
          frame.rename(columns=mapping_with_unknown, errors="raise")
  @pytest.mark.parametrize(
      "mapper, errors, expected_columns",
      [
          ({"A": "a", "E": "e"}, "ignore", ["a", "B", "C", "D"]),
          ({"A": "a"}, "raise", ["a", "B", "C", "D"]),
          (str.lower, "raise", ["a", "b", "c", "d"]),
      ],
  )
  def test_rename_errors(self, mapper, errors, expected_columns):
      """Successful renames under both ``errors`` settings give the expected columns."""
      # GH 13473
      # rename now works with errors parameter
      expected = DataFrame(columns=expected_columns)
      empty_frame = DataFrame(columns=["A", "B", "C", "D"])

      result = empty_frame.rename(columns=mapper, errors=errors)
      tm.assert_frame_equal(result, expected)
  205. def test_rename_objects(self, float_string_frame):
  206. renamed = float_string_frame.rename(columns=str.upper)
  207. assert "FOO" in renamed
  208. assert "foo" not in renamed
  209. def test_rename_axis_style(self):
  210. # https://github.com/pandas-dev/pandas/issues/12392
  211. df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["X", "Y"])
  212. expected = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"])
  213. result = df.rename(str.lower, axis=1)
  214. tm.assert_frame_equal(result, expected)
  215. result = df.rename(str.lower, axis="columns")
  216. tm.assert_frame_equal(result, expected)
  217. result = df.rename({"A": "a", "B": "b"}, axis=1)
  218. tm.assert_frame_equal(result, expected)
  219. result = df.rename({"A": "a", "B": "b"}, axis="columns")
  220. tm.assert_frame_equal(result, expected)
  221. # Index
  222. expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"])
  223. result = df.rename(str.lower, axis=0)
  224. tm.assert_frame_equal(result, expected)
  225. result = df.rename(str.lower, axis="index")
  226. tm.assert_frame_equal(result, expected)
  227. result = df.rename({"X": "x", "Y": "y"}, axis=0)
  228. tm.assert_frame_equal(result, expected)
  229. result = df.rename({"X": "x", "Y": "y"}, axis="index")
  230. tm.assert_frame_equal(result, expected)
  231. result = df.rename(mapper=str.lower, axis="index")
  232. tm.assert_frame_equal(result, expected)
  233. def test_rename_mapper_multi(self):
  234. df = DataFrame({"A": ["a", "b"], "B": ["c", "d"], "C": [1, 2]}).set_index(
  235. ["A", "B"]
  236. )
  237. result = df.rename(str.upper)
  238. expected = df.rename(index=str.upper)
  239. tm.assert_frame_equal(result, expected)
  240. def test_rename_positional_named(self):
  241. # https://github.com/pandas-dev/pandas/issues/12392
  242. df = DataFrame({"a": [1, 2], "b": [1, 2]}, index=["X", "Y"])
  243. result = df.rename(index=str.lower, columns=str.upper)
  244. expected = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["x", "y"])
  245. tm.assert_frame_equal(result, expected)
  def test_rename_axis_style_raises(self):
      """Conflicting combinations of mapper, index/columns and axis raise ``TypeError``."""
      # see gh-12392
      df = DataFrame({"A": [1, 2], "B": [1, 2]}, index=["0", "1"])

      # Named target and axis
      over_spec_msg = "Cannot specify both 'axis' and any of 'index' or 'columns'"
      named_target_cases = [
          {"index": str.lower, "axis": 1},
          {"index": str.lower, "axis": "columns"},
          {"columns": str.lower, "axis": "columns"},
          {"index": str.lower, "axis": 0},
      ]
      for conflicting_kwargs in named_target_cases:
          with pytest.raises(TypeError, match=over_spec_msg):
              df.rename(**conflicting_kwargs)

      # Multiple targets and axis
      with pytest.raises(TypeError, match=over_spec_msg):
          df.rename(str.lower, index=str.lower, axis="columns")

      # Too many targets
      over_spec_msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'"
      with pytest.raises(TypeError, match=over_spec_msg):
          df.rename(str.lower, index=str.lower, columns=str.lower)

      # Duplicates
      # The builtin ``id`` is only a placeholder callable passed twice as the mapper.
      with pytest.raises(TypeError, match="multiple values"):
          df.rename(id, mapper=id)
  def test_rename_positional_raises(self):
      """Passing a second positional argument to ``rename`` raises ``TypeError``."""
      # GH 29136
      frame = DataFrame(columns=["A", "B"])
      expected_msg = r"rename\(\) takes from 1 to 2 positional arguments"

      with pytest.raises(TypeError, match=expected_msg):
          frame.rename(None, str.lower)
  def test_rename_no_mappings_raises(self):
      """Calling ``rename`` with no mapping, or only ``None`` mappings, raises ``TypeError``."""
      # GH 29136
      frame = DataFrame([[1]])
      expected_msg = "must pass an index to rename"

      # (positional args, keyword args) for each call that supplies no mapping.
      empty_calls = [
          ((), {}),
          ((None,), {"index": None}),
          ((None,), {"columns": None}),
          ((None,), {"columns": None, "index": None}),
      ]
      for call_args, call_kwargs in empty_calls:
          with pytest.raises(TypeError, match=expected_msg):
              frame.rename(*call_args, **call_kwargs)
  def test_rename_mapper_and_positional_arguments_raises(self):
      """A positional mapper combined with ``index=``/``columns=`` raises ``TypeError``."""
      # GH 29136
      frame = DataFrame([[1]])
      expected_msg = "Cannot specify both 'mapper' and any of 'index' or 'columns'"

      conflicting_kwargs = [
          {"index": {}},
          {"columns": {}},
          {"columns": {}, "index": {}},
      ]
      for extra in conflicting_kwargs:
          with pytest.raises(TypeError, match=expected_msg):
              frame.rename({}, **extra)
  297. def test_rename_with_duplicate_columns(self):
  298. # GH#4403
  299. df4 = DataFrame(
  300. {"RT": [0.0454], "TClose": [22.02], "TExg": [0.0422]},
  301. index=MultiIndex.from_tuples(
  302. [(600809, 20130331)], names=["STK_ID", "RPT_Date"]
  303. ),
  304. )
  305. df5 = DataFrame(
  306. {
  307. "RPT_Date": [20120930, 20121231, 20130331],
  308. "STK_ID": [600809] * 3,
  309. "STK_Name": ["饡驦", "饡驦", "饡驦"],
  310. "TClose": [38.05, 41.66, 30.01],
  311. },
  312. index=MultiIndex.from_tuples(
  313. [(600809, 20120930), (600809, 20121231), (600809, 20130331)],
  314. names=["STK_ID", "RPT_Date"],
  315. ),
  316. )
  317. # TODO: can we construct this without merge?
  318. k = merge(df4, df5, how="inner", left_index=True, right_index=True)
  319. result = k.rename(columns={"TClose_x": "TClose", "TClose_y": "QT_Close"})
  320. str(result)
  321. result.dtypes
  322. expected = DataFrame(
  323. [[0.0454, 22.02, 0.0422, 20130331, 600809, "饡驦", 30.01]],
  324. columns=[
  325. "RT",
  326. "TClose",
  327. "TExg",
  328. "RPT_Date",
  329. "STK_ID",
  330. "STK_Name",
  331. "QT_Close",
  332. ],
  333. ).set_index(["STK_ID", "RPT_Date"], drop=False)
  334. tm.assert_frame_equal(result, expected)
  335. def test_rename_boolean_index(self):
  336. df = DataFrame(np.arange(15).reshape(3, 5), columns=[False, True, 2, 3, 4])
  337. mapper = {0: "foo", 1: "bar", 2: "bah"}
  338. res = df.rename(index=mapper)
  339. exp = DataFrame(
  340. np.arange(15).reshape(3, 5),
  341. columns=[False, True, 2, 3, 4],
  342. index=["foo", "bar", "bah"],
  343. )
  344. tm.assert_frame_equal(res, exp)