test_get_set.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369
  1. import numpy as np
  2. import pytest
  3. from pandas.compat import PY311
  4. from pandas.core.dtypes.dtypes import DatetimeTZDtype
  5. import pandas as pd
  6. from pandas import (
  7. CategoricalIndex,
  8. MultiIndex,
  9. )
  10. import pandas._testing as tm
  11. def assert_matching(actual, expected, check_dtype=False):
  12. # avoid specifying internal representation
  13. # as much as possible
  14. assert len(actual) == len(expected)
  15. for act, exp in zip(actual, expected):
  16. act = np.asarray(act)
  17. exp = np.asarray(exp)
  18. tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype)
  19. def test_get_level_number_integer(idx):
  20. idx.names = [1, 0]
  21. assert idx._get_level_number(1) == 0
  22. assert idx._get_level_number(0) == 1
  23. msg = "Too many levels: Index has only 2 levels, not 3"
  24. with pytest.raises(IndexError, match=msg):
  25. idx._get_level_number(2)
  26. with pytest.raises(KeyError, match="Level fourth not found"):
  27. idx._get_level_number("fourth")
  28. def test_get_dtypes():
  29. # Test MultiIndex.dtypes (# Gh37062)
  30. idx_multitype = MultiIndex.from_product(
  31. [[1, 2, 3], ["a", "b", "c"], pd.date_range("20200101", periods=2, tz="UTC")],
  32. names=["int", "string", "dt"],
  33. )
  34. expected = pd.Series(
  35. {
  36. "int": np.dtype("int64"),
  37. "string": np.dtype("O"),
  38. "dt": DatetimeTZDtype(tz="utc"),
  39. }
  40. )
  41. tm.assert_series_equal(expected, idx_multitype.dtypes)
  42. def test_get_dtypes_no_level_name():
  43. # Test MultiIndex.dtypes (# GH38580 )
  44. idx_multitype = MultiIndex.from_product(
  45. [
  46. [1, 2, 3],
  47. ["a", "b", "c"],
  48. pd.date_range("20200101", periods=2, tz="UTC"),
  49. ],
  50. )
  51. expected = pd.Series(
  52. {
  53. "level_0": np.dtype("int64"),
  54. "level_1": np.dtype("O"),
  55. "level_2": DatetimeTZDtype(tz="utc"),
  56. }
  57. )
  58. tm.assert_series_equal(expected, idx_multitype.dtypes)
  59. def test_get_dtypes_duplicate_level_names():
  60. # Test MultiIndex.dtypes with non-unique level names (# GH45174)
  61. result = MultiIndex.from_product(
  62. [
  63. [1, 2, 3],
  64. ["a", "b", "c"],
  65. pd.date_range("20200101", periods=2, tz="UTC"),
  66. ],
  67. names=["A", "A", "A"],
  68. ).dtypes
  69. expected = pd.Series(
  70. [np.dtype("int64"), np.dtype("O"), DatetimeTZDtype(tz="utc")],
  71. index=["A", "A", "A"],
  72. )
  73. tm.assert_series_equal(result, expected)
  74. def test_get_level_number_out_of_bounds(multiindex_dataframe_random_data):
  75. frame = multiindex_dataframe_random_data
  76. with pytest.raises(IndexError, match="Too many levels"):
  77. frame.index._get_level_number(2)
  78. with pytest.raises(IndexError, match="not a valid level number"):
  79. frame.index._get_level_number(-3)
  80. def test_set_name_methods(idx, index_names):
  81. # so long as these are synonyms, we don't need to test set_names
  82. assert idx.rename == idx.set_names
  83. new_names = [name + "SUFFIX" for name in index_names]
  84. ind = idx.set_names(new_names)
  85. assert idx.names == index_names
  86. assert ind.names == new_names
  87. msg = "Length of names must match number of levels in MultiIndex"
  88. with pytest.raises(ValueError, match=msg):
  89. ind.set_names(new_names + new_names)
  90. new_names2 = [name + "SUFFIX2" for name in new_names]
  91. res = ind.set_names(new_names2, inplace=True)
  92. assert res is None
  93. assert ind.names == new_names2
  94. # set names for specific level (# GH7792)
  95. ind = idx.set_names(new_names[0], level=0)
  96. assert idx.names == index_names
  97. assert ind.names == [new_names[0], index_names[1]]
  98. res = ind.set_names(new_names2[0], level=0, inplace=True)
  99. assert res is None
  100. assert ind.names == [new_names2[0], index_names[1]]
  101. # set names for multiple levels
  102. ind = idx.set_names(new_names, level=[0, 1])
  103. assert idx.names == index_names
  104. assert ind.names == new_names
  105. res = ind.set_names(new_names2, level=[0, 1], inplace=True)
  106. assert res is None
  107. assert ind.names == new_names2
  108. def test_set_levels_codes_directly(idx):
  109. # setting levels/codes directly raises AttributeError
  110. levels = idx.levels
  111. new_levels = [[lev + "a" for lev in level] for level in levels]
  112. codes = idx.codes
  113. major_codes, minor_codes = codes
  114. major_codes = [(x + 1) % 3 for x in major_codes]
  115. minor_codes = [(x + 1) % 1 for x in minor_codes]
  116. new_codes = [major_codes, minor_codes]
  117. msg = "Can't set attribute"
  118. with pytest.raises(AttributeError, match=msg):
  119. idx.levels = new_levels
  120. msg = (
  121. "property 'codes' of 'MultiIndex' object has no setter"
  122. if PY311
  123. else "can't set attribute"
  124. )
  125. with pytest.raises(AttributeError, match=msg):
  126. idx.codes = new_codes
  127. def test_set_levels(idx):
  128. # side note - you probably wouldn't want to use levels and codes
  129. # directly like this - but it is possible.
  130. levels = idx.levels
  131. new_levels = [[lev + "a" for lev in level] for level in levels]
  132. # level changing [w/o mutation]
  133. ind2 = idx.set_levels(new_levels)
  134. assert_matching(ind2.levels, new_levels)
  135. assert_matching(idx.levels, levels)
  136. # level changing specific level [w/o mutation]
  137. ind2 = idx.set_levels(new_levels[0], level=0)
  138. assert_matching(ind2.levels, [new_levels[0], levels[1]])
  139. assert_matching(idx.levels, levels)
  140. ind2 = idx.set_levels(new_levels[1], level=1)
  141. assert_matching(ind2.levels, [levels[0], new_levels[1]])
  142. assert_matching(idx.levels, levels)
  143. # level changing multiple levels [w/o mutation]
  144. ind2 = idx.set_levels(new_levels, level=[0, 1])
  145. assert_matching(ind2.levels, new_levels)
  146. assert_matching(idx.levels, levels)
  147. # illegal level changing should not change levels
  148. # GH 13754
  149. original_index = idx.copy()
  150. with pytest.raises(ValueError, match="^On"):
  151. idx.set_levels(["c"], level=0)
  152. assert_matching(idx.levels, original_index.levels, check_dtype=True)
  153. with pytest.raises(ValueError, match="^On"):
  154. idx.set_codes([0, 1, 2, 3, 4, 5], level=0)
  155. assert_matching(idx.codes, original_index.codes, check_dtype=True)
  156. with pytest.raises(TypeError, match="^Levels"):
  157. idx.set_levels("c", level=0)
  158. assert_matching(idx.levels, original_index.levels, check_dtype=True)
  159. with pytest.raises(TypeError, match="^Codes"):
  160. idx.set_codes(1, level=0)
  161. assert_matching(idx.codes, original_index.codes, check_dtype=True)
  162. def test_set_codes(idx):
  163. # side note - you probably wouldn't want to use levels and codes
  164. # directly like this - but it is possible.
  165. codes = idx.codes
  166. major_codes, minor_codes = codes
  167. major_codes = [(x + 1) % 3 for x in major_codes]
  168. minor_codes = [(x + 1) % 1 for x in minor_codes]
  169. new_codes = [major_codes, minor_codes]
  170. # changing codes w/o mutation
  171. ind2 = idx.set_codes(new_codes)
  172. assert_matching(ind2.codes, new_codes)
  173. assert_matching(idx.codes, codes)
  174. # codes changing specific level w/o mutation
  175. ind2 = idx.set_codes(new_codes[0], level=0)
  176. assert_matching(ind2.codes, [new_codes[0], codes[1]])
  177. assert_matching(idx.codes, codes)
  178. ind2 = idx.set_codes(new_codes[1], level=1)
  179. assert_matching(ind2.codes, [codes[0], new_codes[1]])
  180. assert_matching(idx.codes, codes)
  181. # codes changing multiple levels w/o mutation
  182. ind2 = idx.set_codes(new_codes, level=[0, 1])
  183. assert_matching(ind2.codes, new_codes)
  184. assert_matching(idx.codes, codes)
  185. # label changing for levels of different magnitude of categories
  186. ind = MultiIndex.from_tuples([(0, i) for i in range(130)])
  187. new_codes = range(129, -1, -1)
  188. expected = MultiIndex.from_tuples([(0, i) for i in new_codes])
  189. # [w/o mutation]
  190. result = ind.set_codes(codes=new_codes, level=1)
  191. assert result.equals(expected)
  192. def test_set_levels_codes_names_bad_input(idx):
  193. levels, codes = idx.levels, idx.codes
  194. names = idx.names
  195. with pytest.raises(ValueError, match="Length of levels"):
  196. idx.set_levels([levels[0]])
  197. with pytest.raises(ValueError, match="Length of codes"):
  198. idx.set_codes([codes[0]])
  199. with pytest.raises(ValueError, match="Length of names"):
  200. idx.set_names([names[0]])
  201. # shouldn't scalar data error, instead should demand list-like
  202. with pytest.raises(TypeError, match="list of lists-like"):
  203. idx.set_levels(levels[0])
  204. # shouldn't scalar data error, instead should demand list-like
  205. with pytest.raises(TypeError, match="list of lists-like"):
  206. idx.set_codes(codes[0])
  207. # shouldn't scalar data error, instead should demand list-like
  208. with pytest.raises(TypeError, match="list-like"):
  209. idx.set_names(names[0])
  210. # should have equal lengths
  211. with pytest.raises(TypeError, match="list of lists-like"):
  212. idx.set_levels(levels[0], level=[0, 1])
  213. with pytest.raises(TypeError, match="list-like"):
  214. idx.set_levels(levels, level=0)
  215. # should have equal lengths
  216. with pytest.raises(TypeError, match="list of lists-like"):
  217. idx.set_codes(codes[0], level=[0, 1])
  218. with pytest.raises(TypeError, match="list-like"):
  219. idx.set_codes(codes, level=0)
  220. # should have equal lengths
  221. with pytest.raises(ValueError, match="Length of names"):
  222. idx.set_names(names[0], level=[0, 1])
  223. with pytest.raises(TypeError, match="Names must be a"):
  224. idx.set_names(names, level=0)
  225. @pytest.mark.parametrize("inplace", [True, False])
  226. def test_set_names_with_nlevel_1(inplace):
  227. # GH 21149
  228. # Ensure that .set_names for MultiIndex with
  229. # nlevels == 1 does not raise any errors
  230. expected = MultiIndex(levels=[[0, 1]], codes=[[0, 1]], names=["first"])
  231. m = MultiIndex.from_product([[0, 1]])
  232. result = m.set_names("first", level=0, inplace=inplace)
  233. if inplace:
  234. result = m
  235. tm.assert_index_equal(result, expected)
  236. @pytest.mark.parametrize("ordered", [True, False])
  237. def test_set_levels_categorical(ordered):
  238. # GH13854
  239. index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])
  240. cidx = CategoricalIndex(list("bac"), ordered=ordered)
  241. result = index.set_levels(cidx, level=0)
  242. expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]], codes=index.codes)
  243. tm.assert_index_equal(result, expected)
  244. result_lvl = result.get_level_values(0)
  245. expected_lvl = CategoricalIndex(
  246. list("bacb"), categories=cidx.categories, ordered=cidx.ordered
  247. )
  248. tm.assert_index_equal(result_lvl, expected_lvl)
  249. def test_set_value_keeps_names():
  250. # motivating example from #3742
  251. lev1 = ["hans", "hans", "hans", "grethe", "grethe", "grethe"]
  252. lev2 = ["1", "2", "3"] * 2
  253. idx = MultiIndex.from_arrays([lev1, lev2], names=["Name", "Number"])
  254. df = pd.DataFrame(
  255. np.random.randn(6, 4), columns=["one", "two", "three", "four"], index=idx
  256. )
  257. df = df.sort_index()
  258. assert df._is_copy is None
  259. assert df.index.names == ("Name", "Number")
  260. df.at[("grethe", "4"), "one"] = 99.34
  261. assert df._is_copy is None
  262. assert df.index.names == ("Name", "Number")
  263. def test_set_levels_with_iterable():
  264. # GH23273
  265. sizes = [1, 2, 3]
  266. colors = ["black"] * 3
  267. index = MultiIndex.from_arrays([sizes, colors], names=["size", "color"])
  268. result = index.set_levels(map(int, ["3", "2", "1"]), level="size")
  269. expected_sizes = [3, 2, 1]
  270. expected = MultiIndex.from_arrays([expected_sizes, colors], names=["size", "color"])
  271. tm.assert_index_equal(result, expected)
  272. def test_set_empty_level():
  273. # GH#48636
  274. midx = MultiIndex.from_arrays([[]], names=["A"])
  275. result = midx.set_levels(pd.DatetimeIndex([]), level=0)
  276. expected = MultiIndex.from_arrays([pd.DatetimeIndex([])], names=["A"])
  277. tm.assert_index_equal(result, expected)
  278. def test_set_levels_pos_args_removal():
  279. # https://github.com/pandas-dev/pandas/issues/41485
  280. idx = MultiIndex.from_tuples(
  281. [
  282. (1, "one"),
  283. (3, "one"),
  284. ],
  285. names=["foo", "bar"],
  286. )
  287. with pytest.raises(TypeError, match="positional arguments"):
  288. idx.set_levels(["a", "b", "c"], 0)
  289. with pytest.raises(TypeError, match="positional arguments"):
  290. idx.set_codes([[0, 1], [1, 0]], 0)