# common.py -- base classes for index sub-class tests.
  1. from __future__ import annotations
  2. from datetime import datetime
  3. import gc
  4. import numpy as np
  5. import pytest
  6. from pandas._libs.tslibs import Timestamp
  7. from pandas.core.dtypes.common import (
  8. is_datetime64tz_dtype,
  9. is_integer_dtype,
  10. )
  11. from pandas.core.dtypes.dtypes import CategoricalDtype
  12. import pandas as pd
  13. from pandas import (
  14. CategoricalIndex,
  15. DatetimeIndex,
  16. Index,
  17. IntervalIndex,
  18. MultiIndex,
  19. PeriodIndex,
  20. RangeIndex,
  21. Series,
  22. TimedeltaIndex,
  23. isna,
  24. )
  25. import pandas._testing as tm
  26. from pandas.core.arrays import BaseMaskedArray
  27. class Base:
  28. """
  29. Base class for index sub-class tests.
  30. """
  31. _index_cls: type[Index]
  32. @pytest.fixture
  33. def simple_index(self):
  34. raise NotImplementedError("Method not implemented")
  35. def create_index(self) -> Index:
  36. raise NotImplementedError("Method not implemented")
  37. def test_pickle_compat_construction(self):
  38. # need an object to create with
  39. msg = "|".join(
  40. [
  41. r"Index\(\.\.\.\) must be called with a collection of some "
  42. r"kind, None was passed",
  43. r"DatetimeIndex\(\) must be called with a collection of some "
  44. r"kind, None was passed",
  45. r"TimedeltaIndex\(\) must be called with a collection of some "
  46. r"kind, None was passed",
  47. r"__new__\(\) missing 1 required positional argument: 'data'",
  48. r"__new__\(\) takes at least 2 arguments \(1 given\)",
  49. ]
  50. )
  51. with pytest.raises(TypeError, match=msg):
  52. self._index_cls()
  53. def test_shift(self, simple_index):
  54. # GH8083 test the base class for shift
  55. idx = simple_index
  56. msg = (
  57. f"This method is only implemented for DatetimeIndex, PeriodIndex and "
  58. f"TimedeltaIndex; Got type {type(idx).__name__}"
  59. )
  60. with pytest.raises(NotImplementedError, match=msg):
  61. idx.shift(1)
  62. with pytest.raises(NotImplementedError, match=msg):
  63. idx.shift(1, 2)
  64. def test_constructor_name_unhashable(self, simple_index):
  65. # GH#29069 check that name is hashable
  66. # See also same-named test in tests.series.test_constructors
  67. idx = simple_index
  68. with pytest.raises(TypeError, match="Index.name must be a hashable type"):
  69. type(idx)(idx, name=[])
  70. def test_create_index_existing_name(self, simple_index):
  71. # GH11193, when an existing index is passed, and a new name is not
  72. # specified, the new index should inherit the previous object name
  73. expected = simple_index
  74. if not isinstance(expected, MultiIndex):
  75. expected.name = "foo"
  76. result = Index(expected)
  77. tm.assert_index_equal(result, expected)
  78. result = Index(expected, name="bar")
  79. expected.name = "bar"
  80. tm.assert_index_equal(result, expected)
  81. else:
  82. expected.names = ["foo", "bar"]
  83. result = Index(expected)
  84. tm.assert_index_equal(
  85. result,
  86. Index(
  87. Index(
  88. [
  89. ("foo", "one"),
  90. ("foo", "two"),
  91. ("bar", "one"),
  92. ("baz", "two"),
  93. ("qux", "one"),
  94. ("qux", "two"),
  95. ],
  96. dtype="object",
  97. ),
  98. names=["foo", "bar"],
  99. ),
  100. )
  101. result = Index(expected, names=["A", "B"])
  102. tm.assert_index_equal(
  103. result,
  104. Index(
  105. Index(
  106. [
  107. ("foo", "one"),
  108. ("foo", "two"),
  109. ("bar", "one"),
  110. ("baz", "two"),
  111. ("qux", "one"),
  112. ("qux", "two"),
  113. ],
  114. dtype="object",
  115. ),
  116. names=["A", "B"],
  117. ),
  118. )
  119. def test_numeric_compat(self, simple_index):
  120. idx = simple_index
  121. # Check that this doesn't cover MultiIndex case, if/when it does,
  122. # we can remove multi.test_compat.test_numeric_compat
  123. assert not isinstance(idx, MultiIndex)
  124. if type(idx) is Index:
  125. return
  126. typ = type(idx._data).__name__
  127. cls = type(idx).__name__
  128. lmsg = "|".join(
  129. [
  130. rf"unsupported operand type\(s\) for \*: '{typ}' and 'int'",
  131. "cannot perform (__mul__|__truediv__|__floordiv__) with "
  132. f"this index type: ({cls}|{typ})",
  133. ]
  134. )
  135. with pytest.raises(TypeError, match=lmsg):
  136. idx * 1
  137. rmsg = "|".join(
  138. [
  139. rf"unsupported operand type\(s\) for \*: 'int' and '{typ}'",
  140. "cannot perform (__rmul__|__rtruediv__|__rfloordiv__) with "
  141. f"this index type: ({cls}|{typ})",
  142. ]
  143. )
  144. with pytest.raises(TypeError, match=rmsg):
  145. 1 * idx
  146. div_err = lmsg.replace("*", "/")
  147. with pytest.raises(TypeError, match=div_err):
  148. idx / 1
  149. div_err = rmsg.replace("*", "/")
  150. with pytest.raises(TypeError, match=div_err):
  151. 1 / idx
  152. floordiv_err = lmsg.replace("*", "//")
  153. with pytest.raises(TypeError, match=floordiv_err):
  154. idx // 1
  155. floordiv_err = rmsg.replace("*", "//")
  156. with pytest.raises(TypeError, match=floordiv_err):
  157. 1 // idx
  158. def test_logical_compat(self, simple_index):
  159. idx = simple_index
  160. with pytest.raises(TypeError, match="cannot perform all"):
  161. idx.all()
  162. with pytest.raises(TypeError, match="cannot perform any"):
  163. idx.any()
  164. def test_repr_roundtrip(self, simple_index):
  165. idx = simple_index
  166. tm.assert_index_equal(eval(repr(idx)), idx)
  167. def test_repr_max_seq_item_setting(self, simple_index):
  168. # GH10182
  169. idx = simple_index
  170. idx = idx.repeat(50)
  171. with pd.option_context("display.max_seq_items", None):
  172. repr(idx)
  173. assert "..." not in str(idx)
  174. def test_ensure_copied_data(self, index):
  175. # Check the "copy" argument of each Index.__new__ is honoured
  176. # GH12309
  177. init_kwargs = {}
  178. if isinstance(index, PeriodIndex):
  179. # Needs "freq" specification:
  180. init_kwargs["freq"] = index.freq
  181. elif isinstance(index, (RangeIndex, MultiIndex, CategoricalIndex)):
  182. # RangeIndex cannot be initialized from data
  183. # MultiIndex and CategoricalIndex are tested separately
  184. return
  185. elif index.dtype == object and index.inferred_type == "boolean":
  186. init_kwargs["dtype"] = index.dtype
  187. index_type = type(index)
  188. result = index_type(index.values, copy=True, **init_kwargs)
  189. if is_datetime64tz_dtype(index.dtype):
  190. result = result.tz_localize("UTC").tz_convert(index.tz)
  191. if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
  192. index = index._with_freq(None)
  193. tm.assert_index_equal(index, result)
  194. if isinstance(index, PeriodIndex):
  195. # .values an object array of Period, thus copied
  196. result = index_type(ordinal=index.asi8, copy=False, **init_kwargs)
  197. tm.assert_numpy_array_equal(index.asi8, result.asi8, check_same="same")
  198. elif isinstance(index, IntervalIndex):
  199. # checked in test_interval.py
  200. pass
  201. elif type(index) is Index and not isinstance(index.dtype, np.dtype):
  202. result = index_type(index.values, copy=False, **init_kwargs)
  203. tm.assert_index_equal(result, index)
  204. if isinstance(index._values, BaseMaskedArray):
  205. assert np.shares_memory(index._values._data, result._values._data)
  206. tm.assert_numpy_array_equal(
  207. index._values._data, result._values._data, check_same="same"
  208. )
  209. assert np.shares_memory(index._values._mask, result._values._mask)
  210. tm.assert_numpy_array_equal(
  211. index._values._mask, result._values._mask, check_same="same"
  212. )
  213. elif index.dtype == "string[python]":
  214. assert np.shares_memory(index._values._ndarray, result._values._ndarray)
  215. tm.assert_numpy_array_equal(
  216. index._values._ndarray, result._values._ndarray, check_same="same"
  217. )
  218. elif index.dtype == "string[pyarrow]":
  219. assert tm.shares_memory(result._values, index._values)
  220. else:
  221. raise NotImplementedError(index.dtype)
  222. else:
  223. result = index_type(index.values, copy=False, **init_kwargs)
  224. tm.assert_numpy_array_equal(index.values, result.values, check_same="same")
  225. def test_memory_usage(self, index):
  226. index._engine.clear_mapping()
  227. result = index.memory_usage()
  228. if index.empty:
  229. # we report 0 for no-length
  230. assert result == 0
  231. return
  232. # non-zero length
  233. index.get_loc(index[0])
  234. result2 = index.memory_usage()
  235. result3 = index.memory_usage(deep=True)
  236. # RangeIndex, IntervalIndex
  237. # don't have engines
  238. # Index[EA] has engine but it does not have a Hashtable .mapping
  239. if not isinstance(index, (RangeIndex, IntervalIndex)) and not (
  240. type(index) is Index and not isinstance(index.dtype, np.dtype)
  241. ):
  242. assert result2 > result
  243. if index.inferred_type == "object":
  244. assert result3 > result2
  245. def test_argsort(self, index):
  246. # separately tested
  247. if isinstance(index, CategoricalIndex):
  248. return
  249. result = index.argsort()
  250. expected = np.array(index).argsort()
  251. tm.assert_numpy_array_equal(result, expected, check_dtype=False)
  252. def test_numpy_argsort(self, index):
  253. result = np.argsort(index)
  254. expected = index.argsort()
  255. tm.assert_numpy_array_equal(result, expected)
  256. result = np.argsort(index, kind="mergesort")
  257. expected = index.argsort(kind="mergesort")
  258. tm.assert_numpy_array_equal(result, expected)
  259. # these are the only two types that perform
  260. # pandas compatibility input validation - the
  261. # rest already perform separate (or no) such
  262. # validation via their 'values' attribute as
  263. # defined in pandas.core.indexes/base.py - they
  264. # cannot be changed at the moment due to
  265. # backwards compatibility concerns
  266. if isinstance(index, (CategoricalIndex, RangeIndex)):
  267. msg = "the 'axis' parameter is not supported"
  268. with pytest.raises(ValueError, match=msg):
  269. np.argsort(index, axis=1)
  270. msg = "the 'order' parameter is not supported"
  271. with pytest.raises(ValueError, match=msg):
  272. np.argsort(index, order=("a", "b"))
  273. def test_repeat(self, simple_index):
  274. rep = 2
  275. idx = simple_index.copy()
  276. new_index_cls = idx._constructor
  277. expected = new_index_cls(idx.values.repeat(rep), name=idx.name)
  278. tm.assert_index_equal(idx.repeat(rep), expected)
  279. idx = simple_index
  280. rep = np.arange(len(idx))
  281. expected = new_index_cls(idx.values.repeat(rep), name=idx.name)
  282. tm.assert_index_equal(idx.repeat(rep), expected)
  283. def test_numpy_repeat(self, simple_index):
  284. rep = 2
  285. idx = simple_index
  286. expected = idx.repeat(rep)
  287. tm.assert_index_equal(np.repeat(idx, rep), expected)
  288. msg = "the 'axis' parameter is not supported"
  289. with pytest.raises(ValueError, match=msg):
  290. np.repeat(idx, rep, axis=0)
  291. def test_where(self, listlike_box, simple_index):
  292. klass = listlike_box
  293. idx = simple_index
  294. if isinstance(idx, (DatetimeIndex, TimedeltaIndex)):
  295. # where does not preserve freq
  296. idx = idx._with_freq(None)
  297. cond = [True] * len(idx)
  298. result = idx.where(klass(cond))
  299. expected = idx
  300. tm.assert_index_equal(result, expected)
  301. cond = [False] + [True] * len(idx[1:])
  302. expected = Index([idx._na_value] + idx[1:].tolist(), dtype=idx.dtype)
  303. result = idx.where(klass(cond))
  304. tm.assert_index_equal(result, expected)
  305. def test_insert_base(self, index):
  306. result = index[1:4]
  307. if not len(index):
  308. return
  309. # test 0th element
  310. assert index[0:4].equals(result.insert(0, index[0]))
  311. def test_insert_out_of_bounds(self, index):
  312. # TypeError/IndexError matches what np.insert raises in these cases
  313. if len(index) > 0:
  314. err = TypeError
  315. else:
  316. err = IndexError
  317. if len(index) == 0:
  318. # 0 vs 0.5 in error message varies with numpy version
  319. msg = "index (0|0.5) is out of bounds for axis 0 with size 0"
  320. else:
  321. msg = "slice indices must be integers or None or have an __index__ method"
  322. with pytest.raises(err, match=msg):
  323. index.insert(0.5, "foo")
  324. msg = "|".join(
  325. [
  326. r"index -?\d+ is out of bounds for axis 0 with size \d+",
  327. "loc must be an integer between",
  328. ]
  329. )
  330. with pytest.raises(IndexError, match=msg):
  331. index.insert(len(index) + 1, 1)
  332. with pytest.raises(IndexError, match=msg):
  333. index.insert(-len(index) - 1, 1)
  334. def test_delete_base(self, index):
  335. if not len(index):
  336. return
  337. if isinstance(index, RangeIndex):
  338. # tested in class
  339. return
  340. expected = index[1:]
  341. result = index.delete(0)
  342. assert result.equals(expected)
  343. assert result.name == expected.name
  344. expected = index[:-1]
  345. result = index.delete(-1)
  346. assert result.equals(expected)
  347. assert result.name == expected.name
  348. length = len(index)
  349. msg = f"index {length} is out of bounds for axis 0 with size {length}"
  350. with pytest.raises(IndexError, match=msg):
  351. index.delete(length)
  352. def test_equals(self, index):
  353. if isinstance(index, IntervalIndex):
  354. # IntervalIndex tested separately, the index.equals(index.astype(object))
  355. # fails for IntervalIndex
  356. return
  357. is_ea_idx = type(index) is Index and not isinstance(index.dtype, np.dtype)
  358. assert index.equals(index)
  359. assert index.equals(index.copy())
  360. if not is_ea_idx:
  361. # doesn't hold for e.g. IntegerDtype
  362. assert index.equals(index.astype(object))
  363. assert not index.equals(list(index))
  364. assert not index.equals(np.array(index))
  365. # Cannot pass in non-int64 dtype to RangeIndex
  366. if not isinstance(index, RangeIndex) and not is_ea_idx:
  367. same_values = Index(index, dtype=object)
  368. assert index.equals(same_values)
  369. assert same_values.equals(index)
  370. if index.nlevels == 1:
  371. # do not test MultiIndex
  372. assert not index.equals(Series(index))
  373. def test_equals_op(self, simple_index):
  374. # GH9947, GH10637
  375. index_a = simple_index
  376. n = len(index_a)
  377. index_b = index_a[0:-1]
  378. index_c = index_a[0:-1].append(index_a[-2:-1])
  379. index_d = index_a[0:1]
  380. msg = "Lengths must match|could not be broadcast"
  381. with pytest.raises(ValueError, match=msg):
  382. index_a == index_b
  383. expected1 = np.array([True] * n)
  384. expected2 = np.array([True] * (n - 1) + [False])
  385. tm.assert_numpy_array_equal(index_a == index_a, expected1)
  386. tm.assert_numpy_array_equal(index_a == index_c, expected2)
  387. # test comparisons with numpy arrays
  388. array_a = np.array(index_a)
  389. array_b = np.array(index_a[0:-1])
  390. array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
  391. array_d = np.array(index_a[0:1])
  392. with pytest.raises(ValueError, match=msg):
  393. index_a == array_b
  394. tm.assert_numpy_array_equal(index_a == array_a, expected1)
  395. tm.assert_numpy_array_equal(index_a == array_c, expected2)
  396. # test comparisons with Series
  397. series_a = Series(array_a)
  398. series_b = Series(array_b)
  399. series_c = Series(array_c)
  400. series_d = Series(array_d)
  401. with pytest.raises(ValueError, match=msg):
  402. index_a == series_b
  403. tm.assert_numpy_array_equal(index_a == series_a, expected1)
  404. tm.assert_numpy_array_equal(index_a == series_c, expected2)
  405. # cases where length is 1 for one of them
  406. with pytest.raises(ValueError, match="Lengths must match"):
  407. index_a == index_d
  408. with pytest.raises(ValueError, match="Lengths must match"):
  409. index_a == series_d
  410. with pytest.raises(ValueError, match="Lengths must match"):
  411. index_a == array_d
  412. msg = "Can only compare identically-labeled Series objects"
  413. with pytest.raises(ValueError, match=msg):
  414. series_a == series_d
  415. with pytest.raises(ValueError, match="Lengths must match"):
  416. series_a == array_d
  417. # comparing with a scalar should broadcast; note that we are excluding
  418. # MultiIndex because in this case each item in the index is a tuple of
  419. # length 2, and therefore is considered an array of length 2 in the
  420. # comparison instead of a scalar
  421. if not isinstance(index_a, MultiIndex):
  422. expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
  423. # assuming the 2nd to last item is unique in the data
  424. item = index_a[-2]
  425. tm.assert_numpy_array_equal(index_a == item, expected3)
  426. tm.assert_series_equal(series_a == item, Series(expected3))
  427. def test_format(self, simple_index):
  428. # GH35439
  429. idx = simple_index
  430. expected = [str(x) for x in idx]
  431. assert idx.format() == expected
  432. def test_format_empty(self):
  433. # GH35712
  434. empty_idx = self._index_cls([])
  435. assert empty_idx.format() == []
  436. assert empty_idx.format(name=True) == [""]
  437. def test_fillna(self, index):
  438. # GH 11343
  439. if len(index) == 0:
  440. return
  441. elif index.dtype == bool:
  442. # can't hold NAs
  443. return
  444. elif isinstance(index, Index) and is_integer_dtype(index.dtype):
  445. return
  446. elif isinstance(index, MultiIndex):
  447. idx = index.copy(deep=True)
  448. msg = "isna is not defined for MultiIndex"
  449. with pytest.raises(NotImplementedError, match=msg):
  450. idx.fillna(idx[0])
  451. else:
  452. idx = index.copy(deep=True)
  453. result = idx.fillna(idx[0])
  454. tm.assert_index_equal(result, idx)
  455. assert result is not idx
  456. msg = "'value' must be a scalar, passed: "
  457. with pytest.raises(TypeError, match=msg):
  458. idx.fillna([idx[0]])
  459. idx = index.copy(deep=True)
  460. values = idx._values
  461. values[1] = np.nan
  462. idx = type(index)(values)
  463. msg = "does not support 'downcast'"
  464. with pytest.raises(NotImplementedError, match=msg):
  465. # For now at least, we only raise if there are NAs present
  466. idx.fillna(idx[0], downcast="infer")
  467. expected = np.array([False] * len(idx), dtype=bool)
  468. expected[1] = True
  469. tm.assert_numpy_array_equal(idx._isnan, expected)
  470. assert idx.hasnans is True
  471. def test_nulls(self, index):
  472. # this is really a smoke test for the methods
  473. # as these are adequately tested for function elsewhere
  474. if len(index) == 0:
  475. tm.assert_numpy_array_equal(index.isna(), np.array([], dtype=bool))
  476. elif isinstance(index, MultiIndex):
  477. idx = index.copy()
  478. msg = "isna is not defined for MultiIndex"
  479. with pytest.raises(NotImplementedError, match=msg):
  480. idx.isna()
  481. elif not index.hasnans:
  482. tm.assert_numpy_array_equal(index.isna(), np.zeros(len(index), dtype=bool))
  483. tm.assert_numpy_array_equal(index.notna(), np.ones(len(index), dtype=bool))
  484. else:
  485. result = isna(index)
  486. tm.assert_numpy_array_equal(index.isna(), result)
  487. tm.assert_numpy_array_equal(index.notna(), ~result)
  488. def test_empty(self, simple_index):
  489. # GH 15270
  490. idx = simple_index
  491. assert not idx.empty
  492. assert idx[:0].empty
  493. def test_join_self_unique(self, join_type, simple_index):
  494. idx = simple_index
  495. if idx.is_unique:
  496. joined = idx.join(idx, how=join_type)
  497. assert (idx == joined).all()
  498. def test_map(self, simple_index):
  499. # callable
  500. idx = simple_index
  501. result = idx.map(lambda x: x)
  502. # RangeIndex are equivalent to the similar Index with int64 dtype
  503. tm.assert_index_equal(result, idx, exact="equiv")
  504. @pytest.mark.parametrize(
  505. "mapper",
  506. [
  507. lambda values, index: {i: e for e, i in zip(values, index)},
  508. lambda values, index: Series(values, index),
  509. ],
  510. )
  511. def test_map_dictlike(self, mapper, simple_index):
  512. idx = simple_index
  513. if isinstance(idx, CategoricalIndex):
  514. # FIXME: this fails with CategoricalIndex bc it goes through
  515. # Categorical.map which ends up calling get_indexer with
  516. # non-unique values, which raises. This _should_ work fine for
  517. # CategoricalIndex.
  518. pytest.skip(f"skipping tests for {type(idx)}")
  519. identity = mapper(idx.values, idx)
  520. result = idx.map(identity)
  521. # RangeIndex are equivalent to the similar Index with int64 dtype
  522. tm.assert_index_equal(result, idx, exact="equiv")
  523. # empty mappable
  524. dtype = None
  525. if idx.dtype.kind == "f":
  526. dtype = idx.dtype
  527. expected = Index([np.nan] * len(idx), dtype=dtype)
  528. result = idx.map(mapper(expected, idx))
  529. tm.assert_index_equal(result, expected)
  530. def test_map_str(self, simple_index):
  531. # GH 31202
  532. idx = simple_index
  533. result = idx.map(str)
  534. expected = Index([str(x) for x in idx], dtype=object)
  535. tm.assert_index_equal(result, expected)
  536. @pytest.mark.parametrize("copy", [True, False])
  537. @pytest.mark.parametrize("name", [None, "foo"])
  538. @pytest.mark.parametrize("ordered", [True, False])
  539. def test_astype_category(self, copy, name, ordered, simple_index):
  540. # GH 18630
  541. idx = simple_index
  542. if name:
  543. idx = idx.rename(name)
  544. # standard categories
  545. dtype = CategoricalDtype(ordered=ordered)
  546. result = idx.astype(dtype, copy=copy)
  547. expected = CategoricalIndex(idx, name=name, ordered=ordered)
  548. tm.assert_index_equal(result, expected, exact=True)
  549. # non-standard categories
  550. dtype = CategoricalDtype(idx.unique().tolist()[:-1], ordered)
  551. result = idx.astype(dtype, copy=copy)
  552. expected = CategoricalIndex(idx, name=name, dtype=dtype)
  553. tm.assert_index_equal(result, expected, exact=True)
  554. if ordered is False:
  555. # dtype='category' defaults to ordered=False, so only test once
  556. result = idx.astype("category", copy=copy)
  557. expected = CategoricalIndex(idx, name=name)
  558. tm.assert_index_equal(result, expected, exact=True)
  559. def test_is_unique(self, simple_index):
  560. # initialize a unique index
  561. index = simple_index.drop_duplicates()
  562. assert index.is_unique is True
  563. # empty index should be unique
  564. index_empty = index[:0]
  565. assert index_empty.is_unique is True
  566. # test basic dupes
  567. index_dup = index.insert(0, index[0])
  568. assert index_dup.is_unique is False
  569. # single NA should be unique
  570. index_na = index.insert(0, np.nan)
  571. assert index_na.is_unique is True
  572. # multiple NA should not be unique
  573. index_na_dup = index_na.insert(0, np.nan)
  574. assert index_na_dup.is_unique is False
  575. @pytest.mark.arm_slow
  576. def test_engine_reference_cycle(self, simple_index):
  577. # GH27585
  578. index = simple_index
  579. nrefs_pre = len(gc.get_referrers(index))
  580. index._engine
  581. assert len(gc.get_referrers(index)) == nrefs_pre
  582. def test_getitem_2d_deprecated(self, simple_index):
  583. # GH#30588, GH#31479
  584. idx = simple_index
  585. msg = "Multi-dimensional indexing"
  586. with pytest.raises(ValueError, match=msg):
  587. idx[:, None]
  588. if not isinstance(idx, RangeIndex):
  589. # GH#44051 RangeIndex already raised pre-2.0 with a different message
  590. with pytest.raises(ValueError, match=msg):
  591. idx[True]
  592. with pytest.raises(ValueError, match=msg):
  593. idx[False]
  594. else:
  595. msg = "only integers, slices"
  596. with pytest.raises(IndexError, match=msg):
  597. idx[True]
  598. with pytest.raises(IndexError, match=msg):
  599. idx[False]
  600. def test_copy_shares_cache(self, simple_index):
  601. # GH32898, GH36840
  602. idx = simple_index
  603. idx.get_loc(idx[0]) # populates the _cache.
  604. copy = idx.copy()
  605. assert copy._cache is idx._cache
  606. def test_shallow_copy_shares_cache(self, simple_index):
  607. # GH32669, GH36840
  608. idx = simple_index
  609. idx.get_loc(idx[0]) # populates the _cache.
  610. shallow_copy = idx._view()
  611. assert shallow_copy._cache is idx._cache
  612. shallow_copy = idx._shallow_copy(idx._data)
  613. assert shallow_copy._cache is not idx._cache
  614. assert shallow_copy._cache == {}
  615. def test_index_groupby(self, simple_index):
  616. idx = simple_index[:5]
  617. to_groupby = np.array([1, 2, np.nan, 2, 1])
  618. tm.assert_dict_equal(
  619. idx.groupby(to_groupby), {1.0: idx[[0, 4]], 2.0: idx[[1, 3]]}
  620. )
  621. to_groupby = DatetimeIndex(
  622. [
  623. datetime(2011, 11, 1),
  624. datetime(2011, 12, 1),
  625. pd.NaT,
  626. datetime(2011, 12, 1),
  627. datetime(2011, 11, 1),
  628. ],
  629. tz="UTC",
  630. ).values
  631. ex_keys = [Timestamp("2011-11-01"), Timestamp("2011-12-01")]
  632. expected = {ex_keys[0]: idx[[0, 4]], ex_keys[1]: idx[[1, 3]]}
  633. tm.assert_dict_equal(idx.groupby(to_groupby), expected)
  634. def test_append_preserves_dtype(self, simple_index):
  635. # In particular Index with dtype float32
  636. index = simple_index
  637. N = len(index)
  638. result = index.append(index)
  639. assert result.dtype == index.dtype
  640. tm.assert_index_equal(result[:N], index, check_exact=True)
  641. tm.assert_index_equal(result[N:], index, check_exact=True)
  642. alt = index.take(list(range(N)) * 2)
  643. tm.assert_index_equal(result, alt, check_exact=True)
  644. def test_inv(self, simple_index):
  645. idx = simple_index
  646. if idx.dtype.kind in ["i", "u"]:
  647. res = ~idx
  648. expected = Index(~idx.values, name=idx.name)
  649. tm.assert_index_equal(res, expected)
  650. # check that we are matching Series behavior
  651. res2 = ~Series(idx)
  652. tm.assert_series_equal(res2, Series(expected))
  653. else:
  654. if idx.dtype.kind == "f":
  655. msg = "ufunc 'invert' not supported for the input types"
  656. else:
  657. msg = "bad operand"
  658. with pytest.raises(TypeError, match=msg):
  659. ~idx
  660. # check that we get the same behavior with Series
  661. with pytest.raises(TypeError, match=msg):
  662. ~Series(idx)
  663. def test_is_boolean_is_deprecated(self, simple_index):
  664. # GH50042
  665. idx = simple_index
  666. with tm.assert_produces_warning(FutureWarning):
  667. idx.is_boolean()
  668. def test_is_floating_is_deprecated(self, simple_index):
  669. # GH50042
  670. idx = simple_index
  671. with tm.assert_produces_warning(FutureWarning):
  672. idx.is_floating()
  673. def test_is_integer_is_deprecated(self, simple_index):
  674. # GH50042
  675. idx = simple_index
  676. with tm.assert_produces_warning(FutureWarning):
  677. idx.is_integer()
  678. def test_holds_integer_deprecated(self, simple_index):
  679. # GH50243
  680. idx = simple_index
  681. msg = f"{type(idx).__name__}.holds_integer is deprecated. "
  682. with tm.assert_produces_warning(FutureWarning, match=msg):
  683. idx.holds_integer()
  684. def test_is_numeric_is_deprecated(self, simple_index):
  685. # GH50042
  686. idx = simple_index
  687. with tm.assert_produces_warning(
  688. FutureWarning,
  689. match=f"{type(idx).__name__}.is_numeric is deprecated. ",
  690. ):
  691. idx.is_numeric()
  692. def test_is_categorical_is_deprecated(self, simple_index):
  693. # GH50042
  694. idx = simple_index
  695. with tm.assert_produces_warning(
  696. FutureWarning,
  697. match=r"Use pandas\.api\.types\.is_categorical_dtype instead",
  698. ):
  699. idx.is_categorical()
  700. def test_is_interval_is_deprecated(self, simple_index):
  701. # GH50042
  702. idx = simple_index
  703. with tm.assert_produces_warning(FutureWarning):
  704. idx.is_interval()
  705. def test_is_object_is_deprecated(self, simple_index):
  706. # GH50042
  707. idx = simple_index
  708. with tm.assert_produces_warning(FutureWarning):
  709. idx.is_object()
  710. class NumericBase(Base):
  711. """
  712. Base class for numeric index (incl. RangeIndex) sub-class tests.
  713. """
  714. def test_constructor_unwraps_index(self, dtype):
  715. index_cls = self._index_cls
  716. idx = Index([1, 2], dtype=dtype)
  717. result = index_cls(idx)
  718. expected = np.array([1, 2], dtype=idx.dtype)
  719. tm.assert_numpy_array_equal(result._data, expected)
  720. def test_where(self):
  721. # Tested in numeric.test_indexing
  722. pass
  723. def test_can_hold_identifiers(self, simple_index):
  724. idx = simple_index
  725. key = idx[0]
  726. assert idx._can_hold_identifiers_and_holds_name(key) is False
  727. def test_view(self, dtype):
  728. index_cls = self._index_cls
  729. idx = index_cls([], dtype=dtype, name="Foo")
  730. idx_view = idx.view()
  731. assert idx_view.name == "Foo"
  732. idx_view = idx.view(dtype)
  733. tm.assert_index_equal(idx, index_cls(idx_view, name="Foo"), exact=True)
  734. idx_view = idx.view(index_cls)
  735. tm.assert_index_equal(idx, index_cls(idx_view, name="Foo"), exact=True)
  736. def test_format(self, simple_index):
  737. # GH35439
  738. idx = simple_index
  739. max_width = max(len(str(x)) for x in idx)
  740. expected = [str(x).ljust(max_width) for x in idx]
  741. assert idx.format() == expected
  742. def test_numeric_compat(self):
  743. pass # override Base method
  744. def test_insert_non_na(self, simple_index):
  745. # GH#43921 inserting an element that we know we can hold should
  746. # not change dtype or type (except for RangeIndex)
  747. index = simple_index
  748. result = index.insert(0, index[0])
  749. expected = Index([index[0]] + list(index), dtype=index.dtype)
  750. tm.assert_index_equal(result, expected, exact=True)
  751. def test_insert_na(self, nulls_fixture, simple_index):
  752. # GH 18295 (test missing)
  753. index = simple_index
  754. na_val = nulls_fixture
  755. if na_val is pd.NaT:
  756. expected = Index([index[0], pd.NaT] + list(index[1:]), dtype=object)
  757. else:
  758. expected = Index([index[0], np.nan] + list(index[1:]))
  759. # GH#43921 we preserve float dtype
  760. if index.dtype.kind == "f":
  761. expected = Index(expected, dtype=index.dtype)
  762. result = index.insert(1, na_val)
  763. tm.assert_index_equal(result, expected, exact=True)
  764. def test_arithmetic_explicit_conversions(self):
  765. # GH 8608
  766. # add/sub are overridden explicitly for Float/Int Index
  767. index_cls = self._index_cls
  768. if index_cls is RangeIndex:
  769. idx = RangeIndex(5)
  770. else:
  771. idx = index_cls(np.arange(5, dtype="int64"))
  772. # float conversions
  773. arr = np.arange(5, dtype="int64") * 3.2
  774. expected = Index(arr, dtype=np.float64)
  775. fidx = idx * 3.2
  776. tm.assert_index_equal(fidx, expected)
  777. fidx = 3.2 * idx
  778. tm.assert_index_equal(fidx, expected)
  779. # interops with numpy arrays
  780. expected = Index(arr, dtype=np.float64)
  781. a = np.zeros(5, dtype="float64")
  782. result = fidx - a
  783. tm.assert_index_equal(result, expected)
  784. expected = Index(-arr, dtype=np.float64)
  785. a = np.zeros(5, dtype="float64")
  786. result = a - fidx
  787. tm.assert_index_equal(result, expected)
  788. @pytest.mark.parametrize("complex_dtype", [np.complex64, np.complex128])
  789. def test_astype_to_complex(self, complex_dtype, simple_index):
  790. result = simple_index.astype(complex_dtype)
  791. assert type(result) is Index and result.dtype == complex_dtype
  792. def test_cast_string(self, dtype):
  793. result = self._index_cls(["0", "1", "2"], dtype=dtype)
  794. expected = self._index_cls([0, 1, 2], dtype=dtype)
  795. tm.assert_index_equal(result, expected)