# test_range.py (19 KB)
  1. import numpy as np
  2. import pytest
  3. from pandas.core.dtypes.common import ensure_platform_int
  4. import pandas as pd
  5. from pandas import (
  6. Index,
  7. RangeIndex,
  8. )
  9. import pandas._testing as tm
  10. from pandas.tests.indexes.common import NumericBase
  11. # aliases to make some tests easier to read
  12. RI = RangeIndex
  13. class TestRangeIndex(NumericBase):
  14. _index_cls = RangeIndex
  15. @pytest.fixture
  16. def dtype(self):
  17. return np.int64
  18. @pytest.fixture(
  19. params=["uint64", "float64", "category", "datetime64", "object"],
  20. )
  21. def invalid_dtype(self, request):
  22. return request.param
  23. @pytest.fixture
  24. def simple_index(self):
  25. return self._index_cls(start=0, stop=20, step=2)
  26. @pytest.fixture(
  27. params=[
  28. RangeIndex(start=0, stop=20, step=2, name="foo"),
  29. RangeIndex(start=18, stop=-1, step=-2, name="bar"),
  30. ],
  31. ids=["index_inc", "index_dec"],
  32. )
  33. def index(self, request):
  34. return request.param
  35. def test_constructor_unwraps_index(self, dtype):
  36. result = self._index_cls(1, 3)
  37. expected = np.array([1, 2], dtype=dtype)
  38. tm.assert_numpy_array_equal(result._data, expected)
  39. def test_can_hold_identifiers(self, simple_index):
  40. idx = simple_index
  41. key = idx[0]
  42. assert idx._can_hold_identifiers_and_holds_name(key) is False
  43. def test_too_many_names(self, simple_index):
  44. index = simple_index
  45. with pytest.raises(ValueError, match="^Length"):
  46. index.names = ["roger", "harold"]
  47. @pytest.mark.parametrize(
  48. "index, start, stop, step",
  49. [
  50. (RangeIndex(5), 0, 5, 1),
  51. (RangeIndex(0, 5), 0, 5, 1),
  52. (RangeIndex(5, step=2), 0, 5, 2),
  53. (RangeIndex(1, 5, 2), 1, 5, 2),
  54. ],
  55. )
  56. def test_start_stop_step_attrs(self, index, start, stop, step):
  57. # GH 25710
  58. assert index.start == start
  59. assert index.stop == stop
  60. assert index.step == step
  61. def test_copy(self):
  62. i = RangeIndex(5, name="Foo")
  63. i_copy = i.copy()
  64. assert i_copy is not i
  65. assert i_copy.identical(i)
  66. assert i_copy._range == range(0, 5, 1)
  67. assert i_copy.name == "Foo"
  68. def test_repr(self):
  69. i = RangeIndex(5, name="Foo")
  70. result = repr(i)
  71. expected = "RangeIndex(start=0, stop=5, step=1, name='Foo')"
  72. assert result == expected
  73. result = eval(result)
  74. tm.assert_index_equal(result, i, exact=True)
  75. i = RangeIndex(5, 0, -1)
  76. result = repr(i)
  77. expected = "RangeIndex(start=5, stop=0, step=-1)"
  78. assert result == expected
  79. result = eval(result)
  80. tm.assert_index_equal(result, i, exact=True)
  81. def test_insert(self):
  82. idx = RangeIndex(5, name="Foo")
  83. result = idx[1:4]
  84. # test 0th element
  85. tm.assert_index_equal(idx[0:4], result.insert(0, idx[0]), exact="equiv")
  86. # GH 18295 (test missing)
  87. expected = Index([0, np.nan, 1, 2, 3, 4], dtype=np.float64)
  88. for na in [np.nan, None, pd.NA]:
  89. result = RangeIndex(5).insert(1, na)
  90. tm.assert_index_equal(result, expected)
  91. result = RangeIndex(5).insert(1, pd.NaT)
  92. expected = Index([0, pd.NaT, 1, 2, 3, 4], dtype=object)
  93. tm.assert_index_equal(result, expected)
  94. def test_insert_edges_preserves_rangeindex(self):
  95. idx = Index(range(4, 9, 2))
  96. result = idx.insert(0, 2)
  97. expected = Index(range(2, 9, 2))
  98. tm.assert_index_equal(result, expected, exact=True)
  99. result = idx.insert(3, 10)
  100. expected = Index(range(4, 11, 2))
  101. tm.assert_index_equal(result, expected, exact=True)
  102. def test_insert_middle_preserves_rangeindex(self):
  103. # insert in the middle
  104. idx = Index(range(0, 3, 2))
  105. result = idx.insert(1, 1)
  106. expected = Index(range(3))
  107. tm.assert_index_equal(result, expected, exact=True)
  108. idx = idx * 2
  109. result = idx.insert(1, 2)
  110. expected = expected * 2
  111. tm.assert_index_equal(result, expected, exact=True)
  112. def test_delete(self):
  113. idx = RangeIndex(5, name="Foo")
  114. expected = idx[1:]
  115. result = idx.delete(0)
  116. tm.assert_index_equal(result, expected, exact=True)
  117. assert result.name == expected.name
  118. expected = idx[:-1]
  119. result = idx.delete(-1)
  120. tm.assert_index_equal(result, expected, exact=True)
  121. assert result.name == expected.name
  122. msg = "index 5 is out of bounds for axis 0 with size 5"
  123. with pytest.raises((IndexError, ValueError), match=msg):
  124. # either depending on numpy version
  125. result = idx.delete(len(idx))
  126. def test_delete_preserves_rangeindex(self):
  127. idx = Index(range(2), name="foo")
  128. result = idx.delete([1])
  129. expected = Index(range(1), name="foo")
  130. tm.assert_index_equal(result, expected, exact=True)
  131. result = idx.delete(1)
  132. tm.assert_index_equal(result, expected, exact=True)
  133. def test_delete_preserves_rangeindex_middle(self):
  134. idx = Index(range(3), name="foo")
  135. result = idx.delete(1)
  136. expected = idx[::2]
  137. tm.assert_index_equal(result, expected, exact=True)
  138. result = idx.delete(-2)
  139. tm.assert_index_equal(result, expected, exact=True)
  140. def test_delete_preserves_rangeindex_list_at_end(self):
  141. idx = RangeIndex(0, 6, 1)
  142. loc = [2, 3, 4, 5]
  143. result = idx.delete(loc)
  144. expected = idx[:2]
  145. tm.assert_index_equal(result, expected, exact=True)
  146. result = idx.delete(loc[::-1])
  147. tm.assert_index_equal(result, expected, exact=True)
  148. def test_delete_preserves_rangeindex_list_middle(self):
  149. idx = RangeIndex(0, 6, 1)
  150. loc = [1, 2, 3, 4]
  151. result = idx.delete(loc)
  152. expected = RangeIndex(0, 6, 5)
  153. tm.assert_index_equal(result, expected, exact=True)
  154. result = idx.delete(loc[::-1])
  155. tm.assert_index_equal(result, expected, exact=True)
  156. def test_delete_all_preserves_rangeindex(self):
  157. idx = RangeIndex(0, 6, 1)
  158. loc = [0, 1, 2, 3, 4, 5]
  159. result = idx.delete(loc)
  160. expected = idx[:0]
  161. tm.assert_index_equal(result, expected, exact=True)
  162. result = idx.delete(loc[::-1])
  163. tm.assert_index_equal(result, expected, exact=True)
  164. def test_delete_not_preserving_rangeindex(self):
  165. idx = RangeIndex(0, 6, 1)
  166. loc = [0, 3, 5]
  167. result = idx.delete(loc)
  168. expected = Index([1, 2, 4])
  169. tm.assert_index_equal(result, expected, exact=True)
  170. result = idx.delete(loc[::-1])
  171. tm.assert_index_equal(result, expected, exact=True)
  172. def test_view(self):
  173. i = RangeIndex(0, name="Foo")
  174. i_view = i.view()
  175. assert i_view.name == "Foo"
  176. i_view = i.view("i8")
  177. tm.assert_numpy_array_equal(i.values, i_view)
  178. i_view = i.view(RangeIndex)
  179. tm.assert_index_equal(i, i_view)
  180. def test_dtype(self, simple_index):
  181. index = simple_index
  182. assert index.dtype == np.int64
  183. def test_cache(self):
  184. # GH 26565, GH26617, GH35432
  185. # This test checks whether _cache has been set.
  186. # Calling RangeIndex._cache["_data"] creates an int64 array of the same length
  187. # as the RangeIndex and stores it in _cache.
  188. idx = RangeIndex(0, 100, 10)
  189. assert idx._cache == {}
  190. repr(idx)
  191. assert idx._cache == {}
  192. str(idx)
  193. assert idx._cache == {}
  194. idx.get_loc(20)
  195. assert idx._cache == {}
  196. 90 in idx # True
  197. assert idx._cache == {}
  198. 91 in idx # False
  199. assert idx._cache == {}
  200. idx.all()
  201. assert idx._cache == {}
  202. idx.any()
  203. assert idx._cache == {}
  204. for _ in idx:
  205. pass
  206. assert idx._cache == {}
  207. idx.format()
  208. assert idx._cache == {}
  209. df = pd.DataFrame({"a": range(10)}, index=idx)
  210. str(df)
  211. assert idx._cache == {}
  212. df.loc[50]
  213. assert idx._cache == {}
  214. with pytest.raises(KeyError, match="51"):
  215. df.loc[51]
  216. assert idx._cache == {}
  217. df.loc[10:50]
  218. assert idx._cache == {}
  219. df.iloc[5:10]
  220. assert idx._cache == {}
  221. # idx._cache should contain a _data entry after call to idx._data
  222. idx._data
  223. assert isinstance(idx._data, np.ndarray)
  224. assert idx._data is idx._data # check cached value is reused
  225. assert len(idx._cache) == 1
  226. expected = np.arange(0, 100, 10, dtype="int64")
  227. tm.assert_numpy_array_equal(idx._cache["_data"], expected)
  228. def test_is_monotonic(self):
  229. index = RangeIndex(0, 20, 2)
  230. assert index.is_monotonic_increasing is True
  231. assert index.is_monotonic_increasing is True
  232. assert index.is_monotonic_decreasing is False
  233. assert index._is_strictly_monotonic_increasing is True
  234. assert index._is_strictly_monotonic_decreasing is False
  235. index = RangeIndex(4, 0, -1)
  236. assert index.is_monotonic_increasing is False
  237. assert index._is_strictly_monotonic_increasing is False
  238. assert index.is_monotonic_decreasing is True
  239. assert index._is_strictly_monotonic_decreasing is True
  240. index = RangeIndex(1, 2)
  241. assert index.is_monotonic_increasing is True
  242. assert index.is_monotonic_increasing is True
  243. assert index.is_monotonic_decreasing is True
  244. assert index._is_strictly_monotonic_increasing is True
  245. assert index._is_strictly_monotonic_decreasing is True
  246. index = RangeIndex(2, 1)
  247. assert index.is_monotonic_increasing is True
  248. assert index.is_monotonic_increasing is True
  249. assert index.is_monotonic_decreasing is True
  250. assert index._is_strictly_monotonic_increasing is True
  251. assert index._is_strictly_monotonic_decreasing is True
  252. index = RangeIndex(1, 1)
  253. assert index.is_monotonic_increasing is True
  254. assert index.is_monotonic_increasing is True
  255. assert index.is_monotonic_decreasing is True
  256. assert index._is_strictly_monotonic_increasing is True
  257. assert index._is_strictly_monotonic_decreasing is True
  258. def test_equals_range(self):
  259. equiv_pairs = [
  260. (RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)),
  261. (RangeIndex(0), RangeIndex(1, -1, 3)),
  262. (RangeIndex(1, 2, 3), RangeIndex(1, 3, 4)),
  263. (RangeIndex(0, -9, -2), RangeIndex(0, -10, -2)),
  264. ]
  265. for left, right in equiv_pairs:
  266. assert left.equals(right)
  267. assert right.equals(left)
  268. def test_logical_compat(self, simple_index):
  269. idx = simple_index
  270. assert idx.all() == idx.values.all()
  271. assert idx.any() == idx.values.any()
  272. def test_identical(self, simple_index):
  273. index = simple_index
  274. i = Index(index.copy())
  275. assert i.identical(index)
  276. # we don't allow object dtype for RangeIndex
  277. if isinstance(index, RangeIndex):
  278. return
  279. same_values_different_type = Index(i, dtype=object)
  280. assert not i.identical(same_values_different_type)
  281. i = index.copy(dtype=object)
  282. i = i.rename("foo")
  283. same_values = Index(i, dtype=object)
  284. assert same_values.identical(index.copy(dtype=object))
  285. assert not i.identical(index)
  286. assert Index(same_values, name="foo", dtype=object).identical(i)
  287. assert not index.copy(dtype=object).identical(index.copy(dtype="int64"))
  288. def test_nbytes(self):
  289. # memory savings vs int index
  290. idx = RangeIndex(0, 1000)
  291. assert idx.nbytes < Index(idx._values).nbytes / 10
  292. # constant memory usage
  293. i2 = RangeIndex(0, 10)
  294. assert idx.nbytes == i2.nbytes
  295. @pytest.mark.parametrize(
  296. "start,stop,step",
  297. [
  298. # can't
  299. ("foo", "bar", "baz"),
  300. # shouldn't
  301. ("0", "1", "2"),
  302. ],
  303. )
  304. def test_cant_or_shouldnt_cast(self, start, stop, step):
  305. msg = f"Wrong type {type(start)} for value {start}"
  306. with pytest.raises(TypeError, match=msg):
  307. RangeIndex(start, stop, step)
  308. def test_view_index(self, simple_index):
  309. index = simple_index
  310. index.view(Index)
  311. def test_prevent_casting(self, simple_index):
  312. index = simple_index
  313. result = index.astype("O")
  314. assert result.dtype == np.object_
  315. def test_repr_roundtrip(self, simple_index):
  316. index = simple_index
  317. tm.assert_index_equal(eval(repr(index)), index)
  318. def test_slice_keep_name(self):
  319. idx = RangeIndex(1, 2, name="asdf")
  320. assert idx.name == idx[1:].name
  321. def test_has_duplicates(self, index):
  322. assert index.is_unique
  323. assert not index.has_duplicates
  324. def test_extended_gcd(self, simple_index):
  325. index = simple_index
  326. result = index._extended_gcd(6, 10)
  327. assert result[0] == result[1] * 6 + result[2] * 10
  328. assert 2 == result[0]
  329. result = index._extended_gcd(10, 6)
  330. assert 2 == result[1] * 10 + result[2] * 6
  331. assert 2 == result[0]
  332. def test_min_fitting_element(self):
  333. result = RangeIndex(0, 20, 2)._min_fitting_element(1)
  334. assert 2 == result
  335. result = RangeIndex(1, 6)._min_fitting_element(1)
  336. assert 1 == result
  337. result = RangeIndex(18, -2, -2)._min_fitting_element(1)
  338. assert 2 == result
  339. result = RangeIndex(5, 0, -1)._min_fitting_element(1)
  340. assert 1 == result
  341. big_num = 500000000000000000000000
  342. result = RangeIndex(5, big_num * 2, 1)._min_fitting_element(big_num)
  343. assert big_num == result
  344. def test_pickle_compat_construction(self):
  345. # RangeIndex() is a valid constructor
  346. pass
  347. def test_slice_specialised(self, simple_index):
  348. index = simple_index
  349. index.name = "foo"
  350. # scalar indexing
  351. res = index[1]
  352. expected = 2
  353. assert res == expected
  354. res = index[-1]
  355. expected = 18
  356. assert res == expected
  357. # slicing
  358. # slice value completion
  359. index_slice = index[:]
  360. expected = index
  361. tm.assert_index_equal(index_slice, expected)
  362. # positive slice values
  363. index_slice = index[7:10:2]
  364. expected = Index([14, 18], name="foo")
  365. tm.assert_index_equal(index_slice, expected, exact="equiv")
  366. # negative slice values
  367. index_slice = index[-1:-5:-2]
  368. expected = Index([18, 14], name="foo")
  369. tm.assert_index_equal(index_slice, expected, exact="equiv")
  370. # stop overshoot
  371. index_slice = index[2:100:4]
  372. expected = Index([4, 12], name="foo")
  373. tm.assert_index_equal(index_slice, expected, exact="equiv")
  374. # reverse
  375. index_slice = index[::-1]
  376. expected = Index(index.values[::-1], name="foo")
  377. tm.assert_index_equal(index_slice, expected, exact="equiv")
  378. index_slice = index[-8::-1]
  379. expected = Index([4, 2, 0], name="foo")
  380. tm.assert_index_equal(index_slice, expected, exact="equiv")
  381. index_slice = index[-40::-1]
  382. expected = Index(np.array([], dtype=np.int64), name="foo")
  383. tm.assert_index_equal(index_slice, expected, exact="equiv")
  384. index_slice = index[40::-1]
  385. expected = Index(index.values[40::-1], name="foo")
  386. tm.assert_index_equal(index_slice, expected, exact="equiv")
  387. index_slice = index[10::-1]
  388. expected = Index(index.values[::-1], name="foo")
  389. tm.assert_index_equal(index_slice, expected, exact="equiv")
  390. @pytest.mark.parametrize("step", set(range(-5, 6)) - {0})
  391. def test_len_specialised(self, step):
  392. # make sure that our len is the same as np.arange calc
  393. start, stop = (0, 5) if step > 0 else (5, 0)
  394. arr = np.arange(start, stop, step)
  395. index = RangeIndex(start, stop, step)
  396. assert len(index) == len(arr)
  397. index = RangeIndex(stop, start, step)
  398. assert len(index) == 0
  399. @pytest.fixture(
  400. params=[
  401. ([RI(1, 12, 5)], RI(1, 12, 5)),
  402. ([RI(0, 6, 4)], RI(0, 6, 4)),
  403. ([RI(1, 3), RI(3, 7)], RI(1, 7)),
  404. ([RI(1, 5, 2), RI(5, 6)], RI(1, 6, 2)),
  405. ([RI(1, 3, 2), RI(4, 7, 3)], RI(1, 7, 3)),
  406. ([RI(-4, 3, 2), RI(4, 7, 2)], RI(-4, 7, 2)),
  407. ([RI(-4, -8), RI(-8, -12)], RI(0, 0)),
  408. ([RI(-4, -8), RI(3, -4)], RI(0, 0)),
  409. ([RI(-4, -8), RI(3, 5)], RI(3, 5)),
  410. ([RI(-4, -2), RI(3, 5)], Index([-4, -3, 3, 4])),
  411. ([RI(-2), RI(3, 5)], RI(3, 5)),
  412. ([RI(2), RI(2)], Index([0, 1, 0, 1])),
  413. ([RI(2), RI(2, 5), RI(5, 8, 4)], RI(0, 6)),
  414. ([RI(2), RI(3, 5), RI(5, 8, 4)], Index([0, 1, 3, 4, 5])),
  415. ([RI(-2, 2), RI(2, 5), RI(5, 8, 4)], RI(-2, 6)),
  416. ([RI(3), Index([-1, 3, 15])], Index([0, 1, 2, -1, 3, 15])),
  417. ([RI(3), Index([-1, 3.1, 15.0])], Index([0, 1, 2, -1, 3.1, 15.0])),
  418. ([RI(3), Index(["a", None, 14])], Index([0, 1, 2, "a", None, 14])),
  419. ([RI(3, 1), Index(["a", None, 14])], Index(["a", None, 14])),
  420. ]
  421. )
  422. def appends(self, request):
  423. """Inputs and expected outputs for RangeIndex.append test"""
  424. return request.param
  425. def test_append(self, appends):
  426. # GH16212
  427. indices, expected = appends
  428. result = indices[0].append(indices[1:])
  429. tm.assert_index_equal(result, expected, exact=True)
  430. if len(indices) == 2:
  431. # Append single item rather than list
  432. result2 = indices[0].append(indices[1])
  433. tm.assert_index_equal(result2, expected, exact=True)
  434. def test_engineless_lookup(self):
  435. # GH 16685
  436. # Standard lookup on RangeIndex should not require the engine to be
  437. # created
  438. idx = RangeIndex(2, 10, 3)
  439. assert idx.get_loc(5) == 1
  440. tm.assert_numpy_array_equal(
  441. idx.get_indexer([2, 8]), ensure_platform_int(np.array([0, 2]))
  442. )
  443. with pytest.raises(KeyError, match="3"):
  444. idx.get_loc(3)
  445. assert "_engine" not in idx._cache
  446. # Different types of scalars can be excluded immediately, no need to
  447. # use the _engine
  448. with pytest.raises(KeyError, match="'a'"):
  449. idx.get_loc("a")
  450. assert "_engine" not in idx._cache
  451. def test_format_empty(self):
  452. # GH35712
  453. empty_idx = self._index_cls(0)
  454. assert empty_idx.format() == []
  455. assert empty_idx.format(name=True) == [""]
  456. @pytest.mark.parametrize(
  457. "RI",
  458. [
  459. RangeIndex(0, -1, -1),
  460. RangeIndex(0, 1, 1),
  461. RangeIndex(1, 3, 2),
  462. RangeIndex(0, -1, -2),
  463. RangeIndex(-3, -5, -2),
  464. ],
  465. )
  466. def test_append_len_one(self, RI):
  467. # GH39401
  468. result = RI.append([])
  469. tm.assert_index_equal(result, RI, exact=True)
  470. @pytest.mark.parametrize("base", [RangeIndex(0, 2), Index([0, 1])])
  471. def test_isin_range(self, base):
  472. # GH#41151
  473. values = RangeIndex(0, 1)
  474. result = base.isin(values)
  475. expected = np.array([True, False])
  476. tm.assert_numpy_array_equal(result, expected)
  477. def test_sort_values_key(self):
  478. # GH#43666
  479. sort_order = {8: 2, 6: 0, 4: 8, 2: 10, 0: 12}
  480. values = RangeIndex(0, 10, 2)
  481. result = values.sort_values(key=lambda x: x.map(sort_order))
  482. expected = Index([4, 8, 6, 0, 2], dtype="int64")
  483. tm.assert_index_equal(result, expected, check_exact=True)
  484. def test_cast_string(self, dtype):
  485. pytest.skip("casting of strings not relevant for RangeIndex")