range.py 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037
  1. from __future__ import annotations
  2. from datetime import timedelta
  3. import operator
  4. from sys import getsizeof
  5. from typing import (
  6. Any,
  7. Callable,
  8. Hashable,
  9. Iterator,
  10. List,
  11. cast,
  12. )
  13. import numpy as np
  14. from pandas._libs import (
  15. index as libindex,
  16. lib,
  17. )
  18. from pandas._libs.algos import unique_deltas
  19. from pandas._libs.lib import no_default
  20. from pandas._typing import (
  21. Dtype,
  22. npt,
  23. )
  24. from pandas.compat.numpy import function as nv
  25. from pandas.util._decorators import (
  26. cache_readonly,
  27. doc,
  28. )
  29. from pandas.core.dtypes.common import (
  30. ensure_platform_int,
  31. ensure_python_int,
  32. is_float,
  33. is_integer,
  34. is_scalar,
  35. is_signed_integer_dtype,
  36. is_timedelta64_dtype,
  37. )
  38. from pandas.core.dtypes.generic import ABCTimedeltaIndex
  39. from pandas.core import ops
  40. import pandas.core.common as com
  41. from pandas.core.construction import extract_array
  42. import pandas.core.indexes.base as ibase
  43. from pandas.core.indexes.base import (
  44. Index,
  45. maybe_extract_name,
  46. )
  47. from pandas.core.ops.common import unpack_zerodim_and_defer
  48. _empty_range = range(0)
  49. class RangeIndex(Index):
  50. """
  51. Immutable Index implementing a monotonic integer range.
  52. RangeIndex is a memory-saving special case of an Index limited to representing
  53. monotonic ranges with a 64-bit dtype. Using RangeIndex may in some instances
  54. improve computing speed.
  55. This is the default index type used
  56. by DataFrame and Series when no explicit index is provided by the user.
  57. Parameters
  58. ----------
  59. start : int (default: 0), range, or other RangeIndex instance
  60. If int and "stop" is not given, interpreted as "stop" instead.
  61. stop : int (default: 0)
  62. step : int (default: 1)
  63. dtype : np.int64
  64. Unused, accepted for homogeneity with other index types.
  65. copy : bool, default False
  66. Unused, accepted for homogeneity with other index types.
  67. name : object, optional
  68. Name to be stored in the index.
  69. Attributes
  70. ----------
  71. start
  72. stop
  73. step
  74. Methods
  75. -------
  76. from_range
  77. See Also
  78. --------
  79. Index : The base pandas Index type.
  80. """
  81. _typ = "rangeindex"
  82. _dtype_validation_metadata = (is_signed_integer_dtype, "signed integer")
  83. _range: range
  84. _values: np.ndarray
  85. @property
  86. def _engine_type(self) -> type[libindex.Int64Engine]:
  87. return libindex.Int64Engine
  88. # --------------------------------------------------------------------
  89. # Constructors
  90. def __new__(
  91. cls,
  92. start=None,
  93. stop=None,
  94. step=None,
  95. dtype: Dtype | None = None,
  96. copy: bool = False,
  97. name: Hashable = None,
  98. ) -> RangeIndex:
  99. cls._validate_dtype(dtype)
  100. name = maybe_extract_name(name, start, cls)
  101. # RangeIndex
  102. if isinstance(start, RangeIndex):
  103. return start.copy(name=name)
  104. elif isinstance(start, range):
  105. return cls._simple_new(start, name=name)
  106. # validate the arguments
  107. if com.all_none(start, stop, step):
  108. raise TypeError("RangeIndex(...) must be called with integers")
  109. start = ensure_python_int(start) if start is not None else 0
  110. if stop is None:
  111. start, stop = 0, start
  112. else:
  113. stop = ensure_python_int(stop)
  114. step = ensure_python_int(step) if step is not None else 1
  115. if step == 0:
  116. raise ValueError("Step must not be zero")
  117. rng = range(start, stop, step)
  118. return cls._simple_new(rng, name=name)
  119. @classmethod
  120. def from_range(
  121. cls, data: range, name=None, dtype: Dtype | None = None
  122. ) -> RangeIndex:
  123. """
  124. Create RangeIndex from a range object.
  125. Returns
  126. -------
  127. RangeIndex
  128. """
  129. if not isinstance(data, range):
  130. raise TypeError(
  131. f"{cls.__name__}(...) must be called with object coercible to a "
  132. f"range, {repr(data)} was passed"
  133. )
  134. cls._validate_dtype(dtype)
  135. return cls._simple_new(data, name=name)
  136. # error: Argument 1 of "_simple_new" is incompatible with supertype "Index";
  137. # supertype defines the argument type as
  138. # "Union[ExtensionArray, ndarray[Any, Any]]" [override]
  139. @classmethod
  140. def _simple_new( # type: ignore[override]
  141. cls, values: range, name: Hashable = None
  142. ) -> RangeIndex:
  143. result = object.__new__(cls)
  144. assert isinstance(values, range)
  145. result._range = values
  146. result._name = name
  147. result._cache = {}
  148. result._reset_identity()
  149. result._references = None
  150. return result
  151. @classmethod
  152. def _validate_dtype(cls, dtype: Dtype | None) -> None:
  153. if dtype is None:
  154. return
  155. validation_func, expected = cls._dtype_validation_metadata
  156. if not validation_func(dtype):
  157. raise ValueError(
  158. f"Incorrect `dtype` passed: expected {expected}, received {dtype}"
  159. )
  160. # --------------------------------------------------------------------
  161. # error: Return type "Type[Index]" of "_constructor" incompatible with return
  162. # type "Type[RangeIndex]" in supertype "Index"
  163. @cache_readonly
  164. def _constructor(self) -> type[Index]: # type: ignore[override]
  165. """return the class to use for construction"""
  166. return Index
  167. # error: Signature of "_data" incompatible with supertype "Index"
  168. @cache_readonly
  169. def _data(self) -> np.ndarray: # type: ignore[override]
  170. """
  171. An int array that for performance reasons is created only when needed.
  172. The constructed array is saved in ``_cache``.
  173. """
  174. return np.arange(self.start, self.stop, self.step, dtype=np.int64)
  175. def _get_data_as_items(self):
  176. """return a list of tuples of start, stop, step"""
  177. rng = self._range
  178. return [("start", rng.start), ("stop", rng.stop), ("step", rng.step)]
  179. def __reduce__(self):
  180. d = {"name": self.name}
  181. d.update(dict(self._get_data_as_items()))
  182. return ibase._new_Index, (type(self), d), None
  183. # --------------------------------------------------------------------
  184. # Rendering Methods
  185. def _format_attrs(self):
  186. """
  187. Return a list of tuples of the (attr, formatted_value)
  188. """
  189. attrs = self._get_data_as_items()
  190. if self.name is not None:
  191. attrs.append(("name", ibase.default_pprint(self.name)))
  192. return attrs
  193. def _format_data(self, name=None):
  194. # we are formatting thru the attributes
  195. return None
  196. def _format_with_header(self, header: list[str], na_rep: str) -> list[str]:
  197. # Equivalent to Index implementation, but faster
  198. if not len(self._range):
  199. return header
  200. first_val_str = str(self._range[0])
  201. last_val_str = str(self._range[-1])
  202. max_length = max(len(first_val_str), len(last_val_str))
  203. return header + [f"{x:<{max_length}}" for x in self._range]
  204. # --------------------------------------------------------------------
  205. @property
  206. def start(self) -> int:
  207. """
  208. The value of the `start` parameter (``0`` if this was not supplied).
  209. """
  210. # GH 25710
  211. return self._range.start
  212. @property
  213. def stop(self) -> int:
  214. """
  215. The value of the `stop` parameter.
  216. """
  217. return self._range.stop
  218. @property
  219. def step(self) -> int:
  220. """
  221. The value of the `step` parameter (``1`` if this was not supplied).
  222. """
  223. # GH 25710
  224. return self._range.step
  225. @cache_readonly
  226. def nbytes(self) -> int:
  227. """
  228. Return the number of bytes in the underlying data.
  229. """
  230. rng = self._range
  231. return getsizeof(rng) + sum(
  232. getsizeof(getattr(rng, attr_name))
  233. for attr_name in ["start", "stop", "step"]
  234. )
  235. def memory_usage(self, deep: bool = False) -> int:
  236. """
  237. Memory usage of my values
  238. Parameters
  239. ----------
  240. deep : bool
  241. Introspect the data deeply, interrogate
  242. `object` dtypes for system-level memory consumption
  243. Returns
  244. -------
  245. bytes used
  246. Notes
  247. -----
  248. Memory usage does not include memory consumed by elements that
  249. are not components of the array if deep=False
  250. See Also
  251. --------
  252. numpy.ndarray.nbytes
  253. """
  254. return self.nbytes
  255. @property
  256. def dtype(self) -> np.dtype:
  257. return np.dtype(np.int64)
  258. @property
  259. def is_unique(self) -> bool:
  260. """return if the index has unique values"""
  261. return True
  262. @cache_readonly
  263. def is_monotonic_increasing(self) -> bool:
  264. return self._range.step > 0 or len(self) <= 1
  265. @cache_readonly
  266. def is_monotonic_decreasing(self) -> bool:
  267. return self._range.step < 0 or len(self) <= 1
  268. def __contains__(self, key: Any) -> bool:
  269. hash(key)
  270. try:
  271. key = ensure_python_int(key)
  272. except TypeError:
  273. return False
  274. return key in self._range
  275. @property
  276. def inferred_type(self) -> str:
  277. return "integer"
  278. # --------------------------------------------------------------------
  279. # Indexing Methods
  280. @doc(Index.get_loc)
  281. def get_loc(self, key):
  282. if is_integer(key) or (is_float(key) and key.is_integer()):
  283. new_key = int(key)
  284. try:
  285. return self._range.index(new_key)
  286. except ValueError as err:
  287. raise KeyError(key) from err
  288. if isinstance(key, Hashable):
  289. raise KeyError(key)
  290. self._check_indexing_error(key)
  291. raise KeyError(key)
  292. def _get_indexer(
  293. self,
  294. target: Index,
  295. method: str | None = None,
  296. limit: int | None = None,
  297. tolerance=None,
  298. ) -> npt.NDArray[np.intp]:
  299. if com.any_not_none(method, tolerance, limit):
  300. return super()._get_indexer(
  301. target, method=method, tolerance=tolerance, limit=limit
  302. )
  303. if self.step > 0:
  304. start, stop, step = self.start, self.stop, self.step
  305. else:
  306. # GH 28678: work on reversed range for simplicity
  307. reverse = self._range[::-1]
  308. start, stop, step = reverse.start, reverse.stop, reverse.step
  309. target_array = np.asarray(target)
  310. locs = target_array - start
  311. valid = (locs % step == 0) & (locs >= 0) & (target_array < stop)
  312. locs[~valid] = -1
  313. locs[valid] = locs[valid] / step
  314. if step != self.step:
  315. # We reversed this range: transform to original locs
  316. locs[valid] = len(self) - 1 - locs[valid]
  317. return ensure_platform_int(locs)
  318. @cache_readonly
  319. def _should_fallback_to_positional(self) -> bool:
  320. """
  321. Should an integer key be treated as positional?
  322. """
  323. return False
  324. # --------------------------------------------------------------------
  325. def tolist(self) -> list[int]:
  326. return list(self._range)
  327. @doc(Index.__iter__)
  328. def __iter__(self) -> Iterator[int]:
  329. yield from self._range
  330. @doc(Index._shallow_copy)
  331. def _shallow_copy(self, values, name: Hashable = no_default):
  332. name = self.name if name is no_default else name
  333. if values.dtype.kind == "f":
  334. return Index(values, name=name, dtype=np.float64)
  335. # GH 46675 & 43885: If values is equally spaced, return a
  336. # more memory-compact RangeIndex instead of Index with 64-bit dtype
  337. unique_diffs = unique_deltas(values)
  338. if len(unique_diffs) == 1 and unique_diffs[0] != 0:
  339. diff = unique_diffs[0]
  340. new_range = range(values[0], values[-1] + diff, diff)
  341. return type(self)._simple_new(new_range, name=name)
  342. else:
  343. return self._constructor._simple_new(values, name=name)
  344. def _view(self: RangeIndex) -> RangeIndex:
  345. result = type(self)._simple_new(self._range, name=self._name)
  346. result._cache = self._cache
  347. return result
  348. @doc(Index.copy)
  349. def copy(self, name: Hashable = None, deep: bool = False):
  350. name = self._validate_names(name=name, deep=deep)[0]
  351. new_index = self._rename(name=name)
  352. return new_index
  353. def _minmax(self, meth: str):
  354. no_steps = len(self) - 1
  355. if no_steps == -1:
  356. return np.nan
  357. elif (meth == "min" and self.step > 0) or (meth == "max" and self.step < 0):
  358. return self.start
  359. return self.start + self.step * no_steps
  360. def min(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
  361. """The minimum value of the RangeIndex"""
  362. nv.validate_minmax_axis(axis)
  363. nv.validate_min(args, kwargs)
  364. return self._minmax("min")
  365. def max(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
  366. """The maximum value of the RangeIndex"""
  367. nv.validate_minmax_axis(axis)
  368. nv.validate_max(args, kwargs)
  369. return self._minmax("max")
  370. def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
  371. """
  372. Returns the indices that would sort the index and its
  373. underlying data.
  374. Returns
  375. -------
  376. np.ndarray[np.intp]
  377. See Also
  378. --------
  379. numpy.ndarray.argsort
  380. """
  381. ascending = kwargs.pop("ascending", True) # EA compat
  382. kwargs.pop("kind", None) # e.g. "mergesort" is irrelevant
  383. nv.validate_argsort(args, kwargs)
  384. if self._range.step > 0:
  385. result = np.arange(len(self), dtype=np.intp)
  386. else:
  387. result = np.arange(len(self) - 1, -1, -1, dtype=np.intp)
  388. if not ascending:
  389. result = result[::-1]
  390. return result
  391. def factorize(
  392. self,
  393. sort: bool = False,
  394. use_na_sentinel: bool = True,
  395. ) -> tuple[npt.NDArray[np.intp], RangeIndex]:
  396. codes = np.arange(len(self), dtype=np.intp)
  397. uniques = self
  398. if sort and self.step < 0:
  399. codes = codes[::-1]
  400. uniques = uniques[::-1]
  401. return codes, uniques
  402. def equals(self, other: object) -> bool:
  403. """
  404. Determines if two Index objects contain the same elements.
  405. """
  406. if isinstance(other, RangeIndex):
  407. return self._range == other._range
  408. return super().equals(other)
  409. def sort_values(
  410. self,
  411. return_indexer: bool = False,
  412. ascending: bool = True,
  413. na_position: str = "last",
  414. key: Callable | None = None,
  415. ):
  416. if key is not None:
  417. return super().sort_values(
  418. return_indexer=return_indexer,
  419. ascending=ascending,
  420. na_position=na_position,
  421. key=key,
  422. )
  423. else:
  424. sorted_index = self
  425. inverse_indexer = False
  426. if ascending:
  427. if self.step < 0:
  428. sorted_index = self[::-1]
  429. inverse_indexer = True
  430. else:
  431. if self.step > 0:
  432. sorted_index = self[::-1]
  433. inverse_indexer = True
  434. if return_indexer:
  435. if inverse_indexer:
  436. rng = range(len(self) - 1, -1, -1)
  437. else:
  438. rng = range(len(self))
  439. return sorted_index, RangeIndex(rng)
  440. else:
  441. return sorted_index
  442. # --------------------------------------------------------------------
  443. # Set Operations
  444. def _intersection(self, other: Index, sort: bool = False):
  445. # caller is responsible for checking self and other are both non-empty
  446. if not isinstance(other, RangeIndex):
  447. return super()._intersection(other, sort=sort)
  448. first = self._range[::-1] if self.step < 0 else self._range
  449. second = other._range[::-1] if other.step < 0 else other._range
  450. # check whether intervals intersect
  451. # deals with in- and decreasing ranges
  452. int_low = max(first.start, second.start)
  453. int_high = min(first.stop, second.stop)
  454. if int_high <= int_low:
  455. return self._simple_new(_empty_range)
  456. # Method hint: linear Diophantine equation
  457. # solve intersection problem
  458. # performance hint: for identical step sizes, could use
  459. # cheaper alternative
  460. gcd, s, _ = self._extended_gcd(first.step, second.step)
  461. # check whether element sets intersect
  462. if (first.start - second.start) % gcd:
  463. return self._simple_new(_empty_range)
  464. # calculate parameters for the RangeIndex describing the
  465. # intersection disregarding the lower bounds
  466. tmp_start = first.start + (second.start - first.start) * first.step // gcd * s
  467. new_step = first.step * second.step // gcd
  468. new_range = range(tmp_start, int_high, new_step)
  469. new_index = self._simple_new(new_range)
  470. # adjust index to limiting interval
  471. new_start = new_index._min_fitting_element(int_low)
  472. new_range = range(new_start, new_index.stop, new_index.step)
  473. new_index = self._simple_new(new_range)
  474. if (self.step < 0 and other.step < 0) is not (new_index.step < 0):
  475. new_index = new_index[::-1]
  476. if sort is None:
  477. new_index = new_index.sort_values()
  478. return new_index
  479. def _min_fitting_element(self, lower_limit: int) -> int:
  480. """Returns the smallest element greater than or equal to the limit"""
  481. no_steps = -(-(lower_limit - self.start) // abs(self.step))
  482. return self.start + abs(self.step) * no_steps
  483. def _extended_gcd(self, a: int, b: int) -> tuple[int, int, int]:
  484. """
  485. Extended Euclidean algorithms to solve Bezout's identity:
  486. a*x + b*y = gcd(x, y)
  487. Finds one particular solution for x, y: s, t
  488. Returns: gcd, s, t
  489. """
  490. s, old_s = 0, 1
  491. t, old_t = 1, 0
  492. r, old_r = b, a
  493. while r:
  494. quotient = old_r // r
  495. old_r, r = r, old_r - quotient * r
  496. old_s, s = s, old_s - quotient * s
  497. old_t, t = t, old_t - quotient * t
  498. return old_r, old_s, old_t
  499. def _range_in_self(self, other: range) -> bool:
  500. """Check if other range is contained in self"""
  501. # https://stackoverflow.com/a/32481015
  502. if not other:
  503. return True
  504. if not self._range:
  505. return False
  506. if len(other) > 1 and other.step % self._range.step:
  507. return False
  508. return other.start in self._range and other[-1] in self._range
  509. def _union(self, other: Index, sort: bool | None):
  510. """
  511. Form the union of two Index objects and sorts if possible
  512. Parameters
  513. ----------
  514. other : Index or array-like
  515. sort : bool or None, default None
  516. Whether to sort (monotonically increasing) the resulting index.
  517. ``sort=None|True`` returns a ``RangeIndex`` if possible or a sorted
  518. ``Index`` with a int64 dtype if not.
  519. ``sort=False`` can return a ``RangeIndex`` if self is monotonically
  520. increasing and other is fully contained in self. Otherwise, returns
  521. an unsorted ``Index`` with an int64 dtype.
  522. Returns
  523. -------
  524. union : Index
  525. """
  526. if isinstance(other, RangeIndex):
  527. if sort in (None, True) or (
  528. sort is False and self.step > 0 and self._range_in_self(other._range)
  529. ):
  530. # GH 47557: Can still return a RangeIndex
  531. # if other range in self and sort=False
  532. start_s, step_s = self.start, self.step
  533. end_s = self.start + self.step * (len(self) - 1)
  534. start_o, step_o = other.start, other.step
  535. end_o = other.start + other.step * (len(other) - 1)
  536. if self.step < 0:
  537. start_s, step_s, end_s = end_s, -step_s, start_s
  538. if other.step < 0:
  539. start_o, step_o, end_o = end_o, -step_o, start_o
  540. if len(self) == 1 and len(other) == 1:
  541. step_s = step_o = abs(self.start - other.start)
  542. elif len(self) == 1:
  543. step_s = step_o
  544. elif len(other) == 1:
  545. step_o = step_s
  546. start_r = min(start_s, start_o)
  547. end_r = max(end_s, end_o)
  548. if step_o == step_s:
  549. if (
  550. (start_s - start_o) % step_s == 0
  551. and (start_s - end_o) <= step_s
  552. and (start_o - end_s) <= step_s
  553. ):
  554. return type(self)(start_r, end_r + step_s, step_s)
  555. if (
  556. (step_s % 2 == 0)
  557. and (abs(start_s - start_o) == step_s / 2)
  558. and (abs(end_s - end_o) == step_s / 2)
  559. ):
  560. # e.g. range(0, 10, 2) and range(1, 11, 2)
  561. # but not range(0, 20, 4) and range(1, 21, 4) GH#44019
  562. return type(self)(start_r, end_r + step_s / 2, step_s / 2)
  563. elif step_o % step_s == 0:
  564. if (
  565. (start_o - start_s) % step_s == 0
  566. and (start_o + step_s >= start_s)
  567. and (end_o - step_s <= end_s)
  568. ):
  569. return type(self)(start_r, end_r + step_s, step_s)
  570. elif step_s % step_o == 0:
  571. if (
  572. (start_s - start_o) % step_o == 0
  573. and (start_s + step_o >= start_o)
  574. and (end_s - step_o <= end_o)
  575. ):
  576. return type(self)(start_r, end_r + step_o, step_o)
  577. return super()._union(other, sort=sort)
  578. def _difference(self, other, sort=None):
  579. # optimized set operation if we have another RangeIndex
  580. self._validate_sort_keyword(sort)
  581. self._assert_can_do_setop(other)
  582. other, result_name = self._convert_can_do_setop(other)
  583. if not isinstance(other, RangeIndex):
  584. return super()._difference(other, sort=sort)
  585. if sort is not False and self.step < 0:
  586. return self[::-1]._difference(other)
  587. res_name = ops.get_op_result_name(self, other)
  588. first = self._range[::-1] if self.step < 0 else self._range
  589. overlap = self.intersection(other)
  590. if overlap.step < 0:
  591. overlap = overlap[::-1]
  592. if len(overlap) == 0:
  593. return self.rename(name=res_name)
  594. if len(overlap) == len(self):
  595. return self[:0].rename(res_name)
  596. # overlap.step will always be a multiple of self.step (see _intersection)
  597. if len(overlap) == 1:
  598. if overlap[0] == self[0]:
  599. return self[1:]
  600. elif overlap[0] == self[-1]:
  601. return self[:-1]
  602. elif len(self) == 3 and overlap[0] == self[1]:
  603. return self[::2]
  604. else:
  605. return super()._difference(other, sort=sort)
  606. elif len(overlap) == 2 and overlap[0] == first[0] and overlap[-1] == first[-1]:
  607. # e.g. range(-8, 20, 7) and range(13, -9, -3)
  608. return self[1:-1]
  609. if overlap.step == first.step:
  610. if overlap[0] == first.start:
  611. # The difference is everything after the intersection
  612. new_rng = range(overlap[-1] + first.step, first.stop, first.step)
  613. elif overlap[-1] == first[-1]:
  614. # The difference is everything before the intersection
  615. new_rng = range(first.start, overlap[0], first.step)
  616. elif overlap._range == first[1:-1]:
  617. # e.g. range(4) and range(1, 3)
  618. step = len(first) - 1
  619. new_rng = first[::step]
  620. else:
  621. # The difference is not range-like
  622. # e.g. range(1, 10, 1) and range(3, 7, 1)
  623. return super()._difference(other, sort=sort)
  624. else:
  625. # We must have len(self) > 1, bc we ruled out above
  626. # len(overlap) == 0 and len(overlap) == len(self)
  627. assert len(self) > 1
  628. if overlap.step == first.step * 2:
  629. if overlap[0] == first[0] and overlap[-1] in (first[-1], first[-2]):
  630. # e.g. range(1, 10, 1) and range(1, 10, 2)
  631. new_rng = first[1::2]
  632. elif overlap[0] == first[1] and overlap[-1] in (first[-1], first[-2]):
  633. # e.g. range(1, 10, 1) and range(2, 10, 2)
  634. new_rng = first[::2]
  635. else:
  636. # We can get here with e.g. range(20) and range(0, 10, 2)
  637. return super()._difference(other, sort=sort)
  638. else:
  639. # e.g. range(10) and range(0, 10, 3)
  640. return super()._difference(other, sort=sort)
  641. new_index = type(self)._simple_new(new_rng, name=res_name)
  642. if first is not self._range:
  643. new_index = new_index[::-1]
  644. return new_index
  645. def symmetric_difference(self, other, result_name: Hashable = None, sort=None):
  646. if not isinstance(other, RangeIndex) or sort is not None:
  647. return super().symmetric_difference(other, result_name, sort)
  648. left = self.difference(other)
  649. right = other.difference(self)
  650. result = left.union(right)
  651. if result_name is not None:
  652. result = result.rename(result_name)
  653. return result
  654. # --------------------------------------------------------------------
  655. # error: Return type "Index" of "delete" incompatible with return type
  656. # "RangeIndex" in supertype "Index"
  657. def delete(self, loc) -> Index: # type: ignore[override]
  658. # In some cases we can retain RangeIndex, see also
  659. # DatetimeTimedeltaMixin._get_delete_Freq
  660. if is_integer(loc):
  661. if loc in (0, -len(self)):
  662. return self[1:]
  663. if loc in (-1, len(self) - 1):
  664. return self[:-1]
  665. if len(self) == 3 and loc in (1, -2):
  666. return self[::2]
  667. elif lib.is_list_like(loc):
  668. slc = lib.maybe_indices_to_slice(np.asarray(loc, dtype=np.intp), len(self))
  669. if isinstance(slc, slice):
  670. # defer to RangeIndex._difference, which is optimized to return
  671. # a RangeIndex whenever possible
  672. other = self[slc]
  673. return self.difference(other, sort=False)
  674. return super().delete(loc)
  675. def insert(self, loc: int, item) -> Index:
  676. if len(self) and (is_integer(item) or is_float(item)):
  677. # We can retain RangeIndex is inserting at the beginning or end,
  678. # or right in the middle.
  679. rng = self._range
  680. if loc == 0 and item == self[0] - self.step:
  681. new_rng = range(rng.start - rng.step, rng.stop, rng.step)
  682. return type(self)._simple_new(new_rng, name=self.name)
  683. elif loc == len(self) and item == self[-1] + self.step:
  684. new_rng = range(rng.start, rng.stop + rng.step, rng.step)
  685. return type(self)._simple_new(new_rng, name=self.name)
  686. elif len(self) == 2 and item == self[0] + self.step / 2:
  687. # e.g. inserting 1 into [0, 2]
  688. step = int(self.step / 2)
  689. new_rng = range(self.start, self.stop, step)
  690. return type(self)._simple_new(new_rng, name=self.name)
  691. return super().insert(loc, item)
  692. def _concat(self, indexes: list[Index], name: Hashable) -> Index:
  693. """
  694. Overriding parent method for the case of all RangeIndex instances.
  695. When all members of "indexes" are of type RangeIndex: result will be
  696. RangeIndex if possible, Index with a int64 dtype otherwise. E.g.:
  697. indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6)
  698. indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Index([0,1,2,4,5], dtype='int64')
  699. """
  700. if not all(isinstance(x, RangeIndex) for x in indexes):
  701. return super()._concat(indexes, name)
  702. elif len(indexes) == 1:
  703. return indexes[0]
  704. rng_indexes = cast(List[RangeIndex], indexes)
  705. start = step = next_ = None
  706. # Filter the empty indexes
  707. non_empty_indexes = [obj for obj in rng_indexes if len(obj)]
  708. for obj in non_empty_indexes:
  709. rng = obj._range
  710. if start is None:
  711. # This is set by the first non-empty index
  712. start = rng.start
  713. if step is None and len(rng) > 1:
  714. step = rng.step
  715. elif step is None:
  716. # First non-empty index had only one element
  717. if rng.start == start:
  718. values = np.concatenate([x._values for x in rng_indexes])
  719. result = self._constructor(values)
  720. return result.rename(name)
  721. step = rng.start - start
  722. non_consecutive = (step != rng.step and len(rng) > 1) or (
  723. next_ is not None and rng.start != next_
  724. )
  725. if non_consecutive:
  726. result = self._constructor(
  727. np.concatenate([x._values for x in rng_indexes])
  728. )
  729. return result.rename(name)
  730. if step is not None:
  731. next_ = rng[-1] + step
  732. if non_empty_indexes:
  733. # Get the stop value from "next" or alternatively
  734. # from the last non-empty index
  735. stop = non_empty_indexes[-1].stop if next_ is None else next_
  736. return RangeIndex(start, stop, step).rename(name)
  737. # Here all "indexes" had 0 length, i.e. were empty.
  738. # In this case return an empty range index.
  739. return RangeIndex(0, 0).rename(name)
  740. def __len__(self) -> int:
  741. """
  742. return the length of the RangeIndex
  743. """
  744. return len(self._range)
  745. @property
  746. def size(self) -> int:
  747. return len(self)
  748. def __getitem__(self, key):
  749. """
  750. Conserve RangeIndex type for scalar and slice keys.
  751. """
  752. if isinstance(key, slice):
  753. new_range = self._range[key]
  754. return self._simple_new(new_range, name=self._name)
  755. elif is_integer(key):
  756. new_key = int(key)
  757. try:
  758. return self._range[new_key]
  759. except IndexError as err:
  760. raise IndexError(
  761. f"index {key} is out of bounds for axis 0 with size {len(self)}"
  762. ) from err
  763. elif is_scalar(key):
  764. raise IndexError(
  765. "only integers, slices (`:`), "
  766. "ellipsis (`...`), numpy.newaxis (`None`) "
  767. "and integer or boolean "
  768. "arrays are valid indices"
  769. )
  770. return super().__getitem__(key)
  771. def _getitem_slice(self: RangeIndex, slobj: slice) -> RangeIndex:
  772. """
  773. Fastpath for __getitem__ when we know we have a slice.
  774. """
  775. res = self._range[slobj]
  776. return type(self)._simple_new(res, name=self._name)
  777. @unpack_zerodim_and_defer("__floordiv__")
  778. def __floordiv__(self, other):
  779. if is_integer(other) and other != 0:
  780. if len(self) == 0 or self.start % other == 0 and self.step % other == 0:
  781. start = self.start // other
  782. step = self.step // other
  783. stop = start + len(self) * step
  784. new_range = range(start, stop, step or 1)
  785. return self._simple_new(new_range, name=self.name)
  786. if len(self) == 1:
  787. start = self.start // other
  788. new_range = range(start, start + 1, 1)
  789. return self._simple_new(new_range, name=self.name)
  790. return super().__floordiv__(other)
  791. # --------------------------------------------------------------------
  792. # Reductions
  793. def all(self, *args, **kwargs) -> bool:
  794. return 0 not in self._range
  795. def any(self, *args, **kwargs) -> bool:
  796. return any(self._range)
  797. # --------------------------------------------------------------------
  798. def _cmp_method(self, other, op):
  799. if isinstance(other, RangeIndex) and self._range == other._range:
  800. # Both are immutable so if ._range attr. are equal, shortcut is possible
  801. return super()._cmp_method(self, op)
  802. return super()._cmp_method(other, op)
  803. def _arith_method(self, other, op):
  804. """
  805. Parameters
  806. ----------
  807. other : Any
  808. op : callable that accepts 2 params
  809. perform the binary op
  810. """
  811. if isinstance(other, ABCTimedeltaIndex):
  812. # Defer to TimedeltaIndex implementation
  813. return NotImplemented
  814. elif isinstance(other, (timedelta, np.timedelta64)):
  815. # GH#19333 is_integer evaluated True on timedelta64,
  816. # so we need to catch these explicitly
  817. return super()._arith_method(other, op)
  818. elif is_timedelta64_dtype(other):
  819. # Must be an np.ndarray; GH#22390
  820. return super()._arith_method(other, op)
  821. if op in [
  822. operator.pow,
  823. ops.rpow,
  824. operator.mod,
  825. ops.rmod,
  826. operator.floordiv,
  827. ops.rfloordiv,
  828. divmod,
  829. ops.rdivmod,
  830. ]:
  831. return super()._arith_method(other, op)
  832. step: Callable | None = None
  833. if op in [operator.mul, ops.rmul, operator.truediv, ops.rtruediv]:
  834. step = op
  835. # TODO: if other is a RangeIndex we may have more efficient options
  836. right = extract_array(other, extract_numpy=True, extract_range=True)
  837. left = self
  838. try:
  839. # apply if we have an override
  840. if step:
  841. with np.errstate(all="ignore"):
  842. rstep = step(left.step, right)
  843. # we don't have a representable op
  844. # so return a base index
  845. if not is_integer(rstep) or not rstep:
  846. raise ValueError
  847. else:
  848. rstep = left.step
  849. with np.errstate(all="ignore"):
  850. rstart = op(left.start, right)
  851. rstop = op(left.stop, right)
  852. res_name = ops.get_op_result_name(self, other)
  853. result = type(self)(rstart, rstop, rstep, name=res_name)
  854. # for compat with numpy / Index with int64 dtype
  855. # even if we can represent as a RangeIndex, return
  856. # as a float64 Index if we have float-like descriptors
  857. if not all(is_integer(x) for x in [rstart, rstop, rstep]):
  858. result = result.astype("float64")
  859. return result
  860. except (ValueError, TypeError, ZeroDivisionError):
  861. # test_arithmetic_explicit_conversions
  862. return super()._arith_method(other, op)