interval.py
  1. from __future__ import annotations
  2. import operator
  3. from operator import (
  4. le,
  5. lt,
  6. )
  7. import textwrap
  8. from typing import (
  9. TYPE_CHECKING,
  10. Iterator,
  11. Literal,
  12. Sequence,
  13. TypeVar,
  14. Union,
  15. cast,
  16. overload,
  17. )
  18. import numpy as np
  19. from pandas._config import get_option
  20. from pandas._libs import lib
  21. from pandas._libs.interval import (
  22. VALID_CLOSED,
  23. Interval,
  24. IntervalMixin,
  25. intervals_to_interval_bounds,
  26. )
  27. from pandas._libs.missing import NA
  28. from pandas._typing import (
  29. ArrayLike,
  30. AxisInt,
  31. Dtype,
  32. IntervalClosedType,
  33. NpDtype,
  34. PositionalIndexer,
  35. ScalarIndexer,
  36. SequenceIndexer,
  37. SortKind,
  38. TimeArrayLike,
  39. npt,
  40. )
  41. from pandas.compat.numpy import function as nv
  42. from pandas.errors import IntCastingNaNError
  43. from pandas.util._decorators import Appender
  44. from pandas.core.dtypes.cast import (
  45. LossySetitemError,
  46. maybe_upcast_numeric_to_64bit,
  47. )
  48. from pandas.core.dtypes.common import (
  49. is_categorical_dtype,
  50. is_dtype_equal,
  51. is_float_dtype,
  52. is_integer_dtype,
  53. is_interval_dtype,
  54. is_list_like,
  55. is_object_dtype,
  56. is_scalar,
  57. is_string_dtype,
  58. needs_i8_conversion,
  59. pandas_dtype,
  60. )
  61. from pandas.core.dtypes.dtypes import IntervalDtype
  62. from pandas.core.dtypes.generic import (
  63. ABCDataFrame,
  64. ABCDatetimeIndex,
  65. ABCIntervalIndex,
  66. ABCPeriodIndex,
  67. )
  68. from pandas.core.dtypes.missing import (
  69. is_valid_na_for_dtype,
  70. isna,
  71. notna,
  72. )
  73. from pandas.core.algorithms import (
  74. isin,
  75. take,
  76. unique,
  77. value_counts,
  78. )
  79. from pandas.core.arrays.base import (
  80. ExtensionArray,
  81. _extension_array_shared_docs,
  82. )
  83. from pandas.core.arrays.datetimes import DatetimeArray
  84. from pandas.core.arrays.timedeltas import TimedeltaArray
  85. import pandas.core.common as com
  86. from pandas.core.construction import (
  87. array as pd_array,
  88. ensure_wrapped_if_datetimelike,
  89. extract_array,
  90. )
  91. from pandas.core.indexers import check_array_indexer
  92. from pandas.core.ops import (
  93. invalid_comparison,
  94. unpack_zerodim_and_defer,
  95. )
if TYPE_CHECKING:
    from pandas import (
        Index,
        Series,
    )


# TypeVar bound to IntervalArray so constructors/fluent methods keep the
# subclass type (e.g. for subclasses of IntervalArray).
IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray")
# A single side (left or right endpoints) is stored as either a
# datetime-like array or a plain ndarray.
IntervalSideT = Union[TimeArrayLike, np.ndarray]
# Scalar element type: an Interval, or float (np.nan) for a missing entry.
IntervalOrNA = Union[Interval, float]

# %-format docstring templates shared between IntervalArray and IntervalIndex.
_interval_shared_docs: dict[str, str] = {}

_shared_docs_kwargs = {
    "klass": "IntervalArray",
    "qualname": "arrays.IntervalArray",
    "name": "",
}


_interval_shared_docs[
    "class"
] = """
%(summary)s

.. versionadded:: %(versionadded)s

Parameters
----------
data : array-like (1-dimensional)
    Array-like (ndarray, :class:`DateTimeArray`, :class:`TimeDeltaArray`) containing
    Interval objects from which to build the %(klass)s.
closed : {'left', 'right', 'both', 'neither'}, default 'right'
    Whether the intervals are closed on the left-side, right-side, both or
    neither.
dtype : dtype or None, default None
    If None, dtype will be inferred.
copy : bool, default False
    Copy the input data.
%(name)s\
verify_integrity : bool, default True
    Verify that the %(klass)s is valid.

Attributes
----------
left
right
closed
mid
length
is_empty
is_non_overlapping_monotonic
%(extra_attributes)s\

Methods
-------
from_arrays
from_tuples
from_breaks
contains
overlaps
set_closed
to_tuples
%(extra_methods)s\

See Also
--------
Index : The base pandas Index type.
Interval : A bounded slice-like interval; the elements of an %(klass)s.
interval_range : Function to create a fixed frequency IntervalIndex.
cut : Bin values into discrete Intervals.
qcut : Bin values into equal-sized Intervals based on rank or sample quantiles.

Notes
-----
See the `user guide
<https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#intervalindex>`__
for more.

%(examples)s\
"""
@Appender(
    _interval_shared_docs["class"]
    % {
        "klass": "IntervalArray",
        "summary": "Pandas array for interval data that are closed on the same side.",
        "versionadded": "0.24.0",
        "name": "",
        "extra_attributes": "",
        "extra_methods": "",
        "examples": textwrap.dedent(
            """\
    Examples
    --------
    A new ``IntervalArray`` can be constructed directly from an array-like of
    ``Interval`` objects:

    >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
    <IntervalArray>
    [(0, 1], (1, 5]]
    Length: 2, dtype: interval[int64, right]

    It may also be constructed using one of the constructor
    methods: :meth:`IntervalArray.from_arrays`,
    :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`.
    """
        ),
    }
)
class IntervalArray(IntervalMixin, ExtensionArray):
    # ExtensionArray interface: this array can hold missing values,
    # represented by np.nan.
    can_hold_na = True
    _na_value = _fill_value = np.nan

    @property
    def ndim(self) -> Literal[1]:
        # IntervalArray is always one-dimensional.
        return 1

    # To make mypy recognize the fields
    _left: IntervalSideT
    _right: IntervalSideT
    _dtype: IntervalDtype
    # ---------------------------------------------------------------------
    # Constructors

    def __new__(
        cls: type[IntervalArrayT],
        data,
        closed=None,
        dtype: Dtype | None = None,
        copy: bool = False,
        verify_integrity: bool = True,
    ):
        """
        Build an IntervalArray from interval-like ``data``.

        Fast path: if ``data`` is already an instance of ``cls``, reuse its
        left/right endpoint arrays. Otherwise parse the sequence of Interval
        scalars into separate left/right bounds.
        """
        data = extract_array(data, extract_numpy=True)

        if isinstance(data, cls):
            left: IntervalSideT = data._left
            right: IntervalSideT = data._right
            closed = closed or data.closed
            dtype = IntervalDtype(left.dtype, closed=closed)
        else:
            # don't allow scalars
            if is_scalar(data):
                msg = (
                    f"{cls.__name__}(...) must be called with a collection "
                    f"of some kind, {data} was passed"
                )
                raise TypeError(msg)

            # might need to convert empty or purely na data
            data = _maybe_convert_platform_interval(data)
            # validate_closed=False when an explicit `closed` was passed:
            # the caller's value then overrides the per-Interval closed sides
            left, right, infer_closed = intervals_to_interval_bounds(
                data, validate_closed=closed is None
            )
            if left.dtype == object:
                left = lib.maybe_convert_objects(left)
                right = lib.maybe_convert_objects(right)
            closed = closed or infer_closed

        left, right, dtype = cls._ensure_simple_new_inputs(
            left,
            right,
            closed=closed,
            copy=copy,
            dtype=dtype,
        )

        if verify_integrity:
            cls._validate(left, right, dtype=dtype)

        return cls._simple_new(
            left,
            right,
            dtype=dtype,
        )
  247. @classmethod
  248. def _simple_new(
  249. cls: type[IntervalArrayT],
  250. left: IntervalSideT,
  251. right: IntervalSideT,
  252. dtype: IntervalDtype,
  253. ) -> IntervalArrayT:
  254. result = IntervalMixin.__new__(cls)
  255. result._left = left
  256. result._right = right
  257. result._dtype = dtype
  258. return result
    @classmethod
    def _ensure_simple_new_inputs(
        cls,
        left,
        right,
        closed: IntervalClosedType | None = None,
        copy: bool = False,
        dtype: Dtype | None = None,
    ) -> tuple[IntervalSideT, IntervalSideT, IntervalDtype]:
        """Ensure correctness of input parameters for cls._simple_new.

        Coerces ``left``/``right`` to matching, supported endpoint arrays,
        resolves ``closed`` (explicit argument > dtype.closed > "right"),
        and returns the triple expected by ``_simple_new``.
        """
        from pandas.core.indexes.base import ensure_index

        left = ensure_index(left, copy=copy)
        left = maybe_upcast_numeric_to_64bit(left)

        right = ensure_index(right, copy=copy)
        right = maybe_upcast_numeric_to_64bit(right)

        if closed is None and isinstance(dtype, IntervalDtype):
            closed = dtype.closed

        closed = closed or "right"

        if dtype is not None:
            # GH 19262: dtype must be an IntervalDtype to override inferred
            dtype = pandas_dtype(dtype)
            if is_interval_dtype(dtype):
                dtype = cast(IntervalDtype, dtype)
                if dtype.subtype is not None:
                    left = left.astype(dtype.subtype)
                    right = right.astype(dtype.subtype)
            else:
                msg = f"dtype must be an IntervalDtype, got {dtype}"
                raise TypeError(msg)

            if dtype.closed is None:
                # possibly loading an old pickle
                dtype = IntervalDtype(dtype.subtype, closed)
            elif closed != dtype.closed:
                raise ValueError("closed keyword does not match dtype.closed")

        # coerce dtypes to match if needed
        if is_float_dtype(left) and is_integer_dtype(right):
            right = right.astype(left.dtype)
        elif is_float_dtype(right) and is_integer_dtype(left):
            left = left.astype(right.dtype)

        if type(left) != type(right):
            msg = (
                f"must not have differing left [{type(left).__name__}] and "
                f"right [{type(right).__name__}] types"
            )
            raise ValueError(msg)
        if is_categorical_dtype(left.dtype) or is_string_dtype(left.dtype):
            # GH 19016
            msg = (
                "category, object, and string subtypes are not supported "
                "for IntervalArray"
            )
            raise TypeError(msg)
        if isinstance(left, ABCPeriodIndex):
            msg = "Period dtypes are not supported, use a PeriodIndex instead"
            raise ValueError(msg)
        if isinstance(left, ABCDatetimeIndex) and str(left.tz) != str(right.tz):
            msg = (
                "left and right must have the same time zone, got "
                f"'{left.tz}' and '{right.tz}'"
            )
            raise ValueError(msg)

        # For dt64/td64 we want DatetimeArray/TimedeltaArray instead of ndarray
        left = ensure_wrapped_if_datetimelike(left)
        left = extract_array(left, extract_numpy=True)
        right = ensure_wrapped_if_datetimelike(right)
        right = extract_array(right, extract_numpy=True)

        lbase = getattr(left, "_ndarray", left).base
        rbase = getattr(right, "_ndarray", right).base
        if lbase is not None and lbase is rbase:
            # If these share data, then setitem could corrupt our IA
            right = right.copy()

        dtype = IntervalDtype(left.dtype, closed=closed)

        return left, right, dtype
    @classmethod
    def _from_sequence(
        cls: type[IntervalArrayT],
        scalars,
        *,
        dtype: Dtype | None = None,
        copy: bool = False,
    ) -> IntervalArrayT:
        # ExtensionArray interface: delegate to the main constructor.
        return cls(scalars, dtype=dtype, copy=copy)
  341. @classmethod
  342. def _from_factorized(
  343. cls: type[IntervalArrayT], values: np.ndarray, original: IntervalArrayT
  344. ) -> IntervalArrayT:
  345. if len(values) == 0:
  346. # An empty array returns object-dtype here. We can't create
  347. # a new IA from an (empty) object-dtype array, so turn it into the
  348. # correct dtype.
  349. values = values.astype(original.dtype.subtype)
  350. return cls(values, closed=original.closed)
    _interval_shared_docs["from_breaks"] = textwrap.dedent(
        """
        Construct an %(klass)s from an array of splits.

        Parameters
        ----------
        breaks : array-like (1-dimensional)
            Left and right bounds for each interval.
        closed : {'left', 'right', 'both', 'neither'}, default 'right'
            Whether the intervals are closed on the left-side, right-side, both
            or neither.\
        %(name)s
        copy : bool, default False
            Copy the data.
        dtype : dtype or None, default None
            If None, dtype will be inferred.

        Returns
        -------
        %(klass)s

        See Also
        --------
        interval_range : Function to create a fixed frequency IntervalIndex.
        %(klass)s.from_arrays : Construct from a left and right array.
        %(klass)s.from_tuples : Construct from a sequence of tuples.

        %(examples)s\
        """
    )

    @classmethod
    @Appender(
        _interval_shared_docs["from_breaks"]
        % {
            "klass": "IntervalArray",
            "name": "",
            "examples": textwrap.dedent(
                """\
        Examples
        --------
        >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3])
        <IntervalArray>
        [(0, 1], (1, 2], (2, 3]]
        Length: 3, dtype: interval[int64, right]
        """
            ),
        }
    )
    def from_breaks(
        cls: type[IntervalArrayT],
        breaks,
        closed: IntervalClosedType | None = "right",
        copy: bool = False,
        dtype: Dtype | None = None,
    ) -> IntervalArrayT:
        breaks = _maybe_convert_platform_interval(breaks)

        # consecutive breaks form each interval: (breaks[i], breaks[i+1])
        return cls.from_arrays(breaks[:-1], breaks[1:], closed, copy=copy, dtype=dtype)
    _interval_shared_docs["from_arrays"] = textwrap.dedent(
        """
        Construct from two arrays defining the left and right bounds.

        Parameters
        ----------
        left : array-like (1-dimensional)
            Left bounds for each interval.
        right : array-like (1-dimensional)
            Right bounds for each interval.
        closed : {'left', 'right', 'both', 'neither'}, default 'right'
            Whether the intervals are closed on the left-side, right-side, both
            or neither.\
        %(name)s
        copy : bool, default False
            Copy the data.
        dtype : dtype, optional
            If None, dtype will be inferred.

        Returns
        -------
        %(klass)s

        Raises
        ------
        ValueError
            When a value is missing in only one of `left` or `right`.
            When a value in `left` is greater than the corresponding value
            in `right`.

        See Also
        --------
        interval_range : Function to create a fixed frequency IntervalIndex.
        %(klass)s.from_breaks : Construct an %(klass)s from an array of
            splits.
        %(klass)s.from_tuples : Construct an %(klass)s from an
            array-like of tuples.

        Notes
        -----
        Each element of `left` must be less than or equal to the `right`
        element at the same position. If an element is missing, it must be
        missing in both `left` and `right`. A TypeError is raised when
        using an unsupported type for `left` or `right`. At the moment,
        'category', 'object', and 'string' subtypes are not supported.

        %(examples)s\
        """
    )

    @classmethod
    @Appender(
        _interval_shared_docs["from_arrays"]
        % {
            "klass": "IntervalArray",
            "name": "",
            "examples": textwrap.dedent(
                """\
        >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3])
        <IntervalArray>
        [(0, 1], (1, 2], (2, 3]]
        Length: 3, dtype: interval[int64, right]
        """
            ),
        }
    )
    def from_arrays(
        cls: type[IntervalArrayT],
        left,
        right,
        closed: IntervalClosedType | None = "right",
        copy: bool = False,
        dtype: Dtype | None = None,
    ) -> IntervalArrayT:
        left = _maybe_convert_platform_interval(left)
        right = _maybe_convert_platform_interval(right)

        left, right, dtype = cls._ensure_simple_new_inputs(
            left,
            right,
            closed=closed,
            copy=copy,
            dtype=dtype,
        )
        # always verify integrity here (unlike __new__, no opt-out flag)
        cls._validate(left, right, dtype=dtype)

        return cls._simple_new(left, right, dtype=dtype)
    _interval_shared_docs["from_tuples"] = textwrap.dedent(
        """
        Construct an %(klass)s from an array-like of tuples.

        Parameters
        ----------
        data : array-like (1-dimensional)
            Array of tuples.
        closed : {'left', 'right', 'both', 'neither'}, default 'right'
            Whether the intervals are closed on the left-side, right-side, both
            or neither.\
        %(name)s
        copy : bool, default False
            By-default copy the data, this is compat only and ignored.
        dtype : dtype or None, default None
            If None, dtype will be inferred.

        Returns
        -------
        %(klass)s

        See Also
        --------
        interval_range : Function to create a fixed frequency IntervalIndex.
        %(klass)s.from_arrays : Construct an %(klass)s from a left and
            right array.
        %(klass)s.from_breaks : Construct an %(klass)s from an array of
            splits.

        %(examples)s\
        """
    )

    @classmethod
    @Appender(
        _interval_shared_docs["from_tuples"]
        % {
            "klass": "IntervalArray",
            "name": "",
            "examples": textwrap.dedent(
                """\
        Examples
        --------
        >>> pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
        <IntervalArray>
        [(0, 1], (1, 2]]
        Length: 2, dtype: interval[int64, right]
        """
            ),
        }
    )
    def from_tuples(
        cls: type[IntervalArrayT],
        data,
        closed: IntervalClosedType | None = "right",
        copy: bool = False,
        dtype: Dtype | None = None,
    ) -> IntervalArrayT:
        if len(data):
            left, right = [], []
        else:
            # ensure that empty data keeps input dtype
            left = right = data

        for d in data:
            if not isinstance(d, tuple) and isna(d):
                # a missing entry becomes NaN on both sides
                lhs = rhs = np.nan
            else:
                name = cls.__name__
                try:
                    # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...]
                    lhs, rhs = d
                except ValueError as err:
                    msg = f"{name}.from_tuples requires tuples of length 2, got {d}"
                    raise ValueError(msg) from err
                except TypeError as err:
                    msg = f"{name}.from_tuples received an invalid item, {d}"
                    raise TypeError(msg) from err
            left.append(lhs)
            right.append(rhs)

        return cls.from_arrays(left, right, closed, copy=False, dtype=dtype)
  557. @classmethod
  558. def _validate(cls, left, right, dtype: IntervalDtype) -> None:
  559. """
  560. Verify that the IntervalArray is valid.
  561. Checks that
  562. * dtype is correct
  563. * left and right match lengths
  564. * left and right have the same missing values
  565. * left is always below right
  566. """
  567. if not isinstance(dtype, IntervalDtype):
  568. msg = f"invalid dtype: {dtype}"
  569. raise ValueError(msg)
  570. if len(left) != len(right):
  571. msg = "left and right must have the same length"
  572. raise ValueError(msg)
  573. left_mask = notna(left)
  574. right_mask = notna(right)
  575. if not (left_mask == right_mask).all():
  576. msg = (
  577. "missing values must be missing in the same "
  578. "location both left and right sides"
  579. )
  580. raise ValueError(msg)
  581. if not (left[left_mask] <= right[left_mask]).all():
  582. msg = "left side of interval must be <= right side"
  583. raise ValueError(msg)
  584. def _shallow_copy(self: IntervalArrayT, left, right) -> IntervalArrayT:
  585. """
  586. Return a new IntervalArray with the replacement attributes
  587. Parameters
  588. ----------
  589. left : Index
  590. Values to be used for the left-side of the intervals.
  591. right : Index
  592. Values to be used for the right-side of the intervals.
  593. """
  594. dtype = IntervalDtype(left.dtype, closed=self.closed)
  595. left, right, dtype = self._ensure_simple_new_inputs(left, right, dtype=dtype)
  596. return self._simple_new(left, right, dtype=dtype)
    # ---------------------------------------------------------------------
    # Descriptive

    @property
    def dtype(self) -> IntervalDtype:
        # IntervalDtype holding the endpoint subtype and the closed side.
        return self._dtype

    @property
    def nbytes(self) -> int:
        # sum of the two endpoint arrays' byte counts
        return self.left.nbytes + self.right.nbytes

    @property
    def size(self) -> int:
        # Avoid materializing self.values
        return self.left.size
    # ---------------------------------------------------------------------
    # EA Interface

    def __iter__(self) -> Iterator:
        # Iterate scalar-by-scalar via the object-dtype ndarray view.
        return iter(np.asarray(self))

    def __len__(self) -> int:
        # left and right always have equal length; either one works here.
        return len(self._left)
    @overload
    def __getitem__(self, key: ScalarIndexer) -> IntervalOrNA:
        ...

    @overload
    def __getitem__(self: IntervalArrayT, key: SequenceIndexer) -> IntervalArrayT:
        ...

    def __getitem__(
        self: IntervalArrayT, key: PositionalIndexer
    ) -> IntervalArrayT | IntervalOrNA:
        """
        Scalar key -> a single Interval (or the NA fill value);
        array-like/slice key -> a new IntervalArray.
        """
        key = check_array_indexer(self, key)
        left = self._left[key]
        right = self._right[key]

        if not isinstance(left, (np.ndarray, ExtensionArray)):
            # scalar
            if is_scalar(left) and isna(left):
                return self._fill_value
            return Interval(left, right, self.closed)
        if np.ndim(left) > 1:
            # GH#30588 multi-dimensional indexer disallowed
            raise ValueError("multi-dimensional indexing not allowed")
        # Argument 2 to "_simple_new" of "IntervalArray" has incompatible type
        # "Union[Period, Timestamp, Timedelta, NaTType, DatetimeArray, TimedeltaArray,
        # ndarray[Any, Any]]"; expected "Union[Union[DatetimeArray, TimedeltaArray],
        # ndarray[Any, Any]]"
        return self._simple_new(left, right, dtype=self.dtype)  # type: ignore[arg-type]
  640. def __setitem__(self, key, value) -> None:
  641. value_left, value_right = self._validate_setitem_value(value)
  642. key = check_array_indexer(self, key)
  643. self._left[key] = value_left
  644. self._right[key] = value_right
    def _cmp_method(self, other, op):
        """
        Shared implementation for all six rich comparisons; ``op`` is one of
        the ``operator`` module's comparison functions. Intervals order
        lexicographically by (left, right).
        """
        # ensure pandas array for list-like and eliminate non-interval scalars
        if is_list_like(other):
            if len(self) != len(other):
                raise ValueError("Lengths must match to compare")
            other = pd_array(other)
        elif not isinstance(other, Interval):
            # non-interval scalar -> no matches
            if other is NA:
                # GH#31882
                from pandas.core.arrays import BooleanArray

                arr = np.empty(self.shape, dtype=bool)
                mask = np.ones(self.shape, dtype=bool)
                return BooleanArray(arr, mask)
            return invalid_comparison(self, other, op)

        # determine the dtype of the elements we want to compare
        if isinstance(other, Interval):
            other_dtype = pandas_dtype("interval")
        elif not is_categorical_dtype(other.dtype):
            other_dtype = other.dtype
        else:
            # for categorical defer to categories for dtype
            other_dtype = other.categories.dtype

            # extract intervals if we have interval categories with matching closed
            if is_interval_dtype(other_dtype):
                if self.closed != other.categories.closed:
                    return invalid_comparison(self, other, op)

                other = other.categories.take(
                    other.codes, allow_fill=True, fill_value=other.categories._na_value
                )

        # interval-like -> need same closed and matching endpoints
        if is_interval_dtype(other_dtype):
            if self.closed != other.closed:
                return invalid_comparison(self, other, op)
            elif not isinstance(other, Interval):
                other = type(self)(other)

            if op is operator.eq:
                return (self._left == other.left) & (self._right == other.right)
            elif op is operator.ne:
                return (self._left != other.left) | (self._right != other.right)
            elif op is operator.gt:
                return (self._left > other.left) | (
                    (self._left == other.left) & (self._right > other.right)
                )
            elif op is operator.ge:
                return (self == other) | (self > other)
            elif op is operator.lt:
                return (self._left < other.left) | (
                    (self._left == other.left) & (self._right < other.right)
                )
            else:
                # operator.le (the only remaining comparison op)
                return (self == other) | (self < other)

        # non-interval/non-object dtype -> no matches
        if not is_object_dtype(other_dtype):
            return invalid_comparison(self, other, op)

        # object dtype -> iteratively check for intervals
        result = np.zeros(len(self), dtype=bool)
        for i, obj in enumerate(other):
            try:
                result[i] = op(self[i], obj)
            except TypeError:
                if obj is NA:
                    # comparison with np.nan returns NA
                    # github.com/pandas-dev/pandas/pull/37124#discussion_r509095092
                    result = result.astype(object)
                    result[i] = NA
                else:
                    raise
        return result
    # The six rich comparisons all funnel through _cmp_method;
    # unpack_zerodim_and_defer handles 0-d ndarrays and defers to
    # higher-priority pandas objects (Series/DataFrame/Index).
    @unpack_zerodim_and_defer("__eq__")
    def __eq__(self, other):
        return self._cmp_method(other, operator.eq)

    @unpack_zerodim_and_defer("__ne__")
    def __ne__(self, other):
        return self._cmp_method(other, operator.ne)

    @unpack_zerodim_and_defer("__gt__")
    def __gt__(self, other):
        return self._cmp_method(other, operator.gt)

    @unpack_zerodim_and_defer("__ge__")
    def __ge__(self, other):
        return self._cmp_method(other, operator.ge)

    @unpack_zerodim_and_defer("__lt__")
    def __lt__(self, other):
        return self._cmp_method(other, operator.lt)

    @unpack_zerodim_and_defer("__le__")
    def __le__(self, other):
        return self._cmp_method(other, operator.le)
  733. def argsort(
  734. self,
  735. *,
  736. ascending: bool = True,
  737. kind: SortKind = "quicksort",
  738. na_position: str = "last",
  739. **kwargs,
  740. ) -> np.ndarray:
  741. ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs)
  742. if ascending and kind == "quicksort" and na_position == "last":
  743. # TODO: in an IntervalIndex we can re-use the cached
  744. # IntervalTree.left_sorter
  745. return np.lexsort((self.right, self.left))
  746. # TODO: other cases we can use lexsort for? much more performant.
  747. return super().argsort(
  748. ascending=ascending, kind=kind, na_position=na_position, **kwargs
  749. )
  750. def min(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA:
  751. nv.validate_minmax_axis(axis, self.ndim)
  752. if not len(self):
  753. return self._na_value
  754. mask = self.isna()
  755. if mask.any():
  756. if not skipna:
  757. return self._na_value
  758. obj = self[~mask]
  759. else:
  760. obj = self
  761. indexer = obj.argsort()[0]
  762. return obj[indexer]
  763. def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA:
  764. nv.validate_minmax_axis(axis, self.ndim)
  765. if not len(self):
  766. return self._na_value
  767. mask = self.isna()
  768. if mask.any():
  769. if not skipna:
  770. return self._na_value
  771. obj = self[~mask]
  772. else:
  773. obj = self
  774. indexer = obj.argsort()[-1]
  775. return obj[indexer]
  776. def fillna(
  777. self: IntervalArrayT, value=None, method=None, limit=None
  778. ) -> IntervalArrayT:
  779. """
  780. Fill NA/NaN values using the specified method.
  781. Parameters
  782. ----------
  783. value : scalar, dict, Series
  784. If a scalar value is passed it is used to fill all missing values.
  785. Alternatively, a Series or dict can be used to fill in different
  786. values for each index. The value should not be a list. The
  787. value(s) passed should be either Interval objects or NA/NaN.
  788. method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
  789. (Not implemented yet for IntervalArray)
  790. Method to use for filling holes in reindexed Series
  791. limit : int, default None
  792. (Not implemented yet for IntervalArray)
  793. If method is specified, this is the maximum number of consecutive
  794. NaN values to forward/backward fill. In other words, if there is
  795. a gap with more than this number of consecutive NaNs, it will only
  796. be partially filled. If method is not specified, this is the
  797. maximum number of entries along the entire axis where NaNs will be
  798. filled.
  799. Returns
  800. -------
  801. filled : IntervalArray with NA/NaN filled
  802. """
  803. if method is not None:
  804. raise TypeError("Filling by method is not supported for IntervalArray.")
  805. if limit is not None:
  806. raise TypeError("limit is not supported for IntervalArray.")
  807. value_left, value_right = self._validate_scalar(value)
  808. left = self.left.fillna(value=value_left)
  809. right = self.right.fillna(value=value_right)
  810. return self._shallow_copy(left, right)
  811. def astype(self, dtype, copy: bool = True):
  812. """
  813. Cast to an ExtensionArray or NumPy array with dtype 'dtype'.
  814. Parameters
  815. ----------
  816. dtype : str or dtype
  817. Typecode or data-type to which the array is cast.
  818. copy : bool, default True
  819. Whether to copy the data, even if not necessary. If False,
  820. a copy is made only if the old dtype does not match the
  821. new dtype.
  822. Returns
  823. -------
  824. array : ExtensionArray or ndarray
  825. ExtensionArray or NumPy ndarray with 'dtype' for its dtype.
  826. """
  827. from pandas import Index
  828. if dtype is not None:
  829. dtype = pandas_dtype(dtype)
  830. if is_interval_dtype(dtype):
  831. if dtype == self.dtype:
  832. return self.copy() if copy else self
  833. if is_float_dtype(self.dtype.subtype) and needs_i8_conversion(
  834. dtype.subtype
  835. ):
  836. # This is allowed on the Index.astype but we disallow it here
  837. msg = (
  838. f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
  839. )
  840. raise TypeError(msg)
  841. # need to cast to different subtype
  842. try:
  843. # We need to use Index rules for astype to prevent casting
  844. # np.nan entries to int subtypes
  845. new_left = Index(self._left, copy=False).astype(dtype.subtype)
  846. new_right = Index(self._right, copy=False).astype(dtype.subtype)
  847. except IntCastingNaNError:
  848. # e.g test_subtype_integer
  849. raise
  850. except (TypeError, ValueError) as err:
  851. # e.g. test_subtype_integer_errors f8->u8 can be lossy
  852. # and raises ValueError
  853. msg = (
  854. f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
  855. )
  856. raise TypeError(msg) from err
  857. return self._shallow_copy(new_left, new_right)
  858. else:
  859. try:
  860. return super().astype(dtype, copy=copy)
  861. except (TypeError, ValueError) as err:
  862. msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
  863. raise TypeError(msg) from err
  864. def equals(self, other) -> bool:
  865. if type(self) != type(other):
  866. return False
  867. return bool(
  868. self.closed == other.closed
  869. and self.left.equals(other.left)
  870. and self.right.equals(other.right)
  871. )
  872. @classmethod
  873. def _concat_same_type(
  874. cls: type[IntervalArrayT], to_concat: Sequence[IntervalArrayT]
  875. ) -> IntervalArrayT:
  876. """
  877. Concatenate multiple IntervalArray
  878. Parameters
  879. ----------
  880. to_concat : sequence of IntervalArray
  881. Returns
  882. -------
  883. IntervalArray
  884. """
  885. closed_set = {interval.closed for interval in to_concat}
  886. if len(closed_set) != 1:
  887. raise ValueError("Intervals must all be closed on the same side.")
  888. closed = closed_set.pop()
  889. left = np.concatenate([interval.left for interval in to_concat])
  890. right = np.concatenate([interval.right for interval in to_concat])
  891. left, right, dtype = cls._ensure_simple_new_inputs(left, right, closed=closed)
  892. return cls._simple_new(left, right, dtype=dtype)
  893. def copy(self: IntervalArrayT) -> IntervalArrayT:
  894. """
  895. Return a copy of the array.
  896. Returns
  897. -------
  898. IntervalArray
  899. """
  900. left = self._left.copy()
  901. right = self._right.copy()
  902. dtype = self.dtype
  903. return self._simple_new(left, right, dtype=dtype)
def isna(self) -> np.ndarray:
    # Missingness is tracked via the left endpoints: _validate_scalar stores
    # the same NA value in both endpoints, so checking one side suffices.
    return isna(self._left)
def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray:
    """
    Shift values by ``periods`` positions, introducing ``fill_value``
    (NA when not given) in the vacated slots.
    """
    if not len(self) or periods == 0:
        return self.copy()

    # raises TypeError for anything that is not an Interval or NA
    self._validate_scalar(fill_value)

    # ExtensionArray.shift doesn't work for two reasons
    # 1. IntervalArray.dtype.na_value may not be correct for the dtype.
    # 2. IntervalArray._from_sequence only accepts NaN for missing values,
    #    not other values like NaT
    empty_len = min(abs(periods), len(self))
    if isna(fill_value):
        from pandas import Index

        # use the endpoint Index's NA value so the subtype is preserved
        fill_value = Index(self._left, copy=False)._na_value
        empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1))
    else:
        empty = self._from_sequence([fill_value] * empty_len)

    if periods > 0:
        # shift right: filler first, then the surviving prefix
        a = empty
        b = self[:-periods]
    else:
        # shift left: surviving suffix first, then the filler
        a = self[abs(periods) :]
        b = empty
    return self._concat_same_type([a, b])
  928. def take(
  929. self: IntervalArrayT,
  930. indices,
  931. *,
  932. allow_fill: bool = False,
  933. fill_value=None,
  934. axis=None,
  935. **kwargs,
  936. ) -> IntervalArrayT:
  937. """
  938. Take elements from the IntervalArray.
  939. Parameters
  940. ----------
  941. indices : sequence of integers
  942. Indices to be taken.
  943. allow_fill : bool, default False
  944. How to handle negative values in `indices`.
  945. * False: negative values in `indices` indicate positional indices
  946. from the right (the default). This is similar to
  947. :func:`numpy.take`.
  948. * True: negative values in `indices` indicate
  949. missing values. These values are set to `fill_value`. Any other
  950. other negative values raise a ``ValueError``.
  951. fill_value : Interval or NA, optional
  952. Fill value to use for NA-indices when `allow_fill` is True.
  953. This may be ``None``, in which case the default NA value for
  954. the type, ``self.dtype.na_value``, is used.
  955. For many ExtensionArrays, there will be two representations of
  956. `fill_value`: a user-facing "boxed" scalar, and a low-level
  957. physical NA value. `fill_value` should be the user-facing version,
  958. and the implementation should handle translating that to the
  959. physical version for processing the take if necessary.
  960. axis : any, default None
  961. Present for compat with IntervalIndex; does nothing.
  962. Returns
  963. -------
  964. IntervalArray
  965. Raises
  966. ------
  967. IndexError
  968. When the indices are out of bounds for the array.
  969. ValueError
  970. When `indices` contains negative values other than ``-1``
  971. and `allow_fill` is True.
  972. """
  973. nv.validate_take((), kwargs)
  974. fill_left = fill_right = fill_value
  975. if allow_fill:
  976. fill_left, fill_right = self._validate_scalar(fill_value)
  977. left_take = take(
  978. self._left, indices, allow_fill=allow_fill, fill_value=fill_left
  979. )
  980. right_take = take(
  981. self._right, indices, allow_fill=allow_fill, fill_value=fill_right
  982. )
  983. return self._shallow_copy(left_take, right_take)
  984. def _validate_listlike(self, value):
  985. # list-like of intervals
  986. try:
  987. array = IntervalArray(value)
  988. self._check_closed_matches(array, name="value")
  989. value_left, value_right = array.left, array.right
  990. except TypeError as err:
  991. # wrong type: not interval or NA
  992. msg = f"'value' should be an interval type, got {type(value)} instead."
  993. raise TypeError(msg) from err
  994. try:
  995. self.left._validate_fill_value(value_left)
  996. except (LossySetitemError, TypeError) as err:
  997. msg = (
  998. "'value' should be a compatible interval type, "
  999. f"got {type(value)} instead."
  1000. )
  1001. raise TypeError(msg) from err
  1002. return value_left, value_right
  1003. def _validate_scalar(self, value):
  1004. if isinstance(value, Interval):
  1005. self._check_closed_matches(value, name="value")
  1006. left, right = value.left, value.right
  1007. # TODO: check subdtype match like _validate_setitem_value?
  1008. elif is_valid_na_for_dtype(value, self.left.dtype):
  1009. # GH#18295
  1010. left = right = self.left._na_value
  1011. else:
  1012. raise TypeError(
  1013. "can only insert Interval objects and NA into an IntervalArray"
  1014. )
  1015. return left, right
  1016. def _validate_setitem_value(self, value):
  1017. if is_valid_na_for_dtype(value, self.left.dtype):
  1018. # na value: need special casing to set directly on numpy arrays
  1019. value = self.left._na_value
  1020. if is_integer_dtype(self.dtype.subtype):
  1021. # can't set NaN on a numpy integer array
  1022. # GH#45484 TypeError, not ValueError, matches what we get with
  1023. # non-NA un-holdable value.
  1024. raise TypeError("Cannot set float NaN to integer-backed IntervalArray")
  1025. value_left, value_right = value, value
  1026. elif isinstance(value, Interval):
  1027. # scalar interval
  1028. self._check_closed_matches(value, name="value")
  1029. value_left, value_right = value.left, value.right
  1030. self.left._validate_fill_value(value_left)
  1031. self.left._validate_fill_value(value_right)
  1032. else:
  1033. return self._validate_listlike(value)
  1034. return value_left, value_right
def value_counts(self, dropna: bool = True) -> Series:
    """
    Returns a Series containing counts of each interval.

    Parameters
    ----------
    dropna : bool, default True
        Don't include counts of NaN.

    Returns
    -------
    counts : Series

    See Also
    --------
    Series.value_counts
    """
    # TODO: implement this is a non-naive way!
    # np.asarray(self) materializes an object-dtype ndarray of Interval
    # scalars (see __array__), so this is O(n) boxing.
    return value_counts(np.asarray(self), dropna=dropna)
  1051. # ---------------------------------------------------------------------
  1052. # Rendering Methods
  1053. def _format_data(self) -> str:
  1054. # TODO: integrate with categorical and make generic
  1055. # name argument is unused here; just for compat with base / categorical
  1056. n = len(self)
  1057. max_seq_items = min((get_option("display.max_seq_items") or n) // 10, 10)
  1058. formatter = str
  1059. if n == 0:
  1060. summary = "[]"
  1061. elif n == 1:
  1062. first = formatter(self[0])
  1063. summary = f"[{first}]"
  1064. elif n == 2:
  1065. first = formatter(self[0])
  1066. last = formatter(self[-1])
  1067. summary = f"[{first}, {last}]"
  1068. else:
  1069. if n > max_seq_items:
  1070. n = min(max_seq_items // 2, 10)
  1071. head = [formatter(x) for x in self[:n]]
  1072. tail = [formatter(x) for x in self[-n:]]
  1073. head_str = ", ".join(head)
  1074. tail_str = ", ".join(tail)
  1075. summary = f"[{head_str} ... {tail_str}]"
  1076. else:
  1077. tail = [formatter(x) for x in self]
  1078. tail_str = ", ".join(tail)
  1079. summary = f"[{tail_str}]"
  1080. return summary
  1081. def __repr__(self) -> str:
  1082. # the short repr has no trailing newline, while the truncated
  1083. # repr does. So we include a newline in our template, and strip
  1084. # any trailing newlines from format_object_summary
  1085. data = self._format_data()
  1086. class_name = f"<{type(self).__name__}>\n"
  1087. template = f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}"
  1088. return template
  1089. def _format_space(self) -> str:
  1090. space = " " * (len(type(self).__name__) + 1)
  1091. return f"\n{space}"
  1092. # ---------------------------------------------------------------------
  1093. # Vectorized Interval Properties/Attributes
  1094. @property
  1095. def left(self):
  1096. """
  1097. Return the left endpoints of each Interval in the IntervalArray as an Index.
  1098. """
  1099. from pandas import Index
  1100. return Index(self._left, copy=False)
@property
def right(self):
    """
    Return the right endpoints of each Interval in the IntervalArray as an Index.
    """
    from pandas import Index

    # wrap the stored endpoint array without copying
    return Index(self._right, copy=False)
@property
def length(self) -> Index:
    """
    Return an Index with entries denoting the length of each Interval.
    """
    # elementwise difference of the endpoint Indexes
    return self.right - self.left
@property
def mid(self) -> Index:
    """
    Return the midpoint of each Interval in the IntervalArray as an Index.
    """
    try:
        return 0.5 * (self.left + self.right)
    except TypeError:
        # datetime safe version: adding two datetime-likes raises, but
        # left + 0.5 * (right - left) is well-defined
        return self.left + 0.5 * self.length
# Shared docstring template for IntervalArray/IntervalIndex.overlaps;
# %-interpolated per class via the Appender decorator below.
_interval_shared_docs["overlaps"] = textwrap.dedent(
    """
    Check elementwise if an Interval overlaps the values in the %(klass)s.

    Two intervals overlap if they share a common point, including closed
    endpoints. Intervals that only have an open endpoint in common do not
    overlap.

    Parameters
    ----------
    other : %(klass)s
        Interval to check against for an overlap.

    Returns
    -------
    ndarray
        Boolean array positionally indicating where an overlap occurs.

    See Also
    --------
    Interval.overlaps : Check whether two Interval objects overlap.

    Examples
    --------
    %(examples)s
    >>> intervals.overlaps(pd.Interval(0.5, 1.5))
    array([ True,  True, False])

    Intervals that share closed endpoints overlap:

    >>> intervals.overlaps(pd.Interval(1, 3, closed='left'))
    array([ True,  True,  True])

    Intervals that only have an open endpoint in common do not overlap:

    >>> intervals.overlaps(pd.Interval(1, 2, closed='right'))
    array([False,  True, False])
    """
)
  1154. @Appender(
  1155. _interval_shared_docs["overlaps"]
  1156. % {
  1157. "klass": "IntervalArray",
  1158. "examples": textwrap.dedent(
  1159. """\
  1160. >>> data = [(0, 1), (1, 3), (2, 4)]
  1161. >>> intervals = pd.arrays.IntervalArray.from_tuples(data)
  1162. >>> intervals
  1163. <IntervalArray>
  1164. [(0, 1], (1, 3], (2, 4]]
  1165. Length: 3, dtype: interval[int64, right]
  1166. """
  1167. ),
  1168. }
  1169. )
  1170. def overlaps(self, other):
  1171. if isinstance(other, (IntervalArray, ABCIntervalIndex)):
  1172. raise NotImplementedError
  1173. if not isinstance(other, Interval):
  1174. msg = f"`other` must be Interval-like, got {type(other).__name__}"
  1175. raise TypeError(msg)
  1176. # equality is okay if both endpoints are closed (overlap at a point)
  1177. op1 = le if (self.closed_left and other.closed_right) else lt
  1178. op2 = le if (other.closed_left and self.closed_right) else lt
  1179. # overlaps is equivalent negation of two interval being disjoint:
  1180. # disjoint = (A.left > B.right) or (B.left > A.right)
  1181. # (simplifying the negation allows this to be done in less operations)
  1182. return op1(self.left, other.right) & op2(other.left, self.right)
  1183. # ---------------------------------------------------------------------
  1184. @property
  1185. def closed(self) -> IntervalClosedType:
  1186. """
  1187. String describing the inclusive side the intervals.
  1188. Either ``left``, ``right``, ``both`` or ``neither``.
  1189. """
  1190. return self.dtype.closed
# Shared docstring template for set_closed; %-interpolated per class below.
_interval_shared_docs["set_closed"] = textwrap.dedent(
    """
    Return an identical %(klass)s closed on the specified side.

    Parameters
    ----------
    closed : {'left', 'right', 'both', 'neither'}
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    %(klass)s

    %(examples)s\
    """
)
  1205. @Appender(
  1206. _interval_shared_docs["set_closed"]
  1207. % {
  1208. "klass": "IntervalArray",
  1209. "examples": textwrap.dedent(
  1210. """\
  1211. Examples
  1212. --------
  1213. >>> index = pd.arrays.IntervalArray.from_breaks(range(4))
  1214. >>> index
  1215. <IntervalArray>
  1216. [(0, 1], (1, 2], (2, 3]]
  1217. Length: 3, dtype: interval[int64, right]
  1218. >>> index.set_closed('both')
  1219. <IntervalArray>
  1220. [[0, 1], [1, 2], [2, 3]]
  1221. Length: 3, dtype: interval[int64, both]
  1222. """
  1223. ),
  1224. }
  1225. )
  1226. def set_closed(self: IntervalArrayT, closed: IntervalClosedType) -> IntervalArrayT:
  1227. if closed not in VALID_CLOSED:
  1228. msg = f"invalid option for 'closed': {closed}"
  1229. raise ValueError(msg)
  1230. left, right = self._left, self._right
  1231. dtype = IntervalDtype(left.dtype, closed=closed)
  1232. return self._simple_new(left, right, dtype=dtype)
# Shared docstring template; %-interpolated via Appender on the property below.
_interval_shared_docs[
    "is_non_overlapping_monotonic"
] = """
    Return a boolean whether the %(klass)s is non-overlapping and monotonic.

    Non-overlapping means (no Intervals share points), and monotonic means
    either monotonic increasing or monotonic decreasing.
    """
  1240. @property
  1241. @Appender(
  1242. _interval_shared_docs["is_non_overlapping_monotonic"] % _shared_docs_kwargs
  1243. )
  1244. def is_non_overlapping_monotonic(self) -> bool:
  1245. # must be increasing (e.g., [0, 1), [1, 2), [2, 3), ... )
  1246. # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...)
  1247. # we already require left <= right
  1248. # strict inequality for closed == 'both'; equality implies overlapping
  1249. # at a point when both sides of intervals are included
  1250. if self.closed == "both":
  1251. return bool(
  1252. (self._right[:-1] < self._left[1:]).all()
  1253. or (self._left[:-1] > self._right[1:]).all()
  1254. )
  1255. # non-strict inequality when closed != 'both'; at least one side is
  1256. # not included in the intervals, so equality does not imply overlapping
  1257. return bool(
  1258. (self._right[:-1] <= self._left[1:]).all()
  1259. or (self._left[:-1] >= self._right[1:]).all()
  1260. )
  1261. # ---------------------------------------------------------------------
  1262. # Conversion
  1263. def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
  1264. """
  1265. Return the IntervalArray's data as a numpy array of Interval
  1266. objects (with dtype='object')
  1267. """
  1268. left = self._left
  1269. right = self._right
  1270. mask = self.isna()
  1271. closed = self.closed
  1272. result = np.empty(len(left), dtype=object)
  1273. for i, left_value in enumerate(left):
  1274. if mask[i]:
  1275. result[i] = np.nan
  1276. else:
  1277. result[i] = Interval(left_value, right[i], closed)
  1278. return result
def __arrow_array__(self, type=None):
    """
    Convert myself into a pyarrow Array.
    """
    import pyarrow

    from pandas.core.arrays.arrow.extension_types import ArrowIntervalType

    try:
        subtype = pyarrow.from_numpy_dtype(self.dtype.subtype)
    except TypeError as err:
        raise TypeError(
            f"Conversion to arrow with subtype '{self.dtype.subtype}' "
            "is not supported"
        ) from err
    interval_type = ArrowIntervalType(subtype, self.closed)
    # endpoints are stored as a two-field struct array
    storage_array = pyarrow.StructArray.from_arrays(
        [
            pyarrow.array(self._left, type=subtype, from_pandas=True),
            pyarrow.array(self._right, type=subtype, from_pandas=True),
        ],
        names=["left", "right"],
    )
    mask = self.isna()
    if mask.any():
        # if there are missing values, set validity bitmap also on the array level
        null_bitmap = pyarrow.array(~mask).buffers()[1]
        storage_array = pyarrow.StructArray.from_buffers(
            storage_array.type,
            len(storage_array),
            [null_bitmap],
            children=[storage_array.field(0), storage_array.field(1)],
        )
    if type is not None:
        if type.equals(interval_type.storage_type):
            # caller asked for the raw struct storage, not the extension type
            return storage_array
        elif isinstance(type, ArrowIntervalType):
            # ensure we have the same subtype and closed attributes
            if not type.equals(interval_type):
                raise TypeError(
                    "Not supported to convert IntervalArray to type with "
                    f"different 'subtype' ({self.dtype.subtype} vs {type.subtype}) "
                    f"and 'closed' ({self.closed} vs {type.closed}) attributes"
                )
        else:
            raise TypeError(
                f"Not supported to convert IntervalArray to '{type}' type"
            )
    return pyarrow.ExtensionArray.from_storage(interval_type, storage_array)
# Shared docstring template for to_tuples; %-interpolated via Appender below.
_interval_shared_docs[
    "to_tuples"
] = """
    Return an %(return_type)s of tuples of the form (left, right).

    Parameters
    ----------
    na_tuple : bool, default True
        Returns NA as a tuple if True, ``(nan, nan)``, or just as the NA
        value itself if False, ``nan``.

    Returns
    -------
    tuples: %(return_type)s
    %(examples)s\
    """
  1340. @Appender(
  1341. _interval_shared_docs["to_tuples"] % {"return_type": "ndarray", "examples": ""}
  1342. )
  1343. def to_tuples(self, na_tuple: bool = True) -> np.ndarray:
  1344. tuples = com.asarray_tuplesafe(zip(self._left, self._right))
  1345. if not na_tuple:
  1346. # GH 18756
  1347. tuples = np.where(~self.isna(), tuples, np.nan)
  1348. return tuples
  1349. # ---------------------------------------------------------------------
  1350. def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
  1351. value_left, value_right = self._validate_setitem_value(value)
  1352. if isinstance(self._left, np.ndarray):
  1353. np.putmask(self._left, mask, value_left)
  1354. assert isinstance(self._right, np.ndarray)
  1355. np.putmask(self._right, mask, value_right)
  1356. else:
  1357. self._left._putmask(mask, value_left)
  1358. assert not isinstance(self._right, np.ndarray)
  1359. self._right._putmask(mask, value_right)
  1360. def insert(self: IntervalArrayT, loc: int, item: Interval) -> IntervalArrayT:
  1361. """
  1362. Return a new IntervalArray inserting new item at location. Follows
  1363. Python numpy.insert semantics for negative values. Only Interval
  1364. objects and NA can be inserted into an IntervalIndex
  1365. Parameters
  1366. ----------
  1367. loc : int
  1368. item : Interval
  1369. Returns
  1370. -------
  1371. IntervalArray
  1372. """
  1373. left_insert, right_insert = self._validate_scalar(item)
  1374. new_left = self.left.insert(loc, left_insert)
  1375. new_right = self.right.insert(loc, right_insert)
  1376. return self._shallow_copy(new_left, new_right)
  1377. def delete(self: IntervalArrayT, loc) -> IntervalArrayT:
  1378. if isinstance(self._left, np.ndarray):
  1379. new_left = np.delete(self._left, loc)
  1380. assert isinstance(self._right, np.ndarray)
  1381. new_right = np.delete(self._right, loc)
  1382. else:
  1383. new_left = self._left.delete(loc)
  1384. assert not isinstance(self._right, np.ndarray)
  1385. new_right = self._right.delete(loc)
  1386. return self._shallow_copy(left=new_left, right=new_right)
  1387. @Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs)
  1388. def repeat(
  1389. self: IntervalArrayT,
  1390. repeats: int | Sequence[int],
  1391. axis: AxisInt | None = None,
  1392. ) -> IntervalArrayT:
  1393. nv.validate_repeat((), {"axis": axis})
  1394. left_repeat = self.left.repeat(repeats)
  1395. right_repeat = self.right.repeat(repeats)
  1396. return self._shallow_copy(left=left_repeat, right=right_repeat)
# Shared docstring template for contains; %-interpolated via Appender below.
_interval_shared_docs["contains"] = textwrap.dedent(
    """
    Check elementwise if the Intervals contain the value.

    Return a boolean mask whether the value is contained in the Intervals
    of the %(klass)s.

    Parameters
    ----------
    other : scalar
        The value to check whether it is contained in the Intervals.

    Returns
    -------
    boolean array

    See Also
    --------
    Interval.contains : Check whether Interval object contains value.
    %(klass)s.overlaps : Check if an Interval overlaps the values in the
        %(klass)s.

    Examples
    --------
    %(examples)s
    >>> intervals.contains(0.5)
    array([ True, False, False])
    """
)
  1421. @Appender(
  1422. _interval_shared_docs["contains"]
  1423. % {
  1424. "klass": "IntervalArray",
  1425. "examples": textwrap.dedent(
  1426. """\
  1427. >>> intervals = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3), (2, 4)])
  1428. >>> intervals
  1429. <IntervalArray>
  1430. [(0, 1], (1, 3], (2, 4]]
  1431. Length: 3, dtype: interval[int64, right]
  1432. """
  1433. ),
  1434. }
  1435. )
  1436. def contains(self, other):
  1437. if isinstance(other, Interval):
  1438. raise NotImplementedError("contains not implemented for two intervals")
  1439. return (self._left < other if self.open_left else self._left <= other) & (
  1440. other < self._right if self.open_right else other <= self._right
  1441. )
def isin(self, values) -> npt.NDArray[np.bool_]:
    """
    Return a boolean mask of which of our intervals are present in ``values``.
    """
    if not hasattr(values, "dtype"):
        values = np.array(values)
    values = extract_array(values, extract_numpy=True)

    if is_interval_dtype(values.dtype):
        if self.closed != values.closed:
            # not comparable -> no overlap
            return np.zeros(self.shape, dtype=bool)

        if is_dtype_equal(self.dtype, values.dtype):
            # GH#38353 instead of casting to object, operating on a
            # complex128 ndarray is much more performant.
            # (each (left, right) pair of i8/f8 endpoints is viewed as one
            # complex number, so membership reduces to a 1-D in1d)
            left = self._combined.view("complex128")
            right = values._combined.view("complex128")
            # error: Argument 1 to "in1d" has incompatible type
            # "Union[ExtensionArray, ndarray[Any, Any],
            # ndarray[Any, dtype[Any]]]"; expected
            # "Union[_SupportsArray[dtype[Any]],
            # _NestedSequence[_SupportsArray[dtype[Any]]], bool,
            # int, float, complex, str, bytes, _NestedSequence[
            # Union[bool, int, float, complex, str, bytes]]]"
            return np.in1d(left, right)  # type: ignore[arg-type]

        elif needs_i8_conversion(self.left.dtype) ^ needs_i8_conversion(
            values.left.dtype
        ):
            # exactly one side is datetime-like:
            # not comparable -> no overlap
            return np.zeros(self.shape, dtype=bool)

    # fallback: compare as object-dtype arrays of Interval scalars
    return isin(self.astype(object), values.astype(object))
  1469. @property
  1470. def _combined(self) -> IntervalSideT:
  1471. left = self.left._values.reshape(-1, 1)
  1472. right = self.right._values.reshape(-1, 1)
  1473. if needs_i8_conversion(left.dtype):
  1474. comb = left._concat_same_type([left, right], axis=1)
  1475. else:
  1476. comb = np.concatenate([left, right], axis=1)
  1477. return comb
  1478. def _from_combined(self, combined: np.ndarray) -> IntervalArray:
  1479. """
  1480. Create a new IntervalArray with our dtype from a 1D complex128 ndarray.
  1481. """
  1482. nc = combined.view("i8").reshape(-1, 2)
  1483. dtype = self._left.dtype
  1484. if needs_i8_conversion(dtype):
  1485. assert isinstance(self._left, (DatetimeArray, TimedeltaArray))
  1486. new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype)
  1487. assert isinstance(self._right, (DatetimeArray, TimedeltaArray))
  1488. new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype)
  1489. else:
  1490. assert isinstance(dtype, np.dtype)
  1491. new_left = nc[:, 0].view(dtype)
  1492. new_right = nc[:, 1].view(dtype)
  1493. return self._shallow_copy(left=new_left, right=new_right)
def unique(self) -> IntervalArray:
    """Return the unique intervals, in order of first appearance."""
    # Deduplicate on the complex128 view of the combined (left, right)
    # columns: one complex number encodes both endpoints of an interval.
    # No overload variant of "__getitem__" of "ExtensionArray" matches argument
    # type "Tuple[slice, int]"
    nc = unique(
        self._combined.view("complex128")[:, 0]  # type: ignore[call-overload]
    )
    nc = nc[:, None]
    return self._from_combined(nc)
  1502. def _maybe_convert_platform_interval(values) -> ArrayLike:
  1503. """
  1504. Try to do platform conversion, with special casing for IntervalArray.
  1505. Wrapper around maybe_convert_platform that alters the default return
  1506. dtype in certain cases to be compatible with IntervalArray. For example,
  1507. empty lists return with integer dtype instead of object dtype, which is
  1508. prohibited for IntervalArray.
  1509. Parameters
  1510. ----------
  1511. values : array-like
  1512. Returns
  1513. -------
  1514. array
  1515. """
  1516. if isinstance(values, (list, tuple)) and len(values) == 0:
  1517. # GH 19016
  1518. # empty lists/tuples get object dtype by default, but this is
  1519. # prohibited for IntervalArray, so coerce to integer instead
  1520. return np.array([], dtype=np.int64)
  1521. elif not is_list_like(values) or isinstance(values, ABCDataFrame):
  1522. # This will raise later, but we avoid passing to maybe_convert_platform
  1523. return values
  1524. elif is_categorical_dtype(values):
  1525. values = np.asarray(values)
  1526. elif not hasattr(values, "dtype") and not isinstance(values, (list, tuple, range)):
  1527. # TODO: should we just cast these to list?
  1528. return values
  1529. else:
  1530. values = extract_array(values, extract_numpy=True)
  1531. if not hasattr(values, "dtype"):
  1532. values = np.asarray(values)
  1533. if is_integer_dtype(values) and values.dtype != np.int64:
  1534. values = values.astype(np.int64)
  1535. return values