interval.pyx 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650
  1. import numbers
  2. from operator import (
  3. le,
  4. lt,
  5. )
  6. from cpython.datetime cimport (
  7. PyDelta_Check,
  8. import_datetime,
  9. )
  10. import_datetime()
  11. cimport cython
  12. from cpython.object cimport PyObject_RichCompare
  13. from cython cimport Py_ssize_t
  14. import numpy as np
  15. cimport numpy as cnp
  16. from numpy cimport (
  17. NPY_QUICKSORT,
  18. PyArray_ArgSort,
  19. PyArray_Take,
  20. float64_t,
  21. int64_t,
  22. ndarray,
  23. uint64_t,
  24. )
  25. cnp.import_array()
  26. from pandas._libs cimport util
  27. from pandas._libs.hashtable cimport Int64Vector
  28. from pandas._libs.tslibs.timedeltas cimport _Timedelta
  29. from pandas._libs.tslibs.timestamps cimport _Timestamp
  30. from pandas._libs.tslibs.timezones cimport tz_compare
  31. from pandas._libs.tslibs.util cimport (
  32. is_float_object,
  33. is_integer_object,
  34. is_timedelta64_object,
  35. )
  36. VALID_CLOSED = frozenset(["left", "right", "both", "neither"])
  37. cdef class IntervalMixin:
  38. @property
  39. def closed_left(self):
  40. """
  41. Check if the interval is closed on the left side.
  42. For the meaning of `closed` and `open` see :class:`~pandas.Interval`.
  43. Returns
  44. -------
  45. bool
  46. True if the Interval is closed on the left-side.
  47. See Also
  48. --------
  49. Interval.closed_right : Check if the interval is closed on the right side.
  50. Interval.open_left : Boolean inverse of closed_left.
  51. Examples
  52. --------
  53. >>> iv = pd.Interval(0, 5, closed='left')
  54. >>> iv.closed_left
  55. True
  56. >>> iv = pd.Interval(0, 5, closed='right')
  57. >>> iv.closed_left
  58. False
  59. """
  60. return self.closed in ("left", "both")
  61. @property
  62. def closed_right(self):
  63. """
  64. Check if the interval is closed on the right side.
  65. For the meaning of `closed` and `open` see :class:`~pandas.Interval`.
  66. Returns
  67. -------
  68. bool
  69. True if the Interval is closed on the left-side.
  70. See Also
  71. --------
  72. Interval.closed_left : Check if the interval is closed on the left side.
  73. Interval.open_right : Boolean inverse of closed_right.
  74. Examples
  75. --------
  76. >>> iv = pd.Interval(0, 5, closed='both')
  77. >>> iv.closed_right
  78. True
  79. >>> iv = pd.Interval(0, 5, closed='left')
  80. >>> iv.closed_right
  81. False
  82. """
  83. return self.closed in ("right", "both")
  84. @property
  85. def open_left(self):
  86. """
  87. Check if the interval is open on the left side.
  88. For the meaning of `closed` and `open` see :class:`~pandas.Interval`.
  89. Returns
  90. -------
  91. bool
  92. True if the Interval is not closed on the left-side.
  93. See Also
  94. --------
  95. Interval.open_right : Check if the interval is open on the right side.
  96. Interval.closed_left : Boolean inverse of open_left.
  97. Examples
  98. --------
  99. >>> iv = pd.Interval(0, 5, closed='neither')
  100. >>> iv.open_left
  101. True
  102. >>> iv = pd.Interval(0, 5, closed='both')
  103. >>> iv.open_left
  104. False
  105. """
  106. return not self.closed_left
  107. @property
  108. def open_right(self):
  109. """
  110. Check if the interval is open on the right side.
  111. For the meaning of `closed` and `open` see :class:`~pandas.Interval`.
  112. Returns
  113. -------
  114. bool
  115. True if the Interval is not closed on the left-side.
  116. See Also
  117. --------
  118. Interval.open_left : Check if the interval is open on the left side.
  119. Interval.closed_right : Boolean inverse of open_right.
  120. Examples
  121. --------
  122. >>> iv = pd.Interval(0, 5, closed='left')
  123. >>> iv.open_right
  124. True
  125. >>> iv = pd.Interval(0, 5)
  126. >>> iv.open_right
  127. False
  128. """
  129. return not self.closed_right
  130. @property
  131. def mid(self):
  132. """
  133. Return the midpoint of the Interval.
  134. Examples
  135. --------
  136. >>> iv = pd.Interval(0, 5)
  137. >>> iv.mid
  138. 2.5
  139. """
  140. try:
  141. return 0.5 * (self.left + self.right)
  142. except TypeError:
  143. # datetime safe version
  144. return self.left + 0.5 * self.length
  145. @property
  146. def length(self):
  147. """
  148. Return the length of the Interval.
  149. See Also
  150. --------
  151. Interval.is_empty : Indicates if an interval contains no points.
  152. """
  153. return self.right - self.left
  154. @property
  155. def is_empty(self):
  156. """
  157. Indicates if an interval is empty, meaning it contains no points.
  158. Returns
  159. -------
  160. bool or ndarray
  161. A boolean indicating if a scalar :class:`Interval` is empty, or a
  162. boolean ``ndarray`` positionally indicating if an ``Interval`` in
  163. an :class:`~arrays.IntervalArray` or :class:`IntervalIndex` is
  164. empty.
  165. See Also
  166. --------
  167. Interval.length : Return the length of the Interval.
  168. Examples
  169. --------
  170. An :class:`Interval` that contains points is not empty:
  171. >>> pd.Interval(0, 1, closed='right').is_empty
  172. False
  173. An ``Interval`` that does not contain any points is empty:
  174. >>> pd.Interval(0, 0, closed='right').is_empty
  175. True
  176. >>> pd.Interval(0, 0, closed='left').is_empty
  177. True
  178. >>> pd.Interval(0, 0, closed='neither').is_empty
  179. True
  180. An ``Interval`` that contains a single point is not empty:
  181. >>> pd.Interval(0, 0, closed='both').is_empty
  182. False
  183. An :class:`~arrays.IntervalArray` or :class:`IntervalIndex` returns a
  184. boolean ``ndarray`` positionally indicating if an ``Interval`` is
  185. empty:
  186. >>> ivs = [pd.Interval(0, 0, closed='neither'),
  187. ... pd.Interval(1, 2, closed='neither')]
  188. >>> pd.arrays.IntervalArray(ivs).is_empty
  189. array([ True, False])
  190. Missing values are not considered empty:
  191. >>> ivs = [pd.Interval(0, 0, closed='neither'), np.nan]
  192. >>> pd.IntervalIndex(ivs).is_empty
  193. array([ True, False])
  194. """
  195. return (self.right == self.left) & (self.closed != "both")
  196. def _check_closed_matches(self, other, name="other"):
  197. """
  198. Check if the closed attribute of `other` matches.
  199. Note that 'left' and 'right' are considered different from 'both'.
  200. Parameters
  201. ----------
  202. other : Interval, IntervalIndex, IntervalArray
  203. name : str
  204. Name to use for 'other' in the error message.
  205. Raises
  206. ------
  207. ValueError
  208. When `other` is not closed exactly the same as self.
  209. """
  210. if self.closed != other.closed:
  211. raise ValueError(f"'{name}.closed' is {repr(other.closed)}, "
  212. f"expected {repr(self.closed)}.")
  213. cdef bint _interval_like(other):
  214. return (hasattr(other, "left")
  215. and hasattr(other, "right")
  216. and hasattr(other, "closed"))
  217. cdef class Interval(IntervalMixin):
  218. """
  219. Immutable object implementing an Interval, a bounded slice-like interval.
  220. Parameters
  221. ----------
  222. left : orderable scalar
  223. Left bound for the interval.
  224. right : orderable scalar
  225. Right bound for the interval.
  226. closed : {'right', 'left', 'both', 'neither'}, default 'right'
  227. Whether the interval is closed on the left-side, right-side, both or
  228. neither. See the Notes for more detailed explanation.
  229. See Also
  230. --------
  231. IntervalIndex : An Index of Interval objects that are all closed on the
  232. same side.
  233. cut : Convert continuous data into discrete bins (Categorical
  234. of Interval objects).
  235. qcut : Convert continuous data into bins (Categorical of Interval objects)
  236. based on quantiles.
  237. Period : Represents a period of time.
  238. Notes
  239. -----
  240. The parameters `left` and `right` must be from the same type, you must be
  241. able to compare them and they must satisfy ``left <= right``.
  242. A closed interval (in mathematics denoted by square brackets) contains
  243. its endpoints, i.e. the closed interval ``[0, 5]`` is characterized by the
  244. conditions ``0 <= x <= 5``. This is what ``closed='both'`` stands for.
  245. An open interval (in mathematics denoted by parentheses) does not contain
  246. its endpoints, i.e. the open interval ``(0, 5)`` is characterized by the
  247. conditions ``0 < x < 5``. This is what ``closed='neither'`` stands for.
  248. Intervals can also be half-open or half-closed, i.e. ``[0, 5)`` is
  249. described by ``0 <= x < 5`` (``closed='left'``) and ``(0, 5]`` is
  250. described by ``0 < x <= 5`` (``closed='right'``).
  251. Examples
  252. --------
  253. It is possible to build Intervals of different types, like numeric ones:
  254. >>> iv = pd.Interval(left=0, right=5)
  255. >>> iv
  256. Interval(0, 5, closed='right')
  257. You can check if an element belongs to it, or if it contains another interval:
  258. >>> 2.5 in iv
  259. True
  260. >>> pd.Interval(left=2, right=5, closed='both') in iv
  261. True
  262. You can test the bounds (``closed='right'``, so ``0 < x <= 5``):
  263. >>> 0 in iv
  264. False
  265. >>> 5 in iv
  266. True
  267. >>> 0.0001 in iv
  268. True
  269. Calculate its length
  270. >>> iv.length
  271. 5
  272. You can operate with `+` and `*` over an Interval and the operation
  273. is applied to each of its bounds, so the result depends on the type
  274. of the bound elements
  275. >>> shifted_iv = iv + 3
  276. >>> shifted_iv
  277. Interval(3, 8, closed='right')
  278. >>> extended_iv = iv * 10.0
  279. >>> extended_iv
  280. Interval(0.0, 50.0, closed='right')
  281. To create a time interval you can use Timestamps as the bounds
  282. >>> year_2017 = pd.Interval(pd.Timestamp('2017-01-01 00:00:00'),
  283. ... pd.Timestamp('2018-01-01 00:00:00'),
  284. ... closed='left')
  285. >>> pd.Timestamp('2017-01-01 00:00') in year_2017
  286. True
  287. >>> year_2017.length
  288. Timedelta('365 days 00:00:00')
  289. """
  290. _typ = "interval"
  291. __array_priority__ = 1000
  292. cdef readonly object left
  293. """
  294. Left bound for the interval.
  295. """
  296. cdef readonly object right
  297. """
  298. Right bound for the interval.
  299. """
  300. cdef readonly str closed
  301. """
  302. String describing the inclusive side the intervals.
  303. Either ``left``, ``right``, ``both`` or ``neither``.
  304. """
  305. def __init__(self, left, right, str closed="right"):
  306. # note: it is faster to just do these checks than to use a special
  307. # constructor (__cinit__/__new__) to avoid them
  308. self._validate_endpoint(left)
  309. self._validate_endpoint(right)
  310. if closed not in VALID_CLOSED:
  311. raise ValueError(f"invalid option for 'closed': {closed}")
  312. if not left <= right:
  313. raise ValueError("left side of interval must be <= right side")
  314. if (isinstance(left, _Timestamp) and
  315. not tz_compare(left.tzinfo, right.tzinfo)):
  316. # GH 18538
  317. raise ValueError("left and right must have the same time zone, got "
  318. f"{repr(left.tzinfo)}' and {repr(right.tzinfo)}")
  319. self.left = left
  320. self.right = right
  321. self.closed = closed
  322. def _validate_endpoint(self, endpoint):
  323. # GH 23013
  324. if not (is_integer_object(endpoint) or is_float_object(endpoint) or
  325. isinstance(endpoint, (_Timestamp, _Timedelta))):
  326. raise ValueError("Only numeric, Timestamp and Timedelta endpoints "
  327. "are allowed when constructing an Interval.")
  328. def __hash__(self):
  329. return hash((self.left, self.right, self.closed))
  330. def __contains__(self, key) -> bool:
  331. if _interval_like(key):
  332. key_closed_left = key.closed in ("left", "both")
  333. key_closed_right = key.closed in ("right", "both")
  334. if self.open_left and key_closed_left:
  335. left_contained = self.left < key.left
  336. else:
  337. left_contained = self.left <= key.left
  338. if self.open_right and key_closed_right:
  339. right_contained = key.right < self.right
  340. else:
  341. right_contained = key.right <= self.right
  342. return left_contained and right_contained
  343. return ((self.left < key if self.open_left else self.left <= key) and
  344. (key < self.right if self.open_right else key <= self.right))
  345. def __richcmp__(self, other, op: int):
  346. if isinstance(other, Interval):
  347. self_tuple = (self.left, self.right, self.closed)
  348. other_tuple = (other.left, other.right, other.closed)
  349. return PyObject_RichCompare(self_tuple, other_tuple, op)
  350. elif util.is_array(other):
  351. return np.array(
  352. [PyObject_RichCompare(self, x, op) for x in other],
  353. dtype=bool,
  354. )
  355. return NotImplemented
  356. def __reduce__(self):
  357. args = (self.left, self.right, self.closed)
  358. return (type(self), args)
  359. def _repr_base(self):
  360. left = self.left
  361. right = self.right
  362. # TODO: need more general formatting methodology here
  363. if isinstance(left, _Timestamp) and isinstance(right, _Timestamp):
  364. left = left._short_repr
  365. right = right._short_repr
  366. return left, right
  367. def __repr__(self) -> str:
  368. left, right = self._repr_base()
  369. name = type(self).__name__
  370. repr_str = f"{name}({repr(left)}, {repr(right)}, closed={repr(self.closed)})"
  371. return repr_str
  372. def __str__(self) -> str:
  373. left, right = self._repr_base()
  374. start_symbol = "[" if self.closed_left else "("
  375. end_symbol = "]" if self.closed_right else ")"
  376. return f"{start_symbol}{left}, {right}{end_symbol}"
  377. def __add__(self, y):
  378. if (
  379. isinstance(y, numbers.Number)
  380. or PyDelta_Check(y)
  381. or is_timedelta64_object(y)
  382. ):
  383. return Interval(self.left + y, self.right + y, closed=self.closed)
  384. elif (
  385. # __radd__ pattern
  386. # TODO(cython3): remove this
  387. isinstance(y, Interval)
  388. and (
  389. isinstance(self, numbers.Number)
  390. or PyDelta_Check(self)
  391. or is_timedelta64_object(self)
  392. )
  393. ):
  394. return Interval(y.left + self, y.right + self, closed=y.closed)
  395. return NotImplemented
  396. def __radd__(self, other):
  397. if (
  398. isinstance(other, numbers.Number)
  399. or PyDelta_Check(other)
  400. or is_timedelta64_object(other)
  401. ):
  402. return Interval(self.left + other, self.right + other, closed=self.closed)
  403. return NotImplemented
  404. def __sub__(self, y):
  405. if (
  406. isinstance(y, numbers.Number)
  407. or PyDelta_Check(y)
  408. or is_timedelta64_object(y)
  409. ):
  410. return Interval(self.left - y, self.right - y, closed=self.closed)
  411. return NotImplemented
  412. def __mul__(self, y):
  413. if isinstance(y, numbers.Number):
  414. return Interval(self.left * y, self.right * y, closed=self.closed)
  415. elif isinstance(y, Interval) and isinstance(self, numbers.Number):
  416. # __radd__ semantics
  417. # TODO(cython3): remove this
  418. return Interval(y.left * self, y.right * self, closed=y.closed)
  419. return NotImplemented
  420. def __rmul__(self, other):
  421. if isinstance(other, numbers.Number):
  422. return Interval(self.left * other, self.right * other, closed=self.closed)
  423. return NotImplemented
  424. def __truediv__(self, y):
  425. if isinstance(y, numbers.Number):
  426. return Interval(self.left / y, self.right / y, closed=self.closed)
  427. return NotImplemented
  428. def __floordiv__(self, y):
  429. if isinstance(y, numbers.Number):
  430. return Interval(
  431. self.left // y, self.right // y, closed=self.closed)
  432. return NotImplemented
  433. def overlaps(self, other):
  434. """
  435. Check whether two Interval objects overlap.
  436. Two intervals overlap if they share a common point, including closed
  437. endpoints. Intervals that only have an open endpoint in common do not
  438. overlap.
  439. Parameters
  440. ----------
  441. other : Interval
  442. Interval to check against for an overlap.
  443. Returns
  444. -------
  445. bool
  446. True if the two intervals overlap.
  447. See Also
  448. --------
  449. IntervalArray.overlaps : The corresponding method for IntervalArray.
  450. IntervalIndex.overlaps : The corresponding method for IntervalIndex.
  451. Examples
  452. --------
  453. >>> i1 = pd.Interval(0, 2)
  454. >>> i2 = pd.Interval(1, 3)
  455. >>> i1.overlaps(i2)
  456. True
  457. >>> i3 = pd.Interval(4, 5)
  458. >>> i1.overlaps(i3)
  459. False
  460. Intervals that share closed endpoints overlap:
  461. >>> i4 = pd.Interval(0, 1, closed='both')
  462. >>> i5 = pd.Interval(1, 2, closed='both')
  463. >>> i4.overlaps(i5)
  464. True
  465. Intervals that only have an open endpoint in common do not overlap:
  466. >>> i6 = pd.Interval(1, 2, closed='neither')
  467. >>> i4.overlaps(i6)
  468. False
  469. """
  470. if not isinstance(other, Interval):
  471. raise TypeError("`other` must be an Interval, "
  472. f"got {type(other).__name__}")
  473. # equality is okay if both endpoints are closed (overlap at a point)
  474. op1 = le if (self.closed_left and other.closed_right) else lt
  475. op2 = le if (other.closed_left and self.closed_right) else lt
  476. # overlaps is equivalent negation of two interval being disjoint:
  477. # disjoint = (A.left > B.right) or (B.left > A.right)
  478. # (simplifying the negation allows this to be done in less operations)
  479. return op1(self.left, other.right) and op2(other.left, self.right)
  480. @cython.wraparound(False)
  481. @cython.boundscheck(False)
  482. def intervals_to_interval_bounds(ndarray intervals, bint validate_closed=True):
  483. """
  484. Parameters
  485. ----------
  486. intervals : ndarray
  487. Object array of Intervals / nulls.
  488. validate_closed: bool, default True
  489. Boolean indicating if all intervals must be closed on the same side.
  490. Mismatching closed will raise if True, else return None for closed.
  491. Returns
  492. -------
  493. tuple of
  494. left : ndarray
  495. right : ndarray
  496. closed: str
  497. """
  498. cdef:
  499. object closed = None, interval
  500. Py_ssize_t i, n = len(intervals)
  501. ndarray left, right
  502. bint seen_closed = False
  503. left = np.empty(n, dtype=intervals.dtype)
  504. right = np.empty(n, dtype=intervals.dtype)
  505. for i in range(n):
  506. interval = intervals[i]
  507. if interval is None or util.is_nan(interval):
  508. left[i] = np.nan
  509. right[i] = np.nan
  510. continue
  511. if not isinstance(interval, Interval):
  512. raise TypeError(f"type {type(interval)} with value "
  513. f"{interval} is not an interval")
  514. left[i] = interval.left
  515. right[i] = interval.right
  516. if not seen_closed:
  517. seen_closed = True
  518. closed = interval.closed
  519. elif closed != interval.closed:
  520. closed = None
  521. if validate_closed:
  522. raise ValueError("intervals must all be closed on the same side")
  523. return left, right, closed
  524. include "intervaltree.pxi"