test_interval.py 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934
  1. from itertools import permutations
  2. import re
  3. import numpy as np
  4. import pytest
  5. import pandas as pd
  6. from pandas import (
  7. Index,
  8. Interval,
  9. IntervalIndex,
  10. Timedelta,
  11. Timestamp,
  12. date_range,
  13. interval_range,
  14. isna,
  15. notna,
  16. timedelta_range,
  17. )
  18. import pandas._testing as tm
  19. import pandas.core.common as com
  20. @pytest.fixture(params=[None, "foo"])
  21. def name(request):
  22. return request.param
  23. class TestIntervalIndex:
    # Two-element index built from left edges [0, 1] and right edges [1, 2];
    # read through ``self.index`` by test_comparison.
    index = IntervalIndex.from_arrays([0, 1], [1, 2])
  25. def create_index(self, closed="right"):
  26. return IntervalIndex.from_breaks(range(11), closed=closed)
  27. def create_index_with_nan(self, closed="right"):
  28. mask = [True, False] + [True] * 8
  29. return IntervalIndex.from_arrays(
  30. np.where(mask, np.arange(10), np.nan),
  31. np.where(mask, np.arange(1, 11), np.nan),
  32. closed=closed,
  33. )
  34. def test_properties(self, closed):
  35. index = self.create_index(closed=closed)
  36. assert len(index) == 10
  37. assert index.size == 10
  38. assert index.shape == (10,)
  39. tm.assert_index_equal(index.left, Index(np.arange(10, dtype=np.int64)))
  40. tm.assert_index_equal(index.right, Index(np.arange(1, 11, dtype=np.int64)))
  41. tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5, dtype=np.float64)))
  42. assert index.closed == closed
  43. ivs = [
  44. Interval(left, right, closed)
  45. for left, right in zip(range(10), range(1, 11))
  46. ]
  47. expected = np.array(ivs, dtype=object)
  48. tm.assert_numpy_array_equal(np.asarray(index), expected)
  49. # with nans
  50. index = self.create_index_with_nan(closed=closed)
  51. assert len(index) == 10
  52. assert index.size == 10
  53. assert index.shape == (10,)
  54. expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
  55. expected_right = expected_left + 1
  56. expected_mid = expected_left + 0.5
  57. tm.assert_index_equal(index.left, expected_left)
  58. tm.assert_index_equal(index.right, expected_right)
  59. tm.assert_index_equal(index.mid, expected_mid)
  60. assert index.closed == closed
  61. ivs = [
  62. Interval(left, right, closed) if notna(left) else np.nan
  63. for left, right in zip(expected_left, expected_right)
  64. ]
  65. expected = np.array(ivs, dtype=object)
  66. tm.assert_numpy_array_equal(np.asarray(index), expected)
  67. @pytest.mark.parametrize(
  68. "breaks",
  69. [
  70. [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
  71. [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
  72. pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]),
  73. pd.to_timedelta(["1ns", "2ms", "3s", "4min", "5H", "6D"]),
  74. ],
  75. )
  76. def test_length(self, closed, breaks):
  77. # GH 18789
  78. index = IntervalIndex.from_breaks(breaks, closed=closed)
  79. result = index.length
  80. expected = Index(iv.length for iv in index)
  81. tm.assert_index_equal(result, expected)
  82. # with NA
  83. index = index.insert(1, np.nan)
  84. result = index.length
  85. expected = Index(iv.length if notna(iv) else iv for iv in index)
  86. tm.assert_index_equal(result, expected)
  87. def test_with_nans(self, closed):
  88. index = self.create_index(closed=closed)
  89. assert index.hasnans is False
  90. result = index.isna()
  91. expected = np.zeros(len(index), dtype=bool)
  92. tm.assert_numpy_array_equal(result, expected)
  93. result = index.notna()
  94. expected = np.ones(len(index), dtype=bool)
  95. tm.assert_numpy_array_equal(result, expected)
  96. index = self.create_index_with_nan(closed=closed)
  97. assert index.hasnans is True
  98. result = index.isna()
  99. expected = np.array([False, True] + [False] * (len(index) - 2))
  100. tm.assert_numpy_array_equal(result, expected)
  101. result = index.notna()
  102. expected = np.array([True, False] + [True] * (len(index) - 2))
  103. tm.assert_numpy_array_equal(result, expected)
  104. def test_copy(self, closed):
  105. expected = self.create_index(closed=closed)
  106. result = expected.copy()
  107. assert result.equals(expected)
  108. result = expected.copy(deep=True)
  109. assert result.equals(expected)
  110. assert result.left is not expected.left
  111. def test_ensure_copied_data(self, closed):
  112. # exercise the copy flag in the constructor
  113. # not copying
  114. index = self.create_index(closed=closed)
  115. result = IntervalIndex(index, copy=False)
  116. tm.assert_numpy_array_equal(
  117. index.left.values, result.left.values, check_same="same"
  118. )
  119. tm.assert_numpy_array_equal(
  120. index.right.values, result.right.values, check_same="same"
  121. )
  122. # by-definition make a copy
  123. result = IntervalIndex(np.array(index), copy=False)
  124. tm.assert_numpy_array_equal(
  125. index.left.values, result.left.values, check_same="copy"
  126. )
  127. tm.assert_numpy_array_equal(
  128. index.right.values, result.right.values, check_same="copy"
  129. )
  130. def test_delete(self, closed):
  131. breaks = np.arange(1, 11, dtype=np.int64)
  132. expected = IntervalIndex.from_breaks(breaks, closed=closed)
  133. result = self.create_index(closed=closed).delete(0)
  134. tm.assert_index_equal(result, expected)
  135. @pytest.mark.parametrize(
  136. "data",
  137. [
  138. interval_range(0, periods=10, closed="neither"),
  139. interval_range(1.7, periods=8, freq=2.5, closed="both"),
  140. interval_range(Timestamp("20170101"), periods=12, closed="left"),
  141. interval_range(Timedelta("1 day"), periods=6, closed="right"),
  142. ],
  143. )
  144. def test_insert(self, data):
  145. item = data[0]
  146. idx_item = IntervalIndex([item])
  147. # start
  148. expected = idx_item.append(data)
  149. result = data.insert(0, item)
  150. tm.assert_index_equal(result, expected)
  151. # end
  152. expected = data.append(idx_item)
  153. result = data.insert(len(data), item)
  154. tm.assert_index_equal(result, expected)
  155. # mid
  156. expected = data[:3].append(idx_item).append(data[3:])
  157. result = data.insert(3, item)
  158. tm.assert_index_equal(result, expected)
  159. # invalid type
  160. res = data.insert(1, "foo")
  161. expected = data.astype(object).insert(1, "foo")
  162. tm.assert_index_equal(res, expected)
  163. msg = "can only insert Interval objects and NA into an IntervalArray"
  164. with pytest.raises(TypeError, match=msg):
  165. data._data.insert(1, "foo")
  166. # invalid closed
  167. msg = "'value.closed' is 'left', expected 'right'."
  168. for closed in {"left", "right", "both", "neither"} - {item.closed}:
  169. msg = f"'value.closed' is '{closed}', expected '{item.closed}'."
  170. bad_item = Interval(item.left, item.right, closed=closed)
  171. res = data.insert(1, bad_item)
  172. expected = data.astype(object).insert(1, bad_item)
  173. tm.assert_index_equal(res, expected)
  174. with pytest.raises(ValueError, match=msg):
  175. data._data.insert(1, bad_item)
  176. # GH 18295 (test missing)
  177. na_idx = IntervalIndex([np.nan], closed=data.closed)
  178. for na in [np.nan, None, pd.NA]:
  179. expected = data[:1].append(na_idx).append(data[1:])
  180. result = data.insert(1, na)
  181. tm.assert_index_equal(result, expected)
  182. if data.left.dtype.kind not in ["m", "M"]:
  183. # trying to insert pd.NaT into a numeric-dtyped Index should cast
  184. expected = data.astype(object).insert(1, pd.NaT)
  185. msg = "can only insert Interval objects and NA into an IntervalArray"
  186. with pytest.raises(TypeError, match=msg):
  187. data._data.insert(1, pd.NaT)
  188. result = data.insert(1, pd.NaT)
  189. tm.assert_index_equal(result, expected)
  190. def test_is_unique_interval(self, closed):
  191. """
  192. Interval specific tests for is_unique in addition to base class tests
  193. """
  194. # unique overlapping - distinct endpoints
  195. idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed)
  196. assert idx.is_unique is True
  197. # unique overlapping - shared endpoints
  198. idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
  199. assert idx.is_unique is True
  200. # unique nested
  201. idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
  202. assert idx.is_unique is True
  203. # unique NaN
  204. idx = IntervalIndex.from_tuples([(np.NaN, np.NaN)], closed=closed)
  205. assert idx.is_unique is True
  206. # non-unique NaN
  207. idx = IntervalIndex.from_tuples(
  208. [(np.NaN, np.NaN), (np.NaN, np.NaN)], closed=closed
  209. )
  210. assert idx.is_unique is False
  211. def test_monotonic(self, closed):
  212. # increasing non-overlapping
  213. idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed)
  214. assert idx.is_monotonic_increasing is True
  215. assert idx._is_strictly_monotonic_increasing is True
  216. assert idx.is_monotonic_decreasing is False
  217. assert idx._is_strictly_monotonic_decreasing is False
  218. # decreasing non-overlapping
  219. idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed)
  220. assert idx.is_monotonic_increasing is False
  221. assert idx._is_strictly_monotonic_increasing is False
  222. assert idx.is_monotonic_decreasing is True
  223. assert idx._is_strictly_monotonic_decreasing is True
  224. # unordered non-overlapping
  225. idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed)
  226. assert idx.is_monotonic_increasing is False
  227. assert idx._is_strictly_monotonic_increasing is False
  228. assert idx.is_monotonic_decreasing is False
  229. assert idx._is_strictly_monotonic_decreasing is False
  230. # increasing overlapping
  231. idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed)
  232. assert idx.is_monotonic_increasing is True
  233. assert idx._is_strictly_monotonic_increasing is True
  234. assert idx.is_monotonic_decreasing is False
  235. assert idx._is_strictly_monotonic_decreasing is False
  236. # decreasing overlapping
  237. idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed)
  238. assert idx.is_monotonic_increasing is False
  239. assert idx._is_strictly_monotonic_increasing is False
  240. assert idx.is_monotonic_decreasing is True
  241. assert idx._is_strictly_monotonic_decreasing is True
  242. # unordered overlapping
  243. idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed)
  244. assert idx.is_monotonic_increasing is False
  245. assert idx._is_strictly_monotonic_increasing is False
  246. assert idx.is_monotonic_decreasing is False
  247. assert idx._is_strictly_monotonic_decreasing is False
  248. # increasing overlapping shared endpoints
  249. idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
  250. assert idx.is_monotonic_increasing is True
  251. assert idx._is_strictly_monotonic_increasing is True
  252. assert idx.is_monotonic_decreasing is False
  253. assert idx._is_strictly_monotonic_decreasing is False
  254. # decreasing overlapping shared endpoints
  255. idx = IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed)
  256. assert idx.is_monotonic_increasing is False
  257. assert idx._is_strictly_monotonic_increasing is False
  258. assert idx.is_monotonic_decreasing is True
  259. assert idx._is_strictly_monotonic_decreasing is True
  260. # stationary
  261. idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed)
  262. assert idx.is_monotonic_increasing is True
  263. assert idx._is_strictly_monotonic_increasing is False
  264. assert idx.is_monotonic_decreasing is True
  265. assert idx._is_strictly_monotonic_decreasing is False
  266. # empty
  267. idx = IntervalIndex([], closed=closed)
  268. assert idx.is_monotonic_increasing is True
  269. assert idx._is_strictly_monotonic_increasing is True
  270. assert idx.is_monotonic_decreasing is True
  271. assert idx._is_strictly_monotonic_decreasing is True
  272. def test_is_monotonic_with_nans(self):
  273. # GH#41831
  274. index = IntervalIndex([np.nan, np.nan])
  275. assert not index.is_monotonic_increasing
  276. assert not index._is_strictly_monotonic_increasing
  277. assert not index.is_monotonic_increasing
  278. assert not index._is_strictly_monotonic_decreasing
  279. assert not index.is_monotonic_decreasing
  280. def test_get_item(self, closed):
  281. i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed)
  282. assert i[0] == Interval(0.0, 1.0, closed=closed)
  283. assert i[1] == Interval(1.0, 2.0, closed=closed)
  284. assert isna(i[2])
  285. result = i[0:1]
  286. expected = IntervalIndex.from_arrays((0.0,), (1.0,), closed=closed)
  287. tm.assert_index_equal(result, expected)
  288. result = i[0:2]
  289. expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0), closed=closed)
  290. tm.assert_index_equal(result, expected)
  291. result = i[1:3]
  292. expected = IntervalIndex.from_arrays(
  293. (1.0, np.nan), (2.0, np.nan), closed=closed
  294. )
  295. tm.assert_index_equal(result, expected)
  296. @pytest.mark.parametrize(
  297. "breaks",
  298. [
  299. date_range("20180101", periods=4),
  300. date_range("20180101", periods=4, tz="US/Eastern"),
  301. timedelta_range("0 days", periods=4),
  302. ],
  303. ids=lambda x: str(x.dtype),
  304. )
  305. def test_maybe_convert_i8(self, breaks):
  306. # GH 20636
  307. index = IntervalIndex.from_breaks(breaks)
  308. # intervalindex
  309. result = index._maybe_convert_i8(index)
  310. expected = IntervalIndex.from_breaks(breaks.asi8)
  311. tm.assert_index_equal(result, expected)
  312. # interval
  313. interval = Interval(breaks[0], breaks[1])
  314. result = index._maybe_convert_i8(interval)
  315. expected = Interval(breaks[0]._value, breaks[1]._value)
  316. assert result == expected
  317. # datetimelike index
  318. result = index._maybe_convert_i8(breaks)
  319. expected = Index(breaks.asi8)
  320. tm.assert_index_equal(result, expected)
  321. # datetimelike scalar
  322. result = index._maybe_convert_i8(breaks[0])
  323. expected = breaks[0]._value
  324. assert result == expected
  325. # list-like of datetimelike scalars
  326. result = index._maybe_convert_i8(list(breaks))
  327. expected = Index(breaks.asi8)
  328. tm.assert_index_equal(result, expected)
  329. @pytest.mark.parametrize(
  330. "breaks",
  331. [date_range("2018-01-01", periods=5), timedelta_range("0 days", periods=5)],
  332. )
  333. def test_maybe_convert_i8_nat(self, breaks):
  334. # GH 20636
  335. index = IntervalIndex.from_breaks(breaks)
  336. to_convert = breaks._constructor([pd.NaT] * 3)
  337. expected = Index([np.nan] * 3, dtype=np.float64)
  338. result = index._maybe_convert_i8(to_convert)
  339. tm.assert_index_equal(result, expected)
  340. to_convert = to_convert.insert(0, breaks[0])
  341. expected = expected.insert(0, float(breaks[0]._value))
  342. result = index._maybe_convert_i8(to_convert)
  343. tm.assert_index_equal(result, expected)
  344. @pytest.mark.parametrize(
  345. "make_key",
  346. [lambda breaks: breaks, list],
  347. ids=["lambda", "list"],
  348. )
  349. def test_maybe_convert_i8_numeric(self, make_key, any_real_numpy_dtype):
  350. # GH 20636
  351. breaks = np.arange(5, dtype=any_real_numpy_dtype)
  352. index = IntervalIndex.from_breaks(breaks)
  353. key = make_key(breaks)
  354. result = index._maybe_convert_i8(key)
  355. kind = breaks.dtype.kind
  356. expected_dtype = {"i": np.int64, "u": np.uint64, "f": np.float64}[kind]
  357. expected = Index(key, dtype=expected_dtype)
  358. tm.assert_index_equal(result, expected)
  359. @pytest.mark.parametrize(
  360. "make_key",
  361. [
  362. IntervalIndex.from_breaks,
  363. lambda breaks: Interval(breaks[0], breaks[1]),
  364. lambda breaks: breaks[0],
  365. ],
  366. ids=["IntervalIndex", "Interval", "scalar"],
  367. )
  368. def test_maybe_convert_i8_numeric_identical(self, make_key, any_real_numpy_dtype):
  369. # GH 20636
  370. breaks = np.arange(5, dtype=any_real_numpy_dtype)
  371. index = IntervalIndex.from_breaks(breaks)
  372. key = make_key(breaks)
  373. # test if _maybe_convert_i8 won't change key if an Interval or IntervalIndex
  374. result = index._maybe_convert_i8(key)
  375. assert result is key
  376. @pytest.mark.parametrize(
  377. "breaks1, breaks2",
  378. permutations(
  379. [
  380. date_range("20180101", periods=4),
  381. date_range("20180101", periods=4, tz="US/Eastern"),
  382. timedelta_range("0 days", periods=4),
  383. ],
  384. 2,
  385. ),
  386. ids=lambda x: str(x.dtype),
  387. )
  388. @pytest.mark.parametrize(
  389. "make_key",
  390. [
  391. IntervalIndex.from_breaks,
  392. lambda breaks: Interval(breaks[0], breaks[1]),
  393. lambda breaks: breaks,
  394. lambda breaks: breaks[0],
  395. list,
  396. ],
  397. ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
  398. )
  399. def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key):
  400. # GH 20636
  401. index = IntervalIndex.from_breaks(breaks1)
  402. key = make_key(breaks2)
  403. msg = (
  404. f"Cannot index an IntervalIndex of subtype {breaks1.dtype} with "
  405. f"values of dtype {breaks2.dtype}"
  406. )
  407. msg = re.escape(msg)
  408. with pytest.raises(ValueError, match=msg):
  409. index._maybe_convert_i8(key)
  410. def test_contains_method(self):
  411. # can select values that are IN the range of a value
  412. i = IntervalIndex.from_arrays([0, 1], [1, 2])
  413. expected = np.array([False, False], dtype="bool")
  414. actual = i.contains(0)
  415. tm.assert_numpy_array_equal(actual, expected)
  416. actual = i.contains(3)
  417. tm.assert_numpy_array_equal(actual, expected)
  418. expected = np.array([True, False], dtype="bool")
  419. actual = i.contains(0.5)
  420. tm.assert_numpy_array_equal(actual, expected)
  421. actual = i.contains(1)
  422. tm.assert_numpy_array_equal(actual, expected)
  423. # __contains__ not implemented for "interval in interval", follow
  424. # that for the contains method for now
  425. with pytest.raises(
  426. NotImplementedError, match="contains not implemented for two"
  427. ):
  428. i.contains(Interval(0, 1))
  429. def test_dropna(self, closed):
  430. expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed)
  431. ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed)
  432. result = ii.dropna()
  433. tm.assert_index_equal(result, expected)
  434. ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed)
  435. result = ii.dropna()
  436. tm.assert_index_equal(result, expected)
  437. def test_non_contiguous(self, closed):
  438. index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
  439. target = [0.5, 1.5, 2.5]
  440. actual = index.get_indexer(target)
  441. expected = np.array([0, -1, 1], dtype="intp")
  442. tm.assert_numpy_array_equal(actual, expected)
  443. assert 1.5 not in index
  444. def test_isin(self, closed):
  445. index = self.create_index(closed=closed)
  446. expected = np.array([True] + [False] * (len(index) - 1))
  447. result = index.isin(index[:1])
  448. tm.assert_numpy_array_equal(result, expected)
  449. result = index.isin([index[0]])
  450. tm.assert_numpy_array_equal(result, expected)
  451. other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
  452. expected = np.array([True] * (len(index) - 1) + [False])
  453. result = index.isin(other)
  454. tm.assert_numpy_array_equal(result, expected)
  455. result = index.isin(other.tolist())
  456. tm.assert_numpy_array_equal(result, expected)
  457. for other_closed in ["right", "left", "both", "neither"]:
  458. other = self.create_index(closed=other_closed)
  459. expected = np.repeat(closed == other_closed, len(index))
  460. result = index.isin(other)
  461. tm.assert_numpy_array_equal(result, expected)
  462. result = index.isin(other.tolist())
  463. tm.assert_numpy_array_equal(result, expected)
  464. def test_comparison(self):
  465. actual = Interval(0, 1) < self.index
  466. expected = np.array([False, True])
  467. tm.assert_numpy_array_equal(actual, expected)
  468. actual = Interval(0.5, 1.5) < self.index
  469. expected = np.array([False, True])
  470. tm.assert_numpy_array_equal(actual, expected)
  471. actual = self.index > Interval(0.5, 1.5)
  472. tm.assert_numpy_array_equal(actual, expected)
  473. actual = self.index == self.index
  474. expected = np.array([True, True])
  475. tm.assert_numpy_array_equal(actual, expected)
  476. actual = self.index <= self.index
  477. tm.assert_numpy_array_equal(actual, expected)
  478. actual = self.index >= self.index
  479. tm.assert_numpy_array_equal(actual, expected)
  480. actual = self.index < self.index
  481. expected = np.array([False, False])
  482. tm.assert_numpy_array_equal(actual, expected)
  483. actual = self.index > self.index
  484. tm.assert_numpy_array_equal(actual, expected)
  485. actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left")
  486. tm.assert_numpy_array_equal(actual, expected)
  487. actual = self.index == self.index.values
  488. tm.assert_numpy_array_equal(actual, np.array([True, True]))
  489. actual = self.index.values == self.index
  490. tm.assert_numpy_array_equal(actual, np.array([True, True]))
  491. actual = self.index <= self.index.values
  492. tm.assert_numpy_array_equal(actual, np.array([True, True]))
  493. actual = self.index != self.index.values
  494. tm.assert_numpy_array_equal(actual, np.array([False, False]))
  495. actual = self.index > self.index.values
  496. tm.assert_numpy_array_equal(actual, np.array([False, False]))
  497. actual = self.index.values > self.index
  498. tm.assert_numpy_array_equal(actual, np.array([False, False]))
  499. # invalid comparisons
  500. actual = self.index == 0
  501. tm.assert_numpy_array_equal(actual, np.array([False, False]))
  502. actual = self.index == self.index.left
  503. tm.assert_numpy_array_equal(actual, np.array([False, False]))
  504. msg = "|".join(
  505. [
  506. "not supported between instances of 'int' and '.*.Interval'",
  507. r"Invalid comparison between dtype=interval\[int64, right\] and ",
  508. ]
  509. )
  510. with pytest.raises(TypeError, match=msg):
  511. self.index > 0
  512. with pytest.raises(TypeError, match=msg):
  513. self.index <= 0
  514. with pytest.raises(TypeError, match=msg):
  515. self.index > np.arange(2)
  516. msg = "Lengths must match to compare"
  517. with pytest.raises(ValueError, match=msg):
  518. self.index > np.arange(3)
  519. def test_missing_values(self, closed):
  520. idx = Index(
  521. [np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed)]
  522. )
  523. idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2], closed=closed)
  524. assert idx.equals(idx2)
  525. msg = (
  526. "missing values must be missing in the same location both left "
  527. "and right sides"
  528. )
  529. with pytest.raises(ValueError, match=msg):
  530. IntervalIndex.from_arrays(
  531. [np.nan, 0, 1], np.array([0, 1, 2]), closed=closed
  532. )
  533. tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False]))
  534. def test_sort_values(self, closed):
  535. index = self.create_index(closed=closed)
  536. result = index.sort_values()
  537. tm.assert_index_equal(result, index)
  538. result = index.sort_values(ascending=False)
  539. tm.assert_index_equal(result, index[::-1])
  540. # with nan
  541. index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)])
  542. result = index.sort_values()
  543. expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan])
  544. tm.assert_index_equal(result, expected)
  545. result = index.sort_values(ascending=False, na_position="first")
  546. expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)])
  547. tm.assert_index_equal(result, expected)
  548. @pytest.mark.parametrize("tz", [None, "US/Eastern"])
  549. def test_datetime(self, tz):
  550. start = Timestamp("2000-01-01", tz=tz)
  551. dates = date_range(start=start, periods=10)
  552. index = IntervalIndex.from_breaks(dates)
  553. # test mid
  554. start = Timestamp("2000-01-01T12:00", tz=tz)
  555. expected = date_range(start=start, periods=9)
  556. tm.assert_index_equal(index.mid, expected)
  557. # __contains__ doesn't check individual points
  558. assert Timestamp("2000-01-01", tz=tz) not in index
  559. assert Timestamp("2000-01-01T12", tz=tz) not in index
  560. assert Timestamp("2000-01-02", tz=tz) not in index
  561. iv_true = Interval(
  562. Timestamp("2000-01-02", tz=tz), Timestamp("2000-01-03", tz=tz)
  563. )
  564. iv_false = Interval(
  565. Timestamp("1999-12-31", tz=tz), Timestamp("2000-01-01", tz=tz)
  566. )
  567. assert iv_true in index
  568. assert iv_false not in index
  569. # .contains does check individual points
  570. assert not index.contains(Timestamp("2000-01-01", tz=tz)).any()
  571. assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any()
  572. assert index.contains(Timestamp("2000-01-02", tz=tz)).any()
  573. # test get_indexer
  574. start = Timestamp("1999-12-31T12:00", tz=tz)
  575. target = date_range(start=start, periods=7, freq="12H")
  576. actual = index.get_indexer(target)
  577. expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp")
  578. tm.assert_numpy_array_equal(actual, expected)
  579. start = Timestamp("2000-01-08T18:00", tz=tz)
  580. target = date_range(start=start, periods=7, freq="6H")
  581. actual = index.get_indexer(target)
  582. expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp")
  583. tm.assert_numpy_array_equal(actual, expected)
  584. def test_append(self, closed):
  585. index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed)
  586. index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed)
  587. result = index1.append(index2)
  588. expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed)
  589. tm.assert_index_equal(result, expected)
  590. result = index1.append([index1, index2])
  591. expected = IntervalIndex.from_arrays(
  592. [0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed
  593. )
  594. tm.assert_index_equal(result, expected)
  595. for other_closed in {"left", "right", "both", "neither"} - {closed}:
  596. index_other_closed = IntervalIndex.from_arrays(
  597. [0, 1], [1, 2], closed=other_closed
  598. )
  599. result = index1.append(index_other_closed)
  600. expected = index1.astype(object).append(index_other_closed.astype(object))
  601. tm.assert_index_equal(result, expected)
  602. def test_is_non_overlapping_monotonic(self, closed):
  603. # Should be True in all cases
  604. tpls = [(0, 1), (2, 3), (4, 5), (6, 7)]
  605. idx = IntervalIndex.from_tuples(tpls, closed=closed)
  606. assert idx.is_non_overlapping_monotonic is True
  607. idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
  608. assert idx.is_non_overlapping_monotonic is True
  609. # Should be False in all cases (overlapping)
  610. tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
  611. idx = IntervalIndex.from_tuples(tpls, closed=closed)
  612. assert idx.is_non_overlapping_monotonic is False
  613. idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
  614. assert idx.is_non_overlapping_monotonic is False
  615. # Should be False in all cases (non-monotonic)
  616. tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
  617. idx = IntervalIndex.from_tuples(tpls, closed=closed)
  618. assert idx.is_non_overlapping_monotonic is False
  619. idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
  620. assert idx.is_non_overlapping_monotonic is False
  621. # Should be False for closed='both', otherwise True (GH16560)
  622. if closed == "both":
  623. idx = IntervalIndex.from_breaks(range(4), closed=closed)
  624. assert idx.is_non_overlapping_monotonic is False
  625. else:
  626. idx = IntervalIndex.from_breaks(range(4), closed=closed)
  627. assert idx.is_non_overlapping_monotonic is True
  628. @pytest.mark.parametrize(
  629. "start, shift, na_value",
  630. [
  631. (0, 1, np.nan),
  632. (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT),
  633. (Timedelta("0 days"), Timedelta("1 day"), pd.NaT),
  634. ],
  635. )
  636. def test_is_overlapping(self, start, shift, na_value, closed):
  637. # GH 23309
  638. # see test_interval_tree.py for extensive tests; interface tests here
  639. # non-overlapping
  640. tuples = [(start + n * shift, start + (n + 1) * shift) for n in (0, 2, 4)]
  641. index = IntervalIndex.from_tuples(tuples, closed=closed)
  642. assert index.is_overlapping is False
  643. # non-overlapping with NA
  644. tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
  645. index = IntervalIndex.from_tuples(tuples, closed=closed)
  646. assert index.is_overlapping is False
  647. # overlapping
  648. tuples = [(start + n * shift, start + (n + 2) * shift) for n in range(3)]
  649. index = IntervalIndex.from_tuples(tuples, closed=closed)
  650. assert index.is_overlapping is True
  651. # overlapping with NA
  652. tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
  653. index = IntervalIndex.from_tuples(tuples, closed=closed)
  654. assert index.is_overlapping is True
  655. # common endpoints
  656. tuples = [(start + n * shift, start + (n + 1) * shift) for n in range(3)]
  657. index = IntervalIndex.from_tuples(tuples, closed=closed)
  658. result = index.is_overlapping
  659. expected = closed == "both"
  660. assert result is expected
  661. # common endpoints with NA
  662. tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
  663. index = IntervalIndex.from_tuples(tuples, closed=closed)
  664. result = index.is_overlapping
  665. assert result is expected
  666. # intervals with duplicate left values
  667. a = [10, 15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85]
  668. b = [15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90]
  669. index = IntervalIndex.from_arrays(a, b, closed="right")
  670. result = index.is_overlapping
  671. assert result is False
  672. @pytest.mark.parametrize(
  673. "tuples",
  674. [
  675. list(zip(range(10), range(1, 11))),
  676. list(
  677. zip(
  678. date_range("20170101", periods=10),
  679. date_range("20170101", periods=10),
  680. )
  681. ),
  682. list(
  683. zip(
  684. timedelta_range("0 days", periods=10),
  685. timedelta_range("1 day", periods=10),
  686. )
  687. ),
  688. ],
  689. )
  690. def test_to_tuples(self, tuples):
  691. # GH 18756
  692. idx = IntervalIndex.from_tuples(tuples)
  693. result = idx.to_tuples()
  694. expected = Index(com.asarray_tuplesafe(tuples))
  695. tm.assert_index_equal(result, expected)
  696. @pytest.mark.parametrize(
  697. "tuples",
  698. [
  699. list(zip(range(10), range(1, 11))) + [np.nan],
  700. list(
  701. zip(
  702. date_range("20170101", periods=10),
  703. date_range("20170101", periods=10),
  704. )
  705. )
  706. + [np.nan],
  707. list(
  708. zip(
  709. timedelta_range("0 days", periods=10),
  710. timedelta_range("1 day", periods=10),
  711. )
  712. )
  713. + [np.nan],
  714. ],
  715. )
  716. @pytest.mark.parametrize("na_tuple", [True, False])
  717. def test_to_tuples_na(self, tuples, na_tuple):
  718. # GH 18756
  719. idx = IntervalIndex.from_tuples(tuples)
  720. result = idx.to_tuples(na_tuple=na_tuple)
  721. # check the non-NA portion
  722. expected_notna = Index(com.asarray_tuplesafe(tuples[:-1]))
  723. result_notna = result[:-1]
  724. tm.assert_index_equal(result_notna, expected_notna)
  725. # check the NA portion
  726. result_na = result[-1]
  727. if na_tuple:
  728. assert isinstance(result_na, tuple)
  729. assert len(result_na) == 2
  730. assert all(isna(x) for x in result_na)
  731. else:
  732. assert isna(result_na)
  733. def test_nbytes(self):
  734. # GH 19209
  735. left = np.arange(0, 4, dtype="i8")
  736. right = np.arange(1, 5, dtype="i8")
  737. result = IntervalIndex.from_arrays(left, right).nbytes
  738. expected = 64 # 4 * 8 * 2
  739. assert result == expected
  740. @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"])
  741. def test_set_closed(self, name, closed, new_closed):
  742. # GH 21670
  743. index = interval_range(0, 5, closed=closed, name=name)
  744. result = index.set_closed(new_closed)
  745. expected = interval_range(0, 5, closed=new_closed, name=name)
  746. tm.assert_index_equal(result, expected)
  747. @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False])
  748. def test_set_closed_errors(self, bad_closed):
  749. # GH 21670
  750. index = interval_range(0, 5)
  751. msg = f"invalid option for 'closed': {bad_closed}"
  752. with pytest.raises(ValueError, match=msg):
  753. index.set_closed(bad_closed)
  754. def test_is_all_dates(self):
  755. # GH 23576
  756. year_2017 = Interval(
  757. Timestamp("2017-01-01 00:00:00"), Timestamp("2018-01-01 00:00:00")
  758. )
  759. year_2017_index = IntervalIndex([year_2017])
  760. assert not year_2017_index._is_all_dates
  761. def test_dir():
  762. # GH#27571 dir(interval_index) should not raise
  763. index = IntervalIndex.from_arrays([0, 1], [1, 2])
  764. result = dir(index)
  765. assert "str" not in result
  766. def test_searchsorted_different_argument_classes(listlike_box):
  767. # https://github.com/pandas-dev/pandas/issues/32762
  768. values = IntervalIndex([Interval(0, 1), Interval(1, 2)])
  769. result = values.searchsorted(listlike_box(values))
  770. expected = np.array([0, 1], dtype=result.dtype)
  771. tm.assert_numpy_array_equal(result, expected)
  772. result = values._data.searchsorted(listlike_box(values))
  773. tm.assert_numpy_array_equal(result, expected)
  774. @pytest.mark.parametrize(
  775. "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2]
  776. )
  777. def test_searchsorted_invalid_argument(arg):
  778. values = IntervalIndex([Interval(0, 1), Interval(1, 2)])
  779. msg = "'<' not supported between instances of 'pandas._libs.interval.Interval' and "
  780. with pytest.raises(TypeError, match=msg):
  781. values.searchsorted(arg)