12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934 |
- from itertools import permutations
- import re
- import numpy as np
- import pytest
- import pandas as pd
- from pandas import (
- Index,
- Interval,
- IntervalIndex,
- Timedelta,
- Timestamp,
- date_range,
- interval_range,
- isna,
- notna,
- timedelta_range,
- )
- import pandas._testing as tm
- import pandas.core.common as com
@pytest.fixture(
    params=[
        None,
        "foo",
    ]
)
def name(request):
    """Fixture supplying each candidate index name in turn: ``None`` and ``"foo"``."""
    current = request.param
    return current
- class TestIntervalIndex:
- index = IntervalIndex.from_arrays([0, 1], [1, 2])
- def create_index(self, closed="right"):
- return IntervalIndex.from_breaks(range(11), closed=closed)
- def create_index_with_nan(self, closed="right"):
- mask = [True, False] + [True] * 8
- return IntervalIndex.from_arrays(
- np.where(mask, np.arange(10), np.nan),
- np.where(mask, np.arange(1, 11), np.nan),
- closed=closed,
- )
- def test_properties(self, closed):
- index = self.create_index(closed=closed)
- assert len(index) == 10
- assert index.size == 10
- assert index.shape == (10,)
- tm.assert_index_equal(index.left, Index(np.arange(10, dtype=np.int64)))
- tm.assert_index_equal(index.right, Index(np.arange(1, 11, dtype=np.int64)))
- tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5, dtype=np.float64)))
- assert index.closed == closed
- ivs = [
- Interval(left, right, closed)
- for left, right in zip(range(10), range(1, 11))
- ]
- expected = np.array(ivs, dtype=object)
- tm.assert_numpy_array_equal(np.asarray(index), expected)
- # with nans
- index = self.create_index_with_nan(closed=closed)
- assert len(index) == 10
- assert index.size == 10
- assert index.shape == (10,)
- expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
- expected_right = expected_left + 1
- expected_mid = expected_left + 0.5
- tm.assert_index_equal(index.left, expected_left)
- tm.assert_index_equal(index.right, expected_right)
- tm.assert_index_equal(index.mid, expected_mid)
- assert index.closed == closed
- ivs = [
- Interval(left, right, closed) if notna(left) else np.nan
- for left, right in zip(expected_left, expected_right)
- ]
- expected = np.array(ivs, dtype=object)
- tm.assert_numpy_array_equal(np.asarray(index), expected)
- @pytest.mark.parametrize(
- "breaks",
- [
- [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
- [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
- pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]),
- pd.to_timedelta(["1ns", "2ms", "3s", "4min", "5H", "6D"]),
- ],
- )
- def test_length(self, closed, breaks):
- # GH 18789
- index = IntervalIndex.from_breaks(breaks, closed=closed)
- result = index.length
- expected = Index(iv.length for iv in index)
- tm.assert_index_equal(result, expected)
- # with NA
- index = index.insert(1, np.nan)
- result = index.length
- expected = Index(iv.length if notna(iv) else iv for iv in index)
- tm.assert_index_equal(result, expected)
- def test_with_nans(self, closed):
- index = self.create_index(closed=closed)
- assert index.hasnans is False
- result = index.isna()
- expected = np.zeros(len(index), dtype=bool)
- tm.assert_numpy_array_equal(result, expected)
- result = index.notna()
- expected = np.ones(len(index), dtype=bool)
- tm.assert_numpy_array_equal(result, expected)
- index = self.create_index_with_nan(closed=closed)
- assert index.hasnans is True
- result = index.isna()
- expected = np.array([False, True] + [False] * (len(index) - 2))
- tm.assert_numpy_array_equal(result, expected)
- result = index.notna()
- expected = np.array([True, False] + [True] * (len(index) - 2))
- tm.assert_numpy_array_equal(result, expected)
- def test_copy(self, closed):
- expected = self.create_index(closed=closed)
- result = expected.copy()
- assert result.equals(expected)
- result = expected.copy(deep=True)
- assert result.equals(expected)
- assert result.left is not expected.left
- def test_ensure_copied_data(self, closed):
- # exercise the copy flag in the constructor
- # not copying
- index = self.create_index(closed=closed)
- result = IntervalIndex(index, copy=False)
- tm.assert_numpy_array_equal(
- index.left.values, result.left.values, check_same="same"
- )
- tm.assert_numpy_array_equal(
- index.right.values, result.right.values, check_same="same"
- )
- # by-definition make a copy
- result = IntervalIndex(np.array(index), copy=False)
- tm.assert_numpy_array_equal(
- index.left.values, result.left.values, check_same="copy"
- )
- tm.assert_numpy_array_equal(
- index.right.values, result.right.values, check_same="copy"
- )
- def test_delete(self, closed):
- breaks = np.arange(1, 11, dtype=np.int64)
- expected = IntervalIndex.from_breaks(breaks, closed=closed)
- result = self.create_index(closed=closed).delete(0)
- tm.assert_index_equal(result, expected)
- @pytest.mark.parametrize(
- "data",
- [
- interval_range(0, periods=10, closed="neither"),
- interval_range(1.7, periods=8, freq=2.5, closed="both"),
- interval_range(Timestamp("20170101"), periods=12, closed="left"),
- interval_range(Timedelta("1 day"), periods=6, closed="right"),
- ],
- )
- def test_insert(self, data):
- item = data[0]
- idx_item = IntervalIndex([item])
- # start
- expected = idx_item.append(data)
- result = data.insert(0, item)
- tm.assert_index_equal(result, expected)
- # end
- expected = data.append(idx_item)
- result = data.insert(len(data), item)
- tm.assert_index_equal(result, expected)
- # mid
- expected = data[:3].append(idx_item).append(data[3:])
- result = data.insert(3, item)
- tm.assert_index_equal(result, expected)
- # invalid type
- res = data.insert(1, "foo")
- expected = data.astype(object).insert(1, "foo")
- tm.assert_index_equal(res, expected)
- msg = "can only insert Interval objects and NA into an IntervalArray"
- with pytest.raises(TypeError, match=msg):
- data._data.insert(1, "foo")
- # invalid closed
- msg = "'value.closed' is 'left', expected 'right'."
- for closed in {"left", "right", "both", "neither"} - {item.closed}:
- msg = f"'value.closed' is '{closed}', expected '{item.closed}'."
- bad_item = Interval(item.left, item.right, closed=closed)
- res = data.insert(1, bad_item)
- expected = data.astype(object).insert(1, bad_item)
- tm.assert_index_equal(res, expected)
- with pytest.raises(ValueError, match=msg):
- data._data.insert(1, bad_item)
- # GH 18295 (test missing)
- na_idx = IntervalIndex([np.nan], closed=data.closed)
- for na in [np.nan, None, pd.NA]:
- expected = data[:1].append(na_idx).append(data[1:])
- result = data.insert(1, na)
- tm.assert_index_equal(result, expected)
- if data.left.dtype.kind not in ["m", "M"]:
- # trying to insert pd.NaT into a numeric-dtyped Index should cast
- expected = data.astype(object).insert(1, pd.NaT)
- msg = "can only insert Interval objects and NA into an IntervalArray"
- with pytest.raises(TypeError, match=msg):
- data._data.insert(1, pd.NaT)
- result = data.insert(1, pd.NaT)
- tm.assert_index_equal(result, expected)
- def test_is_unique_interval(self, closed):
- """
- Interval specific tests for is_unique in addition to base class tests
- """
- # unique overlapping - distinct endpoints
- idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed)
- assert idx.is_unique is True
- # unique overlapping - shared endpoints
- idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
- assert idx.is_unique is True
- # unique nested
- idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
- assert idx.is_unique is True
- # unique NaN
- idx = IntervalIndex.from_tuples([(np.NaN, np.NaN)], closed=closed)
- assert idx.is_unique is True
- # non-unique NaN
- idx = IntervalIndex.from_tuples(
- [(np.NaN, np.NaN), (np.NaN, np.NaN)], closed=closed
- )
- assert idx.is_unique is False
- def test_monotonic(self, closed):
- # increasing non-overlapping
- idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed)
- assert idx.is_monotonic_increasing is True
- assert idx._is_strictly_monotonic_increasing is True
- assert idx.is_monotonic_decreasing is False
- assert idx._is_strictly_monotonic_decreasing is False
- # decreasing non-overlapping
- idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed)
- assert idx.is_monotonic_increasing is False
- assert idx._is_strictly_monotonic_increasing is False
- assert idx.is_monotonic_decreasing is True
- assert idx._is_strictly_monotonic_decreasing is True
- # unordered non-overlapping
- idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed)
- assert idx.is_monotonic_increasing is False
- assert idx._is_strictly_monotonic_increasing is False
- assert idx.is_monotonic_decreasing is False
- assert idx._is_strictly_monotonic_decreasing is False
- # increasing overlapping
- idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed)
- assert idx.is_monotonic_increasing is True
- assert idx._is_strictly_monotonic_increasing is True
- assert idx.is_monotonic_decreasing is False
- assert idx._is_strictly_monotonic_decreasing is False
- # decreasing overlapping
- idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed)
- assert idx.is_monotonic_increasing is False
- assert idx._is_strictly_monotonic_increasing is False
- assert idx.is_monotonic_decreasing is True
- assert idx._is_strictly_monotonic_decreasing is True
- # unordered overlapping
- idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed)
- assert idx.is_monotonic_increasing is False
- assert idx._is_strictly_monotonic_increasing is False
- assert idx.is_monotonic_decreasing is False
- assert idx._is_strictly_monotonic_decreasing is False
- # increasing overlapping shared endpoints
- idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
- assert idx.is_monotonic_increasing is True
- assert idx._is_strictly_monotonic_increasing is True
- assert idx.is_monotonic_decreasing is False
- assert idx._is_strictly_monotonic_decreasing is False
- # decreasing overlapping shared endpoints
- idx = IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed)
- assert idx.is_monotonic_increasing is False
- assert idx._is_strictly_monotonic_increasing is False
- assert idx.is_monotonic_decreasing is True
- assert idx._is_strictly_monotonic_decreasing is True
- # stationary
- idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed)
- assert idx.is_monotonic_increasing is True
- assert idx._is_strictly_monotonic_increasing is False
- assert idx.is_monotonic_decreasing is True
- assert idx._is_strictly_monotonic_decreasing is False
- # empty
- idx = IntervalIndex([], closed=closed)
- assert idx.is_monotonic_increasing is True
- assert idx._is_strictly_monotonic_increasing is True
- assert idx.is_monotonic_decreasing is True
- assert idx._is_strictly_monotonic_decreasing is True
- def test_is_monotonic_with_nans(self):
- # GH#41831
- index = IntervalIndex([np.nan, np.nan])
- assert not index.is_monotonic_increasing
- assert not index._is_strictly_monotonic_increasing
- assert not index.is_monotonic_increasing
- assert not index._is_strictly_monotonic_decreasing
- assert not index.is_monotonic_decreasing
- def test_get_item(self, closed):
- i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed)
- assert i[0] == Interval(0.0, 1.0, closed=closed)
- assert i[1] == Interval(1.0, 2.0, closed=closed)
- assert isna(i[2])
- result = i[0:1]
- expected = IntervalIndex.from_arrays((0.0,), (1.0,), closed=closed)
- tm.assert_index_equal(result, expected)
- result = i[0:2]
- expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0), closed=closed)
- tm.assert_index_equal(result, expected)
- result = i[1:3]
- expected = IntervalIndex.from_arrays(
- (1.0, np.nan), (2.0, np.nan), closed=closed
- )
- tm.assert_index_equal(result, expected)
- @pytest.mark.parametrize(
- "breaks",
- [
- date_range("20180101", periods=4),
- date_range("20180101", periods=4, tz="US/Eastern"),
- timedelta_range("0 days", periods=4),
- ],
- ids=lambda x: str(x.dtype),
- )
- def test_maybe_convert_i8(self, breaks):
- # GH 20636
- index = IntervalIndex.from_breaks(breaks)
- # intervalindex
- result = index._maybe_convert_i8(index)
- expected = IntervalIndex.from_breaks(breaks.asi8)
- tm.assert_index_equal(result, expected)
- # interval
- interval = Interval(breaks[0], breaks[1])
- result = index._maybe_convert_i8(interval)
- expected = Interval(breaks[0]._value, breaks[1]._value)
- assert result == expected
- # datetimelike index
- result = index._maybe_convert_i8(breaks)
- expected = Index(breaks.asi8)
- tm.assert_index_equal(result, expected)
- # datetimelike scalar
- result = index._maybe_convert_i8(breaks[0])
- expected = breaks[0]._value
- assert result == expected
- # list-like of datetimelike scalars
- result = index._maybe_convert_i8(list(breaks))
- expected = Index(breaks.asi8)
- tm.assert_index_equal(result, expected)
- @pytest.mark.parametrize(
- "breaks",
- [date_range("2018-01-01", periods=5), timedelta_range("0 days", periods=5)],
- )
- def test_maybe_convert_i8_nat(self, breaks):
- # GH 20636
- index = IntervalIndex.from_breaks(breaks)
- to_convert = breaks._constructor([pd.NaT] * 3)
- expected = Index([np.nan] * 3, dtype=np.float64)
- result = index._maybe_convert_i8(to_convert)
- tm.assert_index_equal(result, expected)
- to_convert = to_convert.insert(0, breaks[0])
- expected = expected.insert(0, float(breaks[0]._value))
- result = index._maybe_convert_i8(to_convert)
- tm.assert_index_equal(result, expected)
- @pytest.mark.parametrize(
- "make_key",
- [lambda breaks: breaks, list],
- ids=["lambda", "list"],
- )
- def test_maybe_convert_i8_numeric(self, make_key, any_real_numpy_dtype):
- # GH 20636
- breaks = np.arange(5, dtype=any_real_numpy_dtype)
- index = IntervalIndex.from_breaks(breaks)
- key = make_key(breaks)
- result = index._maybe_convert_i8(key)
- kind = breaks.dtype.kind
- expected_dtype = {"i": np.int64, "u": np.uint64, "f": np.float64}[kind]
- expected = Index(key, dtype=expected_dtype)
- tm.assert_index_equal(result, expected)
- @pytest.mark.parametrize(
- "make_key",
- [
- IntervalIndex.from_breaks,
- lambda breaks: Interval(breaks[0], breaks[1]),
- lambda breaks: breaks[0],
- ],
- ids=["IntervalIndex", "Interval", "scalar"],
- )
- def test_maybe_convert_i8_numeric_identical(self, make_key, any_real_numpy_dtype):
- # GH 20636
- breaks = np.arange(5, dtype=any_real_numpy_dtype)
- index = IntervalIndex.from_breaks(breaks)
- key = make_key(breaks)
- # test if _maybe_convert_i8 won't change key if an Interval or IntervalIndex
- result = index._maybe_convert_i8(key)
- assert result is key
- @pytest.mark.parametrize(
- "breaks1, breaks2",
- permutations(
- [
- date_range("20180101", periods=4),
- date_range("20180101", periods=4, tz="US/Eastern"),
- timedelta_range("0 days", periods=4),
- ],
- 2,
- ),
- ids=lambda x: str(x.dtype),
- )
- @pytest.mark.parametrize(
- "make_key",
- [
- IntervalIndex.from_breaks,
- lambda breaks: Interval(breaks[0], breaks[1]),
- lambda breaks: breaks,
- lambda breaks: breaks[0],
- list,
- ],
- ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
- )
- def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key):
- # GH 20636
- index = IntervalIndex.from_breaks(breaks1)
- key = make_key(breaks2)
- msg = (
- f"Cannot index an IntervalIndex of subtype {breaks1.dtype} with "
- f"values of dtype {breaks2.dtype}"
- )
- msg = re.escape(msg)
- with pytest.raises(ValueError, match=msg):
- index._maybe_convert_i8(key)
- def test_contains_method(self):
- # can select values that are IN the range of a value
- i = IntervalIndex.from_arrays([0, 1], [1, 2])
- expected = np.array([False, False], dtype="bool")
- actual = i.contains(0)
- tm.assert_numpy_array_equal(actual, expected)
- actual = i.contains(3)
- tm.assert_numpy_array_equal(actual, expected)
- expected = np.array([True, False], dtype="bool")
- actual = i.contains(0.5)
- tm.assert_numpy_array_equal(actual, expected)
- actual = i.contains(1)
- tm.assert_numpy_array_equal(actual, expected)
- # __contains__ not implemented for "interval in interval", follow
- # that for the contains method for now
- with pytest.raises(
- NotImplementedError, match="contains not implemented for two"
- ):
- i.contains(Interval(0, 1))
- def test_dropna(self, closed):
- expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed)
- ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed)
- result = ii.dropna()
- tm.assert_index_equal(result, expected)
- ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed)
- result = ii.dropna()
- tm.assert_index_equal(result, expected)
- def test_non_contiguous(self, closed):
- index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
- target = [0.5, 1.5, 2.5]
- actual = index.get_indexer(target)
- expected = np.array([0, -1, 1], dtype="intp")
- tm.assert_numpy_array_equal(actual, expected)
- assert 1.5 not in index
- def test_isin(self, closed):
- index = self.create_index(closed=closed)
- expected = np.array([True] + [False] * (len(index) - 1))
- result = index.isin(index[:1])
- tm.assert_numpy_array_equal(result, expected)
- result = index.isin([index[0]])
- tm.assert_numpy_array_equal(result, expected)
- other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
- expected = np.array([True] * (len(index) - 1) + [False])
- result = index.isin(other)
- tm.assert_numpy_array_equal(result, expected)
- result = index.isin(other.tolist())
- tm.assert_numpy_array_equal(result, expected)
- for other_closed in ["right", "left", "both", "neither"]:
- other = self.create_index(closed=other_closed)
- expected = np.repeat(closed == other_closed, len(index))
- result = index.isin(other)
- tm.assert_numpy_array_equal(result, expected)
- result = index.isin(other.tolist())
- tm.assert_numpy_array_equal(result, expected)
- def test_comparison(self):
- actual = Interval(0, 1) < self.index
- expected = np.array([False, True])
- tm.assert_numpy_array_equal(actual, expected)
- actual = Interval(0.5, 1.5) < self.index
- expected = np.array([False, True])
- tm.assert_numpy_array_equal(actual, expected)
- actual = self.index > Interval(0.5, 1.5)
- tm.assert_numpy_array_equal(actual, expected)
- actual = self.index == self.index
- expected = np.array([True, True])
- tm.assert_numpy_array_equal(actual, expected)
- actual = self.index <= self.index
- tm.assert_numpy_array_equal(actual, expected)
- actual = self.index >= self.index
- tm.assert_numpy_array_equal(actual, expected)
- actual = self.index < self.index
- expected = np.array([False, False])
- tm.assert_numpy_array_equal(actual, expected)
- actual = self.index > self.index
- tm.assert_numpy_array_equal(actual, expected)
- actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left")
- tm.assert_numpy_array_equal(actual, expected)
- actual = self.index == self.index.values
- tm.assert_numpy_array_equal(actual, np.array([True, True]))
- actual = self.index.values == self.index
- tm.assert_numpy_array_equal(actual, np.array([True, True]))
- actual = self.index <= self.index.values
- tm.assert_numpy_array_equal(actual, np.array([True, True]))
- actual = self.index != self.index.values
- tm.assert_numpy_array_equal(actual, np.array([False, False]))
- actual = self.index > self.index.values
- tm.assert_numpy_array_equal(actual, np.array([False, False]))
- actual = self.index.values > self.index
- tm.assert_numpy_array_equal(actual, np.array([False, False]))
- # invalid comparisons
- actual = self.index == 0
- tm.assert_numpy_array_equal(actual, np.array([False, False]))
- actual = self.index == self.index.left
- tm.assert_numpy_array_equal(actual, np.array([False, False]))
- msg = "|".join(
- [
- "not supported between instances of 'int' and '.*.Interval'",
- r"Invalid comparison between dtype=interval\[int64, right\] and ",
- ]
- )
- with pytest.raises(TypeError, match=msg):
- self.index > 0
- with pytest.raises(TypeError, match=msg):
- self.index <= 0
- with pytest.raises(TypeError, match=msg):
- self.index > np.arange(2)
- msg = "Lengths must match to compare"
- with pytest.raises(ValueError, match=msg):
- self.index > np.arange(3)
- def test_missing_values(self, closed):
- idx = Index(
- [np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed)]
- )
- idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2], closed=closed)
- assert idx.equals(idx2)
- msg = (
- "missing values must be missing in the same location both left "
- "and right sides"
- )
- with pytest.raises(ValueError, match=msg):
- IntervalIndex.from_arrays(
- [np.nan, 0, 1], np.array([0, 1, 2]), closed=closed
- )
- tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False]))
- def test_sort_values(self, closed):
- index = self.create_index(closed=closed)
- result = index.sort_values()
- tm.assert_index_equal(result, index)
- result = index.sort_values(ascending=False)
- tm.assert_index_equal(result, index[::-1])
- # with nan
- index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)])
- result = index.sort_values()
- expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan])
- tm.assert_index_equal(result, expected)
- result = index.sort_values(ascending=False, na_position="first")
- expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)])
- tm.assert_index_equal(result, expected)
- @pytest.mark.parametrize("tz", [None, "US/Eastern"])
- def test_datetime(self, tz):
- start = Timestamp("2000-01-01", tz=tz)
- dates = date_range(start=start, periods=10)
- index = IntervalIndex.from_breaks(dates)
- # test mid
- start = Timestamp("2000-01-01T12:00", tz=tz)
- expected = date_range(start=start, periods=9)
- tm.assert_index_equal(index.mid, expected)
- # __contains__ doesn't check individual points
- assert Timestamp("2000-01-01", tz=tz) not in index
- assert Timestamp("2000-01-01T12", tz=tz) not in index
- assert Timestamp("2000-01-02", tz=tz) not in index
- iv_true = Interval(
- Timestamp("2000-01-02", tz=tz), Timestamp("2000-01-03", tz=tz)
- )
- iv_false = Interval(
- Timestamp("1999-12-31", tz=tz), Timestamp("2000-01-01", tz=tz)
- )
- assert iv_true in index
- assert iv_false not in index
- # .contains does check individual points
- assert not index.contains(Timestamp("2000-01-01", tz=tz)).any()
- assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any()
- assert index.contains(Timestamp("2000-01-02", tz=tz)).any()
- # test get_indexer
- start = Timestamp("1999-12-31T12:00", tz=tz)
- target = date_range(start=start, periods=7, freq="12H")
- actual = index.get_indexer(target)
- expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp")
- tm.assert_numpy_array_equal(actual, expected)
- start = Timestamp("2000-01-08T18:00", tz=tz)
- target = date_range(start=start, periods=7, freq="6H")
- actual = index.get_indexer(target)
- expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp")
- tm.assert_numpy_array_equal(actual, expected)
- def test_append(self, closed):
- index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed)
- index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed)
- result = index1.append(index2)
- expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed)
- tm.assert_index_equal(result, expected)
- result = index1.append([index1, index2])
- expected = IntervalIndex.from_arrays(
- [0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed
- )
- tm.assert_index_equal(result, expected)
- for other_closed in {"left", "right", "both", "neither"} - {closed}:
- index_other_closed = IntervalIndex.from_arrays(
- [0, 1], [1, 2], closed=other_closed
- )
- result = index1.append(index_other_closed)
- expected = index1.astype(object).append(index_other_closed.astype(object))
- tm.assert_index_equal(result, expected)
- def test_is_non_overlapping_monotonic(self, closed):
- # Should be True in all cases
- tpls = [(0, 1), (2, 3), (4, 5), (6, 7)]
- idx = IntervalIndex.from_tuples(tpls, closed=closed)
- assert idx.is_non_overlapping_monotonic is True
- idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
- assert idx.is_non_overlapping_monotonic is True
- # Should be False in all cases (overlapping)
- tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
- idx = IntervalIndex.from_tuples(tpls, closed=closed)
- assert idx.is_non_overlapping_monotonic is False
- idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
- assert idx.is_non_overlapping_monotonic is False
- # Should be False in all cases (non-monotonic)
- tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
- idx = IntervalIndex.from_tuples(tpls, closed=closed)
- assert idx.is_non_overlapping_monotonic is False
- idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
- assert idx.is_non_overlapping_monotonic is False
- # Should be False for closed='both', otherwise True (GH16560)
- if closed == "both":
- idx = IntervalIndex.from_breaks(range(4), closed=closed)
- assert idx.is_non_overlapping_monotonic is False
- else:
- idx = IntervalIndex.from_breaks(range(4), closed=closed)
- assert idx.is_non_overlapping_monotonic is True
- @pytest.mark.parametrize(
- "start, shift, na_value",
- [
- (0, 1, np.nan),
- (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT),
- (Timedelta("0 days"), Timedelta("1 day"), pd.NaT),
- ],
- )
- def test_is_overlapping(self, start, shift, na_value, closed):
- # GH 23309
- # see test_interval_tree.py for extensive tests; interface tests here
- # non-overlapping
- tuples = [(start + n * shift, start + (n + 1) * shift) for n in (0, 2, 4)]
- index = IntervalIndex.from_tuples(tuples, closed=closed)
- assert index.is_overlapping is False
- # non-overlapping with NA
- tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
- index = IntervalIndex.from_tuples(tuples, closed=closed)
- assert index.is_overlapping is False
- # overlapping
- tuples = [(start + n * shift, start + (n + 2) * shift) for n in range(3)]
- index = IntervalIndex.from_tuples(tuples, closed=closed)
- assert index.is_overlapping is True
- # overlapping with NA
- tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
- index = IntervalIndex.from_tuples(tuples, closed=closed)
- assert index.is_overlapping is True
- # common endpoints
- tuples = [(start + n * shift, start + (n + 1) * shift) for n in range(3)]
- index = IntervalIndex.from_tuples(tuples, closed=closed)
- result = index.is_overlapping
- expected = closed == "both"
- assert result is expected
- # common endpoints with NA
- tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
- index = IntervalIndex.from_tuples(tuples, closed=closed)
- result = index.is_overlapping
- assert result is expected
- # intervals with duplicate left values
- a = [10, 15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85]
- b = [15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90]
- index = IntervalIndex.from_arrays(a, b, closed="right")
- result = index.is_overlapping
- assert result is False
- @pytest.mark.parametrize(
- "tuples",
- [
- list(zip(range(10), range(1, 11))),
- list(
- zip(
- date_range("20170101", periods=10),
- date_range("20170101", periods=10),
- )
- ),
- list(
- zip(
- timedelta_range("0 days", periods=10),
- timedelta_range("1 day", periods=10),
- )
- ),
- ],
- )
- def test_to_tuples(self, tuples):
- # GH 18756
- idx = IntervalIndex.from_tuples(tuples)
- result = idx.to_tuples()
- expected = Index(com.asarray_tuplesafe(tuples))
- tm.assert_index_equal(result, expected)
- @pytest.mark.parametrize(
- "tuples",
- [
- list(zip(range(10), range(1, 11))) + [np.nan],
- list(
- zip(
- date_range("20170101", periods=10),
- date_range("20170101", periods=10),
- )
- )
- + [np.nan],
- list(
- zip(
- timedelta_range("0 days", periods=10),
- timedelta_range("1 day", periods=10),
- )
- )
- + [np.nan],
- ],
- )
- @pytest.mark.parametrize("na_tuple", [True, False])
- def test_to_tuples_na(self, tuples, na_tuple):
- # GH 18756
- idx = IntervalIndex.from_tuples(tuples)
- result = idx.to_tuples(na_tuple=na_tuple)
- # check the non-NA portion
- expected_notna = Index(com.asarray_tuplesafe(tuples[:-1]))
- result_notna = result[:-1]
- tm.assert_index_equal(result_notna, expected_notna)
- # check the NA portion
- result_na = result[-1]
- if na_tuple:
- assert isinstance(result_na, tuple)
- assert len(result_na) == 2
- assert all(isna(x) for x in result_na)
- else:
- assert isna(result_na)
- def test_nbytes(self):
- # GH 19209
- left = np.arange(0, 4, dtype="i8")
- right = np.arange(1, 5, dtype="i8")
- result = IntervalIndex.from_arrays(left, right).nbytes
- expected = 64 # 4 * 8 * 2
- assert result == expected
- @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"])
- def test_set_closed(self, name, closed, new_closed):
- # GH 21670
- index = interval_range(0, 5, closed=closed, name=name)
- result = index.set_closed(new_closed)
- expected = interval_range(0, 5, closed=new_closed, name=name)
- tm.assert_index_equal(result, expected)
- @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False])
- def test_set_closed_errors(self, bad_closed):
- # GH 21670
- index = interval_range(0, 5)
- msg = f"invalid option for 'closed': {bad_closed}"
- with pytest.raises(ValueError, match=msg):
- index.set_closed(bad_closed)
- def test_is_all_dates(self):
- # GH 23576
- year_2017 = Interval(
- Timestamp("2017-01-01 00:00:00"), Timestamp("2018-01-01 00:00:00")
- )
- year_2017_index = IntervalIndex([year_2017])
- assert not year_2017_index._is_all_dates
def test_dir():
    """Calling ``dir`` on an IntervalIndex succeeds and does not list ``str``."""
    # GH#27571 dir(interval_index) should not raise
    attribute_names = dir(IntervalIndex.from_arrays([0, 1], [1, 2]))
    assert "str" not in attribute_names
- def test_searchsorted_different_argument_classes(listlike_box):
- # https://github.com/pandas-dev/pandas/issues/32762
- values = IntervalIndex([Interval(0, 1), Interval(1, 2)])
- result = values.searchsorted(listlike_box(values))
- expected = np.array([0, 1], dtype=result.dtype)
- tm.assert_numpy_array_equal(result, expected)
- result = values._data.searchsorted(listlike_box(values))
- tm.assert_numpy_array_equal(result, expected)
@pytest.mark.parametrize(
    "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2]
)
def test_searchsorted_invalid_argument(arg):
    """``searchsorted`` raises TypeError when the argument is not Interval-like."""
    values = IntervalIndex([Interval(0, 1), Interval(1, 2)])
    # Escape the message: the dotted class path would otherwise be read as regex
    # wildcards, letting unrelated text match.
    msg = re.escape(
        "'<' not supported between instances of 'pandas._libs.interval.Interval' and "
    )
    with pytest.raises(TypeError, match=msg):
        values.searchsorted(arg)
|