123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360 |
- """
- test_indexing tests the following Index methods:
- __getitem__
- get_loc
- get_value
- __contains__
- take
- where
- get_indexer
- get_indexer_for
- slice_locs
- asof_locs
- The corresponding tests.indexes.[index_type].test_indexing files
- contain tests for the corresponding methods specific to those Index subclasses.
- """
- import numpy as np
- import pytest
- from pandas.errors import InvalidIndexError
- from pandas.core.dtypes.common import (
- is_float_dtype,
- is_scalar,
- )
- from pandas import (
- NA,
- DatetimeIndex,
- Index,
- IntervalIndex,
- MultiIndex,
- NaT,
- PeriodIndex,
- TimedeltaIndex,
- )
- import pandas._testing as tm
- class TestTake:
- def test_take_invalid_kwargs(self, index):
- indices = [1, 2]
- msg = r"take\(\) got an unexpected keyword argument 'foo'"
- with pytest.raises(TypeError, match=msg):
- index.take(indices, foo=2)
- msg = "the 'out' parameter is not supported"
- with pytest.raises(ValueError, match=msg):
- index.take(indices, out=indices)
- msg = "the 'mode' parameter is not supported"
- with pytest.raises(ValueError, match=msg):
- index.take(indices, mode="clip")
- def test_take(self, index):
- indexer = [4, 3, 0, 2]
- if len(index) < 5:
- # not enough elements; ignore
- return
- result = index.take(indexer)
- expected = index[indexer]
- assert result.equals(expected)
- if not isinstance(index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
- # GH 10791
- msg = r"'(.*Index)' object has no attribute 'freq'"
- with pytest.raises(AttributeError, match=msg):
- index.freq
- def test_take_indexer_type(self):
- # GH#42875
- integer_index = Index([0, 1, 2, 3])
- scalar_index = 1
- msg = "Expected indices to be array-like"
- with pytest.raises(TypeError, match=msg):
- integer_index.take(scalar_index)
- def test_take_minus1_without_fill(self, index):
- # -1 does not get treated as NA unless allow_fill=True is passed
- if len(index) == 0:
- # Test is not applicable
- return
- result = index.take([0, 0, -1])
- expected = index.take([0, 0, len(index) - 1])
- tm.assert_index_equal(result, expected)
- class TestContains:
- @pytest.mark.parametrize(
- "index,val",
- [
- (Index([0, 1, 2]), 2),
- (Index([0, 1, "2"]), "2"),
- (Index([0, 1, 2, np.inf, 4]), 4),
- (Index([0, 1, 2, np.nan, 4]), 4),
- (Index([0, 1, 2, np.inf]), np.inf),
- (Index([0, 1, 2, np.nan]), np.nan),
- ],
- )
- def test_index_contains(self, index, val):
- assert val in index
- @pytest.mark.parametrize(
- "index,val",
- [
- (Index([0, 1, 2]), "2"),
- (Index([0, 1, "2"]), 2),
- (Index([0, 1, 2, np.inf]), 4),
- (Index([0, 1, 2, np.nan]), 4),
- (Index([0, 1, 2, np.inf]), np.nan),
- (Index([0, 1, 2, np.nan]), np.inf),
- # Checking if np.inf in int64 Index should not cause an OverflowError
- # Related to GH 16957
- (Index([0, 1, 2], dtype=np.int64), np.inf),
- (Index([0, 1, 2], dtype=np.int64), np.nan),
- (Index([0, 1, 2], dtype=np.uint64), np.inf),
- (Index([0, 1, 2], dtype=np.uint64), np.nan),
- ],
- )
- def test_index_not_contains(self, index, val):
- assert val not in index
- @pytest.mark.parametrize(
- "index,val", [(Index([0, 1, "2"]), 0), (Index([0, 1, "2"]), "2")]
- )
- def test_mixed_index_contains(self, index, val):
- # GH#19860
- assert val in index
- @pytest.mark.parametrize(
- "index,val", [(Index([0, 1, "2"]), "1"), (Index([0, 1, "2"]), 2)]
- )
- def test_mixed_index_not_contains(self, index, val):
- # GH#19860
- assert val not in index
- def test_contains_with_float_index(self, any_real_numpy_dtype):
- # GH#22085
- dtype = any_real_numpy_dtype
- data = [0, 1, 2, 3] if not is_float_dtype(dtype) else [0.1, 1.1, 2.2, 3.3]
- index = Index(data, dtype=dtype)
- if not is_float_dtype(index.dtype):
- assert 1.1 not in index
- assert 1.0 in index
- assert 1 in index
- else:
- assert 1.1 in index
- assert 1.0 not in index
- assert 1 not in index
- def test_contains_requires_hashable_raises(self, index):
- if isinstance(index, MultiIndex):
- return # TODO: do we want this to raise?
- msg = "unhashable type: 'list'"
- with pytest.raises(TypeError, match=msg):
- [] in index
- msg = "|".join(
- [
- r"unhashable type: 'dict'",
- r"must be real number, not dict",
- r"an integer is required",
- r"\{\}",
- r"pandas\._libs\.interval\.IntervalTree' is not iterable",
- ]
- )
- with pytest.raises(TypeError, match=msg):
- {} in index._engine
- class TestGetLoc:
- def test_get_loc_non_hashable(self, index):
- # MultiIndex and Index raise TypeError, others InvalidIndexError
- with pytest.raises((TypeError, InvalidIndexError), match="slice"):
- index.get_loc(slice(0, 1))
- def test_get_loc_non_scalar_hashable(self, index):
- # GH52877
- from enum import Enum
- class E(Enum):
- X1 = "x1"
- assert not is_scalar(E.X1)
- exc = KeyError
- msg = "<E.X1: 'x1'>"
- if isinstance(
- index,
- (
- DatetimeIndex,
- TimedeltaIndex,
- PeriodIndex,
- IntervalIndex,
- ),
- ):
- # TODO: make these more consistent?
- exc = InvalidIndexError
- msg = "E.X1"
- with pytest.raises(exc, match=msg):
- index.get_loc(E.X1)
- def test_get_loc_generator(self, index):
- exc = KeyError
- if isinstance(
- index,
- (
- DatetimeIndex,
- TimedeltaIndex,
- PeriodIndex,
- IntervalIndex,
- MultiIndex,
- ),
- ):
- # TODO: make these more consistent?
- exc = InvalidIndexError
- with pytest.raises(exc, match="generator object"):
- # MultiIndex specifically checks for generator; others for scalar
- index.get_loc(x for x in range(5))
- def test_get_loc_masked_duplicated_na(self):
- # GH#48411
- idx = Index([1, 2, NA, NA], dtype="Int64")
- result = idx.get_loc(NA)
- expected = np.array([False, False, True, True])
- tm.assert_numpy_array_equal(result, expected)
- class TestGetIndexer:
- def test_get_indexer_base(self, index):
- if index._index_as_unique:
- expected = np.arange(index.size, dtype=np.intp)
- actual = index.get_indexer(index)
- tm.assert_numpy_array_equal(expected, actual)
- else:
- msg = "Reindexing only valid with uniquely valued Index objects"
- with pytest.raises(InvalidIndexError, match=msg):
- index.get_indexer(index)
- with pytest.raises(ValueError, match="Invalid fill method"):
- index.get_indexer(index, method="invalid")
- def test_get_indexer_consistency(self, index):
- # See GH#16819
- if index._index_as_unique:
- indexer = index.get_indexer(index[0:2])
- assert isinstance(indexer, np.ndarray)
- assert indexer.dtype == np.intp
- else:
- msg = "Reindexing only valid with uniquely valued Index objects"
- with pytest.raises(InvalidIndexError, match=msg):
- index.get_indexer(index[0:2])
- indexer, _ = index.get_indexer_non_unique(index[0:2])
- assert isinstance(indexer, np.ndarray)
- assert indexer.dtype == np.intp
- def test_get_indexer_masked_duplicated_na(self):
- # GH#48411
- idx = Index([1, 2, NA, NA], dtype="Int64")
- result = idx.get_indexer_for(Index([1, NA], dtype="Int64"))
- expected = np.array([0, 2, 3], dtype=result.dtype)
- tm.assert_numpy_array_equal(result, expected)
- class TestConvertSliceIndexer:
- def test_convert_almost_null_slice(self, index):
- # slice with None at both ends, but not step
- key = slice(None, None, "foo")
- if isinstance(index, IntervalIndex):
- msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
- with pytest.raises(ValueError, match=msg):
- index._convert_slice_indexer(key, "loc")
- else:
- msg = "'>=' not supported between instances of 'str' and 'int'"
- with pytest.raises(TypeError, match=msg):
- index._convert_slice_indexer(key, "loc")
- class TestPutmask:
- def test_putmask_with_wrong_mask(self, index):
- # GH#18368
- if not len(index):
- return
- fill = index[0]
- msg = "putmask: mask and data must be the same size"
- with pytest.raises(ValueError, match=msg):
- index.putmask(np.ones(len(index) + 1, np.bool_), fill)
- with pytest.raises(ValueError, match=msg):
- index.putmask(np.ones(len(index) - 1, np.bool_), fill)
- with pytest.raises(ValueError, match=msg):
- index.putmask("foo", fill)
@pytest.mark.parametrize(
    "idx",
    [
        Index([1, 2, 3]),
        Index([0.1, 0.2, 0.3]),
        Index(["a", "b", "c"]),
    ],
)
def test_getitem_deprecated_float(idx):
    """Indexing an ``Index`` with a float key raises ``IndexError``."""
    # https://github.com/pandas-dev/pandas/issues/34191
    float_key = 1.0
    with pytest.raises(
        IndexError, match="Indexing with a float is no longer supported"
    ):
        idx[float_key]
@pytest.mark.parametrize(
    "idx,target,expected",
    [
        ([np.nan, "var1", np.nan], [np.nan], np.array([0, 2], dtype=np.intp)),
        (
            [np.nan, "var1", np.nan],
            [np.nan, "var1"],
            np.array([0, 2, 1], dtype=np.intp),
        ),
        (
            np.array([np.nan, "var1", np.nan], dtype=object),
            [np.nan],
            np.array([0, 2], dtype=np.intp),
        ),
        (
            DatetimeIndex(["2020-08-05", NaT, NaT]),
            [NaT],
            np.array([1, 2], dtype=np.intp),
        ),
        (["a", "b", "a", np.nan], [np.nan], np.array([3], dtype=np.intp)),
        (
            np.array(["b", np.nan, float("NaN"), "b"], dtype=object),
            Index([np.nan], dtype=object),
            np.array([1, 2], dtype=np.intp),
        ),
    ],
)
def test_get_indexer_non_unique_multiple_nans(idx, target, expected):
    """``get_indexer_for`` returns the position of every missing value matched."""
    # GH 35392
    actual = Index(idx).get_indexer_for(target)
    tm.assert_numpy_array_equal(actual, expected)
- def test_get_indexer_non_unique_nans_in_object_dtype_target(nulls_fixture):
- idx = Index([1.0, 2.0])
- target = Index([1, nulls_fixture], dtype="object")
- result_idx, result_missing = idx.get_indexer_non_unique(target)
- tm.assert_numpy_array_equal(result_idx, np.array([0, -1], dtype=np.intp))
- tm.assert_numpy_array_equal(result_missing, np.array([1], dtype=np.intp))
|