123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488 |
- """
- Collection of tests asserting things that should be true for
- any index subclass except for MultiIndex. Makes use of the `index_flat`
- fixture defined in pandas/conftest.py.
- """
- from copy import (
- copy,
- deepcopy,
- )
- import re
- import numpy as np
- import pytest
- from pandas.compat import IS64
- from pandas.core.dtypes.common import (
- is_integer_dtype,
- is_numeric_dtype,
- )
- import pandas as pd
- from pandas import (
- CategoricalIndex,
- MultiIndex,
- PeriodIndex,
- RangeIndex,
- )
- import pandas._testing as tm
- class TestCommon:
- @pytest.mark.parametrize("name", [None, "new_name"])
- def test_to_frame(self, name, index_flat):
- # see GH#15230, GH#22580
- idx = index_flat
- if name:
- idx_name = name
- else:
- idx_name = idx.name or 0
- df = idx.to_frame(name=idx_name)
- assert df.index is idx
- assert len(df.columns) == 1
- assert df.columns[0] == idx_name
- assert df[idx_name].values is not idx.values
- df = idx.to_frame(index=False, name=idx_name)
- assert df.index is not idx
- def test_droplevel(self, index_flat):
- # GH 21115
- # MultiIndex is tested separately in test_multi.py
- index = index_flat
- assert index.droplevel([]).equals(index)
- for level in [index.name, [index.name]]:
- if isinstance(index.name, tuple) and level is index.name:
- # GH 21121 : droplevel with tuple name
- continue
- msg = (
- "Cannot remove 1 levels from an index with 1 levels: at least one "
- "level must be left."
- )
- with pytest.raises(ValueError, match=msg):
- index.droplevel(level)
- for level in "wrong", ["wrong"]:
- with pytest.raises(
- KeyError,
- match=r"'Requested level \(wrong\) does not match index name \(None\)'",
- ):
- index.droplevel(level)
- def test_constructor_non_hashable_name(self, index_flat):
- # GH 20527
- index = index_flat
- message = "Index.name must be a hashable type"
- renamed = [["1"]]
- # With .rename()
- with pytest.raises(TypeError, match=message):
- index.rename(name=renamed)
- # With .set_names()
- with pytest.raises(TypeError, match=message):
- index.set_names(names=renamed)
- def test_constructor_unwraps_index(self, index_flat):
- a = index_flat
- # Passing dtype is necessary for Index([True, False], dtype=object)
- # case.
- b = type(a)(a, dtype=a.dtype)
- tm.assert_equal(a._data, b._data)
- def test_to_flat_index(self, index_flat):
- # 22866
- index = index_flat
- result = index.to_flat_index()
- tm.assert_index_equal(result, index)
- def test_set_name_methods(self, index_flat):
- # MultiIndex tested separately
- index = index_flat
- new_name = "This is the new name for this index"
- original_name = index.name
- new_ind = index.set_names([new_name])
- assert new_ind.name == new_name
- assert index.name == original_name
- res = index.rename(new_name, inplace=True)
- # should return None
- assert res is None
- assert index.name == new_name
- assert index.names == [new_name]
- # FIXME: dont leave commented-out
- # with pytest.raises(TypeError, match="list-like"):
- # # should still fail even if it would be the right length
- # ind.set_names("a")
- with pytest.raises(ValueError, match="Level must be None"):
- index.set_names("a", level=0)
- # rename in place just leaves tuples and other containers alone
- name = ("A", "B")
- index.rename(name, inplace=True)
- assert index.name == name
- assert index.names == [name]
- def test_copy_and_deepcopy(self, index_flat):
- index = index_flat
- for func in (copy, deepcopy):
- idx_copy = func(index)
- assert idx_copy is not index
- assert idx_copy.equals(index)
- new_copy = index.copy(deep=True, name="banana")
- assert new_copy.name == "banana"
- def test_copy_name(self, index_flat):
- # GH#12309: Check that the "name" argument
- # passed at initialization is honored.
- index = index_flat
- first = type(index)(index, copy=True, name="mario")
- second = type(first)(first, copy=False)
- # Even though "copy=False", we want a new object.
- assert first is not second
- tm.assert_index_equal(first, second)
- # Not using tm.assert_index_equal() since names differ.
- assert index.equals(first)
- assert first.name == "mario"
- assert second.name == "mario"
- # TODO: belongs in series arithmetic tests?
- s1 = pd.Series(2, index=first)
- s2 = pd.Series(3, index=second[:-1])
- # See GH#13365
- s3 = s1 * s2
- assert s3.index.name == "mario"
- def test_copy_name2(self, index_flat):
- # GH#35592
- index = index_flat
- assert index.copy(name="mario").name == "mario"
- with pytest.raises(ValueError, match="Length of new names must be 1, got 2"):
- index.copy(name=["mario", "luigi"])
- msg = f"{type(index).__name__}.name must be a hashable type"
- with pytest.raises(TypeError, match=msg):
- index.copy(name=[["mario"]])
- def test_unique_level(self, index_flat):
- # don't test a MultiIndex here (as its tested separated)
- index = index_flat
- # GH 17896
- expected = index.drop_duplicates()
- for level in [0, index.name, None]:
- result = index.unique(level=level)
- tm.assert_index_equal(result, expected)
- msg = "Too many levels: Index has only 1 level, not 4"
- with pytest.raises(IndexError, match=msg):
- index.unique(level=3)
- msg = (
- rf"Requested level \(wrong\) does not match index name "
- rf"\({re.escape(index.name.__repr__())}\)"
- )
- with pytest.raises(KeyError, match=msg):
- index.unique(level="wrong")
- def test_unique(self, index_flat):
- # MultiIndex tested separately
- index = index_flat
- if not len(index):
- pytest.skip("Skip check for empty Index and MultiIndex")
- idx = index[[0] * 5]
- idx_unique = index[[0]]
- # We test against `idx_unique`, so first we make sure it's unique
- # and doesn't contain nans.
- assert idx_unique.is_unique is True
- try:
- assert idx_unique.hasnans is False
- except NotImplementedError:
- pass
- result = idx.unique()
- tm.assert_index_equal(result, idx_unique)
- # nans:
- if not index._can_hold_na:
- pytest.skip("Skip na-check if index cannot hold na")
- vals = index._values[[0] * 5]
- vals[0] = np.nan
- vals_unique = vals[:2]
- idx_nan = index._shallow_copy(vals)
- idx_unique_nan = index._shallow_copy(vals_unique)
- assert idx_unique_nan.is_unique is True
- assert idx_nan.dtype == index.dtype
- assert idx_unique_nan.dtype == index.dtype
- expected = idx_unique_nan
- for pos, i in enumerate([idx_nan, idx_unique_nan]):
- result = i.unique()
- tm.assert_index_equal(result, expected)
- def test_searchsorted_monotonic(self, index_flat, request):
- # GH17271
- index = index_flat
- # not implemented for tuple searches in MultiIndex
- # or Intervals searches in IntervalIndex
- if isinstance(index, pd.IntervalIndex):
- mark = pytest.mark.xfail(
- reason="IntervalIndex.searchsorted does not support Interval arg",
- raises=NotImplementedError,
- )
- request.node.add_marker(mark)
- # nothing to test if the index is empty
- if index.empty:
- pytest.skip("Skip check for empty Index")
- value = index[0]
- # determine the expected results (handle dupes for 'right')
- expected_left, expected_right = 0, (index == value).argmin()
- if expected_right == 0:
- # all values are the same, expected_right should be length
- expected_right = len(index)
- # test _searchsorted_monotonic in all cases
- # test searchsorted only for increasing
- if index.is_monotonic_increasing:
- ssm_left = index._searchsorted_monotonic(value, side="left")
- assert expected_left == ssm_left
- ssm_right = index._searchsorted_monotonic(value, side="right")
- assert expected_right == ssm_right
- ss_left = index.searchsorted(value, side="left")
- assert expected_left == ss_left
- ss_right = index.searchsorted(value, side="right")
- assert expected_right == ss_right
- elif index.is_monotonic_decreasing:
- ssm_left = index._searchsorted_monotonic(value, side="left")
- assert expected_left == ssm_left
- ssm_right = index._searchsorted_monotonic(value, side="right")
- assert expected_right == ssm_right
- else:
- # non-monotonic should raise.
- msg = "index must be monotonic increasing or decreasing"
- with pytest.raises(ValueError, match=msg):
- index._searchsorted_monotonic(value, side="left")
- def test_drop_duplicates(self, index_flat, keep):
- # MultiIndex is tested separately
- index = index_flat
- if isinstance(index, RangeIndex):
- pytest.skip(
- "RangeIndex is tested in test_drop_duplicates_no_duplicates "
- "as it cannot hold duplicates"
- )
- if len(index) == 0:
- pytest.skip(
- "empty index is tested in test_drop_duplicates_no_duplicates "
- "as it cannot hold duplicates"
- )
- # make unique index
- holder = type(index)
- unique_values = list(set(index))
- dtype = index.dtype if is_numeric_dtype(index) else None
- unique_idx = holder(unique_values, dtype=dtype)
- # make duplicated index
- n = len(unique_idx)
- duplicated_selection = np.random.choice(n, int(n * 1.5))
- idx = holder(unique_idx.values[duplicated_selection])
- # Series.duplicated is tested separately
- expected_duplicated = (
- pd.Series(duplicated_selection).duplicated(keep=keep).values
- )
- tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected_duplicated)
- # Series.drop_duplicates is tested separately
- expected_dropped = holder(pd.Series(idx).drop_duplicates(keep=keep))
- tm.assert_index_equal(idx.drop_duplicates(keep=keep), expected_dropped)
- def test_drop_duplicates_no_duplicates(self, index_flat):
- # MultiIndex is tested separately
- index = index_flat
- # make unique index
- if isinstance(index, RangeIndex):
- # RangeIndex cannot have duplicates
- unique_idx = index
- else:
- holder = type(index)
- unique_values = list(set(index))
- dtype = index.dtype if is_numeric_dtype(index) else None
- unique_idx = holder(unique_values, dtype=dtype)
- # check on unique index
- expected_duplicated = np.array([False] * len(unique_idx), dtype="bool")
- tm.assert_numpy_array_equal(unique_idx.duplicated(), expected_duplicated)
- result_dropped = unique_idx.drop_duplicates()
- tm.assert_index_equal(result_dropped, unique_idx)
- # validate shallow copy
- assert result_dropped is not unique_idx
- def test_drop_duplicates_inplace(self, index):
- msg = r"drop_duplicates\(\) got an unexpected keyword argument"
- with pytest.raises(TypeError, match=msg):
- index.drop_duplicates(inplace=True)
- def test_has_duplicates(self, index_flat):
- # MultiIndex tested separately in:
- # tests/indexes/multi/test_unique_and_duplicates.
- index = index_flat
- holder = type(index)
- if not len(index) or isinstance(index, RangeIndex):
- # MultiIndex tested separately in:
- # tests/indexes/multi/test_unique_and_duplicates.
- # RangeIndex is unique by definition.
- pytest.skip("Skip check for empty Index, MultiIndex, and RangeIndex")
- idx = holder([index[0]] * 5)
- assert idx.is_unique is False
- assert idx.has_duplicates is True
- @pytest.mark.parametrize(
- "dtype",
- ["int64", "uint64", "float64", "category", "datetime64[ns]", "timedelta64[ns]"],
- )
- def test_astype_preserves_name(self, index, dtype):
- # https://github.com/pandas-dev/pandas/issues/32013
- if isinstance(index, MultiIndex):
- index.names = ["idx" + str(i) for i in range(index.nlevels)]
- else:
- index.name = "idx"
- warn = None
- if index.dtype.kind == "c" and dtype in ["float64", "int64", "uint64"]:
- # imaginary components discarded
- warn = np.ComplexWarning
- is_pyarrow_str = str(index.dtype) == "string[pyarrow]" and dtype == "category"
- try:
- # Some of these conversions cannot succeed so we use a try / except
- with tm.assert_produces_warning(
- warn,
- raise_on_extra_warnings=is_pyarrow_str,
- check_stacklevel=False,
- ):
- result = index.astype(dtype)
- except (ValueError, TypeError, NotImplementedError, SystemError):
- return
- if isinstance(index, MultiIndex):
- assert result.names == index.names
- else:
- assert result.name == index.name
- def test_hasnans_isnans(self, index_flat):
- # GH#11343, added tests for hasnans / isnans
- index = index_flat
- # cases in indices doesn't include NaN
- idx = index.copy(deep=True)
- expected = np.array([False] * len(idx), dtype=bool)
- tm.assert_numpy_array_equal(idx._isnan, expected)
- assert idx.hasnans is False
- idx = index.copy(deep=True)
- values = idx._values
- if len(index) == 0:
- return
- elif is_integer_dtype(index.dtype):
- return
- elif index.dtype == bool:
- # values[1] = np.nan below casts to True!
- return
- values[1] = np.nan
- idx = type(index)(values)
- expected = np.array([False] * len(idx), dtype=bool)
- expected[1] = True
- tm.assert_numpy_array_equal(idx._isnan, expected)
- assert idx.hasnans is True
- @pytest.mark.parametrize("na_position", [None, "middle"])
- def test_sort_values_invalid_na_position(index_with_missing, na_position):
- with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
- index_with_missing.sort_values(na_position=na_position)
- @pytest.mark.parametrize("na_position", ["first", "last"])
- def test_sort_values_with_missing(index_with_missing, na_position, request):
- # GH 35584. Test that sort_values works with missing values,
- # sort non-missing and place missing according to na_position
- if isinstance(index_with_missing, CategoricalIndex):
- request.node.add_marker(
- pytest.mark.xfail(
- reason="missing value sorting order not well-defined", strict=False
- )
- )
- missing_count = np.sum(index_with_missing.isna())
- not_na_vals = index_with_missing[index_with_missing.notna()].values
- sorted_values = np.sort(not_na_vals)
- if na_position == "first":
- sorted_values = np.concatenate([[None] * missing_count, sorted_values])
- else:
- sorted_values = np.concatenate([sorted_values, [None] * missing_count])
- # Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray
- expected = type(index_with_missing)(sorted_values, dtype=index_with_missing.dtype)
- result = index_with_missing.sort_values(na_position=na_position)
- tm.assert_index_equal(result, expected)
- def test_ndarray_compat_properties(index):
- if isinstance(index, PeriodIndex) and not IS64:
- pytest.skip("Overflow")
- idx = index
- assert idx.T.equals(idx)
- assert idx.transpose().equals(idx)
- values = idx.values
- assert idx.shape == values.shape
- assert idx.ndim == values.ndim
- assert idx.size == values.size
- if not isinstance(index, (RangeIndex, MultiIndex)):
- # These two are not backed by an ndarray
- assert idx.nbytes == values.nbytes
- # test for validity
- idx.nbytes
- idx.values.nbytes
|