123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400 |
- """
- Tests for the Index constructor conducting inference.
- """
- from datetime import (
- datetime,
- timedelta,
- )
- from decimal import Decimal
- import numpy as np
- import pytest
- from pandas import (
- NA,
- Categorical,
- CategoricalIndex,
- DatetimeIndex,
- Index,
- IntervalIndex,
- MultiIndex,
- NaT,
- PeriodIndex,
- Series,
- TimedeltaIndex,
- Timestamp,
- array,
- date_range,
- period_range,
- timedelta_range,
- )
- import pandas._testing as tm
- class TestIndexConstructorInference:
- def test_object_all_bools(self):
- # GH#49594 match Series behavior on ndarray[object] of all bools
- arr = np.array([True, False], dtype=object)
- res = Index(arr)
- assert res.dtype == object
- # since the point is matching Series behavior, let's double check
- assert Series(arr).dtype == object
- def test_object_all_complex(self):
- # GH#49594 match Series behavior on ndarray[object] of all complex
- arr = np.array([complex(1), complex(2)], dtype=object)
- res = Index(arr)
- assert res.dtype == object
- # since the point is matching Series behavior, let's double check
- assert Series(arr).dtype == object
- @pytest.mark.parametrize("val", [NaT, None, np.nan, float("nan")])
- def test_infer_nat(self, val):
- # GH#49340 all NaT/None/nan and at least 1 NaT -> datetime64[ns],
- # matching Series behavior
- values = [NaT, val]
- idx = Index(values)
- assert idx.dtype == "datetime64[ns]" and idx.isna().all()
- idx = Index(values[::-1])
- assert idx.dtype == "datetime64[ns]" and idx.isna().all()
- idx = Index(np.array(values, dtype=object))
- assert idx.dtype == "datetime64[ns]" and idx.isna().all()
- idx = Index(np.array(values, dtype=object)[::-1])
- assert idx.dtype == "datetime64[ns]" and idx.isna().all()
- @pytest.mark.parametrize("na_value", [None, np.nan])
- @pytest.mark.parametrize("vtype", [list, tuple, iter])
- def test_construction_list_tuples_nan(self, na_value, vtype):
- # GH#18505 : valid tuples containing NaN
- values = [(1, "two"), (3.0, na_value)]
- result = Index(vtype(values))
- expected = MultiIndex.from_tuples(values)
- tm.assert_index_equal(result, expected)
- @pytest.mark.parametrize(
- "dtype",
- [int, "int64", "int32", "int16", "int8", "uint64", "uint32", "uint16", "uint8"],
- )
- def test_constructor_int_dtype_float(self, dtype):
- # GH#18400
- expected = Index([0, 1, 2, 3], dtype=dtype)
- result = Index([0.0, 1.0, 2.0, 3.0], dtype=dtype)
- tm.assert_index_equal(result, expected)
- @pytest.mark.parametrize("cast_index", [True, False])
- @pytest.mark.parametrize(
- "vals", [[True, False, True], np.array([True, False, True], dtype=bool)]
- )
- def test_constructor_dtypes_to_object(self, cast_index, vals):
- if cast_index:
- index = Index(vals, dtype=bool)
- else:
- index = Index(vals)
- assert type(index) is Index
- assert index.dtype == bool
- def test_constructor_categorical_to_object(self):
- # GH#32167 Categorical data and dtype=object should return object-dtype
- ci = CategoricalIndex(range(5))
- result = Index(ci, dtype=object)
- assert not isinstance(result, CategoricalIndex)
- def test_constructor_infer_periodindex(self):
- xp = period_range("2012-1-1", freq="M", periods=3)
- rs = Index(xp)
- tm.assert_index_equal(rs, xp)
- assert isinstance(rs, PeriodIndex)
- def test_from_list_of_periods(self):
- rng = period_range("1/1/2000", periods=20, freq="D")
- periods = list(rng)
- result = Index(periods)
- assert isinstance(result, PeriodIndex)
- @pytest.mark.parametrize("pos", [0, 1])
- @pytest.mark.parametrize(
- "klass,dtype,ctor",
- [
- (DatetimeIndex, "datetime64[ns]", np.datetime64("nat")),
- (TimedeltaIndex, "timedelta64[ns]", np.timedelta64("nat")),
- ],
- )
- def test_constructor_infer_nat_dt_like(
- self, pos, klass, dtype, ctor, nulls_fixture, request
- ):
- if isinstance(nulls_fixture, Decimal):
- # We dont cast these to datetime64/timedelta64
- return
- expected = klass([NaT, NaT])
- assert expected.dtype == dtype
- data = [ctor]
- data.insert(pos, nulls_fixture)
- warn = None
- if nulls_fixture is NA:
- expected = Index([NA, NaT])
- mark = pytest.mark.xfail(reason="Broken with np.NaT ctor; see GH 31884")
- request.node.add_marker(mark)
- # GH#35942 numpy will emit a DeprecationWarning within the
- # assert_index_equal calls. Since we can't do anything
- # about it until GH#31884 is fixed, we suppress that warning.
- warn = DeprecationWarning
- result = Index(data)
- with tm.assert_produces_warning(warn):
- tm.assert_index_equal(result, expected)
- result = Index(np.array(data, dtype=object))
- with tm.assert_produces_warning(warn):
- tm.assert_index_equal(result, expected)
- @pytest.mark.parametrize("swap_objs", [True, False])
- def test_constructor_mixed_nat_objs_infers_object(self, swap_objs):
- # mixed np.datetime64/timedelta64 nat results in object
- data = [np.datetime64("nat"), np.timedelta64("nat")]
- if swap_objs:
- data = data[::-1]
- expected = Index(data, dtype=object)
- tm.assert_index_equal(Index(data), expected)
- tm.assert_index_equal(Index(np.array(data, dtype=object)), expected)
- @pytest.mark.parametrize("swap_objs", [True, False])
- def test_constructor_datetime_and_datetime64(self, swap_objs):
- data = [Timestamp(2021, 6, 8, 9, 42), np.datetime64("now")]
- if swap_objs:
- data = data[::-1]
- expected = DatetimeIndex(data)
- tm.assert_index_equal(Index(data), expected)
- tm.assert_index_equal(Index(np.array(data, dtype=object)), expected)
- class TestDtypeEnforced:
- # check we don't silently ignore the dtype keyword
- def test_constructor_object_dtype_with_ea_data(self, any_numeric_ea_dtype):
- # GH#45206
- arr = array([0], dtype=any_numeric_ea_dtype)
- idx = Index(arr, dtype=object)
- assert idx.dtype == object
- @pytest.mark.parametrize("dtype", [object, "float64", "uint64", "category"])
- def test_constructor_range_values_mismatched_dtype(self, dtype):
- rng = Index(range(5))
- result = Index(rng, dtype=dtype)
- assert result.dtype == dtype
- result = Index(range(5), dtype=dtype)
- assert result.dtype == dtype
- @pytest.mark.parametrize("dtype", [object, "float64", "uint64", "category"])
- def test_constructor_categorical_values_mismatched_non_ea_dtype(self, dtype):
- cat = Categorical([1, 2, 3])
- result = Index(cat, dtype=dtype)
- assert result.dtype == dtype
- def test_constructor_categorical_values_mismatched_dtype(self):
- dti = date_range("2016-01-01", periods=3)
- cat = Categorical(dti)
- result = Index(cat, dti.dtype)
- tm.assert_index_equal(result, dti)
- dti2 = dti.tz_localize("Asia/Tokyo")
- cat2 = Categorical(dti2)
- result = Index(cat2, dti2.dtype)
- tm.assert_index_equal(result, dti2)
- ii = IntervalIndex.from_breaks(range(5))
- cat3 = Categorical(ii)
- result = Index(cat3, dtype=ii.dtype)
- tm.assert_index_equal(result, ii)
- def test_constructor_ea_values_mismatched_categorical_dtype(self):
- dti = date_range("2016-01-01", periods=3)
- result = Index(dti, dtype="category")
- expected = CategoricalIndex(dti)
- tm.assert_index_equal(result, expected)
- dti2 = date_range("2016-01-01", periods=3, tz="US/Pacific")
- result = Index(dti2, dtype="category")
- expected = CategoricalIndex(dti2)
- tm.assert_index_equal(result, expected)
- def test_constructor_period_values_mismatched_dtype(self):
- pi = period_range("2016-01-01", periods=3, freq="D")
- result = Index(pi, dtype="category")
- expected = CategoricalIndex(pi)
- tm.assert_index_equal(result, expected)
- def test_constructor_timedelta64_values_mismatched_dtype(self):
- # check we don't silently ignore the dtype keyword
- tdi = timedelta_range("4 Days", periods=5)
- result = Index(tdi, dtype="category")
- expected = CategoricalIndex(tdi)
- tm.assert_index_equal(result, expected)
- def test_constructor_interval_values_mismatched_dtype(self):
- dti = date_range("2016-01-01", periods=3)
- ii = IntervalIndex.from_breaks(dti)
- result = Index(ii, dtype="category")
- expected = CategoricalIndex(ii)
- tm.assert_index_equal(result, expected)
- def test_constructor_datetime64_values_mismatched_period_dtype(self):
- dti = date_range("2016-01-01", periods=3)
- result = Index(dti, dtype="Period[D]")
- expected = dti.to_period("D")
- tm.assert_index_equal(result, expected)
- @pytest.mark.parametrize("dtype", ["int64", "uint64"])
- def test_constructor_int_dtype_nan_raises(self, dtype):
- # see GH#15187
- data = [np.nan]
- msg = "cannot convert"
- with pytest.raises(ValueError, match=msg):
- Index(data, dtype=dtype)
- @pytest.mark.parametrize(
- "vals",
- [
- [1, 2, 3],
- np.array([1, 2, 3]),
- np.array([1, 2, 3], dtype=int),
- # below should coerce
- [1.0, 2.0, 3.0],
- np.array([1.0, 2.0, 3.0], dtype=float),
- ],
- )
- def test_constructor_dtypes_to_int(self, vals, any_int_numpy_dtype):
- dtype = any_int_numpy_dtype
- index = Index(vals, dtype=dtype)
- assert index.dtype == dtype
- @pytest.mark.parametrize(
- "vals",
- [
- [1, 2, 3],
- [1.0, 2.0, 3.0],
- np.array([1.0, 2.0, 3.0]),
- np.array([1, 2, 3], dtype=int),
- np.array([1.0, 2.0, 3.0], dtype=float),
- ],
- )
- def test_constructor_dtypes_to_float(self, vals, float_numpy_dtype):
- dtype = float_numpy_dtype
- index = Index(vals, dtype=dtype)
- assert index.dtype == dtype
- @pytest.mark.parametrize(
- "vals",
- [
- [1, 2, 3],
- np.array([1, 2, 3], dtype=int),
- np.array(["2011-01-01", "2011-01-02"], dtype="datetime64[ns]"),
- [datetime(2011, 1, 1), datetime(2011, 1, 2)],
- ],
- )
- def test_constructor_dtypes_to_categorical(self, vals):
- index = Index(vals, dtype="category")
- assert isinstance(index, CategoricalIndex)
- @pytest.mark.parametrize("cast_index", [True, False])
- @pytest.mark.parametrize(
- "vals",
- [
- Index(np.array([np.datetime64("2011-01-01"), np.datetime64("2011-01-02")])),
- Index([datetime(2011, 1, 1), datetime(2011, 1, 2)]),
- ],
- )
- def test_constructor_dtypes_to_datetime(self, cast_index, vals):
- if cast_index:
- index = Index(vals, dtype=object)
- assert isinstance(index, Index)
- assert index.dtype == object
- else:
- index = Index(vals)
- assert isinstance(index, DatetimeIndex)
- @pytest.mark.parametrize("cast_index", [True, False])
- @pytest.mark.parametrize(
- "vals",
- [
- np.array([np.timedelta64(1, "D"), np.timedelta64(1, "D")]),
- [timedelta(1), timedelta(1)],
- ],
- )
- def test_constructor_dtypes_to_timedelta(self, cast_index, vals):
- if cast_index:
- index = Index(vals, dtype=object)
- assert isinstance(index, Index)
- assert index.dtype == object
- else:
- index = Index(vals)
- assert isinstance(index, TimedeltaIndex)
- class TestIndexConstructorUnwrapping:
- # Test passing different arraylike values to pd.Index
- @pytest.mark.parametrize("klass", [Index, DatetimeIndex])
- def test_constructor_from_series_dt64(self, klass):
- stamps = [Timestamp("20110101"), Timestamp("20120101"), Timestamp("20130101")]
- expected = DatetimeIndex(stamps)
- ser = Series(stamps)
- result = klass(ser)
- tm.assert_index_equal(result, expected)
- def test_constructor_no_pandas_array(self):
- ser = Series([1, 2, 3])
- result = Index(ser.array)
- expected = Index([1, 2, 3])
- tm.assert_index_equal(result, expected)
- @pytest.mark.parametrize(
- "array",
- [
- np.arange(5),
- np.array(["a", "b", "c"]),
- date_range("2000-01-01", periods=3).values,
- ],
- )
- def test_constructor_ndarray_like(self, array):
- # GH#5460#issuecomment-44474502
- # it should be possible to convert any object that satisfies the numpy
- # ndarray interface directly into an Index
- class ArrayLike:
- def __init__(self, array) -> None:
- self.array = array
- def __array__(self, dtype=None) -> np.ndarray:
- return self.array
- expected = Index(array)
- result = Index(ArrayLike(array))
- tm.assert_index_equal(result, expected)
- class TestIndexConstructionErrors:
- def test_constructor_overflow_int64(self):
- # see GH#15832
- msg = (
- "The elements provided in the data cannot "
- "all be casted to the dtype int64"
- )
- with pytest.raises(OverflowError, match=msg):
- Index([np.iinfo(np.uint64).max - 1], dtype="int64")
|