# test_index_new.py (13 KB)
  1. """
  2. Tests for the Index constructor conducting inference.
  3. """
  4. from datetime import (
  5. datetime,
  6. timedelta,
  7. )
  8. from decimal import Decimal
  9. import numpy as np
  10. import pytest
  11. from pandas import (
  12. NA,
  13. Categorical,
  14. CategoricalIndex,
  15. DatetimeIndex,
  16. Index,
  17. IntervalIndex,
  18. MultiIndex,
  19. NaT,
  20. PeriodIndex,
  21. Series,
  22. TimedeltaIndex,
  23. Timestamp,
  24. array,
  25. date_range,
  26. period_range,
  27. timedelta_range,
  28. )
  29. import pandas._testing as tm
  30. class TestIndexConstructorInference:
  31. def test_object_all_bools(self):
  32. # GH#49594 match Series behavior on ndarray[object] of all bools
  33. arr = np.array([True, False], dtype=object)
  34. res = Index(arr)
  35. assert res.dtype == object
  36. # since the point is matching Series behavior, let's double check
  37. assert Series(arr).dtype == object
  38. def test_object_all_complex(self):
  39. # GH#49594 match Series behavior on ndarray[object] of all complex
  40. arr = np.array([complex(1), complex(2)], dtype=object)
  41. res = Index(arr)
  42. assert res.dtype == object
  43. # since the point is matching Series behavior, let's double check
  44. assert Series(arr).dtype == object
  45. @pytest.mark.parametrize("val", [NaT, None, np.nan, float("nan")])
  46. def test_infer_nat(self, val):
  47. # GH#49340 all NaT/None/nan and at least 1 NaT -> datetime64[ns],
  48. # matching Series behavior
  49. values = [NaT, val]
  50. idx = Index(values)
  51. assert idx.dtype == "datetime64[ns]" and idx.isna().all()
  52. idx = Index(values[::-1])
  53. assert idx.dtype == "datetime64[ns]" and idx.isna().all()
  54. idx = Index(np.array(values, dtype=object))
  55. assert idx.dtype == "datetime64[ns]" and idx.isna().all()
  56. idx = Index(np.array(values, dtype=object)[::-1])
  57. assert idx.dtype == "datetime64[ns]" and idx.isna().all()
  58. @pytest.mark.parametrize("na_value", [None, np.nan])
  59. @pytest.mark.parametrize("vtype", [list, tuple, iter])
  60. def test_construction_list_tuples_nan(self, na_value, vtype):
  61. # GH#18505 : valid tuples containing NaN
  62. values = [(1, "two"), (3.0, na_value)]
  63. result = Index(vtype(values))
  64. expected = MultiIndex.from_tuples(values)
  65. tm.assert_index_equal(result, expected)
  66. @pytest.mark.parametrize(
  67. "dtype",
  68. [int, "int64", "int32", "int16", "int8", "uint64", "uint32", "uint16", "uint8"],
  69. )
  70. def test_constructor_int_dtype_float(self, dtype):
  71. # GH#18400
  72. expected = Index([0, 1, 2, 3], dtype=dtype)
  73. result = Index([0.0, 1.0, 2.0, 3.0], dtype=dtype)
  74. tm.assert_index_equal(result, expected)
  75. @pytest.mark.parametrize("cast_index", [True, False])
  76. @pytest.mark.parametrize(
  77. "vals", [[True, False, True], np.array([True, False, True], dtype=bool)]
  78. )
  79. def test_constructor_dtypes_to_object(self, cast_index, vals):
  80. if cast_index:
  81. index = Index(vals, dtype=bool)
  82. else:
  83. index = Index(vals)
  84. assert type(index) is Index
  85. assert index.dtype == bool
  86. def test_constructor_categorical_to_object(self):
  87. # GH#32167 Categorical data and dtype=object should return object-dtype
  88. ci = CategoricalIndex(range(5))
  89. result = Index(ci, dtype=object)
  90. assert not isinstance(result, CategoricalIndex)
  91. def test_constructor_infer_periodindex(self):
  92. xp = period_range("2012-1-1", freq="M", periods=3)
  93. rs = Index(xp)
  94. tm.assert_index_equal(rs, xp)
  95. assert isinstance(rs, PeriodIndex)
  96. def test_from_list_of_periods(self):
  97. rng = period_range("1/1/2000", periods=20, freq="D")
  98. periods = list(rng)
  99. result = Index(periods)
  100. assert isinstance(result, PeriodIndex)
  101. @pytest.mark.parametrize("pos", [0, 1])
  102. @pytest.mark.parametrize(
  103. "klass,dtype,ctor",
  104. [
  105. (DatetimeIndex, "datetime64[ns]", np.datetime64("nat")),
  106. (TimedeltaIndex, "timedelta64[ns]", np.timedelta64("nat")),
  107. ],
  108. )
  109. def test_constructor_infer_nat_dt_like(
  110. self, pos, klass, dtype, ctor, nulls_fixture, request
  111. ):
  112. if isinstance(nulls_fixture, Decimal):
  113. # We dont cast these to datetime64/timedelta64
  114. return
  115. expected = klass([NaT, NaT])
  116. assert expected.dtype == dtype
  117. data = [ctor]
  118. data.insert(pos, nulls_fixture)
  119. warn = None
  120. if nulls_fixture is NA:
  121. expected = Index([NA, NaT])
  122. mark = pytest.mark.xfail(reason="Broken with np.NaT ctor; see GH 31884")
  123. request.node.add_marker(mark)
  124. # GH#35942 numpy will emit a DeprecationWarning within the
  125. # assert_index_equal calls. Since we can't do anything
  126. # about it until GH#31884 is fixed, we suppress that warning.
  127. warn = DeprecationWarning
  128. result = Index(data)
  129. with tm.assert_produces_warning(warn):
  130. tm.assert_index_equal(result, expected)
  131. result = Index(np.array(data, dtype=object))
  132. with tm.assert_produces_warning(warn):
  133. tm.assert_index_equal(result, expected)
  134. @pytest.mark.parametrize("swap_objs", [True, False])
  135. def test_constructor_mixed_nat_objs_infers_object(self, swap_objs):
  136. # mixed np.datetime64/timedelta64 nat results in object
  137. data = [np.datetime64("nat"), np.timedelta64("nat")]
  138. if swap_objs:
  139. data = data[::-1]
  140. expected = Index(data, dtype=object)
  141. tm.assert_index_equal(Index(data), expected)
  142. tm.assert_index_equal(Index(np.array(data, dtype=object)), expected)
  143. @pytest.mark.parametrize("swap_objs", [True, False])
  144. def test_constructor_datetime_and_datetime64(self, swap_objs):
  145. data = [Timestamp(2021, 6, 8, 9, 42), np.datetime64("now")]
  146. if swap_objs:
  147. data = data[::-1]
  148. expected = DatetimeIndex(data)
  149. tm.assert_index_equal(Index(data), expected)
  150. tm.assert_index_equal(Index(np.array(data, dtype=object)), expected)
  151. class TestDtypeEnforced:
  152. # check we don't silently ignore the dtype keyword
  153. def test_constructor_object_dtype_with_ea_data(self, any_numeric_ea_dtype):
  154. # GH#45206
  155. arr = array([0], dtype=any_numeric_ea_dtype)
  156. idx = Index(arr, dtype=object)
  157. assert idx.dtype == object
  158. @pytest.mark.parametrize("dtype", [object, "float64", "uint64", "category"])
  159. def test_constructor_range_values_mismatched_dtype(self, dtype):
  160. rng = Index(range(5))
  161. result = Index(rng, dtype=dtype)
  162. assert result.dtype == dtype
  163. result = Index(range(5), dtype=dtype)
  164. assert result.dtype == dtype
  165. @pytest.mark.parametrize("dtype", [object, "float64", "uint64", "category"])
  166. def test_constructor_categorical_values_mismatched_non_ea_dtype(self, dtype):
  167. cat = Categorical([1, 2, 3])
  168. result = Index(cat, dtype=dtype)
  169. assert result.dtype == dtype
  170. def test_constructor_categorical_values_mismatched_dtype(self):
  171. dti = date_range("2016-01-01", periods=3)
  172. cat = Categorical(dti)
  173. result = Index(cat, dti.dtype)
  174. tm.assert_index_equal(result, dti)
  175. dti2 = dti.tz_localize("Asia/Tokyo")
  176. cat2 = Categorical(dti2)
  177. result = Index(cat2, dti2.dtype)
  178. tm.assert_index_equal(result, dti2)
  179. ii = IntervalIndex.from_breaks(range(5))
  180. cat3 = Categorical(ii)
  181. result = Index(cat3, dtype=ii.dtype)
  182. tm.assert_index_equal(result, ii)
  183. def test_constructor_ea_values_mismatched_categorical_dtype(self):
  184. dti = date_range("2016-01-01", periods=3)
  185. result = Index(dti, dtype="category")
  186. expected = CategoricalIndex(dti)
  187. tm.assert_index_equal(result, expected)
  188. dti2 = date_range("2016-01-01", periods=3, tz="US/Pacific")
  189. result = Index(dti2, dtype="category")
  190. expected = CategoricalIndex(dti2)
  191. tm.assert_index_equal(result, expected)
  192. def test_constructor_period_values_mismatched_dtype(self):
  193. pi = period_range("2016-01-01", periods=3, freq="D")
  194. result = Index(pi, dtype="category")
  195. expected = CategoricalIndex(pi)
  196. tm.assert_index_equal(result, expected)
  197. def test_constructor_timedelta64_values_mismatched_dtype(self):
  198. # check we don't silently ignore the dtype keyword
  199. tdi = timedelta_range("4 Days", periods=5)
  200. result = Index(tdi, dtype="category")
  201. expected = CategoricalIndex(tdi)
  202. tm.assert_index_equal(result, expected)
  203. def test_constructor_interval_values_mismatched_dtype(self):
  204. dti = date_range("2016-01-01", periods=3)
  205. ii = IntervalIndex.from_breaks(dti)
  206. result = Index(ii, dtype="category")
  207. expected = CategoricalIndex(ii)
  208. tm.assert_index_equal(result, expected)
  209. def test_constructor_datetime64_values_mismatched_period_dtype(self):
  210. dti = date_range("2016-01-01", periods=3)
  211. result = Index(dti, dtype="Period[D]")
  212. expected = dti.to_period("D")
  213. tm.assert_index_equal(result, expected)
  214. @pytest.mark.parametrize("dtype", ["int64", "uint64"])
  215. def test_constructor_int_dtype_nan_raises(self, dtype):
  216. # see GH#15187
  217. data = [np.nan]
  218. msg = "cannot convert"
  219. with pytest.raises(ValueError, match=msg):
  220. Index(data, dtype=dtype)
  221. @pytest.mark.parametrize(
  222. "vals",
  223. [
  224. [1, 2, 3],
  225. np.array([1, 2, 3]),
  226. np.array([1, 2, 3], dtype=int),
  227. # below should coerce
  228. [1.0, 2.0, 3.0],
  229. np.array([1.0, 2.0, 3.0], dtype=float),
  230. ],
  231. )
  232. def test_constructor_dtypes_to_int(self, vals, any_int_numpy_dtype):
  233. dtype = any_int_numpy_dtype
  234. index = Index(vals, dtype=dtype)
  235. assert index.dtype == dtype
  236. @pytest.mark.parametrize(
  237. "vals",
  238. [
  239. [1, 2, 3],
  240. [1.0, 2.0, 3.0],
  241. np.array([1.0, 2.0, 3.0]),
  242. np.array([1, 2, 3], dtype=int),
  243. np.array([1.0, 2.0, 3.0], dtype=float),
  244. ],
  245. )
  246. def test_constructor_dtypes_to_float(self, vals, float_numpy_dtype):
  247. dtype = float_numpy_dtype
  248. index = Index(vals, dtype=dtype)
  249. assert index.dtype == dtype
  250. @pytest.mark.parametrize(
  251. "vals",
  252. [
  253. [1, 2, 3],
  254. np.array([1, 2, 3], dtype=int),
  255. np.array(["2011-01-01", "2011-01-02"], dtype="datetime64[ns]"),
  256. [datetime(2011, 1, 1), datetime(2011, 1, 2)],
  257. ],
  258. )
  259. def test_constructor_dtypes_to_categorical(self, vals):
  260. index = Index(vals, dtype="category")
  261. assert isinstance(index, CategoricalIndex)
  262. @pytest.mark.parametrize("cast_index", [True, False])
  263. @pytest.mark.parametrize(
  264. "vals",
  265. [
  266. Index(np.array([np.datetime64("2011-01-01"), np.datetime64("2011-01-02")])),
  267. Index([datetime(2011, 1, 1), datetime(2011, 1, 2)]),
  268. ],
  269. )
  270. def test_constructor_dtypes_to_datetime(self, cast_index, vals):
  271. if cast_index:
  272. index = Index(vals, dtype=object)
  273. assert isinstance(index, Index)
  274. assert index.dtype == object
  275. else:
  276. index = Index(vals)
  277. assert isinstance(index, DatetimeIndex)
  278. @pytest.mark.parametrize("cast_index", [True, False])
  279. @pytest.mark.parametrize(
  280. "vals",
  281. [
  282. np.array([np.timedelta64(1, "D"), np.timedelta64(1, "D")]),
  283. [timedelta(1), timedelta(1)],
  284. ],
  285. )
  286. def test_constructor_dtypes_to_timedelta(self, cast_index, vals):
  287. if cast_index:
  288. index = Index(vals, dtype=object)
  289. assert isinstance(index, Index)
  290. assert index.dtype == object
  291. else:
  292. index = Index(vals)
  293. assert isinstance(index, TimedeltaIndex)
  294. class TestIndexConstructorUnwrapping:
  295. # Test passing different arraylike values to pd.Index
  296. @pytest.mark.parametrize("klass", [Index, DatetimeIndex])
  297. def test_constructor_from_series_dt64(self, klass):
  298. stamps = [Timestamp("20110101"), Timestamp("20120101"), Timestamp("20130101")]
  299. expected = DatetimeIndex(stamps)
  300. ser = Series(stamps)
  301. result = klass(ser)
  302. tm.assert_index_equal(result, expected)
  303. def test_constructor_no_pandas_array(self):
  304. ser = Series([1, 2, 3])
  305. result = Index(ser.array)
  306. expected = Index([1, 2, 3])
  307. tm.assert_index_equal(result, expected)
  308. @pytest.mark.parametrize(
  309. "array",
  310. [
  311. np.arange(5),
  312. np.array(["a", "b", "c"]),
  313. date_range("2000-01-01", periods=3).values,
  314. ],
  315. )
  316. def test_constructor_ndarray_like(self, array):
  317. # GH#5460#issuecomment-44474502
  318. # it should be possible to convert any object that satisfies the numpy
  319. # ndarray interface directly into an Index
  320. class ArrayLike:
  321. def __init__(self, array) -> None:
  322. self.array = array
  323. def __array__(self, dtype=None) -> np.ndarray:
  324. return self.array
  325. expected = Index(array)
  326. result = Index(ArrayLike(array))
  327. tm.assert_index_equal(result, expected)
  328. class TestIndexConstructionErrors:
  329. def test_constructor_overflow_int64(self):
  330. # see GH#15832
  331. msg = (
  332. "The elements provided in the data cannot "
  333. "all be casted to the dtype int64"
  334. )
  335. with pytest.raises(OverflowError, match=msg):
  336. Index([np.iinfo(np.uint64).max - 1], dtype="int64")