test_constructors.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478
  1. from functools import partial
  2. import numpy as np
  3. import pytest
  4. from pandas.core.dtypes.common import is_categorical_dtype
  5. from pandas.core.dtypes.dtypes import IntervalDtype
  6. from pandas import (
  7. Categorical,
  8. CategoricalIndex,
  9. Index,
  10. Interval,
  11. IntervalIndex,
  12. date_range,
  13. notna,
  14. period_range,
  15. timedelta_range,
  16. )
  17. import pandas._testing as tm
  18. from pandas.core.arrays import IntervalArray
  19. import pandas.core.common as com
  20. @pytest.fixture(params=[None, "foo"])
  21. def name(request):
  22. return request.param
  23. class ConstructorTests:
  24. """
  25. Common tests for all variations of IntervalIndex construction. Input data
  26. to be supplied in breaks format, then converted by the subclass method
  27. get_kwargs_from_breaks to the expected format.
  28. """
  29. @pytest.fixture(
  30. params=[
  31. ([3, 14, 15, 92, 653], np.int64),
  32. (np.arange(10, dtype="int64"), np.int64),
  33. (Index(np.arange(-10, 11, dtype=np.int64)), np.int64),
  34. (Index(np.arange(10, 31, dtype=np.uint64)), np.uint64),
  35. (Index(np.arange(20, 30, 0.5), dtype=np.float64), np.float64),
  36. (date_range("20180101", periods=10), "<M8[ns]"),
  37. (
  38. date_range("20180101", periods=10, tz="US/Eastern"),
  39. "datetime64[ns, US/Eastern]",
  40. ),
  41. (timedelta_range("1 day", periods=10), "<m8[ns]"),
  42. ]
  43. )
  44. def breaks_and_expected_subtype(self, request):
  45. return request.param
  46. def test_constructor(self, constructor, breaks_and_expected_subtype, closed, name):
  47. breaks, expected_subtype = breaks_and_expected_subtype
  48. result_kwargs = self.get_kwargs_from_breaks(breaks, closed)
  49. result = constructor(closed=closed, name=name, **result_kwargs)
  50. assert result.closed == closed
  51. assert result.name == name
  52. assert result.dtype.subtype == expected_subtype
  53. tm.assert_index_equal(result.left, Index(breaks[:-1], dtype=expected_subtype))
  54. tm.assert_index_equal(result.right, Index(breaks[1:], dtype=expected_subtype))
  55. @pytest.mark.parametrize(
  56. "breaks, subtype",
  57. [
  58. (Index([0, 1, 2, 3, 4], dtype=np.int64), "float64"),
  59. (Index([0, 1, 2, 3, 4], dtype=np.int64), "datetime64[ns]"),
  60. (Index([0, 1, 2, 3, 4], dtype=np.int64), "timedelta64[ns]"),
  61. (Index([0, 1, 2, 3, 4], dtype=np.float64), "int64"),
  62. (date_range("2017-01-01", periods=5), "int64"),
  63. (timedelta_range("1 day", periods=5), "int64"),
  64. ],
  65. )
  66. def test_constructor_dtype(self, constructor, breaks, subtype):
  67. # GH 19262: conversion via dtype parameter
  68. expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype))
  69. expected = constructor(**expected_kwargs)
  70. result_kwargs = self.get_kwargs_from_breaks(breaks)
  71. iv_dtype = IntervalDtype(subtype, "right")
  72. for dtype in (iv_dtype, str(iv_dtype)):
  73. result = constructor(dtype=dtype, **result_kwargs)
  74. tm.assert_index_equal(result, expected)
  75. @pytest.mark.parametrize(
  76. "breaks",
  77. [
  78. Index([0, 1, 2, 3, 4], dtype=np.int64),
  79. Index([0, 1, 2, 3, 4], dtype=np.uint64),
  80. Index([0, 1, 2, 3, 4], dtype=np.float64),
  81. date_range("2017-01-01", periods=5),
  82. timedelta_range("1 day", periods=5),
  83. ],
  84. )
  85. def test_constructor_pass_closed(self, constructor, breaks):
  86. # not passing closed to IntervalDtype, but to IntervalArray constructor
  87. iv_dtype = IntervalDtype(breaks.dtype)
  88. result_kwargs = self.get_kwargs_from_breaks(breaks)
  89. for dtype in (iv_dtype, str(iv_dtype)):
  90. with tm.assert_produces_warning(None):
  91. result = constructor(dtype=dtype, closed="left", **result_kwargs)
  92. assert result.dtype.closed == "left"
  93. @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
  94. def test_constructor_nan(self, constructor, breaks, closed):
  95. # GH 18421
  96. result_kwargs = self.get_kwargs_from_breaks(breaks)
  97. result = constructor(closed=closed, **result_kwargs)
  98. expected_subtype = np.float64
  99. expected_values = np.array(breaks[:-1], dtype=object)
  100. assert result.closed == closed
  101. assert result.dtype.subtype == expected_subtype
  102. tm.assert_numpy_array_equal(np.array(result), expected_values)
  103. @pytest.mark.parametrize(
  104. "breaks",
  105. [
  106. [],
  107. np.array([], dtype="int64"),
  108. np.array([], dtype="uint64"),
  109. np.array([], dtype="float64"),
  110. np.array([], dtype="datetime64[ns]"),
  111. np.array([], dtype="timedelta64[ns]"),
  112. ],
  113. )
  114. def test_constructor_empty(self, constructor, breaks, closed):
  115. # GH 18421
  116. result_kwargs = self.get_kwargs_from_breaks(breaks)
  117. result = constructor(closed=closed, **result_kwargs)
  118. expected_values = np.array([], dtype=object)
  119. expected_subtype = getattr(breaks, "dtype", np.int64)
  120. assert result.empty
  121. assert result.closed == closed
  122. assert result.dtype.subtype == expected_subtype
  123. tm.assert_numpy_array_equal(np.array(result), expected_values)
  124. @pytest.mark.parametrize(
  125. "breaks",
  126. [
  127. tuple("0123456789"),
  128. list("abcdefghij"),
  129. np.array(list("abcdefghij"), dtype=object),
  130. np.array(list("abcdefghij"), dtype="<U1"),
  131. ],
  132. )
  133. def test_constructor_string(self, constructor, breaks):
  134. # GH 19016
  135. msg = (
  136. "category, object, and string subtypes are not supported "
  137. "for IntervalIndex"
  138. )
  139. with pytest.raises(TypeError, match=msg):
  140. constructor(**self.get_kwargs_from_breaks(breaks))
  141. @pytest.mark.parametrize("cat_constructor", [Categorical, CategoricalIndex])
  142. def test_constructor_categorical_valid(self, constructor, cat_constructor):
  143. # GH 21243/21253
  144. breaks = np.arange(10, dtype="int64")
  145. expected = IntervalIndex.from_breaks(breaks)
  146. cat_breaks = cat_constructor(breaks)
  147. result_kwargs = self.get_kwargs_from_breaks(cat_breaks)
  148. result = constructor(**result_kwargs)
  149. tm.assert_index_equal(result, expected)
  150. def test_generic_errors(self, constructor):
  151. # filler input data to be used when supplying invalid kwargs
  152. filler = self.get_kwargs_from_breaks(range(10))
  153. # invalid closed
  154. msg = "closed must be one of 'right', 'left', 'both', 'neither'"
  155. with pytest.raises(ValueError, match=msg):
  156. constructor(closed="invalid", **filler)
  157. # unsupported dtype
  158. msg = "dtype must be an IntervalDtype, got int64"
  159. with pytest.raises(TypeError, match=msg):
  160. constructor(dtype="int64", **filler)
  161. # invalid dtype
  162. msg = "data type [\"']invalid[\"'] not understood"
  163. with pytest.raises(TypeError, match=msg):
  164. constructor(dtype="invalid", **filler)
  165. # no point in nesting periods in an IntervalIndex
  166. periods = period_range("2000-01-01", periods=10)
  167. periods_kwargs = self.get_kwargs_from_breaks(periods)
  168. msg = "Period dtypes are not supported, use a PeriodIndex instead"
  169. with pytest.raises(ValueError, match=msg):
  170. constructor(**periods_kwargs)
  171. # decreasing values
  172. decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1))
  173. msg = "left side of interval must be <= right side"
  174. with pytest.raises(ValueError, match=msg):
  175. constructor(**decreasing_kwargs)
  176. class TestFromArrays(ConstructorTests):
  177. """Tests specific to IntervalIndex.from_arrays"""
  178. @pytest.fixture
  179. def constructor(self):
  180. return IntervalIndex.from_arrays
  181. def get_kwargs_from_breaks(self, breaks, closed="right"):
  182. """
  183. converts intervals in breaks format to a dictionary of kwargs to
  184. specific to the format expected by IntervalIndex.from_arrays
  185. """
  186. return {"left": breaks[:-1], "right": breaks[1:]}
  187. def test_constructor_errors(self):
  188. # GH 19016: categorical data
  189. data = Categorical(list("01234abcde"), ordered=True)
  190. msg = (
  191. "category, object, and string subtypes are not supported "
  192. "for IntervalIndex"
  193. )
  194. with pytest.raises(TypeError, match=msg):
  195. IntervalIndex.from_arrays(data[:-1], data[1:])
  196. # unequal length
  197. left = [0, 1, 2]
  198. right = [2, 3]
  199. msg = "left and right must have the same length"
  200. with pytest.raises(ValueError, match=msg):
  201. IntervalIndex.from_arrays(left, right)
  202. @pytest.mark.parametrize(
  203. "left_subtype, right_subtype", [(np.int64, np.float64), (np.float64, np.int64)]
  204. )
  205. def test_mixed_float_int(self, left_subtype, right_subtype):
  206. """mixed int/float left/right results in float for both sides"""
  207. left = np.arange(9, dtype=left_subtype)
  208. right = np.arange(1, 10, dtype=right_subtype)
  209. result = IntervalIndex.from_arrays(left, right)
  210. expected_left = Index(left, dtype=np.float64)
  211. expected_right = Index(right, dtype=np.float64)
  212. expected_subtype = np.float64
  213. tm.assert_index_equal(result.left, expected_left)
  214. tm.assert_index_equal(result.right, expected_right)
  215. assert result.dtype.subtype == expected_subtype
  216. class TestFromBreaks(ConstructorTests):
  217. """Tests specific to IntervalIndex.from_breaks"""
  218. @pytest.fixture
  219. def constructor(self):
  220. return IntervalIndex.from_breaks
  221. def get_kwargs_from_breaks(self, breaks, closed="right"):
  222. """
  223. converts intervals in breaks format to a dictionary of kwargs to
  224. specific to the format expected by IntervalIndex.from_breaks
  225. """
  226. return {"breaks": breaks}
  227. def test_constructor_errors(self):
  228. # GH 19016: categorical data
  229. data = Categorical(list("01234abcde"), ordered=True)
  230. msg = (
  231. "category, object, and string subtypes are not supported "
  232. "for IntervalIndex"
  233. )
  234. with pytest.raises(TypeError, match=msg):
  235. IntervalIndex.from_breaks(data)
  236. def test_length_one(self):
  237. """breaks of length one produce an empty IntervalIndex"""
  238. breaks = [0]
  239. result = IntervalIndex.from_breaks(breaks)
  240. expected = IntervalIndex.from_breaks([])
  241. tm.assert_index_equal(result, expected)
  242. def test_left_right_dont_share_data(self):
  243. # GH#36310
  244. breaks = np.arange(5)
  245. result = IntervalIndex.from_breaks(breaks)._data
  246. assert result._left.base is None or result._left.base is not result._right.base
  247. class TestFromTuples(ConstructorTests):
  248. """Tests specific to IntervalIndex.from_tuples"""
  249. @pytest.fixture
  250. def constructor(self):
  251. return IntervalIndex.from_tuples
  252. def get_kwargs_from_breaks(self, breaks, closed="right"):
  253. """
  254. converts intervals in breaks format to a dictionary of kwargs to
  255. specific to the format expected by IntervalIndex.from_tuples
  256. """
  257. if tm.is_unsigned_integer_dtype(breaks):
  258. pytest.skip(f"{breaks.dtype} not relevant IntervalIndex.from_tuples tests")
  259. if len(breaks) == 0:
  260. return {"data": breaks}
  261. tuples = list(zip(breaks[:-1], breaks[1:]))
  262. if isinstance(breaks, (list, tuple)):
  263. return {"data": tuples}
  264. elif is_categorical_dtype(breaks):
  265. return {"data": breaks._constructor(tuples)}
  266. return {"data": com.asarray_tuplesafe(tuples)}
  267. def test_constructor_errors(self):
  268. # non-tuple
  269. tuples = [(0, 1), 2, (3, 4)]
  270. msg = "IntervalIndex.from_tuples received an invalid item, 2"
  271. with pytest.raises(TypeError, match=msg.format(t=tuples)):
  272. IntervalIndex.from_tuples(tuples)
  273. # too few/many items
  274. tuples = [(0, 1), (2,), (3, 4)]
  275. msg = "IntervalIndex.from_tuples requires tuples of length 2, got {t}"
  276. with pytest.raises(ValueError, match=msg.format(t=tuples)):
  277. IntervalIndex.from_tuples(tuples)
  278. tuples = [(0, 1), (2, 3, 4), (5, 6)]
  279. with pytest.raises(ValueError, match=msg.format(t=tuples)):
  280. IntervalIndex.from_tuples(tuples)
  281. def test_na_tuples(self):
  282. # tuple (NA, NA) evaluates the same as NA as an element
  283. na_tuple = [(0, 1), (np.nan, np.nan), (2, 3)]
  284. idx_na_tuple = IntervalIndex.from_tuples(na_tuple)
  285. idx_na_element = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
  286. tm.assert_index_equal(idx_na_tuple, idx_na_element)
  287. class TestClassConstructors(ConstructorTests):
  288. """Tests specific to the IntervalIndex/Index constructors"""
  289. @pytest.fixture(
  290. params=[IntervalIndex, partial(Index, dtype="interval")],
  291. ids=["IntervalIndex", "Index"],
  292. )
  293. def klass(self, request):
  294. # We use a separate fixture here to include Index.__new__ with dtype kwarg
  295. return request.param
  296. @pytest.fixture
  297. def constructor(self):
  298. return IntervalIndex
  299. def get_kwargs_from_breaks(self, breaks, closed="right"):
  300. """
  301. converts intervals in breaks format to a dictionary of kwargs to
  302. specific to the format expected by the IntervalIndex/Index constructors
  303. """
  304. if tm.is_unsigned_integer_dtype(breaks):
  305. pytest.skip(f"{breaks.dtype} not relevant for class constructor tests")
  306. if len(breaks) == 0:
  307. return {"data": breaks}
  308. ivs = [
  309. Interval(left, right, closed) if notna(left) else left
  310. for left, right in zip(breaks[:-1], breaks[1:])
  311. ]
  312. if isinstance(breaks, list):
  313. return {"data": ivs}
  314. elif is_categorical_dtype(breaks):
  315. return {"data": breaks._constructor(ivs)}
  316. return {"data": np.array(ivs, dtype=object)}
  317. def test_generic_errors(self, constructor):
  318. """
  319. override the base class implementation since errors are handled
  320. differently; checks unnecessary since caught at the Interval level
  321. """
  322. def test_constructor_string(self):
  323. # GH23013
  324. # When forming the interval from breaks,
  325. # the interval of strings is already forbidden.
  326. pass
  327. def test_constructor_errors(self, klass):
  328. # mismatched closed within intervals with no constructor override
  329. ivs = [Interval(0, 1, closed="right"), Interval(2, 3, closed="left")]
  330. msg = "intervals must all be closed on the same side"
  331. with pytest.raises(ValueError, match=msg):
  332. klass(ivs)
  333. # scalar
  334. msg = (
  335. r"(IntervalIndex|Index)\(...\) must be called with a collection of "
  336. "some kind, 5 was passed"
  337. )
  338. with pytest.raises(TypeError, match=msg):
  339. klass(5)
  340. # not an interval; dtype depends on 32bit/windows builds
  341. msg = "type <class 'numpy.int(32|64)'> with value 0 is not an interval"
  342. with pytest.raises(TypeError, match=msg):
  343. klass([0, 1])
  344. @pytest.mark.parametrize(
  345. "data, closed",
  346. [
  347. ([], "both"),
  348. ([np.nan, np.nan], "neither"),
  349. (
  350. [Interval(0, 3, closed="neither"), Interval(2, 5, closed="neither")],
  351. "left",
  352. ),
  353. (
  354. [Interval(0, 3, closed="left"), Interval(2, 5, closed="right")],
  355. "neither",
  356. ),
  357. (IntervalIndex.from_breaks(range(5), closed="both"), "right"),
  358. ],
  359. )
  360. def test_override_inferred_closed(self, constructor, data, closed):
  361. # GH 19370
  362. if isinstance(data, IntervalIndex):
  363. tuples = data.to_tuples()
  364. else:
  365. tuples = [(iv.left, iv.right) if notna(iv) else iv for iv in data]
  366. expected = IntervalIndex.from_tuples(tuples, closed=closed)
  367. result = constructor(data, closed=closed)
  368. tm.assert_index_equal(result, expected)
  369. @pytest.mark.parametrize(
  370. "values_constructor", [list, np.array, IntervalIndex, IntervalArray]
  371. )
  372. def test_index_object_dtype(self, values_constructor):
  373. # Index(intervals, dtype=object) is an Index (not an IntervalIndex)
  374. intervals = [Interval(0, 1), Interval(1, 2), Interval(2, 3)]
  375. values = values_constructor(intervals)
  376. result = Index(values, dtype=object)
  377. assert type(result) is Index
  378. tm.assert_numpy_array_equal(result.values, np.array(values))
  379. def test_index_mixed_closed(self):
  380. # GH27172
  381. intervals = [
  382. Interval(0, 1, closed="left"),
  383. Interval(1, 2, closed="right"),
  384. Interval(2, 3, closed="neither"),
  385. Interval(3, 4, closed="both"),
  386. ]
  387. result = Index(intervals)
  388. expected = Index(intervals, dtype=object)
  389. tm.assert_index_equal(result, expected)
  390. def test_dtype_closed_mismatch():
  391. # GH#38394 closed specified in both dtype and IntervalIndex constructor
  392. dtype = IntervalDtype(np.int64, "left")
  393. msg = "closed keyword does not match dtype.closed"
  394. with pytest.raises(ValueError, match=msg):
  395. IntervalIndex([], dtype=dtype, closed="neither")
  396. with pytest.raises(ValueError, match=msg):
  397. IntervalArray([], dtype=dtype, closed="neither")