# test_numeric.py (18 KB)
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas import (
  5. Index,
  6. Series,
  7. )
  8. import pandas._testing as tm
  9. from pandas.tests.indexes.common import NumericBase
  10. class TestFloatNumericIndex(NumericBase):
  11. _index_cls = Index
  12. @pytest.fixture(params=[np.float64, np.float32])
  13. def dtype(self, request):
  14. return request.param
  15. @pytest.fixture
  16. def simple_index(self, dtype):
  17. values = np.arange(5, dtype=dtype)
  18. return self._index_cls(values)
  19. @pytest.fixture(
  20. params=[
  21. [1.5, 2, 3, 4, 5],
  22. [0.0, 2.5, 5.0, 7.5, 10.0],
  23. [5, 4, 3, 2, 1.5],
  24. [10.0, 7.5, 5.0, 2.5, 0.0],
  25. ],
  26. ids=["mixed", "float", "mixed_dec", "float_dec"],
  27. )
  28. def index(self, request, dtype):
  29. return self._index_cls(request.param, dtype=dtype)
  30. @pytest.fixture
  31. def mixed_index(self, dtype):
  32. return self._index_cls([1.5, 2, 3, 4, 5], dtype=dtype)
  33. @pytest.fixture
  34. def float_index(self, dtype):
  35. return self._index_cls([0.0, 2.5, 5.0, 7.5, 10.0], dtype=dtype)
  36. def test_repr_roundtrip(self, index):
  37. tm.assert_index_equal(eval(repr(index)), index, exact=True)
  38. def check_coerce(self, a, b, is_float_index=True):
  39. assert a.equals(b)
  40. tm.assert_index_equal(a, b, exact=False)
  41. if is_float_index:
  42. assert isinstance(b, self._index_cls)
  43. else:
  44. assert type(b) is Index
  45. def test_constructor_from_list_no_dtype(self):
  46. index = self._index_cls([1.5, 2.5, 3.5])
  47. assert index.dtype == np.float64
  48. def test_constructor(self, dtype):
  49. index_cls = self._index_cls
  50. # explicit construction
  51. index = index_cls([1, 2, 3, 4, 5], dtype=dtype)
  52. assert isinstance(index, index_cls)
  53. assert index.dtype == dtype
  54. expected = np.array([1, 2, 3, 4, 5], dtype=dtype)
  55. tm.assert_numpy_array_equal(index.values, expected)
  56. index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=dtype)
  57. assert isinstance(index, index_cls)
  58. assert index.dtype == dtype
  59. index = index_cls([1.0, 2, 3, 4, 5], dtype=dtype)
  60. assert isinstance(index, index_cls)
  61. assert index.dtype == dtype
  62. index = index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=dtype)
  63. assert isinstance(index, index_cls)
  64. assert index.dtype == dtype
  65. index = index_cls([1.0, 2, 3, 4, 5], dtype=dtype)
  66. assert isinstance(index, index_cls)
  67. assert index.dtype == dtype
  68. index = index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=dtype)
  69. assert isinstance(index, index_cls)
  70. assert index.dtype == dtype
  71. # nan handling
  72. result = index_cls([np.nan, np.nan], dtype=dtype)
  73. assert pd.isna(result.values).all()
  74. result = index_cls(np.array([np.nan]), dtype=dtype)
  75. assert pd.isna(result.values).all()
  76. def test_constructor_invalid(self):
  77. index_cls = self._index_cls
  78. cls_name = index_cls.__name__
  79. # invalid
  80. msg = (
  81. rf"{cls_name}\(\.\.\.\) must be called with a collection of "
  82. r"some kind, 0\.0 was passed"
  83. )
  84. with pytest.raises(TypeError, match=msg):
  85. index_cls(0.0)
  86. def test_constructor_coerce(self, mixed_index, float_index):
  87. self.check_coerce(mixed_index, Index([1.5, 2, 3, 4, 5]))
  88. self.check_coerce(float_index, Index(np.arange(5) * 2.5))
  89. result = Index(np.array(np.arange(5) * 2.5, dtype=object))
  90. assert result.dtype == object # as of 2.0 to match Series
  91. self.check_coerce(float_index, result.astype("float64"))
  92. def test_constructor_explicit(self, mixed_index, float_index):
  93. # these don't auto convert
  94. self.check_coerce(
  95. float_index, Index((np.arange(5) * 2.5), dtype=object), is_float_index=False
  96. )
  97. self.check_coerce(
  98. mixed_index, Index([1.5, 2, 3, 4, 5], dtype=object), is_float_index=False
  99. )
  100. def test_type_coercion_fail(self, any_int_numpy_dtype):
  101. # see gh-15832
  102. msg = "Trying to coerce float values to integers"
  103. with pytest.raises(ValueError, match=msg):
  104. Index([1, 2, 3.5], dtype=any_int_numpy_dtype)
  105. def test_equals_numeric(self):
  106. index_cls = self._index_cls
  107. idx = index_cls([1.0, 2.0])
  108. assert idx.equals(idx)
  109. assert idx.identical(idx)
  110. idx2 = index_cls([1.0, 2.0])
  111. assert idx.equals(idx2)
  112. idx = index_cls([1.0, np.nan])
  113. assert idx.equals(idx)
  114. assert idx.identical(idx)
  115. idx2 = index_cls([1.0, np.nan])
  116. assert idx.equals(idx2)
  117. @pytest.mark.parametrize(
  118. "other",
  119. (
  120. Index([1, 2], dtype=np.int64),
  121. Index([1.0, 2.0], dtype=object),
  122. Index([1, 2], dtype=object),
  123. ),
  124. )
  125. def test_equals_numeric_other_index_type(self, other):
  126. idx = self._index_cls([1.0, 2.0])
  127. assert idx.equals(other)
  128. assert other.equals(idx)
  129. @pytest.mark.parametrize(
  130. "vals",
  131. [
  132. pd.date_range("2016-01-01", periods=3),
  133. pd.timedelta_range("1 Day", periods=3),
  134. ],
  135. )
  136. def test_lookups_datetimelike_values(self, vals, dtype):
  137. # If we have datetime64 or timedelta64 values, make sure they are
  138. # wrapped correctly GH#31163
  139. ser = Series(vals, index=range(3, 6))
  140. ser.index = ser.index.astype(dtype)
  141. expected = vals[1]
  142. result = ser[4.0]
  143. assert isinstance(result, type(expected)) and result == expected
  144. result = ser[4]
  145. assert isinstance(result, type(expected)) and result == expected
  146. result = ser.loc[4.0]
  147. assert isinstance(result, type(expected)) and result == expected
  148. result = ser.loc[4]
  149. assert isinstance(result, type(expected)) and result == expected
  150. result = ser.at[4.0]
  151. assert isinstance(result, type(expected)) and result == expected
  152. # GH#31329 .at[4] should cast to 4.0, matching .loc behavior
  153. result = ser.at[4]
  154. assert isinstance(result, type(expected)) and result == expected
  155. result = ser.iloc[1]
  156. assert isinstance(result, type(expected)) and result == expected
  157. result = ser.iat[1]
  158. assert isinstance(result, type(expected)) and result == expected
  159. def test_doesnt_contain_all_the_things(self):
  160. idx = self._index_cls([np.nan])
  161. assert not idx.isin([0]).item()
  162. assert not idx.isin([1]).item()
  163. assert idx.isin([np.nan]).item()
  164. def test_nan_multiple_containment(self):
  165. index_cls = self._index_cls
  166. idx = index_cls([1.0, np.nan])
  167. tm.assert_numpy_array_equal(idx.isin([1.0]), np.array([True, False]))
  168. tm.assert_numpy_array_equal(idx.isin([2.0, np.pi]), np.array([False, False]))
  169. tm.assert_numpy_array_equal(idx.isin([np.nan]), np.array([False, True]))
  170. tm.assert_numpy_array_equal(idx.isin([1.0, np.nan]), np.array([True, True]))
  171. idx = index_cls([1.0, 2.0])
  172. tm.assert_numpy_array_equal(idx.isin([np.nan]), np.array([False, False]))
  173. def test_fillna_float64(self):
  174. index_cls = self._index_cls
  175. # GH 11343
  176. idx = Index([1.0, np.nan, 3.0], dtype=float, name="x")
  177. # can't downcast
  178. exp = Index([1.0, 0.1, 3.0], name="x")
  179. tm.assert_index_equal(idx.fillna(0.1), exp, exact=True)
  180. # downcast
  181. exp = index_cls([1.0, 2.0, 3.0], name="x")
  182. tm.assert_index_equal(idx.fillna(2), exp)
  183. # object
  184. exp = Index([1.0, "obj", 3.0], name="x")
  185. tm.assert_index_equal(idx.fillna("obj"), exp, exact=True)
  186. class NumericInt(NumericBase):
  187. _index_cls = Index
  188. def test_is_monotonic(self):
  189. index_cls = self._index_cls
  190. index = index_cls([1, 2, 3, 4])
  191. assert index.is_monotonic_increasing is True
  192. assert index.is_monotonic_increasing is True
  193. assert index._is_strictly_monotonic_increasing is True
  194. assert index.is_monotonic_decreasing is False
  195. assert index._is_strictly_monotonic_decreasing is False
  196. index = index_cls([4, 3, 2, 1])
  197. assert index.is_monotonic_increasing is False
  198. assert index._is_strictly_monotonic_increasing is False
  199. assert index._is_strictly_monotonic_decreasing is True
  200. index = index_cls([1])
  201. assert index.is_monotonic_increasing is True
  202. assert index.is_monotonic_increasing is True
  203. assert index.is_monotonic_decreasing is True
  204. assert index._is_strictly_monotonic_increasing is True
  205. assert index._is_strictly_monotonic_decreasing is True
  206. def test_is_strictly_monotonic(self):
  207. index_cls = self._index_cls
  208. index = index_cls([1, 1, 2, 3])
  209. assert index.is_monotonic_increasing is True
  210. assert index._is_strictly_monotonic_increasing is False
  211. index = index_cls([3, 2, 1, 1])
  212. assert index.is_monotonic_decreasing is True
  213. assert index._is_strictly_monotonic_decreasing is False
  214. index = index_cls([1, 1])
  215. assert index.is_monotonic_increasing
  216. assert index.is_monotonic_decreasing
  217. assert not index._is_strictly_monotonic_increasing
  218. assert not index._is_strictly_monotonic_decreasing
  219. def test_logical_compat(self, simple_index):
  220. idx = simple_index
  221. assert idx.all() == idx.values.all()
  222. assert idx.any() == idx.values.any()
  223. def test_identical(self, simple_index, dtype):
  224. index = simple_index
  225. idx = Index(index.copy())
  226. assert idx.identical(index)
  227. same_values_different_type = Index(idx, dtype=object)
  228. assert not idx.identical(same_values_different_type)
  229. idx = index.astype(dtype=object)
  230. idx = idx.rename("foo")
  231. same_values = Index(idx, dtype=object)
  232. assert same_values.identical(idx)
  233. assert not idx.identical(index)
  234. assert Index(same_values, name="foo", dtype=object).identical(idx)
  235. assert not index.astype(dtype=object).identical(index.astype(dtype=dtype))
  236. def test_cant_or_shouldnt_cast(self, dtype):
  237. msg = r"invalid literal for int\(\) with base 10: 'foo'"
  238. # can't
  239. data = ["foo", "bar", "baz"]
  240. with pytest.raises(ValueError, match=msg):
  241. self._index_cls(data, dtype=dtype)
  242. def test_view_index(self, simple_index):
  243. index = simple_index
  244. index.view(Index)
  245. def test_prevent_casting(self, simple_index):
  246. index = simple_index
  247. result = index.astype("O")
  248. assert result.dtype == np.object_
  249. class TestIntNumericIndex(NumericInt):
  250. @pytest.fixture(params=[np.int64, np.int32, np.int16, np.int8])
  251. def dtype(self, request):
  252. return request.param
  253. @pytest.fixture
  254. def simple_index(self, dtype):
  255. return self._index_cls(range(0, 20, 2), dtype=dtype)
  256. @pytest.fixture(
  257. params=[range(0, 20, 2), range(19, -1, -1)], ids=["index_inc", "index_dec"]
  258. )
  259. def index(self, request, dtype):
  260. return self._index_cls(request.param, dtype=dtype)
  261. def test_constructor_from_list_no_dtype(self):
  262. index = self._index_cls([1, 2, 3])
  263. assert index.dtype == np.int64
  264. def test_constructor(self, dtype):
  265. index_cls = self._index_cls
  266. # scalar raise Exception
  267. msg = (
  268. rf"{index_cls.__name__}\(\.\.\.\) must be called with a collection of some "
  269. "kind, 5 was passed"
  270. )
  271. with pytest.raises(TypeError, match=msg):
  272. index_cls(5)
  273. # copy
  274. # pass list, coerce fine
  275. index = index_cls([-5, 0, 1, 2], dtype=dtype)
  276. arr = index.values
  277. new_index = index_cls(arr, copy=True)
  278. tm.assert_index_equal(new_index, index, exact=True)
  279. val = arr[0] + 3000
  280. # this should not change index
  281. arr[0] = val
  282. assert new_index[0] != val
  283. if dtype == np.int64:
  284. # pass list, coerce fine
  285. index = index_cls([-5, 0, 1, 2], dtype=dtype)
  286. expected = Index([-5, 0, 1, 2], dtype=dtype)
  287. tm.assert_index_equal(index, expected)
  288. # from iterable
  289. index = index_cls(iter([-5, 0, 1, 2]), dtype=dtype)
  290. expected = index_cls([-5, 0, 1, 2], dtype=dtype)
  291. tm.assert_index_equal(index, expected, exact=True)
  292. # interpret list-like
  293. expected = index_cls([5, 0], dtype=dtype)
  294. for cls in [Index, index_cls]:
  295. for idx in [
  296. cls([5, 0], dtype=dtype),
  297. cls(np.array([5, 0]), dtype=dtype),
  298. cls(Series([5, 0]), dtype=dtype),
  299. ]:
  300. tm.assert_index_equal(idx, expected)
  301. def test_constructor_corner(self, dtype):
  302. index_cls = self._index_cls
  303. arr = np.array([1, 2, 3, 4], dtype=object)
  304. index = index_cls(arr, dtype=dtype)
  305. assert index.values.dtype == index.dtype
  306. if dtype == np.int64:
  307. without_dtype = Index(arr)
  308. # as of 2.0 we do not infer a dtype when we get an object-dtype
  309. # ndarray of numbers, matching Series behavior
  310. assert without_dtype.dtype == object
  311. tm.assert_index_equal(index, without_dtype.astype(np.int64))
  312. # preventing casting
  313. arr = np.array([1, "2", 3, "4"], dtype=object)
  314. msg = "Trying to coerce float values to integers"
  315. with pytest.raises(ValueError, match=msg):
  316. index_cls(arr, dtype=dtype)
  317. def test_constructor_coercion_signed_to_unsigned(
  318. self,
  319. any_unsigned_int_numpy_dtype,
  320. ):
  321. # see gh-15832
  322. msg = "Trying to coerce negative values to unsigned integers"
  323. with pytest.raises(OverflowError, match=msg):
  324. Index([-1], dtype=any_unsigned_int_numpy_dtype)
  325. def test_constructor_np_signed(self, any_signed_int_numpy_dtype):
  326. # GH#47475
  327. scalar = np.dtype(any_signed_int_numpy_dtype).type(1)
  328. result = Index([scalar])
  329. expected = Index([1], dtype=any_signed_int_numpy_dtype)
  330. tm.assert_index_equal(result, expected, exact=True)
  331. def test_constructor_np_unsigned(self, any_unsigned_int_numpy_dtype):
  332. # GH#47475
  333. scalar = np.dtype(any_unsigned_int_numpy_dtype).type(1)
  334. result = Index([scalar])
  335. expected = Index([1], dtype=any_unsigned_int_numpy_dtype)
  336. tm.assert_index_equal(result, expected, exact=True)
  337. def test_coerce_list(self):
  338. # coerce things
  339. arr = Index([1, 2, 3, 4])
  340. assert isinstance(arr, self._index_cls)
  341. # but not if explicit dtype passed
  342. arr = Index([1, 2, 3, 4], dtype=object)
  343. assert type(arr) is Index
  344. class TestFloat16Index:
  345. # float 16 indexes not supported
  346. # GH 49535
  347. _index_cls = Index
  348. def test_constructor(self):
  349. index_cls = self._index_cls
  350. dtype = np.float16
  351. msg = "float16 indexes are not supported"
  352. # explicit construction
  353. with pytest.raises(NotImplementedError, match=msg):
  354. index_cls([1, 2, 3, 4, 5], dtype=dtype)
  355. with pytest.raises(NotImplementedError, match=msg):
  356. index_cls(np.array([1, 2, 3, 4, 5]), dtype=dtype)
  357. with pytest.raises(NotImplementedError, match=msg):
  358. index_cls([1.0, 2, 3, 4, 5], dtype=dtype)
  359. with pytest.raises(NotImplementedError, match=msg):
  360. index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=dtype)
  361. with pytest.raises(NotImplementedError, match=msg):
  362. index_cls([1.0, 2, 3, 4, 5], dtype=dtype)
  363. with pytest.raises(NotImplementedError, match=msg):
  364. index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=dtype)
  365. # nan handling
  366. with pytest.raises(NotImplementedError, match=msg):
  367. index_cls([np.nan, np.nan], dtype=dtype)
  368. with pytest.raises(NotImplementedError, match=msg):
  369. index_cls(np.array([np.nan]), dtype=dtype)
  370. class TestUIntNumericIndex(NumericInt):
  371. @pytest.fixture(params=[np.uint64])
  372. def dtype(self, request):
  373. return request.param
  374. @pytest.fixture
  375. def simple_index(self, dtype):
  376. # compat with shared Int64/Float64 tests
  377. return self._index_cls(np.arange(5, dtype=dtype))
  378. @pytest.fixture(
  379. params=[
  380. [2**63, 2**63 + 10, 2**63 + 15, 2**63 + 20, 2**63 + 25],
  381. [2**63 + 25, 2**63 + 20, 2**63 + 15, 2**63 + 10, 2**63],
  382. ],
  383. ids=["index_inc", "index_dec"],
  384. )
  385. def index(self, request):
  386. return self._index_cls(request.param, dtype=np.uint64)
  387. @pytest.mark.parametrize(
  388. "box",
  389. [list, lambda x: np.array(x, dtype=object), lambda x: Index(x, dtype=object)],
  390. )
  391. def test_uint_index_does_not_convert_to_float64(box):
  392. # https://github.com/pandas-dev/pandas/issues/28279
  393. # https://github.com/pandas-dev/pandas/issues/28023
  394. series = Series(
  395. [0, 1, 2, 3, 4, 5],
  396. index=[
  397. 7606741985629028552,
  398. 17876870360202815256,
  399. 17876870360202815256,
  400. 13106359306506049338,
  401. 8991270399732411471,
  402. 8991270399732411472,
  403. ],
  404. )
  405. result = series.loc[box([7606741985629028552, 17876870360202815256])]
  406. expected = Index(
  407. [7606741985629028552, 17876870360202815256, 17876870360202815256],
  408. dtype="uint64",
  409. )
  410. tm.assert_index_equal(result.index, expected)
  411. tm.assert_equal(result, series.iloc[:3])
  412. def test_float64_index_equals():
  413. # https://github.com/pandas-dev/pandas/issues/35217
  414. float_index = Index([1.0, 2, 3])
  415. string_index = Index(["1", "2", "3"])
  416. result = float_index.equals(string_index)
  417. assert result is False
  418. result = string_index.equals(float_index)
  419. assert result is False
  420. def test_map_dtype_inference_unsigned_to_signed():
  421. # GH#44609 cases where we don't retain dtype
  422. idx = Index([1, 2, 3], dtype=np.uint64)
  423. result = idx.map(lambda x: -x)
  424. expected = Index([-1, -2, -3], dtype=np.int64)
  425. tm.assert_index_equal(result, expected)
  426. def test_map_dtype_inference_overflows():
  427. # GH#44609 case where we have to upcast
  428. idx = Index(np.array([1, 2, 3], dtype=np.int8))
  429. result = idx.map(lambda x: x * 1000)
  430. # TODO: we could plausibly try to infer down to int16 here
  431. expected = Index([1000, 2000, 3000], dtype=np.int64)
  432. tm.assert_index_equal(result, expected)