# test_lib.py (10 KB)
  1. import numpy as np
  2. import pytest
  3. from pandas._libs import (
  4. Timedelta,
  5. lib,
  6. writers as libwriters,
  7. )
  8. from pandas.compat import IS64
  9. from pandas import Index
  10. import pandas._testing as tm
  11. class TestMisc:
  12. def test_max_len_string_array(self):
  13. arr = a = np.array(["foo", "b", np.nan], dtype="object")
  14. assert libwriters.max_len_string_array(arr) == 3
  15. # unicode
  16. arr = a.astype("U").astype(object)
  17. assert libwriters.max_len_string_array(arr) == 3
  18. # bytes for python3
  19. arr = a.astype("S").astype(object)
  20. assert libwriters.max_len_string_array(arr) == 3
  21. # raises
  22. msg = "No matching signature found"
  23. with pytest.raises(TypeError, match=msg):
  24. libwriters.max_len_string_array(arr.astype("U"))
  25. def test_fast_unique_multiple_list_gen_sort(self):
  26. keys = [["p", "a"], ["n", "d"], ["a", "s"]]
  27. gen = (key for key in keys)
  28. expected = np.array(["a", "d", "n", "p", "s"])
  29. out = lib.fast_unique_multiple_list_gen(gen, sort=True)
  30. tm.assert_numpy_array_equal(np.array(out), expected)
  31. gen = (key for key in keys)
  32. expected = np.array(["p", "a", "n", "d", "s"])
  33. out = lib.fast_unique_multiple_list_gen(gen, sort=False)
  34. tm.assert_numpy_array_equal(np.array(out), expected)
  35. def test_fast_multiget_timedelta_resos(self):
  36. # This will become relevant for test_constructor_dict_timedelta64_index
  37. # once Timedelta constructor preserves reso when passed a
  38. # np.timedelta64 object
  39. td = Timedelta(days=1)
  40. mapping1 = {td: 1}
  41. mapping2 = {td.as_unit("s"): 1}
  42. oindex = Index([td * n for n in range(3)])._values.astype(object)
  43. expected = lib.fast_multiget(mapping1, oindex)
  44. result = lib.fast_multiget(mapping2, oindex)
  45. tm.assert_numpy_array_equal(result, expected)
  46. # case that can't be cast to td64ns
  47. td = Timedelta(np.timedelta64(400, "Y"))
  48. assert hash(td) == hash(td.as_unit("ms"))
  49. assert hash(td) == hash(td.as_unit("us"))
  50. mapping1 = {td: 1}
  51. mapping2 = {td.as_unit("ms"): 1}
  52. oindex = Index([td * n for n in range(3)])._values.astype(object)
  53. expected = lib.fast_multiget(mapping1, oindex)
  54. result = lib.fast_multiget(mapping2, oindex)
  55. tm.assert_numpy_array_equal(result, expected)
  56. class TestIndexing:
  57. def test_maybe_indices_to_slice_left_edge(self):
  58. target = np.arange(100)
  59. # slice
  60. indices = np.array([], dtype=np.intp)
  61. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  62. assert isinstance(maybe_slice, slice)
  63. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  64. @pytest.mark.parametrize("end", [1, 2, 5, 20, 99])
  65. @pytest.mark.parametrize("step", [1, 2, 4])
  66. def test_maybe_indices_to_slice_left_edge_not_slice_end_steps(self, end, step):
  67. target = np.arange(100)
  68. indices = np.arange(0, end, step, dtype=np.intp)
  69. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  70. assert isinstance(maybe_slice, slice)
  71. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  72. # reverse
  73. indices = indices[::-1]
  74. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  75. assert isinstance(maybe_slice, slice)
  76. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  77. @pytest.mark.parametrize(
  78. "case", [[2, 1, 2, 0], [2, 2, 1, 0], [0, 1, 2, 1], [-2, 0, 2], [2, 0, -2]]
  79. )
  80. def test_maybe_indices_to_slice_left_edge_not_slice(self, case):
  81. # not slice
  82. target = np.arange(100)
  83. indices = np.array(case, dtype=np.intp)
  84. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  85. assert not isinstance(maybe_slice, slice)
  86. tm.assert_numpy_array_equal(maybe_slice, indices)
  87. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  88. @pytest.mark.parametrize("start", [0, 2, 5, 20, 97, 98])
  89. @pytest.mark.parametrize("step", [1, 2, 4])
  90. def test_maybe_indices_to_slice_right_edge(self, start, step):
  91. target = np.arange(100)
  92. # slice
  93. indices = np.arange(start, 99, step, dtype=np.intp)
  94. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  95. assert isinstance(maybe_slice, slice)
  96. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  97. # reverse
  98. indices = indices[::-1]
  99. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  100. assert isinstance(maybe_slice, slice)
  101. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  102. def test_maybe_indices_to_slice_right_edge_not_slice(self):
  103. # not slice
  104. target = np.arange(100)
  105. indices = np.array([97, 98, 99, 100], dtype=np.intp)
  106. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  107. assert not isinstance(maybe_slice, slice)
  108. tm.assert_numpy_array_equal(maybe_slice, indices)
  109. msg = "index 100 is out of bounds for axis (0|1) with size 100"
  110. with pytest.raises(IndexError, match=msg):
  111. target[indices]
  112. with pytest.raises(IndexError, match=msg):
  113. target[maybe_slice]
  114. indices = np.array([100, 99, 98, 97], dtype=np.intp)
  115. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  116. assert not isinstance(maybe_slice, slice)
  117. tm.assert_numpy_array_equal(maybe_slice, indices)
  118. with pytest.raises(IndexError, match=msg):
  119. target[indices]
  120. with pytest.raises(IndexError, match=msg):
  121. target[maybe_slice]
  122. @pytest.mark.parametrize(
  123. "case", [[99, 97, 99, 96], [99, 99, 98, 97], [98, 98, 97, 96]]
  124. )
  125. def test_maybe_indices_to_slice_right_edge_cases(self, case):
  126. target = np.arange(100)
  127. indices = np.array(case, dtype=np.intp)
  128. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  129. assert not isinstance(maybe_slice, slice)
  130. tm.assert_numpy_array_equal(maybe_slice, indices)
  131. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  132. @pytest.mark.parametrize("step", [1, 2, 4, 5, 8, 9])
  133. def test_maybe_indices_to_slice_both_edges(self, step):
  134. target = np.arange(10)
  135. # slice
  136. indices = np.arange(0, 9, step, dtype=np.intp)
  137. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  138. assert isinstance(maybe_slice, slice)
  139. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  140. # reverse
  141. indices = indices[::-1]
  142. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  143. assert isinstance(maybe_slice, slice)
  144. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  145. @pytest.mark.parametrize("case", [[4, 2, 0, -2], [2, 2, 1, 0], [0, 1, 2, 1]])
  146. def test_maybe_indices_to_slice_both_edges_not_slice(self, case):
  147. # not slice
  148. target = np.arange(10)
  149. indices = np.array(case, dtype=np.intp)
  150. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  151. assert not isinstance(maybe_slice, slice)
  152. tm.assert_numpy_array_equal(maybe_slice, indices)
  153. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  154. @pytest.mark.parametrize("start, end", [(2, 10), (5, 25), (65, 97)])
  155. @pytest.mark.parametrize("step", [1, 2, 4, 20])
  156. def test_maybe_indices_to_slice_middle(self, start, end, step):
  157. target = np.arange(100)
  158. # slice
  159. indices = np.arange(start, end, step, dtype=np.intp)
  160. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  161. assert isinstance(maybe_slice, slice)
  162. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  163. # reverse
  164. indices = indices[::-1]
  165. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  166. assert isinstance(maybe_slice, slice)
  167. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  168. @pytest.mark.parametrize(
  169. "case", [[14, 12, 10, 12], [12, 12, 11, 10], [10, 11, 12, 11]]
  170. )
  171. def test_maybe_indices_to_slice_middle_not_slice(self, case):
  172. # not slice
  173. target = np.arange(100)
  174. indices = np.array(case, dtype=np.intp)
  175. maybe_slice = lib.maybe_indices_to_slice(indices, len(target))
  176. assert not isinstance(maybe_slice, slice)
  177. tm.assert_numpy_array_equal(maybe_slice, indices)
  178. tm.assert_numpy_array_equal(target[indices], target[maybe_slice])
  179. def test_maybe_booleans_to_slice(self):
  180. arr = np.array([0, 0, 1, 1, 1, 0, 1], dtype=np.uint8)
  181. result = lib.maybe_booleans_to_slice(arr)
  182. assert result.dtype == np.bool_
  183. result = lib.maybe_booleans_to_slice(arr[:0])
  184. assert result == slice(0, 0)
  185. def test_get_reverse_indexer(self):
  186. indexer = np.array([-1, -1, 1, 2, 0, -1, 3, 4], dtype=np.intp)
  187. result = lib.get_reverse_indexer(indexer, 5)
  188. expected = np.array([4, 2, 3, 6, 7], dtype=np.intp)
  189. tm.assert_numpy_array_equal(result, expected)
  190. @pytest.mark.parametrize("dtype", ["int64", "int32"])
  191. def test_is_range_indexer(self, dtype):
  192. # GH#50592
  193. left = np.arange(0, 100, dtype=dtype)
  194. assert lib.is_range_indexer(left, 100)
  195. @pytest.mark.skipif(
  196. not IS64,
  197. reason="2**31 is too big for Py_ssize_t on 32-bit. "
  198. "It doesn't matter though since you cannot create an array that long on 32-bit",
  199. )
  200. @pytest.mark.parametrize("dtype", ["int64", "int32"])
  201. def test_is_range_indexer_big_n(self, dtype):
  202. # GH53616
  203. left = np.arange(0, 100, dtype=dtype)
  204. assert not lib.is_range_indexer(left, 2**31)
  205. @pytest.mark.parametrize("dtype", ["int64", "int32"])
  206. def test_is_range_indexer_not_equal(self, dtype):
  207. # GH#50592
  208. left = np.array([1, 2], dtype=dtype)
  209. assert not lib.is_range_indexer(left, 2)
  210. @pytest.mark.parametrize("dtype", ["int64", "int32"])
  211. def test_is_range_indexer_not_equal_shape(self, dtype):
  212. # GH#50592
  213. left = np.array([0, 1, 2], dtype=dtype)
  214. assert not lib.is_range_indexer(left, 2)
  215. def test_cache_readonly_preserve_docstrings():
  216. # GH18197
  217. assert Index.hasnans.__doc__ is not None
  218. def test_no_default_pickle():
  219. # GH#40397
  220. obj = tm.round_trip_pickle(lib.no_default)
  221. assert obj is lib.no_default