# test_indexing.py (9.6 KB)
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. import pandas._testing as tm
  5. from pandas.core.arrays.sparse import (
  6. SparseArray,
  7. SparseDtype,
  8. )
  9. arr_data = np.array([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6])
  10. arr = SparseArray(arr_data)
  11. class TestGetitem:
  12. def test_getitem(self):
  13. dense = arr.to_dense()
  14. for i, value in enumerate(arr):
  15. tm.assert_almost_equal(value, dense[i])
  16. tm.assert_almost_equal(arr[-i], dense[-i])
  17. def test_getitem_arraylike_mask(self):
  18. arr = SparseArray([0, 1, 2])
  19. result = arr[[True, False, True]]
  20. expected = SparseArray([0, 2])
  21. tm.assert_sp_array_equal(result, expected)
  22. @pytest.mark.parametrize(
  23. "slc",
  24. [
  25. np.s_[:],
  26. np.s_[1:10],
  27. np.s_[1:100],
  28. np.s_[10:1],
  29. np.s_[:-3],
  30. np.s_[-5:-4],
  31. np.s_[:-12],
  32. np.s_[-12:],
  33. np.s_[2:],
  34. np.s_[2::3],
  35. np.s_[::2],
  36. np.s_[::-1],
  37. np.s_[::-2],
  38. np.s_[1:6:2],
  39. np.s_[:-6:-2],
  40. ],
  41. )
  42. @pytest.mark.parametrize(
  43. "as_dense", [[np.nan] * 10, [1] * 10, [np.nan] * 5 + [1] * 5, []]
  44. )
  45. def test_getslice(self, slc, as_dense):
  46. as_dense = np.array(as_dense)
  47. arr = SparseArray(as_dense)
  48. result = arr[slc]
  49. expected = SparseArray(as_dense[slc])
  50. tm.assert_sp_array_equal(result, expected)
  51. def test_getslice_tuple(self):
  52. dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0])
  53. sparse = SparseArray(dense)
  54. res = sparse[(slice(4, None),)]
  55. exp = SparseArray(dense[4:])
  56. tm.assert_sp_array_equal(res, exp)
  57. sparse = SparseArray(dense, fill_value=0)
  58. res = sparse[(slice(4, None),)]
  59. exp = SparseArray(dense[4:], fill_value=0)
  60. tm.assert_sp_array_equal(res, exp)
  61. msg = "too many indices for array"
  62. with pytest.raises(IndexError, match=msg):
  63. sparse[4:, :]
  64. with pytest.raises(IndexError, match=msg):
  65. # check numpy compat
  66. dense[4:, :]
  67. def test_boolean_slice_empty(self):
  68. arr = SparseArray([0, 1, 2])
  69. res = arr[[False, False, False]]
  70. assert res.dtype == arr.dtype
  71. def test_getitem_bool_sparse_array(self):
  72. # GH 23122
  73. spar_bool = SparseArray([False, True] * 5, dtype=np.bool_, fill_value=True)
  74. exp = SparseArray([np.nan, 2, np.nan, 5, 6])
  75. tm.assert_sp_array_equal(arr[spar_bool], exp)
  76. spar_bool = ~spar_bool
  77. res = arr[spar_bool]
  78. exp = SparseArray([np.nan, 1, 3, 4, np.nan])
  79. tm.assert_sp_array_equal(res, exp)
  80. spar_bool = SparseArray(
  81. [False, True, np.nan] * 3, dtype=np.bool_, fill_value=np.nan
  82. )
  83. res = arr[spar_bool]
  84. exp = SparseArray([np.nan, 3, 5])
  85. tm.assert_sp_array_equal(res, exp)
  86. def test_getitem_bool_sparse_array_as_comparison(self):
  87. # GH 45110
  88. arr = SparseArray([1, 2, 3, 4, np.nan, np.nan], fill_value=np.nan)
  89. res = arr[arr > 2]
  90. exp = SparseArray([3.0, 4.0], fill_value=np.nan)
  91. tm.assert_sp_array_equal(res, exp)
  92. def test_get_item(self):
  93. zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
  94. assert np.isnan(arr[1])
  95. assert arr[2] == 1
  96. assert arr[7] == 5
  97. assert zarr[0] == 0
  98. assert zarr[2] == 1
  99. assert zarr[7] == 5
  100. errmsg = "must be an integer between -10 and 10"
  101. with pytest.raises(IndexError, match=errmsg):
  102. arr[11]
  103. with pytest.raises(IndexError, match=errmsg):
  104. arr[-11]
  105. assert arr[-1] == arr[len(arr) - 1]
  106. class TestSetitem:
  107. def test_set_item(self):
  108. arr = SparseArray(arr_data).copy()
  109. def setitem():
  110. arr[5] = 3
  111. def setslice():
  112. arr[1:5] = 2
  113. with pytest.raises(TypeError, match="assignment via setitem"):
  114. setitem()
  115. with pytest.raises(TypeError, match="assignment via setitem"):
  116. setslice()
  117. class TestTake:
  118. def test_take_scalar_raises(self):
  119. msg = "'indices' must be an array, not a scalar '2'."
  120. with pytest.raises(ValueError, match=msg):
  121. arr.take(2)
  122. def test_take(self):
  123. exp = SparseArray(np.take(arr_data, [2, 3]))
  124. tm.assert_sp_array_equal(arr.take([2, 3]), exp)
  125. exp = SparseArray(np.take(arr_data, [0, 1, 2]))
  126. tm.assert_sp_array_equal(arr.take([0, 1, 2]), exp)
  127. def test_take_all_empty(self):
  128. a = pd.array([0, 0], dtype=SparseDtype("int64"))
  129. result = a.take([0, 1], allow_fill=True, fill_value=np.nan)
  130. tm.assert_sp_array_equal(a, result)
  131. def test_take_fill_value(self):
  132. data = np.array([1, np.nan, 0, 3, 0])
  133. sparse = SparseArray(data, fill_value=0)
  134. exp = SparseArray(np.take(data, [0]), fill_value=0)
  135. tm.assert_sp_array_equal(sparse.take([0]), exp)
  136. exp = SparseArray(np.take(data, [1, 3, 4]), fill_value=0)
  137. tm.assert_sp_array_equal(sparse.take([1, 3, 4]), exp)
  138. def test_take_negative(self):
  139. exp = SparseArray(np.take(arr_data, [-1]))
  140. tm.assert_sp_array_equal(arr.take([-1]), exp)
  141. exp = SparseArray(np.take(arr_data, [-4, -3, -2]))
  142. tm.assert_sp_array_equal(arr.take([-4, -3, -2]), exp)
  143. def test_bad_take(self):
  144. with pytest.raises(IndexError, match="bounds"):
  145. arr.take([11])
  146. def test_take_filling(self):
  147. # similar tests as GH 12631
  148. sparse = SparseArray([np.nan, np.nan, 1, np.nan, 4])
  149. result = sparse.take(np.array([1, 0, -1]))
  150. expected = SparseArray([np.nan, np.nan, 4])
  151. tm.assert_sp_array_equal(result, expected)
  152. # TODO: actionable?
  153. # XXX: test change: fill_value=True -> allow_fill=True
  154. result = sparse.take(np.array([1, 0, -1]), allow_fill=True)
  155. expected = SparseArray([np.nan, np.nan, np.nan])
  156. tm.assert_sp_array_equal(result, expected)
  157. # allow_fill=False
  158. result = sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
  159. expected = SparseArray([np.nan, np.nan, 4])
  160. tm.assert_sp_array_equal(result, expected)
  161. msg = "Invalid value in 'indices'"
  162. with pytest.raises(ValueError, match=msg):
  163. sparse.take(np.array([1, 0, -2]), allow_fill=True)
  164. with pytest.raises(ValueError, match=msg):
  165. sparse.take(np.array([1, 0, -5]), allow_fill=True)
  166. msg = "out of bounds value in 'indices'"
  167. with pytest.raises(IndexError, match=msg):
  168. sparse.take(np.array([1, -6]))
  169. with pytest.raises(IndexError, match=msg):
  170. sparse.take(np.array([1, 5]))
  171. with pytest.raises(IndexError, match=msg):
  172. sparse.take(np.array([1, 5]), allow_fill=True)
  173. def test_take_filling_fill_value(self):
  174. # same tests as GH#12631
  175. sparse = SparseArray([np.nan, 0, 1, 0, 4], fill_value=0)
  176. result = sparse.take(np.array([1, 0, -1]))
  177. expected = SparseArray([0, np.nan, 4], fill_value=0)
  178. tm.assert_sp_array_equal(result, expected)
  179. # fill_value
  180. result = sparse.take(np.array([1, 0, -1]), allow_fill=True)
  181. # TODO: actionable?
  182. # XXX: behavior change.
  183. # the old way of filling self.fill_value doesn't follow EA rules.
  184. # It's supposed to be self.dtype.na_value (nan in this case)
  185. expected = SparseArray([0, np.nan, np.nan], fill_value=0)
  186. tm.assert_sp_array_equal(result, expected)
  187. # allow_fill=False
  188. result = sparse.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
  189. expected = SparseArray([0, np.nan, 4], fill_value=0)
  190. tm.assert_sp_array_equal(result, expected)
  191. msg = "Invalid value in 'indices'."
  192. with pytest.raises(ValueError, match=msg):
  193. sparse.take(np.array([1, 0, -2]), allow_fill=True)
  194. with pytest.raises(ValueError, match=msg):
  195. sparse.take(np.array([1, 0, -5]), allow_fill=True)
  196. msg = "out of bounds value in 'indices'"
  197. with pytest.raises(IndexError, match=msg):
  198. sparse.take(np.array([1, -6]))
  199. with pytest.raises(IndexError, match=msg):
  200. sparse.take(np.array([1, 5]))
  201. with pytest.raises(IndexError, match=msg):
  202. sparse.take(np.array([1, 5]), fill_value=True)
  203. @pytest.mark.parametrize("kind", ["block", "integer"])
  204. def test_take_filling_all_nan(self, kind):
  205. sparse = SparseArray([np.nan, np.nan, np.nan, np.nan, np.nan], kind=kind)
  206. result = sparse.take(np.array([1, 0, -1]))
  207. expected = SparseArray([np.nan, np.nan, np.nan], kind=kind)
  208. tm.assert_sp_array_equal(result, expected)
  209. result = sparse.take(np.array([1, 0, -1]), fill_value=True)
  210. expected = SparseArray([np.nan, np.nan, np.nan], kind=kind)
  211. tm.assert_sp_array_equal(result, expected)
  212. msg = "out of bounds value in 'indices'"
  213. with pytest.raises(IndexError, match=msg):
  214. sparse.take(np.array([1, -6]))
  215. with pytest.raises(IndexError, match=msg):
  216. sparse.take(np.array([1, 5]))
  217. with pytest.raises(IndexError, match=msg):
  218. sparse.take(np.array([1, 5]), fill_value=True)
  219. class TestWhere:
  220. def test_where_retain_fill_value(self):
  221. # GH#45691 don't lose fill_value on _where
  222. arr = SparseArray([np.nan, 1.0], fill_value=0)
  223. mask = np.array([True, False])
  224. res = arr._where(~mask, 1)
  225. exp = SparseArray([1, 1.0], fill_value=0)
  226. tm.assert_sp_array_equal(res, exp)
  227. ser = pd.Series(arr)
  228. res = ser.where(~mask, 1)
  229. tm.assert_series_equal(res, pd.Series(exp))