# test_reductions.py
import numpy as np
import pytest

from pandas import (
    NaT,
    Timestamp,
    isna,
)
from pandas.core.arrays.sparse import (
    SparseArray,
    SparseDtype,
)


  12. class TestReductions:
  13. @pytest.mark.parametrize(
  14. "data,pos,neg",
  15. [
  16. ([True, True, True], True, False),
  17. ([1, 2, 1], 1, 0),
  18. ([1.0, 2.0, 1.0], 1.0, 0.0),
  19. ],
  20. )
  21. def test_all(self, data, pos, neg):
  22. # GH#17570
  23. out = SparseArray(data).all()
  24. assert out
  25. out = SparseArray(data, fill_value=pos).all()
  26. assert out
  27. data[1] = neg
  28. out = SparseArray(data).all()
  29. assert not out
  30. out = SparseArray(data, fill_value=pos).all()
  31. assert not out
  32. @pytest.mark.parametrize(
  33. "data,pos,neg",
  34. [
  35. ([True, True, True], True, False),
  36. ([1, 2, 1], 1, 0),
  37. ([1.0, 2.0, 1.0], 1.0, 0.0),
  38. ],
  39. )
  40. def test_numpy_all(self, data, pos, neg):
  41. # GH#17570
  42. out = np.all(SparseArray(data))
  43. assert out
  44. out = np.all(SparseArray(data, fill_value=pos))
  45. assert out
  46. data[1] = neg
  47. out = np.all(SparseArray(data))
  48. assert not out
  49. out = np.all(SparseArray(data, fill_value=pos))
  50. assert not out
  51. # raises with a different message on py2.
  52. msg = "the 'out' parameter is not supported"
  53. with pytest.raises(ValueError, match=msg):
  54. np.all(SparseArray(data), out=np.array([]))
  55. @pytest.mark.parametrize(
  56. "data,pos,neg",
  57. [
  58. ([False, True, False], True, False),
  59. ([0, 2, 0], 2, 0),
  60. ([0.0, 2.0, 0.0], 2.0, 0.0),
  61. ],
  62. )
  63. def test_any(self, data, pos, neg):
  64. # GH#17570
  65. out = SparseArray(data).any()
  66. assert out
  67. out = SparseArray(data, fill_value=pos).any()
  68. assert out
  69. data[1] = neg
  70. out = SparseArray(data).any()
  71. assert not out
  72. out = SparseArray(data, fill_value=pos).any()
  73. assert not out
  74. @pytest.mark.parametrize(
  75. "data,pos,neg",
  76. [
  77. ([False, True, False], True, False),
  78. ([0, 2, 0], 2, 0),
  79. ([0.0, 2.0, 0.0], 2.0, 0.0),
  80. ],
  81. )
  82. def test_numpy_any(self, data, pos, neg):
  83. # GH#17570
  84. out = np.any(SparseArray(data))
  85. assert out
  86. out = np.any(SparseArray(data, fill_value=pos))
  87. assert out
  88. data[1] = neg
  89. out = np.any(SparseArray(data))
  90. assert not out
  91. out = np.any(SparseArray(data, fill_value=pos))
  92. assert not out
  93. msg = "the 'out' parameter is not supported"
  94. with pytest.raises(ValueError, match=msg):
  95. np.any(SparseArray(data), out=out)
  96. def test_sum(self):
  97. data = np.arange(10).astype(float)
  98. out = SparseArray(data).sum()
  99. assert out == 45.0
  100. data[5] = np.nan
  101. out = SparseArray(data, fill_value=2).sum()
  102. assert out == 40.0
  103. out = SparseArray(data, fill_value=np.nan).sum()
  104. assert out == 40.0
  105. @pytest.mark.parametrize(
  106. "arr",
  107. [np.array([0, 1, np.nan, 1]), np.array([0, 1, 1])],
  108. )
  109. @pytest.mark.parametrize("fill_value", [0, 1, np.nan])
  110. @pytest.mark.parametrize("min_count, expected", [(3, 2), (4, np.nan)])
  111. def test_sum_min_count(self, arr, fill_value, min_count, expected):
  112. # GH#25777
  113. sparray = SparseArray(arr, fill_value=fill_value)
  114. result = sparray.sum(min_count=min_count)
  115. if np.isnan(expected):
  116. assert np.isnan(result)
  117. else:
  118. assert result == expected
  119. def test_bool_sum_min_count(self):
  120. spar_bool = SparseArray([False, True] * 5, dtype=np.bool_, fill_value=True)
  121. res = spar_bool.sum(min_count=1)
  122. assert res == 5
  123. res = spar_bool.sum(min_count=11)
  124. assert isna(res)
  125. def test_numpy_sum(self):
  126. data = np.arange(10).astype(float)
  127. out = np.sum(SparseArray(data))
  128. assert out == 45.0
  129. data[5] = np.nan
  130. out = np.sum(SparseArray(data, fill_value=2))
  131. assert out == 40.0
  132. out = np.sum(SparseArray(data, fill_value=np.nan))
  133. assert out == 40.0
  134. msg = "the 'dtype' parameter is not supported"
  135. with pytest.raises(ValueError, match=msg):
  136. np.sum(SparseArray(data), dtype=np.int64)
  137. msg = "the 'out' parameter is not supported"
  138. with pytest.raises(ValueError, match=msg):
  139. np.sum(SparseArray(data), out=out)
  140. def test_mean(self):
  141. data = np.arange(10).astype(float)
  142. out = SparseArray(data).mean()
  143. assert out == 4.5
  144. data[5] = np.nan
  145. out = SparseArray(data).mean()
  146. assert out == 40.0 / 9
  147. def test_numpy_mean(self):
  148. data = np.arange(10).astype(float)
  149. out = np.mean(SparseArray(data))
  150. assert out == 4.5
  151. data[5] = np.nan
  152. out = np.mean(SparseArray(data))
  153. assert out == 40.0 / 9
  154. msg = "the 'dtype' parameter is not supported"
  155. with pytest.raises(ValueError, match=msg):
  156. np.mean(SparseArray(data), dtype=np.int64)
  157. msg = "the 'out' parameter is not supported"
  158. with pytest.raises(ValueError, match=msg):
  159. np.mean(SparseArray(data), out=out)
  160. class TestMinMax:
  161. @pytest.mark.parametrize(
  162. "raw_data,max_expected,min_expected",
  163. [
  164. (np.arange(5.0), [4], [0]),
  165. (-np.arange(5.0), [0], [-4]),
  166. (np.array([0, 1, 2, np.nan, 4]), [4], [0]),
  167. (np.array([np.nan] * 5), [np.nan], [np.nan]),
  168. (np.array([]), [np.nan], [np.nan]),
  169. ],
  170. )
  171. def test_nan_fill_value(self, raw_data, max_expected, min_expected):
  172. arr = SparseArray(raw_data)
  173. max_result = arr.max()
  174. min_result = arr.min()
  175. assert max_result in max_expected
  176. assert min_result in min_expected
  177. max_result = arr.max(skipna=False)
  178. min_result = arr.min(skipna=False)
  179. if np.isnan(raw_data).any():
  180. assert np.isnan(max_result)
  181. assert np.isnan(min_result)
  182. else:
  183. assert max_result in max_expected
  184. assert min_result in min_expected
  185. @pytest.mark.parametrize(
  186. "fill_value,max_expected,min_expected",
  187. [
  188. (100, 100, 0),
  189. (-100, 1, -100),
  190. ],
  191. )
  192. def test_fill_value(self, fill_value, max_expected, min_expected):
  193. arr = SparseArray(
  194. np.array([fill_value, 0, 1]), dtype=SparseDtype("int", fill_value)
  195. )
  196. max_result = arr.max()
  197. assert max_result == max_expected
  198. min_result = arr.min()
  199. assert min_result == min_expected
  200. def test_only_fill_value(self):
  201. fv = 100
  202. arr = SparseArray(np.array([fv, fv, fv]), dtype=SparseDtype("int", fv))
  203. assert len(arr._valid_sp_values) == 0
  204. assert arr.max() == fv
  205. assert arr.min() == fv
  206. assert arr.max(skipna=False) == fv
  207. assert arr.min(skipna=False) == fv
  208. @pytest.mark.parametrize("func", ["min", "max"])
  209. @pytest.mark.parametrize("data", [np.array([]), np.array([np.nan, np.nan])])
  210. @pytest.mark.parametrize(
  211. "dtype,expected",
  212. [
  213. (SparseDtype(np.float64, np.nan), np.nan),
  214. (SparseDtype(np.float64, 5.0), np.nan),
  215. (SparseDtype("datetime64[ns]", NaT), NaT),
  216. (SparseDtype("datetime64[ns]", Timestamp("2018-05-05")), NaT),
  217. ],
  218. )
  219. def test_na_value_if_no_valid_values(self, func, data, dtype, expected):
  220. arr = SparseArray(data, dtype=dtype)
  221. result = getattr(arr, func)()
  222. if expected is NaT:
  223. # TODO: pin down whether we wrap datetime64("NaT")
  224. assert result is NaT or np.isnat(result)
  225. else:
  226. assert np.isnan(result)
  227. class TestArgmaxArgmin:
  228. @pytest.mark.parametrize(
  229. "arr,argmax_expected,argmin_expected",
  230. [
  231. (SparseArray([1, 2, 0, 1, 2]), 1, 2),
  232. (SparseArray([-1, -2, 0, -1, -2]), 2, 1),
  233. (SparseArray([np.nan, 1, 0, 0, np.nan, -1]), 1, 5),
  234. (SparseArray([np.nan, 1, 0, 0, np.nan, 2]), 5, 2),
  235. (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=-1), 5, 2),
  236. (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=0), 5, 2),
  237. (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=1), 5, 2),
  238. (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=2), 5, 2),
  239. (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=3), 5, 2),
  240. (SparseArray([0] * 10 + [-1], fill_value=0), 0, 10),
  241. (SparseArray([0] * 10 + [-1], fill_value=-1), 0, 10),
  242. (SparseArray([0] * 10 + [-1], fill_value=1), 0, 10),
  243. (SparseArray([-1] + [0] * 10, fill_value=0), 1, 0),
  244. (SparseArray([1] + [0] * 10, fill_value=0), 0, 1),
  245. (SparseArray([-1] + [0] * 10, fill_value=-1), 1, 0),
  246. (SparseArray([1] + [0] * 10, fill_value=1), 0, 1),
  247. ],
  248. )
  249. def test_argmax_argmin(self, arr, argmax_expected, argmin_expected):
  250. argmax_result = arr.argmax()
  251. argmin_result = arr.argmin()
  252. assert argmax_result == argmax_expected
  253. assert argmin_result == argmin_expected
  254. @pytest.mark.parametrize(
  255. "arr,method",
  256. [(SparseArray([]), "argmax"), (SparseArray([]), "argmin")],
  257. )
  258. def test_empty_array(self, arr, method):
  259. msg = f"attempt to get {method} of an empty sequence"
  260. with pytest.raises(ValueError, match=msg):
  261. arr.argmax() if method == "argmax" else arr.argmin()