# test_constructors.py
  1. import numpy as np
  2. import pytest
  3. from pandas._libs.sparse import IntIndex
  4. import pandas.util._test_decorators as td
  5. import pandas as pd
  6. from pandas import isna
  7. import pandas._testing as tm
  8. from pandas.core.arrays.sparse import (
  9. SparseArray,
  10. SparseDtype,
  11. )
  12. class TestConstructors:
  13. def test_constructor_dtype(self):
  14. arr = SparseArray([np.nan, 1, 2, np.nan])
  15. assert arr.dtype == SparseDtype(np.float64, np.nan)
  16. assert arr.dtype.subtype == np.float64
  17. assert np.isnan(arr.fill_value)
  18. arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0)
  19. assert arr.dtype == SparseDtype(np.float64, 0)
  20. assert arr.fill_value == 0
  21. arr = SparseArray([0, 1, 2, 4], dtype=np.float64)
  22. assert arr.dtype == SparseDtype(np.float64, np.nan)
  23. assert np.isnan(arr.fill_value)
  24. arr = SparseArray([0, 1, 2, 4], dtype=np.int64)
  25. assert arr.dtype == SparseDtype(np.int64, 0)
  26. assert arr.fill_value == 0
  27. arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64)
  28. assert arr.dtype == SparseDtype(np.int64, 0)
  29. assert arr.fill_value == 0
  30. arr = SparseArray([0, 1, 2, 4], dtype=None)
  31. assert arr.dtype == SparseDtype(np.int64, 0)
  32. assert arr.fill_value == 0
  33. arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None)
  34. assert arr.dtype == SparseDtype(np.int64, 0)
  35. assert arr.fill_value == 0
  36. def test_constructor_dtype_str(self):
  37. result = SparseArray([1, 2, 3], dtype="int")
  38. expected = SparseArray([1, 2, 3], dtype=int)
  39. tm.assert_sp_array_equal(result, expected)
  40. def test_constructor_sparse_dtype(self):
  41. result = SparseArray([1, 0, 0, 1], dtype=SparseDtype("int64", -1))
  42. expected = SparseArray([1, 0, 0, 1], fill_value=-1, dtype=np.int64)
  43. tm.assert_sp_array_equal(result, expected)
  44. assert result.sp_values.dtype == np.dtype("int64")
  45. def test_constructor_sparse_dtype_str(self):
  46. result = SparseArray([1, 0, 0, 1], dtype="Sparse[int32]")
  47. expected = SparseArray([1, 0, 0, 1], dtype=np.int32)
  48. tm.assert_sp_array_equal(result, expected)
  49. assert result.sp_values.dtype == np.dtype("int32")
  50. def test_constructor_object_dtype(self):
  51. # GH#11856
  52. arr = SparseArray(["A", "A", np.nan, "B"], dtype=object)
  53. assert arr.dtype == SparseDtype(object)
  54. assert np.isnan(arr.fill_value)
  55. arr = SparseArray(["A", "A", np.nan, "B"], dtype=object, fill_value="A")
  56. assert arr.dtype == SparseDtype(object, "A")
  57. assert arr.fill_value == "A"
  58. def test_constructor_object_dtype_bool_fill(self):
  59. # GH#17574
  60. data = [False, 0, 100.0, 0.0]
  61. arr = SparseArray(data, dtype=object, fill_value=False)
  62. assert arr.dtype == SparseDtype(object, False)
  63. assert arr.fill_value is False
  64. arr_expected = np.array(data, dtype=object)
  65. it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected))
  66. assert np.fromiter(it, dtype=np.bool_).all()
  67. @pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int])
  68. def test_constructor_na_dtype(self, dtype):
  69. with pytest.raises(ValueError, match="Cannot convert"):
  70. SparseArray([0, 1, np.nan], dtype=dtype)
  71. def test_constructor_warns_when_losing_timezone(self):
  72. # GH#32501 warn when losing timezone information
  73. dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
  74. expected = SparseArray(np.asarray(dti, dtype="datetime64[ns]"))
  75. with tm.assert_produces_warning(UserWarning):
  76. result = SparseArray(dti)
  77. tm.assert_sp_array_equal(result, expected)
  78. with tm.assert_produces_warning(UserWarning):
  79. result = SparseArray(pd.Series(dti))
  80. tm.assert_sp_array_equal(result, expected)
  81. def test_constructor_spindex_dtype(self):
  82. arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
  83. # TODO: actionable?
  84. # XXX: Behavior change: specifying SparseIndex no longer changes the
  85. # fill_value
  86. expected = SparseArray([0, 1, 2, 0], kind="integer")
  87. tm.assert_sp_array_equal(arr, expected)
  88. assert arr.dtype == SparseDtype(np.int64)
  89. assert arr.fill_value == 0
  90. arr = SparseArray(
  91. data=[1, 2, 3],
  92. sparse_index=IntIndex(4, [1, 2, 3]),
  93. dtype=np.int64,
  94. fill_value=0,
  95. )
  96. exp = SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0)
  97. tm.assert_sp_array_equal(arr, exp)
  98. assert arr.dtype == SparseDtype(np.int64)
  99. assert arr.fill_value == 0
  100. arr = SparseArray(
  101. data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=np.int64
  102. )
  103. exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64)
  104. tm.assert_sp_array_equal(arr, exp)
  105. assert arr.dtype == SparseDtype(np.int64)
  106. assert arr.fill_value == 0
  107. arr = SparseArray(
  108. data=[1, 2, 3],
  109. sparse_index=IntIndex(4, [1, 2, 3]),
  110. dtype=None,
  111. fill_value=0,
  112. )
  113. exp = SparseArray([0, 1, 2, 3], dtype=None)
  114. tm.assert_sp_array_equal(arr, exp)
  115. assert arr.dtype == SparseDtype(np.int64)
  116. assert arr.fill_value == 0
  117. @pytest.mark.parametrize("sparse_index", [None, IntIndex(1, [0])])
  118. def test_constructor_spindex_dtype_scalar(self, sparse_index):
  119. # scalar input
  120. arr = SparseArray(data=1, sparse_index=sparse_index, dtype=None)
  121. exp = SparseArray([1], dtype=None)
  122. tm.assert_sp_array_equal(arr, exp)
  123. assert arr.dtype == SparseDtype(np.int64)
  124. assert arr.fill_value == 0
  125. arr = SparseArray(data=1, sparse_index=IntIndex(1, [0]), dtype=None)
  126. exp = SparseArray([1], dtype=None)
  127. tm.assert_sp_array_equal(arr, exp)
  128. assert arr.dtype == SparseDtype(np.int64)
  129. assert arr.fill_value == 0
  130. def test_constructor_spindex_dtype_scalar_broadcasts(self):
  131. arr = SparseArray(
  132. data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=None
  133. )
  134. exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None)
  135. tm.assert_sp_array_equal(arr, exp)
  136. assert arr.dtype == SparseDtype(np.int64)
  137. assert arr.fill_value == 0
  138. @pytest.mark.parametrize(
  139. "data, fill_value",
  140. [
  141. (np.array([1, 2]), 0),
  142. (np.array([1.0, 2.0]), np.nan),
  143. ([True, False], False),
  144. ([pd.Timestamp("2017-01-01")], pd.NaT),
  145. ],
  146. )
  147. def test_constructor_inferred_fill_value(self, data, fill_value):
  148. result = SparseArray(data).fill_value
  149. if isna(fill_value):
  150. assert isna(result)
  151. else:
  152. assert result == fill_value
  153. @pytest.mark.parametrize("format", ["coo", "csc", "csr"])
  154. @pytest.mark.parametrize("size", [0, 10])
  155. @td.skip_if_no_scipy
  156. def test_from_spmatrix(self, size, format):
  157. import scipy.sparse
  158. mat = scipy.sparse.random(size, 1, density=0.5, format=format)
  159. result = SparseArray.from_spmatrix(mat)
  160. result = np.asarray(result)
  161. expected = mat.toarray().ravel()
  162. tm.assert_numpy_array_equal(result, expected)
  163. @pytest.mark.parametrize("format", ["coo", "csc", "csr"])
  164. @td.skip_if_no_scipy
  165. def test_from_spmatrix_including_explicit_zero(self, format):
  166. import scipy.sparse
  167. mat = scipy.sparse.random(10, 1, density=0.5, format=format)
  168. mat.data[0] = 0
  169. result = SparseArray.from_spmatrix(mat)
  170. result = np.asarray(result)
  171. expected = mat.toarray().ravel()
  172. tm.assert_numpy_array_equal(result, expected)
  173. @td.skip_if_no_scipy
  174. def test_from_spmatrix_raises(self):
  175. import scipy.sparse
  176. mat = scipy.sparse.eye(5, 4, format="csc")
  177. with pytest.raises(ValueError, match="not '4'"):
  178. SparseArray.from_spmatrix(mat)
  179. def test_constructor_from_too_large_array(self):
  180. with pytest.raises(TypeError, match="expected dimension <= 1 data"):
  181. SparseArray(np.arange(10).reshape((2, 5)))
  182. def test_constructor_from_sparse(self):
  183. zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
  184. res = SparseArray(zarr)
  185. assert res.fill_value == 0
  186. tm.assert_almost_equal(res.sp_values, zarr.sp_values)
  187. def test_constructor_copy(self):
  188. arr_data = np.array([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6])
  189. arr = SparseArray(arr_data)
  190. cp = SparseArray(arr, copy=True)
  191. cp.sp_values[:3] = 0
  192. assert not (arr.sp_values[:3] == 0).any()
  193. not_copy = SparseArray(arr)
  194. not_copy.sp_values[:3] = 0
  195. assert (arr.sp_values[:3] == 0).all()
  196. def test_constructor_bool(self):
  197. # GH#10648
  198. data = np.array([False, False, True, True, False, False])
  199. arr = SparseArray(data, fill_value=False, dtype=bool)
  200. assert arr.dtype == SparseDtype(bool)
  201. tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True]))
  202. # Behavior change: np.asarray densifies.
  203. # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
  204. tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([2, 3], np.int32))
  205. dense = arr.to_dense()
  206. assert dense.dtype == bool
  207. tm.assert_numpy_array_equal(dense, data)
  208. def test_constructor_bool_fill_value(self):
  209. arr = SparseArray([True, False, True], dtype=None)
  210. assert arr.dtype == SparseDtype(np.bool_)
  211. assert not arr.fill_value
  212. arr = SparseArray([True, False, True], dtype=np.bool_)
  213. assert arr.dtype == SparseDtype(np.bool_)
  214. assert not arr.fill_value
  215. arr = SparseArray([True, False, True], dtype=np.bool_, fill_value=True)
  216. assert arr.dtype == SparseDtype(np.bool_, True)
  217. assert arr.fill_value
  218. def test_constructor_float32(self):
  219. # GH#10648
  220. data = np.array([1.0, np.nan, 3], dtype=np.float32)
  221. arr = SparseArray(data, dtype=np.float32)
  222. assert arr.dtype == SparseDtype(np.float32)
  223. tm.assert_numpy_array_equal(arr.sp_values, np.array([1, 3], dtype=np.float32))
  224. # Behavior change: np.asarray densifies.
  225. # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
  226. tm.assert_numpy_array_equal(
  227. arr.sp_index.indices, np.array([0, 2], dtype=np.int32)
  228. )
  229. dense = arr.to_dense()
  230. assert dense.dtype == np.float32
  231. tm.assert_numpy_array_equal(dense, data)