test_sparsetools.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. import sys
  2. import os
  3. import gc
  4. import threading
  5. import numpy as np
  6. from numpy.testing import assert_equal, assert_, assert_allclose
  7. from scipy.sparse import (_sparsetools, coo_matrix, csr_matrix, csc_matrix,
  8. bsr_matrix, dia_matrix)
  9. from scipy.sparse._sputils import supported_dtypes
  10. from scipy._lib._testutils import check_free_memory
  11. import pytest
  12. from pytest import raises as assert_raises
  13. def int_to_int8(n):
  14. """
  15. Wrap an integer to the interval [-128, 127].
  16. """
  17. return (n + 128) % 256 - 128
  18. def test_exception():
  19. assert_raises(MemoryError, _sparsetools.test_throw_error)
  20. def test_threads():
  21. # Smoke test for parallel threaded execution; doesn't actually
  22. # check that code runs in parallel, but just that it produces
  23. # expected results.
  24. nthreads = 10
  25. niter = 100
  26. n = 20
  27. a = csr_matrix(np.ones([n, n]))
  28. bres = []
  29. class Worker(threading.Thread):
  30. def run(self):
  31. b = a.copy()
  32. for j in range(niter):
  33. _sparsetools.csr_plus_csr(n, n,
  34. a.indptr, a.indices, a.data,
  35. a.indptr, a.indices, a.data,
  36. b.indptr, b.indices, b.data)
  37. bres.append(b)
  38. threads = [Worker() for _ in range(nthreads)]
  39. for thread in threads:
  40. thread.start()
  41. for thread in threads:
  42. thread.join()
  43. for b in bres:
  44. assert_(np.all(b.toarray() == 2))
  45. def test_regression_std_vector_dtypes():
  46. # Regression test for gh-3780, checking the std::vector typemaps
  47. # in sparsetools.cxx are complete.
  48. for dtype in supported_dtypes:
  49. ad = np.array([[1, 2], [3, 4]]).astype(dtype)
  50. a = csr_matrix(ad, dtype=dtype)
  51. # getcol is one function using std::vector typemaps, and should not fail
  52. assert_equal(a.getcol(0).toarray(), ad[:, :1])
  53. @pytest.mark.slow
  54. @pytest.mark.xfail_on_32bit("Can't create large array for test")
  55. def test_nnz_overflow():
  56. # Regression test for gh-7230 / gh-7871, checking that coo_toarray
  57. # with nnz > int32max doesn't overflow.
  58. nnz = np.iinfo(np.int32).max + 1
  59. # Ensure ~20 GB of RAM is free to run this test.
  60. check_free_memory((4 + 4 + 1) * nnz / 1e6 + 0.5)
  61. # Use nnz duplicate entries to keep the dense version small.
  62. row = np.zeros(nnz, dtype=np.int32)
  63. col = np.zeros(nnz, dtype=np.int32)
  64. data = np.zeros(nnz, dtype=np.int8)
  65. data[-1] = 4
  66. s = coo_matrix((data, (row, col)), shape=(1, 1), copy=False)
  67. # Sums nnz duplicates to produce a 1x1 array containing 4.
  68. d = s.toarray()
  69. assert_allclose(d, [[4]])
  70. @pytest.mark.skipif(not (sys.platform.startswith('linux') and np.dtype(np.intp).itemsize >= 8),
  71. reason="test requires 64-bit Linux")
  72. class TestInt32Overflow:
  73. """
  74. Some of the sparsetools routines use dense 2D matrices whose
  75. total size is not bounded by the nnz of the sparse matrix. These
  76. routines used to suffer from int32 wraparounds; here, we try to
  77. check that the wraparounds don't occur any more.
  78. """
  79. # choose n large enough
  80. n = 50000
  81. def setup_method(self):
  82. assert self.n**2 > np.iinfo(np.int32).max
  83. # check there's enough memory even if everything is run at the
  84. # same time
  85. try:
  86. parallel_count = int(os.environ.get('PYTEST_XDIST_WORKER_COUNT', '1'))
  87. except ValueError:
  88. parallel_count = np.inf
  89. check_free_memory(3000 * parallel_count)
  90. def teardown_method(self):
  91. gc.collect()
  92. def test_coo_todense(self):
  93. # Check *_todense routines (cf. gh-2179)
  94. #
  95. # All of them in the end call coo_matrix.todense
  96. n = self.n
  97. i = np.array([0, n-1])
  98. j = np.array([0, n-1])
  99. data = np.array([1, 2], dtype=np.int8)
  100. m = coo_matrix((data, (i, j)))
  101. r = m.todense()
  102. assert_equal(r[0,0], 1)
  103. assert_equal(r[-1,-1], 2)
  104. del r
  105. gc.collect()
  106. @pytest.mark.slow
  107. def test_matvecs(self):
  108. # Check *_matvecs routines
  109. n = self.n
  110. i = np.array([0, n-1])
  111. j = np.array([0, n-1])
  112. data = np.array([1, 2], dtype=np.int8)
  113. m = coo_matrix((data, (i, j)))
  114. b = np.ones((n, n), dtype=np.int8)
  115. for sptype in (csr_matrix, csc_matrix, bsr_matrix):
  116. m2 = sptype(m)
  117. r = m2.dot(b)
  118. assert_equal(r[0,0], 1)
  119. assert_equal(r[-1,-1], 2)
  120. del r
  121. gc.collect()
  122. del b
  123. gc.collect()
  124. @pytest.mark.slow
  125. def test_dia_matvec(self):
  126. # Check: huge dia_matrix _matvec
  127. n = self.n
  128. data = np.ones((n, n), dtype=np.int8)
  129. offsets = np.arange(n)
  130. m = dia_matrix((data, offsets), shape=(n, n))
  131. v = np.ones(m.shape[1], dtype=np.int8)
  132. r = m.dot(v)
  133. assert_equal(r[0], int_to_int8(n))
  134. del data, offsets, m, v, r
  135. gc.collect()
  136. _bsr_ops = [pytest.param("matmat", marks=pytest.mark.xslow),
  137. pytest.param("matvecs", marks=pytest.mark.xslow),
  138. "matvec",
  139. "diagonal",
  140. "sort_indices",
  141. pytest.param("transpose", marks=pytest.mark.xslow)]
  142. @pytest.mark.slow
  143. @pytest.mark.parametrize("op", _bsr_ops)
  144. def test_bsr_1_block(self, op):
  145. # Check: huge bsr_matrix (1-block)
  146. #
  147. # The point here is that indices inside a block may overflow.
  148. def get_matrix():
  149. n = self.n
  150. data = np.ones((1, n, n), dtype=np.int8)
  151. indptr = np.array([0, 1], dtype=np.int32)
  152. indices = np.array([0], dtype=np.int32)
  153. m = bsr_matrix((data, indices, indptr), blocksize=(n, n), copy=False)
  154. del data, indptr, indices
  155. return m
  156. gc.collect()
  157. try:
  158. getattr(self, "_check_bsr_" + op)(get_matrix)
  159. finally:
  160. gc.collect()
  161. @pytest.mark.slow
  162. @pytest.mark.parametrize("op", _bsr_ops)
  163. def test_bsr_n_block(self, op):
  164. # Check: huge bsr_matrix (n-block)
  165. #
  166. # The point here is that while indices within a block don't
  167. # overflow, accumulators across many block may.
  168. def get_matrix():
  169. n = self.n
  170. data = np.ones((n, n, 1), dtype=np.int8)
  171. indptr = np.array([0, n], dtype=np.int32)
  172. indices = np.arange(n, dtype=np.int32)
  173. m = bsr_matrix((data, indices, indptr), blocksize=(n, 1), copy=False)
  174. del data, indptr, indices
  175. return m
  176. gc.collect()
  177. try:
  178. getattr(self, "_check_bsr_" + op)(get_matrix)
  179. finally:
  180. gc.collect()
  181. def _check_bsr_matvecs(self, m):
  182. m = m()
  183. n = self.n
  184. # _matvecs
  185. r = m.dot(np.ones((n, 2), dtype=np.int8))
  186. assert_equal(r[0, 0], int_to_int8(n))
  187. def _check_bsr_matvec(self, m):
  188. m = m()
  189. n = self.n
  190. # _matvec
  191. r = m.dot(np.ones((n,), dtype=np.int8))
  192. assert_equal(r[0], int_to_int8(n))
  193. def _check_bsr_diagonal(self, m):
  194. m = m()
  195. n = self.n
  196. # _diagonal
  197. r = m.diagonal()
  198. assert_equal(r, np.ones(n))
  199. def _check_bsr_sort_indices(self, m):
  200. # _sort_indices
  201. m = m()
  202. m.sort_indices()
  203. def _check_bsr_transpose(self, m):
  204. # _transpose
  205. m = m()
  206. m.transpose()
  207. def _check_bsr_matmat(self, m):
  208. m = m()
  209. n = self.n
  210. # _bsr_matmat
  211. m2 = bsr_matrix(np.ones((n, 2), dtype=np.int8), blocksize=(m.blocksize[1], 2))
  212. m.dot(m2) # shouldn't SIGSEGV
  213. del m2
  214. # _bsr_matmat
  215. m2 = bsr_matrix(np.ones((2, n), dtype=np.int8), blocksize=(2, m.blocksize[0]))
  216. m2.dot(m) # shouldn't SIGSEGV
  217. @pytest.mark.skip(reason="64-bit indices in sparse matrices not available")
  218. def test_csr_matmat_int64_overflow():
  219. n = 3037000500
  220. assert n**2 > np.iinfo(np.int64).max
  221. # the test would take crazy amounts of memory
  222. check_free_memory(n * (8*2 + 1) * 3 / 1e6)
  223. # int64 overflow
  224. data = np.ones((n,), dtype=np.int8)
  225. indptr = np.arange(n+1, dtype=np.int64)
  226. indices = np.zeros(n, dtype=np.int64)
  227. a = csr_matrix((data, indices, indptr))
  228. b = a.T
  229. assert_raises(RuntimeError, a.dot, b)
  230. def test_upcast():
  231. a0 = csr_matrix([[np.pi, np.pi*1j], [3, 4]], dtype=complex)
  232. b0 = np.array([256+1j, 2**32], dtype=complex)
  233. for a_dtype in supported_dtypes:
  234. for b_dtype in supported_dtypes:
  235. msg = "(%r, %r)" % (a_dtype, b_dtype)
  236. if np.issubdtype(a_dtype, np.complexfloating):
  237. a = a0.copy().astype(a_dtype)
  238. else:
  239. a = a0.real.copy().astype(a_dtype)
  240. if np.issubdtype(b_dtype, np.complexfloating):
  241. b = b0.copy().astype(b_dtype)
  242. else:
  243. with np.errstate(invalid="ignore"):
  244. # Casting a large value (2**32) to int8 causes a warning in
  245. # numpy >1.23
  246. b = b0.real.copy().astype(b_dtype)
  247. if not (a_dtype == np.bool_ and b_dtype == np.bool_):
  248. c = np.zeros((2,), dtype=np.bool_)
  249. assert_raises(ValueError, _sparsetools.csr_matvec,
  250. 2, 2, a.indptr, a.indices, a.data, b, c)
  251. if ((np.issubdtype(a_dtype, np.complexfloating) and
  252. not np.issubdtype(b_dtype, np.complexfloating)) or
  253. (not np.issubdtype(a_dtype, np.complexfloating) and
  254. np.issubdtype(b_dtype, np.complexfloating))):
  255. c = np.zeros((2,), dtype=np.float64)
  256. assert_raises(ValueError, _sparsetools.csr_matvec,
  257. 2, 2, a.indptr, a.indices, a.data, b, c)
  258. c = np.zeros((2,), dtype=np.result_type(a_dtype, b_dtype))
  259. _sparsetools.csr_matvec(2, 2, a.indptr, a.indices, a.data, b, c)
  260. assert_allclose(c, np.dot(a.toarray(), b), err_msg=msg)
  261. def test_endianness():
  262. d = np.ones((3,4))
  263. offsets = [-1,0,1]
  264. a = dia_matrix((d.astype('<f8'), offsets), (4, 4))
  265. b = dia_matrix((d.astype('>f8'), offsets), (4, 4))
  266. v = np.arange(4)
  267. assert_allclose(a.dot(v), [1, 3, 6, 5])
  268. assert_allclose(b.dot(v), [1, 3, 6, 5])