_dia.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. """Sparse DIAgonal format"""
  2. __docformat__ = "restructuredtext en"
  3. __all__ = ['dia_matrix', 'isspmatrix_dia']
  4. import numpy as np
  5. from ._base import isspmatrix, _formats, spmatrix
  6. from ._data import _data_matrix
  7. from ._sputils import (isshape, upcast_char, getdtype, get_index_dtype,
  8. get_sum_dtype, validateaxis, check_shape)
  9. from ._sparsetools import dia_matvec
  10. class dia_matrix(_data_matrix):
  11. """Sparse matrix with DIAgonal storage
  12. This can be instantiated in several ways:
  13. dia_matrix(D)
  14. with a dense matrix
  15. dia_matrix(S)
  16. with another sparse matrix S (equivalent to S.todia())
  17. dia_matrix((M, N), [dtype])
  18. to construct an empty matrix with shape (M, N),
  19. dtype is optional, defaulting to dtype='d'.
  20. dia_matrix((data, offsets), shape=(M, N))
  21. where the ``data[k,:]`` stores the diagonal entries for
  22. diagonal ``offsets[k]`` (See example below)
  23. Attributes
  24. ----------
  25. dtype : dtype
  26. Data type of the matrix
  27. shape : 2-tuple
  28. Shape of the matrix
  29. ndim : int
  30. Number of dimensions (this is always 2)
  31. nnz
  32. Number of stored values, including explicit zeros
  33. data
  34. DIA format data array of the matrix
  35. offsets
  36. DIA format offset array of the matrix
  37. Notes
  38. -----
  39. Sparse matrices can be used in arithmetic operations: they support
  40. addition, subtraction, multiplication, division, and matrix power.
  41. Examples
  42. --------
  43. >>> import numpy as np
  44. >>> from scipy.sparse import dia_matrix
  45. >>> dia_matrix((3, 4), dtype=np.int8).toarray()
  46. array([[0, 0, 0, 0],
  47. [0, 0, 0, 0],
  48. [0, 0, 0, 0]], dtype=int8)
  49. >>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0)
  50. >>> offsets = np.array([0, -1, 2])
  51. >>> dia_matrix((data, offsets), shape=(4, 4)).toarray()
  52. array([[1, 0, 3, 0],
  53. [1, 2, 0, 4],
  54. [0, 2, 3, 0],
  55. [0, 0, 3, 4]])
  56. >>> from scipy.sparse import dia_matrix
  57. >>> n = 10
  58. >>> ex = np.ones(n)
  59. >>> data = np.array([ex, 2 * ex, ex])
  60. >>> offsets = np.array([-1, 0, 1])
  61. >>> dia_matrix((data, offsets), shape=(n, n)).toarray()
  62. array([[2., 1., 0., ..., 0., 0., 0.],
  63. [1., 2., 1., ..., 0., 0., 0.],
  64. [0., 1., 2., ..., 0., 0., 0.],
  65. ...,
  66. [0., 0., 0., ..., 2., 1., 0.],
  67. [0., 0., 0., ..., 1., 2., 1.],
  68. [0., 0., 0., ..., 0., 1., 2.]])
  69. """
  70. format = 'dia'
  71. def __init__(self, arg1, shape=None, dtype=None, copy=False):
  72. _data_matrix.__init__(self)
  73. if isspmatrix_dia(arg1):
  74. if copy:
  75. arg1 = arg1.copy()
  76. self.data = arg1.data
  77. self.offsets = arg1.offsets
  78. self._shape = check_shape(arg1.shape)
  79. elif isspmatrix(arg1):
  80. if isspmatrix_dia(arg1) and copy:
  81. A = arg1.copy()
  82. else:
  83. A = arg1.todia()
  84. self.data = A.data
  85. self.offsets = A.offsets
  86. self._shape = check_shape(A.shape)
  87. elif isinstance(arg1, tuple):
  88. if isshape(arg1):
  89. # It's a tuple of matrix dimensions (M, N)
  90. # create empty matrix
  91. self._shape = check_shape(arg1)
  92. self.data = np.zeros((0,0), getdtype(dtype, default=float))
  93. idx_dtype = get_index_dtype(maxval=max(self.shape))
  94. self.offsets = np.zeros((0), dtype=idx_dtype)
  95. else:
  96. try:
  97. # Try interpreting it as (data, offsets)
  98. data, offsets = arg1
  99. except Exception as e:
  100. raise ValueError('unrecognized form for dia_matrix constructor') from e
  101. else:
  102. if shape is None:
  103. raise ValueError('expected a shape argument')
  104. self.data = np.atleast_2d(np.array(arg1[0], dtype=dtype, copy=copy))
  105. self.offsets = np.atleast_1d(np.array(arg1[1],
  106. dtype=get_index_dtype(maxval=max(shape)),
  107. copy=copy))
  108. self._shape = check_shape(shape)
  109. else:
  110. #must be dense, convert to COO first, then to DIA
  111. try:
  112. arg1 = np.asarray(arg1)
  113. except Exception as e:
  114. raise ValueError("unrecognized form for"
  115. " %s_matrix constructor" % self.format) from e
  116. A = self._coo_container(arg1, dtype=dtype, shape=shape).todia()
  117. self.data = A.data
  118. self.offsets = A.offsets
  119. self._shape = check_shape(A.shape)
  120. if dtype is not None:
  121. self.data = self.data.astype(dtype)
  122. #check format
  123. if self.offsets.ndim != 1:
  124. raise ValueError('offsets array must have rank 1')
  125. if self.data.ndim != 2:
  126. raise ValueError('data array must have rank 2')
  127. if self.data.shape[0] != len(self.offsets):
  128. raise ValueError('number of diagonals (%d) '
  129. 'does not match the number of offsets (%d)'
  130. % (self.data.shape[0], len(self.offsets)))
  131. if len(np.unique(self.offsets)) != len(self.offsets):
  132. raise ValueError('offset array contains duplicate values')
  133. def __repr__(self):
  134. format = _formats[self.getformat()][1]
  135. return "<%dx%d sparse matrix of type '%s'\n" \
  136. "\twith %d stored elements (%d diagonals) in %s format>" % \
  137. (self.shape + (self.dtype.type, self.nnz, self.data.shape[0],
  138. format))
  139. def _data_mask(self):
  140. """Returns a mask of the same shape as self.data, where
  141. mask[i,j] is True when data[i,j] corresponds to a stored element."""
  142. num_rows, num_cols = self.shape
  143. offset_inds = np.arange(self.data.shape[1])
  144. row = offset_inds - self.offsets[:,None]
  145. mask = (row >= 0)
  146. mask &= (row < num_rows)
  147. mask &= (offset_inds < num_cols)
  148. return mask
  149. def count_nonzero(self):
  150. mask = self._data_mask()
  151. return np.count_nonzero(self.data[mask])
  152. def getnnz(self, axis=None):
  153. if axis is not None:
  154. raise NotImplementedError("getnnz over an axis is not implemented "
  155. "for DIA format")
  156. M,N = self.shape
  157. nnz = 0
  158. for k in self.offsets:
  159. if k > 0:
  160. nnz += min(M,N-k)
  161. else:
  162. nnz += min(M+k,N)
  163. return int(nnz)
  164. getnnz.__doc__ = spmatrix.getnnz.__doc__
  165. count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__
  166. def sum(self, axis=None, dtype=None, out=None):
  167. validateaxis(axis)
  168. if axis is not None and axis < 0:
  169. axis += 2
  170. res_dtype = get_sum_dtype(self.dtype)
  171. num_rows, num_cols = self.shape
  172. ret = None
  173. if axis == 0:
  174. mask = self._data_mask()
  175. x = (self.data * mask).sum(axis=0)
  176. if x.shape[0] == num_cols:
  177. res = x
  178. else:
  179. res = np.zeros(num_cols, dtype=x.dtype)
  180. res[:x.shape[0]] = x
  181. ret = self._ascontainer(res, dtype=res_dtype)
  182. else:
  183. row_sums = np.zeros((num_rows, 1), dtype=res_dtype)
  184. one = np.ones(num_cols, dtype=res_dtype)
  185. dia_matvec(num_rows, num_cols, len(self.offsets),
  186. self.data.shape[1], self.offsets, self.data, one, row_sums)
  187. row_sums = self._ascontainer(row_sums)
  188. if axis is None:
  189. return row_sums.sum(dtype=dtype, out=out)
  190. ret = self._ascontainer(row_sums.sum(axis=axis))
  191. if out is not None and out.shape != ret.shape:
  192. raise ValueError("dimensions do not match")
  193. return ret.sum(axis=(), dtype=dtype, out=out)
  194. sum.__doc__ = spmatrix.sum.__doc__
  195. def _add_sparse(self, other):
  196. # Check if other is also of type dia_matrix
  197. if not isinstance(other, type(self)):
  198. # If other is not of type dia_matrix, default to
  199. # converting to csr_matrix, as is done in the _add_sparse
  200. # method of parent class spmatrix
  201. return self.tocsr()._add_sparse(other)
  202. # The task is to compute m = self + other
  203. # Start by making a copy of self, of the datatype
  204. # that should result from adding self and other
  205. dtype = np.promote_types(self.dtype, other.dtype)
  206. m = self.astype(dtype, copy=True)
  207. # Then, add all the stored diagonals of other.
  208. for d in other.offsets:
  209. # Check if the diagonal has already been added.
  210. if d in m.offsets:
  211. # If the diagonal is already there, we need to take
  212. # the sum of the existing and the new
  213. m.setdiag(m.diagonal(d) + other.diagonal(d), d)
  214. else:
  215. m.setdiag(other.diagonal(d), d)
  216. return m
  217. def _mul_vector(self, other):
  218. x = other
  219. y = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char,
  220. x.dtype.char))
  221. L = self.data.shape[1]
  222. M,N = self.shape
  223. dia_matvec(M,N, len(self.offsets), L, self.offsets, self.data, x.ravel(), y.ravel())
  224. return y
  225. def _mul_multimatrix(self, other):
  226. return np.hstack([self._mul_vector(col).reshape(-1,1) for col in other.T])
  227. def _setdiag(self, values, k=0):
  228. M, N = self.shape
  229. if values.ndim == 0:
  230. # broadcast
  231. values_n = np.inf
  232. else:
  233. values_n = len(values)
  234. if k < 0:
  235. n = min(M + k, N, values_n)
  236. min_index = 0
  237. max_index = n
  238. else:
  239. n = min(M, N - k, values_n)
  240. min_index = k
  241. max_index = k + n
  242. if values.ndim != 0:
  243. # allow also longer sequences
  244. values = values[:n]
  245. data_rows, data_cols = self.data.shape
  246. if k in self.offsets:
  247. if max_index > data_cols:
  248. data = np.zeros((data_rows, max_index), dtype=self.data.dtype)
  249. data[:, :data_cols] = self.data
  250. self.data = data
  251. self.data[self.offsets == k, min_index:max_index] = values
  252. else:
  253. self.offsets = np.append(self.offsets, self.offsets.dtype.type(k))
  254. m = max(max_index, data_cols)
  255. data = np.zeros((data_rows + 1, m), dtype=self.data.dtype)
  256. data[:-1, :data_cols] = self.data
  257. data[-1, min_index:max_index] = values
  258. self.data = data
  259. def todia(self, copy=False):
  260. if copy:
  261. return self.copy()
  262. else:
  263. return self
  264. todia.__doc__ = spmatrix.todia.__doc__
  265. def transpose(self, axes=None, copy=False):
  266. if axes is not None:
  267. raise ValueError(("Sparse matrices do not support "
  268. "an 'axes' parameter because swapping "
  269. "dimensions is the only logical permutation."))
  270. num_rows, num_cols = self.shape
  271. max_dim = max(self.shape)
  272. # flip diagonal offsets
  273. offsets = -self.offsets
  274. # re-align the data matrix
  275. r = np.arange(len(offsets), dtype=np.intc)[:, None]
  276. c = np.arange(num_rows, dtype=np.intc) - (offsets % max_dim)[:, None]
  277. pad_amount = max(0, max_dim-self.data.shape[1])
  278. data = np.hstack((self.data, np.zeros((self.data.shape[0], pad_amount),
  279. dtype=self.data.dtype)))
  280. data = data[r, c]
  281. return self._dia_container((data, offsets), shape=(
  282. num_cols, num_rows), copy=copy)
  283. transpose.__doc__ = spmatrix.transpose.__doc__
  284. def diagonal(self, k=0):
  285. rows, cols = self.shape
  286. if k <= -rows or k >= cols:
  287. return np.empty(0, dtype=self.data.dtype)
  288. idx, = np.nonzero(self.offsets == k)
  289. first_col = max(0, k)
  290. last_col = min(rows + k, cols)
  291. result_size = last_col - first_col
  292. if idx.size == 0:
  293. return np.zeros(result_size, dtype=self.data.dtype)
  294. result = self.data[idx[0], first_col:last_col]
  295. padding = result_size - len(result)
  296. if padding > 0:
  297. result = np.pad(result, (0, padding), mode='constant')
  298. return result
  299. diagonal.__doc__ = spmatrix.diagonal.__doc__
  300. def tocsc(self, copy=False):
  301. if self.nnz == 0:
  302. return self._csc_container(self.shape, dtype=self.dtype)
  303. num_rows, num_cols = self.shape
  304. num_offsets, offset_len = self.data.shape
  305. offset_inds = np.arange(offset_len)
  306. row = offset_inds - self.offsets[:,None]
  307. mask = (row >= 0)
  308. mask &= (row < num_rows)
  309. mask &= (offset_inds < num_cols)
  310. mask &= (self.data != 0)
  311. idx_dtype = get_index_dtype(maxval=max(self.shape))
  312. indptr = np.zeros(num_cols + 1, dtype=idx_dtype)
  313. indptr[1:offset_len+1] = np.cumsum(mask.sum(axis=0)[:num_cols])
  314. if offset_len < num_cols:
  315. indptr[offset_len+1:] = indptr[offset_len]
  316. indices = row.T[mask.T].astype(idx_dtype, copy=False)
  317. data = self.data.T[mask.T]
  318. return self._csc_container((data, indices, indptr), shape=self.shape,
  319. dtype=self.dtype)
  320. tocsc.__doc__ = spmatrix.tocsc.__doc__
  321. def tocoo(self, copy=False):
  322. num_rows, num_cols = self.shape
  323. num_offsets, offset_len = self.data.shape
  324. offset_inds = np.arange(offset_len)
  325. row = offset_inds - self.offsets[:,None]
  326. mask = (row >= 0)
  327. mask &= (row < num_rows)
  328. mask &= (offset_inds < num_cols)
  329. mask &= (self.data != 0)
  330. row = row[mask]
  331. col = np.tile(offset_inds, num_offsets)[mask.ravel()]
  332. data = self.data[mask]
  333. A = self._coo_container(
  334. (data, (row, col)), shape=self.shape, dtype=self.dtype
  335. )
  336. A.has_canonical_format = True
  337. return A
  338. tocoo.__doc__ = spmatrix.tocoo.__doc__
  339. # needed by _data_matrix
  340. def _with_data(self, data, copy=True):
  341. """Returns a matrix with the same sparsity structure as self,
  342. but with different data. By default the structure arrays are copied.
  343. """
  344. if copy:
  345. return self._dia_container(
  346. (data, self.offsets.copy()), shape=self.shape
  347. )
  348. else:
  349. return self._dia_container(
  350. (data, self.offsets), shape=self.shape
  351. )
  352. def resize(self, *shape):
  353. shape = check_shape(shape)
  354. M, N = shape
  355. # we do not need to handle the case of expanding N
  356. self.data = self.data[:, :N]
  357. if (M > self.shape[0] and
  358. np.any(self.offsets + self.shape[0] < self.data.shape[1])):
  359. # explicitly clear values that were previously hidden
  360. mask = (self.offsets[:, None] + self.shape[0] <=
  361. np.arange(self.data.shape[1]))
  362. self.data[mask] = 0
  363. self._shape = shape
  364. resize.__doc__ = spmatrix.resize.__doc__
  365. def isspmatrix_dia(x):
  366. """Is x of dia_matrix type?
  367. Parameters
  368. ----------
  369. x
  370. object to check for being a dia matrix
  371. Returns
  372. -------
  373. bool
  374. True if x is a dia matrix, False otherwise
  375. Examples
  376. --------
  377. >>> from scipy.sparse import dia_matrix, isspmatrix_dia
  378. >>> isspmatrix_dia(dia_matrix([[5]]))
  379. True
  380. >>> from scipy.sparse import dia_matrix, csr_matrix, isspmatrix_dia
  381. >>> isspmatrix_dia(csr_matrix([[5]]))
  382. False
  383. """
  384. from ._arrays import dia_array
  385. return isinstance(x, dia_matrix) or isinstance(x, dia_array)