_dok.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. """Dictionary Of Keys based matrix"""
  2. __docformat__ = "restructuredtext en"
  3. __all__ = ['dok_matrix', 'isspmatrix_dok']
  4. import itertools
  5. import numpy as np
  6. from ._base import spmatrix, isspmatrix
  7. from ._index import IndexMixin
  8. from ._sputils import (isdense, getdtype, isshape, isintlike, isscalarlike,
  9. upcast, upcast_scalar, get_index_dtype, check_shape)
  10. try:
  11. from operator import isSequenceType as _is_sequence
  12. except ImportError:
  13. def _is_sequence(x):
  14. return (hasattr(x, '__len__') or hasattr(x, '__next__')
  15. or hasattr(x, 'next'))
  16. class dok_matrix(spmatrix, IndexMixin, dict):
  17. """
  18. Dictionary Of Keys based sparse matrix.
  19. This is an efficient structure for constructing sparse
  20. matrices incrementally.
  21. This can be instantiated in several ways:
  22. dok_matrix(D)
  23. with a dense matrix, D
  24. dok_matrix(S)
  25. with a sparse matrix, S
  26. dok_matrix((M,N), [dtype])
  27. create the matrix with initial shape (M,N)
  28. dtype is optional, defaulting to dtype='d'
  29. Attributes
  30. ----------
  31. dtype : dtype
  32. Data type of the matrix
  33. shape : 2-tuple
  34. Shape of the matrix
  35. ndim : int
  36. Number of dimensions (this is always 2)
  37. nnz
  38. Number of nonzero elements
  39. Notes
  40. -----
  41. Sparse matrices can be used in arithmetic operations: they support
  42. addition, subtraction, multiplication, division, and matrix power.
  43. Allows for efficient O(1) access of individual elements.
  44. Duplicates are not allowed.
  45. Can be efficiently converted to a coo_matrix once constructed.
  46. Examples
  47. --------
  48. >>> import numpy as np
  49. >>> from scipy.sparse import dok_matrix
  50. >>> S = dok_matrix((5, 5), dtype=np.float32)
  51. >>> for i in range(5):
  52. ... for j in range(5):
  53. ... S[i, j] = i + j # Update element
  54. """
  55. format = 'dok'
  56. def __init__(self, arg1, shape=None, dtype=None, copy=False):
  57. dict.__init__(self)
  58. spmatrix.__init__(self)
  59. self.dtype = getdtype(dtype, default=float)
  60. if isinstance(arg1, tuple) and isshape(arg1): # (M,N)
  61. M, N = arg1
  62. self._shape = check_shape((M, N))
  63. elif isspmatrix(arg1): # Sparse ctor
  64. if isspmatrix_dok(arg1) and copy:
  65. arg1 = arg1.copy()
  66. else:
  67. arg1 = arg1.todok()
  68. if dtype is not None:
  69. arg1 = arg1.astype(dtype, copy=False)
  70. dict.update(self, arg1)
  71. self._shape = check_shape(arg1.shape)
  72. self.dtype = arg1.dtype
  73. else: # Dense ctor
  74. try:
  75. arg1 = np.asarray(arg1)
  76. except Exception as e:
  77. raise TypeError('Invalid input format.') from e
  78. if len(arg1.shape) != 2:
  79. raise TypeError('Expected rank <=2 dense array or matrix.')
  80. d = self._coo_container(arg1, dtype=dtype).todok()
  81. dict.update(self, d)
  82. self._shape = check_shape(arg1.shape)
  83. self.dtype = d.dtype
  84. def update(self, val):
  85. # Prevent direct usage of update
  86. raise NotImplementedError("Direct modification to dok_matrix element "
  87. "is not allowed.")
  88. def _update(self, data):
  89. """An update method for dict data defined for direct access to
  90. `dok_matrix` data. Main purpose is to be used for effcient conversion
  91. from other spmatrix classes. Has no checking if `data` is valid."""
  92. return dict.update(self, data)
  93. def set_shape(self, shape):
  94. new_matrix = self.reshape(shape, copy=False).asformat(self.format)
  95. self.__dict__ = new_matrix.__dict__
  96. dict.clear(self)
  97. dict.update(self, new_matrix)
  98. shape = property(fget=spmatrix.get_shape, fset=set_shape)
  99. def getnnz(self, axis=None):
  100. if axis is not None:
  101. raise NotImplementedError("getnnz over an axis is not implemented "
  102. "for DOK format.")
  103. return dict.__len__(self)
  104. def count_nonzero(self):
  105. return sum(x != 0 for x in self.values())
  106. getnnz.__doc__ = spmatrix.getnnz.__doc__
  107. count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__
  108. def __len__(self):
  109. return dict.__len__(self)
  110. def get(self, key, default=0.):
  111. """This overrides the dict.get method, providing type checking
  112. but otherwise equivalent functionality.
  113. """
  114. try:
  115. i, j = key
  116. assert isintlike(i) and isintlike(j)
  117. except (AssertionError, TypeError, ValueError) as e:
  118. raise IndexError('Index must be a pair of integers.') from e
  119. if (i < 0 or i >= self.shape[0] or j < 0 or j >= self.shape[1]):
  120. raise IndexError('Index out of bounds.')
  121. return dict.get(self, key, default)
  122. def _get_intXint(self, row, col):
  123. return dict.get(self, (row, col), self.dtype.type(0))
  124. def _get_intXslice(self, row, col):
  125. return self._get_sliceXslice(slice(row, row+1), col)
  126. def _get_sliceXint(self, row, col):
  127. return self._get_sliceXslice(row, slice(col, col+1))
  128. def _get_sliceXslice(self, row, col):
  129. row_start, row_stop, row_step = row.indices(self.shape[0])
  130. col_start, col_stop, col_step = col.indices(self.shape[1])
  131. row_range = range(row_start, row_stop, row_step)
  132. col_range = range(col_start, col_stop, col_step)
  133. shape = (len(row_range), len(col_range))
  134. # Switch paths only when advantageous
  135. # (count the iterations in the loops, adjust for complexity)
  136. if len(self) >= 2 * shape[0] * shape[1]:
  137. # O(nr*nc) path: loop over <row x col>
  138. return self._get_columnXarray(row_range, col_range)
  139. # O(nnz) path: loop over entries of self
  140. newdok = self._dok_container(shape, dtype=self.dtype)
  141. for key in self.keys():
  142. i, ri = divmod(int(key[0]) - row_start, row_step)
  143. if ri != 0 or i < 0 or i >= shape[0]:
  144. continue
  145. j, rj = divmod(int(key[1]) - col_start, col_step)
  146. if rj != 0 or j < 0 or j >= shape[1]:
  147. continue
  148. x = dict.__getitem__(self, key)
  149. dict.__setitem__(newdok, (i, j), x)
  150. return newdok
  151. def _get_intXarray(self, row, col):
  152. col = col.squeeze()
  153. return self._get_columnXarray([row], col)
  154. def _get_arrayXint(self, row, col):
  155. row = row.squeeze()
  156. return self._get_columnXarray(row, [col])
  157. def _get_sliceXarray(self, row, col):
  158. row = list(range(*row.indices(self.shape[0])))
  159. return self._get_columnXarray(row, col)
  160. def _get_arrayXslice(self, row, col):
  161. col = list(range(*col.indices(self.shape[1])))
  162. return self._get_columnXarray(row, col)
  163. def _get_columnXarray(self, row, col):
  164. # outer indexing
  165. newdok = self._dok_container((len(row), len(col)), dtype=self.dtype)
  166. for i, r in enumerate(row):
  167. for j, c in enumerate(col):
  168. v = dict.get(self, (r, c), 0)
  169. if v:
  170. dict.__setitem__(newdok, (i, j), v)
  171. return newdok
  172. def _get_arrayXarray(self, row, col):
  173. # inner indexing
  174. i, j = map(np.atleast_2d, np.broadcast_arrays(row, col))
  175. newdok = self._dok_container(i.shape, dtype=self.dtype)
  176. for key in itertools.product(range(i.shape[0]), range(i.shape[1])):
  177. v = dict.get(self, (i[key], j[key]), 0)
  178. if v:
  179. dict.__setitem__(newdok, key, v)
  180. return newdok
  181. def _set_intXint(self, row, col, x):
  182. key = (row, col)
  183. if x:
  184. dict.__setitem__(self, key, x)
  185. elif dict.__contains__(self, key):
  186. del self[key]
  187. def _set_arrayXarray(self, row, col, x):
  188. row = list(map(int, row.ravel()))
  189. col = list(map(int, col.ravel()))
  190. x = x.ravel()
  191. dict.update(self, zip(zip(row, col), x))
  192. for i in np.nonzero(x == 0)[0]:
  193. key = (row[i], col[i])
  194. if dict.__getitem__(self, key) == 0:
  195. # may have been superseded by later update
  196. del self[key]
  197. def __add__(self, other):
  198. if isscalarlike(other):
  199. res_dtype = upcast_scalar(self.dtype, other)
  200. new = self._dok_container(self.shape, dtype=res_dtype)
  201. # Add this scalar to every element.
  202. M, N = self.shape
  203. for key in itertools.product(range(M), range(N)):
  204. aij = dict.get(self, (key), 0) + other
  205. if aij:
  206. new[key] = aij
  207. # new.dtype.char = self.dtype.char
  208. elif isspmatrix_dok(other):
  209. if other.shape != self.shape:
  210. raise ValueError("Matrix dimensions are not equal.")
  211. # We could alternatively set the dimensions to the largest of
  212. # the two matrices to be summed. Would this be a good idea?
  213. res_dtype = upcast(self.dtype, other.dtype)
  214. new = self._dok_container(self.shape, dtype=res_dtype)
  215. dict.update(new, self)
  216. with np.errstate(over='ignore'):
  217. dict.update(new,
  218. ((k, new[k] + other[k]) for k in other.keys()))
  219. elif isspmatrix(other):
  220. csc = self.tocsc()
  221. new = csc + other
  222. elif isdense(other):
  223. new = self.todense() + other
  224. else:
  225. return NotImplemented
  226. return new
  227. def __radd__(self, other):
  228. if isscalarlike(other):
  229. new = self._dok_container(self.shape, dtype=self.dtype)
  230. M, N = self.shape
  231. for key in itertools.product(range(M), range(N)):
  232. aij = dict.get(self, (key), 0) + other
  233. if aij:
  234. new[key] = aij
  235. elif isspmatrix_dok(other):
  236. if other.shape != self.shape:
  237. raise ValueError("Matrix dimensions are not equal.")
  238. new = self._dok_container(self.shape, dtype=self.dtype)
  239. dict.update(new, self)
  240. dict.update(new,
  241. ((k, self[k] + other[k]) for k in other.keys()))
  242. elif isspmatrix(other):
  243. csc = self.tocsc()
  244. new = csc + other
  245. elif isdense(other):
  246. new = other + self.todense()
  247. else:
  248. return NotImplemented
  249. return new
  250. def __neg__(self):
  251. if self.dtype.kind == 'b':
  252. raise NotImplementedError('Negating a sparse boolean matrix is not'
  253. ' supported.')
  254. new = self._dok_container(self.shape, dtype=self.dtype)
  255. dict.update(new, ((k, -self[k]) for k in self.keys()))
  256. return new
  257. def _mul_scalar(self, other):
  258. res_dtype = upcast_scalar(self.dtype, other)
  259. # Multiply this scalar by every element.
  260. new = self._dok_container(self.shape, dtype=res_dtype)
  261. dict.update(new, ((k, v * other) for k, v in self.items()))
  262. return new
  263. def _mul_vector(self, other):
  264. # matrix * vector
  265. result = np.zeros(self.shape[0], dtype=upcast(self.dtype, other.dtype))
  266. for (i, j), v in self.items():
  267. result[i] += v * other[j]
  268. return result
  269. def _mul_multivector(self, other):
  270. # matrix * multivector
  271. result_shape = (self.shape[0], other.shape[1])
  272. result_dtype = upcast(self.dtype, other.dtype)
  273. result = np.zeros(result_shape, dtype=result_dtype)
  274. for (i, j), v in self.items():
  275. result[i,:] += v * other[j,:]
  276. return result
  277. def __imul__(self, other):
  278. if isscalarlike(other):
  279. dict.update(self, ((k, v * other) for k, v in self.items()))
  280. return self
  281. return NotImplemented
  282. def __truediv__(self, other):
  283. if isscalarlike(other):
  284. res_dtype = upcast_scalar(self.dtype, other)
  285. new = self._dok_container(self.shape, dtype=res_dtype)
  286. dict.update(new, ((k, v / other) for k, v in self.items()))
  287. return new
  288. return self.tocsr() / other
  289. def __itruediv__(self, other):
  290. if isscalarlike(other):
  291. dict.update(self, ((k, v / other) for k, v in self.items()))
  292. return self
  293. return NotImplemented
  294. def __reduce__(self):
  295. # this approach is necessary because __setstate__ is called after
  296. # __setitem__ upon unpickling and since __init__ is not called there
  297. # is no shape attribute hence it is not possible to unpickle it.
  298. return dict.__reduce__(self)
  299. # What should len(sparse) return? For consistency with dense matrices,
  300. # perhaps it should be the number of rows? For now it returns the number
  301. # of non-zeros.
  302. def transpose(self, axes=None, copy=False):
  303. if axes is not None:
  304. raise ValueError("Sparse matrices do not support "
  305. "an 'axes' parameter because swapping "
  306. "dimensions is the only logical permutation.")
  307. M, N = self.shape
  308. new = self._dok_container((N, M), dtype=self.dtype, copy=copy)
  309. dict.update(new, (((right, left), val)
  310. for (left, right), val in self.items()))
  311. return new
  312. transpose.__doc__ = spmatrix.transpose.__doc__
  313. def conjtransp(self):
  314. """Return the conjugate transpose."""
  315. M, N = self.shape
  316. new = self._dok_container((N, M), dtype=self.dtype)
  317. dict.update(new, (((right, left), np.conj(val))
  318. for (left, right), val in self.items()))
  319. return new
  320. def copy(self):
  321. new = self._dok_container(self.shape, dtype=self.dtype)
  322. dict.update(new, self)
  323. return new
  324. copy.__doc__ = spmatrix.copy.__doc__
  325. def tocoo(self, copy=False):
  326. if self.nnz == 0:
  327. return self._coo_container(self.shape, dtype=self.dtype)
  328. idx_dtype = get_index_dtype(maxval=max(self.shape))
  329. data = np.fromiter(self.values(), dtype=self.dtype, count=self.nnz)
  330. row = np.fromiter((i for i, _ in self.keys()), dtype=idx_dtype, count=self.nnz)
  331. col = np.fromiter((j for _, j in self.keys()), dtype=idx_dtype, count=self.nnz)
  332. A = self._coo_container(
  333. (data, (row, col)), shape=self.shape, dtype=self.dtype
  334. )
  335. A.has_canonical_format = True
  336. return A
  337. tocoo.__doc__ = spmatrix.tocoo.__doc__
  338. def todok(self, copy=False):
  339. if copy:
  340. return self.copy()
  341. return self
  342. todok.__doc__ = spmatrix.todok.__doc__
  343. def tocsc(self, copy=False):
  344. return self.tocoo(copy=False).tocsc(copy=copy)
  345. tocsc.__doc__ = spmatrix.tocsc.__doc__
  346. def resize(self, *shape):
  347. shape = check_shape(shape)
  348. newM, newN = shape
  349. M, N = self.shape
  350. if newM < M or newN < N:
  351. # Remove all elements outside new dimensions
  352. for (i, j) in list(self.keys()):
  353. if i >= newM or j >= newN:
  354. del self[i, j]
  355. self._shape = shape
  356. resize.__doc__ = spmatrix.resize.__doc__
  357. def isspmatrix_dok(x):
  358. """Is x of dok_matrix type?
  359. Parameters
  360. ----------
  361. x
  362. object to check for being a dok matrix
  363. Returns
  364. -------
  365. bool
  366. True if x is a dok matrix, False otherwise
  367. Examples
  368. --------
  369. >>> from scipy.sparse import dok_matrix, isspmatrix_dok
  370. >>> isspmatrix_dok(dok_matrix([[5]]))
  371. True
  372. >>> from scipy.sparse import dok_matrix, csr_matrix, isspmatrix_dok
  373. >>> isspmatrix_dok(csr_matrix([[5]]))
  374. False
  375. """
  376. from ._arrays import dok_array
  377. return isinstance(x, dok_matrix) or isinstance(x, dok_array)