_lil.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. """List of Lists sparse matrix class
  2. """
  3. __docformat__ = "restructuredtext en"
  4. __all__ = ['lil_matrix', 'isspmatrix_lil']
  5. from bisect import bisect_left
  6. import numpy as np
  7. from ._base import spmatrix, isspmatrix
  8. from ._index import IndexMixin, INT_TYPES, _broadcast_arrays
  9. from ._sputils import (getdtype, isshape, isscalarlike, upcast_scalar,
  10. get_index_dtype, check_shape, check_reshape_kwargs)
  11. from . import _csparsetools
  12. class lil_matrix(spmatrix, IndexMixin):
  13. """Row-based LIst of Lists sparse matrix
  14. This is a structure for constructing sparse matrices incrementally.
  15. Note that inserting a single item can take linear time in the worst case;
  16. to construct a matrix efficiently, make sure the items are pre-sorted by
  17. index, per row.
  18. This can be instantiated in several ways:
  19. lil_matrix(D)
  20. with a dense matrix or rank-2 ndarray D
  21. lil_matrix(S)
  22. with another sparse matrix S (equivalent to S.tolil())
  23. lil_matrix((M, N), [dtype])
  24. to construct an empty matrix with shape (M, N)
  25. dtype is optional, defaulting to dtype='d'.
  26. Attributes
  27. ----------
  28. dtype : dtype
  29. Data type of the matrix
  30. shape : 2-tuple
  31. Shape of the matrix
  32. ndim : int
  33. Number of dimensions (this is always 2)
  34. nnz
  35. Number of stored values, including explicit zeros
  36. data
  37. LIL format data array of the matrix
  38. rows
  39. LIL format row index array of the matrix
  40. Notes
  41. -----
  42. Sparse matrices can be used in arithmetic operations: they support
  43. addition, subtraction, multiplication, division, and matrix power.
  44. Advantages of the LIL format
  45. - supports flexible slicing
  46. - changes to the matrix sparsity structure are efficient
  47. Disadvantages of the LIL format
  48. - arithmetic operations LIL + LIL are slow (consider CSR or CSC)
  49. - slow column slicing (consider CSC)
  50. - slow matrix vector products (consider CSR or CSC)
  51. Intended Usage
  52. - LIL is a convenient format for constructing sparse matrices
  53. - once a matrix has been constructed, convert to CSR or
  54. CSC format for fast arithmetic and matrix vector operations
  55. - consider using the COO format when constructing large matrices
  56. Data Structure
  57. - An array (``self.rows``) of rows, each of which is a sorted
  58. list of column indices of non-zero elements.
  59. - The corresponding nonzero values are stored in similar
  60. fashion in ``self.data``.
  61. """
  62. format = 'lil'
  63. def __init__(self, arg1, shape=None, dtype=None, copy=False):
  64. spmatrix.__init__(self)
  65. self.dtype = getdtype(dtype, arg1, default=float)
  66. # First get the shape
  67. if isspmatrix(arg1):
  68. if isspmatrix_lil(arg1) and copy:
  69. A = arg1.copy()
  70. else:
  71. A = arg1.tolil()
  72. if dtype is not None:
  73. A = A.astype(dtype, copy=False)
  74. self._shape = check_shape(A.shape)
  75. self.dtype = A.dtype
  76. self.rows = A.rows
  77. self.data = A.data
  78. elif isinstance(arg1,tuple):
  79. if isshape(arg1):
  80. if shape is not None:
  81. raise ValueError('invalid use of shape parameter')
  82. M, N = arg1
  83. self._shape = check_shape((M, N))
  84. self.rows = np.empty((M,), dtype=object)
  85. self.data = np.empty((M,), dtype=object)
  86. for i in range(M):
  87. self.rows[i] = []
  88. self.data[i] = []
  89. else:
  90. raise TypeError('unrecognized lil_matrix constructor usage')
  91. else:
  92. # assume A is dense
  93. try:
  94. A = self._ascontainer(arg1)
  95. except TypeError as e:
  96. raise TypeError('unsupported matrix type') from e
  97. else:
  98. A = self._csr_container(A, dtype=dtype).tolil()
  99. self._shape = check_shape(A.shape)
  100. self.dtype = A.dtype
  101. self.rows = A.rows
  102. self.data = A.data
  103. def __iadd__(self,other):
  104. self[:,:] = self + other
  105. return self
  106. def __isub__(self,other):
  107. self[:,:] = self - other
  108. return self
  109. def __imul__(self,other):
  110. if isscalarlike(other):
  111. self[:,:] = self * other
  112. return self
  113. else:
  114. return NotImplemented
  115. def __itruediv__(self,other):
  116. if isscalarlike(other):
  117. self[:,:] = self / other
  118. return self
  119. else:
  120. return NotImplemented
  121. # Whenever the dimensions change, empty lists should be created for each
  122. # row
  123. def getnnz(self, axis=None):
  124. if axis is None:
  125. return sum([len(rowvals) for rowvals in self.data])
  126. if axis < 0:
  127. axis += 2
  128. if axis == 0:
  129. out = np.zeros(self.shape[1], dtype=np.intp)
  130. for row in self.rows:
  131. out[row] += 1
  132. return out
  133. elif axis == 1:
  134. return np.array([len(rowvals) for rowvals in self.data], dtype=np.intp)
  135. else:
  136. raise ValueError('axis out of bounds')
  137. def count_nonzero(self):
  138. return sum(np.count_nonzero(rowvals) for rowvals in self.data)
  139. getnnz.__doc__ = spmatrix.getnnz.__doc__
  140. count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__
  141. def __str__(self):
  142. val = ''
  143. for i, row in enumerate(self.rows):
  144. for pos, j in enumerate(row):
  145. val += " %s\t%s\n" % (str((i, j)), str(self.data[i][pos]))
  146. return val[:-1]
  147. def getrowview(self, i):
  148. """Returns a view of the 'i'th row (without copying).
  149. """
  150. new = self._lil_container((1, self.shape[1]), dtype=self.dtype)
  151. new.rows[0] = self.rows[i]
  152. new.data[0] = self.data[i]
  153. return new
  154. def getrow(self, i):
  155. """Returns a copy of the 'i'th row.
  156. """
  157. M, N = self.shape
  158. if i < 0:
  159. i += M
  160. if i < 0 or i >= M:
  161. raise IndexError('row index out of bounds')
  162. new = self._lil_container((1, N), dtype=self.dtype)
  163. new.rows[0] = self.rows[i][:]
  164. new.data[0] = self.data[i][:]
  165. return new
  166. def __getitem__(self, key):
  167. # Fast path for simple (int, int) indexing.
  168. if (isinstance(key, tuple) and len(key) == 2 and
  169. isinstance(key[0], INT_TYPES) and
  170. isinstance(key[1], INT_TYPES)):
  171. # lil_get1 handles validation for us.
  172. return self._get_intXint(*key)
  173. # Everything else takes the normal path.
  174. return IndexMixin.__getitem__(self, key)
  175. def _asindices(self, idx, N):
  176. # LIL routines handle bounds-checking for us, so don't do it here.
  177. try:
  178. x = np.asarray(idx)
  179. except (ValueError, TypeError, MemoryError) as e:
  180. raise IndexError('invalid index') from e
  181. if x.ndim not in (1, 2):
  182. raise IndexError('Index dimension must be <= 2')
  183. return x
  184. def _get_intXint(self, row, col):
  185. v = _csparsetools.lil_get1(self.shape[0], self.shape[1], self.rows,
  186. self.data, row, col)
  187. return self.dtype.type(v)
  188. def _get_sliceXint(self, row, col):
  189. row = range(*row.indices(self.shape[0]))
  190. return self._get_row_ranges(row, slice(col, col+1))
  191. def _get_arrayXint(self, row, col):
  192. row = row.squeeze()
  193. return self._get_row_ranges(row, slice(col, col+1))
  194. def _get_intXslice(self, row, col):
  195. return self._get_row_ranges((row,), col)
  196. def _get_sliceXslice(self, row, col):
  197. row = range(*row.indices(self.shape[0]))
  198. return self._get_row_ranges(row, col)
  199. def _get_arrayXslice(self, row, col):
  200. return self._get_row_ranges(row, col)
  201. def _get_intXarray(self, row, col):
  202. row = np.array(row, dtype=col.dtype, ndmin=1)
  203. return self._get_columnXarray(row, col)
  204. def _get_sliceXarray(self, row, col):
  205. row = np.arange(*row.indices(self.shape[0]))
  206. return self._get_columnXarray(row, col)
  207. def _get_columnXarray(self, row, col):
  208. # outer indexing
  209. row, col = _broadcast_arrays(row[:,None], col)
  210. return self._get_arrayXarray(row, col)
  211. def _get_arrayXarray(self, row, col):
  212. # inner indexing
  213. i, j = map(np.atleast_2d, _prepare_index_for_memoryview(row, col))
  214. new = self._lil_container(i.shape, dtype=self.dtype)
  215. _csparsetools.lil_fancy_get(self.shape[0], self.shape[1],
  216. self.rows, self.data,
  217. new.rows, new.data,
  218. i, j)
  219. return new
  220. def _get_row_ranges(self, rows, col_slice):
  221. """
  222. Fast path for indexing in the case where column index is slice.
  223. This gains performance improvement over brute force by more
  224. efficient skipping of zeros, by accessing the elements
  225. column-wise in order.
  226. Parameters
  227. ----------
  228. rows : sequence or range
  229. Rows indexed. If range, must be within valid bounds.
  230. col_slice : slice
  231. Columns indexed
  232. """
  233. j_start, j_stop, j_stride = col_slice.indices(self.shape[1])
  234. col_range = range(j_start, j_stop, j_stride)
  235. nj = len(col_range)
  236. new = self._lil_container((len(rows), nj), dtype=self.dtype)
  237. _csparsetools.lil_get_row_ranges(self.shape[0], self.shape[1],
  238. self.rows, self.data,
  239. new.rows, new.data,
  240. rows,
  241. j_start, j_stop, j_stride, nj)
  242. return new
  243. def _set_intXint(self, row, col, x):
  244. _csparsetools.lil_insert(self.shape[0], self.shape[1], self.rows,
  245. self.data, row, col, x)
  246. def _set_arrayXarray(self, row, col, x):
  247. i, j, x = map(np.atleast_2d, _prepare_index_for_memoryview(row, col, x))
  248. _csparsetools.lil_fancy_set(self.shape[0], self.shape[1],
  249. self.rows, self.data,
  250. i, j, x)
  251. def _set_arrayXarray_sparse(self, row, col, x):
  252. # Special case: full matrix assignment
  253. if (x.shape == self.shape and
  254. isinstance(row, slice) and row == slice(None) and
  255. isinstance(col, slice) and col == slice(None)):
  256. x = self._lil_container(x, dtype=self.dtype)
  257. self.rows = x.rows
  258. self.data = x.data
  259. return
  260. # Fall back to densifying x
  261. x = np.asarray(x.toarray(), dtype=self.dtype)
  262. x, _ = _broadcast_arrays(x, row)
  263. self._set_arrayXarray(row, col, x)
  264. def __setitem__(self, key, x):
  265. # Fast path for simple (int, int) indexing.
  266. if (isinstance(key, tuple) and len(key) == 2 and
  267. isinstance(key[0], INT_TYPES) and
  268. isinstance(key[1], INT_TYPES)):
  269. x = self.dtype.type(x)
  270. if x.size > 1:
  271. raise ValueError("Trying to assign a sequence to an item")
  272. return self._set_intXint(key[0], key[1], x)
  273. # Everything else takes the normal path.
  274. IndexMixin.__setitem__(self, key, x)
  275. def _mul_scalar(self, other):
  276. if other == 0:
  277. # Multiply by zero: return the zero matrix
  278. new = self._lil_container(self.shape, dtype=self.dtype)
  279. else:
  280. res_dtype = upcast_scalar(self.dtype, other)
  281. new = self.copy()
  282. new = new.astype(res_dtype)
  283. # Multiply this scalar by every element.
  284. for j, rowvals in enumerate(new.data):
  285. new.data[j] = [val*other for val in rowvals]
  286. return new
  287. def __truediv__(self, other): # self / other
  288. if isscalarlike(other):
  289. new = self.copy()
  290. # Divide every element by this scalar
  291. for j, rowvals in enumerate(new.data):
  292. new.data[j] = [val/other for val in rowvals]
  293. return new
  294. else:
  295. return self.tocsr() / other
  296. def copy(self):
  297. M, N = self.shape
  298. new = self._lil_container(self.shape, dtype=self.dtype)
  299. # This is ~14x faster than calling deepcopy() on rows and data.
  300. _csparsetools.lil_get_row_ranges(M, N, self.rows, self.data,
  301. new.rows, new.data, range(M),
  302. 0, N, 1, N)
  303. return new
  304. copy.__doc__ = spmatrix.copy.__doc__
  305. def reshape(self, *args, **kwargs):
  306. shape = check_shape(args, self.shape)
  307. order, copy = check_reshape_kwargs(kwargs)
  308. # Return early if reshape is not required
  309. if shape == self.shape:
  310. if copy:
  311. return self.copy()
  312. else:
  313. return self
  314. new = self._lil_container(shape, dtype=self.dtype)
  315. if order == 'C':
  316. ncols = self.shape[1]
  317. for i, row in enumerate(self.rows):
  318. for col, j in enumerate(row):
  319. new_r, new_c = np.unravel_index(i * ncols + j, shape)
  320. new[new_r, new_c] = self[i, j]
  321. elif order == 'F':
  322. nrows = self.shape[0]
  323. for i, row in enumerate(self.rows):
  324. for col, j in enumerate(row):
  325. new_r, new_c = np.unravel_index(i + j * nrows, shape, order)
  326. new[new_r, new_c] = self[i, j]
  327. else:
  328. raise ValueError("'order' must be 'C' or 'F'")
  329. return new
  330. reshape.__doc__ = spmatrix.reshape.__doc__
  331. def resize(self, *shape):
  332. shape = check_shape(shape)
  333. new_M, new_N = shape
  334. M, N = self.shape
  335. if new_M < M:
  336. self.rows = self.rows[:new_M]
  337. self.data = self.data[:new_M]
  338. elif new_M > M:
  339. self.rows = np.resize(self.rows, new_M)
  340. self.data = np.resize(self.data, new_M)
  341. for i in range(M, new_M):
  342. self.rows[i] = []
  343. self.data[i] = []
  344. if new_N < N:
  345. for row, data in zip(self.rows, self.data):
  346. trunc = bisect_left(row, new_N)
  347. del row[trunc:]
  348. del data[trunc:]
  349. self._shape = shape
  350. resize.__doc__ = spmatrix.resize.__doc__
  351. def toarray(self, order=None, out=None):
  352. d = self._process_toarray_args(order, out)
  353. for i, row in enumerate(self.rows):
  354. for pos, j in enumerate(row):
  355. d[i, j] = self.data[i][pos]
  356. return d
  357. toarray.__doc__ = spmatrix.toarray.__doc__
  358. def transpose(self, axes=None, copy=False):
  359. return self.tocsr(copy=copy).transpose(axes=axes, copy=False).tolil(copy=False)
  360. transpose.__doc__ = spmatrix.transpose.__doc__
  361. def tolil(self, copy=False):
  362. if copy:
  363. return self.copy()
  364. else:
  365. return self
  366. tolil.__doc__ = spmatrix.tolil.__doc__
  367. def tocsr(self, copy=False):
  368. M, N = self.shape
  369. if M == 0 or N == 0:
  370. return self._csr_container((M, N), dtype=self.dtype)
  371. # construct indptr array
  372. if M*N <= np.iinfo(np.int32).max:
  373. # fast path: it is known that 64-bit indexing will not be needed.
  374. idx_dtype = np.int32
  375. indptr = np.empty(M + 1, dtype=idx_dtype)
  376. indptr[0] = 0
  377. _csparsetools.lil_get_lengths(self.rows, indptr[1:])
  378. np.cumsum(indptr, out=indptr)
  379. nnz = indptr[-1]
  380. else:
  381. idx_dtype = get_index_dtype(maxval=N)
  382. lengths = np.empty(M, dtype=idx_dtype)
  383. _csparsetools.lil_get_lengths(self.rows, lengths)
  384. nnz = lengths.sum(dtype=np.int64)
  385. idx_dtype = get_index_dtype(maxval=max(N, nnz))
  386. indptr = np.empty(M + 1, dtype=idx_dtype)
  387. indptr[0] = 0
  388. np.cumsum(lengths, dtype=idx_dtype, out=indptr[1:])
  389. indices = np.empty(nnz, dtype=idx_dtype)
  390. data = np.empty(nnz, dtype=self.dtype)
  391. _csparsetools.lil_flatten_to_array(self.rows, indices)
  392. _csparsetools.lil_flatten_to_array(self.data, data)
  393. # init csr matrix
  394. return self._csr_container((data, indices, indptr), shape=self.shape)
  395. tocsr.__doc__ = spmatrix.tocsr.__doc__
  396. def _prepare_index_for_memoryview(i, j, x=None):
  397. """
  398. Convert index and data arrays to form suitable for passing to the
  399. Cython fancy getset routines.
  400. The conversions are necessary since to (i) ensure the integer
  401. index arrays are in one of the accepted types, and (ii) to ensure
  402. the arrays are writable so that Cython memoryview support doesn't
  403. choke on them.
  404. Parameters
  405. ----------
  406. i, j
  407. Index arrays
  408. x : optional
  409. Data arrays
  410. Returns
  411. -------
  412. i, j, x
  413. Re-formatted arrays (x is omitted, if input was None)
  414. """
  415. if i.dtype > j.dtype:
  416. j = j.astype(i.dtype)
  417. elif i.dtype < j.dtype:
  418. i = i.astype(j.dtype)
  419. if not i.flags.writeable or i.dtype not in (np.int32, np.int64):
  420. i = i.astype(np.intp)
  421. if not j.flags.writeable or j.dtype not in (np.int32, np.int64):
  422. j = j.astype(np.intp)
  423. if x is not None:
  424. if not x.flags.writeable:
  425. x = x.copy()
  426. return i, j, x
  427. else:
  428. return i, j
  429. def isspmatrix_lil(x):
  430. """Is x of lil_matrix type?
  431. Parameters
  432. ----------
  433. x
  434. object to check for being a lil matrix
  435. Returns
  436. -------
  437. bool
  438. True if x is a lil matrix, False otherwise
  439. Examples
  440. --------
  441. >>> from scipy.sparse import lil_matrix, isspmatrix_lil
  442. >>> isspmatrix_lil(lil_matrix([[5]]))
  443. True
  444. >>> from scipy.sparse import lil_matrix, csr_matrix, isspmatrix_lil
  445. >>> isspmatrix_lil(csr_matrix([[5]]))
  446. False
  447. """
  448. from ._arrays import lil_array
  449. return isinstance(x, lil_matrix) or isinstance(x, lil_array)