123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470 |
- """Sparse DIAgonal format"""
- __docformat__ = "restructuredtext en"
- __all__ = ['dia_matrix', 'isspmatrix_dia']
- import numpy as np
- from ._base import isspmatrix, _formats, spmatrix
- from ._data import _data_matrix
- from ._sputils import (isshape, upcast_char, getdtype, get_index_dtype,
- get_sum_dtype, validateaxis, check_shape)
- from ._sparsetools import dia_matvec
- class dia_matrix(_data_matrix):
- """Sparse matrix with DIAgonal storage
- This can be instantiated in several ways:
- dia_matrix(D)
- with a dense matrix
- dia_matrix(S)
- with another sparse matrix S (equivalent to S.todia())
- dia_matrix((M, N), [dtype])
- to construct an empty matrix with shape (M, N),
- dtype is optional, defaulting to dtype='d'.
- dia_matrix((data, offsets), shape=(M, N))
- where the ``data[k,:]`` stores the diagonal entries for
- diagonal ``offsets[k]`` (See example below)
- Attributes
- ----------
- dtype : dtype
- Data type of the matrix
- shape : 2-tuple
- Shape of the matrix
- ndim : int
- Number of dimensions (this is always 2)
- nnz
- Number of stored values, including explicit zeros
- data
- DIA format data array of the matrix
- offsets
- DIA format offset array of the matrix
- Notes
- -----
- Sparse matrices can be used in arithmetic operations: they support
- addition, subtraction, multiplication, division, and matrix power.
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.sparse import dia_matrix
- >>> dia_matrix((3, 4), dtype=np.int8).toarray()
- array([[0, 0, 0, 0],
- [0, 0, 0, 0],
- [0, 0, 0, 0]], dtype=int8)
- >>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0)
- >>> offsets = np.array([0, -1, 2])
- >>> dia_matrix((data, offsets), shape=(4, 4)).toarray()
- array([[1, 0, 3, 0],
- [1, 2, 0, 4],
- [0, 2, 3, 0],
- [0, 0, 3, 4]])
- >>> from scipy.sparse import dia_matrix
- >>> n = 10
- >>> ex = np.ones(n)
- >>> data = np.array([ex, 2 * ex, ex])
- >>> offsets = np.array([-1, 0, 1])
- >>> dia_matrix((data, offsets), shape=(n, n)).toarray()
- array([[2., 1., 0., ..., 0., 0., 0.],
- [1., 2., 1., ..., 0., 0., 0.],
- [0., 1., 2., ..., 0., 0., 0.],
- ...,
- [0., 0., 0., ..., 2., 1., 0.],
- [0., 0., 0., ..., 1., 2., 1.],
- [0., 0., 0., ..., 0., 1., 2.]])
- """
- format = 'dia'
- def __init__(self, arg1, shape=None, dtype=None, copy=False):
- _data_matrix.__init__(self)
- if isspmatrix_dia(arg1):
- if copy:
- arg1 = arg1.copy()
- self.data = arg1.data
- self.offsets = arg1.offsets
- self._shape = check_shape(arg1.shape)
- elif isspmatrix(arg1):
- if isspmatrix_dia(arg1) and copy:
- A = arg1.copy()
- else:
- A = arg1.todia()
- self.data = A.data
- self.offsets = A.offsets
- self._shape = check_shape(A.shape)
- elif isinstance(arg1, tuple):
- if isshape(arg1):
- # It's a tuple of matrix dimensions (M, N)
- # create empty matrix
- self._shape = check_shape(arg1)
- self.data = np.zeros((0,0), getdtype(dtype, default=float))
- idx_dtype = get_index_dtype(maxval=max(self.shape))
- self.offsets = np.zeros((0), dtype=idx_dtype)
- else:
- try:
- # Try interpreting it as (data, offsets)
- data, offsets = arg1
- except Exception as e:
- raise ValueError('unrecognized form for dia_matrix constructor') from e
- else:
- if shape is None:
- raise ValueError('expected a shape argument')
- self.data = np.atleast_2d(np.array(arg1[0], dtype=dtype, copy=copy))
- self.offsets = np.atleast_1d(np.array(arg1[1],
- dtype=get_index_dtype(maxval=max(shape)),
- copy=copy))
- self._shape = check_shape(shape)
- else:
- #must be dense, convert to COO first, then to DIA
- try:
- arg1 = np.asarray(arg1)
- except Exception as e:
- raise ValueError("unrecognized form for"
- " %s_matrix constructor" % self.format) from e
- A = self._coo_container(arg1, dtype=dtype, shape=shape).todia()
- self.data = A.data
- self.offsets = A.offsets
- self._shape = check_shape(A.shape)
- if dtype is not None:
- self.data = self.data.astype(dtype)
- #check format
- if self.offsets.ndim != 1:
- raise ValueError('offsets array must have rank 1')
- if self.data.ndim != 2:
- raise ValueError('data array must have rank 2')
- if self.data.shape[0] != len(self.offsets):
- raise ValueError('number of diagonals (%d) '
- 'does not match the number of offsets (%d)'
- % (self.data.shape[0], len(self.offsets)))
- if len(np.unique(self.offsets)) != len(self.offsets):
- raise ValueError('offset array contains duplicate values')
- def __repr__(self):
- format = _formats[self.getformat()][1]
- return "<%dx%d sparse matrix of type '%s'\n" \
- "\twith %d stored elements (%d diagonals) in %s format>" % \
- (self.shape + (self.dtype.type, self.nnz, self.data.shape[0],
- format))
- def _data_mask(self):
- """Returns a mask of the same shape as self.data, where
- mask[i,j] is True when data[i,j] corresponds to a stored element."""
- num_rows, num_cols = self.shape
- offset_inds = np.arange(self.data.shape[1])
- row = offset_inds - self.offsets[:,None]
- mask = (row >= 0)
- mask &= (row < num_rows)
- mask &= (offset_inds < num_cols)
- return mask
- def count_nonzero(self):
- mask = self._data_mask()
- return np.count_nonzero(self.data[mask])
- def getnnz(self, axis=None):
- if axis is not None:
- raise NotImplementedError("getnnz over an axis is not implemented "
- "for DIA format")
- M,N = self.shape
- nnz = 0
- for k in self.offsets:
- if k > 0:
- nnz += min(M,N-k)
- else:
- nnz += min(M+k,N)
- return int(nnz)
- getnnz.__doc__ = spmatrix.getnnz.__doc__
- count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__
- def sum(self, axis=None, dtype=None, out=None):
- validateaxis(axis)
- if axis is not None and axis < 0:
- axis += 2
- res_dtype = get_sum_dtype(self.dtype)
- num_rows, num_cols = self.shape
- ret = None
- if axis == 0:
- mask = self._data_mask()
- x = (self.data * mask).sum(axis=0)
- if x.shape[0] == num_cols:
- res = x
- else:
- res = np.zeros(num_cols, dtype=x.dtype)
- res[:x.shape[0]] = x
- ret = self._ascontainer(res, dtype=res_dtype)
- else:
- row_sums = np.zeros((num_rows, 1), dtype=res_dtype)
- one = np.ones(num_cols, dtype=res_dtype)
- dia_matvec(num_rows, num_cols, len(self.offsets),
- self.data.shape[1], self.offsets, self.data, one, row_sums)
- row_sums = self._ascontainer(row_sums)
- if axis is None:
- return row_sums.sum(dtype=dtype, out=out)
- ret = self._ascontainer(row_sums.sum(axis=axis))
- if out is not None and out.shape != ret.shape:
- raise ValueError("dimensions do not match")
- return ret.sum(axis=(), dtype=dtype, out=out)
- sum.__doc__ = spmatrix.sum.__doc__
- def _add_sparse(self, other):
- # Check if other is also of type dia_matrix
- if not isinstance(other, type(self)):
- # If other is not of type dia_matrix, default to
- # converting to csr_matrix, as is done in the _add_sparse
- # method of parent class spmatrix
- return self.tocsr()._add_sparse(other)
- # The task is to compute m = self + other
- # Start by making a copy of self, of the datatype
- # that should result from adding self and other
- dtype = np.promote_types(self.dtype, other.dtype)
- m = self.astype(dtype, copy=True)
- # Then, add all the stored diagonals of other.
- for d in other.offsets:
- # Check if the diagonal has already been added.
- if d in m.offsets:
- # If the diagonal is already there, we need to take
- # the sum of the existing and the new
- m.setdiag(m.diagonal(d) + other.diagonal(d), d)
- else:
- m.setdiag(other.diagonal(d), d)
- return m
- def _mul_vector(self, other):
- x = other
- y = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char,
- x.dtype.char))
- L = self.data.shape[1]
- M,N = self.shape
- dia_matvec(M,N, len(self.offsets), L, self.offsets, self.data, x.ravel(), y.ravel())
- return y
- def _mul_multimatrix(self, other):
- return np.hstack([self._mul_vector(col).reshape(-1,1) for col in other.T])
- def _setdiag(self, values, k=0):
- M, N = self.shape
- if values.ndim == 0:
- # broadcast
- values_n = np.inf
- else:
- values_n = len(values)
- if k < 0:
- n = min(M + k, N, values_n)
- min_index = 0
- max_index = n
- else:
- n = min(M, N - k, values_n)
- min_index = k
- max_index = k + n
- if values.ndim != 0:
- # allow also longer sequences
- values = values[:n]
- data_rows, data_cols = self.data.shape
- if k in self.offsets:
- if max_index > data_cols:
- data = np.zeros((data_rows, max_index), dtype=self.data.dtype)
- data[:, :data_cols] = self.data
- self.data = data
- self.data[self.offsets == k, min_index:max_index] = values
- else:
- self.offsets = np.append(self.offsets, self.offsets.dtype.type(k))
- m = max(max_index, data_cols)
- data = np.zeros((data_rows + 1, m), dtype=self.data.dtype)
- data[:-1, :data_cols] = self.data
- data[-1, min_index:max_index] = values
- self.data = data
- def todia(self, copy=False):
- if copy:
- return self.copy()
- else:
- return self
- todia.__doc__ = spmatrix.todia.__doc__
- def transpose(self, axes=None, copy=False):
- if axes is not None:
- raise ValueError(("Sparse matrices do not support "
- "an 'axes' parameter because swapping "
- "dimensions is the only logical permutation."))
- num_rows, num_cols = self.shape
- max_dim = max(self.shape)
- # flip diagonal offsets
- offsets = -self.offsets
- # re-align the data matrix
- r = np.arange(len(offsets), dtype=np.intc)[:, None]
- c = np.arange(num_rows, dtype=np.intc) - (offsets % max_dim)[:, None]
- pad_amount = max(0, max_dim-self.data.shape[1])
- data = np.hstack((self.data, np.zeros((self.data.shape[0], pad_amount),
- dtype=self.data.dtype)))
- data = data[r, c]
- return self._dia_container((data, offsets), shape=(
- num_cols, num_rows), copy=copy)
- transpose.__doc__ = spmatrix.transpose.__doc__
- def diagonal(self, k=0):
- rows, cols = self.shape
- if k <= -rows or k >= cols:
- return np.empty(0, dtype=self.data.dtype)
- idx, = np.nonzero(self.offsets == k)
- first_col = max(0, k)
- last_col = min(rows + k, cols)
- result_size = last_col - first_col
- if idx.size == 0:
- return np.zeros(result_size, dtype=self.data.dtype)
- result = self.data[idx[0], first_col:last_col]
- padding = result_size - len(result)
- if padding > 0:
- result = np.pad(result, (0, padding), mode='constant')
- return result
- diagonal.__doc__ = spmatrix.diagonal.__doc__
- def tocsc(self, copy=False):
- if self.nnz == 0:
- return self._csc_container(self.shape, dtype=self.dtype)
- num_rows, num_cols = self.shape
- num_offsets, offset_len = self.data.shape
- offset_inds = np.arange(offset_len)
- row = offset_inds - self.offsets[:,None]
- mask = (row >= 0)
- mask &= (row < num_rows)
- mask &= (offset_inds < num_cols)
- mask &= (self.data != 0)
- idx_dtype = get_index_dtype(maxval=max(self.shape))
- indptr = np.zeros(num_cols + 1, dtype=idx_dtype)
- indptr[1:offset_len+1] = np.cumsum(mask.sum(axis=0)[:num_cols])
- if offset_len < num_cols:
- indptr[offset_len+1:] = indptr[offset_len]
- indices = row.T[mask.T].astype(idx_dtype, copy=False)
- data = self.data.T[mask.T]
- return self._csc_container((data, indices, indptr), shape=self.shape,
- dtype=self.dtype)
- tocsc.__doc__ = spmatrix.tocsc.__doc__
- def tocoo(self, copy=False):
- num_rows, num_cols = self.shape
- num_offsets, offset_len = self.data.shape
- offset_inds = np.arange(offset_len)
- row = offset_inds - self.offsets[:,None]
- mask = (row >= 0)
- mask &= (row < num_rows)
- mask &= (offset_inds < num_cols)
- mask &= (self.data != 0)
- row = row[mask]
- col = np.tile(offset_inds, num_offsets)[mask.ravel()]
- data = self.data[mask]
- A = self._coo_container(
- (data, (row, col)), shape=self.shape, dtype=self.dtype
- )
- A.has_canonical_format = True
- return A
- tocoo.__doc__ = spmatrix.tocoo.__doc__
- # needed by _data_matrix
- def _with_data(self, data, copy=True):
- """Returns a matrix with the same sparsity structure as self,
- but with different data. By default the structure arrays are copied.
- """
- if copy:
- return self._dia_container(
- (data, self.offsets.copy()), shape=self.shape
- )
- else:
- return self._dia_container(
- (data, self.offsets), shape=self.shape
- )
- def resize(self, *shape):
- shape = check_shape(shape)
- M, N = shape
- # we do not need to handle the case of expanding N
- self.data = self.data[:, :N]
- if (M > self.shape[0] and
- np.any(self.offsets + self.shape[0] < self.data.shape[1])):
- # explicitly clear values that were previously hidden
- mask = (self.offsets[:, None] + self.shape[0] <=
- np.arange(self.data.shape[1]))
- self.data[mask] = 0
- self._shape = shape
- resize.__doc__ = spmatrix.resize.__doc__
- def isspmatrix_dia(x):
- """Is x of dia_matrix type?
- Parameters
- ----------
- x
- object to check for being a dia matrix
- Returns
- -------
- bool
- True if x is a dia matrix, False otherwise
- Examples
- --------
- >>> from scipy.sparse import dia_matrix, isspmatrix_dia
- >>> isspmatrix_dia(dia_matrix([[5]]))
- True
- >>> from scipy.sparse import dia_matrix, csr_matrix, isspmatrix_dia
- >>> isspmatrix_dia(csr_matrix([[5]]))
- False
- """
- from ._arrays import dia_array
- return isinstance(x, dia_matrix) or isinstance(x, dia_array)
|