123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555 |
- """
- Laplacian of a compressed-sparse graph
- """
- import numpy as np
- from scipy.sparse import isspmatrix
- from scipy.sparse.linalg import LinearOperator
- ###############################################################################
- # Graph laplacian
- def laplacian(
- csgraph,
- normed=False,
- return_diag=False,
- use_out_degree=False,
- *,
- copy=True,
- form="array",
- dtype=None,
- symmetrized=False,
- ):
- """
- Return the Laplacian of a directed graph.
- Parameters
- ----------
- csgraph : array_like or sparse matrix, 2 dimensions
- compressed-sparse graph, with shape (N, N).
- normed : bool, optional
- If True, then compute symmetrically normalized Laplacian.
- Default: False.
- return_diag : bool, optional
- If True, then also return an array related to vertex degrees.
- Default: False.
- use_out_degree : bool, optional
- If True, then use out-degree instead of in-degree.
- This distinction matters only if the graph is asymmetric.
- Default: False.
- copy: bool, optional
- If False, then change `csgraph` in place if possible,
- avoiding doubling the memory use.
- Default: True, for backward compatibility.
- form: 'array', or 'function', or 'lo'
- Determines the format of the output Laplacian:
- * 'array' is a numpy array;
- * 'function' is a pointer to evaluating the Laplacian-vector
- or Laplacian-matrix product;
- * 'lo' results in the format of the `LinearOperator`.
- Choosing 'function' or 'lo' always avoids doubling
- the memory use, ignoring `copy` value.
- Default: 'array', for backward compatibility.
- dtype: None or one of numeric numpy dtypes, optional
- The dtype of the output. If ``dtype=None``, the dtype of the
- output matches the dtype of the input csgraph, except for
- the case ``normed=True`` and integer-like csgraph, where
- the output dtype is 'float' allowing accurate normalization,
- but dramatically increasing the memory use.
- Default: None, for backward compatibility.
- symmetrized: bool, optional
- If True, then the output Laplacian is symmetric/Hermitian.
- The symmetrization is done by ``csgraph + csgraph.T.conj``
- without dividing by 2 to preserve integer dtypes if possible
- prior to the construction of the Laplacian.
- The symmetrization will increase the memory footprint of
- sparse matrices unless the sparsity pattern is symmetric or
- `form` is 'function' or 'lo'.
- Default: False, for backward compatibility.
- Returns
- -------
- lap : ndarray, or sparse matrix, or `LinearOperator`
- The N x N Laplacian of csgraph. It will be a NumPy array (dense)
- if the input was dense, or a sparse matrix otherwise, or
- the format of a function or `LinearOperator` if
- `form` equals 'function' or 'lo', respectively.
- diag : ndarray, optional
- The length-N main diagonal of the Laplacian matrix.
- For the normalized Laplacian, this is the array of square roots
- of vertex degrees or 1 if the degree is zero.
- Notes
- -----
- The Laplacian matrix of a graph is sometimes referred to as the
- "Kirchhoff matrix" or just the "Laplacian", and is useful in many
- parts of spectral graph theory.
- In particular, the eigen-decomposition of the Laplacian can give
- insight into many properties of the graph, e.g.,
- is commonly used for spectral data embedding and clustering.
- The constructed Laplacian doubles the memory use if ``copy=True`` and
- ``form="array"`` which is the default.
- Choosing ``copy=False`` has no effect unless ``form="array"``
- or the matrix is sparse in the ``coo`` format, or dense array, except
- for the integer input with ``normed=True`` that forces the float output.
- Sparse input is reformatted into ``coo`` if ``form="array"``,
- which is the default.
- If the input adjacency matrix is not symmetic, the Laplacian is
- also non-symmetric unless ``symmetrized=True`` is used.
- Diagonal entries of the input adjacency matrix are ignored and
- replaced with zeros for the purpose of normalization where ``normed=True``.
- The normalization uses the inverse square roots of row-sums of the input
- adjacency matrix, and thus may fail if the row-sums contain
- negative or complex with a non-zero imaginary part values.
- The normalization is symmetric, making the normalized Laplacian also
- symmetric if the input csgraph was symmetric.
- References
- ----------
- .. [1] Laplacian matrix. https://en.wikipedia.org/wiki/Laplacian_matrix
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.sparse import csgraph
- Our first illustration is the symmetric graph
- >>> G = np.arange(4) * np.arange(4)[:, np.newaxis]
- >>> G
- array([[0, 0, 0, 0],
- [0, 1, 2, 3],
- [0, 2, 4, 6],
- [0, 3, 6, 9]])
- and its symmetric Laplacian matrix
- >>> csgraph.laplacian(G)
- array([[ 0, 0, 0, 0],
- [ 0, 5, -2, -3],
- [ 0, -2, 8, -6],
- [ 0, -3, -6, 9]])
- The non-symmetric graph
- >>> G = np.arange(9).reshape(3, 3)
- >>> G
- array([[0, 1, 2],
- [3, 4, 5],
- [6, 7, 8]])
- has different row- and column sums, resulting in two varieties
- of the Laplacian matrix, using an in-degree, which is the default
- >>> L_in_degree = csgraph.laplacian(G)
- >>> L_in_degree
- array([[ 9, -1, -2],
- [-3, 8, -5],
- [-6, -7, 7]])
- or alternatively an out-degree
- >>> L_out_degree = csgraph.laplacian(G, use_out_degree=True)
- >>> L_out_degree
- array([[ 3, -1, -2],
- [-3, 8, -5],
- [-6, -7, 13]])
- Constructing a symmetric Laplacian matrix, one can add the two as
- >>> L_in_degree + L_out_degree.T
- array([[ 12, -4, -8],
- [ -4, 16, -12],
- [ -8, -12, 20]])
- or use the ``symmetrized=True`` option
- >>> csgraph.laplacian(G, symmetrized=True)
- array([[ 12, -4, -8],
- [ -4, 16, -12],
- [ -8, -12, 20]])
- that is equivalent to symmetrizing the original graph
- >>> csgraph.laplacian(G + G.T)
- array([[ 12, -4, -8],
- [ -4, 16, -12],
- [ -8, -12, 20]])
- The goal of normalization is to make the non-zero diagonal entries
- of the Laplacian matrix to be all unit, also scaling off-diagonal
- entries correspondingly. The normalization can be done manually, e.g.,
- >>> G = np.array([[0, 1, 1], [1, 0, 1], [1, 1, 0]])
- >>> L, d = csgraph.laplacian(G, return_diag=True)
- >>> L
- array([[ 2, -1, -1],
- [-1, 2, -1],
- [-1, -1, 2]])
- >>> d
- array([2, 2, 2])
- >>> scaling = np.sqrt(d)
- >>> scaling
- array([1.41421356, 1.41421356, 1.41421356])
- >>> (1/scaling)*L*(1/scaling)
- array([[ 1. , -0.5, -0.5],
- [-0.5, 1. , -0.5],
- [-0.5, -0.5, 1. ]])
- Or using ``normed=True`` option
- >>> L, d = csgraph.laplacian(G, return_diag=True, normed=True)
- >>> L
- array([[ 1. , -0.5, -0.5],
- [-0.5, 1. , -0.5],
- [-0.5, -0.5, 1. ]])
- which now instead of the diagonal returns the scaling coefficients
- >>> d
- array([1.41421356, 1.41421356, 1.41421356])
- Zero scaling coefficients are substituted with 1s, where scaling
- has thus no effect, e.g.,
- >>> G = np.array([[0, 0, 0], [0, 0, 1], [0, 1, 0]])
- >>> G
- array([[0, 0, 0],
- [0, 0, 1],
- [0, 1, 0]])
- >>> L, d = csgraph.laplacian(G, return_diag=True, normed=True)
- >>> L
- array([[ 0., -0., -0.],
- [-0., 1., -1.],
- [-0., -1., 1.]])
- >>> d
- array([1., 1., 1.])
- Only the symmetric normalization is implemented, resulting
- in a symmetric Laplacian matrix if and only if its graph is symmetric
- and has all non-negative degrees, like in the examples above.
- The output Laplacian matrix is by default a dense array or a sparse matrix
- inferring its shape, format, and dtype from the input graph matrix:
- >>> G = np.array([[0, 1, 1], [1, 0, 1], [1, 1, 0]]).astype(np.float32)
- >>> G
- array([[0., 1., 1.],
- [1., 0., 1.],
- [1., 1., 0.]], dtype=float32)
- >>> csgraph.laplacian(G)
- array([[ 2., -1., -1.],
- [-1., 2., -1.],
- [-1., -1., 2.]], dtype=float32)
- but can alternatively be generated matrix-free as a LinearOperator:
- >>> L = csgraph.laplacian(G, form="lo")
- >>> L
- <3x3 _CustomLinearOperator with dtype=float32>
- >>> L(np.eye(3))
- array([[ 2., -1., -1.],
- [-1., 2., -1.],
- [-1., -1., 2.]])
- or as a lambda-function:
- >>> L = csgraph.laplacian(G, form="function")
- >>> L
- <function _laplace.<locals>.<lambda> at 0x0000012AE6F5A598>
- >>> L(np.eye(3))
- array([[ 2., -1., -1.],
- [-1., 2., -1.],
- [-1., -1., 2.]])
- The Laplacian matrix is used for
- spectral data clustering and embedding
- as well as for spectral graph partitioning.
- Our final example illustrates the latter
- for a noisy directed linear graph.
- >>> from scipy.sparse import diags, random
- >>> from scipy.sparse.linalg import lobpcg
- Create a directed linear graph with ``N=35`` vertices
- using a sparse adjacency matrix ``G``:
- >>> N = 35
- >>> G = diags(np.ones(N-1), 1, format="csr")
- Fix a random seed ``rng`` and add a random sparse noise to the graph ``G``:
- >>> rng = np.random.default_rng()
- >>> G += 1e-2 * random(N, N, density=0.1, random_state=rng)
- Set initial approximations for eigenvectors:
- >>> X = rng.random((N, 2))
- The constant vector of ones is always a trivial eigenvector
- of the non-normalized Laplacian to be filtered out:
- >>> Y = np.ones((N, 1))
- Alternating (1) the sign of the graph weights allows determining
- labels for spectral max- and min- cuts in a single loop.
- Since the graph is undirected, the option ``symmetrized=True``
- must be used in the construction of the Laplacian.
- The option ``normed=True`` cannot be used in (2) for the negative weights
- here as the symmetric normalization evaluates square roots.
- The option ``form="lo"`` in (2) is matrix-free, i.e., guarantees
- a fixed memory footprint and read-only access to the graph.
- Calling the eigenvalue solver ``lobpcg`` (3) computes the Fiedler vector
- that determines the labels as the signs of its components in (5).
- Since the sign in an eigenvector is not deterministic and can flip,
- we fix the sign of the first component to be always +1 in (4).
- >>> for cut in ["max", "min"]:
- ... G = -G # 1.
- ... L = csgraph.laplacian(G, symmetrized=True, form="lo") # 2.
- ... _, eves = lobpcg(L, X, Y=Y, largest=False, tol=1e-3) # 3.
- ... eves *= np.sign(eves[0, 0]) # 4.
- ... print(cut + "-cut labels:\\n", 1 * (eves[:, 0]>0)) # 5.
- max-cut labels:
- [1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1]
- min-cut labels:
- [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
- As anticipated for a (slightly noisy) linear graph,
- the max-cut strips all the edges of the graph coloring all
- odd vertices into one color and all even vertices into another one,
- while the balanced min-cut partitions the graph
- in the middle by deleting a single edge.
- Both determined partitions are optimal.
- """
- if csgraph.ndim != 2 or csgraph.shape[0] != csgraph.shape[1]:
- raise ValueError('csgraph must be a square matrix or array')
- if normed and (
- np.issubdtype(csgraph.dtype, np.signedinteger)
- or np.issubdtype(csgraph.dtype, np.uint)
- ):
- csgraph = csgraph.astype(np.float64)
- if form == "array":
- create_lap = (
- _laplacian_sparse if isspmatrix(csgraph) else _laplacian_dense
- )
- else:
- create_lap = (
- _laplacian_sparse_flo
- if isspmatrix(csgraph)
- else _laplacian_dense_flo
- )
- degree_axis = 1 if use_out_degree else 0
- lap, d = create_lap(
- csgraph,
- normed=normed,
- axis=degree_axis,
- copy=copy,
- form=form,
- dtype=dtype,
- symmetrized=symmetrized,
- )
- if return_diag:
- return lap, d
- return lap
- def _setdiag_dense(m, d):
- step = len(d) + 1
- m.flat[::step] = d
- def _laplace(m, d):
- return lambda v: v * d[:, np.newaxis] - m @ v
- def _laplace_normed(m, d, nd):
- laplace = _laplace(m, d)
- return lambda v: nd[:, np.newaxis] * laplace(v * nd[:, np.newaxis])
- def _laplace_sym(m, d):
- return (
- lambda v: v * d[:, np.newaxis]
- - m @ v
- - np.transpose(np.conjugate(np.transpose(np.conjugate(v)) @ m))
- )
- def _laplace_normed_sym(m, d, nd):
- laplace_sym = _laplace_sym(m, d)
- return lambda v: nd[:, np.newaxis] * laplace_sym(v * nd[:, np.newaxis])
- def _linearoperator(mv, shape, dtype):
- return LinearOperator(matvec=mv, matmat=mv, shape=shape, dtype=dtype)
- def _laplacian_sparse_flo(graph, normed, axis, copy, form, dtype, symmetrized):
- # The keyword argument `copy` is unused and has no effect here.
- del copy
- if dtype is None:
- dtype = graph.dtype
- graph_sum = graph.sum(axis=axis).getA1()
- graph_diagonal = graph.diagonal()
- diag = graph_sum - graph_diagonal
- if symmetrized:
- graph_sum += graph.sum(axis=1 - axis).getA1()
- diag = graph_sum - graph_diagonal - graph_diagonal
- if normed:
- isolated_node_mask = diag == 0
- w = np.where(isolated_node_mask, 1, np.sqrt(diag))
- if symmetrized:
- md = _laplace_normed_sym(graph, graph_sum, 1.0 / w)
- else:
- md = _laplace_normed(graph, graph_sum, 1.0 / w)
- if form == "function":
- return md, w.astype(dtype, copy=False)
- elif form == "lo":
- m = _linearoperator(md, shape=graph.shape, dtype=dtype)
- return m, w.astype(dtype, copy=False)
- else:
- raise ValueError(f"Invalid form: {form!r}")
- else:
- if symmetrized:
- md = _laplace_sym(graph, graph_sum)
- else:
- md = _laplace(graph, graph_sum)
- if form == "function":
- return md, diag.astype(dtype, copy=False)
- elif form == "lo":
- m = _linearoperator(md, shape=graph.shape, dtype=dtype)
- return m, diag.astype(dtype, copy=False)
- else:
- raise ValueError(f"Invalid form: {form!r}")
- def _laplacian_sparse(graph, normed, axis, copy, form, dtype, symmetrized):
- # The keyword argument `form` is unused and has no effect here.
- del form
- if dtype is None:
- dtype = graph.dtype
- needs_copy = False
- if graph.format in ('lil', 'dok'):
- m = graph.tocoo()
- else:
- m = graph
- if copy:
- needs_copy = True
- if symmetrized:
- m += m.T.conj()
- w = m.sum(axis=axis).getA1() - m.diagonal()
- if normed:
- m = m.tocoo(copy=needs_copy)
- isolated_node_mask = (w == 0)
- w = np.where(isolated_node_mask, 1, np.sqrt(w))
- m.data /= w[m.row]
- m.data /= w[m.col]
- m.data *= -1
- m.setdiag(1 - isolated_node_mask)
- else:
- if m.format == 'dia':
- m = m.copy()
- else:
- m = m.tocoo(copy=needs_copy)
- m.data *= -1
- m.setdiag(w)
- return m.astype(dtype, copy=False), w.astype(dtype)
- def _laplacian_dense_flo(graph, normed, axis, copy, form, dtype, symmetrized):
- if copy:
- m = np.array(graph)
- else:
- m = np.asarray(graph)
- if dtype is None:
- dtype = m.dtype
- graph_sum = m.sum(axis=axis)
- graph_diagonal = m.diagonal()
- diag = graph_sum - graph_diagonal
- if symmetrized:
- graph_sum += m.sum(axis=1 - axis)
- diag = graph_sum - graph_diagonal - graph_diagonal
- if normed:
- isolated_node_mask = diag == 0
- w = np.where(isolated_node_mask, 1, np.sqrt(diag))
- if symmetrized:
- md = _laplace_normed_sym(m, graph_sum, 1.0 / w)
- else:
- md = _laplace_normed(m, graph_sum, 1.0 / w)
- if form == "function":
- return md, w.astype(dtype, copy=False)
- elif form == "lo":
- m = _linearoperator(md, shape=graph.shape, dtype=dtype)
- return m, w.astype(dtype, copy=False)
- else:
- raise ValueError(f"Invalid form: {form!r}")
- else:
- if symmetrized:
- md = _laplace_sym(m, graph_sum)
- else:
- md = _laplace(m, graph_sum)
- if form == "function":
- return md, diag.astype(dtype, copy=False)
- elif form == "lo":
- m = _linearoperator(md, shape=graph.shape, dtype=dtype)
- return m, diag.astype(dtype, copy=False)
- else:
- raise ValueError(f"Invalid form: {form!r}")
- def _laplacian_dense(graph, normed, axis, copy, form, dtype, symmetrized):
- if form != "array":
- raise ValueError(f'{form!r} must be "array"')
- if dtype is None:
- dtype = graph.dtype
- if copy:
- m = np.array(graph)
- else:
- m = np.asarray(graph)
- if dtype is None:
- dtype = m.dtype
- if symmetrized:
- m += m.T.conj()
- np.fill_diagonal(m, 0)
- w = m.sum(axis=axis)
- if normed:
- isolated_node_mask = (w == 0)
- w = np.where(isolated_node_mask, 1, np.sqrt(w))
- m /= w
- m /= w[:, np.newaxis]
- m *= -1
- _setdiag_dense(m, 1 - isolated_node_mask)
- else:
- m *= -1
- _setdiag_dense(m, w)
- return m.astype(dtype, copy=False), w.astype(dtype, copy=False)
|