123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340 |
- """Current-flow betweenness centrality measures."""
- import networkx as nx
- from networkx.algorithms.centrality.flow_matrix import (
- CGInverseLaplacian,
- FullInverseLaplacian,
- SuperLUInverseLaplacian,
- flow_matrix_row,
- )
- from networkx.utils import (
- not_implemented_for,
- py_random_state,
- reverse_cuthill_mckee_ordering,
- )
- __all__ = [
- "current_flow_betweenness_centrality",
- "approximate_current_flow_betweenness_centrality",
- "edge_current_flow_betweenness_centrality",
- ]
- @py_random_state(7)
- @not_implemented_for("directed")
- def approximate_current_flow_betweenness_centrality(
- G,
- normalized=True,
- weight=None,
- dtype=float,
- solver="full",
- epsilon=0.5,
- kmax=10000,
- seed=None,
- ):
- r"""Compute the approximate current-flow betweenness centrality for nodes.
- Approximates the current-flow betweenness centrality within absolute
- error of epsilon with high probability [1]_.
- Parameters
- ----------
- G : graph
- A NetworkX graph
- normalized : bool, optional (default=True)
- If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
- n is the number of nodes in G.
- weight : string or None, optional (default=None)
- Key for edge data used as the edge weight.
- If None, then use 1 as each edge weight.
- The weight reflects the capacity or the strength of the
- edge.
- dtype : data type (float)
- Default data type for internal matrices.
- Set to np.float32 for lower memory consumption.
- solver : string (default='full')
- Type of linear solver to use for computing the flow matrix.
- Options are "full" (uses most memory), "lu" (recommended), and
- "cg" (uses least memory).
- epsilon: float
- Absolute error tolerance.
- kmax: int
- Maximum number of sample node pairs to use for approximation.
- seed : integer, random_state, or None (default)
- Indicator of random number generation state.
- See :ref:`Randomness<randomness>`.
- Returns
- -------
- nodes : dictionary
- Dictionary of nodes with betweenness centrality as the value.
- See Also
- --------
- current_flow_betweenness_centrality
- Notes
- -----
- The running time is $O((1/\epsilon^2)m{\sqrt k} \log n)$
- and the space required is $O(m)$ for $n$ nodes and $m$ edges.
- If the edges have a 'weight' attribute they will be used as
- weights in this algorithm. Unspecified weights are set to 1.
- References
- ----------
- .. [1] Ulrik Brandes and Daniel Fleischer:
- Centrality Measures Based on Current Flow.
- Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
- LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
- https://doi.org/10.1007/978-3-540-31856-9_44
- """
- import numpy as np
- if not nx.is_connected(G):
- raise nx.NetworkXError("Graph not connected.")
- solvername = {
- "full": FullInverseLaplacian,
- "lu": SuperLUInverseLaplacian,
- "cg": CGInverseLaplacian,
- }
- n = G.number_of_nodes()
- ordering = list(reverse_cuthill_mckee_ordering(G))
- # make a copy with integer labels according to rcm ordering
- # this could be done without a copy if we really wanted to
- H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
- L = nx.laplacian_matrix(H, nodelist=range(n), weight=weight).asformat("csc")
- L = L.astype(dtype)
- C = solvername[solver](L, dtype=dtype) # initialize solver
- betweenness = dict.fromkeys(H, 0.0)
- nb = (n - 1.0) * (n - 2.0) # normalization factor
- cstar = n * (n - 1) / nb
- l = 1 # parameter in approximation, adjustable
- k = l * int(np.ceil((cstar / epsilon) ** 2 * np.log(n)))
- if k > kmax:
- msg = f"Number random pairs k>kmax ({k}>{kmax}) "
- raise nx.NetworkXError(msg, "Increase kmax or epsilon")
- cstar2k = cstar / (2 * k)
- for _ in range(k):
- s, t = pair = seed.sample(range(n), 2)
- b = np.zeros(n, dtype=dtype)
- b[s] = 1
- b[t] = -1
- p = C.solve(b)
- for v in H:
- if v in pair:
- continue
- for nbr in H[v]:
- w = H[v][nbr].get(weight, 1.0)
- betweenness[v] += w * np.abs(p[v] - p[nbr]) * cstar2k
- if normalized:
- factor = 1.0
- else:
- factor = nb / 2.0
- # remap to original node names and "unnormalize" if required
- return {ordering[k]: v * factor for k, v in betweenness.items()}
- @not_implemented_for("directed")
- def current_flow_betweenness_centrality(
- G, normalized=True, weight=None, dtype=float, solver="full"
- ):
- r"""Compute current-flow betweenness centrality for nodes.
- Current-flow betweenness centrality uses an electrical current
- model for information spreading in contrast to betweenness
- centrality which uses shortest paths.
- Current-flow betweenness centrality is also known as
- random-walk betweenness centrality [2]_.
- Parameters
- ----------
- G : graph
- A NetworkX graph
- normalized : bool, optional (default=True)
- If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
- n is the number of nodes in G.
- weight : string or None, optional (default=None)
- Key for edge data used as the edge weight.
- If None, then use 1 as each edge weight.
- The weight reflects the capacity or the strength of the
- edge.
- dtype : data type (float)
- Default data type for internal matrices.
- Set to np.float32 for lower memory consumption.
- solver : string (default='full')
- Type of linear solver to use for computing the flow matrix.
- Options are "full" (uses most memory), "lu" (recommended), and
- "cg" (uses least memory).
- Returns
- -------
- nodes : dictionary
- Dictionary of nodes with betweenness centrality as the value.
- See Also
- --------
- approximate_current_flow_betweenness_centrality
- betweenness_centrality
- edge_betweenness_centrality
- edge_current_flow_betweenness_centrality
- Notes
- -----
- Current-flow betweenness can be computed in $O(I(n-1)+mn \log n)$
- time [1]_, where $I(n-1)$ is the time needed to compute the
- inverse Laplacian. For a full matrix this is $O(n^3)$ but using
- sparse methods you can achieve $O(nm{\sqrt k})$ where $k$ is the
- Laplacian matrix condition number.
- The space required is $O(nw)$ where $w$ is the width of the sparse
- Laplacian matrix. Worse case is $w=n$ for $O(n^2)$.
- If the edges have a 'weight' attribute they will be used as
- weights in this algorithm. Unspecified weights are set to 1.
- References
- ----------
- .. [1] Centrality Measures Based on Current Flow.
- Ulrik Brandes and Daniel Fleischer,
- Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
- LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
- https://doi.org/10.1007/978-3-540-31856-9_44
- .. [2] A measure of betweenness centrality based on random walks,
- M. E. J. Newman, Social Networks 27, 39-54 (2005).
- """
- if not nx.is_connected(G):
- raise nx.NetworkXError("Graph not connected.")
- n = G.number_of_nodes()
- ordering = list(reverse_cuthill_mckee_ordering(G))
- # make a copy with integer labels according to rcm ordering
- # this could be done without a copy if we really wanted to
- H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
- betweenness = dict.fromkeys(H, 0.0) # b[v]=0 for v in H
- for row, (s, t) in flow_matrix_row(H, weight=weight, dtype=dtype, solver=solver):
- pos = dict(zip(row.argsort()[::-1], range(n)))
- for i in range(n):
- betweenness[s] += (i - pos[i]) * row[i]
- betweenness[t] += (n - i - 1 - pos[i]) * row[i]
- if normalized:
- nb = (n - 1.0) * (n - 2.0) # normalization factor
- else:
- nb = 2.0
- for v in H:
- betweenness[v] = float((betweenness[v] - v) * 2.0 / nb)
- return {ordering[k]: v for k, v in betweenness.items()}
- @not_implemented_for("directed")
- def edge_current_flow_betweenness_centrality(
- G, normalized=True, weight=None, dtype=float, solver="full"
- ):
- r"""Compute current-flow betweenness centrality for edges.
- Current-flow betweenness centrality uses an electrical current
- model for information spreading in contrast to betweenness
- centrality which uses shortest paths.
- Current-flow betweenness centrality is also known as
- random-walk betweenness centrality [2]_.
- Parameters
- ----------
- G : graph
- A NetworkX graph
- normalized : bool, optional (default=True)
- If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
- n is the number of nodes in G.
- weight : string or None, optional (default=None)
- Key for edge data used as the edge weight.
- If None, then use 1 as each edge weight.
- The weight reflects the capacity or the strength of the
- edge.
- dtype : data type (default=float)
- Default data type for internal matrices.
- Set to np.float32 for lower memory consumption.
- solver : string (default='full')
- Type of linear solver to use for computing the flow matrix.
- Options are "full" (uses most memory), "lu" (recommended), and
- "cg" (uses least memory).
- Returns
- -------
- nodes : dictionary
- Dictionary of edge tuples with betweenness centrality as the value.
- Raises
- ------
- NetworkXError
- The algorithm does not support DiGraphs.
- If the input graph is an instance of DiGraph class, NetworkXError
- is raised.
- See Also
- --------
- betweenness_centrality
- edge_betweenness_centrality
- current_flow_betweenness_centrality
- Notes
- -----
- Current-flow betweenness can be computed in $O(I(n-1)+mn \log n)$
- time [1]_, where $I(n-1)$ is the time needed to compute the
- inverse Laplacian. For a full matrix this is $O(n^3)$ but using
- sparse methods you can achieve $O(nm{\sqrt k})$ where $k$ is the
- Laplacian matrix condition number.
- The space required is $O(nw)$ where $w$ is the width of the sparse
- Laplacian matrix. Worse case is $w=n$ for $O(n^2)$.
- If the edges have a 'weight' attribute they will be used as
- weights in this algorithm. Unspecified weights are set to 1.
- References
- ----------
- .. [1] Centrality Measures Based on Current Flow.
- Ulrik Brandes and Daniel Fleischer,
- Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
- LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
- https://doi.org/10.1007/978-3-540-31856-9_44
- .. [2] A measure of betweenness centrality based on random walks,
- M. E. J. Newman, Social Networks 27, 39-54 (2005).
- """
- if not nx.is_connected(G):
- raise nx.NetworkXError("Graph not connected.")
- n = G.number_of_nodes()
- ordering = list(reverse_cuthill_mckee_ordering(G))
- # make a copy with integer labels according to rcm ordering
- # this could be done without a copy if we really wanted to
- H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
- edges = (tuple(sorted((u, v))) for u, v in H.edges())
- betweenness = dict.fromkeys(edges, 0.0)
- if normalized:
- nb = (n - 1.0) * (n - 2.0) # normalization factor
- else:
- nb = 2.0
- for row, (e) in flow_matrix_row(H, weight=weight, dtype=dtype, solver=solver):
- pos = dict(zip(row.argsort()[::-1], range(1, n + 1)))
- for i in range(n):
- betweenness[e] += (i + 1 - pos[i]) * row[i]
- betweenness[e] += (n - i - pos[i]) * row[i]
- betweenness[e] /= nb
- return {(ordering[s], ordering[t]): v for (s, t), v in betweenness.items()}
|