123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430 |
- """Betweenness centrality measures."""
- from collections import deque
- from heapq import heappop, heappush
- from itertools import count
- import networkx as nx
- from networkx.algorithms.shortest_paths.weighted import _weight_function
- from networkx.utils import py_random_state
- from networkx.utils.decorators import not_implemented_for
- __all__ = ["betweenness_centrality", "edge_betweenness_centrality"]
@nx._dispatch
@py_random_state(5)
def betweenness_centrality(
    G, k=None, normalized=True, weight=None, endpoints=False, seed=None
):
    r"""Compute the shortest-path betweenness centrality for nodes.

    Betweenness centrality of a node $v$ is the sum of the
    fraction of all-pairs shortest paths that pass through $v$

    .. math::

       c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}

    where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
    shortest $(s, t)$-paths, and $\sigma(s, t|v)$ is the number of
    those paths passing through some node $v$ other than $s, t$.
    If $s = t$, $\sigma(s, t) = 1$, and if $v \in {s, t}$,
    $\sigma(s, t|v) = 0$ [2]_.

    Parameters
    ----------
    G : graph
        A NetworkX graph.

    k : int, optional (default=None)
        If k is not None use k node samples to estimate betweenness.
        The value of k <= n where n is the number of nodes in the graph.
        Higher values give better approximation.

    normalized : bool, optional
        If True the betweenness values are normalized by `2/((n-1)(n-2))`
        for graphs, and `1/((n-1)(n-2))` for directed graphs where `n`
        is the number of nodes in G.

    weight : None or string, optional (default=None)
        If None, all edge weights are considered equal.
        Otherwise holds the name of the edge attribute used as weight.
        Weights are used to calculate weighted shortest paths, so they are
        interpreted as distances.

    endpoints : bool, optional
        If True include the endpoints in the shortest path counts.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.
        Note that this is only used if k is not None.

    Returns
    -------
    nodes : dictionary
        Dictionary of nodes with betweenness centrality as the value.

    See Also
    --------
    edge_betweenness_centrality
    load_centrality

    Notes
    -----
    The algorithm is from Ulrik Brandes [1]_.  See [4]_ for the original
    first published version and [2]_ for details on algorithms for
    variations and related metrics.

    For approximate betweenness calculations set k=#samples to use
    k nodes ("pivots") to estimate the betweenness values.  For an estimate
    of the number of pivots needed see [3]_.

    For weighted graphs the edge weights must be greater than zero.
    Zero edge weights can produce an infinite number of equal length
    paths between pairs of nodes.

    The total number of paths between source and target is counted
    differently for directed and undirected graphs.  Directed paths are
    easy to count.  Undirected paths are tricky: should a path from "u"
    to "v" count as 1 undirected path or as 2 directed paths?

    For betweenness_centrality we report the number of undirected paths
    when G is undirected.

    For betweenness_centrality_subset the reporting is different.  If the
    source and target subsets are the same, then we want to count
    undirected paths.  But if the source and target subsets differ -- for
    example, if sources is {0} and targets is {1}, then we are only
    counting the paths in one direction.  They are undirected paths but
    we are counting them in a directed way.  To count them as undirected
    paths, each should count as half a path.

    References
    ----------
    .. [1] Ulrik Brandes:
       A Faster Algorithm for Betweenness Centrality.
       Journal of Mathematical Sociology 25(2):163-177, 2001.
       https://doi.org/10.1080/0022250X.2001.9990249
    .. [2] Ulrik Brandes:
       On Variants of Shortest-Path Betweenness
       Centrality and their Generic Computation.
       Social Networks 30(2):136-145, 2008.
       https://doi.org/10.1016/j.socnet.2007.11.001
    .. [3] Ulrik Brandes and Christian Pich:
       Centrality Estimation in Large Networks.
       International Journal of Bifurcation and Chaos 17(7):2303-2318, 2007.
       https://dx.doi.org/10.1142/S0218127407018403
    .. [4] Linton C. Freeman:
       A set of measures of centrality based on betweenness.
       Sociometry 40: 35-41, 1977
       https://doi.org/10.2307/3033543
    """
    # Start every node at zero betweenness.
    betweenness = dict.fromkeys(G, 0.0)
    # Use every node as a source, or a random sample of k pivots.
    sources = G if k is None else seed.sample(list(G.nodes()), k)
    for source in sources:
        # Stage 1: single-source shortest paths (BFS when unweighted,
        # Dijkstra otherwise).
        if weight is None:
            S, P, sigma, _ = _single_source_shortest_path_basic(G, source)
        else:
            S, P, sigma, _ = _single_source_dijkstra_path_basic(G, source, weight)
        # Stage 2: back-propagate dependencies along the shortest-path DAG.
        accumulate = _accumulate_endpoints if endpoints else _accumulate_basic
        betweenness, _ = accumulate(betweenness, S, P, sigma, source)
    # Stage 3: rescale for normalization, directedness and pivot sampling.
    return _rescale(
        betweenness,
        len(G),
        normalized=normalized,
        directed=G.is_directed(),
        k=k,
        endpoints=endpoints,
    )
@nx._dispatch
@py_random_state(4)
def edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=None):
    r"""Compute betweenness centrality for edges.

    Betweenness centrality of an edge $e$ is the sum of the
    fraction of all-pairs shortest paths that pass through $e$

    .. math::

       c_B(e) =\sum_{s,t \in V} \frac{\sigma(s, t|e)}{\sigma(s, t)}

    where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
    shortest $(s, t)$-paths, and $\sigma(s, t|e)$ is the number of
    those paths passing through edge $e$ [2]_.

    Parameters
    ----------
    G : graph
        A NetworkX graph.

    k : int, optional (default=None)
        If k is not None use k node samples to estimate betweenness.
        The value of k <= n where n is the number of nodes in the graph.
        Higher values give better approximation.

    normalized : bool, optional
        If True the betweenness values are normalized by $2/(n(n-1))$
        for graphs, and $1/(n(n-1))$ for directed graphs where $n$
        is the number of nodes in G.

    weight : None or string, optional (default=None)
        If None, all edge weights are considered equal.
        Otherwise holds the name of the edge attribute used as weight.
        Weights are used to calculate weighted shortest paths, so they are
        interpreted as distances.

    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.
        Note that this is only used if k is not None.

    Returns
    -------
    edges : dictionary
        Dictionary of edges with betweenness centrality as the value.

    See Also
    --------
    betweenness_centrality
    edge_load

    Notes
    -----
    The algorithm is from Ulrik Brandes [1]_.

    For weighted graphs the edge weights must be greater than zero.
    Zero edge weights can produce an infinite number of equal length
    paths between pairs of nodes.

    References
    ----------
    .. [1] A Faster Algorithm for Betweenness Centrality. Ulrik Brandes,
       Journal of Mathematical Sociology 25(2):163-177, 2001.
       https://doi.org/10.1080/0022250X.2001.9990249
    .. [2] Ulrik Brandes: On Variants of Shortest-Path Betweenness
       Centrality and their Generic Computation.
       Social Networks 30(2):136-145, 2008.
       https://doi.org/10.1016/j.socnet.2007.11.001
    """
    # Node keys are needed by the accumulation helper to hold node
    # dependencies; they are removed again before returning.
    betweenness = dict.fromkeys(G, 0.0)
    betweenness.update(dict.fromkeys(G.edges(), 0.0))
    # Use every node as a source, or a random sample of k pivots.
    sources = G if k is None else seed.sample(list(G.nodes()), k)
    for source in sources:
        # Single-source shortest paths: BFS when unweighted, else Dijkstra.
        if weight is None:
            S, P, sigma, _ = _single_source_shortest_path_basic(G, source)
        else:
            S, P, sigma, _ = _single_source_dijkstra_path_basic(G, source, weight)
        # Accumulate edge dependencies from this source.
        betweenness = _accumulate_edges(betweenness, S, P, sigma, source)
    # Only edge keys are part of the result; discard the node entries.
    for node in G:
        del betweenness[node]
    betweenness = _rescale_e(
        betweenness, len(G), normalized=normalized, directed=G.is_directed()
    )
    if G.is_multigraph():
        # Split each (u, v) value among the parallel edges' keys.
        betweenness = _add_edge_keys(G, betweenness, weight=weight)
    return betweenness
- # helpers for betweenness centrality
- def _single_source_shortest_path_basic(G, s):
- S = []
- P = {}
- for v in G:
- P[v] = []
- sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G
- D = {}
- sigma[s] = 1.0
- D[s] = 0
- Q = deque([s])
- while Q: # use BFS to find shortest paths
- v = Q.popleft()
- S.append(v)
- Dv = D[v]
- sigmav = sigma[v]
- for w in G[v]:
- if w not in D:
- Q.append(w)
- D[w] = Dv + 1
- if D[w] == Dv + 1: # this is a shortest path, count paths
- sigma[w] += sigmav
- P[w].append(v) # predecessors
- return S, P, sigma, D
def _single_source_dijkstra_path_basic(G, s, weight):
    """Dijkstra search from ``s`` that also counts shortest paths.

    Returns ``(S, P, sigma, D)``: settled nodes in nondecreasing distance
    order, predecessor lists, shortest-path counts, and distances.
    """
    # Resolve the weight argument to a callable (v, w, edgedata) -> distance.
    weight = _weight_function(G, weight)
    # modified from Eppstein
    S = []
    P = {}
    for v in G:
        P[v] = []
    sigma = dict.fromkeys(G, 0.0)  # sigma[v]=0 for v in G
    D = {}
    sigma[s] = 1.0
    push = heappush
    pop = heappop
    # seen[w] is the best tentative distance found for w so far.
    seen = {s: 0}
    # Monotonic counter breaks ties in the heap so nodes are never compared.
    c = count()
    Q = []  # use Q as heap with (distance,node id) tuples
    push(Q, (0, next(c), s, s))
    while Q:
        (dist, _, pred, v) = pop(Q)
        if v in D:
            continue  # already searched this node.
        # Path counting is deferred to settle time: the predecessor's count
        # is final only once the predecessor itself has been popped.
        sigma[v] += sigma[pred]  # count paths
        S.append(v)
        D[v] = dist
        for w, edgedata in G[v].items():
            vw_dist = dist + weight(v, w, edgedata)
            if w not in D and (w not in seen or vw_dist < seen[w]):
                # Strictly shorter route to w: reset its counts/predecessors.
                seen[w] = vw_dist
                push(Q, (vw_dist, next(c), v, w))
                sigma[w] = 0.0
                P[w] = [v]
            elif vw_dist == seen[w]:  # handle equal paths
                sigma[w] += sigma[v]
                P[w].append(v)
    return S, P, sigma, D
- def _accumulate_basic(betweenness, S, P, sigma, s):
- delta = dict.fromkeys(S, 0)
- while S:
- w = S.pop()
- coeff = (1 + delta[w]) / sigma[w]
- for v in P[w]:
- delta[v] += sigma[v] * coeff
- if w != s:
- betweenness[w] += delta[w]
- return betweenness, delta
- def _accumulate_endpoints(betweenness, S, P, sigma, s):
- betweenness[s] += len(S) - 1
- delta = dict.fromkeys(S, 0)
- while S:
- w = S.pop()
- coeff = (1 + delta[w]) / sigma[w]
- for v in P[w]:
- delta[v] += sigma[v] * coeff
- if w != s:
- betweenness[w] += delta[w] + 1
- return betweenness, delta
- def _accumulate_edges(betweenness, S, P, sigma, s):
- delta = dict.fromkeys(S, 0)
- while S:
- w = S.pop()
- coeff = (1 + delta[w]) / sigma[w]
- for v in P[w]:
- c = sigma[v] * coeff
- if (v, w) not in betweenness:
- betweenness[(w, v)] += c
- else:
- betweenness[(v, w)] += c
- delta[v] += c
- if w != s:
- betweenness[w] += delta[w]
- return betweenness
- def _rescale(betweenness, n, normalized, directed=False, k=None, endpoints=False):
- if normalized:
- if endpoints:
- if n < 2:
- scale = None # no normalization
- else:
- # Scale factor should include endpoint nodes
- scale = 1 / (n * (n - 1))
- elif n <= 2:
- scale = None # no normalization b=0 for all nodes
- else:
- scale = 1 / ((n - 1) * (n - 2))
- else: # rescale by 2 for undirected graphs
- if not directed:
- scale = 0.5
- else:
- scale = None
- if scale is not None:
- if k is not None:
- scale = scale * n / k
- for v in betweenness:
- betweenness[v] *= scale
- return betweenness
- def _rescale_e(betweenness, n, normalized, directed=False, k=None):
- if normalized:
- if n <= 1:
- scale = None # no normalization b=0 for all nodes
- else:
- scale = 1 / (n * (n - 1))
- else: # rescale by 2 for undirected graphs
- if not directed:
- scale = 0.5
- else:
- scale = None
- if scale is not None:
- if k is not None:
- scale = scale * n / k
- for v in betweenness:
- betweenness[v] *= scale
- return betweenness
@not_implemented_for("graph")
def _add_edge_keys(G, betweenness, weight=None):
    r"""Adds the corrected betweenness centrality (BC) values for multigraphs.

    Parameters
    ----------
    G : NetworkX graph.

    betweenness : dictionary
        Dictionary mapping adjacent node tuples to betweenness centrality values.

    weight : string or function
        See `_weight_function` for details. Defaults to `None`.

    Returns
    -------
    edges : dictionary
        The parameter `betweenness` including edges with keys and their
        betweenness centrality values.

    The BC value is divided among edges of equal weight.
    """
    _weight = _weight_function(G, weight)
    edge_bc = dict.fromkeys(G.edges, 0.0)
    for u, v in betweenness:
        multi_edge = G[u][v]
        # Weight of the lightest parallel edge between u and v.
        min_wt = _weight(u, v, multi_edge)
        # All parallel keys matching that weight lie on shortest paths and
        # share the (u, v) betweenness equally.
        min_keys = [
            key for key in multi_edge if _weight(u, v, {key: multi_edge[key]}) == min_wt
        ]
        share = betweenness[(u, v)] / len(min_keys)
        for key in min_keys:
            edge_bc[(u, v, key)] = share
    return edge_bc
|