betweenness_subset.py 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. """Betweenness centrality measures for subsets of nodes."""
  2. from networkx.algorithms.centrality.betweenness import (
  3. _add_edge_keys,
  4. )
  5. from networkx.algorithms.centrality.betweenness import (
  6. _single_source_dijkstra_path_basic as dijkstra,
  7. )
  8. from networkx.algorithms.centrality.betweenness import (
  9. _single_source_shortest_path_basic as shortest_path,
  10. )
  11. __all__ = [
  12. "betweenness_centrality_subset",
  13. "edge_betweenness_centrality_subset",
  14. ]
  15. def betweenness_centrality_subset(G, sources, targets, normalized=False, weight=None):
  16. r"""Compute betweenness centrality for a subset of nodes.
  17. .. math::
  18. c_B(v) =\sum_{s\in S, t \in T} \frac{\sigma(s, t|v)}{\sigma(s, t)}
  19. where $S$ is the set of sources, $T$ is the set of targets,
  20. $\sigma(s, t)$ is the number of shortest $(s, t)$-paths,
  21. and $\sigma(s, t|v)$ is the number of those paths
  22. passing through some node $v$ other than $s, t$.
  23. If $s = t$, $\sigma(s, t) = 1$,
  24. and if $v \in {s, t}$, $\sigma(s, t|v) = 0$ [2]_.
  25. Parameters
  26. ----------
  27. G : graph
  28. A NetworkX graph.
  29. sources: list of nodes
  30. Nodes to use as sources for shortest paths in betweenness
  31. targets: list of nodes
  32. Nodes to use as targets for shortest paths in betweenness
  33. normalized : bool, optional
  34. If True the betweenness values are normalized by $2/((n-1)(n-2))$
  35. for graphs, and $1/((n-1)(n-2))$ for directed graphs where $n$
  36. is the number of nodes in G.
  37. weight : None or string, optional (default=None)
  38. If None, all edge weights are considered equal.
  39. Otherwise holds the name of the edge attribute used as weight.
  40. Weights are used to calculate weighted shortest paths, so they are
  41. interpreted as distances.
  42. Returns
  43. -------
  44. nodes : dictionary
  45. Dictionary of nodes with betweenness centrality as the value.
  46. See Also
  47. --------
  48. edge_betweenness_centrality
  49. load_centrality
  50. Notes
  51. -----
  52. The basic algorithm is from [1]_.
  53. For weighted graphs the edge weights must be greater than zero.
  54. Zero edge weights can produce an infinite number of equal length
  55. paths between pairs of nodes.
  56. The normalization might seem a little strange but it is
  57. designed to make betweenness_centrality(G) be the same as
  58. betweenness_centrality_subset(G,sources=G.nodes(),targets=G.nodes()).
  59. The total number of paths between source and target is counted
  60. differently for directed and undirected graphs. Directed paths
  61. are easy to count. Undirected paths are tricky: should a path
  62. from "u" to "v" count as 1 undirected path or as 2 directed paths?
  63. For betweenness_centrality we report the number of undirected
  64. paths when G is undirected.
  65. For betweenness_centrality_subset the reporting is different.
  66. If the source and target subsets are the same, then we want
  67. to count undirected paths. But if the source and target subsets
  68. differ -- for example, if sources is {0} and targets is {1},
  69. then we are only counting the paths in one direction. They are
  70. undirected paths but we are counting them in a directed way.
  71. To count them as undirected paths, each should count as half a path.
  72. References
  73. ----------
  74. .. [1] Ulrik Brandes, A Faster Algorithm for Betweenness Centrality.
  75. Journal of Mathematical Sociology 25(2):163-177, 2001.
  76. https://doi.org/10.1080/0022250X.2001.9990249
  77. .. [2] Ulrik Brandes: On Variants of Shortest-Path Betweenness
  78. Centrality and their Generic Computation.
  79. Social Networks 30(2):136-145, 2008.
  80. https://doi.org/10.1016/j.socnet.2007.11.001
  81. """
  82. b = dict.fromkeys(G, 0.0) # b[v]=0 for v in G
  83. for s in sources:
  84. # single source shortest paths
  85. if weight is None: # use BFS
  86. S, P, sigma, _ = shortest_path(G, s)
  87. else: # use Dijkstra's algorithm
  88. S, P, sigma, _ = dijkstra(G, s, weight)
  89. b = _accumulate_subset(b, S, P, sigma, s, targets)
  90. b = _rescale(b, len(G), normalized=normalized, directed=G.is_directed())
  91. return b
  92. def edge_betweenness_centrality_subset(
  93. G, sources, targets, normalized=False, weight=None
  94. ):
  95. r"""Compute betweenness centrality for edges for a subset of nodes.
  96. .. math::
  97. c_B(v) =\sum_{s\in S,t \in T} \frac{\sigma(s, t|e)}{\sigma(s, t)}
  98. where $S$ is the set of sources, $T$ is the set of targets,
  99. $\sigma(s, t)$ is the number of shortest $(s, t)$-paths,
  100. and $\sigma(s, t|e)$ is the number of those paths
  101. passing through edge $e$ [2]_.
  102. Parameters
  103. ----------
  104. G : graph
  105. A networkx graph.
  106. sources: list of nodes
  107. Nodes to use as sources for shortest paths in betweenness
  108. targets: list of nodes
  109. Nodes to use as targets for shortest paths in betweenness
  110. normalized : bool, optional
  111. If True the betweenness values are normalized by `2/(n(n-1))`
  112. for graphs, and `1/(n(n-1))` for directed graphs where `n`
  113. is the number of nodes in G.
  114. weight : None or string, optional (default=None)
  115. If None, all edge weights are considered equal.
  116. Otherwise holds the name of the edge attribute used as weight.
  117. Weights are used to calculate weighted shortest paths, so they are
  118. interpreted as distances.
  119. Returns
  120. -------
  121. edges : dictionary
  122. Dictionary of edges with Betweenness centrality as the value.
  123. See Also
  124. --------
  125. betweenness_centrality
  126. edge_load
  127. Notes
  128. -----
  129. The basic algorithm is from [1]_.
  130. For weighted graphs the edge weights must be greater than zero.
  131. Zero edge weights can produce an infinite number of equal length
  132. paths between pairs of nodes.
  133. The normalization might seem a little strange but it is the same
  134. as in edge_betweenness_centrality() and is designed to make
  135. edge_betweenness_centrality(G) be the same as
  136. edge_betweenness_centrality_subset(G,sources=G.nodes(),targets=G.nodes()).
  137. References
  138. ----------
  139. .. [1] Ulrik Brandes, A Faster Algorithm for Betweenness Centrality.
  140. Journal of Mathematical Sociology 25(2):163-177, 2001.
  141. https://doi.org/10.1080/0022250X.2001.9990249
  142. .. [2] Ulrik Brandes: On Variants of Shortest-Path Betweenness
  143. Centrality and their Generic Computation.
  144. Social Networks 30(2):136-145, 2008.
  145. https://doi.org/10.1016/j.socnet.2007.11.001
  146. """
  147. b = dict.fromkeys(G, 0.0) # b[v]=0 for v in G
  148. b.update(dict.fromkeys(G.edges(), 0.0)) # b[e] for e in G.edges()
  149. for s in sources:
  150. # single source shortest paths
  151. if weight is None: # use BFS
  152. S, P, sigma, _ = shortest_path(G, s)
  153. else: # use Dijkstra's algorithm
  154. S, P, sigma, _ = dijkstra(G, s, weight)
  155. b = _accumulate_edges_subset(b, S, P, sigma, s, targets)
  156. for n in G: # remove nodes to only return edges
  157. del b[n]
  158. b = _rescale_e(b, len(G), normalized=normalized, directed=G.is_directed())
  159. if G.is_multigraph():
  160. b = _add_edge_keys(G, b, weight=weight)
  161. return b
  162. def _accumulate_subset(betweenness, S, P, sigma, s, targets):
  163. delta = dict.fromkeys(S, 0.0)
  164. target_set = set(targets) - {s}
  165. while S:
  166. w = S.pop()
  167. if w in target_set:
  168. coeff = (delta[w] + 1.0) / sigma[w]
  169. else:
  170. coeff = delta[w] / sigma[w]
  171. for v in P[w]:
  172. delta[v] += sigma[v] * coeff
  173. if w != s:
  174. betweenness[w] += delta[w]
  175. return betweenness
  176. def _accumulate_edges_subset(betweenness, S, P, sigma, s, targets):
  177. """edge_betweenness_centrality_subset helper."""
  178. delta = dict.fromkeys(S, 0)
  179. target_set = set(targets)
  180. while S:
  181. w = S.pop()
  182. for v in P[w]:
  183. if w in target_set:
  184. c = (sigma[v] / sigma[w]) * (1.0 + delta[w])
  185. else:
  186. c = delta[w] / len(P[w])
  187. if (v, w) not in betweenness:
  188. betweenness[(w, v)] += c
  189. else:
  190. betweenness[(v, w)] += c
  191. delta[v] += c
  192. if w != s:
  193. betweenness[w] += delta[w]
  194. return betweenness
  195. def _rescale(betweenness, n, normalized, directed=False):
  196. """betweenness_centrality_subset helper."""
  197. if normalized:
  198. if n <= 2:
  199. scale = None # no normalization b=0 for all nodes
  200. else:
  201. scale = 1.0 / ((n - 1) * (n - 2))
  202. else: # rescale by 2 for undirected graphs
  203. if not directed:
  204. scale = 0.5
  205. else:
  206. scale = None
  207. if scale is not None:
  208. for v in betweenness:
  209. betweenness[v] *= scale
  210. return betweenness
  211. def _rescale_e(betweenness, n, normalized, directed=False):
  212. """edge_betweenness_centrality_subset helper."""
  213. if normalized:
  214. if n <= 1:
  215. scale = None # no normalization b=0 for all nodes
  216. else:
  217. scale = 1.0 / (n * (n - 1))
  218. else: # rescale by 2 for undirected graphs
  219. if not directed:
  220. scale = 0.5
  221. else:
  222. scale = None
  223. if scale is not None:
  224. for v in betweenness:
  225. betweenness[v] *= scale
  226. return betweenness