betweenness.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. """Betweenness centrality measures."""
  2. from collections import deque
  3. from heapq import heappop, heappush
  4. from itertools import count
  5. import networkx as nx
  6. from networkx.algorithms.shortest_paths.weighted import _weight_function
  7. from networkx.utils import py_random_state
  8. from networkx.utils.decorators import not_implemented_for
  9. __all__ = ["betweenness_centrality", "edge_betweenness_centrality"]
  10. @nx._dispatch
  11. @py_random_state(5)
  12. def betweenness_centrality(
  13. G, k=None, normalized=True, weight=None, endpoints=False, seed=None
  14. ):
  15. r"""Compute the shortest-path betweenness centrality for nodes.
  16. Betweenness centrality of a node $v$ is the sum of the
  17. fraction of all-pairs shortest paths that pass through $v$
  18. .. math::
  19. c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}
  20. where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
  21. shortest $(s, t)$-paths, and $\sigma(s, t|v)$ is the number of
  22. those paths passing through some node $v$ other than $s, t$.
  23. If $s = t$, $\sigma(s, t) = 1$, and if $v \in {s, t}$,
  24. $\sigma(s, t|v) = 0$ [2]_.
  25. Parameters
  26. ----------
  27. G : graph
  28. A NetworkX graph.
  29. k : int, optional (default=None)
  30. If k is not None use k node samples to estimate betweenness.
  31. The value of k <= n where n is the number of nodes in the graph.
  32. Higher values give better approximation.
  33. normalized : bool, optional
  34. If True the betweenness values are normalized by `2/((n-1)(n-2))`
  35. for graphs, and `1/((n-1)(n-2))` for directed graphs where `n`
  36. is the number of nodes in G.
  37. weight : None or string, optional (default=None)
  38. If None, all edge weights are considered equal.
  39. Otherwise holds the name of the edge attribute used as weight.
  40. Weights are used to calculate weighted shortest paths, so they are
  41. interpreted as distances.
  42. endpoints : bool, optional
  43. If True include the endpoints in the shortest path counts.
  44. seed : integer, random_state, or None (default)
  45. Indicator of random number generation state.
  46. See :ref:`Randomness<randomness>`.
  47. Note that this is only used if k is not None.
  48. Returns
  49. -------
  50. nodes : dictionary
  51. Dictionary of nodes with betweenness centrality as the value.
  52. See Also
  53. --------
  54. edge_betweenness_centrality
  55. load_centrality
  56. Notes
  57. -----
  58. The algorithm is from Ulrik Brandes [1]_.
  59. See [4]_ for the original first published version and [2]_ for details on
  60. algorithms for variations and related metrics.
  61. For approximate betweenness calculations set k=#samples to use
  62. k nodes ("pivots") to estimate the betweenness values. For an estimate
  63. of the number of pivots needed see [3]_.
  64. For weighted graphs the edge weights must be greater than zero.
  65. Zero edge weights can produce an infinite number of equal length
  66. paths between pairs of nodes.
  67. The total number of paths between source and target is counted
  68. differently for directed and undirected graphs. Directed paths
  69. are easy to count. Undirected paths are tricky: should a path
  70. from "u" to "v" count as 1 undirected path or as 2 directed paths?
  71. For betweenness_centrality we report the number of undirected
  72. paths when G is undirected.
  73. For betweenness_centrality_subset the reporting is different.
  74. If the source and target subsets are the same, then we want
  75. to count undirected paths. But if the source and target subsets
  76. differ -- for example, if sources is {0} and targets is {1},
  77. then we are only counting the paths in one direction. They are
  78. undirected paths but we are counting them in a directed way.
  79. To count them as undirected paths, each should count as half a path.
  80. References
  81. ----------
  82. .. [1] Ulrik Brandes:
  83. A Faster Algorithm for Betweenness Centrality.
  84. Journal of Mathematical Sociology 25(2):163-177, 2001.
  85. https://doi.org/10.1080/0022250X.2001.9990249
  86. .. [2] Ulrik Brandes:
  87. On Variants of Shortest-Path Betweenness
  88. Centrality and their Generic Computation.
  89. Social Networks 30(2):136-145, 2008.
  90. https://doi.org/10.1016/j.socnet.2007.11.001
  91. .. [3] Ulrik Brandes and Christian Pich:
  92. Centrality Estimation in Large Networks.
  93. International Journal of Bifurcation and Chaos 17(7):2303-2318, 2007.
  94. https://dx.doi.org/10.1142/S0218127407018403
  95. .. [4] Linton C. Freeman:
  96. A set of measures of centrality based on betweenness.
  97. Sociometry 40: 35–41, 1977
  98. https://doi.org/10.2307/3033543
  99. """
  100. betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G
  101. if k is None:
  102. nodes = G
  103. else:
  104. nodes = seed.sample(list(G.nodes()), k)
  105. for s in nodes:
  106. # single source shortest paths
  107. if weight is None: # use BFS
  108. S, P, sigma, _ = _single_source_shortest_path_basic(G, s)
  109. else: # use Dijkstra's algorithm
  110. S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight)
  111. # accumulation
  112. if endpoints:
  113. betweenness, _ = _accumulate_endpoints(betweenness, S, P, sigma, s)
  114. else:
  115. betweenness, _ = _accumulate_basic(betweenness, S, P, sigma, s)
  116. # rescaling
  117. betweenness = _rescale(
  118. betweenness,
  119. len(G),
  120. normalized=normalized,
  121. directed=G.is_directed(),
  122. k=k,
  123. endpoints=endpoints,
  124. )
  125. return betweenness
  126. @nx._dispatch
  127. @py_random_state(4)
  128. def edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=None):
  129. r"""Compute betweenness centrality for edges.
  130. Betweenness centrality of an edge $e$ is the sum of the
  131. fraction of all-pairs shortest paths that pass through $e$
  132. .. math::
  133. c_B(e) =\sum_{s,t \in V} \frac{\sigma(s, t|e)}{\sigma(s, t)}
  134. where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
  135. shortest $(s, t)$-paths, and $\sigma(s, t|e)$ is the number of
  136. those paths passing through edge $e$ [2]_.
  137. Parameters
  138. ----------
  139. G : graph
  140. A NetworkX graph.
  141. k : int, optional (default=None)
  142. If k is not None use k node samples to estimate betweenness.
  143. The value of k <= n where n is the number of nodes in the graph.
  144. Higher values give better approximation.
  145. normalized : bool, optional
  146. If True the betweenness values are normalized by $2/(n(n-1))$
  147. for graphs, and $1/(n(n-1))$ for directed graphs where $n$
  148. is the number of nodes in G.
  149. weight : None or string, optional (default=None)
  150. If None, all edge weights are considered equal.
  151. Otherwise holds the name of the edge attribute used as weight.
  152. Weights are used to calculate weighted shortest paths, so they are
  153. interpreted as distances.
  154. seed : integer, random_state, or None (default)
  155. Indicator of random number generation state.
  156. See :ref:`Randomness<randomness>`.
  157. Note that this is only used if k is not None.
  158. Returns
  159. -------
  160. edges : dictionary
  161. Dictionary of edges with betweenness centrality as the value.
  162. See Also
  163. --------
  164. betweenness_centrality
  165. edge_load
  166. Notes
  167. -----
  168. The algorithm is from Ulrik Brandes [1]_.
  169. For weighted graphs the edge weights must be greater than zero.
  170. Zero edge weights can produce an infinite number of equal length
  171. paths between pairs of nodes.
  172. References
  173. ----------
  174. .. [1] A Faster Algorithm for Betweenness Centrality. Ulrik Brandes,
  175. Journal of Mathematical Sociology 25(2):163-177, 2001.
  176. https://doi.org/10.1080/0022250X.2001.9990249
  177. .. [2] Ulrik Brandes: On Variants of Shortest-Path Betweenness
  178. Centrality and their Generic Computation.
  179. Social Networks 30(2):136-145, 2008.
  180. https://doi.org/10.1016/j.socnet.2007.11.001
  181. """
  182. betweenness = dict.fromkeys(G, 0.0) # b[v]=0 for v in G
  183. # b[e]=0 for e in G.edges()
  184. betweenness.update(dict.fromkeys(G.edges(), 0.0))
  185. if k is None:
  186. nodes = G
  187. else:
  188. nodes = seed.sample(list(G.nodes()), k)
  189. for s in nodes:
  190. # single source shortest paths
  191. if weight is None: # use BFS
  192. S, P, sigma, _ = _single_source_shortest_path_basic(G, s)
  193. else: # use Dijkstra's algorithm
  194. S, P, sigma, _ = _single_source_dijkstra_path_basic(G, s, weight)
  195. # accumulation
  196. betweenness = _accumulate_edges(betweenness, S, P, sigma, s)
  197. # rescaling
  198. for n in G: # remove nodes to only return edges
  199. del betweenness[n]
  200. betweenness = _rescale_e(
  201. betweenness, len(G), normalized=normalized, directed=G.is_directed()
  202. )
  203. if G.is_multigraph():
  204. betweenness = _add_edge_keys(G, betweenness, weight=weight)
  205. return betweenness
  206. # helpers for betweenness centrality
  207. def _single_source_shortest_path_basic(G, s):
  208. S = []
  209. P = {}
  210. for v in G:
  211. P[v] = []
  212. sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G
  213. D = {}
  214. sigma[s] = 1.0
  215. D[s] = 0
  216. Q = deque([s])
  217. while Q: # use BFS to find shortest paths
  218. v = Q.popleft()
  219. S.append(v)
  220. Dv = D[v]
  221. sigmav = sigma[v]
  222. for w in G[v]:
  223. if w not in D:
  224. Q.append(w)
  225. D[w] = Dv + 1
  226. if D[w] == Dv + 1: # this is a shortest path, count paths
  227. sigma[w] += sigmav
  228. P[w].append(v) # predecessors
  229. return S, P, sigma, D
  230. def _single_source_dijkstra_path_basic(G, s, weight):
  231. weight = _weight_function(G, weight)
  232. # modified from Eppstein
  233. S = []
  234. P = {}
  235. for v in G:
  236. P[v] = []
  237. sigma = dict.fromkeys(G, 0.0) # sigma[v]=0 for v in G
  238. D = {}
  239. sigma[s] = 1.0
  240. push = heappush
  241. pop = heappop
  242. seen = {s: 0}
  243. c = count()
  244. Q = [] # use Q as heap with (distance,node id) tuples
  245. push(Q, (0, next(c), s, s))
  246. while Q:
  247. (dist, _, pred, v) = pop(Q)
  248. if v in D:
  249. continue # already searched this node.
  250. sigma[v] += sigma[pred] # count paths
  251. S.append(v)
  252. D[v] = dist
  253. for w, edgedata in G[v].items():
  254. vw_dist = dist + weight(v, w, edgedata)
  255. if w not in D and (w not in seen or vw_dist < seen[w]):
  256. seen[w] = vw_dist
  257. push(Q, (vw_dist, next(c), v, w))
  258. sigma[w] = 0.0
  259. P[w] = [v]
  260. elif vw_dist == seen[w]: # handle equal paths
  261. sigma[w] += sigma[v]
  262. P[w].append(v)
  263. return S, P, sigma, D
  264. def _accumulate_basic(betweenness, S, P, sigma, s):
  265. delta = dict.fromkeys(S, 0)
  266. while S:
  267. w = S.pop()
  268. coeff = (1 + delta[w]) / sigma[w]
  269. for v in P[w]:
  270. delta[v] += sigma[v] * coeff
  271. if w != s:
  272. betweenness[w] += delta[w]
  273. return betweenness, delta
  274. def _accumulate_endpoints(betweenness, S, P, sigma, s):
  275. betweenness[s] += len(S) - 1
  276. delta = dict.fromkeys(S, 0)
  277. while S:
  278. w = S.pop()
  279. coeff = (1 + delta[w]) / sigma[w]
  280. for v in P[w]:
  281. delta[v] += sigma[v] * coeff
  282. if w != s:
  283. betweenness[w] += delta[w] + 1
  284. return betweenness, delta
  285. def _accumulate_edges(betweenness, S, P, sigma, s):
  286. delta = dict.fromkeys(S, 0)
  287. while S:
  288. w = S.pop()
  289. coeff = (1 + delta[w]) / sigma[w]
  290. for v in P[w]:
  291. c = sigma[v] * coeff
  292. if (v, w) not in betweenness:
  293. betweenness[(w, v)] += c
  294. else:
  295. betweenness[(v, w)] += c
  296. delta[v] += c
  297. if w != s:
  298. betweenness[w] += delta[w]
  299. return betweenness
  300. def _rescale(betweenness, n, normalized, directed=False, k=None, endpoints=False):
  301. if normalized:
  302. if endpoints:
  303. if n < 2:
  304. scale = None # no normalization
  305. else:
  306. # Scale factor should include endpoint nodes
  307. scale = 1 / (n * (n - 1))
  308. elif n <= 2:
  309. scale = None # no normalization b=0 for all nodes
  310. else:
  311. scale = 1 / ((n - 1) * (n - 2))
  312. else: # rescale by 2 for undirected graphs
  313. if not directed:
  314. scale = 0.5
  315. else:
  316. scale = None
  317. if scale is not None:
  318. if k is not None:
  319. scale = scale * n / k
  320. for v in betweenness:
  321. betweenness[v] *= scale
  322. return betweenness
  323. def _rescale_e(betweenness, n, normalized, directed=False, k=None):
  324. if normalized:
  325. if n <= 1:
  326. scale = None # no normalization b=0 for all nodes
  327. else:
  328. scale = 1 / (n * (n - 1))
  329. else: # rescale by 2 for undirected graphs
  330. if not directed:
  331. scale = 0.5
  332. else:
  333. scale = None
  334. if scale is not None:
  335. if k is not None:
  336. scale = scale * n / k
  337. for v in betweenness:
  338. betweenness[v] *= scale
  339. return betweenness
  340. @not_implemented_for("graph")
  341. def _add_edge_keys(G, betweenness, weight=None):
  342. r"""Adds the corrected betweenness centrality (BC) values for multigraphs.
  343. Parameters
  344. ----------
  345. G : NetworkX graph.
  346. betweenness : dictionary
  347. Dictionary mapping adjacent node tuples to betweenness centrality values.
  348. weight : string or function
  349. See `_weight_function` for details. Defaults to `None`.
  350. Returns
  351. -------
  352. edges : dictionary
  353. The parameter `betweenness` including edges with keys and their
  354. betweenness centrality values.
  355. The BC value is divided among edges of equal weight.
  356. """
  357. _weight = _weight_function(G, weight)
  358. edge_bc = dict.fromkeys(G.edges, 0.0)
  359. for u, v in betweenness:
  360. d = G[u][v]
  361. wt = _weight(u, v, d)
  362. keys = [k for k in d if _weight(u, v, {k: d[k]}) == wt]
  363. bc = betweenness[(u, v)] / len(keys)
  364. for k in keys:
  365. edge_bc[(u, v, k)] = bc
  366. return edge_bc