current_flow_betweenness.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  1. """Current-flow betweenness centrality measures."""
  2. import networkx as nx
  3. from networkx.algorithms.centrality.flow_matrix import (
  4. CGInverseLaplacian,
  5. FullInverseLaplacian,
  6. SuperLUInverseLaplacian,
  7. flow_matrix_row,
  8. )
  9. from networkx.utils import (
  10. not_implemented_for,
  11. py_random_state,
  12. reverse_cuthill_mckee_ordering,
  13. )
  14. __all__ = [
  15. "current_flow_betweenness_centrality",
  16. "approximate_current_flow_betweenness_centrality",
  17. "edge_current_flow_betweenness_centrality",
  18. ]
  19. @py_random_state(7)
  20. @not_implemented_for("directed")
  21. def approximate_current_flow_betweenness_centrality(
  22. G,
  23. normalized=True,
  24. weight=None,
  25. dtype=float,
  26. solver="full",
  27. epsilon=0.5,
  28. kmax=10000,
  29. seed=None,
  30. ):
  31. r"""Compute the approximate current-flow betweenness centrality for nodes.
  32. Approximates the current-flow betweenness centrality within absolute
  33. error of epsilon with high probability [1]_.
  34. Parameters
  35. ----------
  36. G : graph
  37. A NetworkX graph
  38. normalized : bool, optional (default=True)
  39. If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
  40. n is the number of nodes in G.
  41. weight : string or None, optional (default=None)
  42. Key for edge data used as the edge weight.
  43. If None, then use 1 as each edge weight.
  44. The weight reflects the capacity or the strength of the
  45. edge.
  46. dtype : data type (float)
  47. Default data type for internal matrices.
  48. Set to np.float32 for lower memory consumption.
  49. solver : string (default='full')
  50. Type of linear solver to use for computing the flow matrix.
  51. Options are "full" (uses most memory), "lu" (recommended), and
  52. "cg" (uses least memory).
  53. epsilon: float
  54. Absolute error tolerance.
  55. kmax: int
  56. Maximum number of sample node pairs to use for approximation.
  57. seed : integer, random_state, or None (default)
  58. Indicator of random number generation state.
  59. See :ref:`Randomness<randomness>`.
  60. Returns
  61. -------
  62. nodes : dictionary
  63. Dictionary of nodes with betweenness centrality as the value.
  64. See Also
  65. --------
  66. current_flow_betweenness_centrality
  67. Notes
  68. -----
  69. The running time is $O((1/\epsilon^2)m{\sqrt k} \log n)$
  70. and the space required is $O(m)$ for $n$ nodes and $m$ edges.
  71. If the edges have a 'weight' attribute they will be used as
  72. weights in this algorithm. Unspecified weights are set to 1.
  73. References
  74. ----------
  75. .. [1] Ulrik Brandes and Daniel Fleischer:
  76. Centrality Measures Based on Current Flow.
  77. Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
  78. LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
  79. https://doi.org/10.1007/978-3-540-31856-9_44
  80. """
  81. import numpy as np
  82. if not nx.is_connected(G):
  83. raise nx.NetworkXError("Graph not connected.")
  84. solvername = {
  85. "full": FullInverseLaplacian,
  86. "lu": SuperLUInverseLaplacian,
  87. "cg": CGInverseLaplacian,
  88. }
  89. n = G.number_of_nodes()
  90. ordering = list(reverse_cuthill_mckee_ordering(G))
  91. # make a copy with integer labels according to rcm ordering
  92. # this could be done without a copy if we really wanted to
  93. H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
  94. L = nx.laplacian_matrix(H, nodelist=range(n), weight=weight).asformat("csc")
  95. L = L.astype(dtype)
  96. C = solvername[solver](L, dtype=dtype) # initialize solver
  97. betweenness = dict.fromkeys(H, 0.0)
  98. nb = (n - 1.0) * (n - 2.0) # normalization factor
  99. cstar = n * (n - 1) / nb
  100. l = 1 # parameter in approximation, adjustable
  101. k = l * int(np.ceil((cstar / epsilon) ** 2 * np.log(n)))
  102. if k > kmax:
  103. msg = f"Number random pairs k>kmax ({k}>{kmax}) "
  104. raise nx.NetworkXError(msg, "Increase kmax or epsilon")
  105. cstar2k = cstar / (2 * k)
  106. for _ in range(k):
  107. s, t = pair = seed.sample(range(n), 2)
  108. b = np.zeros(n, dtype=dtype)
  109. b[s] = 1
  110. b[t] = -1
  111. p = C.solve(b)
  112. for v in H:
  113. if v in pair:
  114. continue
  115. for nbr in H[v]:
  116. w = H[v][nbr].get(weight, 1.0)
  117. betweenness[v] += w * np.abs(p[v] - p[nbr]) * cstar2k
  118. if normalized:
  119. factor = 1.0
  120. else:
  121. factor = nb / 2.0
  122. # remap to original node names and "unnormalize" if required
  123. return {ordering[k]: v * factor for k, v in betweenness.items()}
  124. @not_implemented_for("directed")
  125. def current_flow_betweenness_centrality(
  126. G, normalized=True, weight=None, dtype=float, solver="full"
  127. ):
  128. r"""Compute current-flow betweenness centrality for nodes.
  129. Current-flow betweenness centrality uses an electrical current
  130. model for information spreading in contrast to betweenness
  131. centrality which uses shortest paths.
  132. Current-flow betweenness centrality is also known as
  133. random-walk betweenness centrality [2]_.
  134. Parameters
  135. ----------
  136. G : graph
  137. A NetworkX graph
  138. normalized : bool, optional (default=True)
  139. If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
  140. n is the number of nodes in G.
  141. weight : string or None, optional (default=None)
  142. Key for edge data used as the edge weight.
  143. If None, then use 1 as each edge weight.
  144. The weight reflects the capacity or the strength of the
  145. edge.
  146. dtype : data type (float)
  147. Default data type for internal matrices.
  148. Set to np.float32 for lower memory consumption.
  149. solver : string (default='full')
  150. Type of linear solver to use for computing the flow matrix.
  151. Options are "full" (uses most memory), "lu" (recommended), and
  152. "cg" (uses least memory).
  153. Returns
  154. -------
  155. nodes : dictionary
  156. Dictionary of nodes with betweenness centrality as the value.
  157. See Also
  158. --------
  159. approximate_current_flow_betweenness_centrality
  160. betweenness_centrality
  161. edge_betweenness_centrality
  162. edge_current_flow_betweenness_centrality
  163. Notes
  164. -----
  165. Current-flow betweenness can be computed in $O(I(n-1)+mn \log n)$
  166. time [1]_, where $I(n-1)$ is the time needed to compute the
  167. inverse Laplacian. For a full matrix this is $O(n^3)$ but using
  168. sparse methods you can achieve $O(nm{\sqrt k})$ where $k$ is the
  169. Laplacian matrix condition number.
  170. The space required is $O(nw)$ where $w$ is the width of the sparse
  171. Laplacian matrix. Worse case is $w=n$ for $O(n^2)$.
  172. If the edges have a 'weight' attribute they will be used as
  173. weights in this algorithm. Unspecified weights are set to 1.
  174. References
  175. ----------
  176. .. [1] Centrality Measures Based on Current Flow.
  177. Ulrik Brandes and Daniel Fleischer,
  178. Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
  179. LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
  180. https://doi.org/10.1007/978-3-540-31856-9_44
  181. .. [2] A measure of betweenness centrality based on random walks,
  182. M. E. J. Newman, Social Networks 27, 39-54 (2005).
  183. """
  184. if not nx.is_connected(G):
  185. raise nx.NetworkXError("Graph not connected.")
  186. n = G.number_of_nodes()
  187. ordering = list(reverse_cuthill_mckee_ordering(G))
  188. # make a copy with integer labels according to rcm ordering
  189. # this could be done without a copy if we really wanted to
  190. H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
  191. betweenness = dict.fromkeys(H, 0.0) # b[v]=0 for v in H
  192. for row, (s, t) in flow_matrix_row(H, weight=weight, dtype=dtype, solver=solver):
  193. pos = dict(zip(row.argsort()[::-1], range(n)))
  194. for i in range(n):
  195. betweenness[s] += (i - pos[i]) * row[i]
  196. betweenness[t] += (n - i - 1 - pos[i]) * row[i]
  197. if normalized:
  198. nb = (n - 1.0) * (n - 2.0) # normalization factor
  199. else:
  200. nb = 2.0
  201. for v in H:
  202. betweenness[v] = float((betweenness[v] - v) * 2.0 / nb)
  203. return {ordering[k]: v for k, v in betweenness.items()}
  204. @not_implemented_for("directed")
  205. def edge_current_flow_betweenness_centrality(
  206. G, normalized=True, weight=None, dtype=float, solver="full"
  207. ):
  208. r"""Compute current-flow betweenness centrality for edges.
  209. Current-flow betweenness centrality uses an electrical current
  210. model for information spreading in contrast to betweenness
  211. centrality which uses shortest paths.
  212. Current-flow betweenness centrality is also known as
  213. random-walk betweenness centrality [2]_.
  214. Parameters
  215. ----------
  216. G : graph
  217. A NetworkX graph
  218. normalized : bool, optional (default=True)
  219. If True the betweenness values are normalized by 2/[(n-1)(n-2)] where
  220. n is the number of nodes in G.
  221. weight : string or None, optional (default=None)
  222. Key for edge data used as the edge weight.
  223. If None, then use 1 as each edge weight.
  224. The weight reflects the capacity or the strength of the
  225. edge.
  226. dtype : data type (default=float)
  227. Default data type for internal matrices.
  228. Set to np.float32 for lower memory consumption.
  229. solver : string (default='full')
  230. Type of linear solver to use for computing the flow matrix.
  231. Options are "full" (uses most memory), "lu" (recommended), and
  232. "cg" (uses least memory).
  233. Returns
  234. -------
  235. nodes : dictionary
  236. Dictionary of edge tuples with betweenness centrality as the value.
  237. Raises
  238. ------
  239. NetworkXError
  240. The algorithm does not support DiGraphs.
  241. If the input graph is an instance of DiGraph class, NetworkXError
  242. is raised.
  243. See Also
  244. --------
  245. betweenness_centrality
  246. edge_betweenness_centrality
  247. current_flow_betweenness_centrality
  248. Notes
  249. -----
  250. Current-flow betweenness can be computed in $O(I(n-1)+mn \log n)$
  251. time [1]_, where $I(n-1)$ is the time needed to compute the
  252. inverse Laplacian. For a full matrix this is $O(n^3)$ but using
  253. sparse methods you can achieve $O(nm{\sqrt k})$ where $k$ is the
  254. Laplacian matrix condition number.
  255. The space required is $O(nw)$ where $w$ is the width of the sparse
  256. Laplacian matrix. Worse case is $w=n$ for $O(n^2)$.
  257. If the edges have a 'weight' attribute they will be used as
  258. weights in this algorithm. Unspecified weights are set to 1.
  259. References
  260. ----------
  261. .. [1] Centrality Measures Based on Current Flow.
  262. Ulrik Brandes and Daniel Fleischer,
  263. Proc. 22nd Symp. Theoretical Aspects of Computer Science (STACS '05).
  264. LNCS 3404, pp. 533-544. Springer-Verlag, 2005.
  265. https://doi.org/10.1007/978-3-540-31856-9_44
  266. .. [2] A measure of betweenness centrality based on random walks,
  267. M. E. J. Newman, Social Networks 27, 39-54 (2005).
  268. """
  269. if not nx.is_connected(G):
  270. raise nx.NetworkXError("Graph not connected.")
  271. n = G.number_of_nodes()
  272. ordering = list(reverse_cuthill_mckee_ordering(G))
  273. # make a copy with integer labels according to rcm ordering
  274. # this could be done without a copy if we really wanted to
  275. H = nx.relabel_nodes(G, dict(zip(ordering, range(n))))
  276. edges = (tuple(sorted((u, v))) for u, v in H.edges())
  277. betweenness = dict.fromkeys(edges, 0.0)
  278. if normalized:
  279. nb = (n - 1.0) * (n - 2.0) # normalization factor
  280. else:
  281. nb = 2.0
  282. for row, (e) in flow_matrix_row(H, weight=weight, dtype=dtype, solver=solver):
  283. pos = dict(zip(row.argsort()[::-1], range(1, n + 1)))
  284. for i in range(n):
  285. betweenness[e] += (i + 1 - pos[i]) * row[i]
  286. betweenness[e] += (n - i - pos[i]) * row[i]
  287. betweenness[e] /= nb
  288. return {(ordering[s], ordering[t]): v for (s, t), v in betweenness.items()}