cuts.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400
  1. """Functions for finding and evaluating cuts in a graph.
  2. """
  3. from itertools import chain
  4. import networkx as nx
  5. __all__ = [
  6. "boundary_expansion",
  7. "conductance",
  8. "cut_size",
  9. "edge_expansion",
  10. "mixing_expansion",
  11. "node_expansion",
  12. "normalized_cut_size",
  13. "volume",
  14. ]
  15. # TODO STILL NEED TO UPDATE ALL THE DOCUMENTATION!
  16. @nx._dispatch
  17. def cut_size(G, S, T=None, weight=None):
  18. """Returns the size of the cut between two sets of nodes.
  19. A *cut* is a partition of the nodes of a graph into two sets. The
  20. *cut size* is the sum of the weights of the edges "between" the two
  21. sets of nodes.
  22. Parameters
  23. ----------
  24. G : NetworkX graph
  25. S : collection
  26. A collection of nodes in `G`.
  27. T : collection
  28. A collection of nodes in `G`. If not specified, this is taken to
  29. be the set complement of `S`.
  30. weight : object
  31. Edge attribute key to use as weight. If not specified, edges
  32. have weight one.
  33. Returns
  34. -------
  35. number
  36. Total weight of all edges from nodes in set `S` to nodes in
  37. set `T` (and, in the case of directed graphs, all edges from
  38. nodes in `T` to nodes in `S`).
  39. Examples
  40. --------
  41. In the graph with two cliques joined by a single edges, the natural
  42. bipartition of the graph into two blocks, one for each clique,
  43. yields a cut of weight one::
  44. >>> G = nx.barbell_graph(3, 0)
  45. >>> S = {0, 1, 2}
  46. >>> T = {3, 4, 5}
  47. >>> nx.cut_size(G, S, T)
  48. 1
  49. Each parallel edge in a multigraph is counted when determining the
  50. cut size::
  51. >>> G = nx.MultiGraph(["ab", "ab"])
  52. >>> S = {"a"}
  53. >>> T = {"b"}
  54. >>> nx.cut_size(G, S, T)
  55. 2
  56. Notes
  57. -----
  58. In a multigraph, the cut size is the total weight of edges including
  59. multiplicity.
  60. """
  61. edges = nx.edge_boundary(G, S, T, data=weight, default=1)
  62. if G.is_directed():
  63. edges = chain(edges, nx.edge_boundary(G, T, S, data=weight, default=1))
  64. return sum(weight for u, v, weight in edges)
  65. @nx._dispatch
  66. def volume(G, S, weight=None):
  67. """Returns the volume of a set of nodes.
  68. The *volume* of a set *S* is the sum of the (out-)degrees of nodes
  69. in *S* (taking into account parallel edges in multigraphs). [1]
  70. Parameters
  71. ----------
  72. G : NetworkX graph
  73. S : collection
  74. A collection of nodes in `G`.
  75. weight : object
  76. Edge attribute key to use as weight. If not specified, edges
  77. have weight one.
  78. Returns
  79. -------
  80. number
  81. The volume of the set of nodes represented by `S` in the graph
  82. `G`.
  83. See also
  84. --------
  85. conductance
  86. cut_size
  87. edge_expansion
  88. edge_boundary
  89. normalized_cut_size
  90. References
  91. ----------
  92. .. [1] David Gleich.
  93. *Hierarchical Directed Spectral Graph Partitioning*.
  94. <https://www.cs.purdue.edu/homes/dgleich/publications/Gleich%202005%20-%20hierarchical%20directed%20spectral.pdf>
  95. """
  96. degree = G.out_degree if G.is_directed() else G.degree
  97. return sum(d for v, d in degree(S, weight=weight))
  98. @nx._dispatch
  99. def normalized_cut_size(G, S, T=None, weight=None):
  100. """Returns the normalized size of the cut between two sets of nodes.
  101. The *normalized cut size* is the cut size times the sum of the
  102. reciprocal sizes of the volumes of the two sets. [1]
  103. Parameters
  104. ----------
  105. G : NetworkX graph
  106. S : collection
  107. A collection of nodes in `G`.
  108. T : collection
  109. A collection of nodes in `G`.
  110. weight : object
  111. Edge attribute key to use as weight. If not specified, edges
  112. have weight one.
  113. Returns
  114. -------
  115. number
  116. The normalized cut size between the two sets `S` and `T`.
  117. Notes
  118. -----
  119. In a multigraph, the cut size is the total weight of edges including
  120. multiplicity.
  121. See also
  122. --------
  123. conductance
  124. cut_size
  125. edge_expansion
  126. volume
  127. References
  128. ----------
  129. .. [1] David Gleich.
  130. *Hierarchical Directed Spectral Graph Partitioning*.
  131. <https://www.cs.purdue.edu/homes/dgleich/publications/Gleich%202005%20-%20hierarchical%20directed%20spectral.pdf>
  132. """
  133. if T is None:
  134. T = set(G) - set(S)
  135. num_cut_edges = cut_size(G, S, T=T, weight=weight)
  136. volume_S = volume(G, S, weight=weight)
  137. volume_T = volume(G, T, weight=weight)
  138. return num_cut_edges * ((1 / volume_S) + (1 / volume_T))
  139. @nx._dispatch
  140. def conductance(G, S, T=None, weight=None):
  141. """Returns the conductance of two sets of nodes.
  142. The *conductance* is the quotient of the cut size and the smaller of
  143. the volumes of the two sets. [1]
  144. Parameters
  145. ----------
  146. G : NetworkX graph
  147. S : collection
  148. A collection of nodes in `G`.
  149. T : collection
  150. A collection of nodes in `G`.
  151. weight : object
  152. Edge attribute key to use as weight. If not specified, edges
  153. have weight one.
  154. Returns
  155. -------
  156. number
  157. The conductance between the two sets `S` and `T`.
  158. See also
  159. --------
  160. cut_size
  161. edge_expansion
  162. normalized_cut_size
  163. volume
  164. References
  165. ----------
  166. .. [1] David Gleich.
  167. *Hierarchical Directed Spectral Graph Partitioning*.
  168. <https://www.cs.purdue.edu/homes/dgleich/publications/Gleich%202005%20-%20hierarchical%20directed%20spectral.pdf>
  169. """
  170. if T is None:
  171. T = set(G) - set(S)
  172. num_cut_edges = cut_size(G, S, T, weight=weight)
  173. volume_S = volume(G, S, weight=weight)
  174. volume_T = volume(G, T, weight=weight)
  175. return num_cut_edges / min(volume_S, volume_T)
  176. @nx._dispatch
  177. def edge_expansion(G, S, T=None, weight=None):
  178. """Returns the edge expansion between two node sets.
  179. The *edge expansion* is the quotient of the cut size and the smaller
  180. of the cardinalities of the two sets. [1]
  181. Parameters
  182. ----------
  183. G : NetworkX graph
  184. S : collection
  185. A collection of nodes in `G`.
  186. T : collection
  187. A collection of nodes in `G`.
  188. weight : object
  189. Edge attribute key to use as weight. If not specified, edges
  190. have weight one.
  191. Returns
  192. -------
  193. number
  194. The edge expansion between the two sets `S` and `T`.
  195. See also
  196. --------
  197. boundary_expansion
  198. mixing_expansion
  199. node_expansion
  200. References
  201. ----------
  202. .. [1] Fan Chung.
  203. *Spectral Graph Theory*.
  204. (CBMS Regional Conference Series in Mathematics, No. 92),
  205. American Mathematical Society, 1997, ISBN 0-8218-0315-8
  206. <http://www.math.ucsd.edu/~fan/research/revised.html>
  207. """
  208. if T is None:
  209. T = set(G) - set(S)
  210. num_cut_edges = cut_size(G, S, T=T, weight=weight)
  211. return num_cut_edges / min(len(S), len(T))
  212. @nx._dispatch
  213. def mixing_expansion(G, S, T=None, weight=None):
  214. """Returns the mixing expansion between two node sets.
  215. The *mixing expansion* is the quotient of the cut size and twice the
  216. number of edges in the graph. [1]
  217. Parameters
  218. ----------
  219. G : NetworkX graph
  220. S : collection
  221. A collection of nodes in `G`.
  222. T : collection
  223. A collection of nodes in `G`.
  224. weight : object
  225. Edge attribute key to use as weight. If not specified, edges
  226. have weight one.
  227. Returns
  228. -------
  229. number
  230. The mixing expansion between the two sets `S` and `T`.
  231. See also
  232. --------
  233. boundary_expansion
  234. edge_expansion
  235. node_expansion
  236. References
  237. ----------
  238. .. [1] Vadhan, Salil P.
  239. "Pseudorandomness."
  240. *Foundations and Trends
  241. in Theoretical Computer Science* 7.1–3 (2011): 1–336.
  242. <https://doi.org/10.1561/0400000010>
  243. """
  244. num_cut_edges = cut_size(G, S, T=T, weight=weight)
  245. num_total_edges = G.number_of_edges()
  246. return num_cut_edges / (2 * num_total_edges)
  247. # TODO What is the generalization to two arguments, S and T? Does the
  248. # denominator become `min(len(S), len(T))`?
  249. @nx._dispatch
  250. def node_expansion(G, S):
  251. """Returns the node expansion of the set `S`.
  252. The *node expansion* is the quotient of the size of the node
  253. boundary of *S* and the cardinality of *S*. [1]
  254. Parameters
  255. ----------
  256. G : NetworkX graph
  257. S : collection
  258. A collection of nodes in `G`.
  259. Returns
  260. -------
  261. number
  262. The node expansion of the set `S`.
  263. See also
  264. --------
  265. boundary_expansion
  266. edge_expansion
  267. mixing_expansion
  268. References
  269. ----------
  270. .. [1] Vadhan, Salil P.
  271. "Pseudorandomness."
  272. *Foundations and Trends
  273. in Theoretical Computer Science* 7.1–3 (2011): 1–336.
  274. <https://doi.org/10.1561/0400000010>
  275. """
  276. neighborhood = set(chain.from_iterable(G.neighbors(v) for v in S))
  277. return len(neighborhood) / len(S)
  278. # TODO What is the generalization to two arguments, S and T? Does the
  279. # denominator become `min(len(S), len(T))`?
  280. @nx._dispatch
  281. def boundary_expansion(G, S):
  282. """Returns the boundary expansion of the set `S`.
  283. The *boundary expansion* is the quotient of the size
  284. of the node boundary and the cardinality of *S*. [1]
  285. Parameters
  286. ----------
  287. G : NetworkX graph
  288. S : collection
  289. A collection of nodes in `G`.
  290. Returns
  291. -------
  292. number
  293. The boundary expansion of the set `S`.
  294. See also
  295. --------
  296. edge_expansion
  297. mixing_expansion
  298. node_expansion
  299. References
  300. ----------
  301. .. [1] Vadhan, Salil P.
  302. "Pseudorandomness."
  303. *Foundations and Trends in Theoretical Computer Science*
  304. 7.1–3 (2011): 1–336.
  305. <https://doi.org/10.1561/0400000010>
  306. """
  307. return len(nx.node_boundary(G, S)) / len(S)