edge_kcomponents.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581
  1. """
  2. Algorithms for finding k-edge-connected components and subgraphs.
  3. A k-edge-connected component (k-edge-cc) is a maximal set of nodes in G, such
  4. that all pairs of node have an edge-connectivity of at least k.
  5. A k-edge-connected subgraph (k-edge-subgraph) is a maximal set of nodes in G,
  6. such that the subgraph of G defined by the nodes has an edge-connectivity at
  7. least k.
  8. """
  9. import itertools as it
  10. from functools import partial
  11. import networkx as nx
  12. from networkx.algorithms import bridges
  13. from networkx.utils import arbitrary_element, not_implemented_for
  14. __all__ = [
  15. "k_edge_components",
  16. "k_edge_subgraphs",
  17. "bridge_components",
  18. "EdgeComponentAuxGraph",
  19. ]
  20. @not_implemented_for("multigraph")
  21. def k_edge_components(G, k):
  22. """Generates nodes in each maximal k-edge-connected component in G.
  23. Parameters
  24. ----------
  25. G : NetworkX graph
  26. k : Integer
  27. Desired edge connectivity
  28. Returns
  29. -------
  30. k_edge_components : a generator of k-edge-ccs. Each set of returned nodes
  31. will have k-edge-connectivity in the graph G.
  32. See Also
  33. --------
  34. :func:`local_edge_connectivity`
  35. :func:`k_edge_subgraphs` : similar to this function, but the subgraph
  36. defined by the nodes must also have k-edge-connectivity.
  37. :func:`k_components` : similar to this function, but uses node-connectivity
  38. instead of edge-connectivity
  39. Raises
  40. ------
  41. NetworkXNotImplemented
  42. If the input graph is a multigraph.
  43. ValueError:
  44. If k is less than 1
  45. Notes
  46. -----
  47. Attempts to use the most efficient implementation available based on k.
  48. If k=1, this is simply connected components for directed graphs and
  49. connected components for undirected graphs.
  50. If k=2 on an efficient bridge connected component algorithm from _[1] is
  51. run based on the chain decomposition.
  52. Otherwise, the algorithm from _[2] is used.
  53. Examples
  54. --------
  55. >>> import itertools as it
  56. >>> from networkx.utils import pairwise
  57. >>> paths = [
  58. ... (1, 2, 4, 3, 1, 4),
  59. ... (5, 6, 7, 8, 5, 7, 8, 6),
  60. ... ]
  61. >>> G = nx.Graph()
  62. >>> G.add_nodes_from(it.chain(*paths))
  63. >>> G.add_edges_from(it.chain(*[pairwise(path) for path in paths]))
  64. >>> # note this returns {1, 4} unlike k_edge_subgraphs
  65. >>> sorted(map(sorted, nx.k_edge_components(G, k=3)))
  66. [[1, 4], [2], [3], [5, 6, 7, 8]]
  67. References
  68. ----------
  69. .. [1] https://en.wikipedia.org/wiki/Bridge_%28graph_theory%29
  70. .. [2] Wang, Tianhao, et al. (2015) A simple algorithm for finding all
  71. k-edge-connected components.
  72. http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0136264
  73. """
  74. # Compute k-edge-ccs using the most efficient algorithms available.
  75. if k < 1:
  76. raise ValueError("k cannot be less than 1")
  77. if G.is_directed():
  78. if k == 1:
  79. return nx.strongly_connected_components(G)
  80. else:
  81. # TODO: investigate https://arxiv.org/abs/1412.6466 for k=2
  82. aux_graph = EdgeComponentAuxGraph.construct(G)
  83. return aux_graph.k_edge_components(k)
  84. else:
  85. if k == 1:
  86. return nx.connected_components(G)
  87. elif k == 2:
  88. return bridge_components(G)
  89. else:
  90. aux_graph = EdgeComponentAuxGraph.construct(G)
  91. return aux_graph.k_edge_components(k)
  92. @not_implemented_for("multigraph")
  93. def k_edge_subgraphs(G, k):
  94. """Generates nodes in each maximal k-edge-connected subgraph in G.
  95. Parameters
  96. ----------
  97. G : NetworkX graph
  98. k : Integer
  99. Desired edge connectivity
  100. Returns
  101. -------
  102. k_edge_subgraphs : a generator of k-edge-subgraphs
  103. Each k-edge-subgraph is a maximal set of nodes that defines a subgraph
  104. of G that is k-edge-connected.
  105. See Also
  106. --------
  107. :func:`edge_connectivity`
  108. :func:`k_edge_components` : similar to this function, but nodes only
  109. need to have k-edge-connctivity within the graph G and the subgraphs
  110. might not be k-edge-connected.
  111. Raises
  112. ------
  113. NetworkXNotImplemented
  114. If the input graph is a multigraph.
  115. ValueError:
  116. If k is less than 1
  117. Notes
  118. -----
  119. Attempts to use the most efficient implementation available based on k.
  120. If k=1, or k=2 and the graph is undirected, then this simply calls
  121. `k_edge_components`. Otherwise the algorithm from _[1] is used.
  122. Examples
  123. --------
  124. >>> import itertools as it
  125. >>> from networkx.utils import pairwise
  126. >>> paths = [
  127. ... (1, 2, 4, 3, 1, 4),
  128. ... (5, 6, 7, 8, 5, 7, 8, 6),
  129. ... ]
  130. >>> G = nx.Graph()
  131. >>> G.add_nodes_from(it.chain(*paths))
  132. >>> G.add_edges_from(it.chain(*[pairwise(path) for path in paths]))
  133. >>> # note this does not return {1, 4} unlike k_edge_components
  134. >>> sorted(map(sorted, nx.k_edge_subgraphs(G, k=3)))
  135. [[1], [2], [3], [4], [5, 6, 7, 8]]
  136. References
  137. ----------
  138. .. [1] Zhou, Liu, et al. (2012) Finding maximal k-edge-connected subgraphs
  139. from a large graph. ACM International Conference on Extending Database
  140. Technology 2012 480-–491.
  141. https://openproceedings.org/2012/conf/edbt/ZhouLYLCL12.pdf
  142. """
  143. if k < 1:
  144. raise ValueError("k cannot be less than 1")
  145. if G.is_directed():
  146. if k <= 1:
  147. # For directed graphs ,
  148. # When k == 1, k-edge-ccs and k-edge-subgraphs are the same
  149. return k_edge_components(G, k)
  150. else:
  151. return _k_edge_subgraphs_nodes(G, k)
  152. else:
  153. if k <= 2:
  154. # For undirected graphs,
  155. # when k <= 2, k-edge-ccs and k-edge-subgraphs are the same
  156. return k_edge_components(G, k)
  157. else:
  158. return _k_edge_subgraphs_nodes(G, k)
  159. def _k_edge_subgraphs_nodes(G, k):
  160. """Helper to get the nodes from the subgraphs.
  161. This allows k_edge_subgraphs to return a generator.
  162. """
  163. for C in general_k_edge_subgraphs(G, k):
  164. yield set(C.nodes())
  165. @not_implemented_for("directed")
  166. @not_implemented_for("multigraph")
  167. def bridge_components(G):
  168. """Finds all bridge-connected components G.
  169. Parameters
  170. ----------
  171. G : NetworkX undirected graph
  172. Returns
  173. -------
  174. bridge_components : a generator of 2-edge-connected components
  175. See Also
  176. --------
  177. :func:`k_edge_subgraphs` : this function is a special case for an
  178. undirected graph where k=2.
  179. :func:`biconnected_components` : similar to this function, but is defined
  180. using 2-node-connectivity instead of 2-edge-connectivity.
  181. Raises
  182. ------
  183. NetworkXNotImplemented
  184. If the input graph is directed or a multigraph.
  185. Notes
  186. -----
  187. Bridge-connected components are also known as 2-edge-connected components.
  188. Examples
  189. --------
  190. >>> # The barbell graph with parameter zero has a single bridge
  191. >>> G = nx.barbell_graph(5, 0)
  192. >>> from networkx.algorithms.connectivity.edge_kcomponents import bridge_components
  193. >>> sorted(map(sorted, bridge_components(G)))
  194. [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]
  195. """
  196. H = G.copy()
  197. H.remove_edges_from(bridges(G))
  198. yield from nx.connected_components(H)
  199. class EdgeComponentAuxGraph:
  200. r"""A simple algorithm to find all k-edge-connected components in a graph.
  201. Constructing the auxiliary graph (which may take some time) allows for the
  202. k-edge-ccs to be found in linear time for arbitrary k.
  203. Notes
  204. -----
  205. This implementation is based on [1]_. The idea is to construct an auxiliary
  206. graph from which the k-edge-ccs can be extracted in linear time. The
  207. auxiliary graph is constructed in $O(|V|\cdot F)$ operations, where F is the
  208. complexity of max flow. Querying the components takes an additional $O(|V|)$
  209. operations. This algorithm can be slow for large graphs, but it handles an
  210. arbitrary k and works for both directed and undirected inputs.
  211. The undirected case for k=1 is exactly connected components.
  212. The undirected case for k=2 is exactly bridge connected components.
  213. The directed case for k=1 is exactly strongly connected components.
  214. References
  215. ----------
  216. .. [1] Wang, Tianhao, et al. (2015) A simple algorithm for finding all
  217. k-edge-connected components.
  218. http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0136264
  219. Examples
  220. --------
  221. >>> import itertools as it
  222. >>> from networkx.utils import pairwise
  223. >>> from networkx.algorithms.connectivity import EdgeComponentAuxGraph
  224. >>> # Build an interesting graph with multiple levels of k-edge-ccs
  225. >>> paths = [
  226. ... (1, 2, 3, 4, 1, 3, 4, 2), # a 3-edge-cc (a 4 clique)
  227. ... (5, 6, 7, 5), # a 2-edge-cc (a 3 clique)
  228. ... (1, 5), # combine first two ccs into a 1-edge-cc
  229. ... (0,), # add an additional disconnected 1-edge-cc
  230. ... ]
  231. >>> G = nx.Graph()
  232. >>> G.add_nodes_from(it.chain(*paths))
  233. >>> G.add_edges_from(it.chain(*[pairwise(path) for path in paths]))
  234. >>> # Constructing the AuxGraph takes about O(n ** 4)
  235. >>> aux_graph = EdgeComponentAuxGraph.construct(G)
  236. >>> # Once constructed, querying takes O(n)
  237. >>> sorted(map(sorted, aux_graph.k_edge_components(k=1)))
  238. [[0], [1, 2, 3, 4, 5, 6, 7]]
  239. >>> sorted(map(sorted, aux_graph.k_edge_components(k=2)))
  240. [[0], [1, 2, 3, 4], [5, 6, 7]]
  241. >>> sorted(map(sorted, aux_graph.k_edge_components(k=3)))
  242. [[0], [1, 2, 3, 4], [5], [6], [7]]
  243. >>> sorted(map(sorted, aux_graph.k_edge_components(k=4)))
  244. [[0], [1], [2], [3], [4], [5], [6], [7]]
  245. The auxiliary graph is primarily used for k-edge-ccs but it
  246. can also speed up the queries of k-edge-subgraphs by refining the
  247. search space.
  248. >>> import itertools as it
  249. >>> from networkx.utils import pairwise
  250. >>> from networkx.algorithms.connectivity import EdgeComponentAuxGraph
  251. >>> paths = [
  252. ... (1, 2, 4, 3, 1, 4),
  253. ... ]
  254. >>> G = nx.Graph()
  255. >>> G.add_nodes_from(it.chain(*paths))
  256. >>> G.add_edges_from(it.chain(*[pairwise(path) for path in paths]))
  257. >>> aux_graph = EdgeComponentAuxGraph.construct(G)
  258. >>> sorted(map(sorted, aux_graph.k_edge_subgraphs(k=3)))
  259. [[1], [2], [3], [4]]
  260. >>> sorted(map(sorted, aux_graph.k_edge_components(k=3)))
  261. [[1, 4], [2], [3]]
  262. """
  263. # @not_implemented_for('multigraph') # TODO: fix decor for classmethods
  264. @classmethod
  265. def construct(EdgeComponentAuxGraph, G):
  266. """Builds an auxiliary graph encoding edge-connectivity between nodes.
  267. Notes
  268. -----
  269. Given G=(V, E), initialize an empty auxiliary graph A.
  270. Choose an arbitrary source node s. Initialize a set N of available
  271. nodes (that can be used as the sink). The algorithm picks an
  272. arbitrary node t from N - {s}, and then computes the minimum st-cut
  273. (S, T) with value w. If G is directed the minimum of the st-cut or
  274. the ts-cut is used instead. Then, the edge (s, t) is added to the
  275. auxiliary graph with weight w. The algorithm is called recursively
  276. first using S as the available nodes and s as the source, and then
  277. using T and t. Recursion stops when the source is the only available
  278. node.
  279. Parameters
  280. ----------
  281. G : NetworkX graph
  282. """
  283. # workaround for classmethod decorator
  284. not_implemented_for("multigraph")(lambda G: G)(G)
  285. def _recursive_build(H, A, source, avail):
  286. # Terminate once the flow has been compute to every node.
  287. if {source} == avail:
  288. return
  289. # pick an arbitrary node as the sink
  290. sink = arbitrary_element(avail - {source})
  291. # find the minimum cut and its weight
  292. value, (S, T) = nx.minimum_cut(H, source, sink)
  293. if H.is_directed():
  294. # check if the reverse direction has a smaller cut
  295. value_, (T_, S_) = nx.minimum_cut(H, sink, source)
  296. if value_ < value:
  297. value, S, T = value_, S_, T_
  298. # add edge with weight of cut to the aux graph
  299. A.add_edge(source, sink, weight=value)
  300. # recursively call until all but one node is used
  301. _recursive_build(H, A, source, avail.intersection(S))
  302. _recursive_build(H, A, sink, avail.intersection(T))
  303. # Copy input to ensure all edges have unit capacity
  304. H = G.__class__()
  305. H.add_nodes_from(G.nodes())
  306. H.add_edges_from(G.edges(), capacity=1)
  307. # A is the auxiliary graph to be constructed
  308. # It is a weighted undirected tree
  309. A = nx.Graph()
  310. # Pick an arbitrary node as the source
  311. if H.number_of_nodes() > 0:
  312. source = arbitrary_element(H.nodes())
  313. # Initialize a set of elements that can be chosen as the sink
  314. avail = set(H.nodes())
  315. # This constructs A
  316. _recursive_build(H, A, source, avail)
  317. # This class is a container the holds the auxiliary graph A and
  318. # provides access the k_edge_components function.
  319. self = EdgeComponentAuxGraph()
  320. self.A = A
  321. self.H = H
  322. return self
  323. def k_edge_components(self, k):
  324. """Queries the auxiliary graph for k-edge-connected components.
  325. Parameters
  326. ----------
  327. k : Integer
  328. Desired edge connectivity
  329. Returns
  330. -------
  331. k_edge_components : a generator of k-edge-ccs
  332. Notes
  333. -----
  334. Given the auxiliary graph, the k-edge-connected components can be
  335. determined in linear time by removing all edges with weights less than
  336. k from the auxiliary graph. The resulting connected components are the
  337. k-edge-ccs in the original graph.
  338. """
  339. if k < 1:
  340. raise ValueError("k cannot be less than 1")
  341. A = self.A
  342. # "traverse the auxiliary graph A and delete all edges with weights less
  343. # than k"
  344. aux_weights = nx.get_edge_attributes(A, "weight")
  345. # Create a relevant graph with the auxiliary edges with weights >= k
  346. R = nx.Graph()
  347. R.add_nodes_from(A.nodes())
  348. R.add_edges_from(e for e, w in aux_weights.items() if w >= k)
  349. # Return the nodes that are k-edge-connected in the original graph
  350. yield from nx.connected_components(R)
  351. def k_edge_subgraphs(self, k):
  352. """Queries the auxiliary graph for k-edge-connected subgraphs.
  353. Parameters
  354. ----------
  355. k : Integer
  356. Desired edge connectivity
  357. Returns
  358. -------
  359. k_edge_subgraphs : a generator of k-edge-subgraphs
  360. Notes
  361. -----
  362. Refines the k-edge-ccs into k-edge-subgraphs. The running time is more
  363. than $O(|V|)$.
  364. For single values of k it is faster to use `nx.k_edge_subgraphs`.
  365. But for multiple values of k, it can be faster to build AuxGraph and
  366. then use this method.
  367. """
  368. if k < 1:
  369. raise ValueError("k cannot be less than 1")
  370. H = self.H
  371. A = self.A
  372. # "traverse the auxiliary graph A and delete all edges with weights less
  373. # than k"
  374. aux_weights = nx.get_edge_attributes(A, "weight")
  375. # Create a relevant graph with the auxiliary edges with weights >= k
  376. R = nx.Graph()
  377. R.add_nodes_from(A.nodes())
  378. R.add_edges_from(e for e, w in aux_weights.items() if w >= k)
  379. # Return the components whose subgraphs are k-edge-connected
  380. for cc in nx.connected_components(R):
  381. if len(cc) < k:
  382. # Early return optimization
  383. for node in cc:
  384. yield {node}
  385. else:
  386. # Call subgraph solution to refine the results
  387. C = H.subgraph(cc)
  388. yield from k_edge_subgraphs(C, k)
  389. def _low_degree_nodes(G, k, nbunch=None):
  390. """Helper for finding nodes with degree less than k."""
  391. # Nodes with degree less than k cannot be k-edge-connected.
  392. if G.is_directed():
  393. # Consider both in and out degree in the directed case
  394. seen = set()
  395. for node, degree in G.out_degree(nbunch):
  396. if degree < k:
  397. seen.add(node)
  398. yield node
  399. for node, degree in G.in_degree(nbunch):
  400. if node not in seen and degree < k:
  401. seen.add(node)
  402. yield node
  403. else:
  404. # Only the degree matters in the undirected case
  405. for node, degree in G.degree(nbunch):
  406. if degree < k:
  407. yield node
  408. def _high_degree_components(G, k):
  409. """Helper for filtering components that can't be k-edge-connected.
  410. Removes and generates each node with degree less than k. Then generates
  411. remaining components where all nodes have degree at least k.
  412. """
  413. # Iteravely remove parts of the graph that are not k-edge-connected
  414. H = G.copy()
  415. singletons = set(_low_degree_nodes(H, k))
  416. while singletons:
  417. # Only search neighbors of removed nodes
  418. nbunch = set(it.chain.from_iterable(map(H.neighbors, singletons)))
  419. nbunch.difference_update(singletons)
  420. H.remove_nodes_from(singletons)
  421. for node in singletons:
  422. yield {node}
  423. singletons = set(_low_degree_nodes(H, k, nbunch))
  424. # Note: remaining connected components may not be k-edge-connected
  425. if G.is_directed():
  426. yield from nx.strongly_connected_components(H)
  427. else:
  428. yield from nx.connected_components(H)
  429. def general_k_edge_subgraphs(G, k):
  430. """General algorithm to find all maximal k-edge-connected subgraphs in G.
  431. Returns
  432. -------
  433. k_edge_subgraphs : a generator of nx.Graphs that are k-edge-subgraphs
  434. Each k-edge-subgraph is a maximal set of nodes that defines a subgraph
  435. of G that is k-edge-connected.
  436. Notes
  437. -----
  438. Implementation of the basic algorithm from _[1]. The basic idea is to find
  439. a global minimum cut of the graph. If the cut value is at least k, then the
  440. graph is a k-edge-connected subgraph and can be added to the results.
  441. Otherwise, the cut is used to split the graph in two and the procedure is
  442. applied recursively. If the graph is just a single node, then it is also
  443. added to the results. At the end, each result is either guaranteed to be
  444. a single node or a subgraph of G that is k-edge-connected.
  445. This implementation contains optimizations for reducing the number of calls
  446. to max-flow, but there are other optimizations in _[1] that could be
  447. implemented.
  448. References
  449. ----------
  450. .. [1] Zhou, Liu, et al. (2012) Finding maximal k-edge-connected subgraphs
  451. from a large graph. ACM International Conference on Extending Database
  452. Technology 2012 480-–491.
  453. https://openproceedings.org/2012/conf/edbt/ZhouLYLCL12.pdf
  454. Examples
  455. --------
  456. >>> from networkx.utils import pairwise
  457. >>> paths = [
  458. ... (11, 12, 13, 14, 11, 13, 14, 12), # a 4-clique
  459. ... (21, 22, 23, 24, 21, 23, 24, 22), # another 4-clique
  460. ... # connect the cliques with high degree but low connectivity
  461. ... (50, 13),
  462. ... (12, 50, 22),
  463. ... (13, 102, 23),
  464. ... (14, 101, 24),
  465. ... ]
  466. >>> G = nx.Graph(it.chain(*[pairwise(path) for path in paths]))
  467. >>> sorted(map(len, k_edge_subgraphs(G, k=3)))
  468. [1, 1, 1, 4, 4]
  469. """
  470. if k < 1:
  471. raise ValueError("k cannot be less than 1")
  472. # Node pruning optimization (incorporates early return)
  473. # find_ccs is either connected_components/strongly_connected_components
  474. find_ccs = partial(_high_degree_components, k=k)
  475. # Quick return optimization
  476. if G.number_of_nodes() < k:
  477. for node in G.nodes():
  478. yield G.subgraph([node]).copy()
  479. return
  480. # Intermediate results
  481. R0 = {G.subgraph(cc).copy() for cc in find_ccs(G)}
  482. # Subdivide CCs in the intermediate results until they are k-conn
  483. while R0:
  484. G1 = R0.pop()
  485. if G1.number_of_nodes() == 1:
  486. yield G1
  487. else:
  488. # Find a global minimum cut
  489. cut_edges = nx.minimum_edge_cut(G1)
  490. cut_value = len(cut_edges)
  491. if cut_value < k:
  492. # G1 is not k-edge-connected, so subdivide it
  493. G1.remove_edges_from(cut_edges)
  494. for cc in find_ccs(G1):
  495. R0.add(G1.subgraph(cc).copy())
  496. else:
  497. # Otherwise we found a k-edge-connected subgraph
  498. yield G1