lowest_common_ancestors.py 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. """Algorithms for finding the lowest common ancestor of trees and DAGs."""
  2. from collections import defaultdict
  3. from collections.abc import Mapping, Set
  4. from itertools import combinations_with_replacement
  5. import networkx as nx
  6. from networkx.utils import UnionFind, arbitrary_element, not_implemented_for
  7. __all__ = [
  8. "all_pairs_lowest_common_ancestor",
  9. "tree_all_pairs_lowest_common_ancestor",
  10. "lowest_common_ancestor",
  11. ]
  12. @not_implemented_for("undirected")
  13. def all_pairs_lowest_common_ancestor(G, pairs=None):
  14. """Return the lowest common ancestor of all pairs or the provided pairs
  15. Parameters
  16. ----------
  17. G : NetworkX directed graph
  18. pairs : iterable of pairs of nodes, optional (default: all pairs)
  19. The pairs of nodes of interest.
  20. If None, will find the LCA of all pairs of nodes.
  21. Yields
  22. ------
  23. ((node1, node2), lca) : 2-tuple
  24. Where lca is least common ancestor of node1 and node2.
  25. Note that for the default case, the order of the node pair is not considered,
  26. e.g. you will not get both ``(a, b)`` and ``(b, a)``
  27. Raises
  28. ------
  29. NetworkXPointlessConcept
  30. If `G` is null.
  31. NetworkXError
  32. If `G` is not a DAG.
  33. Examples
  34. --------
  35. The default behavior is to yield the lowest common ancestor for all
  36. possible combinations of nodes in `G`, including self-pairings:
  37. >>> G = nx.DiGraph([(0, 1), (0, 3), (1, 2)])
  38. >>> dict(nx.all_pairs_lowest_common_ancestor(G))
  39. {(0, 0): 0, (0, 1): 0, (0, 3): 0, (0, 2): 0, (1, 1): 1, (1, 3): 0, (1, 2): 1, (3, 3): 3, (3, 2): 0, (2, 2): 2}
  40. The pairs argument can be used to limit the output to only the
  41. specified node pairings:
  42. >>> dict(nx.all_pairs_lowest_common_ancestor(G, pairs=[(1, 2), (2, 3)]))
  43. {(1, 2): 1, (2, 3): 0}
  44. Notes
  45. -----
  46. Only defined on non-null directed acyclic graphs.
  47. See Also
  48. --------
  49. lowest_common_ancestor
  50. """
  51. if not nx.is_directed_acyclic_graph(G):
  52. raise nx.NetworkXError("LCA only defined on directed acyclic graphs.")
  53. if len(G) == 0:
  54. raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.")
  55. if pairs is None:
  56. pairs = combinations_with_replacement(G, 2)
  57. else:
  58. # Convert iterator to iterable, if necessary. Trim duplicates.
  59. pairs = dict.fromkeys(pairs)
  60. # Verify that each of the nodes in the provided pairs is in G
  61. nodeset = set(G)
  62. for pair in pairs:
  63. if set(pair) - nodeset:
  64. raise nx.NodeNotFound(
  65. f"Node(s) {set(pair) - nodeset} from pair {pair} not in G."
  66. )
  67. # Once input validation is done, construct the generator
  68. def generate_lca_from_pairs(G, pairs):
  69. ancestor_cache = {}
  70. for v, w in pairs:
  71. if v not in ancestor_cache:
  72. ancestor_cache[v] = nx.ancestors(G, v)
  73. ancestor_cache[v].add(v)
  74. if w not in ancestor_cache:
  75. ancestor_cache[w] = nx.ancestors(G, w)
  76. ancestor_cache[w].add(w)
  77. common_ancestors = ancestor_cache[v] & ancestor_cache[w]
  78. if common_ancestors:
  79. common_ancestor = next(iter(common_ancestors))
  80. while True:
  81. successor = None
  82. for lower_ancestor in G.successors(common_ancestor):
  83. if lower_ancestor in common_ancestors:
  84. successor = lower_ancestor
  85. break
  86. if successor is None:
  87. break
  88. common_ancestor = successor
  89. yield ((v, w), common_ancestor)
  90. return generate_lca_from_pairs(G, pairs)
  91. @not_implemented_for("undirected")
  92. def lowest_common_ancestor(G, node1, node2, default=None):
  93. """Compute the lowest common ancestor of the given pair of nodes.
  94. Parameters
  95. ----------
  96. G : NetworkX directed graph
  97. node1, node2 : nodes in the graph.
  98. default : object
  99. Returned if no common ancestor between `node1` and `node2`
  100. Returns
  101. -------
  102. The lowest common ancestor of node1 and node2,
  103. or default if they have no common ancestors.
  104. Examples
  105. --------
  106. >>> G = nx.DiGraph()
  107. >>> nx.add_path(G, (0, 1, 2, 3))
  108. >>> nx.add_path(G, (0, 4, 3))
  109. >>> nx.lowest_common_ancestor(G, 2, 4)
  110. 0
  111. See Also
  112. --------
  113. all_pairs_lowest_common_ancestor"""
  114. ans = list(all_pairs_lowest_common_ancestor(G, pairs=[(node1, node2)]))
  115. if ans:
  116. assert len(ans) == 1
  117. return ans[0][1]
  118. return default
  119. @not_implemented_for("undirected")
  120. def tree_all_pairs_lowest_common_ancestor(G, root=None, pairs=None):
  121. r"""Yield the lowest common ancestor for sets of pairs in a tree.
  122. Parameters
  123. ----------
  124. G : NetworkX directed graph (must be a tree)
  125. root : node, optional (default: None)
  126. The root of the subtree to operate on.
  127. If None, assume the entire graph has exactly one source and use that.
  128. pairs : iterable or iterator of pairs of nodes, optional (default: None)
  129. The pairs of interest. If None, Defaults to all pairs of nodes
  130. under `root` that have a lowest common ancestor.
  131. Returns
  132. -------
  133. lcas : generator of tuples `((u, v), lca)` where `u` and `v` are nodes
  134. in `pairs` and `lca` is their lowest common ancestor.
  135. Examples
  136. --------
  137. >>> import pprint
  138. >>> G = nx.DiGraph([(1, 3), (2, 4), (1, 2)])
  139. >>> pprint.pprint(dict(nx.tree_all_pairs_lowest_common_ancestor(G)))
  140. {(1, 1): 1,
  141. (2, 1): 1,
  142. (2, 2): 2,
  143. (3, 1): 1,
  144. (3, 2): 1,
  145. (3, 3): 3,
  146. (3, 4): 1,
  147. (4, 1): 1,
  148. (4, 2): 2,
  149. (4, 4): 4}
  150. We can also use `pairs` argument to specify the pairs of nodes for which we
  151. want to compute lowest common ancestors. Here is an example:
  152. >>> dict(nx.tree_all_pairs_lowest_common_ancestor(G, pairs=[(1, 4), (2, 3)]))
  153. {(2, 3): 1, (1, 4): 1}
  154. Notes
  155. -----
  156. Only defined on non-null trees represented with directed edges from
  157. parents to children. Uses Tarjan's off-line lowest-common-ancestors
  158. algorithm. Runs in time $O(4 \times (V + E + P))$ time, where 4 is the largest
  159. value of the inverse Ackermann function likely to ever come up in actual
  160. use, and $P$ is the number of pairs requested (or $V^2$ if all are needed).
  161. Tarjan, R. E. (1979), "Applications of path compression on balanced trees",
  162. Journal of the ACM 26 (4): 690-715, doi:10.1145/322154.322161.
  163. See Also
  164. --------
  165. all_pairs_lowest_common_ancestor: similar routine for general DAGs
  166. lowest_common_ancestor: just a single pair for general DAGs
  167. """
  168. if len(G) == 0:
  169. raise nx.NetworkXPointlessConcept("LCA meaningless on null graphs.")
  170. # Index pairs of interest for efficient lookup from either side.
  171. if pairs is not None:
  172. pair_dict = defaultdict(set)
  173. # See note on all_pairs_lowest_common_ancestor.
  174. if not isinstance(pairs, (Mapping, Set)):
  175. pairs = set(pairs)
  176. for u, v in pairs:
  177. for n in (u, v):
  178. if n not in G:
  179. msg = f"The node {str(n)} is not in the digraph."
  180. raise nx.NodeNotFound(msg)
  181. pair_dict[u].add(v)
  182. pair_dict[v].add(u)
  183. # If root is not specified, find the exactly one node with in degree 0 and
  184. # use it. Raise an error if none are found, or more than one is. Also check
  185. # for any nodes with in degree larger than 1, which would imply G is not a
  186. # tree.
  187. if root is None:
  188. for n, deg in G.in_degree:
  189. if deg == 0:
  190. if root is not None:
  191. msg = "No root specified and tree has multiple sources."
  192. raise nx.NetworkXError(msg)
  193. root = n
  194. # checking deg>1 is not sufficient for MultiDiGraphs
  195. elif deg > 1 and len(G.pred[n]) > 1:
  196. msg = "Tree LCA only defined on trees; use DAG routine."
  197. raise nx.NetworkXError(msg)
  198. if root is None:
  199. raise nx.NetworkXError("Graph contains a cycle.")
  200. # Iterative implementation of Tarjan's offline lca algorithm
  201. # as described in CLRS on page 521 (2nd edition)/page 584 (3rd edition)
  202. uf = UnionFind()
  203. ancestors = {}
  204. for node in G:
  205. ancestors[node] = uf[node]
  206. colors = defaultdict(bool)
  207. for node in nx.dfs_postorder_nodes(G, root):
  208. colors[node] = True
  209. for v in pair_dict[node] if pairs is not None else G:
  210. if colors[v]:
  211. # If the user requested both directions of a pair, give it.
  212. # Otherwise, just give one.
  213. if pairs is not None and (node, v) in pairs:
  214. yield (node, v), ancestors[uf[v]]
  215. if pairs is None or (v, node) in pairs:
  216. yield (v, node), ancestors[uf[v]]
  217. if node != root:
  218. parent = arbitrary_element(G.pred[node])
  219. uf.union(parent, node)
  220. ancestors[uf[parent]] = parent