coding.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. """Functions for encoding and decoding trees.
  2. Since a tree is a highly restricted form of graph, it can be represented
  3. concisely in several ways. This module includes functions for encoding
  4. and decoding trees in the form of nested tuples and Prüfer
  5. sequences. The former requires a rooted tree, whereas the latter can be
  6. applied to unrooted trees. Furthermore, there is a bijection from Prüfer
  7. sequences to labeled trees.
  8. """
  9. from collections import Counter
  10. from itertools import chain
  11. import networkx as nx
  12. from networkx.utils import not_implemented_for
  13. __all__ = [
  14. "from_nested_tuple",
  15. "from_prufer_sequence",
  16. "NotATree",
  17. "to_nested_tuple",
  18. "to_prufer_sequence",
  19. ]
  20. class NotATree(nx.NetworkXException):
  21. """Raised when a function expects a tree (that is, a connected
  22. undirected graph with no cycles) but gets a non-tree graph as input
  23. instead.
  24. """
  25. @not_implemented_for("directed")
  26. def to_nested_tuple(T, root, canonical_form=False):
  27. """Returns a nested tuple representation of the given tree.
  28. The nested tuple representation of a tree is defined
  29. recursively. The tree with one node and no edges is represented by
  30. the empty tuple, ``()``. A tree with ``k`` subtrees is represented
  31. by a tuple of length ``k`` in which each element is the nested tuple
  32. representation of a subtree.
  33. Parameters
  34. ----------
  35. T : NetworkX graph
  36. An undirected graph object representing a tree.
  37. root : node
  38. The node in ``T`` to interpret as the root of the tree.
  39. canonical_form : bool
  40. If ``True``, each tuple is sorted so that the function returns
  41. a canonical form for rooted trees. This means "lighter" subtrees
  42. will appear as nested tuples before "heavier" subtrees. In this
  43. way, each isomorphic rooted tree has the same nested tuple
  44. representation.
  45. Returns
  46. -------
  47. tuple
  48. A nested tuple representation of the tree.
  49. Notes
  50. -----
  51. This function is *not* the inverse of :func:`from_nested_tuple`; the
  52. only guarantee is that the rooted trees are isomorphic.
  53. See also
  54. --------
  55. from_nested_tuple
  56. to_prufer_sequence
  57. Examples
  58. --------
  59. The tree need not be a balanced binary tree::
  60. >>> T = nx.Graph()
  61. >>> T.add_edges_from([(0, 1), (0, 2), (0, 3)])
  62. >>> T.add_edges_from([(1, 4), (1, 5)])
  63. >>> T.add_edges_from([(3, 6), (3, 7)])
  64. >>> root = 0
  65. >>> nx.to_nested_tuple(T, root)
  66. (((), ()), (), ((), ()))
  67. Continuing the above example, if ``canonical_form`` is ``True``, the
  68. nested tuples will be sorted::
  69. >>> nx.to_nested_tuple(T, root, canonical_form=True)
  70. ((), ((), ()), ((), ()))
  71. Even the path graph can be interpreted as a tree::
  72. >>> T = nx.path_graph(4)
  73. >>> root = 0
  74. >>> nx.to_nested_tuple(T, root)
  75. ((((),),),)
  76. """
  77. def _make_tuple(T, root, _parent):
  78. """Recursively compute the nested tuple representation of the
  79. given rooted tree.
  80. ``_parent`` is the parent node of ``root`` in the supertree in
  81. which ``T`` is a subtree, or ``None`` if ``root`` is the root of
  82. the supertree. This argument is used to determine which
  83. neighbors of ``root`` are children and which is the parent.
  84. """
  85. # Get the neighbors of `root` that are not the parent node. We
  86. # are guaranteed that `root` is always in `T` by construction.
  87. children = set(T[root]) - {_parent}
  88. if len(children) == 0:
  89. return ()
  90. nested = (_make_tuple(T, v, root) for v in children)
  91. if canonical_form:
  92. nested = sorted(nested)
  93. return tuple(nested)
  94. # Do some sanity checks on the input.
  95. if not nx.is_tree(T):
  96. raise nx.NotATree("provided graph is not a tree")
  97. if root not in T:
  98. raise nx.NodeNotFound(f"Graph {T} contains no node {root}")
  99. return _make_tuple(T, root, None)
  100. def from_nested_tuple(sequence, sensible_relabeling=False):
  101. """Returns the rooted tree corresponding to the given nested tuple.
  102. The nested tuple representation of a tree is defined
  103. recursively. The tree with one node and no edges is represented by
  104. the empty tuple, ``()``. A tree with ``k`` subtrees is represented
  105. by a tuple of length ``k`` in which each element is the nested tuple
  106. representation of a subtree.
  107. Parameters
  108. ----------
  109. sequence : tuple
  110. A nested tuple representing a rooted tree.
  111. sensible_relabeling : bool
  112. Whether to relabel the nodes of the tree so that nodes are
  113. labeled in increasing order according to their breadth-first
  114. search order from the root node.
  115. Returns
  116. -------
  117. NetworkX graph
  118. The tree corresponding to the given nested tuple, whose root
  119. node is node 0. If ``sensible_labeling`` is ``True``, nodes will
  120. be labeled in breadth-first search order starting from the root
  121. node.
  122. Notes
  123. -----
  124. This function is *not* the inverse of :func:`to_nested_tuple`; the
  125. only guarantee is that the rooted trees are isomorphic.
  126. See also
  127. --------
  128. to_nested_tuple
  129. from_prufer_sequence
  130. Examples
  131. --------
  132. Sensible relabeling ensures that the nodes are labeled from the root
  133. starting at 0::
  134. >>> balanced = (((), ()), ((), ()))
  135. >>> T = nx.from_nested_tuple(balanced, sensible_relabeling=True)
  136. >>> edges = [(0, 1), (0, 2), (1, 3), (1, 4), (2, 5), (2, 6)]
  137. >>> all((u, v) in T.edges() or (v, u) in T.edges() for (u, v) in edges)
  138. True
  139. """
  140. def _make_tree(sequence):
  141. """Recursively creates a tree from the given sequence of nested
  142. tuples.
  143. This function employs the :func:`~networkx.tree.join` function
  144. to recursively join subtrees into a larger tree.
  145. """
  146. # The empty sequence represents the empty tree, which is the
  147. # (unique) graph with a single node. We mark the single node
  148. # with an attribute that indicates that it is the root of the
  149. # graph.
  150. if len(sequence) == 0:
  151. return nx.empty_graph(1)
  152. # For a nonempty sequence, get the subtrees for each child
  153. # sequence and join all the subtrees at their roots. After
  154. # joining the subtrees, the root is node 0.
  155. return nx.tree.join([(_make_tree(child), 0) for child in sequence])
  156. # Make the tree and remove the `is_root` node attribute added by the
  157. # helper function.
  158. T = _make_tree(sequence)
  159. if sensible_relabeling:
  160. # Relabel the nodes according to their breadth-first search
  161. # order, starting from the root node (that is, the node 0).
  162. bfs_nodes = chain([0], (v for u, v in nx.bfs_edges(T, 0)))
  163. labels = {v: i for i, v in enumerate(bfs_nodes)}
  164. # We would like to use `copy=False`, but `relabel_nodes` doesn't
  165. # allow a relabel mapping that can't be topologically sorted.
  166. T = nx.relabel_nodes(T, labels)
  167. return T
  168. @not_implemented_for("directed")
  169. def to_prufer_sequence(T):
  170. r"""Returns the Prüfer sequence of the given tree.
  171. A *Prüfer sequence* is a list of *n* - 2 numbers between 0 and
  172. *n* - 1, inclusive. The tree corresponding to a given Prüfer
  173. sequence can be recovered by repeatedly joining a node in the
  174. sequence with a node with the smallest potential degree according to
  175. the sequence.
  176. Parameters
  177. ----------
  178. T : NetworkX graph
  179. An undirected graph object representing a tree.
  180. Returns
  181. -------
  182. list
  183. The Prüfer sequence of the given tree.
  184. Raises
  185. ------
  186. NetworkXPointlessConcept
  187. If the number of nodes in `T` is less than two.
  188. NotATree
  189. If `T` is not a tree.
  190. KeyError
  191. If the set of nodes in `T` is not {0, …, *n* - 1}.
  192. Notes
  193. -----
  194. There is a bijection from labeled trees to Prüfer sequences. This
  195. function is the inverse of the :func:`from_prufer_sequence`
  196. function.
  197. Sometimes Prüfer sequences use nodes labeled from 1 to *n* instead
  198. of from 0 to *n* - 1. This function requires nodes to be labeled in
  199. the latter form. You can use :func:`~networkx.relabel_nodes` to
  200. relabel the nodes of your tree to the appropriate format.
  201. This implementation is from [1]_ and has a running time of
  202. $O(n)$.
  203. See also
  204. --------
  205. to_nested_tuple
  206. from_prufer_sequence
  207. References
  208. ----------
  209. .. [1] Wang, Xiaodong, Lei Wang, and Yingjie Wu.
  210. "An optimal algorithm for Prufer codes."
  211. *Journal of Software Engineering and Applications* 2.02 (2009): 111.
  212. <https://doi.org/10.4236/jsea.2009.22016>
  213. Examples
  214. --------
  215. There is a bijection between Prüfer sequences and labeled trees, so
  216. this function is the inverse of the :func:`from_prufer_sequence`
  217. function:
  218. >>> edges = [(0, 3), (1, 3), (2, 3), (3, 4), (4, 5)]
  219. >>> tree = nx.Graph(edges)
  220. >>> sequence = nx.to_prufer_sequence(tree)
  221. >>> sequence
  222. [3, 3, 3, 4]
  223. >>> tree2 = nx.from_prufer_sequence(sequence)
  224. >>> list(tree2.edges()) == edges
  225. True
  226. """
  227. # Perform some sanity checks on the input.
  228. n = len(T)
  229. if n < 2:
  230. msg = "Prüfer sequence undefined for trees with fewer than two nodes"
  231. raise nx.NetworkXPointlessConcept(msg)
  232. if not nx.is_tree(T):
  233. raise nx.NotATree("provided graph is not a tree")
  234. if set(T) != set(range(n)):
  235. raise KeyError("tree must have node labels {0, ..., n - 1}")
  236. degree = dict(T.degree())
  237. def parents(u):
  238. return next(v for v in T[u] if degree[v] > 1)
  239. index = u = next(k for k in range(n) if degree[k] == 1)
  240. result = []
  241. for i in range(n - 2):
  242. v = parents(u)
  243. result.append(v)
  244. degree[v] -= 1
  245. if v < index and degree[v] == 1:
  246. u = v
  247. else:
  248. index = u = next(k for k in range(index + 1, n) if degree[k] == 1)
  249. return result
  250. def from_prufer_sequence(sequence):
  251. r"""Returns the tree corresponding to the given Prüfer sequence.
  252. A *Prüfer sequence* is a list of *n* - 2 numbers between 0 and
  253. *n* - 1, inclusive. The tree corresponding to a given Prüfer
  254. sequence can be recovered by repeatedly joining a node in the
  255. sequence with a node with the smallest potential degree according to
  256. the sequence.
  257. Parameters
  258. ----------
  259. sequence : list
  260. A Prüfer sequence, which is a list of *n* - 2 integers between
  261. zero and *n* - 1, inclusive.
  262. Returns
  263. -------
  264. NetworkX graph
  265. The tree corresponding to the given Prüfer sequence.
  266. Notes
  267. -----
  268. There is a bijection from labeled trees to Prüfer sequences. This
  269. function is the inverse of the :func:`from_prufer_sequence` function.
  270. Sometimes Prüfer sequences use nodes labeled from 1 to *n* instead
  271. of from 0 to *n* - 1. This function requires nodes to be labeled in
  272. the latter form. You can use :func:`networkx.relabel_nodes` to
  273. relabel the nodes of your tree to the appropriate format.
  274. This implementation is from [1]_ and has a running time of
  275. $O(n)$.
  276. References
  277. ----------
  278. .. [1] Wang, Xiaodong, Lei Wang, and Yingjie Wu.
  279. "An optimal algorithm for Prufer codes."
  280. *Journal of Software Engineering and Applications* 2.02 (2009): 111.
  281. <https://doi.org/10.4236/jsea.2009.22016>
  282. See also
  283. --------
  284. from_nested_tuple
  285. to_prufer_sequence
  286. Examples
  287. --------
  288. There is a bijection between Prüfer sequences and labeled trees, so
  289. this function is the inverse of the :func:`to_prufer_sequence`
  290. function:
  291. >>> edges = [(0, 3), (1, 3), (2, 3), (3, 4), (4, 5)]
  292. >>> tree = nx.Graph(edges)
  293. >>> sequence = nx.to_prufer_sequence(tree)
  294. >>> sequence
  295. [3, 3, 3, 4]
  296. >>> tree2 = nx.from_prufer_sequence(sequence)
  297. >>> list(tree2.edges()) == edges
  298. True
  299. """
  300. n = len(sequence) + 2
  301. # `degree` stores the remaining degree (plus one) for each node. The
  302. # degree of a node in the decoded tree is one more than the number
  303. # of times it appears in the code.
  304. degree = Counter(chain(sequence, range(n)))
  305. T = nx.empty_graph(n)
  306. # `not_orphaned` is the set of nodes that have a parent in the
  307. # tree. After the loop, there should be exactly two nodes that are
  308. # not in this set.
  309. not_orphaned = set()
  310. index = u = next(k for k in range(n) if degree[k] == 1)
  311. for v in sequence:
  312. T.add_edge(u, v)
  313. not_orphaned.add(u)
  314. degree[v] -= 1
  315. if v < index and degree[v] == 1:
  316. u = v
  317. else:
  318. index = u = next(k for k in range(index + 1, n) if degree[k] == 1)
  319. # At this point, there must be exactly two orphaned nodes; join them.
  320. orphans = set(T) - not_orphaned
  321. u, v = orphans
  322. T.add_edge(u, v)
  323. return T