123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374 |
- """Functions for generating trees."""
- from collections import defaultdict
- import networkx as nx
- from networkx.utils import py_random_state
- __all__ = ["prefix_tree", "random_tree", "prefix_tree_recursive"]
- def prefix_tree(paths):
- """Creates a directed prefix tree from a list of paths.
- Usually the paths are described as strings or lists of integers.
- A "prefix tree" represents the prefix structure of the strings.
- Each node represents a prefix of some string. The root represents
- the empty prefix with children for the single letter prefixes which
- in turn have children for each double letter prefix starting with
- the single letter corresponding to the parent node, and so on.
- More generally the prefixes do not need to be strings. A prefix refers
- to the start of a sequence. The root has children for each one element
- prefix and they have children for each two element prefix that starts
- with the one element sequence of the parent, and so on.
- Note that this implementation uses integer nodes with an attribute.
- Each node has an attribute "source" whose value is the original element
- of the path to which this node corresponds. For example, suppose `paths`
- consists of one path: "can". Then the nodes `[1, 2, 3]` which represent
- this path have "source" values "c", "a" and "n".
- All the descendants of a node have a common prefix in the sequence/path
- associated with that node. From the returned tree, the prefix for each
- node can be constructed by traversing the tree up to the root and
- accumulating the "source" values along the way.
- The root node is always `0` and has "source" attribute `None`.
- The root is the only node with in-degree zero.
- The nil node is always `-1` and has "source" attribute `"NIL"`.
- The nil node is the only node with out-degree zero.
- Parameters
- ----------
- paths: iterable of paths
- An iterable of paths which are themselves sequences.
- Matching prefixes among these sequences are identified with
- nodes of the prefix tree. One leaf of the tree is associated
- with each path. (Identical paths are associated with the same
- leaf of the tree.)
- Returns
- -------
- tree: DiGraph
- A directed graph representing an arborescence consisting of the
- prefix tree generated by `paths`. Nodes are directed "downward",
- from parent to child. A special "synthetic" root node is added
- to be the parent of the first node in each path. A special
- "synthetic" leaf node, the "nil" node `-1`, is added to be the child
- of all nodes representing the last element in a path. (The
- addition of this nil node technically makes this not an
- arborescence but a directed acyclic graph; removing the nil node
- makes it an arborescence.)
- Notes
- -----
- The prefix tree is also known as a *trie*.
- Examples
- --------
- Create a prefix tree from a list of strings with common prefixes::
- >>> paths = ["ab", "abs", "ad"]
- >>> T = nx.prefix_tree(paths)
- >>> list(T.edges)
- [(0, 1), (1, 2), (1, 4), (2, -1), (2, 3), (3, -1), (4, -1)]
- The leaf nodes can be obtained as predecessors of the nil node::
- >>> root, NIL = 0, -1
- >>> list(T.predecessors(NIL))
- [2, 3, 4]
- To recover the original paths that generated the prefix tree,
- traverse up the tree from the node `-1` to the node `0`::
- >>> recovered = []
- >>> for v in T.predecessors(NIL):
- ... prefix = ""
- ... while v != root:
- ... prefix = str(T.nodes[v]["source"]) + prefix
- ... v = next(T.predecessors(v)) # only one predecessor
- ... recovered.append(prefix)
- >>> sorted(recovered)
- ['ab', 'abs', 'ad']
- """
- def get_children(parent, paths):
- children = defaultdict(list)
- # Populate dictionary with key(s) as the child/children of the root and
- # value(s) as the remaining paths of the corresponding child/children
- for path in paths:
- # If path is empty, we add an edge to the NIL node.
- if not path:
- tree.add_edge(parent, NIL)
- continue
- child, *rest = path
- # `child` may exist as the head of more than one path in `paths`.
- children[child].append(rest)
- return children
- # Initialize the prefix tree with a root node and a nil node.
- tree = nx.DiGraph()
- root = 0
- tree.add_node(root, source=None)
- NIL = -1
- tree.add_node(NIL, source="NIL")
- children = get_children(root, paths)
- stack = [(root, iter(children.items()))]
- while stack:
- parent, remaining_children = stack[-1]
- try:
- child, remaining_paths = next(remaining_children)
- # Pop item off stack if there are no remaining children
- except StopIteration:
- stack.pop()
- continue
- # We relabel each child with an unused name.
- new_name = len(tree) - 1
- # The "source" node attribute stores the original node name.
- tree.add_node(new_name, source=child)
- tree.add_edge(parent, new_name)
- children = get_children(new_name, remaining_paths)
- stack.append((new_name, iter(children.items())))
- return tree
- def prefix_tree_recursive(paths):
- """Recursively creates a directed prefix tree from a list of paths.
- The original recursive version of prefix_tree for comparison. It is
- the same algorithm but the recursion is unrolled onto a stack.
- Usually the paths are described as strings or lists of integers.
- A "prefix tree" represents the prefix structure of the strings.
- Each node represents a prefix of some string. The root represents
- the empty prefix with children for the single letter prefixes which
- in turn have children for each double letter prefix starting with
- the single letter corresponding to the parent node, and so on.
- More generally the prefixes do not need to be strings. A prefix refers
- to the start of a sequence. The root has children for each one element
- prefix and they have children for each two element prefix that starts
- with the one element sequence of the parent, and so on.
- Note that this implementation uses integer nodes with an attribute.
- Each node has an attribute "source" whose value is the original element
- of the path to which this node corresponds. For example, suppose `paths`
- consists of one path: "can". Then the nodes `[1, 2, 3]` which represent
- this path have "source" values "c", "a" and "n".
- All the descendants of a node have a common prefix in the sequence/path
- associated with that node. From the returned tree, ehe prefix for each
- node can be constructed by traversing the tree up to the root and
- accumulating the "source" values along the way.
- The root node is always `0` and has "source" attribute `None`.
- The root is the only node with in-degree zero.
- The nil node is always `-1` and has "source" attribute `"NIL"`.
- The nil node is the only node with out-degree zero.
- Parameters
- ----------
- paths: iterable of paths
- An iterable of paths which are themselves sequences.
- Matching prefixes among these sequences are identified with
- nodes of the prefix tree. One leaf of the tree is associated
- with each path. (Identical paths are associated with the same
- leaf of the tree.)
- Returns
- -------
- tree: DiGraph
- A directed graph representing an arborescence consisting of the
- prefix tree generated by `paths`. Nodes are directed "downward",
- from parent to child. A special "synthetic" root node is added
- to be the parent of the first node in each path. A special
- "synthetic" leaf node, the "nil" node `-1`, is added to be the child
- of all nodes representing the last element in a path. (The
- addition of this nil node technically makes this not an
- arborescence but a directed acyclic graph; removing the nil node
- makes it an arborescence.)
- Notes
- -----
- The prefix tree is also known as a *trie*.
- Examples
- --------
- Create a prefix tree from a list of strings with common prefixes::
- >>> paths = ["ab", "abs", "ad"]
- >>> T = nx.prefix_tree(paths)
- >>> list(T.edges)
- [(0, 1), (1, 2), (1, 4), (2, -1), (2, 3), (3, -1), (4, -1)]
- The leaf nodes can be obtained as predecessors of the nil node.
- >>> root, NIL = 0, -1
- >>> list(T.predecessors(NIL))
- [2, 3, 4]
- To recover the original paths that generated the prefix tree,
- traverse up the tree from the node `-1` to the node `0`::
- >>> recovered = []
- >>> for v in T.predecessors(NIL):
- ... prefix = ""
- ... while v != root:
- ... prefix = str(T.nodes[v]["source"]) + prefix
- ... v = next(T.predecessors(v)) # only one predecessor
- ... recovered.append(prefix)
- >>> sorted(recovered)
- ['ab', 'abs', 'ad']
- """
- def _helper(paths, root, tree):
- """Recursively create a trie from the given list of paths.
- `paths` is a list of paths, each of which is itself a list of
- nodes, relative to the given `root` (but not including it). This
- list of paths will be interpreted as a tree-like structure, in
- which two paths that share a prefix represent two branches of
- the tree with the same initial segment.
- `root` is the parent of the node at index 0 in each path.
- `tree` is the "accumulator", the :class:`networkx.DiGraph`
- representing the branching to which the new nodes and edges will
- be added.
- """
- # For each path, remove the first node and make it a child of root.
- # Any remaining paths then get processed recursively.
- children = defaultdict(list)
- for path in paths:
- # If path is empty, we add an edge to the NIL node.
- if not path:
- tree.add_edge(root, NIL)
- continue
- child, *rest = path
- # `child` may exist as the head of more than one path in `paths`.
- children[child].append(rest)
- # Add a node for each child, connect root, recurse to remaining paths
- for child, remaining_paths in children.items():
- # We relabel each child with an unused name.
- new_name = len(tree) - 1
- # The "source" node attribute stores the original node name.
- tree.add_node(new_name, source=child)
- tree.add_edge(root, new_name)
- _helper(remaining_paths, new_name, tree)
- # Initialize the prefix tree with a root node and a nil node.
- tree = nx.DiGraph()
- root = 0
- tree.add_node(root, source=None)
- NIL = -1
- tree.add_node(NIL, source="NIL")
- # Populate the tree.
- _helper(paths, root, tree)
- return tree
- # From the Wikipedia article on Prüfer sequences:
- #
- # > Generating uniformly distributed random Prüfer sequences and
- # > converting them into the corresponding trees is a straightforward
- # > method of generating uniformly distributed random labelled trees.
- #
- @py_random_state(1)
- def random_tree(n, seed=None, create_using=None):
- """Returns a uniformly random tree on `n` nodes.
- Parameters
- ----------
- n : int
- A positive integer representing the number of nodes in the tree.
- seed : integer, random_state, or None (default)
- Indicator of random number generation state.
- See :ref:`Randomness<randomness>`.
- create_using : NetworkX graph constructor, optional (default=nx.Graph)
- Graph type to create. If graph instance, then cleared before populated.
- Returns
- -------
- NetworkX graph
- A tree, given as an undirected graph, whose nodes are numbers in
- the set {0, …, *n* - 1}.
- Raises
- ------
- NetworkXPointlessConcept
- If `n` is zero (because the null graph is not a tree).
- Notes
- -----
- The current implementation of this function generates a uniformly
- random Prüfer sequence then converts that to a tree via the
- :func:`~networkx.from_prufer_sequence` function. Since there is a
- bijection between Prüfer sequences of length *n* - 2 and trees on
- *n* nodes, the tree is chosen uniformly at random from the set of
- all trees on *n* nodes.
- Examples
- --------
- >>> tree = nx.random_tree(n=10, seed=0)
- >>> nx.write_network_text(tree, sources=[0])
- ╙── 0
- ├── 3
- └── 4
- ├── 6
- │ ├── 1
- │ ├── 2
- │ └── 7
- │ └── 8
- │ └── 5
- └── 9
- >>> tree = nx.random_tree(n=10, seed=0, create_using=nx.DiGraph)
- >>> nx.write_network_text(tree)
- ╙── 0
- ├─╼ 3
- └─╼ 4
- ├─╼ 6
- │ ├─╼ 1
- │ ├─╼ 2
- │ └─╼ 7
- │ └─╼ 8
- │ └─╼ 5
- └─╼ 9
- """
- if n == 0:
- raise nx.NetworkXPointlessConcept("the null graph is not a tree")
- # Cannot create a Prüfer sequence unless `n` is at least two.
- if n == 1:
- utree = nx.empty_graph(1, create_using)
- else:
- sequence = [seed.choice(range(n)) for i in range(n - 2)]
- utree = nx.from_prufer_sequence(sequence)
- if create_using is None:
- tree = utree
- else:
- tree = nx.empty_graph(0, create_using)
- if tree.is_directed():
- # Use a arbitrary root node and dfs to define edge directions
- edges = nx.dfs_edges(utree, source=0)
- else:
- edges = utree.edges
- # Populate the specified graph type
- tree.add_nodes_from(utree.nodes)
- tree.add_edges_from(edges)
- return tree
|