123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122 |
- """
- Algorithms for calculating min/max spanning trees/forests.
- """
- from dataclasses import dataclass, field
- from enum import Enum
- from heapq import heappop, heappush
- from itertools import count
- from math import isnan
- from operator import itemgetter
- from queue import PriorityQueue
- import networkx as nx
- from networkx.utils import UnionFind, not_implemented_for, py_random_state
- __all__ = [
- "minimum_spanning_edges",
- "maximum_spanning_edges",
- "minimum_spanning_tree",
- "maximum_spanning_tree",
- "random_spanning_tree",
- "partition_spanning_tree",
- "EdgePartition",
- "SpanningTreeIterator",
- ]
- class EdgePartition(Enum):
- """
- An enum to store the state of an edge partition. The enum is written to the
- edges of a graph before being pasted to `kruskal_mst_edges`. Options are:
- - EdgePartition.OPEN
- - EdgePartition.INCLUDED
- - EdgePartition.EXCLUDED
- """
- OPEN = 0
- INCLUDED = 1
- EXCLUDED = 2
- @not_implemented_for("multigraph")
- def boruvka_mst_edges(
- G, minimum=True, weight="weight", keys=False, data=True, ignore_nan=False
- ):
- """Iterate over edges of a Borůvka's algorithm min/max spanning tree.
- Parameters
- ----------
- G : NetworkX Graph
- The edges of `G` must have distinct weights,
- otherwise the edges may not form a tree.
- minimum : bool (default: True)
- Find the minimum (True) or maximum (False) spanning tree.
- weight : string (default: 'weight')
- The name of the edge attribute holding the edge weights.
- keys : bool (default: True)
- This argument is ignored since this function is not
- implemented for multigraphs; it exists only for consistency
- with the other minimum spanning tree functions.
- data : bool (default: True)
- Flag for whether to yield edge attribute dicts.
- If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
- If False, yield edges `(u, v)`.
- ignore_nan : bool (default: False)
- If a NaN is found as an edge weight normally an exception is raised.
- If `ignore_nan is True` then that edge is ignored instead.
- """
- # Initialize a forest, assuming initially that it is the discrete
- # partition of the nodes of the graph.
- forest = UnionFind(G)
- def best_edge(component):
- """Returns the optimum (minimum or maximum) edge on the edge
- boundary of the given set of nodes.
- A return value of ``None`` indicates an empty boundary.
- """
- sign = 1 if minimum else -1
- minwt = float("inf")
- boundary = None
- for e in nx.edge_boundary(G, component, data=True):
- wt = e[-1].get(weight, 1) * sign
- if isnan(wt):
- if ignore_nan:
- continue
- msg = f"NaN found as an edge weight. Edge {e}"
- raise ValueError(msg)
- if wt < minwt:
- minwt = wt
- boundary = e
- return boundary
- # Determine the optimum edge in the edge boundary of each component
- # in the forest.
- best_edges = (best_edge(component) for component in forest.to_sets())
- best_edges = [edge for edge in best_edges if edge is not None]
- # If each entry was ``None``, that means the graph was disconnected,
- # so we are done generating the forest.
- while best_edges:
- # Determine the optimum edge in the edge boundary of each
- # component in the forest.
- #
- # This must be a sequence, not an iterator. In this list, the
- # same edge may appear twice, in different orientations (but
- # that's okay, since a union operation will be called on the
- # endpoints the first time it is seen, but not the second time).
- #
- # Any ``None`` indicates that the edge boundary for that
- # component was empty, so that part of the forest has been
- # completed.
- #
- # TODO This can be parallelized, both in the outer loop over
- # each component in the forest and in the computation of the
- # minimum. (Same goes for the identical lines outside the loop.)
- best_edges = (best_edge(component) for component in forest.to_sets())
- best_edges = [edge for edge in best_edges if edge is not None]
- # Join trees in the forest using the best edges, and yield that
- # edge, since it is part of the spanning tree.
- #
- # TODO This loop can be parallelized, to an extent (the union
- # operation must be atomic).
- for u, v, d in best_edges:
- if forest[u] != forest[v]:
- if data:
- yield u, v, d
- else:
- yield u, v
- forest.union(u, v)
- def kruskal_mst_edges(
- G, minimum, weight="weight", keys=True, data=True, ignore_nan=False, partition=None
- ):
- """
- Iterate over edge of a Kruskal's algorithm min/max spanning tree.
- Parameters
- ----------
- G : NetworkX Graph
- The graph holding the tree of interest.
- minimum : bool (default: True)
- Find the minimum (True) or maximum (False) spanning tree.
- weight : string (default: 'weight')
- The name of the edge attribute holding the edge weights.
- keys : bool (default: True)
- If `G` is a multigraph, `keys` controls whether edge keys ar yielded.
- Otherwise `keys` is ignored.
- data : bool (default: True)
- Flag for whether to yield edge attribute dicts.
- If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
- If False, yield edges `(u, v)`.
- ignore_nan : bool (default: False)
- If a NaN is found as an edge weight normally an exception is raised.
- If `ignore_nan is True` then that edge is ignored instead.
- partition : string (default: None)
- The name of the edge attribute holding the partition data, if it exists.
- Partition data is written to the edges using the `EdgePartition` enum.
- If a partition exists, all included edges and none of the excluded edges
- will appear in the final tree. Open edges may or may not be used.
- Yields
- ------
- edge tuple
- The edges as discovered by Kruskal's method. Each edge can
- take the following forms: `(u, v)`, `(u, v, d)` or `(u, v, k, d)`
- depending on the `key` and `data` parameters
- """
- subtrees = UnionFind()
- if G.is_multigraph():
- edges = G.edges(keys=True, data=True)
- else:
- edges = G.edges(data=True)
- """
- Sort the edges of the graph with respect to the partition data.
- Edges are returned in the following order:
- * Included edges
- * Open edges from smallest to largest weight
- * Excluded edges
- """
- included_edges = []
- open_edges = []
- for e in edges:
- d = e[-1]
- wt = d.get(weight, 1)
- if isnan(wt):
- if ignore_nan:
- continue
- raise ValueError(f"NaN found as an edge weight. Edge {e}")
- edge = (wt,) + e
- if d.get(partition) == EdgePartition.INCLUDED:
- included_edges.append(edge)
- elif d.get(partition) == EdgePartition.EXCLUDED:
- continue
- else:
- open_edges.append(edge)
- if minimum:
- sorted_open_edges = sorted(open_edges, key=itemgetter(0))
- else:
- sorted_open_edges = sorted(open_edges, key=itemgetter(0), reverse=True)
- # Condense the lists into one
- included_edges.extend(sorted_open_edges)
- sorted_edges = included_edges
- del open_edges, sorted_open_edges, included_edges
- # Multigraphs need to handle edge keys in addition to edge data.
- if G.is_multigraph():
- for wt, u, v, k, d in sorted_edges:
- if subtrees[u] != subtrees[v]:
- if keys:
- if data:
- yield u, v, k, d
- else:
- yield u, v, k
- else:
- if data:
- yield u, v, d
- else:
- yield u, v
- subtrees.union(u, v)
- else:
- for wt, u, v, d in sorted_edges:
- if subtrees[u] != subtrees[v]:
- if data:
- yield u, v, d
- else:
- yield u, v
- subtrees.union(u, v)
- def prim_mst_edges(G, minimum, weight="weight", keys=True, data=True, ignore_nan=False):
- """Iterate over edges of Prim's algorithm min/max spanning tree.
- Parameters
- ----------
- G : NetworkX Graph
- The graph holding the tree of interest.
- minimum : bool (default: True)
- Find the minimum (True) or maximum (False) spanning tree.
- weight : string (default: 'weight')
- The name of the edge attribute holding the edge weights.
- keys : bool (default: True)
- If `G` is a multigraph, `keys` controls whether edge keys ar yielded.
- Otherwise `keys` is ignored.
- data : bool (default: True)
- Flag for whether to yield edge attribute dicts.
- If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
- If False, yield edges `(u, v)`.
- ignore_nan : bool (default: False)
- If a NaN is found as an edge weight normally an exception is raised.
- If `ignore_nan is True` then that edge is ignored instead.
- """
- is_multigraph = G.is_multigraph()
- push = heappush
- pop = heappop
- nodes = set(G)
- c = count()
- sign = 1 if minimum else -1
- while nodes:
- u = nodes.pop()
- frontier = []
- visited = {u}
- if is_multigraph:
- for v, keydict in G.adj[u].items():
- for k, d in keydict.items():
- wt = d.get(weight, 1) * sign
- if isnan(wt):
- if ignore_nan:
- continue
- msg = f"NaN found as an edge weight. Edge {(u, v, k, d)}"
- raise ValueError(msg)
- push(frontier, (wt, next(c), u, v, k, d))
- else:
- for v, d in G.adj[u].items():
- wt = d.get(weight, 1) * sign
- if isnan(wt):
- if ignore_nan:
- continue
- msg = f"NaN found as an edge weight. Edge {(u, v, d)}"
- raise ValueError(msg)
- push(frontier, (wt, next(c), u, v, d))
- while nodes and frontier:
- if is_multigraph:
- W, _, u, v, k, d = pop(frontier)
- else:
- W, _, u, v, d = pop(frontier)
- if v in visited or v not in nodes:
- continue
- # Multigraphs need to handle edge keys in addition to edge data.
- if is_multigraph and keys:
- if data:
- yield u, v, k, d
- else:
- yield u, v, k
- else:
- if data:
- yield u, v, d
- else:
- yield u, v
- # update frontier
- visited.add(v)
- nodes.discard(v)
- if is_multigraph:
- for w, keydict in G.adj[v].items():
- if w in visited:
- continue
- for k2, d2 in keydict.items():
- new_weight = d2.get(weight, 1) * sign
- if isnan(new_weight):
- if ignore_nan:
- continue
- msg = f"NaN found as an edge weight. Edge {(v, w, k2, d2)}"
- raise ValueError(msg)
- push(frontier, (new_weight, next(c), v, w, k2, d2))
- else:
- for w, d2 in G.adj[v].items():
- if w in visited:
- continue
- new_weight = d2.get(weight, 1) * sign
- if isnan(new_weight):
- if ignore_nan:
- continue
- msg = f"NaN found as an edge weight. Edge {(v, w, d2)}"
- raise ValueError(msg)
- push(frontier, (new_weight, next(c), v, w, d2))
- ALGORITHMS = {
- "boruvka": boruvka_mst_edges,
- "borůvka": boruvka_mst_edges,
- "kruskal": kruskal_mst_edges,
- "prim": prim_mst_edges,
- }
- @not_implemented_for("directed")
- def minimum_spanning_edges(
- G, algorithm="kruskal", weight="weight", keys=True, data=True, ignore_nan=False
- ):
- """Generate edges in a minimum spanning forest of an undirected
- weighted graph.
- A minimum spanning tree is a subgraph of the graph (a tree)
- with the minimum sum of edge weights. A spanning forest is a
- union of the spanning trees for each connected component of the graph.
- Parameters
- ----------
- G : undirected Graph
- An undirected graph. If `G` is connected, then the algorithm finds a
- spanning tree. Otherwise, a spanning forest is found.
- algorithm : string
- The algorithm to use when finding a minimum spanning tree. Valid
- choices are 'kruskal', 'prim', or 'boruvka'. The default is 'kruskal'.
- weight : string
- Edge data key to use for weight (default 'weight').
- keys : bool
- Whether to yield edge key in multigraphs in addition to the edge.
- If `G` is not a multigraph, this is ignored.
- data : bool, optional
- If True yield the edge data along with the edge.
- ignore_nan : bool (default: False)
- If a NaN is found as an edge weight normally an exception is raised.
- If `ignore_nan is True` then that edge is ignored instead.
- Returns
- -------
- edges : iterator
- An iterator over edges in a maximum spanning tree of `G`.
- Edges connecting nodes `u` and `v` are represented as tuples:
- `(u, v, k, d)` or `(u, v, k)` or `(u, v, d)` or `(u, v)`
- If `G` is a multigraph, `keys` indicates whether the edge key `k` will
- be reported in the third position in the edge tuple. `data` indicates
- whether the edge datadict `d` will appear at the end of the edge tuple.
- If `G` is not a multigraph, the tuples are `(u, v, d)` if `data` is True
- or `(u, v)` if `data` is False.
- Examples
- --------
- >>> from networkx.algorithms import tree
- Find minimum spanning edges by Kruskal's algorithm
- >>> G = nx.cycle_graph(4)
- >>> G.add_edge(0, 3, weight=2)
- >>> mst = tree.minimum_spanning_edges(G, algorithm="kruskal", data=False)
- >>> edgelist = list(mst)
- >>> sorted(sorted(e) for e in edgelist)
- [[0, 1], [1, 2], [2, 3]]
- Find minimum spanning edges by Prim's algorithm
- >>> G = nx.cycle_graph(4)
- >>> G.add_edge(0, 3, weight=2)
- >>> mst = tree.minimum_spanning_edges(G, algorithm="prim", data=False)
- >>> edgelist = list(mst)
- >>> sorted(sorted(e) for e in edgelist)
- [[0, 1], [1, 2], [2, 3]]
- Notes
- -----
- For Borůvka's algorithm, each edge must have a weight attribute, and
- each edge weight must be distinct.
- For the other algorithms, if the graph edges do not have a weight
- attribute a default weight of 1 will be used.
- Modified code from David Eppstein, April 2006
- http://www.ics.uci.edu/~eppstein/PADS/
- """
- try:
- algo = ALGORITHMS[algorithm]
- except KeyError as err:
- msg = f"{algorithm} is not a valid choice for an algorithm."
- raise ValueError(msg) from err
- return algo(
- G, minimum=True, weight=weight, keys=keys, data=data, ignore_nan=ignore_nan
- )
- @not_implemented_for("directed")
- def maximum_spanning_edges(
- G, algorithm="kruskal", weight="weight", keys=True, data=True, ignore_nan=False
- ):
- """Generate edges in a maximum spanning forest of an undirected
- weighted graph.
- A maximum spanning tree is a subgraph of the graph (a tree)
- with the maximum possible sum of edge weights. A spanning forest is a
- union of the spanning trees for each connected component of the graph.
- Parameters
- ----------
- G : undirected Graph
- An undirected graph. If `G` is connected, then the algorithm finds a
- spanning tree. Otherwise, a spanning forest is found.
- algorithm : string
- The algorithm to use when finding a maximum spanning tree. Valid
- choices are 'kruskal', 'prim', or 'boruvka'. The default is 'kruskal'.
- weight : string
- Edge data key to use for weight (default 'weight').
- keys : bool
- Whether to yield edge key in multigraphs in addition to the edge.
- If `G` is not a multigraph, this is ignored.
- data : bool, optional
- If True yield the edge data along with the edge.
- ignore_nan : bool (default: False)
- If a NaN is found as an edge weight normally an exception is raised.
- If `ignore_nan is True` then that edge is ignored instead.
- Returns
- -------
- edges : iterator
- An iterator over edges in a maximum spanning tree of `G`.
- Edges connecting nodes `u` and `v` are represented as tuples:
- `(u, v, k, d)` or `(u, v, k)` or `(u, v, d)` or `(u, v)`
- If `G` is a multigraph, `keys` indicates whether the edge key `k` will
- be reported in the third position in the edge tuple. `data` indicates
- whether the edge datadict `d` will appear at the end of the edge tuple.
- If `G` is not a multigraph, the tuples are `(u, v, d)` if `data` is True
- or `(u, v)` if `data` is False.
- Examples
- --------
- >>> from networkx.algorithms import tree
- Find maximum spanning edges by Kruskal's algorithm
- >>> G = nx.cycle_graph(4)
- >>> G.add_edge(0, 3, weight=2)
- >>> mst = tree.maximum_spanning_edges(G, algorithm="kruskal", data=False)
- >>> edgelist = list(mst)
- >>> sorted(sorted(e) for e in edgelist)
- [[0, 1], [0, 3], [1, 2]]
- Find maximum spanning edges by Prim's algorithm
- >>> G = nx.cycle_graph(4)
- >>> G.add_edge(0, 3, weight=2) # assign weight 2 to edge 0-3
- >>> mst = tree.maximum_spanning_edges(G, algorithm="prim", data=False)
- >>> edgelist = list(mst)
- >>> sorted(sorted(e) for e in edgelist)
- [[0, 1], [0, 3], [2, 3]]
- Notes
- -----
- For Borůvka's algorithm, each edge must have a weight attribute, and
- each edge weight must be distinct.
- For the other algorithms, if the graph edges do not have a weight
- attribute a default weight of 1 will be used.
- Modified code from David Eppstein, April 2006
- http://www.ics.uci.edu/~eppstein/PADS/
- """
- try:
- algo = ALGORITHMS[algorithm]
- except KeyError as err:
- msg = f"{algorithm} is not a valid choice for an algorithm."
- raise ValueError(msg) from err
- return algo(
- G, minimum=False, weight=weight, keys=keys, data=data, ignore_nan=ignore_nan
- )
- def minimum_spanning_tree(G, weight="weight", algorithm="kruskal", ignore_nan=False):
- """Returns a minimum spanning tree or forest on an undirected graph `G`.
- Parameters
- ----------
- G : undirected graph
- An undirected graph. If `G` is connected, then the algorithm finds a
- spanning tree. Otherwise, a spanning forest is found.
- weight : str
- Data key to use for edge weights.
- algorithm : string
- The algorithm to use when finding a minimum spanning tree. Valid
- choices are 'kruskal', 'prim', or 'boruvka'. The default is
- 'kruskal'.
- ignore_nan : bool (default: False)
- If a NaN is found as an edge weight normally an exception is raised.
- If `ignore_nan is True` then that edge is ignored instead.
- Returns
- -------
- G : NetworkX Graph
- A minimum spanning tree or forest.
- Examples
- --------
- >>> G = nx.cycle_graph(4)
- >>> G.add_edge(0, 3, weight=2)
- >>> T = nx.minimum_spanning_tree(G)
- >>> sorted(T.edges(data=True))
- [(0, 1, {}), (1, 2, {}), (2, 3, {})]
- Notes
- -----
- For Borůvka's algorithm, each edge must have a weight attribute, and
- each edge weight must be distinct.
- For the other algorithms, if the graph edges do not have a weight
- attribute a default weight of 1 will be used.
- There may be more than one tree with the same minimum or maximum weight.
- See :mod:`networkx.tree.recognition` for more detailed definitions.
- Isolated nodes with self-loops are in the tree as edgeless isolated nodes.
- """
- edges = minimum_spanning_edges(
- G, algorithm, weight, keys=True, data=True, ignore_nan=ignore_nan
- )
- T = G.__class__() # Same graph class as G
- T.graph.update(G.graph)
- T.add_nodes_from(G.nodes.items())
- T.add_edges_from(edges)
- return T
- def partition_spanning_tree(
- G, minimum=True, weight="weight", partition="partition", ignore_nan=False
- ):
- """
- Find a spanning tree while respecting a partition of edges.
- Edges can be flagged as either `INCLUDED` which are required to be in the
- returned tree, `EXCLUDED`, which cannot be in the returned tree and `OPEN`.
- This is used in the SpanningTreeIterator to create new partitions following
- the algorithm of Sörensen and Janssens [1]_.
- Parameters
- ----------
- G : undirected graph
- An undirected graph.
- minimum : bool (default: True)
- Determines whether the returned tree is the minimum spanning tree of
- the partition of the maximum one.
- weight : str
- Data key to use for edge weights.
- partition : str
- The key for the edge attribute containing the partition
- data on the graph. Edges can be included, excluded or open using the
- `EdgePartition` enum.
- ignore_nan : bool (default: False)
- If a NaN is found as an edge weight normally an exception is raised.
- If `ignore_nan is True` then that edge is ignored instead.
- Returns
- -------
- G : NetworkX Graph
- A minimum spanning tree using all of the included edges in the graph and
- none of the excluded edges.
- References
- ----------
- .. [1] G.K. Janssens, K. Sörensen, An algorithm to generate all spanning
- trees in order of increasing cost, Pesquisa Operacional, 2005-08,
- Vol. 25 (2), p. 219-229,
- https://www.scielo.br/j/pope/a/XHswBwRwJyrfL88dmMwYNWp/?lang=en
- """
- edges = kruskal_mst_edges(
- G,
- minimum,
- weight,
- keys=True,
- data=True,
- ignore_nan=ignore_nan,
- partition=partition,
- )
- T = G.__class__() # Same graph class as G
- T.graph.update(G.graph)
- T.add_nodes_from(G.nodes.items())
- T.add_edges_from(edges)
- return T
- def maximum_spanning_tree(G, weight="weight", algorithm="kruskal", ignore_nan=False):
- """Returns a maximum spanning tree or forest on an undirected graph `G`.
- Parameters
- ----------
- G : undirected graph
- An undirected graph. If `G` is connected, then the algorithm finds a
- spanning tree. Otherwise, a spanning forest is found.
- weight : str
- Data key to use for edge weights.
- algorithm : string
- The algorithm to use when finding a maximum spanning tree. Valid
- choices are 'kruskal', 'prim', or 'boruvka'. The default is
- 'kruskal'.
- ignore_nan : bool (default: False)
- If a NaN is found as an edge weight normally an exception is raised.
- If `ignore_nan is True` then that edge is ignored instead.
- Returns
- -------
- G : NetworkX Graph
- A maximum spanning tree or forest.
- Examples
- --------
- >>> G = nx.cycle_graph(4)
- >>> G.add_edge(0, 3, weight=2)
- >>> T = nx.maximum_spanning_tree(G)
- >>> sorted(T.edges(data=True))
- [(0, 1, {}), (0, 3, {'weight': 2}), (1, 2, {})]
- Notes
- -----
- For Borůvka's algorithm, each edge must have a weight attribute, and
- each edge weight must be distinct.
- For the other algorithms, if the graph edges do not have a weight
- attribute a default weight of 1 will be used.
- There may be more than one tree with the same minimum or maximum weight.
- See :mod:`networkx.tree.recognition` for more detailed definitions.
- Isolated nodes with self-loops are in the tree as edgeless isolated nodes.
- """
- edges = maximum_spanning_edges(
- G, algorithm, weight, keys=True, data=True, ignore_nan=ignore_nan
- )
- edges = list(edges)
- T = G.__class__() # Same graph class as G
- T.graph.update(G.graph)
- T.add_nodes_from(G.nodes.items())
- T.add_edges_from(edges)
- return T
- @py_random_state(3)
- def random_spanning_tree(G, weight=None, *, multiplicative=True, seed=None):
- """
- Sample a random spanning tree using the edges weights of `G`.
- This function supports two different methods for determining the
- probability of the graph. If ``multiplicative=True``, the probability
- is based on the product of edge weights, and if ``multiplicative=False``
- it is based on the sum of the edge weight. However, since it is
- easier to determine the total weight of all spanning trees for the
- multiplicative version, that is significantly faster and should be used if
- possible. Additionally, setting `weight` to `None` will cause a spanning tree
- to be selected with uniform probability.
- The function uses algorithm A8 in [1]_ .
- Parameters
- ----------
- G : nx.Graph
- An undirected version of the original graph.
- weight : string
- The edge key for the edge attribute holding edge weight.
- multiplicative : bool, default=True
- If `True`, the probability of each tree is the product of its edge weight
- over the sum of the product of all the spanning trees in the graph. If
- `False`, the probability is the sum of its edge weight over the sum of
- the sum of weights for all spanning trees in the graph.
- seed : integer, random_state, or None (default)
- Indicator of random number generation state.
- See :ref:`Randomness<randomness>`.
- Returns
- -------
- nx.Graph
- A spanning tree using the distribution defined by the weight of the tree.
- References
- ----------
- .. [1] V. Kulkarni, Generating random combinatorial objects, Journal of
- Algorithms, 11 (1990), pp. 185–207
- """
- def find_node(merged_nodes, node):
- """
- We can think of clusters of contracted nodes as having one
- representative in the graph. Each node which is not in merged_nodes
- is still its own representative. Since a representative can be later
- contracted, we need to recursively search though the dict to find
- the final representative, but once we know it we can use path
- compression to speed up the access of the representative for next time.
- This cannot be replaced by the standard NetworkX union_find since that
- data structure will merge nodes with less representing nodes into the
- one with more representing nodes but this function requires we merge
- them using the order that contract_edges contracts using.
- Parameters
- ----------
- merged_nodes : dict
- The dict storing the mapping from node to representative
- node
- The node whose representative we seek
- Returns
- -------
- The representative of the `node`
- """
- if node not in merged_nodes:
- return node
- else:
- rep = find_node(merged_nodes, merged_nodes[node])
- merged_nodes[node] = rep
- return rep
- def prepare_graph():
- """
- For the graph `G`, remove all edges not in the set `V` and then
- contract all edges in the set `U`.
- Returns
- -------
- A copy of `G` which has had all edges not in `V` removed and all edges
- in `U` contracted.
- """
- # The result is a MultiGraph version of G so that parallel edges are
- # allowed during edge contraction
- result = nx.MultiGraph(incoming_graph_data=G)
- # Remove all edges not in V
- edges_to_remove = set(result.edges()).difference(V)
- result.remove_edges_from(edges_to_remove)
- # Contract all edges in U
- #
- # Imagine that you have two edges to contract and they share an
- # endpoint like this:
- # [0] ----- [1] ----- [2]
- # If we contract (0, 1) first, the contraction function will always
- # delete the second node it is passed so the resulting graph would be
- # [0] ----- [2]
- # and edge (1, 2) no longer exists but (0, 2) would need to be contracted
- # in its place now. That is why I use the below dict as a merge-find
- # data structure with path compression to track how the nodes are merged.
- merged_nodes = {}
- for u, v in U:
- u_rep = find_node(merged_nodes, u)
- v_rep = find_node(merged_nodes, v)
- # We cannot contract a node with itself
- if u_rep == v_rep:
- continue
- nx.contracted_nodes(result, u_rep, v_rep, self_loops=False, copy=False)
- merged_nodes[v_rep] = u_rep
- return merged_nodes, result
- def spanning_tree_total_weight(G, weight):
- """
- Find the sum of weights of the spanning trees of `G` using the
- approioate `method`.
- This is easy if the chosen method is 'multiplicative', since we can
- use Kirchhoff's Tree Matrix Theorem directly. However, with the
- 'additive' method, this process is slightly more complex and less
- computatiionally efficient as we have to find the number of spanning
- trees which contain each possible edge in the graph.
- Parameters
- ----------
- G : NetworkX Graph
- The graph to find the total weight of all spanning trees on.
- weight : string
- The key for the weight edge attribute of the graph.
- Returns
- -------
- float
- The sum of either the multiplicative or additive weight for all
- spanning trees in the graph.
- """
- if multiplicative:
- return nx.total_spanning_tree_weight(G, weight)
- else:
- # There are two cases for the total spanning tree additive weight.
- # 1. There is one edge in the graph. Then the only spanning tree is
- # that edge itself, which will have a total weight of that edge
- # itself.
- if G.number_of_edges() == 1:
- return G.edges(data=weight).__iter__().__next__()[2]
- # 2. There are more than two edges in the graph. Then, we can find the
- # total weight of the spanning trees using the formula in the
- # reference paper: take the weight of that edge and multiple it by
- # the number of spanning trees which have to include that edge. This
- # can be accomplished by contracting the edge and finding the
- # multiplicative total spanning tree weight if the weight of each edge
- # is assumed to be 1, which is conveniently built into networkx already,
- # by calling total_spanning_tree_weight with weight=None
- else:
- total = 0
- for u, v, w in G.edges(data=weight):
- total += w * nx.total_spanning_tree_weight(
- nx.contracted_edge(G, edge=(u, v), self_loops=False), None
- )
- return total
- U = set()
- st_cached_value = 0
- V = set(G.edges())
- shuffled_edges = list(G.edges())
- seed.shuffle(shuffled_edges)
- for u, v in shuffled_edges:
- e_weight = G[u][v][weight] if weight is not None else 1
- node_map, prepared_G = prepare_graph()
- G_total_tree_weight = spanning_tree_total_weight(prepared_G, weight)
- # Add the edge to U so that we can compute the total tree weight
- # assuming we include that edge
- # Now, if (u, v) cannot exist in G because it is fully contracted out
- # of existence, then it by definition cannot influence G_e's Kirchhoff
- # value. But, we also cannot pick it.
- rep_edge = (find_node(node_map, u), find_node(node_map, v))
- # Check to see if the 'representative edge' for the current edge is
- # in prepared_G. If so, then we can pick it.
- if rep_edge in prepared_G.edges:
- prepared_G_e = nx.contracted_edge(
- prepared_G, edge=rep_edge, self_loops=False
- )
- G_e_total_tree_weight = spanning_tree_total_weight(prepared_G_e, weight)
- if multiplicative:
- threshold = e_weight * G_e_total_tree_weight / G_total_tree_weight
- else:
- numerator = (
- st_cached_value + e_weight
- ) * nx.total_spanning_tree_weight(prepared_G_e) + G_e_total_tree_weight
- denominator = (
- st_cached_value * nx.total_spanning_tree_weight(prepared_G)
- + G_total_tree_weight
- )
- threshold = numerator / denominator
- else:
- threshold = 0.0
- z = seed.uniform(0.0, 1.0)
- if z > threshold:
- # Remove the edge from V since we did not pick it.
- V.remove((u, v))
- else:
- # Add the edge to U since we picked it.
- st_cached_value += e_weight
- U.add((u, v))
- # If we decide to keep an edge, it may complete the spanning tree.
- if len(U) == G.number_of_nodes() - 1:
- spanning_tree = nx.Graph()
- spanning_tree.add_edges_from(U)
- return spanning_tree
- raise Exception(f"Something went wrong! Only {len(U)} edges in the spanning tree!")
- class SpanningTreeIterator:
- """
- Iterate over all spanning trees of a graph in either increasing or
- decreasing cost.
- Notes
- -----
- This iterator uses the partition scheme from [1]_ (included edges,
- excluded edges and open edges) as well as a modified Kruskal's Algorithm
- to generate minimum spanning trees which respect the partition of edges.
- For spanning trees with the same weight, ties are broken arbitrarily.
- References
- ----------
- .. [1] G.K. Janssens, K. Sörensen, An algorithm to generate all spanning
- trees in order of increasing cost, Pesquisa Operacional, 2005-08,
- Vol. 25 (2), p. 219-229,
- https://www.scielo.br/j/pope/a/XHswBwRwJyrfL88dmMwYNWp/?lang=en
- """
- @dataclass(order=True)
- class Partition:
- """
- This dataclass represents a partition and stores a dict with the edge
- data and the weight of the minimum spanning tree of the partition dict.
- """
- mst_weight: float
- partition_dict: dict = field(compare=False)
- def __copy__(self):
- return SpanningTreeIterator.Partition(
- self.mst_weight, self.partition_dict.copy()
- )
- def __init__(self, G, weight="weight", minimum=True, ignore_nan=False):
- """
- Initialize the iterator
- Parameters
- ----------
- G : nx.Graph
- The directed graph which we need to iterate trees over
- weight : String, default = "weight"
- The edge attribute used to store the weight of the edge
- minimum : bool, default = True
- Return the trees in increasing order while true and decreasing order
- while false.
- ignore_nan : bool, default = False
- If a NaN is found as an edge weight normally an exception is raised.
- If `ignore_nan is True` then that edge is ignored instead.
- """
- self.G = G.copy()
- self.weight = weight
- self.minimum = minimum
- self.ignore_nan = ignore_nan
- # Randomly create a key for an edge attribute to hold the partition data
- self.partition_key = (
- "SpanningTreeIterators super secret partition attribute name"
- )
- def __iter__(self):
- """
- Returns
- -------
- SpanningTreeIterator
- The iterator object for this graph
- """
- self.partition_queue = PriorityQueue()
- self._clear_partition(self.G)
- mst_weight = partition_spanning_tree(
- self.G, self.minimum, self.weight, self.partition_key, self.ignore_nan
- ).size(weight=self.weight)
- self.partition_queue.put(
- self.Partition(mst_weight if self.minimum else -mst_weight, {})
- )
- return self
- def __next__(self):
- """
- Returns
- -------
- (multi)Graph
- The spanning tree of next greatest weight, which ties broken
- arbitrarily.
- """
- if self.partition_queue.empty():
- del self.G, self.partition_queue
- raise StopIteration
- partition = self.partition_queue.get()
- self._write_partition(partition)
- next_tree = partition_spanning_tree(
- self.G, self.minimum, self.weight, self.partition_key, self.ignore_nan
- )
- self._partition(partition, next_tree)
- self._clear_partition(next_tree)
- return next_tree
- def _partition(self, partition, partition_tree):
- """
- Create new partitions based of the minimum spanning tree of the
- current minimum partition.
- Parameters
- ----------
- partition : Partition
- The Partition instance used to generate the current minimum spanning
- tree.
- partition_tree : nx.Graph
- The minimum spanning tree of the input partition.
- """
- # create two new partitions with the data from the input partition dict
- p1 = self.Partition(0, partition.partition_dict.copy())
- p2 = self.Partition(0, partition.partition_dict.copy())
- for e in partition_tree.edges:
- # determine if the edge was open or included
- if e not in partition.partition_dict:
- # This is an open edge
- p1.partition_dict[e] = EdgePartition.EXCLUDED
- p2.partition_dict[e] = EdgePartition.INCLUDED
- self._write_partition(p1)
- p1_mst = partition_spanning_tree(
- self.G,
- self.minimum,
- self.weight,
- self.partition_key,
- self.ignore_nan,
- )
- p1_mst_weight = p1_mst.size(weight=self.weight)
- if nx.is_connected(p1_mst):
- p1.mst_weight = p1_mst_weight if self.minimum else -p1_mst_weight
- self.partition_queue.put(p1.__copy__())
- p1.partition_dict = p2.partition_dict.copy()
- def _write_partition(self, partition):
- """
- Writes the desired partition into the graph to calculate the minimum
- spanning tree.
- Parameters
- ----------
- partition : Partition
- A Partition dataclass describing a partition on the edges of the
- graph.
- """
- for u, v, d in self.G.edges(data=True):
- if (u, v) in partition.partition_dict:
- d[self.partition_key] = partition.partition_dict[(u, v)]
- else:
- d[self.partition_key] = EdgePartition.OPEN
- def _clear_partition(self, G):
- """
- Removes partition data from the graph
- """
- for u, v, d in G.edges(data=True):
- if self.partition_key in d:
- del d[self.partition_key]
|