123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585 |
- # This module uses material from the Wikipedia article Hopcroft--Karp algorithm
- # <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>, accessed on
- # January 3, 2015, which is released under the Creative Commons
- # Attribution-Share-Alike License 3.0
- # <http://creativecommons.org/licenses/by-sa/3.0/>. That article includes
- # pseudocode, which has been translated into the corresponding Python code.
- #
- # Portions of this module use code from David Eppstein's Python Algorithms and
- # Data Structures (PADS) library, which is dedicated to the public domain (for
- # proof, see <http://www.ics.uci.edu/~eppstein/PADS/ABOUT-PADS.txt>).
- """Provides functions for computing maximum cardinality matchings and minimum
- weight full matchings in a bipartite graph.
- If you don't care about the particular implementation of the maximum matching
- algorithm, simply use the :func:`maximum_matching`. If you do care, you can
- import one of the named maximum matching algorithms directly.
- For example, to find a maximum matching in the complete bipartite graph with
- two vertices on the left and three vertices on the right:
- >>> G = nx.complete_bipartite_graph(2, 3)
- >>> left, right = nx.bipartite.sets(G)
- >>> list(left)
- [0, 1]
- >>> list(right)
- [2, 3, 4]
- >>> nx.bipartite.maximum_matching(G)
- {0: 2, 1: 3, 2: 0, 3: 1}
- The dictionary returned by :func:`maximum_matching` includes a mapping for
- vertices in both the left and right vertex sets.
- Similarly, :func:`minimum_weight_full_matching` produces, for a complete
- weighted bipartite graph, a matching whose cardinality is the cardinality of
- the smaller of the two partitions, and for which the sum of the weights of the
- edges included in the matching is minimal.
- """
- import collections
- import itertools
- import networkx as nx
- from networkx.algorithms.bipartite import sets as bipartite_sets
- from networkx.algorithms.bipartite.matrix import biadjacency_matrix
- __all__ = [
- "maximum_matching",
- "hopcroft_karp_matching",
- "eppstein_matching",
- "to_vertex_cover",
- "minimum_weight_full_matching",
- ]
- INFINITY = float("inf")
- def hopcroft_karp_matching(G, top_nodes=None):
- """Returns the maximum cardinality matching of the bipartite graph `G`.
- A matching is a set of edges that do not share any nodes. A maximum
- cardinality matching is a matching with the most edges possible. It
- is not always unique. Finding a matching in a bipartite graph can be
- treated as a networkx flow problem.
- The functions ``hopcroft_karp_matching`` and ``maximum_matching``
- are aliases of the same function.
- Parameters
- ----------
- G : NetworkX graph
- Undirected bipartite graph
- top_nodes : container of nodes
- Container with all nodes in one bipartite node set. If not supplied
- it will be computed. But if more than one solution exists an exception
- will be raised.
- Returns
- -------
- matches : dictionary
- The matching is returned as a dictionary, `matches`, such that
- ``matches[v] == w`` if node `v` is matched to node `w`. Unmatched
- nodes do not occur as a key in `matches`.
- Raises
- ------
- AmbiguousSolution
- Raised if the input bipartite graph is disconnected and no container
- with all nodes in one bipartite set is provided. When determining
- the nodes in each bipartite set more than one valid solution is
- possible if the input graph is disconnected.
- Notes
- -----
- This function is implemented with the `Hopcroft--Karp matching algorithm
- <https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm>`_ for
- bipartite graphs.
- See :mod:`bipartite documentation <networkx.algorithms.bipartite>`
- for further details on how bipartite graphs are handled in NetworkX.
- See Also
- --------
- maximum_matching
- hopcroft_karp_matching
- eppstein_matching
- References
- ----------
- .. [1] John E. Hopcroft and Richard M. Karp. "An n^{5 / 2} Algorithm for
- Maximum Matchings in Bipartite Graphs" In: **SIAM Journal of Computing**
- 2.4 (1973), pp. 225--231. <https://doi.org/10.1137/0202019>.
- """
- # First we define some auxiliary search functions.
- #
- # If you are a human reading these auxiliary search functions, the "global"
- # variables `leftmatches`, `rightmatches`, `distances`, etc. are defined
- # below the functions, so that they are initialized close to the initial
- # invocation of the search functions.
- def breadth_first_search():
- for v in left:
- if leftmatches[v] is None:
- distances[v] = 0
- queue.append(v)
- else:
- distances[v] = INFINITY
- distances[None] = INFINITY
- while queue:
- v = queue.popleft()
- if distances[v] < distances[None]:
- for u in G[v]:
- if distances[rightmatches[u]] is INFINITY:
- distances[rightmatches[u]] = distances[v] + 1
- queue.append(rightmatches[u])
- return distances[None] is not INFINITY
- def depth_first_search(v):
- if v is not None:
- for u in G[v]:
- if distances[rightmatches[u]] == distances[v] + 1:
- if depth_first_search(rightmatches[u]):
- rightmatches[u] = v
- leftmatches[v] = u
- return True
- distances[v] = INFINITY
- return False
- return True
- # Initialize the "global" variables that maintain state during the search.
- left, right = bipartite_sets(G, top_nodes)
- leftmatches = {v: None for v in left}
- rightmatches = {v: None for v in right}
- distances = {}
- queue = collections.deque()
- # Implementation note: this counter is incremented as pairs are matched but
- # it is currently not used elsewhere in the computation.
- num_matched_pairs = 0
- while breadth_first_search():
- for v in left:
- if leftmatches[v] is None:
- if depth_first_search(v):
- num_matched_pairs += 1
- # Strip the entries matched to `None`.
- leftmatches = {k: v for k, v in leftmatches.items() if v is not None}
- rightmatches = {k: v for k, v in rightmatches.items() if v is not None}
- # At this point, the left matches and the right matches are inverses of one
- # another. In other words,
- #
- # leftmatches == {v, k for k, v in rightmatches.items()}
- #
- # Finally, we combine both the left matches and right matches.
- return dict(itertools.chain(leftmatches.items(), rightmatches.items()))
- def eppstein_matching(G, top_nodes=None):
- """Returns the maximum cardinality matching of the bipartite graph `G`.
- Parameters
- ----------
- G : NetworkX graph
- Undirected bipartite graph
- top_nodes : container
- Container with all nodes in one bipartite node set. If not supplied
- it will be computed. But if more than one solution exists an exception
- will be raised.
- Returns
- -------
- matches : dictionary
- The matching is returned as a dictionary, `matching`, such that
- ``matching[v] == w`` if node `v` is matched to node `w`. Unmatched
- nodes do not occur as a key in `matching`.
- Raises
- ------
- AmbiguousSolution
- Raised if the input bipartite graph is disconnected and no container
- with all nodes in one bipartite set is provided. When determining
- the nodes in each bipartite set more than one valid solution is
- possible if the input graph is disconnected.
- Notes
- -----
- This function is implemented with David Eppstein's version of the algorithm
- Hopcroft--Karp algorithm (see :func:`hopcroft_karp_matching`), which
- originally appeared in the `Python Algorithms and Data Structures library
- (PADS) <http://www.ics.uci.edu/~eppstein/PADS/ABOUT-PADS.txt>`_.
- See :mod:`bipartite documentation <networkx.algorithms.bipartite>`
- for further details on how bipartite graphs are handled in NetworkX.
- See Also
- --------
- hopcroft_karp_matching
- """
- # Due to its original implementation, a directed graph is needed
- # so that the two sets of bipartite nodes can be distinguished
- left, right = bipartite_sets(G, top_nodes)
- G = nx.DiGraph(G.edges(left))
- # initialize greedy matching (redundant, but faster than full search)
- matching = {}
- for u in G:
- for v in G[u]:
- if v not in matching:
- matching[v] = u
- break
- while True:
- # structure residual graph into layers
- # pred[u] gives the neighbor in the previous layer for u in U
- # preds[v] gives a list of neighbors in the previous layer for v in V
- # unmatched gives a list of unmatched vertices in final layer of V,
- # and is also used as a flag value for pred[u] when u is in the first
- # layer
- preds = {}
- unmatched = []
- pred = {u: unmatched for u in G}
- for v in matching:
- del pred[matching[v]]
- layer = list(pred)
- # repeatedly extend layering structure by another pair of layers
- while layer and not unmatched:
- newLayer = {}
- for u in layer:
- for v in G[u]:
- if v not in preds:
- newLayer.setdefault(v, []).append(u)
- layer = []
- for v in newLayer:
- preds[v] = newLayer[v]
- if v in matching:
- layer.append(matching[v])
- pred[matching[v]] = v
- else:
- unmatched.append(v)
- # did we finish layering without finding any alternating paths?
- if not unmatched:
- # TODO - The lines between --- were unused and were thus commented
- # out. This whole commented chunk should be reviewed to determine
- # whether it should be built upon or completely removed.
- # ---
- # unlayered = {}
- # for u in G:
- # # TODO Why is extra inner loop necessary?
- # for v in G[u]:
- # if v not in preds:
- # unlayered[v] = None
- # ---
- # TODO Originally, this function returned a three-tuple:
- #
- # return (matching, list(pred), list(unlayered))
- #
- # For some reason, the documentation for this function
- # indicated that the second and third elements of the returned
- # three-tuple would be the vertices in the left and right vertex
- # sets, respectively, that are also in the maximum independent set.
- # However, what I think the author meant was that the second
- # element is the list of vertices that were unmatched and the third
- # element was the list of vertices that were matched. Since that
- # seems to be the case, they don't really need to be returned,
- # since that information can be inferred from the matching
- # dictionary.
- # All the matched nodes must be a key in the dictionary
- for key in matching.copy():
- matching[matching[key]] = key
- return matching
- # recursively search backward through layers to find alternating paths
- # recursion returns true if found path, false otherwise
- def recurse(v):
- if v in preds:
- L = preds.pop(v)
- for u in L:
- if u in pred:
- pu = pred.pop(u)
- if pu is unmatched or recurse(pu):
- matching[v] = u
- return True
- return False
- for v in unmatched:
- recurse(v)
- def _is_connected_by_alternating_path(G, v, matched_edges, unmatched_edges, targets):
- """Returns True if and only if the vertex `v` is connected to one of
- the target vertices by an alternating path in `G`.
- An *alternating path* is a path in which every other edge is in the
- specified maximum matching (and the remaining edges in the path are not in
- the matching). An alternating path may have matched edges in the even
- positions or in the odd positions, as long as the edges alternate between
- 'matched' and 'unmatched'.
- `G` is an undirected bipartite NetworkX graph.
- `v` is a vertex in `G`.
- `matched_edges` is a set of edges present in a maximum matching in `G`.
- `unmatched_edges` is a set of edges not present in a maximum
- matching in `G`.
- `targets` is a set of vertices.
- """
- def _alternating_dfs(u, along_matched=True):
- """Returns True if and only if `u` is connected to one of the
- targets by an alternating path.
- `u` is a vertex in the graph `G`.
- If `along_matched` is True, this step of the depth-first search
- will continue only through edges in the given matching. Otherwise, it
- will continue only through edges *not* in the given matching.
- """
- visited = set()
- # Follow matched edges when depth is even,
- # and follow unmatched edges when depth is odd.
- initial_depth = 0 if along_matched else 1
- stack = [(u, iter(G[u]), initial_depth)]
- while stack:
- parent, children, depth = stack[-1]
- valid_edges = matched_edges if depth % 2 else unmatched_edges
- try:
- child = next(children)
- if child not in visited:
- if (parent, child) in valid_edges or (child, parent) in valid_edges:
- if child in targets:
- return True
- visited.add(child)
- stack.append((child, iter(G[child]), depth + 1))
- except StopIteration:
- stack.pop()
- return False
- # Check for alternating paths starting with edges in the matching, then
- # check for alternating paths starting with edges not in the
- # matching.
- return _alternating_dfs(v, along_matched=True) or _alternating_dfs(
- v, along_matched=False
- )
- def _connected_by_alternating_paths(G, matching, targets):
- """Returns the set of vertices that are connected to one of the target
- vertices by an alternating path in `G` or are themselves a target.
- An *alternating path* is a path in which every other edge is in the
- specified maximum matching (and the remaining edges in the path are not in
- the matching). An alternating path may have matched edges in the even
- positions or in the odd positions, as long as the edges alternate between
- 'matched' and 'unmatched'.
- `G` is an undirected bipartite NetworkX graph.
- `matching` is a dictionary representing a maximum matching in `G`, as
- returned by, for example, :func:`maximum_matching`.
- `targets` is a set of vertices.
- """
- # Get the set of matched edges and the set of unmatched edges. Only include
- # one version of each undirected edge (for example, include edge (1, 2) but
- # not edge (2, 1)). Using frozensets as an intermediary step we do not
- # require nodes to be orderable.
- edge_sets = {frozenset((u, v)) for u, v in matching.items()}
- matched_edges = {tuple(edge) for edge in edge_sets}
- unmatched_edges = {
- (u, v) for (u, v) in G.edges() if frozenset((u, v)) not in edge_sets
- }
- return {
- v
- for v in G
- if v in targets
- or _is_connected_by_alternating_path(
- G, v, matched_edges, unmatched_edges, targets
- )
- }
- def to_vertex_cover(G, matching, top_nodes=None):
- """Returns the minimum vertex cover corresponding to the given maximum
- matching of the bipartite graph `G`.
- Parameters
- ----------
- G : NetworkX graph
- Undirected bipartite graph
- matching : dictionary
- A dictionary whose keys are vertices in `G` and whose values are the
- distinct neighbors comprising the maximum matching for `G`, as returned
- by, for example, :func:`maximum_matching`. The dictionary *must*
- represent the maximum matching.
- top_nodes : container
- Container with all nodes in one bipartite node set. If not supplied
- it will be computed. But if more than one solution exists an exception
- will be raised.
- Returns
- -------
- vertex_cover : :class:`set`
- The minimum vertex cover in `G`.
- Raises
- ------
- AmbiguousSolution
- Raised if the input bipartite graph is disconnected and no container
- with all nodes in one bipartite set is provided. When determining
- the nodes in each bipartite set more than one valid solution is
- possible if the input graph is disconnected.
- Notes
- -----
- This function is implemented using the procedure guaranteed by `Konig's
- theorem
- <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29>`_,
- which proves an equivalence between a maximum matching and a minimum vertex
- cover in bipartite graphs.
- Since a minimum vertex cover is the complement of a maximum independent set
- for any graph, one can compute the maximum independent set of a bipartite
- graph this way:
- >>> G = nx.complete_bipartite_graph(2, 3)
- >>> matching = nx.bipartite.maximum_matching(G)
- >>> vertex_cover = nx.bipartite.to_vertex_cover(G, matching)
- >>> independent_set = set(G) - vertex_cover
- >>> print(list(independent_set))
- [2, 3, 4]
- See :mod:`bipartite documentation <networkx.algorithms.bipartite>`
- for further details on how bipartite graphs are handled in NetworkX.
- """
- # This is a Python implementation of the algorithm described at
- # <https://en.wikipedia.org/wiki/K%C3%B6nig%27s_theorem_%28graph_theory%29#Proof>.
- L, R = bipartite_sets(G, top_nodes)
- # Let U be the set of unmatched vertices in the left vertex set.
- unmatched_vertices = set(G) - set(matching)
- U = unmatched_vertices & L
- # Let Z be the set of vertices that are either in U or are connected to U
- # by alternating paths.
- Z = _connected_by_alternating_paths(G, matching, U)
- # At this point, every edge either has a right endpoint in Z or a left
- # endpoint not in Z. This gives us the vertex cover.
- return (L - Z) | (R & Z)
- #: Returns the maximum cardinality matching in the given bipartite graph.
- #:
- #: This function is simply an alias for :func:`hopcroft_karp_matching`.
- maximum_matching = hopcroft_karp_matching
- def minimum_weight_full_matching(G, top_nodes=None, weight="weight"):
- r"""Returns a minimum weight full matching of the bipartite graph `G`.
- Let :math:`G = ((U, V), E)` be a weighted bipartite graph with real weights
- :math:`w : E \to \mathbb{R}`. This function then produces a matching
- :math:`M \subseteq E` with cardinality
- .. math::
- \lvert M \rvert = \min(\lvert U \rvert, \lvert V \rvert),
- which minimizes the sum of the weights of the edges included in the
- matching, :math:`\sum_{e \in M} w(e)`, or raises an error if no such
- matching exists.
- When :math:`\lvert U \rvert = \lvert V \rvert`, this is commonly
- referred to as a perfect matching; here, since we allow
- :math:`\lvert U \rvert` and :math:`\lvert V \rvert` to differ, we
- follow Karp [1]_ and refer to the matching as *full*.
- Parameters
- ----------
- G : NetworkX graph
- Undirected bipartite graph
- top_nodes : container
- Container with all nodes in one bipartite node set. If not supplied
- it will be computed.
- weight : string, optional (default='weight')
- The edge data key used to provide each value in the matrix.
- Returns
- -------
- matches : dictionary
- The matching is returned as a dictionary, `matches`, such that
- ``matches[v] == w`` if node `v` is matched to node `w`. Unmatched
- nodes do not occur as a key in `matches`.
- Raises
- ------
- ValueError
- Raised if no full matching exists.
- ImportError
- Raised if SciPy is not available.
- Notes
- -----
- The problem of determining a minimum weight full matching is also known as
- the rectangular linear assignment problem. This implementation defers the
- calculation of the assignment to SciPy.
- References
- ----------
- .. [1] Richard Manning Karp:
- An algorithm to Solve the m x n Assignment Problem in Expected Time
- O(mn log n).
- Networks, 10(2):143–152, 1980.
- """
- import numpy as np
- import scipy as sp
- import scipy.optimize # call as sp.optimize
- left, right = nx.bipartite.sets(G, top_nodes)
- U = list(left)
- V = list(right)
- # We explicitly create the biadjancency matrix having infinities
- # where edges are missing (as opposed to zeros, which is what one would
- # get by using toarray on the sparse matrix).
- weights_sparse = biadjacency_matrix(
- G, row_order=U, column_order=V, weight=weight, format="coo"
- )
- weights = np.full(weights_sparse.shape, np.inf)
- weights[weights_sparse.row, weights_sparse.col] = weights_sparse.data
- left_matches = sp.optimize.linear_sum_assignment(weights)
- d = {U[u]: V[v] for u, v in zip(*left_matches)}
- # d will contain the matching from edges in left to right; we need to
- # add the ones from right to left as well.
- d.update({v: u for u, v in d.items()})
- return d
|