123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221 |
- """
- Moody and White algorithm for k-components
- """
- from collections import defaultdict
- from itertools import combinations
- from operator import itemgetter
- import networkx as nx
- # Define the default maximum flow function.
- from networkx.algorithms.flow import edmonds_karp
- from networkx.utils import not_implemented_for
- default_flow_func = edmonds_karp
- __all__ = ["k_components"]
- @not_implemented_for("directed")
- def k_components(G, flow_func=None):
- r"""Returns the k-component structure of a graph G.
- A `k`-component is a maximal subgraph of a graph G that has, at least,
- node connectivity `k`: we need to remove at least `k` nodes to break it
- into more components. `k`-components have an inherent hierarchical
- structure because they are nested in terms of connectivity: a connected
- graph can contain several 2-components, each of which can contain
- one or more 3-components, and so forth.
- Parameters
- ----------
- G : NetworkX graph
- flow_func : function
- Function to perform the underlying flow computations. Default value
- :meth:`edmonds_karp`. This function performs better in sparse graphs with
- right tailed degree distributions. :meth:`shortest_augmenting_path` will
- perform better in denser graphs.
- Returns
- -------
- k_components : dict
- Dictionary with all connectivity levels `k` in the input Graph as keys
- and a list of sets of nodes that form a k-component of level `k` as
- values.
- Raises
- ------
- NetworkXNotImplemented
- If the input graph is directed.
- Examples
- --------
- >>> # Petersen graph has 10 nodes and it is triconnected, thus all
- >>> # nodes are in a single component on all three connectivity levels
- >>> G = nx.petersen_graph()
- >>> k_components = nx.k_components(G)
- Notes
- -----
- Moody and White [1]_ (appendix A) provide an algorithm for identifying
- k-components in a graph, which is based on Kanevsky's algorithm [2]_
- for finding all minimum-size node cut-sets of a graph (implemented in
- :meth:`all_node_cuts` function):
- 1. Compute node connectivity, k, of the input graph G.
- 2. Identify all k-cutsets at the current level of connectivity using
- Kanevsky's algorithm.
- 3. Generate new graph components based on the removal of
- these cutsets. Nodes in a cutset belong to both sides
- of the induced cut.
- 4. If the graph is neither complete nor trivial, return to 1;
- else end.
- This implementation also uses some heuristics (see [3]_ for details)
- to speed up the computation.
- See also
- --------
- node_connectivity
- all_node_cuts
- biconnected_components : special case of this function when k=2
- k_edge_components : similar to this function, but uses edge-connectivity
- instead of node-connectivity
- References
- ----------
- .. [1] Moody, J. and D. White (2003). Social cohesion and embeddedness:
- A hierarchical conception of social groups.
- American Sociological Review 68(1), 103--28.
- http://www2.asanet.org/journals/ASRFeb03MoodyWhite.pdf
- .. [2] Kanevsky, A. (1993). Finding all minimum-size separating vertex
- sets in a graph. Networks 23(6), 533--541.
- http://onlinelibrary.wiley.com/doi/10.1002/net.3230230604/abstract
- .. [3] Torrents, J. and F. Ferraro (2015). Structural Cohesion:
- Visualization and Heuristics for Fast Computation.
- https://arxiv.org/pdf/1503.04476v1
- """
- # Dictionary with connectivity level (k) as keys and a list of
- # sets of nodes that form a k-component as values. Note that
- # k-components can overlap (but only k - 1 nodes).
- k_components = defaultdict(list)
- # Define default flow function
- if flow_func is None:
- flow_func = default_flow_func
- # Bicomponents as a base to check for higher order k-components
- for component in nx.connected_components(G):
- # isolated nodes have connectivity 0
- comp = set(component)
- if len(comp) > 1:
- k_components[1].append(comp)
- bicomponents = [G.subgraph(c) for c in nx.biconnected_components(G)]
- for bicomponent in bicomponents:
- bicomp = set(bicomponent)
- # avoid considering dyads as bicomponents
- if len(bicomp) > 2:
- k_components[2].append(bicomp)
- for B in bicomponents:
- if len(B) <= 2:
- continue
- k = nx.node_connectivity(B, flow_func=flow_func)
- if k > 2:
- k_components[k].append(set(B))
- # Perform cuts in a DFS like order.
- cuts = list(nx.all_node_cuts(B, k=k, flow_func=flow_func))
- stack = [(k, _generate_partition(B, cuts, k))]
- while stack:
- (parent_k, partition) = stack[-1]
- try:
- nodes = next(partition)
- C = B.subgraph(nodes)
- this_k = nx.node_connectivity(C, flow_func=flow_func)
- if this_k > parent_k and this_k > 2:
- k_components[this_k].append(set(C))
- cuts = list(nx.all_node_cuts(C, k=this_k, flow_func=flow_func))
- if cuts:
- stack.append((this_k, _generate_partition(C, cuts, this_k)))
- except StopIteration:
- stack.pop()
- # This is necessary because k-components may only be reported at their
- # maximum k level. But we want to return a dictionary in which keys are
- # connectivity levels and values list of sets of components, without
- # skipping any connectivity level. Also, it's possible that subsets of
- # an already detected k-component appear at a level k. Checking for this
- # in the while loop above penalizes the common case. Thus we also have to
- # _consolidate all connectivity levels in _reconstruct_k_components.
- return _reconstruct_k_components(k_components)
- def _consolidate(sets, k):
- """Merge sets that share k or more elements.
- See: http://rosettacode.org/wiki/Set_consolidation
- The iterative python implementation posted there is
- faster than this because of the overhead of building a
- Graph and calling nx.connected_components, but it's not
- clear for us if we can use it in NetworkX because there
- is no licence for the code.
- """
- G = nx.Graph()
- nodes = dict(enumerate(sets))
- G.add_nodes_from(nodes)
- G.add_edges_from(
- (u, v) for u, v in combinations(nodes, 2) if len(nodes[u] & nodes[v]) >= k
- )
- for component in nx.connected_components(G):
- yield set.union(*[nodes[n] for n in component])
- def _generate_partition(G, cuts, k):
- def has_nbrs_in_partition(G, node, partition):
- return any(n in partition for n in G[node])
- components = []
- nodes = {n for n, d in G.degree() if d > k} - {n for cut in cuts for n in cut}
- H = G.subgraph(nodes)
- for cc in nx.connected_components(H):
- component = set(cc)
- for cut in cuts:
- for node in cut:
- if has_nbrs_in_partition(G, node, cc):
- component.add(node)
- if len(component) < G.order():
- components.append(component)
- yield from _consolidate(components, k + 1)
- def _reconstruct_k_components(k_comps):
- result = {}
- max_k = max(k_comps)
- for k in reversed(range(1, max_k + 1)):
- if k == max_k:
- result[k] = list(_consolidate(k_comps[k], k))
- elif k not in k_comps:
- result[k] = list(_consolidate(result[k + 1], k))
- else:
- nodes_at_k = set.union(*k_comps[k])
- to_add = [c for c in result[k + 1] if any(n not in nodes_at_k for n in c)]
- if to_add:
- result[k] = list(_consolidate(k_comps[k] + to_add, k))
- else:
- result[k] = list(_consolidate(k_comps[k], k))
- return result
- def build_k_number_dict(kcomps):
- result = {}
- for k, comps in sorted(kcomps.items(), key=itemgetter(0)):
- for comp in comps:
- for node in comp:
- result[node] = k
- return result
|