123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368 |
- """ Fast approximation for k-component structure
- """
- import itertools
- from collections import defaultdict
- from collections.abc import Mapping
- from functools import cached_property
- import networkx as nx
- from networkx.algorithms.approximation import local_node_connectivity
- from networkx.exception import NetworkXError
- from networkx.utils import not_implemented_for
- __all__ = ["k_components"]
- @not_implemented_for("directed")
- def k_components(G, min_density=0.95):
- r"""Returns the approximate k-component structure of a graph G.
- A `k`-component is a maximal subgraph of a graph G that has, at least,
- node connectivity `k`: we need to remove at least `k` nodes to break it
- into more components. `k`-components have an inherent hierarchical
- structure because they are nested in terms of connectivity: a connected
- graph can contain several 2-components, each of which can contain
- one or more 3-components, and so forth.
- This implementation is based on the fast heuristics to approximate
- the `k`-component structure of a graph [1]_. Which, in turn, it is based on
- a fast approximation algorithm for finding good lower bounds of the number
- of node independent paths between two nodes [2]_.
- Parameters
- ----------
- G : NetworkX graph
- Undirected graph
- min_density : Float
- Density relaxation threshold. Default value 0.95
- Returns
- -------
- k_components : dict
- Dictionary with connectivity level `k` as key and a list of
- sets of nodes that form a k-component of level `k` as values.
- Raises
- ------
- NetworkXNotImplemented
- If G is directed.
- Examples
- --------
- >>> # Petersen graph has 10 nodes and it is triconnected, thus all
- >>> # nodes are in a single component on all three connectivity levels
- >>> from networkx.algorithms import approximation as apxa
- >>> G = nx.petersen_graph()
- >>> k_components = apxa.k_components(G)
- Notes
- -----
- The logic of the approximation algorithm for computing the `k`-component
- structure [1]_ is based on repeatedly applying simple and fast algorithms
- for `k`-cores and biconnected components in order to narrow down the
- number of pairs of nodes over which we have to compute White and Newman's
- approximation algorithm for finding node independent paths [2]_. More
- formally, this algorithm is based on Whitney's theorem, which states
- an inclusion relation among node connectivity, edge connectivity, and
- minimum degree for any graph G. This theorem implies that every
- `k`-component is nested inside a `k`-edge-component, which in turn,
- is contained in a `k`-core. Thus, this algorithm computes node independent
- paths among pairs of nodes in each biconnected part of each `k`-core,
- and repeats this procedure for each `k` from 3 to the maximal core number
- of a node in the input graph.
- Because, in practice, many nodes of the core of level `k` inside a
- bicomponent actually are part of a component of level k, the auxiliary
- graph needed for the algorithm is likely to be very dense. Thus, we use
- a complement graph data structure (see `AntiGraph`) to save memory.
- AntiGraph only stores information of the edges that are *not* present
- in the actual auxiliary graph. When applying algorithms to this
- complement graph data structure, it behaves as if it were the dense
- version.
- See also
- --------
- k_components
- References
- ----------
- .. [1] Torrents, J. and F. Ferraro (2015) Structural Cohesion:
- Visualization and Heuristics for Fast Computation.
- https://arxiv.org/pdf/1503.04476v1
- .. [2] White, Douglas R., and Mark Newman (2001) A Fast Algorithm for
- Node-Independent Paths. Santa Fe Institute Working Paper #01-07-035
- https://www.santafe.edu/research/results/working-papers/fast-approximation-algorithms-for-finding-node-ind
- .. [3] Moody, J. and D. White (2003). Social cohesion and embeddedness:
- A hierarchical conception of social groups.
- American Sociological Review 68(1), 103--28.
- https://doi.org/10.2307/3088904
- """
- # Dictionary with connectivity level (k) as keys and a list of
- # sets of nodes that form a k-component as values
- k_components = defaultdict(list)
- # make a few functions local for speed
- node_connectivity = local_node_connectivity
- k_core = nx.k_core
- core_number = nx.core_number
- biconnected_components = nx.biconnected_components
- combinations = itertools.combinations
- # Exact solution for k = {1,2}
- # There is a linear time algorithm for triconnectivity, if we had an
- # implementation available we could start from k = 4.
- for component in nx.connected_components(G):
- # isolated nodes have connectivity 0
- comp = set(component)
- if len(comp) > 1:
- k_components[1].append(comp)
- for bicomponent in nx.biconnected_components(G):
- # avoid considering dyads as bicomponents
- bicomp = set(bicomponent)
- if len(bicomp) > 2:
- k_components[2].append(bicomp)
- # There is no k-component of k > maximum core number
- # \kappa(G) <= \lambda(G) <= \delta(G)
- g_cnumber = core_number(G)
- max_core = max(g_cnumber.values())
- for k in range(3, max_core + 1):
- C = k_core(G, k, core_number=g_cnumber)
- for nodes in biconnected_components(C):
- # Build a subgraph SG induced by the nodes that are part of
- # each biconnected component of the k-core subgraph C.
- if len(nodes) < k:
- continue
- SG = G.subgraph(nodes)
- # Build auxiliary graph
- H = _AntiGraph()
- H.add_nodes_from(SG.nodes())
- for u, v in combinations(SG, 2):
- K = node_connectivity(SG, u, v, cutoff=k)
- if k > K:
- H.add_edge(u, v)
- for h_nodes in biconnected_components(H):
- if len(h_nodes) <= k:
- continue
- SH = H.subgraph(h_nodes)
- for Gc in _cliques_heuristic(SG, SH, k, min_density):
- for k_nodes in biconnected_components(Gc):
- Gk = nx.k_core(SG.subgraph(k_nodes), k)
- if len(Gk) <= k:
- continue
- k_components[k].append(set(Gk))
- return k_components
- def _cliques_heuristic(G, H, k, min_density):
- h_cnumber = nx.core_number(H)
- for i, c_value in enumerate(sorted(set(h_cnumber.values()), reverse=True)):
- cands = {n for n, c in h_cnumber.items() if c == c_value}
- # Skip checking for overlap for the highest core value
- if i == 0:
- overlap = False
- else:
- overlap = set.intersection(
- *[{x for x in H[n] if x not in cands} for n in cands]
- )
- if overlap and len(overlap) < k:
- SH = H.subgraph(cands | overlap)
- else:
- SH = H.subgraph(cands)
- sh_cnumber = nx.core_number(SH)
- SG = nx.k_core(G.subgraph(SH), k)
- while not (_same(sh_cnumber) and nx.density(SH) >= min_density):
- # This subgraph must be writable => .copy()
- SH = H.subgraph(SG).copy()
- if len(SH) <= k:
- break
- sh_cnumber = nx.core_number(SH)
- sh_deg = dict(SH.degree())
- min_deg = min(sh_deg.values())
- SH.remove_nodes_from(n for n, d in sh_deg.items() if d == min_deg)
- SG = nx.k_core(G.subgraph(SH), k)
- else:
- yield SG
- def _same(measure, tol=0):
- vals = set(measure.values())
- if (max(vals) - min(vals)) <= tol:
- return True
- return False
- class _AntiGraph(nx.Graph):
- """
- Class for complement graphs.
- The main goal is to be able to work with big and dense graphs with
- a low memory footprint.
- In this class you add the edges that *do not exist* in the dense graph,
- the report methods of the class return the neighbors, the edges and
- the degree as if it was the dense graph. Thus it's possible to use
- an instance of this class with some of NetworkX functions. In this
- case we only use k-core, connected_components, and biconnected_components.
- """
- all_edge_dict = {"weight": 1}
- def single_edge_dict(self):
- return self.all_edge_dict
- edge_attr_dict_factory = single_edge_dict # type: ignore[assignment]
- def __getitem__(self, n):
- """Returns a dict of neighbors of node n in the dense graph.
- Parameters
- ----------
- n : node
- A node in the graph.
- Returns
- -------
- adj_dict : dictionary
- The adjacency dictionary for nodes connected to n.
- """
- all_edge_dict = self.all_edge_dict
- return {
- node: all_edge_dict for node in set(self._adj) - set(self._adj[n]) - {n}
- }
- def neighbors(self, n):
- """Returns an iterator over all neighbors of node n in the
- dense graph.
- """
- try:
- return iter(set(self._adj) - set(self._adj[n]) - {n})
- except KeyError as err:
- raise NetworkXError(f"The node {n} is not in the graph.") from err
- class AntiAtlasView(Mapping):
- """An adjacency inner dict for AntiGraph"""
- def __init__(self, graph, node):
- self._graph = graph
- self._atlas = graph._adj[node]
- self._node = node
- def __len__(self):
- return len(self._graph) - len(self._atlas) - 1
- def __iter__(self):
- return (n for n in self._graph if n not in self._atlas and n != self._node)
- def __getitem__(self, nbr):
- nbrs = set(self._graph._adj) - set(self._atlas) - {self._node}
- if nbr in nbrs:
- return self._graph.all_edge_dict
- raise KeyError(nbr)
- class AntiAdjacencyView(AntiAtlasView):
- """An adjacency outer dict for AntiGraph"""
- def __init__(self, graph):
- self._graph = graph
- self._atlas = graph._adj
- def __len__(self):
- return len(self._atlas)
- def __iter__(self):
- return iter(self._graph)
- def __getitem__(self, node):
- if node not in self._graph:
- raise KeyError(node)
- return self._graph.AntiAtlasView(self._graph, node)
- @cached_property
- def adj(self):
- return self.AntiAdjacencyView(self)
- def subgraph(self, nodes):
- """This subgraph method returns a full AntiGraph. Not a View"""
- nodes = set(nodes)
- G = _AntiGraph()
- G.add_nodes_from(nodes)
- for n in G:
- Gnbrs = G.adjlist_inner_dict_factory()
- G._adj[n] = Gnbrs
- for nbr, d in self._adj[n].items():
- if nbr in G._adj:
- Gnbrs[nbr] = d
- G._adj[nbr][n] = d
- G.graph = self.graph
- return G
- class AntiDegreeView(nx.reportviews.DegreeView):
- def __iter__(self):
- all_nodes = set(self._succ)
- for n in self._nodes:
- nbrs = all_nodes - set(self._succ[n]) - {n}
- yield (n, len(nbrs))
- def __getitem__(self, n):
- nbrs = set(self._succ) - set(self._succ[n]) - {n}
- # AntiGraph is a ThinGraph so all edges have weight 1
- return len(nbrs) + (n in nbrs)
- @cached_property
- def degree(self):
- """Returns an iterator for (node, degree) and degree for single node.
- The node degree is the number of edges adjacent to the node.
- Parameters
- ----------
- nbunch : iterable container, optional (default=all nodes)
- A container of nodes. The container will be iterated
- through once.
- weight : string or None, optional (default=None)
- The edge attribute that holds the numerical value used
- as a weight. If None, then each edge has weight 1.
- The degree is the sum of the edge weights adjacent to the node.
- Returns
- -------
- deg:
- Degree of the node, if a single node is passed as argument.
- nd_iter : an iterator
- The iterator returns two-tuples of (node, degree).
- See Also
- --------
- degree
- Examples
- --------
- >>> G = nx.path_graph(4)
- >>> G.degree(0) # node 0 with degree 1
- 1
- >>> list(G.degree([0, 1]))
- [(0, 1), (1, 2)]
- """
- return self.AntiDegreeView(self)
- def adjacency(self):
- """Returns an iterator of (node, adjacency set) tuples for all nodes
- in the dense graph.
- This is the fastest way to look at every edge.
- For directed graphs, only outgoing adjacencies are included.
- Returns
- -------
- adj_iter : iterator
- An iterator of (node, adjacency set) for all nodes in
- the graph.
- """
- for n in self._adj:
- yield (n, set(self._adj) - set(self._adj[n]) - {n})
|