123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277 |
- """Functions for computing clustering of pairs
- """
- import itertools
- import networkx as nx
- __all__ = [
- "clustering",
- "average_clustering",
- "latapy_clustering",
- "robins_alexander_clustering",
- ]
- def cc_dot(nu, nv):
- return len(nu & nv) / len(nu | nv)
- def cc_max(nu, nv):
- return len(nu & nv) / max(len(nu), len(nv))
- def cc_min(nu, nv):
- return len(nu & nv) / min(len(nu), len(nv))
- modes = {"dot": cc_dot, "min": cc_min, "max": cc_max}
- def latapy_clustering(G, nodes=None, mode="dot"):
- r"""Compute a bipartite clustering coefficient for nodes.
- The bipartie clustering coefficient is a measure of local density
- of connections defined as [1]_:
- .. math::
- c_u = \frac{\sum_{v \in N(N(u))} c_{uv} }{|N(N(u))|}
- where `N(N(u))` are the second order neighbors of `u` in `G` excluding `u`,
- and `c_{uv}` is the pairwise clustering coefficient between nodes
- `u` and `v`.
- The mode selects the function for `c_{uv}` which can be:
- `dot`:
- .. math::
- c_{uv}=\frac{|N(u)\cap N(v)|}{|N(u) \cup N(v)|}
- `min`:
- .. math::
- c_{uv}=\frac{|N(u)\cap N(v)|}{min(|N(u)|,|N(v)|)}
- `max`:
- .. math::
- c_{uv}=\frac{|N(u)\cap N(v)|}{max(|N(u)|,|N(v)|)}
- Parameters
- ----------
- G : graph
- A bipartite graph
- nodes : list or iterable (optional)
- Compute bipartite clustering for these nodes. The default
- is all nodes in G.
- mode : string
- The pariwise bipartite clustering method to be used in the computation.
- It must be "dot", "max", or "min".
- Returns
- -------
- clustering : dictionary
- A dictionary keyed by node with the clustering coefficient value.
- Examples
- --------
- >>> from networkx.algorithms import bipartite
- >>> G = nx.path_graph(4) # path graphs are bipartite
- >>> c = bipartite.clustering(G)
- >>> c[0]
- 0.5
- >>> c = bipartite.clustering(G, mode="min")
- >>> c[0]
- 1.0
- See Also
- --------
- robins_alexander_clustering
- average_clustering
- networkx.algorithms.cluster.square_clustering
- References
- ----------
- .. [1] Latapy, Matthieu, Clémence Magnien, and Nathalie Del Vecchio (2008).
- Basic notions for the analysis of large two-mode networks.
- Social Networks 30(1), 31--48.
- """
- if not nx.algorithms.bipartite.is_bipartite(G):
- raise nx.NetworkXError("Graph is not bipartite")
- try:
- cc_func = modes[mode]
- except KeyError as err:
- raise nx.NetworkXError(
- "Mode for bipartite clustering must be: dot, min or max"
- ) from err
- if nodes is None:
- nodes = G
- ccs = {}
- for v in nodes:
- cc = 0.0
- nbrs2 = {u for nbr in G[v] for u in G[nbr]} - {v}
- for u in nbrs2:
- cc += cc_func(set(G[u]), set(G[v]))
- if cc > 0.0: # len(nbrs2)>0
- cc /= len(nbrs2)
- ccs[v] = cc
- return ccs
- clustering = latapy_clustering
- def average_clustering(G, nodes=None, mode="dot"):
- r"""Compute the average bipartite clustering coefficient.
- A clustering coefficient for the whole graph is the average,
- .. math::
- C = \frac{1}{n}\sum_{v \in G} c_v,
- where `n` is the number of nodes in `G`.
- Similar measures for the two bipartite sets can be defined [1]_
- .. math::
- C_X = \frac{1}{|X|}\sum_{v \in X} c_v,
- where `X` is a bipartite set of `G`.
- Parameters
- ----------
- G : graph
- a bipartite graph
- nodes : list or iterable, optional
- A container of nodes to use in computing the average.
- The nodes should be either the entire graph (the default) or one of the
- bipartite sets.
- mode : string
- The pariwise bipartite clustering method.
- It must be "dot", "max", or "min"
- Returns
- -------
- clustering : float
- The average bipartite clustering for the given set of nodes or the
- entire graph if no nodes are specified.
- Examples
- --------
- >>> from networkx.algorithms import bipartite
- >>> G = nx.star_graph(3) # star graphs are bipartite
- >>> bipartite.average_clustering(G)
- 0.75
- >>> X, Y = bipartite.sets(G)
- >>> bipartite.average_clustering(G, X)
- 0.0
- >>> bipartite.average_clustering(G, Y)
- 1.0
- See Also
- --------
- clustering
- Notes
- -----
- The container of nodes passed to this function must contain all of the nodes
- in one of the bipartite sets ("top" or "bottom") in order to compute
- the correct average bipartite clustering coefficients.
- See :mod:`bipartite documentation <networkx.algorithms.bipartite>`
- for further details on how bipartite graphs are handled in NetworkX.
- References
- ----------
- .. [1] Latapy, Matthieu, Clémence Magnien, and Nathalie Del Vecchio (2008).
- Basic notions for the analysis of large two-mode networks.
- Social Networks 30(1), 31--48.
- """
- if nodes is None:
- nodes = G
- ccs = latapy_clustering(G, nodes=nodes, mode=mode)
- return sum(ccs[v] for v in nodes) / len(nodes)
- def robins_alexander_clustering(G):
- r"""Compute the bipartite clustering of G.
- Robins and Alexander [1]_ defined bipartite clustering coefficient as
- four times the number of four cycles `C_4` divided by the number of
- three paths `L_3` in a bipartite graph:
- .. math::
- CC_4 = \frac{4 * C_4}{L_3}
- Parameters
- ----------
- G : graph
- a bipartite graph
- Returns
- -------
- clustering : float
- The Robins and Alexander bipartite clustering for the input graph.
- Examples
- --------
- >>> from networkx.algorithms import bipartite
- >>> G = nx.davis_southern_women_graph()
- >>> print(round(bipartite.robins_alexander_clustering(G), 3))
- 0.468
- See Also
- --------
- latapy_clustering
- networkx.algorithms.cluster.square_clustering
- References
- ----------
- .. [1] Robins, G. and M. Alexander (2004). Small worlds among interlocking
- directors: Network structure and distance in bipartite graphs.
- Computational & Mathematical Organization Theory 10(1), 69–94.
- """
- if G.order() < 4 or G.size() < 3:
- return 0
- L_3 = _threepaths(G)
- if L_3 == 0:
- return 0
- C_4 = _four_cycles(G)
- return (4.0 * C_4) / L_3
- def _four_cycles(G):
- cycles = 0
- for v in G:
- for u, w in itertools.combinations(G[v], 2):
- cycles += len((set(G[u]) & set(G[w])) - {v})
- return cycles / 4
- def _threepaths(G):
- paths = 0
- for v in G:
- for u in G[v]:
- for w in set(G[u]) - {v}:
- paths += len(set(G[w]) - {v, u})
- # Divide by two because we count each three path twice
- # one for each possible starting point
- return paths / 2
|