123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341 |
- """Functions for measuring the quality of a partition (into
- communities).
- """
- from itertools import combinations
- import networkx as nx
- from networkx import NetworkXError
- from networkx.algorithms.community.community_utils import is_partition
- from networkx.utils import not_implemented_for
- from networkx.utils.decorators import argmap
- __all__ = ["modularity", "partition_quality"]
- class NotAPartition(NetworkXError):
- """Raised if a given collection is not a partition."""
- def __init__(self, G, collection):
- msg = f"{collection} is not a valid partition of the graph {G}"
- super().__init__(msg)
- def _require_partition(G, partition):
- """Decorator to check that a valid partition is input to a function
- Raises :exc:`networkx.NetworkXError` if the partition is not valid.
- This decorator should be used on functions whose first two arguments
- are a graph and a partition of the nodes of that graph (in that
- order)::
- >>> @require_partition
- ... def foo(G, partition):
- ... print("partition is valid!")
- ...
- >>> G = nx.complete_graph(5)
- >>> partition = [{0, 1}, {2, 3}, {4}]
- >>> foo(G, partition)
- partition is valid!
- >>> partition = [{0}, {2, 3}, {4}]
- >>> foo(G, partition)
- Traceback (most recent call last):
- ...
- networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G
- >>> partition = [{0, 1}, {1, 2, 3}, {4}]
- >>> foo(G, partition)
- Traceback (most recent call last):
- ...
- networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G
- """
- if is_partition(G, partition):
- return G, partition
- raise nx.NetworkXError("`partition` is not a valid partition of the nodes of G")
- require_partition = argmap(_require_partition, (0, 1))
- @nx._dispatch
- def intra_community_edges(G, partition):
- """Returns the number of intra-community edges for a partition of `G`.
- Parameters
- ----------
- G : NetworkX graph.
- partition : iterable of sets of nodes
- This must be a partition of the nodes of `G`.
- The "intra-community edges" are those edges joining a pair of nodes
- in the same block of the partition.
- """
- return sum(G.subgraph(block).size() for block in partition)
- @nx._dispatch
- def inter_community_edges(G, partition):
- """Returns the number of inter-community edges for a partition of `G`.
- according to the given
- partition of the nodes of `G`.
- Parameters
- ----------
- G : NetworkX graph.
- partition : iterable of sets of nodes
- This must be a partition of the nodes of `G`.
- The *inter-community edges* are those edges joining a pair of nodes
- in different blocks of the partition.
- Implementation note: this function creates an intermediate graph
- that may require the same amount of memory as that of `G`.
- """
-
-
-
-
-
-
-
-
- MG = nx.MultiDiGraph if G.is_directed() else nx.MultiGraph
- return nx.quotient_graph(G, partition, create_using=MG).size()
- def inter_community_non_edges(G, partition):
- """Returns the number of inter-community non-edges according to the
- given partition of the nodes of `G`.
- Parameters
- ----------
- G : NetworkX graph.
- partition : iterable of sets of nodes
- This must be a partition of the nodes of `G`.
- A *non-edge* is a pair of nodes (undirected if `G` is undirected)
- that are not adjacent in `G`. The *inter-community non-edges* are
- those non-edges on a pair of nodes in different blocks of the
- partition.
- Implementation note: this function creates two intermediate graphs,
- which may require up to twice the amount of memory as required to
- store `G`.
- """
-
-
-
-
-
-
-
-
- return inter_community_edges(nx.complement(G), partition)
- def modularity(G, communities, weight="weight", resolution=1):
- r"""Returns the modularity of the given partition of the graph.
- Modularity is defined in [1]_ as
- .. math::
- Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \gamma\frac{k_ik_j}{2m}\right)
- \delta(c_i,c_j)
- where $m$ is the number of edges, $A$ is the adjacency matrix of `G`,
- $k_i$ is the degree of $i$, $\gamma$ is the resolution parameter,
- and $\delta(c_i, c_j)$ is 1 if $i$ and $j$ are in the same community else 0.
- According to [2]_ (and verified by some algebra) this can be reduced to
- .. math::
- Q = \sum_{c=1}^{n}
- \left[ \frac{L_c}{m} - \gamma\left( \frac{k_c}{2m} \right) ^2 \right]
- where the sum iterates over all communities $c$, $m$ is the number of edges,
- $L_c$ is the number of intra-community links for community $c$,
- $k_c$ is the sum of degrees of the nodes in community $c$,
- and $\gamma$ is the resolution parameter.
- The resolution parameter sets an arbitrary tradeoff between intra-group
- edges and inter-group edges. More complex grouping patterns can be
- discovered by analyzing the same network with multiple values of gamma
- and then combining the results [3]_. That said, it is very common to
- simply use gamma=1. More on the choice of gamma is in [4]_.
- The second formula is the one actually used in calculation of the modularity.
- For directed graphs the second formula replaces $k_c$ with $k^{in}_c k^{out}_c$.
- Parameters
- ----------
- G : NetworkX Graph
- communities : list or iterable of set of nodes
- These node sets must represent a partition of G's nodes.
- weight : string or None, optional (default="weight")
- The edge attribute that holds the numerical value used
- as a weight. If None or an edge does not have that attribute,
- then that edge has weight 1.
- resolution : float (default=1)
- If resolution is less than 1, modularity favors larger communities.
- Greater than 1 favors smaller communities.
- Returns
- -------
- Q : float
- The modularity of the partition.
- Raises
- ------
- NotAPartition
- If `communities` is not a partition of the nodes of `G`.
- Examples
- --------
- >>> G = nx.barbell_graph(3, 0)
- >>> nx.community.modularity(G, [{0, 1, 2}, {3, 4, 5}])
- 0.35714285714285715
- >>> nx.community.modularity(G, nx.community.label_propagation_communities(G))
- 0.35714285714285715
- References
- ----------
- .. [1] M. E. J. Newman "Networks: An Introduction", page 224.
- Oxford University Press, 2011.
- .. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore.
- "Finding community structure in very large networks."
- Phys. Rev. E 70.6 (2004). <https://arxiv.org/abs/cond-mat/0408187>
- .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community Detection"
- Phys. Rev. E 74, 016110, 2006. https://doi.org/10.1103/PhysRevE.74.016110
- .. [4] M. E. J. Newman, "Equivalence between modularity optimization and
- maximum likelihood methods for community detection"
- Phys. Rev. E 94, 052315, 2016. https://doi.org/10.1103/PhysRevE.94.052315
- """
- if not isinstance(communities, list):
- communities = list(communities)
- if not is_partition(G, communities):
- raise NotAPartition(G, communities)
- directed = G.is_directed()
- if directed:
- out_degree = dict(G.out_degree(weight=weight))
- in_degree = dict(G.in_degree(weight=weight))
- m = sum(out_degree.values())
- norm = 1 / m**2
- else:
- out_degree = in_degree = dict(G.degree(weight=weight))
- deg_sum = sum(out_degree.values())
- m = deg_sum / 2
- norm = 1 / deg_sum**2
- def community_contribution(community):
- comm = set(community)
- L_c = sum(wt for u, v, wt in G.edges(comm, data=weight, default=1) if v in comm)
- out_degree_sum = sum(out_degree[u] for u in comm)
- in_degree_sum = sum(in_degree[u] for u in comm) if directed else out_degree_sum
- return L_c / m - resolution * out_degree_sum * in_degree_sum * norm
- return sum(map(community_contribution, communities))
- @require_partition
- def partition_quality(G, partition):
- """Returns the coverage and performance of a partition of G.
- The *coverage* of a partition is the ratio of the number of
- intra-community edges to the total number of edges in the graph.
- The *performance* of a partition is the number of
- intra-community edges plus inter-community non-edges divided by the total
- number of potential edges.
- This algorithm has complexity $O(C^2 + L)$ where C is the number of communities and L is the number of links.
- Parameters
- ----------
- G : NetworkX graph
- partition : sequence
- Partition of the nodes of `G`, represented as a sequence of
- sets of nodes (blocks). Each block of the partition represents a
- community.
- Returns
- -------
- (float, float)
- The (coverage, performance) tuple of the partition, as defined above.
- Raises
- ------
- NetworkXError
- If `partition` is not a valid partition of the nodes of `G`.
- Notes
- -----
- If `G` is a multigraph;
- - for coverage, the multiplicity of edges is counted
- - for performance, the result is -1 (total number of possible edges is not defined)
- References
- ----------
- .. [1] Santo Fortunato.
- "Community Detection in Graphs".
- *Physical Reports*, Volume 486, Issue 3--5 pp. 75--174
- <https://arxiv.org/abs/0906.0612>
- """
- node_community = {}
- for i, community in enumerate(partition):
- for node in community:
- node_community[node] = i
-
- if not G.is_multigraph():
-
- possible_inter_community_edges = sum(
- len(p1) * len(p2) for p1, p2 in combinations(partition, 2)
- )
- if G.is_directed():
- possible_inter_community_edges *= 2
- else:
- possible_inter_community_edges = 0
-
-
- n = len(G)
- total_pairs = n * (n - 1)
- if not G.is_directed():
- total_pairs //= 2
- intra_community_edges = 0
- inter_community_non_edges = possible_inter_community_edges
-
- for e in G.edges():
- if node_community[e[0]] == node_community[e[1]]:
- intra_community_edges += 1
- else:
- inter_community_non_edges -= 1
- coverage = intra_community_edges / len(G.edges)
- if G.is_multigraph():
- performance = -1.0
- else:
- performance = (intra_community_edges + inter_community_non_edges) / total_pairs
- return coverage, performance
|