123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350 |
- """Function for detecting communities based on Louvain Community Detection
- Algorithm"""
- from collections import defaultdict, deque
- import networkx as nx
- from networkx.algorithms.community import modularity
- from networkx.utils import py_random_state
- __all__ = ["louvain_communities", "louvain_partitions"]
- @py_random_state("seed")
- def louvain_communities(
- G, weight="weight", resolution=1, threshold=0.0000001, seed=None
- ):
- r"""Find the best partition of a graph using the Louvain Community Detection
- Algorithm.
- Louvain Community Detection Algorithm is a simple method to extract the community
- structure of a network. This is a heuristic method based on modularity optimization. [1]_
- The algorithm works in 2 steps. On the first step it assigns every node to be
- in its own community and then for each node it tries to find the maximum positive
- modularity gain by moving each node to all of its neighbor communities. If no positive
- gain is achieved the node remains in its original community.
- The modularity gain obtained by moving an isolated node $i$ into a community $C$ can
- easily be calculated by the following formula (combining [1]_ [2]_ and some algebra):
- .. math::
- \Delta Q = \frac{k_{i,in}}{2m} - \gamma\frac{ \Sigma_{tot} \cdot k_i}{2m^2}
- where $m$ is the size of the graph, $k_{i,in}$ is the sum of the weights of the links
- from $i$ to nodes in $C$, $k_i$ is the sum of the weights of the links incident to node $i$,
- $\Sigma_{tot}$ is the sum of the weights of the links incident to nodes in $C$ and $\gamma$
- is the resolution parameter.
- For the directed case the modularity gain can be computed using this formula according to [3]_
- .. math::
- \Delta Q = \frac{k_{i,in}}{m}
- - \gamma\frac{k_i^{out} \cdot\Sigma_{tot}^{in} + k_i^{in} \cdot \Sigma_{tot}^{out}}{m^2}
- where $k_i^{out}$, $k_i^{in}$ are the outer and inner weighted degrees of node $i$ and
- $\Sigma_{tot}^{in}$, $\Sigma_{tot}^{out}$ are the sum of in-going and out-going links incident
- to nodes in $C$.
- The first phase continues until no individual move can improve the modularity.
- The second phase consists in building a new network whose nodes are now the communities
- found in the first phase. To do so, the weights of the links between the new nodes are given by
- the sum of the weight of the links between nodes in the corresponding two communities. Once this
- phase is complete it is possible to reapply the first phase creating bigger communities with
- increased modularity.
- The above two phases are executed until no modularity gain is achieved (or is less than
- the `threshold`).
- Parameters
- ----------
- G : NetworkX graph
- weight : string or None, optional (default="weight")
- The name of an edge attribute that holds the numerical value
- used as a weight. If None then each edge has weight 1.
- resolution : float, optional (default=1)
- If resolution is less than 1, the algorithm favors larger communities.
- Greater than 1 favors smaller communities
- threshold : float, optional (default=0.0000001)
- Modularity gain threshold for each level. If the gain of modularity
- between 2 levels of the algorithm is less than the given threshold
- then the algorithm stops and returns the resulting communities.
- seed : integer, random_state, or None (default)
- Indicator of random number generation state.
- See :ref:`Randomness<randomness>`.
- Returns
- -------
- list
- A list of sets (partition of `G`). Each set represents one community and contains
- all the nodes that constitute it.
- Examples
- --------
- >>> import networkx as nx
- >>> G = nx.petersen_graph()
- >>> nx.community.louvain_communities(G, seed=123)
- [{0, 4, 5, 7, 9}, {1, 2, 3, 6, 8}]
- Notes
- -----
- The order in which the nodes are considered can affect the final output. In the algorithm
- the ordering happens using a random shuffle.
- References
- ----------
- .. [1] Blondel, V.D. et al. Fast unfolding of communities in
- large networks. J. Stat. Mech 10008, 1-12(2008). https://doi.org/10.1088/1742-5468/2008/10/P10008
- .. [2] Traag, V.A., Waltman, L. & van Eck, N.J. From Louvain to Leiden: guaranteeing
- well-connected communities. Sci Rep 9, 5233 (2019). https://doi.org/10.1038/s41598-019-41695-z
- .. [3] Nicolas Dugué, Anthony Perez. Directed Louvain : maximizing modularity in directed networks.
- [Research Report] Université d’Orléans. 2015. hal-01231784. https://hal.archives-ouvertes.fr/hal-01231784
- See Also
- --------
- louvain_partitions
- """
- d = louvain_partitions(G, weight, resolution, threshold, seed)
- q = deque(d, maxlen=1)
- return q.pop()
- @py_random_state("seed")
- def louvain_partitions(
- G, weight="weight", resolution=1, threshold=0.0000001, seed=None
- ):
- """Yields partitions for each level of the Louvain Community Detection Algorithm
- Louvain Community Detection Algorithm is a simple method to extract the community
- structure of a network. This is a heuristic method based on modularity optimization. [1]_
- The partitions at each level (step of the algorithm) form a dendogram of communities.
- A dendrogram is a diagram representing a tree and each level represents
- a partition of the G graph. The top level contains the smallest communities
- and as you traverse to the bottom of the tree the communities get bigger
- and the overall modularity increases making the partition better.
- Each level is generated by executing the two phases of the Louvain Community
- Detection Algorithm.
- Parameters
- ----------
- G : NetworkX graph
- weight : string or None, optional (default="weight")
- The name of an edge attribute that holds the numerical value
- used as a weight. If None then each edge has weight 1.
- resolution : float, optional (default=1)
- If resolution is less than 1, the algorithm favors larger communities.
- Greater than 1 favors smaller communities
- threshold : float, optional (default=0.0000001)
- Modularity gain threshold for each level. If the gain of modularity
- between 2 levels of the algorithm is less than the given threshold
- then the algorithm stops and returns the resulting communities.
- seed : integer, random_state, or None (default)
- Indicator of random number generation state.
- See :ref:`Randomness<randomness>`.
- Yields
- ------
- list
- A list of sets (partition of `G`). Each set represents one community and contains
- all the nodes that constitute it.
- References
- ----------
- .. [1] Blondel, V.D. et al. Fast unfolding of communities in
- large networks. J. Stat. Mech 10008, 1-12(2008)
- See Also
- --------
- louvain_communities
- """
- partition = [{u} for u in G.nodes()]
- mod = modularity(G, partition, resolution=resolution, weight=weight)
- is_directed = G.is_directed()
- if G.is_multigraph():
- graph = _convert_multigraph(G, weight, is_directed)
- else:
- graph = G.__class__()
- graph.add_nodes_from(G)
- graph.add_weighted_edges_from(G.edges(data=weight, default=1))
- m = graph.size(weight="weight")
- partition, inner_partition, improvement = _one_level(
- graph, m, partition, resolution, is_directed, seed
- )
- improvement = True
- while improvement:
- # gh-5901 protect the sets in the yielded list from further manipulation here
- yield [s.copy() for s in partition]
- new_mod = modularity(
- graph, inner_partition, resolution=resolution, weight="weight"
- )
- if new_mod - mod <= threshold:
- return
- mod = new_mod
- graph = _gen_graph(graph, inner_partition)
- partition, inner_partition, improvement = _one_level(
- graph, m, partition, resolution, is_directed, seed
- )
- def _one_level(G, m, partition, resolution=1, is_directed=False, seed=None):
- """Calculate one level of the Louvain partitions tree
- Parameters
- ----------
- G : NetworkX Graph/DiGraph
- The graph from which to detect communities
- m : number
- The size of the graph `G`.
- partition : list of sets of nodes
- A valid partition of the graph `G`
- resolution : positive number
- The resolution parameter for computing the modularity of a partition
- is_directed : bool
- True if `G` is a directed graph.
- seed : integer, random_state, or None (default)
- Indicator of random number generation state.
- See :ref:`Randomness<randomness>`.
- """
- node2com = {u: i for i, u in enumerate(G.nodes())}
- inner_partition = [{u} for u in G.nodes()]
- if is_directed:
- in_degrees = dict(G.in_degree(weight="weight"))
- out_degrees = dict(G.out_degree(weight="weight"))
- Stot_in = list(in_degrees.values())
- Stot_out = list(out_degrees.values())
- # Calculate weights for both in and out neighbours
- nbrs = {}
- for u in G:
- nbrs[u] = defaultdict(float)
- for _, n, wt in G.out_edges(u, data="weight"):
- nbrs[u][n] += wt
- for n, _, wt in G.in_edges(u, data="weight"):
- nbrs[u][n] += wt
- else:
- degrees = dict(G.degree(weight="weight"))
- Stot = list(degrees.values())
- nbrs = {u: {v: data["weight"] for v, data in G[u].items() if v != u} for u in G}
- rand_nodes = list(G.nodes)
- seed.shuffle(rand_nodes)
- nb_moves = 1
- improvement = False
- while nb_moves > 0:
- nb_moves = 0
- for u in rand_nodes:
- best_mod = 0
- best_com = node2com[u]
- weights2com = _neighbor_weights(nbrs[u], node2com)
- if is_directed:
- in_degree = in_degrees[u]
- out_degree = out_degrees[u]
- Stot_in[best_com] -= in_degree
- Stot_out[best_com] -= out_degree
- remove_cost = (
- -weights2com[best_com] / m
- + resolution
- * (out_degree * Stot_in[best_com] + in_degree * Stot_out[best_com])
- / m**2
- )
- else:
- degree = degrees[u]
- Stot[best_com] -= degree
- remove_cost = -weights2com[best_com] / m + resolution * (
- Stot[best_com] * degree
- ) / (2 * m**2)
- for nbr_com, wt in weights2com.items():
- if is_directed:
- gain = (
- remove_cost
- + wt / m
- - resolution
- * (
- out_degree * Stot_in[nbr_com]
- + in_degree * Stot_out[nbr_com]
- )
- / m**2
- )
- else:
- gain = (
- remove_cost
- + wt / m
- - resolution * (Stot[nbr_com] * degree) / (2 * m**2)
- )
- if gain > best_mod:
- best_mod = gain
- best_com = nbr_com
- if is_directed:
- Stot_in[best_com] += in_degree
- Stot_out[best_com] += out_degree
- else:
- Stot[best_com] += degree
- if best_com != node2com[u]:
- com = G.nodes[u].get("nodes", {u})
- partition[node2com[u]].difference_update(com)
- inner_partition[node2com[u]].remove(u)
- partition[best_com].update(com)
- inner_partition[best_com].add(u)
- improvement = True
- nb_moves += 1
- node2com[u] = best_com
- partition = list(filter(len, partition))
- inner_partition = list(filter(len, inner_partition))
- return partition, inner_partition, improvement
- def _neighbor_weights(nbrs, node2com):
- """Calculate weights between node and its neighbor communities.
- Parameters
- ----------
- nbrs : dictionary
- Dictionary with nodes' neighbours as keys and their edge weight as value.
- node2com : dictionary
- Dictionary with all graph's nodes as keys and their community index as value.
- """
- weights = defaultdict(float)
- for nbr, wt in nbrs.items():
- weights[node2com[nbr]] += wt
- return weights
- def _gen_graph(G, partition):
- """Generate a new graph based on the partitions of a given graph"""
- H = G.__class__()
- node2com = {}
- for i, part in enumerate(partition):
- nodes = set()
- for node in part:
- node2com[node] = i
- nodes.update(G.nodes[node].get("nodes", {node}))
- H.add_node(i, nodes=nodes)
- for node1, node2, wt in G.edges(data=True):
- wt = wt["weight"]
- com1 = node2com[node1]
- com2 = node2com[node2]
- temp = H.get_edge_data(com1, com2, {"weight": 0})["weight"]
- H.add_edge(com1, com2, weight=wt + temp)
- return H
- def _convert_multigraph(G, weight, is_directed):
- """Convert a Multigraph to normal Graph"""
- if is_directed:
- H = nx.DiGraph()
- else:
- H = nx.Graph()
- H.add_nodes_from(G)
- for u, v, wt in G.edges(data=weight, default=1):
- if H.has_edge(u, v):
- H[u][v]["weight"] += wt
- else:
- H.add_edge(u, v, weight=wt)
- return H
|