louvain.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
  1. """Function for detecting communities based on Louvain Community Detection
  2. Algorithm"""
  3. from collections import defaultdict, deque
  4. import networkx as nx
  5. from networkx.algorithms.community import modularity
  6. from networkx.utils import py_random_state
  7. __all__ = ["louvain_communities", "louvain_partitions"]
  8. @py_random_state("seed")
  9. def louvain_communities(
  10. G, weight="weight", resolution=1, threshold=0.0000001, seed=None
  11. ):
  12. r"""Find the best partition of a graph using the Louvain Community Detection
  13. Algorithm.
  14. Louvain Community Detection Algorithm is a simple method to extract the community
  15. structure of a network. This is a heuristic method based on modularity optimization. [1]_
  16. The algorithm works in 2 steps. On the first step it assigns every node to be
  17. in its own community and then for each node it tries to find the maximum positive
  18. modularity gain by moving each node to all of its neighbor communities. If no positive
  19. gain is achieved the node remains in its original community.
  20. The modularity gain obtained by moving an isolated node $i$ into a community $C$ can
  21. easily be calculated by the following formula (combining [1]_ [2]_ and some algebra):
  22. .. math::
  23. \Delta Q = \frac{k_{i,in}}{2m} - \gamma\frac{ \Sigma_{tot} \cdot k_i}{2m^2}
  24. where $m$ is the size of the graph, $k_{i,in}$ is the sum of the weights of the links
  25. from $i$ to nodes in $C$, $k_i$ is the sum of the weights of the links incident to node $i$,
  26. $\Sigma_{tot}$ is the sum of the weights of the links incident to nodes in $C$ and $\gamma$
  27. is the resolution parameter.
  28. For the directed case the modularity gain can be computed using this formula according to [3]_
  29. .. math::
  30. \Delta Q = \frac{k_{i,in}}{m}
  31. - \gamma\frac{k_i^{out} \cdot\Sigma_{tot}^{in} + k_i^{in} \cdot \Sigma_{tot}^{out}}{m^2}
  32. where $k_i^{out}$, $k_i^{in}$ are the outer and inner weighted degrees of node $i$ and
  33. $\Sigma_{tot}^{in}$, $\Sigma_{tot}^{out}$ are the sum of in-going and out-going links incident
  34. to nodes in $C$.
  35. The first phase continues until no individual move can improve the modularity.
  36. The second phase consists in building a new network whose nodes are now the communities
  37. found in the first phase. To do so, the weights of the links between the new nodes are given by
  38. the sum of the weight of the links between nodes in the corresponding two communities. Once this
  39. phase is complete it is possible to reapply the first phase creating bigger communities with
  40. increased modularity.
  41. The above two phases are executed until no modularity gain is achieved (or is less than
  42. the `threshold`).
  43. Parameters
  44. ----------
  45. G : NetworkX graph
  46. weight : string or None, optional (default="weight")
  47. The name of an edge attribute that holds the numerical value
  48. used as a weight. If None then each edge has weight 1.
  49. resolution : float, optional (default=1)
  50. If resolution is less than 1, the algorithm favors larger communities.
  51. Greater than 1 favors smaller communities
  52. threshold : float, optional (default=0.0000001)
  53. Modularity gain threshold for each level. If the gain of modularity
  54. between 2 levels of the algorithm is less than the given threshold
  55. then the algorithm stops and returns the resulting communities.
  56. seed : integer, random_state, or None (default)
  57. Indicator of random number generation state.
  58. See :ref:`Randomness<randomness>`.
  59. Returns
  60. -------
  61. list
  62. A list of sets (partition of `G`). Each set represents one community and contains
  63. all the nodes that constitute it.
  64. Examples
  65. --------
  66. >>> import networkx as nx
  67. >>> G = nx.petersen_graph()
  68. >>> nx.community.louvain_communities(G, seed=123)
  69. [{0, 4, 5, 7, 9}, {1, 2, 3, 6, 8}]
  70. Notes
  71. -----
  72. The order in which the nodes are considered can affect the final output. In the algorithm
  73. the ordering happens using a random shuffle.
  74. References
  75. ----------
  76. .. [1] Blondel, V.D. et al. Fast unfolding of communities in
  77. large networks. J. Stat. Mech 10008, 1-12(2008). https://doi.org/10.1088/1742-5468/2008/10/P10008
  78. .. [2] Traag, V.A., Waltman, L. & van Eck, N.J. From Louvain to Leiden: guaranteeing
  79. well-connected communities. Sci Rep 9, 5233 (2019). https://doi.org/10.1038/s41598-019-41695-z
  80. .. [3] Nicolas Dugué, Anthony Perez. Directed Louvain : maximizing modularity in directed networks.
  81. [Research Report] Université d’Orléans. 2015. hal-01231784. https://hal.archives-ouvertes.fr/hal-01231784
  82. See Also
  83. --------
  84. louvain_partitions
  85. """
  86. d = louvain_partitions(G, weight, resolution, threshold, seed)
  87. q = deque(d, maxlen=1)
  88. return q.pop()
  89. @py_random_state("seed")
  90. def louvain_partitions(
  91. G, weight="weight", resolution=1, threshold=0.0000001, seed=None
  92. ):
  93. """Yields partitions for each level of the Louvain Community Detection Algorithm
  94. Louvain Community Detection Algorithm is a simple method to extract the community
  95. structure of a network. This is a heuristic method based on modularity optimization. [1]_
  96. The partitions at each level (step of the algorithm) form a dendogram of communities.
  97. A dendrogram is a diagram representing a tree and each level represents
  98. a partition of the G graph. The top level contains the smallest communities
  99. and as you traverse to the bottom of the tree the communities get bigger
  100. and the overall modularity increases making the partition better.
  101. Each level is generated by executing the two phases of the Louvain Community
  102. Detection Algorithm.
  103. Parameters
  104. ----------
  105. G : NetworkX graph
  106. weight : string or None, optional (default="weight")
  107. The name of an edge attribute that holds the numerical value
  108. used as a weight. If None then each edge has weight 1.
  109. resolution : float, optional (default=1)
  110. If resolution is less than 1, the algorithm favors larger communities.
  111. Greater than 1 favors smaller communities
  112. threshold : float, optional (default=0.0000001)
  113. Modularity gain threshold for each level. If the gain of modularity
  114. between 2 levels of the algorithm is less than the given threshold
  115. then the algorithm stops and returns the resulting communities.
  116. seed : integer, random_state, or None (default)
  117. Indicator of random number generation state.
  118. See :ref:`Randomness<randomness>`.
  119. Yields
  120. ------
  121. list
  122. A list of sets (partition of `G`). Each set represents one community and contains
  123. all the nodes that constitute it.
  124. References
  125. ----------
  126. .. [1] Blondel, V.D. et al. Fast unfolding of communities in
  127. large networks. J. Stat. Mech 10008, 1-12(2008)
  128. See Also
  129. --------
  130. louvain_communities
  131. """
  132. partition = [{u} for u in G.nodes()]
  133. mod = modularity(G, partition, resolution=resolution, weight=weight)
  134. is_directed = G.is_directed()
  135. if G.is_multigraph():
  136. graph = _convert_multigraph(G, weight, is_directed)
  137. else:
  138. graph = G.__class__()
  139. graph.add_nodes_from(G)
  140. graph.add_weighted_edges_from(G.edges(data=weight, default=1))
  141. m = graph.size(weight="weight")
  142. partition, inner_partition, improvement = _one_level(
  143. graph, m, partition, resolution, is_directed, seed
  144. )
  145. improvement = True
  146. while improvement:
  147. # gh-5901 protect the sets in the yielded list from further manipulation here
  148. yield [s.copy() for s in partition]
  149. new_mod = modularity(
  150. graph, inner_partition, resolution=resolution, weight="weight"
  151. )
  152. if new_mod - mod <= threshold:
  153. return
  154. mod = new_mod
  155. graph = _gen_graph(graph, inner_partition)
  156. partition, inner_partition, improvement = _one_level(
  157. graph, m, partition, resolution, is_directed, seed
  158. )
  159. def _one_level(G, m, partition, resolution=1, is_directed=False, seed=None):
  160. """Calculate one level of the Louvain partitions tree
  161. Parameters
  162. ----------
  163. G : NetworkX Graph/DiGraph
  164. The graph from which to detect communities
  165. m : number
  166. The size of the graph `G`.
  167. partition : list of sets of nodes
  168. A valid partition of the graph `G`
  169. resolution : positive number
  170. The resolution parameter for computing the modularity of a partition
  171. is_directed : bool
  172. True if `G` is a directed graph.
  173. seed : integer, random_state, or None (default)
  174. Indicator of random number generation state.
  175. See :ref:`Randomness<randomness>`.
  176. """
  177. node2com = {u: i for i, u in enumerate(G.nodes())}
  178. inner_partition = [{u} for u in G.nodes()]
  179. if is_directed:
  180. in_degrees = dict(G.in_degree(weight="weight"))
  181. out_degrees = dict(G.out_degree(weight="weight"))
  182. Stot_in = list(in_degrees.values())
  183. Stot_out = list(out_degrees.values())
  184. # Calculate weights for both in and out neighbours
  185. nbrs = {}
  186. for u in G:
  187. nbrs[u] = defaultdict(float)
  188. for _, n, wt in G.out_edges(u, data="weight"):
  189. nbrs[u][n] += wt
  190. for n, _, wt in G.in_edges(u, data="weight"):
  191. nbrs[u][n] += wt
  192. else:
  193. degrees = dict(G.degree(weight="weight"))
  194. Stot = list(degrees.values())
  195. nbrs = {u: {v: data["weight"] for v, data in G[u].items() if v != u} for u in G}
  196. rand_nodes = list(G.nodes)
  197. seed.shuffle(rand_nodes)
  198. nb_moves = 1
  199. improvement = False
  200. while nb_moves > 0:
  201. nb_moves = 0
  202. for u in rand_nodes:
  203. best_mod = 0
  204. best_com = node2com[u]
  205. weights2com = _neighbor_weights(nbrs[u], node2com)
  206. if is_directed:
  207. in_degree = in_degrees[u]
  208. out_degree = out_degrees[u]
  209. Stot_in[best_com] -= in_degree
  210. Stot_out[best_com] -= out_degree
  211. remove_cost = (
  212. -weights2com[best_com] / m
  213. + resolution
  214. * (out_degree * Stot_in[best_com] + in_degree * Stot_out[best_com])
  215. / m**2
  216. )
  217. else:
  218. degree = degrees[u]
  219. Stot[best_com] -= degree
  220. remove_cost = -weights2com[best_com] / m + resolution * (
  221. Stot[best_com] * degree
  222. ) / (2 * m**2)
  223. for nbr_com, wt in weights2com.items():
  224. if is_directed:
  225. gain = (
  226. remove_cost
  227. + wt / m
  228. - resolution
  229. * (
  230. out_degree * Stot_in[nbr_com]
  231. + in_degree * Stot_out[nbr_com]
  232. )
  233. / m**2
  234. )
  235. else:
  236. gain = (
  237. remove_cost
  238. + wt / m
  239. - resolution * (Stot[nbr_com] * degree) / (2 * m**2)
  240. )
  241. if gain > best_mod:
  242. best_mod = gain
  243. best_com = nbr_com
  244. if is_directed:
  245. Stot_in[best_com] += in_degree
  246. Stot_out[best_com] += out_degree
  247. else:
  248. Stot[best_com] += degree
  249. if best_com != node2com[u]:
  250. com = G.nodes[u].get("nodes", {u})
  251. partition[node2com[u]].difference_update(com)
  252. inner_partition[node2com[u]].remove(u)
  253. partition[best_com].update(com)
  254. inner_partition[best_com].add(u)
  255. improvement = True
  256. nb_moves += 1
  257. node2com[u] = best_com
  258. partition = list(filter(len, partition))
  259. inner_partition = list(filter(len, inner_partition))
  260. return partition, inner_partition, improvement
  261. def _neighbor_weights(nbrs, node2com):
  262. """Calculate weights between node and its neighbor communities.
  263. Parameters
  264. ----------
  265. nbrs : dictionary
  266. Dictionary with nodes' neighbours as keys and their edge weight as value.
  267. node2com : dictionary
  268. Dictionary with all graph's nodes as keys and their community index as value.
  269. """
  270. weights = defaultdict(float)
  271. for nbr, wt in nbrs.items():
  272. weights[node2com[nbr]] += wt
  273. return weights
  274. def _gen_graph(G, partition):
  275. """Generate a new graph based on the partitions of a given graph"""
  276. H = G.__class__()
  277. node2com = {}
  278. for i, part in enumerate(partition):
  279. nodes = set()
  280. for node in part:
  281. node2com[node] = i
  282. nodes.update(G.nodes[node].get("nodes", {node}))
  283. H.add_node(i, nodes=nodes)
  284. for node1, node2, wt in G.edges(data=True):
  285. wt = wt["weight"]
  286. com1 = node2com[node1]
  287. com2 = node2com[node2]
  288. temp = H.get_edge_data(com1, com2, {"weight": 0})["weight"]
  289. H.add_edge(com1, com2, weight=wt + temp)
  290. return H
  291. def _convert_multigraph(G, weight, is_directed):
  292. """Convert a Multigraph to normal Graph"""
  293. if is_directed:
  294. H = nx.DiGraph()
  295. else:
  296. H = nx.Graph()
  297. H.add_nodes_from(G)
  298. for u, v, wt in G.edges(data=weight, default=1):
  299. if H.has_edge(u, v):
  300. H[u][v]["weight"] += wt
  301. else:
  302. H.add_edge(u, v, weight=wt)
  303. return H