asyn_fluid.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. """Asynchronous Fluid Communities algorithm for community detection."""
  2. from collections import Counter
  3. from networkx.algorithms.components import is_connected
  4. from networkx.exception import NetworkXError
  5. from networkx.utils import groups, not_implemented_for, py_random_state
  6. __all__ = ["asyn_fluidc"]
  7. @py_random_state(3)
  8. @not_implemented_for("directed", "multigraph")
  9. def asyn_fluidc(G, k, max_iter=100, seed=None):
  10. """Returns communities in `G` as detected by Fluid Communities algorithm.
  11. The asynchronous fluid communities algorithm is described in
  12. [1]_. The algorithm is based on the simple idea of fluids interacting
  13. in an environment, expanding and pushing each other. Its initialization is
  14. random, so found communities may vary on different executions.
  15. The algorithm proceeds as follows. First each of the initial k communities
  16. is initialized in a random vertex in the graph. Then the algorithm iterates
  17. over all vertices in a random order, updating the community of each vertex
  18. based on its own community and the communities of its neighbours. This
  19. process is performed several times until convergence.
  20. At all times, each community has a total density of 1, which is equally
  21. distributed among the vertices it contains. If a vertex changes of
  22. community, vertex densities of affected communities are adjusted
  23. immediately. When a complete iteration over all vertices is done, such that
  24. no vertex changes the community it belongs to, the algorithm has converged
  25. and returns.
  26. This is the original version of the algorithm described in [1]_.
  27. Unfortunately, it does not support weighted graphs yet.
  28. Parameters
  29. ----------
  30. G : Graph
  31. k : integer
  32. The number of communities to be found.
  33. max_iter : integer
  34. The number of maximum iterations allowed. By default 100.
  35. seed : integer, random_state, or None (default)
  36. Indicator of random number generation state.
  37. See :ref:`Randomness<randomness>`.
  38. Returns
  39. -------
  40. communities : iterable
  41. Iterable of communities given as sets of nodes.
  42. Notes
  43. -----
  44. k variable is not an optional argument.
  45. References
  46. ----------
  47. .. [1] Parés F., Garcia-Gasulla D. et al. "Fluid Communities: A
  48. Competitive and Highly Scalable Community Detection Algorithm".
  49. [https://arxiv.org/pdf/1703.09307.pdf].
  50. """
  51. # Initial checks
  52. if not isinstance(k, int):
  53. raise NetworkXError("k must be an integer.")
  54. if not k > 0:
  55. raise NetworkXError("k must be greater than 0.")
  56. if not is_connected(G):
  57. raise NetworkXError("Fluid Communities require connected Graphs.")
  58. if len(G) < k:
  59. raise NetworkXError("k cannot be bigger than the number of nodes.")
  60. # Initialization
  61. max_density = 1.0
  62. vertices = list(G)
  63. seed.shuffle(vertices)
  64. communities = {n: i for i, n in enumerate(vertices[:k])}
  65. density = {}
  66. com_to_numvertices = {}
  67. for vertex in communities:
  68. com_to_numvertices[communities[vertex]] = 1
  69. density[communities[vertex]] = max_density
  70. # Set up control variables and start iterating
  71. iter_count = 0
  72. cont = True
  73. while cont:
  74. cont = False
  75. iter_count += 1
  76. # Loop over all vertices in graph in a random order
  77. vertices = list(G)
  78. seed.shuffle(vertices)
  79. for vertex in vertices:
  80. # Updating rule
  81. com_counter = Counter()
  82. # Take into account self vertex community
  83. try:
  84. com_counter.update({communities[vertex]: density[communities[vertex]]})
  85. except KeyError:
  86. pass
  87. # Gather neighbour vertex communities
  88. for v in G[vertex]:
  89. try:
  90. com_counter.update({communities[v]: density[communities[v]]})
  91. except KeyError:
  92. continue
  93. # Check which is the community with highest density
  94. new_com = -1
  95. if len(com_counter.keys()) > 0:
  96. max_freq = max(com_counter.values())
  97. best_communities = [
  98. com
  99. for com, freq in com_counter.items()
  100. if (max_freq - freq) < 0.0001
  101. ]
  102. # If actual vertex com in best communities, it is preserved
  103. try:
  104. if communities[vertex] in best_communities:
  105. new_com = communities[vertex]
  106. except KeyError:
  107. pass
  108. # If vertex community changes...
  109. if new_com == -1:
  110. # Set flag of non-convergence
  111. cont = True
  112. # Randomly chose a new community from candidates
  113. new_com = seed.choice(best_communities)
  114. # Update previous community status
  115. try:
  116. com_to_numvertices[communities[vertex]] -= 1
  117. density[communities[vertex]] = (
  118. max_density / com_to_numvertices[communities[vertex]]
  119. )
  120. except KeyError:
  121. pass
  122. # Update new community status
  123. communities[vertex] = new_com
  124. com_to_numvertices[communities[vertex]] += 1
  125. density[communities[vertex]] = (
  126. max_density / com_to_numvertices[communities[vertex]]
  127. )
  128. # If maximum iterations reached --> output actual results
  129. if iter_count > max_iter:
  130. break
  131. # Return results by grouping communities as list of vertices
  132. return iter(groups(communities).values())