community.py 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061
  1. """Generators for classes of graphs used in studying social networks."""
  2. import itertools
  3. import math
  4. import networkx as nx
  5. from networkx.utils import py_random_state
  6. __all__ = [
  7. "caveman_graph",
  8. "connected_caveman_graph",
  9. "relaxed_caveman_graph",
  10. "random_partition_graph",
  11. "planted_partition_graph",
  12. "gaussian_random_partition_graph",
  13. "ring_of_cliques",
  14. "windmill_graph",
  15. "stochastic_block_model",
  16. "LFR_benchmark_graph",
  17. ]
  18. def caveman_graph(l, k):
  19. """Returns a caveman graph of `l` cliques of size `k`.
  20. Parameters
  21. ----------
  22. l : int
  23. Number of cliques
  24. k : int
  25. Size of cliques
  26. Returns
  27. -------
  28. G : NetworkX Graph
  29. caveman graph
  30. Notes
  31. -----
  32. This returns an undirected graph, it can be converted to a directed
  33. graph using :func:`nx.to_directed`, or a multigraph using
  34. ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is
  35. described in [1]_ and it is unclear which of the directed
  36. generalizations is most useful.
  37. Examples
  38. --------
  39. >>> G = nx.caveman_graph(3, 3)
  40. See also
  41. --------
  42. connected_caveman_graph
  43. References
  44. ----------
  45. .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.'
  46. Amer. J. Soc. 105, 493-527, 1999.
  47. """
  48. # l disjoint cliques of size k
  49. G = nx.empty_graph(l * k)
  50. if k > 1:
  51. for start in range(0, l * k, k):
  52. edges = itertools.combinations(range(start, start + k), 2)
  53. G.add_edges_from(edges)
  54. return G
  55. def connected_caveman_graph(l, k):
  56. """Returns a connected caveman graph of `l` cliques of size `k`.
  57. The connected caveman graph is formed by creating `n` cliques of size
  58. `k`, then a single edge in each clique is rewired to a node in an
  59. adjacent clique.
  60. Parameters
  61. ----------
  62. l : int
  63. number of cliques
  64. k : int
  65. size of cliques (k at least 2 or NetworkXError is raised)
  66. Returns
  67. -------
  68. G : NetworkX Graph
  69. connected caveman graph
  70. Raises
  71. ------
  72. NetworkXError
  73. If the size of cliques `k` is smaller than 2.
  74. Notes
  75. -----
  76. This returns an undirected graph, it can be converted to a directed
  77. graph using :func:`nx.to_directed`, or a multigraph using
  78. ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is
  79. described in [1]_ and it is unclear which of the directed
  80. generalizations is most useful.
  81. Examples
  82. --------
  83. >>> G = nx.connected_caveman_graph(3, 3)
  84. References
  85. ----------
  86. .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.'
  87. Amer. J. Soc. 105, 493-527, 1999.
  88. """
  89. if k < 2:
  90. raise nx.NetworkXError(
  91. "The size of cliques in a connected caveman graph " "must be at least 2."
  92. )
  93. G = nx.caveman_graph(l, k)
  94. for start in range(0, l * k, k):
  95. G.remove_edge(start, start + 1)
  96. G.add_edge(start, (start - 1) % (l * k))
  97. return G
  98. @py_random_state(3)
  99. def relaxed_caveman_graph(l, k, p, seed=None):
  100. """Returns a relaxed caveman graph.
  101. A relaxed caveman graph starts with `l` cliques of size `k`. Edges are
  102. then randomly rewired with probability `p` to link different cliques.
  103. Parameters
  104. ----------
  105. l : int
  106. Number of groups
  107. k : int
  108. Size of cliques
  109. p : float
  110. Probability of rewiring each edge.
  111. seed : integer, random_state, or None (default)
  112. Indicator of random number generation state.
  113. See :ref:`Randomness<randomness>`.
  114. Returns
  115. -------
  116. G : NetworkX Graph
  117. Relaxed Caveman Graph
  118. Raises
  119. ------
  120. NetworkXError
  121. If p is not in [0,1]
  122. Examples
  123. --------
  124. >>> G = nx.relaxed_caveman_graph(2, 3, 0.1, seed=42)
  125. References
  126. ----------
  127. .. [1] Santo Fortunato, Community Detection in Graphs,
  128. Physics Reports Volume 486, Issues 3-5, February 2010, Pages 75-174.
  129. https://arxiv.org/abs/0906.0612
  130. """
  131. G = nx.caveman_graph(l, k)
  132. nodes = list(G)
  133. for u, v in G.edges():
  134. if seed.random() < p: # rewire the edge
  135. x = seed.choice(nodes)
  136. if G.has_edge(u, x):
  137. continue
  138. G.remove_edge(u, v)
  139. G.add_edge(u, x)
  140. return G
  141. @py_random_state(3)
  142. def random_partition_graph(sizes, p_in, p_out, seed=None, directed=False):
  143. """Returns the random partition graph with a partition of sizes.
  144. A partition graph is a graph of communities with sizes defined by
  145. s in sizes. Nodes in the same group are connected with probability
  146. p_in and nodes of different groups are connected with probability
  147. p_out.
  148. Parameters
  149. ----------
  150. sizes : list of ints
  151. Sizes of groups
  152. p_in : float
  153. probability of edges with in groups
  154. p_out : float
  155. probability of edges between groups
  156. directed : boolean optional, default=False
  157. Whether to create a directed graph
  158. seed : integer, random_state, or None (default)
  159. Indicator of random number generation state.
  160. See :ref:`Randomness<randomness>`.
  161. Returns
  162. -------
  163. G : NetworkX Graph or DiGraph
  164. random partition graph of size sum(gs)
  165. Raises
  166. ------
  167. NetworkXError
  168. If p_in or p_out is not in [0,1]
  169. Examples
  170. --------
  171. >>> G = nx.random_partition_graph([10, 10, 10], 0.25, 0.01)
  172. >>> len(G)
  173. 30
  174. >>> partition = G.graph["partition"]
  175. >>> len(partition)
  176. 3
  177. Notes
  178. -----
  179. This is a generalization of the planted-l-partition described in
  180. [1]_. It allows for the creation of groups of any size.
  181. The partition is store as a graph attribute 'partition'.
  182. References
  183. ----------
  184. .. [1] Santo Fortunato 'Community Detection in Graphs' Physical Reports
  185. Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612
  186. """
  187. # Use geometric method for O(n+m) complexity algorithm
  188. # partition = nx.community_sets(nx.get_node_attributes(G, 'affiliation'))
  189. if not 0.0 <= p_in <= 1.0:
  190. raise nx.NetworkXError("p_in must be in [0,1]")
  191. if not 0.0 <= p_out <= 1.0:
  192. raise nx.NetworkXError("p_out must be in [0,1]")
  193. # create connection matrix
  194. num_blocks = len(sizes)
  195. p = [[p_out for s in range(num_blocks)] for r in range(num_blocks)]
  196. for r in range(num_blocks):
  197. p[r][r] = p_in
  198. return stochastic_block_model(
  199. sizes,
  200. p,
  201. nodelist=None,
  202. seed=seed,
  203. directed=directed,
  204. selfloops=False,
  205. sparse=True,
  206. )
  207. @py_random_state(4)
  208. def planted_partition_graph(l, k, p_in, p_out, seed=None, directed=False):
  209. """Returns the planted l-partition graph.
  210. This model partitions a graph with n=l*k vertices in
  211. l groups with k vertices each. Vertices of the same
  212. group are linked with a probability p_in, and vertices
  213. of different groups are linked with probability p_out.
  214. Parameters
  215. ----------
  216. l : int
  217. Number of groups
  218. k : int
  219. Number of vertices in each group
  220. p_in : float
  221. probability of connecting vertices within a group
  222. p_out : float
  223. probability of connected vertices between groups
  224. seed : integer, random_state, or None (default)
  225. Indicator of random number generation state.
  226. See :ref:`Randomness<randomness>`.
  227. directed : bool,optional (default=False)
  228. If True return a directed graph
  229. Returns
  230. -------
  231. G : NetworkX Graph or DiGraph
  232. planted l-partition graph
  233. Raises
  234. ------
  235. NetworkXError
  236. If p_in,p_out are not in [0,1] or
  237. Examples
  238. --------
  239. >>> G = nx.planted_partition_graph(4, 3, 0.5, 0.1, seed=42)
  240. See Also
  241. --------
  242. random_partition_model
  243. References
  244. ----------
  245. .. [1] A. Condon, R.M. Karp, Algorithms for graph partitioning
  246. on the planted partition model,
  247. Random Struct. Algor. 18 (2001) 116-140.
  248. .. [2] Santo Fortunato 'Community Detection in Graphs' Physical Reports
  249. Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612
  250. """
  251. return random_partition_graph([k] * l, p_in, p_out, seed=seed, directed=directed)
  252. @py_random_state(6)
  253. def gaussian_random_partition_graph(n, s, v, p_in, p_out, directed=False, seed=None):
  254. """Generate a Gaussian random partition graph.
  255. A Gaussian random partition graph is created by creating k partitions
  256. each with a size drawn from a normal distribution with mean s and variance
  257. s/v. Nodes are connected within clusters with probability p_in and
  258. between clusters with probability p_out[1]
  259. Parameters
  260. ----------
  261. n : int
  262. Number of nodes in the graph
  263. s : float
  264. Mean cluster size
  265. v : float
  266. Shape parameter. The variance of cluster size distribution is s/v.
  267. p_in : float
  268. Probability of intra cluster connection.
  269. p_out : float
  270. Probability of inter cluster connection.
  271. directed : boolean, optional default=False
  272. Whether to create a directed graph or not
  273. seed : integer, random_state, or None (default)
  274. Indicator of random number generation state.
  275. See :ref:`Randomness<randomness>`.
  276. Returns
  277. -------
  278. G : NetworkX Graph or DiGraph
  279. gaussian random partition graph
  280. Raises
  281. ------
  282. NetworkXError
  283. If s is > n
  284. If p_in or p_out is not in [0,1]
  285. Notes
  286. -----
  287. Note the number of partitions is dependent on s,v and n, and that the
  288. last partition may be considerably smaller, as it is sized to simply
  289. fill out the nodes [1]
  290. See Also
  291. --------
  292. random_partition_graph
  293. Examples
  294. --------
  295. >>> G = nx.gaussian_random_partition_graph(100, 10, 10, 0.25, 0.1)
  296. >>> len(G)
  297. 100
  298. References
  299. ----------
  300. .. [1] Ulrik Brandes, Marco Gaertler, Dorothea Wagner,
  301. Experiments on Graph Clustering Algorithms,
  302. In the proceedings of the 11th Europ. Symp. Algorithms, 2003.
  303. """
  304. if s > n:
  305. raise nx.NetworkXError("s must be <= n")
  306. assigned = 0
  307. sizes = []
  308. while True:
  309. size = int(seed.gauss(s, s / v + 0.5))
  310. if size < 1: # how to handle 0 or negative sizes?
  311. continue
  312. if assigned + size >= n:
  313. sizes.append(n - assigned)
  314. break
  315. assigned += size
  316. sizes.append(size)
  317. return random_partition_graph(sizes, p_in, p_out, seed=seed, directed=directed)
  318. def ring_of_cliques(num_cliques, clique_size):
  319. """Defines a "ring of cliques" graph.
  320. A ring of cliques graph is consisting of cliques, connected through single
  321. links. Each clique is a complete graph.
  322. Parameters
  323. ----------
  324. num_cliques : int
  325. Number of cliques
  326. clique_size : int
  327. Size of cliques
  328. Returns
  329. -------
  330. G : NetworkX Graph
  331. ring of cliques graph
  332. Raises
  333. ------
  334. NetworkXError
  335. If the number of cliques is lower than 2 or
  336. if the size of cliques is smaller than 2.
  337. Examples
  338. --------
  339. >>> G = nx.ring_of_cliques(8, 4)
  340. See Also
  341. --------
  342. connected_caveman_graph
  343. Notes
  344. -----
  345. The `connected_caveman_graph` graph removes a link from each clique to
  346. connect it with the next clique. Instead, the `ring_of_cliques` graph
  347. simply adds the link without removing any link from the cliques.
  348. """
  349. if num_cliques < 2:
  350. raise nx.NetworkXError("A ring of cliques must have at least " "two cliques")
  351. if clique_size < 2:
  352. raise nx.NetworkXError("The cliques must have at least two nodes")
  353. G = nx.Graph()
  354. for i in range(num_cliques):
  355. edges = itertools.combinations(
  356. range(i * clique_size, i * clique_size + clique_size), 2
  357. )
  358. G.add_edges_from(edges)
  359. G.add_edge(
  360. i * clique_size + 1, (i + 1) * clique_size % (num_cliques * clique_size)
  361. )
  362. return G
  363. def windmill_graph(n, k):
  364. """Generate a windmill graph.
  365. A windmill graph is a graph of `n` cliques each of size `k` that are all
  366. joined at one node.
  367. It can be thought of as taking a disjoint union of `n` cliques of size `k`,
  368. selecting one point from each, and contracting all of the selected points.
  369. Alternatively, one could generate `n` cliques of size `k-1` and one node
  370. that is connected to all other nodes in the graph.
  371. Parameters
  372. ----------
  373. n : int
  374. Number of cliques
  375. k : int
  376. Size of cliques
  377. Returns
  378. -------
  379. G : NetworkX Graph
  380. windmill graph with n cliques of size k
  381. Raises
  382. ------
  383. NetworkXError
  384. If the number of cliques is less than two
  385. If the size of the cliques are less than two
  386. Examples
  387. --------
  388. >>> G = nx.windmill_graph(4, 5)
  389. Notes
  390. -----
  391. The node labeled `0` will be the node connected to all other nodes.
  392. Note that windmill graphs are usually denoted `Wd(k,n)`, so the parameters
  393. are in the opposite order as the parameters of this method.
  394. """
  395. if n < 2:
  396. msg = "A windmill graph must have at least two cliques"
  397. raise nx.NetworkXError(msg)
  398. if k < 2:
  399. raise nx.NetworkXError("The cliques must have at least two nodes")
  400. G = nx.disjoint_union_all(
  401. itertools.chain(
  402. [nx.complete_graph(k)], (nx.complete_graph(k - 1) for _ in range(n - 1))
  403. )
  404. )
  405. G.add_edges_from((0, i) for i in range(k, G.number_of_nodes()))
  406. return G
  407. @py_random_state(3)
  408. def stochastic_block_model(
  409. sizes, p, nodelist=None, seed=None, directed=False, selfloops=False, sparse=True
  410. ):
  411. """Returns a stochastic block model graph.
  412. This model partitions the nodes in blocks of arbitrary sizes, and places
  413. edges between pairs of nodes independently, with a probability that depends
  414. on the blocks.
  415. Parameters
  416. ----------
  417. sizes : list of ints
  418. Sizes of blocks
  419. p : list of list of floats
  420. Element (r,s) gives the density of edges going from the nodes
  421. of group r to nodes of group s.
  422. p must match the number of groups (len(sizes) == len(p)),
  423. and it must be symmetric if the graph is undirected.
  424. nodelist : list, optional
  425. The block tags are assigned according to the node identifiers
  426. in nodelist. If nodelist is None, then the ordering is the
  427. range [0,sum(sizes)-1].
  428. seed : integer, random_state, or None (default)
  429. Indicator of random number generation state.
  430. See :ref:`Randomness<randomness>`.
  431. directed : boolean optional, default=False
  432. Whether to create a directed graph or not.
  433. selfloops : boolean optional, default=False
  434. Whether to include self-loops or not.
  435. sparse: boolean optional, default=True
  436. Use the sparse heuristic to speed up the generator.
  437. Returns
  438. -------
  439. g : NetworkX Graph or DiGraph
  440. Stochastic block model graph of size sum(sizes)
  441. Raises
  442. ------
  443. NetworkXError
  444. If probabilities are not in [0,1].
  445. If the probability matrix is not square (directed case).
  446. If the probability matrix is not symmetric (undirected case).
  447. If the sizes list does not match nodelist or the probability matrix.
  448. If nodelist contains duplicate.
  449. Examples
  450. --------
  451. >>> sizes = [75, 75, 300]
  452. >>> probs = [[0.25, 0.05, 0.02], [0.05, 0.35, 0.07], [0.02, 0.07, 0.40]]
  453. >>> g = nx.stochastic_block_model(sizes, probs, seed=0)
  454. >>> len(g)
  455. 450
  456. >>> H = nx.quotient_graph(g, g.graph["partition"], relabel=True)
  457. >>> for v in H.nodes(data=True):
  458. ... print(round(v[1]["density"], 3))
  459. ...
  460. 0.245
  461. 0.348
  462. 0.405
  463. >>> for v in H.edges(data=True):
  464. ... print(round(1.0 * v[2]["weight"] / (sizes[v[0]] * sizes[v[1]]), 3))
  465. ...
  466. 0.051
  467. 0.022
  468. 0.07
  469. See Also
  470. --------
  471. random_partition_graph
  472. planted_partition_graph
  473. gaussian_random_partition_graph
  474. gnp_random_graph
  475. References
  476. ----------
  477. .. [1] Holland, P. W., Laskey, K. B., & Leinhardt, S.,
  478. "Stochastic blockmodels: First steps",
  479. Social networks, 5(2), 109-137, 1983.
  480. """
  481. # Check if dimensions match
  482. if len(sizes) != len(p):
  483. raise nx.NetworkXException("'sizes' and 'p' do not match.")
  484. # Check for probability symmetry (undirected) and shape (directed)
  485. for row in p:
  486. if len(p) != len(row):
  487. raise nx.NetworkXException("'p' must be a square matrix.")
  488. if not directed:
  489. p_transpose = [list(i) for i in zip(*p)]
  490. for i in zip(p, p_transpose):
  491. for j in zip(i[0], i[1]):
  492. if abs(j[0] - j[1]) > 1e-08:
  493. raise nx.NetworkXException("'p' must be symmetric.")
  494. # Check for probability range
  495. for row in p:
  496. for prob in row:
  497. if prob < 0 or prob > 1:
  498. raise nx.NetworkXException("Entries of 'p' not in [0,1].")
  499. # Check for nodelist consistency
  500. if nodelist is not None:
  501. if len(nodelist) != sum(sizes):
  502. raise nx.NetworkXException("'nodelist' and 'sizes' do not match.")
  503. if len(nodelist) != len(set(nodelist)):
  504. raise nx.NetworkXException("nodelist contains duplicate.")
  505. else:
  506. nodelist = range(0, sum(sizes))
  507. # Setup the graph conditionally to the directed switch.
  508. block_range = range(len(sizes))
  509. if directed:
  510. g = nx.DiGraph()
  511. block_iter = itertools.product(block_range, block_range)
  512. else:
  513. g = nx.Graph()
  514. block_iter = itertools.combinations_with_replacement(block_range, 2)
  515. # Split nodelist in a partition (list of sets).
  516. size_cumsum = [sum(sizes[0:x]) for x in range(0, len(sizes) + 1)]
  517. g.graph["partition"] = [
  518. set(nodelist[size_cumsum[x] : size_cumsum[x + 1]])
  519. for x in range(0, len(size_cumsum) - 1)
  520. ]
  521. # Setup nodes and graph name
  522. for block_id, nodes in enumerate(g.graph["partition"]):
  523. for node in nodes:
  524. g.add_node(node, block=block_id)
  525. g.name = "stochastic_block_model"
  526. # Test for edge existence
  527. parts = g.graph["partition"]
  528. for i, j in block_iter:
  529. if i == j:
  530. if directed:
  531. if selfloops:
  532. edges = itertools.product(parts[i], parts[i])
  533. else:
  534. edges = itertools.permutations(parts[i], 2)
  535. else:
  536. edges = itertools.combinations(parts[i], 2)
  537. if selfloops:
  538. edges = itertools.chain(edges, zip(parts[i], parts[i]))
  539. for e in edges:
  540. if seed.random() < p[i][j]:
  541. g.add_edge(*e)
  542. else:
  543. edges = itertools.product(parts[i], parts[j])
  544. if sparse:
  545. if p[i][j] == 1: # Test edges cases p_ij = 0 or 1
  546. for e in edges:
  547. g.add_edge(*e)
  548. elif p[i][j] > 0:
  549. while True:
  550. try:
  551. logrand = math.log(seed.random())
  552. skip = math.floor(logrand / math.log(1 - p[i][j]))
  553. # consume "skip" edges
  554. next(itertools.islice(edges, skip, skip), None)
  555. e = next(edges)
  556. g.add_edge(*e) # __safe
  557. except StopIteration:
  558. break
  559. else:
  560. for e in edges:
  561. if seed.random() < p[i][j]:
  562. g.add_edge(*e) # __safe
  563. return g
  564. def _zipf_rv_below(gamma, xmin, threshold, seed):
  565. """Returns a random value chosen from the bounded Zipf distribution.
  566. Repeatedly draws values from the Zipf distribution until the
  567. threshold is met, then returns that value.
  568. """
  569. result = nx.utils.zipf_rv(gamma, xmin, seed)
  570. while result > threshold:
  571. result = nx.utils.zipf_rv(gamma, xmin, seed)
  572. return result
  573. def _powerlaw_sequence(gamma, low, high, condition, length, max_iters, seed):
  574. """Returns a list of numbers obeying a constrained power law distribution.
  575. ``gamma`` and ``low`` are the parameters for the Zipf distribution.
  576. ``high`` is the maximum allowed value for values draw from the Zipf
  577. distribution. For more information, see :func:`_zipf_rv_below`.
  578. ``condition`` and ``length`` are Boolean-valued functions on
  579. lists. While generating the list, random values are drawn and
  580. appended to the list until ``length`` is satisfied by the created
  581. list. Once ``condition`` is satisfied, the sequence generated in
  582. this way is returned.
  583. ``max_iters`` indicates the number of times to generate a list
  584. satisfying ``length``. If the number of iterations exceeds this
  585. value, :exc:`~networkx.exception.ExceededMaxIterations` is raised.
  586. seed : integer, random_state, or None (default)
  587. Indicator of random number generation state.
  588. See :ref:`Randomness<randomness>`.
  589. """
  590. for i in range(max_iters):
  591. seq = []
  592. while not length(seq):
  593. seq.append(_zipf_rv_below(gamma, low, high, seed))
  594. if condition(seq):
  595. return seq
  596. raise nx.ExceededMaxIterations("Could not create power law sequence")
  597. def _hurwitz_zeta(x, q, tolerance):
  598. """The Hurwitz zeta function, or the Riemann zeta function of two arguments.
  599. ``x`` must be greater than one and ``q`` must be positive.
  600. This function repeatedly computes subsequent partial sums until
  601. convergence, as decided by ``tolerance``.
  602. """
  603. z = 0
  604. z_prev = -float("inf")
  605. k = 0
  606. while abs(z - z_prev) > tolerance:
  607. z_prev = z
  608. z += 1 / ((k + q) ** x)
  609. k += 1
  610. return z
  611. def _generate_min_degree(gamma, average_degree, max_degree, tolerance, max_iters):
  612. """Returns a minimum degree from the given average degree."""
  613. # Defines zeta function whether or not Scipy is available
  614. try:
  615. from scipy.special import zeta
  616. except ImportError:
  617. def zeta(x, q):
  618. return _hurwitz_zeta(x, q, tolerance)
  619. min_deg_top = max_degree
  620. min_deg_bot = 1
  621. min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
  622. itrs = 0
  623. mid_avg_deg = 0
  624. while abs(mid_avg_deg - average_degree) > tolerance:
  625. if itrs > max_iters:
  626. raise nx.ExceededMaxIterations("Could not match average_degree")
  627. mid_avg_deg = 0
  628. for x in range(int(min_deg_mid), max_degree + 1):
  629. mid_avg_deg += (x ** (-gamma + 1)) / zeta(gamma, min_deg_mid)
  630. if mid_avg_deg > average_degree:
  631. min_deg_top = min_deg_mid
  632. min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
  633. else:
  634. min_deg_bot = min_deg_mid
  635. min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
  636. itrs += 1
  637. # return int(min_deg_mid + 0.5)
  638. return round(min_deg_mid)
  639. def _generate_communities(degree_seq, community_sizes, mu, max_iters, seed):
  640. """Returns a list of sets, each of which represents a community.
  641. ``degree_seq`` is the degree sequence that must be met by the
  642. graph.
  643. ``community_sizes`` is the community size distribution that must be
  644. met by the generated list of sets.
  645. ``mu`` is a float in the interval [0, 1] indicating the fraction of
  646. intra-community edges incident to each node.
  647. ``max_iters`` is the number of times to try to add a node to a
  648. community. This must be greater than the length of
  649. ``degree_seq``, otherwise this function will always fail. If
  650. the number of iterations exceeds this value,
  651. :exc:`~networkx.exception.ExceededMaxIterations` is raised.
  652. seed : integer, random_state, or None (default)
  653. Indicator of random number generation state.
  654. See :ref:`Randomness<randomness>`.
  655. The communities returned by this are sets of integers in the set {0,
  656. ..., *n* - 1}, where *n* is the length of ``degree_seq``.
  657. """
  658. # This assumes the nodes in the graph will be natural numbers.
  659. result = [set() for _ in community_sizes]
  660. n = len(degree_seq)
  661. free = list(range(n))
  662. for i in range(max_iters):
  663. v = free.pop()
  664. c = seed.choice(range(len(community_sizes)))
  665. # s = int(degree_seq[v] * (1 - mu) + 0.5)
  666. s = round(degree_seq[v] * (1 - mu))
  667. # If the community is large enough, add the node to the chosen
  668. # community. Otherwise, return it to the list of unaffiliated
  669. # nodes.
  670. if s < community_sizes[c]:
  671. result[c].add(v)
  672. else:
  673. free.append(v)
  674. # If the community is too big, remove a node from it.
  675. if len(result[c]) > community_sizes[c]:
  676. free.append(result[c].pop())
  677. if not free:
  678. return result
  679. msg = "Could not assign communities; try increasing min_community"
  680. raise nx.ExceededMaxIterations(msg)
  681. @py_random_state(11)
  682. def LFR_benchmark_graph(
  683. n,
  684. tau1,
  685. tau2,
  686. mu,
  687. average_degree=None,
  688. min_degree=None,
  689. max_degree=None,
  690. min_community=None,
  691. max_community=None,
  692. tol=1.0e-7,
  693. max_iters=500,
  694. seed=None,
  695. ):
  696. r"""Returns the LFR benchmark graph.
  697. This algorithm proceeds as follows:
  698. 1) Find a degree sequence with a power law distribution, and minimum
  699. value ``min_degree``, which has approximate average degree
  700. ``average_degree``. This is accomplished by either
  701. a) specifying ``min_degree`` and not ``average_degree``,
  702. b) specifying ``average_degree`` and not ``min_degree``, in which
  703. case a suitable minimum degree will be found.
  704. ``max_degree`` can also be specified, otherwise it will be set to
  705. ``n``. Each node *u* will have $\mu \mathrm{deg}(u)$ edges
  706. joining it to nodes in communities other than its own and $(1 -
  707. \mu) \mathrm{deg}(u)$ edges joining it to nodes in its own
  708. community.
  709. 2) Generate community sizes according to a power law distribution
  710. with exponent ``tau2``. If ``min_community`` and
  711. ``max_community`` are not specified they will be selected to be
  712. ``min_degree`` and ``max_degree``, respectively. Community sizes
  713. are generated until the sum of their sizes equals ``n``.
  714. 3) Each node will be randomly assigned a community with the
  715. condition that the community is large enough for the node's
  716. intra-community degree, $(1 - \mu) \mathrm{deg}(u)$ as
  717. described in step 2. If a community grows too large, a random node
  718. will be selected for reassignment to a new community, until all
  719. nodes have been assigned a community.
  720. 4) Each node *u* then adds $(1 - \mu) \mathrm{deg}(u)$
  721. intra-community edges and $\mu \mathrm{deg}(u)$ inter-community
  722. edges.
  723. Parameters
  724. ----------
  725. n : int
  726. Number of nodes in the created graph.
  727. tau1 : float
  728. Power law exponent for the degree distribution of the created
  729. graph. This value must be strictly greater than one.
  730. tau2 : float
  731. Power law exponent for the community size distribution in the
  732. created graph. This value must be strictly greater than one.
  733. mu : float
  734. Fraction of inter-community edges incident to each node. This
  735. value must be in the interval [0, 1].
  736. average_degree : float
  737. Desired average degree of nodes in the created graph. This value
  738. must be in the interval [0, *n*]. Exactly one of this and
  739. ``min_degree`` must be specified, otherwise a
  740. :exc:`NetworkXError` is raised.
  741. min_degree : int
  742. Minimum degree of nodes in the created graph. This value must be
  743. in the interval [0, *n*]. Exactly one of this and
  744. ``average_degree`` must be specified, otherwise a
  745. :exc:`NetworkXError` is raised.
  746. max_degree : int
  747. Maximum degree of nodes in the created graph. If not specified,
  748. this is set to ``n``, the total number of nodes in the graph.
  749. min_community : int
  750. Minimum size of communities in the graph. If not specified, this
  751. is set to ``min_degree``.
  752. max_community : int
  753. Maximum size of communities in the graph. If not specified, this
  754. is set to ``n``, the total number of nodes in the graph.
  755. tol : float
  756. Tolerance when comparing floats, specifically when comparing
  757. average degree values.
  758. max_iters : int
  759. Maximum number of iterations to try to create the community sizes,
  760. degree distribution, and community affiliations.
  761. seed : integer, random_state, or None (default)
  762. Indicator of random number generation state.
  763. See :ref:`Randomness<randomness>`.
  764. Returns
  765. -------
  766. G : NetworkX graph
  767. The LFR benchmark graph generated according to the specified
  768. parameters.
  769. Each node in the graph has a node attribute ``'community'`` that
  770. stores the community (that is, the set of nodes) that includes
  771. it.
  772. Raises
  773. ------
  774. NetworkXError
  775. If any of the parameters do not meet their upper and lower bounds:
  776. - ``tau1`` and ``tau2`` must be strictly greater than 1.
  777. - ``mu`` must be in [0, 1].
  778. - ``max_degree`` must be in {1, ..., *n*}.
  779. - ``min_community`` and ``max_community`` must be in {0, ...,
  780. *n*}.
  781. If not exactly one of ``average_degree`` and ``min_degree`` is
  782. specified.
  783. If ``min_degree`` is not specified and a suitable ``min_degree``
  784. cannot be found.
  785. ExceededMaxIterations
  786. If a valid degree sequence cannot be created within
  787. ``max_iters`` number of iterations.
  788. If a valid set of community sizes cannot be created within
  789. ``max_iters`` number of iterations.
  790. If a valid community assignment cannot be created within ``10 *
  791. n * max_iters`` number of iterations.
  792. Examples
  793. --------
  794. Basic usage::
  795. >>> from networkx.generators.community import LFR_benchmark_graph
  796. >>> n = 250
  797. >>> tau1 = 3
  798. >>> tau2 = 1.5
  799. >>> mu = 0.1
  800. >>> G = LFR_benchmark_graph(
  801. ... n, tau1, tau2, mu, average_degree=5, min_community=20, seed=10
  802. ... )
  803. Continuing the example above, you can get the communities from the
  804. node attributes of the graph::
  805. >>> communities = {frozenset(G.nodes[v]["community"]) for v in G}
  806. Notes
  807. -----
  808. This algorithm differs slightly from the original way it was
  809. presented in [1].
  810. 1) Rather than connecting the graph via a configuration model then
  811. rewiring to match the intra-community and inter-community
  812. degrees, we do this wiring explicitly at the end, which should be
  813. equivalent.
  814. 2) The code posted on the author's website [2] calculates the random
  815. power law distributed variables and their average using
  816. continuous approximations, whereas we use the discrete
  817. distributions here as both degree and community size are
  818. discrete.
  819. Though the authors describe the algorithm as quite robust, testing
  820. during development indicates that a somewhat narrower parameter set
  821. is likely to successfully produce a graph. Some suggestions have
  822. been provided in the event of exceptions.
  823. References
  824. ----------
  825. .. [1] "Benchmark graphs for testing community detection algorithms",
  826. Andrea Lancichinetti, Santo Fortunato, and Filippo Radicchi,
  827. Phys. Rev. E 78, 046110 2008
  828. .. [2] https://www.santofortunato.net/resources
  829. """
  830. # Perform some basic parameter validation.
  831. if not tau1 > 1:
  832. raise nx.NetworkXError("tau1 must be greater than one")
  833. if not tau2 > 1:
  834. raise nx.NetworkXError("tau2 must be greater than one")
  835. if not 0 <= mu <= 1:
  836. raise nx.NetworkXError("mu must be in the interval [0, 1]")
  837. # Validate parameters for generating the degree sequence.
  838. if max_degree is None:
  839. max_degree = n
  840. elif not 0 < max_degree <= n:
  841. raise nx.NetworkXError("max_degree must be in the interval (0, n]")
  842. if not ((min_degree is None) ^ (average_degree is None)):
  843. raise nx.NetworkXError(
  844. "Must assign exactly one of min_degree and" " average_degree"
  845. )
  846. if min_degree is None:
  847. min_degree = _generate_min_degree(
  848. tau1, average_degree, max_degree, tol, max_iters
  849. )
  850. # Generate a degree sequence with a power law distribution.
  851. low, high = min_degree, max_degree
  852. def condition(seq):
  853. return sum(seq) % 2 == 0
  854. def length(seq):
  855. return len(seq) >= n
  856. deg_seq = _powerlaw_sequence(tau1, low, high, condition, length, max_iters, seed)
  857. # Validate parameters for generating the community size sequence.
  858. if min_community is None:
  859. min_community = min(deg_seq)
  860. if max_community is None:
  861. max_community = max(deg_seq)
  862. # Generate a community size sequence with a power law distribution.
  863. #
  864. # TODO The original code incremented the number of iterations each
  865. # time a new Zipf random value was drawn from the distribution. This
  866. # differed from the way the number of iterations was incremented in
  867. # `_powerlaw_degree_sequence`, so this code was changed to match
  868. # that one. As a result, this code is allowed many more chances to
  869. # generate a valid community size sequence.
  870. low, high = min_community, max_community
  871. def condition(seq):
  872. return sum(seq) == n
  873. def length(seq):
  874. return sum(seq) >= n
  875. comms = _powerlaw_sequence(tau2, low, high, condition, length, max_iters, seed)
  876. # Generate the communities based on the given degree sequence and
  877. # community sizes.
  878. max_iters *= 10 * n
  879. communities = _generate_communities(deg_seq, comms, mu, max_iters, seed)
  880. # Finally, generate the benchmark graph based on the given
  881. # communities, joining nodes according to the intra- and
  882. # inter-community degrees.
  883. G = nx.Graph()
  884. G.add_nodes_from(range(n))
  885. for c in communities:
  886. for u in c:
  887. while G.degree(u) < round(deg_seq[u] * (1 - mu)):
  888. v = seed.choice(list(c))
  889. G.add_edge(u, v)
  890. while G.degree(u) < deg_seq[u]:
  891. v = seed.choice(range(n))
  892. if v not in c:
  893. G.add_edge(u, v)
  894. G.nodes[u]["community"] = c
  895. return G