12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061 |
- """Generators for classes of graphs used in studying social networks."""
- import itertools
- import math
- import networkx as nx
- from networkx.utils import py_random_state
- __all__ = [
- "caveman_graph",
- "connected_caveman_graph",
- "relaxed_caveman_graph",
- "random_partition_graph",
- "planted_partition_graph",
- "gaussian_random_partition_graph",
- "ring_of_cliques",
- "windmill_graph",
- "stochastic_block_model",
- "LFR_benchmark_graph",
- ]
- def caveman_graph(l, k):
- """Returns a caveman graph of `l` cliques of size `k`.
- Parameters
- ----------
- l : int
- Number of cliques
- k : int
- Size of cliques
- Returns
- -------
- G : NetworkX Graph
- caveman graph
- Notes
- -----
- This returns an undirected graph, it can be converted to a directed
- graph using :func:`nx.to_directed`, or a multigraph using
- ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is
- described in [1]_ and it is unclear which of the directed
- generalizations is most useful.
- Examples
- --------
- >>> G = nx.caveman_graph(3, 3)
- See also
- --------
- connected_caveman_graph
- References
- ----------
- .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.'
- Amer. J. Soc. 105, 493-527, 1999.
- """
- # l disjoint cliques of size k
- G = nx.empty_graph(l * k)
- if k > 1:
- for start in range(0, l * k, k):
- edges = itertools.combinations(range(start, start + k), 2)
- G.add_edges_from(edges)
- return G
- def connected_caveman_graph(l, k):
- """Returns a connected caveman graph of `l` cliques of size `k`.
- The connected caveman graph is formed by creating `n` cliques of size
- `k`, then a single edge in each clique is rewired to a node in an
- adjacent clique.
- Parameters
- ----------
- l : int
- number of cliques
- k : int
- size of cliques (k at least 2 or NetworkXError is raised)
- Returns
- -------
- G : NetworkX Graph
- connected caveman graph
- Raises
- ------
- NetworkXError
- If the size of cliques `k` is smaller than 2.
- Notes
- -----
- This returns an undirected graph, it can be converted to a directed
- graph using :func:`nx.to_directed`, or a multigraph using
- ``nx.MultiGraph(nx.caveman_graph(l, k))``. Only the undirected version is
- described in [1]_ and it is unclear which of the directed
- generalizations is most useful.
- Examples
- --------
- >>> G = nx.connected_caveman_graph(3, 3)
- References
- ----------
- .. [1] Watts, D. J. 'Networks, Dynamics, and the Small-World Phenomenon.'
- Amer. J. Soc. 105, 493-527, 1999.
- """
- if k < 2:
- raise nx.NetworkXError(
- "The size of cliques in a connected caveman graph " "must be at least 2."
- )
- G = nx.caveman_graph(l, k)
- for start in range(0, l * k, k):
- G.remove_edge(start, start + 1)
- G.add_edge(start, (start - 1) % (l * k))
- return G
- @py_random_state(3)
- def relaxed_caveman_graph(l, k, p, seed=None):
- """Returns a relaxed caveman graph.
- A relaxed caveman graph starts with `l` cliques of size `k`. Edges are
- then randomly rewired with probability `p` to link different cliques.
- Parameters
- ----------
- l : int
- Number of groups
- k : int
- Size of cliques
- p : float
- Probability of rewiring each edge.
- seed : integer, random_state, or None (default)
- Indicator of random number generation state.
- See :ref:`Randomness<randomness>`.
- Returns
- -------
- G : NetworkX Graph
- Relaxed Caveman Graph
- Raises
- ------
- NetworkXError
- If p is not in [0,1]
- Examples
- --------
- >>> G = nx.relaxed_caveman_graph(2, 3, 0.1, seed=42)
- References
- ----------
- .. [1] Santo Fortunato, Community Detection in Graphs,
- Physics Reports Volume 486, Issues 3-5, February 2010, Pages 75-174.
- https://arxiv.org/abs/0906.0612
- """
- G = nx.caveman_graph(l, k)
- nodes = list(G)
- for u, v in G.edges():
- if seed.random() < p: # rewire the edge
- x = seed.choice(nodes)
- if G.has_edge(u, x):
- continue
- G.remove_edge(u, v)
- G.add_edge(u, x)
- return G
- @py_random_state(3)
- def random_partition_graph(sizes, p_in, p_out, seed=None, directed=False):
- """Returns the random partition graph with a partition of sizes.
- A partition graph is a graph of communities with sizes defined by
- s in sizes. Nodes in the same group are connected with probability
- p_in and nodes of different groups are connected with probability
- p_out.
- Parameters
- ----------
- sizes : list of ints
- Sizes of groups
- p_in : float
- probability of edges with in groups
- p_out : float
- probability of edges between groups
- directed : boolean optional, default=False
- Whether to create a directed graph
- seed : integer, random_state, or None (default)
- Indicator of random number generation state.
- See :ref:`Randomness<randomness>`.
- Returns
- -------
- G : NetworkX Graph or DiGraph
- random partition graph of size sum(gs)
- Raises
- ------
- NetworkXError
- If p_in or p_out is not in [0,1]
- Examples
- --------
- >>> G = nx.random_partition_graph([10, 10, 10], 0.25, 0.01)
- >>> len(G)
- 30
- >>> partition = G.graph["partition"]
- >>> len(partition)
- 3
- Notes
- -----
- This is a generalization of the planted-l-partition described in
- [1]_. It allows for the creation of groups of any size.
- The partition is store as a graph attribute 'partition'.
- References
- ----------
- .. [1] Santo Fortunato 'Community Detection in Graphs' Physical Reports
- Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612
- """
- # Use geometric method for O(n+m) complexity algorithm
- # partition = nx.community_sets(nx.get_node_attributes(G, 'affiliation'))
- if not 0.0 <= p_in <= 1.0:
- raise nx.NetworkXError("p_in must be in [0,1]")
- if not 0.0 <= p_out <= 1.0:
- raise nx.NetworkXError("p_out must be in [0,1]")
- # create connection matrix
- num_blocks = len(sizes)
- p = [[p_out for s in range(num_blocks)] for r in range(num_blocks)]
- for r in range(num_blocks):
- p[r][r] = p_in
- return stochastic_block_model(
- sizes,
- p,
- nodelist=None,
- seed=seed,
- directed=directed,
- selfloops=False,
- sparse=True,
- )
- @py_random_state(4)
- def planted_partition_graph(l, k, p_in, p_out, seed=None, directed=False):
- """Returns the planted l-partition graph.
- This model partitions a graph with n=l*k vertices in
- l groups with k vertices each. Vertices of the same
- group are linked with a probability p_in, and vertices
- of different groups are linked with probability p_out.
- Parameters
- ----------
- l : int
- Number of groups
- k : int
- Number of vertices in each group
- p_in : float
- probability of connecting vertices within a group
- p_out : float
- probability of connected vertices between groups
- seed : integer, random_state, or None (default)
- Indicator of random number generation state.
- See :ref:`Randomness<randomness>`.
- directed : bool,optional (default=False)
- If True return a directed graph
- Returns
- -------
- G : NetworkX Graph or DiGraph
- planted l-partition graph
- Raises
- ------
- NetworkXError
- If p_in,p_out are not in [0,1] or
- Examples
- --------
- >>> G = nx.planted_partition_graph(4, 3, 0.5, 0.1, seed=42)
- See Also
- --------
- random_partition_model
- References
- ----------
- .. [1] A. Condon, R.M. Karp, Algorithms for graph partitioning
- on the planted partition model,
- Random Struct. Algor. 18 (2001) 116-140.
- .. [2] Santo Fortunato 'Community Detection in Graphs' Physical Reports
- Volume 486, Issue 3-5 p. 75-174. https://arxiv.org/abs/0906.0612
- """
- return random_partition_graph([k] * l, p_in, p_out, seed=seed, directed=directed)
- @py_random_state(6)
- def gaussian_random_partition_graph(n, s, v, p_in, p_out, directed=False, seed=None):
- """Generate a Gaussian random partition graph.
- A Gaussian random partition graph is created by creating k partitions
- each with a size drawn from a normal distribution with mean s and variance
- s/v. Nodes are connected within clusters with probability p_in and
- between clusters with probability p_out[1]
- Parameters
- ----------
- n : int
- Number of nodes in the graph
- s : float
- Mean cluster size
- v : float
- Shape parameter. The variance of cluster size distribution is s/v.
- p_in : float
- Probability of intra cluster connection.
- p_out : float
- Probability of inter cluster connection.
- directed : boolean, optional default=False
- Whether to create a directed graph or not
- seed : integer, random_state, or None (default)
- Indicator of random number generation state.
- See :ref:`Randomness<randomness>`.
- Returns
- -------
- G : NetworkX Graph or DiGraph
- gaussian random partition graph
- Raises
- ------
- NetworkXError
- If s is > n
- If p_in or p_out is not in [0,1]
- Notes
- -----
- Note the number of partitions is dependent on s,v and n, and that the
- last partition may be considerably smaller, as it is sized to simply
- fill out the nodes [1]
- See Also
- --------
- random_partition_graph
- Examples
- --------
- >>> G = nx.gaussian_random_partition_graph(100, 10, 10, 0.25, 0.1)
- >>> len(G)
- 100
- References
- ----------
- .. [1] Ulrik Brandes, Marco Gaertler, Dorothea Wagner,
- Experiments on Graph Clustering Algorithms,
- In the proceedings of the 11th Europ. Symp. Algorithms, 2003.
- """
- if s > n:
- raise nx.NetworkXError("s must be <= n")
- assigned = 0
- sizes = []
- while True:
- size = int(seed.gauss(s, s / v + 0.5))
- if size < 1: # how to handle 0 or negative sizes?
- continue
- if assigned + size >= n:
- sizes.append(n - assigned)
- break
- assigned += size
- sizes.append(size)
- return random_partition_graph(sizes, p_in, p_out, seed=seed, directed=directed)
- def ring_of_cliques(num_cliques, clique_size):
- """Defines a "ring of cliques" graph.
- A ring of cliques graph is consisting of cliques, connected through single
- links. Each clique is a complete graph.
- Parameters
- ----------
- num_cliques : int
- Number of cliques
- clique_size : int
- Size of cliques
- Returns
- -------
- G : NetworkX Graph
- ring of cliques graph
- Raises
- ------
- NetworkXError
- If the number of cliques is lower than 2 or
- if the size of cliques is smaller than 2.
- Examples
- --------
- >>> G = nx.ring_of_cliques(8, 4)
- See Also
- --------
- connected_caveman_graph
- Notes
- -----
- The `connected_caveman_graph` graph removes a link from each clique to
- connect it with the next clique. Instead, the `ring_of_cliques` graph
- simply adds the link without removing any link from the cliques.
- """
- if num_cliques < 2:
- raise nx.NetworkXError("A ring of cliques must have at least " "two cliques")
- if clique_size < 2:
- raise nx.NetworkXError("The cliques must have at least two nodes")
- G = nx.Graph()
- for i in range(num_cliques):
- edges = itertools.combinations(
- range(i * clique_size, i * clique_size + clique_size), 2
- )
- G.add_edges_from(edges)
- G.add_edge(
- i * clique_size + 1, (i + 1) * clique_size % (num_cliques * clique_size)
- )
- return G
- def windmill_graph(n, k):
- """Generate a windmill graph.
- A windmill graph is a graph of `n` cliques each of size `k` that are all
- joined at one node.
- It can be thought of as taking a disjoint union of `n` cliques of size `k`,
- selecting one point from each, and contracting all of the selected points.
- Alternatively, one could generate `n` cliques of size `k-1` and one node
- that is connected to all other nodes in the graph.
- Parameters
- ----------
- n : int
- Number of cliques
- k : int
- Size of cliques
- Returns
- -------
- G : NetworkX Graph
- windmill graph with n cliques of size k
- Raises
- ------
- NetworkXError
- If the number of cliques is less than two
- If the size of the cliques are less than two
- Examples
- --------
- >>> G = nx.windmill_graph(4, 5)
- Notes
- -----
- The node labeled `0` will be the node connected to all other nodes.
- Note that windmill graphs are usually denoted `Wd(k,n)`, so the parameters
- are in the opposite order as the parameters of this method.
- """
- if n < 2:
- msg = "A windmill graph must have at least two cliques"
- raise nx.NetworkXError(msg)
- if k < 2:
- raise nx.NetworkXError("The cliques must have at least two nodes")
- G = nx.disjoint_union_all(
- itertools.chain(
- [nx.complete_graph(k)], (nx.complete_graph(k - 1) for _ in range(n - 1))
- )
- )
- G.add_edges_from((0, i) for i in range(k, G.number_of_nodes()))
- return G
- @py_random_state(3)
- def stochastic_block_model(
- sizes, p, nodelist=None, seed=None, directed=False, selfloops=False, sparse=True
- ):
- """Returns a stochastic block model graph.
- This model partitions the nodes in blocks of arbitrary sizes, and places
- edges between pairs of nodes independently, with a probability that depends
- on the blocks.
- Parameters
- ----------
- sizes : list of ints
- Sizes of blocks
- p : list of list of floats
- Element (r,s) gives the density of edges going from the nodes
- of group r to nodes of group s.
- p must match the number of groups (len(sizes) == len(p)),
- and it must be symmetric if the graph is undirected.
- nodelist : list, optional
- The block tags are assigned according to the node identifiers
- in nodelist. If nodelist is None, then the ordering is the
- range [0,sum(sizes)-1].
- seed : integer, random_state, or None (default)
- Indicator of random number generation state.
- See :ref:`Randomness<randomness>`.
- directed : boolean optional, default=False
- Whether to create a directed graph or not.
- selfloops : boolean optional, default=False
- Whether to include self-loops or not.
- sparse: boolean optional, default=True
- Use the sparse heuristic to speed up the generator.
- Returns
- -------
- g : NetworkX Graph or DiGraph
- Stochastic block model graph of size sum(sizes)
- Raises
- ------
- NetworkXError
- If probabilities are not in [0,1].
- If the probability matrix is not square (directed case).
- If the probability matrix is not symmetric (undirected case).
- If the sizes list does not match nodelist or the probability matrix.
- If nodelist contains duplicate.
- Examples
- --------
- >>> sizes = [75, 75, 300]
- >>> probs = [[0.25, 0.05, 0.02], [0.05, 0.35, 0.07], [0.02, 0.07, 0.40]]
- >>> g = nx.stochastic_block_model(sizes, probs, seed=0)
- >>> len(g)
- 450
- >>> H = nx.quotient_graph(g, g.graph["partition"], relabel=True)
- >>> for v in H.nodes(data=True):
- ... print(round(v[1]["density"], 3))
- ...
- 0.245
- 0.348
- 0.405
- >>> for v in H.edges(data=True):
- ... print(round(1.0 * v[2]["weight"] / (sizes[v[0]] * sizes[v[1]]), 3))
- ...
- 0.051
- 0.022
- 0.07
- See Also
- --------
- random_partition_graph
- planted_partition_graph
- gaussian_random_partition_graph
- gnp_random_graph
- References
- ----------
- .. [1] Holland, P. W., Laskey, K. B., & Leinhardt, S.,
- "Stochastic blockmodels: First steps",
- Social networks, 5(2), 109-137, 1983.
- """
- # Check if dimensions match
- if len(sizes) != len(p):
- raise nx.NetworkXException("'sizes' and 'p' do not match.")
- # Check for probability symmetry (undirected) and shape (directed)
- for row in p:
- if len(p) != len(row):
- raise nx.NetworkXException("'p' must be a square matrix.")
- if not directed:
- p_transpose = [list(i) for i in zip(*p)]
- for i in zip(p, p_transpose):
- for j in zip(i[0], i[1]):
- if abs(j[0] - j[1]) > 1e-08:
- raise nx.NetworkXException("'p' must be symmetric.")
- # Check for probability range
- for row in p:
- for prob in row:
- if prob < 0 or prob > 1:
- raise nx.NetworkXException("Entries of 'p' not in [0,1].")
- # Check for nodelist consistency
- if nodelist is not None:
- if len(nodelist) != sum(sizes):
- raise nx.NetworkXException("'nodelist' and 'sizes' do not match.")
- if len(nodelist) != len(set(nodelist)):
- raise nx.NetworkXException("nodelist contains duplicate.")
- else:
- nodelist = range(0, sum(sizes))
- # Setup the graph conditionally to the directed switch.
- block_range = range(len(sizes))
- if directed:
- g = nx.DiGraph()
- block_iter = itertools.product(block_range, block_range)
- else:
- g = nx.Graph()
- block_iter = itertools.combinations_with_replacement(block_range, 2)
- # Split nodelist in a partition (list of sets).
- size_cumsum = [sum(sizes[0:x]) for x in range(0, len(sizes) + 1)]
- g.graph["partition"] = [
- set(nodelist[size_cumsum[x] : size_cumsum[x + 1]])
- for x in range(0, len(size_cumsum) - 1)
- ]
- # Setup nodes and graph name
- for block_id, nodes in enumerate(g.graph["partition"]):
- for node in nodes:
- g.add_node(node, block=block_id)
- g.name = "stochastic_block_model"
- # Test for edge existence
- parts = g.graph["partition"]
- for i, j in block_iter:
- if i == j:
- if directed:
- if selfloops:
- edges = itertools.product(parts[i], parts[i])
- else:
- edges = itertools.permutations(parts[i], 2)
- else:
- edges = itertools.combinations(parts[i], 2)
- if selfloops:
- edges = itertools.chain(edges, zip(parts[i], parts[i]))
- for e in edges:
- if seed.random() < p[i][j]:
- g.add_edge(*e)
- else:
- edges = itertools.product(parts[i], parts[j])
- if sparse:
- if p[i][j] == 1: # Test edges cases p_ij = 0 or 1
- for e in edges:
- g.add_edge(*e)
- elif p[i][j] > 0:
- while True:
- try:
- logrand = math.log(seed.random())
- skip = math.floor(logrand / math.log(1 - p[i][j]))
- # consume "skip" edges
- next(itertools.islice(edges, skip, skip), None)
- e = next(edges)
- g.add_edge(*e) # __safe
- except StopIteration:
- break
- else:
- for e in edges:
- if seed.random() < p[i][j]:
- g.add_edge(*e) # __safe
- return g
- def _zipf_rv_below(gamma, xmin, threshold, seed):
- """Returns a random value chosen from the bounded Zipf distribution.
- Repeatedly draws values from the Zipf distribution until the
- threshold is met, then returns that value.
- """
- result = nx.utils.zipf_rv(gamma, xmin, seed)
- while result > threshold:
- result = nx.utils.zipf_rv(gamma, xmin, seed)
- return result
- def _powerlaw_sequence(gamma, low, high, condition, length, max_iters, seed):
- """Returns a list of numbers obeying a constrained power law distribution.
- ``gamma`` and ``low`` are the parameters for the Zipf distribution.
- ``high`` is the maximum allowed value for values draw from the Zipf
- distribution. For more information, see :func:`_zipf_rv_below`.
- ``condition`` and ``length`` are Boolean-valued functions on
- lists. While generating the list, random values are drawn and
- appended to the list until ``length`` is satisfied by the created
- list. Once ``condition`` is satisfied, the sequence generated in
- this way is returned.
- ``max_iters`` indicates the number of times to generate a list
- satisfying ``length``. If the number of iterations exceeds this
- value, :exc:`~networkx.exception.ExceededMaxIterations` is raised.
- seed : integer, random_state, or None (default)
- Indicator of random number generation state.
- See :ref:`Randomness<randomness>`.
- """
- for i in range(max_iters):
- seq = []
- while not length(seq):
- seq.append(_zipf_rv_below(gamma, low, high, seed))
- if condition(seq):
- return seq
- raise nx.ExceededMaxIterations("Could not create power law sequence")
- def _hurwitz_zeta(x, q, tolerance):
- """The Hurwitz zeta function, or the Riemann zeta function of two arguments.
- ``x`` must be greater than one and ``q`` must be positive.
- This function repeatedly computes subsequent partial sums until
- convergence, as decided by ``tolerance``.
- """
- z = 0
- z_prev = -float("inf")
- k = 0
- while abs(z - z_prev) > tolerance:
- z_prev = z
- z += 1 / ((k + q) ** x)
- k += 1
- return z
- def _generate_min_degree(gamma, average_degree, max_degree, tolerance, max_iters):
- """Returns a minimum degree from the given average degree."""
- # Defines zeta function whether or not Scipy is available
- try:
- from scipy.special import zeta
- except ImportError:
- def zeta(x, q):
- return _hurwitz_zeta(x, q, tolerance)
- min_deg_top = max_degree
- min_deg_bot = 1
- min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
- itrs = 0
- mid_avg_deg = 0
- while abs(mid_avg_deg - average_degree) > tolerance:
- if itrs > max_iters:
- raise nx.ExceededMaxIterations("Could not match average_degree")
- mid_avg_deg = 0
- for x in range(int(min_deg_mid), max_degree + 1):
- mid_avg_deg += (x ** (-gamma + 1)) / zeta(gamma, min_deg_mid)
- if mid_avg_deg > average_degree:
- min_deg_top = min_deg_mid
- min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
- else:
- min_deg_bot = min_deg_mid
- min_deg_mid = (min_deg_top - min_deg_bot) / 2 + min_deg_bot
- itrs += 1
- # return int(min_deg_mid + 0.5)
- return round(min_deg_mid)
- def _generate_communities(degree_seq, community_sizes, mu, max_iters, seed):
- """Returns a list of sets, each of which represents a community.
- ``degree_seq`` is the degree sequence that must be met by the
- graph.
- ``community_sizes`` is the community size distribution that must be
- met by the generated list of sets.
- ``mu`` is a float in the interval [0, 1] indicating the fraction of
- intra-community edges incident to each node.
- ``max_iters`` is the number of times to try to add a node to a
- community. This must be greater than the length of
- ``degree_seq``, otherwise this function will always fail. If
- the number of iterations exceeds this value,
- :exc:`~networkx.exception.ExceededMaxIterations` is raised.
- seed : integer, random_state, or None (default)
- Indicator of random number generation state.
- See :ref:`Randomness<randomness>`.
- The communities returned by this are sets of integers in the set {0,
- ..., *n* - 1}, where *n* is the length of ``degree_seq``.
- """
- # This assumes the nodes in the graph will be natural numbers.
- result = [set() for _ in community_sizes]
- n = len(degree_seq)
- free = list(range(n))
- for i in range(max_iters):
- v = free.pop()
- c = seed.choice(range(len(community_sizes)))
- # s = int(degree_seq[v] * (1 - mu) + 0.5)
- s = round(degree_seq[v] * (1 - mu))
- # If the community is large enough, add the node to the chosen
- # community. Otherwise, return it to the list of unaffiliated
- # nodes.
- if s < community_sizes[c]:
- result[c].add(v)
- else:
- free.append(v)
- # If the community is too big, remove a node from it.
- if len(result[c]) > community_sizes[c]:
- free.append(result[c].pop())
- if not free:
- return result
- msg = "Could not assign communities; try increasing min_community"
- raise nx.ExceededMaxIterations(msg)
- @py_random_state(11)
- def LFR_benchmark_graph(
- n,
- tau1,
- tau2,
- mu,
- average_degree=None,
- min_degree=None,
- max_degree=None,
- min_community=None,
- max_community=None,
- tol=1.0e-7,
- max_iters=500,
- seed=None,
- ):
- r"""Returns the LFR benchmark graph.
- This algorithm proceeds as follows:
- 1) Find a degree sequence with a power law distribution, and minimum
- value ``min_degree``, which has approximate average degree
- ``average_degree``. This is accomplished by either
- a) specifying ``min_degree`` and not ``average_degree``,
- b) specifying ``average_degree`` and not ``min_degree``, in which
- case a suitable minimum degree will be found.
- ``max_degree`` can also be specified, otherwise it will be set to
- ``n``. Each node *u* will have $\mu \mathrm{deg}(u)$ edges
- joining it to nodes in communities other than its own and $(1 -
- \mu) \mathrm{deg}(u)$ edges joining it to nodes in its own
- community.
- 2) Generate community sizes according to a power law distribution
- with exponent ``tau2``. If ``min_community`` and
- ``max_community`` are not specified they will be selected to be
- ``min_degree`` and ``max_degree``, respectively. Community sizes
- are generated until the sum of their sizes equals ``n``.
- 3) Each node will be randomly assigned a community with the
- condition that the community is large enough for the node's
- intra-community degree, $(1 - \mu) \mathrm{deg}(u)$ as
- described in step 2. If a community grows too large, a random node
- will be selected for reassignment to a new community, until all
- nodes have been assigned a community.
- 4) Each node *u* then adds $(1 - \mu) \mathrm{deg}(u)$
- intra-community edges and $\mu \mathrm{deg}(u)$ inter-community
- edges.
- Parameters
- ----------
- n : int
- Number of nodes in the created graph.
- tau1 : float
- Power law exponent for the degree distribution of the created
- graph. This value must be strictly greater than one.
- tau2 : float
- Power law exponent for the community size distribution in the
- created graph. This value must be strictly greater than one.
- mu : float
- Fraction of inter-community edges incident to each node. This
- value must be in the interval [0, 1].
- average_degree : float
- Desired average degree of nodes in the created graph. This value
- must be in the interval [0, *n*]. Exactly one of this and
- ``min_degree`` must be specified, otherwise a
- :exc:`NetworkXError` is raised.
- min_degree : int
- Minimum degree of nodes in the created graph. This value must be
- in the interval [0, *n*]. Exactly one of this and
- ``average_degree`` must be specified, otherwise a
- :exc:`NetworkXError` is raised.
- max_degree : int
- Maximum degree of nodes in the created graph. If not specified,
- this is set to ``n``, the total number of nodes in the graph.
- min_community : int
- Minimum size of communities in the graph. If not specified, this
- is set to ``min_degree``.
- max_community : int
- Maximum size of communities in the graph. If not specified, this
- is set to ``n``, the total number of nodes in the graph.
- tol : float
- Tolerance when comparing floats, specifically when comparing
- average degree values.
- max_iters : int
- Maximum number of iterations to try to create the community sizes,
- degree distribution, and community affiliations.
- seed : integer, random_state, or None (default)
- Indicator of random number generation state.
- See :ref:`Randomness<randomness>`.
- Returns
- -------
- G : NetworkX graph
- The LFR benchmark graph generated according to the specified
- parameters.
- Each node in the graph has a node attribute ``'community'`` that
- stores the community (that is, the set of nodes) that includes
- it.
- Raises
- ------
- NetworkXError
- If any of the parameters do not meet their upper and lower bounds:
- - ``tau1`` and ``tau2`` must be strictly greater than 1.
- - ``mu`` must be in [0, 1].
- - ``max_degree`` must be in {1, ..., *n*}.
- - ``min_community`` and ``max_community`` must be in {0, ...,
- *n*}.
- If not exactly one of ``average_degree`` and ``min_degree`` is
- specified.
- If ``min_degree`` is not specified and a suitable ``min_degree``
- cannot be found.
- ExceededMaxIterations
- If a valid degree sequence cannot be created within
- ``max_iters`` number of iterations.
- If a valid set of community sizes cannot be created within
- ``max_iters`` number of iterations.
- If a valid community assignment cannot be created within ``10 *
- n * max_iters`` number of iterations.
- Examples
- --------
- Basic usage::
- >>> from networkx.generators.community import LFR_benchmark_graph
- >>> n = 250
- >>> tau1 = 3
- >>> tau2 = 1.5
- >>> mu = 0.1
- >>> G = LFR_benchmark_graph(
- ... n, tau1, tau2, mu, average_degree=5, min_community=20, seed=10
- ... )
- Continuing the example above, you can get the communities from the
- node attributes of the graph::
- >>> communities = {frozenset(G.nodes[v]["community"]) for v in G}
- Notes
- -----
- This algorithm differs slightly from the original way it was
- presented in [1].
- 1) Rather than connecting the graph via a configuration model then
- rewiring to match the intra-community and inter-community
- degrees, we do this wiring explicitly at the end, which should be
- equivalent.
- 2) The code posted on the author's website [2] calculates the random
- power law distributed variables and their average using
- continuous approximations, whereas we use the discrete
- distributions here as both degree and community size are
- discrete.
- Though the authors describe the algorithm as quite robust, testing
- during development indicates that a somewhat narrower parameter set
- is likely to successfully produce a graph. Some suggestions have
- been provided in the event of exceptions.
- References
- ----------
- .. [1] "Benchmark graphs for testing community detection algorithms",
- Andrea Lancichinetti, Santo Fortunato, and Filippo Radicchi,
- Phys. Rev. E 78, 046110 2008
- .. [2] https://www.santofortunato.net/resources
- """
- # Perform some basic parameter validation.
- if not tau1 > 1:
- raise nx.NetworkXError("tau1 must be greater than one")
- if not tau2 > 1:
- raise nx.NetworkXError("tau2 must be greater than one")
- if not 0 <= mu <= 1:
- raise nx.NetworkXError("mu must be in the interval [0, 1]")
- # Validate parameters for generating the degree sequence.
- if max_degree is None:
- max_degree = n
- elif not 0 < max_degree <= n:
- raise nx.NetworkXError("max_degree must be in the interval (0, n]")
- if not ((min_degree is None) ^ (average_degree is None)):
- raise nx.NetworkXError(
- "Must assign exactly one of min_degree and" " average_degree"
- )
- if min_degree is None:
- min_degree = _generate_min_degree(
- tau1, average_degree, max_degree, tol, max_iters
- )
- # Generate a degree sequence with a power law distribution.
- low, high = min_degree, max_degree
- def condition(seq):
- return sum(seq) % 2 == 0
- def length(seq):
- return len(seq) >= n
- deg_seq = _powerlaw_sequence(tau1, low, high, condition, length, max_iters, seed)
- # Validate parameters for generating the community size sequence.
- if min_community is None:
- min_community = min(deg_seq)
- if max_community is None:
- max_community = max(deg_seq)
- # Generate a community size sequence with a power law distribution.
- #
- # TODO The original code incremented the number of iterations each
- # time a new Zipf random value was drawn from the distribution. This
- # differed from the way the number of iterations was incremented in
- # `_powerlaw_degree_sequence`, so this code was changed to match
- # that one. As a result, this code is allowed many more chances to
- # generate a valid community size sequence.
- low, high = min_community, max_community
- def condition(seq):
- return sum(seq) == n
- def length(seq):
- return sum(seq) >= n
- comms = _powerlaw_sequence(tau2, low, high, condition, length, max_iters, seed)
- # Generate the communities based on the given degree sequence and
- # community sizes.
- max_iters *= 10 * n
- communities = _generate_communities(deg_seq, comms, mu, max_iters, seed)
- # Finally, generate the benchmark graph based on the given
- # communities, joining nodes according to the intra- and
- # inter-community degrees.
- G = nx.Graph()
- G.add_nodes_from(range(n))
- for c in communities:
- for u in c:
- while G.degree(u) < round(deg_seq[u] * (1 - mu)):
- v = seed.choice(list(c))
- G.add_edge(u, v)
- while G.degree(u) < deg_seq[u]:
- v = seed.choice(range(n))
- if v not in c:
- G.add_edge(u, v)
- G.nodes[u]["community"] = c
- return G
|