cluster.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576
  1. """Algorithms to characterize the number of triangles in a graph."""
  2. from collections import Counter
  3. from itertools import chain, combinations
  4. import networkx as nx
  5. from networkx.utils import not_implemented_for
  6. __all__ = [
  7. "triangles",
  8. "average_clustering",
  9. "clustering",
  10. "transitivity",
  11. "square_clustering",
  12. "generalized_degree",
  13. ]
  14. @nx._dispatch("triangles")
  15. @not_implemented_for("directed")
  16. def triangles(G, nodes=None):
  17. """Compute the number of triangles.
  18. Finds the number of triangles that include a node as one vertex.
  19. Parameters
  20. ----------
  21. G : graph
  22. A networkx graph
  23. nodes : container of nodes, optional (default= all nodes in G)
  24. Compute triangles for nodes in this container.
  25. Returns
  26. -------
  27. out : dictionary
  28. Number of triangles keyed by node label.
  29. Examples
  30. --------
  31. >>> G = nx.complete_graph(5)
  32. >>> print(nx.triangles(G, 0))
  33. 6
  34. >>> print(nx.triangles(G))
  35. {0: 6, 1: 6, 2: 6, 3: 6, 4: 6}
  36. >>> print(list(nx.triangles(G, (0, 1)).values()))
  37. [6, 6]
  38. Notes
  39. -----
  40. When computing triangles for the entire graph each triangle is counted
  41. three times, once at each node. Self loops are ignored.
  42. """
  43. # If `nodes` represents a single node in the graph, return only its number
  44. # of triangles.
  45. if nodes in G:
  46. return next(_triangles_and_degree_iter(G, nodes))[2] // 2
  47. # Otherwise, `nodes` represents an iterable of nodes, so return a
  48. # dictionary mapping node to number of triangles.
  49. return {v: t // 2 for v, d, t, _ in _triangles_and_degree_iter(G, nodes)}
  50. @not_implemented_for("multigraph")
  51. def _triangles_and_degree_iter(G, nodes=None):
  52. """Return an iterator of (node, degree, triangles, generalized degree).
  53. This double counts triangles so you may want to divide by 2.
  54. See degree(), triangles() and generalized_degree() for definitions
  55. and details.
  56. """
  57. if nodes is None:
  58. nodes_nbrs = G.adj.items()
  59. else:
  60. nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes))
  61. for v, v_nbrs in nodes_nbrs:
  62. vs = set(v_nbrs) - {v}
  63. gen_degree = Counter(len(vs & (set(G[w]) - {w})) for w in vs)
  64. ntriangles = sum(k * val for k, val in gen_degree.items())
  65. yield (v, len(vs), ntriangles, gen_degree)
  66. @not_implemented_for("multigraph")
  67. def _weighted_triangles_and_degree_iter(G, nodes=None, weight="weight"):
  68. """Return an iterator of (node, degree, weighted_triangles).
  69. Used for weighted clustering.
  70. Note: this returns the geometric average weight of edges in the triangle.
  71. Also, each triangle is counted twice (each direction).
  72. So you may want to divide by 2.
  73. """
  74. import numpy as np
  75. if weight is None or G.number_of_edges() == 0:
  76. max_weight = 1
  77. else:
  78. max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True))
  79. if nodes is None:
  80. nodes_nbrs = G.adj.items()
  81. else:
  82. nodes_nbrs = ((n, G[n]) for n in G.nbunch_iter(nodes))
  83. def wt(u, v):
  84. return G[u][v].get(weight, 1) / max_weight
  85. for i, nbrs in nodes_nbrs:
  86. inbrs = set(nbrs) - {i}
  87. weighted_triangles = 0
  88. seen = set()
  89. for j in inbrs:
  90. seen.add(j)
  91. # This avoids counting twice -- we double at the end.
  92. jnbrs = set(G[j]) - seen
  93. # Only compute the edge weight once, before the inner inner
  94. # loop.
  95. wij = wt(i, j)
  96. weighted_triangles += sum(
  97. np.cbrt([(wij * wt(j, k) * wt(k, i)) for k in inbrs & jnbrs])
  98. )
  99. yield (i, len(inbrs), 2 * weighted_triangles)
  100. @not_implemented_for("multigraph")
  101. def _directed_triangles_and_degree_iter(G, nodes=None):
  102. """Return an iterator of
  103. (node, total_degree, reciprocal_degree, directed_triangles).
  104. Used for directed clustering.
  105. Note that unlike `_triangles_and_degree_iter()`, this function counts
  106. directed triangles so does not count triangles twice.
  107. """
  108. nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes))
  109. for i, preds, succs in nodes_nbrs:
  110. ipreds = set(preds) - {i}
  111. isuccs = set(succs) - {i}
  112. directed_triangles = 0
  113. for j in chain(ipreds, isuccs):
  114. jpreds = set(G._pred[j]) - {j}
  115. jsuccs = set(G._succ[j]) - {j}
  116. directed_triangles += sum(
  117. 1
  118. for k in chain(
  119. (ipreds & jpreds),
  120. (ipreds & jsuccs),
  121. (isuccs & jpreds),
  122. (isuccs & jsuccs),
  123. )
  124. )
  125. dtotal = len(ipreds) + len(isuccs)
  126. dbidirectional = len(ipreds & isuccs)
  127. yield (i, dtotal, dbidirectional, directed_triangles)
  128. @not_implemented_for("multigraph")
  129. def _directed_weighted_triangles_and_degree_iter(G, nodes=None, weight="weight"):
  130. """Return an iterator of
  131. (node, total_degree, reciprocal_degree, directed_weighted_triangles).
  132. Used for directed weighted clustering.
  133. Note that unlike `_weighted_triangles_and_degree_iter()`, this function counts
  134. directed triangles so does not count triangles twice.
  135. """
  136. import numpy as np
  137. if weight is None or G.number_of_edges() == 0:
  138. max_weight = 1
  139. else:
  140. max_weight = max(d.get(weight, 1) for u, v, d in G.edges(data=True))
  141. nodes_nbrs = ((n, G._pred[n], G._succ[n]) for n in G.nbunch_iter(nodes))
  142. def wt(u, v):
  143. return G[u][v].get(weight, 1) / max_weight
  144. for i, preds, succs in nodes_nbrs:
  145. ipreds = set(preds) - {i}
  146. isuccs = set(succs) - {i}
  147. directed_triangles = 0
  148. for j in ipreds:
  149. jpreds = set(G._pred[j]) - {j}
  150. jsuccs = set(G._succ[j]) - {j}
  151. directed_triangles += sum(
  152. np.cbrt([(wt(j, i) * wt(k, i) * wt(k, j)) for k in ipreds & jpreds])
  153. )
  154. directed_triangles += sum(
  155. np.cbrt([(wt(j, i) * wt(k, i) * wt(j, k)) for k in ipreds & jsuccs])
  156. )
  157. directed_triangles += sum(
  158. np.cbrt([(wt(j, i) * wt(i, k) * wt(k, j)) for k in isuccs & jpreds])
  159. )
  160. directed_triangles += sum(
  161. np.cbrt([(wt(j, i) * wt(i, k) * wt(j, k)) for k in isuccs & jsuccs])
  162. )
  163. for j in isuccs:
  164. jpreds = set(G._pred[j]) - {j}
  165. jsuccs = set(G._succ[j]) - {j}
  166. directed_triangles += sum(
  167. np.cbrt([(wt(i, j) * wt(k, i) * wt(k, j)) for k in ipreds & jpreds])
  168. )
  169. directed_triangles += sum(
  170. np.cbrt([(wt(i, j) * wt(k, i) * wt(j, k)) for k in ipreds & jsuccs])
  171. )
  172. directed_triangles += sum(
  173. np.cbrt([(wt(i, j) * wt(i, k) * wt(k, j)) for k in isuccs & jpreds])
  174. )
  175. directed_triangles += sum(
  176. np.cbrt([(wt(i, j) * wt(i, k) * wt(j, k)) for k in isuccs & jsuccs])
  177. )
  178. dtotal = len(ipreds) + len(isuccs)
  179. dbidirectional = len(ipreds & isuccs)
  180. yield (i, dtotal, dbidirectional, directed_triangles)
  181. @nx._dispatch(name="average_clustering")
  182. def average_clustering(G, nodes=None, weight=None, count_zeros=True):
  183. r"""Compute the average clustering coefficient for the graph G.
  184. The clustering coefficient for the graph is the average,
  185. .. math::
  186. C = \frac{1}{n}\sum_{v \in G} c_v,
  187. where :math:`n` is the number of nodes in `G`.
  188. Parameters
  189. ----------
  190. G : graph
  191. nodes : container of nodes, optional (default=all nodes in G)
  192. Compute average clustering for nodes in this container.
  193. weight : string or None, optional (default=None)
  194. The edge attribute that holds the numerical value used as a weight.
  195. If None, then each edge has weight 1.
  196. count_zeros : bool
  197. If False include only the nodes with nonzero clustering in the average.
  198. Returns
  199. -------
  200. avg : float
  201. Average clustering
  202. Examples
  203. --------
  204. >>> G = nx.complete_graph(5)
  205. >>> print(nx.average_clustering(G))
  206. 1.0
  207. Notes
  208. -----
  209. This is a space saving routine; it might be faster
  210. to use the clustering function to get a list and then take the average.
  211. Self loops are ignored.
  212. References
  213. ----------
  214. .. [1] Generalizations of the clustering coefficient to weighted
  215. complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela,
  216. K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007).
  217. http://jponnela.com/web_documents/a9.pdf
  218. .. [2] Marcus Kaiser, Mean clustering coefficients: the role of isolated
  219. nodes and leafs on clustering measures for small-world networks.
  220. https://arxiv.org/abs/0802.2512
  221. """
  222. c = clustering(G, nodes, weight=weight).values()
  223. if not count_zeros:
  224. c = [v for v in c if abs(v) > 0]
  225. return sum(c) / len(c)
  226. @nx._dispatch(name="clustering")
  227. def clustering(G, nodes=None, weight=None):
  228. r"""Compute the clustering coefficient for nodes.
  229. For unweighted graphs, the clustering of a node :math:`u`
  230. is the fraction of possible triangles through that node that exist,
  231. .. math::
  232. c_u = \frac{2 T(u)}{deg(u)(deg(u)-1)},
  233. where :math:`T(u)` is the number of triangles through node :math:`u` and
  234. :math:`deg(u)` is the degree of :math:`u`.
  235. For weighted graphs, there are several ways to define clustering [1]_.
  236. the one used here is defined
  237. as the geometric average of the subgraph edge weights [2]_,
  238. .. math::
  239. c_u = \frac{1}{deg(u)(deg(u)-1))}
  240. \sum_{vw} (\hat{w}_{uv} \hat{w}_{uw} \hat{w}_{vw})^{1/3}.
  241. The edge weights :math:`\hat{w}_{uv}` are normalized by the maximum weight
  242. in the network :math:`\hat{w}_{uv} = w_{uv}/\max(w)`.
  243. The value of :math:`c_u` is assigned to 0 if :math:`deg(u) < 2`.
  244. Additionally, this weighted definition has been generalized to support negative edge weights [3]_.
  245. For directed graphs, the clustering is similarly defined as the fraction
  246. of all possible directed triangles or geometric average of the subgraph
  247. edge weights for unweighted and weighted directed graph respectively [4]_.
  248. .. math::
  249. c_u = \frac{T(u)}{2(deg^{tot}(u)(deg^{tot}(u)-1) - 2deg^{\leftrightarrow}(u))},
  250. where :math:`T(u)` is the number of directed triangles through node
  251. :math:`u`, :math:`deg^{tot}(u)` is the sum of in degree and out degree of
  252. :math:`u` and :math:`deg^{\leftrightarrow}(u)` is the reciprocal degree of
  253. :math:`u`.
  254. Parameters
  255. ----------
  256. G : graph
  257. nodes : node, iterable of nodes, or None (default=None)
  258. If a singleton node, return the number of triangles for that node.
  259. If an iterable, compute the number of triangles for each of those nodes.
  260. If `None` (the default) compute the number of triangles for all nodes in `G`.
  261. weight : string or None, optional (default=None)
  262. The edge attribute that holds the numerical value used as a weight.
  263. If None, then each edge has weight 1.
  264. Returns
  265. -------
  266. out : float, or dictionary
  267. Clustering coefficient at specified nodes
  268. Examples
  269. --------
  270. >>> G = nx.complete_graph(5)
  271. >>> print(nx.clustering(G, 0))
  272. 1.0
  273. >>> print(nx.clustering(G))
  274. {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}
  275. Notes
  276. -----
  277. Self loops are ignored.
  278. References
  279. ----------
  280. .. [1] Generalizations of the clustering coefficient to weighted
  281. complex networks by J. Saramäki, M. Kivelä, J.-P. Onnela,
  282. K. Kaski, and J. Kertész, Physical Review E, 75 027105 (2007).
  283. http://jponnela.com/web_documents/a9.pdf
  284. .. [2] Intensity and coherence of motifs in weighted complex
  285. networks by J. P. Onnela, J. Saramäki, J. Kertész, and K. Kaski,
  286. Physical Review E, 71(6), 065103 (2005).
  287. .. [3] Generalization of Clustering Coefficients to Signed Correlation Networks
  288. by G. Costantini and M. Perugini, PloS one, 9(2), e88669 (2014).
  289. .. [4] Clustering in complex directed networks by G. Fagiolo,
  290. Physical Review E, 76(2), 026107 (2007).
  291. """
  292. if G.is_directed():
  293. if weight is not None:
  294. td_iter = _directed_weighted_triangles_and_degree_iter(G, nodes, weight)
  295. clusterc = {
  296. v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2)
  297. for v, dt, db, t in td_iter
  298. }
  299. else:
  300. td_iter = _directed_triangles_and_degree_iter(G, nodes)
  301. clusterc = {
  302. v: 0 if t == 0 else t / ((dt * (dt - 1) - 2 * db) * 2)
  303. for v, dt, db, t in td_iter
  304. }
  305. else:
  306. # The formula 2*T/(d*(d-1)) from docs is t/(d*(d-1)) here b/c t==2*T
  307. if weight is not None:
  308. td_iter = _weighted_triangles_and_degree_iter(G, nodes, weight)
  309. clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for v, d, t in td_iter}
  310. else:
  311. td_iter = _triangles_and_degree_iter(G, nodes)
  312. clusterc = {v: 0 if t == 0 else t / (d * (d - 1)) for v, d, t, _ in td_iter}
  313. if nodes in G:
  314. # Return the value of the sole entry in the dictionary.
  315. return clusterc[nodes]
  316. return clusterc
  317. @nx._dispatch("transitivity")
  318. def transitivity(G):
  319. r"""Compute graph transitivity, the fraction of all possible triangles
  320. present in G.
  321. Possible triangles are identified by the number of "triads"
  322. (two edges with a shared vertex).
  323. The transitivity is
  324. .. math::
  325. T = 3\frac{\#triangles}{\#triads}.
  326. Parameters
  327. ----------
  328. G : graph
  329. Returns
  330. -------
  331. out : float
  332. Transitivity
  333. Examples
  334. --------
  335. >>> G = nx.complete_graph(5)
  336. >>> print(nx.transitivity(G))
  337. 1.0
  338. """
  339. triangles_contri = [
  340. (t, d * (d - 1)) for v, d, t, _ in _triangles_and_degree_iter(G)
  341. ]
  342. # If the graph is empty
  343. if len(triangles_contri) == 0:
  344. return 0
  345. triangles, contri = map(sum, zip(*triangles_contri))
  346. return 0 if triangles == 0 else triangles / contri
  347. @nx._dispatch(name="square_clustering")
  348. def square_clustering(G, nodes=None):
  349. r"""Compute the squares clustering coefficient for nodes.
  350. For each node return the fraction of possible squares that exist at
  351. the node [1]_
  352. .. math::
  353. C_4(v) = \frac{ \sum_{u=1}^{k_v}
  354. \sum_{w=u+1}^{k_v} q_v(u,w) }{ \sum_{u=1}^{k_v}
  355. \sum_{w=u+1}^{k_v} [a_v(u,w) + q_v(u,w)]},
  356. where :math:`q_v(u,w)` are the number of common neighbors of :math:`u` and
  357. :math:`w` other than :math:`v` (ie squares), and :math:`a_v(u,w) = (k_u -
  358. (1+q_v(u,w)+\theta_{uv})) + (k_w - (1+q_v(u,w)+\theta_{uw}))`, where
  359. :math:`\theta_{uw} = 1` if :math:`u` and :math:`w` are connected and 0
  360. otherwise. [2]_
  361. Parameters
  362. ----------
  363. G : graph
  364. nodes : container of nodes, optional (default=all nodes in G)
  365. Compute clustering for nodes in this container.
  366. Returns
  367. -------
  368. c4 : dictionary
  369. A dictionary keyed by node with the square clustering coefficient value.
  370. Examples
  371. --------
  372. >>> G = nx.complete_graph(5)
  373. >>> print(nx.square_clustering(G, 0))
  374. 1.0
  375. >>> print(nx.square_clustering(G))
  376. {0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0}
  377. Notes
  378. -----
  379. While :math:`C_3(v)` (triangle clustering) gives the probability that
  380. two neighbors of node v are connected with each other, :math:`C_4(v)` is
  381. the probability that two neighbors of node v share a common
  382. neighbor different from v. This algorithm can be applied to both
  383. bipartite and unipartite networks.
  384. References
  385. ----------
  386. .. [1] Pedro G. Lind, Marta C. González, and Hans J. Herrmann. 2005
  387. Cycles and clustering in bipartite networks.
  388. Physical Review E (72) 056127.
  389. .. [2] Zhang, Peng et al. Clustering Coefficient and Community Structure of
  390. Bipartite Networks. Physica A: Statistical Mechanics and its Applications 387.27 (2008): 6869–6875.
  391. https://arxiv.org/abs/0710.0117v1
  392. """
  393. if nodes is None:
  394. node_iter = G
  395. else:
  396. node_iter = G.nbunch_iter(nodes)
  397. clustering = {}
  398. for v in node_iter:
  399. clustering[v] = 0
  400. potential = 0
  401. for u, w in combinations(G[v], 2):
  402. squares = len((set(G[u]) & set(G[w])) - {v})
  403. clustering[v] += squares
  404. degm = squares + 1
  405. if w in G[u]:
  406. degm += 1
  407. potential += (len(G[u]) - degm) + (len(G[w]) - degm) + squares
  408. if potential > 0:
  409. clustering[v] /= potential
  410. if nodes in G:
  411. # Return the value of the sole entry in the dictionary.
  412. return clustering[nodes]
  413. return clustering
  414. @nx._dispatch("generalized_degree")
  415. @not_implemented_for("directed")
  416. def generalized_degree(G, nodes=None):
  417. r"""Compute the generalized degree for nodes.
  418. For each node, the generalized degree shows how many edges of given
  419. triangle multiplicity the node is connected to. The triangle multiplicity
  420. of an edge is the number of triangles an edge participates in. The
  421. generalized degree of node :math:`i` can be written as a vector
  422. :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc, k_i^{(N-2)})` where
  423. :math:`k_i^{(j)}` is the number of edges attached to node :math:`i` that
  424. participate in :math:`j` triangles.
  425. Parameters
  426. ----------
  427. G : graph
  428. nodes : container of nodes, optional (default=all nodes in G)
  429. Compute the generalized degree for nodes in this container.
  430. Returns
  431. -------
  432. out : Counter, or dictionary of Counters
  433. Generalized degree of specified nodes. The Counter is keyed by edge
  434. triangle multiplicity.
  435. Examples
  436. --------
  437. >>> G = nx.complete_graph(5)
  438. >>> print(nx.generalized_degree(G, 0))
  439. Counter({3: 4})
  440. >>> print(nx.generalized_degree(G))
  441. {0: Counter({3: 4}), 1: Counter({3: 4}), 2: Counter({3: 4}), 3: Counter({3: 4}), 4: Counter({3: 4})}
  442. To recover the number of triangles attached to a node:
  443. >>> k1 = nx.generalized_degree(G, 0)
  444. >>> sum([k * v for k, v in k1.items()]) / 2 == nx.triangles(G, 0)
  445. True
  446. Notes
  447. -----
  448. In a network of N nodes, the highest triangle multiplicity an edge can have
  449. is N-2.
  450. The return value does not include a `zero` entry if no edges of a
  451. particular triangle multiplicity are present.
  452. The number of triangles node :math:`i` is attached to can be recovered from
  453. the generalized degree :math:`\mathbf{k}_i=(k_i^{(0)}, \dotsc,
  454. k_i^{(N-2)})` by :math:`(k_i^{(1)}+2k_i^{(2)}+\dotsc +(N-2)k_i^{(N-2)})/2`.
  455. References
  456. ----------
  457. .. [1] Networks with arbitrary edge multiplicities by V. Zlatić,
  458. D. Garlaschelli and G. Caldarelli, EPL (Europhysics Letters),
  459. Volume 97, Number 2 (2012).
  460. https://iopscience.iop.org/article/10.1209/0295-5075/97/28005
  461. """
  462. if nodes in G:
  463. return next(_triangles_and_degree_iter(G, nodes))[3]
  464. return {v: gd for v, d, t, gd in _triangles_and_degree_iter(G, nodes)}