core.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541
  1. """
  2. Find the k-cores of a graph.
  3. The k-core is found by recursively pruning nodes with degrees less than k.
  4. See the following references for details:
  5. An O(m) Algorithm for Cores Decomposition of Networks
  6. Vladimir Batagelj and Matjaz Zaversnik, 2003.
  7. https://arxiv.org/abs/cs.DS/0310049
  8. Generalized Cores
  9. Vladimir Batagelj and Matjaz Zaversnik, 2002.
  10. https://arxiv.org/pdf/cs/0202039
  11. For directed graphs a more general notion is that of D-cores which
  12. looks at (k, l) restrictions on (in, out) degree. The (k, k) D-core
  13. is the k-core.
  14. D-cores: Measuring Collaboration of Directed Graphs Based on Degeneracy
  15. Christos Giatsidis, Dimitrios M. Thilikos, Michalis Vazirgiannis, ICDM 2011.
  16. http://www.graphdegeneracy.org/dcores_ICDM_2011.pdf
  17. Multi-scale structure and topological anomaly detection via a new network \
  18. statistic: The onion decomposition
  19. L. Hébert-Dufresne, J. A. Grochow, and A. Allard
  20. Scientific Reports 6, 31708 (2016)
  21. http://doi.org/10.1038/srep31708
  22. """
  23. import networkx as nx
  24. from networkx.exception import NetworkXError
  25. from networkx.utils import not_implemented_for
  26. __all__ = [
  27. "core_number",
  28. "k_core",
  29. "k_shell",
  30. "k_crust",
  31. "k_corona",
  32. "k_truss",
  33. "onion_layers",
  34. ]
  35. @nx._dispatch
  36. @not_implemented_for("multigraph")
  37. def core_number(G):
  38. """Returns the core number for each vertex.
  39. A k-core is a maximal subgraph that contains nodes of degree k or more.
  40. The core number of a node is the largest value k of a k-core containing
  41. that node.
  42. Parameters
  43. ----------
  44. G : NetworkX graph
  45. A graph or directed graph
  46. Returns
  47. -------
  48. core_number : dictionary
  49. A dictionary keyed by node to the core number.
  50. Raises
  51. ------
  52. NetworkXError
  53. The k-core is not implemented for graphs with self loops
  54. or parallel edges.
  55. Notes
  56. -----
  57. Not implemented for graphs with parallel edges or self loops.
  58. For directed graphs the node degree is defined to be the
  59. in-degree + out-degree.
  60. References
  61. ----------
  62. .. [1] An O(m) Algorithm for Cores Decomposition of Networks
  63. Vladimir Batagelj and Matjaz Zaversnik, 2003.
  64. https://arxiv.org/abs/cs.DS/0310049
  65. """
  66. if nx.number_of_selfloops(G) > 0:
  67. msg = (
  68. "Input graph has self loops which is not permitted; "
  69. "Consider using G.remove_edges_from(nx.selfloop_edges(G))."
  70. )
  71. raise NetworkXError(msg)
  72. degrees = dict(G.degree())
  73. # Sort nodes by degree.
  74. nodes = sorted(degrees, key=degrees.get)
  75. bin_boundaries = [0]
  76. curr_degree = 0
  77. for i, v in enumerate(nodes):
  78. if degrees[v] > curr_degree:
  79. bin_boundaries.extend([i] * (degrees[v] - curr_degree))
  80. curr_degree = degrees[v]
  81. node_pos = {v: pos for pos, v in enumerate(nodes)}
  82. # The initial guess for the core number of a node is its degree.
  83. core = degrees
  84. nbrs = {v: list(nx.all_neighbors(G, v)) for v in G}
  85. for v in nodes:
  86. for u in nbrs[v]:
  87. if core[u] > core[v]:
  88. nbrs[u].remove(v)
  89. pos = node_pos[u]
  90. bin_start = bin_boundaries[core[u]]
  91. node_pos[u] = bin_start
  92. node_pos[nodes[bin_start]] = pos
  93. nodes[bin_start], nodes[pos] = nodes[pos], nodes[bin_start]
  94. bin_boundaries[core[u]] += 1
  95. core[u] -= 1
  96. return core
  97. def _core_subgraph(G, k_filter, k=None, core=None):
  98. """Returns the subgraph induced by nodes passing filter `k_filter`.
  99. Parameters
  100. ----------
  101. G : NetworkX graph
  102. The graph or directed graph to process
  103. k_filter : filter function
  104. This function filters the nodes chosen. It takes three inputs:
  105. A node of G, the filter's cutoff, and the core dict of the graph.
  106. The function should return a Boolean value.
  107. k : int, optional
  108. The order of the core. If not specified use the max core number.
  109. This value is used as the cutoff for the filter.
  110. core : dict, optional
  111. Precomputed core numbers keyed by node for the graph `G`.
  112. If not specified, the core numbers will be computed from `G`.
  113. """
  114. if core is None:
  115. core = core_number(G)
  116. if k is None:
  117. k = max(core.values())
  118. nodes = (v for v in core if k_filter(v, k, core))
  119. return G.subgraph(nodes).copy()
  120. @nx._dispatch
  121. def k_core(G, k=None, core_number=None):
  122. """Returns the k-core of G.
  123. A k-core is a maximal subgraph that contains nodes of degree k or more.
  124. Parameters
  125. ----------
  126. G : NetworkX graph
  127. A graph or directed graph
  128. k : int, optional
  129. The order of the core. If not specified return the main core.
  130. core_number : dictionary, optional
  131. Precomputed core numbers for the graph G.
  132. Returns
  133. -------
  134. G : NetworkX graph
  135. The k-core subgraph
  136. Raises
  137. ------
  138. NetworkXError
  139. The k-core is not defined for graphs with self loops or parallel edges.
  140. Notes
  141. -----
  142. The main core is the core with the largest degree.
  143. Not implemented for graphs with parallel edges or self loops.
  144. For directed graphs the node degree is defined to be the
  145. in-degree + out-degree.
  146. Graph, node, and edge attributes are copied to the subgraph.
  147. See Also
  148. --------
  149. core_number
  150. References
  151. ----------
  152. .. [1] An O(m) Algorithm for Cores Decomposition of Networks
  153. Vladimir Batagelj and Matjaz Zaversnik, 2003.
  154. https://arxiv.org/abs/cs.DS/0310049
  155. """
  156. def k_filter(v, k, c):
  157. return c[v] >= k
  158. return _core_subgraph(G, k_filter, k, core_number)
  159. def k_shell(G, k=None, core_number=None):
  160. """Returns the k-shell of G.
  161. The k-shell is the subgraph induced by nodes with core number k.
  162. That is, nodes in the k-core that are not in the (k+1)-core.
  163. Parameters
  164. ----------
  165. G : NetworkX graph
  166. A graph or directed graph.
  167. k : int, optional
  168. The order of the shell. If not specified return the outer shell.
  169. core_number : dictionary, optional
  170. Precomputed core numbers for the graph G.
  171. Returns
  172. -------
  173. G : NetworkX graph
  174. The k-shell subgraph
  175. Raises
  176. ------
  177. NetworkXError
  178. The k-shell is not implemented for graphs with self loops
  179. or parallel edges.
  180. Notes
  181. -----
  182. This is similar to k_corona but in that case only neighbors in the
  183. k-core are considered.
  184. Not implemented for graphs with parallel edges or self loops.
  185. For directed graphs the node degree is defined to be the
  186. in-degree + out-degree.
  187. Graph, node, and edge attributes are copied to the subgraph.
  188. See Also
  189. --------
  190. core_number
  191. k_corona
  192. References
  193. ----------
  194. .. [1] A model of Internet topology using k-shell decomposition
  195. Shai Carmi, Shlomo Havlin, Scott Kirkpatrick, Yuval Shavitt,
  196. and Eran Shir, PNAS July 3, 2007 vol. 104 no. 27 11150-11154
  197. http://www.pnas.org/content/104/27/11150.full
  198. """
  199. def k_filter(v, k, c):
  200. return c[v] == k
  201. return _core_subgraph(G, k_filter, k, core_number)
  202. def k_crust(G, k=None, core_number=None):
  203. """Returns the k-crust of G.
  204. The k-crust is the graph G with the edges of the k-core removed
  205. and isolated nodes found after the removal of edges are also removed.
  206. Parameters
  207. ----------
  208. G : NetworkX graph
  209. A graph or directed graph.
  210. k : int, optional
  211. The order of the shell. If not specified return the main crust.
  212. core_number : dictionary, optional
  213. Precomputed core numbers for the graph G.
  214. Returns
  215. -------
  216. G : NetworkX graph
  217. The k-crust subgraph
  218. Raises
  219. ------
  220. NetworkXError
  221. The k-crust is not implemented for graphs with self loops
  222. or parallel edges.
  223. Notes
  224. -----
  225. This definition of k-crust is different than the definition in [1]_.
  226. The k-crust in [1]_ is equivalent to the k+1 crust of this algorithm.
  227. Not implemented for graphs with parallel edges or self loops.
  228. For directed graphs the node degree is defined to be the
  229. in-degree + out-degree.
  230. Graph, node, and edge attributes are copied to the subgraph.
  231. See Also
  232. --------
  233. core_number
  234. References
  235. ----------
  236. .. [1] A model of Internet topology using k-shell decomposition
  237. Shai Carmi, Shlomo Havlin, Scott Kirkpatrick, Yuval Shavitt,
  238. and Eran Shir, PNAS July 3, 2007 vol. 104 no. 27 11150-11154
  239. http://www.pnas.org/content/104/27/11150.full
  240. """
  241. # Default for k is one less than in _core_subgraph, so just inline.
  242. # Filter is c[v] <= k
  243. if core_number is None:
  244. core_number = nx.core_number(G)
  245. if k is None:
  246. k = max(core_number.values()) - 1
  247. nodes = (v for v in core_number if core_number[v] <= k)
  248. return G.subgraph(nodes).copy()
  249. def k_corona(G, k, core_number=None):
  250. """Returns the k-corona of G.
  251. The k-corona is the subgraph of nodes in the k-core which have
  252. exactly k neighbours in the k-core.
  253. Parameters
  254. ----------
  255. G : NetworkX graph
  256. A graph or directed graph
  257. k : int
  258. The order of the corona.
  259. core_number : dictionary, optional
  260. Precomputed core numbers for the graph G.
  261. Returns
  262. -------
  263. G : NetworkX graph
  264. The k-corona subgraph
  265. Raises
  266. ------
  267. NetworkXError
  268. The k-cornoa is not defined for graphs with self loops or
  269. parallel edges.
  270. Notes
  271. -----
  272. Not implemented for graphs with parallel edges or self loops.
  273. For directed graphs the node degree is defined to be the
  274. in-degree + out-degree.
  275. Graph, node, and edge attributes are copied to the subgraph.
  276. See Also
  277. --------
  278. core_number
  279. References
  280. ----------
  281. .. [1] k -core (bootstrap) percolation on complex networks:
  282. Critical phenomena and nonlocal effects,
  283. A. V. Goltsev, S. N. Dorogovtsev, and J. F. F. Mendes,
  284. Phys. Rev. E 73, 056101 (2006)
  285. http://link.aps.org/doi/10.1103/PhysRevE.73.056101
  286. """
  287. def func(v, k, c):
  288. return c[v] == k and k == sum(1 for w in G[v] if c[w] >= k)
  289. return _core_subgraph(G, func, k, core_number)
  290. @nx._dispatch
  291. @not_implemented_for("directed")
  292. @not_implemented_for("multigraph")
  293. def k_truss(G, k):
  294. """Returns the k-truss of `G`.
  295. The k-truss is the maximal induced subgraph of `G` which contains at least
  296. three vertices where every edge is incident to at least `k-2` triangles.
  297. Parameters
  298. ----------
  299. G : NetworkX graph
  300. An undirected graph
  301. k : int
  302. The order of the truss
  303. Returns
  304. -------
  305. H : NetworkX graph
  306. The k-truss subgraph
  307. Raises
  308. ------
  309. NetworkXError
  310. The k-truss is not defined for graphs with self loops, directed graphs
  311. and multigraphs.
  312. Notes
  313. -----
  314. A k-clique is a (k-2)-truss and a k-truss is a (k+1)-core.
  315. Not implemented for digraphs or graphs with parallel edges or self loops.
  316. Graph, node, and edge attributes are copied to the subgraph.
  317. K-trusses were originally defined in [2] which states that the k-truss
  318. is the maximal induced subgraph where each edge belongs to at least
  319. `k-2` triangles. A more recent paper, [1], uses a slightly different
  320. definition requiring that each edge belong to at least `k` triangles.
  321. This implementation uses the original definition of `k-2` triangles.
  322. References
  323. ----------
  324. .. [1] Bounds and Algorithms for k-truss. Paul Burkhardt, Vance Faber,
  325. David G. Harris, 2018. https://arxiv.org/abs/1806.05523v2
  326. .. [2] Trusses: Cohesive Subgraphs for Social Network Analysis. Jonathan
  327. Cohen, 2005.
  328. """
  329. if nx.number_of_selfloops(G) > 0:
  330. msg = (
  331. "Input graph has self loops which is not permitted; "
  332. "Consider using G.remove_edges_from(nx.selfloop_edges(G))."
  333. )
  334. raise NetworkXError(msg)
  335. H = G.copy()
  336. n_dropped = 1
  337. while n_dropped > 0:
  338. n_dropped = 0
  339. to_drop = []
  340. seen = set()
  341. for u in H:
  342. nbrs_u = set(H[u])
  343. seen.add(u)
  344. new_nbrs = [v for v in nbrs_u if v not in seen]
  345. for v in new_nbrs:
  346. if len(nbrs_u & set(H[v])) < (k - 2):
  347. to_drop.append((u, v))
  348. H.remove_edges_from(to_drop)
  349. n_dropped = len(to_drop)
  350. H.remove_nodes_from(list(nx.isolates(H)))
  351. return H
  352. @not_implemented_for("multigraph")
  353. @not_implemented_for("directed")
  354. def onion_layers(G):
  355. """Returns the layer of each vertex in an onion decomposition of the graph.
  356. The onion decomposition refines the k-core decomposition by providing
  357. information on the internal organization of each k-shell. It is usually
  358. used alongside the `core numbers`.
  359. Parameters
  360. ----------
  361. G : NetworkX graph
  362. A simple graph without self loops or parallel edges
  363. Returns
  364. -------
  365. od_layers : dictionary
  366. A dictionary keyed by vertex to the onion layer. The layers are
  367. contiguous integers starting at 1.
  368. Raises
  369. ------
  370. NetworkXError
  371. The onion decomposition is not implemented for graphs with self loops
  372. or parallel edges or for directed graphs.
  373. Notes
  374. -----
  375. Not implemented for graphs with parallel edges or self loops.
  376. Not implemented for directed graphs.
  377. See Also
  378. --------
  379. core_number
  380. References
  381. ----------
  382. .. [1] Multi-scale structure and topological anomaly detection via a new
  383. network statistic: The onion decomposition
  384. L. Hébert-Dufresne, J. A. Grochow, and A. Allard
  385. Scientific Reports 6, 31708 (2016)
  386. http://doi.org/10.1038/srep31708
  387. .. [2] Percolation and the effective structure of complex networks
  388. A. Allard and L. Hébert-Dufresne
  389. Physical Review X 9, 011023 (2019)
  390. http://doi.org/10.1103/PhysRevX.9.011023
  391. """
  392. if nx.number_of_selfloops(G) > 0:
  393. msg = (
  394. "Input graph contains self loops which is not permitted; "
  395. "Consider using G.remove_edges_from(nx.selfloop_edges(G))."
  396. )
  397. raise NetworkXError(msg)
  398. # Dictionaries to register the k-core/onion decompositions.
  399. od_layers = {}
  400. # Adjacency list
  401. neighbors = {v: list(nx.all_neighbors(G, v)) for v in G}
  402. # Effective degree of nodes.
  403. degrees = dict(G.degree())
  404. # Performs the onion decomposition.
  405. current_core = 1
  406. current_layer = 1
  407. # Sets vertices of degree 0 to layer 1, if any.
  408. isolated_nodes = list(nx.isolates(G))
  409. if len(isolated_nodes) > 0:
  410. for v in isolated_nodes:
  411. od_layers[v] = current_layer
  412. degrees.pop(v)
  413. current_layer = 2
  414. # Finds the layer for the remaining nodes.
  415. while len(degrees) > 0:
  416. # Sets the order for looking at nodes.
  417. nodes = sorted(degrees, key=degrees.get)
  418. # Sets properly the current core.
  419. min_degree = degrees[nodes[0]]
  420. if min_degree > current_core:
  421. current_core = min_degree
  422. # Identifies vertices in the current layer.
  423. this_layer = []
  424. for n in nodes:
  425. if degrees[n] > current_core:
  426. break
  427. this_layer.append(n)
  428. # Identifies the core/layer of the vertices in the current layer.
  429. for v in this_layer:
  430. od_layers[v] = current_layer
  431. for n in neighbors[v]:
  432. neighbors[n].remove(v)
  433. degrees[n] = degrees[n] - 1
  434. degrees.pop(v)
  435. # Updates the layer count.
  436. current_layer = current_layer + 1
  437. # Returns the dictionaries containing the onion layer of each vertices.
  438. return od_layers