group.py 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779
  1. """Group centrality measures."""
  2. from copy import deepcopy
  3. import networkx as nx
  4. from networkx.algorithms.centrality.betweenness import (
  5. _accumulate_endpoints,
  6. _single_source_dijkstra_path_basic,
  7. _single_source_shortest_path_basic,
  8. )
  9. from networkx.utils.decorators import not_implemented_for
  10. __all__ = [
  11. "group_betweenness_centrality",
  12. "group_closeness_centrality",
  13. "group_degree_centrality",
  14. "group_in_degree_centrality",
  15. "group_out_degree_centrality",
  16. "prominent_group",
  17. ]
  18. def group_betweenness_centrality(G, C, normalized=True, weight=None, endpoints=False):
  19. r"""Compute the group betweenness centrality for a group of nodes.
  20. Group betweenness centrality of a group of nodes $C$ is the sum of the
  21. fraction of all-pairs shortest paths that pass through any vertex in $C$
  22. .. math::
  23. c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}
  24. where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
  25. shortest $(s, t)$-paths, and $\sigma(s, t|C)$ is the number of
  26. those paths passing through some node in group $C$. Note that
  27. $(s, t)$ are not members of the group ($V-C$ is the set of nodes
  28. in $V$ that are not in $C$).
  29. Parameters
  30. ----------
  31. G : graph
  32. A NetworkX graph.
  33. C : list or set or list of lists or list of sets
  34. A group or a list of groups containing nodes which belong to G, for which group betweenness
  35. centrality is to be calculated.
  36. normalized : bool, optional (default=True)
  37. If True, group betweenness is normalized by `1/((|V|-|C|)(|V|-|C|-1))`
  38. where `|V|` is the number of nodes in G and `|C|` is the number of nodes in C.
  39. weight : None or string, optional (default=None)
  40. If None, all edge weights are considered equal.
  41. Otherwise holds the name of the edge attribute used as weight.
  42. The weight of an edge is treated as the length or distance between the two sides.
  43. endpoints : bool, optional (default=False)
  44. If True include the endpoints in the shortest path counts.
  45. Raises
  46. ------
  47. NodeNotFound
  48. If node(s) in C are not present in G.
  49. Returns
  50. -------
  51. betweenness : list of floats or float
  52. If C is a single group then return a float. If C is a list with
  53. several groups then return a list of group betweenness centralities.
  54. See Also
  55. --------
  56. betweenness_centrality
  57. Notes
  58. -----
  59. Group betweenness centrality is described in [1]_ and its importance discussed in [3]_.
  60. The initial implementation of the algorithm is mentioned in [2]_. This function uses
  61. an improved algorithm presented in [4]_.
  62. The number of nodes in the group must be a maximum of n - 2 where `n`
  63. is the total number of nodes in the graph.
  64. For weighted graphs the edge weights must be greater than zero.
  65. Zero edge weights can produce an infinite number of equal length
  66. paths between pairs of nodes.
  67. The total number of paths between source and target is counted
  68. differently for directed and undirected graphs. Directed paths
  69. between "u" and "v" are counted as two possible paths (one each
  70. direction) while undirected paths between "u" and "v" are counted
  71. as one path. Said another way, the sum in the expression above is
  72. over all ``s != t`` for directed graphs and for ``s < t`` for undirected graphs.
  73. References
  74. ----------
  75. .. [1] M G Everett and S P Borgatti:
  76. The Centrality of Groups and Classes.
  77. Journal of Mathematical Sociology. 23(3): 181-201. 1999.
  78. http://www.analytictech.com/borgatti/group_centrality.htm
  79. .. [2] Ulrik Brandes:
  80. On Variants of Shortest-Path Betweenness
  81. Centrality and their Generic Computation.
  82. Social Networks 30(2):136-145, 2008.
  83. http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.9610&rep=rep1&type=pdf
  84. .. [3] Sourav Medya et. al.:
  85. Group Centrality Maximization via Network Design.
  86. SIAM International Conference on Data Mining, SDM 2018, 126–134.
  87. https://sites.cs.ucsb.edu/~arlei/pubs/sdm18.pdf
  88. .. [4] Rami Puzis, Yuval Elovici, and Shlomi Dolev.
  89. "Fast algorithm for successive computation of group betweenness centrality."
  90. https://journals.aps.org/pre/pdf/10.1103/PhysRevE.76.056709
  91. """
  92. GBC = [] # initialize betweenness
  93. list_of_groups = True
  94. # check weather C contains one or many groups
  95. if any(el in G for el in C):
  96. C = [C]
  97. list_of_groups = False
  98. set_v = {node for group in C for node in group}
  99. if set_v - G.nodes: # element(s) of C not in G
  100. raise nx.NodeNotFound(f"The node(s) {set_v - G.nodes} are in C but not in G.")
  101. # pre-processing
  102. PB, sigma, D = _group_preprocessing(G, set_v, weight)
  103. # the algorithm for each group
  104. for group in C:
  105. group = set(group) # set of nodes in group
  106. # initialize the matrices of the sigma and the PB
  107. GBC_group = 0
  108. sigma_m = deepcopy(sigma)
  109. PB_m = deepcopy(PB)
  110. sigma_m_v = deepcopy(sigma_m)
  111. PB_m_v = deepcopy(PB_m)
  112. for v in group:
  113. GBC_group += PB_m[v][v]
  114. for x in group:
  115. for y in group:
  116. dxvy = 0
  117. dxyv = 0
  118. dvxy = 0
  119. if not (
  120. sigma_m[x][y] == 0 or sigma_m[x][v] == 0 or sigma_m[v][y] == 0
  121. ):
  122. if D[x][v] == D[x][y] + D[y][v]:
  123. dxyv = sigma_m[x][y] * sigma_m[y][v] / sigma_m[x][v]
  124. if D[x][y] == D[x][v] + D[v][y]:
  125. dxvy = sigma_m[x][v] * sigma_m[v][y] / sigma_m[x][y]
  126. if D[v][y] == D[v][x] + D[x][y]:
  127. dvxy = sigma_m[v][x] * sigma[x][y] / sigma[v][y]
  128. sigma_m_v[x][y] = sigma_m[x][y] * (1 - dxvy)
  129. PB_m_v[x][y] = PB_m[x][y] - PB_m[x][y] * dxvy
  130. if y != v:
  131. PB_m_v[x][y] -= PB_m[x][v] * dxyv
  132. if x != v:
  133. PB_m_v[x][y] -= PB_m[v][y] * dvxy
  134. sigma_m, sigma_m_v = sigma_m_v, sigma_m
  135. PB_m, PB_m_v = PB_m_v, PB_m
  136. # endpoints
  137. v, c = len(G), len(group)
  138. if not endpoints:
  139. scale = 0
  140. # if the graph is connected then subtract the endpoints from
  141. # the count for all the nodes in the graph. else count how many
  142. # nodes are connected to the group's nodes and subtract that.
  143. if nx.is_directed(G):
  144. if nx.is_strongly_connected(G):
  145. scale = c * (2 * v - c - 1)
  146. elif nx.is_connected(G):
  147. scale = c * (2 * v - c - 1)
  148. if scale == 0:
  149. for group_node1 in group:
  150. for node in D[group_node1]:
  151. if node != group_node1:
  152. if node in group:
  153. scale += 1
  154. else:
  155. scale += 2
  156. GBC_group -= scale
  157. # normalized
  158. if normalized:
  159. scale = 1 / ((v - c) * (v - c - 1))
  160. GBC_group *= scale
  161. # If undirected than count only the undirected edges
  162. elif not G.is_directed():
  163. GBC_group /= 2
  164. GBC.append(GBC_group)
  165. if list_of_groups:
  166. return GBC
  167. return GBC[0]
  168. def _group_preprocessing(G, set_v, weight):
  169. sigma = {}
  170. delta = {}
  171. D = {}
  172. betweenness = dict.fromkeys(G, 0)
  173. for s in G:
  174. if weight is None: # use BFS
  175. S, P, sigma[s], D[s] = _single_source_shortest_path_basic(G, s)
  176. else: # use Dijkstra's algorithm
  177. S, P, sigma[s], D[s] = _single_source_dijkstra_path_basic(G, s, weight)
  178. betweenness, delta[s] = _accumulate_endpoints(betweenness, S, P, sigma[s], s)
  179. for i in delta[s]: # add the paths from s to i and rescale sigma
  180. if s != i:
  181. delta[s][i] += 1
  182. if weight is not None:
  183. sigma[s][i] = sigma[s][i] / 2
  184. # building the path betweenness matrix only for nodes that appear in the group
  185. PB = dict.fromkeys(G)
  186. for group_node1 in set_v:
  187. PB[group_node1] = dict.fromkeys(G, 0.0)
  188. for group_node2 in set_v:
  189. if group_node2 not in D[group_node1]:
  190. continue
  191. for node in G:
  192. # if node is connected to the two group nodes than continue
  193. if group_node2 in D[node] and group_node1 in D[node]:
  194. if (
  195. D[node][group_node2]
  196. == D[node][group_node1] + D[group_node1][group_node2]
  197. ):
  198. PB[group_node1][group_node2] += (
  199. delta[node][group_node2]
  200. * sigma[node][group_node1]
  201. * sigma[group_node1][group_node2]
  202. / sigma[node][group_node2]
  203. )
  204. return PB, sigma, D
  205. def prominent_group(
  206. G, k, weight=None, C=None, endpoints=False, normalized=True, greedy=False
  207. ):
  208. r"""Find the prominent group of size $k$ in graph $G$. The prominence of the
  209. group is evaluated by the group betweenness centrality.
  210. Group betweenness centrality of a group of nodes $C$ is the sum of the
  211. fraction of all-pairs shortest paths that pass through any vertex in $C$
  212. .. math::
  213. c_B(v) =\sum_{s,t \in V} \frac{\sigma(s, t|v)}{\sigma(s, t)}
  214. where $V$ is the set of nodes, $\sigma(s, t)$ is the number of
  215. shortest $(s, t)$-paths, and $\sigma(s, t|C)$ is the number of
  216. those paths passing through some node in group $C$. Note that
  217. $(s, t)$ are not members of the group ($V-C$ is the set of nodes
  218. in $V$ that are not in $C$).
  219. Parameters
  220. ----------
  221. G : graph
  222. A NetworkX graph.
  223. k : int
  224. The number of nodes in the group.
  225. normalized : bool, optional (default=True)
  226. If True, group betweenness is normalized by ``1/((|V|-|C|)(|V|-|C|-1))``
  227. where ``|V|`` is the number of nodes in G and ``|C|`` is the number of
  228. nodes in C.
  229. weight : None or string, optional (default=None)
  230. If None, all edge weights are considered equal.
  231. Otherwise holds the name of the edge attribute used as weight.
  232. The weight of an edge is treated as the length or distance between the two sides.
  233. endpoints : bool, optional (default=False)
  234. If True include the endpoints in the shortest path counts.
  235. C : list or set, optional (default=None)
  236. list of nodes which won't be candidates of the prominent group.
  237. greedy : bool, optional (default=False)
  238. Using a naive greedy algorithm in order to find non-optimal prominent
  239. group. For scale free networks the results are negligibly below the optimal
  240. results.
  241. Raises
  242. ------
  243. NodeNotFound
  244. If node(s) in C are not present in G.
  245. Returns
  246. -------
  247. max_GBC : float
  248. The group betweenness centrality of the prominent group.
  249. max_group : list
  250. The list of nodes in the prominent group.
  251. See Also
  252. --------
  253. betweenness_centrality, group_betweenness_centrality
  254. Notes
  255. -----
  256. Group betweenness centrality is described in [1]_ and its importance discussed in [3]_.
  257. The algorithm is described in [2]_ and is based on techniques mentioned in [4]_.
  258. The number of nodes in the group must be a maximum of ``n - 2`` where ``n``
  259. is the total number of nodes in the graph.
  260. For weighted graphs the edge weights must be greater than zero.
  261. Zero edge weights can produce an infinite number of equal length
  262. paths between pairs of nodes.
  263. The total number of paths between source and target is counted
  264. differently for directed and undirected graphs. Directed paths
  265. between "u" and "v" are counted as two possible paths (one each
  266. direction) while undirected paths between "u" and "v" are counted
  267. as one path. Said another way, the sum in the expression above is
  268. over all ``s != t`` for directed graphs and for ``s < t`` for undirected graphs.
  269. References
  270. ----------
  271. .. [1] M G Everett and S P Borgatti:
  272. The Centrality of Groups and Classes.
  273. Journal of Mathematical Sociology. 23(3): 181-201. 1999.
  274. http://www.analytictech.com/borgatti/group_centrality.htm
  275. .. [2] Rami Puzis, Yuval Elovici, and Shlomi Dolev:
  276. "Finding the Most Prominent Group in Complex Networks"
  277. AI communications 20(4): 287-296, 2007.
  278. https://www.researchgate.net/profile/Rami_Puzis2/publication/220308855
  279. .. [3] Sourav Medya et. al.:
  280. Group Centrality Maximization via Network Design.
  281. SIAM International Conference on Data Mining, SDM 2018, 126–134.
  282. https://sites.cs.ucsb.edu/~arlei/pubs/sdm18.pdf
  283. .. [4] Rami Puzis, Yuval Elovici, and Shlomi Dolev.
  284. "Fast algorithm for successive computation of group betweenness centrality."
  285. https://journals.aps.org/pre/pdf/10.1103/PhysRevE.76.056709
  286. """
  287. import numpy as np
  288. import pandas as pd
  289. if C is not None:
  290. C = set(C)
  291. if C - G.nodes: # element(s) of C not in G
  292. raise nx.NodeNotFound(f"The node(s) {C - G.nodes} are in C but not in G.")
  293. nodes = list(G.nodes - C)
  294. else:
  295. nodes = list(G.nodes)
  296. DF_tree = nx.Graph()
  297. PB, sigma, D = _group_preprocessing(G, nodes, weight)
  298. betweenness = pd.DataFrame.from_dict(PB)
  299. if C is not None:
  300. for node in C:
  301. # remove from the betweenness all the nodes not part of the group
  302. betweenness.drop(index=node, inplace=True)
  303. betweenness.drop(columns=node, inplace=True)
  304. CL = [node for _, node in sorted(zip(np.diag(betweenness), nodes), reverse=True)]
  305. max_GBC = 0
  306. max_group = []
  307. DF_tree.add_node(
  308. 1,
  309. CL=CL,
  310. betweenness=betweenness,
  311. GBC=0,
  312. GM=[],
  313. sigma=sigma,
  314. cont=dict(zip(nodes, np.diag(betweenness))),
  315. )
  316. # the algorithm
  317. DF_tree.nodes[1]["heu"] = 0
  318. for i in range(k):
  319. DF_tree.nodes[1]["heu"] += DF_tree.nodes[1]["cont"][DF_tree.nodes[1]["CL"][i]]
  320. max_GBC, DF_tree, max_group = _dfbnb(
  321. G, k, DF_tree, max_GBC, 1, D, max_group, nodes, greedy
  322. )
  323. v = len(G)
  324. if not endpoints:
  325. scale = 0
  326. # if the graph is connected then subtract the endpoints from
  327. # the count for all the nodes in the graph. else count how many
  328. # nodes are connected to the group's nodes and subtract that.
  329. if nx.is_directed(G):
  330. if nx.is_strongly_connected(G):
  331. scale = k * (2 * v - k - 1)
  332. elif nx.is_connected(G):
  333. scale = k * (2 * v - k - 1)
  334. if scale == 0:
  335. for group_node1 in max_group:
  336. for node in D[group_node1]:
  337. if node != group_node1:
  338. if node in max_group:
  339. scale += 1
  340. else:
  341. scale += 2
  342. max_GBC -= scale
  343. # normalized
  344. if normalized:
  345. scale = 1 / ((v - k) * (v - k - 1))
  346. max_GBC *= scale
  347. # If undirected then count only the undirected edges
  348. elif not G.is_directed():
  349. max_GBC /= 2
  350. max_GBC = float("%.2f" % max_GBC)
  351. return max_GBC, max_group
  352. def _dfbnb(G, k, DF_tree, max_GBC, root, D, max_group, nodes, greedy):
  353. # stopping condition - if we found a group of size k and with higher GBC then prune
  354. if len(DF_tree.nodes[root]["GM"]) == k and DF_tree.nodes[root]["GBC"] > max_GBC:
  355. return DF_tree.nodes[root]["GBC"], DF_tree, DF_tree.nodes[root]["GM"]
  356. # stopping condition - if the size of group members equal to k or there are less than
  357. # k - |GM| in the candidate list or the heuristic function plus the GBC is below the
  358. # maximal GBC found then prune
  359. if (
  360. len(DF_tree.nodes[root]["GM"]) == k
  361. or len(DF_tree.nodes[root]["CL"]) <= k - len(DF_tree.nodes[root]["GM"])
  362. or DF_tree.nodes[root]["GBC"] + DF_tree.nodes[root]["heu"] <= max_GBC
  363. ):
  364. return max_GBC, DF_tree, max_group
  365. # finding the heuristic of both children
  366. node_p, node_m, DF_tree = _heuristic(k, root, DF_tree, D, nodes, greedy)
  367. # finding the child with the bigger heuristic + GBC and expand
  368. # that node first if greedy then only expand the plus node
  369. if greedy:
  370. max_GBC, DF_tree, max_group = _dfbnb(
  371. G, k, DF_tree, max_GBC, node_p, D, max_group, nodes, greedy
  372. )
  373. elif (
  374. DF_tree.nodes[node_p]["GBC"] + DF_tree.nodes[node_p]["heu"]
  375. > DF_tree.nodes[node_m]["GBC"] + DF_tree.nodes[node_m]["heu"]
  376. ):
  377. max_GBC, DF_tree, max_group = _dfbnb(
  378. G, k, DF_tree, max_GBC, node_p, D, max_group, nodes, greedy
  379. )
  380. max_GBC, DF_tree, max_group = _dfbnb(
  381. G, k, DF_tree, max_GBC, node_m, D, max_group, nodes, greedy
  382. )
  383. else:
  384. max_GBC, DF_tree, max_group = _dfbnb(
  385. G, k, DF_tree, max_GBC, node_m, D, max_group, nodes, greedy
  386. )
  387. max_GBC, DF_tree, max_group = _dfbnb(
  388. G, k, DF_tree, max_GBC, node_p, D, max_group, nodes, greedy
  389. )
  390. return max_GBC, DF_tree, max_group
  391. def _heuristic(k, root, DF_tree, D, nodes, greedy):
  392. import numpy as np
  393. # This helper function add two nodes to DF_tree - one left son and the
  394. # other right son, finds their heuristic, CL, GBC, and GM
  395. node_p = DF_tree.number_of_nodes() + 1
  396. node_m = DF_tree.number_of_nodes() + 2
  397. added_node = DF_tree.nodes[root]["CL"][0]
  398. # adding the plus node
  399. DF_tree.add_nodes_from([(node_p, deepcopy(DF_tree.nodes[root]))])
  400. DF_tree.nodes[node_p]["GM"].append(added_node)
  401. DF_tree.nodes[node_p]["GBC"] += DF_tree.nodes[node_p]["cont"][added_node]
  402. root_node = DF_tree.nodes[root]
  403. for x in nodes:
  404. for y in nodes:
  405. dxvy = 0
  406. dxyv = 0
  407. dvxy = 0
  408. if not (
  409. root_node["sigma"][x][y] == 0
  410. or root_node["sigma"][x][added_node] == 0
  411. or root_node["sigma"][added_node][y] == 0
  412. ):
  413. if D[x][added_node] == D[x][y] + D[y][added_node]:
  414. dxyv = (
  415. root_node["sigma"][x][y]
  416. * root_node["sigma"][y][added_node]
  417. / root_node["sigma"][x][added_node]
  418. )
  419. if D[x][y] == D[x][added_node] + D[added_node][y]:
  420. dxvy = (
  421. root_node["sigma"][x][added_node]
  422. * root_node["sigma"][added_node][y]
  423. / root_node["sigma"][x][y]
  424. )
  425. if D[added_node][y] == D[added_node][x] + D[x][y]:
  426. dvxy = (
  427. root_node["sigma"][added_node][x]
  428. * root_node["sigma"][x][y]
  429. / root_node["sigma"][added_node][y]
  430. )
  431. DF_tree.nodes[node_p]["sigma"][x][y] = root_node["sigma"][x][y] * (1 - dxvy)
  432. DF_tree.nodes[node_p]["betweenness"][x][y] = (
  433. root_node["betweenness"][x][y] - root_node["betweenness"][x][y] * dxvy
  434. )
  435. if y != added_node:
  436. DF_tree.nodes[node_p]["betweenness"][x][y] -= (
  437. root_node["betweenness"][x][added_node] * dxyv
  438. )
  439. if x != added_node:
  440. DF_tree.nodes[node_p]["betweenness"][x][y] -= (
  441. root_node["betweenness"][added_node][y] * dvxy
  442. )
  443. DF_tree.nodes[node_p]["CL"] = [
  444. node
  445. for _, node in sorted(
  446. zip(np.diag(DF_tree.nodes[node_p]["betweenness"]), nodes), reverse=True
  447. )
  448. if node not in DF_tree.nodes[node_p]["GM"]
  449. ]
  450. DF_tree.nodes[node_p]["cont"] = dict(
  451. zip(nodes, np.diag(DF_tree.nodes[node_p]["betweenness"]))
  452. )
  453. DF_tree.nodes[node_p]["heu"] = 0
  454. for i in range(k - len(DF_tree.nodes[node_p]["GM"])):
  455. DF_tree.nodes[node_p]["heu"] += DF_tree.nodes[node_p]["cont"][
  456. DF_tree.nodes[node_p]["CL"][i]
  457. ]
  458. # adding the minus node - don't insert the first node in the CL to GM
  459. # Insert minus node only if isn't greedy type algorithm
  460. if not greedy:
  461. DF_tree.add_nodes_from([(node_m, deepcopy(DF_tree.nodes[root]))])
  462. DF_tree.nodes[node_m]["CL"].pop(0)
  463. DF_tree.nodes[node_m]["cont"].pop(added_node)
  464. DF_tree.nodes[node_m]["heu"] = 0
  465. for i in range(k - len(DF_tree.nodes[node_m]["GM"])):
  466. DF_tree.nodes[node_m]["heu"] += DF_tree.nodes[node_m]["cont"][
  467. DF_tree.nodes[node_m]["CL"][i]
  468. ]
  469. else:
  470. node_m = None
  471. return node_p, node_m, DF_tree
  472. def group_closeness_centrality(G, S, weight=None):
  473. r"""Compute the group closeness centrality for a group of nodes.
  474. Group closeness centrality of a group of nodes $S$ is a measure
  475. of how close the group is to the other nodes in the graph.
  476. .. math::
  477. c_{close}(S) = \frac{|V-S|}{\sum_{v \in V-S} d_{S, v}}
  478. d_{S, v} = min_{u \in S} (d_{u, v})
  479. where $V$ is the set of nodes, $d_{S, v}$ is the distance of
  480. the group $S$ from $v$ defined as above. ($V-S$ is the set of nodes
  481. in $V$ that are not in $S$).
  482. Parameters
  483. ----------
  484. G : graph
  485. A NetworkX graph.
  486. S : list or set
  487. S is a group of nodes which belong to G, for which group closeness
  488. centrality is to be calculated.
  489. weight : None or string, optional (default=None)
  490. If None, all edge weights are considered equal.
  491. Otherwise holds the name of the edge attribute used as weight.
  492. The weight of an edge is treated as the length or distance between the two sides.
  493. Raises
  494. ------
  495. NodeNotFound
  496. If node(s) in S are not present in G.
  497. Returns
  498. -------
  499. closeness : float
  500. Group closeness centrality of the group S.
  501. See Also
  502. --------
  503. closeness_centrality
  504. Notes
  505. -----
  506. The measure was introduced in [1]_.
  507. The formula implemented here is described in [2]_.
  508. Higher values of closeness indicate greater centrality.
  509. It is assumed that 1 / 0 is 0 (required in the case of directed graphs,
  510. or when a shortest path length is 0).
  511. The number of nodes in the group must be a maximum of n - 1 where `n`
  512. is the total number of nodes in the graph.
  513. For directed graphs, the incoming distance is utilized here. To use the
  514. outward distance, act on `G.reverse()`.
  515. For weighted graphs the edge weights must be greater than zero.
  516. Zero edge weights can produce an infinite number of equal length
  517. paths between pairs of nodes.
  518. References
  519. ----------
  520. .. [1] M G Everett and S P Borgatti:
  521. The Centrality of Groups and Classes.
  522. Journal of Mathematical Sociology. 23(3): 181-201. 1999.
  523. http://www.analytictech.com/borgatti/group_centrality.htm
  524. .. [2] J. Zhao et. al.:
  525. Measuring and Maximizing Group Closeness Centrality over
  526. Disk Resident Graphs.
  527. WWWConference Proceedings, 2014. 689-694.
  528. https://doi.org/10.1145/2567948.2579356
  529. """
  530. if G.is_directed():
  531. G = G.reverse() # reverse view
  532. closeness = 0 # initialize to 0
  533. V = set(G) # set of nodes in G
  534. S = set(S) # set of nodes in group S
  535. V_S = V - S # set of nodes in V but not S
  536. shortest_path_lengths = nx.multi_source_dijkstra_path_length(G, S, weight=weight)
  537. # accumulation
  538. for v in V_S:
  539. try:
  540. closeness += shortest_path_lengths[v]
  541. except KeyError: # no path exists
  542. closeness += 0
  543. try:
  544. closeness = len(V_S) / closeness
  545. except ZeroDivisionError: # 1 / 0 assumed as 0
  546. closeness = 0
  547. return closeness
  548. def group_degree_centrality(G, S):
  549. """Compute the group degree centrality for a group of nodes.
  550. Group degree centrality of a group of nodes $S$ is the fraction
  551. of non-group members connected to group members.
  552. Parameters
  553. ----------
  554. G : graph
  555. A NetworkX graph.
  556. S : list or set
  557. S is a group of nodes which belong to G, for which group degree
  558. centrality is to be calculated.
  559. Raises
  560. ------
  561. NetworkXError
  562. If node(s) in S are not in G.
  563. Returns
  564. -------
  565. centrality : float
  566. Group degree centrality of the group S.
  567. See Also
  568. --------
  569. degree_centrality
  570. group_in_degree_centrality
  571. group_out_degree_centrality
  572. Notes
  573. -----
  574. The measure was introduced in [1]_.
  575. The number of nodes in the group must be a maximum of n - 1 where `n`
  576. is the total number of nodes in the graph.
  577. References
  578. ----------
  579. .. [1] M G Everett and S P Borgatti:
  580. The Centrality of Groups and Classes.
  581. Journal of Mathematical Sociology. 23(3): 181-201. 1999.
  582. http://www.analytictech.com/borgatti/group_centrality.htm
  583. """
  584. centrality = len(set().union(*[set(G.neighbors(i)) for i in S]) - set(S))
  585. centrality /= len(G.nodes()) - len(S)
  586. return centrality
  587. @not_implemented_for("undirected")
  588. def group_in_degree_centrality(G, S):
  589. """Compute the group in-degree centrality for a group of nodes.
  590. Group in-degree centrality of a group of nodes $S$ is the fraction
  591. of non-group members connected to group members by incoming edges.
  592. Parameters
  593. ----------
  594. G : graph
  595. A NetworkX graph.
  596. S : list or set
  597. S is a group of nodes which belong to G, for which group in-degree
  598. centrality is to be calculated.
  599. Returns
  600. -------
  601. centrality : float
  602. Group in-degree centrality of the group S.
  603. Raises
  604. ------
  605. NetworkXNotImplemented
  606. If G is undirected.
  607. NodeNotFound
  608. If node(s) in S are not in G.
  609. See Also
  610. --------
  611. degree_centrality
  612. group_degree_centrality
  613. group_out_degree_centrality
  614. Notes
  615. -----
  616. The number of nodes in the group must be a maximum of n - 1 where `n`
  617. is the total number of nodes in the graph.
  618. `G.neighbors(i)` gives nodes with an outward edge from i, in a DiGraph,
  619. so for group in-degree centrality, the reverse graph is used.
  620. """
  621. return group_degree_centrality(G.reverse(), S)
  622. @not_implemented_for("undirected")
  623. def group_out_degree_centrality(G, S):
  624. """Compute the group out-degree centrality for a group of nodes.
  625. Group out-degree centrality of a group of nodes $S$ is the fraction
  626. of non-group members connected to group members by outgoing edges.
  627. Parameters
  628. ----------
  629. G : graph
  630. A NetworkX graph.
  631. S : list or set
  632. S is a group of nodes which belong to G, for which group in-degree
  633. centrality is to be calculated.
  634. Returns
  635. -------
  636. centrality : float
  637. Group out-degree centrality of the group S.
  638. Raises
  639. ------
  640. NetworkXNotImplemented
  641. If G is undirected.
  642. NodeNotFound
  643. If node(s) in S are not in G.
  644. See Also
  645. --------
  646. degree_centrality
  647. group_degree_centrality
  648. group_in_degree_centrality
  649. Notes
  650. -----
  651. The number of nodes in the group must be a maximum of n - 1 where `n`
  652. is the total number of nodes in the graph.
  653. `G.neighbors(i)` gives nodes with an outward edge from i, in a DiGraph,
  654. so for group out-degree centrality, the graph itself is used.
  655. """
  656. return group_degree_centrality(G, S)