sparsifiers.py 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. """Functions for computing sparsifiers of graphs."""
  2. import math
  3. import networkx as nx
  4. from networkx.utils import not_implemented_for, py_random_state
  5. __all__ = ["spanner"]
  6. @py_random_state(3)
  7. @not_implemented_for("directed")
  8. @not_implemented_for("multigraph")
  9. def spanner(G, stretch, weight=None, seed=None):
  10. """Returns a spanner of the given graph with the given stretch.
  11. A spanner of a graph G = (V, E) with stretch t is a subgraph
  12. H = (V, E_S) such that E_S is a subset of E and the distance between
  13. any pair of nodes in H is at most t times the distance between the
  14. nodes in G.
  15. Parameters
  16. ----------
  17. G : NetworkX graph
  18. An undirected simple graph.
  19. stretch : float
  20. The stretch of the spanner.
  21. weight : object
  22. The edge attribute to use as distance.
  23. seed : integer, random_state, or None (default)
  24. Indicator of random number generation state.
  25. See :ref:`Randomness<randomness>`.
  26. Returns
  27. -------
  28. NetworkX graph
  29. A spanner of the given graph with the given stretch.
  30. Raises
  31. ------
  32. ValueError
  33. If a stretch less than 1 is given.
  34. Notes
  35. -----
  36. This function implements the spanner algorithm by Baswana and Sen,
  37. see [1].
  38. This algorithm is a randomized las vegas algorithm: The expected
  39. running time is O(km) where k = (stretch + 1) // 2 and m is the
  40. number of edges in G. The returned graph is always a spanner of the
  41. given graph with the specified stretch. For weighted graphs the
  42. number of edges in the spanner is O(k * n^(1 + 1 / k)) where k is
  43. defined as above and n is the number of nodes in G. For unweighted
  44. graphs the number of edges is O(n^(1 + 1 / k) + kn).
  45. References
  46. ----------
  47. [1] S. Baswana, S. Sen. A Simple and Linear Time Randomized
  48. Algorithm for Computing Sparse Spanners in Weighted Graphs.
  49. Random Struct. Algorithms 30(4): 532-563 (2007).
  50. """
  51. if stretch < 1:
  52. raise ValueError("stretch must be at least 1")
  53. k = (stretch + 1) // 2
  54. # initialize spanner H with empty edge set
  55. H = nx.empty_graph()
  56. H.add_nodes_from(G.nodes)
  57. # phase 1: forming the clusters
  58. # the residual graph has V' from the paper as its node set
  59. # and E' from the paper as its edge set
  60. residual_graph = _setup_residual_graph(G, weight)
  61. # clustering is a dictionary that maps nodes in a cluster to the
  62. # cluster center
  63. clustering = {v: v for v in G.nodes}
  64. sample_prob = math.pow(G.number_of_nodes(), -1 / k)
  65. size_limit = 2 * math.pow(G.number_of_nodes(), 1 + 1 / k)
  66. i = 0
  67. while i < k - 1:
  68. # step 1: sample centers
  69. sampled_centers = set()
  70. for center in set(clustering.values()):
  71. if seed.random() < sample_prob:
  72. sampled_centers.add(center)
  73. # combined loop for steps 2 and 3
  74. edges_to_add = set()
  75. edges_to_remove = set()
  76. new_clustering = {}
  77. for v in residual_graph.nodes:
  78. if clustering[v] in sampled_centers:
  79. continue
  80. # step 2: find neighboring (sampled) clusters and
  81. # lightest edges to them
  82. lightest_edge_neighbor, lightest_edge_weight = _lightest_edge_dicts(
  83. residual_graph, clustering, v
  84. )
  85. neighboring_sampled_centers = (
  86. set(lightest_edge_weight.keys()) & sampled_centers
  87. )
  88. # step 3: add edges to spanner
  89. if not neighboring_sampled_centers:
  90. # connect to each neighboring center via lightest edge
  91. for neighbor in lightest_edge_neighbor.values():
  92. edges_to_add.add((v, neighbor))
  93. # remove all incident edges
  94. for neighbor in residual_graph.adj[v]:
  95. edges_to_remove.add((v, neighbor))
  96. else: # there is a neighboring sampled center
  97. closest_center = min(
  98. neighboring_sampled_centers, key=lightest_edge_weight.get
  99. )
  100. closest_center_weight = lightest_edge_weight[closest_center]
  101. closest_center_neighbor = lightest_edge_neighbor[closest_center]
  102. edges_to_add.add((v, closest_center_neighbor))
  103. new_clustering[v] = closest_center
  104. # connect to centers with edge weight less than
  105. # closest_center_weight
  106. for center, edge_weight in lightest_edge_weight.items():
  107. if edge_weight < closest_center_weight:
  108. neighbor = lightest_edge_neighbor[center]
  109. edges_to_add.add((v, neighbor))
  110. # remove edges to centers with edge weight less than
  111. # closest_center_weight
  112. for neighbor in residual_graph.adj[v]:
  113. neighbor_cluster = clustering[neighbor]
  114. neighbor_weight = lightest_edge_weight[neighbor_cluster]
  115. if (
  116. neighbor_cluster == closest_center
  117. or neighbor_weight < closest_center_weight
  118. ):
  119. edges_to_remove.add((v, neighbor))
  120. # check whether iteration added too many edges to spanner,
  121. # if so repeat
  122. if len(edges_to_add) > size_limit:
  123. # an iteration is repeated O(1) times on expectation
  124. continue
  125. # iteration succeeded
  126. i = i + 1
  127. # actually add edges to spanner
  128. for u, v in edges_to_add:
  129. _add_edge_to_spanner(H, residual_graph, u, v, weight)
  130. # actually delete edges from residual graph
  131. residual_graph.remove_edges_from(edges_to_remove)
  132. # copy old clustering data to new_clustering
  133. for node, center in clustering.items():
  134. if center in sampled_centers:
  135. new_clustering[node] = center
  136. clustering = new_clustering
  137. # step 4: remove intra-cluster edges
  138. for u in residual_graph.nodes:
  139. for v in list(residual_graph.adj[u]):
  140. if clustering[u] == clustering[v]:
  141. residual_graph.remove_edge(u, v)
  142. # update residual graph node set
  143. for v in list(residual_graph.nodes):
  144. if v not in clustering:
  145. residual_graph.remove_node(v)
  146. # phase 2: vertex-cluster joining
  147. for v in residual_graph.nodes:
  148. lightest_edge_neighbor, _ = _lightest_edge_dicts(residual_graph, clustering, v)
  149. for neighbor in lightest_edge_neighbor.values():
  150. _add_edge_to_spanner(H, residual_graph, v, neighbor, weight)
  151. return H
  152. def _setup_residual_graph(G, weight):
  153. """Setup residual graph as a copy of G with unique edges weights.
  154. The node set of the residual graph corresponds to the set V' from
  155. the Baswana-Sen paper and the edge set corresponds to the set E'
  156. from the paper.
  157. This function associates distinct weights to the edges of the
  158. residual graph (even for unweighted input graphs), as required by
  159. the algorithm.
  160. Parameters
  161. ----------
  162. G : NetworkX graph
  163. An undirected simple graph.
  164. weight : object
  165. The edge attribute to use as distance.
  166. Returns
  167. -------
  168. NetworkX graph
  169. The residual graph used for the Baswana-Sen algorithm.
  170. """
  171. residual_graph = G.copy()
  172. # establish unique edge weights, even for unweighted graphs
  173. for u, v in G.edges():
  174. if not weight:
  175. residual_graph[u][v]["weight"] = (id(u), id(v))
  176. else:
  177. residual_graph[u][v]["weight"] = (G[u][v][weight], id(u), id(v))
  178. return residual_graph
  179. def _lightest_edge_dicts(residual_graph, clustering, node):
  180. """Find the lightest edge to each cluster.
  181. Searches for the minimum-weight edge to each cluster adjacent to
  182. the given node.
  183. Parameters
  184. ----------
  185. residual_graph : NetworkX graph
  186. The residual graph used by the Baswana-Sen algorithm.
  187. clustering : dictionary
  188. The current clustering of the nodes.
  189. node : node
  190. The node from which the search originates.
  191. Returns
  192. -------
  193. lightest_edge_neighbor, lightest_edge_weight : dictionary, dictionary
  194. lightest_edge_neighbor is a dictionary that maps a center C to
  195. a node v in the corresponding cluster such that the edge from
  196. the given node to v is the lightest edge from the given node to
  197. any node in cluster. lightest_edge_weight maps a center C to the
  198. weight of the aforementioned edge.
  199. Notes
  200. -----
  201. If a cluster has no node that is adjacent to the given node in the
  202. residual graph then the center of the cluster is not a key in the
  203. returned dictionaries.
  204. """
  205. lightest_edge_neighbor = {}
  206. lightest_edge_weight = {}
  207. for neighbor in residual_graph.adj[node]:
  208. neighbor_center = clustering[neighbor]
  209. weight = residual_graph[node][neighbor]["weight"]
  210. if (
  211. neighbor_center not in lightest_edge_weight
  212. or weight < lightest_edge_weight[neighbor_center]
  213. ):
  214. lightest_edge_neighbor[neighbor_center] = neighbor
  215. lightest_edge_weight[neighbor_center] = weight
  216. return lightest_edge_neighbor, lightest_edge_weight
  217. def _add_edge_to_spanner(H, residual_graph, u, v, weight):
  218. """Add the edge {u, v} to the spanner H and take weight from
  219. the residual graph.
  220. Parameters
  221. ----------
  222. H : NetworkX graph
  223. The spanner under construction.
  224. residual_graph : NetworkX graph
  225. The residual graph used by the Baswana-Sen algorithm. The weight
  226. for the edge is taken from this graph.
  227. u : node
  228. One endpoint of the edge.
  229. v : node
  230. The other endpoint of the edge.
  231. weight : object
  232. The edge attribute to use as distance.
  233. """
  234. H.add_edge(u, v)
  235. if weight:
  236. H[u][v][weight] = residual_graph[u][v]["weight"][0]