test_summarization.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641
  1. """
  2. Unit tests for dedensification and graph summarization
  3. """
  4. import pytest
  5. import networkx as nx
  6. class TestDirectedDedensification:
  7. def build_original_graph(self):
  8. original_matrix = [
  9. ("1", "BC"),
  10. ("2", "ABC"),
  11. ("3", ["A", "B", "6"]),
  12. ("4", "ABC"),
  13. ("5", "AB"),
  14. ("6", ["5"]),
  15. ("A", ["6"]),
  16. ]
  17. graph = nx.DiGraph()
  18. for source, targets in original_matrix:
  19. for target in targets:
  20. graph.add_edge(source, target)
  21. return graph
  22. def build_compressed_graph(self):
  23. compressed_matrix = [
  24. ("1", "BC"),
  25. ("2", ["ABC"]),
  26. ("3", ["A", "B", "6"]),
  27. ("4", ["ABC"]),
  28. ("5", "AB"),
  29. ("6", ["5"]),
  30. ("A", ["6"]),
  31. ("ABC", "ABC"),
  32. ]
  33. compressed_graph = nx.DiGraph()
  34. for source, targets in compressed_matrix:
  35. for target in targets:
  36. compressed_graph.add_edge(source, target)
  37. return compressed_graph
  38. def test_empty(self):
  39. """
  40. Verify that an empty directed graph results in no compressor nodes
  41. """
  42. G = nx.DiGraph()
  43. compressed_graph, c_nodes = nx.dedensify(G, threshold=2)
  44. assert c_nodes == set()
  45. @staticmethod
  46. def densify(G, compressor_nodes, copy=True):
  47. """
  48. Reconstructs the original graph from a dedensified, directed graph
  49. Parameters
  50. ----------
  51. G: dedensified graph
  52. A networkx graph
  53. compressor_nodes: iterable
  54. Iterable of compressor nodes in the dedensified graph
  55. inplace: bool, optional (default: False)
  56. Indicates if densification should be done inplace
  57. Returns
  58. -------
  59. G: graph
  60. A densified networkx graph
  61. """
  62. if copy:
  63. G = G.copy()
  64. for compressor_node in compressor_nodes:
  65. all_neighbors = set(nx.all_neighbors(G, compressor_node))
  66. out_neighbors = set(G.neighbors(compressor_node))
  67. for out_neighbor in out_neighbors:
  68. G.remove_edge(compressor_node, out_neighbor)
  69. in_neighbors = all_neighbors - out_neighbors
  70. for in_neighbor in in_neighbors:
  71. G.remove_edge(in_neighbor, compressor_node)
  72. for out_neighbor in out_neighbors:
  73. G.add_edge(in_neighbor, out_neighbor)
  74. G.remove_node(compressor_node)
  75. return G
  76. def setup_method(self):
  77. self.c_nodes = ("ABC",)
  78. def test_dedensify_edges(self):
  79. """
  80. Verifies that dedensify produced the correct edges to/from compressor
  81. nodes in a directed graph
  82. """
  83. G = self.build_original_graph()
  84. compressed_G = self.build_compressed_graph()
  85. compressed_graph, c_nodes = nx.dedensify(G, threshold=2)
  86. for s, t in compressed_graph.edges():
  87. o_s = "".join(sorted(s))
  88. o_t = "".join(sorted(t))
  89. compressed_graph_exists = compressed_graph.has_edge(s, t)
  90. verified_compressed_exists = compressed_G.has_edge(o_s, o_t)
  91. assert compressed_graph_exists == verified_compressed_exists
  92. assert len(c_nodes) == len(self.c_nodes)
  93. def test_dedensify_edge_count(self):
  94. """
  95. Verifies that dedensify produced the correct number of comrpessor nodes
  96. in a directed graph
  97. """
  98. G = self.build_original_graph()
  99. original_edge_count = len(G.edges())
  100. c_G, c_nodes = nx.dedensify(G, threshold=2)
  101. compressed_edge_count = len(c_G.edges())
  102. assert compressed_edge_count <= original_edge_count
  103. compressed_G = self.build_compressed_graph()
  104. assert compressed_edge_count == len(compressed_G.edges())
  105. def test_densify_edges(self):
  106. """
  107. Verifies that densification produces the correct edges from the
  108. original directed graph
  109. """
  110. compressed_G = self.build_compressed_graph()
  111. original_graph = self.densify(compressed_G, self.c_nodes, copy=True)
  112. G = self.build_original_graph()
  113. for s, t in G.edges():
  114. assert G.has_edge(s, t) == original_graph.has_edge(s, t)
  115. def test_densify_edge_count(self):
  116. """
  117. Verifies that densification produces the correct number of edges in the
  118. original directed graph
  119. """
  120. compressed_G = self.build_compressed_graph()
  121. compressed_edge_count = len(compressed_G.edges())
  122. original_graph = self.densify(compressed_G, self.c_nodes)
  123. original_edge_count = len(original_graph.edges())
  124. assert compressed_edge_count <= original_edge_count
  125. G = self.build_original_graph()
  126. assert original_edge_count == len(G.edges())
  127. class TestUnDirectedDedensification:
  128. def build_original_graph(self):
  129. """
  130. Builds graph shown in the original research paper
  131. """
  132. original_matrix = [
  133. ("1", "CB"),
  134. ("2", "ABC"),
  135. ("3", ["A", "B", "6"]),
  136. ("4", "ABC"),
  137. ("5", "AB"),
  138. ("6", ["5"]),
  139. ("A", ["6"]),
  140. ]
  141. graph = nx.Graph()
  142. for source, targets in original_matrix:
  143. for target in targets:
  144. graph.add_edge(source, target)
  145. return graph
  146. def test_empty(self):
  147. """
  148. Verify that an empty undirected graph results in no compressor nodes
  149. """
  150. G = nx.Graph()
  151. compressed_G, c_nodes = nx.dedensify(G, threshold=2)
  152. assert c_nodes == set()
  153. def setup_method(self):
  154. self.c_nodes = ("6AB", "ABC")
  155. def build_compressed_graph(self):
  156. compressed_matrix = [
  157. ("1", ["B", "C"]),
  158. ("2", ["ABC"]),
  159. ("3", ["6AB"]),
  160. ("4", ["ABC"]),
  161. ("5", ["6AB"]),
  162. ("6", ["6AB", "A"]),
  163. ("A", ["6AB", "ABC"]),
  164. ("B", ["ABC", "6AB"]),
  165. ("C", ["ABC"]),
  166. ]
  167. compressed_graph = nx.Graph()
  168. for source, targets in compressed_matrix:
  169. for target in targets:
  170. compressed_graph.add_edge(source, target)
  171. return compressed_graph
  172. def test_dedensify_edges(self):
  173. """
  174. Verifies that dedensify produced correct compressor nodes and the
  175. correct edges to/from the compressor nodes in an undirected graph
  176. """
  177. G = self.build_original_graph()
  178. c_G, c_nodes = nx.dedensify(G, threshold=2)
  179. v_compressed_G = self.build_compressed_graph()
  180. for s, t in c_G.edges():
  181. o_s = "".join(sorted(s))
  182. o_t = "".join(sorted(t))
  183. has_compressed_edge = c_G.has_edge(s, t)
  184. verified_has_compressed_edge = v_compressed_G.has_edge(o_s, o_t)
  185. assert has_compressed_edge == verified_has_compressed_edge
  186. assert len(c_nodes) == len(self.c_nodes)
  187. def test_dedensify_edge_count(self):
  188. """
  189. Verifies that dedensify produced the correct number of edges in an
  190. undirected graph
  191. """
  192. G = self.build_original_graph()
  193. c_G, c_nodes = nx.dedensify(G, threshold=2, copy=True)
  194. compressed_edge_count = len(c_G.edges())
  195. verified_original_edge_count = len(G.edges())
  196. assert compressed_edge_count <= verified_original_edge_count
  197. verified_compressed_G = self.build_compressed_graph()
  198. verified_compressed_edge_count = len(verified_compressed_G.edges())
  199. assert compressed_edge_count == verified_compressed_edge_count
  200. @pytest.mark.parametrize(
  201. "graph_type", [nx.Graph, nx.DiGraph, nx.MultiGraph, nx.MultiDiGraph]
  202. )
  203. def test_summarization_empty(graph_type):
  204. G = graph_type()
  205. summary_graph = nx.snap_aggregation(G, node_attributes=("color",))
  206. assert nx.is_isomorphic(summary_graph, G)
  207. class AbstractSNAP:
  208. node_attributes = ("color",)
  209. def build_original_graph(self):
  210. pass
  211. def build_summary_graph(self):
  212. pass
  213. def test_summary_graph(self):
  214. original_graph = self.build_original_graph()
  215. summary_graph = self.build_summary_graph()
  216. relationship_attributes = ("type",)
  217. generated_summary_graph = nx.snap_aggregation(
  218. original_graph, self.node_attributes, relationship_attributes
  219. )
  220. relabeled_summary_graph = self.deterministic_labels(generated_summary_graph)
  221. assert nx.is_isomorphic(summary_graph, relabeled_summary_graph)
  222. def deterministic_labels(self, G):
  223. node_labels = list(G.nodes)
  224. node_labels = sorted(node_labels, key=lambda n: sorted(G.nodes[n]["group"])[0])
  225. node_labels.sort()
  226. label_mapping = {}
  227. for index, node in enumerate(node_labels):
  228. label = "Supernode-%s" % index
  229. label_mapping[node] = label
  230. return nx.relabel_nodes(G, label_mapping)
  231. class TestSNAPNoEdgeTypes(AbstractSNAP):
  232. relationship_attributes = ()
  233. def test_summary_graph(self):
  234. original_graph = self.build_original_graph()
  235. summary_graph = self.build_summary_graph()
  236. relationship_attributes = ("type",)
  237. generated_summary_graph = nx.snap_aggregation(
  238. original_graph, self.node_attributes
  239. )
  240. relabeled_summary_graph = self.deterministic_labels(generated_summary_graph)
  241. assert nx.is_isomorphic(summary_graph, relabeled_summary_graph)
  242. def build_original_graph(self):
  243. nodes = {
  244. "A": {"color": "Red"},
  245. "B": {"color": "Red"},
  246. "C": {"color": "Red"},
  247. "D": {"color": "Red"},
  248. "E": {"color": "Blue"},
  249. "F": {"color": "Blue"},
  250. "G": {"color": "Blue"},
  251. "H": {"color": "Blue"},
  252. "I": {"color": "Yellow"},
  253. "J": {"color": "Yellow"},
  254. "K": {"color": "Yellow"},
  255. "L": {"color": "Yellow"},
  256. }
  257. edges = [
  258. ("A", "B"),
  259. ("A", "C"),
  260. ("A", "E"),
  261. ("A", "I"),
  262. ("B", "D"),
  263. ("B", "J"),
  264. ("B", "F"),
  265. ("C", "G"),
  266. ("D", "H"),
  267. ("I", "J"),
  268. ("J", "K"),
  269. ("I", "L"),
  270. ]
  271. G = nx.Graph()
  272. for node in nodes:
  273. attributes = nodes[node]
  274. G.add_node(node, **attributes)
  275. for source, target in edges:
  276. G.add_edge(source, target)
  277. return G
  278. def build_summary_graph(self):
  279. nodes = {
  280. "Supernode-0": {"color": "Red"},
  281. "Supernode-1": {"color": "Red"},
  282. "Supernode-2": {"color": "Blue"},
  283. "Supernode-3": {"color": "Blue"},
  284. "Supernode-4": {"color": "Yellow"},
  285. "Supernode-5": {"color": "Yellow"},
  286. }
  287. edges = [
  288. ("Supernode-0", "Supernode-0"),
  289. ("Supernode-0", "Supernode-1"),
  290. ("Supernode-0", "Supernode-2"),
  291. ("Supernode-0", "Supernode-4"),
  292. ("Supernode-1", "Supernode-3"),
  293. ("Supernode-4", "Supernode-4"),
  294. ("Supernode-4", "Supernode-5"),
  295. ]
  296. G = nx.Graph()
  297. for node in nodes:
  298. attributes = nodes[node]
  299. G.add_node(node, **attributes)
  300. for source, target in edges:
  301. G.add_edge(source, target)
  302. supernodes = {
  303. "Supernode-0": {"A", "B"},
  304. "Supernode-1": {"C", "D"},
  305. "Supernode-2": {"E", "F"},
  306. "Supernode-3": {"G", "H"},
  307. "Supernode-4": {"I", "J"},
  308. "Supernode-5": {"K", "L"},
  309. }
  310. nx.set_node_attributes(G, supernodes, "group")
  311. return G
  312. class TestSNAPUndirected(AbstractSNAP):
  313. def build_original_graph(self):
  314. nodes = {
  315. "A": {"color": "Red"},
  316. "B": {"color": "Red"},
  317. "C": {"color": "Red"},
  318. "D": {"color": "Red"},
  319. "E": {"color": "Blue"},
  320. "F": {"color": "Blue"},
  321. "G": {"color": "Blue"},
  322. "H": {"color": "Blue"},
  323. "I": {"color": "Yellow"},
  324. "J": {"color": "Yellow"},
  325. "K": {"color": "Yellow"},
  326. "L": {"color": "Yellow"},
  327. }
  328. edges = [
  329. ("A", "B", "Strong"),
  330. ("A", "C", "Weak"),
  331. ("A", "E", "Strong"),
  332. ("A", "I", "Weak"),
  333. ("B", "D", "Weak"),
  334. ("B", "J", "Weak"),
  335. ("B", "F", "Strong"),
  336. ("C", "G", "Weak"),
  337. ("D", "H", "Weak"),
  338. ("I", "J", "Strong"),
  339. ("J", "K", "Strong"),
  340. ("I", "L", "Strong"),
  341. ]
  342. G = nx.Graph()
  343. for node in nodes:
  344. attributes = nodes[node]
  345. G.add_node(node, **attributes)
  346. for source, target, type in edges:
  347. G.add_edge(source, target, type=type)
  348. return G
  349. def build_summary_graph(self):
  350. nodes = {
  351. "Supernode-0": {"color": "Red"},
  352. "Supernode-1": {"color": "Red"},
  353. "Supernode-2": {"color": "Blue"},
  354. "Supernode-3": {"color": "Blue"},
  355. "Supernode-4": {"color": "Yellow"},
  356. "Supernode-5": {"color": "Yellow"},
  357. }
  358. edges = [
  359. ("Supernode-0", "Supernode-0", "Strong"),
  360. ("Supernode-0", "Supernode-1", "Weak"),
  361. ("Supernode-0", "Supernode-2", "Strong"),
  362. ("Supernode-0", "Supernode-4", "Weak"),
  363. ("Supernode-1", "Supernode-3", "Weak"),
  364. ("Supernode-4", "Supernode-4", "Strong"),
  365. ("Supernode-4", "Supernode-5", "Strong"),
  366. ]
  367. G = nx.Graph()
  368. for node in nodes:
  369. attributes = nodes[node]
  370. G.add_node(node, **attributes)
  371. for source, target, type in edges:
  372. G.add_edge(source, target, types=[{"type": type}])
  373. supernodes = {
  374. "Supernode-0": {"A", "B"},
  375. "Supernode-1": {"C", "D"},
  376. "Supernode-2": {"E", "F"},
  377. "Supernode-3": {"G", "H"},
  378. "Supernode-4": {"I", "J"},
  379. "Supernode-5": {"K", "L"},
  380. }
  381. nx.set_node_attributes(G, supernodes, "group")
  382. return G
  383. class TestSNAPDirected(AbstractSNAP):
  384. def build_original_graph(self):
  385. nodes = {
  386. "A": {"color": "Red"},
  387. "B": {"color": "Red"},
  388. "C": {"color": "Green"},
  389. "D": {"color": "Green"},
  390. "E": {"color": "Blue"},
  391. "F": {"color": "Blue"},
  392. "G": {"color": "Yellow"},
  393. "H": {"color": "Yellow"},
  394. }
  395. edges = [
  396. ("A", "C", "Strong"),
  397. ("A", "E", "Strong"),
  398. ("A", "F", "Weak"),
  399. ("B", "D", "Strong"),
  400. ("B", "E", "Weak"),
  401. ("B", "F", "Strong"),
  402. ("C", "G", "Strong"),
  403. ("C", "F", "Strong"),
  404. ("D", "E", "Strong"),
  405. ("D", "H", "Strong"),
  406. ("G", "E", "Strong"),
  407. ("H", "F", "Strong"),
  408. ]
  409. G = nx.DiGraph()
  410. for node in nodes:
  411. attributes = nodes[node]
  412. G.add_node(node, **attributes)
  413. for source, target, type in edges:
  414. G.add_edge(source, target, type=type)
  415. return G
  416. def build_summary_graph(self):
  417. nodes = {
  418. "Supernode-0": {"color": "Red"},
  419. "Supernode-1": {"color": "Green"},
  420. "Supernode-2": {"color": "Blue"},
  421. "Supernode-3": {"color": "Yellow"},
  422. }
  423. edges = [
  424. ("Supernode-0", "Supernode-1", [{"type": "Strong"}]),
  425. ("Supernode-0", "Supernode-2", [{"type": "Weak"}, {"type": "Strong"}]),
  426. ("Supernode-1", "Supernode-2", [{"type": "Strong"}]),
  427. ("Supernode-1", "Supernode-3", [{"type": "Strong"}]),
  428. ("Supernode-3", "Supernode-2", [{"type": "Strong"}]),
  429. ]
  430. G = nx.DiGraph()
  431. for node in nodes:
  432. attributes = nodes[node]
  433. G.add_node(node, **attributes)
  434. for source, target, types in edges:
  435. G.add_edge(source, target, types=types)
  436. supernodes = {
  437. "Supernode-0": {"A", "B"},
  438. "Supernode-1": {"C", "D"},
  439. "Supernode-2": {"E", "F"},
  440. "Supernode-3": {"G", "H"},
  441. "Supernode-4": {"I", "J"},
  442. "Supernode-5": {"K", "L"},
  443. }
  444. nx.set_node_attributes(G, supernodes, "group")
  445. return G
  446. class TestSNAPUndirectedMulti(AbstractSNAP):
  447. def build_original_graph(self):
  448. nodes = {
  449. "A": {"color": "Red"},
  450. "B": {"color": "Red"},
  451. "C": {"color": "Red"},
  452. "D": {"color": "Blue"},
  453. "E": {"color": "Blue"},
  454. "F": {"color": "Blue"},
  455. "G": {"color": "Yellow"},
  456. "H": {"color": "Yellow"},
  457. "I": {"color": "Yellow"},
  458. }
  459. edges = [
  460. ("A", "D", ["Weak", "Strong"]),
  461. ("B", "E", ["Weak", "Strong"]),
  462. ("D", "I", ["Strong"]),
  463. ("E", "H", ["Strong"]),
  464. ("F", "G", ["Weak"]),
  465. ("I", "G", ["Weak", "Strong"]),
  466. ("I", "H", ["Weak", "Strong"]),
  467. ("G", "H", ["Weak", "Strong"]),
  468. ]
  469. G = nx.MultiGraph()
  470. for node in nodes:
  471. attributes = nodes[node]
  472. G.add_node(node, **attributes)
  473. for source, target, types in edges:
  474. for type in types:
  475. G.add_edge(source, target, type=type)
  476. return G
  477. def build_summary_graph(self):
  478. nodes = {
  479. "Supernode-0": {"color": "Red"},
  480. "Supernode-1": {"color": "Blue"},
  481. "Supernode-2": {"color": "Yellow"},
  482. "Supernode-3": {"color": "Blue"},
  483. "Supernode-4": {"color": "Yellow"},
  484. "Supernode-5": {"color": "Red"},
  485. }
  486. edges = [
  487. ("Supernode-1", "Supernode-2", [{"type": "Weak"}]),
  488. ("Supernode-2", "Supernode-4", [{"type": "Weak"}, {"type": "Strong"}]),
  489. ("Supernode-3", "Supernode-4", [{"type": "Strong"}]),
  490. ("Supernode-3", "Supernode-5", [{"type": "Weak"}, {"type": "Strong"}]),
  491. ("Supernode-4", "Supernode-4", [{"type": "Weak"}, {"type": "Strong"}]),
  492. ]
  493. G = nx.MultiGraph()
  494. for node in nodes:
  495. attributes = nodes[node]
  496. G.add_node(node, **attributes)
  497. for source, target, types in edges:
  498. for type in types:
  499. G.add_edge(source, target, type=type)
  500. supernodes = {
  501. "Supernode-0": {"A", "B"},
  502. "Supernode-1": {"C", "D"},
  503. "Supernode-2": {"E", "F"},
  504. "Supernode-3": {"G", "H"},
  505. "Supernode-4": {"I", "J"},
  506. "Supernode-5": {"K", "L"},
  507. }
  508. nx.set_node_attributes(G, supernodes, "group")
  509. return G
  510. class TestSNAPDirectedMulti(AbstractSNAP):
  511. def build_original_graph(self):
  512. nodes = {
  513. "A": {"color": "Red"},
  514. "B": {"color": "Red"},
  515. "C": {"color": "Green"},
  516. "D": {"color": "Green"},
  517. "E": {"color": "Blue"},
  518. "F": {"color": "Blue"},
  519. "G": {"color": "Yellow"},
  520. "H": {"color": "Yellow"},
  521. }
  522. edges = [
  523. ("A", "C", ["Weak", "Strong"]),
  524. ("A", "E", ["Strong"]),
  525. ("A", "F", ["Weak"]),
  526. ("B", "D", ["Weak", "Strong"]),
  527. ("B", "E", ["Weak"]),
  528. ("B", "F", ["Strong"]),
  529. ("C", "G", ["Weak", "Strong"]),
  530. ("C", "F", ["Strong"]),
  531. ("D", "E", ["Strong"]),
  532. ("D", "H", ["Weak", "Strong"]),
  533. ("G", "E", ["Strong"]),
  534. ("H", "F", ["Strong"]),
  535. ]
  536. G = nx.MultiDiGraph()
  537. for node in nodes:
  538. attributes = nodes[node]
  539. G.add_node(node, **attributes)
  540. for source, target, types in edges:
  541. for type in types:
  542. G.add_edge(source, target, type=type)
  543. return G
  544. def build_summary_graph(self):
  545. nodes = {
  546. "Supernode-0": {"color": "Red"},
  547. "Supernode-1": {"color": "Blue"},
  548. "Supernode-2": {"color": "Yellow"},
  549. "Supernode-3": {"color": "Blue"},
  550. }
  551. edges = [
  552. ("Supernode-0", "Supernode-1", ["Weak", "Strong"]),
  553. ("Supernode-0", "Supernode-2", ["Weak", "Strong"]),
  554. ("Supernode-1", "Supernode-2", ["Strong"]),
  555. ("Supernode-1", "Supernode-3", ["Weak", "Strong"]),
  556. ("Supernode-3", "Supernode-2", ["Strong"]),
  557. ]
  558. G = nx.MultiDiGraph()
  559. for node in nodes:
  560. attributes = nodes[node]
  561. G.add_node(node, **attributes)
  562. for source, target, types in edges:
  563. for type in types:
  564. G.add_edge(source, target, type=type)
  565. supernodes = {
  566. "Supernode-0": {"A", "B"},
  567. "Supernode-1": {"C", "D"},
  568. "Supernode-2": {"E", "F"},
  569. "Supernode-3": {"G", "H"},
  570. }
  571. nx.set_node_attributes(G, supernodes, "group")
  572. return G