test_similarity.py 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923
  1. import pytest
  2. import networkx as nx
  3. from networkx.algorithms.similarity import (
  4. graph_edit_distance,
  5. optimal_edit_paths,
  6. optimize_graph_edit_distance,
  7. )
  8. from networkx.generators.classic import (
  9. circular_ladder_graph,
  10. cycle_graph,
  11. path_graph,
  12. wheel_graph,
  13. )
  14. def nmatch(n1, n2):
  15. return n1 == n2
  16. def ematch(e1, e2):
  17. return e1 == e2
  18. def getCanonical():
  19. G = nx.Graph()
  20. G.add_node("A", label="A")
  21. G.add_node("B", label="B")
  22. G.add_node("C", label="C")
  23. G.add_node("D", label="D")
  24. G.add_edge("A", "B", label="a-b")
  25. G.add_edge("B", "C", label="b-c")
  26. G.add_edge("B", "D", label="b-d")
  27. return G
  28. class TestSimilarity:
  29. @classmethod
  30. def setup_class(cls):
  31. global np
  32. np = pytest.importorskip("numpy")
  33. pytest.importorskip("scipy")
  34. def test_graph_edit_distance_roots_and_timeout(self):
  35. G0 = nx.star_graph(5)
  36. G1 = G0.copy()
  37. pytest.raises(ValueError, graph_edit_distance, G0, G1, roots=[2])
  38. pytest.raises(ValueError, graph_edit_distance, G0, G1, roots=[2, 3, 4])
  39. pytest.raises(nx.NodeNotFound, graph_edit_distance, G0, G1, roots=(9, 3))
  40. pytest.raises(nx.NodeNotFound, graph_edit_distance, G0, G1, roots=(3, 9))
  41. pytest.raises(nx.NodeNotFound, graph_edit_distance, G0, G1, roots=(9, 9))
  42. assert graph_edit_distance(G0, G1, roots=(1, 2)) == 0
  43. assert graph_edit_distance(G0, G1, roots=(0, 1)) == 8
  44. assert graph_edit_distance(G0, G1, roots=(1, 2), timeout=5) == 0
  45. assert graph_edit_distance(G0, G1, roots=(0, 1), timeout=5) == 8
  46. assert graph_edit_distance(G0, G1, roots=(0, 1), timeout=0.0001) is None
  47. # test raise on 0 timeout
  48. pytest.raises(nx.NetworkXError, graph_edit_distance, G0, G1, timeout=0)
  49. def test_graph_edit_distance(self):
  50. G0 = nx.Graph()
  51. G1 = path_graph(6)
  52. G2 = cycle_graph(6)
  53. G3 = wheel_graph(7)
  54. assert graph_edit_distance(G0, G0) == 0
  55. assert graph_edit_distance(G0, G1) == 11
  56. assert graph_edit_distance(G1, G0) == 11
  57. assert graph_edit_distance(G0, G2) == 12
  58. assert graph_edit_distance(G2, G0) == 12
  59. assert graph_edit_distance(G0, G3) == 19
  60. assert graph_edit_distance(G3, G0) == 19
  61. assert graph_edit_distance(G1, G1) == 0
  62. assert graph_edit_distance(G1, G2) == 1
  63. assert graph_edit_distance(G2, G1) == 1
  64. assert graph_edit_distance(G1, G3) == 8
  65. assert graph_edit_distance(G3, G1) == 8
  66. assert graph_edit_distance(G2, G2) == 0
  67. assert graph_edit_distance(G2, G3) == 7
  68. assert graph_edit_distance(G3, G2) == 7
  69. assert graph_edit_distance(G3, G3) == 0
  70. def test_graph_edit_distance_node_match(self):
  71. G1 = cycle_graph(5)
  72. G2 = cycle_graph(5)
  73. for n, attr in G1.nodes.items():
  74. attr["color"] = "red" if n % 2 == 0 else "blue"
  75. for n, attr in G2.nodes.items():
  76. attr["color"] = "red" if n % 2 == 1 else "blue"
  77. assert graph_edit_distance(G1, G2) == 0
  78. assert (
  79. graph_edit_distance(
  80. G1, G2, node_match=lambda n1, n2: n1["color"] == n2["color"]
  81. )
  82. == 1
  83. )
  84. def test_graph_edit_distance_edge_match(self):
  85. G1 = path_graph(6)
  86. G2 = path_graph(6)
  87. for e, attr in G1.edges.items():
  88. attr["color"] = "red" if min(e) % 2 == 0 else "blue"
  89. for e, attr in G2.edges.items():
  90. attr["color"] = "red" if min(e) // 3 == 0 else "blue"
  91. assert graph_edit_distance(G1, G2) == 0
  92. assert (
  93. graph_edit_distance(
  94. G1, G2, edge_match=lambda e1, e2: e1["color"] == e2["color"]
  95. )
  96. == 2
  97. )
  98. def test_graph_edit_distance_node_cost(self):
  99. G1 = path_graph(6)
  100. G2 = path_graph(6)
  101. for n, attr in G1.nodes.items():
  102. attr["color"] = "red" if n % 2 == 0 else "blue"
  103. for n, attr in G2.nodes.items():
  104. attr["color"] = "red" if n % 2 == 1 else "blue"
  105. def node_subst_cost(uattr, vattr):
  106. if uattr["color"] == vattr["color"]:
  107. return 1
  108. else:
  109. return 10
  110. def node_del_cost(attr):
  111. if attr["color"] == "blue":
  112. return 20
  113. else:
  114. return 50
  115. def node_ins_cost(attr):
  116. if attr["color"] == "blue":
  117. return 40
  118. else:
  119. return 100
  120. assert (
  121. graph_edit_distance(
  122. G1,
  123. G2,
  124. node_subst_cost=node_subst_cost,
  125. node_del_cost=node_del_cost,
  126. node_ins_cost=node_ins_cost,
  127. )
  128. == 6
  129. )
  130. def test_graph_edit_distance_edge_cost(self):
  131. G1 = path_graph(6)
  132. G2 = path_graph(6)
  133. for e, attr in G1.edges.items():
  134. attr["color"] = "red" if min(e) % 2 == 0 else "blue"
  135. for e, attr in G2.edges.items():
  136. attr["color"] = "red" if min(e) // 3 == 0 else "blue"
  137. def edge_subst_cost(gattr, hattr):
  138. if gattr["color"] == hattr["color"]:
  139. return 0.01
  140. else:
  141. return 0.1
  142. def edge_del_cost(attr):
  143. if attr["color"] == "blue":
  144. return 0.2
  145. else:
  146. return 0.5
  147. def edge_ins_cost(attr):
  148. if attr["color"] == "blue":
  149. return 0.4
  150. else:
  151. return 1.0
  152. assert (
  153. graph_edit_distance(
  154. G1,
  155. G2,
  156. edge_subst_cost=edge_subst_cost,
  157. edge_del_cost=edge_del_cost,
  158. edge_ins_cost=edge_ins_cost,
  159. )
  160. == 0.23
  161. )
  162. def test_graph_edit_distance_upper_bound(self):
  163. G1 = circular_ladder_graph(2)
  164. G2 = circular_ladder_graph(6)
  165. assert graph_edit_distance(G1, G2, upper_bound=5) is None
  166. assert graph_edit_distance(G1, G2, upper_bound=24) == 22
  167. assert graph_edit_distance(G1, G2) == 22
  168. def test_optimal_edit_paths(self):
  169. G1 = path_graph(3)
  170. G2 = cycle_graph(3)
  171. paths, cost = optimal_edit_paths(G1, G2)
  172. assert cost == 1
  173. assert len(paths) == 6
  174. def canonical(vertex_path, edge_path):
  175. return (
  176. tuple(sorted(vertex_path)),
  177. tuple(sorted(edge_path, key=lambda x: (None in x, x))),
  178. )
  179. expected_paths = [
  180. (
  181. [(0, 0), (1, 1), (2, 2)],
  182. [((0, 1), (0, 1)), ((1, 2), (1, 2)), (None, (0, 2))],
  183. ),
  184. (
  185. [(0, 0), (1, 2), (2, 1)],
  186. [((0, 1), (0, 2)), ((1, 2), (1, 2)), (None, (0, 1))],
  187. ),
  188. (
  189. [(0, 1), (1, 0), (2, 2)],
  190. [((0, 1), (0, 1)), ((1, 2), (0, 2)), (None, (1, 2))],
  191. ),
  192. (
  193. [(0, 1), (1, 2), (2, 0)],
  194. [((0, 1), (1, 2)), ((1, 2), (0, 2)), (None, (0, 1))],
  195. ),
  196. (
  197. [(0, 2), (1, 0), (2, 1)],
  198. [((0, 1), (0, 2)), ((1, 2), (0, 1)), (None, (1, 2))],
  199. ),
  200. (
  201. [(0, 2), (1, 1), (2, 0)],
  202. [((0, 1), (1, 2)), ((1, 2), (0, 1)), (None, (0, 2))],
  203. ),
  204. ]
  205. assert {canonical(*p) for p in paths} == {canonical(*p) for p in expected_paths}
  206. def test_optimize_graph_edit_distance(self):
  207. G1 = circular_ladder_graph(2)
  208. G2 = circular_ladder_graph(6)
  209. bestcost = 1000
  210. for cost in optimize_graph_edit_distance(G1, G2):
  211. assert cost < bestcost
  212. bestcost = cost
  213. assert bestcost == 22
  214. # def test_graph_edit_distance_bigger(self):
  215. # G1 = circular_ladder_graph(12)
  216. # G2 = circular_ladder_graph(16)
  217. # assert_equal(graph_edit_distance(G1, G2), 22)
  218. def test_selfloops(self):
  219. G0 = nx.Graph()
  220. G1 = nx.Graph()
  221. G1.add_edges_from((("A", "A"), ("A", "B")))
  222. G2 = nx.Graph()
  223. G2.add_edges_from((("A", "B"), ("B", "B")))
  224. G3 = nx.Graph()
  225. G3.add_edges_from((("A", "A"), ("A", "B"), ("B", "B")))
  226. assert graph_edit_distance(G0, G0) == 0
  227. assert graph_edit_distance(G0, G1) == 4
  228. assert graph_edit_distance(G1, G0) == 4
  229. assert graph_edit_distance(G0, G2) == 4
  230. assert graph_edit_distance(G2, G0) == 4
  231. assert graph_edit_distance(G0, G3) == 5
  232. assert graph_edit_distance(G3, G0) == 5
  233. assert graph_edit_distance(G1, G1) == 0
  234. assert graph_edit_distance(G1, G2) == 0
  235. assert graph_edit_distance(G2, G1) == 0
  236. assert graph_edit_distance(G1, G3) == 1
  237. assert graph_edit_distance(G3, G1) == 1
  238. assert graph_edit_distance(G2, G2) == 0
  239. assert graph_edit_distance(G2, G3) == 1
  240. assert graph_edit_distance(G3, G2) == 1
  241. assert graph_edit_distance(G3, G3) == 0
  242. def test_digraph(self):
  243. G0 = nx.DiGraph()
  244. G1 = nx.DiGraph()
  245. G1.add_edges_from((("A", "B"), ("B", "C"), ("C", "D"), ("D", "A")))
  246. G2 = nx.DiGraph()
  247. G2.add_edges_from((("A", "B"), ("B", "C"), ("C", "D"), ("A", "D")))
  248. G3 = nx.DiGraph()
  249. G3.add_edges_from((("A", "B"), ("A", "C"), ("B", "D"), ("C", "D")))
  250. assert graph_edit_distance(G0, G0) == 0
  251. assert graph_edit_distance(G0, G1) == 8
  252. assert graph_edit_distance(G1, G0) == 8
  253. assert graph_edit_distance(G0, G2) == 8
  254. assert graph_edit_distance(G2, G0) == 8
  255. assert graph_edit_distance(G0, G3) == 8
  256. assert graph_edit_distance(G3, G0) == 8
  257. assert graph_edit_distance(G1, G1) == 0
  258. assert graph_edit_distance(G1, G2) == 2
  259. assert graph_edit_distance(G2, G1) == 2
  260. assert graph_edit_distance(G1, G3) == 4
  261. assert graph_edit_distance(G3, G1) == 4
  262. assert graph_edit_distance(G2, G2) == 0
  263. assert graph_edit_distance(G2, G3) == 2
  264. assert graph_edit_distance(G3, G2) == 2
  265. assert graph_edit_distance(G3, G3) == 0
  266. def test_multigraph(self):
  267. G0 = nx.MultiGraph()
  268. G1 = nx.MultiGraph()
  269. G1.add_edges_from((("A", "B"), ("B", "C"), ("A", "C")))
  270. G2 = nx.MultiGraph()
  271. G2.add_edges_from((("A", "B"), ("B", "C"), ("B", "C"), ("A", "C")))
  272. G3 = nx.MultiGraph()
  273. G3.add_edges_from((("A", "B"), ("B", "C"), ("A", "C"), ("A", "C"), ("A", "C")))
  274. assert graph_edit_distance(G0, G0) == 0
  275. assert graph_edit_distance(G0, G1) == 6
  276. assert graph_edit_distance(G1, G0) == 6
  277. assert graph_edit_distance(G0, G2) == 7
  278. assert graph_edit_distance(G2, G0) == 7
  279. assert graph_edit_distance(G0, G3) == 8
  280. assert graph_edit_distance(G3, G0) == 8
  281. assert graph_edit_distance(G1, G1) == 0
  282. assert graph_edit_distance(G1, G2) == 1
  283. assert graph_edit_distance(G2, G1) == 1
  284. assert graph_edit_distance(G1, G3) == 2
  285. assert graph_edit_distance(G3, G1) == 2
  286. assert graph_edit_distance(G2, G2) == 0
  287. assert graph_edit_distance(G2, G3) == 1
  288. assert graph_edit_distance(G3, G2) == 1
  289. assert graph_edit_distance(G3, G3) == 0
  290. def test_multidigraph(self):
  291. G1 = nx.MultiDiGraph()
  292. G1.add_edges_from(
  293. (
  294. ("hardware", "kernel"),
  295. ("kernel", "hardware"),
  296. ("kernel", "userspace"),
  297. ("userspace", "kernel"),
  298. )
  299. )
  300. G2 = nx.MultiDiGraph()
  301. G2.add_edges_from(
  302. (
  303. ("winter", "spring"),
  304. ("spring", "summer"),
  305. ("summer", "autumn"),
  306. ("autumn", "winter"),
  307. )
  308. )
  309. assert graph_edit_distance(G1, G2) == 5
  310. assert graph_edit_distance(G2, G1) == 5
  311. # by https://github.com/jfbeaumont
  312. def testCopy(self):
  313. G = nx.Graph()
  314. G.add_node("A", label="A")
  315. G.add_node("B", label="B")
  316. G.add_edge("A", "B", label="a-b")
  317. assert (
  318. graph_edit_distance(G, G.copy(), node_match=nmatch, edge_match=ematch) == 0
  319. )
  320. def testSame(self):
  321. G1 = nx.Graph()
  322. G1.add_node("A", label="A")
  323. G1.add_node("B", label="B")
  324. G1.add_edge("A", "B", label="a-b")
  325. G2 = nx.Graph()
  326. G2.add_node("A", label="A")
  327. G2.add_node("B", label="B")
  328. G2.add_edge("A", "B", label="a-b")
  329. assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 0
  330. def testOneEdgeLabelDiff(self):
  331. G1 = nx.Graph()
  332. G1.add_node("A", label="A")
  333. G1.add_node("B", label="B")
  334. G1.add_edge("A", "B", label="a-b")
  335. G2 = nx.Graph()
  336. G2.add_node("A", label="A")
  337. G2.add_node("B", label="B")
  338. G2.add_edge("A", "B", label="bad")
  339. assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 1
  340. def testOneNodeLabelDiff(self):
  341. G1 = nx.Graph()
  342. G1.add_node("A", label="A")
  343. G1.add_node("B", label="B")
  344. G1.add_edge("A", "B", label="a-b")
  345. G2 = nx.Graph()
  346. G2.add_node("A", label="Z")
  347. G2.add_node("B", label="B")
  348. G2.add_edge("A", "B", label="a-b")
  349. assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 1
  350. def testOneExtraNode(self):
  351. G1 = nx.Graph()
  352. G1.add_node("A", label="A")
  353. G1.add_node("B", label="B")
  354. G1.add_edge("A", "B", label="a-b")
  355. G2 = nx.Graph()
  356. G2.add_node("A", label="A")
  357. G2.add_node("B", label="B")
  358. G2.add_edge("A", "B", label="a-b")
  359. G2.add_node("C", label="C")
  360. assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 1
  361. def testOneExtraEdge(self):
  362. G1 = nx.Graph()
  363. G1.add_node("A", label="A")
  364. G1.add_node("B", label="B")
  365. G1.add_node("C", label="C")
  366. G1.add_node("C", label="C")
  367. G1.add_edge("A", "B", label="a-b")
  368. G2 = nx.Graph()
  369. G2.add_node("A", label="A")
  370. G2.add_node("B", label="B")
  371. G2.add_node("C", label="C")
  372. G2.add_edge("A", "B", label="a-b")
  373. G2.add_edge("A", "C", label="a-c")
  374. assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 1
  375. def testOneExtraNodeAndEdge(self):
  376. G1 = nx.Graph()
  377. G1.add_node("A", label="A")
  378. G1.add_node("B", label="B")
  379. G1.add_edge("A", "B", label="a-b")
  380. G2 = nx.Graph()
  381. G2.add_node("A", label="A")
  382. G2.add_node("B", label="B")
  383. G2.add_node("C", label="C")
  384. G2.add_edge("A", "B", label="a-b")
  385. G2.add_edge("A", "C", label="a-c")
  386. assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 2
  387. def testGraph1(self):
  388. G1 = getCanonical()
  389. G2 = nx.Graph()
  390. G2.add_node("A", label="A")
  391. G2.add_node("B", label="B")
  392. G2.add_node("D", label="D")
  393. G2.add_node("E", label="E")
  394. G2.add_edge("A", "B", label="a-b")
  395. G2.add_edge("B", "D", label="b-d")
  396. G2.add_edge("D", "E", label="d-e")
  397. assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 3
  398. def testGraph2(self):
  399. G1 = getCanonical()
  400. G2 = nx.Graph()
  401. G2.add_node("A", label="A")
  402. G2.add_node("B", label="B")
  403. G2.add_node("C", label="C")
  404. G2.add_node("D", label="D")
  405. G2.add_node("E", label="E")
  406. G2.add_edge("A", "B", label="a-b")
  407. G2.add_edge("B", "C", label="b-c")
  408. G2.add_edge("C", "D", label="c-d")
  409. G2.add_edge("C", "E", label="c-e")
  410. assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 4
  411. def testGraph3(self):
  412. G1 = getCanonical()
  413. G2 = nx.Graph()
  414. G2.add_node("A", label="A")
  415. G2.add_node("B", label="B")
  416. G2.add_node("C", label="C")
  417. G2.add_node("D", label="D")
  418. G2.add_node("E", label="E")
  419. G2.add_node("F", label="F")
  420. G2.add_node("G", label="G")
  421. G2.add_edge("A", "C", label="a-c")
  422. G2.add_edge("A", "D", label="a-d")
  423. G2.add_edge("D", "E", label="d-e")
  424. G2.add_edge("D", "F", label="d-f")
  425. G2.add_edge("D", "G", label="d-g")
  426. G2.add_edge("E", "B", label="e-b")
  427. assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 12
  428. def testGraph4(self):
  429. G1 = getCanonical()
  430. G2 = nx.Graph()
  431. G2.add_node("A", label="A")
  432. G2.add_node("B", label="B")
  433. G2.add_node("C", label="C")
  434. G2.add_node("D", label="D")
  435. G2.add_edge("A", "B", label="a-b")
  436. G2.add_edge("B", "C", label="b-c")
  437. G2.add_edge("C", "D", label="c-d")
  438. assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 2
  439. def testGraph4_a(self):
  440. G1 = getCanonical()
  441. G2 = nx.Graph()
  442. G2.add_node("A", label="A")
  443. G2.add_node("B", label="B")
  444. G2.add_node("C", label="C")
  445. G2.add_node("D", label="D")
  446. G2.add_edge("A", "B", label="a-b")
  447. G2.add_edge("B", "C", label="b-c")
  448. G2.add_edge("A", "D", label="a-d")
  449. assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 2
  450. def testGraph4_b(self):
  451. G1 = getCanonical()
  452. G2 = nx.Graph()
  453. G2.add_node("A", label="A")
  454. G2.add_node("B", label="B")
  455. G2.add_node("C", label="C")
  456. G2.add_node("D", label="D")
  457. G2.add_edge("A", "B", label="a-b")
  458. G2.add_edge("B", "C", label="b-c")
  459. G2.add_edge("B", "D", label="bad")
  460. assert graph_edit_distance(G1, G2, node_match=nmatch, edge_match=ematch) == 1
  461. # note: nx.simrank_similarity_numpy not included because returns np.array
  462. simrank_algs = [
  463. nx.simrank_similarity,
  464. nx.algorithms.similarity._simrank_similarity_python,
  465. ]
  466. @pytest.mark.parametrize("simrank_similarity", simrank_algs)
  467. def test_simrank_no_source_no_target(self, simrank_similarity):
  468. G = nx.cycle_graph(5)
  469. expected = {
  470. 0: {
  471. 0: 1,
  472. 1: 0.3951219505902448,
  473. 2: 0.5707317069281646,
  474. 3: 0.5707317069281646,
  475. 4: 0.3951219505902449,
  476. },
  477. 1: {
  478. 0: 0.3951219505902448,
  479. 1: 1,
  480. 2: 0.3951219505902449,
  481. 3: 0.5707317069281646,
  482. 4: 0.5707317069281646,
  483. },
  484. 2: {
  485. 0: 0.5707317069281646,
  486. 1: 0.3951219505902449,
  487. 2: 1,
  488. 3: 0.3951219505902449,
  489. 4: 0.5707317069281646,
  490. },
  491. 3: {
  492. 0: 0.5707317069281646,
  493. 1: 0.5707317069281646,
  494. 2: 0.3951219505902449,
  495. 3: 1,
  496. 4: 0.3951219505902449,
  497. },
  498. 4: {
  499. 0: 0.3951219505902449,
  500. 1: 0.5707317069281646,
  501. 2: 0.5707317069281646,
  502. 3: 0.3951219505902449,
  503. 4: 1,
  504. },
  505. }
  506. actual = simrank_similarity(G)
  507. for k, v in expected.items():
  508. assert v == pytest.approx(actual[k], abs=1e-2)
  509. # For a DiGraph test, use the first graph from the paper cited in
  510. # the docs: https://dl.acm.org/doi/pdf/10.1145/775047.775126
  511. G = nx.DiGraph()
  512. G.add_node(0, label="Univ")
  513. G.add_node(1, label="ProfA")
  514. G.add_node(2, label="ProfB")
  515. G.add_node(3, label="StudentA")
  516. G.add_node(4, label="StudentB")
  517. G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 4), (4, 2), (3, 0)])
  518. expected = {
  519. 0: {0: 1, 1: 0.0, 2: 0.1323363991265798, 3: 0.0, 4: 0.03387811817640443},
  520. 1: {0: 0.0, 1: 1, 2: 0.4135512472705618, 3: 0.0, 4: 0.10586911930126384},
  521. 2: {
  522. 0: 0.1323363991265798,
  523. 1: 0.4135512472705618,
  524. 2: 1,
  525. 3: 0.04234764772050554,
  526. 4: 0.08822426608438655,
  527. },
  528. 3: {0: 0.0, 1: 0.0, 2: 0.04234764772050554, 3: 1, 4: 0.3308409978164495},
  529. 4: {
  530. 0: 0.03387811817640443,
  531. 1: 0.10586911930126384,
  532. 2: 0.08822426608438655,
  533. 3: 0.3308409978164495,
  534. 4: 1,
  535. },
  536. }
  537. # Use the importance_factor from the paper to get the same numbers.
  538. actual = simrank_similarity(G, importance_factor=0.8)
  539. for k, v in expected.items():
  540. assert v == pytest.approx(actual[k], abs=1e-2)
  541. @pytest.mark.parametrize("simrank_similarity", simrank_algs)
  542. def test_simrank_source_no_target(self, simrank_similarity):
  543. G = nx.cycle_graph(5)
  544. expected = {
  545. 0: 1,
  546. 1: 0.3951219505902448,
  547. 2: 0.5707317069281646,
  548. 3: 0.5707317069281646,
  549. 4: 0.3951219505902449,
  550. }
  551. actual = simrank_similarity(G, source=0)
  552. assert expected == pytest.approx(actual, abs=1e-2)
  553. # For a DiGraph test, use the first graph from the paper cited in
  554. # the docs: https://dl.acm.org/doi/pdf/10.1145/775047.775126
  555. G = nx.DiGraph()
  556. G.add_node(0, label="Univ")
  557. G.add_node(1, label="ProfA")
  558. G.add_node(2, label="ProfB")
  559. G.add_node(3, label="StudentA")
  560. G.add_node(4, label="StudentB")
  561. G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 4), (4, 2), (3, 0)])
  562. expected = {0: 1, 1: 0.0, 2: 0.1323363991265798, 3: 0.0, 4: 0.03387811817640443}
  563. # Use the importance_factor from the paper to get the same numbers.
  564. actual = simrank_similarity(G, importance_factor=0.8, source=0)
  565. assert expected == pytest.approx(actual, abs=1e-2)
  566. @pytest.mark.parametrize("simrank_similarity", simrank_algs)
  567. def test_simrank_noninteger_nodes(self, simrank_similarity):
  568. G = nx.cycle_graph(5)
  569. G = nx.relabel_nodes(G, dict(enumerate("abcde")))
  570. expected = {
  571. "a": 1,
  572. "b": 0.3951219505902448,
  573. "c": 0.5707317069281646,
  574. "d": 0.5707317069281646,
  575. "e": 0.3951219505902449,
  576. }
  577. actual = simrank_similarity(G, source="a")
  578. assert expected == pytest.approx(actual, abs=1e-2)
  579. # For a DiGraph test, use the first graph from the paper cited in
  580. # the docs: https://dl.acm.org/doi/pdf/10.1145/775047.775126
  581. G = nx.DiGraph()
  582. G.add_node(0, label="Univ")
  583. G.add_node(1, label="ProfA")
  584. G.add_node(2, label="ProfB")
  585. G.add_node(3, label="StudentA")
  586. G.add_node(4, label="StudentB")
  587. G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 4), (4, 2), (3, 0)])
  588. node_labels = dict(enumerate(nx.get_node_attributes(G, "label").values()))
  589. G = nx.relabel_nodes(G, node_labels)
  590. expected = {
  591. "Univ": 1,
  592. "ProfA": 0.0,
  593. "ProfB": 0.1323363991265798,
  594. "StudentA": 0.0,
  595. "StudentB": 0.03387811817640443,
  596. }
  597. # Use the importance_factor from the paper to get the same numbers.
  598. actual = simrank_similarity(G, importance_factor=0.8, source="Univ")
  599. assert expected == pytest.approx(actual, abs=1e-2)
  600. @pytest.mark.parametrize("simrank_similarity", simrank_algs)
  601. def test_simrank_source_and_target(self, simrank_similarity):
  602. G = nx.cycle_graph(5)
  603. expected = 1
  604. actual = simrank_similarity(G, source=0, target=0)
  605. assert expected == pytest.approx(actual, abs=1e-2)
  606. # For a DiGraph test, use the first graph from the paper cited in
  607. # the docs: https://dl.acm.org/doi/pdf/10.1145/775047.775126
  608. G = nx.DiGraph()
  609. G.add_node(0, label="Univ")
  610. G.add_node(1, label="ProfA")
  611. G.add_node(2, label="ProfB")
  612. G.add_node(3, label="StudentA")
  613. G.add_node(4, label="StudentB")
  614. G.add_edges_from([(0, 1), (0, 2), (1, 3), (2, 4), (4, 2), (3, 0)])
  615. expected = 0.1323363991265798
  616. # Use the importance_factor from the paper to get the same numbers.
  617. # Use the pair (0,2) because (0,0) and (0,1) have trivial results.
  618. actual = simrank_similarity(G, importance_factor=0.8, source=0, target=2)
  619. assert expected == pytest.approx(actual, abs=1e-5)
  620. @pytest.mark.parametrize("alg", simrank_algs)
  621. def test_simrank_max_iterations(self, alg):
  622. G = nx.cycle_graph(5)
  623. pytest.raises(nx.ExceededMaxIterations, alg, G, max_iterations=10)
  624. def test_simrank_between_versions(self):
  625. G = nx.cycle_graph(5)
  626. # _python tolerance 1e-4
  627. expected_python_tol4 = {
  628. 0: 1,
  629. 1: 0.394512499239852,
  630. 2: 0.5703550452791322,
  631. 3: 0.5703550452791323,
  632. 4: 0.394512499239852,
  633. }
  634. # _numpy tolerance 1e-4
  635. expected_numpy_tol4 = {
  636. 0: 1.0,
  637. 1: 0.3947180735764555,
  638. 2: 0.570482097206368,
  639. 3: 0.570482097206368,
  640. 4: 0.3947180735764555,
  641. }
  642. actual = nx.simrank_similarity(G, source=0)
  643. assert expected_numpy_tol4 == pytest.approx(actual, abs=1e-7)
  644. # versions differ at 1e-4 level but equal at 1e-3
  645. assert expected_python_tol4 != pytest.approx(actual, abs=1e-4)
  646. assert expected_python_tol4 == pytest.approx(actual, abs=1e-3)
  647. actual = nx.similarity._simrank_similarity_python(G, source=0)
  648. assert expected_python_tol4 == pytest.approx(actual, abs=1e-7)
  649. # versions differ at 1e-4 level but equal at 1e-3
  650. assert expected_numpy_tol4 != pytest.approx(actual, abs=1e-4)
  651. assert expected_numpy_tol4 == pytest.approx(actual, abs=1e-3)
  652. def test_simrank_numpy_no_source_no_target(self):
  653. G = nx.cycle_graph(5)
  654. expected = np.array(
  655. [
  656. [
  657. 1.0,
  658. 0.3947180735764555,
  659. 0.570482097206368,
  660. 0.570482097206368,
  661. 0.3947180735764555,
  662. ],
  663. [
  664. 0.3947180735764555,
  665. 1.0,
  666. 0.3947180735764555,
  667. 0.570482097206368,
  668. 0.570482097206368,
  669. ],
  670. [
  671. 0.570482097206368,
  672. 0.3947180735764555,
  673. 1.0,
  674. 0.3947180735764555,
  675. 0.570482097206368,
  676. ],
  677. [
  678. 0.570482097206368,
  679. 0.570482097206368,
  680. 0.3947180735764555,
  681. 1.0,
  682. 0.3947180735764555,
  683. ],
  684. [
  685. 0.3947180735764555,
  686. 0.570482097206368,
  687. 0.570482097206368,
  688. 0.3947180735764555,
  689. 1.0,
  690. ],
  691. ]
  692. )
  693. actual = nx.similarity._simrank_similarity_numpy(G)
  694. np.testing.assert_allclose(expected, actual, atol=1e-7)
  695. def test_simrank_numpy_source_no_target(self):
  696. G = nx.cycle_graph(5)
  697. expected = np.array(
  698. [
  699. 1.0,
  700. 0.3947180735764555,
  701. 0.570482097206368,
  702. 0.570482097206368,
  703. 0.3947180735764555,
  704. ]
  705. )
  706. actual = nx.similarity._simrank_similarity_numpy(G, source=0)
  707. np.testing.assert_allclose(expected, actual, atol=1e-7)
  708. def test_simrank_numpy_source_and_target(self):
  709. G = nx.cycle_graph(5)
  710. expected = 1.0
  711. actual = nx.similarity._simrank_similarity_numpy(G, source=0, target=0)
  712. np.testing.assert_allclose(expected, actual, atol=1e-7)
  713. def test_panther_similarity_unweighted(self):
  714. np.random.seed(42)
  715. G = nx.Graph()
  716. G.add_edge(0, 1)
  717. G.add_edge(0, 2)
  718. G.add_edge(0, 3)
  719. G.add_edge(1, 2)
  720. G.add_edge(2, 4)
  721. expected = {3: 0.5, 2: 0.5, 1: 0.5, 4: 0.125}
  722. sim = nx.panther_similarity(G, 0, path_length=2)
  723. assert sim == expected
  724. def test_panther_similarity_weighted(self):
  725. np.random.seed(42)
  726. G = nx.Graph()
  727. G.add_edge("v1", "v2", weight=5)
  728. G.add_edge("v1", "v3", weight=1)
  729. G.add_edge("v1", "v4", weight=2)
  730. G.add_edge("v2", "v3", weight=0.1)
  731. G.add_edge("v3", "v5", weight=1)
  732. expected = {"v3": 0.75, "v4": 0.5, "v2": 0.5, "v5": 0.25}
  733. sim = nx.panther_similarity(G, "v1", path_length=2)
  734. assert sim == expected
  735. def test_generate_random_paths_unweighted(self):
  736. np.random.seed(42)
  737. index_map = {}
  738. num_paths = 10
  739. path_length = 2
  740. G = nx.Graph()
  741. G.add_edge(0, 1)
  742. G.add_edge(0, 2)
  743. G.add_edge(0, 3)
  744. G.add_edge(1, 2)
  745. G.add_edge(2, 4)
  746. paths = nx.generate_random_paths(
  747. G, num_paths, path_length=path_length, index_map=index_map
  748. )
  749. expected_paths = [
  750. [3, 0, 3],
  751. [4, 2, 1],
  752. [2, 1, 0],
  753. [2, 0, 3],
  754. [3, 0, 1],
  755. [3, 0, 1],
  756. [4, 2, 0],
  757. [2, 1, 0],
  758. [3, 0, 2],
  759. [2, 1, 2],
  760. ]
  761. expected_map = {
  762. 0: {0, 2, 3, 4, 5, 6, 7, 8},
  763. 1: {1, 2, 4, 5, 7, 9},
  764. 2: {1, 2, 3, 6, 7, 8, 9},
  765. 3: {0, 3, 4, 5, 8},
  766. 4: {1, 6},
  767. }
  768. assert expected_paths == list(paths)
  769. assert expected_map == index_map
  770. def test_generate_random_paths_weighted(self):
  771. np.random.seed(42)
  772. index_map = {}
  773. num_paths = 10
  774. path_length = 6
  775. G = nx.Graph()
  776. G.add_edge("a", "b", weight=0.6)
  777. G.add_edge("a", "c", weight=0.2)
  778. G.add_edge("c", "d", weight=0.1)
  779. G.add_edge("c", "e", weight=0.7)
  780. G.add_edge("c", "f", weight=0.9)
  781. G.add_edge("a", "d", weight=0.3)
  782. paths = nx.generate_random_paths(
  783. G, num_paths, path_length=path_length, index_map=index_map
  784. )
  785. expected_paths = [
  786. ["d", "c", "f", "c", "d", "a", "b"],
  787. ["e", "c", "f", "c", "f", "c", "e"],
  788. ["d", "a", "b", "a", "b", "a", "c"],
  789. ["b", "a", "d", "a", "b", "a", "b"],
  790. ["d", "a", "b", "a", "b", "a", "d"],
  791. ["d", "a", "b", "a", "b", "a", "c"],
  792. ["d", "a", "b", "a", "b", "a", "b"],
  793. ["f", "c", "f", "c", "f", "c", "e"],
  794. ["d", "a", "d", "a", "b", "a", "b"],
  795. ["e", "c", "f", "c", "e", "c", "d"],
  796. ]
  797. expected_map = {
  798. "d": {0, 2, 3, 4, 5, 6, 8, 9},
  799. "c": {0, 1, 2, 5, 7, 9},
  800. "f": {0, 1, 9, 7},
  801. "a": {0, 2, 3, 4, 5, 6, 8},
  802. "b": {0, 2, 3, 4, 5, 6, 8},
  803. "e": {1, 9, 7},
  804. }
  805. assert expected_paths == list(paths)
  806. assert expected_map == index_map
  807. def test_symmetry_with_custom_matching(self):
  808. print("G2 is edge (a,b) and G3 is edge (a,a)")
  809. print("but node order for G2 is (a,b) while for G3 it is (b,a)")
  810. a, b = "A", "B"
  811. G2 = nx.Graph()
  812. G2.add_nodes_from((a, b))
  813. G2.add_edges_from([(a, b)])
  814. G3 = nx.Graph()
  815. G3.add_nodes_from((b, a))
  816. G3.add_edges_from([(a, a)])
  817. for G in (G2, G3):
  818. for n in G:
  819. G.nodes[n]["attr"] = n
  820. for e in G.edges:
  821. G.edges[e]["attr"] = e
  822. match = lambda x, y: x == y
  823. print("Starting G2 to G3 GED calculation")
  824. assert nx.graph_edit_distance(G2, G3, node_match=match, edge_match=match) == 1
  825. print("Starting G3 to G2 GED calculation")
  826. assert nx.graph_edit_distance(G3, G2, node_match=match, edge_match=match) == 1