# test_convert_pandas.py (12 KB)
  1. import pytest
  2. import networkx as nx
  3. from networkx.utils import edges_equal, graphs_equal, nodes_equal
  4. np = pytest.importorskip("numpy")
  5. pd = pytest.importorskip("pandas")
  6. class TestConvertPandas:
  7. def setup_method(self):
  8. self.rng = np.random.RandomState(seed=5)
  9. ints = self.rng.randint(1, 11, size=(3, 2))
  10. a = ["A", "B", "C"]
  11. b = ["D", "A", "E"]
  12. df = pd.DataFrame(ints, columns=["weight", "cost"])
  13. df[0] = a # Column label 0 (int)
  14. df["b"] = b # Column label 'b' (str)
  15. self.df = df
  16. mdf = pd.DataFrame([[4, 16, "A", "D"]], columns=["weight", "cost", 0, "b"])
  17. self.mdf = pd.concat([df, mdf])
  18. def test_exceptions(self):
  19. G = pd.DataFrame(["a"]) # adj
  20. pytest.raises(nx.NetworkXError, nx.to_networkx_graph, G)
  21. G = pd.DataFrame(["a", 0.0]) # elist
  22. pytest.raises(nx.NetworkXError, nx.to_networkx_graph, G)
  23. df = pd.DataFrame([[1, 1], [1, 0]], dtype=int, index=[1, 2], columns=["a", "b"])
  24. pytest.raises(nx.NetworkXError, nx.from_pandas_adjacency, df)
  25. def test_from_edgelist_all_attr(self):
  26. Gtrue = nx.Graph(
  27. [
  28. ("E", "C", {"cost": 9, "weight": 10}),
  29. ("B", "A", {"cost": 1, "weight": 7}),
  30. ("A", "D", {"cost": 7, "weight": 4}),
  31. ]
  32. )
  33. G = nx.from_pandas_edgelist(self.df, 0, "b", True)
  34. assert graphs_equal(G, Gtrue)
  35. # MultiGraph
  36. MGtrue = nx.MultiGraph(Gtrue)
  37. MGtrue.add_edge("A", "D", cost=16, weight=4)
  38. MG = nx.from_pandas_edgelist(self.mdf, 0, "b", True, nx.MultiGraph())
  39. assert graphs_equal(MG, MGtrue)
  40. def test_from_edgelist_multi_attr(self):
  41. Gtrue = nx.Graph(
  42. [
  43. ("E", "C", {"cost": 9, "weight": 10}),
  44. ("B", "A", {"cost": 1, "weight": 7}),
  45. ("A", "D", {"cost": 7, "weight": 4}),
  46. ]
  47. )
  48. G = nx.from_pandas_edgelist(self.df, 0, "b", ["weight", "cost"])
  49. assert graphs_equal(G, Gtrue)
  50. def test_from_edgelist_multi_attr_incl_target(self):
  51. Gtrue = nx.Graph(
  52. [
  53. ("E", "C", {0: "C", "b": "E", "weight": 10}),
  54. ("B", "A", {0: "B", "b": "A", "weight": 7}),
  55. ("A", "D", {0: "A", "b": "D", "weight": 4}),
  56. ]
  57. )
  58. G = nx.from_pandas_edgelist(self.df, 0, "b", [0, "b", "weight"])
  59. assert graphs_equal(G, Gtrue)
  60. def test_from_edgelist_multidigraph_and_edge_attr(self):
  61. # example from issue #2374
  62. edges = [
  63. ("X1", "X4", {"Co": "zA", "Mi": 0, "St": "X1"}),
  64. ("X1", "X4", {"Co": "zB", "Mi": 54, "St": "X2"}),
  65. ("X1", "X4", {"Co": "zB", "Mi": 49, "St": "X3"}),
  66. ("X1", "X4", {"Co": "zB", "Mi": 44, "St": "X4"}),
  67. ("Y1", "Y3", {"Co": "zC", "Mi": 0, "St": "Y1"}),
  68. ("Y1", "Y3", {"Co": "zC", "Mi": 34, "St": "Y2"}),
  69. ("Y1", "Y3", {"Co": "zC", "Mi": 29, "St": "X2"}),
  70. ("Y1", "Y3", {"Co": "zC", "Mi": 24, "St": "Y3"}),
  71. ("Z1", "Z3", {"Co": "zD", "Mi": 0, "St": "Z1"}),
  72. ("Z1", "Z3", {"Co": "zD", "Mi": 14, "St": "X3"}),
  73. ]
  74. Gtrue = nx.MultiDiGraph(edges)
  75. data = {
  76. "O": ["X1", "X1", "X1", "X1", "Y1", "Y1", "Y1", "Y1", "Z1", "Z1"],
  77. "D": ["X4", "X4", "X4", "X4", "Y3", "Y3", "Y3", "Y3", "Z3", "Z3"],
  78. "St": ["X1", "X2", "X3", "X4", "Y1", "Y2", "X2", "Y3", "Z1", "X3"],
  79. "Co": ["zA", "zB", "zB", "zB", "zC", "zC", "zC", "zC", "zD", "zD"],
  80. "Mi": [0, 54, 49, 44, 0, 34, 29, 24, 0, 14],
  81. }
  82. df = pd.DataFrame.from_dict(data)
  83. G1 = nx.from_pandas_edgelist(
  84. df, source="O", target="D", edge_attr=True, create_using=nx.MultiDiGraph
  85. )
  86. G2 = nx.from_pandas_edgelist(
  87. df,
  88. source="O",
  89. target="D",
  90. edge_attr=["St", "Co", "Mi"],
  91. create_using=nx.MultiDiGraph,
  92. )
  93. assert graphs_equal(G1, Gtrue)
  94. assert graphs_equal(G2, Gtrue)
  95. def test_from_edgelist_one_attr(self):
  96. Gtrue = nx.Graph(
  97. [
  98. ("E", "C", {"weight": 10}),
  99. ("B", "A", {"weight": 7}),
  100. ("A", "D", {"weight": 4}),
  101. ]
  102. )
  103. G = nx.from_pandas_edgelist(self.df, 0, "b", "weight")
  104. assert graphs_equal(G, Gtrue)
  105. def test_from_edgelist_int_attr_name(self):
  106. # note: this also tests that edge_attr can be `source`
  107. Gtrue = nx.Graph(
  108. [("E", "C", {0: "C"}), ("B", "A", {0: "B"}), ("A", "D", {0: "A"})]
  109. )
  110. G = nx.from_pandas_edgelist(self.df, 0, "b", 0)
  111. assert graphs_equal(G, Gtrue)
  112. def test_from_edgelist_invalid_attr(self):
  113. pytest.raises(
  114. nx.NetworkXError, nx.from_pandas_edgelist, self.df, 0, "b", "misspell"
  115. )
  116. pytest.raises(nx.NetworkXError, nx.from_pandas_edgelist, self.df, 0, "b", 1)
  117. # see Issue #3562
  118. edgeframe = pd.DataFrame([[0, 1], [1, 2], [2, 0]], columns=["s", "t"])
  119. pytest.raises(
  120. nx.NetworkXError, nx.from_pandas_edgelist, edgeframe, "s", "t", True
  121. )
  122. pytest.raises(
  123. nx.NetworkXError, nx.from_pandas_edgelist, edgeframe, "s", "t", "weight"
  124. )
  125. pytest.raises(
  126. nx.NetworkXError,
  127. nx.from_pandas_edgelist,
  128. edgeframe,
  129. "s",
  130. "t",
  131. ["weight", "size"],
  132. )
  133. def test_from_edgelist_no_attr(self):
  134. Gtrue = nx.Graph([("E", "C", {}), ("B", "A", {}), ("A", "D", {})])
  135. G = nx.from_pandas_edgelist(self.df, 0, "b")
  136. assert graphs_equal(G, Gtrue)
  137. def test_from_edgelist(self):
  138. # Pandas DataFrame
  139. G = nx.cycle_graph(10)
  140. G.add_weighted_edges_from((u, v, u) for u, v in list(G.edges))
  141. edgelist = nx.to_edgelist(G)
  142. source = [s for s, t, d in edgelist]
  143. target = [t for s, t, d in edgelist]
  144. weight = [d["weight"] for s, t, d in edgelist]
  145. edges = pd.DataFrame({"source": source, "target": target, "weight": weight})
  146. GG = nx.from_pandas_edgelist(edges, edge_attr="weight")
  147. assert nodes_equal(G.nodes(), GG.nodes())
  148. assert edges_equal(G.edges(), GG.edges())
  149. GW = nx.to_networkx_graph(edges, create_using=nx.Graph)
  150. assert nodes_equal(G.nodes(), GW.nodes())
  151. assert edges_equal(G.edges(), GW.edges())
  152. def test_to_edgelist_default_source_or_target_col_exists(self):
  153. G = nx.path_graph(10)
  154. G.add_weighted_edges_from((u, v, u) for u, v in list(G.edges))
  155. nx.set_edge_attributes(G, 0, name="source")
  156. pytest.raises(nx.NetworkXError, nx.to_pandas_edgelist, G)
  157. # drop source column to test an exception raised for the target column
  158. for u, v, d in G.edges(data=True):
  159. d.pop("source", None)
  160. nx.set_edge_attributes(G, 0, name="target")
  161. pytest.raises(nx.NetworkXError, nx.to_pandas_edgelist, G)
  162. def test_to_edgelist_custom_source_or_target_col_exists(self):
  163. G = nx.path_graph(10)
  164. G.add_weighted_edges_from((u, v, u) for u, v in list(G.edges))
  165. nx.set_edge_attributes(G, 0, name="source_col_name")
  166. pytest.raises(
  167. nx.NetworkXError, nx.to_pandas_edgelist, G, source="source_col_name"
  168. )
  169. # drop source column to test an exception raised for the target column
  170. for u, v, d in G.edges(data=True):
  171. d.pop("source_col_name", None)
  172. nx.set_edge_attributes(G, 0, name="target_col_name")
  173. pytest.raises(
  174. nx.NetworkXError, nx.to_pandas_edgelist, G, target="target_col_name"
  175. )
  176. def test_to_edgelist_edge_key_col_exists(self):
  177. G = nx.path_graph(10, create_using=nx.MultiGraph)
  178. G.add_weighted_edges_from((u, v, u) for u, v in list(G.edges()))
  179. nx.set_edge_attributes(G, 0, name="edge_key_name")
  180. pytest.raises(
  181. nx.NetworkXError, nx.to_pandas_edgelist, G, edge_key="edge_key_name"
  182. )
  183. def test_from_adjacency(self):
  184. nodelist = [1, 2]
  185. dftrue = pd.DataFrame(
  186. [[1, 1], [1, 0]], dtype=int, index=nodelist, columns=nodelist
  187. )
  188. G = nx.Graph([(1, 1), (1, 2)])
  189. df = nx.to_pandas_adjacency(G, dtype=int)
  190. pd.testing.assert_frame_equal(df, dftrue)
  191. @pytest.mark.parametrize("graph", [nx.Graph, nx.MultiGraph])
  192. def test_roundtrip(self, graph):
  193. # edgelist
  194. Gtrue = graph([(1, 1), (1, 2)])
  195. df = nx.to_pandas_edgelist(Gtrue)
  196. G = nx.from_pandas_edgelist(df, create_using=graph)
  197. assert graphs_equal(Gtrue, G)
  198. # adjacency
  199. adj = {1: {1: {"weight": 1}, 2: {"weight": 1}}, 2: {1: {"weight": 1}}}
  200. Gtrue = graph(adj)
  201. df = nx.to_pandas_adjacency(Gtrue, dtype=int)
  202. G = nx.from_pandas_adjacency(df, create_using=graph)
  203. assert graphs_equal(Gtrue, G)
  204. def test_from_adjacency_named(self):
  205. # example from issue #3105
  206. data = {
  207. "A": {"A": 0, "B": 0, "C": 0},
  208. "B": {"A": 1, "B": 0, "C": 0},
  209. "C": {"A": 0, "B": 1, "C": 0},
  210. }
  211. dftrue = pd.DataFrame(data, dtype=np.intp)
  212. df = dftrue[["A", "C", "B"]]
  213. G = nx.from_pandas_adjacency(df, create_using=nx.DiGraph())
  214. df = nx.to_pandas_adjacency(G, dtype=np.intp)
  215. pd.testing.assert_frame_equal(df, dftrue)
  216. def test_edgekey_with_multigraph(self):
  217. df = pd.DataFrame(
  218. {
  219. "source": {"A": "N1", "B": "N2", "C": "N1", "D": "N1"},
  220. "target": {"A": "N2", "B": "N3", "C": "N1", "D": "N2"},
  221. "attr1": {"A": "F1", "B": "F2", "C": "F3", "D": "F4"},
  222. "attr2": {"A": 1, "B": 0, "C": 0, "D": 0},
  223. "attr3": {"A": 0, "B": 1, "C": 0, "D": 1},
  224. }
  225. )
  226. Gtrue = nx.MultiGraph(
  227. [
  228. ("N1", "N2", "F1", {"attr2": 1, "attr3": 0}),
  229. ("N2", "N3", "F2", {"attr2": 0, "attr3": 1}),
  230. ("N1", "N1", "F3", {"attr2": 0, "attr3": 0}),
  231. ("N1", "N2", "F4", {"attr2": 0, "attr3": 1}),
  232. ]
  233. )
  234. # example from issue #4065
  235. G = nx.from_pandas_edgelist(
  236. df,
  237. source="source",
  238. target="target",
  239. edge_attr=["attr2", "attr3"],
  240. edge_key="attr1",
  241. create_using=nx.MultiGraph(),
  242. )
  243. assert graphs_equal(G, Gtrue)
  244. df_roundtrip = nx.to_pandas_edgelist(G, edge_key="attr1")
  245. df_roundtrip = df_roundtrip.sort_values("attr1")
  246. df_roundtrip.index = ["A", "B", "C", "D"]
  247. pd.testing.assert_frame_equal(
  248. df, df_roundtrip[["source", "target", "attr1", "attr2", "attr3"]]
  249. )
  250. def test_edgekey_with_normal_graph_no_action(self):
  251. Gtrue = nx.Graph(
  252. [
  253. ("E", "C", {"cost": 9, "weight": 10}),
  254. ("B", "A", {"cost": 1, "weight": 7}),
  255. ("A", "D", {"cost": 7, "weight": 4}),
  256. ]
  257. )
  258. G = nx.from_pandas_edgelist(self.df, 0, "b", True, edge_key="weight")
  259. assert graphs_equal(G, Gtrue)
  260. def test_nonexisting_edgekey_raises(self):
  261. with pytest.raises(nx.exception.NetworkXError):
  262. nx.from_pandas_edgelist(
  263. self.df,
  264. source="source",
  265. target="target",
  266. edge_key="Not_real",
  267. edge_attr=True,
  268. create_using=nx.MultiGraph(),
  269. )
  270. def test_to_pandas_adjacency_with_nodelist():
  271. G = nx.complete_graph(5)
  272. nodelist = [1, 4]
  273. expected = pd.DataFrame(
  274. [[0, 1], [1, 0]], dtype=int, index=nodelist, columns=nodelist
  275. )
  276. pd.testing.assert_frame_equal(
  277. expected, nx.to_pandas_adjacency(G, nodelist, dtype=int)
  278. )
  279. def test_to_pandas_edgelist_with_nodelist():
  280. G = nx.Graph()
  281. G.add_edges_from([(0, 1), (1, 2), (1, 3)], weight=2.0)
  282. G.add_edge(0, 5, weight=100)
  283. df = nx.to_pandas_edgelist(G, nodelist=[1, 2])
  284. assert 0 not in df["source"].to_numpy()
  285. assert 100 not in df["weight"].to_numpy()