convert_matrix.py 39 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162
  1. """Functions to convert NetworkX graphs to and from common data containers
  2. like numpy arrays, scipy sparse arrays, and pandas DataFrames.
  3. The preferred way of converting data to a NetworkX graph is through the
  4. graph constructor. The constructor calls the `~networkx.convert.to_networkx_graph`
  5. function which attempts to guess the input type and convert it automatically.
  6. Examples
  7. --------
  8. Create a 10 node random graph from a numpy array
  9. >>> import numpy as np
  10. >>> rng = np.random.default_rng()
  11. >>> a = rng.integers(low=0, high=2, size=(10, 10))
  12. >>> DG = nx.from_numpy_array(a, create_using=nx.DiGraph)
  13. or equivalently:
  14. >>> DG = nx.DiGraph(a)
  15. which calls `from_numpy_array` internally based on the type of ``a``.
  16. See Also
  17. --------
  18. nx_agraph, nx_pydot
  19. """
  20. import itertools
  21. from collections import defaultdict
  22. import networkx as nx
  23. from networkx.utils import not_implemented_for
  24. __all__ = [
  25. "from_pandas_adjacency",
  26. "to_pandas_adjacency",
  27. "from_pandas_edgelist",
  28. "to_pandas_edgelist",
  29. "from_scipy_sparse_array",
  30. "to_scipy_sparse_array",
  31. "from_numpy_array",
  32. "to_numpy_array",
  33. ]
  34. def to_pandas_adjacency(
  35. G,
  36. nodelist=None,
  37. dtype=None,
  38. order=None,
  39. multigraph_weight=sum,
  40. weight="weight",
  41. nonedge=0.0,
  42. ):
  43. """Returns the graph adjacency matrix as a Pandas DataFrame.
  44. Parameters
  45. ----------
  46. G : graph
  47. The NetworkX graph used to construct the Pandas DataFrame.
  48. nodelist : list, optional
  49. The rows and columns are ordered according to the nodes in `nodelist`.
  50. If `nodelist` is None, then the ordering is produced by G.nodes().
  51. multigraph_weight : {sum, min, max}, optional
  52. An operator that determines how weights in multigraphs are handled.
  53. The default is to sum the weights of the multiple edges.
  54. weight : string or None, optional
  55. The edge attribute that holds the numerical value used for
  56. the edge weight. If an edge does not have that attribute, then the
  57. value 1 is used instead.
  58. nonedge : float, optional
  59. The matrix values corresponding to nonedges are typically set to zero.
  60. However, this could be undesirable if there are matrix values
  61. corresponding to actual edges that also have the value zero. If so,
  62. one might prefer nonedges to have some other value, such as nan.
  63. Returns
  64. -------
  65. df : Pandas DataFrame
  66. Graph adjacency matrix
  67. Notes
  68. -----
  69. For directed graphs, entry i,j corresponds to an edge from i to j.
  70. The DataFrame entries are assigned to the weight edge attribute. When
  71. an edge does not have a weight attribute, the value of the entry is set to
  72. the number 1. For multiple (parallel) edges, the values of the entries
  73. are determined by the 'multigraph_weight' parameter. The default is to
  74. sum the weight attributes for each of the parallel edges.
  75. When `nodelist` does not contain every node in `G`, the matrix is built
  76. from the subgraph of `G` that is induced by the nodes in `nodelist`.
  77. The convention used for self-loop edges in graphs is to assign the
  78. diagonal matrix entry value to the weight attribute of the edge
  79. (or the number 1 if the edge has no weight attribute). If the
  80. alternate convention of doubling the edge weight is desired the
  81. resulting Pandas DataFrame can be modified as follows:
  82. >>> import pandas as pd
  83. >>> pd.options.display.max_columns = 20
  84. >>> import numpy as np
  85. >>> G = nx.Graph([(1, 1)])
  86. >>> df = nx.to_pandas_adjacency(G, dtype=int)
  87. >>> df
  88. 1
  89. 1 1
  90. >>> df.values[np.diag_indices_from(df)] *= 2
  91. >>> df
  92. 1
  93. 1 2
  94. Examples
  95. --------
  96. >>> G = nx.MultiDiGraph()
  97. >>> G.add_edge(0, 1, weight=2)
  98. 0
  99. >>> G.add_edge(1, 0)
  100. 0
  101. >>> G.add_edge(2, 2, weight=3)
  102. 0
  103. >>> G.add_edge(2, 2)
  104. 1
  105. >>> nx.to_pandas_adjacency(G, nodelist=[0, 1, 2], dtype=int)
  106. 0 1 2
  107. 0 0 2 0
  108. 1 1 0 0
  109. 2 0 0 4
  110. """
  111. import pandas as pd
  112. M = to_numpy_array(
  113. G,
  114. nodelist=nodelist,
  115. dtype=dtype,
  116. order=order,
  117. multigraph_weight=multigraph_weight,
  118. weight=weight,
  119. nonedge=nonedge,
  120. )
  121. if nodelist is None:
  122. nodelist = list(G)
  123. return pd.DataFrame(data=M, index=nodelist, columns=nodelist)
  124. def from_pandas_adjacency(df, create_using=None):
  125. r"""Returns a graph from Pandas DataFrame.
  126. The Pandas DataFrame is interpreted as an adjacency matrix for the graph.
  127. Parameters
  128. ----------
  129. df : Pandas DataFrame
  130. An adjacency matrix representation of a graph
  131. create_using : NetworkX graph constructor, optional (default=nx.Graph)
  132. Graph type to create. If graph instance, then cleared before populated.
  133. Notes
  134. -----
  135. For directed graphs, explicitly mention create_using=nx.DiGraph,
  136. and entry i,j of df corresponds to an edge from i to j.
  137. If `df` has a single data type for each entry it will be converted to an
  138. appropriate Python data type.
  139. If `df` has a user-specified compound data type the names
  140. of the data fields will be used as attribute keys in the resulting
  141. NetworkX graph.
  142. See Also
  143. --------
  144. to_pandas_adjacency
  145. Examples
  146. --------
  147. Simple integer weights on edges:
  148. >>> import pandas as pd
  149. >>> pd.options.display.max_columns = 20
  150. >>> df = pd.DataFrame([[1, 1], [2, 1]])
  151. >>> df
  152. 0 1
  153. 0 1 1
  154. 1 2 1
  155. >>> G = nx.from_pandas_adjacency(df)
  156. >>> G.name = "Graph from pandas adjacency matrix"
  157. >>> print(G)
  158. Graph named 'Graph from pandas adjacency matrix' with 2 nodes and 3 edges
  159. """
  160. try:
  161. df = df[df.index]
  162. except Exception as err:
  163. missing = list(set(df.index).difference(set(df.columns)))
  164. msg = f"{missing} not in columns"
  165. raise nx.NetworkXError("Columns must match Indices.", msg) from err
  166. A = df.values
  167. G = from_numpy_array(A, create_using=create_using)
  168. nx.relabel.relabel_nodes(G, dict(enumerate(df.columns)), copy=False)
  169. return G
  170. def to_pandas_edgelist(
  171. G,
  172. source="source",
  173. target="target",
  174. nodelist=None,
  175. dtype=None,
  176. edge_key=None,
  177. ):
  178. """Returns the graph edge list as a Pandas DataFrame.
  179. Parameters
  180. ----------
  181. G : graph
  182. The NetworkX graph used to construct the Pandas DataFrame.
  183. source : str or int, optional
  184. A valid column name (string or integer) for the source nodes (for the
  185. directed case).
  186. target : str or int, optional
  187. A valid column name (string or integer) for the target nodes (for the
  188. directed case).
  189. nodelist : list, optional
  190. Use only nodes specified in nodelist
  191. dtype : dtype, default None
  192. Use to create the DataFrame. Data type to force.
  193. Only a single dtype is allowed. If None, infer.
  194. edge_key : str or int or None, optional (default=None)
  195. A valid column name (string or integer) for the edge keys (for the
  196. multigraph case). If None, edge keys are not stored in the DataFrame.
  197. Returns
  198. -------
  199. df : Pandas DataFrame
  200. Graph edge list
  201. Examples
  202. --------
  203. >>> G = nx.Graph(
  204. ... [
  205. ... ("A", "B", {"cost": 1, "weight": 7}),
  206. ... ("C", "E", {"cost": 9, "weight": 10}),
  207. ... ]
  208. ... )
  209. >>> df = nx.to_pandas_edgelist(G, nodelist=["A", "C"])
  210. >>> df[["source", "target", "cost", "weight"]]
  211. source target cost weight
  212. 0 A B 1 7
  213. 1 C E 9 10
  214. >>> G = nx.MultiGraph([('A', 'B', {'cost': 1}), ('A', 'B', {'cost': 9})])
  215. >>> df = nx.to_pandas_edgelist(G, nodelist=['A', 'C'], edge_key='ekey')
  216. >>> df[['source', 'target', 'cost', 'ekey']]
  217. source target cost ekey
  218. 0 A B 1 0
  219. 1 A B 9 1
  220. """
  221. import pandas as pd
  222. if nodelist is None:
  223. edgelist = G.edges(data=True)
  224. else:
  225. edgelist = G.edges(nodelist, data=True)
  226. source_nodes = [s for s, _, _ in edgelist]
  227. target_nodes = [t for _, t, _ in edgelist]
  228. all_attrs = set().union(*(d.keys() for _, _, d in edgelist))
  229. if source in all_attrs:
  230. raise nx.NetworkXError(f"Source name {source!r} is an edge attr name")
  231. if target in all_attrs:
  232. raise nx.NetworkXError(f"Target name {target!r} is an edge attr name")
  233. nan = float("nan")
  234. edge_attr = {k: [d.get(k, nan) for _, _, d in edgelist] for k in all_attrs}
  235. if G.is_multigraph() and edge_key is not None:
  236. if edge_key in all_attrs:
  237. raise nx.NetworkXError(f"Edge key name {edge_key!r} is an edge attr name")
  238. edge_keys = [k for _, _, k in G.edges(keys=True)]
  239. edgelistdict = {source: source_nodes, target: target_nodes, edge_key: edge_keys}
  240. else:
  241. edgelistdict = {source: source_nodes, target: target_nodes}
  242. edgelistdict.update(edge_attr)
  243. return pd.DataFrame(edgelistdict, dtype=dtype)
  244. def from_pandas_edgelist(
  245. df,
  246. source="source",
  247. target="target",
  248. edge_attr=None,
  249. create_using=None,
  250. edge_key=None,
  251. ):
  252. """Returns a graph from Pandas DataFrame containing an edge list.
  253. The Pandas DataFrame should contain at least two columns of node names and
  254. zero or more columns of edge attributes. Each row will be processed as one
  255. edge instance.
  256. Note: This function iterates over DataFrame.values, which is not
  257. guaranteed to retain the data type across columns in the row. This is only
  258. a problem if your row is entirely numeric and a mix of ints and floats. In
  259. that case, all values will be returned as floats. See the
  260. DataFrame.iterrows documentation for an example.
  261. Parameters
  262. ----------
  263. df : Pandas DataFrame
  264. An edge list representation of a graph
  265. source : str or int
  266. A valid column name (string or integer) for the source nodes (for the
  267. directed case).
  268. target : str or int
  269. A valid column name (string or integer) for the target nodes (for the
  270. directed case).
  271. edge_attr : str or int, iterable, True, or None
  272. A valid column name (str or int) or iterable of column names that are
  273. used to retrieve items and add them to the graph as edge attributes.
  274. If `True`, all of the remaining columns will be added.
  275. If `None`, no edge attributes are added to the graph.
  276. create_using : NetworkX graph constructor, optional (default=nx.Graph)
  277. Graph type to create. If graph instance, then cleared before populated.
  278. edge_key : str or None, optional (default=None)
  279. A valid column name for the edge keys (for a MultiGraph). The values in
  280. this column are used for the edge keys when adding edges if create_using
  281. is a multigraph.
  282. See Also
  283. --------
  284. to_pandas_edgelist
  285. Examples
  286. --------
  287. Simple integer weights on edges:
  288. >>> import pandas as pd
  289. >>> pd.options.display.max_columns = 20
  290. >>> import numpy as np
  291. >>> rng = np.random.RandomState(seed=5)
  292. >>> ints = rng.randint(1, 11, size=(3, 2))
  293. >>> a = ["A", "B", "C"]
  294. >>> b = ["D", "A", "E"]
  295. >>> df = pd.DataFrame(ints, columns=["weight", "cost"])
  296. >>> df[0] = a
  297. >>> df["b"] = b
  298. >>> df[["weight", "cost", 0, "b"]]
  299. weight cost 0 b
  300. 0 4 7 A D
  301. 1 7 1 B A
  302. 2 10 9 C E
  303. >>> G = nx.from_pandas_edgelist(df, 0, "b", ["weight", "cost"])
  304. >>> G["E"]["C"]["weight"]
  305. 10
  306. >>> G["E"]["C"]["cost"]
  307. 9
  308. >>> edges = pd.DataFrame(
  309. ... {
  310. ... "source": [0, 1, 2],
  311. ... "target": [2, 2, 3],
  312. ... "weight": [3, 4, 5],
  313. ... "color": ["red", "blue", "blue"],
  314. ... }
  315. ... )
  316. >>> G = nx.from_pandas_edgelist(edges, edge_attr=True)
  317. >>> G[0][2]["color"]
  318. 'red'
  319. Build multigraph with custom keys:
  320. >>> edges = pd.DataFrame(
  321. ... {
  322. ... "source": [0, 1, 2, 0],
  323. ... "target": [2, 2, 3, 2],
  324. ... "my_edge_key": ["A", "B", "C", "D"],
  325. ... "weight": [3, 4, 5, 6],
  326. ... "color": ["red", "blue", "blue", "blue"],
  327. ... }
  328. ... )
  329. >>> G = nx.from_pandas_edgelist(
  330. ... edges,
  331. ... edge_key="my_edge_key",
  332. ... edge_attr=["weight", "color"],
  333. ... create_using=nx.MultiGraph(),
  334. ... )
  335. >>> G[0][2]
  336. AtlasView({'A': {'weight': 3, 'color': 'red'}, 'D': {'weight': 6, 'color': 'blue'}})
  337. """
  338. g = nx.empty_graph(0, create_using)
  339. if edge_attr is None:
  340. g.add_edges_from(zip(df[source], df[target]))
  341. return g
  342. reserved_columns = [source, target]
  343. # Additional columns requested
  344. attr_col_headings = []
  345. attribute_data = []
  346. if edge_attr is True:
  347. attr_col_headings = [c for c in df.columns if c not in reserved_columns]
  348. elif isinstance(edge_attr, (list, tuple)):
  349. attr_col_headings = edge_attr
  350. else:
  351. attr_col_headings = [edge_attr]
  352. if len(attr_col_headings) == 0:
  353. raise nx.NetworkXError(
  354. f"Invalid edge_attr argument: No columns found with name: {attr_col_headings}"
  355. )
  356. try:
  357. attribute_data = zip(*[df[col] for col in attr_col_headings])
  358. except (KeyError, TypeError) as err:
  359. msg = f"Invalid edge_attr argument: {edge_attr}"
  360. raise nx.NetworkXError(msg) from err
  361. if g.is_multigraph():
  362. # => append the edge keys from the df to the bundled data
  363. if edge_key is not None:
  364. try:
  365. multigraph_edge_keys = df[edge_key]
  366. attribute_data = zip(attribute_data, multigraph_edge_keys)
  367. except (KeyError, TypeError) as err:
  368. msg = f"Invalid edge_key argument: {edge_key}"
  369. raise nx.NetworkXError(msg) from err
  370. for s, t, attrs in zip(df[source], df[target], attribute_data):
  371. if edge_key is not None:
  372. attrs, multigraph_edge_key = attrs
  373. key = g.add_edge(s, t, key=multigraph_edge_key)
  374. else:
  375. key = g.add_edge(s, t)
  376. g[s][t][key].update(zip(attr_col_headings, attrs))
  377. else:
  378. for s, t, attrs in zip(df[source], df[target], attribute_data):
  379. g.add_edge(s, t)
  380. g[s][t].update(zip(attr_col_headings, attrs))
  381. return g
  382. def to_scipy_sparse_array(G, nodelist=None, dtype=None, weight="weight", format="csr"):
  383. """Returns the graph adjacency matrix as a SciPy sparse array.
  384. Parameters
  385. ----------
  386. G : graph
  387. The NetworkX graph used to construct the sparse matrix.
  388. nodelist : list, optional
  389. The rows and columns are ordered according to the nodes in `nodelist`.
  390. If `nodelist` is None, then the ordering is produced by G.nodes().
  391. dtype : NumPy data-type, optional
  392. A valid NumPy dtype used to initialize the array. If None, then the
  393. NumPy default is used.
  394. weight : string or None optional (default='weight')
  395. The edge attribute that holds the numerical value used for
  396. the edge weight. If None then all edge weights are 1.
  397. format : str in {'bsr', 'csr', 'csc', 'coo', 'lil', 'dia', 'dok'}
  398. The type of the matrix to be returned (default 'csr'). For
  399. some algorithms different implementations of sparse matrices
  400. can perform better. See [1]_ for details.
  401. Returns
  402. -------
  403. A : SciPy sparse array
  404. Graph adjacency matrix.
  405. Notes
  406. -----
  407. For directed graphs, matrix entry i,j corresponds to an edge from i to j.
  408. The matrix entries are populated using the edge attribute held in
  409. parameter weight. When an edge does not have that attribute, the
  410. value of the entry is 1.
  411. For multiple edges the matrix values are the sums of the edge weights.
  412. When `nodelist` does not contain every node in `G`, the adjacency matrix
  413. is built from the subgraph of `G` that is induced by the nodes in
  414. `nodelist`.
  415. The convention used for self-loop edges in graphs is to assign the
  416. diagonal matrix entry value to the weight attribute of the edge
  417. (or the number 1 if the edge has no weight attribute). If the
  418. alternate convention of doubling the edge weight is desired the
  419. resulting SciPy sparse array can be modified as follows:
  420. >>> G = nx.Graph([(1, 1)])
  421. >>> A = nx.to_scipy_sparse_array(G)
  422. >>> print(A.todense())
  423. [[1]]
  424. >>> A.setdiag(A.diagonal() * 2)
  425. >>> print(A.toarray())
  426. [[2]]
  427. Examples
  428. --------
  429. >>> G = nx.MultiDiGraph()
  430. >>> G.add_edge(0, 1, weight=2)
  431. 0
  432. >>> G.add_edge(1, 0)
  433. 0
  434. >>> G.add_edge(2, 2, weight=3)
  435. 0
  436. >>> G.add_edge(2, 2)
  437. 1
  438. >>> S = nx.to_scipy_sparse_array(G, nodelist=[0, 1, 2])
  439. >>> print(S.toarray())
  440. [[0 2 0]
  441. [1 0 0]
  442. [0 0 4]]
  443. References
  444. ----------
  445. .. [1] Scipy Dev. References, "Sparse Matrices",
  446. https://docs.scipy.org/doc/scipy/reference/sparse.html
  447. """
  448. import scipy as sp
  449. import scipy.sparse # call as sp.sparse
  450. if len(G) == 0:
  451. raise nx.NetworkXError("Graph has no nodes or edges")
  452. if nodelist is None:
  453. nodelist = list(G)
  454. nlen = len(G)
  455. else:
  456. nlen = len(nodelist)
  457. if nlen == 0:
  458. raise nx.NetworkXError("nodelist has no nodes")
  459. nodeset = set(G.nbunch_iter(nodelist))
  460. if nlen != len(nodeset):
  461. for n in nodelist:
  462. if n not in G:
  463. raise nx.NetworkXError(f"Node {n} in nodelist is not in G")
  464. raise nx.NetworkXError("nodelist contains duplicates.")
  465. if nlen < len(G):
  466. G = G.subgraph(nodelist)
  467. index = dict(zip(nodelist, range(nlen)))
  468. coefficients = zip(
  469. *((index[u], index[v], wt) for u, v, wt in G.edges(data=weight, default=1))
  470. )
  471. try:
  472. row, col, data = coefficients
  473. except ValueError:
  474. # there is no edge in the subgraph
  475. row, col, data = [], [], []
  476. if G.is_directed():
  477. A = sp.sparse.coo_array((data, (row, col)), shape=(nlen, nlen), dtype=dtype)
  478. else:
  479. # symmetrize matrix
  480. d = data + data
  481. r = row + col
  482. c = col + row
  483. # selfloop entries get double counted when symmetrizing
  484. # so we subtract the data on the diagonal
  485. selfloops = list(nx.selfloop_edges(G, data=weight, default=1))
  486. if selfloops:
  487. diag_index, diag_data = zip(*((index[u], -wt) for u, v, wt in selfloops))
  488. d += diag_data
  489. r += diag_index
  490. c += diag_index
  491. A = sp.sparse.coo_array((d, (r, c)), shape=(nlen, nlen), dtype=dtype)
  492. try:
  493. return A.asformat(format)
  494. except ValueError as err:
  495. raise nx.NetworkXError(f"Unknown sparse matrix format: {format}") from err
  496. def _csr_gen_triples(A):
  497. """Converts a SciPy sparse array in **Compressed Sparse Row** format to
  498. an iterable of weighted edge triples.
  499. """
  500. nrows = A.shape[0]
  501. data, indices, indptr = A.data, A.indices, A.indptr
  502. for i in range(nrows):
  503. for j in range(indptr[i], indptr[i + 1]):
  504. yield i, indices[j], data[j]
  505. def _csc_gen_triples(A):
  506. """Converts a SciPy sparse array in **Compressed Sparse Column** format to
  507. an iterable of weighted edge triples.
  508. """
  509. ncols = A.shape[1]
  510. data, indices, indptr = A.data, A.indices, A.indptr
  511. for i in range(ncols):
  512. for j in range(indptr[i], indptr[i + 1]):
  513. yield indices[j], i, data[j]
  514. def _coo_gen_triples(A):
  515. """Converts a SciPy sparse array in **Coordinate** format to an iterable
  516. of weighted edge triples.
  517. """
  518. row, col, data = A.row, A.col, A.data
  519. return zip(row, col, data)
  520. def _dok_gen_triples(A):
  521. """Converts a SciPy sparse array in **Dictionary of Keys** format to an
  522. iterable of weighted edge triples.
  523. """
  524. for (r, c), v in A.items():
  525. yield r, c, v
  526. def _generate_weighted_edges(A):
  527. """Returns an iterable over (u, v, w) triples, where u and v are adjacent
  528. vertices and w is the weight of the edge joining u and v.
  529. `A` is a SciPy sparse array (in any format).
  530. """
  531. if A.format == "csr":
  532. return _csr_gen_triples(A)
  533. if A.format == "csc":
  534. return _csc_gen_triples(A)
  535. if A.format == "dok":
  536. return _dok_gen_triples(A)
  537. # If A is in any other format (including COO), convert it to COO format.
  538. return _coo_gen_triples(A.tocoo())
  539. def from_scipy_sparse_array(
  540. A, parallel_edges=False, create_using=None, edge_attribute="weight"
  541. ):
  542. """Creates a new graph from an adjacency matrix given as a SciPy sparse
  543. array.
  544. Parameters
  545. ----------
  546. A: scipy.sparse array
  547. An adjacency matrix representation of a graph
  548. parallel_edges : Boolean
  549. If this is True, `create_using` is a multigraph, and `A` is an
  550. integer matrix, then entry *(i, j)* in the matrix is interpreted as the
  551. number of parallel edges joining vertices *i* and *j* in the graph.
  552. If it is False, then the entries in the matrix are interpreted as
  553. the weight of a single edge joining the vertices.
  554. create_using : NetworkX graph constructor, optional (default=nx.Graph)
  555. Graph type to create. If graph instance, then cleared before populated.
  556. edge_attribute: string
  557. Name of edge attribute to store matrix numeric value. The data will
  558. have the same type as the matrix entry (int, float, (real,imag)).
  559. Notes
  560. -----
  561. For directed graphs, explicitly mention create_using=nx.DiGraph,
  562. and entry i,j of A corresponds to an edge from i to j.
  563. If `create_using` is :class:`networkx.MultiGraph` or
  564. :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the
  565. entries of `A` are of type :class:`int`, then this function returns a
  566. multigraph (constructed from `create_using`) with parallel edges.
  567. In this case, `edge_attribute` will be ignored.
  568. If `create_using` indicates an undirected multigraph, then only the edges
  569. indicated by the upper triangle of the matrix `A` will be added to the
  570. graph.
  571. Examples
  572. --------
  573. >>> import scipy as sp
  574. >>> import scipy.sparse # call as sp.sparse
  575. >>> A = sp.sparse.eye(2, 2, 1)
  576. >>> G = nx.from_scipy_sparse_array(A)
  577. If `create_using` indicates a multigraph and the matrix has only integer
  578. entries and `parallel_edges` is False, then the entries will be treated
  579. as weights for edges joining the nodes (without creating parallel edges):
  580. >>> A = sp.sparse.csr_array([[1, 1], [1, 2]])
  581. >>> G = nx.from_scipy_sparse_array(A, create_using=nx.MultiGraph)
  582. >>> G[1][1]
  583. AtlasView({0: {'weight': 2}})
  584. If `create_using` indicates a multigraph and the matrix has only integer
  585. entries and `parallel_edges` is True, then the entries will be treated
  586. as the number of parallel edges joining those two vertices:
  587. >>> A = sp.sparse.csr_array([[1, 1], [1, 2]])
  588. >>> G = nx.from_scipy_sparse_array(
  589. ... A, parallel_edges=True, create_using=nx.MultiGraph
  590. ... )
  591. >>> G[1][1]
  592. AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
  593. """
  594. G = nx.empty_graph(0, create_using)
  595. n, m = A.shape
  596. if n != m:
  597. raise nx.NetworkXError(f"Adjacency matrix not square: nx,ny={A.shape}")
  598. # Make sure we get even the isolated nodes of the graph.
  599. G.add_nodes_from(range(n))
  600. # Create an iterable over (u, v, w) triples and for each triple, add an
  601. # edge from u to v with weight w.
  602. triples = _generate_weighted_edges(A)
  603. # If the entries in the adjacency matrix are integers, the graph is a
  604. # multigraph, and parallel_edges is True, then create parallel edges, each
  605. # with weight 1, for each entry in the adjacency matrix. Otherwise, create
  606. # one edge for each positive entry in the adjacency matrix and set the
  607. # weight of that edge to be the entry in the matrix.
  608. if A.dtype.kind in ("i", "u") and G.is_multigraph() and parallel_edges:
  609. chain = itertools.chain.from_iterable
  610. # The following line is equivalent to:
  611. #
  612. # for (u, v) in edges:
  613. # for d in range(A[u, v]):
  614. # G.add_edge(u, v, weight=1)
  615. #
  616. triples = chain(((u, v, 1) for d in range(w)) for (u, v, w) in triples)
  617. # If we are creating an undirected multigraph, only add the edges from the
  618. # upper triangle of the matrix. Otherwise, add all the edges. This relies
  619. # on the fact that the vertices created in the
  620. # `_generated_weighted_edges()` function are actually the row/column
  621. # indices for the matrix `A`.
  622. #
  623. # Without this check, we run into a problem where each edge is added twice
  624. # when `G.add_weighted_edges_from()` is invoked below.
  625. if G.is_multigraph() and not G.is_directed():
  626. triples = ((u, v, d) for u, v, d in triples if u <= v)
  627. G.add_weighted_edges_from(triples, weight=edge_attribute)
  628. return G
  629. def to_numpy_array(
  630. G,
  631. nodelist=None,
  632. dtype=None,
  633. order=None,
  634. multigraph_weight=sum,
  635. weight="weight",
  636. nonedge=0.0,
  637. ):
  638. """Returns the graph adjacency matrix as a NumPy array.
  639. Parameters
  640. ----------
  641. G : graph
  642. The NetworkX graph used to construct the NumPy array.
  643. nodelist : list, optional
  644. The rows and columns are ordered according to the nodes in `nodelist`.
  645. If `nodelist` is ``None``, then the ordering is produced by ``G.nodes()``.
  646. dtype : NumPy data type, optional
  647. A NumPy data type used to initialize the array. If None, then the NumPy
  648. default is used. The dtype can be structured if `weight=None`, in which
  649. case the dtype field names are used to look up edge attributes. The
  650. result is a structured array where each named field in the dtype
  651. corresponds to the adjaceny for that edge attribute. See examples for
  652. details.
  653. order : {'C', 'F'}, optional
  654. Whether to store multidimensional data in C- or Fortran-contiguous
  655. (row- or column-wise) order in memory. If None, then the NumPy default
  656. is used.
  657. multigraph_weight : callable, optional
  658. An function that determines how weights in multigraphs are handled.
  659. The function should accept a sequence of weights and return a single
  660. value. The default is to sum the weights of the multiple edges.
  661. weight : string or None optional (default = 'weight')
  662. The edge attribute that holds the numerical value used for
  663. the edge weight. If an edge does not have that attribute, then the
  664. value 1 is used instead. `weight` must be ``None`` if a structured
  665. dtype is used.
  666. nonedge : array_like (default = 0.0)
  667. The value used to represent non-edges in the adjaceny matrix.
  668. The array values corresponding to nonedges are typically set to zero.
  669. However, this could be undesirable if there are array values
  670. corresponding to actual edges that also have the value zero. If so,
  671. one might prefer nonedges to have some other value, such as ``nan``.
  672. Returns
  673. -------
  674. A : NumPy ndarray
  675. Graph adjacency matrix
  676. Raises
  677. ------
  678. NetworkXError
  679. If `dtype` is a structured dtype and `G` is a multigraph
  680. ValueError
  681. If `dtype` is a structured dtype and `weight` is not `None`
  682. See Also
  683. --------
  684. from_numpy_array
  685. Notes
  686. -----
  687. For directed graphs, entry ``i, j`` corresponds to an edge from ``i`` to ``j``.
  688. Entries in the adjacency matrix are given by the `weight` edge attribute.
  689. When an edge does not have a weight attribute, the value of the entry is
  690. set to the number 1. For multiple (parallel) edges, the values of the
  691. entries are determined by the `multigraph_weight` parameter. The default is
  692. to sum the weight attributes for each of the parallel edges.
  693. When `nodelist` does not contain every node in `G`, the adjacency matrix is
  694. built from the subgraph of `G` that is induced by the nodes in `nodelist`.
  695. The convention used for self-loop edges in graphs is to assign the
  696. diagonal array entry value to the weight attribute of the edge
  697. (or the number 1 if the edge has no weight attribute). If the
  698. alternate convention of doubling the edge weight is desired the
  699. resulting NumPy array can be modified as follows:
  700. >>> import numpy as np
  701. >>> G = nx.Graph([(1, 1)])
  702. >>> A = nx.to_numpy_array(G)
  703. >>> A
  704. array([[1.]])
  705. >>> A[np.diag_indices_from(A)] *= 2
  706. >>> A
  707. array([[2.]])
  708. Examples
  709. --------
  710. >>> G = nx.MultiDiGraph()
  711. >>> G.add_edge(0, 1, weight=2)
  712. 0
  713. >>> G.add_edge(1, 0)
  714. 0
  715. >>> G.add_edge(2, 2, weight=3)
  716. 0
  717. >>> G.add_edge(2, 2)
  718. 1
  719. >>> nx.to_numpy_array(G, nodelist=[0, 1, 2])
  720. array([[0., 2., 0.],
  721. [1., 0., 0.],
  722. [0., 0., 4.]])
  723. When `nodelist` argument is used, nodes of `G` which do not appear in the `nodelist`
  724. and their edges are not included in the adjacency matrix. Here is an example:
  725. >>> G = nx.Graph()
  726. >>> G.add_edge(3, 1)
  727. >>> G.add_edge(2, 0)
  728. >>> G.add_edge(2, 1)
  729. >>> G.add_edge(3, 0)
  730. >>> nx.to_numpy_array(G, nodelist=[1, 2, 3])
  731. array([[0., 1., 1.],
  732. [1., 0., 0.],
  733. [1., 0., 0.]])
  734. This function can also be used to create adjacency matrices for multiple
  735. edge attributes with structured dtypes:
  736. >>> G = nx.Graph()
  737. >>> G.add_edge(0, 1, weight=10)
  738. >>> G.add_edge(1, 2, cost=5)
  739. >>> G.add_edge(2, 3, weight=3, cost=-4.0)
  740. >>> dtype = np.dtype([("weight", int), ("cost", float)])
  741. >>> A = nx.to_numpy_array(G, dtype=dtype, weight=None)
  742. >>> A["weight"]
  743. array([[ 0, 10, 0, 0],
  744. [10, 0, 1, 0],
  745. [ 0, 1, 0, 3],
  746. [ 0, 0, 3, 0]])
  747. >>> A["cost"]
  748. array([[ 0., 1., 0., 0.],
  749. [ 1., 0., 5., 0.],
  750. [ 0., 5., 0., -4.],
  751. [ 0., 0., -4., 0.]])
  752. As stated above, the argument "nonedge" is useful especially when there are
  753. actually edges with weight 0 in the graph. Setting a nonedge value different than 0,
  754. makes it much clearer to differentiate such 0-weighted edges and actual nonedge values.
  755. >>> G = nx.Graph()
  756. >>> G.add_edge(3, 1, weight=2)
  757. >>> G.add_edge(2, 0, weight=0)
  758. >>> G.add_edge(2, 1, weight=0)
  759. >>> G.add_edge(3, 0, weight=1)
  760. >>> nx.to_numpy_array(G, nonedge=-1.)
  761. array([[-1., 2., -1., 1.],
  762. [ 2., -1., 0., -1.],
  763. [-1., 0., -1., 0.],
  764. [ 1., -1., 0., -1.]])
  765. """
  766. import numpy as np
  767. if nodelist is None:
  768. nodelist = list(G)
  769. nlen = len(nodelist)
  770. # Input validation
  771. nodeset = set(nodelist)
  772. if nodeset - set(G):
  773. raise nx.NetworkXError(f"Nodes {nodeset - set(G)} in nodelist is not in G")
  774. if len(nodeset) < nlen:
  775. raise nx.NetworkXError("nodelist contains duplicates.")
  776. A = np.full((nlen, nlen), fill_value=nonedge, dtype=dtype, order=order)
  777. # Corner cases: empty nodelist or graph without any edges
  778. if nlen == 0 or G.number_of_edges() == 0:
  779. return A
  780. # If dtype is structured and weight is None, use dtype field names as
  781. # edge attributes
  782. edge_attrs = None # Only single edge attribute by default
  783. if A.dtype.names:
  784. if weight is None:
  785. edge_attrs = dtype.names
  786. else:
  787. raise ValueError(
  788. "Specifying `weight` not supported for structured dtypes\n."
  789. "To create adjacency matrices from structured dtypes, use `weight=None`."
  790. )
  791. # Map nodes to row/col in matrix
  792. idx = dict(zip(nodelist, range(nlen)))
  793. if len(nodelist) < len(G):
  794. G = G.subgraph(nodelist).copy()
  795. # Collect all edge weights and reduce with `multigraph_weights`
  796. if G.is_multigraph():
  797. if edge_attrs:
  798. raise nx.NetworkXError(
  799. "Structured arrays are not supported for MultiGraphs"
  800. )
  801. d = defaultdict(list)
  802. for u, v, wt in G.edges(data=weight, default=1.0):
  803. d[(idx[u], idx[v])].append(wt)
  804. i, j = np.array(list(d.keys())).T # indices
  805. wts = [multigraph_weight(ws) for ws in d.values()] # reduced weights
  806. else:
  807. i, j, wts = [], [], []
  808. # Special branch: multi-attr adjacency from structured dtypes
  809. if edge_attrs:
  810. # Extract edges with all data
  811. for u, v, data in G.edges(data=True):
  812. i.append(idx[u])
  813. j.append(idx[v])
  814. wts.append(data)
  815. # Map each attribute to the appropriate named field in the
  816. # structured dtype
  817. for attr in edge_attrs:
  818. attr_data = [wt.get(attr, 1.0) for wt in wts]
  819. A[attr][i, j] = attr_data
  820. if not G.is_directed():
  821. A[attr][j, i] = attr_data
  822. return A
  823. for u, v, wt in G.edges(data=weight, default=1.0):
  824. i.append(idx[u])
  825. j.append(idx[v])
  826. wts.append(wt)
  827. # Set array values with advanced indexing
  828. A[i, j] = wts
  829. if not G.is_directed():
  830. A[j, i] = wts
  831. return A
  832. def from_numpy_array(A, parallel_edges=False, create_using=None):
  833. """Returns a graph from a 2D NumPy array.
  834. The 2D NumPy array is interpreted as an adjacency matrix for the graph.
  835. Parameters
  836. ----------
  837. A : a 2D numpy.ndarray
  838. An adjacency matrix representation of a graph
  839. parallel_edges : Boolean
  840. If this is True, `create_using` is a multigraph, and `A` is an
  841. integer array, then entry *(i, j)* in the array is interpreted as the
  842. number of parallel edges joining vertices *i* and *j* in the graph.
  843. If it is False, then the entries in the array are interpreted as
  844. the weight of a single edge joining the vertices.
  845. create_using : NetworkX graph constructor, optional (default=nx.Graph)
  846. Graph type to create. If graph instance, then cleared before populated.
  847. Notes
  848. -----
  849. For directed graphs, explicitly mention create_using=nx.DiGraph,
  850. and entry i,j of A corresponds to an edge from i to j.
  851. If `create_using` is :class:`networkx.MultiGraph` or
  852. :class:`networkx.MultiDiGraph`, `parallel_edges` is True, and the
  853. entries of `A` are of type :class:`int`, then this function returns a
  854. multigraph (of the same type as `create_using`) with parallel edges.
  855. If `create_using` indicates an undirected multigraph, then only the edges
  856. indicated by the upper triangle of the array `A` will be added to the
  857. graph.
  858. If the NumPy array has a single data type for each array entry it
  859. will be converted to an appropriate Python data type.
  860. If the NumPy array has a user-specified compound data type the names
  861. of the data fields will be used as attribute keys in the resulting
  862. NetworkX graph.
  863. See Also
  864. --------
  865. to_numpy_array
  866. Examples
  867. --------
  868. Simple integer weights on edges:
  869. >>> import numpy as np
  870. >>> A = np.array([[1, 1], [2, 1]])
  871. >>> G = nx.from_numpy_array(A)
  872. >>> G.edges(data=True)
  873. EdgeDataView([(0, 0, {'weight': 1}), (0, 1, {'weight': 2}), (1, 1, {'weight': 1})])
  874. If `create_using` indicates a multigraph and the array has only integer
  875. entries and `parallel_edges` is False, then the entries will be treated
  876. as weights for edges joining the nodes (without creating parallel edges):
  877. >>> A = np.array([[1, 1], [1, 2]])
  878. >>> G = nx.from_numpy_array(A, create_using=nx.MultiGraph)
  879. >>> G[1][1]
  880. AtlasView({0: {'weight': 2}})
  881. If `create_using` indicates a multigraph and the array has only integer
  882. entries and `parallel_edges` is True, then the entries will be treated
  883. as the number of parallel edges joining those two vertices:
  884. >>> A = np.array([[1, 1], [1, 2]])
  885. >>> temp = nx.MultiGraph()
  886. >>> G = nx.from_numpy_array(A, parallel_edges=True, create_using=temp)
  887. >>> G[1][1]
  888. AtlasView({0: {'weight': 1}, 1: {'weight': 1}})
  889. User defined compound data type on edges:
  890. >>> dt = [("weight", float), ("cost", int)]
  891. >>> A = np.array([[(1.0, 2)]], dtype=dt)
  892. >>> G = nx.from_numpy_array(A)
  893. >>> G.edges()
  894. EdgeView([(0, 0)])
  895. >>> G[0][0]["cost"]
  896. 2
  897. >>> G[0][0]["weight"]
  898. 1.0
  899. """
  900. kind_to_python_type = {
  901. "f": float,
  902. "i": int,
  903. "u": int,
  904. "b": bool,
  905. "c": complex,
  906. "S": str,
  907. "U": str,
  908. "V": "void",
  909. }
  910. G = nx.empty_graph(0, create_using)
  911. if A.ndim != 2:
  912. raise nx.NetworkXError(f"Input array must be 2D, not {A.ndim}")
  913. n, m = A.shape
  914. if n != m:
  915. raise nx.NetworkXError(f"Adjacency matrix not square: nx,ny={A.shape}")
  916. dt = A.dtype
  917. try:
  918. python_type = kind_to_python_type[dt.kind]
  919. except Exception as err:
  920. raise TypeError(f"Unknown numpy data type: {dt}") from err
  921. # Make sure we get even the isolated nodes of the graph.
  922. G.add_nodes_from(range(n))
  923. # Get a list of all the entries in the array with nonzero entries. These
  924. # coordinates become edges in the graph. (convert to int from np.int64)
  925. edges = ((int(e[0]), int(e[1])) for e in zip(*A.nonzero()))
  926. # handle numpy constructed data type
  927. if python_type == "void":
  928. # Sort the fields by their offset, then by dtype, then by name.
  929. fields = sorted(
  930. (offset, dtype, name) for name, (dtype, offset) in A.dtype.fields.items()
  931. )
  932. triples = (
  933. (
  934. u,
  935. v,
  936. {
  937. name: kind_to_python_type[dtype.kind](val)
  938. for (_, dtype, name), val in zip(fields, A[u, v])
  939. },
  940. )
  941. for u, v in edges
  942. )
  943. # If the entries in the adjacency matrix are integers, the graph is a
  944. # multigraph, and parallel_edges is True, then create parallel edges, each
  945. # with weight 1, for each entry in the adjacency matrix. Otherwise, create
  946. # one edge for each positive entry in the adjacency matrix and set the
  947. # weight of that edge to be the entry in the matrix.
  948. elif python_type is int and G.is_multigraph() and parallel_edges:
  949. chain = itertools.chain.from_iterable
  950. # The following line is equivalent to:
  951. #
  952. # for (u, v) in edges:
  953. # for d in range(A[u, v]):
  954. # G.add_edge(u, v, weight=1)
  955. #
  956. triples = chain(
  957. ((u, v, {"weight": 1}) for d in range(A[u, v])) for (u, v) in edges
  958. )
  959. else: # basic data type
  960. triples = ((u, v, {"weight": python_type(A[u, v])}) for u, v in edges)
  961. # If we are creating an undirected multigraph, only add the edges from the
  962. # upper triangle of the matrix. Otherwise, add all the edges. This relies
  963. # on the fact that the vertices created in the
  964. # `_generated_weighted_edges()` function are actually the row/column
  965. # indices for the matrix `A`.
  966. #
  967. # Without this check, we run into a problem where each edge is added twice
  968. # when `G.add_edges_from()` is invoked below.
  969. if G.is_multigraph() and not G.is_directed():
  970. triples = ((u, v, d) for u, v, d in triples if u <= v)
  971. G.add_edges_from(triples)
  972. return G