edgelist.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486
"""
**********
Edge Lists
**********
Read and write NetworkX graphs as edge lists.

The edge list format is useful for graphs with nodes that can be
meaningfully represented as strings. With the edgelist format simple
edge data can be stored but node or graph data is not. There is no way
of representing isolated nodes unless the node has a self-loop edge.

Format
------
You can read or write three formats of edge lists with these functions.

Node pairs with no data::

 1 2

Python dictionary as data::

 1 2 {'weight':7, 'color':'green'}

Arbitrary data::

 1 2 7 green
"""

# Public API of this module: one generator, one parser, and the
# file-level readers/writers built on top of them.
__all__ = [
    "generate_edgelist",
    "write_edgelist",
    "parse_edgelist",
    "read_edgelist",
    "read_weighted_edgelist",
    "write_weighted_edgelist",
]
  29. import networkx as nx
  30. from networkx.utils import open_file
  31. def generate_edgelist(G, delimiter=" ", data=True):
  32. """Generate a single line of the graph G in edge list format.
  33. Parameters
  34. ----------
  35. G : NetworkX graph
  36. delimiter : string, optional
  37. Separator for node labels
  38. data : bool or list of keys
  39. If False generate no edge data. If True use a dictionary
  40. representation of edge data. If a list of keys use a list of data
  41. values corresponding to the keys.
  42. Returns
  43. -------
  44. lines : string
  45. Lines of data in adjlist format.
  46. Examples
  47. --------
  48. >>> G = nx.lollipop_graph(4, 3)
  49. >>> G[1][2]["weight"] = 3
  50. >>> G[3][4]["capacity"] = 12
  51. >>> for line in nx.generate_edgelist(G, data=False):
  52. ... print(line)
  53. 0 1
  54. 0 2
  55. 0 3
  56. 1 2
  57. 1 3
  58. 2 3
  59. 3 4
  60. 4 5
  61. 5 6
  62. >>> for line in nx.generate_edgelist(G):
  63. ... print(line)
  64. 0 1 {}
  65. 0 2 {}
  66. 0 3 {}
  67. 1 2 {'weight': 3}
  68. 1 3 {}
  69. 2 3 {}
  70. 3 4 {'capacity': 12}
  71. 4 5 {}
  72. 5 6 {}
  73. >>> for line in nx.generate_edgelist(G, data=["weight"]):
  74. ... print(line)
  75. 0 1
  76. 0 2
  77. 0 3
  78. 1 2 3
  79. 1 3
  80. 2 3
  81. 3 4
  82. 4 5
  83. 5 6
  84. See Also
  85. --------
  86. write_adjlist, read_adjlist
  87. """
  88. if data is True:
  89. for u, v, d in G.edges(data=True):
  90. e = u, v, dict(d)
  91. yield delimiter.join(map(str, e))
  92. elif data is False:
  93. for u, v in G.edges(data=False):
  94. e = u, v
  95. yield delimiter.join(map(str, e))
  96. else:
  97. for u, v, d in G.edges(data=True):
  98. e = [u, v]
  99. try:
  100. e.extend(d[k] for k in data)
  101. except KeyError:
  102. pass # missing data for this edge, should warn?
  103. yield delimiter.join(map(str, e))
  104. @open_file(1, mode="wb")
  105. def write_edgelist(G, path, comments="#", delimiter=" ", data=True, encoding="utf-8"):
  106. """Write graph as a list of edges.
  107. Parameters
  108. ----------
  109. G : graph
  110. A NetworkX graph
  111. path : file or string
  112. File or filename to write. If a file is provided, it must be
  113. opened in 'wb' mode. Filenames ending in .gz or .bz2 will be compressed.
  114. comments : string, optional
  115. The character used to indicate the start of a comment
  116. delimiter : string, optional
  117. The string used to separate values. The default is whitespace.
  118. data : bool or list, optional
  119. If False write no edge data.
  120. If True write a string representation of the edge data dictionary..
  121. If a list (or other iterable) is provided, write the keys specified
  122. in the list.
  123. encoding: string, optional
  124. Specify which encoding to use when writing file.
  125. Examples
  126. --------
  127. >>> G = nx.path_graph(4)
  128. >>> nx.write_edgelist(G, "test.edgelist")
  129. >>> G = nx.path_graph(4)
  130. >>> fh = open("test.edgelist", "wb")
  131. >>> nx.write_edgelist(G, fh)
  132. >>> nx.write_edgelist(G, "test.edgelist.gz")
  133. >>> nx.write_edgelist(G, "test.edgelist.gz", data=False)
  134. >>> G = nx.Graph()
  135. >>> G.add_edge(1, 2, weight=7, color="red")
  136. >>> nx.write_edgelist(G, "test.edgelist", data=False)
  137. >>> nx.write_edgelist(G, "test.edgelist", data=["color"])
  138. >>> nx.write_edgelist(G, "test.edgelist", data=["color", "weight"])
  139. See Also
  140. --------
  141. read_edgelist
  142. write_weighted_edgelist
  143. """
  144. for line in generate_edgelist(G, delimiter, data):
  145. line += "\n"
  146. path.write(line.encode(encoding))
  147. def parse_edgelist(
  148. lines, comments="#", delimiter=None, create_using=None, nodetype=None, data=True
  149. ):
  150. """Parse lines of an edge list representation of a graph.
  151. Parameters
  152. ----------
  153. lines : list or iterator of strings
  154. Input data in edgelist format
  155. comments : string, optional
  156. Marker for comment lines. Default is `'#'`. To specify that no character
  157. should be treated as a comment, use ``comments=None``.
  158. delimiter : string, optional
  159. Separator for node labels. Default is `None`, meaning any whitespace.
  160. create_using : NetworkX graph constructor, optional (default=nx.Graph)
  161. Graph type to create. If graph instance, then cleared before populated.
  162. nodetype : Python type, optional
  163. Convert nodes to this type. Default is `None`, meaning no conversion is
  164. performed.
  165. data : bool or list of (label,type) tuples
  166. If `False` generate no edge data or if `True` use a dictionary
  167. representation of edge data or a list tuples specifying dictionary
  168. key names and types for edge data.
  169. Returns
  170. -------
  171. G: NetworkX Graph
  172. The graph corresponding to lines
  173. Examples
  174. --------
  175. Edgelist with no data:
  176. >>> lines = ["1 2", "2 3", "3 4"]
  177. >>> G = nx.parse_edgelist(lines, nodetype=int)
  178. >>> list(G)
  179. [1, 2, 3, 4]
  180. >>> list(G.edges())
  181. [(1, 2), (2, 3), (3, 4)]
  182. Edgelist with data in Python dictionary representation:
  183. >>> lines = ["1 2 {'weight': 3}", "2 3 {'weight': 27}", "3 4 {'weight': 3.0}"]
  184. >>> G = nx.parse_edgelist(lines, nodetype=int)
  185. >>> list(G)
  186. [1, 2, 3, 4]
  187. >>> list(G.edges(data=True))
  188. [(1, 2, {'weight': 3}), (2, 3, {'weight': 27}), (3, 4, {'weight': 3.0})]
  189. Edgelist with data in a list:
  190. >>> lines = ["1 2 3", "2 3 27", "3 4 3.0"]
  191. >>> G = nx.parse_edgelist(lines, nodetype=int, data=(("weight", float),))
  192. >>> list(G)
  193. [1, 2, 3, 4]
  194. >>> list(G.edges(data=True))
  195. [(1, 2, {'weight': 3.0}), (2, 3, {'weight': 27.0}), (3, 4, {'weight': 3.0})]
  196. See Also
  197. --------
  198. read_weighted_edgelist
  199. """
  200. from ast import literal_eval
  201. G = nx.empty_graph(0, create_using)
  202. for line in lines:
  203. if comments is not None:
  204. p = line.find(comments)
  205. if p >= 0:
  206. line = line[:p]
  207. if not line:
  208. continue
  209. # split line, should have 2 or more
  210. s = line.strip().split(delimiter)
  211. if len(s) < 2:
  212. continue
  213. u = s.pop(0)
  214. v = s.pop(0)
  215. d = s
  216. if nodetype is not None:
  217. try:
  218. u = nodetype(u)
  219. v = nodetype(v)
  220. except Exception as err:
  221. raise TypeError(
  222. f"Failed to convert nodes {u},{v} to type {nodetype}."
  223. ) from err
  224. if len(d) == 0 or data is False:
  225. # no data or data type specified
  226. edgedata = {}
  227. elif data is True:
  228. # no edge types specified
  229. try: # try to evaluate as dictionary
  230. if delimiter == ",":
  231. edgedata_str = ",".join(d)
  232. else:
  233. edgedata_str = " ".join(d)
  234. edgedata = dict(literal_eval(edgedata_str.strip()))
  235. except Exception as err:
  236. raise TypeError(
  237. f"Failed to convert edge data ({d}) to dictionary."
  238. ) from err
  239. else:
  240. # convert edge data to dictionary with specified keys and type
  241. if len(d) != len(data):
  242. raise IndexError(
  243. f"Edge data {d} and data_keys {data} are not the same length"
  244. )
  245. edgedata = {}
  246. for (edge_key, edge_type), edge_value in zip(data, d):
  247. try:
  248. edge_value = edge_type(edge_value)
  249. except Exception as err:
  250. raise TypeError(
  251. f"Failed to convert {edge_key} data {edge_value} "
  252. f"to type {edge_type}."
  253. ) from err
  254. edgedata.update({edge_key: edge_value})
  255. G.add_edge(u, v, **edgedata)
  256. return G
  257. @open_file(0, mode="rb")
  258. def read_edgelist(
  259. path,
  260. comments="#",
  261. delimiter=None,
  262. create_using=None,
  263. nodetype=None,
  264. data=True,
  265. edgetype=None,
  266. encoding="utf-8",
  267. ):
  268. """Read a graph from a list of edges.
  269. Parameters
  270. ----------
  271. path : file or string
  272. File or filename to read. If a file is provided, it must be
  273. opened in 'rb' mode.
  274. Filenames ending in .gz or .bz2 will be uncompressed.
  275. comments : string, optional
  276. The character used to indicate the start of a comment. To specify that
  277. no character should be treated as a comment, use ``comments=None``.
  278. delimiter : string, optional
  279. The string used to separate values. The default is whitespace.
  280. create_using : NetworkX graph constructor, optional (default=nx.Graph)
  281. Graph type to create. If graph instance, then cleared before populated.
  282. nodetype : int, float, str, Python type, optional
  283. Convert node data from strings to specified type
  284. data : bool or list of (label,type) tuples
  285. Tuples specifying dictionary key names and types for edge data
  286. edgetype : int, float, str, Python type, optional OBSOLETE
  287. Convert edge data from strings to specified type and use as 'weight'
  288. encoding: string, optional
  289. Specify which encoding to use when reading file.
  290. Returns
  291. -------
  292. G : graph
  293. A networkx Graph or other type specified with create_using
  294. Examples
  295. --------
  296. >>> nx.write_edgelist(nx.path_graph(4), "test.edgelist")
  297. >>> G = nx.read_edgelist("test.edgelist")
  298. >>> fh = open("test.edgelist", "rb")
  299. >>> G = nx.read_edgelist(fh)
  300. >>> fh.close()
  301. >>> G = nx.read_edgelist("test.edgelist", nodetype=int)
  302. >>> G = nx.read_edgelist("test.edgelist", create_using=nx.DiGraph)
  303. Edgelist with data in a list:
  304. >>> textline = "1 2 3"
  305. >>> fh = open("test.edgelist", "w")
  306. >>> d = fh.write(textline)
  307. >>> fh.close()
  308. >>> G = nx.read_edgelist("test.edgelist", nodetype=int, data=(("weight", float),))
  309. >>> list(G)
  310. [1, 2]
  311. >>> list(G.edges(data=True))
  312. [(1, 2, {'weight': 3.0})]
  313. See parse_edgelist() for more examples of formatting.
  314. See Also
  315. --------
  316. parse_edgelist
  317. write_edgelist
  318. Notes
  319. -----
  320. Since nodes must be hashable, the function nodetype must return hashable
  321. types (e.g. int, float, str, frozenset - or tuples of those, etc.)
  322. """
  323. lines = (line if isinstance(line, str) else line.decode(encoding) for line in path)
  324. return parse_edgelist(
  325. lines,
  326. comments=comments,
  327. delimiter=delimiter,
  328. create_using=create_using,
  329. nodetype=nodetype,
  330. data=data,
  331. )
  332. def write_weighted_edgelist(G, path, comments="#", delimiter=" ", encoding="utf-8"):
  333. """Write graph G as a list of edges with numeric weights.
  334. Parameters
  335. ----------
  336. G : graph
  337. A NetworkX graph
  338. path : file or string
  339. File or filename to write. If a file is provided, it must be
  340. opened in 'wb' mode.
  341. Filenames ending in .gz or .bz2 will be compressed.
  342. comments : string, optional
  343. The character used to indicate the start of a comment
  344. delimiter : string, optional
  345. The string used to separate values. The default is whitespace.
  346. encoding: string, optional
  347. Specify which encoding to use when writing file.
  348. Examples
  349. --------
  350. >>> G = nx.Graph()
  351. >>> G.add_edge(1, 2, weight=7)
  352. >>> nx.write_weighted_edgelist(G, "test.weighted.edgelist")
  353. See Also
  354. --------
  355. read_edgelist
  356. write_edgelist
  357. read_weighted_edgelist
  358. """
  359. write_edgelist(
  360. G,
  361. path,
  362. comments=comments,
  363. delimiter=delimiter,
  364. data=("weight",),
  365. encoding=encoding,
  366. )
  367. def read_weighted_edgelist(
  368. path,
  369. comments="#",
  370. delimiter=None,
  371. create_using=None,
  372. nodetype=None,
  373. encoding="utf-8",
  374. ):
  375. """Read a graph as list of edges with numeric weights.
  376. Parameters
  377. ----------
  378. path : file or string
  379. File or filename to read. If a file is provided, it must be
  380. opened in 'rb' mode.
  381. Filenames ending in .gz or .bz2 will be uncompressed.
  382. comments : string, optional
  383. The character used to indicate the start of a comment.
  384. delimiter : string, optional
  385. The string used to separate values. The default is whitespace.
  386. create_using : NetworkX graph constructor, optional (default=nx.Graph)
  387. Graph type to create. If graph instance, then cleared before populated.
  388. nodetype : int, float, str, Python type, optional
  389. Convert node data from strings to specified type
  390. encoding: string, optional
  391. Specify which encoding to use when reading file.
  392. Returns
  393. -------
  394. G : graph
  395. A networkx Graph or other type specified with create_using
  396. Notes
  397. -----
  398. Since nodes must be hashable, the function nodetype must return hashable
  399. types (e.g. int, float, str, frozenset - or tuples of those, etc.)
  400. Example edgelist file format.
  401. With numeric edge data::
  402. # read with
  403. # >>> G=nx.read_weighted_edgelist(fh)
  404. # source target data
  405. a b 1
  406. a c 3.14159
  407. d e 42
  408. See Also
  409. --------
  410. write_weighted_edgelist
  411. """
  412. return read_edgelist(
  413. path,
  414. comments=comments,
  415. delimiter=delimiter,
  416. create_using=create_using,
  417. nodetype=nodetype,
  418. data=(("weight", float),),
  419. encoding=encoding,
  420. )