multiline_adjlist.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. """
  2. *************************
  3. Multi-line Adjacency List
  4. *************************
  5. Read and write NetworkX graphs as multi-line adjacency lists.
  6. The multi-line adjacency list format is useful for graphs with
  7. nodes that can be meaningfully represented as strings. With this format
  8. simple edge data can be stored but node or graph data is not.
  9. Format
  10. ------
  11. The first label in a line is the source node label followed by the node degree
  12. d. The next d lines are target node labels and optional edge data.
  13. That pattern repeats for all nodes in the graph.
  14. The graph with edges a-b, a-c, d-e can be represented as the following
  15. adjacency list (anything following the # in a line is a comment)::
  16. # example.multiline-adjlist
  17. a 2
  18. b
  19. c
  20. d 1
  21. e
  22. """
  23. __all__ = [
  24. "generate_multiline_adjlist",
  25. "write_multiline_adjlist",
  26. "parse_multiline_adjlist",
  27. "read_multiline_adjlist",
  28. ]
  29. import networkx as nx
  30. from networkx.utils import open_file
  31. def generate_multiline_adjlist(G, delimiter=" "):
  32. """Generate a single line of the graph G in multiline adjacency list format.
  33. Parameters
  34. ----------
  35. G : NetworkX graph
  36. delimiter : string, optional
  37. Separator for node labels
  38. Returns
  39. -------
  40. lines : string
  41. Lines of data in multiline adjlist format.
  42. Examples
  43. --------
  44. >>> G = nx.lollipop_graph(4, 3)
  45. >>> for line in nx.generate_multiline_adjlist(G):
  46. ... print(line)
  47. 0 3
  48. 1 {}
  49. 2 {}
  50. 3 {}
  51. 1 2
  52. 2 {}
  53. 3 {}
  54. 2 1
  55. 3 {}
  56. 3 1
  57. 4 {}
  58. 4 1
  59. 5 {}
  60. 5 1
  61. 6 {}
  62. 6 0
  63. See Also
  64. --------
  65. write_multiline_adjlist, read_multiline_adjlist
  66. """
  67. if G.is_directed():
  68. if G.is_multigraph():
  69. for s, nbrs in G.adjacency():
  70. nbr_edges = [
  71. (u, data)
  72. for u, datadict in nbrs.items()
  73. for key, data in datadict.items()
  74. ]
  75. deg = len(nbr_edges)
  76. yield str(s) + delimiter + str(deg)
  77. for u, d in nbr_edges:
  78. if d is None:
  79. yield str(u)
  80. else:
  81. yield str(u) + delimiter + str(d)
  82. else: # directed single edges
  83. for s, nbrs in G.adjacency():
  84. deg = len(nbrs)
  85. yield str(s) + delimiter + str(deg)
  86. for u, d in nbrs.items():
  87. if d is None:
  88. yield str(u)
  89. else:
  90. yield str(u) + delimiter + str(d)
  91. else: # undirected
  92. if G.is_multigraph():
  93. seen = set() # helper dict used to avoid duplicate edges
  94. for s, nbrs in G.adjacency():
  95. nbr_edges = [
  96. (u, data)
  97. for u, datadict in nbrs.items()
  98. if u not in seen
  99. for key, data in datadict.items()
  100. ]
  101. deg = len(nbr_edges)
  102. yield str(s) + delimiter + str(deg)
  103. for u, d in nbr_edges:
  104. if d is None:
  105. yield str(u)
  106. else:
  107. yield str(u) + delimiter + str(d)
  108. seen.add(s)
  109. else: # undirected single edges
  110. seen = set() # helper dict used to avoid duplicate edges
  111. for s, nbrs in G.adjacency():
  112. nbr_edges = [(u, d) for u, d in nbrs.items() if u not in seen]
  113. deg = len(nbr_edges)
  114. yield str(s) + delimiter + str(deg)
  115. for u, d in nbr_edges:
  116. if d is None:
  117. yield str(u)
  118. else:
  119. yield str(u) + delimiter + str(d)
  120. seen.add(s)
  121. @open_file(1, mode="wb")
  122. def write_multiline_adjlist(G, path, delimiter=" ", comments="#", encoding="utf-8"):
  123. """Write the graph G in multiline adjacency list format to path
  124. Parameters
  125. ----------
  126. G : NetworkX graph
  127. path : string or file
  128. Filename or file handle to write to.
  129. Filenames ending in .gz or .bz2 will be compressed.
  130. comments : string, optional
  131. Marker for comment lines
  132. delimiter : string, optional
  133. Separator for node labels
  134. encoding : string, optional
  135. Text encoding.
  136. Examples
  137. --------
  138. >>> G = nx.path_graph(4)
  139. >>> nx.write_multiline_adjlist(G, "test.adjlist")
  140. The path can be a file handle or a string with the name of the file. If a
  141. file handle is provided, it has to be opened in 'wb' mode.
  142. >>> fh = open("test.adjlist", "wb")
  143. >>> nx.write_multiline_adjlist(G, fh)
  144. Filenames ending in .gz or .bz2 will be compressed.
  145. >>> nx.write_multiline_adjlist(G, "test.adjlist.gz")
  146. See Also
  147. --------
  148. read_multiline_adjlist
  149. """
  150. import sys
  151. import time
  152. pargs = comments + " ".join(sys.argv)
  153. header = (
  154. f"{pargs}\n"
  155. + comments
  156. + f" GMT {time.asctime(time.gmtime())}\n"
  157. + comments
  158. + f" {G.name}\n"
  159. )
  160. path.write(header.encode(encoding))
  161. for multiline in generate_multiline_adjlist(G, delimiter):
  162. multiline += "\n"
  163. path.write(multiline.encode(encoding))
  164. def parse_multiline_adjlist(
  165. lines, comments="#", delimiter=None, create_using=None, nodetype=None, edgetype=None
  166. ):
  167. """Parse lines of a multiline adjacency list representation of a graph.
  168. Parameters
  169. ----------
  170. lines : list or iterator of strings
  171. Input data in multiline adjlist format
  172. create_using : NetworkX graph constructor, optional (default=nx.Graph)
  173. Graph type to create. If graph instance, then cleared before populated.
  174. nodetype : Python type, optional
  175. Convert nodes to this type.
  176. edgetype : Python type, optional
  177. Convert edges to this type.
  178. comments : string, optional
  179. Marker for comment lines
  180. delimiter : string, optional
  181. Separator for node labels. The default is whitespace.
  182. Returns
  183. -------
  184. G: NetworkX graph
  185. The graph corresponding to the lines in multiline adjacency list format.
  186. Examples
  187. --------
  188. >>> lines = [
  189. ... "1 2",
  190. ... "2 {'weight':3, 'name': 'Frodo'}",
  191. ... "3 {}",
  192. ... "2 1",
  193. ... "5 {'weight':6, 'name': 'Saruman'}",
  194. ... ]
  195. >>> G = nx.parse_multiline_adjlist(iter(lines), nodetype=int)
  196. >>> list(G)
  197. [1, 2, 3, 5]
  198. """
  199. from ast import literal_eval
  200. G = nx.empty_graph(0, create_using)
  201. for line in lines:
  202. p = line.find(comments)
  203. if p >= 0:
  204. line = line[:p]
  205. if not line:
  206. continue
  207. try:
  208. (u, deg) = line.strip().split(delimiter)
  209. deg = int(deg)
  210. except BaseException as err:
  211. raise TypeError(f"Failed to read node and degree on line ({line})") from err
  212. if nodetype is not None:
  213. try:
  214. u = nodetype(u)
  215. except BaseException as err:
  216. raise TypeError(
  217. f"Failed to convert node ({u}) to " f"type {nodetype}"
  218. ) from err
  219. G.add_node(u)
  220. for i in range(deg):
  221. while True:
  222. try:
  223. line = next(lines)
  224. except StopIteration as err:
  225. msg = f"Failed to find neighbor for node ({u})"
  226. raise TypeError(msg) from err
  227. p = line.find(comments)
  228. if p >= 0:
  229. line = line[:p]
  230. if line:
  231. break
  232. vlist = line.strip().split(delimiter)
  233. numb = len(vlist)
  234. if numb < 1:
  235. continue # isolated node
  236. v = vlist.pop(0)
  237. data = "".join(vlist)
  238. if nodetype is not None:
  239. try:
  240. v = nodetype(v)
  241. except BaseException as err:
  242. raise TypeError(
  243. f"Failed to convert node ({v}) " f"to type {nodetype}"
  244. ) from err
  245. if edgetype is not None:
  246. try:
  247. edgedata = {"weight": edgetype(data)}
  248. except BaseException as err:
  249. raise TypeError(
  250. f"Failed to convert edge data ({data}) " f"to type {edgetype}"
  251. ) from err
  252. else:
  253. try: # try to evaluate
  254. edgedata = literal_eval(data)
  255. except:
  256. edgedata = {}
  257. G.add_edge(u, v, **edgedata)
  258. return G
  259. @open_file(0, mode="rb")
  260. def read_multiline_adjlist(
  261. path,
  262. comments="#",
  263. delimiter=None,
  264. create_using=None,
  265. nodetype=None,
  266. edgetype=None,
  267. encoding="utf-8",
  268. ):
  269. """Read graph in multi-line adjacency list format from path.
  270. Parameters
  271. ----------
  272. path : string or file
  273. Filename or file handle to read.
  274. Filenames ending in .gz or .bz2 will be uncompressed.
  275. create_using : NetworkX graph constructor, optional (default=nx.Graph)
  276. Graph type to create. If graph instance, then cleared before populated.
  277. nodetype : Python type, optional
  278. Convert nodes to this type.
  279. edgetype : Python type, optional
  280. Convert edge data to this type.
  281. comments : string, optional
  282. Marker for comment lines
  283. delimiter : string, optional
  284. Separator for node labels. The default is whitespace.
  285. Returns
  286. -------
  287. G: NetworkX graph
  288. Examples
  289. --------
  290. >>> G = nx.path_graph(4)
  291. >>> nx.write_multiline_adjlist(G, "test.adjlist")
  292. >>> G = nx.read_multiline_adjlist("test.adjlist")
  293. The path can be a file or a string with the name of the file. If a
  294. file s provided, it has to be opened in 'rb' mode.
  295. >>> fh = open("test.adjlist", "rb")
  296. >>> G = nx.read_multiline_adjlist(fh)
  297. Filenames ending in .gz or .bz2 will be compressed.
  298. >>> nx.write_multiline_adjlist(G, "test.adjlist.gz")
  299. >>> G = nx.read_multiline_adjlist("test.adjlist.gz")
  300. The optional nodetype is a function to convert node strings to nodetype.
  301. For example
  302. >>> G = nx.read_multiline_adjlist("test.adjlist", nodetype=int)
  303. will attempt to convert all nodes to integer type.
  304. The optional edgetype is a function to convert edge data strings to
  305. edgetype.
  306. >>> G = nx.read_multiline_adjlist("test.adjlist")
  307. The optional create_using parameter is a NetworkX graph container.
  308. The default is Graph(), an undirected graph. To read the data as
  309. a directed graph use
  310. >>> G = nx.read_multiline_adjlist("test.adjlist", create_using=nx.DiGraph)
  311. Notes
  312. -----
  313. This format does not store graph, node, or edge data.
  314. See Also
  315. --------
  316. write_multiline_adjlist
  317. """
  318. lines = (line.decode(encoding) for line in path)
  319. return parse_multiline_adjlist(
  320. lines,
  321. comments=comments,
  322. delimiter=delimiter,
  323. create_using=create_using,
  324. nodetype=nodetype,
  325. edgetype=edgetype,
  326. )