edgelist.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. """
  2. ********************
  3. Bipartite Edge Lists
  4. ********************
  5. Read and write NetworkX graphs as bipartite edge lists.
  6. Format
  7. ------
  8. You can read or write three formats of edge lists with these functions.
  9. Node pairs with no data::
  10. 1 2
  11. Python dictionary as data::
  12. 1 2 {'weight':7, 'color':'green'}
  13. Arbitrary data::
  14. 1 2 7 green
  15. For each edge (u, v) the node u is assigned to part 0 and the node v to part 1.
  16. """
  17. __all__ = ["generate_edgelist", "write_edgelist", "parse_edgelist", "read_edgelist"]
  18. import networkx as nx
  19. from networkx.utils import not_implemented_for, open_file
  20. @open_file(1, mode="wb")
  21. def write_edgelist(G, path, comments="#", delimiter=" ", data=True, encoding="utf-8"):
  22. """Write a bipartite graph as a list of edges.
  23. Parameters
  24. ----------
  25. G : Graph
  26. A NetworkX bipartite graph
  27. path : file or string
  28. File or filename to write. If a file is provided, it must be
  29. opened in 'wb' mode. Filenames ending in .gz or .bz2 will be compressed.
  30. comments : string, optional
  31. The character used to indicate the start of a comment
  32. delimiter : string, optional
  33. The string used to separate values. The default is whitespace.
  34. data : bool or list, optional
  35. If False write no edge data.
  36. If True write a string representation of the edge data dictionary..
  37. If a list (or other iterable) is provided, write the keys specified
  38. in the list.
  39. encoding: string, optional
  40. Specify which encoding to use when writing file.
  41. Examples
  42. --------
  43. >>> G = nx.path_graph(4)
  44. >>> G.add_nodes_from([0, 2], bipartite=0)
  45. >>> G.add_nodes_from([1, 3], bipartite=1)
  46. >>> nx.write_edgelist(G, "test.edgelist")
  47. >>> fh = open("test.edgelist", "wb")
  48. >>> nx.write_edgelist(G, fh)
  49. >>> nx.write_edgelist(G, "test.edgelist.gz")
  50. >>> nx.write_edgelist(G, "test.edgelist.gz", data=False)
  51. >>> G = nx.Graph()
  52. >>> G.add_edge(1, 2, weight=7, color="red")
  53. >>> nx.write_edgelist(G, "test.edgelist", data=False)
  54. >>> nx.write_edgelist(G, "test.edgelist", data=["color"])
  55. >>> nx.write_edgelist(G, "test.edgelist", data=["color", "weight"])
  56. See Also
  57. --------
  58. write_edgelist
  59. generate_edgelist
  60. """
  61. for line in generate_edgelist(G, delimiter, data):
  62. line += "\n"
  63. path.write(line.encode(encoding))
  64. @not_implemented_for("directed")
  65. def generate_edgelist(G, delimiter=" ", data=True):
  66. """Generate a single line of the bipartite graph G in edge list format.
  67. Parameters
  68. ----------
  69. G : NetworkX graph
  70. The graph is assumed to have node attribute `part` set to 0,1 representing
  71. the two graph parts
  72. delimiter : string, optional
  73. Separator for node labels
  74. data : bool or list of keys
  75. If False generate no edge data. If True use a dictionary
  76. representation of edge data. If a list of keys use a list of data
  77. values corresponding to the keys.
  78. Returns
  79. -------
  80. lines : string
  81. Lines of data in adjlist format.
  82. Examples
  83. --------
  84. >>> from networkx.algorithms import bipartite
  85. >>> G = nx.path_graph(4)
  86. >>> G.add_nodes_from([0, 2], bipartite=0)
  87. >>> G.add_nodes_from([1, 3], bipartite=1)
  88. >>> G[1][2]["weight"] = 3
  89. >>> G[2][3]["capacity"] = 12
  90. >>> for line in bipartite.generate_edgelist(G, data=False):
  91. ... print(line)
  92. 0 1
  93. 2 1
  94. 2 3
  95. >>> for line in bipartite.generate_edgelist(G):
  96. ... print(line)
  97. 0 1 {}
  98. 2 1 {'weight': 3}
  99. 2 3 {'capacity': 12}
  100. >>> for line in bipartite.generate_edgelist(G, data=["weight"]):
  101. ... print(line)
  102. 0 1
  103. 2 1 3
  104. 2 3
  105. """
  106. try:
  107. part0 = [n for n, d in G.nodes.items() if d["bipartite"] == 0]
  108. except BaseException as err:
  109. raise AttributeError("Missing node attribute `bipartite`") from err
  110. if data is True or data is False:
  111. for n in part0:
  112. for edge in G.edges(n, data=data):
  113. yield delimiter.join(map(str, edge))
  114. else:
  115. for n in part0:
  116. for u, v, d in G.edges(n, data=True):
  117. edge = [u, v]
  118. try:
  119. edge.extend(d[k] for k in data)
  120. except KeyError:
  121. pass # missing data for this edge, should warn?
  122. yield delimiter.join(map(str, edge))
  123. def parse_edgelist(
  124. lines, comments="#", delimiter=None, create_using=None, nodetype=None, data=True
  125. ):
  126. """Parse lines of an edge list representation of a bipartite graph.
  127. Parameters
  128. ----------
  129. lines : list or iterator of strings
  130. Input data in edgelist format
  131. comments : string, optional
  132. Marker for comment lines
  133. delimiter : string, optional
  134. Separator for node labels
  135. create_using: NetworkX graph container, optional
  136. Use given NetworkX graph for holding nodes or edges.
  137. nodetype : Python type, optional
  138. Convert nodes to this type.
  139. data : bool or list of (label,type) tuples
  140. If False generate no edge data or if True use a dictionary
  141. representation of edge data or a list tuples specifying dictionary
  142. key names and types for edge data.
  143. Returns
  144. -------
  145. G: NetworkX Graph
  146. The bipartite graph corresponding to lines
  147. Examples
  148. --------
  149. Edgelist with no data:
  150. >>> from networkx.algorithms import bipartite
  151. >>> lines = ["1 2", "2 3", "3 4"]
  152. >>> G = bipartite.parse_edgelist(lines, nodetype=int)
  153. >>> sorted(G.nodes())
  154. [1, 2, 3, 4]
  155. >>> sorted(G.nodes(data=True))
  156. [(1, {'bipartite': 0}), (2, {'bipartite': 0}), (3, {'bipartite': 0}), (4, {'bipartite': 1})]
  157. >>> sorted(G.edges())
  158. [(1, 2), (2, 3), (3, 4)]
  159. Edgelist with data in Python dictionary representation:
  160. >>> lines = ["1 2 {'weight':3}", "2 3 {'weight':27}", "3 4 {'weight':3.0}"]
  161. >>> G = bipartite.parse_edgelist(lines, nodetype=int)
  162. >>> sorted(G.nodes())
  163. [1, 2, 3, 4]
  164. >>> sorted(G.edges(data=True))
  165. [(1, 2, {'weight': 3}), (2, 3, {'weight': 27}), (3, 4, {'weight': 3.0})]
  166. Edgelist with data in a list:
  167. >>> lines = ["1 2 3", "2 3 27", "3 4 3.0"]
  168. >>> G = bipartite.parse_edgelist(lines, nodetype=int, data=(("weight", float),))
  169. >>> sorted(G.nodes())
  170. [1, 2, 3, 4]
  171. >>> sorted(G.edges(data=True))
  172. [(1, 2, {'weight': 3.0}), (2, 3, {'weight': 27.0}), (3, 4, {'weight': 3.0})]
  173. See Also
  174. --------
  175. """
  176. from ast import literal_eval
  177. G = nx.empty_graph(0, create_using)
  178. for line in lines:
  179. p = line.find(comments)
  180. if p >= 0:
  181. line = line[:p]
  182. if not len(line):
  183. continue
  184. # split line, should have 2 or more
  185. s = line.strip().split(delimiter)
  186. if len(s) < 2:
  187. continue
  188. u = s.pop(0)
  189. v = s.pop(0)
  190. d = s
  191. if nodetype is not None:
  192. try:
  193. u = nodetype(u)
  194. v = nodetype(v)
  195. except BaseException as err:
  196. raise TypeError(
  197. f"Failed to convert nodes {u},{v} " f"to type {nodetype}."
  198. ) from err
  199. if len(d) == 0 or data is False:
  200. # no data or data type specified
  201. edgedata = {}
  202. elif data is True:
  203. # no edge types specified
  204. try: # try to evaluate as dictionary
  205. edgedata = dict(literal_eval(" ".join(d)))
  206. except BaseException as err:
  207. raise TypeError(
  208. f"Failed to convert edge data ({d})" f"to dictionary."
  209. ) from err
  210. else:
  211. # convert edge data to dictionary with specified keys and type
  212. if len(d) != len(data):
  213. raise IndexError(
  214. f"Edge data {d} and data_keys {data} are not the same length"
  215. )
  216. edgedata = {}
  217. for (edge_key, edge_type), edge_value in zip(data, d):
  218. try:
  219. edge_value = edge_type(edge_value)
  220. except BaseException as err:
  221. raise TypeError(
  222. f"Failed to convert {edge_key} data "
  223. f"{edge_value} to type {edge_type}."
  224. ) from err
  225. edgedata.update({edge_key: edge_value})
  226. G.add_node(u, bipartite=0)
  227. G.add_node(v, bipartite=1)
  228. G.add_edge(u, v, **edgedata)
  229. return G
  230. @open_file(0, mode="rb")
  231. def read_edgelist(
  232. path,
  233. comments="#",
  234. delimiter=None,
  235. create_using=None,
  236. nodetype=None,
  237. data=True,
  238. edgetype=None,
  239. encoding="utf-8",
  240. ):
  241. """Read a bipartite graph from a list of edges.
  242. Parameters
  243. ----------
  244. path : file or string
  245. File or filename to read. If a file is provided, it must be
  246. opened in 'rb' mode.
  247. Filenames ending in .gz or .bz2 will be uncompressed.
  248. comments : string, optional
  249. The character used to indicate the start of a comment.
  250. delimiter : string, optional
  251. The string used to separate values. The default is whitespace.
  252. create_using : Graph container, optional,
  253. Use specified container to build graph. The default is networkx.Graph,
  254. an undirected graph.
  255. nodetype : int, float, str, Python type, optional
  256. Convert node data from strings to specified type
  257. data : bool or list of (label,type) tuples
  258. Tuples specifying dictionary key names and types for edge data
  259. edgetype : int, float, str, Python type, optional OBSOLETE
  260. Convert edge data from strings to specified type and use as 'weight'
  261. encoding: string, optional
  262. Specify which encoding to use when reading file.
  263. Returns
  264. -------
  265. G : graph
  266. A networkx Graph or other type specified with create_using
  267. Examples
  268. --------
  269. >>> from networkx.algorithms import bipartite
  270. >>> G = nx.path_graph(4)
  271. >>> G.add_nodes_from([0, 2], bipartite=0)
  272. >>> G.add_nodes_from([1, 3], bipartite=1)
  273. >>> bipartite.write_edgelist(G, "test.edgelist")
  274. >>> G = bipartite.read_edgelist("test.edgelist")
  275. >>> fh = open("test.edgelist", "rb")
  276. >>> G = bipartite.read_edgelist(fh)
  277. >>> fh.close()
  278. >>> G = bipartite.read_edgelist("test.edgelist", nodetype=int)
  279. Edgelist with data in a list:
  280. >>> textline = "1 2 3"
  281. >>> fh = open("test.edgelist", "w")
  282. >>> d = fh.write(textline)
  283. >>> fh.close()
  284. >>> G = bipartite.read_edgelist(
  285. ... "test.edgelist", nodetype=int, data=(("weight", float),)
  286. ... )
  287. >>> list(G)
  288. [1, 2]
  289. >>> list(G.edges(data=True))
  290. [(1, 2, {'weight': 3.0})]
  291. See parse_edgelist() for more examples of formatting.
  292. See Also
  293. --------
  294. parse_edgelist
  295. Notes
  296. -----
  297. Since nodes must be hashable, the function nodetype must return hashable
  298. types (e.g. int, float, str, frozenset - or tuples of those, etc.)
  299. """
  300. lines = (line.decode(encoding) for line in path)
  301. return parse_edgelist(
  302. lines,
  303. comments=comments,
  304. delimiter=delimiter,
  305. create_using=create_using,
  306. nodetype=nodetype,
  307. data=data,
  308. )