pajek.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. """
  2. *****
  3. Pajek
  4. *****
  5. Read graphs in Pajek format.
  6. This implementation handles directed and undirected graphs including
  7. those with self loops and parallel edges.
  8. Format
  9. ------
  10. See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
  11. for format information.
  12. """
  13. import warnings
  14. import networkx as nx
  15. from networkx.utils import open_file
  16. __all__ = ["read_pajek", "parse_pajek", "generate_pajek", "write_pajek"]
  17. def generate_pajek(G):
  18. """Generate lines in Pajek graph format.
  19. Parameters
  20. ----------
  21. G : graph
  22. A Networkx graph
  23. References
  24. ----------
  25. See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
  26. for format information.
  27. """
  28. if G.name == "":
  29. name = "NetworkX"
  30. else:
  31. name = G.name
  32. # Apparently many Pajek format readers can't process this line
  33. # So we'll leave it out for now.
  34. # yield '*network %s'%name
  35. # write nodes with attributes
  36. yield f"*vertices {G.order()}"
  37. nodes = list(G)
  38. # make dictionary mapping nodes to integers
  39. nodenumber = dict(zip(nodes, range(1, len(nodes) + 1)))
  40. for n in nodes:
  41. # copy node attributes and pop mandatory attributes
  42. # to avoid duplication.
  43. na = G.nodes.get(n, {}).copy()
  44. x = na.pop("x", 0.0)
  45. y = na.pop("y", 0.0)
  46. try:
  47. id = int(na.pop("id", nodenumber[n]))
  48. except ValueError as err:
  49. err.args += (
  50. (
  51. "Pajek format requires 'id' to be an int()."
  52. " Refer to the 'Relabeling nodes' section."
  53. ),
  54. )
  55. raise
  56. nodenumber[n] = id
  57. shape = na.pop("shape", "ellipse")
  58. s = " ".join(map(make_qstr, (id, n, x, y, shape)))
  59. # only optional attributes are left in na.
  60. for k, v in na.items():
  61. if isinstance(v, str) and v.strip() != "":
  62. s += f" {make_qstr(k)} {make_qstr(v)}"
  63. else:
  64. warnings.warn(
  65. f"Node attribute {k} is not processed. {('Empty attribute' if isinstance(v, str) else 'Non-string attribute')}."
  66. )
  67. yield s
  68. # write edges with attributes
  69. if G.is_directed():
  70. yield "*arcs"
  71. else:
  72. yield "*edges"
  73. for u, v, edgedata in G.edges(data=True):
  74. d = edgedata.copy()
  75. value = d.pop("weight", 1.0) # use 1 as default edge value
  76. s = " ".join(map(make_qstr, (nodenumber[u], nodenumber[v], value)))
  77. for k, v in d.items():
  78. if isinstance(v, str) and v.strip() != "":
  79. s += f" {make_qstr(k)} {make_qstr(v)}"
  80. else:
  81. warnings.warn(
  82. f"Edge attribute {k} is not processed. {('Empty attribute' if isinstance(v, str) else 'Non-string attribute')}."
  83. )
  84. yield s
  85. @open_file(1, mode="wb")
  86. def write_pajek(G, path, encoding="UTF-8"):
  87. """Write graph in Pajek format to path.
  88. Parameters
  89. ----------
  90. G : graph
  91. A Networkx graph
  92. path : file or string
  93. File or filename to write.
  94. Filenames ending in .gz or .bz2 will be compressed.
  95. Examples
  96. --------
  97. >>> G = nx.path_graph(4)
  98. >>> nx.write_pajek(G, "test.net")
  99. Warnings
  100. --------
  101. Optional node attributes and edge attributes must be non-empty strings.
  102. Otherwise it will not be written into the file. You will need to
  103. convert those attributes to strings if you want to keep them.
  104. References
  105. ----------
  106. See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
  107. for format information.
  108. """
  109. for line in generate_pajek(G):
  110. line += "\n"
  111. path.write(line.encode(encoding))
  112. @open_file(0, mode="rb")
  113. def read_pajek(path, encoding="UTF-8"):
  114. """Read graph in Pajek format from path.
  115. Parameters
  116. ----------
  117. path : file or string
  118. File or filename to write.
  119. Filenames ending in .gz or .bz2 will be uncompressed.
  120. Returns
  121. -------
  122. G : NetworkX MultiGraph or MultiDiGraph.
  123. Examples
  124. --------
  125. >>> G = nx.path_graph(4)
  126. >>> nx.write_pajek(G, "test.net")
  127. >>> G = nx.read_pajek("test.net")
  128. To create a Graph instead of a MultiGraph use
  129. >>> G1 = nx.Graph(G)
  130. References
  131. ----------
  132. See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
  133. for format information.
  134. """
  135. lines = (line.decode(encoding) for line in path)
  136. return parse_pajek(lines)
  137. def parse_pajek(lines):
  138. """Parse Pajek format graph from string or iterable.
  139. Parameters
  140. ----------
  141. lines : string or iterable
  142. Data in Pajek format.
  143. Returns
  144. -------
  145. G : NetworkX graph
  146. See Also
  147. --------
  148. read_pajek
  149. """
  150. import shlex
  151. # multigraph=False
  152. if isinstance(lines, str):
  153. lines = iter(lines.split("\n"))
  154. lines = iter([line.rstrip("\n") for line in lines])
  155. G = nx.MultiDiGraph() # are multiedges allowed in Pajek? assume yes
  156. labels = [] # in the order of the file, needed for matrix
  157. while lines:
  158. try:
  159. l = next(lines)
  160. except: # EOF
  161. break
  162. if l.lower().startswith("*network"):
  163. try:
  164. label, name = l.split(None, 1)
  165. except ValueError:
  166. # Line was not of the form: *network NAME
  167. pass
  168. else:
  169. G.graph["name"] = name
  170. elif l.lower().startswith("*vertices"):
  171. nodelabels = {}
  172. l, nnodes = l.split()
  173. for i in range(int(nnodes)):
  174. l = next(lines)
  175. try:
  176. splitline = [
  177. x.decode("utf-8") for x in shlex.split(str(l).encode("utf-8"))
  178. ]
  179. except AttributeError:
  180. splitline = shlex.split(str(l))
  181. id, label = splitline[0:2]
  182. labels.append(label)
  183. G.add_node(label)
  184. nodelabels[id] = label
  185. G.nodes[label]["id"] = id
  186. try:
  187. x, y, shape = splitline[2:5]
  188. G.nodes[label].update(
  189. {"x": float(x), "y": float(y), "shape": shape}
  190. )
  191. except:
  192. pass
  193. extra_attr = zip(splitline[5::2], splitline[6::2])
  194. G.nodes[label].update(extra_attr)
  195. elif l.lower().startswith("*edges") or l.lower().startswith("*arcs"):
  196. if l.lower().startswith("*edge"):
  197. # switch from multidigraph to multigraph
  198. G = nx.MultiGraph(G)
  199. if l.lower().startswith("*arcs"):
  200. # switch to directed with multiple arcs for each existing edge
  201. G = G.to_directed()
  202. for l in lines:
  203. try:
  204. splitline = [
  205. x.decode("utf-8") for x in shlex.split(str(l).encode("utf-8"))
  206. ]
  207. except AttributeError:
  208. splitline = shlex.split(str(l))
  209. if len(splitline) < 2:
  210. continue
  211. ui, vi = splitline[0:2]
  212. u = nodelabels.get(ui, ui)
  213. v = nodelabels.get(vi, vi)
  214. # parse the data attached to this edge and put in a dictionary
  215. edge_data = {}
  216. try:
  217. # there should always be a single value on the edge?
  218. w = splitline[2:3]
  219. edge_data.update({"weight": float(w[0])})
  220. except:
  221. pass
  222. # if there isn't, just assign a 1
  223. # edge_data.update({'value':1})
  224. extra_attr = zip(splitline[3::2], splitline[4::2])
  225. edge_data.update(extra_attr)
  226. # if G.has_edge(u,v):
  227. # multigraph=True
  228. G.add_edge(u, v, **edge_data)
  229. elif l.lower().startswith("*matrix"):
  230. G = nx.DiGraph(G)
  231. adj_list = (
  232. (labels[row], labels[col], {"weight": int(data)})
  233. for (row, line) in enumerate(lines)
  234. for (col, data) in enumerate(line.split())
  235. if int(data) != 0
  236. )
  237. G.add_edges_from(adj_list)
  238. return G
  239. def make_qstr(t):
  240. """Returns the string representation of t.
  241. Add outer double-quotes if the string has a space.
  242. """
  243. if not isinstance(t, str):
  244. t = str(t)
  245. if " " in t:
  246. t = f'"{t}"'
  247. return t