sparse6.py 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. # Original author: D. Eppstein, UC Irvine, August 12, 2003.
  2. # The original code at https://www.ics.uci.edu/~eppstein/PADS/ is public domain.
  3. """Functions for reading and writing graphs in the *sparse6* format.
  4. The *sparse6* file format is a space-efficient format for large sparse
  5. graphs. For small graphs or large dense graphs, use the *graph6* file
  6. format.
  7. For more information, see the `sparse6`_ homepage.
  8. .. _sparse6: https://users.cecs.anu.edu.au/~bdm/data/formats.html
  9. """
  10. import networkx as nx
  11. from networkx.exception import NetworkXError
  12. from networkx.readwrite.graph6 import data_to_n, n_to_data
  13. from networkx.utils import not_implemented_for, open_file
  14. __all__ = ["from_sparse6_bytes", "read_sparse6", "to_sparse6_bytes", "write_sparse6"]
  15. def _generate_sparse6_bytes(G, nodes, header):
  16. """Yield bytes in the sparse6 encoding of a graph.
  17. `G` is an undirected simple graph. `nodes` is the list of nodes for
  18. which the node-induced subgraph will be encoded; if `nodes` is the
  19. list of all nodes in the graph, the entire graph will be
  20. encoded. `header` is a Boolean that specifies whether to generate
  21. the header ``b'>>sparse6<<'`` before the remaining data.
  22. This function generates `bytes` objects in the following order:
  23. 1. the header (if requested),
  24. 2. the encoding of the number of nodes,
  25. 3. each character, one-at-a-time, in the encoding of the requested
  26. node-induced subgraph,
  27. 4. a newline character.
  28. This function raises :exc:`ValueError` if the graph is too large for
  29. the graph6 format (that is, greater than ``2 ** 36`` nodes).
  30. """
  31. n = len(G)
  32. if n >= 2**36:
  33. raise ValueError(
  34. "sparse6 is only defined if number of nodes is less " "than 2 ** 36"
  35. )
  36. if header:
  37. yield b">>sparse6<<"
  38. yield b":"
  39. for d in n_to_data(n):
  40. yield str.encode(chr(d + 63))
  41. k = 1
  42. while 1 << k < n:
  43. k += 1
  44. def enc(x):
  45. """Big endian k-bit encoding of x"""
  46. return [1 if (x & 1 << (k - 1 - i)) else 0 for i in range(k)]
  47. edges = sorted((max(u, v), min(u, v)) for u, v in G.edges())
  48. bits = []
  49. curv = 0
  50. for v, u in edges:
  51. if v == curv: # current vertex edge
  52. bits.append(0)
  53. bits.extend(enc(u))
  54. elif v == curv + 1: # next vertex edge
  55. curv += 1
  56. bits.append(1)
  57. bits.extend(enc(u))
  58. else: # skip to vertex v and then add edge to u
  59. curv = v
  60. bits.append(1)
  61. bits.extend(enc(v))
  62. bits.append(0)
  63. bits.extend(enc(u))
  64. if k < 6 and n == (1 << k) and ((-len(bits)) % 6) >= k and curv < (n - 1):
  65. # Padding special case: small k, n=2^k,
  66. # more than k bits of padding needed,
  67. # current vertex is not (n-1) --
  68. # appending 1111... would add a loop on (n-1)
  69. bits.append(0)
  70. bits.extend([1] * ((-len(bits)) % 6))
  71. else:
  72. bits.extend([1] * ((-len(bits)) % 6))
  73. data = [
  74. (bits[i + 0] << 5)
  75. + (bits[i + 1] << 4)
  76. + (bits[i + 2] << 3)
  77. + (bits[i + 3] << 2)
  78. + (bits[i + 4] << 1)
  79. + (bits[i + 5] << 0)
  80. for i in range(0, len(bits), 6)
  81. ]
  82. for d in data:
  83. yield str.encode(chr(d + 63))
  84. yield b"\n"
  85. def from_sparse6_bytes(string):
  86. """Read an undirected graph in sparse6 format from string.
  87. Parameters
  88. ----------
  89. string : string
  90. Data in sparse6 format
  91. Returns
  92. -------
  93. G : Graph
  94. Raises
  95. ------
  96. NetworkXError
  97. If the string is unable to be parsed in sparse6 format
  98. Examples
  99. --------
  100. >>> G = nx.from_sparse6_bytes(b":A_")
  101. >>> sorted(G.edges())
  102. [(0, 1), (0, 1), (0, 1)]
  103. See Also
  104. --------
  105. read_sparse6, write_sparse6
  106. References
  107. ----------
  108. .. [1] Sparse6 specification
  109. <https://users.cecs.anu.edu.au/~bdm/data/formats.html>
  110. """
  111. if string.startswith(b">>sparse6<<"):
  112. string = string[11:]
  113. if not string.startswith(b":"):
  114. raise NetworkXError("Expected leading colon in sparse6")
  115. chars = [c - 63 for c in string[1:]]
  116. n, data = data_to_n(chars)
  117. k = 1
  118. while 1 << k < n:
  119. k += 1
  120. def parseData():
  121. """Returns stream of pairs b[i], x[i] for sparse6 format."""
  122. chunks = iter(data)
  123. d = None # partial data word
  124. dLen = 0 # how many unparsed bits are left in d
  125. while 1:
  126. if dLen < 1:
  127. try:
  128. d = next(chunks)
  129. except StopIteration:
  130. return
  131. dLen = 6
  132. dLen -= 1
  133. b = (d >> dLen) & 1 # grab top remaining bit
  134. x = d & ((1 << dLen) - 1) # partially built up value of x
  135. xLen = dLen # how many bits included so far in x
  136. while xLen < k: # now grab full chunks until we have enough
  137. try:
  138. d = next(chunks)
  139. except StopIteration:
  140. return
  141. dLen = 6
  142. x = (x << 6) + d
  143. xLen += 6
  144. x = x >> (xLen - k) # shift back the extra bits
  145. dLen = xLen - k
  146. yield b, x
  147. v = 0
  148. G = nx.MultiGraph()
  149. G.add_nodes_from(range(n))
  150. multigraph = False
  151. for b, x in parseData():
  152. if b == 1:
  153. v += 1
  154. # padding with ones can cause overlarge number here
  155. if x >= n or v >= n:
  156. break
  157. elif x > v:
  158. v = x
  159. else:
  160. if G.has_edge(x, v):
  161. multigraph = True
  162. G.add_edge(x, v)
  163. if not multigraph:
  164. G = nx.Graph(G)
  165. return G
  166. def to_sparse6_bytes(G, nodes=None, header=True):
  167. """Convert an undirected graph to bytes in sparse6 format.
  168. Parameters
  169. ----------
  170. G : Graph (undirected)
  171. nodes: list or iterable
  172. Nodes are labeled 0...n-1 in the order provided. If None the ordering
  173. given by ``G.nodes()`` is used.
  174. header: bool
  175. If True add '>>sparse6<<' bytes to head of data.
  176. Raises
  177. ------
  178. NetworkXNotImplemented
  179. If the graph is directed.
  180. ValueError
  181. If the graph has at least ``2 ** 36`` nodes; the sparse6 format
  182. is only defined for graphs of order less than ``2 ** 36``.
  183. Examples
  184. --------
  185. >>> nx.to_sparse6_bytes(nx.path_graph(2))
  186. b'>>sparse6<<:An\\n'
  187. See Also
  188. --------
  189. to_sparse6_bytes, read_sparse6, write_sparse6_bytes
  190. Notes
  191. -----
  192. The returned bytes end with a newline character.
  193. The format does not support edge or node labels.
  194. References
  195. ----------
  196. .. [1] Graph6 specification
  197. <https://users.cecs.anu.edu.au/~bdm/data/formats.html>
  198. """
  199. if nodes is not None:
  200. G = G.subgraph(nodes)
  201. G = nx.convert_node_labels_to_integers(G, ordering="sorted")
  202. return b"".join(_generate_sparse6_bytes(G, nodes, header))
  203. @open_file(0, mode="rb")
  204. def read_sparse6(path):
  205. """Read an undirected graph in sparse6 format from path.
  206. Parameters
  207. ----------
  208. path : file or string
  209. File or filename to write.
  210. Returns
  211. -------
  212. G : Graph/Multigraph or list of Graphs/MultiGraphs
  213. If the file contains multiple lines then a list of graphs is returned
  214. Raises
  215. ------
  216. NetworkXError
  217. If the string is unable to be parsed in sparse6 format
  218. Examples
  219. --------
  220. You can read a sparse6 file by giving the path to the file::
  221. >>> import tempfile
  222. >>> with tempfile.NamedTemporaryFile(delete=False) as f:
  223. ... _ = f.write(b">>sparse6<<:An\\n")
  224. ... _ = f.seek(0)
  225. ... G = nx.read_sparse6(f.name)
  226. >>> list(G.edges())
  227. [(0, 1)]
  228. You can also read a sparse6 file by giving an open file-like object::
  229. >>> import tempfile
  230. >>> with tempfile.NamedTemporaryFile() as f:
  231. ... _ = f.write(b">>sparse6<<:An\\n")
  232. ... _ = f.seek(0)
  233. ... G = nx.read_sparse6(f)
  234. >>> list(G.edges())
  235. [(0, 1)]
  236. See Also
  237. --------
  238. read_sparse6, from_sparse6_bytes
  239. References
  240. ----------
  241. .. [1] Sparse6 specification
  242. <https://users.cecs.anu.edu.au/~bdm/data/formats.html>
  243. """
  244. glist = []
  245. for line in path:
  246. line = line.strip()
  247. if not len(line):
  248. continue
  249. glist.append(from_sparse6_bytes(line))
  250. if len(glist) == 1:
  251. return glist[0]
  252. else:
  253. return glist
  254. @not_implemented_for("directed")
  255. @open_file(1, mode="wb")
  256. def write_sparse6(G, path, nodes=None, header=True):
  257. """Write graph G to given path in sparse6 format.
  258. Parameters
  259. ----------
  260. G : Graph (undirected)
  261. path : file or string
  262. File or filename to write
  263. nodes: list or iterable
  264. Nodes are labeled 0...n-1 in the order provided. If None the ordering
  265. given by G.nodes() is used.
  266. header: bool
  267. If True add '>>sparse6<<' string to head of data
  268. Raises
  269. ------
  270. NetworkXError
  271. If the graph is directed
  272. Examples
  273. --------
  274. You can write a sparse6 file by giving the path to the file::
  275. >>> import tempfile
  276. >>> with tempfile.NamedTemporaryFile(delete=False) as f:
  277. ... nx.write_sparse6(nx.path_graph(2), f.name)
  278. ... print(f.read())
  279. b'>>sparse6<<:An\\n'
  280. You can also write a sparse6 file by giving an open file-like object::
  281. >>> with tempfile.NamedTemporaryFile() as f:
  282. ... nx.write_sparse6(nx.path_graph(2), f)
  283. ... _ = f.seek(0)
  284. ... print(f.read())
  285. b'>>sparse6<<:An\\n'
  286. See Also
  287. --------
  288. read_sparse6, from_sparse6_bytes
  289. Notes
  290. -----
  291. The format does not support edge or node labels.
  292. References
  293. ----------
  294. .. [1] Sparse6 specification
  295. <https://users.cecs.anu.edu.au/~bdm/data/formats.html>
  296. """
  297. if nodes is not None:
  298. G = G.subgraph(nodes)
  299. G = nx.convert_node_labels_to_integers(G, ordering="sorted")
  300. for b in _generate_sparse6_bytes(G, nodes, header):
  301. path.write(b)