gml.py 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858
  1. """
  2. Read graphs in GML format.
  3. "GML, the Graph Modelling Language, is our proposal for a portable
  4. file format for graphs. GML's key features are portability, simple
  5. syntax, extensibility and flexibility. A GML file consists of a
  6. hierarchical key-value lists. Graphs can be annotated with arbitrary
  7. data structures. The idea for a common file format was born at the
  8. GD'95; this proposal is the outcome of many discussions. GML is the
  9. standard file format in the Graphlet graph editor system. It has been
  10. overtaken and adapted by several other systems for drawing graphs."
  11. GML files are stored using a 7-bit ASCII encoding with any extended
  12. ASCII characters (iso8859-1) appearing as HTML character entities.
  13. You will need to give some thought into how the exported data should
  14. interact with different languages and even different Python versions.
  15. Re-importing from gml is also a concern.
  16. Without specifying a `stringizer`/`destringizer`, the code is capable of
  17. writing `int`/`float`/`str`/`dict`/`list` data as required by the GML
  18. specification. For writing other data types, and for reading data other
  19. than `str` you need to explicitly supply a `stringizer`/`destringizer`.
  20. For additional documentation on the GML file format, please see the
  21. `GML website <https://web.archive.org/web/20190207140002/http://www.fim.uni-passau.de/index.php?id=17297&L=1>`_.
  22. Several example graphs in GML format may be found on Mark Newman's
  23. `Network data page <http://www-personal.umich.edu/~mejn/netdata/>`_.
  24. """
  25. import html.entities as htmlentitydefs
  26. import re
  27. import warnings
  28. from ast import literal_eval
  29. from collections import defaultdict
  30. from enum import Enum
  31. from io import StringIO
  32. from typing import Any, NamedTuple
  33. import networkx as nx
  34. from networkx.exception import NetworkXError
  35. from networkx.utils import open_file
  36. __all__ = ["read_gml", "parse_gml", "generate_gml", "write_gml"]
  37. def escape(text):
  38. """Use XML character references to escape characters.
  39. Use XML character references for unprintable or non-ASCII
  40. characters, double quotes and ampersands in a string
  41. """
  42. def fixup(m):
  43. ch = m.group(0)
  44. return "&#" + str(ord(ch)) + ";"
  45. text = re.sub('[^ -~]|[&"]', fixup, text)
  46. return text if isinstance(text, str) else str(text)
  47. def unescape(text):
  48. """Replace XML character references with the referenced characters"""
  49. def fixup(m):
  50. text = m.group(0)
  51. if text[1] == "#":
  52. # Character reference
  53. if text[2] == "x":
  54. code = int(text[3:-1], 16)
  55. else:
  56. code = int(text[2:-1])
  57. else:
  58. # Named entity
  59. try:
  60. code = htmlentitydefs.name2codepoint[text[1:-1]]
  61. except KeyError:
  62. return text # leave unchanged
  63. try:
  64. return chr(code)
  65. except (ValueError, OverflowError):
  66. return text # leave unchanged
  67. return re.sub("&(?:[0-9A-Za-z]+|#(?:[0-9]+|x[0-9A-Fa-f]+));", fixup, text)
  68. def literal_destringizer(rep):
  69. """Convert a Python literal to the value it represents.
  70. Parameters
  71. ----------
  72. rep : string
  73. A Python literal.
  74. Returns
  75. -------
  76. value : object
  77. The value of the Python literal.
  78. Raises
  79. ------
  80. ValueError
  81. If `rep` is not a Python literal.
  82. """
  83. msg = "literal_destringizer is deprecated and will be removed in 3.0."
  84. warnings.warn(msg, DeprecationWarning)
  85. if isinstance(rep, str):
  86. orig_rep = rep
  87. try:
  88. return literal_eval(rep)
  89. except SyntaxError as err:
  90. raise ValueError(f"{orig_rep!r} is not a valid Python literal") from err
  91. else:
  92. raise ValueError(f"{rep!r} is not a string")
  93. @open_file(0, mode="rb")
  94. def read_gml(path, label="label", destringizer=None):
  95. """Read graph in GML format from `path`.
  96. Parameters
  97. ----------
  98. path : filename or filehandle
  99. The filename or filehandle to read from.
  100. label : string, optional
  101. If not None, the parsed nodes will be renamed according to node
  102. attributes indicated by `label`. Default value: 'label'.
  103. destringizer : callable, optional
  104. A `destringizer` that recovers values stored as strings in GML. If it
  105. cannot convert a string to a value, a `ValueError` is raised. Default
  106. value : None.
  107. Returns
  108. -------
  109. G : NetworkX graph
  110. The parsed graph.
  111. Raises
  112. ------
  113. NetworkXError
  114. If the input cannot be parsed.
  115. See Also
  116. --------
  117. write_gml, parse_gml
  118. literal_destringizer
  119. Notes
  120. -----
  121. GML files are stored using a 7-bit ASCII encoding with any extended
  122. ASCII characters (iso8859-1) appearing as HTML character entities.
  123. Without specifying a `stringizer`/`destringizer`, the code is capable of
  124. writing `int`/`float`/`str`/`dict`/`list` data as required by the GML
  125. specification. For writing other data types, and for reading data other
  126. than `str` you need to explicitly supply a `stringizer`/`destringizer`.
  127. For additional documentation on the GML file format, please see the
  128. `GML url <https://web.archive.org/web/20190207140002/http://www.fim.uni-passau.de/index.php?id=17297&L=1>`_.
  129. See the module docstring :mod:`networkx.readwrite.gml` for more details.
  130. Examples
  131. --------
  132. >>> G = nx.path_graph(4)
  133. >>> nx.write_gml(G, "test.gml")
  134. GML values are interpreted as strings by default:
  135. >>> H = nx.read_gml("test.gml")
  136. >>> H.nodes
  137. NodeView(('0', '1', '2', '3'))
  138. When a `destringizer` is provided, GML values are converted to the provided type.
  139. For example, integer nodes can be recovered as shown below:
  140. >>> J = nx.read_gml("test.gml", destringizer=int)
  141. >>> J.nodes
  142. NodeView((0, 1, 2, 3))
  143. """
  144. def filter_lines(lines):
  145. for line in lines:
  146. try:
  147. line = line.decode("ascii")
  148. except UnicodeDecodeError as err:
  149. raise NetworkXError("input is not ASCII-encoded") from err
  150. if not isinstance(line, str):
  151. lines = str(lines)
  152. if line and line[-1] == "\n":
  153. line = line[:-1]
  154. yield line
  155. G = parse_gml_lines(filter_lines(path), label, destringizer)
  156. return G
  157. def parse_gml(lines, label="label", destringizer=None):
  158. """Parse GML graph from a string or iterable.
  159. Parameters
  160. ----------
  161. lines : string or iterable of strings
  162. Data in GML format.
  163. label : string, optional
  164. If not None, the parsed nodes will be renamed according to node
  165. attributes indicated by `label`. Default value: 'label'.
  166. destringizer : callable, optional
  167. A `destringizer` that recovers values stored as strings in GML. If it
  168. cannot convert a string to a value, a `ValueError` is raised. Default
  169. value : None.
  170. Returns
  171. -------
  172. G : NetworkX graph
  173. The parsed graph.
  174. Raises
  175. ------
  176. NetworkXError
  177. If the input cannot be parsed.
  178. See Also
  179. --------
  180. write_gml, read_gml
  181. Notes
  182. -----
  183. This stores nested GML attributes as dictionaries in the NetworkX graph,
  184. node, and edge attribute structures.
  185. GML files are stored using a 7-bit ASCII encoding with any extended
  186. ASCII characters (iso8859-1) appearing as HTML character entities.
  187. Without specifying a `stringizer`/`destringizer`, the code is capable of
  188. writing `int`/`float`/`str`/`dict`/`list` data as required by the GML
  189. specification. For writing other data types, and for reading data other
  190. than `str` you need to explicitly supply a `stringizer`/`destringizer`.
  191. For additional documentation on the GML file format, please see the
  192. `GML url <https://web.archive.org/web/20190207140002/http://www.fim.uni-passau.de/index.php?id=17297&L=1>`_.
  193. See the module docstring :mod:`networkx.readwrite.gml` for more details.
  194. """
  195. def decode_line(line):
  196. if isinstance(line, bytes):
  197. try:
  198. line.decode("ascii")
  199. except UnicodeDecodeError as err:
  200. raise NetworkXError("input is not ASCII-encoded") from err
  201. if not isinstance(line, str):
  202. line = str(line)
  203. return line
  204. def filter_lines(lines):
  205. if isinstance(lines, str):
  206. lines = decode_line(lines)
  207. lines = lines.splitlines()
  208. yield from lines
  209. else:
  210. for line in lines:
  211. line = decode_line(line)
  212. if line and line[-1] == "\n":
  213. line = line[:-1]
  214. if line.find("\n") != -1:
  215. raise NetworkXError("input line contains newline")
  216. yield line
  217. G = parse_gml_lines(filter_lines(lines), label, destringizer)
  218. return G
class Pattern(Enum):
    """Token categories produced by the tokenizer in `parse_gml_lines`.

    Each value is the index of the corresponding regular expression in the
    `patterns` list of `tokenize`, so the two must be kept in the same order.
    """

    KEYS = 0  # attribute names (unquoted identifiers)
    REALS = 1  # floating point literals
    INTS = 2  # integer literals
    STRINGS = 3  # double-quoted strings
    DICT_START = 4  # "["
    DICT_END = 5  # "]"
    COMMENT_WHITESPACE = 6  # comments and whitespace; never emitted as tokens
class Token(NamedTuple):
    """A lexical token yielded by the tokenizer in `parse_gml_lines`."""

    # Kind of token; the tokenizer uses None for the end-of-input marker.
    category: Pattern
    # Converted token value: float for reals, int for ints, otherwise the
    # matched text (None for the end-of-input marker).
    value: Any
    # 1-based line number on which the token starts.
    line: int
    # 1-based column at which the token starts.
    position: int
# Sentinel string written by `generate_gml` in front of a single-element list
# and stripped again by `parse_gml_lines`, so that a one-item list is not read
# back as a bare scalar.
LIST_START_VALUE = "_networkx_list_start"
  234. def parse_gml_lines(lines, label, destringizer):
  235. """Parse GML `lines` into a graph."""
  236. def tokenize():
  237. patterns = [
  238. r"[A-Za-z][0-9A-Za-z_]*\b", # keys
  239. # reals
  240. r"[+-]?(?:[0-9]*\.[0-9]+|[0-9]+\.[0-9]*|INF)(?:[Ee][+-]?[0-9]+)?",
  241. r"[+-]?[0-9]+", # ints
  242. r'".*?"', # strings
  243. r"\[", # dict start
  244. r"\]", # dict end
  245. r"#.*$|\s+", # comments and whitespaces
  246. ]
  247. tokens = re.compile("|".join(f"({pattern})" for pattern in patterns))
  248. lineno = 0
  249. for line in lines:
  250. length = len(line)
  251. pos = 0
  252. while pos < length:
  253. match = tokens.match(line, pos)
  254. if match is None:
  255. m = f"cannot tokenize {line[pos:]} at ({lineno + 1}, {pos + 1})"
  256. raise NetworkXError(m)
  257. for i in range(len(patterns)):
  258. group = match.group(i + 1)
  259. if group is not None:
  260. if i == 0: # keys
  261. value = group.rstrip()
  262. elif i == 1: # reals
  263. value = float(group)
  264. elif i == 2: # ints
  265. value = int(group)
  266. else:
  267. value = group
  268. if i != 6: # comments and whitespaces
  269. yield Token(Pattern(i), value, lineno + 1, pos + 1)
  270. pos += len(group)
  271. break
  272. lineno += 1
  273. yield Token(None, None, lineno + 1, 1) # EOF
  274. def unexpected(curr_token, expected):
  275. category, value, lineno, pos = curr_token
  276. value = repr(value) if value is not None else "EOF"
  277. raise NetworkXError(f"expected {expected}, found {value} at ({lineno}, {pos})")
  278. def consume(curr_token, category, expected):
  279. if curr_token.category == category:
  280. return next(tokens)
  281. unexpected(curr_token, expected)
  282. def parse_kv(curr_token):
  283. dct = defaultdict(list)
  284. while curr_token.category == Pattern.KEYS:
  285. key = curr_token.value
  286. curr_token = next(tokens)
  287. category = curr_token.category
  288. if category == Pattern.REALS or category == Pattern.INTS:
  289. value = curr_token.value
  290. curr_token = next(tokens)
  291. elif category == Pattern.STRINGS:
  292. value = unescape(curr_token.value[1:-1])
  293. if destringizer:
  294. try:
  295. value = destringizer(value)
  296. except ValueError:
  297. pass
  298. # Special handling for empty lists and tuples
  299. if value == "()":
  300. value = ()
  301. if value == "[]":
  302. value = []
  303. curr_token = next(tokens)
  304. elif category == Pattern.DICT_START:
  305. curr_token, value = parse_dict(curr_token)
  306. else:
  307. # Allow for string convertible id and label values
  308. if key in ("id", "label", "source", "target"):
  309. try:
  310. # String convert the token value
  311. value = unescape(str(curr_token.value))
  312. if destringizer:
  313. try:
  314. value = destringizer(value)
  315. except ValueError:
  316. pass
  317. curr_token = next(tokens)
  318. except Exception:
  319. msg = (
  320. "an int, float, string, '[' or string"
  321. + " convertible ASCII value for node id or label"
  322. )
  323. unexpected(curr_token, msg)
  324. # Special handling for nan and infinity. Since the gml language
  325. # defines unquoted strings as keys, the numeric and string branches
  326. # are skipped and we end up in this special branch, so we need to
  327. # convert the current token value to a float for NAN and plain INF.
  328. # +/-INF are handled in the pattern for 'reals' in tokenize(). This
  329. # allows labels and values to be nan or infinity, but not keys.
  330. elif curr_token.value in {"NAN", "INF"}:
  331. value = float(curr_token.value)
  332. curr_token = next(tokens)
  333. else: # Otherwise error out
  334. unexpected(curr_token, "an int, float, string or '['")
  335. dct[key].append(value)
  336. def clean_dict_value(value):
  337. if not isinstance(value, list):
  338. return value
  339. if len(value) == 1:
  340. return value[0]
  341. if value[0] == LIST_START_VALUE:
  342. return value[1:]
  343. return value
  344. dct = {key: clean_dict_value(value) for key, value in dct.items()}
  345. return curr_token, dct
  346. def parse_dict(curr_token):
  347. # dict start
  348. curr_token = consume(curr_token, Pattern.DICT_START, "'['")
  349. # dict contents
  350. curr_token, dct = parse_kv(curr_token)
  351. # dict end
  352. curr_token = consume(curr_token, Pattern.DICT_END, "']'")
  353. return curr_token, dct
  354. def parse_graph():
  355. curr_token, dct = parse_kv(next(tokens))
  356. if curr_token.category is not None: # EOF
  357. unexpected(curr_token, "EOF")
  358. if "graph" not in dct:
  359. raise NetworkXError("input contains no graph")
  360. graph = dct["graph"]
  361. if isinstance(graph, list):
  362. raise NetworkXError("input contains more than one graph")
  363. return graph
  364. tokens = tokenize()
  365. graph = parse_graph()
  366. directed = graph.pop("directed", False)
  367. multigraph = graph.pop("multigraph", False)
  368. if not multigraph:
  369. G = nx.DiGraph() if directed else nx.Graph()
  370. else:
  371. G = nx.MultiDiGraph() if directed else nx.MultiGraph()
  372. graph_attr = {k: v for k, v in graph.items() if k not in ("node", "edge")}
  373. G.graph.update(graph_attr)
  374. def pop_attr(dct, category, attr, i):
  375. try:
  376. return dct.pop(attr)
  377. except KeyError as err:
  378. raise NetworkXError(f"{category} #{i} has no {attr!r} attribute") from err
  379. nodes = graph.get("node", [])
  380. mapping = {}
  381. node_labels = set()
  382. for i, node in enumerate(nodes if isinstance(nodes, list) else [nodes]):
  383. id = pop_attr(node, "node", "id", i)
  384. if id in G:
  385. raise NetworkXError(f"node id {id!r} is duplicated")
  386. if label is not None and label != "id":
  387. node_label = pop_attr(node, "node", label, i)
  388. if node_label in node_labels:
  389. raise NetworkXError(f"node label {node_label!r} is duplicated")
  390. node_labels.add(node_label)
  391. mapping[id] = node_label
  392. G.add_node(id, **node)
  393. edges = graph.get("edge", [])
  394. for i, edge in enumerate(edges if isinstance(edges, list) else [edges]):
  395. source = pop_attr(edge, "edge", "source", i)
  396. target = pop_attr(edge, "edge", "target", i)
  397. if source not in G:
  398. raise NetworkXError(f"edge #{i} has undefined source {source!r}")
  399. if target not in G:
  400. raise NetworkXError(f"edge #{i} has undefined target {target!r}")
  401. if not multigraph:
  402. if not G.has_edge(source, target):
  403. G.add_edge(source, target, **edge)
  404. else:
  405. arrow = "->" if directed else "--"
  406. msg = f"edge #{i} ({source!r}{arrow}{target!r}) is duplicated"
  407. raise nx.NetworkXError(msg)
  408. else:
  409. key = edge.pop("key", None)
  410. if key is not None and G.has_edge(source, target, key):
  411. arrow = "->" if directed else "--"
  412. msg = f"edge #{i} ({source!r}{arrow}{target!r}, {key!r})"
  413. msg2 = 'Hint: If multigraph add "multigraph 1" to file header.'
  414. raise nx.NetworkXError(msg + " is duplicated\n" + msg2)
  415. G.add_edge(source, target, key, **edge)
  416. if label is not None and label != "id":
  417. G = nx.relabel_nodes(G, mapping)
  418. return G
  419. def literal_stringizer(value):
  420. """Convert a `value` to a Python literal in GML representation.
  421. Parameters
  422. ----------
  423. value : object
  424. The `value` to be converted to GML representation.
  425. Returns
  426. -------
  427. rep : string
  428. A double-quoted Python literal representing value. Unprintable
  429. characters are replaced by XML character references.
  430. Raises
  431. ------
  432. ValueError
  433. If `value` cannot be converted to GML.
  434. Notes
  435. -----
  436. `literal_stringizer` is largely the same as `repr` in terms of
  437. functionality but attempts prefix `unicode` and `bytes` literals with
  438. `u` and `b` to provide better interoperability of data generated by
  439. Python 2 and Python 3.
  440. The original value can be recovered using the
  441. :func:`networkx.readwrite.gml.literal_destringizer` function.
  442. """
  443. msg = "literal_stringizer is deprecated and will be removed in 3.0."
  444. warnings.warn(msg, DeprecationWarning)
  445. def stringize(value):
  446. if isinstance(value, (int, bool)) or value is None:
  447. if value is True: # GML uses 1/0 for boolean values.
  448. buf.write(str(1))
  449. elif value is False:
  450. buf.write(str(0))
  451. else:
  452. buf.write(str(value))
  453. elif isinstance(value, str):
  454. text = repr(value)
  455. if text[0] != "u":
  456. try:
  457. value.encode("latin1")
  458. except UnicodeEncodeError:
  459. text = "u" + text
  460. buf.write(text)
  461. elif isinstance(value, (float, complex, str, bytes)):
  462. buf.write(repr(value))
  463. elif isinstance(value, list):
  464. buf.write("[")
  465. first = True
  466. for item in value:
  467. if not first:
  468. buf.write(",")
  469. else:
  470. first = False
  471. stringize(item)
  472. buf.write("]")
  473. elif isinstance(value, tuple):
  474. if len(value) > 1:
  475. buf.write("(")
  476. first = True
  477. for item in value:
  478. if not first:
  479. buf.write(",")
  480. else:
  481. first = False
  482. stringize(item)
  483. buf.write(")")
  484. elif value:
  485. buf.write("(")
  486. stringize(value[0])
  487. buf.write(",)")
  488. else:
  489. buf.write("()")
  490. elif isinstance(value, dict):
  491. buf.write("{")
  492. first = True
  493. for key, value in value.items():
  494. if not first:
  495. buf.write(",")
  496. else:
  497. first = False
  498. stringize(key)
  499. buf.write(":")
  500. stringize(value)
  501. buf.write("}")
  502. elif isinstance(value, set):
  503. buf.write("{")
  504. first = True
  505. for item in value:
  506. if not first:
  507. buf.write(",")
  508. else:
  509. first = False
  510. stringize(item)
  511. buf.write("}")
  512. else:
  513. msg = f"{value!r} cannot be converted into a Python literal"
  514. raise ValueError(msg)
  515. buf = StringIO()
  516. stringize(value)
  517. return buf.getvalue()
  518. def generate_gml(G, stringizer=None):
  519. r"""Generate a single entry of the graph `G` in GML format.
  520. Parameters
  521. ----------
  522. G : NetworkX graph
  523. The graph to be converted to GML.
  524. stringizer : callable, optional
  525. A `stringizer` which converts non-int/non-float/non-dict values into
  526. strings. If it cannot convert a value into a string, it should raise a
  527. `ValueError` to indicate that. Default value: None.
  528. Returns
  529. -------
  530. lines: generator of strings
  531. Lines of GML data. Newlines are not appended.
  532. Raises
  533. ------
  534. NetworkXError
  535. If `stringizer` cannot convert a value into a string, or the value to
  536. convert is not a string while `stringizer` is None.
  537. See Also
  538. --------
  539. literal_stringizer
  540. Notes
  541. -----
  542. Graph attributes named 'directed', 'multigraph', 'node' or
  543. 'edge', node attributes named 'id' or 'label', edge attributes
  544. named 'source' or 'target' (or 'key' if `G` is a multigraph)
  545. are ignored because these attribute names are used to encode the graph
  546. structure.
  547. GML files are stored using a 7-bit ASCII encoding with any extended
  548. ASCII characters (iso8859-1) appearing as HTML character entities.
  549. Without specifying a `stringizer`/`destringizer`, the code is capable of
  550. writing `int`/`float`/`str`/`dict`/`list` data as required by the GML
  551. specification. For writing other data types, and for reading data other
  552. than `str` you need to explicitly supply a `stringizer`/`destringizer`.
  553. For additional documentation on the GML file format, please see the
  554. `GML url <https://web.archive.org/web/20190207140002/http://www.fim.uni-passau.de/index.php?id=17297&L=1>`_.
  555. See the module docstring :mod:`networkx.readwrite.gml` for more details.
  556. Examples
  557. --------
  558. >>> G = nx.Graph()
  559. >>> G.add_node("1")
  560. >>> print("\n".join(nx.generate_gml(G)))
  561. graph [
  562. node [
  563. id 0
  564. label "1"
  565. ]
  566. ]
  567. >>> G = nx.MultiGraph([("a", "b"), ("a", "b")])
  568. >>> print("\n".join(nx.generate_gml(G)))
  569. graph [
  570. multigraph 1
  571. node [
  572. id 0
  573. label "a"
  574. ]
  575. node [
  576. id 1
  577. label "b"
  578. ]
  579. edge [
  580. source 0
  581. target 1
  582. key 0
  583. ]
  584. edge [
  585. source 0
  586. target 1
  587. key 1
  588. ]
  589. ]
  590. """
  591. valid_keys = re.compile("^[A-Za-z][0-9A-Za-z_]*$")
  592. def stringize(key, value, ignored_keys, indent, in_list=False):
  593. if not isinstance(key, str):
  594. raise NetworkXError(f"{key!r} is not a string")
  595. if not valid_keys.match(key):
  596. raise NetworkXError(f"{key!r} is not a valid key")
  597. if not isinstance(key, str):
  598. key = str(key)
  599. if key not in ignored_keys:
  600. if isinstance(value, (int, bool)):
  601. if key == "label":
  602. yield indent + key + ' "' + str(value) + '"'
  603. elif value is True:
  604. # python bool is an instance of int
  605. yield indent + key + " 1"
  606. elif value is False:
  607. yield indent + key + " 0"
  608. # GML only supports signed 32-bit integers
  609. elif value < -(2**31) or value >= 2**31:
  610. yield indent + key + ' "' + str(value) + '"'
  611. else:
  612. yield indent + key + " " + str(value)
  613. elif isinstance(value, float):
  614. text = repr(value).upper()
  615. # GML matches INF to keys, so prepend + to INF. Use repr(float(*))
  616. # instead of string literal to future proof against changes to repr.
  617. if text == repr(float("inf")).upper():
  618. text = "+" + text
  619. else:
  620. # GML requires that a real literal contain a decimal point, but
  621. # repr may not output a decimal point when the mantissa is
  622. # integral and hence needs fixing.
  623. epos = text.rfind("E")
  624. if epos != -1 and text.find(".", 0, epos) == -1:
  625. text = text[:epos] + "." + text[epos:]
  626. if key == "label":
  627. yield indent + key + ' "' + text + '"'
  628. else:
  629. yield indent + key + " " + text
  630. elif isinstance(value, dict):
  631. yield indent + key + " ["
  632. next_indent = indent + " "
  633. for key, value in value.items():
  634. yield from stringize(key, value, (), next_indent)
  635. yield indent + "]"
  636. elif isinstance(value, (list, tuple)) and key != "label" and not in_list:
  637. if len(value) == 0:
  638. yield indent + key + " " + f'"{value!r}"'
  639. if len(value) == 1:
  640. yield indent + key + " " + f'"{LIST_START_VALUE}"'
  641. for val in value:
  642. yield from stringize(key, val, (), indent, True)
  643. else:
  644. if stringizer:
  645. try:
  646. value = stringizer(value)
  647. except ValueError as err:
  648. raise NetworkXError(
  649. f"{value!r} cannot be converted into a string"
  650. ) from err
  651. if not isinstance(value, str):
  652. raise NetworkXError(f"{value!r} is not a string")
  653. yield indent + key + ' "' + escape(value) + '"'
  654. multigraph = G.is_multigraph()
  655. yield "graph ["
  656. # Output graph attributes
  657. if G.is_directed():
  658. yield " directed 1"
  659. if multigraph:
  660. yield " multigraph 1"
  661. ignored_keys = {"directed", "multigraph", "node", "edge"}
  662. for attr, value in G.graph.items():
  663. yield from stringize(attr, value, ignored_keys, " ")
  664. # Output node data
  665. node_id = dict(zip(G, range(len(G))))
  666. ignored_keys = {"id", "label"}
  667. for node, attrs in G.nodes.items():
  668. yield " node ["
  669. yield " id " + str(node_id[node])
  670. yield from stringize("label", node, (), " ")
  671. for attr, value in attrs.items():
  672. yield from stringize(attr, value, ignored_keys, " ")
  673. yield " ]"
  674. # Output edge data
  675. ignored_keys = {"source", "target"}
  676. kwargs = {"data": True}
  677. if multigraph:
  678. ignored_keys.add("key")
  679. kwargs["keys"] = True
  680. for e in G.edges(**kwargs):
  681. yield " edge ["
  682. yield " source " + str(node_id[e[0]])
  683. yield " target " + str(node_id[e[1]])
  684. if multigraph:
  685. yield from stringize("key", e[2], (), " ")
  686. for attr, value in e[-1].items():
  687. yield from stringize(attr, value, ignored_keys, " ")
  688. yield " ]"
  689. yield "]"
  690. @open_file(1, mode="wb")
  691. def write_gml(G, path, stringizer=None):
  692. """Write a graph `G` in GML format to the file or file handle `path`.
  693. Parameters
  694. ----------
  695. G : NetworkX graph
  696. The graph to be converted to GML.
  697. path : filename or filehandle
  698. The filename or filehandle to write. Files whose names end with .gz or
  699. .bz2 will be compressed.
  700. stringizer : callable, optional
  701. A `stringizer` which converts non-int/non-float/non-dict values into
  702. strings. If it cannot convert a value into a string, it should raise a
  703. `ValueError` to indicate that. Default value: None.
  704. Raises
  705. ------
  706. NetworkXError
  707. If `stringizer` cannot convert a value into a string, or the value to
  708. convert is not a string while `stringizer` is None.
  709. See Also
  710. --------
  711. read_gml, generate_gml
  712. literal_stringizer
  713. Notes
  714. -----
  715. Graph attributes named 'directed', 'multigraph', 'node' or
  716. 'edge', node attributes named 'id' or 'label', edge attributes
  717. named 'source' or 'target' (or 'key' if `G` is a multigraph)
  718. are ignored because these attribute names are used to encode the graph
  719. structure.
  720. GML files are stored using a 7-bit ASCII encoding with any extended
  721. ASCII characters (iso8859-1) appearing as HTML character entities.
  722. Without specifying a `stringizer`/`destringizer`, the code is capable of
  723. writing `int`/`float`/`str`/`dict`/`list` data as required by the GML
  724. specification. For writing other data types, and for reading data other
  725. than `str` you need to explicitly supply a `stringizer`/`destringizer`.
  726. Note that while we allow non-standard GML to be read from a file, we make
  727. sure to write GML format. In particular, underscores are not allowed in
  728. attribute names.
  729. For additional documentation on the GML file format, please see the
  730. `GML url <https://web.archive.org/web/20190207140002/http://www.fim.uni-passau.de/index.php?id=17297&L=1>`_.
  731. See the module docstring :mod:`networkx.readwrite.gml` for more details.
  732. Examples
  733. --------
  734. >>> G = nx.path_graph(4)
  735. >>> nx.write_gml(G, "test.gml")
  736. Filenames ending in .gz or .bz2 will be compressed.
  737. >>> nx.write_gml(G, "test.gml.gz")
  738. """
  739. for line in generate_gml(G, stringizer):
  740. path.write((line + "\n").encode("ascii"))