graphml.py 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050
  1. """
  2. *******
  3. GraphML
  4. *******
  5. Read and write graphs in GraphML format.
  6. .. warning::
  7. This parser uses the standard xml library present in Python, which is
  8. insecure - see :external+python:mod:`xml` for additional information.
  9. Only parse GraphML files you trust.
  10. This implementation does not support mixed graphs (directed and undirected
  11. edges together), hyperedges, nested graphs, or ports.
  12. "GraphML is a comprehensive and easy-to-use file format for graphs. It
  13. consists of a language core to describe the structural properties of a
  14. graph and a flexible extension mechanism to add application-specific
  15. data. Its main features include support of
  16. * directed, undirected, and mixed graphs,
  17. * hypergraphs,
  18. * hierarchical graphs,
  19. * graphical representations,
  20. * references to external data,
  21. * application-specific attribute data, and
  22. * light-weight parsers.
  23. Unlike many other file formats for graphs, GraphML does not use a
  24. custom syntax. Instead, it is based on XML and hence ideally suited as
  25. a common denominator for all kinds of services generating, archiving,
  26. or processing graphs."
  27. http://graphml.graphdrawing.org/
  28. Format
  29. ------
  30. GraphML is an XML format. See
  31. http://graphml.graphdrawing.org/specification.html for the specification and
  32. http://graphml.graphdrawing.org/primer/graphml-primer.html
  33. for examples.
  34. """
  35. import warnings
  36. from collections import defaultdict
  37. import networkx as nx
  38. from networkx.utils import open_file
  39. __all__ = [
  40. "write_graphml",
  41. "read_graphml",
  42. "generate_graphml",
  43. "write_graphml_xml",
  44. "write_graphml_lxml",
  45. "parse_graphml",
  46. "GraphMLWriter",
  47. "GraphMLReader",
  48. ]
  @open_file(1, mode="wb")
  def write_graphml_xml(
      G,
      path,
      encoding="utf-8",
      prettyprint=True,
      infer_numeric_types=False,
      named_key_ids=False,
      edge_id_from_attribute=None,
  ):
      """Write G in GraphML XML format to path using the standard xml library.

      Parameters
      ----------
      G : graph
          A networkx graph
      path : file or string
          File or filename to write.
          Filenames ending in .gz or .bz2 will be compressed.
      encoding : string (optional)
          Encoding for text data.
      prettyprint : bool (optional)
          If True use line breaks and indenting in output XML.
      infer_numeric_types : boolean
          Determine if numeric types should be generalized.
          For example, if edges have both int and float 'weight' attributes,
          we infer in GraphML that both are floats.
      named_key_ids : bool (optional)
          If True use attr.name as value for key elements' id attribute.
      edge_id_from_attribute : dict key (optional)
          If provided, the graphml edge id is set by looking up the
          corresponding edge data attribute keyed by this parameter. If `None`
          or the key does not exist in edge data, the edge id is set by the
          edge key if `G` is a MultiGraph, else the edge id is left unset.

      Examples
      --------
      >>> G = nx.path_graph(4)
      >>> nx.write_graphml(G, "test.graphml")

      Notes
      -----
      This implementation does not support mixed graphs (directed
      and undirected edges together) hyperedges, nested graphs, or ports.
      """
      # Gather the writer configuration in one place, then build the whole
      # document in memory before serializing it to the (already opened) file.
      writer_options = {
          "encoding": encoding,
          "prettyprint": prettyprint,
          "infer_numeric_types": infer_numeric_types,
          "named_key_ids": named_key_ids,
          "edge_id_from_attribute": edge_id_from_attribute,
      }
      graphml_writer = GraphMLWriter(**writer_options)
      graphml_writer.add_graph_element(G)
      graphml_writer.dump(path)
  @open_file(1, mode="wb")
  def write_graphml_lxml(
      G,
      path,
      encoding="utf-8",
      prettyprint=True,
      infer_numeric_types=False,
      named_key_ids=False,
      edge_id_from_attribute=None,
  ):
      """Write G in GraphML XML format to path, preferring the lxml backend.

      This function uses the LXML framework and should be faster than
      the version using the xml library. When lxml cannot be imported the
      call is forwarded unchanged to `write_graphml_xml`.

      Parameters
      ----------
      G : graph
          A networkx graph
      path : file or string
          File or filename to write.
          Filenames ending in .gz or .bz2 will be compressed.
      encoding : string (optional)
          Encoding for text data.
      prettyprint : bool (optional)
          If True use line breaks and indenting in output XML.
      infer_numeric_types : boolean
          Determine if numeric types should be generalized.
          For example, if edges have both int and float 'weight' attributes,
          we infer in GraphML that both are floats.
      named_key_ids : bool (optional)
          If True use attr.name as value for key elements' id attribute.
      edge_id_from_attribute : dict key (optional)
          If provided, the graphml edge id is set by looking up the
          corresponding edge data attribute keyed by this parameter. If `None`
          or the key does not exist in edge data, the edge id is set by the
          edge key if `G` is a MultiGraph, else the edge id is left unset.

      Examples
      --------
      >>> G = nx.path_graph(4)
      >>> nx.write_graphml_lxml(G, "fourpath.graphml")

      Notes
      -----
      This implementation does not support mixed graphs (directed
      and undirected edges together) hyperedges, nested graphs, or ports.
      """
      try:
          # Only probing for availability; the module itself is used by
          # GraphMLWriterLxml, not here.
          import lxml.etree
      except ImportError:
          return write_graphml_xml(
              G,
              path,
              encoding=encoding,
              prettyprint=prettyprint,
              infer_numeric_types=infer_numeric_types,
              named_key_ids=named_key_ids,
              edge_id_from_attribute=edge_id_from_attribute,
          )

      # The lxml writer streams the graph to `path` while it is constructed;
      # dump() only closes the still-open root element and the output file.
      streaming_writer = GraphMLWriterLxml(
          path,
          graph=G,
          encoding=encoding,
          prettyprint=prettyprint,
          infer_numeric_types=infer_numeric_types,
          named_key_ids=named_key_ids,
          edge_id_from_attribute=edge_id_from_attribute,
      )
      streaming_writer.dump()
  def generate_graphml(
      G,
      encoding="utf-8",
      prettyprint=True,
      named_key_ids=False,
      edge_id_from_attribute=None,
  ):
      """Generate GraphML lines for G

      The document is built in memory by `GraphMLWriter`, rendered to one
      string, and then yielded line by line.

      Parameters
      ----------
      G : graph
          A networkx graph
      encoding : string (optional)
          Encoding for text data.
      prettyprint : bool (optional)
          If True use line breaks and indenting in output XML.
      named_key_ids : bool (optional)
          If True use attr.name as value for key elements' id attribute.
      edge_id_from_attribute : dict key (optional)
          If provided, the graphml edge id is set by looking up the
          corresponding edge data attribute keyed by this parameter. If `None`
          or the key does not exist in edge data, the edge id is set by the
          edge key if `G` is a MultiGraph, else the edge id is left unset.

      Examples
      --------
      >>> G = nx.path_graph(4)
      >>> linefeed = chr(10)  # linefeed = \n
      >>> s = linefeed.join(nx.generate_graphml(G))
      >>> for line in nx.generate_graphml(G):  # doctest: +SKIP
      ...     print(line)

      Notes
      -----
      This implementation does not support mixed graphs (directed and undirected
      edges together) hyperedges, nested graphs, or ports.
      """
      writer_options = {
          "encoding": encoding,
          "prettyprint": prettyprint,
          "named_key_ids": named_key_ids,
          "edge_id_from_attribute": edge_id_from_attribute,
      }
      document = GraphMLWriter(**writer_options)
      document.add_graph_element(G)
      rendered = str(document)
      yield from rendered.splitlines()
  @open_file(0, mode="rb")
  def read_graphml(path, node_type=str, edge_key_type=int, force_multigraph=False):
      """Read graph in GraphML format from path.

      Parameters
      ----------
      path : file or string
          File or filename to read.
          Filenames ending in .gz or .bz2 are handled as compressed files.
      node_type: Python type (default: str)
          Convert node ids to this type
      edge_key_type: Python type (default: int)
          Convert graphml edge ids to this type. Multigraphs use id as edge key.
          Non-multigraphs add to edge attribute dict with name "id".
      force_multigraph : bool (default: False)
          If True, return a multigraph with edge keys. If False (the default)
          return a multigraph when multiedges are in the graph.

      Returns
      -------
      graph: NetworkX graph
          If parallel edges are present or `force_multigraph=True` then
          a MultiGraph or MultiDiGraph is returned. Otherwise a Graph/DiGraph.
          The returned graph is directed if the file indicates it should be.

      Raises
      ------
      NetworkXError
          If no graph can be read from the file, even after retrying with
          the GraphML namespace added to a bare ``<graphml>`` root tag.

      Notes
      -----
      Default node and edge attributes are not propagated to each node and edge.
      They can be obtained from `G.graph` and applied to node and edge attributes
      if desired using something like this:

      >>> default_color = G.graph["node_default"]["color"]  # doctest: +SKIP
      >>> for node, data in G.nodes(data=True):  # doctest: +SKIP
      ...     if "color" not in data:
      ...         data["color"] = default_color
      >>> default_color = G.graph["edge_default"]["color"]  # doctest: +SKIP
      >>> for u, v, data in G.edges(data=True):  # doctest: +SKIP
      ...     if "color" not in data:
      ...         data["color"] = default_color

      This implementation does not support mixed graphs (directed and undirected
      edges together), hypergraphs, nested graphs, or ports.

      For multigraphs the GraphML edge "id" will be used as the edge
      key. If not specified then the "key" attribute will be used. If
      there is no "key" attribute a default NetworkX multigraph edge key
      will be provided.

      Files with the yEd "yfiles" extension can be read. The type of the node's
      shape is preserved in the `shape_type` node attribute.

      yEd compressed files ("file.graphmlz" extension) can be read by renaming
      the file to "file.graphml.gz".
      """
      reader = GraphMLReader(node_type, edge_key_type, force_multigraph)

      # A document may hold several graphs; only the first one is returned.
      graphs = list(reader(path=path))
      if graphs:
          return graphs[0]

      # Nothing was found: the root tag may lack the GraphML namespace.
      # Rewind, patch the header in the raw bytes and parse once more.
      namespaced_root = b'<graphml xmlns="http://graphml.graphdrawing.org/xmlns">'
      path.seek(0)
      patched_bytes = path.read().replace(b"<graphml>", namespaced_root)
      graphs = list(reader(string=patched_bytes))
      if not graphs:
          raise nx.NetworkXError("file not successfully read as graphml")
      return graphs[0]
  def parse_graphml(
      graphml_string, node_type=str, edge_key_type=int, force_multigraph=False
  ):
      """Read graph in GraphML format from string.

      Parameters
      ----------
      graphml_string : string
          String containing graphml information
          (e.g., contents of a graphml file).
      node_type: Python type (default: str)
          Convert node ids to this type
      edge_key_type: Python type (default: int)
          Convert graphml edge ids to this type. Multigraphs use id as edge key.
          Non-multigraphs add to edge attribute dict with name "id".
      force_multigraph : bool (default: False)
          If True, return a multigraph with edge keys. If False (the default)
          return a multigraph when multiedges are in the graph.

      Returns
      -------
      graph: NetworkX graph
          If no parallel edges are found a Graph or DiGraph is returned.
          Otherwise a MultiGraph or MultiDiGraph is returned.

      Raises
      ------
      NetworkXError
          If no graph can be read from the string, even after retrying with
          the GraphML namespace added to a bare ``<graphml>`` root tag.

      Examples
      --------
      >>> G = nx.path_graph(4)
      >>> linefeed = chr(10)  # linefeed = \n
      >>> s = linefeed.join(nx.generate_graphml(G))
      >>> H = nx.parse_graphml(s)

      Notes
      -----
      Default node and edge attributes are not propagated to each node and edge.
      They can be obtained from `G.graph` and applied to node and edge attributes
      if desired using something like this:

      >>> default_color = G.graph["node_default"]["color"]  # doctest: +SKIP
      >>> for node, data in G.nodes(data=True):  # doctest: +SKIP
      ...     if "color" not in data:
      ...         data["color"] = default_color
      >>> default_color = G.graph["edge_default"]["color"]  # doctest: +SKIP
      >>> for u, v, data in G.edges(data=True):  # doctest: +SKIP
      ...     if "color" not in data:
      ...         data["color"] = default_color

      This implementation does not support mixed graphs (directed and undirected
      edges together), hypergraphs, nested graphs, or ports.

      For multigraphs the GraphML edge "id" will be used as the edge
      key. If not specified then the "key" attribute will be used. If
      there is no "key" attribute a default NetworkX multigraph edge key
      will be provided.
      """
      reader = GraphMLReader(node_type, edge_key_type, force_multigraph)

      # A document may hold several graphs; only the first one is returned.
      graphs = list(reader(string=graphml_string))
      if graphs:
          return graphs[0]

      # Nothing was found: the root tag may lack the GraphML namespace.
      # Patch the header and parse once more.
      namespaced_root = '<graphml xmlns="http://graphml.graphdrawing.org/xmlns">'
      patched_string = graphml_string.replace("<graphml>", namespaced_root)
      graphs = list(reader(string=patched_string))
      if not graphs:
          raise nx.NetworkXError("file not successfully read as graphml")
      return graphs[0]
  class GraphML:
      """Namespace constants and type tables shared by the GraphML reader/writers.

      Subclasses call `construct_types` to populate two lookup tables:

      * ``xml_type``    -- Python (or numpy) type -> GraphML ``attr.type`` name
      * ``python_type`` -- GraphML ``attr.type`` name -> Python type
      """

      NS_GRAPHML = "http://graphml.graphdrawing.org/xmlns"
      NS_XSI = "http://www.w3.org/2001/XMLSchema-instance"
      # xmlns:y="http://www.yworks.com/xml/graphml"
      NS_Y = "http://www.yworks.com/xml/graphml"
      SCHEMALOCATION = " ".join(
          [
              "http://graphml.graphdrawing.org/xmlns",
              "http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd",
          ]
      )

      def construct_types(self):
          """Build the ``xml_type`` and ``python_type`` lookup tables.

          Both tables are built from one ordered list of ``(type, name)``
          pairs and, for duplicate keys, the last pair wins. So ``int`` is
          written as ``"long"``, ``float`` as ``"double"`` and ``str`` as
          ``"string"``, while every GraphML name maps back to a builtin
          Python type on read.
          """
          types = [
              (int, "integer"),  # for Gephi GraphML bug
              (str, "yfiles"),
              (str, "string"),
              (int, "int"),
              (int, "long"),
              (float, "float"),
              (float, "double"),
              (bool, "boolean"),
          ]

          # These additions to types allow writing numpy types.
          # numpy is optional: only a failed import is tolerated, any other
          # error is allowed to surface.
          try:
              import numpy as np
          except ImportError:
              pass
          else:
              # prepend so that python types are created upon read (last entry wins)
              # Note: `np.float_` is intentionally absent. It was an alias of
              # `np.float64` and no longer exists in NumPy 2.0, where merely
              # referencing it raises AttributeError.
              types = [
                  (np.float64, "float"),
                  (np.float32, "float"),
                  (np.float16, "float"),
                  (np.int_, "int"),
                  (np.int8, "int"),
                  (np.int16, "int"),
                  (np.int32, "int"),
                  (np.int64, "int"),
                  (np.uint8, "int"),
                  (np.uint16, "int"),
                  (np.uint32, "int"),
                  (np.uint64, "int"),
                  (np.intc, "int"),
                  (np.intp, "int"),
              ] + types

          self.xml_type = dict(types)
          self.python_type = {name: py_type for py_type, name in types}

      # This page says that data types in GraphML follow Java(TM).
      #  http://graphml.graphdrawing.org/primer/graphml-primer.html#AttributesDefinition
      # true and false are the only boolean literals:
      #  http://en.wikibooks.org/wiki/Java_Programming/Literals#Boolean_Literals
      convert_bool = {
          # We use data.lower() in actual use.
          "true": True,
          "false": False,
          # Include integer strings for convenience.
          "0": False,
          0: False,
          "1": True,
          1: True,
      }

      def get_xml_type(self, key):
          """Return the GraphML ``attr.type`` name for the Python type `key`.

          Wrapper around the xml_type dict that raises a more informative
          exception message when a user attempts to use data of a type not
          supported by GraphML.

          Raises
          ------
          TypeError
              If `key` has no entry in ``self.xml_type``.
          """
          try:
              return self.xml_type[key]
          except KeyError as err:
              # `key` is already a type object; report it directly rather
              # than `type(key)`, which would always read "<class 'type'>".
              raise TypeError(
                  f"GraphML does not support type {key} as data values."
              ) from err
  class GraphMLWriter(GraphML):
      """Build a GraphML document in memory with ``xml.etree.ElementTree``.

      Graphs are added with `add_graph_element` / `add_graphs`; the finished
      document is serialized with `dump` (to a binary stream) or ``str()``.
      """

      def __init__(
          self,
          graph=None,
          encoding="utf-8",
          prettyprint=True,
          infer_numeric_types=False,
          named_key_ids=False,
          edge_id_from_attribute=None,
      ):
          """Create the ``<graphml>`` root and optionally add `graph` to it.

          Parameters
          ----------
          graph : graph, optional
              If given, it is added immediately via `add_graph_element`.
          encoding : string
              Encoding used by `dump` and when decoding in ``__str__``.
          prettyprint : bool
              If True the tree is indented before serialization.
          infer_numeric_types : bool
              If True, pick the most general type per (attribute name, scope).
          named_key_ids : bool
              If True use the attribute name as the ``<key>`` id.
          edge_id_from_attribute : dict key, optional
              Edge data key whose value is used as the GraphML edge id.
          """
          self.construct_types()
          from xml.etree.ElementTree import Element

          self.myElement = Element

          self.infer_numeric_types = infer_numeric_types
          self.prettyprint = prettyprint
          self.named_key_ids = named_key_ids
          self.edge_id_from_attribute = edge_id_from_attribute
          self.encoding = encoding
          self.xml = self.myElement(
              "graphml",
              {
                  "xmlns": self.NS_GRAPHML,
                  "xmlns:xsi": self.NS_XSI,
                  "xsi:schemaLocation": self.SCHEMALOCATION,
              },
          )
          # (name, attr_type, scope) -> key id already emitted as <key>
          self.keys = {}
          # XML element -> list of [name, value, scope, default] still to attach
          self.attributes = defaultdict(list)
          # (str(name), scope) -> set of Python types seen for that attribute
          self.attribute_types = defaultdict(set)

          if graph is not None:
              self.add_graph_element(graph)

      def __str__(self):
          """Return the document as a string (indented first if prettyprint)."""
          from xml.etree.ElementTree import tostring

          if self.prettyprint:
              self.indent(self.xml)
          s = tostring(self.xml).decode(self.encoding)
          return s

      def attr_type(self, name, scope, value):
          """Infer the attribute type of data named name. Currently this only
          supports inference of numeric types.

          If self.infer_numeric_types is false, type is used. Otherwise, pick the
          most general of types found across all values with name and scope. This
          means edges with data named 'weight' are treated separately from nodes
          with data named 'weight'.
          """
          if not self.infer_numeric_types:
              return type(value)

          # Types are recorded under str(name) by add_attributes, so look them
          # up the same way; otherwise a non-string attribute name would hit an
          # empty set and fail.
          types = self.attribute_types.get((str(name), scope))
          if not types:
              return type(value)
          if len(types) == 1:
              return next(iter(types))

          xml_types = {self.get_xml_type(t) for t in types}
          if "string" in xml_types:
              return str
          if "float" in xml_types or "double" in xml_types:
              return float
          return int

      def get_key(self, name, attr_type, scope, default):
          """Return the ``<key>`` id for (name, attr_type, scope), creating it if new.

          A new ``<key>`` element (with an optional ``<default>`` child) is
          inserted at the front of ``self.xml``.
          """
          keys_key = (name, attr_type, scope)
          try:
              return self.keys[keys_key]
          except KeyError:
              if self.named_key_ids:
                  new_id = name
              else:
                  new_id = f"d{len(self.keys)}"

              self.keys[keys_key] = new_id
              key_kwargs = {
                  "id": new_id,
                  "for": scope,
                  "attr.name": name,
                  "attr.type": attr_type,
              }
              key_element = self.myElement("key", **key_kwargs)
              # add subelement for data default value if present
              if default is not None:
                  default_element = self.myElement("default")
                  default_element.text = str(default)
                  key_element.append(default_element)
              self.xml.insert(0, key_element)
          return new_id

      def add_data(self, name, element_type, value, scope="all", default=None):
          """
          Make a data element for an edge or a node. Keep a log of the
          type in the keys table.

          Raises
          ------
          NetworkXError
              If `element_type` is not a type known to ``self.xml_type``.
          """
          if element_type not in self.xml_type:
              raise nx.NetworkXError(
                  f"GraphML writer does not support {element_type} as data values."
              )
          keyid = self.get_key(name, self.get_xml_type(element_type), scope, default)
          data_element = self.myElement("data", key=keyid)
          data_element.text = str(value)
          return data_element

      def add_attributes(self, scope, xml_obj, data, default):
          """Appends attribute data to edges or nodes, and stores type information
          to be added later. See add_graph_element.
          """
          for k, v in data.items():
              self.attribute_types[(str(k), scope)].add(type(v))
              self.attributes[xml_obj].append([k, v, scope, default.get(k)])

      def add_nodes(self, G, graph_element):
          """Append one ``<node>`` per node of `G` and queue its attributes."""
          default = G.graph.get("node_default", {})
          for node, data in G.nodes(data=True):
              node_element = self.myElement("node", id=str(node))
              self.add_attributes("node", node_element, data, default)
              graph_element.append(node_element)

      def add_edges(self, G, graph_element):
          """Append one ``<edge>`` per edge of `G` and queue its attributes.

          The edge id comes from ``edge_id_from_attribute`` when that key is
          present in the edge data. Otherwise multigraph edges use their edge
          key and other graphs get no id.
          """
          default = G.graph.get("edge_default", {})
          id_attr = self.edge_id_from_attribute
          if G.is_multigraph():
              for u, v, key, data in G.edges(data=True, keys=True):
                  edge_id = data[id_attr] if id_attr and id_attr in data else key
                  edge_element = self.myElement(
                      "edge", source=str(u), target=str(v), id=str(edge_id)
                  )
                  self.add_attributes("edge", edge_element, data, default)
                  graph_element.append(edge_element)
          else:
              for u, v, data in G.edges(data=True):
                  attrib = {"source": str(u), "target": str(v)}
                  if id_attr and id_attr in data:
                      # select attribute to be edge id
                      attrib["id"] = str(data[id_attr])
                  # default: no edge id
                  edge_element = self.myElement("edge", **attrib)
                  self.add_attributes("edge", edge_element, data, default)
                  graph_element.append(edge_element)

      def add_graph_element(self, G):
          """
          Serialize graph G in GraphML and append it to the document.

          ``G.graph["id"]`` (if set) becomes the ``id`` of the ``<graph>``
          element and is not written as graph data. `G` itself is left
          unmodified.
          """
          default_edge_type = "directed" if G.is_directed() else "undirected"

          # Read (do not pop) the id so that writing never mutates the
          # caller's graph.
          graphid = G.graph.get("id")
          if graphid is None:
              graph_element = self.myElement("graph", edgedefault=default_edge_type)
          else:
              graph_element = self.myElement(
                  "graph", edgedefault=default_edge_type, id=str(graphid)
              )
          data = {
              k: v
              for k, v in G.graph.items()
              if k not in ("node_default", "edge_default", "id")
          }
          self.add_attributes("graph", graph_element, data, {})
          self.add_nodes(G, graph_element)
          self.add_edges(G, graph_element)

          # self.attributes contains a mapping from XML Objects to a list of
          # data that needs to be added to them.
          # We postpone processing in order to do type inference/generalization.
          # See self.attr_type
          for xml_obj, pending in self.attributes.items():
              for k, v, scope, default in pending:
                  xml_obj.append(
                      self.add_data(
                          str(k), self.attr_type(str(k), scope, v), str(v), scope, default
                      )
                  )
          # Everything queued has been attached. Forget it so a later call
          # (see add_graphs) does not append the same <data> elements again.
          self.attributes.clear()
          self.xml.append(graph_element)

      def add_graphs(self, graph_list):
          """Add many graphs to this GraphML document."""
          for G in graph_list:
              self.add_graph_element(G)

      def dump(self, stream):
          """Write the document, with an XML declaration, to `stream`."""
          from xml.etree.ElementTree import ElementTree

          if self.prettyprint:
              self.indent(self.xml)
          document = ElementTree(self.xml)
          document.write(stream, encoding=self.encoding, xml_declaration=True)

      def indent(self, elem, level=0):
          """In-place prettyprint formatter: set text/tail whitespace recursively."""
          i = "\n" + level * " "
          if len(elem):
              if not elem.text or not elem.text.strip():
                  elem.text = i + " "
              if not elem.tail or not elem.tail.strip():
                  elem.tail = i
              child = None
              for child in elem:
                  self.indent(child, level + 1)
              # The last child is followed by the parent's closing tag, so its
              # tail goes back to the parent's indentation level.
              if not child.tail or not child.tail.strip():
                  child.tail = i
          else:
              if level and (not elem.tail or not elem.tail.strip()):
                  elem.tail = i
  class IncrementalElement:
      """Element-like adapter over an incremental XML writer.

      Only the calls that GraphMLWriter makes on a parent element are
      provided; this is not meant to be a complete Element implementation.
      """

      def __init__(self, xml, prettyprint):
          # `xml` is the underlying incremental writer; `prettyprint` is
          # forwarded on every write.
          self.xml, self.prettyprint = xml, prettyprint

      def append(self, element):
          """Write `element` straight to the underlying writer."""
          sink = self.xml
          sink.write(element, pretty_print=self.prettyprint)
  class GraphMLWriterLxml(GraphMLWriter):
      """GraphML writer that streams the document to `path` using lxml.

      Unlike GraphMLWriter, nodes and edges are written as they are created
      instead of being held in one in-memory tree. `dump` only closes the
      root element and the output file.
      """

      def __init__(
          self,
          path,
          graph=None,
          encoding="utf-8",
          prettyprint=True,
          infer_numeric_types=False,
          named_key_ids=False,
          edge_id_from_attribute=None,
      ):
          """Open `path` for incremental writing and start the ``<graphml>`` root.

          Parameters
          ----------
          path : file or string
              Target handed to ``lxml.etree.xmlfile``.
          graph : graph, optional
              If given, it is written immediately via `add_graph_element`.
          encoding, prettyprint, infer_numeric_types, named_key_ids,
          edge_id_from_attribute
              Same meaning as for GraphMLWriter.
          """
          self.construct_types()
          import lxml.etree as lxmletree

          self.myElement = lxmletree.Element

          self._encoding = encoding
          self._prettyprint = prettyprint
          self.named_key_ids = named_key_ids
          self.edge_id_from_attribute = edge_id_from_attribute
          self.infer_numeric_types = infer_numeric_types

          # The context managers are entered here and exited in dump().
          self._xml_base = lxmletree.xmlfile(path, encoding=encoding)
          self._xml = self._xml_base.__enter__()
          self._xml.write_declaration()

          # We need to have a xml variable that support insertion. This call is
          # used for adding the keys to the document.
          # We will store those keys in a plain list, and then write them out
          # just before the graph element they belong to.
          self.xml = []
          self._keys = self.xml
          self._graphml = self._xml.element(
              "graphml",
              {
                  "xmlns": self.NS_GRAPHML,
                  "xmlns:xsi": self.NS_XSI,
                  "xsi:schemaLocation": self.SCHEMALOCATION,
              },
          )
          self._graphml.__enter__()
          self.keys = {}
          self.attribute_types = defaultdict(set)

          if graph is not None:
              self.add_graph_element(graph)

      def _register_keys(self, scope, attr_dicts, defaults):
          """Record attribute types for `scope`, then create their ``<key>`` elements.

          All types are collected before any key is created so that numeric
          type inference sees every value first.
          """
          for attrs in attr_dicts:
              for name, value in attrs.items():
                  self.attribute_types[(str(name), scope)].add(type(value))
          for attrs in attr_dicts:
              for name, value in attrs.items():
                  # Look the type up under str(name), the same key it was
                  # recorded under above.
                  xml_type = self.get_xml_type(self.attr_type(str(name), scope, value))
                  self.get_key(str(name), xml_type, scope, defaults.get(name))

      def add_graph_element(self, G):
          """
          Serialize graph G in GraphML to the stream.

          ``G.graph["id"]`` (if set) becomes the ``id`` of the ``<graph>``
          element and is not written as graph data. `G` itself is left
          unmodified.
          """
          default_edge_type = "directed" if G.is_directed() else "undirected"

          # Read (do not pop) the id so that writing never mutates the
          # caller's graph.
          graphid = G.graph.get("id")
          if graphid is None:
              graph_element = self._xml.element("graph", edgedefault=default_edge_type)
          else:
              graph_element = self._xml.element(
                  "graph", edgedefault=default_edge_type, id=str(graphid)
              )

          # gather attributes types for the whole graph
          # to find the most general numeric format needed.
          # Then pass through attributes to create key_id for each.
          graphdata = {
              k: v
              for k, v in G.graph.items()
              if k not in ("node_default", "edge_default", "id")
          }
          node_default = G.graph.get("node_default", {})
          edge_default = G.graph.get("edge_default", {})

          # Graph attributes, then nodes, then edges (the same order in which
          # the elements themselves are written below).
          self._register_keys("graph", [graphdata], {})
          self._register_keys("node", [d for _, d in G.nodes(data=True)], node_default)
          self._register_keys(
              "edge", [d for _, _, d in G.edges(data=True)], edge_default
          )

          # Now add attribute keys to the xml file
          for key in self.xml:
              self._xml.write(key, pretty_print=self._prettyprint)
          # The pending keys are on disk now. Empty the list so a later call
          # does not write the same <key> elements a second time.
          del self.xml[:]

          # The incremental_writer writes each node/edge as it is created
          incremental_writer = IncrementalElement(self._xml, self._prettyprint)
          with graph_element:
              self.add_attributes("graph", incremental_writer, graphdata, {})
              self.add_nodes(G, incremental_writer)  # adds attributes too
              self.add_edges(G, incremental_writer)  # adds attributes too

      def add_attributes(self, scope, xml_obj, data, default):
          """Appends attribute data."""
          for k, v in data.items():
              data_element = self.add_data(
                  str(k), self.attr_type(str(k), scope, v), str(v), scope, default.get(k)
              )
              xml_obj.append(data_element)

      def __str__(self):
          # The document is streamed to the file, so there is no in-memory
          # tree to render; fall back to the default object representation.
          return object.__str__(self)

      def dump(self):
          """Close the ``<graphml>`` root element and the underlying xmlfile."""
          self._graphml.__exit__(None, None, None)
          self._xml_base.__exit__(None, None, None)
  726. # Default writer: the lxml-based version, which falls back to write_graphml_xml if lxml is not installed.
  727. write_graphml = write_graphml_lxml
  728. class GraphMLReader(GraphML):
  729. """Read a GraphML document. Produces NetworkX graph objects."""
  730. def __init__(self, node_type=str, edge_key_type=int, force_multigraph=False):
  731. self.construct_types()
  732. self.node_type = node_type
  733. self.edge_key_type = edge_key_type
  734. self.multigraph = force_multigraph # If False, test for multiedges
  735. self.edge_ids = {} # dict mapping (u,v) tuples to edge id attributes
  736. def __call__(self, path=None, string=None):
  737. from xml.etree.ElementTree import ElementTree, fromstring
  738. if path is not None:
  739. self.xml = ElementTree(file=path)
  740. elif string is not None:
  741. self.xml = fromstring(string)
  742. else:
  743. raise ValueError("Must specify either 'path' or 'string' as kwarg")
  744. (keys, defaults) = self.find_graphml_keys(self.xml)
  745. for g in self.xml.findall(f"{{{self.NS_GRAPHML}}}graph"):
  746. yield self.make_graph(g, keys, defaults)
  747. def make_graph(self, graph_xml, graphml_keys, defaults, G=None):
  748. # set default graph type
  749. edgedefault = graph_xml.get("edgedefault", None)
  750. if G is None:
  751. if edgedefault == "directed":
  752. G = nx.MultiDiGraph()
  753. else:
  754. G = nx.MultiGraph()
  755. # set defaults for graph attributes
  756. G.graph["node_default"] = {}
  757. G.graph["edge_default"] = {}
  758. for key_id, value in defaults.items():
  759. key_for = graphml_keys[key_id]["for"]
  760. name = graphml_keys[key_id]["name"]
  761. python_type = graphml_keys[key_id]["type"]
  762. if key_for == "node":
  763. G.graph["node_default"].update({name: python_type(value)})
  764. if key_for == "edge":
  765. G.graph["edge_default"].update({name: python_type(value)})
  766. # hyperedges are not supported
  767. hyperedge = graph_xml.find(f"{{{self.NS_GRAPHML}}}hyperedge")
  768. if hyperedge is not None:
  769. raise nx.NetworkXError("GraphML reader doesn't support hyperedges")
  770. # add nodes
  771. for node_xml in graph_xml.findall(f"{{{self.NS_GRAPHML}}}node"):
  772. self.add_node(G, node_xml, graphml_keys, defaults)
  773. # add edges
  774. for edge_xml in graph_xml.findall(f"{{{self.NS_GRAPHML}}}edge"):
  775. self.add_edge(G, edge_xml, graphml_keys)
  776. # add graph data
  777. data = self.decode_data_elements(graphml_keys, graph_xml)
  778. G.graph.update(data)
  779. # switch to Graph or DiGraph if no parallel edges were found
  780. if self.multigraph:
  781. return G
  782. G = nx.DiGraph(G) if G.is_directed() else nx.Graph(G)
  783. # add explicit edge "id" from file as attribute in NX graph.
  784. nx.set_edge_attributes(G, values=self.edge_ids, name="id")
  785. return G
  786. def add_node(self, G, node_xml, graphml_keys, defaults):
  787. """Add a node to the graph."""
  788. # warn on finding unsupported ports tag
  789. ports = node_xml.find(f"{{{self.NS_GRAPHML}}}port")
  790. if ports is not None:
  791. warnings.warn("GraphML port tag not supported.")
  792. # find the node by id and cast it to the appropriate type
  793. node_id = self.node_type(node_xml.get("id"))
  794. # get data/attributes for node
  795. data = self.decode_data_elements(graphml_keys, node_xml)
  796. G.add_node(node_id, **data)
  797. # get child nodes
  798. if node_xml.attrib.get("yfiles.foldertype") == "group":
  799. graph_xml = node_xml.find(f"{{{self.NS_GRAPHML}}}graph")
  800. self.make_graph(graph_xml, graphml_keys, defaults, G)
  801. def add_edge(self, G, edge_element, graphml_keys):
  802. """Add an edge to the graph."""
  803. # warn on finding unsupported ports tag
  804. ports = edge_element.find(f"{{{self.NS_GRAPHML}}}port")
  805. if ports is not None:
  806. warnings.warn("GraphML port tag not supported.")
  807. # raise error if we find mixed directed and undirected edges
  808. directed = edge_element.get("directed")
  809. if G.is_directed() and directed == "false":
  810. msg = "directed=false edge found in directed graph."
  811. raise nx.NetworkXError(msg)
  812. if (not G.is_directed()) and directed == "true":
  813. msg = "directed=true edge found in undirected graph."
  814. raise nx.NetworkXError(msg)
  815. source = self.node_type(edge_element.get("source"))
  816. target = self.node_type(edge_element.get("target"))
  817. data = self.decode_data_elements(graphml_keys, edge_element)
  818. # GraphML stores edge ids as an attribute
  819. # NetworkX uses them as keys in multigraphs too if no key
  820. # attribute is specified
  821. edge_id = edge_element.get("id")
  822. if edge_id:
  823. # self.edge_ids is used by `make_graph` method for non-multigraphs
  824. self.edge_ids[source, target] = edge_id
  825. try:
  826. edge_id = self.edge_key_type(edge_id)
  827. except ValueError: # Could not convert.
  828. pass
  829. else:
  830. edge_id = data.get("key")
  831. if G.has_edge(source, target):
  832. # mark this as a multigraph
  833. self.multigraph = True
  834. # Use add_edges_from to avoid error with add_edge when `'key' in data`
  835. # Note there is only one edge here...
  836. G.add_edges_from([(source, target, edge_id, data)])
  837. def decode_data_elements(self, graphml_keys, obj_xml):
  838. """Use the key information to decode the data XML if present."""
  839. data = {}
  840. for data_element in obj_xml.findall(f"{{{self.NS_GRAPHML}}}data"):
  841. key = data_element.get("key")
  842. try:
  843. data_name = graphml_keys[key]["name"]
  844. data_type = graphml_keys[key]["type"]
  845. except KeyError as err:
  846. raise nx.NetworkXError(f"Bad GraphML data: no key {key}") from err
  847. text = data_element.text
  848. # assume anything with subelements is a yfiles extension
  849. if text is not None and len(list(data_element)) == 0:
  850. if data_type == bool:
  851. # Ignore cases.
  852. # http://docs.oracle.com/javase/6/docs/api/java/lang/
  853. # Boolean.html#parseBoolean%28java.lang.String%29
  854. data[data_name] = self.convert_bool[text.lower()]
  855. else:
  856. data[data_name] = data_type(text)
  857. elif len(list(data_element)) > 0:
  858. # Assume yfiles as subelements, try to extract node_label
  859. node_label = None
  860. # set GenericNode's configuration as shape type
  861. gn = data_element.find(f"{{{self.NS_Y}}}GenericNode")
  862. if gn:
  863. data["shape_type"] = gn.get("configuration")
  864. for node_type in ["GenericNode", "ShapeNode", "SVGNode", "ImageNode"]:
  865. pref = f"{{{self.NS_Y}}}{node_type}/{{{self.NS_Y}}}"
  866. geometry = data_element.find(f"{pref}Geometry")
  867. if geometry is not None:
  868. data["x"] = geometry.get("x")
  869. data["y"] = geometry.get("y")
  870. if node_label is None:
  871. node_label = data_element.find(f"{pref}NodeLabel")
  872. shape = data_element.find(f"{pref}Shape")
  873. if shape is not None:
  874. data["shape_type"] = shape.get("type")
  875. if node_label is not None:
  876. data["label"] = node_label.text
  877. # check all the different types of edges avaivable in yEd.
  878. for edge_type in [
  879. "PolyLineEdge",
  880. "SplineEdge",
  881. "QuadCurveEdge",
  882. "BezierEdge",
  883. "ArcEdge",
  884. ]:
  885. pref = f"{{{self.NS_Y}}}{edge_type}/{{{self.NS_Y}}}"
  886. edge_label = data_element.find(f"{pref}EdgeLabel")
  887. if edge_label is not None:
  888. break
  889. if edge_label is not None:
  890. data["label"] = edge_label.text
  891. return data
  892. def find_graphml_keys(self, graph_element):
  893. """Extracts all the keys and key defaults from the xml."""
  894. graphml_keys = {}
  895. graphml_key_defaults = {}
  896. for k in graph_element.findall(f"{{{self.NS_GRAPHML}}}key"):
  897. attr_id = k.get("id")
  898. attr_type = k.get("attr.type")
  899. attr_name = k.get("attr.name")
  900. yfiles_type = k.get("yfiles.type")
  901. if yfiles_type is not None:
  902. attr_name = yfiles_type
  903. attr_type = "yfiles"
  904. if attr_type is None:
  905. attr_type = "string"
  906. warnings.warn(f"No key type for id {attr_id}. Using string")
  907. if attr_name is None:
  908. raise nx.NetworkXError(f"Unknown key for id {attr_id}.")
  909. graphml_keys[attr_id] = {
  910. "name": attr_name,
  911. "type": self.python_type[attr_type],
  912. "for": k.get("for"),
  913. }
  914. # check for "default" sub-element of key element
  915. default = k.find(f"{{{self.NS_GRAPHML}}}default")
  916. if default is not None:
  917. # Handle default values identically to data element values
  918. python_type = graphml_keys[attr_id]["type"]
  919. if python_type == bool:
  920. graphml_key_defaults[attr_id] = self.convert_bool[
  921. default.text.lower()
  922. ]
  923. else:
  924. graphml_key_defaults[attr_id] = python_type(default.text)
  925. return graphml_keys, graphml_key_defaults