123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284 |
- """
- *****
- Pajek
- *****
- Read graphs in Pajek format.
- This implementation handles directed and undirected graphs including
- those with self loops and parallel edges.
- Format
- ------
- See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
- for format information.
- """
- import warnings
- import networkx as nx
- from networkx.utils import open_file
- __all__ = ["read_pajek", "parse_pajek", "generate_pajek", "write_pajek"]
- def generate_pajek(G):
- """Generate lines in Pajek graph format.
- Parameters
- ----------
- G : graph
- A Networkx graph
- References
- ----------
- See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
- for format information.
- """
- if G.name == "":
- name = "NetworkX"
- else:
- name = G.name
- # Apparently many Pajek format readers can't process this line
- # So we'll leave it out for now.
- # yield '*network %s'%name
- # write nodes with attributes
- yield f"*vertices {G.order()}"
- nodes = list(G)
- # make dictionary mapping nodes to integers
- nodenumber = dict(zip(nodes, range(1, len(nodes) + 1)))
- for n in nodes:
- # copy node attributes and pop mandatory attributes
- # to avoid duplication.
- na = G.nodes.get(n, {}).copy()
- x = na.pop("x", 0.0)
- y = na.pop("y", 0.0)
- try:
- id = int(na.pop("id", nodenumber[n]))
- except ValueError as err:
- err.args += (
- (
- "Pajek format requires 'id' to be an int()."
- " Refer to the 'Relabeling nodes' section."
- ),
- )
- raise
- nodenumber[n] = id
- shape = na.pop("shape", "ellipse")
- s = " ".join(map(make_qstr, (id, n, x, y, shape)))
- # only optional attributes are left in na.
- for k, v in na.items():
- if isinstance(v, str) and v.strip() != "":
- s += f" {make_qstr(k)} {make_qstr(v)}"
- else:
- warnings.warn(
- f"Node attribute {k} is not processed. {('Empty attribute' if isinstance(v, str) else 'Non-string attribute')}."
- )
- yield s
- # write edges with attributes
- if G.is_directed():
- yield "*arcs"
- else:
- yield "*edges"
- for u, v, edgedata in G.edges(data=True):
- d = edgedata.copy()
- value = d.pop("weight", 1.0) # use 1 as default edge value
- s = " ".join(map(make_qstr, (nodenumber[u], nodenumber[v], value)))
- for k, v in d.items():
- if isinstance(v, str) and v.strip() != "":
- s += f" {make_qstr(k)} {make_qstr(v)}"
- else:
- warnings.warn(
- f"Edge attribute {k} is not processed. {('Empty attribute' if isinstance(v, str) else 'Non-string attribute')}."
- )
- yield s
- @open_file(1, mode="wb")
- def write_pajek(G, path, encoding="UTF-8"):
- """Write graph in Pajek format to path.
- Parameters
- ----------
- G : graph
- A Networkx graph
- path : file or string
- File or filename to write.
- Filenames ending in .gz or .bz2 will be compressed.
- Examples
- --------
- >>> G = nx.path_graph(4)
- >>> nx.write_pajek(G, "test.net")
- Warnings
- --------
- Optional node attributes and edge attributes must be non-empty strings.
- Otherwise it will not be written into the file. You will need to
- convert those attributes to strings if you want to keep them.
- References
- ----------
- See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
- for format information.
- """
- for line in generate_pajek(G):
- line += "\n"
- path.write(line.encode(encoding))
- @open_file(0, mode="rb")
- def read_pajek(path, encoding="UTF-8"):
- """Read graph in Pajek format from path.
- Parameters
- ----------
- path : file or string
- File or filename to write.
- Filenames ending in .gz or .bz2 will be uncompressed.
- Returns
- -------
- G : NetworkX MultiGraph or MultiDiGraph.
- Examples
- --------
- >>> G = nx.path_graph(4)
- >>> nx.write_pajek(G, "test.net")
- >>> G = nx.read_pajek("test.net")
- To create a Graph instead of a MultiGraph use
- >>> G1 = nx.Graph(G)
- References
- ----------
- See http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/draweps.htm
- for format information.
- """
- lines = (line.decode(encoding) for line in path)
- return parse_pajek(lines)
- def parse_pajek(lines):
- """Parse Pajek format graph from string or iterable.
- Parameters
- ----------
- lines : string or iterable
- Data in Pajek format.
- Returns
- -------
- G : NetworkX graph
- See Also
- --------
- read_pajek
- """
- import shlex
- # multigraph=False
- if isinstance(lines, str):
- lines = iter(lines.split("\n"))
- lines = iter([line.rstrip("\n") for line in lines])
- G = nx.MultiDiGraph() # are multiedges allowed in Pajek? assume yes
- labels = [] # in the order of the file, needed for matrix
- while lines:
- try:
- l = next(lines)
- except: # EOF
- break
- if l.lower().startswith("*network"):
- try:
- label, name = l.split(None, 1)
- except ValueError:
- # Line was not of the form: *network NAME
- pass
- else:
- G.graph["name"] = name
- elif l.lower().startswith("*vertices"):
- nodelabels = {}
- l, nnodes = l.split()
- for i in range(int(nnodes)):
- l = next(lines)
- try:
- splitline = [
- x.decode("utf-8") for x in shlex.split(str(l).encode("utf-8"))
- ]
- except AttributeError:
- splitline = shlex.split(str(l))
- id, label = splitline[0:2]
- labels.append(label)
- G.add_node(label)
- nodelabels[id] = label
- G.nodes[label]["id"] = id
- try:
- x, y, shape = splitline[2:5]
- G.nodes[label].update(
- {"x": float(x), "y": float(y), "shape": shape}
- )
- except:
- pass
- extra_attr = zip(splitline[5::2], splitline[6::2])
- G.nodes[label].update(extra_attr)
- elif l.lower().startswith("*edges") or l.lower().startswith("*arcs"):
- if l.lower().startswith("*edge"):
- # switch from multidigraph to multigraph
- G = nx.MultiGraph(G)
- if l.lower().startswith("*arcs"):
- # switch to directed with multiple arcs for each existing edge
- G = G.to_directed()
- for l in lines:
- try:
- splitline = [
- x.decode("utf-8") for x in shlex.split(str(l).encode("utf-8"))
- ]
- except AttributeError:
- splitline = shlex.split(str(l))
- if len(splitline) < 2:
- continue
- ui, vi = splitline[0:2]
- u = nodelabels.get(ui, ui)
- v = nodelabels.get(vi, vi)
- # parse the data attached to this edge and put in a dictionary
- edge_data = {}
- try:
- # there should always be a single value on the edge?
- w = splitline[2:3]
- edge_data.update({"weight": float(w[0])})
- except:
- pass
- # if there isn't, just assign a 1
- # edge_data.update({'value':1})
- extra_attr = zip(splitline[3::2], splitline[4::2])
- edge_data.update(extra_attr)
- # if G.has_edge(u,v):
- # multigraph=True
- G.add_edge(u, v, **edge_data)
- elif l.lower().startswith("*matrix"):
- G = nx.DiGraph(G)
- adj_list = (
- (labels[row], labels[col], {"weight": int(data)})
- for (row, line) in enumerate(lines)
- for (col, data) in enumerate(line.split())
- if int(data) != 0
- )
- G.add_edges_from(adj_list)
- return G
- def make_qstr(t):
- """Returns the string representation of t.
- Add outer double-quotes if the string has a space.
- """
- if not isinstance(t, str):
- t = str(t)
- if " " in t:
- t = f'"{t}"'
- return t
|