123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462 |
- """
- Functions for constructing matrix-like objects from graph attributes.
- """
- __all__ = ["attr_matrix", "attr_sparse_matrix"]
- def _node_value(G, node_attr):
- """Returns a function that returns a value from G.nodes[u].
- We return a function expecting a node as its sole argument. Then, in the
- simplest scenario, the returned function will return G.nodes[u][node_attr].
- However, we also handle the case when `node_attr` is None or when it is a
- function itself.
- Parameters
- ----------
- G : graph
- A NetworkX graph
- node_attr : {None, str, callable}
- Specification of how the value of the node attribute should be obtained
- from the node attribute dictionary.
- Returns
- -------
- value : function
- A function expecting a node as its sole argument. The function will
- returns a value from G.nodes[u] that depends on `edge_attr`.
- """
- if node_attr is None:
- def value(u):
- return u
- elif not callable(node_attr):
- # assume it is a key for the node attribute dictionary
- def value(u):
- return G.nodes[u][node_attr]
- else:
- # Advanced: Allow users to specify something else.
- #
- # For example,
- # node_attr = lambda u: G.nodes[u].get('size', .5) * 3
- #
- value = node_attr
- return value
- def _edge_value(G, edge_attr):
- """Returns a function that returns a value from G[u][v].
- Suppose there exists an edge between u and v. Then we return a function
- expecting u and v as arguments. For Graph and DiGraph, G[u][v] is
- the edge attribute dictionary, and the function (essentially) returns
- G[u][v][edge_attr]. However, we also handle cases when `edge_attr` is None
- and when it is a function itself. For MultiGraph and MultiDiGraph, G[u][v]
- is a dictionary of all edges between u and v. In this case, the returned
- function sums the value of `edge_attr` for every edge between u and v.
- Parameters
- ----------
- G : graph
- A NetworkX graph
- edge_attr : {None, str, callable}
- Specification of how the value of the edge attribute should be obtained
- from the edge attribute dictionary, G[u][v]. For multigraphs, G[u][v]
- is a dictionary of all the edges between u and v. This allows for
- special treatment of multiedges.
- Returns
- -------
- value : function
- A function expecting two nodes as parameters. The nodes should
- represent the from- and to- node of an edge. The function will
- return a value from G[u][v] that depends on `edge_attr`.
- """
- if edge_attr is None:
- # topological count of edges
- if G.is_multigraph():
- def value(u, v):
- return len(G[u][v])
- else:
- def value(u, v):
- return 1
- elif not callable(edge_attr):
- # assume it is a key for the edge attribute dictionary
- if edge_attr == "weight":
- # provide a default value
- if G.is_multigraph():
- def value(u, v):
- return sum(d.get(edge_attr, 1) for d in G[u][v].values())
- else:
- def value(u, v):
- return G[u][v].get(edge_attr, 1)
- else:
- # otherwise, the edge attribute MUST exist for each edge
- if G.is_multigraph():
- def value(u, v):
- return sum(d[edge_attr] for d in G[u][v].values())
- else:
- def value(u, v):
- return G[u][v][edge_attr]
- else:
- # Advanced: Allow users to specify something else.
- #
- # Alternative default value:
- # edge_attr = lambda u,v: G[u][v].get('thickness', .5)
- #
- # Function on an attribute:
- # edge_attr = lambda u,v: abs(G[u][v]['weight'])
- #
- # Handle Multi(Di)Graphs differently:
- # edge_attr = lambda u,v: numpy.prod([d['size'] for d in G[u][v].values()])
- #
- # Ignore multiple edges
- # edge_attr = lambda u,v: 1 if len(G[u][v]) else 0
- #
- value = edge_attr
- return value
- def attr_matrix(
- G,
- edge_attr=None,
- node_attr=None,
- normalized=False,
- rc_order=None,
- dtype=None,
- order=None,
- ):
- """Returns the attribute matrix using attributes from `G` as a numpy array.
- If only `G` is passed in, then the adjacency matrix is constructed.
- Let A be a discrete set of values for the node attribute `node_attr`. Then
- the elements of A represent the rows and columns of the constructed matrix.
- Now, iterate through every edge e=(u,v) in `G` and consider the value
- of the edge attribute `edge_attr`. If ua and va are the values of the
- node attribute `node_attr` for u and v, respectively, then the value of
- the edge attribute is added to the matrix element at (ua, va).
- Parameters
- ----------
- G : graph
- The NetworkX graph used to construct the attribute matrix.
- edge_attr : str, optional
- Each element of the matrix represents a running total of the
- specified edge attribute for edges whose node attributes correspond
- to the rows/cols of the matrix. The attribute must be present for
- all edges in the graph. If no attribute is specified, then we
- just count the number of edges whose node attributes correspond
- to the matrix element.
- node_attr : str, optional
- Each row and column in the matrix represents a particular value
- of the node attribute. The attribute must be present for all nodes
- in the graph. Note, the values of this attribute should be reliably
- hashable. So, float values are not recommended. If no attribute is
- specified, then the rows and columns will be the nodes of the graph.
- normalized : bool, optional
- If True, then each row is normalized by the summation of its values.
- rc_order : list, optional
- A list of the node attribute values. This list specifies the ordering
- of rows and columns of the array. If no ordering is provided, then
- the ordering will be random (and also, a return value).
- Other Parameters
- ----------------
- dtype : NumPy data-type, optional
- A valid NumPy dtype used to initialize the array. Keep in mind certain
- dtypes can yield unexpected results if the array is to be normalized.
- The parameter is passed to numpy.zeros(). If unspecified, the NumPy
- default is used.
- order : {'C', 'F'}, optional
- Whether to store multidimensional data in C- or Fortran-contiguous
- (row- or column-wise) order in memory. This parameter is passed to
- numpy.zeros(). If unspecified, the NumPy default is used.
- Returns
- -------
- M : 2D NumPy ndarray
- The attribute matrix.
- ordering : list
- If `rc_order` was specified, then only the attribute matrix is returned.
- However, if `rc_order` was None, then the ordering used to construct
- the matrix is returned as well.
- Examples
- --------
- Construct an adjacency matrix:
- >>> G = nx.Graph()
- >>> G.add_edge(0, 1, thickness=1, weight=3)
- >>> G.add_edge(0, 2, thickness=2)
- >>> G.add_edge(1, 2, thickness=3)
- >>> nx.attr_matrix(G, rc_order=[0, 1, 2])
- array([[0., 1., 1.],
- [1., 0., 1.],
- [1., 1., 0.]])
- Alternatively, we can obtain the matrix describing edge thickness.
- >>> nx.attr_matrix(G, edge_attr="thickness", rc_order=[0, 1, 2])
- array([[0., 1., 2.],
- [1., 0., 3.],
- [2., 3., 0.]])
- We can also color the nodes and ask for the probability distribution over
- all edges (u,v) describing:
- Pr(v has color Y | u has color X)
- >>> G.nodes[0]["color"] = "red"
- >>> G.nodes[1]["color"] = "red"
- >>> G.nodes[2]["color"] = "blue"
- >>> rc = ["red", "blue"]
- >>> nx.attr_matrix(G, node_attr="color", normalized=True, rc_order=rc)
- array([[0.33333333, 0.66666667],
- [1. , 0. ]])
- For example, the above tells us that for all edges (u,v):
- Pr( v is red | u is red) = 1/3
- Pr( v is blue | u is red) = 2/3
- Pr( v is red | u is blue) = 1
- Pr( v is blue | u is blue) = 0
- Finally, we can obtain the total weights listed by the node colors.
- >>> nx.attr_matrix(G, edge_attr="weight", node_attr="color", rc_order=rc)
- array([[3., 2.],
- [2., 0.]])
- Thus, the total weight over all edges (u,v) with u and v having colors:
- (red, red) is 3 # the sole contribution is from edge (0,1)
- (red, blue) is 2 # contributions from edges (0,2) and (1,2)
- (blue, red) is 2 # same as (red, blue) since graph is undirected
- (blue, blue) is 0 # there are no edges with blue endpoints
- """
- import numpy as np
- edge_value = _edge_value(G, edge_attr)
- node_value = _node_value(G, node_attr)
- if rc_order is None:
- ordering = list({node_value(n) for n in G})
- else:
- ordering = rc_order
- N = len(ordering)
- undirected = not G.is_directed()
- index = dict(zip(ordering, range(N)))
- M = np.zeros((N, N), dtype=dtype, order=order)
- seen = set()
- for u, nbrdict in G.adjacency():
- for v in nbrdict:
- # Obtain the node attribute values.
- i, j = index[node_value(u)], index[node_value(v)]
- if v not in seen:
- M[i, j] += edge_value(u, v)
- if undirected:
- M[j, i] = M[i, j]
- if undirected:
- seen.add(u)
- if normalized:
- M /= M.sum(axis=1).reshape((N, 1))
- if rc_order is None:
- return M, ordering
- else:
- return M
- def attr_sparse_matrix(
- G, edge_attr=None, node_attr=None, normalized=False, rc_order=None, dtype=None
- ):
- """Returns a SciPy sparse array using attributes from G.
- If only `G` is passed in, then the adjacency matrix is constructed.
- Let A be a discrete set of values for the node attribute `node_attr`. Then
- the elements of A represent the rows and columns of the constructed matrix.
- Now, iterate through every edge e=(u,v) in `G` and consider the value
- of the edge attribute `edge_attr`. If ua and va are the values of the
- node attribute `node_attr` for u and v, respectively, then the value of
- the edge attribute is added to the matrix element at (ua, va).
- Parameters
- ----------
- G : graph
- The NetworkX graph used to construct the NumPy matrix.
- edge_attr : str, optional
- Each element of the matrix represents a running total of the
- specified edge attribute for edges whose node attributes correspond
- to the rows/cols of the matrix. The attribute must be present for
- all edges in the graph. If no attribute is specified, then we
- just count the number of edges whose node attributes correspond
- to the matrix element.
- node_attr : str, optional
- Each row and column in the matrix represents a particular value
- of the node attribute. The attribute must be present for all nodes
- in the graph. Note, the values of this attribute should be reliably
- hashable. So, float values are not recommended. If no attribute is
- specified, then the rows and columns will be the nodes of the graph.
- normalized : bool, optional
- If True, then each row is normalized by the summation of its values.
- rc_order : list, optional
- A list of the node attribute values. This list specifies the ordering
- of rows and columns of the array. If no ordering is provided, then
- the ordering will be random (and also, a return value).
- Other Parameters
- ----------------
- dtype : NumPy data-type, optional
- A valid NumPy dtype used to initialize the array. Keep in mind certain
- dtypes can yield unexpected results if the array is to be normalized.
- The parameter is passed to numpy.zeros(). If unspecified, the NumPy
- default is used.
- Returns
- -------
- M : SciPy sparse array
- The attribute matrix.
- ordering : list
- If `rc_order` was specified, then only the matrix is returned.
- However, if `rc_order` was None, then the ordering used to construct
- the matrix is returned as well.
- Examples
- --------
- Construct an adjacency matrix:
- >>> G = nx.Graph()
- >>> G.add_edge(0, 1, thickness=1, weight=3)
- >>> G.add_edge(0, 2, thickness=2)
- >>> G.add_edge(1, 2, thickness=3)
- >>> M = nx.attr_sparse_matrix(G, rc_order=[0, 1, 2])
- >>> M.toarray()
- array([[0., 1., 1.],
- [1., 0., 1.],
- [1., 1., 0.]])
- Alternatively, we can obtain the matrix describing edge thickness.
- >>> M = nx.attr_sparse_matrix(G, edge_attr="thickness", rc_order=[0, 1, 2])
- >>> M.toarray()
- array([[0., 1., 2.],
- [1., 0., 3.],
- [2., 3., 0.]])
- We can also color the nodes and ask for the probability distribution over
- all edges (u,v) describing:
- Pr(v has color Y | u has color X)
- >>> G.nodes[0]["color"] = "red"
- >>> G.nodes[1]["color"] = "red"
- >>> G.nodes[2]["color"] = "blue"
- >>> rc = ["red", "blue"]
- >>> M = nx.attr_sparse_matrix(G, node_attr="color", normalized=True, rc_order=rc)
- >>> M.toarray()
- array([[0.33333333, 0.66666667],
- [1. , 0. ]])
- For example, the above tells us that for all edges (u,v):
- Pr( v is red | u is red) = 1/3
- Pr( v is blue | u is red) = 2/3
- Pr( v is red | u is blue) = 1
- Pr( v is blue | u is blue) = 0
- Finally, we can obtain the total weights listed by the node colors.
- >>> M = nx.attr_sparse_matrix(G, edge_attr="weight", node_attr="color", rc_order=rc)
- >>> M.toarray()
- array([[3., 2.],
- [2., 0.]])
- Thus, the total weight over all edges (u,v) with u and v having colors:
- (red, red) is 3 # the sole contribution is from edge (0,1)
- (red, blue) is 2 # contributions from edges (0,2) and (1,2)
- (blue, red) is 2 # same as (red, blue) since graph is undirected
- (blue, blue) is 0 # there are no edges with blue endpoints
- """
- import numpy as np
- import scipy as sp
- import scipy.sparse # call as sp.sparse
- edge_value = _edge_value(G, edge_attr)
- node_value = _node_value(G, node_attr)
- if rc_order is None:
- ordering = list({node_value(n) for n in G})
- else:
- ordering = rc_order
- N = len(ordering)
- undirected = not G.is_directed()
- index = dict(zip(ordering, range(N)))
- M = sp.sparse.lil_array((N, N), dtype=dtype)
- seen = set()
- for u, nbrdict in G.adjacency():
- for v in nbrdict:
- # Obtain the node attribute values.
- i, j = index[node_value(u)], index[node_value(v)]
- if v not in seen:
- M[i, j] += edge_value(u, v)
- if undirected:
- M[j, i] = M[i, j]
- if undirected:
- seen.add(u)
- if normalized:
- M *= 1 / M.sum(axis=1)[:, np.newaxis] # in-place mult preserves sparse
- if rc_order is None:
- return M, ordering
- else:
- return M
|