123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335 |
- """Hubs and authorities analysis of graph structure.
- """
- import networkx as nx
- __all__ = ["hits"]
- @nx._dispatch
- def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):
- """Returns HITS hubs and authorities values for nodes.
- The HITS algorithm computes two numbers for a node.
- Authorities estimates the node value based on the incoming links.
- Hubs estimates the node value based on outgoing links.
- Parameters
- ----------
- G : graph
- A NetworkX graph
- max_iter : integer, optional
- Maximum number of iterations in power method.
- tol : float, optional
- Error tolerance used to check convergence in power method iteration.
- nstart : dictionary, optional
- Starting value of each node for power method iteration.
- normalized : bool (default=True)
- Normalize results by the sum of all of the values.
- Returns
- -------
- (hubs,authorities) : two-tuple of dictionaries
- Two dictionaries keyed by node containing the hub and authority
- values.
- Raises
- ------
- PowerIterationFailedConvergence
- If the algorithm fails to converge to the specified tolerance
- within the specified number of iterations of the power iteration
- method.
- Examples
- --------
- >>> G = nx.path_graph(4)
- >>> h, a = nx.hits(G)
- Notes
- -----
- The eigenvector calculation is done by the power iteration method
- and has no guarantee of convergence. The iteration will stop
- after max_iter iterations or an error tolerance of
- number_of_nodes(G)*tol has been reached.
- The HITS algorithm was designed for directed graphs but this
- algorithm does not check if the input graph is directed and will
- execute on undirected graphs.
- References
- ----------
- .. [1] A. Langville and C. Meyer,
- "A survey of eigenvector methods of web information retrieval."
- http://citeseer.ist.psu.edu/713792.html
- .. [2] Jon Kleinberg,
- Authoritative sources in a hyperlinked environment
- Journal of the ACM 46 (5): 604-32, 1999.
- doi:10.1145/324133.324140.
- http://www.cs.cornell.edu/home/kleinber/auth.pdf.
- """
- import numpy as np
- import scipy as sp
- import scipy.sparse.linalg # call as sp.sparse.linalg
- if len(G) == 0:
- return {}, {}
- A = nx.adjacency_matrix(G, nodelist=list(G), dtype=float)
- if nstart is None:
- _, _, vt = sp.sparse.linalg.svds(A, k=1, maxiter=max_iter, tol=tol)
- else:
- nstart = np.array(list(nstart.values()))
- _, _, vt = sp.sparse.linalg.svds(A, k=1, v0=nstart, maxiter=max_iter, tol=tol)
- a = vt.flatten().real
- h = A @ a
- if normalized:
- h /= h.sum()
- a /= a.sum()
- hubs = dict(zip(G, map(float, h)))
- authorities = dict(zip(G, map(float, a)))
- return hubs, authorities
- def _hits_python(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):
- if isinstance(G, (nx.MultiGraph, nx.MultiDiGraph)):
- raise Exception("hits() not defined for graphs with multiedges.")
- if len(G) == 0:
- return {}, {}
- # choose fixed starting vector if not given
- if nstart is None:
- h = dict.fromkeys(G, 1.0 / G.number_of_nodes())
- else:
- h = nstart
- # normalize starting vector
- s = 1.0 / sum(h.values())
- for k in h:
- h[k] *= s
- for _ in range(max_iter): # power iteration: make up to max_iter iterations
- hlast = h
- h = dict.fromkeys(hlast.keys(), 0)
- a = dict.fromkeys(hlast.keys(), 0)
- # this "matrix multiply" looks odd because it is
- # doing a left multiply a^T=hlast^T*G
- for n in h:
- for nbr in G[n]:
- a[nbr] += hlast[n] * G[n][nbr].get("weight", 1)
- # now multiply h=Ga
- for n in h:
- for nbr in G[n]:
- h[n] += a[nbr] * G[n][nbr].get("weight", 1)
- # normalize vector
- s = 1.0 / max(h.values())
- for n in h:
- h[n] *= s
- # normalize vector
- s = 1.0 / max(a.values())
- for n in a:
- a[n] *= s
- # check convergence, l1 norm
- err = sum(abs(h[n] - hlast[n]) for n in h)
- if err < tol:
- break
- else:
- raise nx.PowerIterationFailedConvergence(max_iter)
- if normalized:
- s = 1.0 / sum(a.values())
- for n in a:
- a[n] *= s
- s = 1.0 / sum(h.values())
- for n in h:
- h[n] *= s
- return h, a
- def _hits_numpy(G, normalized=True):
- """Returns HITS hubs and authorities values for nodes.
- The HITS algorithm computes two numbers for a node.
- Authorities estimates the node value based on the incoming links.
- Hubs estimates the node value based on outgoing links.
- Parameters
- ----------
- G : graph
- A NetworkX graph
- normalized : bool (default=True)
- Normalize results by the sum of all of the values.
- Returns
- -------
- (hubs,authorities) : two-tuple of dictionaries
- Two dictionaries keyed by node containing the hub and authority
- values.
- Examples
- --------
- >>> G = nx.path_graph(4)
- The `hubs` and `authorities` are given by the eigenvectors corresponding to the
- maximum eigenvalues of the hubs_matrix and the authority_matrix, respectively.
- The ``hubs`` and ``authority`` matrices are computed from the adjancency
- matrix:
- >>> adj_ary = nx.to_numpy_array(G)
- >>> hubs_matrix = adj_ary @ adj_ary.T
- >>> authority_matrix = adj_ary.T @ adj_ary
- `_hits_numpy` maps the eigenvector corresponding to the maximum eigenvalue
- of the respective matrices to the nodes in `G`:
- >>> from networkx.algorithms.link_analysis.hits_alg import _hits_numpy
- >>> hubs, authority = _hits_numpy(G)
- Notes
- -----
- The eigenvector calculation uses NumPy's interface to LAPACK.
- The HITS algorithm was designed for directed graphs but this
- algorithm does not check if the input graph is directed and will
- execute on undirected graphs.
- References
- ----------
- .. [1] A. Langville and C. Meyer,
- "A survey of eigenvector methods of web information retrieval."
- http://citeseer.ist.psu.edu/713792.html
- .. [2] Jon Kleinberg,
- Authoritative sources in a hyperlinked environment
- Journal of the ACM 46 (5): 604-32, 1999.
- doi:10.1145/324133.324140.
- http://www.cs.cornell.edu/home/kleinber/auth.pdf.
- """
- import numpy as np
- if len(G) == 0:
- return {}, {}
- adj_ary = nx.to_numpy_array(G)
- # Hub matrix
- H = adj_ary @ adj_ary.T
- e, ev = np.linalg.eig(H)
- h = ev[:, np.argmax(e)] # eigenvector corresponding to the maximum eigenvalue
- # Authority matrix
- A = adj_ary.T @ adj_ary
- e, ev = np.linalg.eig(A)
- a = ev[:, np.argmax(e)] # eigenvector corresponding to the maximum eigenvalue
- if normalized:
- h /= h.sum()
- a /= a.sum()
- else:
- h /= h.max()
- a /= a.max()
- hubs = dict(zip(G, map(float, h)))
- authorities = dict(zip(G, map(float, a)))
- return hubs, authorities
- def _hits_scipy(G, max_iter=100, tol=1.0e-6, nstart=None, normalized=True):
- """Returns HITS hubs and authorities values for nodes.
- The HITS algorithm computes two numbers for a node.
- Authorities estimates the node value based on the incoming links.
- Hubs estimates the node value based on outgoing links.
- Parameters
- ----------
- G : graph
- A NetworkX graph
- max_iter : integer, optional
- Maximum number of iterations in power method.
- tol : float, optional
- Error tolerance used to check convergence in power method iteration.
- nstart : dictionary, optional
- Starting value of each node for power method iteration.
- normalized : bool (default=True)
- Normalize results by the sum of all of the values.
- Returns
- -------
- (hubs,authorities) : two-tuple of dictionaries
- Two dictionaries keyed by node containing the hub and authority
- values.
- Examples
- --------
- >>> from networkx.algorithms.link_analysis.hits_alg import _hits_scipy
- >>> G = nx.path_graph(4)
- >>> h, a = _hits_scipy(G)
- Notes
- -----
- This implementation uses SciPy sparse matrices.
- The eigenvector calculation is done by the power iteration method
- and has no guarantee of convergence. The iteration will stop
- after max_iter iterations or an error tolerance of
- number_of_nodes(G)*tol has been reached.
- The HITS algorithm was designed for directed graphs but this
- algorithm does not check if the input graph is directed and will
- execute on undirected graphs.
- Raises
- ------
- PowerIterationFailedConvergence
- If the algorithm fails to converge to the specified tolerance
- within the specified number of iterations of the power iteration
- method.
- References
- ----------
- .. [1] A. Langville and C. Meyer,
- "A survey of eigenvector methods of web information retrieval."
- http://citeseer.ist.psu.edu/713792.html
- .. [2] Jon Kleinberg,
- Authoritative sources in a hyperlinked environment
- Journal of the ACM 46 (5): 604-632, 1999.
- doi:10.1145/324133.324140.
- http://www.cs.cornell.edu/home/kleinber/auth.pdf.
- """
- import numpy as np
- if len(G) == 0:
- return {}, {}
- A = nx.to_scipy_sparse_array(G, nodelist=list(G))
- (n, _) = A.shape # should be square
- ATA = A.T @ A # authority matrix
- # choose fixed starting vector if not given
- if nstart is None:
- x = np.ones((n, 1)) / n
- else:
- x = np.array([nstart.get(n, 0) for n in list(G)], dtype=float)
- x /= x.sum()
- # power iteration on authority matrix
- i = 0
- while True:
- xlast = x
- x = ATA @ x
- x /= x.max()
- # check convergence, l1 norm
- err = np.absolute(x - xlast).sum()
- if err < tol:
- break
- if i > max_iter:
- raise nx.PowerIterationFailedConvergence(max_iter)
- i += 1
- a = x.flatten()
- h = A @ a
- if normalized:
- h /= h.sum()
- a /= a.sum()
- hubs = dict(zip(G, map(float, h)))
- authorities = dict(zip(G, map(float, a)))
- return hubs, authorities
|