123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500 |
- """PageRank analysis of graph structure. """
- from warnings import warn
- import networkx as nx
- __all__ = ["pagerank", "google_matrix"]
- @nx._dispatch
- def pagerank(
- G,
- alpha=0.85,
- personalization=None,
- max_iter=100,
- tol=1.0e-6,
- nstart=None,
- weight="weight",
- dangling=None,
- ):
- """Returns the PageRank of the nodes in the graph.
- PageRank computes a ranking of the nodes in the graph G based on
- the structure of the incoming links. It was originally designed as
- an algorithm to rank web pages.
- Parameters
- ----------
- G : graph
- A NetworkX graph. Undirected graphs will be converted to a directed
- graph with two directed edges for each undirected edge.
- alpha : float, optional
- Damping parameter for PageRank, default=0.85.
- personalization: dict, optional
- The "personalization vector" consisting of a dictionary with a
- key some subset of graph nodes and personalization value each of those.
- At least one personalization value must be non-zero.
- If not specified, a nodes personalization value will be zero.
- By default, a uniform distribution is used.
- max_iter : integer, optional
- Maximum number of iterations in power method eigenvalue solver.
- tol : float, optional
- Error tolerance used to check convergence in power method solver.
- The iteration will stop after a tolerance of ``len(G) * tol`` is reached.
- nstart : dictionary, optional
- Starting value of PageRank iteration for each node.
- weight : key, optional
- Edge data key to use as weight. If None weights are set to 1.
- dangling: dict, optional
- The outedges to be assigned to any "dangling" nodes, i.e., nodes without
- any outedges. The dict key is the node the outedge points to and the dict
- value is the weight of that outedge. By default, dangling nodes are given
- outedges according to the personalization vector (uniform if not
- specified). This must be selected to result in an irreducible transition
- matrix (see notes under google_matrix). It may be common to have the
- dangling dict to be the same as the personalization dict.
- Returns
- -------
- pagerank : dictionary
- Dictionary of nodes with PageRank as value
- Examples
- --------
- >>> G = nx.DiGraph(nx.path_graph(4))
- >>> pr = nx.pagerank(G, alpha=0.9)
- Notes
- -----
- The eigenvector calculation is done by the power iteration method
- and has no guarantee of convergence. The iteration will stop after
- an error tolerance of ``len(G) * tol`` has been reached. If the
- number of iterations exceed `max_iter`, a
- :exc:`networkx.exception.PowerIterationFailedConvergence` exception
- is raised.
- The PageRank algorithm was designed for directed graphs but this
- algorithm does not check if the input graph is directed and will
- execute on undirected graphs by converting each edge in the
- directed graph to two edges.
- See Also
- --------
- google_matrix
- Raises
- ------
- PowerIterationFailedConvergence
- If the algorithm fails to converge to the specified tolerance
- within the specified number of iterations of the power iteration
- method.
- References
- ----------
- .. [1] A. Langville and C. Meyer,
- "A survey of eigenvector methods of web information retrieval."
- http://citeseer.ist.psu.edu/713792.html
- .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
- The PageRank citation ranking: Bringing order to the Web. 1999
- http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
- """
- return _pagerank_scipy(
- G, alpha, personalization, max_iter, tol, nstart, weight, dangling
- )
- def _pagerank_python(
- G,
- alpha=0.85,
- personalization=None,
- max_iter=100,
- tol=1.0e-6,
- nstart=None,
- weight="weight",
- dangling=None,
- ):
- if len(G) == 0:
- return {}
- D = G.to_directed()
-
- W = nx.stochastic_graph(D, weight=weight)
- N = W.number_of_nodes()
-
- if nstart is None:
- x = dict.fromkeys(W, 1.0 / N)
- else:
-
- s = sum(nstart.values())
- x = {k: v / s for k, v in nstart.items()}
- if personalization is None:
-
- p = dict.fromkeys(W, 1.0 / N)
- else:
- s = sum(personalization.values())
- p = {k: v / s for k, v in personalization.items()}
- if dangling is None:
-
- dangling_weights = p
- else:
- s = sum(dangling.values())
- dangling_weights = {k: v / s for k, v in dangling.items()}
- dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
-
- for _ in range(max_iter):
- xlast = x
- x = dict.fromkeys(xlast.keys(), 0)
- danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
- for n in x:
-
-
- for _, nbr, wt in W.edges(n, data=weight):
- x[nbr] += alpha * xlast[n] * wt
- x[n] += danglesum * dangling_weights.get(n, 0) + (1.0 - alpha) * p.get(n, 0)
-
- err = sum(abs(x[n] - xlast[n]) for n in x)
- if err < N * tol:
- return x
- raise nx.PowerIterationFailedConvergence(max_iter)
- @nx._dispatch
- def google_matrix(
- G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None
- ):
- """Returns the Google matrix of the graph.
- Parameters
- ----------
- G : graph
- A NetworkX graph. Undirected graphs will be converted to a directed
- graph with two directed edges for each undirected edge.
- alpha : float
- The damping factor.
- personalization: dict, optional
- The "personalization vector" consisting of a dictionary with a
- key some subset of graph nodes and personalization value each of those.
- At least one personalization value must be non-zero.
- If not specified, a nodes personalization value will be zero.
- By default, a uniform distribution is used.
- nodelist : list, optional
- The rows and columns are ordered according to the nodes in nodelist.
- If nodelist is None, then the ordering is produced by G.nodes().
- weight : key, optional
- Edge data key to use as weight. If None weights are set to 1.
- dangling: dict, optional
- The outedges to be assigned to any "dangling" nodes, i.e., nodes without
- any outedges. The dict key is the node the outedge points to and the dict
- value is the weight of that outedge. By default, dangling nodes are given
- outedges according to the personalization vector (uniform if not
- specified) This must be selected to result in an irreducible transition
- matrix (see notes below). It may be common to have the dangling dict to
- be the same as the personalization dict.
- Returns
- -------
- A : 2D NumPy ndarray
- Google matrix of the graph
- Notes
- -----
- The array returned represents the transition matrix that describes the
- Markov chain used in PageRank. For PageRank to converge to a unique
- solution (i.e., a unique stationary distribution in a Markov chain), the
- transition matrix must be irreducible. In other words, it must be that
- there exists a path between every pair of nodes in the graph, or else there
- is the potential of "rank sinks."
- This implementation works with Multi(Di)Graphs. For multigraphs the
- weight between two nodes is set to be the sum of all edge weights
- between those nodes.
- See Also
- --------
- pagerank
- """
- import numpy as np
- if nodelist is None:
- nodelist = list(G)
- A = nx.to_numpy_array(G, nodelist=nodelist, weight=weight)
- N = len(G)
- if N == 0:
- return A
-
- if personalization is None:
- p = np.repeat(1.0 / N, N)
- else:
- p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
- if p.sum() == 0:
- raise ZeroDivisionError
- p /= p.sum()
-
- if dangling is None:
- dangling_weights = p
- else:
-
- dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
- dangling_weights /= dangling_weights.sum()
- dangling_nodes = np.where(A.sum(axis=1) == 0)[0]
-
- A[dangling_nodes] = dangling_weights
- A /= A.sum(axis=1)[:, np.newaxis]
- return alpha * A + (1 - alpha) * p
- def _pagerank_numpy(
- G, alpha=0.85, personalization=None, weight="weight", dangling=None
- ):
- """Returns the PageRank of the nodes in the graph.
- PageRank computes a ranking of the nodes in the graph G based on
- the structure of the incoming links. It was originally designed as
- an algorithm to rank web pages.
- Parameters
- ----------
- G : graph
- A NetworkX graph. Undirected graphs will be converted to a directed
- graph with two directed edges for each undirected edge.
- alpha : float, optional
- Damping parameter for PageRank, default=0.85.
- personalization: dict, optional
- The "personalization vector" consisting of a dictionary with a
- key some subset of graph nodes and personalization value each of those.
- At least one personalization value must be non-zero.
- If not specified, a nodes personalization value will be zero.
- By default, a uniform distribution is used.
- weight : key, optional
- Edge data key to use as weight. If None weights are set to 1.
- dangling: dict, optional
- The outedges to be assigned to any "dangling" nodes, i.e., nodes without
- any outedges. The dict key is the node the outedge points to and the dict
- value is the weight of that outedge. By default, dangling nodes are given
- outedges according to the personalization vector (uniform if not
- specified) This must be selected to result in an irreducible transition
- matrix (see notes under google_matrix). It may be common to have the
- dangling dict to be the same as the personalization dict.
- Returns
- -------
- pagerank : dictionary
- Dictionary of nodes with PageRank as value.
- Examples
- --------
- >>> from networkx.algorithms.link_analysis.pagerank_alg import _pagerank_numpy
- >>> G = nx.DiGraph(nx.path_graph(4))
- >>> pr = _pagerank_numpy(G, alpha=0.9)
- Notes
- -----
- The eigenvector calculation uses NumPy's interface to the LAPACK
- eigenvalue solvers. This will be the fastest and most accurate
- for small graphs.
- This implementation works with Multi(Di)Graphs. For multigraphs the
- weight between two nodes is set to be the sum of all edge weights
- between those nodes.
- See Also
- --------
- pagerank, google_matrix
- References
- ----------
- .. [1] A. Langville and C. Meyer,
- "A survey of eigenvector methods of web information retrieval."
- http://citeseer.ist.psu.edu/713792.html
- .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
- The PageRank citation ranking: Bringing order to the Web. 1999
- http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
- """
- import numpy as np
- if len(G) == 0:
- return {}
- M = google_matrix(
- G, alpha, personalization=personalization, weight=weight, dangling=dangling
- )
-
- eigenvalues, eigenvectors = np.linalg.eig(M.T)
- ind = np.argmax(eigenvalues)
-
- largest = np.array(eigenvectors[:, ind]).flatten().real
- norm = largest.sum()
- return dict(zip(G, map(float, largest / norm)))
- def _pagerank_scipy(
- G,
- alpha=0.85,
- personalization=None,
- max_iter=100,
- tol=1.0e-6,
- nstart=None,
- weight="weight",
- dangling=None,
- ):
- """Returns the PageRank of the nodes in the graph.
- PageRank computes a ranking of the nodes in the graph G based on
- the structure of the incoming links. It was originally designed as
- an algorithm to rank web pages.
- Parameters
- ----------
- G : graph
- A NetworkX graph. Undirected graphs will be converted to a directed
- graph with two directed edges for each undirected edge.
- alpha : float, optional
- Damping parameter for PageRank, default=0.85.
- personalization: dict, optional
- The "personalization vector" consisting of a dictionary with a
- key some subset of graph nodes and personalization value each of those.
- At least one personalization value must be non-zero.
- If not specified, a nodes personalization value will be zero.
- By default, a uniform distribution is used.
- max_iter : integer, optional
- Maximum number of iterations in power method eigenvalue solver.
- tol : float, optional
- Error tolerance used to check convergence in power method solver.
- The iteration will stop after a tolerance of ``len(G) * tol`` is reached.
- nstart : dictionary, optional
- Starting value of PageRank iteration for each node.
- weight : key, optional
- Edge data key to use as weight. If None weights are set to 1.
- dangling: dict, optional
- The outedges to be assigned to any "dangling" nodes, i.e., nodes without
- any outedges. The dict key is the node the outedge points to and the dict
- value is the weight of that outedge. By default, dangling nodes are given
- outedges according to the personalization vector (uniform if not
- specified) This must be selected to result in an irreducible transition
- matrix (see notes under google_matrix). It may be common to have the
- dangling dict to be the same as the personalization dict.
- Returns
- -------
- pagerank : dictionary
- Dictionary of nodes with PageRank as value
- Examples
- --------
- >>> from networkx.algorithms.link_analysis.pagerank_alg import _pagerank_scipy
- >>> G = nx.DiGraph(nx.path_graph(4))
- >>> pr = _pagerank_scipy(G, alpha=0.9)
- Notes
- -----
- The eigenvector calculation uses power iteration with a SciPy
- sparse matrix representation.
- This implementation works with Multi(Di)Graphs. For multigraphs the
- weight between two nodes is set to be the sum of all edge weights
- between those nodes.
- See Also
- --------
- pagerank
- Raises
- ------
- PowerIterationFailedConvergence
- If the algorithm fails to converge to the specified tolerance
- within the specified number of iterations of the power iteration
- method.
- References
- ----------
- .. [1] A. Langville and C. Meyer,
- "A survey of eigenvector methods of web information retrieval."
- http://citeseer.ist.psu.edu/713792.html
- .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
- The PageRank citation ranking: Bringing order to the Web. 1999
- http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
- """
- import numpy as np
- import scipy as sp
- import scipy.sparse
- N = len(G)
- if N == 0:
- return {}
- nodelist = list(G)
- A = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight, dtype=float)
- S = A.sum(axis=1)
- S[S != 0] = 1.0 / S[S != 0]
-
- Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape))
- A = Q @ A
-
- if nstart is None:
- x = np.repeat(1.0 / N, N)
- else:
- x = np.array([nstart.get(n, 0) for n in nodelist], dtype=float)
- x /= x.sum()
-
- if personalization is None:
- p = np.repeat(1.0 / N, N)
- else:
- p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
- if p.sum() == 0:
- raise ZeroDivisionError
- p /= p.sum()
-
- if dangling is None:
- dangling_weights = p
- else:
-
- dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
- dangling_weights /= dangling_weights.sum()
- is_dangling = np.where(S == 0)[0]
-
- for _ in range(max_iter):
- xlast = x
- x = alpha * (x @ A + sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p
-
- err = np.absolute(x - xlast).sum()
- if err < N * tol:
- return dict(zip(nodelist, map(float, x)))
- raise nx.PowerIterationFailedConvergence(max_iter)
|