- """ This module provides the functions for node classification problem.
- The functions in this module are not imported
- into the top level `networkx` namespace.
- You can access these functions by importing
- the `networkx.algorithms.node_classification` modules,
- then accessing the functions as attributes of `node_classification`.
- For example:
- >>> from networkx.algorithms import node_classification
- >>> G = nx.path_graph(4)
- >>> G.edges()
- EdgeView([(0, 1), (1, 2), (2, 3)])
- >>> G.nodes[0]["label"] = "A"
- >>> G.nodes[3]["label"] = "B"
- >>> node_classification.harmonic_function(G)
- ['A', 'A', 'B', 'B']
- References
- ----------
- Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
- Semi-supervised learning using gaussian fields and harmonic functions.
- In ICML (Vol. 3, pp. 912-919).
- """
import networkx as nx

__all__ = ["harmonic_function", "local_and_global_consistency"]

@nx.utils.not_implemented_for("directed")
def harmonic_function(G, max_iter=30, label_name="label"):
    """Node classification by the harmonic function.

    Implements the harmonic function algorithm of Zhu et al., which
    propagates the known node labels over the graph to predict labels
    for the unlabeled nodes.

    Parameters
    ----------
    G : NetworkX Graph
    max_iter : int
        Maximum number of iterations allowed.
    label_name : string
        Name of the node attribute containing the target labels to predict.

    Returns
    -------
    predicted : list
        List of length ``len(G)`` with the predicted labels for each node.

    Raises
    ------
    NetworkXError
        If no nodes in `G` have the attribute `label_name`.

    Examples
    --------
    >>> from networkx.algorithms import node_classification
    >>> G = nx.path_graph(4)
    >>> G.nodes[0]["label"] = "A"
    >>> G.nodes[3]["label"] = "B"
    >>> G.nodes(data=True)
    NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
    >>> G.edges()
    EdgeView([(0, 1), (1, 2), (2, 3)])
    >>> predicted = node_classification.harmonic_function(G)
    >>> predicted
    ['A', 'A', 'B', 'B']

    References
    ----------
    Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
    Semi-supervised learning using Gaussian fields and harmonic functions.
    In ICML (Vol. 3, pp. 912-919).
    """
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    X = nx.to_scipy_sparse_array(G)  # adjacency matrix
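    # Rows/columns of X follow the order of ``G.nodes()``; the node indices
    # returned by _get_label_info below use the same enumeration, so they
    # index rows of X directly.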
    labels, label_dict = _get_label_info(G, label_name)

    if labels.shape[0] == 0:
        raise nx.NetworkXError(
            f"No node on the input graph is labeled by '{label_name}'."
        )

    n_samples = X.shape[0]
    n_classes = label_dict.shape[0]
    F = np.zeros((n_samples, n_classes))

    # Build propagation matrix
    degrees = X.sum(axis=0)
    degrees[degrees == 0] = 1  # Avoid division by 0
    # TODO: csr_array
    D = sp.sparse.csr_array(sp.sparse.diags((1.0 / degrees), offsets=0))
    P = (D @ X).tolil()
    P[labels[:, 0]] = 0  # labels[:, 0] indicates IDs of labeled nodes
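    # P = D^-1 @ X is the row-normalized (random-walk) transition matrix.
    # Zeroing the rows of labeled nodes clamps them: in the update below,
    # their rows of F remain equal to the corresponding one-hot rows of B.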
    # Build base matrix
    B = np.zeros((n_samples, n_classes))
    B[labels[:, 0], labels[:, 1]] = 1
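    # Iterate the harmonic update F <- P @ F + B (Zhu et al., 2003). The loop
    # runs for a fixed number of iterations rather than testing convergence,
    # so max_iter bounds how far labels propagate from the labeled nodes.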
    for _ in range(max_iter):
        F = (P @ F) + B

    return label_dict[np.argmax(F, axis=1)].tolist()

@nx.utils.not_implemented_for("directed")
def local_and_global_consistency(G, alpha=0.99, max_iter=30, label_name="label"):
    """Node classification by local and global consistency.

    Implements the local and global consistency algorithm of Zhou et al.,
    which spreads label information over the graph while keeping the
    predictions close to the initial labels.

    Parameters
    ----------
    G : NetworkX Graph
    alpha : float
        Clamping factor. Weight given to the propagated information;
        ``1 - alpha`` is the weight given to the initial labels.
    max_iter : int
        Maximum number of iterations allowed.
    label_name : string
        Name of the node attribute containing the target labels to predict.

    Returns
    -------
    predicted : list
        List of length ``len(G)`` with the predicted labels for each node.

    Raises
    ------
    NetworkXError
        If no nodes in `G` have the attribute `label_name`.

    Examples
    --------
    >>> from networkx.algorithms import node_classification
    >>> G = nx.path_graph(4)
    >>> G.nodes[0]["label"] = "A"
    >>> G.nodes[3]["label"] = "B"
    >>> G.nodes(data=True)
    NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
    >>> G.edges()
    EdgeView([(0, 1), (1, 2), (2, 3)])
    >>> predicted = node_classification.local_and_global_consistency(G)
    >>> predicted
    ['A', 'A', 'B', 'B']

    References
    ----------
    Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
    Learning with local and global consistency.
    Advances in Neural Information Processing Systems, 16(16), 321-328.
    """
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    X = nx.to_scipy_sparse_array(G)  # adjacency matrix
    labels, label_dict = _get_label_info(G, label_name)

    if labels.shape[0] == 0:
        raise nx.NetworkXError(
            f"No node on the input graph is labeled by '{label_name}'."
        )

    n_samples = X.shape[0]
    n_classes = label_dict.shape[0]
    F = np.zeros((n_samples, n_classes))

    # Build propagation matrix
    degrees = X.sum(axis=0)
    degrees[degrees == 0] = 1  # Avoid division by 0
    # TODO: csr_array
    D2 = np.sqrt(sp.sparse.csr_array(sp.sparse.diags((1.0 / degrees), offsets=0)))
    P = alpha * ((D2 @ X) @ D2)
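    # D2 holds D^-1/2 on its diagonal, so P = alpha * (D^-1/2 @ X @ D^-1/2) is
    # the symmetrically normalized adjacency matrix scaled by alpha.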
    # Build base matrix
    B = np.zeros((n_samples, n_classes))
    B[labels[:, 0], labels[:, 1]] = 1 - alpha
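    # The loop below iterates F <- P @ F + B, i.e. alpha * S @ F + (1 - alpha) * Y
    # with S the normalized adjacency and Y the one-hot label matrix
    # (Zhou et al., 2004); for 0 < alpha < 1 this approaches the closed-form
    # solution (1 - alpha) * (I - alpha * S)^-1 @ Y, truncated after max_iter steps.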
    for _ in range(max_iter):
        F = (P @ F) + B

    return label_dict[np.argmax(F, axis=1)].tolist()

def _get_label_info(G, label_name):
    """Get and return label information from the input graph.

    Parameters
    ----------
    G : NetworkX Graph
    label_name : string
        Name of the node attribute containing the target label.

    Returns
    -------
    labels : numpy array, shape = [n_labeled_samples, 2]
        Array of pairs of labeled node ID and label ID.
    label_dict : numpy array, shape = [n_classes]
        Array of labels.
        The i-th element contains the label corresponding to label ID ``i``.
    """
    import numpy as np

    labels = []
    label_to_id = {}
    lid = 0
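    # Assign consecutive integer IDs to labels in order of first appearance,
    # collecting a (node index, label ID) pair for every labeled node.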
    for i, n in enumerate(G.nodes(data=True)):
        if label_name in n[1]:
            label = n[1][label_name]
            if label not in label_to_id:
                label_to_id[label] = lid
                lid += 1
            labels.append([i, label_to_id[label]])
    labels = np.array(labels)
    label_dict = np.array(
        [label for label, _ in sorted(label_to_id.items(), key=lambda x: x[1])]
    )
    return (labels, label_dict)
|