123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467 |
- """Sparse block 1-norm estimator.
- """
- import numpy as np
- from scipy.sparse.linalg import aslinearoperator
- __all__ = ['onenormest']
- def onenormest(A, t=2, itmax=5, compute_v=False, compute_w=False):
- """
- Compute a lower bound of the 1-norm of a sparse matrix.
- Parameters
- ----------
- A : ndarray or other linear operator
- A linear operator that can be transposed and that can
- produce matrix products.
- t : int, optional
- A positive parameter controlling the tradeoff between
- accuracy versus time and memory usage.
- Larger values take longer and use more memory
- but give more accurate output.
- itmax : int, optional
- Use at most this many iterations.
- compute_v : bool, optional
- Request a norm-maximizing linear operator input vector if True.
- compute_w : bool, optional
- Request a norm-maximizing linear operator output vector if True.
- Returns
- -------
- est : float
- An underestimate of the 1-norm of the sparse matrix.
- v : ndarray, optional
- The vector such that ||Av||_1 == est*||v||_1.
- It can be thought of as an input to the linear operator
- that gives an output with particularly large norm.
- w : ndarray, optional
- The vector Av which has relatively large 1-norm.
- It can be thought of as an output of the linear operator
- that is relatively large in norm compared to the input.
- Notes
- -----
- This is algorithm 2.4 of [1].
- In [2] it is described as follows.
- "This algorithm typically requires the evaluation of
- about 4t matrix-vector products and almost invariably
- produces a norm estimate (which is, in fact, a lower
- bound on the norm) correct to within a factor 3."
- .. versionadded:: 0.13.0
- References
- ----------
- .. [1] Nicholas J. Higham and Francoise Tisseur (2000),
- "A Block Algorithm for Matrix 1-Norm Estimation,
- with an Application to 1-Norm Pseudospectra."
- SIAM J. Matrix Anal. Appl. Vol. 21, No. 4, pp. 1185-1201.
- .. [2] Awad H. Al-Mohy and Nicholas J. Higham (2009),
- "A new scaling and squaring algorithm for the matrix exponential."
- SIAM J. Matrix Anal. Appl. Vol. 31, No. 3, pp. 970-989.
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.sparse import csc_matrix
- >>> from scipy.sparse.linalg import onenormest
- >>> A = csc_matrix([[1., 0., 0.], [5., 8., 2.], [0., -1., 0.]], dtype=float)
- >>> A.toarray()
- array([[ 1., 0., 0.],
- [ 5., 8., 2.],
- [ 0., -1., 0.]])
- >>> onenormest(A)
- 9.0
- >>> np.linalg.norm(A.toarray(), ord=1)
- 9.0
- """
- # Check the input.
- A = aslinearoperator(A)
- if A.shape[0] != A.shape[1]:
- raise ValueError('expected the operator to act like a square matrix')
- # If the operator size is small compared to t,
- # then it is easier to compute the exact norm.
- # Otherwise estimate the norm.
- n = A.shape[1]
- if t >= n:
- A_explicit = np.asarray(aslinearoperator(A).matmat(np.identity(n)))
- if A_explicit.shape != (n, n):
- raise Exception('internal error: ',
- 'unexpected shape ' + str(A_explicit.shape))
- col_abs_sums = abs(A_explicit).sum(axis=0)
- if col_abs_sums.shape != (n, ):
- raise Exception('internal error: ',
- 'unexpected shape ' + str(col_abs_sums.shape))
- argmax_j = np.argmax(col_abs_sums)
- v = elementary_vector(n, argmax_j)
- w = A_explicit[:, argmax_j]
- est = col_abs_sums[argmax_j]
- else:
- est, v, w, nmults, nresamples = _onenormest_core(A, A.H, t, itmax)
- # Report the norm estimate along with some certificates of the estimate.
- if compute_v or compute_w:
- result = (est,)
- if compute_v:
- result += (v,)
- if compute_w:
- result += (w,)
- return result
- else:
- return est
- def _blocked_elementwise(func):
- """
- Decorator for an elementwise function, to apply it blockwise along
- first dimension, to avoid excessive memory usage in temporaries.
- """
- block_size = 2**20
- def wrapper(x):
- if x.shape[0] < block_size:
- return func(x)
- else:
- y0 = func(x[:block_size])
- y = np.zeros((x.shape[0],) + y0.shape[1:], dtype=y0.dtype)
- y[:block_size] = y0
- del y0
- for j in range(block_size, x.shape[0], block_size):
- y[j:j+block_size] = func(x[j:j+block_size])
- return y
- return wrapper
- @_blocked_elementwise
- def sign_round_up(X):
- """
- This should do the right thing for both real and complex matrices.
- From Higham and Tisseur:
- "Everything in this section remains valid for complex matrices
- provided that sign(A) is redefined as the matrix (aij / |aij|)
- (and sign(0) = 1) transposes are replaced by conjugate transposes."
- """
- Y = X.copy()
- Y[Y == 0] = 1
- Y /= np.abs(Y)
- return Y
- @_blocked_elementwise
- def _max_abs_axis1(X):
- return np.max(np.abs(X), axis=1)
- def _sum_abs_axis0(X):
- block_size = 2**20
- r = None
- for j in range(0, X.shape[0], block_size):
- y = np.sum(np.abs(X[j:j+block_size]), axis=0)
- if r is None:
- r = y
- else:
- r += y
- return r
- def elementary_vector(n, i):
- v = np.zeros(n, dtype=float)
- v[i] = 1
- return v
- def vectors_are_parallel(v, w):
- # Columns are considered parallel when they are equal or negative.
- # Entries are required to be in {-1, 1},
- # which guarantees that the magnitudes of the vectors are identical.
- if v.ndim != 1 or v.shape != w.shape:
- raise ValueError('expected conformant vectors with entries in {-1,1}')
- n = v.shape[0]
- return np.dot(v, w) == n
- def every_col_of_X_is_parallel_to_a_col_of_Y(X, Y):
- for v in X.T:
- if not any(vectors_are_parallel(v, w) for w in Y.T):
- return False
- return True
- def column_needs_resampling(i, X, Y=None):
- # column i of X needs resampling if either
- # it is parallel to a previous column of X or
- # it is parallel to a column of Y
- n, t = X.shape
- v = X[:, i]
- if any(vectors_are_parallel(v, X[:, j]) for j in range(i)):
- return True
- if Y is not None:
- if any(vectors_are_parallel(v, w) for w in Y.T):
- return True
- return False
- def resample_column(i, X):
- X[:, i] = np.random.randint(0, 2, size=X.shape[0])*2 - 1
- def less_than_or_close(a, b):
- return np.allclose(a, b) or (a < b)
- def _algorithm_2_2(A, AT, t):
- """
- This is Algorithm 2.2.
- Parameters
- ----------
- A : ndarray or other linear operator
- A linear operator that can produce matrix products.
- AT : ndarray or other linear operator
- The transpose of A.
- t : int, optional
- A positive parameter controlling the tradeoff between
- accuracy versus time and memory usage.
- Returns
- -------
- g : sequence
- A non-negative decreasing vector
- such that g[j] is a lower bound for the 1-norm
- of the column of A of jth largest 1-norm.
- The first entry of this vector is therefore a lower bound
- on the 1-norm of the linear operator A.
- This sequence has length t.
- ind : sequence
- The ith entry of ind is the index of the column A whose 1-norm
- is given by g[i].
- This sequence of indices has length t, and its entries are
- chosen from range(n), possibly with repetition,
- where n is the order of the operator A.
- Notes
- -----
- This algorithm is mainly for testing.
- It uses the 'ind' array in a way that is similar to
- its usage in algorithm 2.4. This algorithm 2.2 may be easier to test,
- so it gives a chance of uncovering bugs related to indexing
- which could have propagated less noticeably to algorithm 2.4.
- """
- A_linear_operator = aslinearoperator(A)
- AT_linear_operator = aslinearoperator(AT)
- n = A_linear_operator.shape[0]
- # Initialize the X block with columns of unit 1-norm.
- X = np.ones((n, t))
- if t > 1:
- X[:, 1:] = np.random.randint(0, 2, size=(n, t-1))*2 - 1
- X /= float(n)
- # Iteratively improve the lower bounds.
- # Track extra things, to assert invariants for debugging.
- g_prev = None
- h_prev = None
- k = 1
- ind = range(t)
- while True:
- Y = np.asarray(A_linear_operator.matmat(X))
- g = _sum_abs_axis0(Y)
- best_j = np.argmax(g)
- g.sort()
- g = g[::-1]
- S = sign_round_up(Y)
- Z = np.asarray(AT_linear_operator.matmat(S))
- h = _max_abs_axis1(Z)
- # If this algorithm runs for fewer than two iterations,
- # then its return values do not have the properties indicated
- # in the description of the algorithm.
- # In particular, the entries of g are not 1-norms of any
- # column of A until the second iteration.
- # Therefore we will require the algorithm to run for at least
- # two iterations, even though this requirement is not stated
- # in the description of the algorithm.
- if k >= 2:
- if less_than_or_close(max(h), np.dot(Z[:, best_j], X[:, best_j])):
- break
- ind = np.argsort(h)[::-1][:t]
- h = h[ind]
- for j in range(t):
- X[:, j] = elementary_vector(n, ind[j])
- # Check invariant (2.2).
- if k >= 2:
- if not less_than_or_close(g_prev[0], h_prev[0]):
- raise Exception('invariant (2.2) is violated')
- if not less_than_or_close(h_prev[0], g[0]):
- raise Exception('invariant (2.2) is violated')
- # Check invariant (2.3).
- if k >= 3:
- for j in range(t):
- if not less_than_or_close(g[j], g_prev[j]):
- raise Exception('invariant (2.3) is violated')
- # Update for the next iteration.
- g_prev = g
- h_prev = h
- k += 1
- # Return the lower bounds and the corresponding column indices.
- return g, ind
- def _onenormest_core(A, AT, t, itmax):
- """
- Compute a lower bound of the 1-norm of a sparse matrix.
- Parameters
- ----------
- A : ndarray or other linear operator
- A linear operator that can produce matrix products.
- AT : ndarray or other linear operator
- The transpose of A.
- t : int, optional
- A positive parameter controlling the tradeoff between
- accuracy versus time and memory usage.
- itmax : int, optional
- Use at most this many iterations.
- Returns
- -------
- est : float
- An underestimate of the 1-norm of the sparse matrix.
- v : ndarray, optional
- The vector such that ||Av||_1 == est*||v||_1.
- It can be thought of as an input to the linear operator
- that gives an output with particularly large norm.
- w : ndarray, optional
- The vector Av which has relatively large 1-norm.
- It can be thought of as an output of the linear operator
- that is relatively large in norm compared to the input.
- nmults : int, optional
- The number of matrix products that were computed.
- nresamples : int, optional
- The number of times a parallel column was observed,
- necessitating a re-randomization of the column.
- Notes
- -----
- This is algorithm 2.4.
- """
- # This function is a more or less direct translation
- # of Algorithm 2.4 from the Higham and Tisseur (2000) paper.
- A_linear_operator = aslinearoperator(A)
- AT_linear_operator = aslinearoperator(AT)
- if itmax < 2:
- raise ValueError('at least two iterations are required')
- if t < 1:
- raise ValueError('at least one column is required')
- n = A.shape[0]
- if t >= n:
- raise ValueError('t should be smaller than the order of A')
- # Track the number of big*small matrix multiplications
- # and the number of resamplings.
- nmults = 0
- nresamples = 0
- # "We now explain our choice of starting matrix. We take the first
- # column of X to be the vector of 1s [...] This has the advantage that
- # for a matrix with nonnegative elements the algorithm converges
- # with an exact estimate on the second iteration, and such matrices
- # arise in applications [...]"
- X = np.ones((n, t), dtype=float)
- # "The remaining columns are chosen as rand{-1,1},
- # with a check for and correction of parallel columns,
- # exactly as for S in the body of the algorithm."
- if t > 1:
- for i in range(1, t):
- # These are technically initial samples, not resamples,
- # so the resampling count is not incremented.
- resample_column(i, X)
- for i in range(t):
- while column_needs_resampling(i, X):
- resample_column(i, X)
- nresamples += 1
- # "Choose starting matrix X with columns of unit 1-norm."
- X /= float(n)
- # "indices of used unit vectors e_j"
- ind_hist = np.zeros(0, dtype=np.intp)
- est_old = 0
- S = np.zeros((n, t), dtype=float)
- k = 1
- ind = None
- while True:
- Y = np.asarray(A_linear_operator.matmat(X))
- nmults += 1
- mags = _sum_abs_axis0(Y)
- est = np.max(mags)
- best_j = np.argmax(mags)
- if est > est_old or k == 2:
- if k >= 2:
- ind_best = ind[best_j]
- w = Y[:, best_j]
- # (1)
- if k >= 2 and est <= est_old:
- est = est_old
- break
- est_old = est
- S_old = S
- if k > itmax:
- break
- S = sign_round_up(Y)
- del Y
- # (2)
- if every_col_of_X_is_parallel_to_a_col_of_Y(S, S_old):
- break
- if t > 1:
- # "Ensure that no column of S is parallel to another column of S
- # or to a column of S_old by replacing columns of S by rand{-1,1}."
- for i in range(t):
- while column_needs_resampling(i, S, S_old):
- resample_column(i, S)
- nresamples += 1
- del S_old
- # (3)
- Z = np.asarray(AT_linear_operator.matmat(S))
- nmults += 1
- h = _max_abs_axis1(Z)
- del Z
- # (4)
- if k >= 2 and max(h) == h[ind_best]:
- break
- # "Sort h so that h_first >= ... >= h_last
- # and re-order ind correspondingly."
- #
- # Later on, we will need at most t+len(ind_hist) largest
- # entries, so drop the rest
- ind = np.argsort(h)[::-1][:t+len(ind_hist)].copy()
- del h
- if t > 1:
- # (5)
- # Break if the most promising t vectors have been visited already.
- if np.in1d(ind[:t], ind_hist).all():
- break
- # Put the most promising unvisited vectors at the front of the list
- # and put the visited vectors at the end of the list.
- # Preserve the order of the indices induced by the ordering of h.
- seen = np.in1d(ind, ind_hist)
- ind = np.concatenate((ind[~seen], ind[seen]))
- for j in range(t):
- X[:, j] = elementary_vector(n, ind[j])
- new_ind = ind[:t][~np.in1d(ind[:t], ind_hist)]
- ind_hist = np.concatenate((ind_hist, new_ind))
- k += 1
- v = elementary_vector(n, ind_best)
- return est, v, w, nmults, nresamples
|