- """Nearly exact trust-region optimization subproblem."""
- import numpy as np
- from scipy.linalg import (norm, get_lapack_funcs, solve_triangular,
- cho_solve)
- from ._trustregion import (_minimize_trust_region, BaseQuadraticSubproblem)
- __all__ = ['_minimize_trustregion_exact',
- 'estimate_smallest_singular_value',
- 'singular_leading_submatrix',
- 'IterativeSubproblem']


def _minimize_trustregion_exact(fun, x0, args=(), jac=None, hess=None,
                                **trust_region_options):
    """
    Minimization of a scalar function of one or more variables using
    a nearly exact trust-region algorithm.

    Options
    -------
    initial_tr_radius : float
        Initial trust-region radius.
    max_tr_radius : float
        Maximum value of the trust-region radius. No steps that are longer
        than this value will be proposed.
    eta : float
        Acceptance stringency for proposed trust-region steps.
    gtol : float
        Gradient norm must be less than ``gtol`` before successful
        termination.
    """
    if jac is None:
        raise ValueError('Jacobian is required for trust region '
                         'exact minimization.')
    if not callable(hess):
        raise ValueError('Hessian matrix is required for trust region '
                         'exact minimization.')
    return _minimize_trust_region(fun, x0, args=args, jac=jac, hess=hess,
                                  subproblem=IterativeSubproblem,
                                  **trust_region_options)
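
# Illustrative usage sketch (kept as a comment, not part of the module's
# machinery): this solver is normally reached through the public
# ``scipy.optimize.minimize`` interface with ``method='trust-exact'``.
# A minimal example with the Rosenbrock test function:
#
#     from scipy.optimize import minimize, rosen, rosen_der, rosen_hess
#     result = minimize(rosen, x0=[2.0, 2.0], method='trust-exact',
#                       jac=rosen_der, hess=rosen_hess)
#     # result.x should be close to [1.0, 1.0], the Rosenbrock minimum.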


def estimate_smallest_singular_value(U):
    """Given upper triangular matrix ``U`` estimate the smallest singular
    value and the corresponding right singular vector in O(n**2) operations.

    Parameters
    ----------
    U : ndarray
        Square upper triangular matrix.

    Returns
    -------
    s_min : float
        Estimated smallest singular value of the provided matrix.
    z_min : ndarray
        Estimated right singular vector.

    Notes
    -----
    The procedure is based on [1]_ and is done in two steps. First, it finds
    a vector ``e`` with components selected from {+1, -1} such that the
    solution ``w`` from the system ``U.T w = e`` is as large as possible.
    Next it solves ``U v = w``. The smallest singular value is close
    to ``norm(w)/norm(v)`` and the right singular vector is close
    to ``v/norm(v)``.

    The estimate improves as the matrix becomes more ill-conditioned.

    References
    ----------
    .. [1] Cline, A. K., Moler, C. B., Stewart, G. W., Wilkinson, J. H.
           An estimate for the condition number of a matrix. 1979.
           SIAM Journal on Numerical Analysis, 16(2), 368-375.
    """
    U = np.atleast_2d(U)
    m, n = U.shape

    if m != n:
        raise ValueError("A square triangular matrix should be provided.")

    # A vector `e` with components selected from {+1, -1}
    # is chosen so that the solution `w` to the system
    # `U.T w = e` is as large as possible. Implementation
    # based on algorithm 3.5.1, p. 142, from Golub, G. H., Van Loan, C. F.
    # (2013), "Matrix computations", Fourth Edition, JHU Press, pp. 140-142,
    # adapted for a lower triangular matrix.
    p = np.zeros(n)
    w = np.empty(n)

    for k in range(n):
        wp = (1-p[k]) / U.T[k, k]
        wm = (-1-p[k]) / U.T[k, k]
        pp = p[k+1:] + U.T[k+1:, k]*wp
        pm = p[k+1:] + U.T[k+1:, k]*wm

        if abs(wp) + norm(pp, 1) >= abs(wm) + norm(pm, 1):
            w[k] = wp
            p[k+1:] = pp
        else:
            w[k] = wm
            p[k+1:] = pm

    # The system `U v = w` is solved using backward substitution.
    v = solve_triangular(U, w)

    v_norm = norm(v)
    w_norm = norm(w)

    # Smallest singular value
    s_min = w_norm / v_norm

    # Associated vector
    z_min = v / v_norm

    return s_min, z_min
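
# Small illustrative check (commented so the module stays import-safe):
# compare the O(n**2) estimate above against the exact smallest singular
# value from a full SVD. The matrix values are arbitrary.
#
#     import numpy as np
#     U = np.triu(np.random.rand(5, 5) + np.eye(5))
#     s_min, z_min = estimate_smallest_singular_value(U)
#     exact = np.linalg.svd(U, compute_uv=False).min()
#     # `s_min` approximates `exact` from above and gets closer the more
#     # ill-conditioned U is.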


def gershgorin_bounds(H):
    """
    Given a square matrix ``H`` compute upper
    and lower bounds for its eigenvalues (Gershgorin bounds).
    Defined in ref. [1].

    References
    ----------
    .. [1] Conn, A. R., Gould, N. I., & Toint, P. L.
           Trust region methods. 2000. SIAM. p. 19.
    """
    H_diag = np.diag(H)
    H_diag_abs = np.abs(H_diag)
    H_row_sums = np.sum(np.abs(H), axis=1)
    lb = np.min(H_diag + H_diag_abs - H_row_sums)
    ub = np.max(H_diag - H_diag_abs + H_row_sums)

    return lb, ub
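
# Commented sanity-check sketch (illustrative only): the Gershgorin bounds
# above must bracket every eigenvalue of a symmetric matrix.
#
#     import numpy as np
#     A = np.random.rand(6, 6)
#     H = A + A.T                      # symmetrize
#     lb, ub = gershgorin_bounds(H)
#     eigs = np.linalg.eigvalsh(H)
#     assert lb <= eigs.min() and eigs.max() <= ub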


def singular_leading_submatrix(A, U, k):
    """
    Compute the term that makes the leading ``k`` by ``k``
    submatrix of ``A`` singular.

    Parameters
    ----------
    A : ndarray
        Symmetric matrix that is not positive definite.
    U : ndarray
        Upper triangular matrix resulting from an incomplete Cholesky
        decomposition of matrix ``A``.
    k : int
        Positive integer such that the leading k by k submatrix of
        ``A`` is the first non-positive-definite leading submatrix.

    Returns
    -------
    delta : float
        Amount that should be added to the element (k, k) of the
        leading k by k submatrix of ``A`` to make it singular.
    v : ndarray
        A vector such that ``v.T B v = 0``, where ``B`` is the matrix ``A``
        after ``delta`` is added to its element (k, k).
    """
    # Compute delta
    delta = np.sum(U[:k-1, k-1]**2) - A[k-1, k-1]

    n = len(A)

    # Initialize v
    v = np.zeros(n)
    v[k-1] = 1

    # Compute the remaining values of v by solving a triangular system.
    if k != 1:
        v[:k-1] = solve_triangular(U[:k-1, :k-1], -U[:k-1, k-1])

    return delta, v
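
# Commented illustration (not executed on import): how this helper pairs
# with the incomplete Cholesky factorization returned by LAPACK ``potrf``
# when it stops at a non-positive-definite leading submatrix. The matrix
# below is an arbitrary indefinite example.
#
#     import numpy as np
#     from scipy.linalg import get_lapack_funcs
#     A = np.array([[1., 2., 3.],
#                   [2., 1., 4.],
#                   [3., 4., 1.]])      # symmetric, not positive definite
#     cholesky, = get_lapack_funcs(('potrf',), (A,))
#     U, info = cholesky(A, lower=False, overwrite_a=False, clean=True)
#     # info > 0 is the 1-based index of the first failing leading submatrix.
#     delta, v = singular_leading_submatrix(A, U, info)
#     B = A.copy()
#     B[info-1, info-1] += delta
#     # The leading `info` by `info` submatrix of B is now singular:
#     # np.linalg.det(B[:info, :info]) ~ 0 and v @ B @ v ~ 0.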


class IterativeSubproblem(BaseQuadraticSubproblem):
    """Quadratic subproblem solved by a nearly exact iterative method.

    Notes
    -----
    This subproblem solver was based on [1]_, [2]_ and [3]_,
    which implement similar algorithms. The algorithm is basically
    that of [1]_ but ideas from [2]_ and [3]_ were also used.

    References
    ----------
    .. [1] A.R. Conn, N.I. Gould, and P.L. Toint, "Trust region methods",
           SIAM, pp. 169-200, 2000.
    .. [2] J. Nocedal and S. Wright, "Numerical optimization",
           Springer Science & Business Media, pp. 83-91, 2006.
    .. [3] J.J. More and D.C. Sorensen, "Computing a trust region step",
           SIAM Journal on Scientific and Statistical Computing, vol. 4(3),
           pp. 553-572, 1983.
    """

    # UPDATE_COEFF appears in reference [1]_
    # in formula 7.3.14 (p. 190), where it is called "theta".
    # As recommended there, its value is fixed at 0.01.
    UPDATE_COEFF = 0.01

    EPS = np.finfo(float).eps

    def __init__(self, x, fun, jac, hess, hessp=None,
                 k_easy=0.1, k_hard=0.2):

        super().__init__(x, fun, jac, hess)

        # When the trust-region radius shrinks in two consecutive
        # calculations (``tr_radius < previous_tr_radius``)
        # the lower bound ``lambda_lb`` may be reused,
        # facilitating the convergence. To indicate that no
        # previous value is known at first, ``previous_tr_radius``
        # is set to -1 and ``lambda_lb`` to None.
        self.previous_tr_radius = -1
        self.lambda_lb = None

        self.niter = 0

        # ``k_easy`` and ``k_hard`` are parameters used
        # to determine the stop criteria of the iterative
        # subproblem solver. Take a look at pp. 194-197
        # of reference [1]_ for a more detailed description.
        self.k_easy = k_easy
        self.k_hard = k_hard

        # Get the LAPACK function for Cholesky decomposition.
        # The implemented SciPy wrapper does not return
        # the incomplete factorization needed by the method.
        self.cholesky, = get_lapack_funcs(('potrf',), (self.hess,))

        # Get info about the Hessian
        self.dimension = len(self.hess)
        self.hess_gershgorin_lb, \
            self.hess_gershgorin_ub = gershgorin_bounds(self.hess)
        self.hess_inf = norm(self.hess, np.inf)
        self.hess_fro = norm(self.hess, 'fro')

        # A constant such that, for vectors smaller than that,
        # backward substitution is not reliable. It was established
        # based on Golub, G. H., Van Loan, C. F. (2013).
        # "Matrix computations". Fourth Edition. JHU Press, p. 165.
        self.CLOSE_TO_ZERO = self.dimension * self.EPS * self.hess_inf

    def _initial_values(self, tr_radius):
        """Given a trust radius, return a good initial guess for
        the damping factor, the lower bound and the upper bound.
        The values were chosen according to the guidelines on
        section 7.3.8 (p. 192) from [1]_.
        """

        # Upper bound for the damping factor
        lambda_ub = max(0, self.jac_mag/tr_radius + min(-self.hess_gershgorin_lb,
                                                        self.hess_fro,
                                                        self.hess_inf))

        # Lower bound for the damping factor
        lambda_lb = max(0, -min(self.hess.diagonal()),
                        self.jac_mag/tr_radius - min(self.hess_gershgorin_ub,
                                                     self.hess_fro,
                                                     self.hess_inf))

        # Improve bounds with previous info
        if tr_radius < self.previous_tr_radius:
            lambda_lb = max(self.lambda_lb, lambda_lb)

        # Initial guess for the damping factor
        if lambda_lb == 0:
            lambda_initial = 0
        else:
            lambda_initial = max(np.sqrt(lambda_lb * lambda_ub),
                                 lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))

        return lambda_initial, lambda_lb, lambda_ub
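
    # A brief, hedged note on the bounds in ``_initial_values`` above
    # (paraphrasing section 7.3.8 of Conn et al. [1]_, not a literal quote):
    # the optimal damping factor satisfies ``lambda >= -lambda_min(H)``,
    # which ``-min(diag(H))`` under-estimates cheaply, and the boundary
    # solution obeys ``norm((H + lambda*I)**-1 g) = tr_radius``, which
    # roughly brackets lambda between ``jac_mag/tr_radius - norm(H)`` and
    # ``jac_mag/tr_radius + norm(H)``. The Gershgorin, Frobenius and
    # infinity norms are inexpensive surrogates for ``norm(H)`` there.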

    def solve(self, tr_radius):
        """Solve the quadratic subproblem."""

        lambda_current, lambda_lb, lambda_ub = self._initial_values(tr_radius)
        n = self.dimension
        hits_boundary = True
        already_factorized = False
        self.niter = 0

        while True:

            # Compute the Cholesky factorization
            if already_factorized:
                already_factorized = False
            else:
                H = self.hess + lambda_current*np.eye(n)
                U, info = self.cholesky(H, lower=False,
                                        overwrite_a=False,
                                        clean=True)

            self.niter += 1

            # Check if the factorization succeeded
            if info == 0 and self.jac_mag > self.CLOSE_TO_ZERO:
                # Successful factorization

                # Solve `U.T U p = -g`
                p = cho_solve((U, False), -self.jac)

                p_norm = norm(p)

                # Check for interior convergence
                if p_norm <= tr_radius and lambda_current == 0:
                    hits_boundary = False
                    break

                # Solve `U.T w = p`
                w = solve_triangular(U, p, trans='T')

                w_norm = norm(w)

                # Compute the Newton step according to
                # formula (4.44), p. 87, from ref [2]_.
                delta_lambda = (p_norm/w_norm)**2 * (p_norm-tr_radius)/tr_radius
                lambda_new = lambda_current + delta_lambda
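
                # Hedged aside (not in the original comments): (4.44) is the
                # Newton step for the secular equation
                # ``phi(lambda) = 1/norm(p(lambda)) - 1/tr_radius = 0`` with
                # ``p(lambda) = -(H + lambda*I)**-1 g``. Using the Cholesky
                # factor, ``phi'(lambda) = norm(w)**2 / norm(p)**3`` where
                # ``U.T w = p``, which yields `delta_lambda` above.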

                if p_norm < tr_radius:  # Inside boundary
                    s_min, z_min = estimate_smallest_singular_value(U)

                    ta, tb = self.get_boundaries_intersections(p, z_min,
                                                               tr_radius)

                    # Choose `step_len` with the smallest magnitude.
                    # The reason for this choice is explained in
                    # ref [3]_, p. 6 (immediately before the formula
                    # for `tau`).
                    step_len = min([ta, tb], key=abs)

                    # Compute the quadratic term (p.T*H*p)
                    quadratic_term = np.dot(p, np.dot(H, p))

                    # Check stop criteria
                    relative_error = (step_len**2 * s_min**2) / (quadratic_term + lambda_current*tr_radius**2)
                    if relative_error <= self.k_hard:
                        p += step_len * z_min
                        break

                    # Update uncertainty bounds
                    lambda_ub = lambda_current
                    lambda_lb = max(lambda_lb, lambda_current - s_min**2)

                    # Compute the Cholesky factorization
                    H = self.hess + lambda_new*np.eye(n)
                    c, info = self.cholesky(H, lower=False,
                                            overwrite_a=False,
                                            clean=True)

                    # Check if the factorization has succeeded
                    if info == 0:  # Successful factorization
                        # Update damping factor
                        lambda_current = lambda_new
                        already_factorized = True
                    else:  # Unsuccessful factorization
                        # Update uncertainty bounds
                        lambda_lb = max(lambda_lb, lambda_new)

                        # Update damping factor
                        lambda_current = max(np.sqrt(lambda_lb * lambda_ub),
                                             lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))

                else:  # Outside boundary
                    # Check stop criteria
                    relative_error = abs(p_norm - tr_radius) / tr_radius
                    if relative_error <= self.k_easy:
                        break

                    # Update uncertainty bounds
                    lambda_lb = lambda_current

                    # Update damping factor
                    lambda_current = lambda_new

            elif info == 0 and self.jac_mag <= self.CLOSE_TO_ZERO:
                # jac_mag very close to zero

                # Check for interior convergence
                if lambda_current == 0:
                    p = np.zeros(n)
                    hits_boundary = False
                    break

                s_min, z_min = estimate_smallest_singular_value(U)
                step_len = tr_radius

                # Check stop criteria
                if step_len**2 * s_min**2 <= self.k_hard * lambda_current * tr_radius**2:
                    p = step_len * z_min
                    break

                # Update uncertainty bounds
                lambda_ub = lambda_current
                lambda_lb = max(lambda_lb, lambda_current - s_min**2)

                # Update damping factor
                lambda_current = max(np.sqrt(lambda_lb * lambda_ub),
                                     lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))

            else:  # Unsuccessful factorization

                # Compute auxiliary terms
                delta, v = singular_leading_submatrix(H, U, info)
                v_norm = norm(v)

                # Update uncertainty interval
                lambda_lb = max(lambda_lb, lambda_current + delta/v_norm**2)

                # Update damping factor
                lambda_current = max(np.sqrt(lambda_lb * lambda_ub),
                                     lambda_lb + self.UPDATE_COEFF*(lambda_ub-lambda_lb))

        self.lambda_lb = lambda_lb
        self.lambda_current = lambda_current
        self.previous_tr_radius = tr_radius

        return p, hits_boundary
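

# Commented, hedged sketch of driving the subproblem solver directly (the
# normal path is through `_minimize_trustregion_exact` above). It assumes a
# simple quadratic model; the names `x0`, `quad`, `grad` and `hessian` are
# illustrative only.
#
#     import numpy as np
#     A = np.diag([1.0, 2.0, 3.0])
#     b = np.array([1.0, 1.0, 1.0])
#     quad = lambda x: 0.5 * x @ A @ x + b @ x
#     grad = lambda x: A @ x + b
#     hessian = lambda x: A
#     x0 = np.zeros(3)
#     subproblem = IterativeSubproblem(x0, quad, grad, hessian)
#     p, hits_boundary = subproblem.solve(tr_radius=0.5)
#     # `p` minimizes the local quadratic model subject to norm(p) <= 0.5;
#     # `hits_boundary` tells whether the trust-region constraint is active.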