123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126 |
- """Newton-CG trust-region optimization."""
- import math
- import numpy as np
- import scipy.linalg
- from ._trustregion import (_minimize_trust_region, BaseQuadraticSubproblem)
- __all__ = []
- def _minimize_trust_ncg(fun, x0, args=(), jac=None, hess=None, hessp=None,
- **trust_region_options):
- """
- Minimization of scalar function of one or more variables using
- the Newton conjugate gradient trust-region algorithm.
- Options
- -------
- initial_trust_radius : float
- Initial trust-region radius.
- max_trust_radius : float
- Maximum value of the trust-region radius. No steps that are longer
- than this value will be proposed.
- eta : float
- Trust region related acceptance stringency for proposed steps.
- gtol : float
- Gradient norm must be less than `gtol` before successful
- termination.
- """
- if jac is None:
- raise ValueError('Jacobian is required for Newton-CG trust-region '
- 'minimization')
- if hess is None and hessp is None:
- raise ValueError('Either the Hessian or the Hessian-vector product '
- 'is required for Newton-CG trust-region minimization')
- return _minimize_trust_region(fun, x0, args=args, jac=jac, hess=hess,
- hessp=hessp, subproblem=CGSteihaugSubproblem,
- **trust_region_options)
- class CGSteihaugSubproblem(BaseQuadraticSubproblem):
- """Quadratic subproblem solved by a conjugate gradient method"""
- def solve(self, trust_radius):
- """
- Solve the subproblem using a conjugate gradient method.
- Parameters
- ----------
- trust_radius : float
- We are allowed to wander only this far away from the origin.
- Returns
- -------
- p : ndarray
- The proposed step.
- hits_boundary : bool
- True if the proposed step is on the boundary of the trust region.
- Notes
- -----
- This is algorithm (7.2) of Nocedal and Wright 2nd edition.
- Only the function that computes the Hessian-vector product is required.
- The Hessian itself is not required, and the Hessian does
- not need to be positive semidefinite.
- """
- # get the norm of jacobian and define the origin
- p_origin = np.zeros_like(self.jac)
- # define a default tolerance
- tolerance = min(0.5, math.sqrt(self.jac_mag)) * self.jac_mag
- # Stop the method if the search direction
- # is a direction of nonpositive curvature.
- if self.jac_mag < tolerance:
- hits_boundary = False
- return p_origin, hits_boundary
- # init the state for the first iteration
- z = p_origin
- r = self.jac
- d = -r
- # Search for the min of the approximation of the objective function.
- while True:
- # do an iteration
- Bd = self.hessp(d)
- dBd = np.dot(d, Bd)
- if dBd <= 0:
- # Look at the two boundary points.
- # Find both values of t to get the boundary points such that
- # ||z + t d|| == trust_radius
- # and then choose the one with the predicted min value.
- ta, tb = self.get_boundaries_intersections(z, d, trust_radius)
- pa = z + ta * d
- pb = z + tb * d
- if self(pa) < self(pb):
- p_boundary = pa
- else:
- p_boundary = pb
- hits_boundary = True
- return p_boundary, hits_boundary
- r_squared = np.dot(r, r)
- alpha = r_squared / dBd
- z_next = z + alpha * d
- if scipy.linalg.norm(z_next) >= trust_radius:
- # Find t >= 0 to get the boundary point such that
- # ||z + t d|| == trust_radius
- ta, tb = self.get_boundaries_intersections(z, d, trust_radius)
- p_boundary = z + tb * d
- hits_boundary = True
- return p_boundary, hits_boundary
- r_next = r + alpha * Bd
- r_next_squared = np.dot(r_next, r_next)
- if math.sqrt(r_next_squared) < tolerance:
- hits_boundary = False
- return z_next, hits_boundary
- beta_next = r_next_squared / r_squared
- d_next = -r_next + beta_next * d
- # update the state for the next iteration
- z = z_next
- r = r_next
- d = d_next
|