- """Trust-region optimization."""
- import math
- import warnings
- import numpy as np
- import scipy.linalg
- from ._optimize import (_check_unknown_options, _status_message,
- OptimizeResult, _prepare_scalar_function)
- from scipy.optimize._hessian_update_strategy import HessianUpdateStrategy
- from scipy.optimize._differentiable_functions import FD_METHODS
- __all__ = []


def _wrap_function(function, args):
    # Wraps a minimizer function to count the number of evaluations
    # and to easily provide an ``args`` keyword.
    ncalls = [0]
    if function is None:
        return ncalls, None

    def function_wrapper(x, *wrapper_args):
        ncalls[0] += 1
        # A copy of x is sent to the user function (gh13740)
        return function(np.copy(x), *(wrapper_args + args))

    return ncalls, function_wrapper
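

# A minimal illustration of ``_wrap_function`` (kept in a comment so nothing
# runs at import time; ``np.sum`` is just a stand-in objective):
#
#     ncalls, f = _wrap_function(np.sum, ())
#     f(np.arange(3))   # returns 3; the caller's array is copied, not mutated
#     ncalls[0]         # -> 1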


class BaseQuadraticSubproblem:
    """
    Base/abstract class defining the quadratic model for trust-region
    minimization. Child classes must implement the ``solve`` method.

    Values of the objective function, Jacobian and Hessian (if provided) at
    the current iterate ``x`` are evaluated on demand and then stored as
    attributes ``fun``, ``jac``, ``hess``.
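
    The quadratic model itself is evaluated by calling the instance with a
    step ``p``: ``m(p) = fun + np.dot(jac, p) + 0.5 * np.dot(p, hessp(p))``.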
- """
- def __init__(self, x, fun, jac, hess=None, hessp=None):
- self._x = x
- self._f = None
- self._g = None
- self._h = None
- self._g_mag = None
- self._cauchy_point = None
- self._newton_point = None
- self._fun = fun
- self._jac = jac
- self._hess = hess
- self._hessp = hessp
- def __call__(self, p):
- return self.fun + np.dot(self.jac, p) + 0.5 * np.dot(p, self.hessp(p))

    @property
    def fun(self):
        """Value of objective function at current iteration."""
        if self._f is None:
            self._f = self._fun(self._x)
        return self._f

    @property
    def jac(self):
        """Value of Jacobian of objective function at current iteration."""
        if self._g is None:
            self._g = self._jac(self._x)
        return self._g

    @property
    def hess(self):
        """Value of Hessian of objective function at current iteration."""
        if self._h is None:
            self._h = self._hess(self._x)
        return self._h

    def hessp(self, p):
        # Prefer a user-supplied Hessian-vector product; otherwise fall back
        # to a dense matrix-vector product with the full Hessian.
        if self._hessp is not None:
            return self._hessp(self._x, p)
        else:
            return np.dot(self.hess, p)

    @property
    def jac_mag(self):
        """Magnitude of Jacobian of objective function at current iteration."""
        if self._g_mag is None:
            self._g_mag = scipy.linalg.norm(self.jac)
        return self._g_mag

    def get_boundaries_intersections(self, z, d, trust_radius):
        """
        Solve the scalar quadratic equation ``||z + t d|| == trust_radius``.
        This is like a line-sphere intersection.
        Return the two values of t, sorted from low to high.
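
        Examples
        --------
        An illustrative case (assuming this module is importable as
        ``scipy.optimize._trustregion``): starting from the center of a
        radius-2 region and moving along a unit direction, the boundary
        is hit at t = -2 and t = 2.

        >>> import numpy as np
        >>> from scipy.optimize._trustregion import BaseQuadraticSubproblem
        >>> sub = BaseQuadraticSubproblem(np.zeros(2), None, None)
        >>> sub.get_boundaries_intersections(np.zeros(2),
        ...                                  np.array([1.0, 0.0]), 2.0)
        [-2.0, 2.0]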
- """
- a = np.dot(d, d)
- b = 2 * np.dot(z, d)
- c = np.dot(z, z) - trust_radius**2
- sqrt_discriminant = math.sqrt(b*b - 4*a*c)
- # The following calculation is mathematically
- # equivalent to:
- # ta = (-b - sqrt_discriminant) / (2*a)
- # tb = (-b + sqrt_discriminant) / (2*a)
- # but produce smaller round off errors.
- # Look at Matrix Computation p.97
- # for a better justification.
- aux = b + math.copysign(sqrt_discriminant, b)
- ta = -aux / (2*a)
- tb = -2*c / aux
- return sorted([ta, tb])

    def solve(self, trust_radius):
        raise NotImplementedError('The solve method should be implemented by '
                                  'the child class')


def _minimize_trust_region(fun, x0, args=(), jac=None, hess=None, hessp=None,
                           subproblem=None, initial_trust_radius=1.0,
                           max_trust_radius=1000.0, eta=0.15, gtol=1e-4,
                           maxiter=None, disp=False, return_all=False,
                           callback=None, inexact=True, **unknown_options):
    """
    Minimization of scalar function of one or more variables using a
    trust-region algorithm.

    Options for the trust-region algorithm are:
        initial_trust_radius : float
            Initial trust radius.
        max_trust_radius : float
            Never propose steps that are longer than this value.
        eta : float
            Acceptance stringency: a proposed step is accepted only if the
            ratio of actual to predicted reduction exceeds this value.
        gtol : float
            Gradient norm must be less than `gtol`
            before successful termination.
        maxiter : int
            Maximum number of iterations to perform.
        disp : bool
            If True, print convergence message.
        inexact : bool
            Accuracy with which to solve the subproblems. If True, fewer
            nonlinear iterations are required, but more vector products.
            Only effective for method trust-krylov.

    This function is called by the `minimize` function.
    It is not supposed to be called directly.
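
    A minimal usage sketch, going through `scipy.optimize.minimize` with a
    trust-region method (`rosen` and its derivatives are SciPy's convenience
    test functions):

    >>> from scipy.optimize import minimize, rosen, rosen_der, rosen_hess
    >>> res = minimize(rosen, [1.3, 0.7], jac=rosen_der, hess=rosen_hess,
    ...                method='trust-ncg')
    >>> res.success
    True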
- """
- _check_unknown_options(unknown_options)
- if jac is None:
- raise ValueError('Jacobian is currently required for trust-region '
- 'methods')
- if hess is None and hessp is None:
- raise ValueError('Either the Hessian or the Hessian-vector product '
- 'is currently required for trust-region methods')
- if subproblem is None:
- raise ValueError('A subproblem solving strategy is required for '
- 'trust-region methods')
- if not (0 <= eta < 0.25):
- raise Exception('invalid acceptance stringency')
- if max_trust_radius <= 0:
- raise Exception('the max trust radius must be positive')
- if initial_trust_radius <= 0:
- raise ValueError('the initial trust radius must be positive')
- if initial_trust_radius >= max_trust_radius:
- raise ValueError('the initial trust radius must be less than the '
- 'max trust radius')
- # force the initial guess into a nice format
- x0 = np.asarray(x0).flatten()
- # A ScalarFunction representing the problem. This caches calls to fun, jac,
- # hess.
- sf = _prepare_scalar_function(fun, x0, jac=jac, hess=hess, args=args)
- fun = sf.fun
- jac = sf.grad
- if callable(hess):
- hess = sf.hess
- elif callable(hessp):
- # this elif statement must come before examining whether hess
- # is estimated by FD methods or a HessianUpdateStrategy
- pass
- elif (hess in FD_METHODS or isinstance(hess, HessianUpdateStrategy)):
- # If the Hessian is being estimated by finite differences or a
- # Hessian update strategy then ScalarFunction.hess returns a
- # LinearOperator or a HessianUpdateStrategy. This enables the
- # calculation/creation of a hessp. BUT you only want to do this
- # if the user *hasn't* provided a callable(hessp) function.
- hess = None
- def hessp(x, p, *args):
- return sf.hess(x).dot(p)
- else:
- raise ValueError('Either the Hessian or the Hessian-vector product '
- 'is currently required for trust-region methods')
- # ScalarFunction doesn't represent hessp
- nhessp, hessp = _wrap_function(hessp, args)

    # limit the number of iterations
    if maxiter is None:
        maxiter = len(x0)*200

    # init the search status
    warnflag = 0

    # initialize the search
    trust_radius = initial_trust_radius
    x = x0
    if return_all:
        allvecs = [x]
    m = subproblem(x, fun, jac, hess, hessp)
    k = 0

    # search for the function min
    # do not even start if the gradient is small enough
    while m.jac_mag >= gtol:

        # Solve the sub-problem.
        # This gives us the proposed step relative to the current position
        # and it tells us whether the proposed step
        # has reached the trust region boundary or not.
        try:
            p, hits_boundary = m.solve(trust_radius)
        except np.linalg.LinAlgError:
            warnflag = 3
            break

        # calculate the predicted value at the proposed point
        predicted_value = m(p)

        # define the local approximation at the proposed point
        x_proposed = x + p
        m_proposed = subproblem(x_proposed, fun, jac, hess, hessp)

        # evaluate the ratio defined in equation (4.4)
        actual_reduction = m.fun - m_proposed.fun
        predicted_reduction = m.fun - predicted_value
        if predicted_reduction <= 0:
            warnflag = 2
            break
        rho = actual_reduction / predicted_reduction

        # update the trust radius according to the actual/predicted ratio
        if rho < 0.25:
            trust_radius *= 0.25
        elif rho > 0.75 and hits_boundary:
            trust_radius = min(2*trust_radius, max_trust_radius)

        # if the ratio is high enough then accept the proposed step
        if rho > eta:
            x = x_proposed
            m = m_proposed

        # append the best guess, call back, increment the iteration count
        if return_all:
            allvecs.append(np.copy(x))
        if callback is not None:
            callback(np.copy(x))
        k += 1

        # check if the gradient is small enough to stop
        if m.jac_mag < gtol:
            warnflag = 0
            break

        # check if we have looked at enough iterations
        if k >= maxiter:
            warnflag = 1
            break

    # print some stuff if requested
    status_messages = (
        _status_message['success'],
        _status_message['maxiter'],
        'A bad approximation caused failure to predict improvement.',
        'A linalg error occurred, such as a non-psd Hessian.',
    )
    if disp:
        if warnflag == 0:
            print(status_messages[warnflag])
        else:
            warnings.warn(status_messages[warnflag], RuntimeWarning, 3)
        print("         Current function value: %f" % m.fun)
        print("         Iterations: %d" % k)
        print("         Function evaluations: %d" % sf.nfev)
        print("         Gradient evaluations: %d" % sf.ngev)
        print("         Hessian evaluations: %d" % (sf.nhev + nhessp[0]))

    result = OptimizeResult(x=x, success=(warnflag == 0), status=warnflag,
                            fun=m.fun, jac=m.jac, nfev=sf.nfev, njev=sf.ngev,
                            nhev=sf.nhev + nhessp[0], nit=k,
                            message=status_messages[warnflag])

    if hess is not None:
        result['hess'] = m.hess

    if return_all:
        result['allvecs'] = allvecs

    return result