- """
- Functions
- ---------
- .. autosummary::
- :toctree: generated/
- line_search_armijo
- line_search_wolfe1
- line_search_wolfe2
- scalar_search_wolfe1
- scalar_search_wolfe2
- """
- from warnings import warn
- from scipy.optimize import _minpack2 as minpack2
- import numpy as np
- __all__ = ['LineSearchWarning', 'line_search_wolfe1', 'line_search_wolfe2',
- 'scalar_search_wolfe1', 'scalar_search_wolfe2',
- 'line_search_armijo']
- class LineSearchWarning(RuntimeWarning):
- pass
- #------------------------------------------------------------------------------
- # Minpack's Wolfe line and scalar searches
- #------------------------------------------------------------------------------
- def line_search_wolfe1(f, fprime, xk, pk, gfk=None,
- old_fval=None, old_old_fval=None,
- args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8,
- xtol=1e-14):
- """
- As `scalar_search_wolfe1`, but do a line search along the direction `pk`.
- Parameters
- ----------
- f : callable
- Function `f(x)`
- fprime : callable
- Gradient of `f`
- xk : array_like
- Current point
- pk : array_like
- Search direction
- gfk : array_like, optional
- Gradient of `f` at point `xk`
- old_fval : float, optional
- Value of `f` at point `xk`
- old_old_fval : float, optional
- Value of `f` at point preceding `xk`
- The rest of the parameters are the same as for `scalar_search_wolfe1`.
- Returns
- -------
- stp, f_count, g_count, fval, old_fval
- As in `scalar_search_wolfe1`, plus the number of function and
- gradient evaluations made.
- gval : array
- Gradient of `f` at the final point
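- Examples
- --------
- An illustrative sketch; the quadratic objective below is an
- assumption chosen for demonstration, not part of this module:
- >>> import numpy as np
- >>> from scipy.optimize.linesearch import line_search_wolfe1
- >>> def f(x):
- ...     return float(np.dot(x, x))
- >>> def fprime(x):
- ...     return 2 * x
- >>> xk = np.array([1.0, 1.0])
- >>> pk = -fprime(xk)  # steepest-descent direction
- >>> stp, fc, gc, fval, old_fval, gval = line_search_wolfe1(
- ...     f, fprime, xk, pk)
- Here ``stp`` should be close to 0.5, the exact minimizer of ``f``
- along ``pk``, and ``fval`` close to 0.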
- """
- if gfk is None:
- gfk = fprime(xk, *args)
- gval = [gfk]
- gc = [0]
- fc = [0]
- def phi(s):
- fc[0] += 1
- return f(xk + s*pk, *args)
- def derphi(s):
- gval[0] = fprime(xk + s*pk, *args)
- gc[0] += 1
- return np.dot(gval[0], pk)
- derphi0 = np.dot(gfk, pk)
- stp, fval, old_fval = scalar_search_wolfe1(
- phi, derphi, old_fval, old_old_fval, derphi0,
- c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol)
- return stp, fc[0], gc[0], fval, old_fval, gval[0]
- def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
- c1=1e-4, c2=0.9,
- amax=50, amin=1e-8, xtol=1e-14):
- """
- Scalar function search for an alpha that satisfies the strong Wolfe
- conditions.
- The search direction is assumed to be a descent direction; only
- alpha > 0 is considered.
- Parameters
- ----------
- phi : callable phi(alpha)
- Function at point `alpha`
- derphi : callable phi'(alpha)
- Objective function derivative. Returns a scalar.
- phi0 : float, optional
- Value of phi at 0
- old_phi0 : float, optional
- Value of phi at previous point
- derphi0 : float, optional
- Value of derphi at 0.
- c1 : float, optional
- Parameter for Armijo condition rule.
- c2 : float, optional
- Parameter for curvature condition rule.
- amax, amin : float, optional
- Maximum and minimum step size
- xtol : float, optional
- Relative tolerance for an acceptable step.
- Returns
- -------
- alpha : float
- Step size, or None if no suitable step was found
- phi : float
- Value of `phi` at the new point `alpha`
- phi0 : float
- Value of `phi` at `alpha=0`
- Notes
- -----
- Uses the DCSRCH routine from MINPACK-2.
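- Examples
- --------
- An illustrative sketch with an assumed one-dimensional quadratic:
- >>> from scipy.optimize.linesearch import scalar_search_wolfe1
- >>> def phi(alpha):
- ...     return (alpha - 0.5)**2
- >>> def derphi(alpha):
- ...     return 2.0 * (alpha - 0.5)
- >>> alpha, phi_a, phi0 = scalar_search_wolfe1(phi, derphi)
- ``alpha`` should be close to 0.5, where ``derphi`` vanishes and both
- Wolfe conditions hold.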
- """
- if phi0 is None:
- phi0 = phi(0.)
- if derphi0 is None:
- derphi0 = derphi(0.)
- if old_phi0 is not None and derphi0 != 0:
- alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
- if alpha1 < 0:
- alpha1 = 1.0
- else:
- alpha1 = 1.0
- phi1 = phi0
- derphi1 = derphi0
- isave = np.zeros((2,), np.intc)
- dsave = np.zeros((13,), float)
- task = b'START'
- maxiter = 100
- for i in range(maxiter):
- stp, phi1, derphi1, task = minpack2.dcsrch(alpha1, phi1, derphi1,
- c1, c2, xtol, task,
- amin, amax, isave, dsave)
- if task[:2] == b'FG':
- alpha1 = stp
- phi1 = phi(stp)
- derphi1 = derphi(stp)
- else:
- break
- else:
- # maxiter reached, the line search did not converge
- stp = None
- if task[:5] == b'ERROR' or task[:4] == b'WARN':
- stp = None # failed
- return stp, phi1, phi0
- line_search = line_search_wolfe1
- #------------------------------------------------------------------------------
- # Pure-Python Wolfe line and scalar searches
- #------------------------------------------------------------------------------
- def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None,
- old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=None,
- extra_condition=None, maxiter=10):
- """Find alpha that satisfies strong Wolfe conditions.
- Parameters
- ----------
- f : callable f(x,*args)
- Objective function.
- myfprime : callable f'(x,*args)
- Objective function gradient.
- xk : ndarray
- Starting point.
- pk : ndarray
- Search direction.
- gfk : ndarray, optional
- Gradient value for x=xk (xk being the current parameter
- estimate). Will be recomputed if omitted.
- old_fval : float, optional
- Function value for x=xk. Will be recomputed if omitted.
- old_old_fval : float, optional
- Function value for the point preceding x=xk.
- args : tuple, optional
- Additional arguments passed to objective function.
- c1 : float, optional
- Parameter for Armijo condition rule.
- c2 : float, optional
- Parameter for curvature condition rule.
- amax : float, optional
- Maximum step size
- extra_condition : callable, optional
- A callable of the form ``extra_condition(alpha, x, f, g)``
- returning a boolean. Arguments are the proposed step ``alpha``
- and the corresponding ``x``, ``f`` and ``g`` values. The line search
- accepts the value of ``alpha`` only if this
- callable returns ``True``. If the callable returns ``False``
- for the step length, the algorithm will continue with
- new iterates. The callable is only called for iterates
- satisfying the strong Wolfe conditions.
- maxiter : int, optional
- Maximum number of iterations to perform.
- Returns
- -------
- alpha : float or None
- Alpha for which ``x_new = x0 + alpha * pk``,
- or None if the line search algorithm did not converge.
- fc : int
- Number of function evaluations made.
- gc : int
- Number of gradient evaluations made.
- new_fval : float or None
- New function value ``f(x_new)=f(x0+alpha*pk)``,
- or None if the line search algorithm did not converge.
- old_fval : float
- Old function value ``f(x0)``.
- new_slope : float or None
- The local slope along the search direction at the
- new value ``<myfprime(x_new), pk>``,
- or None if the line search algorithm did not converge.
- Notes
- -----
- Uses the line search algorithm to enforce strong Wolfe
- conditions. See Wright and Nocedal, 'Numerical Optimization',
- 1999, pp. 59-61.
- Examples
- --------
- >>> import numpy as np
- >>> from scipy.optimize import line_search
- An objective function and its gradient are defined.
- >>> def obj_func(x):
- ... return (x[0])**2+(x[1])**2
- >>> def obj_grad(x):
- ... return [2*x[0], 2*x[1]]
- We can find an alpha that satisfies the strong Wolfe conditions.
- >>> start_point = np.array([1.8, 1.7])
- >>> search_gradient = np.array([-1.0, -1.0])
- >>> line_search(obj_func, obj_grad, start_point, search_gradient)
- (1.0, 2, 1, 1.1300000000000001, 6.13, [1.6, 1.4])
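- If additional acceptance criteria are needed, an ``extra_condition``
- callable can be supplied; an illustrative sketch (the bound below is
- an arbitrary assumption):
- >>> def extra(alpha, x, f, g):
- ...     return alpha <= 1.5  # reject overly long steps
- >>> alpha = line_search(obj_func, obj_grad, start_point,
- ...                     search_gradient, extra_condition=extra)[0]
- Since the unconstrained search already accepted ``alpha = 1.0``, the
- extra condition passes and the result is unchanged.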
- """
- fc = [0]
- gc = [0]
- gval = [None]
- gval_alpha = [None]
- def phi(alpha):
- fc[0] += 1
- return f(xk + alpha * pk, *args)
- fprime = myfprime
- def derphi(alpha):
- gc[0] += 1
- gval[0] = fprime(xk + alpha * pk, *args) # store for later use
- gval_alpha[0] = alpha
- return np.dot(gval[0], pk)
- if gfk is None:
- gfk = fprime(xk, *args)
- derphi0 = np.dot(gfk, pk)
- if extra_condition is not None:
- # Add the current gradient as argument, to avoid needless
- # re-evaluation
- def extra_condition2(alpha, phi):
- if gval_alpha[0] != alpha:
- derphi(alpha)
- x = xk + alpha * pk
- return extra_condition(alpha, x, phi, gval[0])
- else:
- extra_condition2 = None
- alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2(
- phi, derphi, old_fval, old_old_fval, derphi0, c1, c2, amax,
- extra_condition2, maxiter=maxiter)
- if derphi_star is None:
- warn('The line search algorithm did not converge', LineSearchWarning)
- else:
- # derphi_star is a scalar (the directional derivative at the
- # accepted step). Return the most recently evaluated gradient
- # instead: it is the gradient at the new point, so the caller does
- # not need to recompute it in the outer loop.
- derphi_star = gval[0]
- return alpha_star, fc[0], gc[0], phi_star, old_fval, derphi_star
- def scalar_search_wolfe2(phi, derphi, phi0=None,
- old_phi0=None, derphi0=None,
- c1=1e-4, c2=0.9, amax=None,
- extra_condition=None, maxiter=10):
- """Find alpha that satisfies strong Wolfe conditions.
- The search direction is assumed to be a descent direction; only
- alpha > 0 is considered.
- Parameters
- ----------
- phi : callable phi(alpha)
- Objective scalar function.
- derphi : callable phi'(alpha)
- Objective function derivative. Returns a scalar.
- phi0 : float, optional
- Value of phi at 0.
- old_phi0 : float, optional
- Value of phi at previous point.
- derphi0 : float, optional
- Value of derphi at 0
- c1 : float, optional
- Parameter for Armijo condition rule.
- c2 : float, optional
- Parameter for curvature condition rule.
- amax : float, optional
- Maximum step size.
- extra_condition : callable, optional
- A callable of the form ``extra_condition(alpha, phi_value)``
- returning a boolean. The line search accepts the value
- of ``alpha`` only if this callable returns ``True``.
- If the callable returns ``False`` for the step length,
- the algorithm will continue with new iterates.
- The callable is only called for iterates satisfying
- the strong Wolfe conditions.
- maxiter : int, optional
- Maximum number of iterations to perform.
- Returns
- -------
- alpha_star : float or None
- Best alpha, or None if the line search algorithm did not converge.
- phi_star : float
- phi at alpha_star.
- phi0 : float
- phi at 0.
- derphi_star : float or None
- derphi at alpha_star, or None if the line search algorithm
- did not converge.
- Notes
- -----
- Uses the line search algorithm to enforce strong Wolfe
- conditions. See Wright and Nocedal, 'Numerical Optimization',
- 1999, pp. 59-61.
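- Examples
- --------
- An illustrative sketch with an assumed scalar function whose
- minimizer is at ``alpha = 1``:
- >>> import numpy as np
- >>> from scipy.optimize.linesearch import scalar_search_wolfe2
- >>> def phi(alpha):
- ...     return -alpha * np.exp(-alpha)
- >>> def derphi(alpha):
- ...     return (alpha - 1) * np.exp(-alpha)
- >>> alpha, phi_a, phi0, derphi_a = scalar_search_wolfe2(phi, derphi)
- ``alpha`` should land near 1.0, where both strong Wolfe conditions
- are satisfied.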
- """
- if phi0 is None:
- phi0 = phi(0.)
- if derphi0 is None:
- derphi0 = derphi(0.)
- alpha0 = 0
- if old_phi0 is not None and derphi0 != 0:
- alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
- else:
- alpha1 = 1.0
- if alpha1 < 0:
- alpha1 = 1.0
- if amax is not None:
- alpha1 = min(alpha1, amax)
- phi_a1 = phi(alpha1)
- # derphi_a1 = derphi(alpha1) is evaluated below, only where needed
- phi_a0 = phi0
- derphi_a0 = derphi0
- if extra_condition is None:
- extra_condition = lambda alpha, phi: True
- for i in range(maxiter):
- if alpha1 == 0 or (amax is not None and alpha0 == amax):
- # alpha1 == 0: This shouldn't happen. Perhaps the increment has
- # slipped below machine precision?
- alpha_star = None
- phi_star = phi0
- phi0 = old_phi0
- derphi_star = None
- if alpha1 == 0:
- msg = 'Rounding errors prevent the line search from converging'
- else:
- msg = "The line search algorithm could not find a solution " + \
- "less than or equal to amax: %s" % amax
- warn(msg, LineSearchWarning)
- break
- not_first_iteration = i > 0
- if (phi_a1 > phi0 + c1 * alpha1 * derphi0) or \
- ((phi_a1 >= phi_a0) and not_first_iteration):
- alpha_star, phi_star, derphi_star = \
- _zoom(alpha0, alpha1, phi_a0,
- phi_a1, derphi_a0, phi, derphi,
- phi0, derphi0, c1, c2, extra_condition)
- break
- derphi_a1 = derphi(alpha1)
- if (abs(derphi_a1) <= -c2*derphi0):
- if extra_condition(alpha1, phi_a1):
- alpha_star = alpha1
- phi_star = phi_a1
- derphi_star = derphi_a1
- break
- if (derphi_a1 >= 0):
- alpha_star, phi_star, derphi_star = \
- _zoom(alpha1, alpha0, phi_a1,
- phi_a0, derphi_a1, phi, derphi,
- phi0, derphi0, c1, c2, extra_condition)
- break
- alpha2 = 2 * alpha1 # increase by factor of two on each iteration
- if amax is not None:
- alpha2 = min(alpha2, amax)
- alpha0 = alpha1
- alpha1 = alpha2
- phi_a0 = phi_a1
- phi_a1 = phi(alpha1)
- derphi_a0 = derphi_a1
- else:
- # stopping test maxiter reached
- alpha_star = alpha1
- phi_star = phi_a1
- derphi_star = None
- warn('The line search algorithm did not converge', LineSearchWarning)
- return alpha_star, phi_star, phi0, derphi_star
- def _cubicmin(a, fa, fpa, b, fb, c, fc):
- """
- Finds the minimizer for a cubic polynomial that goes through the
- points (a,fa), (b,fb), and (c,fc) with derivative at a of fpa.
- If no minimizer can be found, return None.
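- Examples
- --------
- An illustrative check on an assumed cubic ``f(x) = x**3 - 3*x``,
- whose local minimizer is ``x = 1``; a cubic interpolant through
- these points reproduces ``f`` exactly:
- >>> from scipy.optimize.linesearch import _cubicmin
- >>> f = lambda x: x**3 - 3*x
- >>> fp = lambda x: 3*x**2 - 3
- >>> float(_cubicmin(0.0, f(0.0), fp(0.0), 2.0, f(2.0), 3.0, f(3.0)))
- 1.0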
- """
- # f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
- with np.errstate(divide='raise', over='raise', invalid='raise'):
- try:
- C = fpa
- db = b - a
- dc = c - a
- denom = (db * dc) ** 2 * (db - dc)
- d1 = np.empty((2, 2))
- d1[0, 0] = dc ** 2
- d1[0, 1] = -db ** 2
- d1[1, 0] = -dc ** 3
- d1[1, 1] = db ** 3
- [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
- fc - fa - C * dc]).flatten())
- A /= denom
- B /= denom
- radical = B * B - 3 * A * C
- xmin = a + (-B + np.sqrt(radical)) / (3 * A)
- except ArithmeticError:
- return None
- if not np.isfinite(xmin):
- return None
- return xmin
- def _quadmin(a, fa, fpa, b, fb):
- """
- Finds the minimizer for a quadratic polynomial that goes through
- the points (a,fa), (b,fb) with derivative at a of fpa.
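- Examples
- --------
- An illustrative check on an assumed parabola ``f(x) = (x - 1)**2``:
- >>> from scipy.optimize.linesearch import _quadmin
- >>> float(_quadmin(0.0, 1.0, -2.0, 2.0, 1.0))
- 1.0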
- """
- # f(x) = B*(x-a)^2 + C*(x-a) + D
- with np.errstate(divide='raise', over='raise', invalid='raise'):
- try:
- D = fa
- C = fpa
- db = b - a * 1.0 # multiply by 1.0 to force float arithmetic
- B = (fb - D - C * db) / (db * db)
- xmin = a - C / (2.0 * B)
- except ArithmeticError:
- return None
- if not np.isfinite(xmin):
- return None
- return xmin
- def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
- phi, derphi, phi0, derphi0, c1, c2, extra_condition):
- """Zoom stage of approximate linesearch satisfying strong Wolfe conditions.
-
- Part of the optimization algorithm in `scalar_search_wolfe2`.
-
- Notes
- -----
- Implements Algorithm 3.6 (zoom) in Wright and Nocedal,
- 'Numerical Optimization', 1999, p. 61.
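- Examples
- --------
- An illustrative sketch; ``phi`` is an assumed quadratic whose
- minimizer 0.3 is bracketed by ``a_lo = 0`` and ``a_hi = 1``:
- >>> from scipy.optimize.linesearch import _zoom
- >>> phi = lambda a: (a - 0.3)**2
- >>> derphi = lambda a: 2*(a - 0.3)
- >>> a, v, vp = _zoom(0.0, 1.0, phi(0.0), phi(1.0), derphi(0.0),
- ...                  phi, derphi, phi(0.0), derphi(0.0), 1e-4, 0.9,
- ...                  lambda alpha, phi_val: True)
- ``a`` should be near 0.3, where the strong Wolfe conditions hold.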
- """
- maxiter = 10
- i = 0
- delta1 = 0.2 # cubic interpolant check
- delta2 = 0.1 # quadratic interpolant check
- phi_rec = phi0
- a_rec = 0
- while True:
- # Interpolate to find a trial step length between a_lo and a_hi.
- # Try cubic interpolation first; if the result is within
- # delta1 * dalpha of the endpoints, or outside the interval
- # bounded by a_lo and a_hi, fall back to quadratic interpolation;
- # if that result is still too close to an endpoint, bisect.
- dalpha = a_hi - a_lo
- if dalpha < 0:
- a, b = a_hi, a_lo
- else:
- a, b = a_lo, a_hi
- # minimizer of cubic interpolant
- # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
- #
- # if the result is too close to the end points (or out of the
- # interval), then use quadratic interpolation with phi_lo,
- # derphi_lo and phi_hi; if the result is still too close to the
- # end points (or out of the interval), then use bisection
- if (i > 0):
- cchk = delta1 * dalpha
- a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi,
- a_rec, phi_rec)
- if (i == 0) or (a_j is None) or (a_j > b - cchk) or (a_j < a + cchk):
- qchk = delta2 * dalpha
- a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
- if (a_j is None) or (a_j > b-qchk) or (a_j < a+qchk):
- a_j = a_lo + 0.5*dalpha
- # Check new value of a_j
- phi_aj = phi(a_j)
- if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
- phi_rec = phi_hi
- a_rec = a_hi
- a_hi = a_j
- phi_hi = phi_aj
- else:
- derphi_aj = derphi(a_j)
- if abs(derphi_aj) <= -c2*derphi0 and extra_condition(a_j, phi_aj):
- a_star = a_j
- val_star = phi_aj
- valprime_star = derphi_aj
- break
- if derphi_aj*(a_hi - a_lo) >= 0:
- phi_rec = phi_hi
- a_rec = a_hi
- a_hi = a_lo
- phi_hi = phi_lo
- else:
- phi_rec = phi_lo
- a_rec = a_lo
- a_lo = a_j
- phi_lo = phi_aj
- derphi_lo = derphi_aj
- i += 1
- if (i > maxiter):
- # Failed to find a conforming step size
- a_star = None
- val_star = None
- valprime_star = None
- break
- return a_star, val_star, valprime_star
- #------------------------------------------------------------------------------
- # Armijo line and scalar searches
- #------------------------------------------------------------------------------
- def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
- """Minimize over alpha, the function ``f(xk+alpha pk)``.
- Parameters
- ----------
- f : callable
- Function to be minimized.
- xk : array_like
- Current point.
- pk : array_like
- Search direction.
- gfk : array_like
- Gradient of `f` at point `xk`.
- old_fval : float
- Value of `f` at point `xk`.
- args : tuple, optional
- Optional arguments.
- c1 : float, optional
- Parameter for the Armijo condition rule.
- alpha0 : scalar, optional
- Value of `alpha` at start of the optimization.
- Returns
- -------
- alpha : float or None
- Step size, or None if the search did not converge.
- f_count : int
- Number of function evaluations made.
- f_val_at_alpha : float
- Value of `f` at the new point ``xk + alpha*pk``.
- Notes
- -----
- Uses the interpolation algorithm (Armijo backtracking) as suggested by
- Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57.
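- Examples
- --------
- An illustrative sketch with an assumed quadratic objective:
- >>> import numpy as np
- >>> from scipy.optimize.linesearch import line_search_armijo
- >>> def f(x):
- ...     return float(np.dot(x, x))
- >>> xk = np.array([1.0, 1.0])
- >>> gfk = 2 * xk
- >>> pk = -gfk
- >>> alpha, fc, fval = line_search_armijo(f, xk, pk, gfk, f(xk))
- The unit trial step overshoots (``f(xk + pk) = f(xk)``), so one
- quadratic interpolation yields ``alpha = 0.5``, the exact minimizer
- along ``pk``.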
- """
- xk = np.atleast_1d(xk)
- fc = [0]
- def phi(alpha1):
- fc[0] += 1
- return f(xk + alpha1*pk, *args)
- if old_fval is None:
- phi0 = phi(0.)
- else:
- phi0 = old_fval # f(xk) was already computed in a previous iteration
- derphi0 = np.dot(gfk, pk)
- alpha, phi1 = scalar_search_armijo(phi, phi0, derphi0, c1=c1,
- alpha0=alpha0)
- return alpha, fc[0], phi1
- def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
- """
- Compatibility wrapper for `line_search_armijo`
- """
- r = line_search_armijo(f, xk, pk, gfk, old_fval, args=args, c1=c1,
- alpha0=alpha0)
- return r[0], r[1], 0, r[2]
- def scalar_search_armijo(phi, phi0, derphi0, c1=1e-4, alpha0=1, amin=0):
- """Minimize over alpha, the function ``phi(alpha)``.
- Uses the interpolation algorithm (Armijo backtracking) as suggested by
- Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57.
- The search direction is assumed to be a descent direction; only
- alpha > 0 is considered.
- Returns
- -------
- alpha : float or None
- Step size, or None if no suitable step was found.
- phi1 : float
- Value of ``phi`` at the last evaluated step.
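- Examples
- --------
- An illustrative sketch with an assumed quadratic:
- >>> from scipy.optimize.linesearch import scalar_search_armijo
- >>> phi = lambda a: (a - 0.5)**2
- >>> alpha, phi1 = scalar_search_armijo(phi, phi(0.0), -1.0)
- Here ``-1.0`` is the true slope of ``phi`` at 0; the unit step fails
- the Armijo test, and the quadratic interpolation step returns
- ``alpha = 0.5`` with ``phi1 = 0``.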
- """
- phi_a0 = phi(alpha0)
- if phi_a0 <= phi0 + c1*alpha0*derphi0:
- return alpha0, phi_a0
- # Otherwise, compute the minimizer of a quadratic interpolant:
- alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
- phi_a1 = phi(alpha1)
- if (phi_a1 <= phi0 + c1*alpha1*derphi0):
- return alpha1, phi_a1
- # Otherwise, loop with cubic interpolation until we find an alpha which
- # satisfies the first Wolfe condition (since we are backtracking, we will
- # assume that the value of alpha is not too small and satisfies the second
- # condition.
- while alpha1 > amin: # we are assuming alpha>0 is a descent direction
- factor = alpha0**2 * alpha1**2 * (alpha1-alpha0)
- a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
- alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
- a = a / factor
- b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
- alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)
- b = b / factor
- alpha2 = (-b + np.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a)
- phi_a2 = phi(alpha2)
- if (phi_a2 <= phi0 + c1*alpha2*derphi0):
- return alpha2, phi_a2
- if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
- alpha2 = alpha1 / 2.0
- alpha0 = alpha1
- alpha1 = alpha2
- phi_a0 = phi_a1
- phi_a1 = phi_a2
- # Failed to find a suitable step length
- return None, phi_a1
- #------------------------------------------------------------------------------
- # Non-monotone line search for DF-SANE
- #------------------------------------------------------------------------------
- def _nonmonotone_line_search_cruz(f, x_k, d, prev_fs, eta,
- gamma=1e-4, tau_min=0.1, tau_max=0.5):
- """
- Nonmonotone backtracking line search as described in [1]_
- Parameters
- ----------
- f : callable
- Function returning a tuple ``(f, F)`` where ``f`` is the value
- of a merit function and ``F`` the residual.
- x_k : ndarray
- Initial position.
- d : ndarray
- Search direction.
- prev_fs : sequence of float
- List of previous merit function values. Should have ``len(prev_fs) <= M``
- where ``M`` is the nonmonotonicity window parameter.
- eta : float
- Allowed merit function increase, see [1]_
- gamma, tau_min, tau_max : float, optional
- Search parameters, see [1]_
- Returns
- -------
- alpha : float
- Step length
- xp : ndarray
- Next position
- fp : float
- Merit function value at next position
- Fp : ndarray
- Residual at next position
- References
- ----------
- [1] "Spectral residual method without gradient information for solving
- large-scale nonlinear systems of equations." W. La Cruz,
- J.M. Martinez, M. Raydan. Math. Comp. **75**, 1429 (2006).
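- Examples
- --------
- An illustrative sketch for the residual ``F(x) = x`` with merit
- function ``f = ||F||**2`` (all values below are assumptions):
- >>> import numpy as np
- >>> from scipy.optimize.linesearch import _nonmonotone_line_search_cruz
- >>> def f(x):
- ...     F = x  # residual of the toy system
- ...     return float(np.dot(F, F)), F
- >>> x_k = np.array([1.0, 1.0])
- >>> d = -x_k
- >>> f_k = f(x_k)[0]
- >>> alpha, xp, fp, Fp = _nonmonotone_line_search_cruz(
- ...     f, x_k, d, [f_k], eta=0.1)
- The acceptance test ``fp <= max(prev_fs) + eta - gamma*alpha**2*f_k``
- passes at the first trial step ``alpha = 1``, which lands on the
- exact root.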
- """
- f_k = prev_fs[-1]
- f_bar = max(prev_fs)
- alpha_p = 1
- alpha_m = 1
- alpha = 1
- while True:
- xp = x_k + alpha_p * d
- fp, Fp = f(xp)
- if fp <= f_bar + eta - gamma * alpha_p**2 * f_k:
- alpha = alpha_p
- break
- alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)
- xp = x_k - alpha_m * d
- fp, Fp = f(xp)
- if fp <= f_bar + eta - gamma * alpha_m**2 * f_k:
- alpha = -alpha_m
- break
- alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)
- alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
- alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)
- return alpha, xp, fp, Fp
- def _nonmonotone_line_search_cheng(f, x_k, d, f_k, C, Q, eta,
- gamma=1e-4, tau_min=0.1, tau_max=0.5,
- nu=0.85):
- """
- Nonmonotone line search from [1]_
- Parameters
- ----------
- f : callable
- Function returning a tuple ``(f, F)`` where ``f`` is the value
- of a merit function and ``F`` the residual.
- x_k : ndarray
- Initial position.
- d : ndarray
- Search direction.
- f_k : float
- Initial merit function value.
- C, Q : float
- Control parameters. On the first iteration, give values
- Q=1.0, C=f_k
- eta : float
- Allowed merit function increase, see [1]_
- nu, gamma, tau_min, tau_max : float, optional
- Search parameters, see [1]_
- Returns
- -------
- alpha : float
- Step length
- xp : ndarray
- Next position
- fp : float
- Merit function value at next position
- Fp : ndarray
- Residual at next position
- C : float
- New value for the control parameter C
- Q : float
- New value for the control parameter Q
- References
- ----------
- .. [1] W. Cheng & D.-H. Li, ''A derivative-free nonmonotone line
- search and its application to the spectral residual
- method'', IMA J. Numer. Anal. 29, 814 (2009).
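- Examples
- --------
- An illustrative sketch mirroring the example for
- `_nonmonotone_line_search_cruz`, with the control parameters started
- at ``Q=1.0, C=f_k`` as documented:
- >>> import numpy as np
- >>> from scipy.optimize.linesearch import _nonmonotone_line_search_cheng
- >>> def f(x):
- ...     F = x  # residual of the toy system
- ...     return float(np.dot(F, F)), F
- >>> x_k = np.array([1.0, 1.0])
- >>> d = -x_k
- >>> f_k = f(x_k)[0]
- >>> alpha, xp, fp, Fp, C, Q = _nonmonotone_line_search_cheng(
- ...     f, x_k, d, f_k, C=f_k, Q=1.0, eta=0.1)
- The returned ``C`` and ``Q`` follow ``Q_next = nu*Q + 1`` and
- ``C_next = (nu*Q*(C + eta) + fp) / Q_next``, ready for the next
- iteration.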
- """
- alpha_p = 1
- alpha_m = 1
- alpha = 1
- while True:
- xp = x_k + alpha_p * d
- fp, Fp = f(xp)
- if fp <= C + eta - gamma * alpha_p**2 * f_k:
- alpha = alpha_p
- break
- alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)
- xp = x_k - alpha_m * d
- fp, Fp = f(xp)
- if fp <= C + eta - gamma * alpha_m**2 * f_k:
- alpha = -alpha_m
- break
- alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)
- alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
- alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)
- # Update C and Q
- Q_next = nu * Q + 1
- C = (nu * Q * (C + eta) + fp) / Q_next
- Q = Q_next
- return alpha, xp, fp, Fp, C, Q