- """
- Dogleg algorithm with rectangular trust regions for least-squares minimization.
- The description of the algorithm can be found in [Voglis]_. The algorithm does
- trust-region iterations, but the shape of trust regions is rectangular as
- opposed to conventional elliptical. The intersection of a trust region and
- an initial feasible region is again some rectangle. Thus, on each iteration a
- bound-constrained quadratic optimization problem is solved.
- A quadratic problem is solved by well-known dogleg approach, where the
- function is minimized along piecewise-linear "dogleg" path [NumOpt]_,
- Chapter 4. If Jacobian is not rank-deficient then the function is decreasing
- along this path, and optimization amounts to simply following along this
- path as long as a point stays within the bounds. A constrained Cauchy step
- (along the anti-gradient) is considered for safety in rank deficient cases,
- in this situations the convergence might be slow.
- If during iterations some variable hit the initial bound and the component
- of anti-gradient points outside the feasible region, then a next dogleg step
- won't make any progress. At this state such variables satisfy first-order
- optimality conditions and they are excluded before computing a next dogleg
- step.
- Gauss-Newton step can be computed exactly by `numpy.linalg.lstsq` (for dense
- Jacobian matrices) or by iterative procedure `scipy.sparse.linalg.lsmr` (for
- dense and sparse matrices, or Jacobian being LinearOperator). The second
- option allows to solve very large problems (up to couple of millions of
- residuals on a regular PC), provided the Jacobian matrix is sufficiently
- sparse. But note that dogbox is not very good for solving problems with
- large number of constraints, because of variables exclusion-inclusion on each
- iteration (a required number of function evaluations might be high or accuracy
- of a solution will be poor), thus its large-scale usage is probably limited
- to unconstrained problems.
- References
- ----------
- .. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region Dogleg
- Approach for Unconstrained and Bound Constrained Nonlinear
- Optimization", WSEAS International Conference on Applied
- Mathematics, Corfu, Greece, 2004.
- .. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization, 2nd edition".
- """

import numpy as np
from numpy.linalg import lstsq, norm

from scipy.sparse.linalg import LinearOperator, aslinearoperator, lsmr
from scipy.optimize import OptimizeResult

from .common import (
    step_size_to_bound, in_bounds, update_tr_radius, evaluate_quadratic,
    build_quadratic_1d, minimize_quadratic_1d, compute_grad,
    compute_jac_scale, check_termination, scale_for_robust_loss_function,
    print_header_nonlinear, print_iteration_nonlinear)


def lsmr_operator(Jop, d, active_set):
    """Compute LinearOperator to use in LSMR by dogbox algorithm.

    `active_set` mask is used to exclude active variables from computations
    of matrix-vector products.
    """
    m, n = Jop.shape

    def matvec(x):
        # Zero out active components so that the product only involves the
        # free columns of the scaled Jacobian.
        x_free = x.ravel().copy()
        x_free[active_set] = 0
        return Jop.matvec(x_free * d)

    def rmatvec(x):
        r = d * Jop.rmatvec(x)
        r[active_set] = 0
        return r

    return LinearOperator((m, n), matvec=matvec, rmatvec=rmatvec, dtype=float)
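
# Illustrative sketch of the masking behaviour (hypothetical numbers, not part
# of the solver): with ``active_set = [False, True]`` the operator acts like
# the first column of J only, scaled by ``d``:
#
#     J = np.array([[1., 2.], [3., 4.]])
#     op = lsmr_operator(aslinearoperator(J), d=np.ones(2),
#                        active_set=np.array([False, True]))
#     op.matvec(np.array([1., 5.]))   # == J[:, 0] * 1.0 -> [1., 3.]
#     op.rmatvec(np.array([1., 1.]))  # == [sum(J[:, 0]), 0.] -> [4., 0.]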


def find_intersection(x, tr_bounds, lb, ub):
    """Find intersection of trust-region bounds and initial bounds.

    Returns
    -------
    lb_total, ub_total : ndarray with shape of x
        Lower and upper bounds of the intersection region.
    orig_l, orig_u : ndarray of bool with shape of x
        True means that an original bound is taken as a corresponding bound
        in the intersection region.
    tr_l, tr_u : ndarray of bool with shape of x
        True means that a trust-region bound is taken as a corresponding bound
        in the intersection region.
    """
    lb_centered = lb - x
    ub_centered = ub - x

    lb_total = np.maximum(lb_centered, -tr_bounds)
    ub_total = np.minimum(ub_centered, tr_bounds)

    orig_l = np.equal(lb_total, lb_centered)
    orig_u = np.equal(ub_total, ub_centered)

    tr_l = np.equal(lb_total, -tr_bounds)
    tr_u = np.equal(ub_total, tr_bounds)

    return lb_total, ub_total, orig_l, orig_u, tr_l, tr_u
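
# A small worked example (hypothetical numbers): for x = [0.5], trust-region
# half-width tr_bounds = [1.0] and original bounds lb = [0.0], ub = [2.0],
# the step is restricted to [-0.5, 1.0] around x; the lower limit comes from
# the original bound (orig_l = [True]) and the upper limit from the trust
# region (tr_u = [True]):
#
#     find_intersection(np.array([0.5]), np.array([1.0]),
#                       np.array([0.0]), np.array([2.0]))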


def dogleg_step(x, newton_step, g, a, b, tr_bounds, lb, ub):
    """Find dogleg step in a rectangular region.

    Returns
    -------
    step : ndarray, shape (n,)
        Computed dogleg step.
    bound_hits : ndarray of int, shape (n,)
        Each component shows whether a corresponding variable hits the
        initial bound after the step is taken:

            * 0 - a variable doesn't hit the bound.
            * -1 - lower bound is hit.
            * 1 - upper bound is hit.
    tr_hit : bool
        Whether the step hit the boundary of the trust region.
    """
    lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection(
        x, tr_bounds, lb, ub
    )
    bound_hits = np.zeros_like(x, dtype=int)

    if in_bounds(newton_step, lb_total, ub_total):
        return newton_step, bound_hits, False

    to_bounds, _ = step_size_to_bound(np.zeros_like(x), -g, lb_total, ub_total)

    # The classical dogleg algorithm would check whether the Cauchy step fits
    # into the bounds and just return its constrained version if not. But in a
    # rectangular trust region it makes sense to try to improve the constrained
    # Cauchy step too. Thus, we don't distinguish these two cases.

    cauchy_step = -minimize_quadratic_1d(a, b, 0, to_bounds)[0] * g

    step_diff = newton_step - cauchy_step
    step_size, hits = step_size_to_bound(cauchy_step, step_diff,
                                         lb_total, ub_total)
    bound_hits[(hits < 0) & orig_l] = -1
    bound_hits[(hits > 0) & orig_u] = 1
    tr_hit = np.any((hits < 0) & tr_l | (hits > 0) & tr_u)

    return cauchy_step + step_size * step_diff, bound_hits, tr_hit
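
# A degenerate but easy-to-check case (hypothetical numbers): if the full
# Gauss-Newton step already fits inside the intersection region, it is
# returned unchanged with no bound or trust-region hits (the quadratic
# coefficients a, b are not used in that branch):
#
#     dogleg_step(x=np.array([0.0]), newton_step=np.array([0.1]),
#                 g=np.array([-1.0]), a=1.0, b=-1.0,
#                 tr_bounds=np.array([1.0]),
#                 lb=np.array([-1.0]), ub=np.array([1.0]))
#     # -> (array([0.1]), array([0]), False)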


def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
           loss_function, tr_solver, tr_options, verbose):
    f = f0
    f_true = f.copy()
    nfev = 1

    J = J0
    njev = 1

    if loss_function is not None:
        rho = loss_function(f)
        cost = 0.5 * np.sum(rho[0])
        J, f = scale_for_robust_loss_function(J, f, rho)
    else:
        cost = 0.5 * np.dot(f, f)

    g = compute_grad(J, f)

    jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
    if jac_scale:
        scale, scale_inv = compute_jac_scale(J)
    else:
        scale, scale_inv = x_scale, 1 / x_scale

    Delta = norm(x0 * scale_inv, ord=np.inf)
    if Delta == 0:
        Delta = 1.0

    on_bound = np.zeros_like(x0, dtype=int)
    on_bound[np.equal(x0, lb)] = -1
    on_bound[np.equal(x0, ub)] = 1

    x = x0
    step = np.empty_like(x0)

    if max_nfev is None:
        max_nfev = x0.size * 100

    termination_status = None
    iteration = 0
    step_norm = None
    actual_reduction = None

    if verbose == 2:
        print_header_nonlinear()

    while True:
        active_set = on_bound * g < 0
        free_set = ~active_set

        g_free = g[free_set]
        g_full = g.copy()
        g[active_set] = 0
        g_norm = norm(g, ord=np.inf)
        if g_norm < gtol:
            termination_status = 1

        if verbose == 2:
            print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
                                      step_norm, g_norm)

        if termination_status is not None or nfev == max_nfev:
            break

        x_free = x[free_set]
        lb_free = lb[free_set]
        ub_free = ub[free_set]
        scale_free = scale[free_set]

        # Compute the (Gauss-)Newton step and build the quadratic model for
        # the Cauchy step.
        if tr_solver == 'exact':
            J_free = J[:, free_set]
            newton_step = lstsq(J_free, -f, rcond=-1)[0]

            # Coefficients for the quadratic model along the anti-gradient.
            a, b = build_quadratic_1d(J_free, g_free, -g_free)
        elif tr_solver == 'lsmr':
            Jop = aslinearoperator(J)

            # We compute the lsmr step in scaled variables and then transform
            # it back to the original variables. If lsmr computed the exact
            # least-squares solution, this would be equivalent to not applying
            # any transformation, but in practice the scaled formulation works
            # better.

            # We pass active_set so that the computations behave as if we had
            # selected the free subset of J columns, but without actually
            # doing any slicing, which is expensive for sparse matrices and
            # impossible for LinearOperator.
            lsmr_op = lsmr_operator(Jop, scale, active_set)
            newton_step = -lsmr(lsmr_op, f, **tr_options)[0][free_set]
            newton_step *= scale_free

            # Components of g for active variables were zeroed, so this call
            # is correct and equivalent to using J_free and g_free.
            a, b = build_quadratic_1d(Jop, g, -g)

        actual_reduction = -1.0
        while actual_reduction <= 0 and nfev < max_nfev:
            tr_bounds = Delta * scale_free

            step_free, on_bound_free, tr_hit = dogleg_step(
                x_free, newton_step, g_free, a, b, tr_bounds, lb_free, ub_free)

            step.fill(0.0)
            step[free_set] = step_free

            if tr_solver == 'exact':
                predicted_reduction = -evaluate_quadratic(J_free, g_free,
                                                          step_free)
            elif tr_solver == 'lsmr':
                predicted_reduction = -evaluate_quadratic(Jop, g, step)

            # gh11403 ensure that solution is fully within bounds.
            x_new = np.clip(x + step, lb, ub)

            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step * scale_inv, ord=np.inf)

            if not np.all(np.isfinite(f_new)):
                Delta = 0.25 * step_h_norm
                continue

            # Usual trust-region step quality estimation.
            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new

            Delta, ratio = update_tr_radius(
                Delta, actual_reduction, predicted_reduction,
                step_h_norm, tr_hit
            )

            step_norm = norm(step)
            termination_status = check_termination(
                actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)

            if termination_status is not None:
                break

        if actual_reduction > 0:
            on_bound[free_set] = on_bound_free

            x = x_new
            # Set variables exactly at the boundary.
            mask = on_bound == -1
            x[mask] = lb[mask]
            mask = on_bound == 1
            x[mask] = ub[mask]

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scale:
                scale, scale_inv = compute_jac_scale(J, scale_inv)
        else:
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    if termination_status is None:
        termination_status = 0

    return OptimizeResult(
        x=x, cost=cost, fun=f_true, jac=J, grad=g_full, optimality=g_norm,
        active_mask=on_bound, nfev=nfev, njev=njev, status=termination_status)