"""
Dogleg algorithm with rectangular trust regions for least-squares minimization.

The description of the algorithm can be found in [Voglis]_. The algorithm does
trust-region iterations, but the shape of trust regions is rectangular as
opposed to conventional elliptical. The intersection of a trust region and
an initial feasible region is again some rectangle. Thus, on each iteration a
bound-constrained quadratic optimization problem is solved.

The quadratic problem is solved by the well-known dogleg approach, where the
function is minimized along a piecewise-linear "dogleg" path [NumOpt]_,
Chapter 4. If the Jacobian is not rank-deficient, then the function is
decreasing along this path, and the optimization amounts to simply following
it as long as the point stays within the bounds. A constrained Cauchy step
(along the anti-gradient) is considered for safety in rank-deficient cases;
in these situations the convergence might be slow.

If during the iterations some variable hits the initial bound and the
corresponding component of the anti-gradient points outside the feasible
region, then the next dogleg step won't make any progress. At this state such
variables satisfy first-order optimality conditions, and they are excluded
before computing the next dogleg step.

The Gauss-Newton step can be computed exactly by `numpy.linalg.lstsq` (for
dense Jacobian matrices) or by the iterative procedure
`scipy.sparse.linalg.lsmr` (for dense and sparse matrices, or for a Jacobian
given as a LinearOperator). The second option allows solving very large
problems (up to a couple of million residuals on a regular PC), provided the
Jacobian matrix is sufficiently sparse. Note, however, that dogbox is not very
good at solving problems with a large number of constraints, because variables
are excluded and included on each iteration (the required number of function
evaluations might be high, or the accuracy of the solution will be poor), so
its large-scale usage is probably limited to unconstrained problems.

References
----------
.. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region Dogleg
            Approach for Unconstrained and Bound Constrained Nonlinear
            Optimization", WSEAS International Conference on Applied
            Mathematics, Corfu, Greece, 2004.
Wright, "Numerical optimization, 2nd edition"."""import numpy as npfrom numpy.linalg import lstsq, normfrom scipy.sparse.linalg import LinearOperator, aslinearoperator, lsmrfrom scipy.optimize import OptimizeResultfrom .common import (    step_size_to_bound, in_bounds, update_tr_radius, evaluate_quadratic,    build_quadratic_1d, minimize_quadratic_1d, compute_grad,    compute_jac_scale, check_termination, scale_for_robust_loss_function,    print_header_nonlinear, print_iteration_nonlinear)def lsmr_operator(Jop, d, active_set):    """Compute LinearOperator to use in LSMR by dogbox algorithm.    `active_set` mask is used to excluded active variables from computations    of matrix-vector products.    """    m, n = Jop.shape    def matvec(x):        x_free = x.ravel().copy()        x_free[active_set] = 0        return Jop.matvec(x * d)    def rmatvec(x):        r = d * Jop.rmatvec(x)        r[active_set] = 0        return r    return LinearOperator((m, n), matvec=matvec, rmatvec=rmatvec, dtype=float)def find_intersection(x, tr_bounds, lb, ub):    """Find intersection of trust-region bounds and initial bounds.    Returns    -------    lb_total, ub_total : ndarray with shape of x        Lower and upper bounds of the intersection region.    orig_l, orig_u : ndarray of bool with shape of x        True means that an original bound is taken as a corresponding bound        in the intersection region.    tr_l, tr_u : ndarray of bool with shape of x        True means that a trust-region bound is taken as a corresponding bound        in the intersection region.    """    lb_centered = lb - x    ub_centered = ub - x    lb_total = np.maximum(lb_centered, -tr_bounds)    ub_total = np.minimum(ub_centered, tr_bounds)    orig_l = np.equal(lb_total, lb_centered)    orig_u = np.equal(ub_total, ub_centered)    tr_l = np.equal(lb_total, -tr_bounds)    tr_u = np.equal(ub_total, tr_bounds)    return lb_total, ub_total, orig_l, orig_u, tr_l, tr_udef dogleg_step(x, newton_step, g, a, b, tr_bounds, lb, ub):    """Find dogleg step in a rectangular region.    Returns    -------    step : ndarray, shape (n,)        Computed dogleg step.    bound_hits : ndarray of int, shape (n,)        Each component shows whether a corresponding variable hits the        initial bound after the step is taken:            *  0 - a variable doesn't hit the bound.            * -1 - lower bound is hit.            *  1 - upper bound is hit.    tr_hit : bool        Whether the step hit the boundary of the trust-region.    """    lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection(        x, tr_bounds, lb, ub    )    bound_hits = np.zeros_like(x, dtype=int)    if in_bounds(newton_step, lb_total, ub_total):        return newton_step, bound_hits, False    to_bounds, _ = step_size_to_bound(np.zeros_like(x), -g, lb_total, ub_total)    # The classical dogleg algorithm would check if Cauchy step fits into    # the bounds, and just return it constrained version if not. But in a    # rectangular trust region it makes sense to try to improve constrained    # Cauchy step too. Thus, we don't distinguish these two cases.    


def dogleg_step(x, newton_step, g, a, b, tr_bounds, lb, ub):
    """Find dogleg step in a rectangular region.

    Returns
    -------
    step : ndarray, shape (n,)
        Computed dogleg step.
    bound_hits : ndarray of int, shape (n,)
        Each component shows whether a corresponding variable hits the
        initial bound after the step is taken:

            *  0 - a variable doesn't hit the bound.
            * -1 - lower bound is hit.
            *  1 - upper bound is hit.
    tr_hit : bool
        Whether the step hit the boundary of the trust region.
    """
    lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection(
        x, tr_bounds, lb, ub
    )
    bound_hits = np.zeros_like(x, dtype=int)

    if in_bounds(newton_step, lb_total, ub_total):
        return newton_step, bound_hits, False

    to_bounds, _ = step_size_to_bound(np.zeros_like(x), -g, lb_total, ub_total)

    # The classical dogleg algorithm would check whether the Cauchy step fits
    # into the bounds, and just return its constrained version if not. But in
    # a rectangular trust region it makes sense to try to improve the
    # constrained Cauchy step too. Thus, we don't distinguish these two cases.
    cauchy_step = -minimize_quadratic_1d(a, b, 0, to_bounds)[0] * g

    step_diff = newton_step - cauchy_step
    step_size, hits = step_size_to_bound(cauchy_step, step_diff,
                                         lb_total, ub_total)
    bound_hits[(hits < 0) & orig_l] = -1
    bound_hits[(hits > 0) & orig_u] = 1
    tr_hit = np.any((hits < 0) & tr_l | (hits > 0) & tr_u)

    return cauchy_step + step_size * step_diff, bound_hits, tr_hit


def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
           loss_function, tr_solver, tr_options, verbose):
    f = f0
    f_true = f.copy()
    nfev = 1

    J = J0
    njev = 1

    if loss_function is not None:
        rho = loss_function(f)
        cost = 0.5 * np.sum(rho[0])
        J, f = scale_for_robust_loss_function(J, f, rho)
    else:
        cost = 0.5 * np.dot(f, f)

    g = compute_grad(J, f)

    jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
    if jac_scale:
        scale, scale_inv = compute_jac_scale(J)
    else:
        scale, scale_inv = x_scale, 1 / x_scale

    Delta = norm(x0 * scale_inv, ord=np.inf)
    if Delta == 0:
        Delta = 1.0

    on_bound = np.zeros_like(x0, dtype=int)
    on_bound[np.equal(x0, lb)] = -1
    on_bound[np.equal(x0, ub)] = 1

    x = x0
    step = np.empty_like(x0)

    if max_nfev is None:
        max_nfev = x0.size * 100

    termination_status = None
    iteration = 0
    step_norm = None
    actual_reduction = None

    if verbose == 2:
        print_header_nonlinear()

    while True:
        active_set = on_bound * g < 0
        free_set = ~active_set

        g_free = g[free_set]
        g_full = g.copy()
        g[active_set] = 0
        g_norm = norm(g, ord=np.inf)
        if g_norm < gtol:
            termination_status = 1

        if verbose == 2:
            print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
                                      step_norm, g_norm)

        if termination_status is not None or nfev == max_nfev:
            break

        x_free = x[free_set]
        lb_free = lb[free_set]
        ub_free = ub[free_set]
        scale_free = scale[free_set]

        # Compute (Gauss-)Newton and build quadratic model for Cauchy step.
        if tr_solver == 'exact':
            J_free = J[:, free_set]
            newton_step = lstsq(J_free, -f, rcond=-1)[0]

            # Coefficients for the quadratic model along the anti-gradient.
            a, b = build_quadratic_1d(J_free, g_free, -g_free)
        elif tr_solver == 'lsmr':
            Jop = aslinearoperator(J)

            # We compute the LSMR step in scaled variables and then transform
            # it back to the original variables. If LSMR gave the exact
            # least-squares solution, this would be equivalent to not doing
            # any transformations, but from experience it's better this way.

            # We pass active_set to make computations as if we selected
            # the free subset of J columns, but without actually doing any
            # slicing, which is expensive for sparse matrices and impossible
            # for LinearOperator.
            lsmr_op = lsmr_operator(Jop, scale, active_set)
            newton_step = -lsmr(lsmr_op, f, **tr_options)[0][free_set]
            newton_step *= scale_free

            # Components of g for active variables were zeroed, so this call
            # is correct and equivalent to using J_free and g_free.
            a, b = build_quadratic_1d(Jop, g, -g)

        actual_reduction = -1.0
        while actual_reduction <= 0 and nfev < max_nfev:
            tr_bounds = Delta * scale_free

            step_free, on_bound_free, tr_hit = dogleg_step(
                x_free, newton_step, g_free, a, b, tr_bounds, lb_free, ub_free)

            step.fill(0.0)
            step[free_set] = step_free

            if tr_solver == 'exact':
                predicted_reduction = -evaluate_quadratic(J_free, g_free,
                                                          step_free)
            elif tr_solver == 'lsmr':
                predicted_reduction = -evaluate_quadratic(Jop, g, step)

            # gh11403 ensure that solution is fully within bounds.
            x_new = np.clip(x + step, lb, ub)

            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step * scale_inv, ord=np.inf)

            if not np.all(np.isfinite(f_new)):
                Delta = 0.25 * step_h_norm
                continue

            # Usual trust-region step quality estimation.
            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new

            Delta, ratio = update_tr_radius(
                Delta, actual_reduction, predicted_reduction,
                step_h_norm, tr_hit
            )

            step_norm = norm(step)
            termination_status = check_termination(
                actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)

            if termination_status is not None:
                break

        if actual_reduction > 0:
            on_bound[free_set] = on_bound_free

            x = x_new
            # Set variables exactly at the boundary.
            mask = on_bound == -1
            x[mask] = lb[mask]
            mask = on_bound == 1
            x[mask] = ub[mask]

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scale:
                scale, scale_inv = compute_jac_scale(J, scale_inv)
        else:
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    if termination_status is None:
        termination_status = 0

    return OptimizeResult(
        x=x, cost=cost, fun=f_true, jac=J, grad=g_full, optimality=g_norm,
        active_mask=on_bound, nfev=nfev, njev=njev, status=termination_status)
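

# The ``dogbox`` function above is normally reached through the public
# ``scipy.optimize.least_squares`` interface with ``method='dogbox'`` rather
# than being called directly. The block below is a minimal usage sketch; the
# model, data, and bounds are made up purely for illustration (run it as a
# module, e.g. via ``python -m``, so the relative import above resolves).
if __name__ == '__main__':
    from scipy.optimize import least_squares

    def residuals(p, t, y):
        # Residuals of a simple exponential-decay model y = a * exp(-b * t).
        return p[0] * np.exp(-p[1] * t) - y

    t = np.linspace(0.0, 10.0, 50)
    y = 2.0 * np.exp(-0.5 * t)

    # Bound-constrained fit; 'dogbox' works with the rectangular feasible
    # region defined by ``bounds``.
    res = least_squares(residuals, x0=[1.0, 1.0], args=(t, y),
                        bounds=([0.0, 0.0], [10.0, 10.0]), method='dogbox')
    print(res.x, res.status)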