123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470 |
- import numpy as np
- from scipy.linalg import lu_factor, lu_solve
- from scipy.sparse import issparse, csc_matrix, eye
- from scipy.sparse.linalg import splu
- from scipy.optimize._numdiff import group_columns
- from .common import (validate_max_step, validate_tol, select_initial_step,
- norm, EPS, num_jac, validate_first_step,
- warn_extraneous)
- from .base import OdeSolver, DenseOutput
- MAX_ORDER = 5
- NEWTON_MAXITER = 4
- MIN_FACTOR = 0.2
- MAX_FACTOR = 10
- def compute_R(order, factor):
- """Compute the matrix for changing the differences array."""
- I = np.arange(1, order + 1)[:, None]
- J = np.arange(1, order + 1)
- M = np.zeros((order + 1, order + 1))
- M[1:, 1:] = (I - 1 - factor * J) / I
- M[0] = 1
- return np.cumprod(M, axis=0)
- def change_D(D, order, factor):
- """Change differences array in-place when step size is changed."""
- R = compute_R(order, factor)
- U = compute_R(order, 1)
- RU = R.dot(U)
- D[:order + 1] = np.dot(RU.T, D[:order + 1])
- def solve_bdf_system(fun, t_new, y_predict, c, psi, LU, solve_lu, scale, tol):
- """Solve the algebraic system resulting from BDF method."""
- d = 0
- y = y_predict.copy()
- dy_norm_old = None
- converged = False
- for k in range(NEWTON_MAXITER):
- f = fun(t_new, y)
- if not np.all(np.isfinite(f)):
- break
- dy = solve_lu(LU, c * f - psi - d)
- dy_norm = norm(dy / scale)
- if dy_norm_old is None:
- rate = None
- else:
- rate = dy_norm / dy_norm_old
- if (rate is not None and (rate >= 1 or
- rate ** (NEWTON_MAXITER - k) / (1 - rate) * dy_norm > tol)):
- break
- y += dy
- d += dy
- if (dy_norm == 0 or
- rate is not None and rate / (1 - rate) * dy_norm < tol):
- converged = True
- break
- dy_norm_old = dy_norm
- return converged, k + 1, y, d
- class BDF(OdeSolver):
- """Implicit method based on backward-differentiation formulas.
- This is a variable order method with the order varying automatically from
- 1 to 5. The general framework of the BDF algorithm is described in [1]_.
- This class implements a quasi-constant step size as explained in [2]_.
- The error estimation strategy for the constant-step BDF is derived in [3]_.
- An accuracy enhancement using modified formulas (NDF) [2]_ is also implemented.
- Can be applied in the complex domain.
- Parameters
- ----------
- fun : callable
- Right-hand side of the system. The calling signature is ``fun(t, y)``.
- Here ``t`` is a scalar, and there are two options for the ndarray ``y``:
- It can either have shape (n,); then ``fun`` must return array_like with
- shape (n,). Alternatively it can have shape (n, k); then ``fun``
- must return an array_like with shape (n, k), i.e. each column
- corresponds to a single column in ``y``. The choice between the two
- options is determined by `vectorized` argument (see below). The
- vectorized implementation allows a faster approximation of the Jacobian
- by finite differences (required for this solver).
- t0 : float
- Initial time.
- y0 : array_like, shape (n,)
- Initial state.
- t_bound : float
- Boundary time - the integration won't continue beyond it. It also
- determines the direction of the integration.
- first_step : float or None, optional
- Initial step size. Default is ``None`` which means that the algorithm
- should choose.
- max_step : float, optional
- Maximum allowed step size. Default is np.inf, i.e., the step size is not
- bounded and determined solely by the solver.
- rtol, atol : float and array_like, optional
- Relative and absolute tolerances. The solver keeps the local error
- estimates less than ``atol + rtol * abs(y)``. Here `rtol` controls a
- relative accuracy (number of correct digits), while `atol` controls
- absolute accuracy (number of correct decimal places). To achieve the
- desired `rtol`, set `atol` to be smaller than the smallest value that
- can be expected from ``rtol * abs(y)`` so that `rtol` dominates the
- allowable error. If `atol` is larger than ``rtol * abs(y)`` the
- number of correct digits is not guaranteed. Conversely, to achieve the
- desired `atol` set `rtol` such that ``rtol * abs(y)`` is always smaller
- than `atol`. If components of y have different scales, it might be
- beneficial to set different `atol` values for different components by
- passing array_like with shape (n,) for `atol`. Default values are
- 1e-3 for `rtol` and 1e-6 for `atol`.
- jac : {None, array_like, sparse_matrix, callable}, optional
- Jacobian matrix of the right-hand side of the system with respect to y,
- required by this method. The Jacobian matrix has shape (n, n) and its
- element (i, j) is equal to ``d f_i / d y_j``.
- There are three ways to define the Jacobian:
- * If array_like or sparse_matrix, the Jacobian is assumed to
- be constant.
- * If callable, the Jacobian is assumed to depend on both
- t and y; it will be called as ``jac(t, y)`` as necessary.
- For the 'Radau' and 'BDF' methods, the return value might be a
- sparse matrix.
- * If None (default), the Jacobian will be approximated by
- finite differences.
- It is generally recommended to provide the Jacobian rather than
- relying on a finite-difference approximation.
- jac_sparsity : {None, array_like, sparse matrix}, optional
- Defines a sparsity structure of the Jacobian matrix for a
- finite-difference approximation. Its shape must be (n, n). This argument
- is ignored if `jac` is not `None`. If the Jacobian has only few non-zero
- elements in *each* row, providing the sparsity structure will greatly
- speed up the computations [4]_. A zero entry means that a corresponding
- element in the Jacobian is always zero. If None (default), the Jacobian
- is assumed to be dense.
- vectorized : bool, optional
- Whether `fun` is implemented in a vectorized fashion. Default is False.
- Attributes
- ----------
- n : int
- Number of equations.
- status : string
- Current status of the solver: 'running', 'finished' or 'failed'.
- t_bound : float
- Boundary time.
- direction : float
- Integration direction: +1 or -1.
- t : float
- Current time.
- y : ndarray
- Current state.
- t_old : float
- Previous time. None if no steps were made yet.
- step_size : float
- Size of the last successful step. None if no steps were made yet.
- nfev : int
- Number of evaluations of the right-hand side.
- njev : int
- Number of evaluations of the Jacobian.
- nlu : int
- Number of LU decompositions.
- References
- ----------
- .. [1] G. D. Byrne, A. C. Hindmarsh, "A Polyalgorithm for the Numerical
- Solution of Ordinary Differential Equations", ACM Transactions on
- Mathematical Software, Vol. 1, No. 1, pp. 71-96, March 1975.
- .. [2] L. F. Shampine, M. W. Reichelt, "THE MATLAB ODE SUITE", SIAM J. SCI.
- COMPUTE., Vol. 18, No. 1, pp. 1-22, January 1997.
- .. [3] E. Hairer, G. Wanner, "Solving Ordinary Differential Equations I:
- Nonstiff Problems", Sec. III.2.
- .. [4] A. Curtis, M. J. D. Powell, and J. Reid, "On the estimation of
- sparse Jacobian matrices", Journal of the Institute of Mathematics
- and its Applications, 13, pp. 117-120, 1974.
- """
- def __init__(self, fun, t0, y0, t_bound, max_step=np.inf,
- rtol=1e-3, atol=1e-6, jac=None, jac_sparsity=None,
- vectorized=False, first_step=None, **extraneous):
- warn_extraneous(extraneous)
- super().__init__(fun, t0, y0, t_bound, vectorized,
- support_complex=True)
- self.max_step = validate_max_step(max_step)
- self.rtol, self.atol = validate_tol(rtol, atol, self.n)
- f = self.fun(self.t, self.y)
- if first_step is None:
- self.h_abs = select_initial_step(self.fun, self.t, self.y, f,
- self.direction, 1,
- self.rtol, self.atol)
- else:
- self.h_abs = validate_first_step(first_step, t0, t_bound)
- self.h_abs_old = None
- self.error_norm_old = None
- self.newton_tol = max(10 * EPS / rtol, min(0.03, rtol ** 0.5))
- self.jac_factor = None
- self.jac, self.J = self._validate_jac(jac, jac_sparsity)
- if issparse(self.J):
- def lu(A):
- self.nlu += 1
- return splu(A)
- def solve_lu(LU, b):
- return LU.solve(b)
- I = eye(self.n, format='csc', dtype=self.y.dtype)
- else:
- def lu(A):
- self.nlu += 1
- return lu_factor(A, overwrite_a=True)
- def solve_lu(LU, b):
- return lu_solve(LU, b, overwrite_b=True)
- I = np.identity(self.n, dtype=self.y.dtype)
- self.lu = lu
- self.solve_lu = solve_lu
- self.I = I
- kappa = np.array([0, -0.1850, -1/9, -0.0823, -0.0415, 0])
- self.gamma = np.hstack((0, np.cumsum(1 / np.arange(1, MAX_ORDER + 1))))
- self.alpha = (1 - kappa) * self.gamma
- self.error_const = kappa * self.gamma + 1 / np.arange(1, MAX_ORDER + 2)
- D = np.empty((MAX_ORDER + 3, self.n), dtype=self.y.dtype)
- D[0] = self.y
- D[1] = f * self.h_abs * self.direction
- self.D = D
- self.order = 1
- self.n_equal_steps = 0
- self.LU = None
- def _validate_jac(self, jac, sparsity):
- t0 = self.t
- y0 = self.y
- if jac is None:
- if sparsity is not None:
- if issparse(sparsity):
- sparsity = csc_matrix(sparsity)
- groups = group_columns(sparsity)
- sparsity = (sparsity, groups)
- def jac_wrapped(t, y):
- self.njev += 1
- f = self.fun_single(t, y)
- J, self.jac_factor = num_jac(self.fun_vectorized, t, y, f,
- self.atol, self.jac_factor,
- sparsity)
- return J
- J = jac_wrapped(t0, y0)
- elif callable(jac):
- J = jac(t0, y0)
- self.njev += 1
- if issparse(J):
- J = csc_matrix(J, dtype=y0.dtype)
- def jac_wrapped(t, y):
- self.njev += 1
- return csc_matrix(jac(t, y), dtype=y0.dtype)
- else:
- J = np.asarray(J, dtype=y0.dtype)
- def jac_wrapped(t, y):
- self.njev += 1
- return np.asarray(jac(t, y), dtype=y0.dtype)
- if J.shape != (self.n, self.n):
- raise ValueError("`jac` is expected to have shape {}, but "
- "actually has {}."
- .format((self.n, self.n), J.shape))
- else:
- if issparse(jac):
- J = csc_matrix(jac, dtype=y0.dtype)
- else:
- J = np.asarray(jac, dtype=y0.dtype)
- if J.shape != (self.n, self.n):
- raise ValueError("`jac` is expected to have shape {}, but "
- "actually has {}."
- .format((self.n, self.n), J.shape))
- jac_wrapped = None
- return jac_wrapped, J
- def _step_impl(self):
- t = self.t
- D = self.D
- max_step = self.max_step
- min_step = 10 * np.abs(np.nextafter(t, self.direction * np.inf) - t)
- if self.h_abs > max_step:
- h_abs = max_step
- change_D(D, self.order, max_step / self.h_abs)
- self.n_equal_steps = 0
- elif self.h_abs < min_step:
- h_abs = min_step
- change_D(D, self.order, min_step / self.h_abs)
- self.n_equal_steps = 0
- else:
- h_abs = self.h_abs
- atol = self.atol
- rtol = self.rtol
- order = self.order
- alpha = self.alpha
- gamma = self.gamma
- error_const = self.error_const
- J = self.J
- LU = self.LU
- current_jac = self.jac is None
- step_accepted = False
- while not step_accepted:
- if h_abs < min_step:
- return False, self.TOO_SMALL_STEP
- h = h_abs * self.direction
- t_new = t + h
- if self.direction * (t_new - self.t_bound) > 0:
- t_new = self.t_bound
- change_D(D, order, np.abs(t_new - t) / h_abs)
- self.n_equal_steps = 0
- LU = None
- h = t_new - t
- h_abs = np.abs(h)
- y_predict = np.sum(D[:order + 1], axis=0)
- scale = atol + rtol * np.abs(y_predict)
- psi = np.dot(D[1: order + 1].T, gamma[1: order + 1]) / alpha[order]
- converged = False
- c = h / alpha[order]
- while not converged:
- if LU is None:
- LU = self.lu(self.I - c * J)
- converged, n_iter, y_new, d = solve_bdf_system(
- self.fun, t_new, y_predict, c, psi, LU, self.solve_lu,
- scale, self.newton_tol)
- if not converged:
- if current_jac:
- break
- J = self.jac(t_new, y_predict)
- LU = None
- current_jac = True
- if not converged:
- factor = 0.5
- h_abs *= factor
- change_D(D, order, factor)
- self.n_equal_steps = 0
- LU = None
- continue
- safety = 0.9 * (2 * NEWTON_MAXITER + 1) / (2 * NEWTON_MAXITER
- + n_iter)
- scale = atol + rtol * np.abs(y_new)
- error = error_const[order] * d
- error_norm = norm(error / scale)
- if error_norm > 1:
- factor = max(MIN_FACTOR,
- safety * error_norm ** (-1 / (order + 1)))
- h_abs *= factor
- change_D(D, order, factor)
- self.n_equal_steps = 0
- # As we didn't have problems with convergence, we don't
- # reset LU here.
- else:
- step_accepted = True
- self.n_equal_steps += 1
- self.t = t_new
- self.y = y_new
- self.h_abs = h_abs
- self.J = J
- self.LU = LU
- # Update differences. The principal relation here is
- # D^{j + 1} y_n = D^{j} y_n - D^{j} y_{n - 1}. Keep in mind that D
- # contained difference for previous interpolating polynomial and
- # d = D^{k + 1} y_n. Thus this elegant code follows.
- D[order + 2] = d - D[order + 1]
- D[order + 1] = d
- for i in reversed(range(order + 1)):
- D[i] += D[i + 1]
- if self.n_equal_steps < order + 1:
- return True, None
- if order > 1:
- error_m = error_const[order - 1] * D[order]
- error_m_norm = norm(error_m / scale)
- else:
- error_m_norm = np.inf
- if order < MAX_ORDER:
- error_p = error_const[order + 1] * D[order + 2]
- error_p_norm = norm(error_p / scale)
- else:
- error_p_norm = np.inf
- error_norms = np.array([error_m_norm, error_norm, error_p_norm])
- with np.errstate(divide='ignore'):
- factors = error_norms ** (-1 / np.arange(order, order + 3))
- delta_order = np.argmax(factors) - 1
- order += delta_order
- self.order = order
- factor = min(MAX_FACTOR, safety * np.max(factors))
- self.h_abs *= factor
- change_D(D, order, factor)
- self.n_equal_steps = 0
- self.LU = None
- return True, None
- def _dense_output_impl(self):
- return BdfDenseOutput(self.t_old, self.t, self.h_abs * self.direction,
- self.order, self.D[:self.order + 1].copy())
- class BdfDenseOutput(DenseOutput):
- def __init__(self, t_old, t, h, order, D):
- super().__init__(t_old, t)
- self.order = order
- self.t_shift = self.t - h * np.arange(self.order)
- self.denom = h * (1 + np.arange(self.order))
- self.D = D
- def _call_impl(self, t):
- if t.ndim == 0:
- x = (t - self.t_shift) / self.denom
- p = np.cumprod(x)
- else:
- x = (t - self.t_shift[:, None]) / self.denom[:, None]
- p = np.cumprod(x, axis=0)
- y = np.dot(self.D[1:].T, p)
- if y.ndim == 1:
- y += self.D[0]
- else:
- y += self.D[0, :, None]
- return y
|