_differentiable_functions.py

import numpy as np
import scipy.sparse as sps

from ._numdiff import approx_derivative, group_columns
from ._hessian_update_strategy import HessianUpdateStrategy
from scipy.sparse.linalg import LinearOperator


FD_METHODS = ('2-point', '3-point', 'cs')


class ScalarFunction:
    """Scalar function and its derivatives.

    This class defines a scalar function F: R^n->R and methods for
    computing or approximating its first and second derivatives.

    Parameters
    ----------
    fun : callable
        evaluates the scalar function. Must be of the form ``fun(x, *args)``,
        where ``x`` is the argument in the form of a 1-D array and ``args`` is
        a tuple of any additional fixed parameters needed to completely
        specify the function. Should return a scalar.
    x0 : array-like
        Provides an initial set of variables for evaluating fun. Array of real
        elements of size (n,), where 'n' is the number of independent
        variables.
    args : tuple, optional
        Any additional fixed parameters needed to completely specify the
        scalar function.
    grad : {callable, '2-point', '3-point', 'cs'}
        Method for computing the gradient vector.
        If it is a callable, it should be a function that returns the gradient
        vector:

            ``grad(x, *args) -> array_like, shape (n,)``

        where ``x`` is an array with shape (n,) and ``args`` is a tuple with
        the fixed parameters.
        Alternatively, the keywords {'2-point', '3-point', 'cs'} can be used
        to select a finite difference scheme for numerical estimation of the
        gradient with a relative step size. These finite difference schemes
        obey any specified `bounds`.
    hess : {callable, '2-point', '3-point', 'cs', HessianUpdateStrategy}
        Method for computing the Hessian matrix. If it is callable, it should
        return the Hessian matrix:

            ``hess(x, *args) -> {LinearOperator, spmatrix, array}, (n, n)``

        where ``x`` is a (n,) ndarray and ``args`` is a tuple with the fixed
        parameters. Alternatively, the keywords {'2-point', '3-point', 'cs'}
        select a finite difference scheme for numerical estimation. Or,
        objects implementing the `HessianUpdateStrategy` interface can be
        used to approximate the Hessian.
        Whenever the gradient is estimated via finite differences, the Hessian
        cannot be estimated with options {'2-point', '3-point', 'cs'} and
        needs to be estimated using one of the quasi-Newton strategies.
    finite_diff_rel_step : None or array_like
        Relative step size to use. The absolute step size is computed as
        ``h = finite_diff_rel_step * sign(x0) * max(1, abs(x0))``, possibly
        adjusted to fit into the bounds. For ``method='3-point'`` the sign
        of `h` is ignored. If None then finite_diff_rel_step is selected
        automatically.
    finite_diff_bounds : tuple of array_like
        Lower and upper bounds on independent variables. Defaults to no
        bounds, (-np.inf, np.inf). Each bound must match the size of `x0` or
        be a scalar; in the latter case the bound will be the same for all
        variables. Use it to limit the range of function evaluation.
    epsilon : None or array_like, optional
        Absolute step size to use, possibly adjusted to fit into the bounds.
        For ``method='3-point'`` the sign of `epsilon` is ignored. By default
        relative steps are used; only if ``epsilon is not None`` are absolute
        steps used.

    Notes
    -----
    This class implements a memoization logic. There are methods `fun`,
    `grad`, `hess` and corresponding attributes `f`, `g` and `H`. The
    following things should be considered:

    1. Use only public methods `fun`, `grad` and `hess`.
    2. After one of the methods is called, the corresponding attribute
       will be set. However, a subsequent call with a different argument
       of *any* of the methods may overwrite the attribute.
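
    Examples
    --------
    A minimal usage sketch (illustrative only; this class is SciPy-internal
    and not public API). Here the gradient is analytic and the Hessian is
    approximated by finite differences of that gradient:

    >>> import numpy as np
    >>> sf = ScalarFunction(fun=lambda x: x @ x, x0=[1.0, 2.0], args=(),
    ...                     grad=lambda x: 2 * x, hess='2-point',
    ...                     finite_diff_rel_step=None,
    ...                     finite_diff_bounds=(-np.inf, np.inf))
    >>> sf.fun(np.array([3.0, 4.0]))
    25.0
    >>> sf.grad(np.array([3.0, 4.0]))
    array([6., 8.])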
    """

    def __init__(self, fun, x0, args, grad, hess, finite_diff_rel_step,
                 finite_diff_bounds, epsilon=None):
        if not callable(grad) and grad not in FD_METHODS:
            raise ValueError(
                f"`grad` must be either callable or one of {FD_METHODS}."
            )

        if not (callable(hess) or hess in FD_METHODS
                or isinstance(hess, HessianUpdateStrategy)):
            raise ValueError(
                f"`hess` must be either callable, HessianUpdateStrategy"
                f" or one of {FD_METHODS}."
            )

        if grad in FD_METHODS and hess in FD_METHODS:
            raise ValueError("Whenever the gradient is estimated via "
                             "finite-differences, we require the Hessian "
                             "to be estimated using one of the "
                             "quasi-Newton strategies.")

        # the astype call ensures that self.x is a copy of x0
        self.x = np.atleast_1d(x0).astype(float)
        self.n = self.x.size
        self.nfev = 0
        self.ngev = 0
        self.nhev = 0
        self.f_updated = False
        self.g_updated = False
        self.H_updated = False
        self._lowest_x = None
        self._lowest_f = np.inf

        finite_diff_options = {}
        if grad in FD_METHODS:
            finite_diff_options["method"] = grad
            finite_diff_options["rel_step"] = finite_diff_rel_step
            finite_diff_options["abs_step"] = epsilon
            finite_diff_options["bounds"] = finite_diff_bounds
        if hess in FD_METHODS:
            finite_diff_options["method"] = hess
            finite_diff_options["rel_step"] = finite_diff_rel_step
            finite_diff_options["abs_step"] = epsilon
            finite_diff_options["as_linear_operator"] = True

        # Function evaluation
        def fun_wrapped(x):
            self.nfev += 1
            # Send a copy because the user may overwrite it.
            # Overwriting results in undefined behaviour because
            # fun(self.x) will change self.x, with the two no longer linked.
            fx = fun(np.copy(x), *args)
            # Make sure the function returns a true scalar
            if not np.isscalar(fx):
                try:
                    fx = np.asarray(fx).item()
                except (TypeError, ValueError) as e:
                    raise ValueError(
                        "The user-provided objective function "
                        "must return a scalar value."
                    ) from e

            if fx < self._lowest_f:
                self._lowest_x = x
                self._lowest_f = fx

            return fx

        def update_fun():
            self.f = fun_wrapped(self.x)

        self._update_fun_impl = update_fun
        self._update_fun()

        # Gradient evaluation
        if callable(grad):
            def grad_wrapped(x):
                self.ngev += 1
                return np.atleast_1d(grad(np.copy(x), *args))

            def update_grad():
                self.g = grad_wrapped(self.x)

        elif grad in FD_METHODS:
            def update_grad():
                self._update_fun()
                self.ngev += 1
                self.g = approx_derivative(fun_wrapped, self.x, f0=self.f,
                                           **finite_diff_options)

        self._update_grad_impl = update_grad
        self._update_grad()

        # Hessian Evaluation
        if callable(hess):
            self.H = hess(np.copy(x0), *args)
            self.H_updated = True
            self.nhev += 1

            if sps.issparse(self.H):
                def hess_wrapped(x):
                    self.nhev += 1
                    return sps.csr_matrix(hess(np.copy(x), *args))
                self.H = sps.csr_matrix(self.H)

            elif isinstance(self.H, LinearOperator):
                def hess_wrapped(x):
                    self.nhev += 1
                    return hess(np.copy(x), *args)

            else:
                def hess_wrapped(x):
                    self.nhev += 1
                    return np.atleast_2d(np.asarray(hess(np.copy(x), *args)))
                self.H = np.atleast_2d(np.asarray(self.H))

            def update_hess():
                self.H = hess_wrapped(self.x)
        elif hess in FD_METHODS:
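            # The Hessian is approximated by finite differences of the
            # gradient; because as_linear_operator=True was set above,
            # approx_derivative returns a LinearOperator rather than a
            # dense matrix.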
            def update_hess():
                self._update_grad()
                self.H = approx_derivative(grad_wrapped, self.x, f0=self.g,
                                           **finite_diff_options)
                return self.H

            update_hess()
            self.H_updated = True
        elif isinstance(hess, HessianUpdateStrategy):
            self.H = hess
            self.H.initialize(self.n, 'hess')
            self.H_updated = True
            self.x_prev = None
            self.g_prev = None

            def update_hess():
                self._update_grad()
                self.H.update(self.x - self.x_prev, self.g - self.g_prev)

        self._update_hess_impl = update_hess

        if isinstance(hess, HessianUpdateStrategy):
            def update_x(x):
                self._update_grad()
                self.x_prev = self.x
                self.g_prev = self.g
                # ensure that self.x is a copy of x. Don't store a reference
                # otherwise the memoization doesn't work properly.
                self.x = np.atleast_1d(x).astype(float)
                self.f_updated = False
                self.g_updated = False
                self.H_updated = False
                self._update_hess()
        else:
            def update_x(x):
                # ensure that self.x is a copy of x. Don't store a reference
                # otherwise the memoization doesn't work properly.
                self.x = np.atleast_1d(x).astype(float)
                self.f_updated = False
                self.g_updated = False
                self.H_updated = False

        self._update_x_impl = update_x

    def _update_fun(self):
        if not self.f_updated:
            self._update_fun_impl()
            self.f_updated = True

    def _update_grad(self):
        if not self.g_updated:
            self._update_grad_impl()
            self.g_updated = True

    def _update_hess(self):
        if not self.H_updated:
            self._update_hess_impl()
            self.H_updated = True

    def fun(self, x):
        if not np.array_equal(x, self.x):
            self._update_x_impl(x)
        self._update_fun()
        return self.f

    def grad(self, x):
        if not np.array_equal(x, self.x):
            self._update_x_impl(x)
        self._update_grad()
        return self.g

    def hess(self, x):
        if not np.array_equal(x, self.x):
            self._update_x_impl(x)
        self._update_hess()
        return self.H

    def fun_and_grad(self, x):
        if not np.array_equal(x, self.x):
            self._update_x_impl(x)
        self._update_fun()
        self._update_grad()
        return self.f, self.g


class VectorFunction:
    """Vector function and its derivatives.

    This class defines a vector function F: R^n->R^m and methods for
    computing or approximating its first and second derivatives.

    Notes
    -----
    This class implements a memoization logic. There are methods `fun`,
    `jac`, `hess` and corresponding attributes `f`, `J` and `H`. The
    following things should be considered:

    1. Use only public methods `fun`, `jac` and `hess`.
    2. After one of the methods is called, the corresponding attribute
       will be set. However, a subsequent call with a different argument
       of *any* of the methods may overwrite the attribute.
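
    Examples
    --------
    A minimal usage sketch (illustrative only; this class is SciPy-internal
    and not public API). The Jacobian is estimated by finite differences,
    while `hess(x, v)` returns the Hessian of ``v.dot(F(x))``:

    >>> import numpy as np
    >>> def fun(x):
    ...     return np.array([x[0]**2, x[0] * x[1]])
    >>> def dot_hess(x, v):
    ...     # Hessian of v.dot(fun(x)) = v[0]*x[0]**2 + v[1]*x[0]*x[1]
    ...     return np.array([[2.0 * v[0], v[1]], [v[1], 0.0]])
    >>> vf = VectorFunction(fun, x0=[1.0, 2.0], jac='2-point', hess=dot_hess,
    ...                     finite_diff_rel_step=None,
    ...                     finite_diff_jac_sparsity=None,
    ...                     finite_diff_bounds=(-np.inf, np.inf),
    ...                     sparse_jacobian=False)
    >>> vf.fun(np.array([3.0, 4.0]))
    array([ 9., 12.])
    >>> vf.hess(np.array([3.0, 4.0]), np.array([1.0, 1.0]))
    array([[2., 1.],
           [1., 0.]])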
    """

    def __init__(self, fun, x0, jac, hess,
                 finite_diff_rel_step, finite_diff_jac_sparsity,
                 finite_diff_bounds, sparse_jacobian):
        if not callable(jac) and jac not in FD_METHODS:
            raise ValueError("`jac` must be either callable or one of {}."
                             .format(FD_METHODS))

        if not (callable(hess) or hess in FD_METHODS
                or isinstance(hess, HessianUpdateStrategy)):
            raise ValueError("`hess` must be either callable, "
                             "HessianUpdateStrategy or one of {}."
                             .format(FD_METHODS))

        if jac in FD_METHODS and hess in FD_METHODS:
            raise ValueError("Whenever the Jacobian is estimated via "
                             "finite-differences, we require the Hessian to "
                             "be estimated using one of the quasi-Newton "
                             "strategies.")

        self.x = np.atleast_1d(x0).astype(float)
        self.n = self.x.size
        self.nfev = 0
        self.njev = 0
        self.nhev = 0
        self.f_updated = False
        self.J_updated = False
        self.H_updated = False

        finite_diff_options = {}
        if jac in FD_METHODS:
            finite_diff_options["method"] = jac
            finite_diff_options["rel_step"] = finite_diff_rel_step
            if finite_diff_jac_sparsity is not None:
                sparsity_groups = group_columns(finite_diff_jac_sparsity)
                finite_diff_options["sparsity"] = (finite_diff_jac_sparsity,
                                                   sparsity_groups)
            finite_diff_options["bounds"] = finite_diff_bounds
            self.x_diff = np.copy(self.x)
        if hess in FD_METHODS:
            finite_diff_options["method"] = hess
            finite_diff_options["rel_step"] = finite_diff_rel_step
            finite_diff_options["as_linear_operator"] = True
            self.x_diff = np.copy(self.x)

        # Function evaluation
        def fun_wrapped(x):
            self.nfev += 1
            return np.atleast_1d(fun(x))

        def update_fun():
            self.f = fun_wrapped(self.x)

        self._update_fun_impl = update_fun
        update_fun()
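
        # self.v holds the "multiplier" vector: the Hessian computed by this
        # class is the Hessian of the scalar product v.dot(F(x)), as used for
        # Lagrangian-type terms in constrained optimization.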
        self.v = np.zeros_like(self.f)
        self.m = self.v.size

        # Jacobian Evaluation
        if callable(jac):
            self.J = jac(self.x)
            self.J_updated = True
            self.njev += 1

            if (sparse_jacobian or
                    sparse_jacobian is None and sps.issparse(self.J)):
                def jac_wrapped(x):
                    self.njev += 1
                    return sps.csr_matrix(jac(x))
                self.J = sps.csr_matrix(self.J)
                self.sparse_jacobian = True

            elif sps.issparse(self.J):
                def jac_wrapped(x):
                    self.njev += 1
                    return jac(x).toarray()
                self.J = self.J.toarray()
                self.sparse_jacobian = False

            else:
                def jac_wrapped(x):
                    self.njev += 1
                    return np.atleast_2d(jac(x))
                self.J = np.atleast_2d(self.J)
                self.sparse_jacobian = False

            def update_jac():
                self.J = jac_wrapped(self.x)

        elif jac in FD_METHODS:
            self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
                                       **finite_diff_options)
            self.J_updated = True

            if (sparse_jacobian or
                    sparse_jacobian is None and sps.issparse(self.J)):
                def update_jac():
                    self._update_fun()
                    self.J = sps.csr_matrix(
                        approx_derivative(fun_wrapped, self.x, f0=self.f,
                                          **finite_diff_options))
                self.J = sps.csr_matrix(self.J)
                self.sparse_jacobian = True

            elif sps.issparse(self.J):
                def update_jac():
                    self._update_fun()
                    self.J = approx_derivative(fun_wrapped, self.x, f0=self.f,
                                               **finite_diff_options).toarray()
                self.J = self.J.toarray()
                self.sparse_jacobian = False

            else:
                def update_jac():
                    self._update_fun()
                    self.J = np.atleast_2d(
                        approx_derivative(fun_wrapped, self.x, f0=self.f,
                                          **finite_diff_options))
                self.J = np.atleast_2d(self.J)
                self.sparse_jacobian = False

        self._update_jac_impl = update_jac

        # Define Hessian
        if callable(hess):
            self.H = hess(self.x, self.v)
            self.H_updated = True
            self.nhev += 1

            if sps.issparse(self.H):
                def hess_wrapped(x, v):
                    self.nhev += 1
                    return sps.csr_matrix(hess(x, v))
                self.H = sps.csr_matrix(self.H)

            elif isinstance(self.H, LinearOperator):
                def hess_wrapped(x, v):
                    self.nhev += 1
                    return hess(x, v)

            else:
                def hess_wrapped(x, v):
                    self.nhev += 1
                    return np.atleast_2d(np.asarray(hess(x, v)))
                self.H = np.atleast_2d(np.asarray(self.H))

            def update_hess():
                self.H = hess_wrapped(self.x, self.v)
        elif hess in FD_METHODS:
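            # H is approximated as the derivative of g(x) = J(x).T.dot(v),
            # i.e. finite differences of the Jacobian-vector product,
            # returned as a LinearOperator (as_linear_operator=True above).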
            def jac_dot_v(x, v):
                return jac_wrapped(x).T.dot(v)

            def update_hess():
                self._update_jac()
                self.H = approx_derivative(jac_dot_v, self.x,
                                           f0=self.J.T.dot(self.v),
                                           args=(self.v,),
                                           **finite_diff_options)

            update_hess()
            self.H_updated = True
        elif isinstance(hess, HessianUpdateStrategy):
            self.H = hess
            self.H.initialize(self.n, 'hess')
            self.H_updated = True
            self.x_prev = None
            self.J_prev = None

            def update_hess():
                self._update_jac()
                # When v is updated before x was updated, then x_prev and
                # J_prev are None and we need this check.
                if self.x_prev is not None and self.J_prev is not None:
                    delta_x = self.x - self.x_prev
                    delta_g = self.J.T.dot(self.v) - self.J_prev.T.dot(self.v)
                    self.H.update(delta_x, delta_g)

        self._update_hess_impl = update_hess

        if isinstance(hess, HessianUpdateStrategy):
            def update_x(x):
                self._update_jac()
                self.x_prev = self.x
                self.J_prev = self.J
                self.x = np.atleast_1d(x).astype(float)
                self.f_updated = False
                self.J_updated = False
                self.H_updated = False
                self._update_hess()
        else:
            def update_x(x):
                self.x = np.atleast_1d(x).astype(float)
                self.f_updated = False
                self.J_updated = False
                self.H_updated = False

        self._update_x_impl = update_x

    def _update_v(self, v):
        if not np.array_equal(v, self.v):
            self.v = v
            self.H_updated = False

    def _update_x(self, x):
        if not np.array_equal(x, self.x):
            self._update_x_impl(x)

    def _update_fun(self):
        if not self.f_updated:
            self._update_fun_impl()
            self.f_updated = True

    def _update_jac(self):
        if not self.J_updated:
            self._update_jac_impl()
            self.J_updated = True

    def _update_hess(self):
        if not self.H_updated:
            self._update_hess_impl()
            self.H_updated = True

    def fun(self, x):
        self._update_x(x)
        self._update_fun()
        return self.f

    def jac(self, x):
        self._update_x(x)
        self._update_jac()
        return self.J

    def hess(self, x, v):
        # v should be updated before x.
        self._update_v(v)
        self._update_x(x)
        self._update_hess()
        return self.H


class LinearVectorFunction:
    """Linear vector function and its derivatives.

    Defines a linear function F = A x, where x is an n-dimensional vector and
    A is an m-by-n matrix. The Jacobian is constant and equal to A. The
    Hessian is identically zero and is returned as a csr matrix.
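
    Examples
    --------
    A minimal usage sketch (illustrative only; this class is SciPy-internal
    and not public API):

    >>> import numpy as np
    >>> lvf = LinearVectorFunction(np.array([[1.0, 2.0], [3.0, 4.0]]),
    ...                            x0=[0.0, 0.0], sparse_jacobian=False)
    >>> lvf.fun(np.array([1.0, 1.0]))
    array([3., 7.])
    >>> lvf.jac(np.array([1.0, 1.0]))  # constant, independent of x
    array([[1., 2.],
           [3., 4.]])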
    """

    def __init__(self, A, x0, sparse_jacobian):
        if sparse_jacobian or sparse_jacobian is None and sps.issparse(A):
            self.J = sps.csr_matrix(A)
            self.sparse_jacobian = True
        elif sps.issparse(A):
            self.J = A.toarray()
            self.sparse_jacobian = False
        else:
            # np.asarray makes sure A is ndarray and not matrix
            self.J = np.atleast_2d(np.asarray(A))
            self.sparse_jacobian = False

        self.m, self.n = self.J.shape
        self.x = np.atleast_1d(x0).astype(float)
        self.f = self.J.dot(self.x)
        self.f_updated = True

        self.v = np.zeros(self.m, dtype=float)
        self.H = sps.csr_matrix((self.n, self.n))

    def _update_x(self, x):
        if not np.array_equal(x, self.x):
            self.x = np.atleast_1d(x).astype(float)
            self.f_updated = False

    def fun(self, x):
        self._update_x(x)
        if not self.f_updated:
            self.f = self.J.dot(x)
            self.f_updated = True
        return self.f

    def jac(self, x):
        self._update_x(x)
        return self.J

    def hess(self, x, v):
        self._update_x(x)
        self.v = v
        return self.H


class IdentityVectorFunction(LinearVectorFunction):
    """Identity vector function and its derivatives.

    The Jacobian is the identity matrix, returned as a dense array when
    `sparse_jacobian=False` and as a csr matrix otherwise. The Hessian is
    identically zero and is returned as a csr matrix.
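
    Examples
    --------
    A minimal usage sketch (illustrative only; this class is SciPy-internal
    and not public API):

    >>> import numpy as np
    >>> ivf = IdentityVectorFunction([1.0, 2.0], sparse_jacobian=False)
    >>> ivf.fun(np.array([3.0, 4.0]))
    array([3., 4.])
    >>> ivf.sparse_jacobian
    False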
    """

    def __init__(self, x0, sparse_jacobian):
        n = len(x0)
        if sparse_jacobian or sparse_jacobian is None:
            A = sps.eye(n, format='csr')
            sparse_jacobian = True
        else:
            A = np.eye(n)
            sparse_jacobian = False
        super().__init__(A, x0, sparse_jacobian)