_svdp.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. """
  2. Python wrapper for PROPACK
  3. --------------------------
  4. PROPACK is a collection of Fortran routines for iterative computation
  5. of partial SVDs of large matrices or linear operators.
  6. Based on BSD licensed pypropack project:
  7. http://github.com/jakevdp/pypropack
  8. Author: Jake Vanderplas <vanderplas@astro.washington.edu>
  9. PROPACK source is BSD licensed, and available at
  10. http://soi.stanford.edu/~rmunk/PROPACK/
  11. """
  12. __all__ = ['_svdp']
  13. import numpy as np
  14. from scipy._lib._util import check_random_state
  15. from scipy.sparse.linalg import aslinearoperator
  16. from scipy.linalg import LinAlgError
  17. from ._propack import _spropack # type: ignore
  18. from ._propack import _dpropack
  19. from ._propack import _cpropack
  20. from ._propack import _zpropack
  21. _lansvd_dict = {
  22. 'f': _spropack.slansvd,
  23. 'd': _dpropack.dlansvd,
  24. 'F': _cpropack.clansvd,
  25. 'D': _zpropack.zlansvd,
  26. }
  27. _lansvd_irl_dict = {
  28. 'f': _spropack.slansvd_irl,
  29. 'd': _dpropack.dlansvd_irl,
  30. 'F': _cpropack.clansvd_irl,
  31. 'D': _zpropack.zlansvd_irl,
  32. }
  33. _which_converter = {
  34. 'LM': 'L',
  35. 'SM': 'S',
  36. }
  37. class _AProd:
  38. """
  39. Wrapper class for linear operator
  40. The call signature of the __call__ method matches the callback of
  41. the PROPACK routines.
  42. """
  43. def __init__(self, A):
  44. try:
  45. self.A = aslinearoperator(A)
  46. except TypeError:
  47. self.A = aslinearoperator(np.asarray(A))
  48. def __call__(self, transa, m, n, x, y, sparm, iparm):
  49. if transa == 'n':
  50. y[:] = self.A.matvec(x)
  51. else:
  52. y[:] = self.A.rmatvec(x)
  53. @property
  54. def shape(self):
  55. return self.A.shape
  56. @property
  57. def dtype(self):
  58. try:
  59. return self.A.dtype
  60. except AttributeError:
  61. return self.A.matvec(np.zeros(self.A.shape[1])).dtype
  62. def _svdp(A, k, which='LM', irl_mode=True, kmax=None,
  63. compute_u=True, compute_v=True, v0=None, full_output=False, tol=0,
  64. delta=None, eta=None, anorm=0, cgs=False, elr=True,
  65. min_relgap=0.002, shifts=None, maxiter=None, random_state=None):
  66. """
  67. Compute the singular value decomposition of a linear operator using PROPACK
  68. Parameters
  69. ----------
  70. A : array_like, sparse matrix, or LinearOperator
  71. Operator for which SVD will be computed. If `A` is a LinearOperator
  72. object, it must define both ``matvec`` and ``rmatvec`` methods.
  73. k : int
  74. Number of singular values/vectors to compute
  75. which : {"LM", "SM"}
  76. Which singluar triplets to compute:
  77. - 'LM': compute triplets corresponding to the `k` largest singular
  78. values
  79. - 'SM': compute triplets corresponding to the `k` smallest singular
  80. values
  81. `which='SM'` requires `irl_mode=True`. Computes largest singular
  82. values by default.
  83. irl_mode : bool, optional
  84. If `True`, then compute SVD using IRL (implicitly restarted Lanczos)
  85. mode. Default is `True`.
  86. kmax : int, optional
  87. Maximal number of iterations / maximal dimension of the Krylov
  88. subspace. Default is ``10 * k``.
  89. compute_u : bool, optional
  90. If `True` (default) then compute left singular vectors, `u`.
  91. compute_v : bool, optional
  92. If `True` (default) then compute right singular vectors, `v`.
  93. tol : float, optional
  94. The desired relative accuracy for computed singular values.
  95. If not specified, it will be set based on machine precision.
  96. v0 : array_like, optional
  97. Starting vector for iterations: must be of length ``A.shape[0]``.
  98. If not specified, PROPACK will generate a starting vector.
  99. full_output : bool, optional
  100. If `True`, then return sigma_bound. Default is `False`.
  101. delta : float, optional
  102. Level of orthogonality to maintain between Lanczos vectors.
  103. Default is set based on machine precision.
  104. eta : float, optional
  105. Orthogonality cutoff. During reorthogonalization, vectors with
  106. component larger than `eta` along the Lanczos vector will be purged.
  107. Default is set based on machine precision.
  108. anorm : float, optional
  109. Estimate of ``||A||``. Default is `0`.
  110. cgs : bool, optional
  111. If `True`, reorthogonalization is done using classical Gram-Schmidt.
  112. If `False` (default), it is done using modified Gram-Schmidt.
  113. elr : bool, optional
  114. If `True` (default), then extended local orthogonality is enforced
  115. when obtaining singular vectors.
  116. min_relgap : float, optional
  117. The smallest relative gap allowed between any shift in IRL mode.
  118. Default is `0.001`. Accessed only if ``irl_mode=True``.
  119. shifts : int, optional
  120. Number of shifts per restart in IRL mode. Default is determined
  121. to satisfy ``k <= min(kmax-shifts, m, n)``. Must be
  122. >= 0, but choosing 0 might lead to performance degredation.
  123. Accessed only if ``irl_mode=True``.
  124. maxiter : int, optional
  125. Maximum number of restarts in IRL mode. Default is `1000`.
  126. Accessed only if ``irl_mode=True``.
  127. random_state : {None, int, `numpy.random.Generator`,
  128. `numpy.random.RandomState`}, optional
  129. Pseudorandom number generator state used to generate resamples.
  130. If `random_state` is ``None`` (or `np.random`), the
  131. `numpy.random.RandomState` singleton is used.
  132. If `random_state` is an int, a new ``RandomState`` instance is used,
  133. seeded with `random_state`.
  134. If `random_state` is already a ``Generator`` or ``RandomState``
  135. instance then that instance is used.
  136. Returns
  137. -------
  138. u : ndarray
  139. The `k` largest (``which="LM"``) or smallest (``which="SM"``) left
  140. singular vectors, ``shape == (A.shape[0], 3)``, returned only if
  141. ``compute_u=True``.
  142. sigma : ndarray
  143. The top `k` singular values, ``shape == (k,)``
  144. vt : ndarray
  145. The `k` largest (``which="LM"``) or smallest (``which="SM"``) right
  146. singular vectors, ``shape == (3, A.shape[1])``, returned only if
  147. ``compute_v=True``.
  148. sigma_bound : ndarray
  149. the error bounds on the singular values sigma, returned only if
  150. ``full_output=True``.
  151. """
  152. # 32-bit complex PROPACK functions have Fortran LAPACK ABI
  153. # incompatibility issues
  154. if np.iscomplexobj(A) and (np.intp(0).itemsize < 8):
  155. raise TypeError('PROPACK complex-valued SVD methods not available '
  156. 'for 32-bit builds')
  157. random_state = check_random_state(random_state)
  158. which = which.upper()
  159. if which not in {'LM', 'SM'}:
  160. raise ValueError("`which` must be either 'LM' or 'SM'")
  161. if not irl_mode and which == 'SM':
  162. raise ValueError("`which`='SM' requires irl_mode=True")
  163. aprod = _AProd(A)
  164. typ = aprod.dtype.char
  165. try:
  166. lansvd_irl = _lansvd_irl_dict[typ]
  167. lansvd = _lansvd_dict[typ]
  168. except KeyError:
  169. # work with non-supported types using native system precision
  170. if np.iscomplexobj(np.empty(0, dtype=typ)):
  171. typ = np.dtype(complex).char
  172. else:
  173. typ = np.dtype(float).char
  174. lansvd_irl = _lansvd_irl_dict[typ]
  175. lansvd = _lansvd_dict[typ]
  176. m, n = aprod.shape
  177. if (k < 1) or (k > min(m, n)):
  178. raise ValueError("k must be positive and not greater than m or n")
  179. if kmax is None:
  180. kmax = 10*k
  181. if maxiter is None:
  182. maxiter = 1000
  183. # guard against unnecessarily large kmax
  184. kmax = min(m + 1, n + 1, kmax)
  185. if kmax < k:
  186. raise ValueError(
  187. "kmax must be greater than or equal to k, "
  188. f"but kmax ({kmax}) < k ({k})")
  189. # convert python args to fortran args
  190. jobu = 'y' if compute_u else 'n'
  191. jobv = 'y' if compute_v else 'n'
  192. # these will be the output arrays
  193. u = np.zeros((m, kmax + 1), order='F', dtype=typ)
  194. v = np.zeros((n, kmax), order='F', dtype=typ)
  195. # Specify the starting vector. if v0 is all zero, PROPACK will generate
  196. # a random starting vector: the random seed cannot be controlled in that
  197. # case, so we'll instead use numpy to generate a random vector
  198. if v0 is None:
  199. u[:, 0] = random_state.uniform(size=m)
  200. if np.iscomplexobj(np.empty(0, dtype=typ)): # complex type
  201. u[:, 0] += 1j * random_state.uniform(size=m)
  202. else:
  203. try:
  204. u[:, 0] = v0
  205. except ValueError:
  206. raise ValueError(f"v0 must be of length {m}")
  207. # process options for the fit
  208. if delta is None:
  209. delta = np.sqrt(np.finfo(typ).eps)
  210. if eta is None:
  211. eta = np.finfo(typ).eps ** 0.75
  212. if irl_mode:
  213. doption = np.array((delta, eta, anorm, min_relgap), dtype=typ.lower())
  214. # validate or find default shifts
  215. if shifts is None:
  216. shifts = kmax - k
  217. if k > min(kmax - shifts, m, n):
  218. raise ValueError('shifts must satisfy '
  219. 'k <= min(kmax-shifts, m, n)!')
  220. elif shifts < 0:
  221. raise ValueError('shifts must be >= 0!')
  222. else:
  223. doption = np.array((delta, eta, anorm), dtype=typ.lower())
  224. ioption = np.array((int(bool(cgs)), int(bool(elr))), dtype='i')
  225. # If computing `u` or `v` (left and right singular vectors,
  226. # respectively), `blocksize` controls how large a fraction of the
  227. # work is done via fast BLAS level 3 operations. A larger blocksize
  228. # may lead to faster computation at the expense of greater memory
  229. # consumption. `blocksize` must be ``>= 1``. Choosing blocksize
  230. # of 16, but docs don't specify; it's almost surely a
  231. # power of 2.
  232. blocksize = 16
  233. # Determine lwork & liwork:
  234. # the required lengths are specified in the PROPACK documentation
  235. if compute_u or compute_v:
  236. lwork = m + n + 9*kmax + 5*kmax*kmax + 4 + max(
  237. 3*kmax*kmax + 4*kmax + 4,
  238. blocksize*max(m, n))
  239. liwork = 8*kmax
  240. else:
  241. lwork = m + n + 9*kmax + 2*kmax*kmax + 4 + max(m + n, 4*kmax + 4)
  242. liwork = 2*kmax + 1
  243. work = np.empty(lwork, dtype=typ.lower())
  244. iwork = np.empty(liwork, dtype=np.int32)
  245. # dummy arguments: these are passed to aprod, and not used in this wrapper
  246. dparm = np.empty(1, dtype=typ.lower())
  247. iparm = np.empty(1, dtype=np.int32)
  248. if typ.isupper():
  249. # PROPACK documentation is unclear on the required length of zwork.
  250. # Use the same length Julia's wrapper uses
  251. # see https://github.com/JuliaSmoothOptimizers/PROPACK.jl/
  252. zwork = np.empty(m + n + 32*m, dtype=typ)
  253. works = work, zwork, iwork
  254. else:
  255. works = work, iwork
  256. if irl_mode:
  257. u, sigma, bnd, v, info = lansvd_irl(_which_converter[which], jobu,
  258. jobv, m, n, shifts, k, maxiter,
  259. aprod, u, v, tol, *works, doption,
  260. ioption, dparm, iparm)
  261. else:
  262. u, sigma, bnd, v, info = lansvd(jobu, jobv, m, n, k, aprod, u, v, tol,
  263. *works, doption, ioption, dparm, iparm)
  264. if info > 0:
  265. raise LinAlgError(
  266. f"An invariant subspace of dimension {info} was found.")
  267. elif info < 0:
  268. raise LinAlgError(
  269. f"k={k} singular triplets did not converge within "
  270. f"kmax={kmax} iterations")
  271. # info == 0: The K largest (or smallest) singular triplets were computed
  272. # succesfully!
  273. return u[:, :k], sigma, v[:, :k].conj().T, bnd