488 lines
15 KiB
Python
488 lines
15 KiB
Python
|
# Copyright (C) 2015, Pauli Virtanen <pav@iki.fi>
|
||
|
# Distributed under the same license as Scipy.
|
||
|
|
||
|
from __future__ import division, print_function, absolute_import
|
||
|
|
||
|
import warnings
|
||
|
import numpy as np
|
||
|
from numpy.linalg import LinAlgError
|
||
|
from scipy._lib.six import xrange
|
||
|
from scipy.linalg import (get_blas_funcs, qr, solve, svd, qr_insert, lstsq)
|
||
|
from scipy.sparse.linalg.isolve.utils import make_system
|
||
|
|
||
|
|
||
|
__all__ = ['gcrotmk']
|
||
|
|
||
|
|
||
|
def _fgmres(matvec, v0, m, atol, lpsolve=None, rpsolve=None, cs=(), outer_v=(),
|
||
|
prepend_outer_v=False):
|
||
|
"""
|
||
|
FGMRES Arnoldi process, with optional projection or augmentation
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
matvec : callable
|
||
|
Operation A*x
|
||
|
v0 : ndarray
|
||
|
Initial vector, normalized to nrm2(v0) == 1
|
||
|
m : int
|
||
|
Number of GMRES rounds
|
||
|
atol : float
|
||
|
Absolute tolerance for early exit
|
||
|
lpsolve : callable
|
||
|
Left preconditioner L
|
||
|
rpsolve : callable
|
||
|
Right preconditioner R
|
||
|
CU : list of (ndarray, ndarray)
|
||
|
Columns of matrices C and U in GCROT
|
||
|
outer_v : list of ndarrays
|
||
|
Augmentation vectors in LGMRES
|
||
|
prepend_outer_v : bool, optional
|
||
|
Whether augmentation vectors come before or after
|
||
|
Krylov iterates
|
||
|
|
||
|
Raises
|
||
|
------
|
||
|
LinAlgError
|
||
|
If nans encountered
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
Q, R : ndarray
|
||
|
QR decomposition of the upper Hessenberg H=QR
|
||
|
B : ndarray
|
||
|
Projections corresponding to matrix C
|
||
|
vs : list of ndarray
|
||
|
Columns of matrix V
|
||
|
zs : list of ndarray
|
||
|
Columns of matrix Z
|
||
|
y : ndarray
|
||
|
Solution to ||H y - e_1||_2 = min!
|
||
|
res : float
|
||
|
The final (preconditioned) residual norm
|
||
|
|
||
|
"""
|
||
|
|
||
|
if lpsolve is None:
|
||
|
lpsolve = lambda x: x
|
||
|
if rpsolve is None:
|
||
|
rpsolve = lambda x: x
|
||
|
|
||
|
axpy, dot, scal, nrm2 = get_blas_funcs(['axpy', 'dot', 'scal', 'nrm2'], (v0,))
|
||
|
|
||
|
vs = [v0]
|
||
|
zs = []
|
||
|
y = None
|
||
|
res = np.nan
|
||
|
|
||
|
m = m + len(outer_v)
|
||
|
|
||
|
# Orthogonal projection coefficients
|
||
|
B = np.zeros((len(cs), m), dtype=v0.dtype)
|
||
|
|
||
|
# H is stored in QR factorized form
|
||
|
Q = np.ones((1, 1), dtype=v0.dtype)
|
||
|
R = np.zeros((1, 0), dtype=v0.dtype)
|
||
|
|
||
|
eps = np.finfo(v0.dtype).eps
|
||
|
|
||
|
breakdown = False
|
||
|
|
||
|
# FGMRES Arnoldi process
|
||
|
for j in xrange(m):
|
||
|
# L A Z = C B + V H
|
||
|
|
||
|
if prepend_outer_v and j < len(outer_v):
|
||
|
z, w = outer_v[j]
|
||
|
elif prepend_outer_v and j == len(outer_v):
|
||
|
z = rpsolve(v0)
|
||
|
w = None
|
||
|
elif not prepend_outer_v and j >= m - len(outer_v):
|
||
|
z, w = outer_v[j - (m - len(outer_v))]
|
||
|
else:
|
||
|
z = rpsolve(vs[-1])
|
||
|
w = None
|
||
|
|
||
|
if w is None:
|
||
|
w = lpsolve(matvec(z))
|
||
|
else:
|
||
|
# w is clobbered below
|
||
|
w = w.copy()
|
||
|
|
||
|
w_norm = nrm2(w)
|
||
|
|
||
|
# GCROT projection: L A -> (1 - C C^H) L A
|
||
|
# i.e. orthogonalize against C
|
||
|
for i, c in enumerate(cs):
|
||
|
alpha = dot(c, w)
|
||
|
B[i,j] = alpha
|
||
|
w = axpy(c, w, c.shape[0], -alpha) # w -= alpha*c
|
||
|
|
||
|
# Orthogonalize against V
|
||
|
hcur = np.zeros(j+2, dtype=Q.dtype)
|
||
|
for i, v in enumerate(vs):
|
||
|
alpha = dot(v, w)
|
||
|
hcur[i] = alpha
|
||
|
w = axpy(v, w, v.shape[0], -alpha) # w -= alpha*v
|
||
|
hcur[i+1] = nrm2(w)
|
||
|
|
||
|
with np.errstate(over='ignore', divide='ignore'):
|
||
|
# Careful with denormals
|
||
|
alpha = 1/hcur[-1]
|
||
|
|
||
|
if np.isfinite(alpha):
|
||
|
w = scal(alpha, w)
|
||
|
|
||
|
if not (hcur[-1] > eps * w_norm):
|
||
|
# w essentially in the span of previous vectors,
|
||
|
# or we have nans. Bail out after updating the QR
|
||
|
# solution.
|
||
|
breakdown = True
|
||
|
|
||
|
vs.append(w)
|
||
|
zs.append(z)
|
||
|
|
||
|
# Arnoldi LSQ problem
|
||
|
|
||
|
# Add new column to H=Q*R, padding other columns with zeros
|
||
|
Q2 = np.zeros((j+2, j+2), dtype=Q.dtype, order='F')
|
||
|
Q2[:j+1,:j+1] = Q
|
||
|
Q2[j+1,j+1] = 1
|
||
|
|
||
|
R2 = np.zeros((j+2, j), dtype=R.dtype, order='F')
|
||
|
R2[:j+1,:] = R
|
||
|
|
||
|
Q, R = qr_insert(Q2, R2, hcur, j, which='col',
|
||
|
overwrite_qru=True, check_finite=False)
|
||
|
|
||
|
# Transformed least squares problem
|
||
|
# || Q R y - inner_res_0 * e_1 ||_2 = min!
|
||
|
# Since R = [R'; 0], solution is y = inner_res_0 (R')^{-1} (Q^H)[:j,0]
|
||
|
|
||
|
# Residual is immediately known
|
||
|
res = abs(Q[0,-1])
|
||
|
|
||
|
# Check for termination
|
||
|
if res < atol or breakdown:
|
||
|
break
|
||
|
|
||
|
if not np.isfinite(R[j,j]):
|
||
|
# nans encountered, bail out
|
||
|
raise LinAlgError()
|
||
|
|
||
|
# -- Get the LSQ problem solution
|
||
|
|
||
|
# The problem is triangular, but the condition number may be
|
||
|
# bad (or in case of breakdown the last diagonal entry may be
|
||
|
# zero), so use lstsq instead of trtrs.
|
||
|
y, _, _, _, = lstsq(R[:j+1,:j+1], Q[0,:j+1].conj())
|
||
|
|
||
|
B = B[:,:j+1]
|
||
|
|
||
|
return Q, R, B, vs, zs, y, res
|
||
|
|
||
|
|
||
|
def gcrotmk(A, b, x0=None, tol=1e-5, maxiter=1000, M=None, callback=None,
|
||
|
m=20, k=None, CU=None, discard_C=False, truncate='oldest',
|
||
|
atol=None):
|
||
|
"""
|
||
|
Solve a matrix equation using flexible GCROT(m,k) algorithm.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
A : {sparse matrix, dense matrix, LinearOperator}
|
||
|
The real or complex N-by-N matrix of the linear system.
|
||
|
b : {array, matrix}
|
||
|
Right hand side of the linear system. Has shape (N,) or (N,1).
|
||
|
x0 : {array, matrix}
|
||
|
Starting guess for the solution.
|
||
|
tol, atol : float, optional
|
||
|
Tolerances for convergence, ``norm(residual) <= max(tol*norm(b), atol)``.
|
||
|
The default for ``atol`` is `tol`.
|
||
|
|
||
|
.. warning::
|
||
|
|
||
|
The default value for `atol` will be changed in a future release.
|
||
|
For future compatibility, specify `atol` explicitly.
|
||
|
maxiter : int, optional
|
||
|
Maximum number of iterations. Iteration will stop after maxiter
|
||
|
steps even if the specified tolerance has not been achieved.
|
||
|
M : {sparse matrix, dense matrix, LinearOperator}, optional
|
||
|
Preconditioner for A. The preconditioner should approximate the
|
||
|
inverse of A. gcrotmk is a 'flexible' algorithm and the preconditioner
|
||
|
can vary from iteration to iteration. Effective preconditioning
|
||
|
dramatically improves the rate of convergence, which implies that
|
||
|
fewer iterations are needed to reach a given error tolerance.
|
||
|
callback : function, optional
|
||
|
User-supplied function to call after each iteration. It is called
|
||
|
as callback(xk), where xk is the current solution vector.
|
||
|
m : int, optional
|
||
|
Number of inner FGMRES iterations per each outer iteration.
|
||
|
Default: 20
|
||
|
k : int, optional
|
||
|
Number of vectors to carry between inner FGMRES iterations.
|
||
|
According to [2]_, good values are around m.
|
||
|
Default: m
|
||
|
CU : list of tuples, optional
|
||
|
List of tuples ``(c, u)`` which contain the columns of the matrices
|
||
|
C and U in the GCROT(m,k) algorithm. For details, see [2]_.
|
||
|
The list given and vectors contained in it are modified in-place.
|
||
|
If not given, start from empty matrices. The ``c`` elements in the
|
||
|
tuples can be ``None``, in which case the vectors are recomputed
|
||
|
via ``c = A u`` on start and orthogonalized as described in [3]_.
|
||
|
discard_C : bool, optional
|
||
|
Discard the C-vectors at the end. Useful if recycling Krylov subspaces
|
||
|
for different linear systems.
|
||
|
truncate : {'oldest', 'smallest'}, optional
|
||
|
Truncation scheme to use. Drop: oldest vectors, or vectors with
|
||
|
smallest singular values using the scheme discussed in [1,2].
|
||
|
See [2]_ for detailed comparison.
|
||
|
Default: 'oldest'
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
x : array or matrix
|
||
|
The solution found.
|
||
|
info : int
|
||
|
Provides convergence information:
|
||
|
|
||
|
* 0 : successful exit
|
||
|
* >0 : convergence to tolerance not achieved, number of iterations
|
||
|
|
||
|
References
|
||
|
----------
|
||
|
.. [1] E. de Sturler, ''Truncation strategies for optimal Krylov subspace
|
||
|
methods'', SIAM J. Numer. Anal. 36, 864 (1999).
|
||
|
.. [2] J.E. Hicken and D.W. Zingg, ''A simplified and flexible variant
|
||
|
of GCROT for solving nonsymmetric linear systems'',
|
||
|
SIAM J. Sci. Comput. 32, 172 (2010).
|
||
|
.. [3] M.L. Parks, E. de Sturler, G. Mackey, D.D. Johnson, S. Maiti,
|
||
|
''Recycling Krylov subspaces for sequences of linear systems'',
|
||
|
SIAM J. Sci. Comput. 28, 1651 (2006).
|
||
|
|
||
|
"""
|
||
|
A,M,x,b,postprocess = make_system(A,M,x0,b)
|
||
|
|
||
|
if not np.isfinite(b).all():
|
||
|
raise ValueError("RHS must contain only finite numbers")
|
||
|
|
||
|
if truncate not in ('oldest', 'smallest'):
|
||
|
raise ValueError("Invalid value for 'truncate': %r" % (truncate,))
|
||
|
|
||
|
if atol is None:
|
||
|
warnings.warn("scipy.sparse.linalg.gcrotmk called without specifying `atol`. "
|
||
|
"The default value will change in the future. To preserve "
|
||
|
"current behavior, set ``atol=tol``.",
|
||
|
category=DeprecationWarning, stacklevel=2)
|
||
|
atol = tol
|
||
|
|
||
|
matvec = A.matvec
|
||
|
psolve = M.matvec
|
||
|
|
||
|
if CU is None:
|
||
|
CU = []
|
||
|
|
||
|
if k is None:
|
||
|
k = m
|
||
|
|
||
|
axpy, dot, scal = None, None, None
|
||
|
|
||
|
r = b - matvec(x)
|
||
|
|
||
|
axpy, dot, scal, nrm2 = get_blas_funcs(['axpy', 'dot', 'scal', 'nrm2'], (x, r))
|
||
|
|
||
|
b_norm = nrm2(b)
|
||
|
|
||
|
if discard_C:
|
||
|
CU[:] = [(None, u) for c, u in CU]
|
||
|
|
||
|
# Reorthogonalize old vectors
|
||
|
if CU:
|
||
|
# Sort already existing vectors to the front
|
||
|
CU.sort(key=lambda cu: cu[0] is not None)
|
||
|
|
||
|
# Fill-in missing ones
|
||
|
C = np.empty((A.shape[0], len(CU)), dtype=r.dtype, order='F')
|
||
|
us = []
|
||
|
j = 0
|
||
|
while CU:
|
||
|
# More memory-efficient: throw away old vectors as we go
|
||
|
c, u = CU.pop(0)
|
||
|
if c is None:
|
||
|
c = matvec(u)
|
||
|
C[:,j] = c
|
||
|
j += 1
|
||
|
us.append(u)
|
||
|
|
||
|
# Orthogonalize
|
||
|
Q, R, P = qr(C, overwrite_a=True, mode='economic', pivoting=True)
|
||
|
del C
|
||
|
|
||
|
# C := Q
|
||
|
cs = list(Q.T)
|
||
|
|
||
|
# U := U P R^-1, back-substitution
|
||
|
new_us = []
|
||
|
for j in xrange(len(cs)):
|
||
|
u = us[P[j]]
|
||
|
for i in xrange(j):
|
||
|
u = axpy(us[P[i]], u, u.shape[0], -R[i,j])
|
||
|
if abs(R[j,j]) < 1e-12 * abs(R[0,0]):
|
||
|
# discard rest of the vectors
|
||
|
break
|
||
|
u = scal(1.0/R[j,j], u)
|
||
|
new_us.append(u)
|
||
|
|
||
|
# Form the new CU lists
|
||
|
CU[:] = list(zip(cs, new_us))[::-1]
|
||
|
|
||
|
if CU:
|
||
|
axpy, dot = get_blas_funcs(['axpy', 'dot'], (r,))
|
||
|
|
||
|
# Solve first the projection operation with respect to the CU
|
||
|
# vectors. This corresponds to modifying the initial guess to
|
||
|
# be
|
||
|
#
|
||
|
# x' = x + U y
|
||
|
# y = argmin_y || b - A (x + U y) ||^2
|
||
|
#
|
||
|
# The solution is y = C^H (b - A x)
|
||
|
for c, u in CU:
|
||
|
yc = dot(c, r)
|
||
|
x = axpy(u, x, x.shape[0], yc)
|
||
|
r = axpy(c, r, r.shape[0], -yc)
|
||
|
|
||
|
# GCROT main iteration
|
||
|
for j_outer in xrange(maxiter):
|
||
|
# -- callback
|
||
|
if callback is not None:
|
||
|
callback(x)
|
||
|
|
||
|
beta = nrm2(r)
|
||
|
|
||
|
# -- check stopping condition
|
||
|
beta_tol = max(atol, tol * b_norm)
|
||
|
|
||
|
if beta <= beta_tol and (j_outer > 0 or CU):
|
||
|
# recompute residual to avoid rounding error
|
||
|
r = b - matvec(x)
|
||
|
beta = nrm2(r)
|
||
|
|
||
|
if beta <= beta_tol:
|
||
|
j_outer = -1
|
||
|
break
|
||
|
|
||
|
ml = m + max(k - len(CU), 0)
|
||
|
|
||
|
cs = [c for c, u in CU]
|
||
|
|
||
|
try:
|
||
|
Q, R, B, vs, zs, y, pres = _fgmres(matvec,
|
||
|
r/beta,
|
||
|
ml,
|
||
|
rpsolve=psolve,
|
||
|
atol=max(atol, tol*b_norm)/beta,
|
||
|
cs=cs)
|
||
|
y *= beta
|
||
|
except LinAlgError:
|
||
|
# Floating point over/underflow, non-finite result from
|
||
|
# matmul etc. -- report failure.
|
||
|
break
|
||
|
|
||
|
#
|
||
|
# At this point,
|
||
|
#
|
||
|
# [A U, A Z] = [C, V] G; G = [ I B ]
|
||
|
# [ 0 H ]
|
||
|
#
|
||
|
# where [C, V] has orthonormal columns, and r = beta v_0. Moreover,
|
||
|
#
|
||
|
# || b - A (x + Z y + U q) ||_2 = || r - C B y - V H y - C q ||_2 = min!
|
||
|
#
|
||
|
# from which y = argmin_y || beta e_1 - H y ||_2, and q = -B y
|
||
|
#
|
||
|
|
||
|
#
|
||
|
# GCROT(m,k) update
|
||
|
#
|
||
|
|
||
|
# Define new outer vectors
|
||
|
|
||
|
# ux := (Z - U B) y
|
||
|
ux = zs[0]*y[0]
|
||
|
for z, yc in zip(zs[1:], y[1:]):
|
||
|
ux = axpy(z, ux, ux.shape[0], yc) # ux += z*yc
|
||
|
by = B.dot(y)
|
||
|
for cu, byc in zip(CU, by):
|
||
|
c, u = cu
|
||
|
ux = axpy(u, ux, ux.shape[0], -byc) # ux -= u*byc
|
||
|
|
||
|
# cx := V H y
|
||
|
hy = Q.dot(R.dot(y))
|
||
|
cx = vs[0] * hy[0]
|
||
|
for v, hyc in zip(vs[1:], hy[1:]):
|
||
|
cx = axpy(v, cx, cx.shape[0], hyc) # cx += v*hyc
|
||
|
|
||
|
# Normalize cx, maintaining cx = A ux
|
||
|
# This new cx is orthogonal to the previous C, by construction
|
||
|
try:
|
||
|
alpha = 1/nrm2(cx)
|
||
|
if not np.isfinite(alpha):
|
||
|
raise FloatingPointError()
|
||
|
except (FloatingPointError, ZeroDivisionError):
|
||
|
# Cannot update, so skip it
|
||
|
continue
|
||
|
|
||
|
cx = scal(alpha, cx)
|
||
|
ux = scal(alpha, ux)
|
||
|
|
||
|
# Update residual and solution
|
||
|
gamma = dot(cx, r)
|
||
|
r = axpy(cx, r, r.shape[0], -gamma) # r -= gamma*cx
|
||
|
x = axpy(ux, x, x.shape[0], gamma) # x += gamma*ux
|
||
|
|
||
|
# Truncate CU
|
||
|
if truncate == 'oldest':
|
||
|
while len(CU) >= k and CU:
|
||
|
del CU[0]
|
||
|
elif truncate == 'smallest':
|
||
|
if len(CU) >= k and CU:
|
||
|
# cf. [1,2]
|
||
|
D = solve(R[:-1,:].T, B.T).T
|
||
|
W, sigma, V = svd(D)
|
||
|
|
||
|
# C := C W[:,:k-1], U := U W[:,:k-1]
|
||
|
new_CU = []
|
||
|
for j, w in enumerate(W[:,:k-1].T):
|
||
|
c, u = CU[0]
|
||
|
c = c * w[0]
|
||
|
u = u * w[0]
|
||
|
for cup, wp in zip(CU[1:], w[1:]):
|
||
|
cp, up = cup
|
||
|
c = axpy(cp, c, c.shape[0], wp)
|
||
|
u = axpy(up, u, u.shape[0], wp)
|
||
|
|
||
|
# Reorthogonalize at the same time; not necessary
|
||
|
# in exact arithmetic, but floating point error
|
||
|
# tends to accumulate here
|
||
|
for cp, up in new_CU:
|
||
|
alpha = dot(cp, c)
|
||
|
c = axpy(cp, c, c.shape[0], -alpha)
|
||
|
u = axpy(up, u, u.shape[0], -alpha)
|
||
|
alpha = nrm2(c)
|
||
|
c = scal(1.0/alpha, c)
|
||
|
u = scal(1.0/alpha, u)
|
||
|
|
||
|
new_CU.append((c, u))
|
||
|
CU[:] = new_CU
|
||
|
|
||
|
# Add new vector to CU
|
||
|
CU.append((cx, ux))
|
||
|
|
||
|
# Include the solution vector to the span
|
||
|
CU.append((None, x.copy()))
|
||
|
if discard_C:
|
||
|
CU[:] = [(None, uz) for cz, uz in CU]
|
||
|
|
||
|
return postprocess(x), j_outer + 1
|