Source code for probnum.linalg.linearsolvers.linearsolvers

"""Probabilistic numerical methods for solving linear systems.

This module provides routines to solve linear systems of equations in a
Bayesian framework. This means that a prior distribution over elements
of the linear system can be provided and is updated with information
collected by the solvers to return a posterior distribution.
"""

import warnings
from typing import Callable, Dict, Optional, Tuple, Union

import numpy as np
import scipy.sparse

from probnum import linops, randvars, utils
from probnum.linalg.linearsolvers.matrixbased import (
    AsymmetricMatrixBasedSolver,
    NoisySymmetricMatrixBasedSolver,
    SymmetricMatrixBasedSolver,
)
from probnum.linalg.linearsolvers.solutionbased import SolutionBasedSolver

# Type aliases
SquareLinOp = Union[
    np.ndarray, scipy.sparse.spmatrix, linops.LinearOperator, "randvars.RandomVariable"
]
RandomVecMat = Union[np.ndarray, "randvars.RandomVariable"]


[docs]def problinsolve(
    A: SquareLinOp,
    b: RandomVecMat,
    A0: Optional[SquareLinOp] = None,
    Ainv0: Optional[SquareLinOp] = None,
    x0: Optional[RandomVecMat] = None,
    assume_A: str = "sympos",
    maxiter: Optional[int] = None,
    atol: float = 10 ** -6,
    rtol: float = 10 ** -6,
    callback: Optional[Callable] = None,
    **kwargs
) -> Tuple[
    "randvars.RandomVariable",
    "randvars.RandomVariable",
    "randvars.RandomVariable",
    Dict,
]:
    """Infer a solution to the linear system :math:`A x = b` in a Bayesian framework.

    Probabilistic linear solvers infer solutions to problems of the form

    .. math:: Ax=b,

    where :math:`A \\in \\mathbb{R}^{n \\times n}` and :math:`b \\in \\mathbb{R}^{n}`.
    They return a probability measure which quantifies uncertainty in the output arising
    from finite computational resources. This solver can take prior information either
    on the linear operator :math:`A` or its inverse :math:`H=A^{-1}` in the form of a
    random variable ``A0`` or ``Ainv0`` and outputs a posterior belief over :math:`A` or
    :math:`H`. This code implements the method described in Wenger et al. [1]_ based on
    the work in Hennig et al. [2]_.

    Parameters
    ----------
    A :
        *shape=(n, n)* -- A square linear operator (or matrix). Only matrix-vector
        products :math:`v \\mapsto Av` are used internally.
    b :
        *shape=(n, ) or (n, nrhs)* -- Right-hand side vector, matrix or random
        variable in :math:`A x = b`. For multiple right hand sides, ``nrhs`` problems
        are solved sequentially with the posteriors over the matrices acting as priors
        for subsequent solves. If the right-hand-side is assumed to be noisy, every
        iteration of the solver samples a realization from ``b``.
    A0 :
        *shape=(n, n)* -- A square matrix, linear operator or random variable
        representing the prior belief over the linear operator :math:`A`. If an array or
        linear operator is given, a prior distribution is chosen automatically.
    Ainv0 :
        *shape=(n, n)* -- A square matrix, linear operator or random variable
        representing the prior belief over the inverse :math:`H=A^{-1}`. This can be
        viewed as taking the form of a pre-conditioner. If an array or linear operator
        is given, a prior distribution is chosen automatically.
    x0 :
        *shape=(n, ) or (n, nrhs)* -- Prior belief for the solution of the linear
        system. Will be ignored if ``Ainv0`` is given.
    assume_A :
        Assumptions on the linear operator which can influence solver choice and
        behavior. The available options are (combinations of)

        ====================  =========
         generic matrix       ``gen``
         symmetric            ``sym``
         positive definite    ``pos``
         (additive) noise     ``noise``
        ====================  =========

    maxiter :
        Maximum number of iterations. Defaults to :math:`10n`, where :math:`n` is the
        dimension of :math:`A`.
    atol :
        Absolute convergence tolerance.
    rtol :
        Relative convergence tolerance.
    callback :
        User-supplied function called after each iteration of the linear solver. It is
        called as ``callback(xk, Ak, Ainvk, sk, yk, alphak, resid, **kwargs)`` and can
        be used to return quantities from the iteration. Note that depending on the
        function supplied, this can slow down the solver considerably.
    kwargs : optional
        Optional keyword arguments passed onto the solver iteration.

    Returns
    -------
    x :
        Approximate solution :math:`x` to the linear system. Shape of the return matches
        the shape of ``b``.
    A :
        Posterior belief over the linear operator.
    Ainv :
        Posterior belief over the linear operator inverse :math:`H=A^{-1}`.
    info :
        Information on convergence of the solver.

    Raises
    ------
    ValueError
        If size mismatches detected or input matrices are not square.
    LinAlgError
        If the matrix ``A`` is singular.
    LinAlgWarning
        If an ill-conditioned input ``A`` is detected.

    Notes
    -----
    For a specific class of priors the posterior mean of :math:`x_k=Hb` coincides with
    the iterates of the conjugate gradient method. The matrix-based view taken here
    recovers the solution-based inference of :func:`bayescg` [3]_.

    References
    ----------
    .. [1] Wenger, J. and Hennig, P., Probabilistic Linear Solvers for Machine Learning,
       *Advances in Neural Information Processing Systems (NeurIPS)*, 2020
    .. [2] Hennig, P., Probabilistic Interpretation of Linear Solvers, *SIAM Journal on
       Optimization*, 2015, 25, 234-260
    .. [3] Bartels, S. et al., Probabilistic Linear Solvers: A Unifying View,
       *Statistics and Computing*, 2019

    See Also
    --------
    bayescg : Solve linear systems with prior information on the solution.

    Examples
    --------
    >>> import numpy as np
    >>> np.random.seed(1)
    >>> n = 20
    >>> A = np.random.rand(n, n)
    >>> A = 0.5 * (A + A.T) + 5 * np.eye(n)
    >>> b = np.random.rand(n)
    >>> x, A, Ainv, info = problinsolve(A=A, b=b)
    >>> print(info["iter"])
    9
    """

    # Check linear system for type and dimension mismatch
    _check_linear_system(A=A, b=b, A0=A0, Ainv0=Ainv0, x0=x0)

    # Check matrix assumptions for correctness
    assume_A = assume_A.lower()
    _assume_A_tmp = assume_A
    for allowed_str in ["gen", "sym", "pos", "noise"]:
        _assume_A_tmp = _assume_A_tmp.replace(allowed_str, "")
    if _assume_A_tmp != "":
        raise ValueError(
            "Assumption '{}' contains unrecognized linear operator properties.".format(
                assume_A
            )
        )

    # Transform the linear system to an appropriate form
    A, b, x0 = _preprocess_linear_system(A=A, b=b, x0=x0)

    # Parameter initialization
    n = A.shape[0]
    nrhs = b.shape[1]
    x = x0
    info = {}

    # Set convergence parameters
    if maxiter is None:
        maxiter = n * 10

    if nrhs > 1:
        # Iteratively solve for multiple right hand sides (with posteriors as new
        # priors)
        for i in range(nrhs):
            if i > 0:
                x = None  # Only use prior information on Ainv for multiple rhs
            # Select and initialize solver
            linear_solver = _init_solver(
                A=A,
                b=utils.as_colvec(b[:, i]),
                A0=A0,
                Ainv0=Ainv0,
                x0=x,
                assume_A=assume_A,
            )

            # Solve linear system
            x, A0, Ainv0, info = linear_solver.solve(
                maxiter=maxiter, atol=atol, rtol=rtol, callback=callback, **kwargs
            )

        # Return Ainv @ b for multiple rhs
        x = Ainv0 @ b
    else:
        # Single right hand side
        linear_solver = _init_solver(
            A=A, b=b, A0=A0, Ainv0=Ainv0, x0=x, assume_A=assume_A
        )

        # Solve linear system
        x, A0, Ainv0, info = linear_solver.solve(
            maxiter=maxiter, atol=atol, rtol=rtol, callback=callback, **kwargs
        )

    # Check result and issue warnings (e.g. singular or ill-conditioned matrix)
    _postprocess(info=info, A=A)

    return x, A0, Ainv0, info


[docs]def bayescg(A, b, x0=None, maxiter=None, atol=None, rtol=None, callback=None):
    """Conjugate Gradients using prior information on the solution of the linear system.

    In the setting where :math:`A` is a symmetric positive-definite matrix, this solver
    takes prior information on the solution and outputs a posterior belief over
    :math:`x`. This code implements the method described in Cockayne et al. [1]_.

    Note that the solution-based view of BayesCG and the matrix-based view of
    :meth:`problinsolve` correspond [2]_.

    Parameters
    ----------
    A : array-like or LinearOperator, shape=(n,n)
        A square linear operator (or matrix). Only matrix-vector products :math:`Av` are
        used internally.
    b : array_like, shape=(n,) or (n, nrhs)
        Right-hand side vector or matrix in :math:`A x = b`.
    x0 : array-like or RandomVariable, shape=(n,) or or (n, nrhs)
        Prior belief over the solution of the linear system.
    maxiter : int
        Maximum number of iterations. Defaults to :math:`10n`, where :math:`n` is the
        dimension of :math:`A`.
    atol : float, optional
        Absolute residual tolerance. If :math:`\\lVert r_i \\rVert = \\lVert Ax_i - b
        \\rVert < \\text{atol}`, the iteration terminates.
    rtol : float, optional
        Relative residual tolerance. If :math:`\\lVert r_i \\rVert  < \\text{rtol}
        \\lVert b \\rVert`, the iteration terminates.
    callback : function, optional
        User-supplied function called after each iteration of the linear solver. It is
        called as ``callback(xk, sk, yk, alphak, resid, **kwargs)`` and can be used to
        return quantities from the iteration. Note that depending on the function
        supplied, this can slow down the solver.

    References
    ----------
    .. [1] Cockayne, J. et al., A Bayesian Conjugate Gradient Method, *Bayesian
       Analysis*, 2019, 14, 937-1012
    .. [2] Bartels, S. et al., Probabilistic Linear Solvers: A Unifying View,
       *Statistics and Computing*, 2019

    See Also
    --------
    problinsolve : Solve linear systems in a Bayesian framework.
    """
    # Check linear system for type and dimension mismatch
    _check_linear_system(A=A, b=b, x0=x0)

    # Preprocess linear system
    A, b, x0 = _preprocess_linear_system(A=A, b=b, x0=x0)

    # Set default convergence parameters
    n = A.shape[0]
    if maxiter is None:
        maxiter = n * 10

    # Solve linear system
    x, info = SolutionBasedSolver(A=A, b=b, x0=x0).solve(
        callback=callback, maxiter=maxiter, atol=atol, rtol=rtol
    )

    # Check result and issue warnings (e.g. singular or ill-conditioned matrix)
    _postprocess(info=info, A=A)

    return x, info


def _check_linear_system(A, b, A0=None, Ainv0=None, x0=None):
    """Check linear system compatibility.

    Raises an exception if the input arguments are not of the right type or not
    compatible.

    Parameters
    ----------
    A : array-like or LinearOperator, shape=(n,n)
        A square linear operator (or matrix). Only matrix-vector products :math:`Av` are
        used internally.
    b : array_like, shape=(n,) or (n, nrhs)
        Right-hand side vector or matrix in :math:`A x = b`.
    A0 : array-like or LinearOperator or RandomVariable, shape=(n,n), optional
        A square matrix, linear operator or random variable representing the prior
        belief over the linear operator :math:`A`.
    Ainv0 : array-like or LinearOperator or RandomVariable, shape=(n,n), optional
        A square matrix, linear operator or random variable representing the prior
        belief over the inverse :math:`H=A^{-1}`.
    x0 : array-like, or RandomVariable, shape=(n,) or (n, nrhs)
        Optional. Prior belief for the solution of the linear system. Will be ignored if
        ``Ainv0`` is given.

    Raises
    ------
    ValueError
        If type or size mismatches detected or inputs ``A`` and ``Ainv`` are not square.
    """
    # Check types
    linop_types = (
        np.ndarray,
        scipy.sparse.spmatrix,
        scipy.sparse.linalg.LinearOperator,
        randvars.RandomVariable,
    )
    vector_types = (np.ndarray, scipy.sparse.spmatrix, randvars.RandomVariable)
    if not isinstance(A, linop_types):
        raise ValueError(
            "A must be either an array, a linear operator or a random variable."
        )
    if not isinstance(b, vector_types):
        raise ValueError(
            "The right hand side must be a (sparse) array or a random variable."
        )
    if A0 is not None and not isinstance(A0, randvars.RandomVariable):
        raise ValueError("The prior belief over A must be a random variable.")
    if Ainv0 is not None and not isinstance(Ainv0, linop_types):
        raise ValueError(
            "The inverse of A must be either an array, a linear operator or "
            "a random variable of either."
        )
    if x0 is not None and not isinstance(x0, vector_types):
        raise ValueError("The initial guess for the solution must be a (sparse) array.")

    # Prior distribution mismatch
    if (
        isinstance(A0, randvars.RandomVariable)
        or isinstance(Ainv0, randvars.RandomVariable)
    ) and isinstance(x0, randvars.RandomVariable):
        raise ValueError(
            "Cannot specify distributions on the linear operator and the solution "
            "simultaneously."
        )

    # Dimension mismatch
    if A.shape[0] != b.shape[0]:
        raise ValueError("Dimension mismatch. The dimensions of A and b must match.")
    if Ainv0 is not None and A.shape != Ainv0.shape:
        raise ValueError(
            "Dimension mismatch. The dimensions of A and Ainv0 must match."
        )
    if A0 is not None and A.shape != A0.shape:
        raise ValueError("Dimension mismatch. The dimensions of A and A0 must match.")
    if x0 is not None and A.shape[1] != x0.shape[0]:
        raise ValueError("Dimension mismatch. The dimensions of A and x0 must match.")

    # Square matrices
    if A.shape[0] != A.shape[1]:
        raise ValueError("Matrix A must be square.")
    if A0 is not None and A0.shape[0] != A0.shape[1]:
        raise ValueError("A0 must be square.")
    if Ainv0 is not None and Ainv0.shape[0] != Ainv0.shape[1]:
        raise ValueError("The inverse of A must be square.")


def _preprocess_linear_system(A, b, x0=None):
    """Transform the linear system to an appropriate form.

    Parameters
    ----------
    A : array-like or LinearOperator, shape=(n,n)
        A square linear operator (or matrix). Only matrix-vector products :math:`Av` are
        used internally.
    b : array_like, shape=(n,) or (n, nrhs)
        Right-hand side vector or matrix in :math:`A x = b`.
    x0 : array-like, or RandomVariable, shape=(n,) or (n, nrhs)
        Optional. Prior belief for the solution of the linear system. Will be ignored if
        ``Ainv0`` is given.

    Returns
    -------
    A : RandomVariable, shape=(n,n)
        Prior belief over the linear operator :math:`A`.
    b : array-like, shape=(n,) or (n, nrhs)
        Right-hand-side of the linear system.
    x0 : array-like, or RandomVariable, shape=(n,) or (n, nrhs)
        Optional. Prior belief for the solution of the linear system. Will be ignored if
        ``Ainv0`` is given.
    """
    # Transform linear system to correct dimensions
    if not isinstance(b, randvars.RandomVariable):
        b = utils.as_colvec(b)  # (n,) -> (n, 1)
    if x0 is not None:
        x0 = utils.as_colvec(x0)  # (n,) -> (n, 1)

    return A, b, x0


def _init_solver(A, b, A0, Ainv0, x0, assume_A):
    """Selects and initializes an appropriate instance of the probabilistic linear
    solver based on the system properties and prior information given.

    Parameters
    ----------
    A : array-like or LinearOperator, shape=(n,n)
        A square linear operator (or matrix). Only matrix-vector products :math:`Av` are
        used internally.
    b : array_like, shape=(n,) or (n, nrhs)
        Right-hand side vector or matrix in :math:`A x = b`.
    A0 : array-like or LinearOperator or RandomVariable, shape=(n,n), optional
        A square matrix, linear operator or random variable representing the prior
        belief over the linear operator :math:`A`.
    Ainv0 : array-like or LinearOperator or RandomVariable, shape=(n,n), optional
        A square matrix, linear operator or random variable representing the prior
        belief over the inverse :math:`H=A^{-1}`.
    x0 : array-like, or RandomVariable, shape=(n,) or (n, nrhs)
        Optional. Prior belief for the solution of the linear system. Will be ignored if
        ``Ainv0`` is given.
    assume_A : str
        Assumptions on the linear operator, which can influence solver choice or
        behavior. The available options are (combinations of)

        ====================  =========
         generic matrix       ``gen``
         symmetric            ``sym``
         positive definite    ``pos``
         (additive) noise     ``noise``
        ====================  =========

    Returns
    -------
    linear_solver : ProbabilisticLinearSolver
        A type of probabilistic linear solver implementing the solve method for linear
        systems.
    """
    # Choose matrix based view if not clear from arguments
    if (Ainv0 is not None or A0 is not None) and isinstance(
        x0, randvars.RandomVariable
    ):
        warnings.warn(
            "Cannot use prior uncertainty on both the matrix (inverse) and the "
            "solution. The latter will be ignored."
        )
        x0 = x0.mean

    # Extract information from priors
    # System matrix is symmetric
    if isinstance(A0, randvars.RandomVariable):
        if isinstance(A0.cov, linops.SymmetricKronecker) and "sym" not in assume_A:
            assume_A += "sym"
    if isinstance(Ainv0, randvars.RandomVariable):
        if isinstance(Ainv0.cov, linops.SymmetricKronecker) and "sym" not in assume_A:
            assume_A += "sym"
    # System matrix is NOT stochastic
    if (
        not isinstance(A, randvars.RandomVariable)
        and not isinstance(A, scipy.sparse.linalg.LinearOperator)
        and "noise" in assume_A
    ):
        warnings.warn(
            "A is assumed to be noisy, but is neither a random variable nor a "
            "linear operator. Use exact probabilistic linear solver instead."
        )

    # Solution-based view
    if isinstance(x0, randvars.RandomVariable):
        return SolutionBasedSolver(A=A, b=b, x0=x0)
    # Matrix-based view
    else:
        if "sym" in assume_A and "pos" in assume_A:
            if "noise" in assume_A:
                return NoisySymmetricMatrixBasedSolver(
                    A=A, b=b, x0=x0, A0=A0, Ainv0=Ainv0
                )
            else:
                return SymmetricMatrixBasedSolver(A=A, b=b, x0=x0, A0=A0, Ainv0=Ainv0)
        elif "sym" not in assume_A and "pos" in assume_A:
            return AsymmetricMatrixBasedSolver(A=A, b=b, x0=x0)
        else:
            raise NotImplementedError


def _postprocess(info, A):
    """Postprocess the linear system and its solution.

    Raises exceptions or warnings based on the properties of the linear system and the
    solver iteration.

    Parameters
    ----------
    info : dict
        Convergence information output by a probabilistic linear solver.
    A : array-like or LinearOperator, shape=(n,n)
        A square linear operator (or matrix).

    Raises
    ------
    LinAlgError
        If the matrix ``A`` is singular.
    LinAlgWarning
        If an ill-conditioned system matrix ``A`` is detected.
    """
    rel_cond = info["rel_cond"]

    # Get the correct machine epsilon for the precision used.
    # if A.dtype.char in 'fF':  # single precision
    #     lamch = scipy.linalg.get_lapack_funcs('lamch', dtype='f')
    # else:
    #     lamch = scipy.linalg.get_lapack_funcs('lamch', dtype='d')
    # machine_eps = lamch('E')
    machine_eps = 10 ** -16

    # Singular matrix
    # # TODO: get info from solver
    # if False:
    #     raise scipy.linalg.LinAlgError("The system matrix A is singular.")
    # Ill-conditioned matrix A
    if rel_cond is not None and 1 / rel_cond < machine_eps:
        warnings.warn(
            (
                "Ill-conditioned matrix detected (estimated rcond={:.6g}). "
                "Results are likely inaccurate."
            ).format(rel_cond),
            scipy.linalg.LinAlgWarning,
            stacklevel=3,
        )