Source code for probnum.quad.solvers._bayesian_quadrature

"""Probabilistic numerical methods for solving integrals."""

from __future__ import annotations

from typing import Callable, Optional, Tuple
import warnings

import numpy as np

from probnum.quad.integration_measures import IntegrationMeasure, LebesgueMeasure
from probnum.quad.kernel_embeddings import KernelEmbedding
from probnum.quad.solvers._bq_state import BQIterInfo, BQState
from probnum.quad.solvers.acquisition_functions import (
    IntegralVarianceReduction,
    MutualInformation,
    WeightedPredictiveVariance,
)
from probnum.quad.solvers.belief_updates import BQBeliefUpdate, BQStandardBeliefUpdate
from probnum.quad.solvers.initial_designs import InitialDesign, LatinDesign, MCDesign
from probnum.quad.solvers.policies import (
    MaxAcquisitionPolicy,
    Policy,
    RandomMaxAcquisitionPolicy,
    RandomPolicy,
    VanDerCorputPolicy,
)
from probnum.quad.solvers.stopping_criteria import (
    BQStoppingCriterion,
    ImmediateStop,
    IntegralVarianceTolerance,
    MaxNevals,
    RelativeMeanChange,
)
from probnum.quad.typing import DomainLike
from probnum.randprocs.kernels import ExpQuad, Kernel
from probnum.randvars import Normal
from probnum.typing import IntLike

# pylint: disable=too-many-branches, too-complex


class BayesianQuadrature:
    r"""The Bayesian quadrature method.

    Bayesian quadrature solves integrals of the form

    .. math:: F = \int_\Omega f(x) d \mu(x).

    Parameters
    ----------
    kernel
        The kernel used for the Gaussian process model.
    measure
        The integration measure.
    policy
        The policy choosing nodes at which to evaluate the integrand.
    belief_update
        The inference method.
    stopping_criterion
        The criterion that determines convergence.
    initial_design
        The initial design chooses a set of nodes once, before the acquisition loop with
        the policy runs.

    Raises
    ------
    ValueError
        If ``initial_design`` is given but ``policy`` is not given.

    See Also
    --------
    :func:`bayesquad <probnum.quad.bayesquad>` :
        Computes the integral using an acquisition policy.
    :func:`bayesquad_from_data <probnum.quad.bayesquad_from_data>` :
        Computes the integral :math:`F` using a given dataset of nodes and function
        evaluations.


    """

    def __init__(
        self,
        kernel: Kernel,
        measure: IntegrationMeasure,
        policy: Optional[Policy],
        belief_update: BQBeliefUpdate,
        stopping_criterion: BQStoppingCriterion,
        initial_design: Optional[InitialDesign],
    ) -> None:

        if policy is None and initial_design is not None:
            raise ValueError(
                "An initial design can only be used in combination with a policy."
            )

        self.kernel = kernel
        self.measure = measure
        self.policy = policy
        self.belief_update = belief_update
        self.stopping_criterion = stopping_criterion
        self.initial_design = initial_design

    # pylint: disable=too-many-statements, too-many-locals
[docs]    @classmethod
    def from_problem(
        cls,
        input_dim: IntLike,
        kernel: Optional[Kernel] = None,
        measure: Optional[IntegrationMeasure] = None,
        domain: Optional[DomainLike] = None,
        policy: Optional[str] = "bmc",
        initial_design: Optional[str] = None,
        options: Optional[dict] = None,
    ) -> "BayesianQuadrature":

        r"""Creates an instance of this class from a problem description.

        Parameters
        ----------
        input_dim
            The input dimension.
        kernel
            The kernel used for the GP model. Defaults to the ``ExpQuad`` kernel.
        measure
            The integration measure. Defaults to the Lebesgue measure on the ``domain``.
        domain
            The integration bounds. Obsolete if ``measure`` is given.
        policy
            The policy choosing nodes at which to evaluate the integrand.
            Choose ``None`` if you want to integrate from a fixed dataset.
        initial_design
            The initial design chooses a set of nodes once, before the acquisition loop
            with the policy runs.
        options
            A dictionary with the following optional solver settings

                scale_estimation : Optional[str]
                    Estimation method to use to compute the scale parameter. Defaults
                    to 'mle'.
                max_evals : Optional[IntLike]
                    Maximum number of evaluations as stopping criterion.
                var_tol : Optional[FloatLike]
                    Variance tolerance as stopping criterion.
                rel_tol : Optional[FloatLike]
                    Relative tolerance as stopping criterion.
                jitter : Optional[FloatLike]
                    Non-negative jitter to numerically stabilise kernel matrix
                    inversion. Defaults to 1e-8.
                batch_size : Optional[IntLike]
                    Batch size used in node acquisition. Defaults to 1.
                n_initial_design_nodes : Optional[IntLike]
                    The number of nodes created by the initial design. Defaults to
                    ``input_dim * 5`` if an initial design is given.
                n_candidates : Optional[IntLike]
                    The number of candidate nodes used by the policies that maximize an
                    acquisition function by drawing random candidates. Defaults to 1e2.
                    Applicable to policies 'us_rand', 'mi_rand' and 'ivr_rand'.
                n_restarts : Optional[IntLike]
                    The number of restarts that the acquisition optimizer performs in
                    order to find the maximizer. Defaults to 10. Applicable to policies
                    'us', 'mi' and 'ivr'.

        Returns
        -------
        BayesianQuadrature
            An instance of this class.

        Raises
        ------
        NotImplementedError
            If an unknown ``policy`` or an unknown ``initial_design`` is given.
        ValueError
            If neither ``domain`` nor ``measure`` is given.

        See Also
        --------
        :func:`bayesquad <probnum.quad.bayesquad>` :
            For details on options for ``policy`` and ``initial_design``.

        """

        input_dim = int(input_dim)

        # Set some solver options
        if options is None:
            options = {}

        max_evals = options.get("max_evals", None)
        rel_tol = options.get("rel_tol", None)
        var_tol = options.get("var_tol", None)

        scale_estimation = options.get("scale_estimation", "mle")
        jitter = options.get("jitter", 1.0e-8)
        batch_size = options.get("batch_size", int(1))
        n_initial_design_nodes = options.get(
            "n_initial_design_nodes", int(5 * input_dim)
        )
        n_candidates = options.get("n_candidates", int(1e2))
        n_restarts = options.get("n_restarts", int(10))

        # Set up integration measure
        if domain is None and measure is None:
            raise ValueError(
                "You need to either specify an integration domain or a measure."
            )
        if measure is None:
            measure = LebesgueMeasure(domain=domain, input_dim=input_dim)

        # Select the kernel
        if kernel is None:
            kernel = ExpQuad(input_shape=(input_dim,))

        # Select policy
        acquisition_dict = dict(
            mi=MutualInformation,
            ivr=IntegralVarianceReduction,
            us=WeightedPredictiveVariance,
        )
        if policy is None:
            # If policy is None, this implies that the integration problem is defined
            # through a fixed set of nodes and function evaluations which will not
            # require an acquisition loop. The error handling is done in ``integrate``.
            pass
        elif policy == "bmc":
            policy = RandomPolicy(batch_size, measure.sample)
        elif policy == "vdc":
            policy = VanDerCorputPolicy(batch_size, measure)
        # all random max acquisition policies (all must contain suffix '_rand')
        elif policy in ["us_rand", "mi_rand", "ivr_rand"]:
            assert policy[-5:] == "_rand"
            policy = RandomMaxAcquisitionPolicy(
                batch_size=1,
                acquisition_func=acquisition_dict[policy[:-5]],
                n_candidates=n_candidates,
            )
        # all max acquisition policies with optimizer
        elif policy in ["us", "mi", "ivr"]:
            policy = MaxAcquisitionPolicy(
                batch_size=1,
                acquisition_func=acquisition_dict[policy],
                n_restarts=n_restarts,
            )
        else:
            raise NotImplementedError(f"The given policy ({policy}) is unknown.")

        # Select the belief updater
        belief_update = BQStandardBeliefUpdate(
            jitter=jitter, scale_estimation=scale_estimation
        )

        # Select stopping criterion: If multiple stopping criteria are given, BQ stops
        # once any criterion is fulfilled (logical `or`).
        def _stopcrit_or(sc1, sc2):
            if sc1 is None:
                return sc2
            return sc1 | sc2

        _stop_crit = None
        if max_evals is not None:
            _stop_crit = _stopcrit_or(_stop_crit, MaxNevals(max_evals))
        if var_tol is not None:
            _stop_crit = _stopcrit_or(_stop_crit, IntegralVarianceTolerance(var_tol))
        if rel_tol is not None:
            _stop_crit = _stopcrit_or(_stop_crit, RelativeMeanChange(rel_tol))

        # If no stopping criteria are given, use some default values.
        if _stop_crit is None:
            _stop_crit = IntegralVarianceTolerance(var_tol=1e-6) | MaxNevals(
                max_nevals=input_dim * 25
            )  # 25 is an arbitrary value

        # If no policy is given, then the iteration must terminate immediately.
        if policy is None:
            _stop_crit = ImmediateStop()

        # Select initial design
        if initial_design is None:
            pass  # not to raise the exception
        elif initial_design == "mc":
            initial_design = MCDesign(n_initial_design_nodes, measure)
        elif initial_design == "latin":
            initial_design = LatinDesign(n_initial_design_nodes, measure)
        else:
            raise NotImplementedError(
                f"The given initial design ({initial_design}) is unknown."
            )

        return cls(
            kernel=kernel,
            measure=measure,
            policy=policy,
            belief_update=belief_update,
            stopping_criterion=_stop_crit,
            initial_design=initial_design,
        )

[docs]    def bq_iterator(
        self,
        bq_state: BQState,
        info: BQIterInfo,
        fun: Optional[Callable],
        rng: Optional[np.random.Generator],
    ) -> Tuple[Normal, BQState, BQIterInfo]:
        """Generator that implements the iteration of the BQ method.

        This function exposes the state of the BQ method one step at a time while
        running the loop.

        Parameters
        ----------
        bq_state
            State of the Bayesian quadrature methods. Contains the information about
            the problem and the BQ belief.
        info
            The state of the iteration.
        fun
            Function to be integrated. It needs to accept a shape=(n_eval, input_dim)
            ``np.ndarray`` and return a shape=(n_eval,) ``np.ndarray``.
        rng
            The random number generator used for random methods.

        Yields
        ------
        new_integral_belief :
            Updated belief about the integral.
        new_bq_state :
            The updated state of the Bayesian quadrature belief.
        new_info :
            The updated state of the iteration.
        """

        while True:

            _has_converged = self.stopping_criterion(bq_state, info)
            info = BQIterInfo.from_stopping_decision(info, has_converged=_has_converged)

            yield bq_state.integral_belief, bq_state, info

            # Have we already converged?
            if _has_converged:
                break

            # Select new nodes via policy
            new_nodes = self.policy(bq_state, rng)

            # Evaluate the integrand at new nodes
            new_fun_evals = fun(new_nodes)

            # Update the belief about the integrand
            _, bq_state = self.belief_update(
                bq_state=bq_state,
                new_nodes=new_nodes,
                new_fun_evals=new_fun_evals,
            )

            # Update the state of the iteration
            info = BQIterInfo.from_iteration(info=info, dnevals=self.policy.batch_size)

[docs]    def integrate(
        self,
        fun: Optional[Callable],
        nodes: Optional[np.ndarray],
        fun_evals: Optional[np.ndarray],
        rng: Optional[np.random.Generator] = None,
    ) -> Tuple[Normal, BQState, BQIterInfo]:
        """Integrates a given function.

        The function may be given as a function handle ``fun`` and/or numerically in
        terms of ``fun_evals`` at fixed nodes ``nodes``.

        If a policy is defined this method calls the generator ``bq_iterator`` until
        the first stopping criterion is met. The initial design is evaluated in a batch
        prior to running ``bq_iterator``.

        If no policy is defined this method immediately stops after processing the
        given ``nodes``.


        Parameters
        ----------
        fun
            Function to be integrated. It needs to accept a shape=(n_eval, input_dim)
            ``np.ndarray`` and return a shape=(n_eval,) ``np.ndarray``.
        nodes
            *shape=(n_eval, input_dim)* -- Optional nodes at which function evaluations
            are available as ``fun_evals`` from start.
        fun_evals
            *shape=(n_eval,)* -- Optional function evaluations at ``nodes`` available
            from the start.
        rng
            The random number generator used for random methods.

        Returns
        -------
        integral_belief :
            Posterior belief about the integral.
        bq_state :
            Final state of the Bayesian quadrature method.

        Raises
        ------
        ValueError
            If neither the integrand function ``fun`` nor integrand evaluations
            ``fun_evals`` are given.
        ValueError
            If dimension of ``nodes`` or ``fun_evals`` is incorrect, or if their
            shapes do not match.
        ValueError
            If ``rng`` is not given but ``policy`` or ``initial_design`` requires it.
        ValueError
            If a policy is available but ``fun`` is not given.
        ValueError
            If no policy is available and no ``nodes`` are given.

        Warns
        -----
        UserWarning
            When no policy is given and ``fun`` is ignored.

        Notes
        -----
        The initial design is evaluated prior to running the ``bq_iterator`` and hence
        may not obey the stopping criterion. For example, if stopping is induced via a
        maximum number of evaluations (``max_evals``) smaller than the batch size of the
        initial design, the initial design will be evaluated nevertheless.

        """

        # Check if integrand function is provided
        if fun is None and fun_evals is None:
            raise ValueError(
                "Please provide an integrand function 'fun' or function values "
                "'fun_evals'."
            )

        # Setup fixed design
        if nodes is not None and fun_evals is None:
            fun_evals = fun(nodes)

        # Check if shapes of nodes and function evaluations match
        if fun_evals is not None and fun_evals.ndim != 1:
            raise ValueError(
                f"fun_evals must be one-dimensional " f"({fun_evals.ndim})."
            )
        if nodes is not None and nodes.ndim != 2:
            raise ValueError(f"nodes must be two-dimensional ({nodes.ndim}).")

        if nodes is not None and fun_evals is not None:
            if nodes.shape[0] != fun_evals.shape[0]:
                raise ValueError(
                    f"nodes ({nodes.shape[0]}) and fun_evals "
                    f"({fun_evals.shape[0]}) need to contain the same number "
                    f"of evaluations."
                )

        # policy given
        if self.policy is not None:

            # function handle must be given for policy to work
            if fun is None:
                raise ValueError("Policy requires ``fun`` to be given.")

            # some policies require and rng
            if self.policy.requires_rng and rng is None:
                raise ValueError(
                    f"The policy '{self.policy.__class__.__name__}' requires a random "
                    f"number generator (rng) to be given."
                )

        # no policy given: Integrate on fixed dataset.
        else:
            # nodes must be provided if no policy is given.
            if nodes is None:
                raise ValueError("No policy available: Please provide nodes.")

            # Use fun_evals and disregard fun if both are given
            if fun is not None and fun_evals is not None:
                warnings.warn(
                    "No policy available: 'fun_evals' are used instead of 'fun'."
                )
                fun = None

            # override stopping condition as no policy is given.
            self.stopping_criterion = ImmediateStop()

        # initial design given (which implies policy and fun is given)
        if self.initial_design is not None:

            # some designs require and rng
            if self.initial_design.requires_rng and rng is None:
                raise ValueError(
                    f"The initial design '{self.initial_design.__class__.__name__}' "
                    f"requires a random number generator (rng) to be given."
                )

            initial_design_nodes = self.initial_design(rng)
            initial_design_fun_evals = fun(initial_design_nodes)
            if nodes is not None:
                nodes = np.concatenate((nodes, initial_design_nodes), axis=0)
                fun_evals = np.append(fun_evals, initial_design_fun_evals)
            else:
                nodes = initial_design_nodes
                fun_evals = initial_design_fun_evals

        # set BQ state: This encodes a zero-mean prior.
        bq_state = BQState(
            measure=self.measure,
            kernel=self.kernel,
            integral_belief=Normal(
                0.0, KernelEmbedding(self.kernel, self.measure).kernel_variance()
            ),
        )

        # update BQ state if nodes and evaluations are available
        if nodes is not None:
            _, bq_state = self.belief_update(
                bq_state=bq_state,
                new_nodes=nodes,
                new_fun_evals=fun_evals,
            )

        # set iteration info
        info = BQIterInfo.from_bq_state(bq_state)

        # run loop
        for (_, bq_state, info) in self.bq_iterator(bq_state, info, fun, rng):
            pass

        return bq_state.integral_belief, bq_state, info