"""Kernel / covariance function."""

import abc
from typing import Optional

import numpy as np

from probnum import utils as _pn_utils
from probnum.typing import ArrayLike, ShapeLike, ShapeType


class Kernel(abc.ABC):
    r"""(Cross-)covariance function(s)

    Abstract base class representing one or multiple (cross-)covariance function(s),
    also known as kernels.
    A cross-covariance function

    .. math::
        :nowrap:

        \begin{equation}
            k_{fg} \colon
            \mathcal{X}^{d_\text{in}} \times \mathcal{X}^{d_\text{in}}
            \to \mathbb{R}
        \end{equation}

    is a function of two arguments :math:`x_0` and :math:`x_1`, which represents the
    covariance between two evaluations :math:`f(x_0)` and :math:`g(x_1)` of two
    scalar-valued random processes :math:`f` and :math:`g` on a common probability space
    (or, equivalently, two outputs :math:`h_i(x_0)` and :math:`h_j(x_1)` of a
    vector-valued random process :math:`h`).
    If :math:`f = g`, then the cross-covariance function is also referred to as a
    covariance function, in which case it must be symmetric and positive
    (semi-)definite.

    An instance of a :class:`Kernel` can compute multiple different (cross-)covariance
    functions on the same pair of inputs simultaneously. For instance, it can be used to
    compute the full covariance matrix

    .. math::
        :nowrap:

        \begin{equation}
            C^f \colon
            \mathcal{X}^{d_\text{in}} \times \mathcal{X}^{d_\text{in}}
            \to \mathbb{R}^{d_\text{out} \times d_\text{out}},
            C^f_{i j}(x_0, x_1) := k_{f_i f_j}(x_0, x_1)
        \end{equation}

    of the vector-valued random process :math:`f`. To this end, we understand any
    :class:`Kernel` as a tensor whose shape is given by :attr:`output_shape`, which
    contains different (cross-)covariance functions as its entries.

    Parameters
    ----------
    input_shape :
        Shape of the :class:`Kernel`'s input.
    output_shape :
        Shape of the :class:`Kernel`'s output.

        If ``output_shape`` is set to ``()``, the :class:`Kernel` instance represents a
        single (cross-)covariance function. Otherwise, i.e. if ``output_shape`` is a
        non-empty tuple, the :class:`Kernel` instance represents a tensor of
        (cross-)covariance functions whose shape is given by ``output_shape``.

    Examples
    --------

    >>> import numpy as np
    >>> import probnum as pn
    >>> D = 3
    >>> k = pn.randprocs.kernels.Linear(input_shape=D)
    >>> k.input_shape
    (3,)
    >>> k.output_shape
    ()

    Generate some input data.

    >>> N = 4
    >>> xs = np.linspace(0, 1, N * D).reshape(N, D)
    >>> xs.shape
    (4, 3)
    >>> xs
    array([[0.        , 0.09090909, 0.18181818],
           [0.27272727, 0.36363636, 0.45454545],
           [0.54545455, 0.63636364, 0.72727273],
           [0.81818182, 0.90909091, 1.        ]])

    We can compute kernel matrices like so.

    >>> k.matrix(xs)
    array([[0.04132231, 0.11570248, 0.19008264, 0.26446281],
           [0.11570248, 0.41322314, 0.7107438 , 1.00826446],
           [0.19008264, 0.7107438 , 1.23140496, 1.75206612],
           [0.26446281, 1.00826446, 1.75206612, 2.49586777]])

    The :meth:`Kernel.__call__` method is vectorized over the "batch shapes" of its
    inputs, applying standard NumPy broadcasting.

    >>> k(xs[:, None], xs[None, :])  # same as `.matrix`
    array([[0.04132231, 0.11570248, 0.19008264, 0.26446281],
           [0.11570248, 0.41322314, 0.7107438 , 1.00826446],
           [0.19008264, 0.7107438 , 1.23140496, 1.75206612],
           [0.26446281, 1.00826446, 1.75206612, 2.49586777]])

    No broadcasting is applied if both inputs have the same shape. For instance, one can
    efficiently compute just the diagonal of the kernel matrix via

    >>> k(xs, xs)
    array([0.04132231, 0.41322314, 1.23140496, 2.49586777])
    >>> k(xs, None)  # x1 = None is an efficient way to set x1 == x0
    array([0.04132231, 0.41322314, 1.23140496, 2.49586777])

    and the diagonal above the main diagonal of the kernel matrix can be computed via

    >>> k(xs[:-1, :], xs[1:, :])
    array([0.11570248, 0.7107438 , 1.75206612])
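
    Cross-covariance matrices between two different sets of points are computed
    analogously. For instance, with a second (here arbitrary) set of two points, the
    resulting matrix has shape ``(4, 2)``:

    >>> ys = np.linspace(-1, 0, 2 * D).reshape(2, D)
    >>> k.matrix(xs, ys).shape
    (4, 2)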
    """

    def __init__(
        self,
        input_shape: ShapeLike,
        output_shape: ShapeLike = (),
    ):
        self._input_shape = _pn_utils.as_shape(input_shape)
        self._input_ndim = len(self._input_shape)

        if self._input_ndim > 1:
            raise ValueError(
                "Currently, we only support kernels with at most 1 input dimension."
            )

        self._output_shape = _pn_utils.as_shape(output_shape)
        self._output_ndim = len(self._output_shape)

    @property
    def input_shape(self) -> ShapeType:
        """Shape of single, i.e. non-batched, arguments of the covariance function."""
        return self._input_shape

    @property
    def input_ndim(self) -> int:
        """Syntactic sugar for ``len(input_shape)``."""
        return self._input_ndim

    @property
    def output_shape(self) -> ShapeType:
        """Shape of single, i.e. non-batched, return values of the covariance function.

        If :attr:`output_shape` is ``()``, the :class:`Kernel` instance represents
        a single (cross-)covariance function.
        Otherwise, i.e. if :attr:`output_shape` is non-empty, the :class:`Kernel`
        instance represents a tensor of (cross-)covariance functions whose shape is
        given by ``output_shape``.
        """
        return self._output_shape

    @property
    def output_ndim(self) -> int:
        """Syntactic sugar for ``len(output_shape)``."""
        return self._output_ndim

    def __repr__(self) -> str:
        return (
            f"<{self.__class__.__name__} with"
            f" input_shape={self.input_shape} and"
            f" output_shape={self.output_shape}>"
        )

    def __call__(
        self,
        x0: ArrayLike,
        x1: Optional[ArrayLike],
    ) -> np.ndarray:
        """Evaluate the (cross-)covariance function(s).

        The evaluation of the (cross-)covariance function(s) is vectorized over the
        batch shapes of the arguments, applying standard NumPy broadcasting.

        Parameters
        ----------
        x0
            *shape=* ``batch_shape_0 +`` :attr:`input_shape` -- (Batch of) input(s)
            for the first argument of the :class:`Kernel`.
        x1
            *shape=* ``batch_shape_1 +`` :attr:`input_shape` -- (Batch of) input(s)
            for the second argument of the :class:`Kernel`. Can also be set to
            ``None``, in which case the function will behave as if ``x1 = x0`` (but
            it is implemented more efficiently).

        Returns
        -------
        k_x0_x1 :
            *shape=* ``bcast_batch_shape +`` :attr:`output_shape` -- The
            (cross-)covariance function(s) evaluated at ``(x0, x1)``. Since the
            function is vectorized over the batch shapes of the inputs, the output
            array contains the following entries:

            .. code-block:: python

                k_x0_x1[batch_idx + output_idx] = k[output_idx](
                    x0[batch_idx, ...],
                    x1[batch_idx, ...],
                )

            where we assume that ``x0`` and ``x1`` have been broadcast to a common
            shape ``bcast_batch_shape +`` :attr:`input_shape`, and where
            ``output_idx`` and ``batch_idx`` are indices compatible with
            :attr:`output_shape` and ``bcast_batch_shape``, respectively. By
            ``k[output_idx]`` we refer to the covariance function at index
            ``output_idx`` in the tensor of covariance functions represented by the
            :class:`Kernel` instance.

        Raises
        ------
        ValueError
            If one of the input shapes is not of the form ``batch_shape_{0,1} +``
            :attr:`input_shape`.
        ValueError
            If the inputs can not be broadcast to a common shape.

        See Also
        --------
        matrix: Convenience function to compute a kernel matrix, i.e. a matrix of
            pairwise evaluations of the kernel on two sets of points.

        Examples
        --------
        See documentation of class :class:`Kernel`.
        """
        x0 = np.asarray(x0)

        if x1 is not None:
            x1 = np.asarray(x1)

        # Shape checking
        broadcast_batch_shape = self._check_shapes(
            x0.shape, x1.shape if x1 is not None else None
        )

        # Evaluate the kernel
        k_x0_x1 = self._evaluate(x0, x1)

        assert k_x0_x1.shape == broadcast_batch_shape + self._output_shape

        return k_x0_x1
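
    # A brief illustration of the broadcasting semantics of ``__call__`` (a sketch;
    # the input arrays are hypothetical and work with any kernel ``k`` whose
    # ``input_shape`` is ``(3,)``, e.g. the ``Linear`` kernel from the class
    # docstring):
    #
    #     x0 = np.zeros((4, 1, 3))  # batch_shape_0 == (4, 1)
    #     x1 = np.zeros((5, 3))     # batch_shape_1 == (5,)
    #     k(x0, x1).shape           # == (4, 5) == bcast_batch_shape + output_shape
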
    def matrix(
        self,
        x0: ArrayLike,
        x1: Optional[ArrayLike] = None,
    ) -> np.ndarray:
        """A convenience function for computing a kernel matrix for two sets of
        inputs.

        This is syntactic sugar for ``k(x0[:, None], x1[None, :])``. Hence, it
        computes the matrix (stack) of pairwise covariances between two sets of input
        points. If ``k`` represents a single covariance function, then the resulting
        matrix will be symmetric positive-(semi)definite for ``x0 == x1``.

        Parameters
        ----------
        x0
            *shape=* ``(M,) +`` :attr:`input_shape` or :attr:`input_shape` -- Stack
            of inputs for the first argument of the :class:`Kernel`.
        x1
            *shape=* ``(N,) +`` :attr:`input_shape` or :attr:`input_shape` --
            (Optional) stack of inputs for the second argument of the
            :class:`Kernel`. If ``x1`` is not specified, the function behaves as if
            ``x1 = x0`` (but it is implemented more efficiently).

        Returns
        -------
        kernmat :
            *shape=* ``batch_shape +`` :attr:`output_shape` -- The matrix / stack of
            matrices containing the pairwise evaluations of the (cross-)covariance
            function(s) on ``x0`` and ``x1``. Depending on the shape of the inputs,
            ``batch_shape`` is either ``(M, N)``, ``(M,)``, ``(N,)``, or ``()``.

        Raises
        ------
        ValueError
            If the shapes of the inputs don't match the specification.

        See Also
        --------
        __call__: Evaluate the kernel more flexibly.

        Examples
        --------
        See documentation of class :class:`Kernel`.
        """
        x0 = np.asarray(x0)
        x1 = x0 if x1 is None else np.asarray(x1)

        # Shape checking
        errmsg = (
            "`{argname}` must have shape `({batch_dim},) + input_shape` or "
            f"`input_shape`, where `input_shape` is `{self.input_shape}`, but an array "
            "with shape `{shape}` was given."
        )

        if not 0 <= x0.ndim - self._input_ndim <= 1:
            raise ValueError(errmsg.format(argname="x0", batch_dim="M", shape=x0.shape))

        if not 0 <= x1.ndim - self._input_ndim <= 1:
            raise ValueError(errmsg.format(argname="x1", batch_dim="N", shape=x1.shape))

        # Pairwise kernel evaluation
        if x0.ndim > self._input_ndim and x1.ndim > self._input_ndim:
            return self(x0[:, None], x1[None, :])

        return self(x0, x1)
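
    # Note that ``matrix`` is plain syntactic sugar; a short equivalence sketch
    # (``k`` and ``xs`` as in the class docstring):
    #
    #     np.allclose(k.matrix(xs), k(xs[:, None], xs[None, :]))  # True
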
    @abc.abstractmethod
    def _evaluate(
        self,
        x0: ArrayLike,
        x1: Optional[ArrayLike],
    ) -> np.ndarray:
        """Implementation of the kernel evaluation which is called after input
        checking.

        When implementing a particular kernel, the subclass should implement the
        kernel computation by overwriting this method. It is called by the
        :meth:`__call__` method after applying input checking. The implementation
        must return the array described in the "Returns" section of the
        :meth:`__call__` method. Note that the inputs are not automatically broadcast
        to a common shape, but it is guaranteed that this is possible.

        Parameters
        ----------
        x0
            See argument ``x0`` in the docstring of :meth:`__call__`.
        x1
            See argument ``x1`` in the docstring of :meth:`__call__`.

        Returns
        -------
        k_x0_x1 :
            See "Returns" section in the docstring of :meth:`__call__`.
        """

    def _check_shapes(
        self,
        x0_shape: ShapeType,
        x1_shape: Optional[ShapeType] = None,
    ) -> ShapeType:
        """Checks input argument shapes and computes the broadcast batch shape of
        both inputs.

        This function checks the shapes of the inputs to the :meth:`__call__` method
        and it computes the ``bcast_batch_shape`` mentioned in the docstring.

        Parameters
        ----------
        x0_shape :
            Shape of the first input to the covariance function.
        x1_shape :
            Shape of the (optional) second input to the covariance function.

        Returns
        -------
        broadcast_batch_shape :
            The ``batch_shape`` after broadcasting the inputs to a common shape.

        Raises
        ------
        ValueError
            If one of the input shapes is not of the form ``batch_shape_{0,1} +``
            :attr:`input_shape`.
        ValueError
            If the inputs can not be broadcast to a common shape.
        """
        err_msg = (
            "The shape of the input array `{argname}` must match the `input_shape` "
            f"`{self.input_shape}` of the kernel along its last dimension, but an "
            "array with shape `{shape}` was given."
        )

        if x0_shape[len(x0_shape) - self._input_ndim :] != self.input_shape:
            raise ValueError(err_msg.format(argname="x0", shape=x0_shape))

        broadcast_batch_shape = x0_shape[: len(x0_shape) - self._input_ndim]

        if x1_shape is not None:
            if x1_shape[len(x1_shape) - self._input_ndim :] != self.input_shape:
                raise ValueError(err_msg.format(argname="x1", shape=x1_shape))

            try:
                broadcast_batch_shape = np.broadcast_shapes(
                    broadcast_batch_shape,
                    x1_shape[: len(x1_shape) - self._input_ndim],
                )
            except ValueError as ve:
                err_msg = (
                    f"The input arrays `x0` and `x1` with shapes {x0_shape} and "
                    f"{x1_shape} can not be broadcast to a common shape."
                )
                raise ValueError(err_msg) from ve

        return broadcast_batch_shape

    def _euclidean_inner_products(
        self, x0: np.ndarray, x1: Optional[np.ndarray]
    ) -> np.ndarray:
        """Implementation of the Euclidean inner product, which supports scalar
        inputs and an optional second argument."""
        prods = x0**2 if x1 is None else x0 * x1

        if self.input_ndim == 0:
            return prods

        assert self.input_ndim == 1

        return np.sum(prods, axis=-1)


class IsotropicMixin(abc.ABC):  # pylint: disable=too-few-public-methods
    r"""Mixin for isotropic kernels.

    An isotropic kernel is a kernel which only depends on the Euclidean norm of the
    distance between the arguments, i.e.

    .. math::

        k(x_0, x_1) = k(\lVert x_0 - x_1 \rVert_2).

    Hence, all isotropic kernels are stationary.
    """

    def _squared_euclidean_distances(
        self, x0: np.ndarray, x1: Optional[np.ndarray]
    ) -> np.ndarray:
        """Implementation of the squared Euclidean distance, which supports scalar
        inputs and an optional second argument."""
        if x1 is None:
            return np.zeros_like(  # pylint: disable=unexpected-keyword-arg
                x0,
                shape=x0.shape[: x0.ndim - self._input_ndim],
            )

        sqdiffs = (x0 - x1) ** 2

        if self.input_ndim == 0:
            return sqdiffs

        assert self.input_ndim == 1

        return np.sum(sqdiffs, axis=-1)

    def _euclidean_distances(
        self, x0: np.ndarray, x1: Optional[np.ndarray]
    ) -> np.ndarray:
        """Implementation of the Euclidean distance, which supports scalar inputs and
        an optional second argument."""
        if x1 is None:
            return np.zeros_like(  # pylint: disable=unexpected-keyword-arg
                x0,
                shape=x0.shape[: x0.ndim - self._input_ndim],
            )

        return np.sqrt(self._squared_euclidean_distances(x0, x1))