# Source code for probnum.diffeq.odefiltsmooth.prior

"""
Continuous-Time priors for ODE solvers.

Currently, they are only relevant in the context of ODEs.
If needed in a more general setting, it is easy to move
them to statespace module (->thoughts?)

Matern will be easy to implement, just reuse the template
provided by IOUP and change parameters
"""
import warnings
import numpy as np
from scipy.special import binom  # for Matern
from scipy.special import factorial  # vectorised factorial for IBM-Q(h)

from probnum.filtsmooth.statespace.continuous import LTISDEModel
from probnum.random_variables import Normal

[docs]class ODEPrior(LTISDEModel):
"""
Prior dynamic model for ODE filtering and smoothing.

An ODE prior is an continuous LTI state space model with attributes:
* order of integration :math:q
* spatial dimension of the underlying ODE
* projection to :math:X_i(t)
(the :math:(i-1)-th derivative estimate)
* A preconditioner :math:P (see below)

.. math:: d X(t) = [F X(t) + u] dt + L dB(t)

the prior for the ODE Dynamics is given by

.. math:: dX(t) = P F P^{-1} X(t) dt + P L dB(t)

where :math:P is a preconditioner matrix ensuring stability
of the iterations. Note that ODE priors do not have a drift term.
By default, we choose :math:P to be the
matrix that maps to filtering iteration to the Nordsieck vector,

.. math:: P = \\text{diag }(h^{-q}, h^{-q+1}, ..., 1).

Here, :math:h is some expected average step size. Note that we
ignored the factorials in this matrix. Our setting makes it easy to
recover "no preconditioning" by choosing :math:h=1.

- If no expected step size is available we choose :math:h=1.0.
This recovers :math:P=I_{d(q+1)}, hence no preconditioning.

- For fixed step size algorithms this quantity :math:h is easy
to choose

- For adaptive steps it is a bit more involved.

Since it doesn't have to be exact, any more or less appropriate
choice will do well. The main effect of this preconditioning is that
the predictive covariances inside each filter iteration are
well-conditioned: for IBM(:math:q) priors, the condition number
of the predictive covariances only depends on order of integration
:math:q and not on the step size anymore. Nb: this only holds
if all required derivatives of the RHS vector field of the ODE are
specified: None for IBM(1), Jacobian of :math:f for IBM(2),
Hessian of :math:f for IBM(3). If this is not the case the
preconditioner still helps but is not as powerful anymore.

Without preconditioning they can be numerically singular for small
steps and higher order methods which especially makes smoothing
algorithms unstable.

Another advantage of this preconditioning is that the smallest value
that appears inside the algorithm is :math:h^{q} (with
preconditioning) instead of :math:h^{2q+1} (without preconditioning).

The matrices :math:F, u, L are the usual matrices for
IBM(:math:q), IOUP(:math:q) or Matern(:math:q+1/2) processes.
As always, :math:B(t) is
s-dimensional Brownian motion with unit diffusion matrix :math:Q.

"""

def __init__(self, driftmat, dispmat, ordint, spatialdim, precond_step=1.0):
self.ordint = ordint
self.spatialdim = spatialdim
self.precond, self.invprecond = self.precond2nordsieck(precond_step)
driftmat = self.precond @ driftmat @ self.invprecond
dispmat = self.precond @ dispmat
forcevec = np.zeros(len(driftmat))
diffmat = np.eye(spatialdim)
super().__init__(driftmat, forcevec, dispmat, diffmat)

[docs]    def proj2coord(self, coord):
"""
Projection matrix to :math:i-th coordinates.

Computes the matrix

.. math:: H_i = \\left[ I_d \\otimes e_i \\right] P^{-1},

where :math:e_i is the :math:i-th unit vector,
that projects to the :math:i-th coordinate of a vector.
If the ODE is multidimensional, it projects to **each** of the
:math:i-th coordinates of each ODE dimension.

Parameters
----------
coord : int
Coordinate index :math:i which to project to.
Expected to be in range :math:0 \\leq i \\leq q + 1.

Returns
-------
np.ndarray, shape=(d, d*(q+1))
Projection matrix :math:H_i.
"""
projvec1d = np.eye(self.ordint + 1)[:, coord]
projmat1d = projvec1d.reshape((1, self.ordint + 1))
projmat = np.kron(np.eye(self.spatialdim), projmat1d)
projmat1d_with_precond = projmat @ self.invprecond
return projmat1d_with_precond

[docs]    def precond2nordsieck(self, step):
"""
Computes preconditioner inspired by Nordsieck.

Computes the matrix :math:P given by

.. math:: P = I_d \\otimes diag (1, h, h^2, ..., h^q)

as well as its inverse :math:P^{-1}.

Parameters
----------
step : float
Step size :math:h used for preconditioning. If :math:h
is so small that :math:h^q! < 10^{-15}, it is being
set to :math:h = (\\cdot 10^{-15})^{1/q}.

Returns
-------
precond : np.ndarray, shape=(d(q+1), d(q+1))
Preconditioner matrix :math:P.
invprecond : np.ndarray, shape=(d(q+1), d(q+1))
Inverse preconditioner matrix :math:P^{-1}.
"""
smallval = step ** self.ordint
if smallval < 1e-15:
warnmsg = (
"Preconditioner contains values below "
"machine precision (%.1e)" % smallval
)
warnings.warn(message=warnmsg, category=RuntimeWarning)
step = 1e-15 ** (1 / self.ordint)
powers = np.arange(start=-self.ordint, stop=1)
diags = step ** powers
precond = np.kron(np.eye(self.spatialdim), np.diag(diags))
invprecond = np.kron(np.eye(self.spatialdim), np.diag(1.0 / diags))
return precond, invprecond

@property
def preconditioner(self):
"""
Convenience property to return the readily-computed
preconditioner without having to remember abbreviations.

Returns
-------
np.ndarray, shape=(d(q+1), d(q+1))
Preconditioner matrix :math:P
"""
return self.precond

@property
def inverse_preconditioner(self):
"""
Convenience property to return the readily-computed
inverse preconditioner without having to remember abbreviations.

Returns
-------
np.ndarray, shape=(d(q+1), d(q+1))
Inverse preconditioner matrix :math:P^{-1}
"""
return self.invprecond

[docs]class IBM(ODEPrior):
"""
Integrated Brownian motion of order :math:q prior.

The integrated Brownian motion prior is represented through
the LTI SDE

.. math:: dX(t) =  F X(t) dt + L dB(t)

where for readibility reasons we did not write the preconditioner
matrix :math:P; see :class:ODEPrior for explanations.

- It has driftmatrix :math:F given by

.. math:: F = I_d \\otimes \\tilde F, \\quad
\\tilde F = \\begin{pmatrix} 0 & I_q \\\\ 0 & 0 \\end{pmatrix}

where the top left zero-vector has :math:q rows and 1 column.

- It has dispersion matrix :math:L given by

.. math:: L = I_d \\otimes \\tilde L, \\quad
\\tilde L = \\sigma \\, e_{q+1}

where :math:\\sigma is the diffusion constant, that is,
:math:\\sigma^2 is the intensity of each dimension of the
:math:d-dimensional Brownian motion driving the SDE and
:math:e_{q+1}=(0, ..., 0, 1) is the :math:(q+1)-st unit vector.

- The Brownian motion :math:B=B(t) driving the SDE has unit
diffusion :math:Q = I_d.

Parameters
----------
ordint : int
Order of integration :math:q. The higher :math:q, the higher
the order of the ODE filter.
spatialdim : int
Spatial dimension :math:d of the ordinary differential
equation that is to be modelled.
diffconst : float
Diffusion constant :math:sigma of the stochastic process.
precond_step : float, optional
Expected step size :math:h used in the ODE filter.
This quantity is used for preconditioning, see :class:ODEPrior
for a clear explanation. Default is :math:h=1.
"""

def __init__(self, ordint, spatialdim, diffconst, precond_step=1.0):
"""
ordint : this is "q"
spatialdim : d
diffconst : sigma
"""
self.diffconst = diffconst
driftmat = _driftmat_ibm(ordint, spatialdim)
dispmat = _dispmat(ordint, spatialdim, diffconst)
super().__init__(driftmat, dispmat, ordint, spatialdim, precond_step)

[docs]    def chapmankolmogorov(self, start, stop, step, randvar, *args, **kwargs):
"""
Closed form solution to the Chapman-Kolmogorov equations
for the integrated Brownian motion.

It is given by

.. math:: X_{t+h} \\, | \\, X_t \\sim \\mathcal{N}(A(h)X_t, Q(h))

with matrices :math:A(h) and Q(h) defined by

.. math:: [A(h)]_{ij} = \\mathbb{I}_{i\\leq j} \\frac{h^{j-i}}{(j-i)!}

.. math:: [Q(h)]_{ij} = \\sigma^2 \\frac{h^{2q+1-i-j}}{(2q+1-i-j)!(q-j)!(q-i)!}

The implementation that is used here is more stable than the matrix-exponential
implementation in :meth:super().chapmankolmogorov which is relevant for
combinations of large order :math:q and small steps :math:h.
In these cases even the preconditioning is subject to numerical
instability if the transition matrices :math:A(h)
and :math:Q(h) are computed with matrix exponentials.

"step" variable is obsolent here and is ignored.
"""
mean, covar = randvar.mean, randvar.cov
ah = self._trans_ibm(start, stop)
qh = self._transdiff_ibm(start, stop)
mpred = ah @ mean
crosscov = covar @ ah.T
cpred = ah @ crosscov + qh
return Normal(mpred, cpred), crosscov

def _trans_ibm(self, start, stop):
"""
Computes closed form solution for the transition matrix A(h).
"""
step = stop - start

# This seems like the faster solution compared to fully vectorising.
# I suspect it is because np.math.factorial is much faster than
# scipy.special.factorial
ah_1d = np.diag(np.ones(self.ordint + 1), 0)
for i in range(self.ordint):
offdiagonal = (
step ** (i + 1) / np.math.factorial(i + 1) * np.ones(self.ordint - i)
)
ah_1d += np.diag(offdiagonal, i + 1)

ah = np.kron(np.eye(self.spatialdim), ah_1d)
return self.precond @ ah @ self.invprecond

def _transdiff_ibm(self, start, stop):
"""
Computes closed form solution for the diffusion matrix Q(h).
"""
step = stop - start
indices = np.arange(0, self.ordint + 1)
col_idcs, row_idcs = np.meshgrid(indices, indices)
exponent = 2 * self.ordint + 1 - row_idcs - col_idcs
factorial_rows = factorial(
self.ordint - row_idcs
)  # factorial() handles matrices but is slow(ish)
factorial_cols = factorial(self.ordint - col_idcs)
qh_1d = (
self.diffconst ** 2
* step ** exponent
/ (exponent * factorial_rows * factorial_cols)
)
qh = np.kron(np.eye(self.spatialdim), qh_1d)
return self.precond @ qh @ self.precond.T

def _driftmat_ibm(ordint, spatialdim):
"""
Returns I_d \\otimes F
"""
driftmat_1d = np.diag(np.ones(ordint), 1)
return np.kron(np.eye(spatialdim), driftmat_1d)

[docs]class IOUP(ODEPrior):
"""
IOUP(q) prior:

F = I_d \\otimes F
L = I_d \\otimes L = I_d \\otimes (0, ...,  diffconst**2)
Q = I_d
"""

def __init__(self, ordint, spatialdim, driftspeed, diffconst, precond_step=1.0):
"""
ordint : this is "q"
spatialdim : d
driftspeed : float > 0; (lambda; note that -lambda ("minus"-lambda)
is used in the OU equation!!
diffconst : sigma
"""
self.driftspeed = driftspeed
self.diffconst = diffconst
driftmat = _driftmat_ioup(ordint, spatialdim, self.driftspeed)
dispvec = _dispmat(ordint, spatialdim, diffconst)
super().__init__(driftmat, dispvec, ordint, spatialdim, precond_step)

def _driftmat_ioup(ordint, spatialdim, driftspeed):
"""
Returns I_d \\otimes F
"""
driftmat = np.diag(np.ones(ordint), 1)
driftmat[-1, -1] = -driftspeed
return np.kron(np.eye(spatialdim), driftmat)

[docs]class Matern(ODEPrior):
"""
Matern(q) prior --> Matern process with reg. q+0.5
and hence, with matrix size q+1

F = I_d \\otimes F
L = I_d \\otimes L = I_d \\otimes diffconst*(0, ..., 1)
Q = I_d
"""

def __init__(self, ordint, spatialdim, lengthscale, diffconst, precond_step=1.0):
"""
ordint : this is "q"
spatialdim : d
lengthscale : used as 1/lengthscale, remember that!
diffconst : sigma

"""
self.lengthscale = lengthscale
self.diffconst = diffconst
driftmat = _driftmat_matern(ordint, spatialdim, self.lengthscale)
dispvec = _dispmat(ordint, spatialdim, diffconst)
super().__init__(driftmat, dispvec, ordint, spatialdim, precond_step)

def _driftmat_matern(ordint, spatialdim, lengthscale):
"""
Returns I_d \\otimes F
"""
driftmat = np.diag(np.ones(ordint), 1)
nu = ordint + 0.5
D, lam = ordint + 1, np.sqrt(2 * nu) / lengthscale
driftmat[-1, :] = np.array([-binom(D, i) * lam ** (D - i) for i in range(D)])
return np.kron(np.eye(spatialdim), driftmat)

def _dispmat(ordint, spatialdim, diffconst):
"""
Returns I_D \\otimes L
diffconst = sigma**2
"""
dispvec = diffconst * np.eye(ordint + 1)[:, -1]
return np.kron(np.eye(spatialdim), dispvec).T