# Source code for probnum.filtsmooth.particle._particle_filter

"""Particle filters."""

from typing import Iterable, Union

import numpy as np

from probnum import problems, randprocs, randvars
from probnum.filtsmooth import _bayesfiltsmooth
from probnum.filtsmooth.particle import (
    _importance_distributions,
    _particle_filter_posterior,
)
from probnum.typing import FloatLike, IntLike

# Terribly long variable names, but internal only, so no worries.
ParticleFilterMeasurementModelArgType = Union[
    randprocs.markov.discrete.NonlinearGaussian,
    Iterable[randprocs.markov.discrete.NonlinearGaussian],
]
ParticleFilterLinearisedMeasurementModelArgType = Union[
    randprocs.markov.discrete.NonlinearGaussian,
    Iterable[randprocs.markov.discrete.NonlinearGaussian],
]

def effective_number_of_events(categ_rv: randvars.Categorical) -> float:
    """Approximate effective number of events in the support of a categorical
    random variable.

    In a particle filter, this is used as the effective number of particles,
    which may indicate the need for resampling.
    """
    return 1.0 / np.sum(categ_rv.probabilities**2)
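
# A quick sanity check of the formula above; a minimal sketch, assuming only
# NumPy (numbers are exact up to floating-point rounding):
#
#   >>> import numpy as np
#   >>> uniform = np.full(100, 1 / 100)
#   >>> 1.0 / np.sum(uniform**2)          # all particles contribute: ~100.0
#   >>> degenerate = np.zeros(100)
#   >>> degenerate[0] = 1.0               # one particle carries all the mass
#   >>> 1.0 / np.sum(degenerate**2)       # effectively a single particle: 1.0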


class ParticleFilter(_bayesfiltsmooth.BayesFiltSmooth):
    r"""Particle filter (PF). Also known as sequential Monte Carlo method.

    A PF estimates the posterior distribution of a Markov process given noisy,
    non-linear observations, with a set of particles.

    The random state of the particle filter is inferred from the random state
    of the initial random variable.

    Parameters
    ----------
    prior_process
        Prior Gauss-Markov process.
    importance_distribution
        Importance distribution.
    num_particles
        Number of particles to use.
    rng
        Random number generator.
    with_resampling
        Whether after each step the effective number of particles shall be
        checked, and, if too low, the state should be resampled. Optional.
        Default is `True`.
    resampling_percentage_threshold
        Percentage threshold for resampling. That is, it is the value
        :math:`p` such that resampling is performed if
        :math:`N_{\text{eff}} < p \, N_{\text{particles}}` holds.
        Optional. Default is 0.1. If this value is non-positive, resampling
        is never performed. If it is larger than 1, resampling is performed
        after each step.
    """

    def __init__(
        self,
        prior_process: randprocs.markov.MarkovProcess,
        importance_distribution: _importance_distributions.ImportanceDistribution,
        num_particles: IntLike,
        rng: np.random.Generator,
        with_resampling: bool = True,
        resampling_percentage_threshold: FloatLike = 0.1,
    ) -> None:
        super().__init__(prior_process=prior_process)
        self.num_particles = num_particles
        self.importance_distribution = importance_distribution
        self.rng = rng

        self.with_resampling = with_resampling
        self.resampling_percentage_threshold = resampling_percentage_threshold
        self.min_effective_num_of_particles = (
            resampling_percentage_threshold * num_particles
        )
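
    # A hedged construction sketch (not taken from the probnum docs):
    # `prior_process` and `importance_distribution` are placeholders for a
    # randprocs.markov.MarkovProcess and an ImportanceDistribution built
    # elsewhere; only the signature below comes from this module.
    #
    #   >>> rng = np.random.default_rng(seed=42)
    #   >>> pf = ParticleFilter(
    #   ...     prior_process=prior_process,
    #   ...     importance_distribution=importance_distribution,
    #   ...     num_particles=50,
    #   ...     rng=rng,
    #   ...     with_resampling=True,
    #   ...     resampling_percentage_threshold=0.1,  # resample if N_eff < 5
    #   ... )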
    def filter(
        self,
        regression_problem: problems.TimeSeriesRegressionProblem,
    ):
        """Apply particle filtering to a data set.

        Parameters
        ----------
        regression_problem
            Regression problem.

        Returns
        -------
        posterior
            Posterior distribution of the filtered output.
        info_dicts
            List of dictionaries containing filtering information.

        See Also
        --------
        TimeSeriesRegressionProblem: a regression problem data class
        """
        filtered_rvs = []
        info_dicts = []

        for rv, info in self.filter_generator(regression_problem):
            filtered_rvs.append(rv)
            info_dicts.append(info)

        posterior = _particle_filter_posterior.ParticleFilterPosterior(
            states=filtered_rvs,
            locations=regression_problem.locations,
        )
        return posterior, info_dicts
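
    # Continuing the hedged sketch above: `regression_problem` stands in for a
    # problems.TimeSeriesRegressionProblem assembled elsewhere.
    #
    #   >>> posterior, info_dicts = pf.filter(regression_problem)
    #   >>> posterior.states[-1]  # categorical RV over the final particle set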
    def filter_generator(
        self,
        regression_problem: problems.TimeSeriesRegressionProblem,
    ):
        """Apply particle filtering to a data set.

        Parameters
        ----------
        regression_problem
            Regression problem.

        Raises
        ------
        ValueError
            If repeating time points are encountered.

        Yields
        ------
        curr_rv
            Filtering random variable at each grid point.
        info_dict
            Dictionary containing filtering information.

        See Also
        --------
        TimeSeriesRegressionProblem: a regression problem data class
        """
        # It is not clear at the moment how to handle repeating time points,
        # so they are rejected outright.
        if not np.all(np.diff(regression_problem.locations) > 0):
            raise ValueError(
                "Particle filtering cannot handle repeating time points currently."
            )

        initarg = self.prior_process.initarg
        t_old = self.prior_process.initarg

        # If the initial time of the prior equals the location of the first
        # data point, the initial set of particles is overwritten immediately,
        # so we set them to unimportant values. Otherwise, we have to sample
        # an initial set of particles from the prior.
        weights = np.ones(self.num_particles) / self.num_particles
        particle_set_shape = (self.num_particles,) + self.prior_process.initrv.shape
        if regression_problem.locations[0] == initarg:
            particles = np.nan * np.ones(particle_set_shape)
        else:
            particles = self.prior_process.initrv.sample(
                rng=self.rng, size=(self.num_particles,)
            )

        for t, data, measmod in regression_problem:
            dt = t - t_old
            new_particles = particles.copy()
            new_weights = weights.copy()

            # Capture the inputs in a variable for more compact code layout.
            inputs = measmod, particles, weights, data, t_old, dt, t
            if t == initarg:
                particle_generator = self._importance_rv_generator_initial_time(
                    *inputs
                )
            else:
                particle_generator = self._importance_rv_generator(*inputs)

            for idx, (importance_rv, dynamics_rv, p, w) in enumerate(
                particle_generator
            ):
                # Importance sampling step
                new_particle = importance_rv.sample(rng=self.rng)
                meas_rv, _ = measmod.forward_realization(new_particle, t=t)
                loglikelihood = meas_rv.logpdf(data)
                log_correction_factor = (
                    self.importance_distribution.log_correction_factor(
                        proposal_state=new_particle,
                        importance_rv=importance_rv,
                        dynamics_rv=dynamics_rv,
                        old_weight=w,
                    )
                )
                new_weight = np.exp(loglikelihood + log_correction_factor)

                new_particles[idx] = new_particle
                new_weights[idx] = new_weight

            weights = new_weights / np.sum(new_weights)
            particles = new_particles
            new_rv = randvars.Categorical(support=particles, probabilities=weights)

            if self.with_resampling:
                N = effective_number_of_events(new_rv)
                if N < self.min_effective_num_of_particles:
                    new_rv = new_rv.resample(rng=self.rng)

            yield new_rv, {}
            t_old = t
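
    # For intuition, a self-contained NumPy sketch of a single step of the
    # loop above, specialised to a bootstrap filter (the importance
    # distribution equals the dynamics, so the log-correction factor
    # vanishes). The random-walk proposal and Gaussian log-likelihood are
    # illustrative assumptions, not probnum API:
    #
    #   >>> import numpy as np
    #   >>> rng = np.random.default_rng(seed=1)
    #   >>> num_particles, data_point = 500, 0.3
    #   >>> particles = rng.normal(size=num_particles)     # propose from dynamics
    #   >>> loglik = -0.5 * (data_point - particles) ** 2  # Gaussian log-likelihood
    #   >>> weights = np.exp(loglik - loglik.max())
    #   >>> weights /= weights.sum()                       # normalise
    #   >>> if 1.0 / np.sum(weights**2) < 0.1 * num_particles:  # N_eff check
    #   ...     idx = rng.choice(num_particles, size=num_particles, p=weights)
    #   ...     particles = particles[idx]
    #   ...     weights = np.full(num_particles, 1 / num_particles)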
    def _importance_rv_generator_initial_time(
        self,
        measmod,
        particles,
        weights,
        data,
        t_old,
        dt,
        t,
    ):
        # At the initial time, a single importance distribution is built from
        # the prior initial random variable and the first data point; it is
        # shared by all particles.
        processed = self.importance_distribution.process_initrv_with_data(
            self.prior_process.initrv, data, t, measurement_model=measmod
        )
        importance_rv, dynamics_rv, _ = processed
        for p, w in zip(particles, weights):
            yield importance_rv, dynamics_rv, p, w

    def _importance_rv_generator(
        self,
        measmod,
        particles,
        weights,
        data,
        t_old,
        dt,
        t,
    ):
        # Away from the initial time, each particle gets its own importance
        # distribution, generated from its current state and the data point.
        for p, w in zip(particles, weights):
            output = self.importance_distribution.generate_importance_rv(
                p, data, t_old, dt, measurement_model=measmod
            )
            importance_rv, dynamics_rv, _ = output
            yield importance_rv, dynamics_rv, p, w