# Source code for probnum.filtsmooth.utils._merge_regression_problems

"""Utility functions for filtering and smoothing."""


from typing import Tuple

import numpy as np

from probnum import problems

# Names exported by a star-import of this module.
__all__ = ["merge_regression_problems"]


def merge_regression_problems(
    regression_problem1: problems.TimeSeriesRegressionProblem,
    regression_problem2: problems.TimeSeriesRegressionProblem,
) -> problems.TimeSeriesRegressionProblem:
    """Make a new regression problem out of two other regression problems.

    The locations of both problems are merged into one sorted grid.
    Observations, measurement models and (if both problems provide one)
    solutions are rearranged so that they line up with that grid.

    Parameters
    ----------
    regression_problem1 :
        Time series regression problem.
    regression_problem2 :
        Time series regression problem.

    Returns
    -------
    problem : problems.TimeSeriesRegressionProblem
        Time series regression problem.
        Its solution is ``None`` unless both inputs have a solution.

    Raises
    ------
    ValueError
        If the locations in both regression problems are not disjoint.
        Multiple observations at a single grid point are not supported currently.
    ValueError
        If the observations of both problems differ in their trailing dimensions.
    ValueError
        If both problems have a solution and the solutions differ in their
        trailing dimensions.

    Note
    ----
    To merge more than two problems, combine this function with functools.reduce.

    Examples
    --------

    Create two car-tracking problems with similar parameters and disjoint locations.

    >>> import probnum.problems.zoo.filtsmooth as filtsmooth_zoo
    >>> import numpy as np
    >>> rng = np.random.default_rng(seed=1)
    >>> prob1, _ = filtsmooth_zoo.car_tracking(
    ...     rng=rng, measurement_variance=2.0, timespan=(0.0, 10.0), step=0.5
    ... )
    >>> print(prob1.locations)
    [0.  0.5 1.  1.5 2.  2.5 3.  3.5 4.  4.5 5.  5.5 6.  6.5 7.  7.5 8.  8.5
     9.  9.5]
    >>> prob2, _ = filtsmooth_zoo.car_tracking(
    ...     rng=rng, measurement_variance=2.0, timespan=(0.25, 10.25), step=0.5
    ... )
    >>> print(prob2.locations)
    [0.25 0.75 1.25 1.75 2.25 2.75 3.25 3.75 4.25 4.75 5.25 5.75 6.25 6.75
     7.25 7.75 8.25 8.75 9.25 9.75]

    Merge them with merge_regression_problems

    >>> new_prob = merge_regression_problems(prob1, prob2)
    >>> print(new_prob.locations)
    [0.   0.25 0.5  0.75 1.   1.25 1.5  1.75 2.   2.25 2.5  2.75 3.   3.25
     3.5  3.75 4.   4.25 4.5  4.75 5.   5.25 5.5  5.75 6.   6.25 6.5  6.75
     7.   7.25 7.5  7.75 8.   8.25 8.5  8.75 9.   9.25 9.5  9.75]

    If you have more than two problems that you want to merge, do this with
    functools.reduce.

    >>> import functools
    >>> prob3, _ = filtsmooth_zoo.car_tracking(
    ...     rng=rng, measurement_variance=2.0, timespan=(0.35, 10.35), step=0.5
    ... )
    >>> new_prob = functools.reduce(
    ...     merge_regression_problems,
    ...     (prob1, prob2, prob3),
    ... )
    >>> print(new_prob.locations)
    [0.   0.25 0.35 0.5  0.75 0.85 1.   1.25 1.35 1.5  1.75 1.85 2.   2.25
     2.35 2.5  2.75 2.85 3.   3.25 3.35 3.5  3.75 3.85 4.   4.25 4.35 4.5
     4.75 4.85 5.   5.25 5.35 5.5  5.75 5.85 6.   6.25 6.35 6.5  6.75 6.85
     7.   7.25 7.35 7.5  7.75 7.85 8.   8.25 8.35 8.5  8.75 8.85 9.   9.25
     9.35 9.5  9.75 9.85]
    """
    measurement_models1 = np.asarray(regression_problem1.measurement_models)
    measurement_models2 = np.asarray(regression_problem2.measurement_models)

    # Some shorthand improves readability of the inserts below.
    locs1, data1, sol1 = (
        regression_problem1.locations,
        regression_problem1.observations,
        regression_problem1.solution,
    )
    locs2, data2, sol2 = (
        regression_problem2.locations,
        regression_problem2.observations,
        regression_problem2.solution,
    )

    # Merge time locations.
    # np.isin is the supported replacement for np.in1d (deprecated in NumPy 2.0).
    if np.any(np.isin(locs1, locs2)):
        raise ValueError("Regression problems must not share time locations.")
    new_locs = np.sort(np.concatenate((locs1, locs2)))

    # Index of each original location inside the merged, sorted grid.
    # These indices are reused to scatter data, solutions and models.
    locs1_in_new_locs = np.searchsorted(new_locs, locs1)
    locs2_in_new_locs = np.searchsorted(new_locs, locs2)

    # Merge observations
    new_num_obs = len(data1) + len(data2)
    if data1.shape[1:] != data2.shape[1:]:
        raise ValueError("The data sets have incompatible dimension.")
    new_data_shape = (new_num_obs,) + data1.shape[1:]
    new_data = np.zeros(new_data_shape)
    new_data[locs1_in_new_locs] = data1
    new_data[locs2_in_new_locs] = data2

    # Merge solutions.
    # The resulting problem will only have a solution if BOTH problems have one.
    if sol1 is not None and sol2 is not None:
        if sol1.shape[1:] != sol2.shape[1:]:
            raise ValueError("The solution arrays have incompatible dimension.")
        new_sol_shape = (new_num_obs,) + sol1.shape[1:]
        new_sol = np.zeros(new_sol_shape)
        new_sol[locs1_in_new_locs] = sol1
        new_sol[locs2_in_new_locs] = sol2
    else:
        new_sol = None

    # Merge measurement models.
    # dtype=object so that the array stores the model objects themselves.
    new_measurement_models = np.zeros((new_num_obs,), dtype=object)
    new_measurement_models[locs1_in_new_locs] = measurement_models1
    new_measurement_models[locs2_in_new_locs] = measurement_models2

    # Return merged arrays
    new_regression_problem = problems.TimeSeriesRegressionProblem(
        locations=new_locs,
        observations=new_data,
        measurement_models=new_measurement_models,
        solution=new_sol,
    )
    return new_regression_problem