# Source code for probnum.filtsmooth.utils._merge_regression_problems

```python
"""Utility functions for filtering and smoothing."""

from typing import Tuple

import numpy as np

from probnum import problems

__all__ = ["merge_regression_problems"]

def merge_regression_problems(
    regression_problem1: problems.TimeSeriesRegressionProblem,
    regression_problem2: problems.TimeSeriesRegressionProblem,
) -> problems.TimeSeriesRegressionProblem:
    """Make a new regression problem out of two other regression problems.

    The locations of both problems are concatenated and sorted in ascending
    order. Observations, measurement models and (if both problems provide one)
    solutions are placed at the positions of their respective locations in the
    sorted grid.

    Parameters
    ----------
    regression_problem1 :
        Time series regression problem.
    regression_problem2 :
        Time series regression problem.

    Returns
    -------
    problem : problems.TimeSeriesRegressionProblem
        Time series regression problem. Its solution is ``None`` unless both
        input problems have a solution.

    Raises
    ------
    ValueError
        If the locations in both regression problems are not disjoint, or if
        one of the problems contains the same location more than once.
        Multiple observations at a single grid point are not supported currently.
    ValueError
        If the observations (or the solutions) of the two problems have
        incompatible shapes along the non-leading axes.

    Notes
    -----
    To merge more than two problems, combine this function with functools.reduce.

    Examples
    --------

    Create two car-tracking problems with similar parameters and disjoint locations.

    >>> import probnum.problems.zoo.filtsmooth as filtsmooth_zoo
    >>> import numpy as np
    >>> rng = np.random.default_rng(seed=1)
    >>> prob1, _ = filtsmooth_zoo.car_tracking(
    ...     rng=rng, measurement_variance=2.0, timespan=(0.0, 10.0), step=0.5
    ... )
    >>> print(prob1.locations)
    [0.  0.5 1.  1.5 2.  2.5 3.  3.5 4.  4.5 5.  5.5 6.  6.5 7.  7.5 8.  8.5
     9.  9.5]

    >>> prob2, _ = filtsmooth_zoo.car_tracking(
    ...     rng=rng, measurement_variance=2.0, timespan=(0.25, 10.25), step=0.5
    ... )
    >>> print(prob2.locations)
    [0.25 0.75 1.25 1.75 2.25 2.75 3.25 3.75 4.25 4.75 5.25 5.75 6.25 6.75
     7.25 7.75 8.25 8.75 9.25 9.75]

    Merge them with merge_regression_problems

    >>> new_prob = merge_regression_problems(prob1, prob2)
    >>> print(new_prob.locations)
    [0.   0.25 0.5  0.75 1.   1.25 1.5  1.75 2.   2.25 2.5  2.75 3.   3.25
     3.5  3.75 4.   4.25 4.5  4.75 5.   5.25 5.5  5.75 6.   6.25 6.5  6.75
     7.   7.25 7.5  7.75 8.   8.25 8.5  8.75 9.   9.25 9.5  9.75]

    If you have more than two problems that you want to merge,
    do this with functools.reduce.

    >>> import functools
    >>> prob3, _ = filtsmooth_zoo.car_tracking(
    ...     rng=rng, measurement_variance=2.0, timespan=(0.35, 10.35), step=0.5
    ... )
    >>> new_prob = functools.reduce(
    ...     merge_regression_problems,
    ...     (prob1, prob2, prob3),
    ... )
    >>> print(new_prob.locations)
    [0.   0.25 0.35 0.5  0.75 0.85 1.   1.25 1.35 1.5  1.75 1.85 2.   2.25
     2.35 2.5  2.75 2.85 3.   3.25 3.35 3.5  3.75 3.85 4.   4.25 4.35 4.5
     4.75 4.85 5.   5.25 5.35 5.5  5.75 5.85 6.   6.25 6.35 6.5  6.75 6.85
     7.   7.25 7.35 7.5  7.75 7.85 8.   8.25 8.35 8.5  8.75 8.85 9.   9.25
     9.35 9.5  9.75 9.85]
    """

    measurement_models1 = np.asarray(regression_problem1.measurement_models)
    measurement_models2 = np.asarray(regression_problem2.measurement_models)

    # Some shorthand improves readability of the inserts below.
    locs1, data1, sol1 = (
        regression_problem1.locations,
        regression_problem1.observations,
        regression_problem1.solution,
    )
    locs2, data2, sol2 = (
        regression_problem2.locations,
        regression_problem2.observations,
        regression_problem2.solution,
    )

    # Merge time locations.
    if np.any(np.isin(locs1, locs2)):
        raise ValueError("Regression problems must not share time locations.")
    new_locs = np.sort(np.concatenate((locs1, locs2)))

    # Shared locations are excluded above, so equal neighbours in the sorted
    # grid can only stem from a duplicate inside one of the problems. The
    # index-based inserts below would map such duplicates onto the same slot,
    # overwriting one entry and leaving another slot filled with zeros.
    if np.any(new_locs[1:] == new_locs[:-1]):
        raise ValueError(
            "Regression problems must not contain duplicate time locations."
        )

    # Position of every original location inside the merged, sorted grid.
    locs1_in_new_locs = np.searchsorted(new_locs, locs1)
    locs2_in_new_locs = np.searchsorted(new_locs, locs2)

    # Merge observations.
    # NOTE: np.zeros defaults to float64, so the merged array is float64.
    new_num_obs = len(data1) + len(data2)
    if data1.shape[1:] != data2.shape[1:]:
        raise ValueError("The data sets have incompatible dimension.")
    new_data_shape = (new_num_obs,) + data1.shape[1:]
    new_data = np.zeros(new_data_shape)
    new_data[locs1_in_new_locs] = data1
    new_data[locs2_in_new_locs] = data2

    # Merge solutions.
    # The resulting problem will only have a solution if BOTH problems have one.
    if sol1 is not None and sol2 is not None:
        if sol1.shape[1:] != sol2.shape[1:]:
            raise ValueError("The solution arrays have incompatible dimension.")
        new_sol_shape = (new_num_obs,) + sol1.shape[1:]
        new_sol = np.zeros(new_sol_shape)
        new_sol[locs1_in_new_locs] = sol1
        new_sol[locs2_in_new_locs] = sol2
    else:
        new_sol = None

    # Merge measurement models (object array, one model per merged location).
    new_measurement_models = np.zeros((new_num_obs,), dtype=object)
    new_measurement_models[locs1_in_new_locs] = measurement_models1
    new_measurement_models[locs2_in_new_locs] = measurement_models2

    # Return merged arrays.
    return problems.TimeSeriesRegressionProblem(
        locations=new_locs,
        observations=new_data,
        measurement_models=new_measurement_models,
        solution=new_sol,
    )
```