
Commit 2b8626a

merge: Merge branch 'main' of https://github.com/probml/dynamax into main

2 parents: 1fb965f + 58b9dbe

This merge renames the generic linear_solve wrapper to psd_solve throughout the inference, model, and utility code, making explicit that every call site solves against a symmetric positive semi-definite (PSD) matrix.

File tree: 9 files changed (+34 −37 lines)


dynamax/generalized_gaussian_ssm/inference.py

Lines changed: 4 additions & 5 deletions
@@ -2,13 +2,12 @@
 from numpy.polynomial.hermite_e import hermegauss
 from jax import jacfwd, vmap, lax
 import jax.numpy as jnp
-from jax import scipy as jsc
 from jax import lax
 from tensorflow_probability.substrates.jax.distributions import MultivariateNormalFullCovariance as MVN
 from jaxtyping import Array, Float
 from typing import NamedTuple, Optional, Union, Callable

-from dynamax.utils.utils import linear_solve
+from dynamax.utils.utils import psd_solve
 from dynamax.generalized_gaussian_ssm.models import ParamsGGSSM
 from dynamax.linear_gaussian_ssm.inference import PosteriorGSSMFiltered, PosteriorGSSMSmoothed

@@ -162,7 +161,7 @@ def _step(carry, _):
     S = g_ev(Cov_Y, prior_mean, prior_cov) + g_cov(m_Y, m_Y, prior_mean, prior_cov)
     log_likelihood = emission_dist(yhat, S).log_prob(jnp.atleast_1d(y)).sum()
     C = g_cov(identity_fn, m_Y, prior_mean, prior_cov)
-    K = linear_solve(S, C.T).T
+    K = psd_solve(S, C.T).T
     posterior_mean = prior_mean + K @ (y - yhat)
     posterior_cov = prior_cov - K @ S @ K.T
     return (posterior_mean, posterior_cov), log_likelihood
@@ -195,7 +194,7 @@ def _statistical_linear_regression(mu, Sigma, m, S, C):
         b (D_obs):
         Omega (D_obs, D_obs):
     """
-    A = linear_solve(Sigma.T, C).T
+    A = psd_solve(Sigma.T, C).T
     b = m - A @ mu
     Omega = S - A @ Sigma @ A.T
     return A, b, Omega
@@ -329,7 +328,7 @@ def _step(carry, args):

     # Prediction step
     pred_mean, pred_cov, pred_cross = _predict(filtered_mean, filtered_cov, f, Q, u, g_ev, g_cov)
-    G = linear_solve(pred_cov, pred_cross.T).T
+    G = psd_solve(pred_cov, pred_cross.T).T

     # Compute smoothed mean and covariance
     smoothed_mean = filtered_mean + G @ (smoothed_mean_next - pred_mean)
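
For context on the _statistical_linear_regression hunk above: given joint moments of (x, y) — mean mu and covariance Sigma of x, mean m and covariance S of y, and cross-covariance C — it returns the best affine approximation y ≈ A x + b with residual covariance Omega. Because Sigma is a symmetric covariance, psd_solve(Sigma.T, C).T computes A = C^T Sigma^{-1}. A minimal sketch (the moment values below are made up for illustration):

import jax.numpy as jnp
from dynamax.utils.utils import psd_solve

# Made-up joint moments of (x, y)
mu    = jnp.array([0.5, -1.0])                 # E[x]
Sigma = jnp.array([[1.0, 0.2], [0.2, 2.0]])    # Cov[x], symmetric PSD
m     = jnp.array([1.5])                       # E[y]
S     = jnp.array([[0.8]])                     # Cov[y]
C     = jnp.array([[0.3], [0.4]])              # Cov[x, y], shape (D_hid, D_obs)

A = psd_solve(Sigma.T, C).T                    # A = C^T Sigma^{-1}
b = m - A @ mu                                 # offset of the affine fit
Omega = S - A @ Sigma @ A.T                    # residual covariance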

dynamax/linear_gaussian_ssm/inference.py

Lines changed: 3 additions & 4 deletions
@@ -1,6 +1,5 @@
 import jax.numpy as jnp
 import jax.random as jr
-from jax import scipy as jsc
 from jax import lax
 from tensorflow_probability.substrates.jax.distributions import MultivariateNormalFullCovariance as MVN
 from functools import wraps
@@ -9,7 +8,7 @@
 from jaxtyping import Array, Float
 from typing import NamedTuple, Optional, Union

-from dynamax.utils.utils import linear_solve
+from dynamax.utils.utils import psd_solve
 from dynamax.parameters import ParameterProperties
 from dynamax.types import PRNGKey, Scalar

@@ -171,7 +170,7 @@ def _condition_on(m, P, H, D, d, R, u, y):
     """
     # Compute the Kalman gain
     S = R + H @ P @ H.T
-    K = linear_solve(S, H @ P).T
+    K = psd_solve(S, H @ P).T
     Sigma_cond = P - K @ S @ K.T
     mu_cond = m + K @ (y - D @ u - d - H @ m)
     return mu_cond, Sigma_cond
@@ -324,7 +323,7 @@ def _step(carry, args):

     # This is like the Kalman gain but in reverse
     # See Eq 8.11 of Saarka's "Bayesian Filtering and Smoothing"
-    G = linear_solve(Q + F @ filtered_cov @ F.T, F @ filtered_cov).T
+    G = psd_solve(Q + F @ filtered_cov @ F.T, F @ filtered_cov).T

     # Compute the smoothed mean and covariance
     smoothed_mean = filtered_mean + G @ (smoothed_mean_next - F @ filtered_mean - B @ u - b)
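
All of these call sites share one pattern: the Kalman gain K = P H^T S^{-1} is computed by solving against the symmetric innovation covariance S instead of forming its inverse explicitly. A minimal sketch with toy matrices (the numbers are illustrative only):

import jax.numpy as jnp
from dynamax.utils.utils import psd_solve

P = jnp.array([[2.0, 0.5], [0.5, 1.0]])   # filtered state covariance, PSD
H = jnp.array([[1.0, 0.0]])               # emission matrix
R = jnp.array([[0.1]])                    # emission noise covariance

S = R + H @ P @ H.T                       # innovation covariance, symmetric PSD
K = psd_solve(S, H @ P).T                 # K = P H^T S^{-1}, valid because S^T = S

# Sanity check against the explicit inverse
assert jnp.allclose(K, P @ H.T @ jnp.linalg.inv(S))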

dynamax/linear_gaussian_ssm/info_inference.py

Lines changed: 12 additions & 13 deletions
@@ -1,11 +1,10 @@
 import jax.numpy as jnp
-from jax import scipy as jsc
 from jax import lax, vmap, value_and_grad
 from jax.scipy.linalg import solve_triangular
 from jaxtyping import Array, Float
 from typing import NamedTuple, Optional

-from dynamax.utils.utils import linear_solve
+from dynamax.utils.utils import psd_solve


 class ParamsLGSSMInfo(NamedTuple):
@@ -60,7 +59,7 @@ def info_to_moment_form(etas, Lambdas):
         means (N,D)
         covs (N,D,D)
     """
-    means = vmap(lambda A, b:linear_solve(A, b))(Lambdas, etas)
+    means = vmap(lambda A, b:psd_solve(A, b))(Lambdas, etas)
     covs = jnp.linalg.inv(Lambdas)
     return means, covs

@@ -82,7 +81,7 @@ def _mvn_info_log_prob(eta, Lambda, x):
     """
     D = len(Lambda)
     lp = x.T @ eta - 0.5 * x.T @ Lambda @ x
-    lp += -0.5 * eta.T @ linear_solve(Lambda, eta)
+    lp += -0.5 * eta.T @ psd_solve(Lambda, eta)
     sign, logdet = jnp.linalg.slogdet(Lambda)
     lp += -0.5 * (D * jnp.log(2 * jnp.pi) - sign * logdet)
     return lp
@@ -121,7 +120,7 @@ def _info_predict(eta, Lambda, F, Q_prec, B, u, b):
         eta_pred (D_hid,): predicted precision weighted mean.
         Lambda_pred (D_hid,D_hid): predicted precision.
     """
-    K = linear_solve(Lambda + F.T @ Q_prec @ F, F.T @ Q_prec).T
+    K = psd_solve(Lambda + F.T @ Q_prec @ F, F.T @ Q_prec).T
     I = jnp.eye(F.shape[0])
     ## This version should be more stable than:
     # Lambda_pred = (I - K @ F.T) @ Q_prec
@@ -263,7 +262,7 @@ def _smooth_step(carry, args):

     # This is the information form version of the 'reverse' Kalman gain
     # See Eq 8.11 of Saarka's "Bayesian Filtering and Smoothing"
-    G = linear_solve(Q_prec + smoothed_prec_next - pred_prec, Q_prec @ F)
+    G = psd_solve(Q_prec + smoothed_prec_next - pred_prec, Q_prec @ F)

     # Compute the smoothed parameter estimates
     smoothed_prec = filtered_prec + F.T @ Q_prec @ (F - G)
@@ -398,18 +397,18 @@ def lds_to_block_tridiag(lds, data, inputs):
     T = len(data)

     # diagonal blocks of precision matrix
-    J_diag = jnp.array([jnp.dot(C(t).T, linear_solve(R(t), C(t))) for t in range(T)])
+    J_diag = jnp.array([jnp.dot(C(t).T, psd_solve(R(t), C(t))) for t in range(T)])
     J_diag = J_diag.at[0].add(jnp.linalg.inv(Q0))
-    J_diag = J_diag.at[:-1].add(jnp.array([jnp.dot(A(t).T, linear_solve(Q(t), A(t))) for t in range(T - 1)]))
+    J_diag = J_diag.at[:-1].add(jnp.array([jnp.dot(A(t).T, psd_solve(Q(t), A(t))) for t in range(T - 1)]))
     J_diag = J_diag.at[1:].add(jnp.array([jnp.linalg.inv(Q(t)) for t in range(0, T - 1)]))

     # lower diagonal blocks of precision matrix
-    J_lower_diag = jnp.array([-linear_solve(Q(t), A(t)) for t in range(T - 1)])
+    J_lower_diag = jnp.array([-psd_solve(Q(t), A(t)) for t in range(T - 1)])

     # linear potential
-    h = jnp.array([jnp.dot(data[t] - D(t) @ inputs[t], linear_solve(R(t), C(t))) for t in range(T)])
-    h = h.at[0].add(linear_solve(Q0, m0))
-    h = h.at[:-1].add(jnp.array([-jnp.dot(A(t).T, linear_solve(Q(t), B(t) @ inputs[t])) for t in range(T - 1)]))
-    h = h.at[1:].add(jnp.array([linear_solve(Q(t), B(t) @ inputs[t]) for t in range(T - 1)]))
+    h = jnp.array([jnp.dot(data[t] - D(t) @ inputs[t], psd_solve(R(t), C(t))) for t in range(T)])
+    h = h.at[0].add(psd_solve(Q0, m0))
+    h = h.at[:-1].add(jnp.array([-jnp.dot(A(t).T, psd_solve(Q(t), B(t) @ inputs[t])) for t in range(T - 1)]))
+    h = h.at[1:].add(jnp.array([psd_solve(Q(t), B(t) @ inputs[t]) for t in range(T - 1)]))

     return J_diag, J_lower_diag, h
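
In information form the Gaussian is parameterized by the precision Lambda and the precision-weighted mean eta, so converting back to moment form is itself a PSD solve, mu = Lambda^{-1} eta, as in info_to_moment_form above. A minimal sketch with made-up values:

import jax.numpy as jnp
from dynamax.utils.utils import psd_solve

Lambda = jnp.array([[4.0, 1.0], [1.0, 3.0]])  # precision matrix, symmetric PSD
eta = jnp.array([1.0, 2.0])                   # precision-weighted mean

mean = psd_solve(Lambda, eta)                 # mu = Lambda^{-1} eta
cov = jnp.linalg.inv(Lambda)                  # Sigma = Lambda^{-1}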

dynamax/linear_gaussian_ssm/models.py

Lines changed: 2 additions & 2 deletions
@@ -20,7 +20,7 @@
 from dynamax.utils.distributions import MatrixNormalInverseWishart as MNIW
 from dynamax.utils.distributions import NormalInverseWishart as NIW
 from dynamax.utils.distributions import mniw_posterior_update, niw_posterior_update
-from dynamax.utils.utils import pytree_stack, linear_solve
+from dynamax.utils.utils import pytree_stack, psd_solve

 class SuffStatsLGSSM(Protocol):
     """A :class:`NamedTuple` with sufficient statistics for LGSSM parameter estimation."""
@@ -339,7 +339,7 @@ def m_step(

 def fit_linear_regression(ExxT, ExyT, EyyT, N):
     # Solve a linear regression given sufficient statistics
-    W = linear_solve(ExxT, ExyT).T
+    W = psd_solve(ExxT, ExyT).T
     Sigma = (EyyT - W @ ExyT - ExyT.T @ W.T + W @ ExxT @ W.T) / N
     return W, Sigma
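
fit_linear_regression above recovers regression weights from expected sufficient statistics, W = (ExxT^{-1} ExyT)^T, where ExxT is symmetric PSD by construction. A usage sketch with noiseless synthetic data (the data and the top-level import are assumptions for illustration):

import jax.numpy as jnp
import jax.random as jr
from dynamax.linear_gaussian_ssm.models import fit_linear_regression

key_x, key_w = jr.split(jr.PRNGKey(0))
X = jr.normal(key_x, (100, 3))        # inputs, (N, D_in)
W_true = jr.normal(key_w, (2, 3))     # ground-truth weights, (D_out, D_in)
Y = X @ W_true.T                      # noiseless targets, (N, D_out)

# Empirical sufficient statistics
ExxT = X.T @ X                        # (D_in, D_in), symmetric PSD
ExyT = X.T @ Y                        # (D_in, D_out)
EyyT = Y.T @ Y                        # (D_out, D_out)

W, Sigma = fit_linear_regression(ExxT, ExyT, EyyT, N=100)
# W recovers W_true (up to numerical error); Sigma is ~0 since there is no noise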

dynamax/linear_gaussian_ssm/parallel_inference.py

Lines changed: 3 additions & 3 deletions
@@ -7,7 +7,7 @@
 from tensorflow_probability.substrates.jax.distributions import MultivariateNormalFullCovariance as MVN
 from jaxtyping import Array, Float

-from dynamax.utils.utils import linear_solve
+from dynamax.utils.utils import psd_solve
 from dynamax.linear_gaussian_ssm.inference import PosteriorGSSMFiltered, PosteriorGSSMSmoothed, ParamsLGSSM

 def _make_associative_filtering_elements(params, emissions):
@@ -26,7 +26,7 @@ def _first_filtering_element(params, y):
     m1 = params.initial.mean
     P1 = params.initial.cov
     S1 = H @ P1 @ H.T + R
-    K1 = linear_solve(S1, H @ P1).T
+    K1 = psd_solve(S1, H @ P1).T

     A = jnp.zeros_like(F)
     b = m1 + K1 @ (y - H @ m1)
@@ -131,7 +131,7 @@ def _generic_smoothing_element(params, m, P):

     Pp = F @ P @ F.T + Q

-    E = linear_solve(Pp, F @ P).T
+    E = psd_solve(Pp, F @ P).T
     g = m - E @ F @ m
     L = P - E @ Pp @ E.T
     return E, g, L

dynamax/nonlinear_gaussian_ssm/inference_ekf.py

Lines changed: 3 additions & 3 deletions
@@ -5,7 +5,7 @@
 from jaxtyping import Array, Float
 from typing import Optional

-from dynamax.utils.utils import linear_solve
+from dynamax.utils.utils import psd_solve
 from dynamax.nonlinear_gaussian_ssm.models import ParamsNLGSSM
 from dynamax.linear_gaussian_ssm.inference import PosteriorGSSMFiltered, PosteriorGSSMSmoothed

@@ -72,7 +72,7 @@ def _step(carry, _):
     prior_mean, prior_cov = carry
     H_x = H(prior_mean, u)
     S = R + H_x @ prior_cov @ H_x.T
-    K = linear_solve(S, H_x @ prior_cov).T
+    K = psd_solve(S, H_x @ prior_cov).T
     posterior_cov = prior_cov - K @ S @ K.T
     posterior_mean = prior_mean + K @ (y - h(prior_mean, u))
     return (posterior_mean, posterior_cov), None
@@ -204,7 +204,7 @@ def _step(carry, args):
     # Prediction step
     m_pred = f(filtered_mean, u)
     S_pred = Q + F_x @ filtered_cov @ F_x.T
-    G = linear_solve(S_pred, F_x @ filtered_cov).T
+    G = psd_solve(S_pred, F_x @ filtered_cov).T

     # Compute smoothed mean and covariance
     smoothed_mean = filtered_mean + G @ (smoothed_mean_next - m_pred)
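
In the EKF hunks, H_x and F_x are Jacobians of the emission and dynamics functions evaluated at the current mean; such Jacobians come from JAX autodiff (the GGSSM module above imports jax.jacfwd for this purpose). A minimal, self-contained sketch in which the emission function is made up:

import jax.numpy as jnp
from jax import jacfwd

def h(x, u):
    # Made-up nonlinear emission function for illustration
    return jnp.sin(x) + u

H = jacfwd(h)                       # Jacobian of h w.r.t. its first argument
prior_mean = jnp.array([0.1, 0.2])
u = jnp.zeros(2)
H_x = H(prior_mean, u)              # (2, 2) Jacobian evaluated at the mean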

dynamax/nonlinear_gaussian_ssm/inference_ukf.py

Lines changed: 3 additions & 3 deletions
@@ -5,7 +5,7 @@
 from jaxtyping import Array, Float
 from typing import NamedTuple, Optional

-from dynamax.utils.utils import linear_solve
+from dynamax.utils.utils import psd_solve
 from dynamax.nonlinear_gaussian_ssm.models import ParamsNLGSSM
 from dynamax.linear_gaussian_ssm.models import PosteriorGSSMFiltered, PosteriorGSSMSmoothed

@@ -130,7 +130,7 @@ def _condition_on(m, P, h, R, lamb, w_mean, w_cov, u, y):
     ll = MVN(pred_mean, pred_cov).log_prob(y)

     # Compute filtered mean and covariace
-    K = linear_solve(pred_cov, pred_cross.T).T # Filter gain
+    K = psd_solve(pred_cov, pred_cross.T).T # Filter gain
     m_cond = m + K @ (y - pred_mean)
     P_cond = P - K @ pred_cov @ K.T
     return ll, m_cond, P_cond
@@ -244,7 +244,7 @@ def _step(carry, args):

     # Prediction step
     m_pred, S_pred, S_cross = _predict(filtered_mean, filtered_cov, f, Q, lamb, w_mean, w_cov, u)
-    G = linear_solve(S_pred, S_cross.T).T
+    G = psd_solve(S_pred, S_cross.T).T

     # Compute smoothed mean and covariance
     smoothed_mean = filtered_mean + G @ (smoothed_mean_next - m_pred)

dynamax/utils/distributions.py

Lines changed: 2 additions & 2 deletions
@@ -6,7 +6,7 @@
 tfd = tfp.distributions
 tfb = tfp.bijectors

-from dynamax.utils.utils import linear_solve
+from dynamax.utils.utils import psd_solve


 class InverseWishart(tfd.TransformedDistribution):
@@ -319,7 +319,7 @@ def mniw_posterior_update(mniw_prior, sufficient_stats):
     Sxx = V_pri + SxxT
     Sxy = SxyT + V_pri @ M_pri.T
     Syy = SyyT + M_pri @ V_pri @ M_pri.T
-    M_pos = linear_solve(Sxx, Sxy).T
+    M_pos = psd_solve(Sxx, Sxy).T
     V_pos = Sxx
     nu_pos = nu_pri + N
     Psi_pos = Psi_pri + Syy - M_pos @ Sxy

dynamax/utils/utils.py

Lines changed: 2 additions & 2 deletions
@@ -198,6 +198,6 @@ def find_permutation(
     return perm


-def linear_solve(A,b):
-    """A wrapper for coordinating the linalg solvers used in the library."""
+def psd_solve(A,b):
+    """A wrapper for coordinating the linalg solvers used in the library for psd matrices."""
     return jnp.linalg.solve(A,b)
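
Note that the renamed wrapper still delegates to the generic jnp.linalg.solve; for now the new name documents a contract (the argument A is PSD) rather than changing the algorithm. One possible follow-up — an assumption, not part of this commit — would exploit that contract with a Cholesky factorization:

import jax.numpy as jnp
from jax.scipy.linalg import cho_factor, cho_solve

def psd_solve_via_cholesky(A, b):
    # Hypothetical alternative: factor the symmetric PSD matrix A once,
    # then solve by triangular back-substitution, which is cheaper and
    # typically more stable than a general LU-based solve.
    factor = cho_factor(A, lower=True)
    return cho_solve(factor, b)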
