Implement compute_state_probability_matrix_equation1

szhan · szhan · commit 589e2d9c8d20 · 2023-08-30T21:02:11.000+01:00
diff --git a/python/tests/test_beagle.py b/python/tests/test_beagle.py
@@ -367,6 +367,8 @@ def compute_state_probability_matrix_equation1(fm, bm, ref_h, query_h, rho, mu):
     Implement the HMM forward-backward algorithm following Equation 1 of BB2016.
     This is simpler than faithfully reimplementing the original BEAGLE 4.1 algorithm.
 
+    See the docstring of `compute_state_probability_matrix` for more details.
+
     :param numpy.ndarray fm: Forward probability matrix.
     :param numpy.ndarray bm: Backward probability matrix.
     :param numpy.ndarray ref_h: Reference haplotypes.
@@ -376,7 +378,24 @@ def compute_state_probability_matrix_equation1(fm, bm, ref_h, query_h, rho, mu):
     :return: HMM state probability matrix.
     :rtype: numpy.ndarray
     """
-    pass
+    m = ref_h.shape[0]
+    h = ref_h.shape[1]
+    assert m == len(query_h)
+    assert m == len(rho)
+    assert m == len(mu)
+    assert 0 <= np.min(rho) and np.max(rho) <= 1
+    # BEAGLE caps mismatch probabilities at 0.5.
+    assert 0 <= np.min(mu) and np.max(mu) <= 0.5
+    assert fm.shape == (m, h)
+    assert bm.shape == (m, h)
+    # Check all biallelic sites
+    assert np.all(np.isin(np.unique(ref_h), [0, 1]))
+    assert np.all(np.isin(np.unique(query_h), [-1, 0, 1]))
+    sm = np.zeros((m, h), dtype=np.float64)
+    for i in np.arange(m - 1, 0, -1):
+        for j in np.arange(h):
+            sm[i, j] = fm[i, j] * bm[i, j]
+    return sm
 
 
 def interpolate_allele_probabilities_equation1(sm, ref_h, genotyped_pos, imputed_pos):