1
- # Simulation
2
1
import copy
3
2
import itertools
4
3
14
13
REF_HOM_OBS_HET = 1
15
14
REF_HET_OBS_HOM = 2
16
15
16
+ MISSING = - 1
17
+
17
18
18
19
def mirror_coordinates (ts ):
19
20
"""
@@ -411,6 +412,7 @@ def update_probabilities(self, site, genotype_state):
411
412
]
412
413
413
414
query_is_het = genotype_state == 1
415
+ query_is_missing = genotype_state == MISSING
414
416
415
417
for st1 in T :
416
418
u1 = st1 .tree_node
@@ -444,6 +446,7 @@ def update_probabilities(self, site, genotype_state):
444
446
match ,
445
447
template_is_het ,
446
448
query_is_het ,
449
+ query_is_missing ,
447
450
)
448
451
449
452
# This will ensure that allelic_state[:n] is filled
@@ -561,7 +564,14 @@ def compute_normalisation_factor_dict(self):
561
564
raise NotImplementedError ()
562
565
563
566
def compute_next_probability_dict (
564
- self , site_id , p_last , inner_summation , is_match , template_is_het , query_is_het
567
+ self ,
568
+ site_id ,
569
+ p_last ,
570
+ inner_summation ,
571
+ is_match ,
572
+ template_is_het ,
573
+ query_is_het ,
574
+ query_is_missing ,
565
575
):
566
576
raise NotImplementedError ()
567
577
@@ -670,41 +680,45 @@ def compute_next_probability_dict(
670
680
is_match ,
671
681
template_is_het ,
672
682
query_is_het ,
683
+ query_is_missing ,
673
684
):
674
685
rho = self .rho [site_id ]
675
686
mu = self .mu [site_id ]
676
687
n = self .ts .num_samples
677
688
678
- template_is_hom = np .logical_not (template_is_het )
679
- query_is_hom = np .logical_not (query_is_het )
680
-
681
- EQUAL_BOTH_HOM = np .logical_and (
682
- np .logical_and (is_match , template_is_hom ), query_is_hom
683
- )
684
- UNEQUAL_BOTH_HOM = np .logical_and (
685
- np .logical_and (np .logical_not (is_match ), template_is_hom ), query_is_hom
686
- )
687
- BOTH_HET = np .logical_and (template_is_het , query_is_het )
688
- REF_HOM_OBS_HET = np .logical_and (template_is_hom , query_is_het )
689
- REF_HET_OBS_HOM = np .logical_and (template_is_het , query_is_hom )
690
-
691
689
p_t = (
692
690
(rho / n ) ** 2
693
691
+ ((1 - rho ) * (rho / n )) * inner_normalisation_factor
694
692
+ (1 - rho ) ** 2 * p_last
695
693
)
696
- p_e = (
697
- EQUAL_BOTH_HOM * (1 - mu ) ** 2
698
- + UNEQUAL_BOTH_HOM * (mu ** 2 )
699
- + REF_HOM_OBS_HET * (2 * mu * (1 - mu ))
700
- + REF_HET_OBS_HOM * (mu * (1 - mu ))
701
- + BOTH_HET * ((1 - mu ) ** 2 + mu ** 2 )
702
- )
694
+
695
+ if query_is_missing :
696
+ p_e = 1
697
+ else :
698
+ query_is_hom = np .logical_not (query_is_het )
699
+ template_is_hom = np .logical_not (template_is_het )
700
+
701
+ equal_both_hom = np .logical_and (
702
+ np .logical_and (is_match , template_is_hom ), query_is_hom
703
+ )
704
+ unequal_both_hom = np .logical_and (
705
+ np .logical_and (np .logical_not (is_match ), template_is_hom ), query_is_hom
706
+ )
707
+ both_het = np .logical_and (template_is_het , query_is_het )
708
+ ref_hom_obs_het = np .logical_and (template_is_hom , query_is_het )
709
+ ref_het_obs_hom = np .logical_and (template_is_het , query_is_hom )
710
+
711
+ p_e = (
712
+ equal_both_hom * (1 - mu ) ** 2
713
+ + unequal_both_hom * (mu ** 2 )
714
+ + ref_hom_obs_het * (2 * mu * (1 - mu ))
715
+ + ref_het_obs_hom * (mu * (1 - mu ))
716
+ + both_het * ((1 - mu ) ** 2 + mu ** 2 )
717
+ )
703
718
704
719
return p_t * p_e
705
720
706
721
707
- # DEV: Sort this
708
722
class BackwardAlgorithm (LsHmmAlgorithm ):
709
723
"""Runs the Li and Stephens forward algorithm."""
710
724
@@ -737,29 +751,35 @@ def compute_next_probability_dict(
737
751
is_match ,
738
752
template_is_het ,
739
753
query_is_het ,
754
+ query_is_missing ,
740
755
):
741
756
mu = self .mu [site_id ]
742
757
743
758
template_is_hom = np .logical_not (template_is_het )
744
- query_is_hom = np .logical_not (query_is_het )
745
759
746
- EQUAL_BOTH_HOM = np .logical_and (
747
- np .logical_and (is_match , template_is_hom ), query_is_hom
748
- )
749
- UNEQUAL_BOTH_HOM = np .logical_and (
750
- np .logical_and (np .logical_not (is_match ), template_is_hom ), query_is_hom
751
- )
752
- BOTH_HET = np .logical_and (template_is_het , query_is_het )
753
- REF_HOM_OBS_HET = np .logical_and (template_is_hom , query_is_het )
754
- REF_HET_OBS_HOM = np .logical_and (template_is_het , query_is_hom )
755
-
756
- p_e = (
757
- EQUAL_BOTH_HOM * (1 - mu ) ** 2
758
- + UNEQUAL_BOTH_HOM * (mu ** 2 )
759
- + REF_HOM_OBS_HET * (2 * mu * (1 - mu ))
760
- + REF_HET_OBS_HOM * (mu * (1 - mu ))
761
- + BOTH_HET * ((1 - mu ) ** 2 + mu ** 2 )
762
- )
760
+ if query_is_missing :
761
+ p_e = 1
762
+ else :
763
+ query_is_hom = np .logical_not (query_is_het )
764
+
765
+ equal_both_hom = np .logical_and (
766
+ np .logical_and (is_match , template_is_hom ), query_is_hom
767
+ )
768
+ unequal_both_hom = np .logical_and (
769
+ np .logical_and (np .logical_not (is_match ), template_is_hom ), query_is_hom
770
+ )
771
+ both_het = np .logical_and (template_is_het , query_is_het )
772
+ ref_hom_obs_het = np .logical_and (template_is_hom , query_is_het )
773
+ ref_het_obs_hom = np .logical_and (template_is_het , query_is_hom )
774
+
775
+ p_e = (
776
+ equal_both_hom * (1 - mu ) ** 2
777
+ + unequal_both_hom * (mu ** 2 )
778
+ + ref_hom_obs_het * (2 * mu * (1 - mu ))
779
+ + ref_het_obs_hom * (mu * (1 - mu ))
780
+ + both_het * ((1 - mu ) ** 2 + mu ** 2 )
781
+ )
782
+
763
783
return p_next * p_e
764
784
765
785
@@ -797,18 +817,33 @@ def example_genotypes(self, ts):
797
817
s = H [:, 0 ].reshape (1 , H .shape [0 ]) + H [:, 1 ].reshape (1 , H .shape [0 ])
798
818
H = H [:, 2 :]
799
819
820
+ genotypes = [
821
+ s ,
822
+ H [:, - 1 ].reshape (1 , H .shape [0 ]) + H [:, - 2 ].reshape (1 , H .shape [0 ]),
823
+ ]
824
+
825
+ s_tmp = s .copy ()
826
+ s_tmp [0 , - 1 ] = MISSING
827
+ genotypes .append (s_tmp )
828
+ s_tmp = s .copy ()
829
+ s_tmp [0 , ts .num_sites // 2 ] = MISSING
830
+ genotypes .append (s_tmp )
831
+ s_tmp = s .copy ()
832
+ s_tmp [0 , :] = MISSING
833
+ genotypes .append (s_tmp )
834
+
800
835
m = ts .get_num_sites ()
801
836
n = H .shape [1 ]
802
837
803
838
G = np .zeros ((m , n , n ))
804
839
for i in range (m ):
805
840
G [i , :, :] = np .add .outer (H [i , :], H [i , :])
806
841
807
- return H , G , s
842
+ return H , G , genotypes
808
843
809
844
def example_parameters_genotypes (self , ts , seed = 42 ):
810
845
np .random .seed (seed )
811
- H , G , s = self .example_genotypes (ts )
846
+ H , G , genotypes = self .example_genotypes (ts )
812
847
n = H .shape [1 ]
813
848
m = ts .get_num_sites ()
814
849
@@ -819,13 +854,16 @@ def example_parameters_genotypes(self, ts, seed=42):
819
854
820
855
e = self .genotype_emission (mu , m )
821
856
822
- yield n , m , G , s , e , r , mu
857
+ for s in genotypes :
858
+ yield n , m , G , s , e , r , mu
823
859
824
860
# Mixture of random and extremes
825
861
rs = [np .zeros (m ) + 0.999 , np .zeros (m ) + 1e-6 , np .random .rand (m )]
826
862
mus = [np .zeros (m ) + 0.33 , np .zeros (m ) + 1e-6 , np .random .rand (m ) * 0.33 ]
827
863
828
- for r , mu in itertools .product (rs , mus ):
864
+ e = self .genotype_emission (mu , m )
865
+
866
+ for s , r , mu in itertools .product (genotypes , rs , mus ):
829
867
r [0 ] = 0
830
868
e = self .genotype_emission (mu , m )
831
869
yield n , m , G , s , e , r , mu
0 commit comments