@@ -2225,15 +2225,15 @@ get_allele_samples(const tsk_site_t *site, const tsk_bit_array_t *state,
2225
2225
}
2226
2226
2227
2227
static int
2228
- norm_hap_weighted (tsk_size_t state_dim , const double * hap_weights ,
2228
+ norm_hap_weighted (tsk_size_t result_dim , const double * hap_weights ,
2229
2229
tsk_size_t TSK_UNUSED (n_a ), tsk_size_t TSK_UNUSED (n_b ), double * result , void * params )
2230
2230
{
2231
2231
sample_count_stat_params_t args = * (sample_count_stat_params_t * ) params ;
2232
2232
const double * weight_row ;
2233
2233
double n ;
2234
2234
tsk_size_t k ;
2235
2235
2236
- for (k = 0 ; k < state_dim ; k ++ ) {
2236
+ for (k = 0 ; k < result_dim ; k ++ ) {
2237
2237
weight_row = GET_2D_ROW (hap_weights , 3 , k );
2238
2238
n = (double ) args .sample_set_sizes [k ];
2239
2239
// TODO: what to do when n = 0
@@ -2243,12 +2243,38 @@ norm_hap_weighted(tsk_size_t state_dim, const double *hap_weights,
2243
2243
}
2244
2244
2245
2245
static int
2246
- norm_total_weighted (tsk_size_t state_dim , const double * TSK_UNUSED (hap_weights ),
2246
+ norm_hap_weighted_ij (tsk_size_t result_dim , const double * hap_weights ,
2247
+ tsk_size_t TSK_UNUSED (n_a ), tsk_size_t TSK_UNUSED (n_b ), double * result , void * params )
2248
+ {
2249
+ sample_count_stat_params_t args = * (sample_count_stat_params_t * ) params ;
2250
+ const double * weight_row ;
2251
+ double ni , nj , wAB_i , wAB_j ;
2252
+ tsk_id_t i , j ;
2253
+ tsk_size_t k ;
2254
+
2255
+ for (k = 0 ; k < result_dim ; k ++ ) {
2256
+ i = args .set_indexes [2 * k ];
2257
+ j = args .set_indexes [2 * k + 1 ];
2258
+ ni = (double ) args .sample_set_sizes [i ];
2259
+ nj = (double ) args .sample_set_sizes [j ];
2260
+ weight_row = GET_2D_ROW (hap_weights , 3 , i );
2261
+ wAB_i = weight_row [0 ];
2262
+ weight_row = GET_2D_ROW (hap_weights , 3 , j );
2263
+ wAB_j = weight_row [0 ];
2264
+
2265
+ result [k ] = (wAB_i + wAB_j ) / (ni + nj );
2266
+ }
2267
+
2268
+ return 0 ;
2269
+ }
2270
+
2271
+ static int
2272
+ norm_total_weighted (tsk_size_t result_dim , const double * TSK_UNUSED (hap_weights ),
2247
2273
tsk_size_t n_a , tsk_size_t n_b , double * result , void * TSK_UNUSED (params ))
2248
2274
{
2249
2275
tsk_size_t k ;
2250
2276
2251
- for (k = 0 ; k < state_dim ; k ++ ) {
2277
+ for (k = 0 ; k < result_dim ; k ++ ) {
2252
2278
result [k ] = 1 / (double ) (n_a * n_b );
2253
2279
}
2254
2280
return 0 ;
@@ -2268,9 +2294,6 @@ get_all_samples_bits(tsk_bit_array_t *all_samples, tsk_size_t n)
2268
2294
}
2269
2295
}
2270
2296
2271
- typedef int norm_func_t (tsk_size_t state_dim , const double * hap_weights , tsk_size_t n_a ,
2272
- tsk_size_t n_b , double * result , void * params );
2273
-
2274
2297
static int
2275
2298
compute_general_two_site_stat_result (const tsk_bit_array_t * site_a_state ,
2276
2299
const tsk_bit_array_t * site_b_state , tsk_size_t num_a_alleles ,
@@ -2290,14 +2313,15 @@ compute_general_two_site_stat_result(const tsk_bit_array_t *site_a_state,
2290
2313
// a2 [s1, s2, s3] [s1, s2, s3] [s1, s2, s3]
2291
2314
// a3 [s1, s2, s3] [s1, s2, s3] [s1, s2, s3]
2292
2315
tsk_size_t k , mut_a , mut_b ;
2293
- tsk_size_t row_len = num_b_alleles * state_dim ;
2316
+ tsk_size_t result_row_len = num_b_alleles * result_dim ;
2294
2317
tsk_size_t w_A = 0 , w_B = 0 , w_AB = 0 ;
2295
2318
uint8_t polarised_val = polarised ? 1 : 0 ;
2296
2319
double * hap_weight_row ;
2297
2320
double * result_tmp_row ;
2298
2321
double * weights = tsk_malloc (3 * state_dim * sizeof (* weights ));
2299
- double * norm = tsk_malloc (state_dim * sizeof (* norm ));
2300
- double * result_tmp = tsk_malloc (row_len * num_a_alleles * sizeof (* result_tmp ));
2322
+ double * norm = tsk_malloc (result_dim * sizeof (* norm ));
2323
+ double * result_tmp
2324
+ = tsk_malloc (result_row_len * num_a_alleles * sizeof (* result_tmp ));
2301
2325
2302
2326
tsk_memset (& ss_A_samples , 0 , sizeof (ss_A_samples ));
2303
2327
tsk_memset (& ss_B_samples , 0 , sizeof (ss_B_samples ));
@@ -2327,7 +2351,7 @@ compute_general_two_site_stat_result(const tsk_bit_array_t *site_a_state,
2327
2351
}
2328
2352
2329
2353
for (mut_a = polarised_val ; mut_a < num_a_alleles ; mut_a ++ ) {
2330
- result_tmp_row = GET_2D_ROW (result_tmp , row_len , mut_a );
2354
+ result_tmp_row = GET_2D_ROW (result_tmp , result_row_len , mut_a );
2331
2355
for (mut_b = polarised_val ; mut_b < num_b_alleles ; mut_b ++ ) {
2332
2356
tsk_bit_array_get_row (site_a_state , mut_a , & A_samples );
2333
2357
tsk_bit_array_get_row (site_b_state , mut_b , & B_samples );
@@ -2352,15 +2376,15 @@ compute_general_two_site_stat_result(const tsk_bit_array_t *site_a_state,
2352
2376
if (ret != 0 ) {
2353
2377
goto out ;
2354
2378
}
2355
- ret = norm_f (state_dim , weights , num_a_alleles - polarised_val ,
2379
+ ret = norm_f (result_dim , weights , num_a_alleles - polarised_val ,
2356
2380
num_b_alleles - polarised_val , norm , f_params );
2357
2381
if (ret != 0 ) {
2358
2382
goto out ;
2359
2383
}
2360
- for (k = 0 ; k < state_dim ; k ++ ) {
2384
+ for (k = 0 ; k < result_dim ; k ++ ) {
2361
2385
result [k ] += result_tmp_row [k ] * norm [k ];
2362
2386
}
2363
- result_tmp_row += state_dim ; // Advance to the next column
2387
+ result_tmp_row += result_dim ; // Advance to the next column
2364
2388
}
2365
2389
}
2366
2390
@@ -2538,8 +2562,8 @@ tsk_treeseq_two_site_count_stat(const tsk_treeseq_t *self, tsk_size_t state_dim,
2538
2562
get_site_row_col_indices (
2539
2563
n_rows , row_sites , n_cols , col_sites , sites , & n_sites , row_idx , col_idx );
2540
2564
2541
- // We rely on n_sites to allocate these arrays, they're initialized to NULL for safe
2542
- // deallocation if the previous allocation fails
2565
+ // We rely on n_sites to allocate these arrays, which are initialized
2566
+ // to NULL for safe deallocation if the previous allocation fails
2543
2567
num_alleles = tsk_malloc (n_sites * sizeof (* num_alleles ));
2544
2568
site_offsets = tsk_malloc (n_sites * sizeof (* site_offsets ));
2545
2569
if (num_alleles == NULL || site_offsets == NULL ) {
@@ -3195,7 +3219,7 @@ tsk_treeseq_two_branch_count_stat(const tsk_treeseq_t *self, tsk_size_t state_di
3195
3219
return ret ;
3196
3220
}
3197
3221
3198
- static int
3222
+ int
3199
3223
tsk_treeseq_two_locus_count_stat (const tsk_treeseq_t * self , tsk_size_t num_sample_sets ,
3200
3224
const tsk_size_t * sample_set_sizes , const tsk_id_t * sample_sets ,
3201
3225
tsk_size_t result_dim , const tsk_id_t * set_indexes , general_stat_func_t * f ,
@@ -3209,7 +3233,6 @@ tsk_treeseq_two_locus_count_stat(const tsk_treeseq_t *self, tsk_size_t num_sampl
3209
3233
tsk_bit_array_t sample_sets_bits ;
3210
3234
bool stat_site = !!(options & TSK_STAT_SITE );
3211
3235
bool stat_branch = !!(options & TSK_STAT_BRANCH );
3212
- // double default_windows[] = { 0, self->tables->sequence_length };
3213
3236
tsk_size_t state_dim = num_sample_sets ;
3214
3237
sample_count_stat_params_t f_params = { .sample_sets = sample_sets ,
3215
3238
.num_sample_sets = num_sample_sets ,
@@ -3232,17 +3255,15 @@ tsk_treeseq_two_locus_count_stat(const tsk_treeseq_t *self, tsk_size_t num_sampl
3232
3255
ret = tsk_trace_error (TSK_ERR_MULTIPLE_STAT_MODES );
3233
3256
goto out ;
3234
3257
}
3235
- // TODO: impossible until we implement branch/windows
3236
- // if (result_dim < 1) {
3237
- // ret = tsk_trace_error(TSK_ERR_BAD_RESULT_DIMS);
3238
- // goto out;
3239
- // }
3240
3258
ret = tsk_treeseq_check_sample_sets (
3241
3259
self , num_sample_sets , sample_set_sizes , sample_sets );
3242
3260
if (ret != 0 ) {
3243
3261
goto out ;
3244
3262
}
3245
- tsk_bug_assert (state_dim > 0 );
3263
+ if (result_dim < 1 ) {
3264
+ ret = tsk_trace_error (TSK_ERR_BAD_RESULT_DIMS );
3265
+ goto out ;
3266
+ }
3246
3267
ret = sample_sets_to_bit_array (
3247
3268
self , sample_set_sizes , sample_sets , num_sample_sets , & sample_sets_bits );
3248
3269
if (ret != 0 ) {
@@ -4781,6 +4802,200 @@ tsk_treeseq_f2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
4781
4802
return ret ;
4782
4803
}
4783
4804
4805
+ static int
4806
+ D2_ij_summary_func (tsk_size_t TSK_UNUSED (state_dim ), const double * state ,
4807
+ tsk_size_t result_dim , double * result , void * params )
4808
+ {
4809
+ sample_count_stat_params_t args = * (sample_count_stat_params_t * ) params ;
4810
+ const double * state_row ;
4811
+ double n ;
4812
+ tsk_size_t k ;
4813
+ tsk_id_t i , j ;
4814
+ double p_A , p_B , p_AB , p_Ab , p_aB , D_i , D_j ;
4815
+
4816
+ for (k = 0 ; k < result_dim ; k ++ ) {
4817
+ i = args .set_indexes [2 * k ];
4818
+ j = args .set_indexes [2 * k + 1 ];
4819
+
4820
+ n = (double ) args .sample_set_sizes [i ];
4821
+ state_row = GET_2D_ROW (state , 3 , i );
4822
+ p_AB = state_row [0 ] / n ;
4823
+ p_Ab = state_row [1 ] / n ;
4824
+ p_aB = state_row [2 ] / n ;
4825
+ p_A = p_AB + p_Ab ;
4826
+ p_B = p_AB + p_aB ;
4827
+ D_i = p_AB - (p_A * p_B );
4828
+
4829
+ n = (double ) args .sample_set_sizes [j ];
4830
+ state_row = GET_2D_ROW (state , 3 , j );
4831
+ p_AB = state_row [0 ] / n ;
4832
+ p_Ab = state_row [1 ] / n ;
4833
+ p_aB = state_row [2 ] / n ;
4834
+ p_A = p_AB + p_Ab ;
4835
+ p_B = p_AB + p_aB ;
4836
+ D_j = p_AB - (p_A * p_B );
4837
+
4838
+ result [k ] = D_i * D_j ;
4839
+ }
4840
+
4841
+ return 0 ;
4842
+ }
4843
+
4844
+ int
4845
+ tsk_treeseq_D2_ij (const tsk_treeseq_t * self , tsk_size_t num_sample_sets ,
4846
+ const tsk_size_t * sample_set_sizes , const tsk_id_t * sample_sets ,
4847
+ tsk_size_t num_index_tuples , const tsk_id_t * index_tuples , tsk_size_t num_rows ,
4848
+ const tsk_id_t * row_sites , const double * row_positions , tsk_size_t num_cols ,
4849
+ const tsk_id_t * col_sites , const double * col_positions , tsk_flags_t options ,
4850
+ double * result )
4851
+ {
4852
+ int ret = 0 ;
4853
+ ret = check_sample_stat_inputs (num_sample_sets , 2 , num_index_tuples , index_tuples );
4854
+ if (ret != 0 ) {
4855
+ goto out ;
4856
+ }
4857
+ ret = tsk_treeseq_two_locus_count_stat (self , num_sample_sets , sample_set_sizes ,
4858
+ sample_sets , num_index_tuples , index_tuples , D2_ij_summary_func ,
4859
+ norm_total_weighted , num_rows , row_sites , row_positions , num_cols , col_sites ,
4860
+ col_positions , options , result );
4861
+ out :
4862
+ return ret ;
4863
+ }
4864
+
4865
+ static int
4866
+ D2_ij_unbiased_summary_func (tsk_size_t TSK_UNUSED (state_dim ), const double * state ,
4867
+ tsk_size_t result_dim , double * result , void * params )
4868
+ {
4869
+ sample_count_stat_params_t args = * (sample_count_stat_params_t * ) params ;
4870
+ const double * state_row ;
4871
+ tsk_size_t k ;
4872
+ tsk_id_t i , j ;
4873
+ double n_i , n_j ;
4874
+ double w_AB_i , w_Ab_i , w_aB_i , w_ab_i ;
4875
+ double w_AB_j , w_Ab_j , w_aB_j , w_ab_j ;
4876
+
4877
+ for (k = 0 ; k < result_dim ; k ++ ) {
4878
+ i = args .set_indexes [2 * k ];
4879
+ j = args .set_indexes [2 * k + 1 ];
4880
+ if (i == j ) {
4881
+ // We require disjoint sample sets because we test equality here
4882
+ n_i = (double ) args .sample_set_sizes [i ];
4883
+ state_row = GET_2D_ROW (state , 3 , i );
4884
+ w_AB_i = state_row [0 ];
4885
+ w_Ab_i = state_row [1 ];
4886
+ w_aB_i = state_row [2 ];
4887
+ w_ab_i = n_i - (w_AB_i + w_Ab_i + w_aB_i );
4888
+ result [k ] = (w_AB_i * (w_AB_i - 1 ) * w_ab_i * (w_ab_i - 1 )
4889
+ + w_Ab_i * (w_Ab_i - 1 ) * w_aB_i * (w_aB_i - 1 )
4890
+ - 2 * w_AB_i * w_Ab_i * w_aB_i * w_ab_i )
4891
+ / n_i / (n_i - 1 ) / (n_i - 2 ) / (n_i - 3 );
4892
+ }
4893
+
4894
+ else {
4895
+ n_i = (double ) args .sample_set_sizes [i ];
4896
+ state_row = GET_2D_ROW (state , 3 , i );
4897
+ w_AB_i = state_row [0 ];
4898
+ w_Ab_i = state_row [1 ];
4899
+ w_aB_i = state_row [2 ];
4900
+ w_ab_i = n_i - (w_AB_i + w_Ab_i + w_aB_i );
4901
+
4902
+ n_j = (double ) args .sample_set_sizes [j ];
4903
+ state_row = GET_2D_ROW (state , 3 , j );
4904
+ w_AB_j = state_row [0 ];
4905
+ w_Ab_j = state_row [1 ];
4906
+ w_aB_j = state_row [2 ];
4907
+ w_ab_j = n_j - (w_AB_j + w_Ab_j + w_aB_j );
4908
+
4909
+ result [k ] = (w_Ab_i * w_aB_i - w_AB_i * w_ab_i )
4910
+ * (w_Ab_j * w_aB_j - w_AB_j * w_ab_j ) / n_i / (n_i - 1 ) / n_j
4911
+ / (n_j - 1 );
4912
+ }
4913
+ }
4914
+
4915
+ return 0 ;
4916
+ }
4917
+
4918
+ int
4919
+ tsk_treeseq_D2_ij_unbiased (const tsk_treeseq_t * self , tsk_size_t num_sample_sets ,
4920
+ const tsk_size_t * sample_set_sizes , const tsk_id_t * sample_sets ,
4921
+ tsk_size_t num_index_tuples , const tsk_id_t * index_tuples , tsk_size_t num_rows ,
4922
+ const tsk_id_t * row_sites , const double * row_positions , tsk_size_t num_cols ,
4923
+ const tsk_id_t * col_sites , const double * col_positions , tsk_flags_t options ,
4924
+ double * result )
4925
+ {
4926
+ int ret = 0 ;
4927
+ ret = check_sample_stat_inputs (num_sample_sets , 2 , num_index_tuples , index_tuples );
4928
+ if (ret != 0 ) {
4929
+ goto out ;
4930
+ }
4931
+ ret = tsk_treeseq_two_locus_count_stat (self , num_sample_sets , sample_set_sizes ,
4932
+ sample_sets , num_index_tuples , index_tuples , D2_ij_unbiased_summary_func ,
4933
+ norm_total_weighted , num_rows , row_sites , row_positions , num_cols , col_sites ,
4934
+ col_positions , options , result );
4935
+ out :
4936
+ return ret ;
4937
+ }
4938
+
4939
+ static int
4940
+ r2_ij_summary_func (tsk_size_t TSK_UNUSED (state_dim ), const double * state ,
4941
+ tsk_size_t result_dim , double * result , void * params )
4942
+ {
4943
+ sample_count_stat_params_t args = * (sample_count_stat_params_t * ) params ;
4944
+ const double * state_row ;
4945
+ tsk_size_t k ;
4946
+ tsk_id_t i , j ;
4947
+ double n , pAB , pAb , paB , pA , pB , D_i , D_j , denom_i , denom_j ;
4948
+
4949
+ for (k = 0 ; k < result_dim ; k ++ ) {
4950
+ i = args .set_indexes [2 * k ];
4951
+ j = args .set_indexes [2 * k + 1 ];
4952
+
4953
+ n = (double ) args .sample_set_sizes [i ];
4954
+ state_row = GET_2D_ROW (state , 3 , i );
4955
+ pAB = state_row [0 ] / n ;
4956
+ pAb = state_row [1 ] / n ;
4957
+ paB = state_row [2 ] / n ;
4958
+ pA = pAB + pAb ;
4959
+ pB = pAB + paB ;
4960
+ D_i = pAB - (pA * pB );
4961
+ denom_i = sqrt (pA * (1 - pA ) * pB * (1 - pB ));
4962
+
4963
+ n = (double ) args .sample_set_sizes [j ];
4964
+ state_row = GET_2D_ROW (state , 3 , j );
4965
+ pAB = state_row [0 ] / n ;
4966
+ pAb = state_row [1 ] / n ;
4967
+ paB = state_row [2 ] / n ;
4968
+ pA = pAB + pAb ;
4969
+ pB = pAB + paB ;
4970
+ D_j = pAB - (pA * pB );
4971
+ denom_j = sqrt (pA * (1 - pA ) * pB * (1 - pB ));
4972
+
4973
+ result [k ] = (D_i * D_j ) / (denom_i * denom_j );
4974
+ }
4975
+ return 0 ;
4976
+ }
4977
+
4978
+ int
4979
+ tsk_treeseq_r2_ij (const tsk_treeseq_t * self , tsk_size_t num_sample_sets ,
4980
+ const tsk_size_t * sample_set_sizes , const tsk_id_t * sample_sets ,
4981
+ tsk_size_t num_index_tuples , const tsk_id_t * index_tuples , tsk_size_t num_rows ,
4982
+ const tsk_id_t * row_sites , const double * row_positions , tsk_size_t num_cols ,
4983
+ const tsk_id_t * col_sites , const double * col_positions , tsk_flags_t options ,
4984
+ double * result )
4985
+ {
4986
+ int ret = 0 ;
4987
+ ret = check_sample_stat_inputs (num_sample_sets , 2 , num_index_tuples , index_tuples );
4988
+ if (ret != 0 ) {
4989
+ goto out ;
4990
+ }
4991
+ ret = tsk_treeseq_two_locus_count_stat (self , num_sample_sets , sample_set_sizes ,
4992
+ sample_sets , num_index_tuples , index_tuples , r2_ij_summary_func ,
4993
+ norm_hap_weighted_ij , num_rows , row_sites , row_positions , num_cols , col_sites ,
4994
+ col_positions , options , result );
4995
+ out :
4996
+ return ret ;
4997
+ }
4998
+
4784
4999
/***********************************
4785
5000
* Three way stats
4786
5001
***********************************/
0 commit comments