Skip to content

Commit 57594f4

Browse files
C changes to enable non-zero ancestral state
1 parent 1d8f79f commit 57594f4

File tree

6 files changed

+46
-16
lines changed

6 files changed

+46
-16
lines changed

lib/ancestor_matcher.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -257,12 +257,12 @@ static inline void
257257
ancestor_matcher_set_allelic_state(
258258
ancestor_matcher_t *self, const tsk_id_t site, allele_t *restrict allelic_state)
259259
{
260+
const tree_sequence_builder_t *tsb = self->tree_sequence_builder;
260261
mutation_list_node_t *mutation;
261262

262-
/* FIXME assuming that 0 is always the ancestral state */
263-
allelic_state[0] = 0;
263+
allelic_state[0] = tsb->sites.ancestral_state[site];
264264

265-
for (mutation = self->tree_sequence_builder->sites.mutations[site]; mutation != NULL;
265+
for (mutation = tsb->sites.mutations[site]; mutation != NULL;
266266
mutation = mutation->next) {
267267
allelic_state[mutation->node] = mutation->derived_state;
268268
}

lib/err.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ tsi_strerror(int err)
9393
case TSI_ERR_BAD_MUTATION_DUPLICATE_NODE:
9494
ret = "Bad mutation information: mutation already exists for this node.";
9595
break;
96+
case TSI_ERR_BAD_ANCESTRAL_STATE:
97+
ret = "Bad derived state for site";
98+
break;
9699
case TSI_ERR_BAD_NUM_SAMPLES:
97100
ret = "Must have at least 2 samples.";
98101
break;

lib/err.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#define TSI_ERR_BAD_FOCAL_SITE -21
2626
#define TSI_ERR_MATCH_IMPOSSIBLE_EXTREME_MUTATION_PROBA -22
2727
#define TSI_ERR_MATCH_IMPOSSIBLE_ZERO_RECOMB_PRECISION -23
28+
#define TSI_ERR_BAD_ANCESTRAL_STATE -24
2829
// clang-format on
2930

3031
#ifdef __GNUC__

lib/tests/tests.c

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ verify_restore_tsb(tree_sequence_builder_t *tsb, tsk_table_collection_t *tables)
153153
CU_ASSERT_EQUAL_FATAL(ret, 0);
154154

155155
ret = tree_sequence_builder_alloc(
156-
&other_tsb, tsb->num_sites, tsb->sites.num_alleles, 1, 1, 0);
156+
&other_tsb, tsb->num_sites, tsb->sites.num_alleles, NULL, 1, 1, 0);
157157
CU_ASSERT_EQUAL_FATAL(ret, 0);
158158

159159
ret = tree_sequence_builder_restore_nodes(
@@ -361,7 +361,7 @@ run_random_data(size_t num_samples, size_t num_sites, int seed,
361361
CU_ASSERT_FATAL(num_samples >= 2);
362362
ret = ancestor_builder_alloc(&ancestor_builder, num_samples, num_sites, 0);
363363
CU_ASSERT_EQUAL_FATAL(ret, 0);
364-
ret = tree_sequence_builder_alloc(&tsb, num_sites, NULL, 1, 1, 0);
364+
ret = tree_sequence_builder_alloc(&tsb, num_sites, NULL, NULL, 1, 1, 0);
365365
CU_ASSERT_EQUAL_FATAL(ret, 0);
366366
ret = ancestor_matcher_alloc(&ancestor_matcher, &tsb, recombination_rates,
367367
mismatch_rates, DBL_MIN, TSI_EXTENDED_CHECKS);
@@ -528,7 +528,7 @@ test_matching_one_site(void)
528528
size_t num_edges;
529529
tsk_id_t *left, *right, *parent;
530530

531-
ret = tree_sequence_builder_alloc(&tsb, 1, NULL, 1, 1, 0);
531+
ret = tree_sequence_builder_alloc(&tsb, 1, NULL, NULL, 1, 1, 0);
532532
CU_ASSERT_EQUAL_FATAL(ret, 0);
533533
ret = tree_sequence_builder_add_node(&tsb, 2.0, 0);
534534
CU_ASSERT_EQUAL_FATAL(ret, 0);
@@ -597,7 +597,7 @@ test_matching_one_site_many_alleles(void)
597597
allele_t derived_state;
598598

599599
/* Create a linear topology with a mutation over each node */
600-
ret = tree_sequence_builder_alloc(&tsb, 1, &num_alleles, 1, 1, 0);
600+
ret = tree_sequence_builder_alloc(&tsb, 1, &num_alleles, NULL, 1, 1, 0);
601601
CU_ASSERT_EQUAL_FATAL(ret, 0);
602602
ret = tree_sequence_builder_add_node(&tsb, num_nodes, 0);
603603
CU_ASSERT_EQUAL_FATAL(ret, 0);
@@ -674,7 +674,7 @@ test_matching_errors(void)
674674
size_t num_edges;
675675
tsk_id_t *left, *right, *parent;
676676

677-
ret = tree_sequence_builder_alloc(&tsb, 2, NULL, 1, 1, 0);
677+
ret = tree_sequence_builder_alloc(&tsb, 2, NULL, NULL, 1, 1, 0);
678678
CU_ASSERT_EQUAL_FATAL(ret, 0);
679679
ret = tree_sequence_builder_add_node(&tsb, 2.0, 0);
680680
CU_ASSERT_EQUAL_FATAL(ret, 0);
@@ -708,26 +708,38 @@ test_tsb_errors(void)
708708
int ret;
709709
tree_sequence_builder_t tsb;
710710
tsk_size_t num_alleles;
711+
allele_t ancestral_state;
711712
tsk_id_t left, right, parent;
712713
tsk_id_t left_arr[2], right_arr[2], parent_arr[2];
713714

714715
num_alleles = 0;
715-
ret = tree_sequence_builder_alloc(&tsb, 1, &num_alleles, 1, 1, 0);
716+
ret = tree_sequence_builder_alloc(&tsb, 1, &num_alleles, NULL, 1, 1, 0);
716717
CU_ASSERT_EQUAL_FATAL(ret, TSI_ERR_BAD_NUM_ALLELES);
717718
tree_sequence_builder_free(&tsb);
718719

719720
num_alleles = 1;
720-
ret = tree_sequence_builder_alloc(&tsb, 1, &num_alleles, 1, 1, 0);
721+
ret = tree_sequence_builder_alloc(&tsb, 1, &num_alleles, NULL, 1, 1, 0);
721722
CU_ASSERT_EQUAL_FATAL(ret, TSI_ERR_BAD_NUM_ALLELES);
722723
tree_sequence_builder_free(&tsb);
723724

724725
num_alleles = INT8_MAX + 1;
725-
ret = tree_sequence_builder_alloc(&tsb, 1, &num_alleles, 1, 1, 0);
726+
ret = tree_sequence_builder_alloc(&tsb, 1, &num_alleles, NULL, 1, 1, 0);
726727
CU_ASSERT_EQUAL_FATAL(ret, TSI_ERR_BAD_NUM_ALLELES);
727728
tree_sequence_builder_free(&tsb);
728729

729730
num_alleles = 2;
730-
ret = tree_sequence_builder_alloc(&tsb, 1, &num_alleles, 1, 1, 0);
731+
ancestral_state = -1;
732+
ret = tree_sequence_builder_alloc(&tsb, 1, &num_alleles, &ancestral_state, 1, 1, 0);
733+
CU_ASSERT_EQUAL_FATAL(ret, TSI_ERR_BAD_ANCESTRAL_STATE);
734+
tree_sequence_builder_free(&tsb);
735+
736+
ancestral_state = 2;
737+
ret = tree_sequence_builder_alloc(&tsb, 1, &num_alleles, &ancestral_state, 1, 1, 0);
738+
CU_ASSERT_EQUAL_FATAL(ret, TSI_ERR_BAD_ANCESTRAL_STATE);
739+
tree_sequence_builder_free(&tsb);
740+
741+
num_alleles = 2;
742+
ret = tree_sequence_builder_alloc(&tsb, 1, &num_alleles, NULL, 1, 1, 0);
731743
/* Add two nodes so we can test adding paths */
732744
ret = tree_sequence_builder_add_node(&tsb, 2.0, 0);
733745
CU_ASSERT_EQUAL_FATAL(ret, 0);

lib/tree_sequence_builder.c

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,8 @@ tree_sequence_builder_print_state(tree_sequence_builder_t *self, FILE *out)
207207

208208
int
209209
tree_sequence_builder_alloc(tree_sequence_builder_t *self, size_t num_sites,
210-
tsk_size_t *num_alleles, size_t nodes_chunk_size, size_t edges_chunk_size, int flags)
210+
tsk_size_t *num_alleles, allele_t *ancestral_state, size_t nodes_chunk_size,
211+
size_t edges_chunk_size, int flags)
211212
{
212213
int ret = 0;
213214
size_t j;
@@ -229,8 +230,11 @@ tree_sequence_builder_alloc(tree_sequence_builder_t *self, size_t num_sites,
229230
self->path = calloc(self->max_nodes, sizeof(*self->path));
230231
self->sites.mutations = calloc(self->num_sites, sizeof(*self->sites.mutations));
231232
self->sites.num_alleles = calloc(self->num_sites, sizeof(*self->sites.num_alleles));
233+
self->sites.ancestral_state
234+
= calloc(self->num_sites, sizeof(*self->sites.ancestral_state));
232235
if (self->time == NULL || self->node_flags == NULL || self->path == NULL
233-
|| self->sites.mutations == NULL) {
236+
|| self->sites.mutations == NULL || self->sites.num_alleles == NULL
237+
|| self->sites.ancestral_state == NULL) {
234238
ret = TSI_ERR_NO_MEMORY;
235239
goto out;
236240
}
@@ -263,6 +267,14 @@ tree_sequence_builder_alloc(tree_sequence_builder_t *self, size_t num_sites,
263267
}
264268
self->sites.num_alleles[j] = num_alleles[j];
265269
}
270+
if (ancestral_state != NULL) {
271+
if (ancestral_state[j] < 0
272+
|| ancestral_state[j] >= (allele_t) num_alleles[j]) {
273+
ret = TSI_ERR_BAD_ANCESTRAL_STATE;
274+
goto out;
275+
}
276+
self->sites.ancestral_state[j] = ancestral_state[j];
277+
}
266278
}
267279
out:
268280
return ret;
@@ -276,6 +288,7 @@ tree_sequence_builder_free(tree_sequence_builder_t *self)
276288
tsi_safe_free(self->node_flags);
277289
tsi_safe_free(self->sites.mutations);
278290
tsi_safe_free(self->sites.num_alleles);
291+
tsi_safe_free(self->sites.ancestral_state);
279292
tsi_safe_free(self->left_index_edges);
280293
tsi_safe_free(self->right_index_edges);
281294
tsi_blkalloc_free(&self->tsi_blkalloc);

lib/tsinfer.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ typedef struct {
130130
struct {
131131
mutation_list_node_t **mutations;
132132
tsk_size_t *num_alleles;
133+
allele_t *ancestral_state;
133134
} sites;
134135
/* TODO add nodes struct */
135136
double *time;
@@ -217,8 +218,8 @@ double ancestor_matcher_get_mean_traceback_size(ancestor_matcher_t *self);
217218
size_t ancestor_matcher_get_total_memory(ancestor_matcher_t *self);
218219

219220
int tree_sequence_builder_alloc(tree_sequence_builder_t *self, size_t num_sites,
220-
tsk_size_t *num_alleles, size_t nodes_chunk_size, size_t edges_chunk_size,
221-
int flags);
221+
tsk_size_t *num_alleles, allele_t *ancestral_state, size_t nodes_chunk_size,
222+
size_t edges_chunk_size, int flags);
222223
int tree_sequence_builder_print_state(tree_sequence_builder_t *self, FILE *out);
223224
int tree_sequence_builder_free(tree_sequence_builder_t *self);
224225
int tree_sequence_builder_add_node(

0 commit comments

Comments
 (0)