Skip to content

Commit b5c6849

Browse files
committed
Add terminal site support to C ancestor builder
1 parent afc524c commit b5c6849

File tree

4 files changed

+32
-20
lines changed

4 files changed

+32
-20
lines changed

_tsinfermodule.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -136,18 +136,19 @@ static PyObject *
136136
AncestorBuilder_add_site(AncestorBuilder *self, PyObject *args, PyObject *kwds)
137137
{
138138
int err;
139-
static char *kwlist[] = {"time", "genotypes", NULL};
139+
static char *kwlist[] = {"time", "genotypes", "terminal", NULL};
140140
PyObject *ret = NULL;
141141
double time;
142142
PyObject *genotypes = NULL;
143143
PyArrayObject *genotypes_array = NULL;
144144
npy_intp *shape;
145+
int terminal = 0;
145146

146147
if (AncestorBuilder_check_state(self) != 0) {
147148
goto out;
148149
}
149-
if (!PyArg_ParseTupleAndKeywords(args, kwds, "dO", kwlist,
150-
&time, &genotypes)) {
150+
if (!PyArg_ParseTupleAndKeywords(args, kwds, "dO|p", kwlist,
151+
&time, &genotypes, &terminal)) {
151152
goto out;
152153
}
153154
genotypes_array = (PyArrayObject *) PyArray_FROM_OTF(genotypes, NPY_INT8,
@@ -166,7 +167,7 @@ AncestorBuilder_add_site(AncestorBuilder *self, PyObject *args, PyObject *kwds)
166167
}
167168
Py_BEGIN_ALLOW_THREADS
168169
err = ancestor_builder_add_site(self->builder, time,
169-
(allele_t *) PyArray_DATA(genotypes_array));
170+
(allele_t *) PyArray_DATA(genotypes_array), terminal);
170171
Py_END_ALLOW_THREADS
171172
if (err != 0) {
172173
handle_library_error(err);

lib/ancestor_builder.c

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,9 @@ ancestor_builder_compute_ancestral_states(const ancestor_builder_t *self, int di
439439
/* (int) min_sample_set_size); */
440440
for (l = focal_site + direction; l >= 0 && l < (int64_t) num_sites; l += direction) {
441441
/* printf("\tl = %d\n", (int) l); */
442+
if (sites[l].terminal) {
443+
break;
444+
}
442445
ancestor[l] = 0;
443446
last_site = (tsk_id_t) l;
444447

@@ -653,7 +656,8 @@ ancestor_builder_allocate_genotypes(ancestor_builder_t *self)
653656
}
654657

655658
int WARN_UNUSED
656-
ancestor_builder_add_site(ancestor_builder_t *self, double time, allele_t *genotypes)
659+
ancestor_builder_add_site(
660+
ancestor_builder_t *self, double time, allele_t *genotypes, bool terminal)
657661
{
658662
int ret = 0;
659663
site_t *site;
@@ -665,21 +669,30 @@ ancestor_builder_add_site(ancestor_builder_t *self, double time, allele_t *genot
665669
avl_tree_t *pattern_map;
666670
tsk_id_t site_id = (tsk_id_t) self->num_sites;
667671
size_t derived_count, j;
668-
time_map_t *time_map = ancestor_builder_get_time_map(self, time);
672+
time_map_t *time_map = NULL;
669673

674+
if (self->num_sites == self->max_sites) {
675+
ret = TSI_ERR_TOO_MANY_SITES;
676+
goto out;
677+
}
670678
derived_count = 0;
671679
for (j = 0; j < (size_t) self->num_samples; j++) {
672680
if (genotypes[j] == 1) {
673681
derived_count++;
674682
}
675683
}
676-
677-
if (time_map == NULL) {
678-
ret = TSI_ERR_NO_MEMORY;
684+
site = &self->sites[site_id];
685+
site->time = time;
686+
site->derived_count = derived_count;
687+
site->terminal = terminal;
688+
if (terminal) {
689+
site->encoded_genotypes = NULL;
690+
self->num_sites++;
679691
goto out;
680692
}
681-
if (self->num_sites == self->max_sites) {
682-
ret = TSI_ERR_TOO_MANY_SITES;
693+
time_map = ancestor_builder_get_time_map(self, time);
694+
if (time_map == NULL) {
695+
ret = TSI_ERR_NO_MEMORY;
683696
goto out;
684697
}
685698
ret = ancestor_builder_encode_genotypes(self, genotypes, encoded_genotypes);
@@ -688,9 +701,6 @@ ancestor_builder_add_site(ancestor_builder_t *self, double time, allele_t *genot
688701
}
689702
self->num_sites++;
690703
pattern_map = &time_map->pattern_map;
691-
site = &self->sites[site_id];
692-
site->time = time;
693-
site->derived_count = derived_count;
694704

695705
search.encoded_genotypes = encoded_genotypes;
696706
search.encoded_genotypes_size = self->encoded_genotypes_size;

lib/tests/tests.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,7 @@ run_random_data(size_t num_samples, size_t num_sites, int seed,
375375
genotypes[k] = samples[k][j];
376376
time += genotypes[k];
377377
}
378-
ret = ancestor_builder_add_site(&ancestor_builder, time, genotypes);
378+
ret = ancestor_builder_add_site(&ancestor_builder, time, genotypes, false);
379379
CU_ASSERT_EQUAL_FATAL(ret, 0);
380380
}
381381
/* ancestor_builder_print_state(&ancestor_builder, stdout); */
@@ -478,15 +478,15 @@ test_ancestor_builder_errors(void)
478478
ret = ancestor_builder_alloc(&ancestor_builder, 2, 0, -1, 0);
479479
CU_ASSERT_EQUAL_FATAL(ret, 0);
480480
CU_ASSERT_EQUAL_FATAL(ancestor_builder.num_sites, 0);
481-
ret = ancestor_builder_add_site(&ancestor_builder, 4, genotypes_ones);
481+
ret = ancestor_builder_add_site(&ancestor_builder, 4, genotypes_ones, false);
482482
CU_ASSERT_EQUAL_FATAL(ret, TSI_ERR_TOO_MANY_SITES);
483483
ancestor_builder_free(&ancestor_builder);
484484

485485
ret = ancestor_builder_alloc(&ancestor_builder, 4, 2, -1, 0);
486486
CU_ASSERT_EQUAL_FATAL(ret, 0);
487-
ret = ancestor_builder_add_site(&ancestor_builder, 4, genotypes_zeros);
487+
ret = ancestor_builder_add_site(&ancestor_builder, 4, genotypes_zeros, false);
488488
CU_ASSERT_EQUAL_FATAL(ret, 0);
489-
ret = ancestor_builder_add_site(&ancestor_builder, 4, genotypes_ones);
489+
ret = ancestor_builder_add_site(&ancestor_builder, 4, genotypes_ones, false);
490490
CU_ASSERT_EQUAL_FATAL(ret, 0);
491491
CU_ASSERT_EQUAL_FATAL(ancestor_builder.num_sites, 2);
492492
ret = ancestor_builder_finalise(&ancestor_builder);
@@ -509,7 +509,7 @@ test_ancestor_builder_one_site(void)
509509

510510
ret = ancestor_builder_alloc(&ancestor_builder, 4, 1, -1, 0);
511511
CU_ASSERT_EQUAL_FATAL(ret, 0);
512-
ret = ancestor_builder_add_site(&ancestor_builder, 4, genotypes);
512+
ret = ancestor_builder_add_site(&ancestor_builder, 4, genotypes, false);
513513
CU_ASSERT_EQUAL_FATAL(ret, 0);
514514
ret = ancestor_builder_finalise(&ancestor_builder);
515515
CU_ASSERT_EQUAL_FATAL(ret, 0);

lib/tsinfer.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ typedef struct {
104104
double time;
105105
uint8_t *encoded_genotypes;
106106
tsk_size_t derived_count;
107+
bool terminal;
107108
} site_t;
108109

109110
typedef struct {
@@ -251,7 +252,7 @@ int ancestor_builder_alloc(ancestor_builder_t *self, size_t num_samples,
251252
int ancestor_builder_free(ancestor_builder_t *self);
252253
int ancestor_builder_print_state(ancestor_builder_t *self, FILE *out);
253254
int ancestor_builder_add_site(
254-
ancestor_builder_t *self, double time, allele_t *genotypes);
255+
ancestor_builder_t *self, double time, allele_t *genotypes, bool terminal);
255256
int ancestor_builder_finalise(ancestor_builder_t *self);
256257
int ancestor_builder_make_ancestor(const ancestor_builder_t *self,
257258
size_t num_focal_sites, const tsk_id_t *focal_sites, tsk_id_t *start, tsk_id_t *end,

0 commit comments

Comments
 (0)