Skip to content

Commit dacbf38

Browse files
authored
Tolerate invalid parents when computing mutation parents (#3309)
1 parent 1acc108 commit dacbf38

File tree

3 files changed

+135
-0
lines changed

3 files changed

+135
-0
lines changed

c/tests/test_tables.c

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10775,6 +10775,85 @@ test_check_integrity_bad_mutation_parent_topology(void)
1077510775
tsk_table_collection_free(&tables);
1077610776
}
1077710777

10778+
static void
10779+
test_table_collection_compute_mutation_parents_tolerates_invalid_input(void)
10780+
{
10781+
int ret;
10782+
tsk_id_t ret_id;
10783+
tsk_table_collection_t tables;
10784+
tsk_id_t site;
10785+
10786+
ret = tsk_table_collection_init(&tables, 0);
10787+
CU_ASSERT_EQUAL_FATAL(ret, 0);
10788+
tables.sequence_length = 1.0;
10789+
10790+
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1.0, TSK_NULL, TSK_NULL, NULL, 0);
10791+
CU_ASSERT_FATAL(ret_id >= 0);
10792+
ret_id = tsk_node_table_add_row(
10793+
&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);
10794+
CU_ASSERT_FATAL(ret_id >= 0);
10795+
ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 0, 1, NULL, 0);
10796+
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
10797+
site = tsk_site_table_add_row(&tables.sites, 0.0, "A", 1, NULL, 0);
10798+
CU_ASSERT_FATAL(site >= 0);
10799+
ret_id = tsk_mutation_table_add_row(
10800+
&tables.mutations, site, 1, TSK_NULL, TSK_UNKNOWN_TIME, "C", 1, NULL, 0);
10801+
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
10802+
10803+
ret = tsk_table_collection_build_index(&tables, 0);
10804+
CU_ASSERT_EQUAL_FATAL(ret, 0);
10805+
tables.mutations.parent[0] = 42;
10806+
10807+
ret = tsk_table_collection_compute_mutation_parents(&tables, 0);
10808+
CU_ASSERT_EQUAL_FATAL(ret, 0);
10809+
CU_ASSERT_FATAL(tables.mutations.parent[0] == TSK_NULL);
10810+
10811+
tsk_table_collection_free(&tables);
10812+
}
10813+
10814+
static void
10815+
test_table_collection_compute_mutation_parents_restores_on_error(void)
10816+
{
10817+
int ret;
10818+
tsk_id_t ret_id;
10819+
tsk_table_collection_t tables;
10820+
tsk_id_t site;
10821+
10822+
ret = tsk_table_collection_init(&tables, 0);
10823+
CU_ASSERT_EQUAL_FATAL(ret, 0);
10824+
tables.sequence_length = 1.0;
10825+
10826+
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1.0, TSK_NULL, TSK_NULL, NULL, 0);
10827+
CU_ASSERT_FATAL(ret_id >= 0);
10828+
ret_id = tsk_node_table_add_row(
10829+
&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);
10830+
CU_ASSERT_FATAL(ret_id >= 0);
10831+
ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 0, 1, NULL, 0);
10832+
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
10833+
site = tsk_site_table_add_row(&tables.sites, 0.5, "A", 1, NULL, 0);
10834+
CU_ASSERT_FATAL(site >= 0);
10835+
10836+
ret_id = tsk_mutation_table_add_row(
10837+
&tables.mutations, site, 1, TSK_NULL, TSK_UNKNOWN_TIME, "C", 1, NULL, 0);
10838+
CU_ASSERT_FATAL(ret_id >= 0);
10839+
ret_id = tsk_mutation_table_add_row(
10840+
&tables.mutations, site, 0, TSK_NULL, TSK_UNKNOWN_TIME, "G", 1, NULL, 0);
10841+
CU_ASSERT_FATAL(ret_id >= 0);
10842+
10843+
ret = tsk_table_collection_build_index(&tables, 0);
10844+
CU_ASSERT_EQUAL_FATAL(ret, 0);
10845+
10846+
tables.mutations.parent[0] = 111;
10847+
tables.mutations.parent[1] = 222;
10848+
10849+
ret = tsk_table_collection_compute_mutation_parents(&tables, 0);
10850+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_PARENT_AFTER_CHILD);
10851+
CU_ASSERT_EQUAL(tables.mutations.parent[0], 111);
10852+
CU_ASSERT_EQUAL(tables.mutations.parent[1], 222);
10853+
10854+
tsk_table_collection_free(&tables);
10855+
}
10856+
1077810857
static void
1077910858
test_table_collection_subset_with_options(tsk_flags_t options)
1078010859
{
@@ -11934,6 +12013,10 @@ main(int argc, char **argv)
1193412013
test_table_collection_check_integrity_bad_indexes },
1193512014
{ "test_check_integrity_bad_mutation_parent_topology",
1193612015
test_check_integrity_bad_mutation_parent_topology },
12016+
{ "test_table_collection_compute_mutation_parents_tolerates_invalid_input",
12017+
test_table_collection_compute_mutation_parents_tolerates_invalid_input },
12018+
{ "test_table_collection_compute_mutation_parents_restores_on_error",
12019+
test_table_collection_compute_mutation_parents_restores_on_error },
1193712020
{ "test_table_collection_subset", test_table_collection_subset },
1193812021
{ "test_table_collection_subset_unsorted",
1193912022
test_table_collection_subset_unsorted },

c/tskit/tables.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12436,8 +12436,27 @@ tsk_table_collection_compute_mutation_parents(
1243612436
tsk_table_collection_t *self, tsk_flags_t options)
1243712437
{
1243812438
int ret = 0;
12439+
tsk_mutation_table_t *mutations = &self->mutations;
12440+
tsk_id_t *parent_backup = NULL;
12441+
bool restore_parents = false;
1243912442

1244012443
if (!(options & TSK_NO_CHECK_INTEGRITY)) {
12444+
if (mutations->num_rows > 0) {
12445+
/* We need to wipe the parent column before computing, as otherwise invalid
12446+
* parents can cause integrity checks to fail. We take a copy to restore on
12447+
* error */
12448+
parent_backup = tsk_malloc(mutations->num_rows * sizeof(*parent_backup));
12449+
if (parent_backup == NULL) {
12450+
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
12451+
goto out;
12452+
}
12453+
tsk_memcpy(parent_backup, mutations->parent,
12454+
mutations->num_rows * sizeof(*parent_backup));
12455+
/* Set the parent pointers to TSK_NULL */
12456+
tsk_memset(mutations->parent, 0xff,
12457+
mutations->num_rows * sizeof(*mutations->parent));
12458+
restore_parents = true;
12459+
}
1244112460
/* Safe to cast here as we're not counting trees */
1244212461
ret = (int) tsk_table_collection_check_integrity(self, TSK_CHECK_TREES);
1244312462
if (ret < 0) {
@@ -12452,6 +12471,11 @@ tsk_table_collection_compute_mutation_parents(
1245212471
}
1245312472

1245412473
out:
12474+
if (ret != 0 && restore_parents) {
12475+
tsk_memcpy(mutations->parent, parent_backup,
12476+
mutations->num_rows * sizeof(*parent_backup));
12477+
}
12478+
tsk_safe_free(parent_backup);
1245512479
return ret;
1245612480
}
1245712481

python/tests/test_tables.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3540,6 +3540,34 @@ def test_table_references(self):
35403540
assert str(populations) == before_populations
35413541
assert str(provenances) == before_provenances
35423542

3543+
def test_compute_mutation_parents_ignores_existing_values(self):
3544+
tables = tskit.TableCollection(sequence_length=1.0)
3545+
parent = tables.nodes.add_row(time=1.0)
3546+
child = tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)
3547+
tables.edges.add_row(left=0.0, right=1.0, parent=parent, child=child)
3548+
site = tables.sites.add_row(position=0.0, ancestral_state="A")
3549+
tables.mutations.add_row(site=site, node=child, derived_state="C")
3550+
tables.build_index()
3551+
tables.mutations.parent[:] = 42
3552+
tables.compute_mutation_parents()
3553+
assert tables.mutations.parent[0] == tskit.NULL
3554+
3555+
def test_compute_mutation_parents_restores_on_index_error(self):
3556+
tables = tskit.TableCollection(sequence_length=1.0)
3557+
parent = tables.nodes.add_row(time=1.0)
3558+
child = tables.nodes.add_row(time=0, flags=tskit.NODE_IS_SAMPLE)
3559+
tables.edges.add_row(left=0.0, right=1.0, parent=parent, child=child)
3560+
site = tables.sites.add_row(position=0.0, ancestral_state="A")
3561+
tables.mutations.add_row(site=site, node=child, derived_state="C")
3562+
3563+
mutation_columns = tables.mutations.asdict()
3564+
mutation_columns["parent"] = np.array([123], dtype=np.int32)
3565+
tables.mutations.set_columns(**mutation_columns)
3566+
3567+
with pytest.raises(tskit.LibraryError, match="TSK_ERR_TABLES_NOT_INDEXED"):
3568+
tables.compute_mutation_parents()
3569+
assert tables.mutations.parent[0] == 123
3570+
35433571
def test_str(self):
35443572
ts = msprime.simulate(10, random_seed=1)
35453573
tables = ts.tables

0 commit comments

Comments
 (0)