From be8a0b27dd4361de7c6fb5fbcca3a7b19afa3a8f Mon Sep 17 00:00:00 2001 From: Ben Jeffery Date: Mon, 16 Jun 2025 00:36:49 +0100 Subject: [PATCH] Remove tsk_diff_iter_t --- c/CHANGELOG.rst | 9 ++ c/tests/test_tables.c | 31 ------- c/tests/test_trees.c | 197 ------------------------------------------ c/tskit/tables.c | 162 ---------------------------------- c/tskit/tables.h | 25 ------ c/tskit/trees.c | 12 --- c/tskit/trees.h | 3 - 7 files changed, 9 insertions(+), 430 deletions(-) diff --git a/c/CHANGELOG.rst b/c/CHANGELOG.rst index 9b5d5e7f38..19b76efffe 100644 --- a/c/CHANGELOG.rst +++ b/c/CHANGELOG.rst @@ -1,3 +1,12 @@ +-------------------- +[1.2.0] - 2025-XX-XX +-------------------- + +**Breaking changes** + +- Remove ``tsk_diff_iter_t`` and associated functions. + (:user:`benjeffery`, :pr:`3221`, :issue:`2797`). + -------------------- [1.1.4] - 2025-03-31 -------------------- diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 6de6675ff6..2cff9b7a49 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -11505,35 +11505,6 @@ test_table_collection_delete_older(void) tsk_treeseq_free(&ts); } -static void -test_table_collection_edge_diffs_errors(void) -{ - int ret; - tsk_id_t ret_id; - tsk_table_collection_t tables; - tsk_diff_iter_t iter; - - ret = tsk_table_collection_init(&tables, 0); - CU_ASSERT_EQUAL(ret, 0); - tables.sequence_length = 1; - ret_id - = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0, -1, -1, NULL, 0); - CU_ASSERT_EQUAL(ret_id, 0); - ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1, -1, -1, NULL, 0); - CU_ASSERT_EQUAL(ret_id, 1); - ret = (int) tsk_edge_table_add_row(&tables.edges, 0., 1., 1, 0, NULL, 0); - CU_ASSERT_EQUAL(ret, 0); - - ret = tsk_diff_iter_init(&iter, &tables, -1, 0); - CU_ASSERT_EQUAL(ret, TSK_ERR_TABLES_NOT_INDEXED); - - tables.nodes.time[0] = 1; - ret = tsk_diff_iter_init(&iter, &tables, -1, 0); - CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_NODE_TIME_ORDERING); - - tsk_table_collection_free(&tables); -} - int main(int argc, char **argv) { @@ -11670,8 +11641,6 @@ main(int argc, char **argv) { "test_table_collection_takeset_indexes", test_table_collection_takeset_indexes }, { "test_table_collection_delete_older", test_table_collection_delete_older }, - { "test_table_collection_edge_diffs_errors", - test_table_collection_edge_diffs_errors }, { NULL, NULL }, }; diff --git a/c/tests/test_trees.c b/c/tests/test_trees.c index dcf4d2a69d..612e5200d0 100644 --- a/c/tests/test_trees.c +++ b/c/tests/test_trees.c @@ -534,100 +534,6 @@ verify_tree_next_prev(tsk_treeseq_t *ts) free(trees); } -static void -verify_tree_diffs(tsk_treeseq_t *ts, tsk_flags_t options) -{ - int ret, valid_tree; - tsk_diff_iter_t iter; - tsk_tree_t tree; - tsk_edge_list_node_t *record; - tsk_edge_list_t records_out, records_in; - tsk_size_t num_nodes = tsk_treeseq_get_num_nodes(ts); - tsk_size_t j, num_trees; - double lft, rgt; - tsk_id_t *parent = tsk_malloc(num_nodes * sizeof(tsk_id_t)); - tsk_id_t *child = tsk_malloc(num_nodes * sizeof(tsk_id_t)); - tsk_id_t *sib = tsk_malloc(num_nodes * sizeof(tsk_id_t)); - - CU_ASSERT_FATAL(parent != NULL); - CU_ASSERT_FATAL(child != NULL); - CU_ASSERT_FATAL(sib != NULL); - for (j = 0; j < num_nodes; j++) { - parent[j] = TSK_NULL; - child[j] = TSK_NULL; - sib[j] = TSK_NULL; - } - ret = tsk_diff_iter_init_from_ts(&iter, ts, options); - CU_ASSERT_EQUAL_FATAL(ret, 0); - ret = tsk_tree_init(&tree, ts, 0); - CU_ASSERT_EQUAL_FATAL(ret, 0); - valid_tree = tsk_tree_first(&tree); - CU_ASSERT_EQUAL_FATAL(valid_tree, TSK_TREE_OK); - tsk_diff_iter_print_state(&iter, _devnull); - - num_trees = 0; - while ((ret = tsk_diff_iter_next(&iter, &lft, &rgt, &records_out, &records_in)) - == TSK_TREE_OK) { - tsk_diff_iter_print_state(&iter, _devnull); - num_trees++; - /* Update forwards */ - for (record = records_out.head; record != NULL; record = record->next) { - parent[record->edge.child] = TSK_NULL; - } - for (record = records_in.head; record != NULL; record = record->next) { - parent[record->edge.child] = record->edge.parent; - } - if (valid_tree) { - /* Now check against the sparse tree iterator. */ - for (j = 0; j < num_nodes; j++) { - CU_ASSERT_EQUAL(parent[j], tree.parent[j]); - } - } - /* Update backwards */ - for (record = records_out.tail; record != NULL; record = record->prev) { - parent[record->edge.child] = TSK_NULL; - } - for (record = records_in.tail; record != NULL; record = record->prev) { - parent[record->edge.child] = record->edge.parent; - } - if (valid_tree) { - /* Now check against the sparse tree iterator. */ - for (j = 0; j < num_nodes; j++) { - CU_ASSERT_EQUAL(parent[j], tree.parent[j]); - } - CU_ASSERT_EQUAL(tree.interval.left, lft); - CU_ASSERT_EQUAL(tree.interval.right, rgt); - valid_tree = tsk_tree_next(&tree); - if (num_trees < tsk_treeseq_get_num_trees(ts)) { - CU_ASSERT_EQUAL(ret, TSK_TREE_OK); - } else { - CU_ASSERT_EQUAL(valid_tree, 0); - } - } else { - CU_ASSERT_TRUE_FATAL(options & TSK_INCLUDE_TERMINAL); - for (j = 0; j < num_nodes; j++) { - CU_ASSERT_EQUAL(parent[j], -1); - } - CU_ASSERT_EQUAL(lft, tsk_treeseq_get_sequence_length(ts)); - CU_ASSERT_EQUAL(rgt, tsk_treeseq_get_sequence_length(ts)); - } - } - if (options & TSK_INCLUDE_TERMINAL) { - CU_ASSERT_EQUAL(num_trees, tsk_treeseq_get_num_trees(ts) + 1); - } else { - CU_ASSERT_EQUAL(num_trees, tsk_treeseq_get_num_trees(ts)); - } - CU_ASSERT_EQUAL_FATAL(valid_tree, 0); - ret = tsk_diff_iter_free(&iter); - CU_ASSERT_EQUAL_FATAL(ret, 0); - ret = tsk_tree_free(&tree); - CU_ASSERT_EQUAL_FATAL(ret, 0); - - free(parent); - free(child); - free(sib); -} - static void verify_edge_array_single_tree( tsk_tree_t *tree, tsk_edge_table_t *edge_table, tsk_size_t num_nodes) @@ -5918,71 +5824,6 @@ test_convenience_arrays_multi_tree(void) tsk_treeseq_free(&ts); } -/*======================================================= - * Diff iter tests. - *======================================================*/ - -static void -test_simple_diff_iter(void) -{ - int ret; - tsk_treeseq_t ts; - - tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, NULL, NULL, - paper_ex_individuals, NULL, 0); - - verify_tree_diffs(&ts, 0); - verify_tree_diffs(&ts, TSK_INCLUDE_TERMINAL); - - ret = tsk_treeseq_free(&ts); - CU_ASSERT_EQUAL(ret, 0); -} - -static void -test_nonbinary_diff_iter(void) -{ - int ret; - tsk_treeseq_t ts; - - tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL, NULL, - NULL, NULL, NULL, 0); - verify_tree_diffs(&ts, 0); - verify_tree_diffs(&ts, TSK_INCLUDE_TERMINAL); - - ret = tsk_treeseq_free(&ts); - CU_ASSERT_EQUAL(ret, 0); -} - -static void -test_unary_diff_iter(void) -{ - int ret; - tsk_treeseq_t ts; - - tsk_treeseq_from_text( - &ts, 10, unary_ex_nodes, unary_ex_edges, NULL, NULL, NULL, NULL, NULL, 0); - verify_tree_diffs(&ts, 0); - verify_tree_diffs(&ts, TSK_INCLUDE_TERMINAL); - - ret = tsk_treeseq_free(&ts); - CU_ASSERT_EQUAL(ret, 0); -} - -static void -test_internal_sample_diff_iter(void) -{ - int ret; - tsk_treeseq_t ts; - - tsk_treeseq_from_text(&ts, 10, internal_sample_ex_nodes, internal_sample_ex_edges, - NULL, NULL, NULL, NULL, NULL, 0); - verify_tree_diffs(&ts, 0); - verify_tree_diffs(&ts, TSK_INCLUDE_TERMINAL); - - ret = tsk_treeseq_free(&ts); - CU_ASSERT_EQUAL(ret, 0); -} - static void test_multiroot_mrca(void) { @@ -6012,36 +5853,6 @@ test_multiroot_mrca(void) tsk_treeseq_free(&ts); } -static void -test_multiroot_diff_iter(void) -{ - int ret; - tsk_treeseq_t ts; - - tsk_treeseq_from_text(&ts, 10, multiroot_ex_nodes, multiroot_ex_edges, NULL, NULL, - NULL, NULL, NULL, 0); - verify_tree_diffs(&ts, 0); - verify_tree_diffs(&ts, TSK_INCLUDE_TERMINAL); - - ret = tsk_treeseq_free(&ts); - CU_ASSERT_EQUAL(ret, 0); -} - -static void -test_empty_diff_iter(void) -{ - int ret; - tsk_treeseq_t ts; - - tsk_treeseq_from_text( - &ts, 10, empty_ex_nodes, empty_ex_edges, NULL, NULL, NULL, NULL, NULL, 0); - verify_tree_diffs(&ts, 0); - verify_tree_diffs(&ts, TSK_INCLUDE_TERMINAL); - - ret = tsk_treeseq_free(&ts); - CU_ASSERT_EQUAL(ret, 0); -} - /*======================================================= * Sample sets *======================================================*/ @@ -8840,14 +8651,6 @@ main(int argc, char **argv) /* multiroot tests */ { "test_multiroot_mrca", test_multiroot_mrca }, - { "test_multiroot_diff_iter", test_multiroot_diff_iter }, - - /* Diff iter tests */ - { "test_simple_diff_iter", test_simple_diff_iter }, - { "test_nonbinary_diff_iter", test_nonbinary_diff_iter }, - { "test_unary_diff_iter", test_unary_diff_iter }, - { "test_internal_sample_diff_iter", test_internal_sample_diff_iter }, - { "test_empty_diff_iter", test_empty_diff_iter }, /* Sample sets */ { "test_simple_sample_sets", test_simple_sample_sets }, diff --git a/c/tskit/tables.c b/c/tskit/tables.c index 4b35ff5c1d..c1025ee8a5 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -13448,165 +13448,3 @@ tsk_squash_edges(tsk_edge_t *edges, tsk_size_t num_edges, tsk_size_t *num_output out: return ret; } - -/* ======================================================== * - * Tree diff iterator. - * ======================================================== */ - -int TSK_WARN_UNUSED -tsk_diff_iter_init(tsk_diff_iter_t *self, const tsk_table_collection_t *tables, - tsk_id_t num_trees, tsk_flags_t options) -{ - int ret = 0; - - tsk_bug_assert(tables != NULL); - tsk_memset(self, 0, sizeof(tsk_diff_iter_t)); - self->num_nodes = tables->nodes.num_rows; - self->num_edges = tables->edges.num_rows; - self->tables = tables; - self->insertion_index = 0; - self->removal_index = 0; - self->tree_left = 0; - self->tree_index = -1; - if (num_trees < 0) { - num_trees = tsk_table_collection_check_integrity(self->tables, TSK_CHECK_TREES); - if (num_trees < 0) { - ret = (int) num_trees; - goto out; - } - } - self->last_index = num_trees; - - if (options & TSK_INCLUDE_TERMINAL) { - self->last_index = self->last_index + 1; - } - self->edge_list_nodes = tsk_malloc(self->num_edges * sizeof(*self->edge_list_nodes)); - if (self->edge_list_nodes == NULL) { - ret = tsk_trace_error(TSK_ERR_NO_MEMORY); - goto out; - } -out: - return ret; -} - -int -tsk_diff_iter_free(tsk_diff_iter_t *self) -{ - tsk_safe_free(self->edge_list_nodes); - return 0; -} - -void -tsk_diff_iter_print_state(const tsk_diff_iter_t *self, FILE *out) -{ - fprintf(out, "tree_diff_iterator state\n"); - fprintf(out, "num_edges = %lld\n", (long long) self->num_edges); - fprintf(out, "insertion_index = %lld\n", (long long) self->insertion_index); - fprintf(out, "removal_index = %lld\n", (long long) self->removal_index); - fprintf(out, "tree_left = %f\n", self->tree_left); - fprintf(out, "tree_index = %lld\n", (long long) self->tree_index); -} - -int TSK_WARN_UNUSED -tsk_diff_iter_next(tsk_diff_iter_t *self, double *ret_left, double *ret_right, - tsk_edge_list_t *edges_out_ret, tsk_edge_list_t *edges_in_ret) -{ - int ret = 0; - tsk_id_t k; - const double sequence_length = self->tables->sequence_length; - double left = self->tree_left; - double right = sequence_length; - tsk_size_t next_edge_list_node = 0; - tsk_edge_list_node_t *out_head = NULL; - tsk_edge_list_node_t *out_tail = NULL; - tsk_edge_list_node_t *in_head = NULL; - tsk_edge_list_node_t *in_tail = NULL; - tsk_edge_list_node_t *w = NULL; - tsk_edge_list_t edges_out; - tsk_edge_list_t edges_in; - const tsk_edge_table_t *edges = &self->tables->edges; - const tsk_id_t *insertion_order = self->tables->indexes.edge_insertion_order; - const tsk_id_t *removal_order = self->tables->indexes.edge_removal_order; - - tsk_memset(&edges_out, 0, sizeof(edges_out)); - tsk_memset(&edges_in, 0, sizeof(edges_in)); - - if (self->tree_index + 1 < self->last_index) { - /* First we remove the stale records */ - while (self->removal_index < (tsk_id_t) self->num_edges - && left == edges->right[removal_order[self->removal_index]]) { - k = removal_order[self->removal_index]; - tsk_bug_assert(next_edge_list_node < self->num_edges); - w = &self->edge_list_nodes[next_edge_list_node]; - next_edge_list_node++; - w->edge.id = k; - w->edge.left = edges->left[k]; - w->edge.right = edges->right[k]; - w->edge.parent = edges->parent[k]; - w->edge.child = edges->child[k]; - w->edge.metadata = edges->metadata + edges->metadata_offset[k]; - w->edge.metadata_length - = edges->metadata_offset[k + 1] - edges->metadata_offset[k]; - w->next = NULL; - w->prev = NULL; - if (out_head == NULL) { - out_head = w; - out_tail = w; - } else { - out_tail->next = w; - w->prev = out_tail; - out_tail = w; - } - self->removal_index++; - } - edges_out.head = out_head; - edges_out.tail = out_tail; - - /* Now insert the new records */ - while (self->insertion_index < (tsk_id_t) self->num_edges - && left == edges->left[insertion_order[self->insertion_index]]) { - k = insertion_order[self->insertion_index]; - tsk_bug_assert(next_edge_list_node < self->num_edges); - w = &self->edge_list_nodes[next_edge_list_node]; - next_edge_list_node++; - w->edge.id = k; - w->edge.left = edges->left[k]; - w->edge.right = edges->right[k]; - w->edge.parent = edges->parent[k]; - w->edge.child = edges->child[k]; - w->edge.metadata = edges->metadata + edges->metadata_offset[k]; - w->edge.metadata_length - = edges->metadata_offset[k + 1] - edges->metadata_offset[k]; - w->next = NULL; - w->prev = NULL; - if (in_head == NULL) { - in_head = w; - in_tail = w; - } else { - in_tail->next = w; - w->prev = in_tail; - in_tail = w; - } - self->insertion_index++; - } - edges_in.head = in_head; - edges_in.tail = in_tail; - - right = sequence_length; - if (self->insertion_index < (tsk_id_t) self->num_edges) { - right = TSK_MIN(right, edges->left[insertion_order[self->insertion_index]]); - } - if (self->removal_index < (tsk_id_t) self->num_edges) { - right = TSK_MIN(right, edges->right[removal_order[self->removal_index]]); - } - self->tree_index++; - ret = TSK_TREE_OK; - } - *edges_out_ret = edges_out; - *edges_in_ret = edges_in; - *ret_left = left; - *ret_right = right; - /* Set the left coordinate for the next tree */ - self->tree_left = right; - return ret; -} diff --git a/c/tskit/tables.h b/c/tskit/tables.h index 204e39e2db..e6fae4f03e 100644 --- a/c/tskit/tables.h +++ b/c/tskit/tables.h @@ -682,18 +682,6 @@ typedef struct { tsk_edge_list_node_t *tail; } tsk_edge_list_t; -typedef struct { - tsk_size_t num_nodes; - tsk_size_t num_edges; - double tree_left; - const tsk_table_collection_t *tables; - tsk_id_t insertion_index; - tsk_id_t removal_index; - tsk_id_t tree_index; - tsk_id_t last_index; - tsk_edge_list_node_t *edge_list_nodes; -} tsk_diff_iter_t; - /****************************************************************************/ /* Common function options */ /****************************************************************************/ @@ -4771,19 +4759,6 @@ int tsk_identity_segments_get(const tsk_identity_segments_t *self, tsk_id_t a, void tsk_identity_segments_print_state(tsk_identity_segments_t *self, FILE *out); int tsk_identity_segments_free(tsk_identity_segments_t *self); -/* Edge differences */ - -/* Internal API - currently used in a few places, but a better API is envisaged - * at some point. - * IMPORTANT: tskit-rust uses this API, so don't break without discussing! - */ -int tsk_diff_iter_init(tsk_diff_iter_t *self, const tsk_table_collection_t *tables, - tsk_id_t num_trees, tsk_flags_t options); -int tsk_diff_iter_free(tsk_diff_iter_t *self); -int tsk_diff_iter_next(tsk_diff_iter_t *self, double *left, double *right, - tsk_edge_list_t *edges_out, tsk_edge_list_t *edges_in); -void tsk_diff_iter_print_state(const tsk_diff_iter_t *self, FILE *out); - #ifdef __cplusplus } #endif diff --git a/c/tskit/trees.c b/c/tskit/trees.c index 4638fb18e8..209549f30f 100644 --- a/c/tskit/trees.c +++ b/c/tskit/trees.c @@ -7358,18 +7358,6 @@ tsk_tree_map_mutations(tsk_tree_t *self, int32_t *genotypes, return ret; } -/* Compatibility shim for initialising the diff iterator from a tree sequence. We are - * using this function in a small number of places internally, so simplest to keep it - * until a more satisfactory "diff" API comes along. - */ -int TSK_WARN_UNUSED -tsk_diff_iter_init_from_ts( - tsk_diff_iter_t *self, const tsk_treeseq_t *tree_sequence, tsk_flags_t options) -{ - return tsk_diff_iter_init( - self, tree_sequence->tables, (tsk_id_t) tree_sequence->num_trees, options); -} - /* ======================================================== * * KC Distance * ======================================================== */ diff --git a/c/tskit/trees.h b/c/tskit/trees.h index bef944fff3..ac4100f7b0 100644 --- a/c/tskit/trees.h +++ b/c/tskit/trees.h @@ -1904,9 +1904,6 @@ bool tsk_tree_is_sample(const tsk_tree_t *self, tsk_id_t u); */ bool tsk_tree_equals(const tsk_tree_t *self, const tsk_tree_t *other); -int tsk_diff_iter_init_from_ts( - tsk_diff_iter_t *self, const tsk_treeseq_t *tree_sequence, tsk_flags_t options); - int tsk_tree_position_init( tsk_tree_position_t *self, const tsk_treeseq_t *tree_sequence, tsk_flags_t options); int tsk_tree_position_free(tsk_tree_position_t *self);