add deprecated compare()

petrelharp · petrelharp · commit 55dc73271f7b · 2025-09-09T20:18:16.000Z
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,7 @@ Final release to go with publication of Fritze et al.
 
 **Breaking change:** renamed `compare` to `haplotype_arf`, because there are other comparison
 methods that we might implement here, and each would return a different object.
+For now, `compare` remains as a deprecated alias: it returns the same result as `haplotype_arf` but issues a `DeprecationWarning`.
 
 ## [0.1] - 2024-12-14
 
diff --git a/tests/test_methods.py b/tests/test_methods.py
@@ -198,6 +198,13 @@ def test_diff(self, ts):
 
 class TestNodeMatching:
 
+    def test_empty_ts(self):  # node-span helpers on a tree sequence built from empty tables
+        ts = tskit.TableCollection(sequence_length=1.0).tree_sequence()
+        x = tscompare.node_spans(ts)
+        assert len(x) == 0  # no entries expected for this input
+        x = tscompare.shared_node_spans(ts, ts)
+        assert x.shape == (0, 0)  # result must be an empty 0-by-0 matrix
+
     @pytest.mark.parametrize(
         "ts",
         [true_simpl, true_unary],
@@ -253,6 +260,15 @@ def test_isolated_samples(self):
         assert np.all(np.isclose(node_spans_missing, true_spans_missing))
 
 
+class TestDeprecation:  # checks that the deprecated compare() entry point warns
+
+    @pytest.mark.filterwarnings("ignore:invalid value encountered in scalar divide")
+    def test_compare(self):
+        ts = tskit.TableCollection(sequence_length=1.0).tree_sequence()
+        with pytest.warns(DeprecationWarning):  # fails if no such warning is issued
+            _ = tscompare.compare(ts, ts)  # result is discarded; only the warning matters
+
+
 class TestMatchedSpans:
 
     def verify_compare(self, ts, other, transform=None):
@@ -270,6 +286,16 @@ def verify_compare(self, ts, other, transform=None):
         assert np.isclose(other_span, dis.total_span[1])
         assert np.isclose(rmse, dis.rmse), f"{rmse} != {dis.rmse}"
 
+    @pytest.mark.filterwarnings("ignore:invalid value encountered in scalar divide")
+    def test_empty_ts(self):  # haplotype_arf on a tree sequence built from empty tables
+        ts = tskit.TableCollection(sequence_length=1.0).tree_sequence()
+        x = tscompare.haplotype_arf(ts, ts)
+        assert np.isnan(x.arf)  # the three summary statistics must all be NaN here
+        assert np.isnan(x.tpr)
+        assert np.isnan(x.rmse)
+        assert x.matched_span == (0, 0)  # both span pairs must be zero
+        assert x.total_span == (0, 0)
+
     def test_samples_dont_match(self):
         ts1 = tskit.Tree.generate_star(2).tree_sequence
         ts2 = tskit.Tree.generate_star(3).tree_sequence
diff --git a/tscompare/__init__.py b/tscompare/__init__.py
@@ -24,6 +24,7 @@
 """
 from .methods import ARFResult  # noqa F401
 from .methods import CladeMap  # noqa F401
+from .methods import compare  # noqa F401
 from .methods import haplotype_arf  # noqa F401
 from .methods import match_node_ages  # noqa F401
 from .methods import node_spans  # noqa F401
diff --git a/tscompare/methods.py b/tscompare/methods.py
@@ -23,6 +23,7 @@
 Tools for comparing node times between tree sequences with different node sets
 """
 import copy
+import warnings
 from collections import defaultdict
 from dataclasses import dataclass
 from itertools import product
@@ -32,6 +33,16 @@
 import tskit
 
 
+def compare(*args, **kwargs):
+    """Deprecated alias of haplotype_arf(): warn, then forward all arguments."""
+    msg = (
+        "compare() is deprecated and will be removed in the future; "
+        "please use haplotype_arf() instead."
+    )
+    warnings.warn(msg, DeprecationWarning, stacklevel=2)  # 2: blame our caller
+    return haplotype_arf(*args, **kwargs)
+
+
 def node_spans(ts, include_missing=False):
     """
     Returns the array of "node spans", i.e., the `j`th entry gives
@@ -416,39 +427,51 @@ def f(t):
 
     ts_node_spans = node_spans(ts, include_missing=True)
     shared_spans = shared_node_spans(ts, other)
-    col_ind = shared_spans.indices
-    row_ind = np.repeat(
-        np.arange(shared_spans.shape[0]), repeats=np.diff(shared_spans.indptr)
-    )
-    # We require that the samples are the same in both trees!
-    # If we did not require this, we could identify swapped samples,
-    # but this is out of scope (people could detect this using
-    # the shared spans matrix directly).
-    is_sample = np.full(max(ts.num_nodes, other.num_nodes), False)
-    is_sample[samples] = True
-    index_not_equal = ~np.equal(row_ind, col_ind)
-    shared_spans.data[np.logical_and(is_sample[row_ind], index_not_equal)] = 0.0
-    # Find all potential matches for a node based on max shared span length
-    max_span = shared_spans.max(axis=1).toarray().flatten()
-    total_match_n1_span = np.sum(max_span)  # <---- one thing to output
-    # zero out everything that's not a row max
-    shared_spans.data[shared_spans.data != max_span[row_ind]] = 0.0
-    # now re-sparsify the matrix: but, beware! don't do this again later.
-    shared_spans.eliminate_zeros()
-    col_ind = shared_spans.indices
-    row_ind = np.repeat(
-        np.arange(shared_spans.shape[0]), repeats=np.diff(shared_spans.indptr)
-    )
-    # now, make a matrix with differences in transformed times
-    # in the places where shared_spans retains nonzero elements
-    time_diff = shared_spans.copy()
-    ts_times = ts.nodes_time[row_ind]
-    other_times = other.nodes_time[col_ind]
-    time_diff.data[:] = np.absolute(
-        np.asarray(transform(ts_times) - transform(other_times))
-    )
-    # "explicit=True" takes the min of only the entries explicitly represented
-    dt = time_diff.min(axis=1, explicit=True).toarray().flatten()
+    if min(ts.num_nodes, other.num_nodes) > 0:
+        col_ind = shared_spans.indices
+        row_ind = np.repeat(
+            np.arange(shared_spans.shape[0]), repeats=np.diff(shared_spans.indptr)
+        )
+        # We require that the samples are the same in both trees!
+        # If we did not require this, we could identify swapped samples,
+        # but this is out of scope (people could detect this using
+        # the shared spans matrix directly).
+        is_sample = np.full(max(ts.num_nodes, other.num_nodes), False)
+        is_sample[samples] = True
+        index_not_equal = ~np.equal(row_ind, col_ind)
+        shared_spans.data[np.logical_and(is_sample[row_ind], index_not_equal)] = 0.0
+        # Find all potential matches for a node based on max shared span length
+        max_span = shared_spans.max(axis=1).toarray().flatten()
+        total_match_n1_span = np.sum(max_span)  # <---- one thing to output
+        # zero out everything that's not a row max
+        shared_spans.data[shared_spans.data != max_span[row_ind]] = 0.0
+        # now re-sparsify the matrix: but, beware! don't do this again later.
+        shared_spans.eliminate_zeros()
+        col_ind = shared_spans.indices
+        row_ind = np.repeat(
+            np.arange(shared_spans.shape[0]), repeats=np.diff(shared_spans.indptr)
+        )
+        # now, make a matrix with differences in transformed times
+        # in the places where shared_spans retains nonzero elements
+        time_diff = shared_spans.copy()
+        ts_times = ts.nodes_time[row_ind]
+        other_times = other.nodes_time[col_ind]
+        time_diff.data[:] = np.absolute(
+            np.asarray(transform(ts_times) - transform(other_times))
+        )
+        # "explicit=True" takes the min of only the entries explicitly represented
+        dt = time_diff.min(axis=1, explicit=True).toarray().flatten()
+        # next, zero out also those non-best-time-match elements
+        shared_spans.data[time_diff.data != dt[row_ind]] = 0.0
+        # and, find sum of column maxima
+        total_match_n2_span = shared_spans.max(
+            axis=0
+        ).sum()  # <--- the other thing we return
+    else:
+        max_span = 0
+        total_match_n1_span = 0
+        total_match_n2_span = 0
+
     has_match = max_span != 0
     if np.any(has_match):
         rmse = np.sqrt(
@@ -459,13 +482,6 @@ def f(t):
     else:
         rmse = np.nan
 
-    # next, zero out also those non-best-time-match elements
-    shared_spans.data[time_diff.data != dt[row_ind]] = 0.0
-    # and, find sum of column maxima
-    total_match_n2_span = shared_spans.max(
-        axis=0
-    ).sum()  # <--- the other thing we return
-
     total_span_ts = np.sum(ts_node_spans)
     total_span_other = np.sum(node_spans(other, include_missing=True))
     return ARFResult(