From c91169a16b95becf978b50156493a27a5df6363a Mon Sep 17 00:00:00 2001
From: Andrew Kern <adkern@uoregon.edu>
Date: Thu, 26 Jun 2025 14:39:34 -0700
Subject: [PATCH 1/2] Fix: genetic_relatedness to allow single sample set with
 self-comparisons

Previously, genetic_relatedness would fail with TSK_ERR_INSUFFICIENT_SAMPLE_SETS
when given a single sample set and indexes referring to that set, e.g.:
  ts.genetic_relatedness([[0]], indexes=[(0,0)])

This was because the C API requires at least k=2 sample sets for k-way statistics,
even when indexes only reference a single set for self-comparison.

The fix adds an allow_self_comparisons parameter to __k_way_sample_set_stat,
which is set to True only for genetic_relatedness. When enabled and indexes
only reference existing sample sets, dummy sample sets are added to satisfy
the C API requirement while ensuring they are never accessed during computation.

Tests added for all three modes (site, branch, node) to verify self-comparisons
work correctly with single sample sets.

Fixes #3055
---
 python/tests/test_tree_stats.py | 35 +++++++++++++++++++++++++++++++++
 python/tskit/trees.py           | 20 +++++++++++++++++++
 2 files changed, 55 insertions(+)

diff --git a/python/tests/test_tree_stats.py b/python/tests/test_tree_stats.py
index a92da4f252..436db02131 100644
--- a/python/tests/test_tree_stats.py
+++ b/python/tests/test_tree_stats.py
@@ -2311,6 +2311,16 @@ def test_shapes(self, proportion):
 class TestBranchGeneticRelatedness(TestGeneticRelatedness, TopologyExamplesMixin):
     mode = "branch"
 
+    def test_single_sample_set_self_comparison(self):
+        # Test for issue #3055 - self-comparisons with single sample set
+        ts = msprime.simulate(sample_size=10, Ne=10000, length=1000, random_seed=42)
+        # Single sample set with self-comparison
+        result = ts.genetic_relatedness([[0]], indexes=[(0, 0)], mode="branch")
+        assert result.shape == (1,)
+        # Should work for multiple samples in single set too
+        result = ts.genetic_relatedness([[0, 1, 2]], indexes=[(0, 0)], mode="branch")
+        assert result.shape == (1,)
+
     @pytest.mark.parametrize("polarised", [True, False])
     def test_simple_tree_noncentred(self, polarised):
         # 2.00┊   4   ┊
@@ -2365,10 +2375,35 @@ def test_simple_tree_noncentred(self, polarised):
 class TestNodeGeneticRelatedness(TestGeneticRelatedness, TopologyExamplesMixin):
     mode = "node"
 
+    def test_single_sample_set_self_comparison(self, ts_12_highrecomb_fixture):
+        # Test for issue #3055 - self-comparisons with single sample set
+        ts = ts_12_highrecomb_fixture
+        # Single sample set with self-comparison
+        result = ts.genetic_relatedness([[0]], indexes=[(0, 0)], mode="node")
+        assert result.shape == (ts.num_nodes, 1)
+        # Should work for multiple samples in single set too
+        result = ts.genetic_relatedness([[0, 1, 2]], indexes=[(0, 0)], mode="node")
+        assert result.shape == (ts.num_nodes, 1)
+
 
 class TestSiteGeneticRelatedness(TestGeneticRelatedness, MutatedTopologyExamplesMixin):
     mode = "site"
 
+    def test_single_sample_set_self_comparison(self, ts_12_highrecomb_fixture):
+        # Test for issue #3055 - self-comparisons with single sample set
+        ts = ts_12_highrecomb_fixture
+        # Single sample set with self-comparison
+        result = ts.genetic_relatedness([[0]], indexes=[(0, 0)], mode="site")
+        assert result.shape == (1,)
+        # Should work for multiple samples in single set too
+        result = ts.genetic_relatedness([[0, 1, 2]], indexes=[(0, 0)], mode="site")
+        assert result.shape == (1,)
+        # Test with multiple self-comparisons
+        result = ts.genetic_relatedness(
+            [[0], [1]], indexes=[(0, 0), (1, 1)], mode="site"
+        )
+        assert result.shape == (2,)
+
     def test_match_K_c0(self):
         # This test checks that ts.genetic_relatedness() matches K_c0
         # from Speed & Balding (2014) https://www.nature.com/articles/nrg3821
diff --git a/python/tskit/trees.py b/python/tskit/trees.py
index e740181f81..dc942f10f7 100644
--- a/python/tskit/trees.py
+++ b/python/tskit/trees.py
@@ -8106,6 +8106,7 @@ def __k_way_sample_set_stat(
         span_normalise=True,
         polarised=False,
         centre=True,
+        allow_self_comparisons=False,
     ):
         sample_set_sizes = np.array(
             [len(sample_set) for sample_set in sample_sets], dtype=np.uint32
@@ -8132,6 +8133,24 @@ def __k_way_sample_set_stat(
                 "Indexes must be convertable to a 2D numpy array with {} "
                 "columns".format(k)
             )
+        # For genetic_relatedness, we allow self-comparisons with a single sample set
+        if allow_self_comparisons and len(sample_sets) < k:
+            # Check that all indexes are valid
+            if np.any(indexes >= len(sample_sets)) or np.any(indexes < 0):
+                raise ValueError("Index out of bounds")
+            # Find which sample sets we actually need
+            unique_indexes = np.unique(indexes)
+            if np.max(unique_indexes) < len(sample_sets):
+                # we need to pad with dummy sets to satisfy the C side
+                # requirement of having at least k sets
+                sample_sets = list(sample_sets)
+                sample_set_sizes = list(sample_set_sizes)
+                while len(sample_sets) < k:
+                    # Add a dummy sample set that won't be used
+                    sample_sets.append(np.array([sample_sets[0][0]], dtype=np.int32))
+                    sample_set_sizes.append(1)
+                sample_set_sizes = np.array(sample_set_sizes, dtype=np.uint32)
+                flattened = util.safe_np_int_cast(np.hstack(sample_sets), np.int32)
         stat = self.__run_windowed_stat(
             windows,
             ll_method,
@@ -8702,6 +8721,7 @@ def genetic_relatedness(
             span_normalise=span_normalise,
             polarised=polarised,
             centre=centre,
+            allow_self_comparisons=True,
         )
         if proportion:
             # TODO this should be done in C also

From cd830f7aeadab92f1308fa554ee6aa79d713cf27 Mon Sep 17 00:00:00 2001
From: Andrew Kern <adkern@uoregon.edu>
Date: Thu, 26 Jun 2025 14:48:08 -0700
Subject: [PATCH 2/2] fix: use fixture for self comparison test

---
 python/tests/test_tree_stats.py | 43 +++++++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

diff --git a/python/tests/test_tree_stats.py b/python/tests/test_tree_stats.py
index 436db02131..37eb492119 100644
--- a/python/tests/test_tree_stats.py
+++ b/python/tests/test_tree_stats.py
@@ -2311,9 +2311,9 @@ def test_shapes(self, proportion):
 class TestBranchGeneticRelatedness(TestGeneticRelatedness, TopologyExamplesMixin):
     mode = "branch"
 
-    def test_single_sample_set_self_comparison(self):
+    def test_single_sample_set_self_comparison(self, ts_12_highrecomb_fixture):
         # Test for issue #3055 - self-comparisons with single sample set
-        ts = msprime.simulate(sample_size=10, Ne=10000, length=1000, random_seed=42)
+        ts = ts_12_highrecomb_fixture
         # Single sample set with self-comparison
         result = ts.genetic_relatedness([[0]], indexes=[(0, 0)], mode="branch")
         assert result.shape == (1,)
@@ -2321,6 +2321,19 @@ def test_single_sample_set_self_comparison(self):
         result = ts.genetic_relatedness([[0, 1, 2]], indexes=[(0, 0)], mode="branch")
         assert result.shape == (1,)
 
+    def test_single_sample_set_invalid_indexes(self, ts_12_highrecomb_fixture):
+        # Test that invalid indexes raise ValueError with single sample set
+        ts = ts_12_highrecomb_fixture
+        # Index out of bounds (only have 1 sample set, but trying to access index 1)
+        with pytest.raises(ValueError, match="Index out of bounds"):
+            ts.genetic_relatedness([[0]], indexes=[(0, 1)], mode="branch")
+        # Negative index
+        with pytest.raises(ValueError, match="Index out of bounds"):
+            ts.genetic_relatedness([[0]], indexes=[(-1, 0)], mode="branch")
+        # Both indexes out of bounds
+        with pytest.raises(ValueError, match="Index out of bounds"):
+            ts.genetic_relatedness([[0, 1]], indexes=[(2, 2)], mode="branch")
+
     @pytest.mark.parametrize("polarised", [True, False])
     def test_simple_tree_noncentred(self, polarised):
         # 2.00┊   4   ┊
@@ -2385,6 +2398,19 @@ def test_single_sample_set_self_comparison(self, ts_12_highrecomb_fixture):
         result = ts.genetic_relatedness([[0, 1, 2]], indexes=[(0, 0)], mode="node")
         assert result.shape == (ts.num_nodes, 1)
 
+    def test_single_sample_set_invalid_indexes(self, ts_12_highrecomb_fixture):
+        # Test that invalid indexes raise ValueError with single sample set
+        ts = ts_12_highrecomb_fixture
+        # Index out of bounds (only have 1 sample set, but trying to access index 1)
+        with pytest.raises(ValueError, match="Index out of bounds"):
+            ts.genetic_relatedness([[0]], indexes=[(0, 1)], mode="node")
+        # Negative index
+        with pytest.raises(ValueError, match="Index out of bounds"):
+            ts.genetic_relatedness([[0]], indexes=[(-1, 0)], mode="node")
+        # Both indexes out of bounds
+        with pytest.raises(ValueError, match="Index out of bounds"):
+            ts.genetic_relatedness([[0, 1]], indexes=[(2, 2)], mode="node")
+
 
 class TestSiteGeneticRelatedness(TestGeneticRelatedness, MutatedTopologyExamplesMixin):
     mode = "site"
@@ -2404,6 +2430,19 @@ def test_single_sample_set_self_comparison(self, ts_12_highrecomb_fixture):
         )
         assert result.shape == (2,)
 
+    def test_single_sample_set_invalid_indexes(self, ts_12_highrecomb_fixture):
+        # Test that invalid indexes raise ValueError with single sample set
+        ts = ts_12_highrecomb_fixture
+        # Index out of bounds (only have 1 sample set, but trying to access index 1)
+        with pytest.raises(ValueError, match="Index out of bounds"):
+            ts.genetic_relatedness([[0]], indexes=[(0, 1)], mode="site")
+        # Negative index
+        with pytest.raises(ValueError, match="Index out of bounds"):
+            ts.genetic_relatedness([[0]], indexes=[(-1, 0)], mode="site")
+        # Both indexes out of bounds
+        with pytest.raises(ValueError, match="Index out of bounds"):
+            ts.genetic_relatedness([[0, 1]], indexes=[(2, 2)], mode="site")
+
     def test_match_K_c0(self):
         # This test checks that ts.genetic_relatedness() matches K_c0
         # from Speed & Balding (2014) https://www.nature.com/articles/nrg3821