Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions src/iranges/IRanges.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from biocutils import Names, combine_rows, combine_sequences, show_as_cell

from .interval import (
calc_gap_and_overlap,
calc_gap_and_overlap_position,
create_np_interval_vector,
)

Expand Down Expand Up @@ -1772,6 +1772,7 @@ def _generic_find_hits(
max_gap,
min_overlap,
select,
query_type="any",
delete_index=False,
):
self._build_ncls_index()
Expand All @@ -1788,10 +1789,14 @@ def _generic_find_hits(
if select != "all" and len(all_overlaps[_q_idx]) > 0:
continue

_gap, _overlap = calc_gap_and_overlap(
_gap, _overlap, _position = calc_gap_and_overlap_position(
(query._start[_q_idx], query._start[_q_idx] + query._width[_q_idx]),
(self._start[_s_idx], self._start[_s_idx] + self._width[_s_idx]),
)

if query_type != "any" and query_type != _position:
continue

_append = True

if _gap is not None and _gap > max_gap:
Expand Down Expand Up @@ -1877,7 +1882,14 @@ def find_overlaps(
_tgap = 0 if max_gap == -1 else max_gap

all_overlaps = self._generic_find_hits(
query, _tgap, _tgap, max_gap, min_overlap, select, delete_index=delete_index
query,
_tgap,
_tgap,
max_gap,
min_overlap,
select,
query_type=query_type,
delete_index=delete_index,
)
return all_overlaps

Expand Down Expand Up @@ -2189,7 +2201,7 @@ def distance(self, query: "IRanges") -> np.ndarray:
for i in range(len(self)):
i_self = self[i]
i_query = query[i]
_gap, _overlap = calc_gap_and_overlap(
_gap, _overlap, _position = calc_gap_and_overlap_position(
(i_self.start[0], i_self.end[0]), (i_query.start[0], i_query.end[0])
)

Expand Down
42 changes: 30 additions & 12 deletions src/iranges/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,10 @@ def create_np_interval_vector(
return cov[1:], revmap


def calc_gap_and_overlap_position(
    first: Tuple[int, int], second: Tuple[int, int]
) -> Tuple[Optional[int], Optional[int], Optional[str]]:
    """Calculate gap and/or overlap between two intervals, including overlap position.

    Args:
        first:
            Interval containing start and end positions.
            `end` is non-inclusive.

        second:
            Interval containing start and end positions.
            `end` is non-inclusive.

    Returns:
        A tuple of (gap, overlap, overlap_position):
        - gap: The gap between the intervals if non-overlapping, else None.
          Adjacent intervals (one ends exactly where the other starts)
          have a gap of 0.
        - overlap: The overlap size if overlapping, else None.
        - overlap_position: Where the overlap occurs relative to the
          first interval, or None if the intervals do not overlap.
          One of:
            - 'within': `second` covers `first` entirely
              (this includes the two intervals being identical).
            - 'start': `second` begins before `first` and ends before
              `first` ends, i.e. it covers the start of `first`.
            - 'end': `second` begins after `first` starts and ends
              after `first` ends, i.e. it covers the end of `first`.
            - 'any': there is overlap but none of the cases above
              applies (`second` lies inside `first` without covering it).
    """
    start_first, end_first = first
    start_second, end_second = second

    # Half-open intervals overlap only when each one starts strictly
    # before the other one ends.
    if end_first > start_second and end_second > start_first:
        overlap = min(end_first, end_second) - max(start_first, start_second)

        # Order matters: the containment test must run first, so that the
        # 'start' / 'end' branches only see partial overlaps.
        if start_second <= start_first and end_second >= end_first:
            overlap_position = "within"
        elif start_second < start_first:
            overlap_position = "start"
        elif end_second > end_first:
            overlap_position = "end"
        else:
            overlap_position = "any"

        return None, overlap, overlap_position

    # Non-overlapping: exactly one of the two differences is the real
    # (non-negative) distance between the intervals; the other is negative
    # for well-formed intervals, so `max` selects the gap.
    gap = max(start_first - end_second, start_second - end_first)
    return gap, None, None