Skip to content

Commit 0f30746

Browse files
committed
Correct writing CRAM alignment span field for unmapped data.
The specification states unmapped data must have alignment start and alignment span both zero. Previously span was 1 (from end-start+1). We now adhere to the specification for 3.1 onwards, but it is left as-is (incorrect) for 3.0. See previous commit for implications on reading. Outputting spec compliant CRAM files would make older builds of htslib/samtools fail to return unmapped data from a CRAM index query. This is not helpful, so in this case we feel the specification is best amended to permit other values in the alignment span field (albeit to keep the existing values as recommendations).
1 parent 5dc29e9 commit 0f30746

File tree

3 files changed

+29
-7
lines changed

3 files changed

+29
-7
lines changed

cram/cram_encode.c

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1397,6 +1397,9 @@ static int add_read_names(cram_fd *fd, cram_container *c, cram_slice *s,
13971397
return -1;
13981398
}
13991399

1400+
// CRAM version >= 3.1
1401+
#define CRAM_ge31(v) ((v) >= 0x301)
1402+
14001403
/*
14011404
* Encodes all slices in a container into blocks.
14021405
* Returns 0 on success
@@ -1526,6 +1529,12 @@ int cram_encode_container(cram_fd *fd, cram_container *c) {
15261529
s->hdr->ref_seq_id = -2;
15271530
s->hdr->ref_seq_start = 0;
15281531
s->hdr->ref_seq_span = 0;
1532+
} else if (c->ref_id == -1 && CRAM_ge31(fd->version)) {
1533+
// Spec states span=0, but it broke our range queries.
1534+
// See commit message for this and prior.
1535+
s->hdr->ref_seq_id = -1;
1536+
s->hdr->ref_seq_start = 0;
1537+
s->hdr->ref_seq_span = 0;
15291538
} else {
15301539
s->hdr->ref_seq_id = c->ref_id;
15311540
s->hdr->ref_seq_start = first_base;
@@ -1923,8 +1932,15 @@ int cram_encode_container(cram_fd *fd, cram_container *c) {
19231932
}
19241933

19251934
c->ref_seq_id = c->slices[0]->hdr->ref_seq_id;
1926-
c->ref_seq_start = c->slices[0]->hdr->ref_seq_start;
1927-
c->ref_seq_span = c->slices[0]->hdr->ref_seq_span;
1935+
if (c->ref_seq_id == -1 && CRAM_ge31(fd->version)) {
1936+
// Spec states span=0, but it broke our range queries.
1937+
// See commit message for this and prior.
1938+
c->ref_seq_start = 0;
1939+
c->ref_seq_span = 0;
1940+
} else {
1941+
c->ref_seq_start = c->slices[0]->hdr->ref_seq_start;
1942+
c->ref_seq_span = c->slices[0]->hdr->ref_seq_span;
1943+
}
19281944
for (i = 0; i < c->curr_slice; i++) {
19291945
cram_slice *s = c->slices[i];
19301946

@@ -2592,12 +2608,18 @@ static char *cram_encode_aux(cram_fd *fd, bam_seq_t *b, cram_container *c,
25922608
*
25932609
* See cram_next_container() and cram_close().
25942610
*/
2595-
void cram_update_curr_slice(cram_container *c) {
2611+
void cram_update_curr_slice(cram_container *c, int version) {
25962612
cram_slice *s = c->slice;
25972613
if (c->multi_seq) {
25982614
s->hdr->ref_seq_id = -2;
25992615
s->hdr->ref_seq_start = 0;
26002616
s->hdr->ref_seq_span = 0;
2617+
} else if (c->curr_ref == -1 && CRAM_ge31(version)) {
2618+
// Spec states span=0, but it broke our range queries.
2619+
// See commit message for this and prior.
2620+
s->hdr->ref_seq_id = -1;
2621+
s->hdr->ref_seq_start = 0;
2622+
s->hdr->ref_seq_span = 0;
26012623
} else {
26022624
s->hdr->ref_seq_id = c->curr_ref;
26032625
s->hdr->ref_seq_start = c->first_base;
@@ -2632,7 +2654,7 @@ static cram_container *cram_next_container(cram_fd *fd, bam_seq_t *b) {
26322654
c->curr_ref = bam_ref(b);
26332655

26342656
if (c->slice)
2635-
cram_update_curr_slice(c);
2657+
cram_update_curr_slice(c, fd->version);
26362658

26372659
/* Flush container */
26382660
if (c->curr_slice == c->max_slice ||

cram/cram_encode.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ int cram_encode_container(cram_fd *fd, cram_container *c);
106106
*
107107
* See cram_next_container() and cram_close().
108108
*/
109-
void cram_update_curr_slice(cram_container *c);
109+
void cram_update_curr_slice(cram_container *c, int version);
110110

111111
#ifdef __cplusplus
112112
}

cram/cram_io.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5342,7 +5342,7 @@ int cram_flush(cram_fd *fd) {
53425342

53435343
if (fd->mode == 'w' && fd->ctr) {
53445344
if(fd->ctr->slice)
5345-
cram_update_curr_slice(fd->ctr);
5345+
cram_update_curr_slice(fd->ctr, fd->version);
53465346

53475347
if (-1 == cram_flush_container_mt(fd, fd->ctr))
53485348
return -1;
@@ -5449,7 +5449,7 @@ int cram_close(cram_fd *fd) {
54495449

54505450
if (fd->mode == 'w' && fd->ctr) {
54515451
if(fd->ctr->slice)
5452-
cram_update_curr_slice(fd->ctr);
5452+
cram_update_curr_slice(fd->ctr, fd->version);
54535453

54545454
if (-1 == cram_flush_container_mt(fd, fd->ctr))
54555455
return -1;

0 commit comments

Comments
 (0)