Skip to content

Commit f1bb470

Browse files
authored
feat: Add bam::Record::set_cigar (#477)
* Fix typo
* Add new set_cigar method
* Add test and fix for set_cigar
1 parent 1c22ac5 commit f1bb470

File tree

3 files changed

+118
-1
lines changed

3 files changed

+118
-1
lines changed

src/bam/mod.rs

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1933,6 +1933,67 @@ CCCCCCCCCCCCCCCCCCC"[..],
19331933
assert_eq!(rec.qname(), b"r0");
19341934
}
19351935

1936+
// Checks that `Record::set_cigar` swaps in a new cigar (single-op, one-op-per-base,
// and none at all) and that qname, sequence, qualities and the `NM` aux tag still
// read back unchanged after every replacement.
#[test]
1937+
fn test_set_cigar() {
1938+
let (names, _, seqs, quals, cigars) = gold();
1939+
1940+
assert!(names[0] != names[1]);
1941+
1942+
for i in 0..names.len() {
1943+
let mut rec = record::Record::new();
1944+
rec.set(names[i], Some(&cigars[i]), seqs[i], quals[i]);
1945+
// Attach an aux tag; it is re-read after each `set_cigar` call below.
rec.push_aux(b"NM", Aux::I32(15)).unwrap();
1946+
1947+
// Baseline: the record reads back exactly what was set.
assert_eq!(rec.qname(), names[i]);
1948+
assert_eq!(*rec.cigar(), cigars[i]);
1949+
assert_eq!(rec.seq().as_bytes(), seqs[i]);
1950+
assert_eq!(rec.qual(), quals[i]);
1951+
assert_eq!(rec.aux(b"NM").unwrap(), Aux::I32(15));
1952+
1953+
// boring cigar: a single Match op spanning the whole read length
1954+
let new_cigar = CigarString(vec![Cigar::Match(rec.seq_len() as u32)]);
1955+
assert_ne!(*rec.cigar(), new_cigar);
1956+
rec.set_cigar(Some(&new_cigar));
1957+
assert_eq!(*rec.cigar(), new_cigar);
1958+
1959+
assert_eq!(rec.qname(), names[i]);
1960+
assert_eq!(rec.seq().as_bytes(), seqs[i]);
1961+
assert_eq!(rec.qual(), quals[i]);
1962+
assert_eq!(rec.aux(b"NM").unwrap(), Aux::I32(15));
1963+
1964+
// bizarre cigar: one op per base, alternating Match(1) and Ins(1)
1965+
let new_cigar = (0..rec.seq_len())
1966+
.map(|i| {
1967+
if i % 2 == 0 {
1968+
Cigar::Match(1)
1969+
} else {
1970+
Cigar::Ins(1)
1971+
}
1972+
})
1973+
.collect::<Vec<_>>();
1974+
let new_cigar = CigarString(new_cigar);
1975+
assert_ne!(*rec.cigar(), new_cigar);
1976+
rec.set_cigar(Some(&new_cigar));
1977+
assert_eq!(*rec.cigar(), new_cigar);
1978+
1979+
assert_eq!(rec.qname(), names[i]);
1980+
assert_eq!(rec.seq().as_bytes(), seqs[i]);
1981+
assert_eq!(rec.qual(), quals[i]);
1982+
assert_eq!(rec.aux(b"NM").unwrap(), Aux::I32(15));
1983+
1984+
// empty cigar: passing `None` must read back as a zero-length CigarString
1985+
let new_cigar = CigarString(Vec::new());
1986+
assert_ne!(*rec.cigar(), new_cigar);
1987+
rec.set_cigar(None);
1988+
assert_eq!(*rec.cigar(), new_cigar);
1989+
1990+
assert_eq!(rec.qname(), names[i]);
1991+
assert_eq!(rec.seq().as_bytes(), seqs[i]);
1992+
assert_eq!(rec.qual(), quals[i]);
1993+
assert_eq!(rec.aux(b"NM").unwrap(), Aux::I32(15));
1994+
}
1995+
}
1996+
19361997
#[test]
19371998
fn test_remove_aux() {
19381999
let mut bam = Reader::from_path(Path::new("test/test.bam")).expect("Error opening file.");

src/bam/record.rs

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,62 @@ impl Record {
473473
self.inner_mut().core.l_extranul = extranul as u8;
474474
}
475475

476+
/// Replace current cigar with a new one.
///
/// Passing `None` is handled as a cigar with zero operations. The first
/// `qname_capacity()` bytes of the record data are left untouched; the bytes
/// stored after the old cigar are moved so that they directly follow the new
/// one. `l_data` is adjusted by the size difference, and the buffer is grown
/// through `realloc_var_data` when the new `l_data` exceeds `m_data`.
/// Finally `core.n_cigar` is set to the new number of operations.
477+
pub fn set_cigar(&mut self, new_cigar: Option<&CigarString>) {
478+
// NOTE(review): `self.cigar` is presumably a cached decoded cigar that would
// go stale once the raw data changes -- confirm against the field definition.
self.cigar = None;
479+
480+
let qname_data_len = self.qname_capacity();
481+
// Each cigar operation is written below as one `u32`, i.e. 4 bytes per op.
let old_cigar_data_len = self.cigar_len() * 4;
482+
483+
// Length of data after cigar
// (captured now, before `l_data` is adjusted, so it describes the old layout)
484+
let other_data_len = self.inner_mut().l_data - (qname_data_len + old_cigar_data_len) as i32;
485+
486+
let new_cigar_len = match new_cigar {
487+
Some(x) => x.len(),
488+
None => 0,
489+
};
490+
let new_cigar_data_len = new_cigar_len * 4;
491+
492+
// Shrinking only lowers the recorded length; growing raises it and may
// require a larger buffer. Equal sizes leave `l_data` alone.
if new_cigar_data_len < old_cigar_data_len {
493+
self.inner_mut().l_data -= (old_cigar_data_len - new_cigar_data_len) as i32;
494+
} else if new_cigar_data_len > old_cigar_data_len {
495+
self.inner_mut().l_data += (new_cigar_data_len - old_cigar_data_len) as i32;
496+
497+
// Reallocate if necessary
498+
if (self.inner().m_data as i32) < self.inner().l_data {
499+
// Verbosity due to lexical borrowing
500+
let l_data = self.inner().l_data;
501+
self.realloc_var_data(l_data as usize);
502+
}
503+
}
504+
505+
if new_cigar_data_len != old_cigar_data_len {
506+
// Move other data to new location
// Destination is the first argument, source the second. `memmove` is used
// because both ranges lie in the same buffer and may overlap.
// NOTE(review): relies on `self.inner.data` addressing a buffer of at least
// `l_data` bytes here -- confirm `realloc_var_data` updates that pointer.
507+
unsafe {
508+
::libc::memmove(
509+
self.inner.data.add(qname_data_len + new_cigar_data_len) as *mut ::libc::c_void,
510+
self.inner.data.add(qname_data_len + old_cigar_data_len) as *mut ::libc::c_void,
511+
other_data_len as usize,
512+
);
513+
}
514+
}
515+
516+
// Copy cigar data
// The cigar region starts right after the qname region and is viewed as a
// `u32` slice with one element per operation.
517+
if let Some(cigar_string) = new_cigar {
518+
let cigar_data = unsafe {
519+
#[allow(clippy::cast_ptr_alignment)]
520+
slice::from_raw_parts_mut(
521+
self.inner.data.add(qname_data_len) as *mut u32,
522+
cigar_string.len(),
523+
)
524+
};
525+
for (i, c) in cigar_string.iter().enumerate() {
526+
cigar_data[i] = c.encode();
527+
}
528+
}
529+
// Record the new number of cigar operations (0 when `new_cigar` is `None`).
self.inner_mut().core.n_cigar = new_cigar_len as u32;
530+
}
531+
476532
fn realloc_var_data(&mut self, new_len: usize) {
477533
// pad request
478534
let new_len = new_len as u32;

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
//! ```
2121
//!
2222
//! We can reproduce that with Rust-Htslib. Reading BAM files and printing the header
23-
//! to the the screen is as easy as
23+
//! to the screen is as easy as
2424
//!
2525
//! ```
2626
//! use rust_htslib::{bam, bam::Read};

0 commit comments

Comments
 (0)