Skip to content
Open
167 changes: 77 additions & 90 deletions src/trim_primer_quality.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include "trim_primer_quality.h"

#include <map>
#define round_int(x, total) \
((int)(0.5 + ((float)x / float(total)) * 10000)) / (float)100

Expand Down Expand Up @@ -388,90 +388,6 @@ std::vector<primer> insertionSort(std::vector<primer> primers, uint32_t n) {
return primers;
}

// For paired reads
void get_overlapping_primers(bam1_t *r, std::vector<primer> &primers,
std::vector<primer> &overlapped_primers) {
overlapped_primers.clear();
uint32_t start_pos = -1;
char strand = '+';
if (bam_is_rev(r)) {
start_pos = bam_endpos(r) - 1;
strand = '-';
} else {
start_pos = r->core.pos;
}

int low = 0;
int high = primers.size();
// then we iterate and push what fits
int loc_exact = binary_search(primers, start_pos, low,
high);
primer possible_match;
if(loc_exact >= low && loc_exact < high){
possible_match = primers[loc_exact];
if(start_pos >= possible_match.get_start() && start_pos <= possible_match.get_end() &&
(strand == possible_match.get_strand() || possible_match.get_strand() == 0)){
overlapped_primers.push_back(possible_match);
}
}
int loc = 0;
bool done_right = false;
bool done_left = false;
int i = 1;
while(!done_left && !done_right){
loc = loc_exact + i;
if(loc >= low && loc < high){
possible_match = primers[loc];

if(start_pos >= possible_match.get_start() && start_pos <= possible_match.get_end() &&
(strand == possible_match.get_strand() || possible_match.get_strand() == 0)){
overlapped_primers.push_back(possible_match);
}
if(start_pos < possible_match.get_start()){
done_right = true;
}
} else{
done_right = true;
}

loc = loc_exact - i;
if(loc >= low && loc < high){
possible_match = primers[loc];
if(start_pos >= possible_match.get_start() && start_pos <= possible_match.get_end() &&
(strand == possible_match.get_strand() || possible_match.get_strand() == 0)){
overlapped_primers.push_back(possible_match);
}
if(start_pos > possible_match.get_end()){
done_left = true;
}
} else{
done_left = true;
}
i++;
}
}

// For unpaired reads
void get_overlapping_primers(bam1_t *r, std::vector<primer> primers,
std::vector<primer> &overlapped_primers,
bool unpaired_rev) {
overlapped_primers.clear();
uint32_t start_pos = -1;
char strand = '+';
if (unpaired_rev) {
start_pos = bam_endpos(r) - 1;
strand = '-';
} else {
start_pos = r->core.pos;
}
for (std::vector<primer>::iterator it = primers.begin(); it != primers.end();
++it) {
if (start_pos >= it->get_start() && start_pos <= it->get_end() &&
(strand == it->get_strand() || it->get_strand() == 0))
overlapped_primers.push_back(*it);
}
}

void condense_cigar(cigar_ *t) {
uint32_t i = 0, len = 0, cig, next_cig;
while (i < t->nlength - 1) {
Expand Down Expand Up @@ -542,6 +458,36 @@ int iterate_aln(std::vector<bam1_t *>::iterator &aln_itr,
return iterate_reads;
}

std::vector<std::map<uint32_t, std::vector<primer>>> find_primer_per_position(std::vector<primer> primers){
//end pos of last primer
uint32_t last_pos = 0;
for (uint32_t i = 0; i < primers.size(); i++){
if (primers[i].get_end() > last_pos){
last_pos = primers[i].get_end();
}
}
std::map<uint32_t, std::vector<primer>> primer_map_forward;
std::map<uint32_t, std::vector<primer>> primer_map_reverse;
for(uint32_t j = 0; j < last_pos; j++){
for (uint32_t i = 0; i < primers.size(); i++){
uint32_t start = primers[i].get_start();
uint32_t end = primers[i].get_end();
char strand = primers[i].get_strand();
if(start <= j && j <= end){
if(strand == '+'){
primer_map_forward[j].push_back(primers[i]);
}else{
primer_map_reverse[j].push_back(primers[i]);
}
}
}
}
std::vector<std::map<uint32_t, std::vector<primer>>> hash;
hash.push_back(primer_map_forward);
hash.push_back(primer_map_reverse);
return hash;
}

int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out,
uint8_t min_qual, uint8_t sliding_window,
std::string cmd, bool write_no_primer_reads,
Expand All @@ -567,6 +513,11 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out,
amplicons.inOrder();
}

// calculate the primers that should cover each position
std::vector<std::map<uint32_t, std::vector<primer>>> hash = find_primer_per_position(primers);
std::map<uint32_t, std::vector<primer>> primer_map_forward = hash[0];
std::map<uint32_t, std::vector<primer>> primer_map_reverse = hash[1];

// Read in input file
samFile *in;

Expand Down Expand Up @@ -642,12 +593,31 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out,
std::vector<primer> sorted_primers = insertionSort(primers, primers.size());

std::vector<bam1_t *>::iterator aln_itr = alns.begin();
uint32_t start_pos = -1;
char strand = '+';

// Iterate through reads
while (iterate_aln(aln_itr, alns, header, in, aln) >= 0) {
unmapped_flag = false;
primer_trimmed = false;
get_overlapping_primers(aln, sorted_primers, overlapping_primers);
strand = '+';
if (bam_is_rev(aln)) {
start_pos = bam_endpos(aln) - 1;
strand = '-';
} else {
start_pos = aln->core.pos;
}
overlapping_primers.clear();
if(strand == '+'){
if (primer_map_forward.find(start_pos) != primer_map_forward.end()) {
overlapping_primers = primer_map_forward[start_pos];
}
}else{
if (primer_map_reverse.find(start_pos) != primer_map_reverse.end()){
overlapping_primers = primer_map_reverse[start_pos];
}
}

if ((aln->core.flag & BAM_FUNMAP) == 0) { // If mapped
// if primer pair info provided, check if read correctly overlaps with
// atleast one amplicon
Expand All @@ -672,7 +642,16 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out,
(abs(aln->core.isize) - max_primer_len) > abs(aln->core.l_qseq);
// if reverse strand
if ((aln->core.flag & BAM_FPAIRED) != 0 && isize_flag) { // If paired
get_overlapping_primers(aln, sorted_primers, overlapping_primers);
overlapping_primers.clear();
if(strand == '+'){
if (primer_map_forward.find(start_pos) != primer_map_forward.end()) {
overlapping_primers = primer_map_forward[start_pos];
}
}else{ //FIX THIS DUMMY
if (primer_map_reverse.find(start_pos) != primer_map_reverse.end()) {
overlapping_primers = primer_map_reverse[start_pos];
}
}
if (overlapping_primers.size() >
0) { // If read starts before overlapping regions (?)
primer_trimmed = true;
Expand Down Expand Up @@ -707,7 +686,11 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out,

// handle the unpaired reads
// forward primer unpaired read
get_overlapping_primers(aln, primers, overlapping_primers, false);
start_pos = aln->core.pos;
overlapping_primers.clear();
if (primer_map_forward.find(start_pos) != primer_map_forward.end()) {
overlapping_primers = primer_map_forward[start_pos];
}
if (overlapping_primers.size() > 0) {
primer_trimmed = true;
cand_primer = get_max_end(overlapping_primers);
Expand All @@ -721,8 +704,12 @@ int trim_bam_qual_primer(std::string bam, std::string bed, std::string bam_out,
}

// reverse primer unpaired read
get_overlapping_primers(aln, primers, overlapping_primers, true);

overlapping_primers.clear();
start_pos = bam_endpos(aln) - 1;
if (primer_map_reverse.find(start_pos) != primer_map_reverse.end()) {
overlapping_primers = primer_map_reverse[start_pos];
}

if (overlapping_primers.size() > 0) {
primer_trimmed = true;
cand_primer = get_min_start(overlapping_primers);
Expand Down
8 changes: 2 additions & 6 deletions src/trim_primer_quality.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include <stdint.h>
#include <string.h>

#include <map>
#include <cstring>
#include <fstream>
#include <iostream>
Expand Down Expand Up @@ -50,17 +50,13 @@ int32_t get_pos_on_reference(uint32_t *cigar, uint32_t ncigar, uint32_t pos,
void reverse_qual(uint8_t *q, int l);
void reverse_cigar(uint32_t *cigar, int l);
double mean_quality(uint8_t *a, int s, int e);
std::vector<std::map<uint32_t, std::vector<primer>>> find_primer_per_position(std::vector<primer> primers);
cigar_ quality_trim(bam1_t *r, uint8_t qual_threshold, uint8_t sliding_window);
void print_cigar(uint32_t *cigar, int nlength);
cigar_ primer_trim(bam1_t *r, bool &isize_flag, int32_t new_pos,
bool unpaired_rev);
void replace_cigar(bam1_t *b, uint32_t n, uint32_t *cigar);
void condense_cigar(cigar_ *t);
void get_overlapping_primers(bam1_t *r, std::vector<primer> &primers,
std::vector<primer> &overlapping_primers);
void get_overlapping_primers(bam1_t *r, std::vector<primer> primers,
std::vector<primer> &overlapping_primers,
bool unpaired_rev);
int get_bigger_primer(std::vector<primer> primers);
bool amplicon_filter(IntervalTree amplicons, bam1_t *r);
std::vector<primer> insertionSort(std::vector<primer> primers, uint32_t n);
Expand Down
25 changes: 24 additions & 1 deletion tests/test_primer_trim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,30 @@ int main() {
isize_flag =
(abs(aln->core.isize) - max_primer_len) > abs(aln->core.l_qseq);
std::cout << bam_get_qname(aln) << std::endl;
get_overlapping_primers(aln, sorted_primers, overlapping_primers);
std::vector<std::map<uint32_t, std::vector<primer>>> hash = find_primer_per_position(primers);
std::map<uint32_t, std::vector<primer>> primer_map_forward = hash[0];
std::map<uint32_t, std::vector<primer>> primer_map_reverse = hash[1];

uint32_t start_pos = -1;
char strand = '+';
if (bam_is_rev(aln)) {
start_pos = bam_endpos(aln) - 1;
strand = '-';
} else {
start_pos = aln->core.pos;
}
overlapping_primers.clear();
if(strand == '+'){
if (primer_map_forward.find(start_pos) != primer_map_forward.end()) {
overlapping_primers = primer_map_forward[start_pos];
}
}else{
if (primer_map_reverse.find(start_pos) != primer_map_reverse.end()){
overlapping_primers = primer_map_reverse[start_pos];
}
}
//get_overlapping_primers(aln, sorted_primers, overlapping_primers);

if (overlapping_primers.size() != overlapping_primer_sizes[ctr]) {
success = -1;
std::cout << "Overlapping primer sizes for " << bam_get_qname(aln)
Expand Down
23 changes: 20 additions & 3 deletions tests/test_primer_trim_edge_cases.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ int main() {
}
}
}
// calculate the primers that should cover each position
std::vector<std::map<uint32_t, std::vector<primer>>> hash = find_primer_per_position(primers);
std::map<uint32_t, std::vector<primer>> primer_map_forward = hash[0];
std::map<uint32_t, std::vector<primer>> primer_map_reverse = hash[1];
bam_hdr_t *header = sam_hdr_read(in);
region_.assign(header->target_name[0]);
std::string temp(header->text);
Expand All @@ -39,16 +43,23 @@ int main() {
std::vector<primer> overlapping_primers;
primer cand_primer;
bool isize_flag = false;
uint32_t start_pos = -1;
while (sam_itr_next(in, iter, aln) >= 0) {
if ((aln->core.flag & BAM_FUNMAP) != 0) {
continue;
}

start_pos = aln->core.pos;
isize_flag =
(abs(aln->core.isize) - max_primer_len) > abs(aln->core.l_qseq);
len = bam_cigar2qlen(aln->core.n_cigar, bam_get_cigar(aln));
std::cout << bam_get_qname(aln) << std::endl;
print_cigar(bam_get_cigar(aln), aln->core.n_cigar);
get_overlapping_primers(aln, primers, overlapping_primers, false);
overlapping_primers.clear();
if (primer_map_forward.find(start_pos) != primer_map_forward.end()) {
overlapping_primers = primer_map_forward[start_pos];
}
//get_overlapping_primers(aln, primers, overlapping_primers, false);
if (overlapping_primers.size() > 0) {
// Forward trim
cand_primer = get_max_end(overlapping_primers);
Expand All @@ -61,7 +72,13 @@ int main() {
cigar = bam_get_cigar(aln);
print_cigar(cigar, aln->core.n_cigar);
// Reverse trim
get_overlapping_primers(aln, primers, overlapping_primers, true);
start_pos = bam_endpos(aln) - 1;
overlapping_primers.clear();
if (primer_map_reverse.find(start_pos) != primer_map_reverse.end()){
overlapping_primers = primer_map_reverse[start_pos];
}
;
//get_overlapping_primers(aln, primers, overlapping_primers, true);
if (overlapping_primers.size() > 0) {
cand_primer = get_min_start(overlapping_primers);
t = primer_trim(aln, isize_flag, cand_primer.get_start() - 1, true);
Expand All @@ -82,7 +99,7 @@ int main() {
success = -1;
std::cout << "Cigar length and read length don't match after trimming"
<< std::endl;
std::cout << "Expected" << len << ". Got "
std::cout << "Expected " << len << ". Got "
<< bam_cigar2qlen(aln->core.n_cigar, bam_get_cigar(aln))
<< std::endl;
}
Expand Down
22 changes: 20 additions & 2 deletions tests/test_unpaired_trim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@ int main() {
std::vector<primer> overlapping_primers;
primer cand_primer;
bool isize_flag = false;
uint32_t start_pos = -1;
// calculate the primers that should cover each position
std::vector<std::map<uint32_t, std::vector<primer>>> hash = find_primer_per_position(primers);
std::map<uint32_t, std::vector<primer>> primer_map_forward = hash[0];
std::map<uint32_t, std::vector<primer>> primer_map_reverse = hash[1];

while (sam_itr_next(in, iter, aln) >= 0) {
if ((aln->core.flag & BAM_FUNMAP) != 0) {
continue;
Expand All @@ -85,10 +91,17 @@ int main() {
(abs(aln->core.isize) - max_primer_len) > abs(aln->core.l_qseq);
std::cout << bam_get_qname(aln) << std::endl;
std::cout << std::endl << "Forward" << std::endl;
get_overlapping_primers(aln, primers, overlapping_primers, false);
overlapping_primers.clear();
start_pos = aln->core.pos;
if (primer_map_forward.find(start_pos) != primer_map_forward.end()) {
overlapping_primers.clear();
overlapping_primers = primer_map_forward[start_pos];
}
start_pos = bam_endpos(aln) - 1;
// Forward primer
if (overlapping_primers.size() > 0) {
cand_primer = get_max_end(overlapping_primers);
std::cout << cand_primer.get_start() << " " << cand_primer.get_end() << std::endl;
t = primer_trim(aln, isize_flag, cand_primer.get_end() + 1, false);
aln->core.pos += t.start_pos;
replace_cigar(aln, t.nlength, t.cigar);
Expand All @@ -108,7 +121,12 @@ int main() {
}
// Reverse primer
std::cout << std::endl << "Reverse" << std::endl;
get_overlapping_primers(aln, primers, overlapping_primers, true);
overlapping_primers.clear();
if (primer_map_reverse.find(start_pos) != primer_map_reverse.end()){
overlapping_primers = primer_map_reverse[start_pos];
}

//get_overlapping_primers(aln, primers, overlapping_primers, true);
if (overlapping_primers.size() > 0) {
cand_primer = get_min_start(overlapping_primers);
free_cigar(t);
Expand Down