Skip to content

Commit 66f9499

Browse files
author
schellt
committed
Version 0.4
1 parent 89fb9af commit 66f9499

File tree

2 files changed

+84
-16
lines changed

2 files changed

+84
-16
lines changed

README.md

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# backmap.pl v0.3
1+
# backmap.pl v0.4
22

33
## Description
44
__Automatic read mapping and genome size estimation from coverage.__
@@ -31,14 +31,15 @@ Optional:
3131

3232
```
3333
backmap.pl [-a <assembly.fa> {-p <paired_1.fq>,<paired_2.fq> | -u <unpaired.fq> |
34-
-pb <pacbio.fq> | -ont <ont.fq> } | -b <mapping.bam>]
34+
-pb <clr.fq> | -hifi <hifi.fq> | -ont <ont.fq> } | -b <mapping.bam>]
3535
3636
Mandatory:
3737
-a STR Assembly where reads should be mapped to in fasta format
3838
AND AT LEAST ONE OF
3939
-p STR Two files with paired Illumina reads comma separated
4040
-u STR Fastq file with unpaired Illumina reads
41-
-pb STR Fasta or fastq file with PacBio reads
41+
-pb STR Fasta or fastq file with PacBio CLR reads
42+
-hifi STR Fasta or fastq file with PacBio HiFi reads
4243
-ont STR Fasta or fastq file with Nanopore reads
4344
OR
4445
-b STR Bam file to calculate coverage from
@@ -63,7 +64,8 @@ Options: [default]
6364
-ne Do not estimate genome size [off]
6465
-kt Keep temporary bam files [off]
6566
-bo STR Options passed to bwa [-a -c 10000]
66-
-mo STR Options passed to minimap [PacBio: -H -x map-pb; ONT: -x map-ont]
67+
-mo STR Options passed to minimap [CLR: -H -x map-pb; HiFi: -x asm20
68+
(minimap2<=2.18) or -x map-hifi (minimap2>2.18); ONT: -x map-ont]
6769
-qo STR Options passed to qualimap [none]
6870
Pass options with quotes e.g. -bo "<options>"
6971
-v Print executed commands to STDERR [off]
@@ -91,4 +93,4 @@ Ewels P, Magnusson M, Lundin S, Käller M (2016). MultiQC: summarize analysis re
9193
- bedtools:
9294
Quinlan AR, Hall IM (2010). BEDTools: a flexible suite of utilities for comparing genomic features. _Bioinformatics_, 26(6):841–842, <https://doi.org/10.1093/bioinformatics/btq033>
9395
- Rscript:
94-
R Core Team (2019). R: A Language and Environment for Statistical Computing. <http://www.R-project.org/>
96+
R Core Team (2021). R: A Language and Environment for Statistical Computing. <http://www.R-project.org/>

backmap.pl

Lines changed: 77 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
use Number::FormatEng qw(:all);
88
use Parallel::Loops;
99

10-
my $version = "0.3";
10+
my $version = "0.4";
1111

1212
sub print_help{
1313
print STDOUT "\n";
@@ -18,14 +18,15 @@ sub print_help{
1818
print STDOUT "\n";
1919
print STDOUT "Usage:\n";
2020
print STDOUT "\tbackmap.pl [-a <assembly.fa> {-p <paired_1.fq>,<paired_2.fq> | -u <unpaired.fq> |\n";
21-
print STDOUT "\t -pb <pacbio.fq> | -ont <ont.fq> } | -b <mapping.bam>]\n";
21+
print STDOUT "\t -pb <clr.fq> | -hifi <hifi.fq> | -ont <ont.fq> } | -b <mapping.bam>]\n";
2222
print STDOUT "\n";
2323
print STDOUT "Mandatory:\n";
2424
print STDOUT "\t-a STR\t\tAssembly were reads should mapped to in fasta format\n";
2525
print STDOUT "\tAND AT LEAST ONE OF\n";
2626
print STDOUT "\t-p STR\t\tTwo fastq files with paired Illumina reads comma sperated\n";
2727
print STDOUT "\t-u STR\t\tFastq file with unpaired Illumina reads\n";
28-
print STDOUT "\t-pb STR\t\tFasta or fastq file with PacBio reads\n";
28+
print STDOUT "\t-pb STR\t\tFasta or fastq file with PacBio CLR reads\n";
29+
print STDOUT "\t-hifi STR\tFasta or fastq file with PacBio HiFi reads\n";
2930
print STDOUT "\t-ont STR\tFasta or fastq file with Nanopore reads\n";
3031
print STDOUT "\tOR\n";
3132
print STDOUT "\t-b STR\t\tBam file to calculate coverage from\n";
@@ -47,7 +48,7 @@ sub print_help{
4748
print STDOUT "\t-ne\t\tDo not estimate genome size [off]\n";
4849
print STDOUT "\t-kt\t\tKeep temporary bam files [off]\n";
4950
print STDOUT "\t-bo STR\t\tOptions passed to bwa [-a -c 10000]\n";
50-
print STDOUT "\t-mo STR\t\tOptions passed to minimap [PacBio: -H -x map-pb; ONT: -x map-ont]\n";
51+
print STDOUT "\t-mo STR\t\tOptions passed to minimap [CLR: -H -x map-pb; HiFi: minimap<=2.18\n\t\t\t-x asm20 minimap>2.18 -x map-hifi; ONT: -x map-ont]\n";
5152
print STDOUT "\t-qo STR\t\tOptions passed to qualimap [none]\n";
5253
print STDOUT "\tPass options with quotes e.g. -bo \"<options>\"\n";
5354
print STDOUT "\t-v\t\tPrint executed commands to STDERR [off]\n";
@@ -84,6 +85,7 @@ sub round_format_pref{
8485
my @paired = ();
8586
my @unpaired = ();
8687
my @pb = ();
88+
my @hifi = ();
8789
my @ont = ();
8890
my $threads = 1;
8991
my $prefix = "";
@@ -130,6 +132,9 @@ sub round_format_pref{
130132
if ($ARGV[$i] eq "-pb"){
131133
push(@pb,$ARGV[$i+1]);
132134
}
135+
if ($ARGV[$i] eq "-hifi"){
136+
push(@hifi,$ARGV[$i+1]);
137+
}
133138
if ($ARGV[$i] eq "-ont"){
134139
push(@ont,$ARGV[$i+1]);
135140
}
@@ -250,7 +255,7 @@ sub round_format_pref{
250255
print STDERR "ERROR\tFile $assembly_path does not exist!\n";
251256
$input_error = 1;
252257
}
253-
if(scalar(@paired) == 0 and scalar(@unpaired) == 0 and scalar(@pb) == 0 and scalar(@ont) == 0){
258+
if(scalar(@paired) == 0 and scalar(@unpaired) == 0 and scalar(@pb) == 0 and scalar(@hifi) == 0 and scalar(@ont) == 0){
254259
print STDERR "ERROR\tNo reads specified!\n";
255260
$input_error = 1;
256261
}
@@ -363,6 +368,24 @@ sub round_format_pref{
363368
}
364369
}
365370

371+
my %hifi_filter;
372+
373+
if($assembly_path ne ""){
374+
foreach(@hifi){
375+
if(not -f "$_"){
376+
print STDERR "INFO\tNo file $_ - skipping this file\n";
377+
}
378+
else{
379+
if(exists($hifi_filter{abs_path($_)})){
380+
print STDERR "INFO\tFile " . abs_path($_) . " already specified\n";
381+
}
382+
else{
383+
$hifi_filter{abs_path($_)} = 1;
384+
}
385+
}
386+
}
387+
}
388+
366389
my %ont_filter;
367390

368391
if($assembly_path ne ""){
@@ -400,7 +423,7 @@ sub round_format_pref{
400423
}
401424

402425
if($assembly_path ne ""){
403-
if(scalar(keys(%paired_filter)) == 0 and scalar(keys(%unpaired_filter)) == 0 and scalar(keys(%pb_filter)) == 0 and scalar(keys(%ont_filter)) == 0){
426+
if(scalar(keys(%paired_filter)) == 0 and scalar(keys(%unpaired_filter)) == 0 and scalar(keys(%pb_filter)) == 0 and scalar(keys(%hifi_filter)) == 0 and scalar(keys(%ont_filter)) == 0){
404427
print STDERR "ERROR\tNo existing read files specified!\n";
405428
exit 1;
406429
}
@@ -436,12 +459,16 @@ sub round_format_pref{
436459
}
437460

438461
my $minimap_version;
462+
my $minimap_minor_version;
439463
if(not defined(can_run("minimap2"))){
440464
$minimap_version = "not detected";
441465
}
442466
else{
443467
$minimap_version = `minimap2 --version`;
444468
chomp $minimap_version;
469+
$minimap_minor_version = $minimap_version;
470+
$minimap_minor_version =~ s/-.*//;
471+
$minimap_minor_version =~ s/^.*\.//;
445472
}
446473

447474
my $samtools_version = `samtools --version | head -1 | sed 's/^samtools //'`;
@@ -592,6 +619,20 @@ sub round_format_pref{
592619
push(@pb_bam,"$out_dir/$prefix.pb$pb_counter.bam");
593620
}
594621

622+
my $hifi_counter = 0;
623+
my @hifi_bam = ();
624+
foreach(keys(%hifi_filter)){
625+
$hifi_counter++;
626+
if($minimap_minor_version <= 18){
627+
$cmd = "minimap2 $minimap_opts-x asm20 -a -t $threads $assembly_path $_ 2> $out_dir/$prefix\_minimap_hifi$hifi_counter.err | samtools view -1 -b - > $out_dir/$prefix.hifi$hifi_counter.bam";
628+
}
629+
else{
630+
$cmd = "minimap2 $minimap_opts-x map-hifi -a -t $threads $assembly_path $_ 2> $out_dir/$prefix\_minimap_hifi$hifi_counter.err | samtools view -1 -b - > $out_dir/$prefix.hifi$hifi_counter.bam";
631+
}
632+
exe_cmd($cmd,$verbose,$dry);
633+
push(@hifi_bam,"$out_dir/$prefix.hifi$hifi_counter.bam");
634+
}
635+
595636
my $ont_counter = 0;
596637
my @ont_bam = ();
597638
foreach(keys(%ont_filter)){
@@ -608,6 +649,7 @@ sub round_format_pref{
608649
my $paired_bam_files = join(" ",@paired_bam);
609650
my $unpaired_bam_files = join(" ",@unpaired_bam);
610651
my $pb_bam_files = join(" ",@pb_bam);
652+
my $hifi_bam_files = join(" ",@hifi_bam);
611653
my $ont_bam_files = join(" ",@ont_bam);
612654

613655
if($ill_bam_count > 0){
@@ -639,6 +681,20 @@ sub round_format_pref{
639681
push(@merged_bam_file, "$out_dir/$prefix.pb.bam");
640682
}
641683
}
684+
685+
if(scalar(@hifi_bam) > 0){
686+
if(scalar(@hifi_bam) == 1){
687+
my $single_bam = $hifi_bam[0];
688+
$cmd = "ln -fs $single_bam $out_dir/$prefix.hifi.bam";
689+
exe_cmd($cmd,$verbose,$dry);
690+
push(@merged_bam_file, "$out_dir/$prefix.hifi.bam");
691+
}
692+
else{
693+
$cmd = "samtools merge -@ $samtools_threads $out_dir/$prefix.hifi.bam $hifi_bam_files";
694+
exe_cmd($cmd,$verbose,$dry);
695+
push(@merged_bam_file, "$out_dir/$prefix.hifi.bam");
696+
}
697+
}
642698

643699
if(scalar(@ont_bam) > 0){
644700
if(scalar(@ont_bam) == 1){
@@ -676,7 +732,7 @@ sub round_format_pref{
676732
}
677733

678734
if($keep_tmp == 0){
679-
my $tmp_bams = join(" ",@paired_bam,@unpaired_bam,@pb_bam,@ont_bam,@merged_bam_file);
735+
my $tmp_bams = join(" ",@paired_bam,@unpaired_bam,@pb_bam,@hifi_bam,@ont_bam,@merged_bam_file);
680736
$cmd = "rm $tmp_bams";
681737
exe_cmd($cmd,$verbose,$dry);
682738
}
@@ -720,7 +776,10 @@ sub round_format_pref{
720776

721777
my $tech = "Illumina";
722778
if($_ =~ m/\.pb\.sort\.bam$/){
723-
$tech = "PacBio";
779+
$tech = "CLR";
780+
}
781+
if($_ =~ m/\.hifi\.sort\.bam$/){
782+
$tech = "HiFi";
724783
}
725784
if($_ =~ m/\.ont\.sort\.bam$/){
726785
$tech = "Nanopore";
@@ -780,7 +839,7 @@ sub round_format_pref{
780839
$rscript = "$out_dir/$prefix.plot.all.r";
781840
}
782841
if($dry == 0){
783-
my @techs = ("Illumina","PacBio","Nanopore");
842+
my @techs = ("Illumina","CLR","HiFi","Nanopore");
784843

785844
open(RALL,'>',"$rscript") or die "ERROR\tCould not open file $rscript\n";
786845

@@ -811,6 +870,10 @@ sub round_format_pref{
811870
push(@col,"\"blue\"");
812871
}
813872
if($i == 2 and exists($cov_files{$techs[$i]})){
873+
print RALL "lines($techs[$i]\[,1],$techs[$i]\[,2],type=\"l\",col=\"darkgreen\")\n";
874+
push(@col,"\"darkgreen\"");
875+
}
876+
if($i == 3 and exists($cov_files{$techs[$i]})){
814877
print RALL "lines($techs[$i]\[,1],$techs[$i]\[,2],type=\"l\",col=\"red\")\n";
815878
push(@col,"\"red\"");
816879
}
@@ -852,7 +915,10 @@ sub round_format_pref{
852915

853916
my $tech = "Illumina";
854917
if($_ =~ m/\.pb\.sort\.bam$/){
855-
$tech = "PacBio";
918+
$tech = "CLR";
919+
}
920+
if($_ =~ m/\.hifi\.sort\.bam$/){
921+
$tech = "HiFi";
856922
}
857923
if($_ =~ m/\.ont\.sort\.bam$/){
858924
$tech = "Nanopore";
@@ -891,7 +957,7 @@ sub round_format_pref{
891957
print "Output\n";
892958
print "======\n";
893959

894-
my @techs = ("Illumina","PacBio","Nanopore");
960+
my @techs = ("Illumina","CLR","HiFi","Nanopore");
895961
for (my $i = 0; $i < scalar(@techs); $i++){
896962
if(exists($results{$techs[$i]})){
897963
print $results{$techs[$i]};

0 commit comments

Comments
 (0)