7
7
use Number::FormatEng qw( :all) ;
8
8
use Parallel::Loops;
9
9
10
- my $version = " 0.3 " ;
10
+ my $version = " 0.4 " ;
11
11
12
12
sub print_help{
13
13
print STDOUT " \n " ;
@@ -18,14 +18,15 @@ sub print_help{
18
18
print STDOUT " \n " ;
19
19
print STDOUT " Usage:\n " ;
20
20
print STDOUT " \t backmap.pl [-a <assembly.fa> {-p <paired_1.fq>,<paired_2.fq> | -u <unpaired.fq> |\n " ;
21
- print STDOUT " \t -pb <pacbio .fq> | -ont <ont.fq> } | -b <mapping.bam>]\n " ;
21
+ print STDOUT " \t -pb <clr.fq> | -hifi <hifi .fq> | -ont <ont.fq> } | -b <mapping.bam>]\n " ;
22
22
print STDOUT " \n " ;
23
23
print STDOUT " Mandatory:\n " ;
24
24
print STDOUT " \t -a STR\t\t Assembly were reads should mapped to in fasta format\n " ;
25
25
print STDOUT " \t AND AT LEAST ONE OF\n " ;
26
26
print STDOUT " \t -p STR\t\t Two fastq files with paired Illumina reads comma sperated\n " ;
27
27
print STDOUT " \t -u STR\t\t Fastq file with unpaired Illumina reads\n " ;
28
- print STDOUT " \t -pb STR\t\t Fasta or fastq file with PacBio reads\n " ;
28
+ print STDOUT " \t -pb STR\t\t Fasta or fastq file with PacBio CLR reads\n " ;
29
+ print STDOUT " \t -hifi STR\t Fasta or fastq file with PacBio HiFi reads\n " ;
29
30
print STDOUT " \t -ont STR\t Fasta or fastq file with Nanopore reads\n " ;
30
31
print STDOUT " \t OR\n " ;
31
32
print STDOUT " \t -b STR\t\t Bam file to calculate coverage from\n " ;
@@ -47,7 +48,7 @@ sub print_help{
47
48
print STDOUT " \t -ne\t\t Do not estimate genome size [off]\n " ;
48
49
print STDOUT " \t -kt\t\t Keep temporary bam files [off]\n " ;
49
50
print STDOUT " \t -bo STR\t\t Options passed to bwa [-a -c 10000]\n " ;
50
- print STDOUT " \t -mo STR\t\t Options passed to minimap [PacBio : -H -x map-pb; ONT: -x map-ont]\n " ;
51
+ print STDOUT " \t -mo STR\t\t Options passed to minimap [CLR : -H -x map-pb; HiFi: minimap<=2.18 \n\t\t\t -x asm20 minimap>2.18 -x map-hifi ; ONT: -x map-ont]\n " ;
51
52
print STDOUT " \t -qo STR\t\t Options passed to qualimap [none]\n " ;
52
53
print STDOUT " \t Pass options with quotes e.g. -bo \" <options>\"\n " ;
53
54
print STDOUT " \t -v\t\t Print executed commands to STDERR [off]\n " ;
@@ -84,6 +85,7 @@ sub round_format_pref{
84
85
my @paired = ();
85
86
my @unpaired = ();
86
87
my @pb = ();
88
+ my @hifi = ();
87
89
my @ont = ();
88
90
my $threads = 1;
89
91
my $prefix = " " ;
@@ -130,6 +132,9 @@ sub round_format_pref{
130
132
if ($ARGV [$i ] eq " -pb" ){
131
133
push (@pb ,$ARGV [$i +1]);
132
134
}
135
+ if ($ARGV [$i ] eq " -hifi" ){
136
+ push (@hifi ,$ARGV [$i +1]);
137
+ }
133
138
if ($ARGV [$i ] eq " -ont" ){
134
139
push (@ont ,$ARGV [$i +1]);
135
140
}
@@ -250,7 +255,7 @@ sub round_format_pref{
250
255
print STDERR " ERROR\t File $assembly_path does not exist!\n " ;
251
256
$input_error = 1;
252
257
}
# At least one read set (paired, unpaired, CLR, HiFi or Nanopore) has to be
# given; otherwise report the problem and raise the input error flag.
unless (@paired or @unpaired or @pb or @hifi or @ont) {
    print STDERR "ERROR\tNo reads specified!\n";
    $input_error = 1;
}
@@ -363,6 +368,24 @@ sub round_format_pref{
363
368
}
364
369
}
365
370
# Collect the PacBio HiFi read files that exist on disk, keyed by absolute
# path so that a file specified more than once is only registered once.
# Files that do not exist are reported on STDERR and skipped.
my %hifi_filter;

if ($assembly_path ne "") {
    foreach my $hifi_file (@hifi) {
        if (not -f "$hifi_file") {
            print STDERR "INFO\tNo file $hifi_file - skipping this file\n";
            next;
        }

        # Resolve the path once and reuse it for the lookup, the message
        # and the hash key.
        my $hifi_abs_path = abs_path($hifi_file);
        if (exists($hifi_filter{$hifi_abs_path})) {
            print STDERR "INFO\tFile " . $hifi_abs_path . " already specified\n";
        }
        else {
            $hifi_filter{$hifi_abs_path} = 1;
        }
    }
}
388
+
366
389
my %ont_filter ;
367
390
368
391
if ($assembly_path ne " " ){
@@ -400,7 +423,7 @@ sub round_format_pref{
400
423
}
401
424
402
425
if ($assembly_path ne " " ){
403
- if (scalar (keys (%paired_filter )) == 0 and scalar (keys (%unpaired_filter )) == 0 and scalar (keys (%pb_filter )) == 0 and scalar (keys (%ont_filter )) == 0){
426
+ if (scalar (keys (%paired_filter )) == 0 and scalar (keys (%unpaired_filter )) == 0 and scalar (keys (%pb_filter )) == 0 and scalar (keys (%hifi_filter )) == 0 and scalar ( keys ( % ont_filter )) == 0){
404
427
print STDERR " ERROR\t No existing read files specified!\n " ;
405
428
exit 1;
406
429
}
@@ -436,12 +459,16 @@ sub round_format_pref{
436
459
}
437
460
# Detect the installed minimap2 release.
#   $minimap_version        full version string as printed by
#                           "minimap2 --version", or "not detected"
#   $minimap_minor_version  number following the first dot of the version
#                           (e.g. 18 for "2.18-r1015"); stays 0 when minimap2
#                           is missing or the version string cannot be parsed,
#                           so later numeric comparisons never see undef
my $minimap_version;
my $minimap_minor_version = 0;
if (not defined(can_run("minimap2"))) {
    $minimap_version = "not detected";
}
else {
    $minimap_version = `minimap2 --version`;
    chomp $minimap_version;
    # Anchored capture instead of chained substitutions: a three-part version
    # such as "2.18.1" still yields 18 rather than the last component.
    if ($minimap_version =~ /^\d+\.(\d+)/) {
        $minimap_minor_version = $1;
    }
}
446
473
447
474
my $samtools_version = ` samtools --version | head -1 | sed 's/^samtools //'` ;
@@ -592,6 +619,20 @@ sub round_format_pref{
592
619
push (@pb_bam ," $out_dir /$prefix .pb$pb_counter .bam" );
593
620
}
594
621
# Map every HiFi read file against the assembly with minimap2 and store the
# alignments as one BAM file per input ($prefix.hifi<N>.bam in $out_dir).
my $hifi_counter = 0;
my @hifi_bam = ();

# Choose the minimap2 preset once for all files: "-x map-hifi" for minimap2
# newer than 2.18, "-x asm20" otherwise. An undefined or non-numeric minor
# version (e.g. minimap2 not detected) falls back to asm20 explicitly.
my $hifi_preset = "map-hifi";
if (not defined($minimap_minor_version)
    or $minimap_minor_version !~ /^\d+$/
    or $minimap_minor_version <= 18) {
    $hifi_preset = "asm20";
}

# Sorted keys keep the file-to-number assignment stable between runs.
# NOTE(review): paths are interpolated unquoted into the command string; the
# pipe and redirections suggest exe_cmd hands it to a shell, so paths
# containing whitespace would break - confirm against exe_cmd.
foreach my $hifi_reads (sort(keys(%hifi_filter))) {
    $hifi_counter++;
    my $hifi_bam_path = "$out_dir/$prefix.hifi$hifi_counter.bam";
    $cmd = "minimap2 $minimap_opts -x $hifi_preset -a -t $threads $assembly_path $hifi_reads 2> $out_dir/${prefix}_minimap_hifi$hifi_counter.err | samtools view -1 -b - > $hifi_bam_path";
    exe_cmd($cmd, $verbose, $dry);
    push(@hifi_bam, $hifi_bam_path);
}
635
+
595
636
my $ont_counter = 0;
596
637
my @ont_bam = ();
597
638
foreach (keys (%ont_filter )){
@@ -608,6 +649,7 @@ sub round_format_pref{
608
649
my $paired_bam_files = join (" " ,@paired_bam );
609
650
my $unpaired_bam_files = join (" " ,@unpaired_bam );
610
651
my $pb_bam_files = join (" " ,@pb_bam );
652
+ my $hifi_bam_files = join (" " ,@hifi_bam );
611
653
my $ont_bam_files = join (" " ,@ont_bam );
612
654
613
655
if ($ill_bam_count > 0){
@@ -639,6 +681,20 @@ sub round_format_pref{
639
681
push (@merged_bam_file , " $out_dir /$prefix .pb.bam" );
640
682
}
641
683
}
684
+
# Provide one BAM file for all HiFi alignments: a single per-file BAM is
# symlinked, several are combined with "samtools merge". The resulting path
# is appended to @merged_bam_file.
if (scalar(@hifi_bam) > 0) {
    my $hifi_merged_bam = "$out_dir/$prefix.hifi.bam";
    if (scalar(@hifi_bam) == 1) {
        my $single_bam = $hifi_bam[0];
        $cmd = "ln -fs $single_bam $hifi_merged_bam";
    }
    else {
        $cmd = "samtools merge -@ $samtools_threads $hifi_merged_bam $hifi_bam_files";
    }
    exe_cmd($cmd, $verbose, $dry);
    push(@merged_bam_file, $hifi_merged_bam);
}
642
698
643
699
if (scalar (@ont_bam ) > 0){
644
700
if (scalar (@ont_bam ) == 1){
@@ -676,7 +732,7 @@ sub round_format_pref{
676
732
}
677
733
# Unless temporary files are to be kept, delete the per-file BAMs of every
# technology together with the per-technology merged BAMs.
if ($keep_tmp == 0) {
    my @tmp_bam_list = (@paired_bam, @unpaired_bam, @pb_bam, @hifi_bam, @ont_bam, @merged_bam_file);
    # "rm" without operands exits with an error, so only issue the command
    # when there is at least one file to delete.
    if (scalar(@tmp_bam_list) > 0) {
        my $tmp_bams = join(" ", @tmp_bam_list);
        $cmd = "rm $tmp_bams";
        exe_cmd($cmd, $verbose, $dry);
    }
}
@@ -720,7 +776,10 @@ sub round_format_pref{
720
776
721
777
my $tech = " Illumina" ;
722
778
if ($_ =~ m /\. pb\. sort\. bam$ / ){
723
- $tech = " PacBio" ;
779
+ $tech = " CLR" ;
780
+ }
781
+ if ($_ =~ m /\. hifi\. sort\. bam$ / ){
782
+ $tech = " HiFi" ;
724
783
}
725
784
if ($_ =~ m /\. ont\. sort\. bam$ / ){
726
785
$tech = " Nanopore" ;
@@ -780,7 +839,7 @@ sub round_format_pref{
780
839
$rscript = " $out_dir /$prefix .plot.all.r" ;
781
840
}
782
841
if ($dry == 0){
783
- my @techs = (" Illumina" ," PacBio " ," Nanopore" );
842
+ my @techs = (" Illumina" ," CLR " , " HiFi " ," Nanopore" );
784
843
785
844
open (RALL,' >' ," $rscript " ) or die " ERROR\t Could not open file $rscript \n " ;
786
845
@@ -811,6 +870,10 @@ sub round_format_pref{
811
870
push (@col ," \" blue\" " );
812
871
}
813
872
if ($i == 2 and exists ($cov_files {$techs [$i ]})){
873
+ print RALL " lines($techs [$i ]\[ ,1],$techs [$i ]\[ ,2],type=\" l\" ,col=\" darkgreen\" )\n " ;
874
+ push (@col ," \" darkgreen\" " );
875
+ }
876
+ if ($i == 3 and exists ($cov_files {$techs [$i ]})){
814
877
print RALL " lines($techs [$i ]\[ ,1],$techs [$i ]\[ ,2],type=\" l\" ,col=\" red\" )\n " ;
815
878
push (@col ," \" red\" " );
816
879
}
@@ -852,7 +915,10 @@ sub round_format_pref{
852
915
853
916
my $tech = " Illumina" ;
854
917
if ($_ =~ m /\. pb\. sort\. bam$ / ){
855
- $tech = " PacBio" ;
918
+ $tech = " CLR" ;
919
+ }
920
+ if ($_ =~ m /\. hifi\. sort\. bam$ / ){
921
+ $tech = " HiFi" ;
856
922
}
857
923
if ($_ =~ m /\. ont\. sort\. bam$ / ){
858
924
$tech = " Nanopore" ;
@@ -891,7 +957,7 @@ sub round_format_pref{
891
957
print " Output\n " ;
892
958
print " ======\n " ;
893
959
894
- my @techs = (" Illumina" ," PacBio " ," Nanopore" );
960
+ my @techs = (" Illumina" ," CLR " , " HiFi " ," Nanopore" );
895
961
for (my $i = 0; $i < scalar (@techs ); $i ++){
896
962
if (exists ($results {$techs [$i ]})){
897
963
print $results {$techs [$i ]};
0 commit comments