Skip to content

Commit 60517b4

Browse files
committed
make several BAM ops more frugal: [GATK ValidateSamFile, GATK SortSam, samtools fastq, samtools reset]
1 parent 3d3a508 commit 60517b4

File tree

1 file changed

+14
-20
lines changed

1 file changed

+14
-20
lines changed

wdl/tasks/Utility/BAMutils.wdl

Lines changed: 14 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -282,7 +282,6 @@ task ValidateSamFile {
282282
RuntimeAttr? runtime_attr_override
283283
}
284284

285-
Int disk_size = ceil(size(bam, "GiB")) + 50
286285
String output_basename = basename(basename(bam, ".bam"), ".cram")
287286
String output_name = "${output_basename}_${validation_mode}.txt"
288287

@@ -307,9 +306,12 @@ task ValidateSamFile {
307306
}
308307

309308
#########################
309+
310+
Int disk_size = ceil(size(bam, "GiB")) + 10
311+
310312
RuntimeAttr default_attr = object {
311313
cpu_cores: 2,
312-
mem_gb: 8,
314+
mem_gb: 4,
313315
disk_gb: disk_size,
314316
preemptible_tries: 2,
315317
max_retries: 1,
@@ -1205,7 +1207,7 @@ task DeduplicateQuerynameSortedBam {
12051207
task BamToFastq {
12061208
meta {
12071209
description : "Convert a long reads BAM file to a fastq file."
1208-
warn: "Please do not include 'RG' in tags_to_preserve, as that's automatically saved"
1210+
warn: "Does not work for pair-end reads. Please do not include 'RG' in tags_to_preserve, as that's automatically saved"
12091211
}
12101212

12111213
parameter_meta {
@@ -1262,25 +1264,18 @@ task BamToFastq {
12621264
samtools fastq \
12631265
-@1 \
12641266
-t \
1267+
~{true='-T ' false =' ' save_all_tags} ~{true="'*'" false =' ' save_all_tags} \
1268+
~{true='-T ' false =' ' custom_tags_to_preserve} ~{sep=',' tags_to_preserve} \
12651269
-0 ~{prefix}.fq.gz \
12661270
~{local_bam}
1267-
1268-
# also using pigz to enable parallel compression
1269-
time \
1270-
samtools fastq \
1271-
~{true='-T ' false =' ' save_all_tags} \
1272-
~{true='-T ' false =' ' custom_tags_to_preserve} ~{sep=',' tags_to_preserve} \
1273-
~{local_bam} \
1274-
| pigz \
1275-
> ~{prefix}.fq.gz
12761271
>>>
12771272

12781273
#########################
12791274
Int disk_size = 10 + 3 * ceil(size(bam, "GiB"))
12801275

12811276
RuntimeAttr default_attr = object {
12821277
cpu_cores: 2,
1283-
mem_gb: 8,
1278+
mem_gb: 4,
12841279
disk_gb: disk_size,
12851280
preemptible_tries: 2,
12861281
max_retries: 1,
@@ -1489,9 +1484,6 @@ task SamtoolsReset {
14891484

14901485
String prefix = basename(bam, ".bam")
14911486

1492-
Int disk_size = if defined(num_ssds) then 375*select_first([num_ssds]) else 1+10*ceil(size([bam], "GB"))
1493-
String disk_type = if defined(num_ssds) then " LOCAL" else " SSD"
1494-
14951487
String base = basename(bam, ".bam")
14961488
String local_bam = "/cromwell_root/~{base}.bam"
14971489

@@ -1500,19 +1492,21 @@ task SamtoolsReset {
15001492
15011493
time gcloud storage cp ~{bam} ~{local_bam}
15021494
1503-
samtools view -@1 ~{local_bam} | grep -v "^@" | awk -F '\t' '{print $2}' | sort | uniq -c > orignal.SAM-flag.stats.txt &
1495+
samtools view ~{local_bam} | grep -v "^@" | awk -F '\t' '{print $2}' | sort | uniq -c > orignal.SAM-flag.stats.txt &
15041496
1505-
samtools reset -@3 \
1497+
samtools reset -@4 \
15061498
--remove-tag ~{sep=',' tags_to_drop} \
15071499
-o ~{prefix}.unaligned.bam \
15081500
~{local_bam}
15091501
wait
15101502
>>>
15111503

15121504
#########################
1505+
Int disk_size = if defined(num_ssds) then 375*select_first([num_ssds]) else 10+4*ceil(size(bam, "GiB"))
1506+
String disk_type = if defined(num_ssds) then " LOCAL" else " SSD"
15131507
RuntimeAttr default_attr = object {
1514-
cpu_cores: 4,
1515-
mem_gb: 16,
1508+
cpu_cores: 6,
1509+
mem_gb: 10,
15161510
disk_gb: disk_size,
15171511
preemptible_tries: 2,
15181512
max_retries: 1,

0 commit comments

Comments
 (0)