Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- [#2154](https://github.com/nf-core/sarek/pull/2154) - Fix `--save_output_as_bam` causing duplicate emission errors, silently skipping variant calling, and running unnecessary CRAM-to-BAM conversions
- [#2117](https://github.com/nf-core/sarek/pull/2117) - Silent failure with multi-lane samples
- [#2143](https://github.com/nf-core/sarek/pull/2143) - Varlociraptor collecting multiple scenario files for one sample
- [#2146](https://github.com/nf-core/sarek/pull/2146) - Fail early when `--no_intervals` is used with joint germline HaplotypeCaller
Expand Down
79 changes: 32 additions & 47 deletions conf/modules/markduplicates.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,26 +15,8 @@

process {

withName: 'CRAM_TO_BAM' {
ext.args = { '-b' }
}

withName: 'BAM_TO_CRAM' {
// BAM files provided for step markduplicates are either run through MD or converted -> then saved as sorted.cram (convert) or md.cram (md directly)
// BAM files provided for step prepare_recal are converted and run through BQSR -> then saved as md.cram
// BAM files provided for step recal are converted and run through BQSR II -> then saved as md.cram
ext.args = { '-C' }
ext.prefix = { "${meta.id}.converted" }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/converted/${meta.id}" },
pattern: "*{cram,crai}",
saveAs: { !params.save_output_as_bam ? it : null }
]
}

withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:(BAM_MARKDUPLICATES|BAM_MARKDUPLICATES_SPARK):CRAM_QC_MOSDEPTH_SAMTOOLS:SAMTOOLS_STATS' {
ext.prefix = { "${meta.id}.md.cram" }
ext.prefix = { params.save_output_as_bam ? "${meta.id}.md.bam" : "${meta.id}.md.cram" }
ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('samtools')) }
publishDir = [
mode: params.publish_dir_mode,
Expand All @@ -45,9 +27,10 @@ process {

withName: 'BAM_TO_CRAM_MAPPING' {
ext.prefix = { "${meta.id}.sorted" }
// Run only when mapping should be saved as CRAM or when no MD is done
// Run only when mapping should be saved as CRAM or when no MD is done (and not saving as BAM)
ext.when = (params.save_mapped && !params.save_output_as_bam) ||
(
!params.save_output_as_bam &&
(params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) &&
!(params.tools && params.tools.split(',').contains('sentieon_dedup'))
)
Expand Down Expand Up @@ -78,7 +61,7 @@ process {
params.umi_tag ? "--BARCODE_TAG ${params.umi_tag}" :
(params.umi_in_read_header || params.umi_length ? "--BARCODE_TAG RX" : "")
].join(" ").trim() }
ext.prefix = { "${meta.id}.md.cram" }
ext.prefix = { params.save_output_as_bam ? "${meta.id}.md.bam" : "${meta.id}.md.cram" }
ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('markduplicates')) }
publishDir = [
[
Expand All @@ -87,6 +70,12 @@ process {
pattern: "*{cram,crai}",
saveAs: { !params.save_output_as_bam ? it : null }
],
[
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" },
pattern: "*{md.bam,md.bam.bai}",
saveAs: { params.save_output_as_bam ? it : null }
],
[
mode: params.publish_dir_mode,
path: { "${params.outdir}/reports/" },
Expand All @@ -99,42 +88,38 @@ process {
withName: 'GATK4SPARK_MARKDUPLICATES' {
containerOptions = ''
ext.args = { '--remove-sequencing-duplicates false -VS LENIENT' }
ext.prefix = { "${meta.id}.md.cram" }
ext.prefix = { params.save_output_as_bam ? "${meta.id}.md.bam" : "${meta.id}.md.cram" }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" },
pattern: "*{cram,crai}",
saveAs: { !params.save_output_as_bam ? it : null }
[
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" },
pattern: "*{cram,crai}",
saveAs: { !params.save_output_as_bam ? it : null }
],
[
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" },
pattern: "*{md.bam,md.bam.bai}",
saveAs: { params.save_output_as_bam ? it : null }
]
]
}

withName: 'INDEX_MARKDUPLICATES' {
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" },
pattern: "*{cram,crai}",
saveAs: { !params.save_output_as_bam ? it : null }
]
}

withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:CRAM_TO_BAM' {
ext.when = { params.save_output_as_bam }
if (params.tools && params.tools.split(',').contains('sentieon_dedup')) {
ext.prefix = { "${meta.id}.dedup" }
publishDir = [
[
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/sentieon_dedup/${meta.id}/" },
pattern: "*{dedup.bam,dedup.bam.bai}",
saveAs: { params.save_output_as_bam ? it : null }
]
} else {
ext.prefix = { "${meta.id}.md" }
publishDir = [
path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" },
pattern: "*{cram,crai}",
saveAs: { !params.save_output_as_bam ? it : null }
],
[
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/markduplicates/${meta.id}/" },
pattern: "*{md.bam,md.bam.bai}",
pattern: "*{bam,bai}",
saveAs: { params.save_output_as_bam ? it : null }
]
}
]
}

}
45 changes: 26 additions & 19 deletions conf/modules/recalibrate.config
Original file line number Diff line number Diff line change
Expand Up @@ -43,31 +43,38 @@ process {
]]
}

if ((params.step == 'mapping' || params.step == 'markduplicates'|| params.step == 'prepare_recalibration'|| params.step == 'recalibrate') && (!(params.skip_tools && params.skip_tools.split(',').contains('baserecalibrator')))) {
withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:(BAM_APPLYBQSR|BAM_APPLYBQSR_SPARK):CRAM_MERGE_INDEX_SAMTOOLS:MERGE_CRAM' {
ext.prefix = { "${meta.id}.recal" }
ext.when = { meta.num_intervals > 1 }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" },
pattern: "*cram",
saveAs: { !params.save_output_as_bam ? it : null }
withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:(BAM_APPLYBQSR|BAM_APPLYBQSR_SPARK):CRAM_MERGE_INDEX_SAMTOOLS:MERGE_CRAM' {
ext.prefix = { "${meta.id}.recal" }
ext.when = { meta.num_intervals > 1 }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" },
pattern: "*cram",
saveAs: { !params.save_output_as_bam ? it : null }
]
}
}

withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:(BAM_APPLYBQSR|BAM_APPLYBQSR_SPARK):CRAM_MERGE_INDEX_SAMTOOLS:INDEX_CRAM' {
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" },
pattern: "*{recal.cram,recal.cram.crai}",
saveAs: { !params.save_output_as_bam ? it : null }
withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:(BAM_APPLYBQSR|BAM_APPLYBQSR_SPARK):CRAM_MERGE_INDEX_SAMTOOLS:INDEX_CRAM' {
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" },
pattern: "*{recal.cram,recal.cram.crai}",
saveAs: { !params.save_output_as_bam ? it : null }
]
}
}

withName: 'CRAM_TO_BAM_RECAL' {
withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:(BAM_APPLYBQSR|BAM_APPLYBQSR_SPARK):BAM_MERGE_INDEX_SAMTOOLS:MERGE_BAM' {
ext.prefix = { "${meta.id}.recal" }
ext.when = { params.save_output_as_bam}
ext.when = { meta.num_intervals > 1 }
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" },
pattern: "*bam",
saveAs: { params.save_output_as_bam ? it : null }
]
}

withName: 'NFCORE_SAREK:SAREK:FASTQ_PREPROCESS_GATK:(BAM_APPLYBQSR|BAM_APPLYBQSR_SPARK):BAM_MERGE_INDEX_SAMTOOLS:INDEX_MERGE_BAM' {
publishDir = [
mode: params.publish_dir_mode,
path: { "${params.outdir}/preprocessing/recalibrated/${meta.id}/" },
Expand Down
1 change: 0 additions & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,6 @@ Expected run output:
[88/3af664] process > NFCORE_SAREK:SAREK:BAM_APPLYBQSR:CRAM_MERGE_INDEX_SAMTOOLS:INDEX_CRAM (test) [100%] 1 of 1 ✔
[f4/828fde] process > NFCORE_SAREK:SAREK:CRAM_QC_RECAL:SAMTOOLS_STATS (test) [100%] 1 of 1 ✔
[fb/a9d66f] process > NFCORE_SAREK:SAREK:CRAM_QC_RECAL:MOSDEPTH (test) [100%] 1 of 1 ✔
[- ] process > NFCORE_SAREK:SAREK:CRAM_TO_BAM_RECAL -
[ef/026185] process > NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_SINGLE_STRELKA:STRELKA_SINGLE (test) [100%] 1 of 1 ✔
[- ] process > NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_SINGLE_STRELKA:MERGE_STRELKA -
[- ] process > NFCORE_SAREK:SAREK:BAM_VARIANT_CALLING_GERMLINE_ALL:BAM_VARIANT_CALLING_SINGLE_STRELKA:MERGE_STRELKA_GENOME -
Expand Down
45 changes: 11 additions & 34 deletions subworkflows/local/bam_applybqsr/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ workflow BAM_APPLYBQSR {

main:
versions = channel.empty()
bam_applybqsr_single = channel.empty()
bam_to_merge = channel.empty()

// Combine cram and intervals for spread and gather strategy
// Move num_intervals to meta map
Expand All @@ -35,54 +33,33 @@ workflow BAM_APPLYBQSR {
dict.map { _meta, dict_ -> [dict_] },
)

// FOR BAMs
if (params.save_output_as_bam) {
// BAM path — populated when ext.suffix='bam', empty otherwise
bam_to_merge = GATK4_APPLYBQSR.out.bam
.map { meta, bam_ -> [groupKey(meta, meta.num_intervals), bam_] }
.groupTuple()

bam_applybqsr_out = GATK4_APPLYBQSR.out.bam
.join(GATK4_APPLYBQSR.out.bai, failOnDuplicate: true, failOnMismatch: true)
.branch { files ->
single: files[0].num_intervals == 1
multiple: files[0].num_intervals > 1
}

bam_applybqsr_single = bam_applybqsr_out.single

// For multiple intervals, gather and merge the recalibrated bam files
bam_to_merge = bam_applybqsr_out.multiple
.map { meta, bam_, _bai -> [groupKey(meta, meta.num_intervals), bam_] }
.groupTuple()
}

// Merge and index the recalibrated bam files
BAM_MERGE_INDEX_SAMTOOLS(bam_to_merge)

// Combine single and merged multiple bam and index files, removing num_intervals field
bam_recal = bam_applybqsr_single
.mix(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai)
.map { meta, bam, bai -> [meta - meta.subMap('num_intervals'), bam, bai] }

// FOR CRAMs

// Gather the recalibrated cram files
// CRAM path — populated when ext.suffix='cram', empty otherwise
cram_to_merge = GATK4_APPLYBQSR.out.cram.map { meta, cram_ -> [groupKey(meta, meta.num_intervals), cram_] }.groupTuple()

// Merge and index the recalibrated cram files
CRAM_MERGE_INDEX_SAMTOOLS(
cram_to_merge,
fasta,
fasta_fai,
)

// Remove no longer necessary field: num_intervals
cram_recal = CRAM_MERGE_INDEX_SAMTOOLS.out.cram_crai.map { meta, cram_, crai -> [meta - meta.subMap('num_intervals'), cram_, crai] }
// Mix — one is always empty
recal_out = BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai
.mix(CRAM_MERGE_INDEX_SAMTOOLS.out.cram_crai)
.map { meta, file_, index -> [meta - meta.subMap('num_intervals'), file_, index] }

// Gather versions of all tools used
versions = versions.mix(BAM_MERGE_INDEX_SAMTOOLS.out.versions)
versions = versions.mix(CRAM_MERGE_INDEX_SAMTOOLS.out.versions)
versions = versions.mix(GATK4_APPLYBQSR.out.versions)

emit:
bam = bam_recal // channel: [ meta, bam, bai ]
cram = cram_recal // channel: [ meta, cram, crai ]
versions // channel: [ versions.yml ]
alignment = recal_out // channel: [ meta, file, index ] — BAM or CRAM
versions // channel: [ versions.yml ]
}
45 changes: 11 additions & 34 deletions subworkflows/local/bam_applybqsr_spark/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ workflow BAM_APPLYBQSR_SPARK {

main:
versions = channel.empty()
bam_applybqsr_single = channel.empty()
bam_to_merge = channel.empty()

// Combine cram and intervals for spread and gather strategy
// Move num_intervals to meta map
Expand All @@ -35,54 +33,33 @@ workflow BAM_APPLYBQSR_SPARK {
dict.map { _meta, dict_ -> [dict_] },
)

// FOR BAMs
if (params.save_output_as_bam) {
// BAM path — populated when ext.suffix='bam', empty otherwise
bam_to_merge = GATK4SPARK_APPLYBQSR.out.bam
.map { meta, bam_ -> [groupKey(meta, meta.num_intervals), bam_] }
.groupTuple()

bam_applybqsr_out = GATK4SPARK_APPLYBQSR.out.bam
.join(GATK4SPARK_APPLYBQSR.out.bai, failOnDuplicate: true, failOnMismatch: true)
.branch { files ->
single: files[0].num_intervals == 1
multiple: files[0].num_intervals > 1
}

bam_applybqsr_single = bam_applybqsr_out.single

// For multiple intervals, gather and merge the recalibrated bam files
bam_to_merge = bam_applybqsr_out.multiple
.map { meta, bam_, _bai -> [groupKey(meta, meta.num_intervals), bam_] }
.groupTuple()
}

// Merge and index the recalibrated bam files
BAM_MERGE_INDEX_SAMTOOLS(bam_to_merge)

// Combine single and merged multiple bam and index files, removing num_intervals field
bam_recal = bam_applybqsr_single
.mix(BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai)
.map { meta, bam, bai -> [meta - meta.subMap('num_intervals'), bam, bai] }

// FOR CRAMs

// Gather the recalibrated cram files
// CRAM path — populated when ext.suffix='cram', empty otherwise
cram_to_merge = GATK4SPARK_APPLYBQSR.out.cram.map { meta, cram_ -> [groupKey(meta, meta.num_intervals), cram_] }.groupTuple()

// Merge and index the recalibrated cram files
CRAM_MERGE_INDEX_SAMTOOLS(
cram_to_merge,
fasta,
fasta_fai,
)

// Remove no longer necessary field: num_intervals
cram_recal = CRAM_MERGE_INDEX_SAMTOOLS.out.cram_crai.map { meta, cram_, crai -> [meta - meta.subMap('num_intervals'), cram_, crai] }
// Mix — one is always empty
recal_out = BAM_MERGE_INDEX_SAMTOOLS.out.bam_bai
.mix(CRAM_MERGE_INDEX_SAMTOOLS.out.cram_crai)
.map { meta, file_, index -> [meta - meta.subMap('num_intervals'), file_, index] }

// Gather versions of all tools used
versions = versions.mix(BAM_MERGE_INDEX_SAMTOOLS.out.versions)
versions = versions.mix(CRAM_MERGE_INDEX_SAMTOOLS.out.versions)
versions = versions.mix(GATK4SPARK_APPLYBQSR.out.versions)

emit:
bam = bam_recal // channel: [ meta, bam, bai ]
cram = cram_recal // channel: [ meta, cram, crai ]
versions // channel: [ versions.yml ]
alignment = recal_out // channel: [ meta, file, index ] — BAM or CRAM
versions // channel: [ versions.yml ]
}
22 changes: 15 additions & 7 deletions subworkflows/local/bam_markduplicates/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
// For all modules here:
// A when clause condition is defined in the conf/modules.config to determine if the module should be run

include { CRAM_QC_MOSDEPTH_SAMTOOLS } from '../cram_qc_mosdepth_samtools/main'
include { GATK4_MARKDUPLICATES } from '../../../modules/nf-core/gatk4/markduplicates/main'
include { CRAM_QC_MOSDEPTH_SAMTOOLS } from '../cram_qc_mosdepth_samtools/main'
include { GATK4_MARKDUPLICATES } from '../../../modules/nf-core/gatk4/markduplicates/main'
include { SAMTOOLS_INDEX as INDEX_MARKDUPLICATES } from '../../../modules/nf-core/samtools/index/main'

workflow BAM_MARKDUPLICATES {
take:
Expand All @@ -21,22 +22,29 @@ workflow BAM_MARKDUPLICATES {
// RUN MARKDUPLICATES
GATK4_MARKDUPLICATES(bam, fasta.map{ meta, fasta -> [ fasta ] }, fasta_fai.map{ meta, fasta_fai -> [ fasta_fai ] })

// Join with the crai file
cram = GATK4_MARKDUPLICATES.out.cram.join(GATK4_MARKDUPLICATES.out.crai, failOnDuplicate: true, failOnMismatch: true)
// BAM path: module does not auto-index BAM output, so index explicitly
INDEX_MARKDUPLICATES(GATK4_MARKDUPLICATES.out.bam)

// QC on CRAM
CRAM_QC_MOSDEPTH_SAMTOOLS(cram, fasta, intervals_bed_combined)
// Unified alignment output — BAM or CRAM depending on save_output_as_bam
alignment = GATK4_MARKDUPLICATES.out.bam
.join(INDEX_MARKDUPLICATES.out.bai, failOnDuplicate: true, failOnMismatch: true)
.mix(GATK4_MARKDUPLICATES.out.cram
.join(GATK4_MARKDUPLICATES.out.crai, failOnDuplicate: true, failOnMismatch: true))

// QC on alignment
CRAM_QC_MOSDEPTH_SAMTOOLS(alignment, fasta, intervals_bed_combined)

// Gather all reports generated
reports = reports.mix(GATK4_MARKDUPLICATES.out.metrics)
reports = reports.mix(CRAM_QC_MOSDEPTH_SAMTOOLS.out.reports)

// Gather versions of all tools used
versions = versions.mix(GATK4_MARKDUPLICATES.out.versions)
versions = versions.mix(INDEX_MARKDUPLICATES.out.versions)
versions = versions.mix(CRAM_QC_MOSDEPTH_SAMTOOLS.out.versions)

emit:
cram
alignment // channel: [ meta, file, index ] — BAM or CRAM
reports

versions // channel: [ versions.yml ]
Expand Down
Loading
Loading