diff --git a/CHANGELOG.md b/CHANGELOG.md index b48fc209c..dfdd45b82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,13 +9,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- [#754](https://github.com/nf-core/mag/pull/754) - Replaced local module with nf-core module for all Bowtie2 processes (reported by @MeriamOs, fix by @jfy133) +- [#754](https://github.com/nf-core/mag/pull/754) - Made filenames consistent for all bowtie2 assembly-aligning output (fix by @jfy133) + ### `Fixed` +- [#754](https://github.com/nf-core/mag/pull/754) - Fix version reporting in binning preparation and ancient DNA workflows (fix by @jfy133) - [#769](https://github.com/nf-core/mag/pull/769) - Fix megahit not emitting correct filenames due to suboptimal arguments ordering (reported and fix by @IceGreb) - [#771](https://github.com/nf-core/mag/pull/771) - Fix misspecified checkm2 database parameter check (reported by @dpelegri and fix by @jfy133) ### `Dependencies` +| Tool | Previous version | New version | +| ------- | ---------------- | ----------- | +| Bowtie2 | 2.4.2 | 2.5.2 | + ### `Deprecated` ## v3.3.1 - [2025-02-13] diff --git a/conf/modules.config b/conf/modules.config index e14432d63..888645b47 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -347,24 +347,42 @@ process { ] } + withName: BOWTIE2_ASSEMBLY_BUILD { + ext.prefix = { "${meta.assembler}-${meta.id}" } + } + withName: BOWTIE2_ASSEMBLY_ALIGN { + tag = { "${meta.assembler}-${meta.id}-${meta3.id}" } ext.args = params.bowtie2_mode ? params.bowtie2_mode : params.ancient_dna ? '--very-sensitive -N 1' : '' - ext.prefix = { "${meta.id}.assembly" } + ext.args2 = "" + ext.prefix = { "${meta.assembler}-${meta.id}-${meta3.id}" } publishDir = [ [ - path: { "${params.outdir}/Assembly/${assembly_meta.assembler}/QC/${assembly_meta.id}" }, + path: { "${params.outdir}/Assembly/${meta.assembler}/QC/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.log", ], [ - path: { "${params.outdir}/Assembly/${assembly_meta.assembler}/QC/${assembly_meta.id}" }, + path: { "${params.outdir}/Assembly/${meta.assembler}/QC/${meta.id}" }, mode: params.publish_dir_mode, - pattern: "*.{bam,bai}", + pattern: "*.{bam,csi}", enabled: params.save_assembly_mapped_reads, ], ] } + withName: SAMTOOLS_INDEX { + ext.prefix = { "${meta.assembler}-${meta.id}-${meta3.id}" } + publishDir = [ + [ + path: { "${params.outdir}/Assembly/${meta.assembler}/QC/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.{bam}", + enabled: params.save_assembly_mapped_reads, + ] + ] + } + withName: 'MAG_DEPTHS_PLOT|MAG_DEPTHS_SUMMARY' { publishDir = [path: { "${params.outdir}/GenomeBinning/depths/bins" }, mode: params.publish_dir_mode, pattern: "*.{png,tsv}"] } diff --git a/docs/output.md b/docs/output.md index daece6f6e..38922893b 100644 --- a/docs/output.md +++ b/docs/output.md @@ -219,9 +219,9 @@ Trimmed (short) reads are assembled with both megahit and SPAdes. Hybrid assembl - `[sample/group].contigs.fa.gz`: Compressed metagenome assembly in fasta format - `[sample/group].log`: Log file - `QC/[sample/group]/`: Directory containing QUAST files and Bowtie2 mapping logs - - `MEGAHIT-[sample].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the sample that the metagenome was assembled from, only present if `--coassemble_group` is not set. + - `MEGAHIT-[sample]-[sampleToMap].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the sample that the metagenome was assembled from, only present if `--coassemble_group` is not set. - `MEGAHIT-[sample/group]-[sampleToMap].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the respective sample ("sampleToMap"). - - `MEGAHIT-[sample].[bam/bai]`: Optionally saved BAM file of the Bowtie2 mapping of reads against the assembly. + - `MEGAHIT-[sample]-[sampleToMap].[bam/bai]`: Optionally saved BAM file of the Bowtie2 mapping of reads against the assembly. @@ -238,9 +238,9 @@ Trimmed (short) reads are assembled with both megahit and SPAdes. Hybrid assembl - `[sample/group].contigs.fa.gz`: Compressed assembled contigs in fasta format - `[sample/group].spades.log`: Log file - `QC/[sample/group]/`: Directory containing QUAST files and Bowtie2 mapping logs - - `SPAdes-[sample].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the sample that the metagenome was assembled from, only present if `--coassemble_group` is not set. + - `SPAdes-[sample]-[sampleToMap].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the sample that the metagenome was assembled from, only present if `--coassemble_group` is not set. - `SPAdes-[sample/group]-[sampleToMap].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the respective sample ("sampleToMap"). - - `SPAdes-[sample].[bam/bai]`: Optionally saved BAM file of the Bowtie2 mapping of reads against the assembly. + - `SPAdes-[sample]-[sampleToMap].[bam/bai]`: Optionally saved BAM file of the Bowtie2 mapping of reads against the assembly. diff --git a/modules.json b/modules.json index e892528f4..641140775 100644 --- a/modules.json +++ b/modules.json @@ -36,6 +36,16 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "bowtie2/align": { + "branch": "master", + "git_sha": "0e9cb409c32d3ec4f0d3804588e4778971c09b7e", + "installed_by": ["modules"] + }, + "bowtie2/build": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "cat/fastq": { "branch": "master", "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", @@ -267,6 +277,16 @@ "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", "installed_by": ["modules"] }, + "samtools/index": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": ["modules"] + }, + "samtools/sort": { + "branch": "master", + "git_sha": "b7800db9b069ed505db3f9d91b8c72faea9be17b", + "installed_by": ["modules"] + }, "seqtk/mergepe": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", diff --git a/modules/nf-core/bowtie2/align/environment.yml b/modules/nf-core/bowtie2/align/environment.yml new file mode 100644 index 000000000..9090f2188 --- /dev/null +++ b/modules/nf-core/bowtie2/align/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bowtie2=2.5.2 + - bioconda::samtools=1.18 + - conda-forge::pigz=2.6 diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf new file mode 100644 index 000000000..7b2f25eb4 --- /dev/null +++ b/modules/nf-core/bowtie2/align/main.nf @@ -0,0 +1,116 @@ +process BOWTIE2_ALIGN { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6-0' : + 'biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6-0' }" + + input: + tuple val(meta) , path(reads) + tuple val(meta2), path(index) + tuple val(meta3), path(fasta) + val save_unaligned + val sort_bam + + output: + tuple val(meta), path("*.sam") , emit: sam , optional:true + tuple val(meta), path("*.bam") , emit: bam , optional:true + tuple val(meta), path("*.cram") , emit: cram , optional:true + tuple val(meta), path("*.csi") , emit: csi , optional:true + tuple val(meta), path("*.crai") , emit: crai , optional:true + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*fastq.gz") , emit: fastq , optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: "" + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + + def unaligned = "" + def reads_args = "" + if (meta.single_end) { + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-U ${reads}" + } else { + unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-1 ${reads[0]} -2 ${reads[1]}" + } + + def samtools_command = sort_bam ? 'sort' : 'view' + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? "--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev.1.bt2l\$//"` + [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 + + bowtie2 \\ + -x \$INDEX \\ + $reads_args \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> >(tee ${prefix}.bowtie2.log >&2) \\ + | samtools $samtools_command $args2 --threads $task.cpus ${reference} -o ${prefix}.${extension} - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + + if [ -f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension = (args2 ==~ extension_pattern) ? (args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" + def create_unmapped = "" + if (meta.single_end) { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped.fastq.gz" : "" + } else { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped_1.fastq.gz && touch ${prefix}.unmapped_2.fastq.gz" : "" + } + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } + + """ + touch ${prefix}.${extension} + ${create_index} + touch ${prefix}.bowtie2.log + ${create_unmapped} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + +} diff --git a/modules/nf-core/bowtie2/align/meta.yml b/modules/nf-core/bowtie2/align/meta.yml new file mode 100644 index 000000000..7436097b8 --- /dev/null +++ b/modules/nf-core/bowtie2/align/meta.yml @@ -0,0 +1,132 @@ +name: bowtie2_align +description: Align reads to a reference genome using bowtie2 +keywords: + - align + - map + - fasta + - fastq + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.ebwt" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Bowtie2 genome fasta file + pattern: "*.fasta" + - - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - sam: + - meta: + type: file + description: Output SAM file containing read alignments + pattern: "*.sam" + - "*.sam": + type: file + description: Output SAM file containing read alignments + pattern: "*.sam" + - bam: + - meta: + type: file + description: Output BAM file containing read alignments + pattern: "*.bam" + - "*.bam": + type: file + description: Output BAM file containing read alignments + pattern: "*.bam" + - cram: + - meta: + type: file + description: Output CRAM file containing read alignments + pattern: "*.cram" + - "*.cram": + type: file + description: Output CRAM file containing read alignments + pattern: "*.cram" + - csi: + - meta: + type: file + description: Output SAM/BAM index for large inputs + pattern: "*.csi" + - "*.csi": + type: file + description: Output SAM/BAM index for large inputs + pattern: "*.csi" + - crai: + - meta: + type: file + description: Output CRAM index + pattern: "*.crai" + - "*.crai": + type: file + description: Output CRAM index + pattern: "*.crai" + - log: + - meta: + type: file + description: Alignment log + pattern: "*.log" + - "*.log": + type: file + description: Alignment log + pattern: "*.log" + - fastq: + - meta: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - "*fastq.gz": + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/align/tests/cram_crai.config b/modules/nf-core/bowtie2/align/tests/cram_crai.config new file mode 100644 index 000000000..03f1d5e51 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/cram_crai.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_ALIGN { + ext.args2 = '--output-fmt cram --write-index' + } +} diff --git a/modules/nf-core/bowtie2/align/tests/large_index.config b/modules/nf-core/bowtie2/align/tests/large_index.config new file mode 100644 index 000000000..fdc1c59dd --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/large_index.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_BUILD { + ext.args = '--large-index' + } +} \ No newline at end of file diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test b/modules/nf-core/bowtie2/align/tests/main.nf.test new file mode 100644 index 000000000..0de5950fe --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test @@ -0,0 +1,623 @@ +nextflow_process { + + name "Test Process BOWTIE2_ALIGN" + script "../main.nf" + process "BOWTIE2_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "bowtie2" + tag "bowtie2/build" + tag "bowtie2/align" + + test("sarscov2 - fastq, index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam") { + + config "./sam.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam2") { + + config "./sam2.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, true - cram") { + + config "./cram_crai.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = true //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.cram[0][1]).name, + file(process.out.crai[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test.snap b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap new file mode 100644 index 000000000..028e7da68 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap @@ -0,0 +1,311 @@ +{ + "sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam": { + "content": [ + "test.bam", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bowtie2.log:md5,bd89ce1b28c93bf822bae391ffcedd19" + ] + ], + [ + + ], + [ + "versions.yml:md5,01d18ab035146ea790e9a0f70adb758f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T13:19:25.337323" + }, + "sarscov2 - fastq, index, fasta, false, false - sam2": { + "content": [ + [ + "ERR5069949.2151832\t16\tMT192765.1\t17453\t42\t150M\t*\t0\t0\tACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGA\tAAAA versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + mkdir bowtie2 + touch bowtie2/${fasta.baseName}.{1..4}.bt2 + touch bowtie2/${fasta.baseName}.rev.{1,2}.bt2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bowtie2/build/meta.yml b/modules/nf-core/bowtie2/build/meta.yml new file mode 100644 index 000000000..2729a92ec --- /dev/null +++ b/modules/nf-core/bowtie2/build/meta.yml @@ -0,0 +1,49 @@ +name: bowtie2_build +description: Builds bowtie index for reference genome +keywords: + - build + - index + - fasta + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file +output: + - index: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - bowtie2: + type: file + description: Bowtie2 genome index files + pattern: "*.bt2" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/build/tests/main.nf.test b/modules/nf-core/bowtie2/build/tests/main.nf.test new file mode 100644 index 000000000..163760257 --- /dev/null +++ b/modules/nf-core/bowtie2/build/tests/main.nf.test @@ -0,0 +1,31 @@ +nextflow_process { + + name "Test Process BOWTIE2_BUILD" + script "modules/nf-core/bowtie2/build/main.nf" + process "BOWTIE2_BUILD" + tag "modules" + tag "modules_nfcore" + tag "bowtie2" + tag "bowtie2/build" + + test("Should run without failures") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/nf-core/bowtie2/build/tests/main.nf.test.snap b/modules/nf-core/bowtie2/build/tests/main.nf.test.snap new file mode 100644 index 000000000..6875e0213 --- /dev/null +++ b/modules/nf-core/bowtie2/build/tests/main.nf.test.snap @@ -0,0 +1,45 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.1.bt2:md5,cbe3d0bbea55bc57c99b4bfa25b5fbdf", + "genome.2.bt2:md5,47b153cd1319abc88dda532462651fcf", + "genome.3.bt2:md5,4ed93abba181d8dfab2e303e33114777", + "genome.4.bt2:md5,c25be5f8b0378abf7a58c8a880b87626", + "genome.rev.1.bt2:md5,52be6950579598a990570fbcf5372184", + "genome.rev.2.bt2:md5,e3b4ef343dea4dd571642010a7d09597" + ] + ] + ], + "1": [ + "versions.yml:md5,1df11e9b82891527271c889c880d3974" + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.1.bt2:md5,cbe3d0bbea55bc57c99b4bfa25b5fbdf", + "genome.2.bt2:md5,47b153cd1319abc88dda532462651fcf", + "genome.3.bt2:md5,4ed93abba181d8dfab2e303e33114777", + "genome.4.bt2:md5,c25be5f8b0378abf7a58c8a880b87626", + "genome.rev.1.bt2:md5,52be6950579598a990570fbcf5372184", + "genome.rev.2.bt2:md5,e3b4ef343dea4dd571642010a7d09597" + ] + ] + ], + "versions": [ + "versions.yml:md5,1df11e9b82891527271c889c880d3974" + ] + } + ], + "timestamp": "2023-11-23T11:51:01.107681997" + } +} \ No newline at end of file diff --git a/modules/nf-core/bowtie2/build/tests/tags.yml b/modules/nf-core/bowtie2/build/tests/tags.yml new file mode 100644 index 000000000..81aa61dab --- /dev/null +++ b/modules/nf-core/bowtie2/build/tests/tags.yml @@ -0,0 +1,2 @@ +bowtie2/build: + - modules/nf-core/bowtie2/build/** diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 000000000..62054fc97 --- /dev/null +++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 000000000..311756102 --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus-1} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def extension = file(input).getExtension() == 'cram' ? + "crai" : args.contains("-c") ? "csi" : "bai" + """ + touch ${input}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 000000000..db8df0d50 --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,71 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: input file +output: + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: CSI index file + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 000000000..0ed260efa --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 000000000..ca34fb5cd --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,140 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("bai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi") { + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("bai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi - stub") { + options "-stub" + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 000000000..72d65e81a --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,250 @@ +{ + "csi - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:25.261127166" + }, + "crai - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:12.653194876" + }, + "bai - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:01.854932651" + }, + "csi": { + "content": [ + "test.paired_end.sorted.bam.csi", + [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:51.485364222" + }, + "crai": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:40.518873972" + }, + "bai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:21.184050361" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml new file mode 100644 index 000000000..e0f58a7a3 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/index: + - modules/nf-core/samtools/index/** diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml new file mode 100644 index 000000000..62054fc97 --- /dev/null +++ b/modules/nf-core/samtools/sort/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf new file mode 100644 index 000000000..caf3c61a8 --- /dev/null +++ b/modules/nf-core/samtools/sort/main.nf @@ -0,0 +1,72 @@ +process SAMTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta) , path(bam) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + def reference = fasta ? "--reference ${fasta}" : "" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + + """ + samtools cat \\ + ${bam} \\ + | \\ + samtools sort \\ + $args \\ + -T ${prefix} \\ + --threads $task.cpus \\ + ${reference} \\ + -o ${prefix}.${extension} \\ + - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + """ + touch ${prefix}.${extension} + if [ "${extension}" == "bam" ]; + then + touch ${prefix}.${extension}.csi + elif [ "${extension}" == "cram" ]; + then + touch ${prefix}.${extension}.crai + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml new file mode 100644 index 000000000..a9dbec5a8 --- /dev/null +++ b/modules/nf-core/samtools/sort/meta.yml @@ -0,0 +1,92 @@ +name: samtools_sort +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file(s) + pattern: "*.{bam,cram,sam}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta,fna}" + optional: true +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Sorted BAM file + pattern: "*.{bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Sorted CRAM file + pattern: "*.{cram}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: CRAM index file (optional) + pattern: "*.crai" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: BAM index file (optional) + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@matthdsm" +maintainers: + - "@drpatelh" + - "@ewels" + - "@matthdsm" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test new file mode 100644 index 000000000..b05e6691b --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -0,0 +1,192 @@ +nextflow_process { + + name "Test Process SAMTOOLS_SORT" + script "../main.nf" + process "SAMTOOLS_SORT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/sort" + + test("bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match()} + ) + } + } + + test("multiple bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match()} + ) + } + } + + test("cram") { + + config "./nextflow_cram.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.cram.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.crai.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match()} + ) + } + } + + test("bam - stub") { + + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("multiple bam - stub") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("cram - stub") { + + options "-stub" + config "./nextflow_cram.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap new file mode 100644 index 000000000..469891fe3 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -0,0 +1,287 @@ +{ + "cram": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram.crai" + ] + ], + [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:49:58.207549273" + }, + "bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:50:08.630951018" + }, + "cram - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ], + "bam": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:50:19.061912443" + }, + "multiple bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi" + ] + ], + [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-08T11:59:55.479443" + }, + "multiple bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d185916eaff9afeb4d0aeab3310371f9" + ] + ], + "4": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d185916eaff9afeb4d0aeab3310371f9" + ] + ], + "versions": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-08T11:36:13.781404" + }, + "bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,34aa85e86abefe637f7a4a9887f016fc" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi" + ] + ], + [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-08T11:59:46.372244" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/tests/nextflow.config b/modules/nf-core/samtools/sort/tests/nextflow.config new file mode 100644 index 000000000..f642771f5 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + ext.args = "--write-index" + } + +} diff --git a/modules/nf-core/samtools/sort/tests/nextflow_cram.config b/modules/nf-core/samtools/sort/tests/nextflow_cram.config new file mode 100644 index 000000000..3a8c0188b --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow_cram.config @@ -0,0 +1,8 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + ext.args = "--write-index --output-fmt cram" + } + +} diff --git a/modules/nf-core/samtools/sort/tests/tags.yml b/modules/nf-core/samtools/sort/tests/tags.yml new file mode 100644 index 000000000..cd63ea208 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/tags.yml @@ -0,0 +1,3 @@ +samtools/sort: + - modules/nf-core/samtools/sort/** + - tests/modules/nf-core/samtools/sort/** diff --git a/subworkflows/local/binning_preparation.nf b/subworkflows/local/binning_preparation.nf index 60f63a269..2720f7ebc 100644 --- a/subworkflows/local/binning_preparation.nf +++ b/subworkflows/local/binning_preparation.nf @@ -2,50 +2,92 @@ * Binning preparation with Bowtie2 */ -include { BOWTIE2_ASSEMBLY_BUILD } from '../../modules/local/bowtie2_assembly_build' -include { BOWTIE2_ASSEMBLY_ALIGN } from '../../modules/local/bowtie2_assembly_align' +include { BOWTIE2_BUILD as BOWTIE2_ASSEMBLY_BUILD } from '../../modules/nf-core/bowtie2/build' +include { BOWTIE2_ALIGN as BOWTIE2_ASSEMBLY_ALIGN } from '../../modules/nf-core/bowtie2/align' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' workflow BINNING_PREPARATION { take: - assemblies // channel: [ val(meta), path(assembly) ] - reads // channel: [ val(meta), [ reads ] ] + assemblies // channel: [ val(meta), path(assembly) ] + reads // channel: [ val(meta), [ reads ] ] main: + ch_versions = Channel.empty() + // build bowtie2 index for all assemblies - BOWTIE2_ASSEMBLY_BUILD ( assemblies ) + BOWTIE2_ASSEMBLY_BUILD(assemblies) + ch_versions = ch_versions.mix(BOWTIE2_ASSEMBLY_BUILD.out.versions.first()) // combine assemblies with sample reads for binning depending on specified mapping mode - if (params.binning_map_mode == 'all'){ + if (params.binning_map_mode == 'all') { // combine assemblies with reads of all samples - ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.assembly_index + ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.index + .join(assemblies) .combine(reads) - } else if (params.binning_map_mode == 'group'){ + } + else if (params.binning_map_mode == 'group') { // combine assemblies with reads of samples from same group - ch_reads_bowtie2 = reads.map{ meta, reads -> [ meta.group, meta, reads ] } - ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.assembly_index - .map { meta, assembly, index -> [ meta.group, meta, assembly, index ] } - .combine(ch_reads_bowtie2, by: 0) - .map { group, assembly_meta, assembly, index, reads_meta, reads -> [ assembly_meta, assembly, index, reads_meta, reads ] } + ch_reads_bowtie2 = reads.map { meta, fastq -> [meta.group, meta, fastq] } - } else { + ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.index + .join(assemblies) + .map { meta, index, assembly -> [meta.group, meta, assembly, index] } + .combine(ch_reads_bowtie2, by: 0) + .map { _group, assembly_meta, assembly, index, reads_meta, fastq -> + [assembly_meta, assembly, index, reads_meta, fastq] + } + } + else { // i.e. --binning_map_mode 'own' // combine assemblies (not co-assembled) with reads from own sample - ch_reads_bowtie2 = reads.map{ meta, reads -> [ meta.id, meta, reads ] } - ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.assembly_index - .map { meta, assembly, index -> [ meta.id, meta, assembly, index ] } + ch_reads_bowtie2 = reads.map { meta, fastq -> [meta.id, meta, fastq] } + ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.index + .join(assemblies) + .map { meta, index, assembly -> [meta.id, meta, assembly, index] } .combine(ch_reads_bowtie2, by: 0) - .map { id, assembly_meta, assembly, index, reads_meta, reads -> [ assembly_meta, assembly, index, reads_meta, reads ] } + .map { _id, assembly_meta, assembly, index, reads_meta, fastq -> + [assembly_meta, assembly, index, reads_meta, fastq] + } + } + // RECONFIGURE CHANNEL TO MATCH BOWTIE2_ALIGN INPUT STRUCTURE + // I purposely flip the location of the metas here, to ensure the assembly meta gets + // exported with the resulting BAM file from the bowtie2 module, and not the reads meta. + // We have to retain the reads meta for use within prefix and tag in the bowtie2 module. + // I don't combine assembly_meta and relevant reads_meta, as otherwise would have a map + // operation after each use of SAMTOOLS_SORT.out.*, to remove the now irrelevant reads_meta + // information. + ch_bowtie2_align_input = ch_bowtie2_input.multiMap { assembly_meta, assembly, index, reads_meta, fastq -> + reads: [assembly_meta, fastq] + index: [[:], index] + assembly: [reads_meta, assembly] + saveunaligned: false + sortbam: true } - BOWTIE2_ASSEMBLY_ALIGN ( ch_bowtie2_input ) + BOWTIE2_ASSEMBLY_ALIGN( + ch_bowtie2_align_input.reads, + ch_bowtie2_align_input.index, + ch_bowtie2_align_input.assembly, + ch_bowtie2_align_input.saveunaligned, + ch_bowtie2_align_input.sortbam, + ) + ch_versions = ch_versions.mix(BOWTIE2_ASSEMBLY_ALIGN.out.versions.first()) + + SAMTOOLS_INDEX(BOWTIE2_ASSEMBLY_ALIGN.out.bam) + ch_sorted_assembly_bams = assemblies + .join(BOWTIE2_ASSEMBLY_ALIGN.out.bam) + .join(SAMTOOLS_INDEX.out.bai) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + // group mappings for one assembly - ch_grouped_mappings = BOWTIE2_ASSEMBLY_ALIGN.out.mappings + ch_grouped_mappings = ch_sorted_assembly_bams .groupTuple(by: 0) - .map { meta, assembly, bams, bais -> [ meta, assembly.sort()[0], bams, bais ] } // multiple symlinks to the same assembly -> use first of sorted list + .map { meta, assembly, bams, bais -> [meta, assembly.sort()[0], bams, bais] } + .dump(tag: 'ch_grouped_mappings') emit: - bowtie2_assembly_multiqc = BOWTIE2_ASSEMBLY_ALIGN.out.log.map { assembly_meta, reads_meta, log -> [ log ] } - bowtie2_version = BOWTIE2_ASSEMBLY_ALIGN.out.versions + bowtie2_assembly_multiqc = BOWTIE2_ASSEMBLY_ALIGN.out.log.map { _meta, log -> [log] } + versions = ch_versions grouped_mappings = ch_grouped_mappings } diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index abcee2c38..c70d6fee6 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -2,18 +2,18 @@ * SHORTREAD_PREPROCESSING: Preprocessing and QC for short reads */ -include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/fastqc/main' -include { FASTQC as FASTQC_TRIMMED } from '../../modules/nf-core/fastqc/main' -include { FASTP } from '../../modules/nf-core/fastp/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../../modules/nf-core/adapterremoval/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../../modules/nf-core/adapterremoval/main' -include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../../modules/local/bowtie2_removal_build' -include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../../modules/local/bowtie2_removal_align' -include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../../modules/local/bowtie2_removal_build' -include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../../modules/local/bowtie2_removal_align' -include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main' -include { SEQTK_MERGEPE } from '../../modules/nf-core/seqtk/mergepe/main' -include { BBMAP_BBNORM } from '../../modules/nf-core/bbmap/bbnorm/main' +include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_TRIMMED } from '../../modules/nf-core/fastqc/main' +include { FASTP } from '../../modules/nf-core/fastp/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../../modules/nf-core/adapterremoval/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../../modules/nf-core/adapterremoval/main' +include { BOWTIE2_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../../modules/nf-core/bowtie2/build/main.nf' +include { BOWTIE2_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../../modules/nf-core/bowtie2/align/main.nf' +include { BOWTIE2_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../../modules/nf-core/bowtie2/build/main.nf' +include { BOWTIE2_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../../modules/nf-core/bowtie2/align/main.nf' +include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main' +include { SEQTK_MERGEPE } from '../../modules/nf-core/seqtk/mergepe/main' +include { BBMAP_BBNORM } from '../../modules/nf-core/bbmap/bbnorm/main' workflow SHORTREAD_PREPROCESSING { take: @@ -75,9 +75,10 @@ workflow SHORTREAD_PREPROCESSING { } else { BOWTIE2_HOST_REMOVAL_BUILD( - ch_host_fasta + ch_host_fasta.map { fasta -> [[id: fasta.getSimpleName()], fasta] } ) ch_host_bowtie2index = BOWTIE2_HOST_REMOVAL_BUILD.out.index + ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_BUILD.out.versions) } } else if (params.host_genome) { @@ -88,8 +89,11 @@ workflow SHORTREAD_PREPROCESSING { BOWTIE2_HOST_REMOVAL_ALIGN( ch_short_reads_prepped, ch_host_bowtie2index, + [[:], []], + true, + false, ) - ch_short_reads_hostremoved = BOWTIE2_HOST_REMOVAL_ALIGN.out.reads + ch_short_reads_hostremoved = BOWTIE2_HOST_REMOVAL_ALIGN.out.fastq ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.versions.first()) ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.log) } @@ -99,13 +103,18 @@ workflow SHORTREAD_PREPROCESSING { if (!params.keep_phix) { BOWTIE2_PHIX_REMOVAL_BUILD( - ch_phix_db_file + ch_phix_db_file.map { fasta -> [[id: fasta.getSimpleName()], fasta] } ) + ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_BUILD.out.versions) + BOWTIE2_PHIX_REMOVAL_ALIGN( ch_short_reads_hostremoved, BOWTIE2_PHIX_REMOVAL_BUILD.out.index, + [[:], []], + true, + false, ) - ch_short_reads_phixremoved = BOWTIE2_PHIX_REMOVAL_ALIGN.out.reads + ch_short_reads_phixremoved = BOWTIE2_PHIX_REMOVAL_ALIGN.out.fastq ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.versions.first()) ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.log) } diff --git a/workflows/mag.nf b/workflows/mag.nf index 5b424d62d..399494944 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -491,7 +491,7 @@ workflow MAG { ch_assemblies, ch_short_reads, ) - ch_versions = ch_versions.mix(BINNING_PREPARATION.out.bowtie2_version.first()) + ch_versions = ch_versions.mix(BINNING_PREPARATION.out.versions) } /* @@ -502,7 +502,7 @@ workflow MAG { if (params.ancient_dna) { ANCIENT_DNA_ASSEMBLY_VALIDATION(BINNING_PREPARATION.out.grouped_mappings) - ch_versions = ch_versions.mix(ANCIENT_DNA_ASSEMBLY_VALIDATION.out.versions.first()) + ch_versions = ch_versions.mix(ANCIENT_DNA_ASSEMBLY_VALIDATION.out.versions) } /*