From bd462262c3d12aeda948add55db6a59dae7833db Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 24 Jan 2025 14:37:56 +0100 Subject: [PATCH 01/14] Start replacing custom Bowtie2 modules with official nf-core ones --- modules.json | 10 + modules/nf-core/bowtie2/align/environment.yml | 7 + modules/nf-core/bowtie2/align/main.nf | 116 ++++ modules/nf-core/bowtie2/align/meta.yml | 132 ++++ .../bowtie2/align/tests/cram_crai.config | 5 + .../bowtie2/align/tests/large_index.config | 5 + .../nf-core/bowtie2/align/tests/main.nf.test | 623 ++++++++++++++++++ .../bowtie2/align/tests/main.nf.test.snap | 311 +++++++++ .../nf-core/bowtie2/align/tests/sam.config | 5 + .../nf-core/bowtie2/align/tests/sam2.config | 5 + modules/nf-core/bowtie2/align/tests/tags.yml | 2 + modules/nf-core/bowtie2/build/environment.yml | 5 + modules/nf-core/bowtie2/build/main.nf | 42 ++ modules/nf-core/bowtie2/build/meta.yml | 49 ++ .../nf-core/bowtie2/build/tests/main.nf.test | 31 + .../bowtie2/build/tests/main.nf.test.snap | 45 ++ modules/nf-core/bowtie2/build/tests/tags.yml | 2 + subworkflows/local/binning_preparation.nf | 45 +- subworkflows/local/shortread_preprocessing.nf | 38 +- 19 files changed, 1439 insertions(+), 39 deletions(-) create mode 100644 modules/nf-core/bowtie2/align/environment.yml create mode 100644 modules/nf-core/bowtie2/align/main.nf create mode 100644 modules/nf-core/bowtie2/align/meta.yml create mode 100644 modules/nf-core/bowtie2/align/tests/cram_crai.config create mode 100644 modules/nf-core/bowtie2/align/tests/large_index.config create mode 100644 modules/nf-core/bowtie2/align/tests/main.nf.test create mode 100644 modules/nf-core/bowtie2/align/tests/main.nf.test.snap create mode 100644 modules/nf-core/bowtie2/align/tests/sam.config create mode 100644 modules/nf-core/bowtie2/align/tests/sam2.config create mode 100644 modules/nf-core/bowtie2/align/tests/tags.yml create mode 100644 modules/nf-core/bowtie2/build/environment.yml create mode 100644 
modules/nf-core/bowtie2/build/main.nf create mode 100644 modules/nf-core/bowtie2/build/meta.yml create mode 100644 modules/nf-core/bowtie2/build/tests/main.nf.test create mode 100644 modules/nf-core/bowtie2/build/tests/main.nf.test.snap create mode 100644 modules/nf-core/bowtie2/build/tests/tags.yml diff --git a/modules.json b/modules.json index 05e3b3dd1..d241aa28d 100644 --- a/modules.json +++ b/modules.json @@ -36,6 +36,16 @@ "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", "installed_by": ["modules"] }, + "bowtie2/align": { + "branch": "master", + "git_sha": "0e9cb409c32d3ec4f0d3804588e4778971c09b7e", + "installed_by": ["modules"] + }, + "bowtie2/build": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "cat/fastq": { "branch": "master", "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", diff --git a/modules/nf-core/bowtie2/align/environment.yml b/modules/nf-core/bowtie2/align/environment.yml new file mode 100644 index 000000000..9090f2188 --- /dev/null +++ b/modules/nf-core/bowtie2/align/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bowtie2=2.5.2 + - bioconda::samtools=1.18 + - conda-forge::pigz=2.6 diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf new file mode 100644 index 000000000..7b2f25eb4 --- /dev/null +++ b/modules/nf-core/bowtie2/align/main.nf @@ -0,0 +1,116 @@ +process BOWTIE2_ALIGN { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6-0' : + 'biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6-0' }" + + input: + tuple val(meta) , path(reads) + tuple val(meta2), path(index) + tuple val(meta3), path(fasta) + val save_unaligned + val sort_bam + + output: + tuple val(meta), path("*.sam") , emit: sam , optional:true + tuple val(meta), path("*.bam") , emit: bam , optional:true + tuple val(meta), path("*.cram") , emit: cram , optional:true + tuple val(meta), path("*.csi") , emit: csi , optional:true + tuple val(meta), path("*.crai") , emit: crai , optional:true + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*fastq.gz") , emit: fastq , optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: "" + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + + def unaligned = "" + def reads_args = "" + if (meta.single_end) { + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-U ${reads}" + } else { + unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-1 ${reads[0]} -2 ${reads[1]}" + } + + def samtools_command = sort_bam ? 'sort' : 'view' + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? 
"--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev.1.bt2l\$//"` + [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 + + bowtie2 \\ + -x \$INDEX \\ + $reads_args \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> >(tee ${prefix}.bowtie2.log >&2) \\ + | samtools $samtools_command $args2 --threads $task.cpus ${reference} -o ${prefix}.${extension} - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + + if [ -f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension = (args2 =~ extension_pattern).getCount() > 0 ? (args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" // partial match, as in script: — args2 may carry extra flags (e.g. --write-index) + def create_unmapped = "" + if (meta.single_end) { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped.fastq.gz" : "" + } else { + create_unmapped = save_unaligned ? 
"touch ${prefix}.unmapped_1.fastq.gz && touch ${prefix}.unmapped_2.fastq.gz" : "" + } + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } + + """ + touch ${prefix}.${extension} + ${create_index} + touch ${prefix}.bowtie2.log + ${create_unmapped} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + +} diff --git a/modules/nf-core/bowtie2/align/meta.yml b/modules/nf-core/bowtie2/align/meta.yml new file mode 100644 index 000000000..7436097b8 --- /dev/null +++ b/modules/nf-core/bowtie2/align/meta.yml @@ -0,0 +1,132 @@ +name: bowtie2_align +description: Align reads to a reference genome using bowtie2 +keywords: + - align + - map + - fasta + - fastq + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test', single_end:false ] + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.{bt2,bt2l}" + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Bowtie2 genome fasta file + pattern: "*.fasta" + - - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - sam: + - meta: + type: file + description: Output SAM file containing read alignments + pattern: "*.sam" + - "*.sam": + type: file + description: Output SAM file containing read alignments + pattern: "*.sam" + - bam: + - meta: + type: file + description: Output BAM file containing read alignments + pattern: "*.bam" + - "*.bam": + type: file + description: Output BAM file containing read alignments + pattern: "*.bam" + - cram: + - meta: + type: file + description: Output CRAM file containing read alignments + pattern: "*.cram" + - "*.cram": + type: file + description: Output CRAM file containing read alignments + pattern: "*.cram" + - csi: + - meta: + type: file + description: Output SAM/BAM index for large inputs + pattern: "*.csi" + - "*.csi": + type: file + description: Output SAM/BAM index for large inputs + pattern: "*.csi" + - crai: + - meta: + type: file + description: Output CRAM index + pattern: "*.crai" + - "*.crai": + type: file + description: Output CRAM index + pattern: "*.crai" + - log: + - meta: + type: file + description: Alignment log + pattern: "*.log" + - "*.log": + type: file + description: Alignment log + pattern: "*.log" + - fastq: + - meta: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - "*fastq.gz": + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - versions: + - versions.yml: + 
type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/align/tests/cram_crai.config b/modules/nf-core/bowtie2/align/tests/cram_crai.config new file mode 100644 index 000000000..03f1d5e51 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/cram_crai.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_ALIGN { + ext.args2 = '--output-fmt cram --write-index' + } +} diff --git a/modules/nf-core/bowtie2/align/tests/large_index.config b/modules/nf-core/bowtie2/align/tests/large_index.config new file mode 100644 index 000000000..fdc1c59dd --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/large_index.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_BUILD { + ext.args = '--large-index' + } +} \ No newline at end of file diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test b/modules/nf-core/bowtie2/align/tests/main.nf.test new file mode 100644 index 000000000..0de5950fe --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test @@ -0,0 +1,623 @@ +nextflow_process { + + name "Test Process BOWTIE2_ALIGN" + script "../main.nf" + process "BOWTIE2_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "bowtie2" + tag "bowtie2/build" + tag "bowtie2/align" + + test("sarscov2 - fastq, index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam") { + + config "./sam.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam2") { + + config "./sam2.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = true //sort (exercises the samtools sort branch, as the test name states) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = 
BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = true //sort (exercises the samtools sort branch, as the test name states) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: 
true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, true - cram") { + + config "./cram_crai.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ 
id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = true //save_unaligned (matches the "true, true" test name; snapshot only records cram/crai names) + input[4] = true //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.cram[0][1]).name, + file(process.out.crai[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, 
+ process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test.snap b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap new file mode 100644 index 000000000..028e7da68 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap @@ -0,0 +1,311 @@ +{ + "sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam": { + "content": [ + "test.bam", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bowtie2.log:md5,bd89ce1b28c93bf822bae391ffcedd19" + ] + ], + [ + + ], + [ + "versions.yml:md5,01d18ab035146ea790e9a0f70adb758f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T13:19:25.337323" + }, + "sarscov2 - fastq, index, fasta, false, false - sam2": { + "content": [ + [ + 
"ERR5069949.2151832\t16\tMT192765.1\t17453\t42\t150M\t*\t0\t0\tACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGA\tAAAA versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + mkdir bowtie2 + touch bowtie2/${fasta.baseName}.{1..4}.bt2 + touch bowtie2/${fasta.baseName}.rev.{1,2}.bt2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bowtie2/build/meta.yml b/modules/nf-core/bowtie2/build/meta.yml new file mode 100644 index 000000000..2729a92ec --- /dev/null +++ b/modules/nf-core/bowtie2/build/meta.yml @@ -0,0 +1,49 @@ +name: bowtie2_build +description: Builds bowtie index for reference genome +keywords: + - build + - index + - fasta + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file +output: + - index: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test', single_end:false ] + - bowtie2: + type: file + description: Bowtie2 genome index files + pattern: "*.bt2" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/build/tests/main.nf.test b/modules/nf-core/bowtie2/build/tests/main.nf.test new file mode 100644 index 000000000..163760257 --- /dev/null +++ b/modules/nf-core/bowtie2/build/tests/main.nf.test @@ -0,0 +1,31 @@ +nextflow_process { + + name "Test Process BOWTIE2_BUILD" + script "modules/nf-core/bowtie2/build/main.nf" + process "BOWTIE2_BUILD" + tag "modules" + tag "modules_nfcore" + tag "bowtie2" + tag "bowtie2/build" + + test("Should run without failures") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/nf-core/bowtie2/build/tests/main.nf.test.snap b/modules/nf-core/bowtie2/build/tests/main.nf.test.snap new file mode 100644 index 000000000..6875e0213 --- /dev/null +++ b/modules/nf-core/bowtie2/build/tests/main.nf.test.snap @@ -0,0 +1,45 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.1.bt2:md5,cbe3d0bbea55bc57c99b4bfa25b5fbdf", + "genome.2.bt2:md5,47b153cd1319abc88dda532462651fcf", + "genome.3.bt2:md5,4ed93abba181d8dfab2e303e33114777", + "genome.4.bt2:md5,c25be5f8b0378abf7a58c8a880b87626", + "genome.rev.1.bt2:md5,52be6950579598a990570fbcf5372184", + "genome.rev.2.bt2:md5,e3b4ef343dea4dd571642010a7d09597" + ] + ] + ], + "1": [ + "versions.yml:md5,1df11e9b82891527271c889c880d3974" + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.1.bt2:md5,cbe3d0bbea55bc57c99b4bfa25b5fbdf", + 
"genome.2.bt2:md5,47b153cd1319abc88dda532462651fcf", + "genome.3.bt2:md5,4ed93abba181d8dfab2e303e33114777", + "genome.4.bt2:md5,c25be5f8b0378abf7a58c8a880b87626", + "genome.rev.1.bt2:md5,52be6950579598a990570fbcf5372184", + "genome.rev.2.bt2:md5,e3b4ef343dea4dd571642010a7d09597" + ] + ] + ], + "versions": [ + "versions.yml:md5,1df11e9b82891527271c889c880d3974" + ] + } + ], + "timestamp": "2023-11-23T11:51:01.107681997" + } +} \ No newline at end of file diff --git a/modules/nf-core/bowtie2/build/tests/tags.yml b/modules/nf-core/bowtie2/build/tests/tags.yml new file mode 100644 index 000000000..81aa61dab --- /dev/null +++ b/modules/nf-core/bowtie2/build/tests/tags.yml @@ -0,0 +1,2 @@ +bowtie2/build: + - modules/nf-core/bowtie2/build/** diff --git a/subworkflows/local/binning_preparation.nf b/subworkflows/local/binning_preparation.nf index 60f63a269..f885c3db1 100644 --- a/subworkflows/local/binning_preparation.nf +++ b/subworkflows/local/binning_preparation.nf @@ -2,50 +2,49 @@ * Binning preparation with Bowtie2 */ -include { BOWTIE2_ASSEMBLY_BUILD } from '../../modules/local/bowtie2_assembly_build' -include { BOWTIE2_ASSEMBLY_ALIGN } from '../../modules/local/bowtie2_assembly_align' +include { BOWTIE2_BUILD as BOWTIE2_ASSEMBLY_BUILD } from '../../modules/nf-core/bowtie2/build' +include { BOWTIE2_ALIGN as BOWTIE2_ASSEMBLY_ALIGN } from '../../modules/nf-core/bowtie2/align' workflow BINNING_PREPARATION { take: - assemblies // channel: [ val(meta), path(assembly) ] - reads // channel: [ val(meta), [ reads ] ] + assemblies // channel: [ val(meta), path(assembly) ] + reads // channel: [ val(meta), [ reads ] ] main: // build bowtie2 index for all assemblies - BOWTIE2_ASSEMBLY_BUILD ( assemblies ) + BOWTIE2_ASSEMBLY_BUILD(assemblies) // combine assemblies with sample reads for binning depending on specified mapping mode - if (params.binning_map_mode == 'all'){ + if (params.binning_map_mode == 'all') { // combine assemblies with reads of all samples - ch_bowtie2_input = 
BOWTIE2_ASSEMBLY_BUILD.out.assembly_index - .combine(reads) - } else if (params.binning_map_mode == 'group'){ + ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.index.combine(reads) + } + else if (params.binning_map_mode == 'group') { // combine assemblies with reads of samples from same group - ch_reads_bowtie2 = reads.map{ meta, reads -> [ meta.group, meta, reads ] } - ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.assembly_index - .map { meta, assembly, index -> [ meta.group, meta, assembly, index ] } + ch_reads_bowtie2 = reads.map { meta, fastq -> [meta.group, meta, fastq] } + ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.index + .map { meta, assembly, index -> [meta.group, meta, assembly, index] } .combine(ch_reads_bowtie2, by: 0) - .map { group, assembly_meta, assembly, index, reads_meta, reads -> [ assembly_meta, assembly, index, reads_meta, reads ] } - - } else { + .map { _group, assembly_meta, assembly, index, reads_meta, fastq -> [assembly_meta, assembly, index, reads_meta, fastq] } + } + else { // i.e. 
--binning_map_mode 'own' // combine assemblies (not co-assembled) with reads from own sample - ch_reads_bowtie2 = reads.map{ meta, reads -> [ meta.id, meta, reads ] } - ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.assembly_index - .map { meta, assembly, index -> [ meta.id, meta, assembly, index ] } + ch_reads_bowtie2 = reads.map { meta, fastq -> [meta.id, meta, fastq] } + ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.index + .map { meta, assembly, index -> [meta.id, meta, assembly, index] } .combine(ch_reads_bowtie2, by: 0) - .map { id, assembly_meta, assembly, index, reads_meta, reads -> [ assembly_meta, assembly, index, reads_meta, reads ] } - + .map { _id, assembly_meta, assembly, index, reads_meta, fastq -> [assembly_meta, assembly, index, reads_meta, fastq] } } - BOWTIE2_ASSEMBLY_ALIGN ( ch_bowtie2_input ) + BOWTIE2_ASSEMBLY_ALIGN(ch_bowtie2_input) // group mappings for one assembly ch_grouped_mappings = BOWTIE2_ASSEMBLY_ALIGN.out.mappings .groupTuple(by: 0) - .map { meta, assembly, bams, bais -> [ meta, assembly.sort()[0], bams, bais ] } // multiple symlinks to the same assembly -> use first of sorted list + .map { meta, assembly, bams, bais -> [meta, assembly.sort()[0], bams, bais] } emit: - bowtie2_assembly_multiqc = BOWTIE2_ASSEMBLY_ALIGN.out.log.map { assembly_meta, reads_meta, log -> [ log ] } + bowtie2_assembly_multiqc = BOWTIE2_ASSEMBLY_ALIGN.out.log.map { _assembly_meta, _reads_meta, log -> [log] } bowtie2_version = BOWTIE2_ASSEMBLY_ALIGN.out.versions grouped_mappings = ch_grouped_mappings } diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index abcee2c38..9a633b8f8 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -2,18 +2,18 @@ * SHORTREAD_PREPROCESSING: Preprocessing and QC for short reads */ -include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/fastqc/main' -include { FASTQC as FASTQC_TRIMMED } from 
'../../modules/nf-core/fastqc/main' -include { FASTP } from '../../modules/nf-core/fastp/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../../modules/nf-core/adapterremoval/main' -include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../../modules/nf-core/adapterremoval/main' -include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../../modules/local/bowtie2_removal_build' -include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../../modules/local/bowtie2_removal_align' -include { BOWTIE2_REMOVAL_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../../modules/local/bowtie2_removal_build' -include { BOWTIE2_REMOVAL_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../../modules/local/bowtie2_removal_align' -include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main' -include { SEQTK_MERGEPE } from '../../modules/nf-core/seqtk/mergepe/main' -include { BBMAP_BBNORM } from '../../modules/nf-core/bbmap/bbnorm/main' +include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_TRIMMED } from '../../modules/nf-core/fastqc/main' +include { FASTP } from '../../modules/nf-core/fastp/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_PE } from '../../modules/nf-core/adapterremoval/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_SE } from '../../modules/nf-core/adapterremoval/main' +include { BOWTIE2_BUILD as BOWTIE2_HOST_REMOVAL_BUILD } from '../../modules/nf-core/bowtie2/build/main.nf' +include { BOWTIE2_ALIGN as BOWTIE2_HOST_REMOVAL_ALIGN } from '../../modules/nf-core/bowtie2/align/main.nf' +include { BOWTIE2_BUILD as BOWTIE2_PHIX_REMOVAL_BUILD } from '../../modules/nf-core/bowtie2/build/main.nf' +include { BOWTIE2_ALIGN as BOWTIE2_PHIX_REMOVAL_ALIGN } from '../../modules/nf-core/bowtie2/align/main.nf' +include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq/main' +include { SEQTK_MERGEPE } from '../../modules/nf-core/seqtk/mergepe/main' +include { BBMAP_BBNORM } from 
'../../modules/nf-core/bbmap/bbnorm/main' workflow SHORTREAD_PREPROCESSING { take: @@ -75,7 +75,7 @@ workflow SHORTREAD_PREPROCESSING { } else { BOWTIE2_HOST_REMOVAL_BUILD( - ch_host_fasta + ch_host_fasta.map { fasta -> [[id: fasta.getSimpleName()], fasta] } ) ch_host_bowtie2index = BOWTIE2_HOST_REMOVAL_BUILD.out.index } @@ -88,8 +88,11 @@ workflow SHORTREAD_PREPROCESSING { BOWTIE2_HOST_REMOVAL_ALIGN( ch_short_reads_prepped, ch_host_bowtie2index, + [[:], []], + true, + false, ) - ch_short_reads_hostremoved = BOWTIE2_HOST_REMOVAL_ALIGN.out.reads + ch_short_reads_hostremoved = BOWTIE2_HOST_REMOVAL_ALIGN.out.fastq ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.versions.first()) ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_HOST_REMOVAL_ALIGN.out.log) } @@ -99,13 +102,16 @@ workflow SHORTREAD_PREPROCESSING { if (!params.keep_phix) { BOWTIE2_PHIX_REMOVAL_BUILD( - ch_phix_db_file + ch_phix_db_file.map { fasta -> [[id: fasta.getSimpleName()], fasta] } ) BOWTIE2_PHIX_REMOVAL_ALIGN( ch_short_reads_hostremoved, BOWTIE2_PHIX_REMOVAL_BUILD.out.index, + [[:], []], + true, + false, ) - ch_short_reads_phixremoved = BOWTIE2_PHIX_REMOVAL_ALIGN.out.reads + ch_short_reads_phixremoved = BOWTIE2_PHIX_REMOVAL_ALIGN.out.fastq ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.versions.first()) ch_multiqc_files = ch_multiqc_files.mix(BOWTIE2_PHIX_REMOVAL_ALIGN.out.log) } From a7f17bbab7395c9a026634ae212db72e8e8c8f02 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Wed, 5 Feb 2025 05:59:43 +0100 Subject: [PATCH 02/14] Start replacing local bowtie2 module with official --- conf/modules.config | 10 +- lib/functions.nf | 100 ++++++ modules.json | 5 + modules/nf-core/samtools/sort/environment.yml | 8 + modules/nf-core/samtools/sort/main.nf | 72 +++++ modules/nf-core/samtools/sort/meta.yml | 92 ++++++ .../nf-core/samtools/sort/tests/main.nf.test | 192 ++++++++++++ .../samtools/sort/tests/main.nf.test.snap | 287 ++++++++++++++++++ .../samtools/sort/tests/nextflow.config | 8 + .../samtools/sort/tests/nextflow_cram.config | 8 + modules/nf-core/samtools/sort/tests/tags.yml | 3 + subworkflows/local/binning_preparation.nf | 17 +- .../local/utils_nfcore_mag_pipeline/main.nf | 31 ++ 13 files changed, 829 insertions(+), 4 deletions(-) create mode 100644 lib/functions.nf create mode 100644 modules/nf-core/samtools/sort/environment.yml create mode 100644 modules/nf-core/samtools/sort/main.nf create mode 100644 modules/nf-core/samtools/sort/meta.yml create mode 100644 modules/nf-core/samtools/sort/tests/main.nf.test create mode 100644 modules/nf-core/samtools/sort/tests/main.nf.test.snap create mode 100644 modules/nf-core/samtools/sort/tests/nextflow.config create mode 100644 modules/nf-core/samtools/sort/tests/nextflow_cram.config create mode 100644 modules/nf-core/samtools/sort/tests/tags.yml diff --git a/conf/modules.config b/conf/modules.config index e14432d63..a4e8fce92 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -347,9 +347,15 @@ process { ] } + withName: BOWTIE2_ASSEMBLY_BUILD { + ext.prefix = { "${meta.assembler}-${meta.id}" } + } + withName: BOWTIE2_ASSEMBLY_ALIGN { + tag = "${assembly_meta.assembler}-${assembly_meta.id}-${reads_meta.id}" ext.args = params.bowtie2_mode ? params.bowtie2_mode : params.ancient_dna ? 
'--very-sensitive -N 1' : '' - ext.prefix = { "${meta.id}.assembly" } + ext.args2 = "-bS" + ext.prefix = { "${assembly_meta.assembler}-${assembly_meta.id}-${assembly_meta.id}" } publishDir = [ [ path: { "${params.outdir}/Assembly/${assembly_meta.assembler}/QC/${assembly_meta.id}" }, @@ -359,7 +365,7 @@ process { [ path: { "${params.outdir}/Assembly/${assembly_meta.assembler}/QC/${assembly_meta.id}" }, mode: params.publish_dir_mode, - pattern: "*.{bam,bai}", + pattern: "*.{bam,csi}", enabled: params.save_assembly_mapped_reads, ], ] diff --git a/lib/functions.nf b/lib/functions.nf new file mode 100644 index 000000000..ea7dd5e02 --- /dev/null +++ b/lib/functions.nf @@ -0,0 +1,100 @@ +/* +vim: syntax=groovy +-*- mode: groovy;-*- +*/ + + +/* +******************************************************************************* +* version_check +******************************************************************************* + */ +def version_check(required_ver, current_ver){ + + try { + if( ! current_ver.matches(">= $required_ver") ){ + throw GroovyException('Nextflow version too old') + } + } catch (all) { + log.error "Error: Nextflow version $required_ver required! " + + "You are running version $current_ver.\n" + + "Please update Nextflow.\n" + exit 1 + + } +} + + +/* +******************************************************************************* +* Function to show yes/no prompt +******************************************************************************* + */ +def prompt(input){ + + if(input == "n"){ + exit 1 + } + if(input == "y"){ + return(true) + } + if(input != "n" || input!= "y"){ + println "Please use 'y' for yes and 'n' for no." 
+ prompt(System.console().readLine 'Do you want to continue again (y/n)?') + } +} + + +/* +******************************************************************************* +* Nextpie +******************************************************************************* + */ + +@Grab('io.github.http-builder-ng:http-builder-ng-okhttp:0.14.2') +@Grab(group='org.slf4j', module='slf4j-api', version='1.7.32') + +import static groovy.json.JsonOutput.toJson +import static groovyx.net.http.HttpBuilder.configure + +import groovyx.net.http.* +import static groovyx.net.http.MultipartContent.multipart + +def Nextpie(host, port, traceFile, Workflow, Version, Group, Project, APIkey){ + File myFile = new File(traceFile) + + try { + def posts = configure { + request.uri = 'http://'+host+':'+port + request.uri.path = '/api/v1.0/upload-data' + request.headers['X-API-KEY'] = APIkey + request.contentType = 'multipart/form-data' + request.body = multipart { + field 'Workflow', Workflow + field 'Version', Version + field 'Group', Group + field 'Project', Project + part 'File', 'Trace.txt', 'text/plain', myFile + } + request.encoder 'multipart/form-data', OkHttpEncoders.&multipart + + response.success { FromServer fs, Object body -> + return body + } + + response.when(401){ FromServer fs, Object body -> + return "UNAUTHORIZED (401)" + } + + response.when(403){ FromServer fs, Object body -> + return "FORBIDDEN (403)" + } + response.when(404){ FromServer fs, Object body -> + return "NOT FOUND (404)" + } + }.post() + } + catch (Exception ce){ + return ce + } +} diff --git a/modules.json b/modules.json index d241aa28d..35949c76a 100644 --- a/modules.json +++ b/modules.json @@ -277,6 +277,11 @@ "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", "installed_by": ["modules"] }, + "samtools/sort": { + "branch": "master", + "git_sha": "b7800db9b069ed505db3f9d91b8c72faea9be17b", + "installed_by": ["modules"] + }, "seqtk/mergepe": { "branch": "master", "git_sha": 
"911696ea0b62df80e900ef244d7867d177971f73", diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml new file mode 100644 index 000000000..62054fc97 --- /dev/null +++ b/modules/nf-core/samtools/sort/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf new file mode 100644 index 000000000..caf3c61a8 --- /dev/null +++ b/modules/nf-core/samtools/sort/main.nf @@ -0,0 +1,72 @@ +process SAMTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta) , path(bam) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + def reference = fasta ? "--reference ${fasta}" : "" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ + """ + samtools cat \\ + ${bam} \\ + | \\ + samtools sort \\ + $args \\ + -T ${prefix} \\ + --threads $task.cpus \\ + ${reference} \\ + -o ${prefix}.${extension} \\ + - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + """ + touch ${prefix}.${extension} + if [ "${extension}" == "bam" ]; + then + touch ${prefix}.${extension}.csi + elif [ "${extension}" == "cram" ]; + then + touch ${prefix}.${extension}.crai + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml new file mode 100644 index 000000000..a9dbec5a8 --- /dev/null +++ b/modules/nf-core/samtools/sort/meta.yml @@ -0,0 +1,92 @@ +name: samtools_sort +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file(s) + pattern: "*.{bam,cram,sam}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta,fna}" + optional: true +output: + - bam: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Sorted BAM file + pattern: "*.{bam}" + - cram: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Sorted CRAM file + pattern: "*.{cram}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: CRAM index file (optional) + pattern: "*.crai" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.csi": + type: file + description: BAM index file (optional) + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@matthdsm" +maintainers: + - "@drpatelh" + - "@ewels" + - "@matthdsm" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test new file mode 100644 index 000000000..b05e6691b --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -0,0 +1,192 @@ +nextflow_process { + + name "Test Process SAMTOOLS_SORT" + script "../main.nf" + process "SAMTOOLS_SORT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/sort" + + test("bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions + ).match()} + ) + } + } + + test("multiple bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match()} + ) + } + } + + test("cram") { + + config "./nextflow_cram.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.cram.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.crai.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions + ).match()} + ) + } + } + + test("bam - stub") { + + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("multiple bam - stub") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("cram - stub") { + + options "-stub" + config "./nextflow_cram.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) 
+ } + } +} diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap new file mode 100644 index 000000000..469891fe3 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -0,0 +1,287 @@ +{ + "cram": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram.crai" + ] + ], + [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:49:58.207549273" + }, + "bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:50:08.630951018" + }, + "cram - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ], + "bam": [ + + ], + "crai": [ + [ + { + "id": 
"test", + "single_end": false + }, + "test.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "cram": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:50:19.061912443" + }, + "multiple bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi" + ] + ], + [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-08T11:59:55.479443" + }, + "multiple bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d185916eaff9afeb4d0aeab3310371f9" + ] + ], + "4": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,8a16ba90c7d294cbb4c33ac0f7127a12" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,d185916eaff9afeb4d0aeab3310371f9" + ] + ], + "versions": [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-08T11:36:13.781404" + }, + "bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,34aa85e86abefe637f7a4a9887f016fc" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi" + 
] + ], + [ + "versions.yml:md5,2659b187d681241451539d4c53500b9f" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.09.0" + }, + "timestamp": "2024-10-08T11:59:46.372244" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/tests/nextflow.config b/modules/nf-core/samtools/sort/tests/nextflow.config new file mode 100644 index 000000000..f642771f5 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + ext.args = "--write-index" + } + +} diff --git a/modules/nf-core/samtools/sort/tests/nextflow_cram.config b/modules/nf-core/samtools/sort/tests/nextflow_cram.config new file mode 100644 index 000000000..3a8c0188b --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow_cram.config @@ -0,0 +1,8 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + ext.args = "--write-index --output-fmt cram" + } + +} diff --git a/modules/nf-core/samtools/sort/tests/tags.yml b/modules/nf-core/samtools/sort/tests/tags.yml new file mode 100644 index 000000000..cd63ea208 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/tags.yml @@ -0,0 +1,3 @@ +samtools/sort: + - modules/nf-core/samtools/sort/** + - tests/modules/nf-core/samtools/sort/** diff --git a/subworkflows/local/binning_preparation.nf b/subworkflows/local/binning_preparation.nf index f885c3db1..798ba878a 100644 --- a/subworkflows/local/binning_preparation.nf +++ b/subworkflows/local/binning_preparation.nf @@ -4,6 +4,7 @@ include { BOWTIE2_BUILD as BOWTIE2_ASSEMBLY_BUILD } from '../../modules/nf-core/bowtie2/build' include { BOWTIE2_ALIGN as BOWTIE2_ASSEMBLY_ALIGN } from '../../modules/nf-core/bowtie2/align' +include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' workflow BINNING_PREPARATION { take: @@ -36,10 +37,22 @@ workflow BINNING_PREPARATION { .combine(ch_reads_bowtie2, by: 0) .map { _id, assembly_meta, assembly, 
index, reads_meta, fastq -> [assembly_meta, assembly, index, reads_meta, fastq] } } + // TODO: run current mag with dumping `ch_bowtie2_input` before adn after alignment to see what info is in meta + // and what is dropped. See if all should be exported, and either merge all meta into one one object or + // call `meta2` etc. within `modules.config for prefix/tag + + ch_bowtie2_align_input = ch_bowtie2_input.map { id, assembly_meta, index, reads_meta, fastq -> + [id, assembly_meta, index, reads_meta, fastq] + } + // TODO finish constructing this to go into BOWTIE2_ASSEMBLY_ALIGN + + BOWTIE2_ASSEMBLY_ALIGN(ch_bowtie2_input, [], [], false, false) + // TODO fix ch_bowtie2_input to match, maybe single object? + SAMTOOLS_SORT(BOWTIE2_ASSEMBLY_ALIGN.out.bam, []) + ch_sorted_assembly_bams = SAMTOOLS_SORT.out.bam.join(SAMTOOLS_SORT.out.csi) - BOWTIE2_ASSEMBLY_ALIGN(ch_bowtie2_input) // group mappings for one assembly - ch_grouped_mappings = BOWTIE2_ASSEMBLY_ALIGN.out.mappings + ch_grouped_mappings = ch_sorted_assembly_bams .groupTuple(by: 0) .map { meta, assembly, bams, bais -> [meta, assembly.sort()[0], bams, bais] } diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index a8c6dfd2b..0653008d1 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -15,6 +15,7 @@ include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' include { imNotification } from '../../nf-core/utils_nfcore_pipeline' include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { Nextpie } from '../../../lib/functions' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -191,6 +192,36 @@ workflow PIPELINE_COMPLETION { monochrome_logs, multiqc_reports.getVal(), ) + + // Nextpie + // 
params.nextpie_enable defined in nextflow.config + if(params.nextpie_enable){ + + // Logging + // params.workflow_name and params.workflow_ver come from nextflow.config. + // Refer above code block + log.info "workflow: " + params.workflow_name + log.info "version : " + params.workflow_ver + // params.group is taken from commandline using --group flag + log.info "group : " + params.group + log.info "project : " + workflow.runName + + // Push run metadata when group and name is provided + if(params.name && params.group){ + log.info "Pushing metadata to Nextpie http://${params.nextpie_host}:${params.nextpie_port}" + log.info "Response: " + + Nextpie(host = params.nextpie_host, + port = params.nextpie_port, + traceFile = "${params.outDir}/pipeline_info/Trace.txt", + Workflow = params.workflow_name, + Version = params.workflow_ver, + Group = params.group, + Project = workflow.runName, + APIkey = params.nextpie_api_key).toString() + }else{ + log.info "Run metadata pushing to Nextflow skipped (no --group --name provided)." + } + } } completionSummary(monochrome_logs) From f3fde6e38b8a783d075b0fa3f6d7e9a8d983eef8 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Wed, 5 Feb 2025 11:20:56 +0100 Subject: [PATCH 03/14] Finish channel conversion input to assembly bowtie2 --- CHANGELOG.md | 2 + conf/modules.config | 8 ++-- subworkflows/local/binning_preparation.nf | 56 +++++++++++++++++------ workflows/mag.nf | 4 +- 4 files changed, 49 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca448d2bd..68a4fbcab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,12 +10,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` - [#731](https://github.com/nf-core/mag/pull/747) - Updated to nf-core 3.1.2 `TEMPLATE` (by @jfy133) +- [#754](https://github.com/nf-core/mag/pull/754) - Replaced local module with nf-core module for all Bowtie2 processes (reported by @MeriamOs, fix by @jfy133) ### `Fixed` - [#748](https://github.com/nf-core/mag/pull/748) - Fix broken phix reference channel when skipping phix removal (reported by @amizeranschi, fix by @muabnezor) - [#752](https://github.com/nf-core/mag/pull/752) - Fix QUAST results not being displayed when skipping certain steps (reported by @amizeranschi, fix by @jfy133) - [#753](https://github.com/nf-core/mag/pull/753) - Fix iGenomes reference support for host removal reference genome (reported by @Thomieh73, fix by @jfy133) +- [#754](https://github.com/nf-core/mag/pull/754) - Fix version reporting in binning preparation and ancient DNA workflows (fix by @jfy133) ### `Deprecated` diff --git a/conf/modules.config b/conf/modules.config index a4e8fce92..c399a9877 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -352,18 +352,18 @@ process { } withName: BOWTIE2_ASSEMBLY_ALIGN { - tag = "${assembly_meta.assembler}-${assembly_meta.id}-${reads_meta.id}" + tag = "${meta.assembler}-${meta.id}-${meta3.id}" ext.args = params.bowtie2_mode ? params.bowtie2_mode : params.ancient_dna ? 
'--very-sensitive -N 1' : '' ext.args2 = "-bS" - ext.prefix = { "${assembly_meta.assembler}-${assembly_meta.id}-${assembly_meta.id}" } + ext.prefix = { "${meta.assembler}-${meta.id}-${meta3.id}" } publishDir = [ [ - path: { "${params.outdir}/Assembly/${assembly_meta.assembler}/QC/${assembly_meta.id}" }, + path: { "${params.outdir}/Assembly/${meta.assembler}/QC/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.log", ], [ - path: { "${params.outdir}/Assembly/${assembly_meta.assembler}/QC/${assembly_meta.id}" }, + path: { "${params.outdir}/Assembly/${meta.assembler}/QC/${meta.id}" }, mode: params.publish_dir_mode, pattern: "*.{bam,csi}", enabled: params.save_assembly_mapped_reads, diff --git a/subworkflows/local/binning_preparation.nf b/subworkflows/local/binning_preparation.nf index 798ba878a..91d0562cf 100644 --- a/subworkflows/local/binning_preparation.nf +++ b/subworkflows/local/binning_preparation.nf @@ -12,8 +12,12 @@ workflow BINNING_PREPARATION { reads // channel: [ val(meta), [ reads ] ] main: + ch_versions = Channel.empty() + // build bowtie2 index for all assemblies BOWTIE2_ASSEMBLY_BUILD(assemblies) + ch_versions = ch_versions.mix(BOWTIE2_ASSEMBLY_BUILD.out.versions.first()) + // combine assemblies with sample reads for binning depending on specified mapping mode if (params.binning_map_mode == 'all') { @@ -26,7 +30,9 @@ workflow BINNING_PREPARATION { ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.index .map { meta, assembly, index -> [meta.group, meta, assembly, index] } .combine(ch_reads_bowtie2, by: 0) - .map { _group, assembly_meta, assembly, index, reads_meta, fastq -> [assembly_meta, assembly, index, reads_meta, fastq] } + .map { _group, assembly_meta, assembly, index, reads_meta, fastq -> + [assembly_meta, assembly, index, reads_meta, fastq] + } } else { // i.e. 
--binning_map_mode 'own' @@ -35,29 +41,49 @@ workflow BINNING_PREPARATION { ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.index .map { meta, assembly, index -> [meta.id, meta, assembly, index] } .combine(ch_reads_bowtie2, by: 0) - .map { _id, assembly_meta, assembly, index, reads_meta, fastq -> [assembly_meta, assembly, index, reads_meta, fastq] } + .map { _id, assembly_meta, assembly, index, reads_meta, fastq -> + [assembly_meta, assembly, index, reads_meta, fastq] + } } - // TODO: run current mag with dumping `ch_bowtie2_input` before adn after alignment to see what info is in meta - // and what is dropped. See if all should be exported, and either merge all meta into one one object or - // call `meta2` etc. within `modules.config for prefix/tag - ch_bowtie2_align_input = ch_bowtie2_input.map { id, assembly_meta, index, reads_meta, fastq -> - [id, assembly_meta, index, reads_meta, fastq] + // RECONFIGURE CHANNEL TO MATCH BOWTIE2_ALIGN INPUT STRUCTURE + // I purposely flip the location of the metas here, to ensure the assembly meta gets + // exported with the resulting BAM file from the bowtie2 module, and not the reads meta. + // We have to retain the reads meta for use within prefix and tag in the bowtie2 module. + // I don't combine assembly_meta and relevant reads_meta, as otherwise would have a map + // operation after each use of SAMTOOLS_SORT.out.*, to remove the now irrelevant reads_meta + // information. + ch_bowtie2_align_input = ch_bowtie2_input.multiMap { assembly_meta, assembly, index, reads_meta, fastq -> + reads: [assembly_meta, fastq] + index: [[:], index] + assembly: [reads_meta, assembly] + saveunaligned: false + sortbam: false } - // TODO finish constructing this to go into BOWTIE2_ASSEMBLY_ALIGN - BOWTIE2_ASSEMBLY_ALIGN(ch_bowtie2_input, [], [], false, false) - // TODO fix ch_bowtie2_input to match, maybe single object? 
- SAMTOOLS_SORT(BOWTIE2_ASSEMBLY_ALIGN.out.bam, []) - ch_sorted_assembly_bams = SAMTOOLS_SORT.out.bam.join(SAMTOOLS_SORT.out.csi) + BOWTIE2_ASSEMBLY_ALIGN( + ch_bowtie2_align_input.reads, + ch_bowtie2_align_input.index, + ch_bowtie2_align_input.ref, + ch_bowtie2_align_input.saveunaligned, + ch_bowtie2_align_input.sortbam, + ) + ch_versions = ch_versions.mix(BOWTIE2_ASSEMBLY_ALIGN.out.versions.first()) + + SAMTOOLS_SORT(BOWTIE2_ASSEMBLY_ALIGN.out.bam, [[:], []]) + ch_sorted_assembly_bams = assemblies + .join(SAMTOOLS_SORT.out.bam) + .join(SAMTOOLS_SORT.out.csi) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) // group mappings for one assembly ch_grouped_mappings = ch_sorted_assembly_bams .groupTuple(by: 0) - .map { meta, assembly, bams, bais -> [meta, assembly.sort()[0], bams, bais] } + .map { meta, assembly, bams, csis -> [meta, assembly.sort()[0], bams, csis] } + .dump(tag: 'ch_grouped_mappings') emit: - bowtie2_assembly_multiqc = BOWTIE2_ASSEMBLY_ALIGN.out.log.map { _assembly_meta, _reads_meta, log -> [log] } - bowtie2_version = BOWTIE2_ASSEMBLY_ALIGN.out.versions + bowtie2_assembly_multiqc = BOWTIE2_ASSEMBLY_ALIGN.out.log.map { _meta, log -> [log] } + version = ch_versions grouped_mappings = ch_grouped_mappings } diff --git a/workflows/mag.nf b/workflows/mag.nf index 5b424d62d..399494944 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -491,7 +491,7 @@ workflow MAG { ch_assemblies, ch_short_reads, ) - ch_versions = ch_versions.mix(BINNING_PREPARATION.out.bowtie2_version.first()) + ch_versions = ch_versions.mix(BINNING_PREPARATION.out.versions) } /* @@ -502,7 +502,7 @@ workflow MAG { if (params.ancient_dna) { ANCIENT_DNA_ASSEMBLY_VALIDATION(BINNING_PREPARATION.out.grouped_mappings) - ch_versions = ch_versions.mix(ANCIENT_DNA_ASSEMBLY_VALIDATION.out.versions.first()) + ch_versions = ch_versions.mix(ANCIENT_DNA_ASSEMBLY_VALIDATION.out.versions) } /* From f01e98a93ef20f6528b355fdcabbbfe71fe2bf4c Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Wed, 5 Feb 2025 11:23:29 +0100 Subject: [PATCH 04/14] Fix bowtei2 module versions reportin gin SR processing --- subworkflows/local/shortread_preprocessing.nf | 3 +++ 1 file changed, 3 insertions(+) diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index 9a633b8f8..bcf64db3d 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -78,6 +78,7 @@ workflow SHORTREAD_PREPROCESSING { ch_host_fasta.map { fasta -> [[id: fasta.getSimpleName()], fasta] } ) ch_host_bowtie2index = BOWTIE2_HOST_REMOVAL_BUILD.out.index + ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_BUILD.out.versions.first()) } } else if (params.host_genome) { @@ -104,6 +105,8 @@ workflow SHORTREAD_PREPROCESSING { BOWTIE2_PHIX_REMOVAL_BUILD( ch_phix_db_file.map { fasta -> [[id: fasta.getSimpleName()], fasta] } ) + ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_BUILD.out.versions.first()) + BOWTIE2_PHIX_REMOVAL_ALIGN( ch_short_reads_hostremoved, BOWTIE2_PHIX_REMOVAL_BUILD.out.index, From cd1079868cff5d9daf3b4b527e35b3066de2569e Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 5 Feb 2025 11:55:22 +0100 Subject: [PATCH 05/14] Remove debugging --- conf/modules.config | 2 +- .../local/utils_nfcore_mag_pipeline/main.nf | 31 ------------------- 2 files changed, 1 insertion(+), 32 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index c399a9877..e44e162be 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -352,7 +352,7 @@ process { } withName: BOWTIE2_ASSEMBLY_ALIGN { - tag = "${meta.assembler}-${meta.id}-${meta3.id}" + tag = { "${meta.assembler}-${meta.id}-${meta3.id}" } ext.args = params.bowtie2_mode ? params.bowtie2_mode : params.ancient_dna ? 
'--very-sensitive -N 1' : '' ext.args2 = "-bS" ext.prefix = { "${meta.assembler}-${meta.id}-${meta3.id}" } diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index 0653008d1..a8c6dfd2b 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -15,7 +15,6 @@ include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' include { imNotification } from '../../nf-core/utils_nfcore_pipeline' include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' -include { Nextpie } from '../../../lib/functions' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -192,36 +191,6 @@ workflow PIPELINE_COMPLETION { monochrome_logs, multiqc_reports.getVal(), ) - - // Nextpie - // params.nextpie_enable defined in nextflow.config - if(params.nextpie_enable){ - - // Logging - // params.workflow_name and params.workflow_ver come from nextflow.config. - // Refer above code block - log.info "workflow: " + params.workflow_name - log.info "version : " + params.workflow_ver - // params.group is taken from commandline using --group flag - log.info "group : " + params.group - log.info "project : " + workflow.runName - - // Push run metadata when group and name is provided - if(params.name && params.group){ - log.info "Pushing metadata to Nextpie http://${params.nextpie_host}:${params.nextpie_port}" - log.info "Response: " + - Nextpie(host = params.nextpie_host, - port = params.nextpie_port, - traceFile = "${params.outDir}/pipeline_info/Trace.txt", - Workflow = params.workflow_name, - Version = params.workflow_ver, - Group = params.group, - Project = workflow.runName, - APIkey = params.nextpie_api_key).toString() - }else{ - log.info "Run metadata pushing to Nextflow skipped (no --group --name provided)." 
- } - } } completionSummary(monochrome_logs) From af290bc1e9dd1ae64b2accf4364b5dc089549468 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 5 Feb 2025 14:20:56 +0100 Subject: [PATCH 06/14] Replace sort with index --- subworkflows/local/binning_preparation.nf | 31 +++++++++++++---------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/subworkflows/local/binning_preparation.nf b/subworkflows/local/binning_preparation.nf index 91d0562cf..2adac9115 100644 --- a/subworkflows/local/binning_preparation.nf +++ b/subworkflows/local/binning_preparation.nf @@ -4,7 +4,7 @@ include { BOWTIE2_BUILD as BOWTIE2_ASSEMBLY_BUILD } from '../../modules/nf-core/bowtie2/build' include { BOWTIE2_ALIGN as BOWTIE2_ASSEMBLY_ALIGN } from '../../modules/nf-core/bowtie2/align' -include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' workflow BINNING_PREPARATION { take: @@ -18,17 +18,21 @@ workflow BINNING_PREPARATION { BOWTIE2_ASSEMBLY_BUILD(assemblies) ch_versions = ch_versions.mix(BOWTIE2_ASSEMBLY_BUILD.out.versions.first()) - // combine assemblies with sample reads for binning depending on specified mapping mode if (params.binning_map_mode == 'all') { // combine assemblies with reads of all samples - ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.index.combine(reads) + ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.index + .combine(reads) + .join(assemblies) + .map { meta, index, assembly -> [meta.group, meta, assembly, index] } } else if (params.binning_map_mode == 'group') { // combine assemblies with reads of samples from same group ch_reads_bowtie2 = reads.map { meta, fastq -> [meta.group, meta, fastq] } + ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.index - .map { meta, assembly, index -> [meta.group, meta, assembly, index] } + .join(assemblies) + .map { meta, index, assembly -> [meta.group, meta, assembly, index] } .combine(ch_reads_bowtie2, by: 0) .map { 
_group, assembly_meta, assembly, index, reads_meta, fastq -> [assembly_meta, assembly, index, reads_meta, fastq] @@ -39,7 +43,8 @@ workflow BINNING_PREPARATION { // combine assemblies (not co-assembled) with reads from own sample ch_reads_bowtie2 = reads.map { meta, fastq -> [meta.id, meta, fastq] } ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.index - .map { meta, assembly, index -> [meta.id, meta, assembly, index] } + .join(assemblies) + .map { meta, index, assembly -> [meta.id, meta, assembly, index] } .combine(ch_reads_bowtie2, by: 0) .map { _id, assembly_meta, assembly, index, reads_meta, fastq -> [assembly_meta, assembly, index, reads_meta, fastq] @@ -58,32 +63,32 @@ workflow BINNING_PREPARATION { index: [[:], index] assembly: [reads_meta, assembly] saveunaligned: false - sortbam: false + sortbam: true } BOWTIE2_ASSEMBLY_ALIGN( ch_bowtie2_align_input.reads, ch_bowtie2_align_input.index, - ch_bowtie2_align_input.ref, + ch_bowtie2_align_input.assembly, ch_bowtie2_align_input.saveunaligned, ch_bowtie2_align_input.sortbam, ) ch_versions = ch_versions.mix(BOWTIE2_ASSEMBLY_ALIGN.out.versions.first()) - SAMTOOLS_SORT(BOWTIE2_ASSEMBLY_ALIGN.out.bam, [[:], []]) + SAMTOOLS_INDEX(BOWTIE2_ASSEMBLY_ALIGN.out.bam) ch_sorted_assembly_bams = assemblies - .join(SAMTOOLS_SORT.out.bam) - .join(SAMTOOLS_SORT.out.csi) - ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + .join(BOWTIE2_ASSEMBLY_ALIGN.out.bam) + .join(SAMTOOLS_INDEX.out.bai) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) // group mappings for one assembly ch_grouped_mappings = ch_sorted_assembly_bams .groupTuple(by: 0) - .map { meta, assembly, bams, csis -> [meta, assembly.sort()[0], bams, csis] } + .map { meta, assembly, bams, bais -> [meta, assembly.sort()[0], bams, bais] } .dump(tag: 'ch_grouped_mappings') emit: bowtie2_assembly_multiqc = BOWTIE2_ASSEMBLY_ALIGN.out.log.map { _meta, log -> [log] } - version = ch_versions + versions = ch_versions grouped_mappings = 
ch_grouped_mappings } From 1588954dc3730deb3b5ba89d612d08ec6661ca82 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Wed, 5 Feb 2025 14:44:54 +0100 Subject: [PATCH 07/14] REplace with index, fix first() warning --- conf/modules.config | 14 +- modules.json | 5 + .../nf-core/samtools/index/environment.yml | 8 + modules/nf-core/samtools/index/main.nf | 49 ++++ modules/nf-core/samtools/index/meta.yml | 71 +++++ .../samtools/index/tests/csi.nextflow.config | 7 + .../nf-core/samtools/index/tests/main.nf.test | 140 ++++++++++ .../samtools/index/tests/main.nf.test.snap | 250 ++++++++++++++++++ modules/nf-core/samtools/index/tests/tags.yml | 2 + subworkflows/local/binning_preparation.nf | 3 +- subworkflows/local/shortread_preprocessing.nf | 4 +- 11 files changed, 548 insertions(+), 5 deletions(-) create mode 100644 modules/nf-core/samtools/index/environment.yml create mode 100644 modules/nf-core/samtools/index/main.nf create mode 100644 modules/nf-core/samtools/index/meta.yml create mode 100644 modules/nf-core/samtools/index/tests/csi.nextflow.config create mode 100644 modules/nf-core/samtools/index/tests/main.nf.test create mode 100644 modules/nf-core/samtools/index/tests/main.nf.test.snap create mode 100644 modules/nf-core/samtools/index/tests/tags.yml diff --git a/conf/modules.config b/conf/modules.config index e44e162be..888645b47 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -354,7 +354,7 @@ process { withName: BOWTIE2_ASSEMBLY_ALIGN { tag = { "${meta.assembler}-${meta.id}-${meta3.id}" } ext.args = params.bowtie2_mode ? params.bowtie2_mode : params.ancient_dna ? 
'--very-sensitive -N 1' : '' - ext.args2 = "-bS" + ext.args2 = "" ext.prefix = { "${meta.assembler}-${meta.id}-${meta3.id}" } publishDir = [ [ @@ -371,6 +371,18 @@ process { ] } + withName: SAMTOOLS_INDEX { + ext.prefix = { "${meta.assembler}-${meta.id}-${meta3.id}" } + publishDir = [ + [ + path: { "${params.outdir}/Assembly/${meta.assembler}/QC/${meta.id}" }, + mode: params.publish_dir_mode, + pattern: "*.{bam}", + enabled: params.save_assembly_mapped_reads, + ] + ] + } + withName: 'MAG_DEPTHS_PLOT|MAG_DEPTHS_SUMMARY' { publishDir = [path: { "${params.outdir}/GenomeBinning/depths/bins" }, mode: params.publish_dir_mode, pattern: "*.{png,tsv}"] } diff --git a/modules.json b/modules.json index 35949c76a..55091a8c4 100644 --- a/modules.json +++ b/modules.json @@ -277,6 +277,11 @@ "git_sha": "fd742419940e01ba1c5ecb172c3e32ec840662fe", "installed_by": ["modules"] }, + "samtools/index": { + "branch": "master", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", + "installed_by": ["modules"] + }, "samtools/sort": { "branch": "master", "git_sha": "b7800db9b069ed505db3f9d91b8c72faea9be17b", diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 000000000..62054fc97 --- /dev/null +++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 000000000..311756102 --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus-1} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def extension = file(input).getExtension() == 'cram' ? + "crai" : args.contains("-c") ? "csi" : "bai" + """ + touch ${input}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 000000000..db8df0d50 --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,71 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. 
+ homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: input file +output: + - bai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: CSI index file + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 000000000..0ed260efa --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 000000000..ca34fb5cd --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,140 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag 
"modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("bai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi") { + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("bai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + 
assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi - stub") { + options "-stub" + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 000000000..72d65e81a --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,250 @@ +{ + "csi - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:25.261127166" + }, + "crai - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + 
"versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:12.653194876" + }, + "bai - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:21:01.854932651" + }, + "csi": { + "content": [ + "test.paired_end.sorted.bam.csi", + [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:51.485364222" + }, + "crai": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:40.518873972" + }, + "bai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + 
"versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:21.184050361" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml new file mode 100644 index 000000000..e0f58a7a3 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/index: + - modules/nf-core/samtools/index/** diff --git a/subworkflows/local/binning_preparation.nf b/subworkflows/local/binning_preparation.nf index 2adac9115..2720f7ebc 100644 --- a/subworkflows/local/binning_preparation.nf +++ b/subworkflows/local/binning_preparation.nf @@ -22,9 +22,8 @@ workflow BINNING_PREPARATION { if (params.binning_map_mode == 'all') { // combine assemblies with reads of all samples ch_bowtie2_input = BOWTIE2_ASSEMBLY_BUILD.out.index - .combine(reads) .join(assemblies) - .map { meta, index, assembly -> [meta.group, meta, assembly, index] } + .combine(reads) } else if (params.binning_map_mode == 'group') { // combine assemblies with reads of samples from same group diff --git a/subworkflows/local/shortread_preprocessing.nf b/subworkflows/local/shortread_preprocessing.nf index bcf64db3d..c70d6fee6 100644 --- a/subworkflows/local/shortread_preprocessing.nf +++ b/subworkflows/local/shortread_preprocessing.nf @@ -78,7 +78,7 @@ workflow SHORTREAD_PREPROCESSING { ch_host_fasta.map { fasta -> [[id: fasta.getSimpleName()], fasta] } ) ch_host_bowtie2index = BOWTIE2_HOST_REMOVAL_BUILD.out.index - ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_BUILD.out.versions.first()) + ch_versions = ch_versions.mix(BOWTIE2_HOST_REMOVAL_BUILD.out.versions) } } 
else if (params.host_genome) { @@ -105,7 +105,7 @@ workflow SHORTREAD_PREPROCESSING { BOWTIE2_PHIX_REMOVAL_BUILD( ch_phix_db_file.map { fasta -> [[id: fasta.getSimpleName()], fasta] } ) - ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_BUILD.out.versions.first()) + ch_versions = ch_versions.mix(BOWTIE2_PHIX_REMOVAL_BUILD.out.versions) BOWTIE2_PHIX_REMOVAL_ALIGN( ch_short_reads_hostremoved, From 1171ed09510226537ffdbd1c559fc27ed817cfbb Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 6 Feb 2025 09:00:20 +0100 Subject: [PATCH 08/14] Remove accidently commited file --- lib/functions.nf | 100 ----------------------------------------------- 1 file changed, 100 deletions(-) delete mode 100644 lib/functions.nf diff --git a/lib/functions.nf b/lib/functions.nf deleted file mode 100644 index ea7dd5e02..000000000 --- a/lib/functions.nf +++ /dev/null @@ -1,100 +0,0 @@ -/* -vim: syntax=groovy --*- mode: groovy;-*- -*/ - - -/* -******************************************************************************* -* version_check -******************************************************************************* - */ -def version_check(required_ver, current_ver){ - - try { - if( ! current_ver.matches(">= $required_ver") ){ - throw GroovyException('Nextflow version too old') - } - } catch (all) { - log.error "Error: Nextflow version $required_ver required! " + - "You are running version $current_ver.\n" + - "Please update Nextflow.\n" - exit 1 - - } -} - - -/* -******************************************************************************* -* Function to show yes/no prompt -******************************************************************************* - */ -def prompt(input){ - - if(input == "n"){ - exit 1 - } - if(input == "y"){ - return(true) - } - if(input != "n" || input!= "y"){ - println "Please use 'y' for yes and 'n' for no." 
- prompt(System.console().readLine 'Do you want to continue again (y/n)?') - } -} - - -/* -******************************************************************************* -* Nextpie -******************************************************************************* - */ - -@Grab('io.github.http-builder-ng:http-builder-ng-okhttp:0.14.2') -@Grab(group='org.slf4j', module='slf4j-api', version='1.7.32') - -import static groovy.json.JsonOutput.toJson -import static groovyx.net.http.HttpBuilder.configure - -import groovyx.net.http.* -import static groovyx.net.http.MultipartContent.multipart - -def Nextpie(host, port, traceFile, Workflow, Version, Group, Project, APIkey){ - File myFile = new File(traceFile) - - try { - def posts = configure { - request.uri = 'http://'+host+':'+port - request.uri.path = '/api/v1.0/upload-data' - request.headers['X-API-KEY'] = APIkey - request.contentType = 'multipart/form-data' - request.body = multipart { - field 'Workflow', Workflow - field 'Version', Version - field 'Group', Group - field 'Project', Project - part 'File', 'Trace.txt', 'text/plain', myFile - } - request.encoder 'multipart/form-data', OkHttpEncoders.&multipart - - response.success { FromServer fs, Object body -> - return body - } - - response.when(401){ FromServer fs, Object body -> - return "UNAUTHORIZED (401)" - } - - response.when(403){ FromServer fs, Object body -> - return "FORBIDDEN (403)" - } - response.when(404){ FromServer fs, Object body -> - return "NOT FOUND (404)" - } - }.post() - } - catch (Exception ce){ - return ce - } -} From dcd08fcb7eae38a7772cc4547f88a6a28256a92c Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Thu, 6 Feb 2025 08:16:08 +0000 Subject: [PATCH 09/14] [automated] Fix code linting --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a043657b5..5bf371e8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - 
[#755](https://github.com/nf-core/mag/pull/755) - Updated to nf-core 3.2.0 `TEMPLATE` (by @jfy133) - [#754](https://github.com/nf-core/mag/pull/754) - Replaced local module with nf-core module for all Bowtie2 processes (reported by @MeriamOs, fix by @jfy133) - ### `Fixed` - [#748](https://github.com/nf-core/mag/pull/748) - Fix broken phix reference channel when skipping phix removal (reported by @amizeranschi, fix by @muabnezor) From 4efc52b19078ef69a06361996f56e1a90180bee2 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Thu, 27 Feb 2025 14:21:14 +0100 Subject: [PATCH 10/14] Update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 81612b1d4..c633f9c07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,6 +37,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#753](https://github.com/nf-core/mag/pull/753) - Fix iGenomes reference support for host removal reference genome (reported by @Thomieh73, fix by @jfy133) - [#759](https://github.com/nf-core/mag/pull/758) - Fixed parameters that allow both files or directories to not error with directories, and general file input validation improvements (reported by @mjfi2sb3, fix by @jfy133) +### `Deprecated` + ## 3.3.0 [2024-12-19] ### `Added` From aeefb1bdbe5dbd7dc89a1638f179359423bc42c9 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 28 Feb 2025 09:49:13 +0100 Subject: [PATCH 11/14] Sync assembly align suffix --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 888645b47..a33962f04 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -352,7 +352,7 @@ process { } withName: BOWTIE2_ASSEMBLY_ALIGN { - tag = { "${meta.assembler}-${meta.id}-${meta3.id}" } + tag = { "${meta.assembler}-${meta.id}-${meta3.id}.assembly" } ext.args = params.bowtie2_mode ? params.bowtie2_mode : params.ancient_dna ? 
'--very-sensitive -N 1' : '' ext.args2 = "" ext.prefix = { "${meta.assembler}-${meta.id}-${meta3.id}" } From 2be7217da757e6313133183ce2114389d5c4fe6b Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 28 Feb 2025 09:49:23 +0100 Subject: [PATCH 12/14] Add bt2 bump to changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c633f9c07..ab57788e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Dependencies` +| Tool | Previous version | New version | +| ------- | ---------------- | ----------- | +| Bowtie2 | 2.4.2 | 2.5.2 | + ### `Deprecated` ## v3.3.1 - [2025-02-13] From ef99a180d838f080a06478b92e47b88bfdc13121 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Fri, 28 Feb 2025 10:08:16 +0100 Subject: [PATCH 13/14] Remove assembly as never used --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index a33962f04..888645b47 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -352,7 +352,7 @@ process { } withName: BOWTIE2_ASSEMBLY_ALIGN { - tag = { "${meta.assembler}-${meta.id}-${meta3.id}.assembly" } + tag = { "${meta.assembler}-${meta.id}-${meta3.id}" } ext.args = params.bowtie2_mode ? params.bowtie2_mode : params.ancient_dna ? '--very-sensitive -N 1' : '' ext.args2 = "" ext.prefix = { "${meta.assembler}-${meta.id}-${meta3.id}" } From c32a826b668738b6a37d92885609abff888d5b87 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Wed, 5 Mar 2025 12:42:12 +0100 Subject: [PATCH 14/14] Minor output tweaks --- CHANGELOG.md | 1 + docs/output.md | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab57788e3..59ad4fd9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` - [#754](https://github.com/nf-core/mag/pull/754) - Replaced local module with nf-core module for all Bowtie2 processes (reported by @MeriamOs, fix by @jfy133) +- [#754](https://github.com/nf-core/mag/pull/754) - Made filenames consistent for all bowtie2 assembly-aligning output (fix by @jfy133) ### `Fixed` diff --git a/docs/output.md b/docs/output.md index daece6f6e..38922893b 100644 --- a/docs/output.md +++ b/docs/output.md @@ -219,9 +219,9 @@ Trimmed (short) reads are assembled with both megahit and SPAdes. Hybrid assembl - `[sample/group].contigs.fa.gz`: Compressed metagenome assembly in fasta format - `[sample/group].log`: Log file - `QC/[sample/group]/`: Directory containing QUAST files and Bowtie2 mapping logs - - `MEGAHIT-[sample].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the sample that the metagenome was assembled from, only present if `--coassemble_group` is not set. + - `MEGAHIT-[sample]-[sampleToMap].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the sample that the metagenome was assembled from, only present if `--coassemble_group` is not set. - `MEGAHIT-[sample/group]-[sampleToMap].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the respective sample ("sampleToMap"). - - `MEGAHIT-[sample].[bam/bai]`: Optionally saved BAM file of the Bowtie2 mapping of reads against the assembly. + - `MEGAHIT-[sample]-[sampleToMap].[bam/bai]`: Optionally saved BAM file of the Bowtie2 mapping of reads against the assembly. 
@@ -238,9 +238,9 @@ Trimmed (short) reads are assembled with both megahit and SPAdes. Hybrid assembl - `[sample/group].contigs.fa.gz`: Compressed assembled contigs in fasta format - `[sample/group].spades.log`: Log file - `QC/[sample/group]/`: Directory containing QUAST files and Bowtie2 mapping logs - - `SPAdes-[sample].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the sample that the metagenome was assembled from, only present if `--coassemble_group` is not set. + - `SPAdes-[sample]-[sampleToMap].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the sample that the metagenome was assembled from, only present if `--coassemble_group` is not set. - `SPAdes-[sample/group]-[sampleToMap].bowtie2.log`: Bowtie2 log file indicating how many reads have been mapped from the respective sample ("sampleToMap"). - - `SPAdes-[sample].[bam/bai]`: Optionally saved BAM file of the Bowtie2 mapping of reads against the assembly. + - `SPAdes-[sample]-[sampleToMap].[bam/bai]`: Optionally saved BAM file of the Bowtie2 mapping of reads against the assembly.