diff --git a/CHANGELOG.md b/CHANGELOG.md index b883fa42..80b0ddd0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,25 +5,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## v4.1.0dev +- Add the `--gff` parameter to allow the user to specify a GFF file as a reference (instead of a GTF file) ([#451](https://github.com/nf-core/scrnaseq/pull/451)) + ## v4.0.0 - 2025-03-10 -- Move `txp2gene` to `reference_genome_options` in schema as it is required by `kb_python` and `alevin` ([434](https://github.com/nf-core/scrnaseq/pull/434)) -- Fix of additional path splitting for `txp2gene` ([433](https://github.com/nf-core/scrnaseq/pull/433)) +- Move `txp2gene` to `reference_genome_options` in schema as it is required by `kb_python` and `alevin` ([#434](https://github.com/nf-core/scrnaseq/pull/434)) +- Fix of additional path splitting for `txp2gene` ([#433](https://github.com/nf-core/scrnaseq/pull/433)) - Add a checker so that `--fb_reference` does not break the pipeline in case `ab` files are not used in `cellranger multi` sub-workflow. 
-- Fix concatenation of multiple samples into the combined output AnnData ([416](https://github.com/nf-core/scrnaseq/pull/416)) -- Make sure STARsolo velocity output is added to the combined output AnnData, if `star_feature = 'Gene Velocyto'` ([417](https://github.com/nf-core/scrnaseq/pull/417)) +- Fix concatenation of multiple samples into the combined output AnnData ([#416](https://github.com/nf-core/scrnaseq/pull/416)) +- Make sure STARsolo velocity output is added to the combined output AnnData, if `star_feature = 'Gene Velocyto'` ([#417](https://github.com/nf-core/scrnaseq/pull/417)) - Update cellbender module to latest nf-core version ([#419](https://github.com/nf-core/scrnaseq/pull/419/)) - Add profile for gpu processes ([#419](https://github.com/nf-core/scrnaseq/pull/419/)) - Update example usage command in README with valid reference genome parameter ([#339](https://github.com/nf-core/scrnaseq/issues/339)) - Removed `--kb_filter` parameter. Kallisto filtering is triggered by default and can be turned off with `ext.args` ([#421](https://github.com/nf-core/scrnaseq/issues/421)) - Document better that `cellbender` is used for empty drops calling and not the `emptydrops` method (([#420](https://github.com/nf-core/scrnaseq/issues/420))) - Add `--limitBAMsortRAM` to STARsolo alignment, to make sure BAM sorting memory scales with the task memory ([#430](https://github.com/nf-core/scrnaseq/pull/430)) -- Replace local modules for simpleaf, `SIMPLEAF_INDEX` and `SIMPLEAF_QUANT`, with their central modules from nf-core/modules, and update simpleaf subworkflows accordingly. 
([424](https://github.com/nf-core/scrnaseq/pull/424)) -- Fix of additional path splitting for `txp2gene` ([433](https://github.com/nf-core/scrnaseq/pull/433)) -- Update documents related to `simpleaf`, `alevin`, `salmon`, and `alevin-fry` for consistency.([424](https://github.com/nf-core/scrnaseq/pull/424)) -- Rename the default aligner from `alevin` to `simpleaf` for consistency.([424](https://github.com/nf-core/scrnaseq/pull/424)) -- Update the `mtx_to_h5ad` template for `simpleaf` to start from the h5ad file generated by simpleaf.([424](https://github.com/nf-core/scrnaseq/pull/424)) -- Upgrade alevinqc from 1.12.1 to 1.18.0 to match the latest output file structure of simpleaf.([424](https://github.com/nf-core/scrnaseq/pull/424)) +- Replace local modules for simpleaf, `SIMPLEAF_INDEX` and `SIMPLEAF_QUANT`, with their central modules from nf-core/modules, and update simpleaf subworkflows accordingly. ([#424](https://github.com/nf-core/scrnaseq/pull/424)) +- Fix of additional path splitting for `txp2gene` ([#433](https://github.com/nf-core/scrnaseq/pull/433)) +- Update documents related to `simpleaf`, `alevin`, `salmon`, and `alevin-fry` for consistency.([#424](https://github.com/nf-core/scrnaseq/pull/424)) +- Rename the default aligner from `alevin` to `simpleaf` for consistency.([#424](https://github.com/nf-core/scrnaseq/pull/424)) +- Update the `mtx_to_h5ad` template for `simpleaf` to start from the h5ad file generated by simpleaf.([#424](https://github.com/nf-core/scrnaseq/pull/424)) +- Upgrade alevinqc from 1.12.1 to 1.18.0 to match the latest output file structure of simpleaf.([#424](https://github.com/nf-core/scrnaseq/pull/424)) ## v3.0.0 - 2024-12-09 diff --git a/conf/modules.config b/conf/modules.config index 26d22647..e355612e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -31,6 +31,16 @@ process { ] } + withName: 'GFFREAD' { + ext.args = '--keep-exon-attrs -F -T' + publishDir = [ + path: { "${params.outdir}/reference_genome" }, + mode: 
params.publish_dir_mode, + enabled : params.save_reference, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + if (!params.skip_cellbender) { withName: 'CELLBENDER_REMOVEBACKGROUND' { publishDir = [ diff --git a/docs/output.md b/docs/output.md index a0e3b961..db234df2 100644 --- a/docs/output.md +++ b/docs/output.md @@ -142,6 +142,8 @@ The pipeline also possess a subworkflow imported from scdownstream to perform fi - `extract_transcriptome` - When supplied with a `--fasta` genome fasta, this contains the extracted transcriptome - The GTF file supplied with `--gtf` is used to extract the transcriptome positions appropriately +- `*.gtf` + - If only a `.gff` file was provided, the `.gtf` file generated by converting the `.gff` file will be saved here if using `--save_reference`. **Output directory: `results/${params.aligner}/mtx_conversions`** diff --git a/docs/usage.md b/docs/usage.md index 180e8018..3c6a944a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -146,7 +146,7 @@ you have to create a new cellranger-arc index ([see here](https://support.10xgen more information) If you decide to create a cellranger-arc index, then you need to create a config file to generate the index. The pipeline -can do this autmatically for you if you provide a `--fasta`, `--gtf`, and an optional `--motif` file. However, you can +can do this automatically for you if you provide a `--fasta`, `--gtf` or `--gff`, and an optional `--motifs` file. However, you can also decide to provide your own config file with `--cellrangerarc_config`, then you also have to specify with `--cellrangerarc_reference` the reference genome name that you have used and stated as _genome:_ in your config file. @@ -279,7 +279,7 @@ The `sample` column must match the corresponding entry in the main samplesheet. #### Additional reference data - Cellranger multi needs a reference for **GEX and VDJ analysis**. 
They are calculated on the fly given the reference - files (`--fasta` and `--gtf`) provided, but users can also provide their own with: `--cellranger_index` + files (`--fasta`, and `--gtf` or `--gff`) provided, but users can also provide their own with: `--cellranger_index` and `--cellranger_vdj_index`, for GEX and VDJ, respectively. > When running cellranger multi, without any VDJ data, users can also skip VDJ automated ref building with: `--skip_cellrangermulti_vdjref`. diff --git a/modules.json b/modules.json index d7f956de..d35e270a 100644 --- a/modules.json +++ b/modules.json @@ -62,7 +62,7 @@ }, "gffread": { "branch": "master", - "git_sha": "b1b959609bda44341120aed1766329909f54b8d0", + "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, "gunzip": { diff --git a/modules/nf-core/gffread/environment.yml b/modules/nf-core/gffread/environment.yml index 5398f71c..46c5faec 100644 --- a/modules/nf-core/gffread/environment.yml +++ b/modules/nf-core/gffread/environment.yml @@ -1,7 +1,7 @@ -name: gffread +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::gffread=0.12.1 + - bioconda::gffread=0.12.7 diff --git a/modules/nf-core/gffread/main.nf b/modules/nf-core/gffread/main.nf index d8a473e0..da55cbab 100644 --- a/modules/nf-core/gffread/main.nf +++ b/modules/nf-core/gffread/main.nf @@ -1,32 +1,57 @@ process GFFREAD { - tag "$gff" + tag "$meta.id" label 'process_low' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' : - 'biocontainers/gffread:0.12.1--h8b12597_0' }" + 'https://depot.galaxyproject.org/singularity/gffread:0.12.7--hdcf5f25_4' : + 'biocontainers/gffread:0.12.7--hdcf5f25_4' }" input: - path gff + tuple val(meta), path(gff) + path fasta output: - path "*.gtf" , emit: gtf , optional: true - path "*.gff3" , emit: gffread_gff , optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("*.gtf") , emit: gtf , optional: true + tuple val(meta), path("*.gff3") , emit: gffread_gff , optional: true + tuple val(meta), path("*.fasta"), emit: gffread_fasta , optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${gff.baseName}" - def extension = args.contains("-T") ? 'gtf' : 'gffread.gff3' + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' ) + def fasta_arg = fasta ? "-g $fasta" : '' + def output_name = "${prefix}.${extension}" + def output = extension == "fasta" ? "$output_name" : "-o $output_name" + def args_sorted = args.replaceAll(/(.*)(-[wxy])(.*)/) { all, pre, param, post -> "$pre $post $param" }.trim() + // args_sorted = Move '-w', '-x', and '-y' to the end of the args string as gffread expects the file name after these parameters + if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ gffread \\ $gff \\ - $args \\ - -o ${prefix}.${extension} + $fasta_arg \\ + $args_sorted \\ + $output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gffread: \$(gffread --version 2>&1) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("-T") ? 
'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' ) + def output_name = "${prefix}.${extension}" + if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch $output_name + cat <<-END_VERSIONS > versions.yml "${task.process}": gffread: \$(gffread --version 2>&1) diff --git a/modules/nf-core/gffread/meta.yml b/modules/nf-core/gffread/meta.yml index d38cbcda..bebe7f57 100644 --- a/modules/nf-core/gffread/meta.yml +++ b/modules/nf-core/gffread/meta.yml @@ -1,36 +1,75 @@ name: gffread -description: Validate, filter, convert and perform various other operations on GFF files +description: Validate, filter, convert and perform various other operations on GFF + files keywords: - gff - conversion - validation tools: - gffread: - description: GFF/GTF utility providing format conversions, region filtering, FASTA sequence extraction and more. + description: GFF/GTF utility providing format conversions, region filtering, FASTA + sequence extraction and more. homepage: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread documentation: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread tool_dev_url: https://github.com/gpertea/gffread doi: 10.12688/f1000research.23297.1 licence: ["MIT"] + identifier: biotools:gffread input: - - gff: - type: file - description: A reference file in either the GFF3, GFF2 or GTF format. - pattern: "*.{gff, gtf}" + - - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test' ] + - gff: + type: file + description: A reference file in either the GFF3, GFF2 or GTF format. 
+ pattern: "*.{gff, gtf}" + - - fasta: + type: file + description: A multi-fasta file with the genomic sequences + pattern: "*.{fasta,fa,faa,fas,fsa}" output: - gtf: - type: file - description: GTF file resulting from the conversion of the GFF input file if '-T' argument is present - pattern: "*.{gtf}" + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test' ] + - "*.gtf": + type: file + description: GTF file resulting from the conversion of the GFF input file if + '-T' argument is present + pattern: "*.{gtf}" - gffread_gff: - type: file - description: GFF3 file resulting from the conversion of the GFF input file if '-T' argument is absent - pattern: "*.{gff3}" + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. [ id:'test' ] + - "*.gff3": + type: file + description: GFF3 file resulting from the conversion of the GFF input file if + '-T' argument is absent + pattern: "*.gff3" + - gffread_fasta: + - meta: + type: map + description: | + Groovy Map containing meta data + e.g. 
[ id:'test' ] + - "*.fasta": + type: file + description: Fasta file produced when either of '-w', '-x', '-y' parameters + is present + pattern: "*.fasta" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@edmundmiller" maintainers: - "@edmundmiller" + - "@gallvp" diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test index 452aba1b..d039f367 100644 --- a/modules/nf-core/gffread/tests/main.nf.test +++ b/modules/nf-core/gffread/tests/main.nf.test @@ -18,7 +18,12 @@ nextflow_process { } process { """ - input[0] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ } } @@ -26,11 +31,40 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot( - process.out.gtf, - process.out.versions - ).match() }, - { assert process.out.gffread_gff == [] } + { assert snapshot(process.out).match() }, + { assert process.out.gffread_gff == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-gtf-stub") { + + options '-stub' + config "./nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gffread_gff == [] }, + { assert process.out.gffread_fasta == [] } ) } @@ -46,7 +80,103 @@ nextflow_process { } process { """ - input[0] = file(params.modules_testdata_base_path + 
"genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-gff3-stub") { + + options '-stub' + config "./nextflow-gff3.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_fasta == [] } + ) + } + + } + + test("sarscov2-gff3-fasta") { + + config "./nextflow-fasta.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.gtf == [] }, + { assert process.out.gffread_gff == [] } + ) + } + + } + + test("sarscov2-gff3-fasta-stub") { + + options '-stub' + config "./nextflow-fasta.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) """ } } @@ -54,14 +184,41 @@ 
nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot( - process.out.gffread_gff, - process.out.versions - ).match() }, + { assert snapshot(process.out).match() }, { assert process.out.gtf == [] }, + { assert process.out.gffread_gff == [] } + ) + } + + } + + test("sarscov2-gff3-fasta-fail-catch") { + + options '-stub' + config "./nextflow-fasta.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [id: 'genome'], + file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) + """ + } + } + + then { + assertAll ( + { assert ! process.success }, + { assert process.stdout.toString().contains("Input and output names are the same") } ) } } -} +} \ No newline at end of file diff --git a/modules/nf-core/gffread/tests/main.nf.test.snap b/modules/nf-core/gffread/tests/main.nf.test.snap index 00a11a40..15262320 100644 --- a/modules/nf-core/gffread/tests/main.nf.test.snap +++ b/modules/nf-core/gffread/tests/main.nf.test.snap @@ -1,24 +1,272 @@ { "sarscov2-gff3-gtf": { "content": [ - [ - "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3" - ], - [ - "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" - ] + { + "0": [ + [ + { + "id": "test" + }, + "test.gtf:md5,1ea0ae98d3388e0576407dc4a24ef428" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + + ], + "gtf": [ + [ + { + "id": "test" + }, + "test.gtf:md5,1ea0ae98d3388e0576407dc4a24ef428" + ] + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } ], - "timestamp": "2024-01-23T20:00:32.688779117" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T10:48:56.496187" }, "sarscov2-gff3-gff3": { "content": [ - [ - 
"genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d" - ], - [ - "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6" - ] + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + [ + { + "id": "test" + }, + "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91" + ] + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } ], - "timestamp": "2024-01-23T20:07:11.457356625" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T10:49:00.892782" + }, + "sarscov2-gff3-gtf-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + + ], + "gtf": [ + [ + { + "id": "test" + }, + "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T11:11:26.975666" + }, + "sarscov2-gff3-fasta-stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gffread_gff": [ + + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T11:11:44.34792" + }, + "sarscov2-gff3-gff3-stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": 
"test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + + ], + "gffread_gff": [ + [ + { + "id": "test" + }, + "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T11:11:35.221671" + }, + "sarscov2-gff3-fasta": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a" + ] + ], + "3": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ], + "gffread_fasta": [ + [ + { + "id": "test" + }, + "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a" + ] + ], + "gffread_gff": [ + + ], + "gtf": [ + + ], + "versions": [ + "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-09T10:54:02.88143" } } \ No newline at end of file diff --git a/modules/nf-core/gffread/tests/nextflow-fasta.config b/modules/nf-core/gffread/tests/nextflow-fasta.config new file mode 100644 index 00000000..ac6cb148 --- /dev/null +++ b/modules/nf-core/gffread/tests/nextflow-fasta.config @@ -0,0 +1,5 @@ +process { + withName: GFFREAD { + ext.args = '-w -S' + } +} diff --git a/nextflow.config b/nextflow.config index c9b8acb9..b6be2d43 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,6 +22,7 @@ params { txp2gene = null fasta = null gtf = null + gff = null // simpleaf parameters simpleaf_index = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 6ecbf557..86408fe3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -143,6 +143,13 @@ "format": "file-path", "exists": true }, + "gff": { + "type": "string", + "description": "Reference GFF annotation file", + "fa_icon": "fas 
fa-code-branch", + "format": "file-path", + "exists": true + }, "save_reference": { "type": "boolean", "description": "Specify this parameter to save the indices created (STAR, Kallisto, Simpleaf) to the results.", diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index ee2c8d46..e75473f9 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -21,6 +21,8 @@ include { H5AD_REMOVEBACKGROUND_BARCODES_CELLBENDER_ANNDATA } from '../subworkfl include { GTF_GENE_FILTER } from '../modules/local/gtf_gene_filter' include { GUNZIP as GUNZIP_FASTA } from '../modules/nf-core/gunzip/main' include { GUNZIP as GUNZIP_GTF } from '../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_GFF } from '../modules/nf-core/gunzip/main' +include { GFFREAD as GFFREAD } from '../modules/nf-core/gffread/main' include { H5AD_CONVERSION } from '../subworkflows/local/h5ad_conversion' @@ -42,6 +44,7 @@ workflow SCRNASEQ { // general input and params ch_genome_fasta = params.fasta ? file(params.fasta, checkIfExists: true) : [] ch_gtf = params.gtf ? file(params.gtf, checkIfExists: true) : [] + ch_gff = params.gff ? file(params.gff, checkIfExists: true) : [] ch_transcript_fasta = params.transcript_fasta ? file(params.transcript_fasta) : [] ch_motifs = params.motifs ? file(params.motifs) : [] ch_txp2gene = params.txp2gene ? file(params.txp2gene, checkIfExists: true) : [] @@ -54,6 +57,11 @@ workflow SCRNASEQ { ch_barcode_whitelist = [] } + // Warn if both GTF and GFF files are provided + if (params.gtf && params.gff) { + log.warn("Both GTF and GFF files are provided. GTF file will be used.") + } + // samplesheet - this is passed to the MTX conversion functions to add metadata to the // AnnData objects. 
ch_input = file(params.input) @@ -107,10 +115,22 @@ workflow SCRNASEQ { if (params.gtf.endsWith('.gz')) { ch_gtf = GUNZIP_GTF ( [ [:], ch_gtf ] ).gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions) - } else { + } + else { ch_gtf = Channel.value( ch_gtf ) } } + else if (params.gff) { /* no GTF given: convert the GFF annotation to GTF with GFFREAD */ + if (params.gff.endsWith('.gz')) { + ch_gff = GUNZIP_GFF ( [ [ id: ch_gff.baseName ], ch_gff ] ).gunzip /* pass the file object created from params.gff, and a meta.id because GFFREAD uses meta.id as its tag and default output prefix */ + ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions) + } + else { + ch_gff = Channel.value( [ [ id: ch_gff.baseName ], ch_gff ] ) /* tuple is built eagerly from the file object: params.gff is a plain string without a baseName property, and a closure would see ch_gff after it has been reassigned to a channel */ + } + ch_gtf = GFFREAD ( ch_gff, [] ).gtf.map { it[1] } /* drop the meta map so ch_gtf carries only the GTF path, as in the --gtf branch */ + ch_versions = ch_versions.mix(GFFREAD.out.versions) + } // filter gtf ch_filter_gtf = ch_gtf ? GTF_GENE_FILTER ( ch_genome_fasta, ch_gtf ).gtf : []