From 6834e6439497e584061b5736dd68277617d76e22 Mon Sep 17 00:00:00 2001 From: tedil Date: Thu, 29 Feb 2024 10:59:43 +0100 Subject: [PATCH 1/6] make flavor and branch specification possible in config.yaml --- workflow/rules/ref.smk | 4 ++-- workflow/schemas/config.schema.yaml | 13 +++++++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/workflow/rules/ref.smk b/workflow/rules/ref.smk index 752802db..c5eb1e97 100644 --- a/workflow/rules/ref.smk +++ b/workflow/rules/ref.smk @@ -97,8 +97,8 @@ rule download_gene_annotation: species=config["reference"]["species"], build=config["reference"]["build"], release=config["reference"]["release"], - flavor="", # optional, e.g. chr_patch_hapl_scaff, see Ensembl FTP. - branch="", # optional: specify branch + flavor=config["reference"]["flavor"], # optional, e.g. chr_patch_hapl_scaff, see Ensembl FTP. + branch=config["reference"]["branch"], # optional: specify branch log: "logs/download_gene_annotation.log", cache: "omit-software" # save space and time with between workflow caching (see docs) diff --git a/workflow/schemas/config.schema.yaml b/workflow/schemas/config.schema.yaml index cd6dd73c..63c9c3be 100644 --- a/workflow/schemas/config.schema.yaml +++ b/workflow/schemas/config.schema.yaml @@ -34,6 +34,12 @@ properties: type: integer build: type: string + flavor: + type: string + branch: + type: string + repeat_masker_download_link: + type: string required: - species - release @@ -70,8 +76,11 @@ properties: type: number minimum: 0.0 maximum: 1.0 - local: - type: boolean + mode: + type: array + items: + type: string + enum: ["local-smart", "local-strict", "global-smart", "global-strict"] events: $ref: "#/definitions/evententry" description: "a map of pairs" From f0e954d7e5de4b8e164f7b7832db06efd2e288c2 Mon Sep 17 00:00:00 2001 From: tedil Date: Thu, 29 Feb 2024 11:01:22 +0100 Subject: [PATCH 2/6] include optional flavor + branch in config.yaml, commented --- config/config.yaml | 3 +++ 1 file changed, 3 
insertions(+) diff --git a/config/config.yaml b/config/config.yaml index 0868ac4e..9f388f63 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -8,6 +8,9 @@ reference: build: GRCh38 # Ensembl release release: 107 + # flavor: "" # optional, e.g. chr_patch_hapl_scaff, see Ensembl FTP. + # branch: "" # optional + # for available downloads, please browse either of these views: # * http://repeatmasker.org/genomicDatasets/RMGenomicDatasets.html # * http://repeatmasker.org/genomicDatasets/RMGenomicDatasetsAlt.html From 933269fa0d7eba33ccfc6ef301f7afb5be8315c6 Mon Sep 17 00:00:00 2001 From: tedil Date: Thu, 29 Feb 2024 11:47:24 +0100 Subject: [PATCH 3/6] optional entries should indeed be optional --- workflow/rules/ref.smk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/rules/ref.smk b/workflow/rules/ref.smk index c5eb1e97..2707afc4 100644 --- a/workflow/rules/ref.smk +++ b/workflow/rules/ref.smk @@ -97,8 +97,8 @@ rule download_gene_annotation: species=config["reference"]["species"], build=config["reference"]["build"], release=config["reference"]["release"], - flavor=config["reference"]["flavor"], # optional, e.g. chr_patch_hapl_scaff, see Ensembl FTP. - branch=config["reference"]["branch"], # optional: specify branch + flavor=config["reference"].get("flavor", ""), # optional, e.g. chr_patch_hapl_scaff, see Ensembl FTP. 
+ branch=config["reference"].get("branch", ""), # optional: specify branch log: "logs/download_gene_annotation.log", cache: "omit-software" # save space and time with between workflow caching (see docs) From 3206e49ab0e550f6b2aa73a3813bc07e3bd0c40a Mon Sep 17 00:00:00 2001 From: tedil Date: Thu, 29 Feb 2024 11:53:48 +0100 Subject: [PATCH 4/6] remove benchmark directives --- workflow/rules/ref.smk | 6 ------ 1 file changed, 6 deletions(-) diff --git a/workflow/rules/ref.smk b/workflow/rules/ref.smk index 2707afc4..b7de5179 100644 --- a/workflow/rules/ref.smk +++ b/workflow/rules/ref.smk @@ -45,8 +45,6 @@ rule minimap2_index: ), log: "logs/minimap2_index/genome.log", - benchmark: - "benchmarks/minimap2_index/genome.txt" params: extra="", # optional additional args cache: True @@ -65,8 +63,6 @@ rule download_regulatory_annotation: "logs/download_regulatory_annotation.log", params: release=config["reference"].get("release", "107"), - benchmark: - "benchmarks/download_regulatory_annotation.txt" cache: "omit-software" # save space and time with between workflow caching (see docs) conda: "../envs/wget.yaml" @@ -81,8 +77,6 @@ rule download_repeatmasker_annotation: "logs/download_repeatmasker_annotation.log", params: download_link=config["reference"].get("repeat_masker_download_link", ""), - benchmark: - "benchmarks/download_repeatmasker_annotation.txt" cache: "omit-software" # save space and time with between workflow caching (see docs) conda: "../envs/wget.yaml" From a29d48ebffa82b84714efa1f5a9ca7503f1fcf8d Mon Sep 17 00:00:00 2001 From: tedil Date: Thu, 29 Feb 2024 11:57:43 +0100 Subject: [PATCH 5/6] snakefmt --- workflow/rules/annotate.smk | 16 ++++++++++------ workflow/rules/map.smk | 10 ++++++---- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/workflow/rules/annotate.smk b/workflow/rules/annotate.smk index 65b56a54..e84922cd 100644 --- a/workflow/rules/annotate.smk +++ b/workflow/rules/annotate.smk @@ -26,9 +26,11 @@ rule cyrcular_annotate_graph: 
graph="results/calling/graphs/{group}.graph", gene_annotation="resources/gene_annotation.gff3.gz", regulatory_annotation="resources/regulatory_annotation.gff3.gz", - repeat_annotation=lambda wc: "resources/repeat_masker.fa.out.gz" - if config["reference"].get("repeat_masker_download_link", "") - else "", + repeat_annotation=lambda wc: ( + "resources/repeat_masker.fa.out.gz" + if config["reference"].get("repeat_masker_download_link", "") + else "" + ), output: annotated="results/calling/graphs/{group}.annotated.graph", threads: 1 @@ -39,9 +41,11 @@ rule cyrcular_annotate_graph: conda: "../envs/cyrcular.yaml" params: - repeat_annotation=lambda wc, input: f" --repeat-annotation {input.repeat_annotation} " - if config["reference"].get("repeat_masker_download_link", "") - else "", + repeat_annotation=lambda wc, input: ( + f" --repeat-annotation {input.repeat_annotation} " + if config["reference"].get("repeat_masker_download_link", "") + else "" + ), shell: "cyrcular graph annotate " " --reference {input.reference} " diff --git a/workflow/rules/map.smk b/workflow/rules/map.smk index 8fce0193..baea8795 100644 --- a/workflow/rules/map.smk +++ b/workflow/rules/map.smk @@ -11,7 +11,7 @@ rule minimap2_bam: params: extra=get_minimap2_mapping_params, # optional sorting="coordinate", # optional: Enable sorting. 
Possible values: 'none', 'queryname' or 'coordinate' - sort_extra=lambda wc, threads: f"-@ {min(threads, 4)}", # optional: extra arguments for samtools/picard + sort_extra=lambda wc, threads: f"-@ {min(threads , 4)}", # optional: extra arguments for samtools/picard threads: workflow.cores // 2 wrapper: "v1.25.0/bio/minimap2/aligner" @@ -27,9 +27,11 @@ rule merge_fastqs: wildcard_constraints: read="single|R1|R2", params: - cmd=lambda wc: "pigz -dc" - if (any(map(lambda f: f.endswith(".gz"), get_fastqs(wc)))) - else "cat", + cmd=lambda wc: ( + "pigz -dc" + if (any(map(lambda f: f.endswith(".gz"), get_fastqs(wc)))) + else "cat" + ), conda: "../envs/pigz.yaml" shell: From 7ad5117af0387016e59d12de46cf3c7b8e28943a Mon Sep 17 00:00:00 2001 From: tedil Date: Thu, 29 Feb 2024 12:04:25 +0100 Subject: [PATCH 6/6] include link to ensembl ftp --- config/config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/config.yaml b/config/config.yaml index 9f388f63..f25e40a6 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -8,7 +8,7 @@ reference: build: GRCh38 # Ensembl release release: 107 - # flavor: "" # optional, e.g. chr_patch_hapl_scaff, see Ensembl FTP. + # flavor: "" # optional, e.g. chr_patch_hapl_scaff, see Ensembl FTP: https://ftp.ensembl.org/pub/release-107/gtf/homo_sapiens/ # branch: "" # optional # for available downloads, please browse either of these views: