6
6
# ``varfish-server-worker`` and is used in the backend for filtering and/or exposed to the
7
7
# user via a REST API.
8
8
9
- from varfish_db_downloader .data_versions import DATA_VERSIONS as DV
9
+ from varfish_db_downloader.versions import DATA_VERSIONS as DV, PACKAGE_VERSIONS as PV
10
10
11
11
# The prefix to use for all shell commands.
12
12
SHELL_PREFIX = "export LC_ALL=C; set -x -euo pipefail;"
13
13
# Setup the shell prefix by default.
14
14
shell.prefix(SHELL_PREFIX)
15
15
16
+ # Regular expression for genome release.
17
+ RE_GENOME = r"grch(37|38)"
18
+ # Regular expression for versions.
19
+ RE_VERSION = r"\w+(\.\w+)*"
16
20
17
21
# ===============================================================================================
18
22
# Test Mode
@@ -58,6 +62,8 @@ rule help:
58
62
## all -- run all rules
59
63
rule all :
60
64
input :
65
+ # == work directory =====================================================================
66
+ #
61
67
# genes
62
68
f"work/genes/dbnsfp/{ DV .dbnsfp } /genes.tsv.gz" ,
63
69
f"work/genes/ensembl/{ DV .ensembl } /ensembl_xlink.tsv" ,
@@ -76,20 +82,20 @@ rule all:
76
82
f"work/download/annos/grch37/seqvars/dbnsfp/{ DV .dbnsfp } c/LICENSE.txt" ,
77
83
f"work/download/annos/grch37/seqvars/dbscsnv/{ DV .dbscsnv } /dbscSNV{ DV .dbscsnv } .chr1" ,
78
84
f"work/download/annos/grch37/seqvars/dbsnp/{ DV .dbsnp } /dbsnp.vcf.gz" ,
79
- "work/annos/grch37/seqvars/helixmtdb/20200327 /helixmtdb.vcf.gz" ,
85
+ f"work/annos/grch37/seqvars/helixmtdb/{DV.helixmtdb}/helixmtdb.vcf.gz",
80
86
f"work/annos/grch37/seqvars/gnomad_mtdna/{ DV .gnomad_mtdna } /gnomad_mtdna.vcf.gz" ,
81
- f"work/annos/grch37/seqvars/gnomad_exomes/{ DV .gnomad_v2 } /.done" ,
82
- f"work/annos/grch37/seqvars/gnomad_genomes/{ DV .gnomad_v2 } /.done" ,
87
+ f"work/download/annos/grch37/seqvars/gnomad_exomes/{DV.gnomad_v2}/.done",
88
+ f"work/download/annos/grch37/seqvars/gnomad_genomes/{DV.gnomad_v2}/.done",
83
89
# ---- GRCh38
84
90
f"work/download/annos/grch38/seqvars/cadd/{ DV .cadd } /whole_genome_SNVs_inclAnno.tsv.gz" ,
85
91
f"work/download/annos/grch38/seqvars/cadd/{ DV .cadd } /gnomad.genomes.r3.0.indel_inclAnno.tsv.gz" ,
86
92
# NB: dbNSFP is dual reference (for download)
87
93
# NB: dbscSNV is dual reference (for download)
88
94
f"work/download/annos/grch37/seqvars/dbsnp/{ DV .dbsnp } /dbsnp.vcf.gz" ,
89
- "work/annos/grch38/seqvars/helixmtdb/20200327 /helixmtdb.vcf.gz" ,
95
+ f"work/annos/grch38/seqvars/helixmtdb/{DV.helixmtdb}/helixmtdb.vcf.gz",
90
96
f"work/annos/grch38/seqvars/gnomad_mtdna/{ DV .gnomad_mtdna } /gnomad_mtdna.vcf.gz" ,
91
- f"work/annos/grch38/seqvars/gnomad_exomes/{ DV .gnomad_v2 } /.done" ,
92
- f"work/annos/grch38/seqvars/gnomad_genomes/{ DV .gnomad_v3 } /.done" ,
97
+ f"work/download/annos/grch38/seqvars/gnomad_exomes/{DV.gnomad_v2}/.done",
98
+ f"work/download/annos/grch38/seqvars/gnomad_genomes/{DV.gnomad_v3}/.done",
93
99
# -- background/population structural variants and annotations thereof
94
100
# ---- GRCh37
95
101
f"work/annos/grch37/strucvars/dbvar/{ DV .dbvar } /dbvar.bed.gz" ,
@@ -122,38 +128,87 @@ rule all:
122
128
f"work/annos/grch38/features/ucsc/{ DV .ucsc_rmsk_38 } /rmsk.bed.gz" ,
123
129
f"work/annos/grch38/features/ucsc/{ DV .ucsc_alt_seq_liftover_38 } /altSeqLiftOverPsl.bed.gz" ,
124
130
f"work/annos/grch38/features/ucsc/{ DV .ucsc_fix_seq_liftover_38 } /fixSeqLiftOverPsl.bed.gz" ,
131
+ #
132
+ # == output directory ===================================================================
133
+ #
134
+ # -- mehari data
135
+ # ---- frequencies (via annonars)
136
+ f"output/mehari/freqs-grch37-{ DV .gnomad_v2 } +{ DV .gnomad_v2 } +{ DV .gnomad_mtdna } +{ DV .helixmtdb } +{ PV .annonars } /rocksdb/IDENTITY" ,
137
+ f"output/mehari/freqs-grch38-{ DV .gnomad_v3 } +{ DV .gnomad_v2 } +{ DV .gnomad_mtdna } +{ DV .helixmtdb } +{ PV .annonars } /rocksdb/IDENTITY" ,
138
+ # -- varfish-server-worker data
139
+ # ---- CADD
140
+ f"output/worker/annos/seqvars/cadd-grch37-{ DV .cadd } +{ PV .annonars } /rocksdb/IDENTITY" ,
141
+ f"output/worker/annos/seqvars/cadd-grch38-{ DV .cadd } +{ PV .annonars } /rocksdb/IDENTITY" ,
142
+ # ---- dbSNP
143
+ f"output/worker/annos/seqvars/dbsnp-grch37-{ DV .dbsnp } +{ PV .annonars } /rocksdb/IDENTITY" ,
144
+ f"output/worker/annos/seqvars/dbsnp-grch38-{ DV .dbsnp } +{ PV .annonars } /rocksdb/IDENTITY" ,
145
+ # ---- dbNSFP
146
+ f"output/worker/annos/seqvars/dbnsfp-grch37-{ DV .dbnsfp } a+{ PV .annonars } /rocksdb/IDENTITY" ,
147
+ f"output/worker/annos/seqvars/dbnsfp-grch38-{ DV .dbnsfp } a+{ PV .annonars } /rocksdb/IDENTITY" ,
148
+ f"output/worker/annos/seqvars/dbnsfp-grch37-{ DV .dbnsfp } c+{ PV .annonars } /rocksdb/IDENTITY" ,
149
+ f"output/worker/annos/seqvars/dbnsfp-grch38-{ DV .dbnsfp } c+{ PV .annonars } /rocksdb/IDENTITY" ,
150
+ # ---- dbscSNV
151
+ f"output/worker/annos/seqvars/dbscsnv-grch37-{ DV .dbscsnv } +{ PV .annonars } /rocksdb/IDENTITY" ,
152
+ f"output/worker/annos/seqvars/dbscsnv-grch38-{ DV .dbscsnv } +{ PV .annonars } /rocksdb/IDENTITY" ,
153
+ # ---- gnomAD mtDNA
154
+ f"output/worker/annos/seqvars/gnomad-mtdna-grch37-{ DV .gnomad_mtdna } +{ PV .annonars } /rocksdb/IDENTITY" ,
155
+ f"output/worker/annos/seqvars/gnomad-mtdna-grch38-{ DV .gnomad_mtdna } +{ PV .annonars } /rocksdb/IDENTITY" ,
156
+ # ---- gnomAD exomes
157
+ f"output/worker/annos/seqvars/gnomad-exomes-grch37-{ DV .gnomad_v2 } +{ PV .annonars } /rocksdb/IDENTITY" ,
158
+ f"output/worker/annos/seqvars/gnomad-exomes-grch38-{ DV .gnomad_v2 } +{ PV .annonars } /rocksdb/IDENTITY" ,
159
+ # ---- gnomAD genomes
160
+ f"output/worker/annos/seqvars/gnomad-genomes-grch37-{ DV .gnomad_v2 } +{ PV .annonars } /rocksdb/IDENTITY" ,
161
+ f"output/worker/annos/seqvars/gnomad-genomes-grch38-{ DV .gnomad_v3 } +{ PV .annonars } /rocksdb/IDENTITY" ,
162
+ # ---- HelixMtDb
163
+ f"output/worker/annos/seqvars/helixmtdb-grch37-{ DV .helixmtdb } +{ PV .annonars } /rocksdb/IDENTITY" ,
164
+ f"output/worker/annos/seqvars/helixmtdb-grch38-{ DV .helixmtdb } +{ PV .annonars } /rocksdb/IDENTITY" ,
165
+ # ---- UCSC conservation
166
+ f"output/worker/annos/seqvars/cons-grch37-{ DV .ucsc_cons_37 } +{ PV .annonars } /rocksdb/IDENTITY" ,
167
+ f"output/worker/annos/seqvars/cons-grch38-{ DV .ucsc_cons_38 } +{ PV .annonars } /rocksdb/IDENTITY" ,
125
168
126
169
127
170
# ===============================================================================================
128
171
# Modular Snakefile Includes
129
172
# ===============================================================================================
130
173
131
174
175
+ # -- work directory -----------------------------------------------------------------------------
132
176
# Gene-related rules.
133
- include : "rules/genes/dbnsfp.smk"
134
- include : "rules/genes/ensembl.smk"
135
- include : "rules/genes/gnomad.smk"
136
- include : "rules/genes/hgnc.smk"
137
- include : "rules/genes/ncbi.smk"
177
+ include: "rules/work/genes/dbnsfp.smk"
178
+ include: "rules/work/genes/ensembl.smk"
179
+ include: "rules/work/genes/gnomad.smk"
180
+ include: "rules/work/genes/hgnc.smk"
181
+ include: "rules/work/genes/ncbi.smk"
138
182
# Reference sequence--related rules.
139
- include : "rules/reference/human.smk"
183
+ include: "rules/work/reference/human.smk"
140
184
# Features (position and not variant specific).
141
- include : "rules/annos/features/cons.smk"
142
- include : "rules/annos/features/ensembl.smk"
143
- include : "rules/annos/features/refseq.smk"
144
- include : "rules/annos/features/tads.smk"
145
- include : "rules/annos/features/ucsc.smk"
185
+ include: "rules/work/annos/features/cons.smk"
186
+ include: "rules/work/annos/features/ensembl.smk"
187
+ include: "rules/work/annos/features/refseq.smk"
188
+ include: "rules/work/annos/features/tads.smk"
189
+ include: "rules/work/annos/features/ucsc.smk"
146
190
# Sequence variants and annotations.
147
- include : "rules/annos/seqvars/cadd.smk"
148
- include : "rules/annos/seqvars/dbnsfp.smk"
149
- include : "rules/annos/seqvars/dbscsnv.smk"
150
- include : "rules/annos/seqvars/dbsnp.smk"
151
- include : "rules/annos/seqvars/gnomad_mtdna.smk"
152
- include : "rules/annos/seqvars/gnomad_nuclear.smk"
153
- include : "rules/annos/seqvars/helix.smk"
191
+ include: "rules/work/annos/seqvars/cadd.smk"
192
+ include: "rules/work/annos/seqvars/dbnsfp.smk"
193
+ include: "rules/work/annos/seqvars/dbscsnv.smk"
194
+ include: "rules/work/annos/seqvars/dbsnp.smk"
195
+ include: "rules/work/annos/seqvars/gnomad_mtdna.smk"
196
+ include: "rules/work/annos/seqvars/gnomad_nuclear.smk"
197
+ include: "rules/work/annos/seqvars/helix.smk"
154
198
# Structural variant related.
155
- include : "rules/annos/strucvars/dbvar.smk"
156
- include : "rules/annos/strucvars/dgv.smk"
157
- include : "rules/annos/strucvars/exac.smk"
158
- include : "rules/annos/strucvars/g1k.smk"
159
- include : "rules/annos/strucvars/gnomad.smk"
199
+ include : "rules/work/annos/strucvars/dbvar.smk"
200
+ include : "rules/work/annos/strucvars/dgv.smk"
201
+ include : "rules/work/annos/strucvars/exac.smk"
202
+ include : "rules/work/annos/strucvars/g1k.smk"
203
+ include : "rules/work/annos/strucvars/gnomad.smk"
204
+ # -- output directory ---------------------------------------------------------------------------
205
+ include : "rules/output/mehari/freqs.smk"
206
+ include : "rules/output/worker/cadd.smk"
207
+ include : "rules/output/worker/dbsnp.smk"
208
+ include : "rules/output/worker/dbnsfp.smk"
209
+ include : "rules/output/worker/dbscsnv.smk"
210
+ include : "rules/output/worker/gnomad_mtdna.smk"
211
+ include : "rules/output/worker/gnomad_exomes.smk"
212
+ include : "rules/output/worker/gnomad_genomes.smk"
213
+ include : "rules/output/worker/helix.smk"
214
+ include : "rules/output/worker/cons.smk"
0 commit comments