Skip to content

Commit ba84443

Browse files
authored
feat: adding files for varfish-server-worker (#50) (#51)
1 parent e899051 commit ba84443

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

48 files changed

+925
-156
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -86,4 +86,4 @@ black:
8686
.PHONY: run-snakefmt
8787
run-snakefmt:
8888
snakefmt --line-length 100 Snakefile
89-
snakefmt --line-length 100 rules/*/*/*.smk rules/*/*/*.smk
89+
snakefmt --line-length 100 rules/*/*.smk rules/*/*/*.smk rules/*/*/*/*.smk

Snakefile

Lines changed: 68 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
from varfish_db_downloader.versions import (
1010
DATA_VERSIONS as DV,
1111
PACKAGE_VERSIONS as PV,
12+
FORCE_TODAY,
1213
TODAY,
1314
RUNS_IN_CI,
1415
)
@@ -90,7 +91,6 @@ rule all:
9091
f"work/genes/entrez/{DV.today}/gene_info.jsonl",
9192
f"work/genes/gnomad/{DV.gnomad_constraints}/gnomad_constraints.tsv",
9293
f"work/genes/hgnc/{DV.today}/hgnc_info.jsonl",
93-
f"work/genes/mim2gene/{DV.today}/mim2gene.tsv",
9494
# reference-specific annotations
9595
# -- background/population sequence variants and annotations thereof
9696
# ---- GRCh37
@@ -157,41 +157,67 @@ rule all:
157157
# ----- genes
158158
f"output/full/annonars/genes-{DV.acmg_sf}+{DV.gnomad_constraints}+{DV.dbnsfp}+{DV.today}+{PV.worker}/rocksdb/IDENTITY",
159159
# -- worker data
160-
# ----- Genes
161-
f"output/full/worker/genes-xlink-{DV.today}/genes-xlink.tsv",
162-
f"output/full/worker/genes-txs-grch37-{DV.mehari_tx}/mehari-data-txs-grch37-{DV.mehari_tx}.bin.zst",
163-
f"output/full/worker/genes-txs-grch38-{DV.mehari_tx}/mehari-data-txs-grch38-{DV.mehari_tx}.bin.zst",
160+
f"output/full/worker/genes-regions-grch37-{DV.refseq_37}+{PV.worker}/refseq_genes.bin",
161+
f"output/full/worker/genes-regions-grch37-{DV.ensembl_37}+{PV.worker}/ensembl_genes.bin",
162+
f"output/full/worker/genes-regions-grch38-{DV.refseq_38}+{PV.worker}/refseq_genes.bin",
163+
f"output/full/worker/genes-regions-grch38-{DV.ensembl_38}+{PV.worker}/ensembl_genes.bin",
164+
f"output/full/worker/genes-xlink-{DV.today}+{PV.worker}/genes-xlink.bin",
165+
f"output/full/worker/acmg-sf-{DV.acmg_sf}+{PV.worker}/acmg_sf.tsv",
166+
f"output/full/worker/mim2gene-{DV.today}+{PV.worker}/mim2gene.tsv",
167+
f"output/full/worker/masked-repeat-grch37-{DV.ucsc_rmsk_37}+{PV.worker}/masked-repeat.bin",
168+
f"output/full/worker/masked-repeat-grch38-{DV.ucsc_rmsk_38}+{PV.worker}/masked-repeat.bin",
169+
f"output/full/worker/masked-segdup-grch37-{DV.ucsc_genomic_super_dups_37}+{PV.worker}/masked-segdup.bin",
170+
f"output/full/worker/masked-segdup-grch38-{DV.ucsc_genomic_super_dups_38}+{PV.worker}/masked-segdup.bin",
171+
f"output/full/worker/bgdb-dbvar-grch37-{DV.dbvar}+{PV.worker}/bgdb-dbvar.bin",
172+
f"output/full/worker/bgdb-dbvar-grch38-{DV.dbvar}+{PV.worker}/bgdb-dbvar.bin",
173+
f"output/full/worker/bgdb-dgv-grch37-{DV.dgv}+{PV.worker}/bgdb-dgv.bin",
174+
f"output/full/worker/bgdb-dgv-grch38-{DV.dgv}+{PV.worker}/bgdb-dgv.bin",
175+
f"output/full/worker/bgdb-dgv-gs-grch37-{DV.dgv}+{PV.worker}/bgdb-dgv-gs.bin",
176+
f"output/full/worker/bgdb-dgv-gs-grch38-{DV.dgv}+{PV.worker}/bgdb-dgv-gs.bin",
177+
f"output/full/worker/bgdb-gnomad-grch37-{DV.gnomad_sv}+{PV.worker}/bgdb-gnomad.bin",
178+
f"output/full/worker/bgdb-exac-grch37-{DV.exac_cnv}+{PV.worker}/bgdb-exac.bin",
179+
f"output/full/worker/bgdb-g1k-grch37-{DV.g1k_svs}+{PV.worker}/bgdb-g1k.bin",
180+
f"output/full/worker/clinvar-strucvars-grch37-{DV.clinvar_version}+{PV.worker}/clinvar-strucvars.bin",
181+
f"output/full/worker/clinvar-strucvars-grch38-{DV.clinvar_version}+{PV.worker}/clinvar-strucvars.bin",
182+
f"output/full/worker/patho-mms-grch37-{DV.patho_mms}+{PV.worker}/patho-mms.bed",
183+
f"output/full/worker/patho-mms-grch38-{DV.patho_mms}+{PV.worker}/patho-mms.bed",
184+
"output/full/worker/tads-grch37-dixon2015/hesc.bed",
185+
"output/full/worker/tads-grch38-dixon2015/hesc.bed",
186+
# -- mehari data
187+
f"output/full/mehari/genes-xlink-{DV.today}/genes-xlink.tsv",
188+
f"output/full/mehari/genes-txs-grch37-{DV.mehari_tx}/mehari-data-txs-grch37-{DV.mehari_tx}.bin.zst",
189+
f"output/full/mehari/genes-txs-grch38-{DV.mehari_tx}/mehari-data-txs-grch38-{DV.mehari_tx}.bin.zst",
164190
# ----- HPO
165191
f"output/full/viguno/hpo-{DV.hpo}+{PV.viguno}/hp.obo",
166192
f"output/full/viguno/hpo-{DV.hpo}+{PV.viguno}/phenotype.hpoa",
167193
f"output/full/viguno/hpo-{DV.hpo}+{PV.viguno}/phenotype_to_genes.txt",
168194
f"output/full/viguno/hpo-{DV.hpo}+{PV.viguno}/hpo.bin",
169195
f"output/full/viguno/hpo-{DV.hpo}+{PV.viguno}/scores-fun-sim-avg-resnik-gene/IDENTITY",
170196
# ----- background/population structural variants and annotations thereof
171-
f"output/full/worker/annos/strucvars/dbvar-grch37-{DV.dbvar}/dbvar.bed.gz",
172-
f"output/full/worker/annos/strucvars/dbvar-grch38-{DV.dbvar}/dbvar.bed.gz",
173-
f"output/full/worker/annos/strucvars/dgv-grch37-{DV.dgv}/dgv.bed.gz",
174-
f"output/full/worker/annos/strucvars/dgv-grch38-{DV.dgv}/dgv.bed.gz",
175-
f"output/full/worker/annos/strucvars/dgv-gs-grch37-{DV.dgv_gs}/dgv-gs.bed.gz",
176-
f"output/full/worker/annos/strucvars/dgv-gs-grch38-{DV.dgv_gs}/dgv-gs.bed.gz",
177-
f"output/full/worker/annos/strucvars/exac-grch37-{DV.exac_cnv}/exac.bed.gz",
178-
f"output/full/worker/annos/strucvars/g1k-grch37-{DV.g1k_svs}/g1k.bed.gz",
179-
f"output/full/worker/annos/strucvars/gnomad-grch37-{DV.gnomad_sv}/gnomad.bed.gz",
197+
f"output/full/tracks/track-strucvars-dbvar-grch37-{DV.dbvar}+{DV.tracks}/dbvar.bed.gz",
198+
f"output/full/tracks/track-strucvars-dbvar-grch38-{DV.dbvar}+{DV.tracks}/dbvar.bed.gz",
199+
f"output/full/tracks/track-strucvars-dgv-grch37-{DV.dgv}+{DV.tracks}/dgv.bed.gz",
200+
f"output/full/tracks/track-strucvars-dgv-grch38-{DV.dgv}+{DV.tracks}/dgv.bed.gz",
201+
f"output/full/tracks/track-strucvars-dgv-gs-grch37-{DV.dgv_gs}+{DV.tracks}/dgv-gs.bed.gz",
202+
f"output/full/tracks/track-strucvars-dgv-gs-grch38-{DV.dgv_gs}+{DV.tracks}/dgv-gs.bed.gz",
203+
f"output/full/tracks/track-strucvars-exac-grch37-{DV.exac_cnv}+{DV.tracks}/exac.bed.gz",
204+
f"output/full/tracks/track-strucvars-g1k-grch37-{DV.g1k_svs}+{DV.tracks}/g1k.bed.gz",
205+
f"output/full/tracks/track-strucvars-gnomad-grch37-{DV.gnomad_sv}+{DV.tracks}/gnomad.bed.gz",
180206
# ----- known pathogenic MMS
181-
f"output/full/worker/annos/strucvars/patho-mms-grch37-{DV.patho_mms}/patho-mms.bed",
182-
f"output/full/worker/annos/strucvars/patho-mms-grch38-{DV.patho_mms}/patho-mms.bed",
207+
f"output/full/tracks/track-strucvars-patho-mms-grch37-{DV.patho_mms}+{DV.tracks}/patho-mms.bed",
208+
f"output/full/tracks/track-strucvars-patho-mms-grch38-{DV.patho_mms}+{DV.tracks}/patho-mms.bed",
183209
# ----- problematic regions (rmsk, genomicSuperDups, altSeqLiftOverPsl, fixSeqLiftOverPsl)
184-
f"output/full/worker/annos/features/ucsc-genomicsuperdups-grch37-{DV.ucsc_genomic_super_dups_37}/genomicSuperDups.bed.gz",
185-
f"output/full/worker/annos/features/ucsc-genomicsuperdups-grch38-{DV.ucsc_genomic_super_dups_38}/genomicSuperDups.bed.gz",
186-
f"output/full/worker/annos/features/ucsc-rmsk-grch37-{DV.ucsc_rmsk_37}/rmsk.bed.gz",
187-
f"output/full/worker/annos/features/ucsc-rmsk-grch38-{DV.ucsc_rmsk_38}/rmsk.bed.gz",
188-
f"output/full/worker/annos/features/ucsc-altseqliftoverpsl-grch37-{DV.ucsc_alt_seq_liftover_37}/altSeqLiftOverPsl.bed.gz",
189-
f"output/full/worker/annos/features/ucsc-altseqliftoverpsl-grch38-{DV.ucsc_alt_seq_liftover_38}/altSeqLiftOverPsl.bed.gz",
190-
f"output/full/worker/annos/features/ucsc-fixseqliftoverpsl-grch37-{DV.ucsc_fix_seq_liftover_37}/fixSeqLiftOverPsl.bed.gz",
191-
f"output/full/worker/annos/features/ucsc-fixseqliftoverpsl-grch38-{DV.ucsc_fix_seq_liftover_38}/fixSeqLiftOverPsl.bed.gz",
210+
f"output/full/tracks/track-features-ucsc-genomicsuperdups-grch37-{DV.ucsc_genomic_super_dups_37}+{DV.tracks}/genomicSuperDups.bed.gz",
211+
f"output/full/tracks/track-features-ucsc-genomicsuperdups-grch38-{DV.ucsc_genomic_super_dups_38}+{DV.tracks}/genomicSuperDups.bed.gz",
212+
f"output/full/tracks/track-features-ucsc-rmsk-grch37-{DV.ucsc_rmsk_37}+{DV.tracks}/rmsk.bed.gz",
213+
f"output/full/tracks/track-features-ucsc-rmsk-grch38-{DV.ucsc_rmsk_38}+{DV.tracks}/rmsk.bed.gz",
214+
f"output/full/tracks/track-features-ucsc-altseqliftoverpsl-grch37-{DV.ucsc_alt_seq_liftover_37}+{DV.tracks}/altSeqLiftOverPsl.bed.gz",
215+
f"output/full/tracks/track-features-ucsc-altseqliftoverpsl-grch38-{DV.ucsc_alt_seq_liftover_38}+{DV.tracks}/altSeqLiftOverPsl.bed.gz",
216+
f"output/full/tracks/track-features-ucsc-fixseqliftoverpsl-grch37-{DV.ucsc_fix_seq_liftover_37}+{DV.tracks}/fixSeqLiftOverPsl.bed.gz",
217+
f"output/full/tracks/track-features-ucsc-fixseqliftoverpsl-grch38-{DV.ucsc_fix_seq_liftover_38}+{DV.tracks}/fixSeqLiftOverPsl.bed.gz",
192218
# ----- tads
193-
"output/full/worker/annos/strucvars/tads-grch37-dixon2015/hesc.bed",
194-
"output/full/worker/annos/strucvars/tads-grch38-dixon2015/hesc.bed",
219+
f"output/full/tracks/track-tads-grch37-dixon2015+{DV.tracks}/hesc.bed",
220+
f"output/full/tracks/track-tads-grch38-dixon2015+{DV.tracks}/hesc.bed",
195221
#
196222
# == development (reduced data) directories =============================================
197223
#
@@ -221,6 +247,9 @@ rule all:
221247
f"output/reduced-dev/annonars/gnomad-exomes-grch38-{DV.gnomad_v2}+{PV.annonars}/rocksdb/IDENTITY",
222248
f"output/reduced-dev/annonars/gnomad-genomes-grch37-{DV.gnomad_v2}+{PV.annonars}/rocksdb/IDENTITY",
223249
f"output/reduced-dev/annonars/gnomad-genomes-grch38-{DV.gnomad_v3}+{PV.annonars}/rocksdb/IDENTITY",
250+
# -- mehari
251+
f"output/reduced-dev/mehari/freqs-grch37-{DV.gnomad_v2}+{DV.gnomad_v2}+{DV.gnomad_mtdna}+{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",
252+
f"output/reduced-dev/mehari/freqs-grch38-{DV.gnomad_v3}+{DV.gnomad_v2}+{DV.gnomad_mtdna}+{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",
224253
#
225254
# == exomes (reduced data) directories ==================================================
226255
#
@@ -250,6 +279,9 @@ rule all:
250279
f"output/reduced-exomes/annonars/gnomad-exomes-grch38-{DV.gnomad_v2}+{PV.annonars}/rocksdb/IDENTITY",
251280
f"output/reduced-exomes/annonars/gnomad-genomes-grch37-{DV.gnomad_v2}+{PV.annonars}/rocksdb/IDENTITY",
252281
f"output/reduced-exomes/annonars/gnomad-genomes-grch38-{DV.gnomad_v3}+{PV.annonars}/rocksdb/IDENTITY",
282+
# -- mehari
283+
f"output/reduced-exomes/mehari/freqs-grch37-{DV.gnomad_v2}+{DV.gnomad_v2}+{DV.gnomad_mtdna}+{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",
284+
f"output/reduced-exomes/mehari/freqs-grch38-{DV.gnomad_v3}+{DV.gnomad_v2}+{DV.gnomad_mtdna}+{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",
253285

254286

255287
# ===============================================================================================
@@ -289,6 +321,7 @@ include: "rules/work/annos/strucvars/dgv.smk"
289321
include: "rules/work/annos/strucvars/exac.smk"
290322
include: "rules/work/annos/strucvars/g1k.smk"
291323
include: "rules/work/annos/strucvars/gnomad.smk"
324+
include: "rules/work/annos/strucvars/clinvar.smk"
292325
# -- output directory ---------------------------------------------------------------------------
293326
# ---- mehari
294327
include: "rules/output/mehari/freqs.smk"
@@ -306,10 +339,18 @@ include: "rules/output/annonars/gnomad_mtdna.smk"
306339
include: "rules/output/annonars/helix.smk"
307340
include: "rules/output/annonars/genes.smk"
308341
# ---- worker
309-
# ------ global
310342
include: "rules/output/worker/patho_mms.smk"
343+
include: "rules/output/worker/clinvar.smk"
344+
include: "rules/output/worker/genes_regions.smk"
345+
include: "rules/output/worker/hgnc.smk"
346+
include: "rules/output/worker/acmg.smk"
347+
include: "rules/output/worker/mim2gene.smk"
348+
include: "rules/output/worker/masked.smk"
349+
include: "rules/output/worker/bgdb.smk"
350+
include: "rules/output/worker/tads.smk"
311351
# -- reduced output directory (dev/exomes) ------------------------------------------------------
312352
# ---- bed file
313353
include: "rules/reduced/annonars.smk"
314354
include: "rules/reduced/hpo.smk"
315355
include: "rules/reduced/targets.smk"
356+
include: "rules/reduced/mehari.smk"

data/acmg/3.1/acmg.spec.yaml renamed to data/acmg_sf/3.1/acmg_sf.spec.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
dc.format: text/tsv
2-
dc.identifier: genes/acmg/sf:3.1
2+
dc.identifier: varfish-server-worker/acmg_sf:3.1
33
dc.title: ACMG Secondary Findings (SF) Gene List (v3.1)
44
dc.description: >
55
This is version 3.1 of the ACMG gene list for reporting incidental
File renamed without changes.

data/patho-mms/20220730/patho-mms-grch38.spec.yaml

Lines changed: 0 additions & 25 deletions
This file was deleted.

download_urls.yml

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,12 @@
1+
- url: https://github.com/bihealth/annonars-data-clinvar/releases/download/clinvar-weekly-20230625/clinvar-strucvar-grch37-2023-0625+0.6.3.tar.gz
2+
excerpt_strategy:
3+
strategy: no-excerpt
4+
count: null
5+
- url: https://github.com/bihealth/annonars-data-clinvar/releases/download/clinvar-weekly-20230625/clinvar-strucvar-grch38-2023-0625+0.6.3.tar.gz
6+
excerpt_strategy:
7+
strategy: no-excerpt
8+
count: null
9+
110
- url: https://github.com/bihealth/mehari-data-tx/releases/download/v0.2.2/mehari-data-txs-grch37-0.2.2.bin.zst
211
excerpt_strategy:
312
strategy: no-excerpt

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,6 @@ dependencies:
4343
- annonars =0.12.7
4444
- viguno =0.1.6
4545
- mehari =0.5.7
46-
- varfish-server-worker =0.7.0
46+
- varfish-server-worker =0.8.0
4747
# S3 uploads
4848
- s5cmd =2.1.0
Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:93654972056197cfa595d79d33469a68489e70292fe55efe66f2346b9a721dd3
3+
size 9853972

excerpt-data/2c4af2ee68c51be6/url.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:3fbbb56c28633c107e6bc208ead69ba07444a2022f5ebce2a8b4e961482da2b9
3+
size 139
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:dd9b82f62a8bf5087865936c880713b130f4bd89e4f83b57d009eba5c56daa6a
3+
size 25515630

0 commit comments

Comments
 (0)