Skip to content

Commit dd8170e

Browse files
authored
feat: add reduced development dataset (#44) (#47)
1 parent 3f70b57 commit dd8170e

34 files changed

+430
-151
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Ignore the workflow directories.
22
/work/
33
/output/
4+
/reduced-*/
45

56
# Python
67
__pycache__

Snakefile

Lines changed: 132 additions & 52 deletions
Large diffs are not rendered by default.

environment.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ dependencies:
4040
# Parallel (de)compression.
4141
- pigz
4242
# Varfish related
43-
- annonars =0.10.0
44-
- viguno =0.1.1
45-
- mehari =0.5.0
46-
- varfish-server-worker
43+
- annonars =0.12.7
44+
- viguno =0.1.6
45+
- mehari =0.5.7
46+
- varfish-server-worker =0.7.0

rules/output/annonars/cadd.smk

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,9 @@ rule output_annonars_cadd: # -- build CADD RocksDB with annonars
5050
unpack(input_output_annonars_cadd),
5151
output:
5252
rocksdb_identity=(
53-
"output/annonars/cadd-{genome_release}-{v_cadd}+{v_annonars}/rocksdb/IDENTITY"
53+
"output/full/annonars/cadd-{genome_release}-{v_cadd}+{v_annonars}/rocksdb/IDENTITY"
5454
),
55-
spec_yaml=("output/annonars/cadd-{genome_release}-{v_cadd}+{v_annonars}/spec.yaml"),
55+
spec_yaml=("output/full/annonars/cadd-{genome_release}-{v_cadd}+{v_annonars}/spec.yaml"),
5656
threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96"))
5757
resources:
5858
runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"),

rules/output/annonars/cons.smk

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ rule output_annonars_cons: # -- build UCSC conservation track RocksDB with anno
88
tsv="work/annos/{genome_release}/features/cons/{v_cons}/ucsc_conservation.tsv",
99
output:
1010
rocksdb_identity=(
11-
"output/annonars/cons-{genome_release}-{v_cons}+{v_annonars}/rocksdb/IDENTITY"
11+
"output/full/annonars/cons-{genome_release}-{v_cons}+{v_annonars}/rocksdb/IDENTITY"
1212
),
13-
spec_yaml=("output/annonars/cons-{genome_release}-{v_cons}+{v_annonars}/spec.yaml"),
13+
spec_yaml=("output/full/annonars/cons-{genome_release}-{v_cons}+{v_annonars}/spec.yaml"),
1414
threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96"))
1515
resources:
1616
runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"),

rules/output/annonars/dbnsfp.smk

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ rule output_annonars_dbnsfp: # -- build dbNSFP RocksDB with annonars
1313
input_output_annonars_dbnsfp,
1414
output:
1515
rocksdb_identity=(
16-
"output/annonars/dbnsfp-{genome_release}-{v_dbnsfp}+{v_annonars}/rocksdb/IDENTITY"
16+
"output/full/annonars/dbnsfp-{genome_release}-{v_dbnsfp}+{v_annonars}/rocksdb/IDENTITY"
1717
),
18-
spec_yaml=("output/annonars/dbnsfp-{genome_release}-{v_dbnsfp}+{v_annonars}/spec.yaml"),
18+
spec_yaml=("output/full/annonars/dbnsfp-{genome_release}-{v_dbnsfp}+{v_annonars}/spec.yaml"),
1919
threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96"))
2020
resources:
2121
runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"),

rules/output/annonars/dbscsnv.smk

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,11 @@ rule output_annonars_dbscsnv: # -- build dbscSNV RocksDB with annonars
1313
input_output_annonars_dbscsnv,
1414
output:
1515
rocksdb_identity=(
16-
"output/annonars/dbscsnv-{genome_release}-{v_dbscsnv}+{v_annonars}/rocksdb/IDENTITY"
16+
"output/full/annonars/dbscsnv-{genome_release}-{v_dbscsnv}+{v_annonars}/rocksdb/IDENTITY"
17+
),
18+
spec_yaml=(
19+
"output/full/annonars/dbscsnv-{genome_release}-{v_dbscsnv}+{v_annonars}/spec.yaml"
1720
),
18-
spec_yaml=("output/annonars/dbscsnv-{genome_release}-{v_dbscsnv}+{v_annonars}/spec.yaml"),
1921
threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96"))
2022
resources:
2123
runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"),

rules/output/annonars/dbsnp.smk

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@ rule output_annonars_dbsnp: # -- build dbSNP RocksDB with annonars
88
vcf="work/download/annos/{genome_release}/seqvars/dbsnp/{v_dbsnp}/dbsnp.vcf.gz",
99
output:
1010
rocksdb_identity=(
11-
"output/annonars/dbsnp-{genome_release}-{v_dbsnp}+{v_annonars}/rocksdb/IDENTITY"
11+
"output/full/annonars/dbsnp-{genome_release}-{v_dbsnp}+{v_annonars}/rocksdb/IDENTITY"
1212
),
13-
spec_yaml=("output/annonars/dbsnp-{genome_release}-{v_dbsnp}+{v_annonars}/spec.yaml"),
13+
spec_yaml=("output/full/annonars/dbsnp-{genome_release}-{v_dbsnp}+{v_annonars}/spec.yaml"),
1414
threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96"))
1515
resources:
1616
runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"),
Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
## Rules to create build worker genes database..
1+
## Rules to build the annonars genes database.
22

33

4-
rule output_worker_genes: # -- build genes protobuf file
4+
rule output_annonars_genes: # -- build annonars genes RocksDB file
55
input:
66
acmg_sf="data/acmg/{v_acmg_sf}/acmg.tsv",
77
gnomad_constraints="work/genes/gnomad/{v_gnomad_constraints}/gnomad_constraints.tsv",
@@ -10,22 +10,22 @@ rule output_worker_genes: # -- build genes protobuf file
1010
ncbi="work/genes/entrez/{date}/gene_info.jsonl",
1111
output:
1212
rocksdb_identity=(
13-
"output/worker/genes-{v_acmg_sf}+{v_gnomad_constraints}+{v_dbnsfp}+{date}+{v_worker}/"
13+
"output/full/annonars/genes-{v_acmg_sf}+{v_gnomad_constraints}+{v_dbnsfp}+{date}+{v_annonars}/"
1414
"rocksdb/IDENTITY"
1515
),
1616
spec_yaml=(
17-
"output/worker/genes-{v_acmg_sf}+{v_gnomad_constraints}+{v_dbnsfp}+{date}+{v_worker}/"
17+
"output/full/annonars/genes-{v_acmg_sf}+{v_gnomad_constraints}+{v_dbnsfp}+{date}+{v_annonars}/"
1818
"spec.yaml"
1919
),
2020
wildcard_constraints:
2121
v_acmg_sf=RE_VERSION,
2222
v_gnomad_constraints=RE_VERSION,
2323
v_dbnsfp=RE_VERSION,
2424
date=RE_VERSION,
25-
v_worker=RE_VERSION,
25+
v_annonars=RE_VERSION,
2626
shell:
2727
r"""
28-
varfish-server-worker db genes build \
28+
annonars gene import \
2929
--path-out-rocksdb $(dirname {output.rocksdb_identity}) \
3030
--path-in-acmg {input.acmg_sf} \
3131
--path-in-gnomad-constraints {input.gnomad_constraints} \
@@ -34,16 +34,16 @@ rule output_worker_genes: # -- build genes protobuf file
3434
--path-in-ncbi {input.ncbi}
3535
3636
varfish-db-downloader tpl \
37-
--template rules/output/worker/genes.spec.yaml \
37+
--template rules/output/annonars/genes.spec.yaml \
3838
--value today={TODAY} \
3939
\
40-
--value version={wildcards.v_acmg_sf}+{wildcards.v_gnomad_constraints}+{wildcards.v_dbnsfp}+{wildcards.date}+{wildcards.v_worker} \
40+
--value version={wildcards.v_acmg_sf}+{wildcards.v_gnomad_constraints}+{wildcards.v_dbnsfp}+{wildcards.date}+{wildcards.v_annonars} \
4141
--value v_acmg_sf={wildcards.v_acmg_sf} \
4242
--value v_gnomad_constraints={wildcards.v_gnomad_constraints} \
4343
--value v_dbnsfp={wildcards.v_dbnsfp} \
4444
--value date={wildcards.date} \
4545
\
46-
--value v_worker={wildcards.v_worker} \
46+
--value v_annonars={wildcards.v_annonars} \
4747
--value v_downloader={PV.downloader} \
4848
> {output.spec_yaml}
4949
"""

rules/output/worker/genes.spec.yaml renamed to rules/output/annonars/genes.spec.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
dc.identifier: worker/genes:{{ version }}
2-
dc.title: VarFish Worker genes database
1+
dc.identifier: annonars/genes:{{ version }}
2+
dc.title: annonars genes database
33
dc.creator: VarFish Development Team
44
dc.format: application/x-rocksdb
55
dc.date: {{ today }}
66
x-version: {{ version }}
77
dc.description: |
8-
Gene information from the following databases, aggregated using VarFish worker
9-
v{{ v_worker }} in varfish-downloader v{{ v_downloader }}:
8+
Gene information from the following databases, aggregated using annonars
9+
v{{ v_annonars }} in varfish-downloader v{{ v_downloader }}:
1010
1111
- ACMG Supplementary Findings Gene List {{ v_acmg_sf }}
1212
- gnomAD constraints v{{ v_gnomad_constraints }}

0 commit comments

Comments
 (0)