varfish-org
diff --git a/‎Snakefile
Lines changed: 13 additions & 9 deletions b/‎Snakefile
Lines changed: 13 additions & 9 deletions
diff --git a/‎data/acmg/3.1/acmg.spec.json
Lines changed: 0 additions & 21 deletions b/‎data/acmg/3.1/acmg.spec.json
Lines changed: 0 additions & 21 deletions
diff --git a/‎data/acmg/3.1/acmg.spec.yaml
Lines changed: 40 additions & 0 deletions b/‎data/acmg/3.1/acmg.spec.yaml
Lines changed: 40 additions & 0 deletions
diff --git a/‎data/patho-mms/20220730/patho-mms-grch37.bed.spec.json
Lines changed: 0 additions & 21 deletions b/‎data/patho-mms/20220730/patho-mms-grch37.bed.spec.json
Lines changed: 0 additions & 21 deletions
diff --git a/‎data/patho-mms/20220730/patho-mms-grch37.spec.yaml
Lines changed: 25 additions & 0 deletions b/‎data/patho-mms/20220730/patho-mms-grch37.spec.yaml
Lines changed: 25 additions & 0 deletions
diff --git a/‎data/patho-mms/20220730/patho-mms-grch38.bed.spec.json
Lines changed: 0 additions & 21 deletions b/‎data/patho-mms/20220730/patho-mms-grch38.bed.spec.json
Lines changed: 0 additions & 21 deletions
diff --git a/‎data/patho-mms/20220730/patho-mms-grch38.spec.yaml
Lines changed: 25 additions & 0 deletions b/‎data/patho-mms/20220730/patho-mms-grch38.spec.yaml
Lines changed: 25 additions & 0 deletions
diff --git a/‎data/spec-tpl.yaml
Lines changed: 17 additions & 0 deletions b/‎data/spec-tpl.yaml
Lines changed: 17 additions & 0 deletions
diff --git a/‎rules/output/annonars/cadd.smk
Lines changed: 17 additions & 2 deletions b/‎rules/output/annonars/cadd.smk
Lines changed: 17 additions & 2 deletions
diff --git a/‎rules/output/annonars/cadd.spec.yaml
Lines changed: 19 additions & 0 deletions b/‎rules/output/annonars/cadd.spec.yaml
Lines changed: 19 additions & 0 deletions
@@ -6,7 +6,12 @@
 # ``varfish-server-worker`` and is used in the backend for filtering and/or exposed to the
 # user via a REST API.
 
-from varfish_db_downloader.versions import DATA_VERSIONS as DV, PACKAGE_VERSIONS as PV
+from varfish_db_downloader.versions import (
+    DATA_VERSIONS as DV,
+    PACKAGE_VERSIONS as PV,
+    TODAY,
+    RUNS_IN_CI,
+)
 
 # The prefix to use for all shell commands.
 SHELL_PREFIX = "export LC_ALL=C; set -x -euo pipefail;"
@@ -22,16 +27,11 @@ RE_VERSION = r"\w+(\.\w+)*"
 # Test Mode
 # ===============================================================================================
 
-import os
-
 # Activate test mode by prepending the path to the "test-mode-bin" directory to the PATH.
-if os.environ.get("CI", "false").lower() == "true":
+if RUNS_IN_CI:
     cwd = os.getcwd()
     old_path = os.environ["PATH"]
     os.environ["PATH"] = f"{cwd}/test-mode-bin:{old_path}"
-    RUNS_IN_CI = True
-else:
-    RUNS_IN_CI = False
 
 
 # ===============================================================================================
@@ -115,7 +115,8 @@ rule all:
         # ---- frequencies (via annonars)
         f"output/mehari/freqs-grch37-{DV.gnomad_v2}+{DV.gnomad_v2}+{DV.gnomad_mtdna}+{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",
         f"output/mehari/freqs-grch38-{DV.gnomad_v3}+{DV.gnomad_v2}+{DV.gnomad_mtdna}+{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",
-        # ---- annonars data
+        # -- annonars data
+        # ----- sequence variant annotations
         f"output/annonars/cadd-grch37-{DV.cadd}+{PV.annonars}/rocksdb/IDENTITY",
         f"output/annonars/cadd-grch38-{DV.cadd}+{PV.annonars}/rocksdb/IDENTITY",
         f"output/annonars/dbsnp-grch37-{DV.dbsnp}+{PV.annonars}/rocksdb/IDENTITY",
@@ -134,10 +135,13 @@ rule all:
         f"output/annonars/gnomad-genomes-grch38-{DV.gnomad_v3}+{PV.annonars}/rocksdb/IDENTITY",
         f"output/annonars/helixmtdb-grch37-{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",
         f"output/annonars/helixmtdb-grch38-{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",
+        # ----- conservation
         f"output/annonars/cons-grch37-{DV.ucsc_cons_37}+{PV.annonars}/rocksdb/IDENTITY",
         f"output/annonars/cons-grch38-{DV.ucsc_cons_38}+{PV.annonars}/rocksdb/IDENTITY",
-        # ----- Genes
+        # ----- genes
         f"output/worker/genes-{DV.acmg_sf}+{DV.gnomad_constraints}+{DV.dbnsfp}+{DV.today}+{PV.worker}/rocksdb/IDENTITY",
+        # -- worker data
+        # ----- genes
         f"output/worker/genes-xlink-{DV.today}/genes-xlink.tsv",
         f"output/worker/genes-txs-grch37-{DV.mehari_tx}/mehari-data-txs-grch37-{DV.mehari_tx}.bin.zst",
         f"output/worker/genes-txs-grch38-{DV.mehari_tx}/mehari-data-txs-grch38-{DV.mehari_tx}.bin.zst",
 
@@ -0,0 +1,40 @@
+dc.format: text/tsv
+dc.identifier: genes/acmg/sf:3.1
+dc.title: ACMG Secondary Findings (SF) Gene List (v3.1)
+dc.description: >
+  This is version 3.1 of the ACMG gene list for reporting incidental
+  findings.  The file was curated from PMID:35802134 as gene symbols
+  and then translated to ENSEMBL and Entrez/NCBI gene ID with the
+  HGNC BioMart.
+dc.date: 2022-02-03
+dc.creator: American College of Medical Genetics and Genomics
+dc.contributor:
+  - VarFish Developer Team
+dc.source:
+  - PMID:35802134
+  - https://www.ncbi.nlm.nih.gov/clinvar/docs/acmg/
+  - https://biomart.genenames.org/
+
+tsv.columns:
+  - name: hgnc_id
+    description: HGNC gene ID.
+  - name: ensembl_gene_id
+    description: ENSEMBL gene ID.
+  - name: ncbi_gene_id
+    description: NCBI Gene ID.
+  - name: gene_symbol
+    description: HGNC approved gene symbol.
+  - name: mim_gene_id
+    description: OMIM gene ID.
+  - name: disease_phenotype
+    description: Name of the relevant disorder(s).
+  - name: disorder_mim
+    description: MIM code of the relevant disorder(s).
+  - name: phenotype_category
+    description: Phenotype category.
+  - name: inheritance
+    description: Mode(s) of inheritance.
+  - name: sf_list_version
+    description: ACMG SF list version that this gene first appeared in.
+  - name: variants_to_report
+    description: Comment on which variants are to be reported.
@@ -0,0 +1,25 @@
+dc.format: text/tsv
+dc.identifier: features/patho-mms:wetzel-darbro-2022/grch37
+dc.title: >
+  A comprehensive list of human microdeletion and microduplication syndromes
+  (Wetzel & Darbro, 2022) for GRCh37.
+dc.description: >
+  This TSV file contains regions with microdeletion and microduplication
+  syndromes as described by Wetzel & Darbro (2022).
+dc.date: 2022-07-30
+dc.creator: Wetzel & Darbro (2022)
+dc.contributor:
+  - VarFish Developer Team
+dc.source:
+  - PMID:36435749
+  - https://github.com/aswetzel/MMS
+
+tsv.columns:
+  - name: chrom
+    description: Chromosome name without chr prefix.
+  - name: begin
+    description: 0-based start position.
+  - name: end
+    description: 0-based end position.
+  - name: name
+    description: Name of the syndrome.
@@ -0,0 +1,25 @@
+dc.format: text/tsv
+dc.identifier: features/patho-mms:wetzel-darbro-2022/grch38
+dc.title: >
+  A comprehensive list of human microdeletion and microduplication syndromes
+  (Wetzel & Darbro, 2022) for GRCh38.
+dc.description: >
+  This TSV file contains regions with microdeletion and microduplication
+  syndromes as described by Wetzel & Darbro (2022).
+dc.date: 2022-07-30
+dc.creator: Wetzel & Darbro (2022)
+dc.contributor:
+  - VarFish Developer Team
+dc.source:
+  - PMID:36435749
+  - https://github.com/aswetzel/MMS
+
+tsv.columns:
+  - name: chrom
+    description: Chromosome name with chr prefix.
+  - name: begin
+    description: 0-based start position.
+  - name: end
+    description: 0-based end position.
+  - name: name
+    description: Name of the syndrome.
@@ -0,0 +1,17 @@
+dc.format: THE__FORMAT
+dc.identifier: THE__IDENTIFIER
+dc.title: >
+  THE__TITLE
+dc.description: >
+  THE__DESCRIPTION
+dc.date: THE__DATE
+dc.creator: THE__CREATOR
+dc.contributor:
+  - VarFish Developer Team
+dc.source:
+  - THE__SOURCE
+  - THE__SOURCE
+
+tsv.columns:
+  - name: THE__NAME
+    description: THE__DESCRIPTION
@@ -49,7 +49,10 @@ rule output_annonars_cadd:  # -- build CADD RocksDB with annonars
     input:
         unpack(input_output_annonars_cadd),
     output:
-        "output/annonars/cadd-{genome_release}-{v_cadd}+{v_annonars}/rocksdb/IDENTITY",
+        rocksdb_identity=(
+            "output/annonars/cadd-{genome_release}-{v_cadd}+{v_annonars}/rocksdb/IDENTITY"
+        ),
+        spec_yaml=("output/annonars/cadd-{genome_release}-{v_cadd}+{v_annonars}/spec.yaml"),
     threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96"))
     resources:
         runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"),
@@ -63,7 +66,7 @@ rule output_annonars_cadd:  # -- build CADD RocksDB with annonars
         annonars tsv import \
             --path-in-tsv {input.indels} \
             --path-in-tsv {input.snvs} \
-            --path-out-rocksdb $(dirname {output}) \
+            --path-out-rocksdb $(dirname {output.rocksdb_identity}) \
             \
             --col-chrom Chrom \
             --col-start Pos \
@@ -78,4 +81,16 @@ rule output_annonars_cadd:  # -- build CADD RocksDB with annonars
             --skip-row-count 1 \
             --add-default-null-values \
             --path-schema-json rules/output/annonars/cadd-schema-{wildcards.genome_release}.json
+
+        varfish-db-downloader tpl \
+            --template rules/output/annonars/cadd.spec.yaml \
+            --value today={TODAY} \
+            --value genome_release={wildcards.genome_release} \
+            \
+            --value version={wildcards.v_cadd}+{wildcards.v_annonars} \
+            --value v_cadd={wildcards.v_cadd} \
+            \
+            --value v_annonars={wildcards.v_annonars} \
+            --value v_downloader={PV.downloader} \
+        > {output.spec_yaml}
         """
@@ -0,0 +1,19 @@
+dc.identifier: annonars/seqvars/cadd:{{ version }}-{{ genome_release }}
+dc.title: annona-rs CADD RocksDB Database
+dc.creator: Kircher Lab
+dc.contributor:
+  - VarFish Developer Team
+dc.format: application/x-rocksdb
+dc.date: {{ today }}
+x-version: {{ version }}
+x-genome-release: {{ genome_release }}
+dc.description: |
+  RocksDB with the information from the CADD score TSV files in their "incl. all annotations"
+  variant using the annonars package v{{ v_annonars }} in varfish-db-downloader v{{ v_downloader }}.
+dc.source:
+  - PMID:33618777
+  - PMID:30371827
+  - https://cadd.gs.washington.edu/
+x-created-from:
+  - name: CADD
+    version: {{ v_cadd }}