Skip to content

Commit 53e4209

Browse files
authored
feat: adding AlphaMissense scores (#60)
1 parent d42843a commit 53e4209

File tree

13 files changed

+141
-0
lines changed

13 files changed

+141
-0
lines changed

Snakefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ rule all:
8787
# genes
8888
f"work/download/genes/rcnv/2022/Collins_rCNV_2022.dosage_sensitivity_scores.tsv.gz",
8989
f"work/download/genes/orphapacket/{DV.orphapacket}/orphapacket.tar.gz",
90+
"work/download/genes/alphamissense/1/AlphaMissense_gene_hg38.tsv.gz",
9091
f"work/genes/dbnsfp/{DV.dbnsfp}/genes.tsv.gz",
9192
"work/genes/decipher/v3/decipher_hi_prediction.tsv.gz",
9293
f"work/genes/ensembl/{DV.ensembl}/ensembl_xlink.tsv",
@@ -143,6 +144,8 @@ rule all:
143144
f"output/full/mehari/freqs-grch38-{DV.gnomad_v4}+{DV.gnomad_v4}+{DV.gnomad_mtdna}+{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",
144145
# -- annonars data
145146
# ----- sequence variant annotations
147+
f"output/full/annonars/alphamissense-grch37-{DV.alphamissense}+{PV.annonars}/rocksdb/IDENTITY",
148+
f"output/full/annonars/alphamissense-grch38-{DV.alphamissense}+{PV.annonars}/rocksdb/IDENTITY",
146149
f"output/full/annonars/cadd-grch37-{DV.cadd}+{PV.annonars}/rocksdb/IDENTITY",
147150
f"output/full/annonars/cadd-grch38-{DV.cadd}+{PV.annonars}/rocksdb/IDENTITY",
148151
f"output/full/annonars/dbsnp-grch37-{DV.dbsnp}+{PV.annonars}/rocksdb/IDENTITY",
@@ -335,6 +338,7 @@ rule all:
335338
# Misc rules.
336339
include: "rules/work/misc/hpo.smk"
337340
# Gene-related rules.
341+
include: "rules/work/genes/alphamissense.smk"
338342
include: "rules/work/genes/dbnsfp.smk"
339343
include: "rules/work/genes/clingen.smk"
340344
include: "rules/work/genes/decipher.smk"
@@ -359,6 +363,7 @@ include: "rules/work/annos/features/refseq.smk"
359363
include: "rules/work/annos/features/tads.smk"
360364
include: "rules/work/annos/features/ucsc.smk"
361365
# Sequence variants and annotations.
366+
include: "rules/work/annos/seqvars/alphamissense.smk"
362367
include: "rules/work/annos/seqvars/cadd.smk"
363368
include: "rules/work/annos/seqvars/dbnsfp.smk"
364369
include: "rules/work/annos/seqvars/dbscsnv.smk"
@@ -380,6 +385,7 @@ include: "rules/output/mehari/freqs.smk"
380385
# ---- viguno
381386
include: "rules/output/viguno/hpo.smk"
382387
# ---- annonars
388+
include: "rules/output/annonars/alphamissense.smk"
383389
include: "rules/output/annonars/cadd.smk"
384390
include: "rules/output/annonars/cons.smk"
385391
include: "rules/output/annonars/dbnsfp.smk"

download_urls.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
- url: https://storage.googleapis.com/dm_alphamissense/AlphaMissense_hg19.tsv.gz
2+
- url: https://storage.googleapis.com/dm_alphamissense/AlphaMissense_hg38.tsv.gz
3+
- url: https://storage.googleapis.com/dm_alphamissense/AlphaMissense_gene_hg38.tsv.gz
4+
15
- url: https://ftp.ncbi.nlm.nih.gov/genomes/all/annotation_releases/9606/105.20201022/GCF_000001405.25_GRCh37.p13/GCF_000001405.25_GRCh37.p13_genomic.gff.gz
26
excerpt_strategy:
37
strategy: gz-head
@@ -77,6 +81,7 @@
7781

7882
- comment: The curation activity summary report is built in real-time.
7983
url: https://search.clinicalgenome.org/kb/reports/curation-activity-summary-report
84+
skip_upstream_check: true # does not work reliably in tests
8085

8186
- url: https://github.com/bihealth/annonars-data-clinvar/releases/download/clinvar-weekly-20230625/clinvar-strucvar-grch37-2023-0625+0.6.3.tar.gz
8287
excerpt_strategy:
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:59e4ae2448406ca7d201aa2bb7508bea90bf154a1c39a59ed7df9e60aeab5799
3+
size 1126
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:b304f0ef4fecce99a54b68348ff334923254fd23d27261bf356222b81997beb7
3+
size 74
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:f962ac005fbee1508151625464b952876ced8d21231627942159d944b2b1bd39
3+
size 1569
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:5ee9e2f439cea6047377e83f67124fdca50c925bc64a588fc03af4991a608bbf
3+
size 79
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:c97ac023f6607dbd402e33dfea71118301b5d965dc758f7760dec4e3a9f40ccf
3+
size 1141
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:4fdab28332daf40f4567d25b03de660c6db2fe073ed156f7c58c332d78c12367
3+
size 74
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
## Rules to create annonars RocksDB for AlphaMissense.
2+
3+
4+
def input_output_annonars_alphamissense(wildcards):
    """Input function for ``rule output_annonars_alphamissense``.

    Translates the ``genome_release`` wildcard into the genome name used in
    the AlphaMissense file names (``grch37`` -> ``hg19``, any other value ->
    ``hg38``) and returns the path of the corresponding downloaded TSV.

    :param wildcards: wildcards of the requesting rule; ``genome_release``
        and ``v_alphamissense`` are read.
    :return: path to ``AlphaMissense_<genome>.tsv.gz`` below
        ``work/download/annos/alphamissense/<v_alphamissense>/<genome>/``.
    """
    genome = "hg19" if wildcards.genome_release == "grch37" else "hg38"
    # Take the version directory from the wildcard instead of a literal "1" so
    # that the imported file always matches the version the output is labelled
    # with; for version "1" the resulting path is unchanged.
    return (
        f"work/download/annos/alphamissense/{wildcards.v_alphamissense}/"
        f"{genome}/AlphaMissense_{genome}.tsv.gz"
    )
11+
12+
13+
rule output_annonars_alphamissense: # -- build AlphaMissense RocksDB with annonars
# Imports the AlphaMissense TSV returned by the input function into a RocksDB
# via "annonars tsv import", then renders the accompanying spec.yaml from the
# template rules/output/annonars/alphamissense.spec.yaml.
14+
input:
15+
input_output_annonars_alphamissense,
16+
# The IDENTITY file is the tracked RocksDB output; the shell command passes its
# parent directory to annonars as the RocksDB output path.
output:
17+
rocksdb_identity=(
18+
"output/full/annonars/alphamissense-{genome_release}-{v_alphamissense}+{v_annonars}/rocksdb/IDENTITY"
19+
),
20+
spec_yaml=(
21+
"output/full/annonars/alphamissense-{genome_release}-{v_alphamissense}+{v_annonars}/spec.yaml"
22+
),
23+
# Thread count defaults to 96; override with the THREADS_ANNONARS_IMPORT
# environment variable.
threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96"))
24+
resources:
25+
# Runtime defaults to "48h"; override with the RUNTIME_ANNONARS_IMPORT
# environment variable.
runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"),
26+
mem_mb_per_cpu=2000,
27+
# Wildcards are restricted to the RE_GENOME / RE_VERSION patterns, which are
# defined elsewhere (not in this file).
wildcard_constraints:
28+
genome_release=RE_GENOME,
29+
v_alphamissense=RE_VERSION,
30+
v_annonars=RE_VERSION,
31+
# Step 1: TSV import keyed on the columns named Chrom/Pos/Ref/Alt.
# Step 2: render spec.yaml, passing the date, genome release and versions.
# NOTE(review): --skip-row-count 3 presumably skips leading header/comment
# lines of the upstream TSV -- confirm against the downloaded file.
shell:
32+
r"""
33+
annonars tsv import \
34+
--path-in-tsv {input} \
35+
--path-out-rocksdb $(dirname {output.rocksdb_identity}) \
36+
\
37+
--col-chrom Chrom \
38+
--col-start Pos \
39+
--col-ref Ref \
40+
--col-alt Alt \
41+
\
42+
--db-name AlphaMissense \
43+
--db-version {wildcards.v_alphamissense} \
44+
--genome-release {wildcards.genome_release} \
45+
\
46+
--inference-row-count 100000 \
47+
--skip-row-count 3 \
48+
--add-default-null-values
49+
50+
varfish-db-downloader tpl \
51+
--template rules/output/annonars/alphamissense.spec.yaml \
52+
--value today={TODAY} \
53+
--value genome_release={wildcards.genome_release} \
54+
\
55+
--value version={wildcards.v_alphamissense}+{wildcards.v_annonars} \
56+
--value v_alphamissense={wildcards.v_alphamissense} \
57+
\
58+
--value v_annonars={wildcards.v_annonars} \
59+
--value v_downloader={PV.downloader} \
60+
> {output.spec_yaml}
61+
"""
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Metadata describing the annonars AlphaMissense RocksDB build.
dc.identifier: annonars/seqvars/alphamissense:{{ version }}-{{ genome_release }}
dc.title: annonars AlphaMissense RocksDB Database
dc.creator: DeepMind
dc.contributor:
  - VarFish Developer Teams
dc.format: application/x-rocksdb
# Substituted values are quoted so that they are always read back as strings;
# an all-digit date would otherwise be parsed as an integer.
dc.date: "{{ today }}"
x-version: "{{ version }}"
x-genome-release: "{{ genome_release }}"
dc.description: |
  RocksDB with the information from the AlphaMissense_hgGENOME.tsv.gz
  files for the canonical transcripts.
dc.source:
  - PMID:37733863
  - https://storage.googleapis.com/dm_alphamissense/README.pdf
x-created-from:
  - name: AlphaMissense
    # Quoted so that a purely numeric version such as 1 stays a string.
    version: "{{ v_alphamissense }}"

0 commit comments

Comments
 (0)