Skip to content

Commit 62d2f9a

Browse files
authored
feat: upgrade dbNSFP to v4.5 (#77) (#78)
1 parent 53e4209 commit 62d2f9a

File tree

10 files changed

+171
-27
lines changed

10 files changed

+171
-27
lines changed

download_urls.yml

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,19 @@
1+
# dbNSFP v4.5a
2+
- url: https://dbnsfp.s3.amazonaws.com/dbNSFP4.5a.zip
3+
excerpt_strategy:
4+
strategy: manual
5+
count: null
6+
# dbNSFP v4.5c
7+
- url: https://dbnsfp.s3.amazonaws.com/dbNSFP4.5c.zip
8+
excerpt_strategy:
9+
strategy: manual
10+
count: null
11+
- url: ftp://dbnsfp:dbnsfp@dbnsfp.softgenetics.com/dbscSNV1.1.zip
12+
skip_upstream_check: true # does not work reliably in tests
13+
excerpt_strategy:
14+
strategy: manual
15+
count: null
16+
117
- url: https://storage.googleapis.com/dm_alphamissense/AlphaMissense_hg19.tsv.gz
218
- url: https://storage.googleapis.com/dm_alphamissense/AlphaMissense_hg38.tsv.gz
319
- url: https://storage.googleapis.com/dm_alphamissense/AlphaMissense_gene_hg38.tsv.gz
@@ -168,20 +184,6 @@
168184
- url: https://kircherlab.bihealth.org/download/CADD/v1.6/GRCh38/gnomad.genomes.r3.0.indel_inclAnno.tsv.gz
169185
- url: https://kircherlab.bihealth.org/download/CADD/v1.6/GRCh38/gnomad.genomes.r3.0.indel_inclAnno.tsv.gz.tbi
170186

171-
- url: https://usf.box.com/shared/static/bvfzmkpgtphvbmmrvb2iyl2jl21o49kc
172-
excerpt_strategy:
173-
strategy: manual
174-
count: null
175-
- url: https://usf.box.com/shared/static/a84zcdlkx2asq2nxh6xr2gdb4csmyvhk
176-
excerpt_strategy:
177-
strategy: manual
178-
count: null
179-
- url: ftp://dbnsfp:dbnsfp@dbnsfp.softgenetics.com/dbscSNV1.1.zip
180-
skip_upstream_check: true # does not work reliably in tests
181-
excerpt_strategy:
182-
strategy: manual
183-
count: null
184-
185187
- url: https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
186188
excerpt_strategy:
187189
strategy: no-excerpt
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:c49b81715634d6d7ebab039b7208140993c1cc83e52504b63e692dc213dd465b
3+
size 66230685
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:e8bceba72d8a6d7c168d2ebe597c8cae93c09c280f09fce953f9b8276cdc26dc
3+
size 47
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:e20e578d5862f192e641983853c2045c56c236dda6f1ece7af38a61cb0253c33
3+
size 66370719
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:6c7dbd3928e94669339ac87a039f7f0ac061c911644fd0c762b81c7f3fcc544e
3+
size 47

rules/output/annonars/dbnsfp-schema-4.4a.json renamed to rules/output/annonars/dbnsfp-schema-4.5a.json

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,82 @@
484484
"name": "VARITY_ER_LOO_rankscore",
485485
"typ": "Float"
486486
},
487+
{
488+
"name": "ESM1b_score",
489+
"typ": "String"
490+
},
491+
{
492+
"name": "ESM1b_rankscore",
493+
"typ": "Float"
494+
},
495+
{
496+
"name": "ESM1b_pred",
497+
"typ": "String"
498+
},
499+
{
500+
"name": "EVE_score",
501+
"typ": "String"
502+
},
503+
{
504+
"name": "EVE_rankscore",
505+
"typ": "Float"
506+
},
507+
{
508+
"name": "EVE_Class10_pred",
509+
"typ": "String"
510+
},
511+
{
512+
"name": "EVE_Class20_pred",
513+
"typ": "String"
514+
},
515+
{
516+
"name": "EVE_Class25_pred",
517+
"typ": "String"
518+
},
519+
{
520+
"name": "EVE_Class30_pred",
521+
"typ": "String"
522+
},
523+
{
524+
"name": "EVE_Class40_pred",
525+
"typ": "String"
526+
},
527+
{
528+
"name": "EVE_Class50_pred",
529+
"typ": "String"
530+
},
531+
{
532+
"name": "EVE_Class60_pred",
533+
"typ": "String"
534+
},
535+
{
536+
"name": "EVE_Class70_pred",
537+
"typ": "String"
538+
},
539+
{
540+
"name": "EVE_Class75_pred",
541+
"typ": "String"
542+
},
543+
{
544+
"name": "EVE_Class80_pred",
545+
"typ": "String"
546+
},
547+
{
548+
"name": "EVE_Class90_pred",
549+
"typ": "String"
550+
},
551+
{
552+
"name": "AlphaMissense_score",
553+
"typ": "String"
554+
},
555+
{
556+
"name": "AlphaMissense_rankscore",
557+
"typ": "Float"
558+
},
559+
{
560+
"name": "AlphaMissense_pred",
561+
"typ": "String"
562+
},
487563
{
488564
"name": "Aloft_Fraction_transcripts_affected",
489565
"typ": "String"

rules/output/annonars/dbnsfp-schema-4.4c.json renamed to rules/output/annonars/dbnsfp-schema-4.5c.json

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,70 @@
432432
"name": "VARITY_ER_LOO_rankscore",
433433
"typ": "Float"
434434
},
435+
{
436+
"name": "ESM1b_score",
437+
"typ": "String"
438+
},
439+
{
440+
"name": "ESM1b_rankscore",
441+
"typ": "Float"
442+
},
443+
{
444+
"name": "ESM1b_pred",
445+
"typ": "String"
446+
},
447+
{
448+
"name": "EVE_score",
449+
"typ": "String"
450+
},
451+
{
452+
"name": "EVE_rankscore",
453+
"typ": "Float"
454+
},
455+
{
456+
"name": "EVE_Class10_pred",
457+
"typ": "String"
458+
},
459+
{
460+
"name": "EVE_Class20_pred",
461+
"typ": "String"
462+
},
463+
{
464+
"name": "EVE_Class25_pred",
465+
"typ": "String"
466+
},
467+
{
468+
"name": "EVE_Class30_pred",
469+
"typ": "String"
470+
},
471+
{
472+
"name": "EVE_Class40_pred",
473+
"typ": "String"
474+
},
475+
{
476+
"name": "EVE_Class50_pred",
477+
"typ": "String"
478+
},
479+
{
480+
"name": "EVE_Class60_pred",
481+
"typ": "String"
482+
},
483+
{
484+
"name": "EVE_Class70_pred",
485+
"typ": "String"
486+
},
487+
{
488+
"name": "EVE_Class75_pred",
489+
"typ": "String"
490+
},
491+
{
492+
"name": "EVE_Class80_pred",
493+
"typ": "String"
494+
},
495+
{
496+
"name": "EVE_Class90_pred",
497+
"typ": "String"
498+
},
435499
{
436500
"name": "Aloft_Fraction_transcripts_affected",
437501
"typ": "String"

rules/output/annonars/dbnsfp.smk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ rule output_annonars_dbnsfp: # -- build dbNSFP RocksDB with annonars
3232
--genome-release {wildcards.genome_release} \
3333
--null-values=. \
3434
--inference-row-count 100000 \
35-
--path-schema-json rules/output/annonars/dbnsfp-schema-{wildcards.v_dbnsfp}.json \
3635
\
3736
--path-out-rocksdb $(dirname {output.rocksdb_identity}) \
37+
--path-schema-json rules/output/annonars/dbnsfp-schema-{wildcards.v_dbnsfp}.json \
3838
\
3939
$(if [[ "{wildcards.genome_release}" == "grch37" ]]; then \
4040
echo --col-chrom 'hg19_chr'; \

rules/work/annos/seqvars/dbnsfp.smk

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,6 @@
11
## Rules related to dbNSFP.
22

33

4-
#: Download URL for dbNSFP 4.4a
5-
DBNSFP_ACADEMIC_URL = "https://usf.box.com/shared/static/bvfzmkpgtphvbmmrvb2iyl2jl21o49kc"
6-
#: Download URL for dbNSFP 4.4c
7-
DBNSFP_COMMMERCIAL_URL = "https://usf.box.com/shared/static/a84zcdlkx2asq2nxh6xr2gdb4csmyvhk"
8-
9-
104
def files_dbnsfp():
115
"""Helper that returns the files within the dbNSFP archive."""
126
lst = [
@@ -60,11 +54,7 @@ rule annos_seqvars_dbnsfp_download: # -- download dbNSFP ZIP file
6054
threads: 8
6155
shell:
6256
r"""
63-
if [[ "{wildcards.variant}" == a ]]; then
64-
url={DBNSFP_ACADEMIC_URL}
65-
else
66-
url={DBNSFP_COMMMERCIAL_URL}
67-
fi
57+
url=https://dbnsfp.s3.amazonaws.com/dbNSFP4.5{wildcards.variant}.zip
6858
6959
aria2c \
7060
--check-certificate=false \

varfish_db_downloader/versions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ class DataVersions:
130130
ensembl_38="109",
131131
ensembl="110",
132132
today=TODAY,
133-
dbnsfp="4.4",
133+
dbnsfp="4.5",
134134
dbscsnv="1.1",
135135
cadd="1.6",
136136
gnomad_constraints="4.0",

0 commit comments

Comments
 (0)