Skip to content

Commit 3fd72dd

Browse files
authored
feat: import of gnomAD SV data into RocksDB (#66)
1 parent 0797f2d commit 3fd72dd

File tree

107 files changed

+629
-17
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

107 files changed

+629
-17
lines changed

Snakefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,10 @@ rule all:
161161
f"output/full/annonars/gnomad-genomes-grch38-{DV.gnomad_v3}+{PV.annonars}/rocksdb/IDENTITY",
162162
f"output/full/annonars/helixmtdb-grch37-{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",
163163
f"output/full/annonars/helixmtdb-grch38-{DV.helixmtdb}+{PV.annonars}/rocksdb/IDENTITY",
164+
f"output/full/annonars/gnomad-sv-exomes-grch37-{DV.exac_cnv}+{PV.annonars}/rocksdb/IDENTITY",
165+
f"output/full/annonars/gnomad-sv-exomes-grch38-{DV.gnomad_cnv4}+{PV.annonars}/rocksdb/IDENTITY",
166+
f"output/full/annonars/gnomad-sv-genomes-grch37-{DV.gnomad_sv}+{PV.annonars}/rocksdb/IDENTITY",
167+
f"output/full/annonars/gnomad-sv-genomes-grch38-{DV.gnomad_sv4}+{PV.annonars}/rocksdb/IDENTITY",
164168
# ----- conservation
165169
f"output/full/annonars/cons-grch37-{DV.ucsc_cons_37}+{PV.annonars}/rocksdb/IDENTITY",
166170
f"output/full/annonars/cons-grch38-{DV.ucsc_cons_38}+{PV.annonars}/rocksdb/IDENTITY",
@@ -364,6 +368,7 @@ include: "rules/work/annos/strucvars/exac.smk"
364368
include: "rules/work/annos/strucvars/g1k.smk"
365369
include: "rules/work/annos/strucvars/gnomad.smk"
366370
include: "rules/work/annos/strucvars/clinvar.smk"
371+
include: "rules/work/annos/strucvars/gnomad_sv4.smk"
367372
# -- output directory ---------------------------------------------------------------------------
368373
# ---- mehari
369374
include: "rules/output/mehari/freqs.smk"
@@ -378,6 +383,7 @@ include: "rules/output/annonars/dbsnp.smk"
378383
include: "rules/output/annonars/gnomad_exomes.smk"
379384
include: "rules/output/annonars/gnomad_genomes.smk"
380385
include: "rules/output/annonars/gnomad_mtdna.smk"
386+
include: "rules/output/annonars/gnomad_sv.smk"
381387
include: "rules/output/annonars/helix.smk"
382388
include: "rules/output/annonars/genes.smk"
383389
# ---- worker

download_urls.yml

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,47 @@
1+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/legacy/exac_browser/ExAC.r1.sites.vep.vcf.gz
2+
excerpt_strategy:
3+
strategy: gz-head
4+
count: 225
5+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/papers/2019-sv/gnomad_v2.1_sv.sites.vcf.gz
6+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/papers/2019-sv/gnomad_v2.1_sv.controls_only.sites.vcf.gz
7+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/papers/2019-sv/gnomad_v2.1_sv.nonneuro.sites.vcf.gz
8+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/exome_cnv/gnomad.v4.0.cnv.all.vcf.gz
9+
excerpt_strategy:
10+
strategy: gz-head
11+
count: 160
12+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/exome_cnv/gnomad.v4.0.cnv.non_neuro.vcf.gz
13+
excerpt_strategy:
14+
strategy: gz-head
15+
count: 160
16+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/exome_cnv/gnomad.v4.0.cnv.non_neuro_controls.vcf.gz
17+
excerpt_strategy:
18+
strategy: gz-head
19+
count: 160
20+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr1.vcf.gz
21+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr2.vcf.gz
22+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr3.vcf.gz
23+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr4.vcf.gz
24+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr5.vcf.gz
25+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr6.vcf.gz
26+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr7.vcf.gz
27+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr8.vcf.gz
28+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr9.vcf.gz
29+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr10.vcf.gz
30+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr11.vcf.gz
31+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr12.vcf.gz
32+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr13.vcf.gz
33+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr14.vcf.gz
34+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr15.vcf.gz
35+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr16.vcf.gz
36+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr17.vcf.gz
37+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr18.vcf.gz
38+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr19.vcf.gz
39+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr20.vcf.gz
40+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr21.vcf.gz
41+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chr22.vcf.gz
42+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chrX.vcf.gz
43+
- url: https://storage.googleapis.com/gcp-public-data--gnomad/release/4.0/genome_sv/gnomad.v4.0.sv.chrY.vcf.gz
44+
145
- url: https://www.deciphergenomics.org/files/downloads/HI_Predictions_Version3.bed.gz
246

347
- url: ftp://ftp.clinicalgenome.org/ClinGen_region_curation_list_GRCh37.tsv
@@ -204,7 +248,6 @@
204248
- url: http://dgv.tcag.ca/dgv/docs/DGV.GS.hg38.gff3
205249
- url: ftp://ftp.broadinstitute.org/pub/ExAC_release/release0.3.1/cnv/exac-final.autosome-1pct-sq60-qc-prot-coding.cnv.bed
206250
- url: https://ftp-trace.ncbi.nih.gov/1000genomes/ftp/phase3/integrated_sv_map/ALL.wgs.integrated_sv_map_v2.20130502.svs.genotypes.vcf.gz
207-
- url: https://storage.googleapis.com/gcp-public-data--gnomad/papers/2019-sv/gnomad_v2.1_sv.sites.vcf.gz
208251

209252
- url: https://gnomad-public-us-east-1.s3.amazonaws.com/release/2.1.1/vcf/genomes/gnomad.genomes.r2.1.1.sites.1.vcf.bgz
210253
- url: https://gnomad-public-us-east-1.s3.amazonaws.com/release/2.1.1/vcf/genomes/gnomad.genomes.r2.1.1.sites.2.vcf.bgz
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:74cdc80bc74dd499a52d06dea268987036ea52bdab9d9add9c5e3816a4629f43
3+
size 420152
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:952a07ed29b3360170a993dc12a1aa6773820233779762704655446b32baa416
3+
size 414

excerpt-data/05e93e6f1f5d60e6/url.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:b4c85cf4f48048d2667f911439b4be3669a0d3f42a2943c0c512fd8443cd100a
3+
size 104
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:d891ea2a13db923a679f0f38578a4ed30518bdb9ea41b21585e9250d8d5c19e7
3+
size 489677
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:6ccc9925e01347b6290c9a7d57bb3d1ece3a68eac2c44cf840dcddf84c0b2307
3+
size 296

excerpt-data/0be9b2561c9397f2/url.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:5b0829deecc193e71383786fd59e13dd10ff79b07c77616ea04b8b0893c6ebf5
3+
size 105
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:cae509c9c8a04c8be9606de238b74e158873bc1e0a22480701a0dafa40df2849
3+
size 454927
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:bb6fa1c42693ed4db554b389c579c70f1f29694d52db290d2718c5246068e5ef
3+
size 170

0 commit comments

Comments
 (0)