Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,6 @@ jobs:
unzip qsv-0.112.0-x86_64-unknown-linux-musl.zip
cp qsv_musl-1.2.3 /usr/local/bin/qsv

- name: Install annonars
run: |
sudo bash ./utils/install-annonars.sh
- name: Install python package
run: |
pip install -e .
Expand Down
478 changes: 229 additions & 249 deletions Snakefile

Large diffs are not rendered by default.

32 changes: 25 additions & 7 deletions download_urls.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,16 @@
count: null

# dbNSFP v4.5a
- url: https://dbnsfp.s3.amazonaws.com/dbNSFP4.5a.zip
- url: https://usf.box.com/shared/static/2hzcx5s6p1xui7oen16xqzndfrkt8l9l
excerpt_strategy:
strategy: manual
count: null
# dbNSFP v4.5c
- url: https://dbnsfp.s3.amazonaws.com/dbNSFP4.5c.zip
- url: https://usf.box.com/shared/static/03xsrpna0nzgrytfo2pzk326t8jad4oc
excerpt_strategy:
strategy: manual
count: null
- url: ftp://dbnsfp:dbnsfp@dbnsfp.softgenetics.com/dbscSNV1.1.zip
- url: https://usf.box.com/shared/static/ffwlywsat3q5ijypvunno3rg6steqfs8
skip_upstream_check: true # does not work reliably in tests
excerpt_strategy:
strategy: manual
Expand Down Expand Up @@ -141,16 +141,34 @@
url: https://search.clinicalgenome.org/kb/reports/curation-activity-summary-report
skip_upstream_check: true # does not work reliably in tests

- url: https://github.com/varfish-org/clinvar-data-jsonl/releases/download/clinvar-weekly-20240612/clinvar-data-extract-vars-20240612+0.17.0.tar.gz
- url: https://github.com/varfish-org/clinvar-data-jsonl/releases/download/clinvar-weekly-20250410/clinvar-data-extract-vars-20250410+0.18.5.tar.gz
excerpt_strategy:
strategy: manual
count: null

- url: https://github.com/bihealth/mehari-data-tx/releases/download/v0.4.4/mehari-data-txs-grch37-0.4.4.bin.zst
- url: https://github.com/bihealth/mehari-data-tx/releases/download/v0.10.3/mehari-data-txs-grch37-ensembl-0.10.3.bin.zst
excerpt_strategy:
strategy: no-excerpt
count: null
- url: https://github.com/bihealth/mehari-data-tx/releases/download/v0.4.4/mehari-data-txs-grch38-0.4.4.bin.zst
- url: https://github.com/bihealth/mehari-data-tx/releases/download/v0.10.3/mehari-data-txs-grch38-ensembl-0.10.3.bin.zst
excerpt_strategy:
strategy: no-excerpt
count: null

- url: https://github.com/bihealth/mehari-data-tx/releases/download/v0.10.3/mehari-data-txs-grch37-refseq-0.10.3.bin.zst
excerpt_strategy:
strategy: no-excerpt
count: null
- url: https://github.com/bihealth/mehari-data-tx/releases/download/v0.10.3/mehari-data-txs-grch38-refseq-0.10.3.bin.zst
excerpt_strategy:
strategy: no-excerpt
count: null

- url: https://github.com/bihealth/mehari-data-tx/releases/download/v0.10.3/mehari-data-txs-grch37-ensembl-0.10.3.bin.zst
excerpt_strategy:
strategy: no-excerpt
count: null
- url: https://github.com/bihealth/mehari-data-tx/releases/download/v0.10.3/mehari-data-txs-grch38-ensembl-0.10.3.bin.zst
excerpt_strategy:
strategy: no-excerpt
count: null
Expand Down Expand Up @@ -251,7 +269,7 @@
count: 10000

- url: 'https://ensembl.org/biomart/martservice?query=<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE Query><Query virtualSchemaName = "default" formatter = "TSV" header = "0" uniqueRows = "0" count = "" datasetConfigVersion = "0.6" ><Dataset name = "hsapiens_gene_ensembl" interface = "default" ><Attribute name = "ensembl_gene_id" /><Attribute name = "ensembl_transcript_id" /><Attribute name = "entrezgene_id" /><Attribute name = "external_gene_name" /></Dataset></Query>'
- url: 'https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/json/hgnc_complete_set.json'
- url: 'https://storage.googleapis.com/public-download-files/hgnc/json/json/hgnc_complete_set.json'
skip_upstream_check: true # does not work reliably in tests
excerpt_strategy:
strategy: manual
Expand Down
16 changes: 8 additions & 8 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,23 +33,23 @@ dependencies:
- jq
# Tools for file downloads.
- aria2 >=1.36.0
- wget
# Tool for processing BED files.
- bedops =2
# VCF/BCF/HTSlib/Samtools.
- bcftools =1.17
- htslib =1.17
- samtools =1.17
- bcftools =1.21
- htslib =1.21
- samtools =1.21
# Parallel (de)compression.
- pigz
# Varfish related
# - annonars =0.41.3 # current versions not on bioconda due to build issue, but docker images are available
- viguno =0.3.1
- mehari =0.25.5
- varfish-server-worker =0.13.0
- annonars =0.44.0
- viguno =0.4.0
- mehari =0.35.1
- varfish-server-worker =0.17.2
# S3 uploads
- s5cmd =2.1.0
# async HTTP requests
- httpx =0.25.0
- httpcore =0.18.0
- trio
- qsv
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/0ccc4915e7ecfd38/url.txt
Git LFS file not shown
2 changes: 1 addition & 1 deletion excerpt-data/111d8c6e08038f62/20
Git LFS file not shown
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/18c67f3b60f24667/url.txt
Git LFS file not shown
4 changes: 2 additions & 2 deletions excerpt-data/1963f3c58ea066be/omim_unmapped_terms.tsv
Git LFS file not shown
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/5bcb7090fe0e881b/url.txt
Git LFS file not shown
4 changes: 2 additions & 2 deletions excerpt-data/615312ce3f5fc1bf/OMIMinDO.tsv
Git LFS file not shown
4 changes: 2 additions & 2 deletions excerpt-data/617bebe58c82f24e/CTD_diseases.tsv.gz
Git LFS file not shown
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/649dbe83da5f50d9/url.txt
Git LFS file not shown
4 changes: 2 additions & 2 deletions excerpt-data/652646c24140df2a/mondo.obo
Git LFS file not shown
4 changes: 2 additions & 2 deletions excerpt-data/6f378db589a4bbb9/orphacodes
Git LFS file not shown
4 changes: 2 additions & 2 deletions excerpt-data/91f964d1aa8367a5/20
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/b31e9b26b7aeae3f/dbNSFP4.5c.zip
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/b31e9b26b7aeae3f/url.txt
Git LFS file not shown

This file was deleted.

3 changes: 0 additions & 3 deletions excerpt-data/bdc69c1e4cafdfaa/url.txt

This file was deleted.

3 changes: 3 additions & 0 deletions excerpt-data/c243f918f5fc10d2/dbNSFP4.5a.zip
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/c243f918f5fc10d2/url.txt
Git LFS file not shown
4 changes: 2 additions & 2 deletions excerpt-data/c9c7d6df0e24b954/__index__
Git LFS file not shown
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/caba5539b2dac784/url.txt
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/cbc00fe1a5b22fe8/dbscSNV1.1.zip
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/cbc00fe1a5b22fe8/url.txt
Git LFS file not shown
4 changes: 2 additions & 2 deletions excerpt-data/cdaaf7a3f7595d3d/__index__
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/e8a29ff831bf8ee3/hgnc_complete_set.json
Git LFS file not shown
3 changes: 3 additions & 0 deletions excerpt-data/e8a29ff831bf8ee3/url.txt
Git LFS file not shown
4 changes: 2 additions & 2 deletions excerpt-data/ebc07f725c64907d/__index__
Git LFS file not shown
4 changes: 2 additions & 2 deletions rules/output/annonars/alphamissense.smk
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ rule output_annonars_alphamissense: # -- build AlphaMissense RocksDB with annon
manifest=(
"output/full/annonars/alphamissense-{genome_release}-{v_alphamissense}+{v_annonars}/MANIFEST.txt"
),
threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96"))
threads: THREADS
resources:
runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"),
mem_mb_per_cpu=2000,
mem_mb=MEMORY,
wildcard_constraints:
genome_release=RE_GENOME,
v_alphamissense=RE_VERSION,
Expand Down
4 changes: 2 additions & 2 deletions rules/output/annonars/cadd.smk
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ rule output_annonars_cadd: # -- build CADD RocksDB with annonars
),
spec_yaml=("output/full/annonars/cadd-{genome_release}-{v_cadd}+{v_annonars}/spec.yaml"),
manifest=("output/full/annonars/cadd-{genome_release}-{v_cadd}+{v_annonars}/MANIFEST.txt"),
threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96"))
threads: THREADS
resources:
runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"),
mem_mb_per_cpu=2000,
mem_mb=MEMORY,
wildcard_constraints:
genome_release=RE_GENOME,
v_cadd=RE_VERSION,
Expand Down
4 changes: 2 additions & 2 deletions rules/output/annonars/cons.smk
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ rule output_annonars_cons: # -- build UCSC conservation track RocksDB with anno
),
spec_yaml=("output/full/annonars/cons-{genome_release}-{v_cons}+{v_annonars}/spec.yaml"),
manifest=("output/full/annonars/cons-{genome_release}-{v_cons}+{v_annonars}/MANIFEST.txt"),
threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96"))
threads: THREADS
resources:
runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"),
mem_mb_per_cpu=2000,
mem_mb=MEMORY,
wildcard_constraints:
genome_release=RE_GENOME,
v_cons=RE_VERSION,
Expand Down
4 changes: 2 additions & 2 deletions rules/output/annonars/dbnsfp.smk
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ rule output_annonars_dbnsfp: # -- build dbNSFP RocksDB with annonars
manifest=(
"output/full/annonars/dbnsfp-{genome_release}-{v_dbnsfp}+{v_annonars}/MANIFEST.txt"
),
threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96"))
threads: THREADS
resources:
runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"),
mem_mb_per_cpu=2000,
mem_mb=MEMORY,
wildcard_constraints:
genome_release=RE_GENOME,
v_dbnsfp=RE_VERSION,
Expand Down
4 changes: 2 additions & 2 deletions rules/output/annonars/dbscsnv.smk
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ rule output_annonars_dbscsnv: # -- build dbscSNV RocksDB with annonars
manifest=(
"output/full/annonars/dbscsnv-{genome_release}-{v_dbscsnv}+{v_annonars}/MANIFEST.txt"
),
threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96"))
threads: THREADS
resources:
runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"),
mem_mb_per_cpu=2000,
mem_mb=MEMORY,
wildcard_constraints:
genome_release=RE_GENOME,
v_dbscsnv=RE_VERSION,
Expand Down
4 changes: 2 additions & 2 deletions rules/output/annonars/dbsnp.smk
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ rule output_annonars_dbsnp: # -- build dbSNP RocksDB with annonars
),
spec_yaml=("output/full/annonars/dbsnp-{genome_release}-{v_dbsnp}+{v_annonars}/spec.yaml"),
manifest=("output/full/annonars/dbsnp-{genome_release}-{v_dbsnp}+{v_annonars}/MANIFEST.txt"),
threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96"))
threads: THREADS
resources:
runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"),
mem_mb_per_cpu=2000,
mem_mb=MEMORY,
wildcard_constraints:
genome_release=RE_GENOME,
v_dbsnp=RE_VERSION,
Expand Down
4 changes: 2 additions & 2 deletions rules/output/annonars/gnomad_exomes.smk
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ rule output_annonars_gnomad_exomes: # -- build gnomAD-exomes RocksDB with annon
manifest=(
"output/full/annonars/gnomad-exomes-{genome_release}-{v_gnomad}+{v_annonars}/MANIFEST.txt"
),
threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96"))
threads: THREADS
resources:
runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"),
mem_mb_per_cpu=2000,
mem_mb=MEMORY,
wildcard_constraints:
genome_release=RE_GENOME,
v_gnomad=RE_VERSION,
Expand Down
4 changes: 2 additions & 2 deletions rules/output/annonars/gnomad_genomes.smk
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ rule output_annonars_gnomad_genomes: # -- build gnomAD-genomes RocksDB with ann
manifest=(
"output/full/annonars/gnomad-genomes-{genome_release}-{v_gnomad}+{v_annonars}/MANIFEST.txt"
),
threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96"))
threads: THREADS
resources:
runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"),
mem_mb_per_cpu=2000,
mem_mb=MEMORY,
wildcard_constraints:
genome_release=RE_GENOME,
v_gnomad=RE_VERSION,
Expand Down
4 changes: 2 additions & 2 deletions rules/output/annonars/gnomad_mtdna.smk
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ rule output_annonars_gnomad_mtdna: # -- build gnomAD-mtDNA RocksDB with annonar
manifest=(
"output/full/annonars/gnomad-mtdna-{genome_release}-{v_gnomad}+{v_annonars}/MANIFEST.txt"
),
threads: int(os.environ.get("THREADS_ANNONARS_IMPORT", "96"))
threads: THREADS
resources:
runtime=os.environ.get("RUNTIME_ANNONARS_IMPORT", "48h"),
mem_mb_per_cpu=2000,
mem_mb=MEMORY,
wildcard_constraints:
genome_release=RE_GENOME,
v_gnomad=RE_VERSION,
Expand Down
Loading
Loading