Skip to content

Commit 482bdd0

Browse files
authored
feat: replace orphapacket by orphadata API access (#84) (#85)
1 parent 06c800c commit 482bdd0

File tree

58 files changed

+1699
-129
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

58 files changed

+1699
-129
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# Ignore all pickled data
2+
*.pickle*
3+
14
# Ignore the workflow directories.
25
/work/
36
/output/

Snakefile

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,9 @@ rule all:
8686
#
8787
# genes
8888
f"work/download/genes/rcnv/2022/Collins_rCNV_2022.dosage_sensitivity_scores.tsv.gz",
89-
f"work/download/genes/orphapacket/{DV.orphapacket}/orphapacket.tar.gz",
9089
"work/download/genes/alphamissense/1/AlphaMissense_gene_hg38.tsv.gz",
90+
f"work/download/genes/ctd/{DV.today}/CTD_diseases.tsv.gz",
91+
f"work/download/do/{DV.today}/omim-unmapped.csv",
9192
f"work/genes/dbnsfp/{DV.dbnsfp}/genes.tsv.gz",
9293
"work/genes/decipher/v3/decipher_hi_prediction.tsv.gz",
9394
f"work/genes/ensembl/{DV.ensembl}/ensembl_xlink.tsv",
@@ -96,7 +97,8 @@ rule all:
9697
f"work/genes/gnomad/{DV.gnomad_constraints}/gnomad_constraints.tsv",
9798
f"work/genes/hgnc/{DV.today}/hgnc_info.jsonl",
9899
f"work/genes/omim/{DV.hpo}+{DV.today}/omim_diseases.tsv",
99-
f"work/genes/orphapacket/{DV.orphapacket}+{DV.today}/orpha_diseases.tsv",
100+
f"work/genes/orphadata/{DV.orphadata}/orphadata.jsonl",
101+
f"work/genes/mondo/{DV.today}/mondo.obo",
100102
"work/genes/rcnv/2022/rcnv_collins_2022.tsv",
101103
"work/genes/shet/2019/shet_weghorn_2019.tsv",
102104
f"work/genes/clingen/{DV.today}/ClinGen_gene_curation_list_GRCh37.tsv",
@@ -177,7 +179,7 @@ rule all:
177179
f"output/full/annonars/cons-grch37-{DV.ucsc_cons_37}+{PV.annonars}/rocksdb/IDENTITY",
178180
f"output/full/annonars/cons-grch38-{DV.ucsc_cons_38}+{PV.annonars}/rocksdb/IDENTITY",
179181
# ----- genes
180-
f"output/full/annonars/genes-{DV.acmg_sf}+{DV.gnomad_constraints}+{DV.dbnsfp}+{DV.hpo}+{DV.orphapacket}+{DV.today}+{PV.annonars}/rocksdb/IDENTITY",
182+
f"output/full/annonars/genes-{DV.acmg_sf}+{DV.gnomad_constraints}+{DV.dbnsfp}+{DV.hpo}+{DV.today}+{PV.annonars}/rocksdb/IDENTITY",
181183
# -- worker data
182184
f"output/full/worker/genes-regions-grch37-{DV.refseq_37}+{PV.worker}/refseq_genes.bin",
183185
f"output/full/worker/genes-regions-grch37-{DV.ensembl_37}+{PV.worker}/ensembl_genes.bin",
@@ -341,6 +343,9 @@ include: "rules/work/misc/hpo.smk"
341343
include: "rules/work/genes/alphamissense.smk"
342344
include: "rules/work/genes/dbnsfp.smk"
343345
include: "rules/work/genes/clingen.smk"
346+
include: "rules/work/genes/conditions.smk"
347+
include: "rules/work/genes/ctd.smk"
348+
include: "rules/work/genes/do.smk"
344349
include: "rules/work/genes/decipher.smk"
345350
include: "rules/work/genes/ensembl.smk"
346351
include: "rules/work/genes/gnomad.smk"
@@ -350,11 +355,11 @@ include: "rules/work/genes/mehari_data_tx.smk"
350355
include: "rules/work/genes/ncbi.smk"
351356
include: "rules/work/genes/omim.smk"
352357
include: "rules/work/genes/panelapp.smk"
353-
include: "rules/work/genes/orphapacket.smk"
358+
include: "rules/work/genes/mondo.smk"
359+
include: "rules/work/genes/orphadata.smk"
354360
include: "rules/work/genes/rcnv.smk"
355361
include: "rules/work/genes/shet.smk"
356362
include: "rules/work/genes/domino.smk"
357-
include: "rules/work/genes/clingen.smk"
358363
# Reference sequence--related rules.
359364
include: "rules/work/reference/human.smk"
360365
# Features (position and not variant specific).

download_urls.yml

Lines changed: 50 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,48 @@
1+
# Note that this just tests the availability of the OrphaData API. We have a file in
2+
# excerpts/__orphadata__ that is used by `genes-orpha-diseases.py` in CI=true mode.
3+
- url: https://api.orphadata.com/rd-cross-referencing/orphacodes
4+
excerpt_strategy:
5+
strategy: no-excerpt
6+
count: null
7+
- url: https://api.orphadata.com/rd-cross-referencing/orphacodes/20?lang=en
8+
excerpt_strategy:
9+
strategy: no-excerpt
10+
count: null
11+
- url: https://api.orphadata.com/rd-associated-genes/orphacodes/20
12+
excerpt_strategy:
13+
strategy: no-excerpt
14+
count: null
15+
16+
- url: https://raw.githubusercontent.com/monarch-initiative/mondo-ingest/main/src/ontology/reports/omim_unmapped_terms.tsv
17+
excerpt_strategy:
18+
strategy: no-excerpt
19+
count: null
20+
21+
- url: https://github.com/DiseaseOntology/HumanDiseaseOntology/raw/main/src/deprecated/DO_NON_Production_Files/omim_import.obo
22+
excerpt_strategy:
23+
strategy: no-excerpt
24+
count: null
25+
26+
- url: https://raw.githubusercontent.com/DiseaseOntology/HumanDiseaseOntology/main/DOreports/OMIMinDO.tsv
27+
excerpt_strategy:
28+
strategy: no-excerpt
29+
count: null
30+
31+
- url: https://github.com/DiseaseOntology/HumanDiseaseOntology/raw/main/src/deprecated/reports/omim-unmapped.csv
32+
excerpt_strategy:
33+
strategy: no-excerpt
34+
count: null
35+
36+
- url: https://ctdbase.org/reports/CTD_diseases.tsv.gz
37+
excerpt_strategy:
38+
strategy: no-excerpt
39+
count: null
40+
41+
- url: http://purl.obolibrary.org/obo/mondo.obo
42+
excerpt_strategy:
43+
strategy: no-excerpt
44+
count: null
45+
146
- url: https://panelapp.genomicsengland.co.uk/api/v1/entities/
247
excerpt_strategy:
348
strategy: no-excerpt
@@ -101,7 +146,7 @@
101146
strategy: manual
102147
count: null
103148

104-
- url: https://github.com/Orphanet/orphapacket/archive/refs/tags/v10.1.tar.gz
149+
- url: https://data.bioontology.org/ontologies/ORDO/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb&download_format=csv
105150
excerpt_strategy:
106151
strategy: no-excerpt
107152
count: null
@@ -130,19 +175,19 @@
130175
strategy: no-excerpt
131176
count: null
132177

133-
- url: https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2023-06-06/hp.obo
178+
- url: https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2024-01-16/hp.obo
134179
excerpt_strategy:
135180
strategy: no-excerpt
136181
count: null
137-
- url: https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2023-06-06/phenotype.hpoa
182+
- url: https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2024-01-16/phenotype.hpoa
138183
excerpt_strategy:
139184
strategy: no-excerpt
140185
count: null
141-
- url: https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2023-06-06/phenotype_to_genes.txt
186+
- url: https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2024-01-16/phenotype_to_genes.txt
142187
excerpt_strategy:
143188
strategy: no-excerpt
144189
count: null
145-
- url: https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2023-06-06/genes_to_phenotype.txt
190+
- url: https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2024-01-16/genes_to_phenotype.txt
146191
excerpt_strategy:
147192
strategy: no-excerpt
148193
count: null

environment.yml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ dependencies:
1212
- click
1313
- loguru
1414
- numpy
15+
- pydantic
16+
- pronto >=2.5,<3.0
1517
- pyyaml
1618
- requests
1719
- requests-ftp
@@ -41,9 +43,13 @@ dependencies:
4143
# Parallel (de)compression.
4244
- pigz
4345
# Varfish related
44-
- annonars =0.33.0
46+
- annonars =0.34.0
4547
- viguno =0.2.0
4648
- mehari =0.21.1
4749
- varfish-server-worker =0.10.2
4850
# S3 uploads
4951
- s5cmd =2.1.0
52+
# async HTTP requests
53+
- httpx =0.25.0
54+
- httpcore =0.18.0
55+
- trio

excerpt-data/111d8c6e08038f62/20

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:b610d631a11a2f6f4b93b8f7f7458447fa1a974ade2a18b1b9c9b5047dac516c
3+
size 3338

excerpt-data/111d8c6e08038f62/url.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:bda92b632aaeacd032c57d87e6420ccac3945bcd78bf51bafc117da4eb63a601
3+
size 69
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:384eccb1d84e8a5036027cd340d2b4b18ce41170de28407f3022d7f3dd2392dc
3+
size 4779

excerpt-data/1963f3c58ea066be/url.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:2153c0f0281a0399c77b0964d96c1286f6716eac34304f68f4e838fe41a02bdd
3+
size 116

excerpt-data/32c97f6adaf88f01/hp.obo

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:508bb0326603671327e9f4af05beffd1800e053557c7c1a0d137fd1f458bb0f7
3+
size 9719364

excerpt-data/32c97f6adaf88f01/url.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:b057fd342e531569cd4c2af122986569a44bc55348bcbc8104866b9ea696acfb
3+
size 94

0 commit comments

Comments
 (0)