Skip to content

Commit b0d6884

Browse files
authored
feat: adding pHaplo, pTriplo, sHet as seen in DECIPHER (#56)
1 parent 51435df commit b0d6884

File tree

8 files changed

+81
-1
lines changed

8 files changed

+81
-1
lines changed

Snakefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,16 @@ rule all:
8686
#
8787
# genes
8888
f"work/download/genes/clingen/{DV.clingen_gene}/clingen.csv",
89+
f"work/download/genes/rcnv/2022/Collins_rCNV_2022.dosage_sensitivity_scores.tsv.gz",
90+
f"work/download/genes/shet/2019/Collins_rCNV_2022.dosage_sensitivity_scores.tsv.gz",
8991
f"work/genes/dbnsfp/{DV.dbnsfp}/genes.tsv.gz",
9092
f"work/genes/ensembl/{DV.ensembl}/ensembl_xlink.tsv",
9193
f"work/genes/enst_ensg/grch37/{DV.ensembl_37}/enst_ensg.tsv",
9294
f"work/genes/entrez/{DV.today}/gene_info.jsonl",
9395
f"work/genes/gnomad/{DV.gnomad_constraints}/gnomad_constraints.tsv",
9496
f"work/genes/hgnc/{DV.today}/hgnc_info.jsonl",
97+
"work/genes/rcnv/2022/rcnv_collins_2022.tsv",
98+
"work/genes/shet/2019/shet_weghorn_2019.tsv",
9599
# reference-specific annotations
96100
# -- background/population sequence variants and annotations thereof
97101
# ---- GRCh37
@@ -324,6 +328,8 @@ include: "rules/work/genes/gnomad.smk"
324328
include: "rules/work/genes/hgnc.smk"
325329
include: "rules/work/genes/mehari_data_tx.smk"
326330
include: "rules/work/genes/ncbi.smk"
331+
include: "rules/work/genes/rcnv.smk"
332+
include: "rules/work/genes/shet.smk"
327333
# Reference sequence--related rules.
328334
include: "rules/work/reference/human.smk"
329335
# Features (position and not variant specific).
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:b3e976fa85bcce488ed726bf8c6362d4b6f63be044e1691fdd643ad702a7d4f0
3+
size 366098

download_urls.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
- url: https://zenodo.org/record/6347673/files/Collins_rCNV_2022.dosage_sensitivity_scores.tsv.gz
2+
13
- comment: The curation activity summary report is built in real-time.
24
url: https://search.clinicalgenome.org/kb/reports/curation-activity-summary-report
35

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:e60e4cdf8a5b95fc4740ff447e50cc48798f4d553bf339a605e0a02300091b96
3+
size 2095

excerpt-data/31137c2c77bc0cea/url.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:52a5e225efe130a6f89bf8eeb16aa0f76dd7b4ef31d900fe056cd25e452b191b
3+
size 91

rules/output/annonars/genes.smk

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ rule output_annonars_genes: # -- build annonars genes RocksDB file
99
dbnsfp="work/genes/dbnsfp/{v_dbnsfp}/genes.tsv.gz",
1010
hgnc="work/genes/hgnc/{date}/hgnc_info.jsonl",
1111
ncbi="work/genes/entrez/{date}/gene_info.jsonl",
12+
rcnv="work/genes/rcnv/2022/rcnv_collins_2022.tsv",
13+
shet="work/genes/shet/2019/shet_weghorn_2019.tsv",
1214
output:
1315
rocksdb_identity=(
1416
"output/full/annonars/genes-{v_acmg_sf}+{v_gnomad_constraints}+{v_dbnsfp}+{date}+{v_annonars}/"
@@ -38,7 +40,9 @@ rule output_annonars_genes: # -- build annonars genes RocksDB file
3840
--path-in-gnomad-constraints {input.gnomad_constraints} \
3941
--path-in-dbnsfp {input.dbnsfp} \
4042
--path-in-hgnc {input.hgnc} \
41-
--path-in-ncbi {input.ncbi}
43+
--path-in-ncbi {input.ncbi} \
44+
--path-in-rcnv {input.rcnv} \
45+
--path-in-shet {input.shet}
4246
4347
varfish-db-downloader tpl \
4448
--template rules/output/annonars/genes.spec.yaml \

rules/work/genes/rcnv.smk

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
## Rules related to Collins (2022) gene annotation.
2+
3+
4+
# Download the Collins (2022) rCNV dosage sensitivity scores (pHaplo/pTriplo)
# from Zenodo and write an MD5 checksum of the downloaded file next to it.
#
# Outputs:
#   tsv     -- the gzip-compressed score table exactly as served by Zenodo
#   tsv_md5 -- `md5sum` output for `tsv`; it is computed from the local file and
#              is NOT compared against any reference value
#
# TLS certificate verification is deliberately left enabled (the former
# `--no-check-certificate` flag was dropped): the checksum above is derived from
# whatever was downloaded, so the HTTPS certificate is the only thing that
# authenticates the data source.
rule genes_rcnv_download:  # -- download pHaplo/pTriplo scores
    output:
        tsv="work/download/genes/rcnv/2022/Collins_rCNV_2022.dosage_sensitivity_scores.tsv.gz",
        tsv_md5="work/download/genes/rcnv/2022/Collins_rCNV_2022.dosage_sensitivity_scores.tsv.gz.md5",
    shell:
        r"""
        wget \
            -O {output.tsv} \
            https://zenodo.org/record/6347673/files/Collins_rCNV_2022.dosage_sensitivity_scores.tsv.gz

        md5sum {output.tsv} > {output.tsv_md5}
        """
16+
17+
18+
# Map the Collins (2022) pHaplo/pTriplo scores onto HGNC gene IDs.
#
# Inputs:
#   tsv   -- downloaded score table (gzip-compressed, joined on column `#gene`)
#   xlink -- mehari genes-xlink table (joined on column `gene_symbol`)
#
# Outputs:
#   tsv     -- tab-separated table with columns `hgnc_id`, `p_haplo`, `p_triplo`
#   tsv_md5 -- `md5sum` output for `tsv`
#
# Pipeline: join both tab-delimited inputs, keep `hgnc_id,pHaplo,pTriplo`,
# rename the columns, sort, then convert qsv's comma-separated output to tabs.
#
# NOTE(review): the rule name is spelled `postproces`; it is kept unchanged so
# that anything referring to the rule by name keeps working.
#
# NOTE: the shell string is not a raw string, so Python turns `\t` into a
# literal tab and removes each backslash-newline before the shell sees the
# command; the whole pipeline therefore reaches the shell as a single line.
rule genes_rcnv_postproces:  # -- postprocess file for HGNC gene IDs
    input:
        tsv="work/download/genes/rcnv/2022/Collins_rCNV_2022.dosage_sensitivity_scores.tsv.gz",
        xlink=f"output/full/mehari/genes-xlink-{DV.today}/genes-xlink.tsv",
    output:
        tsv="work/genes/rcnv/2022/rcnv_collins_2022.tsv",
        tsv_md5="work/genes/rcnv/2022/rcnv_collins_2022.tsv.md5",
    shell:
        """
        qsv join -d '\t' \
            '#gene' <(zcat {input.tsv}) \
            gene_symbol {input.xlink} \
        | qsv select 'hgnc_id,pHaplo,pTriplo' \
        | qsv rename 'hgnc_id,p_haplo,p_triplo' \
        | qsv sort \
        | tr ',' '\t' \
        > {output.tsv}

        md5sum {output.tsv} > {output.tsv_md5}
        """

rules/work/genes/shet.smk

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
## Rules related to Weghorn (2019) gene annotation.
2+
3+
4+
# Map the Weghorn (2019) sHet values onto HGNC gene IDs.
#
# Inputs:
#   tsv   -- bundled supplementary table (gzip-compressed, joined on column `Gene`)
#   xlink -- mehari genes-xlink table (joined on column `gene_symbol`)
#
# Outputs:
#   tsv     -- tab-separated table with columns `hgnc_id`, `s_het`
#              (`s_het` is the source column `low_det`, renamed)
#   tsv_md5 -- `md5sum` output for `tsv`; declared here so that Snakemake tracks
#              the checksum file the shell command writes
#
# Pipeline: join both tab-delimited inputs, keep `hgnc_id,low_det`, rename the
# columns, sort, then convert qsv's comma-separated output to tabs.
#
# NOTE: the shell string is not a raw string, so Python turns `\t` into a
# literal tab and removes each backslash-newline before the shell sees the
# command; the whole pipeline therefore reaches the shell as a single line.
rule genes_shet:  # -- postprocess file for HGNC gene IDs
    input:
        tsv="bundled-data/weghorn_2019/Weghorn_2019_Supplementary_Table_1.txt.gz",
        xlink=f"output/full/mehari/genes-xlink-{DV.today}/genes-xlink.tsv",
    output:
        tsv="work/genes/shet/2019/shet_weghorn_2019.tsv",
        tsv_md5="work/genes/shet/2019/shet_weghorn_2019.tsv.md5",
    shell:
        """
        qsv join -d '\t' \
            'Gene' <(zcat {input.tsv}) \
            gene_symbol {input.xlink} \
        | qsv select 'hgnc_id,low_det' \
        | qsv rename 'hgnc_id,s_het' \
        | qsv sort \
        | tr ',' '\t' \
        > {output.tsv}

        md5sum {output.tsv} > {output.tsv_md5}
        """

0 commit comments

Comments
 (0)