File tree Expand file tree Collapse file tree 5 files changed +67
-0
lines changed
excerpt-data/86ffcde07ca5f388 Expand file tree Collapse file tree 5 files changed +67
-0
lines changed Original file line number Diff line number Diff line change @@ -89,6 +89,7 @@ rule all:
8989 f"work/download/genes/rcnv/2022/Collins_rCNV_2022.dosage_sensitivity_scores.tsv.gz" ,
9090 f"work/download/genes/orphapacket/{ DV .orphapacket } /orphapacket.tar.gz" ,
9191 f"work/genes/dbnsfp/{ DV .dbnsfp } /genes.tsv.gz" ,
92+ "work/genes/decipher/v3/decipher_hi_prediction.tsv.gz" ,
9293 f"work/genes/ensembl/{ DV .ensembl } /ensembl_xlink.tsv" ,
9394 f"work/genes/enst_ensg/grch37/{ DV .ensembl_37 } /enst_ensg.tsv" ,
9495 f"work/genes/entrez/{ DV .today } /gene_info.jsonl" ,
@@ -329,6 +330,7 @@ include: "rules/work/misc/hpo.smk"
329330# Gene-related rules.
330331include : "rules/work/genes/dbnsfp.smk"
331332include : "rules/work/genes/clingen.smk"
333+ include : "rules/work/genes/decipher.smk"
332334include : "rules/work/genes/ensembl.smk"
333335include : "rules/work/genes/gnomad.smk"
334336include : "rules/work/genes/gtex.smk"
Original file line number Diff line number Diff line change 1+ - url : https://www.deciphergenomics.org/files/downloads/HI_Predictions_Version3.bed.gz
2+
13- url : ftp://ftp.clinicalgenome.org/ClinGen_region_curation_list_GRCh37.tsv
24- url : ftp://ftp.clinicalgenome.org/ClinGen_region_curation_list_GRCh38.tsv
35
Original file line number Diff line number Diff line change 1+ version https://git-lfs.github.com/spec/v1
2+ oid sha256:f2d2ed0ba3247c444a0f55a0659d16b253f6030108e3497785505d11dea72838
3+ size 3072
Original file line number Diff line number Diff line change 1+ version https://git-lfs.github.com/spec/v1
2+ oid sha256:5daa1c09650953bb18b68be21872a95a222b60e1619b3959a1cb8d443e40a055
3+ size 80
Original file line number Diff line number Diff line change 1+ ## Rules related to DECIPHER gene information.
2+
3+
rule genes_decipher_hi_download:  # -- download DECIPHER HI predictions
    # Fetches the DECIPHER "HI_Predictions_Version3" BED file with wget and
    # writes an md5sum file next to the downloaded file.
    output:
        bed="work/download/genes/decipher/v3/HI_Predictions_Version3.bed.gz",
        bed_md5="work/download/genes/decipher/v3/HI_Predictions_Version3.bed.gz.md5",
    shell:
        r"""
        # NOTE(review): TLS certificate verification is switched off for this
        # download; confirm that this is still required for the host.
        wget \
            --no-check-certificate \
            --output-document={output.bed} \
            https://www.deciphergenomics.org/files/downloads/HI_Predictions_Version3.bed.gz

        # Checksum of the file as downloaded (computed locally, not compared
        # against any upstream value).
        md5sum {output.bed} >{output.bed_md5}
        """
16+
17+
rule genes_decipher_hi_convert:  # -- convert DECIPHER HI predictions to TSV
    # Joins the DECIPHER HI prediction records (taken from column 4 of the BED
    # file) with the genes-xlink table on gene_symbol and writes a gzipped TSV
    # with the header hgnc_id, hgnc_symbol, p_hi, hi_index plus an md5sum file.
    input:
        hgnc=f"output/full/mehari/genes-xlink-{DV.today}/genes-xlink.tsv",
        bed="work/download/genes/decipher/v3/HI_Predictions_Version3.bed.gz",
    output:
        tsv="work/genes/decipher/v3/decipher_hi_prediction.tsv.gz",
        tsv_md5="work/genes/decipher/v3/decipher_hi_prediction.tsv.gz.md5",
    shell:
        r"""
        set -x

        # Assign first and export second: "export VAR=$(cmd)" returns the status
        # of export, so a failing mktemp would go unnoticed and the paths below
        # would resolve to /tmp.tsv and /tmp2.tsv.
        TMPDIR=$(mktemp -d)
        export TMPDIR
        trap 'rm -rf "$TMPDIR"' EXIT

        echo -e "gene_symbol\tp_hi\thi_index" > "$TMPDIR/tmp.tsv"

        # Skip the first line of the BED file (presumably the track header; TODO
        # confirm), keep column 4, split it on "|" into tab-separated fields and
        # strip a trailing "%" from each line.
        zcat {input.bed} \
        | tail -n +2 \
        | cut -f 4 \
        | tr '|' '\t' \
        | sed -e 's/%$//g' \
        >> "$TMPDIR/tmp.tsv"

        qsv join \
            gene_symbol {input.hgnc} \
            gene_symbol "$TMPDIR/tmp.tsv" \
        > "$TMPDIR/tmp2.tsv"

        # The join result is treated as comma-separated and converted to tabs
        # with tr; this assumes that no field contains a comma or is quoted.
        # Fields 1 and 5-7 are kept to match the header written here.
        # NOTE(review): this presumes the xlink table has exactly four columns,
        # with hgnc_id first -- verify against the genes-xlink output.
        ( \
            echo -e "hgnc_id\thgnc_symbol\tp_hi\thi_index"; \
            tail -n +2 "$TMPDIR/tmp2.tsv" \
            | tr ',' '\t' \
            | cut -f 1,5-7 \
            | LC_ALL=C sort \
        ) \
        | gzip -c \
        > {output.tsv}

        md5sum {output.tsv} > {output.tsv_md5}
        """
You can’t perform that action at this time.
0 commit comments