Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: linux-64
# created-by: conda 25.11.1
@EXPLICIT
https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81
https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.1.4-hbd8a1cb_0.conda#bddacf101bb4dd0e51811cb69c7790e2
https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_16.conda#26c46f90d0e727e95c6c9498a33a09f3
https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314.conda#0539938c55b6b1a59b560e843ad864a4
https://conda.anaconda.org/conda-forge/noarch/tzdata-2025c-hc9c84f9_1.conda#ad659d0a2b3e47e38d829aa8cad2d610
https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d
https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_16.conda#6d0363467e6ed84f11435eb309f2ff06
https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_8.conda#51a19bba1b8ebfb60df25cde030b7ebc
https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.34.6-hb03c661_0.conda#920bb03579f15389b9e512095ad995b7
https://conda.anaconda.org/conda-forge/linux-64/gzip-1.14-hb9d3cd8_0.conda#84fa0d10c305971e39f516ca8354a9cd
https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.3-hb9d3cd8_0.conda#b38117a3c920364aff79f870c984b4a3
https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda#8b09ae86839581147ef2e5c5e229d164
https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda#35f29eec58405aaf55e01cb470d8c26a
https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-15.2.0-h69a702a_16.conda#5a68259fac2da8f2ee6f7bfe49c9eb8b
https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.2-hb03c661_0.conda#c7c83eecbb72d88b940c249af56c8b17
https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-hb9d3cd8_0.conda#c7e925f37e3b40d893459e625f6a53f1
https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_16.conda#68f68355000ec3f1d6f26ea13e8f525f
https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.3-h5347b49_0.conda#db409b7c1720428638e7c0d509d3e1b5
https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8
https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda#47e340acb35de30501a76c7c799c41d7
https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.0-h26f9b46_0.conda#9ee58d5c534af06558933af3c845a780
https://conda.anaconda.org/conda-forge/linux-64/icu-78.2-h33c6efd_0.conda#186a18e3ba246eccfc7cff00cd19a870
https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20250104-pl5321h7949ede_0.conda#c277e0a4d549b03ac1e9d6cbbe3d017b
https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172bf1cd1ff8629f2b1179945ed45055
https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.1-hcf80075_0.conda#eecce068c7e4eddeb169591baac20ac4
https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-15.2.0-hdf11a46_16.conda#1b3152694d236cf233b76b8c56bf0eae
https://conda.anaconda.org/conda-forge/linux-64/readline-8.3-h853b02a_0.conda#d7d95fc8287ea7bf33e0e7116d2b95ec
https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_ha0e22de_103.conda#86bc20552bf46075e3d92b67f089172d
https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb78ec9c_6.conda#4a13eeac0b5c8e5b8ab496e6c4ddd829
https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368
https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_105.conda#3ec0aa5037d39b06554109a01e6fb0c6
https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.67.0-had1ee68_0.conda#b499ce4b026493a13774bcf0f4c33849
https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.51.2-hf4e2dac_0.conda#da5be73701eecd0e8454423fd6ffcf30
https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.18.0-h4e3cde8_0.conda#0a5563efed19ca4461cf927419b6eb73
https://conda.anaconda.org/conda-forge/linux-64/python-3.14.2-h32b2ec7_100_cp314.conda#1cef1236a05c3a98f68c33ae9425f656
https://conda.anaconda.org/conda-forge/linux-64/curl-8.18.0-h4e3cde8_0.conda#261410cab40c7142adce3a09e24cae41
https://conda.anaconda.org/conda-forge/noarch/pip-25.3-pyh145f28c_0.conda#bf47878473e5ab9fdb4115735230e191
https://conda.anaconda.org/bioconda/noarch/snakemake-wrapper-utils-0.8.0-pyhdfd78af_0.conda#1650e521333852f45468d97b1b2fdcce
8 changes: 8 additions & 0 deletions bio/reference/ensembl-genomes-annotation/environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
channels:
- conda-forge
- bioconda
- nodefaults
dependencies:
- curl >=8.17.0
- gzip
- snakemake-wrapper-utils =0.8.0
13 changes: 13 additions & 0 deletions bio/reference/ensembl-genomes-annotation/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
name: ensembl-genomes-annotation
description: |
Download annotation of genomic sites (e.g. transcripts) for non-vertebrate
species from Ensembl Genomes FTP servers, and store them in a single .gtf or
.gff3 file. Adapted from the ``ensembl-annotation`` wrapper.
authors:
- Brandon Seah
- Johannes Köster
url: https://ensemblgenomes.org/
output:
- Ensembl Genomes GTF or GFF3 annotation file
params:
- url: URL from which to download the annotation files (optional; defaults to ``https://ftp.ebi.ac.uk/ensemblgenomes/pub``)
28 changes: 28 additions & 0 deletions bio/reference/ensembl-genomes-annotation/test/Snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Test rule: request an uncompressed GTF annotation for S. pombe
# (division "fungi", assembly ASM294v2, release 62).
rule get_annotation:
    output:
        "refs/ensembl-genomes-annotation.gtf",
    log:
        "logs/get_annotation.log",
    params:
        division="fungi",
        species="schizosaccharomyces_pombe",
        assembly="ASM294v2",
        release="62",
    cache: "omit-software"  # save space and time with between workflow caching (see docs)
    wrapper:
        "master/bio/reference/ensembl-genomes-annotation"


# Test rule: request the same S. pombe annotation as a gzip-compressed GTF
# (output ends in ".gz"). Uses its own log file so it does not overwrite or
# interleave with the log of the uncompressed-download rule.
rule get_annotation_gz:
    output:
        "refs/ensembl-genomes-annotation.gtf.gz",
    params:
        species="schizosaccharomyces_pombe",
        division="fungi",
        assembly="ASM294v2",
        release="62",
    log:
        "logs/get_annotation_gz.log",
    cache: "omit-software"  # save space and time with between workflow caching (see docs)
    wrapper:
        "master/bio/reference/ensembl-genomes-annotation"
64 changes: 64 additions & 0 deletions bio/reference/ensembl-genomes-annotation/wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
__author__ = "Brandon Seah, Johannes Köster"
__copyright__ = "Copyright 2025, Brandon Seah, Johannes Köster"
__email__ = "brandon_seah@tll.org.sg"
__license__ = "MIT"

import subprocess as sp
import sys

from snakemake.shell import shell
from snakemake.logging import logger
from pathlib import Path
from snakemake_wrapper_utils.snakemake import get_format

# Shell redirection snippet that sends stderr of the shell commands to the
# rule's log file (stdout is left alone because it carries the download).
log = snakemake.log_fmt_shell(stdout=False, stderr=True)

species = snakemake.params.species.lower()
# int() rejects non-numeric release values early.
release = int(snakemake.params.release)
division = snakemake.params.division.lower()
assembly = snakemake.params.assembly

available_divisions = ["fungi", "metazoa", "plants", "protists"]
if division not in available_divisions:
    raise ValueError(
        f"Invalid division. Division must be one of: {', '.join(available_divisions)}"
    )

# TODO: bacteria, fungi/ascomycota folders are further subdivided
# TODO: chromosome file names may not follow standard naming convention, e.g.
# Plasmodium_falciparum.GCA000002765v3.dna.primary_assembly.Pf3D7_01_v3.fa.gz

out_fmt = get_format(str(snakemake.output[0]))
out_gz = str(snakemake.output[0]).endswith(".gz")

if out_fmt not in ["gtf", "gff3"]:
    raise ValueError(
        "invalid format specified. Only 'gtf[.gz]' and 'gff3[.gz]' are currently supported."
    )
# The remote file is always requested with a ".gz" suffix; it is decompressed
# locally when the requested output is not gzipped.
suffix = out_fmt + ".gz"

# Strip a trailing slash from the base URL so the joined URL does not
# contain "//" when the user passes e.g. ".../pub/".
url = snakemake.params.get("url", "https://ftp.ebi.ac.uk/ensemblgenomes/pub").rstrip(
    "/"
)
url = f"{url}/release-{release}/{division}/{out_fmt}/{species}/{species.capitalize()}.{assembly}.{release}.{suffix}"
# Fallback location: same path, fetched via FTP instead of HTTPS.
ftp_url = url.replace("https://", "ftp://")


def _download(source_url):
    """Fetch ``source_url`` into the rule's first output file.

    The payload is stored as-is when the output name ends in ``.gz`` and is
    piped through ``gzip -d`` otherwise. The ``:q`` format spec shell-quotes
    the URL and the output path.

    Raises:
        subprocess.CalledProcessError: if the shell command fails.
    """
    if out_gz:
        shell("curl --fail -L {source_url:q} > {snakemake.output[0]:q} {log}")
    else:
        shell(
            "(curl --fail -L {source_url:q} | gzip -d > {snakemake.output[0]:q}) {log}"
        )


# Try the primary URL first, then the FTP fallback; stop at the first success.
for candidate in (url, ftp_url):
    try:
        _download(candidate)
        break
    except sp.CalledProcessError:
        continue
else:
    # Both attempts failed: explain the likely cause and exit non-zero.
    message = (
        "Unable to download annotation data from Ensembl Genomes.\n"
        "Did you check that this combination of species, division, assembly, and release is actually provided?"
    )
    if snakemake.log:
        # Append to the rule log and close the handle deterministically
        # instead of rebinding sys.stderr to a file that is never closed.
        with open(snakemake.log[0], "a") as log_file:
            print(message, file=log_file)
    else:
        print(message, file=sys.stderr)
    # sys.exit is always available; the bare exit() builtin is injected by
    # the site module and is not guaranteed to exist.
    sys.exit(1)
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: linux-64
# created-by: conda 25.11.1
@EXPLICIT
https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81
https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2026.1.4-hbd8a1cb_0.conda#bddacf101bb4dd0e51811cb69c7790e2
https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_16.conda#26c46f90d0e727e95c6c9498a33a09f3
https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d
https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_16.conda#6d0363467e6ed84f11435eb309f2ff06
https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.34.6-hb03c661_0.conda#920bb03579f15389b9e512095ad995b7
https://conda.anaconda.org/conda-forge/linux-64/gzip-1.14-hb9d3cd8_0.conda#84fa0d10c305971e39f516ca8354a9cd
https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.3-hb9d3cd8_0.conda#b38117a3c920364aff79f870c984b4a3
https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-15.2.0-h69a702a_16.conda#5a68259fac2da8f2ee6f7bfe49c9eb8b
https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_16.conda#68f68355000ec3f1d6f26ea13e8f525f
https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda#edb0dca6bc32e4f4789199455a1dbeb8
https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda#47e340acb35de30501a76c7c799c41d7
https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.0-h26f9b46_0.conda#9ee58d5c534af06558933af3c845a780
https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20250104-pl5321h7949ede_0.conda#c277e0a4d549b03ac1e9d6cbbe3d017b
https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda#172bf1cd1ff8629f2b1179945ed45055
https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.1-hcf80075_0.conda#eecce068c7e4eddeb169591baac20ac4
https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-15.2.0-hdf11a46_16.conda#1b3152694d236cf233b76b8c56bf0eae
https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb78ec9c_6.conda#4a13eeac0b5c8e5b8ab496e6c4ddd829
https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.3-h659f571_0.conda#3f43953b7d3fb3aaa1d0d0723d91e368
https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.67.0-had1ee68_0.conda#b499ce4b026493a13774bcf0f4c33849
https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.18.0-h4e3cde8_0.conda#0a5563efed19ca4461cf927419b6eb73
https://conda.anaconda.org/conda-forge/linux-64/curl-8.18.0-h4e3cde8_0.conda#261410cab40c7142adce3a09e24cae41
6 changes: 6 additions & 0 deletions bio/reference/ensembl-genomes-sequence/environment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
channels:
- conda-forge
- nodefaults
dependencies:
- curl >=8.17.0
- gzip
14 changes: 14 additions & 0 deletions bio/reference/ensembl-genomes-sequence/meta.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
name: ensembl-genomes-sequence
description: |
Download sequences (e.g. genome) of non-vertebrate species from Ensembl
Genomes FTP servers, and store them in a single .fasta file. The release
numbers and folder organization differ from the Ensembl project, which
maintains data for vertebrate species. Based on the ``ensembl-sequence`` wrapper.
authors:
- Brandon Seah
- Johannes Köster
url: https://ensemblgenomes.org/
output:
- fasta file
params:
- url: URL from which to download the sequence files (optional; by default is ``ftp://ftp.ebi.ac.uk/ensemblgenomes/pub/``)
81 changes: 81 additions & 0 deletions bio/reference/ensembl-genomes-sequence/test/Snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Test rule: request the P. falciparum genome (division "protists",
# release 62, datatype "dna") as an uncompressed FASTA file.
rule get_genome:
    output:
        "refs/ensembl-genomes-genome.fasta",
    log:
        "logs/get_genome.log",
    params:
        division="protists",
        species="plasmodium_falciparum",
        assembly="GCA000002765v3",
        release="62",
        datatype="dna",
    cache: "omit-software"  # save space and time with between workflow caching (see docs)
    wrapper:
        "master/bio/reference/ensembl-genomes-sequence"


# Test rule: request the same P. falciparum genome with a gzip-compressed
# output name (".fa.gz"). Logs to its own file so that it does not clobber
# the log of other rules in this Snakefile.
rule get_genome_gzipped:
    output:
        "refs/ensembl-genomes-genome.fa.gz",
    params:
        species="plasmodium_falciparum",
        assembly="GCA000002765v3",
        division="protists",
        datatype="dna",
        release="62",
    log:
        "logs/get_genome_gzipped.log",
    cache: "omit-software"  # save space and time with between workflow caching (see docs)
    wrapper:
        "master/bio/reference/ensembl-genomes-sequence"


# Test rule: request only chromosome "MT" of S. pombe via the optional
# `chromosome` param. Logs to its own file so that it does not clobber the
# log of other rules in this Snakefile.
rule get_single_chromosome:
    output:
        "refs/ensembl-genomes-chrMt.fasta",
    params:
        species="schizosaccharomyces_pombe",
        assembly="ASM294v2",
        division="fungi",
        datatype="dna",
        release="62",
        chromosome=["MT"],  # optional: restrict to one or multiple chromosomes, for multiple see below
    log:
        "logs/get_single_chromosome.log",
    cache: "omit-software"  # save space and time with between workflow caching (see docs)
    wrapper:
        "master/bio/reference/ensembl-genomes-sequence"


# Test rule: request chromosomes "III" and "MT" of S. pombe into a single
# FASTA output. Logs to its own file so that it does not clobber the log of
# other rules in this Snakefile.
rule get_multiple_chromosome:
    output:
        "refs/ensembl-genomes-chr3_and_chrMt.fasta",
    params:
        species="schizosaccharomyces_pombe",
        assembly="ASM294v2",
        division="fungi",
        datatype="dna",
        release="62",
        chromosome=["III", "MT"],
    log:
        "logs/get_multiple_chromosome.log",
    cache: "omit-software"  # save space and time with between workflow caching (see docs)
    wrapper:
        "master/bio/reference/ensembl-genomes-sequence"


# Test rule: request chromosomes "III" and "MT" of S. pombe with a
# gzip-compressed output name (".fasta.gz"). Logs to its own file so that it
# does not clobber the log of other rules in this Snakefile.
rule get_multiple_chromosomes_gzipped:
    output:
        "refs/ensembl-genomes-chr3_and_chrMt.fasta.gz",
    params:
        species="schizosaccharomyces_pombe",
        assembly="ASM294v2",
        division="fungi",
        datatype="dna",
        release="62",
        chromosome=["III", "MT"],
    log:
        "logs/get_multiple_chromosomes_gzipped.log",
    cache: "omit-software"  # save space and time with between workflow caching (see docs)
    wrapper:
        "master/bio/reference/ensembl-genomes-sequence"
Loading
Loading