Skip to content

Commit a882720

Browse files
committed
phylogenetic: Support nextstrain run
Mostly workflow path updates to support `nextstrain run`. This should allow the default phylogenetic build to run with `nextstrain run mpox phylogenetic <analysis-dir>`. The following commit will add support for the other builds.
1 parent 4b95eb6 commit a882720

File tree

10 files changed: +79 -76 lines changed

phylogenetic/Snakefile

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ if version.parse(augur_version) < version.parse(min_augur_version):
1212

1313
if not config:
1414

15-
configfile: "defaults/hmpxv1/config.yaml"
15+
configfile: os.path.join(workflow.basedir, "defaults/hmpxv1/config.yaml")
1616

1717

1818
build_dir = "results"
@@ -49,7 +49,7 @@ include: "rules/export.smk"
4949
if "custom_rules" in config:
5050
for rule_file in config["custom_rules"]:
5151

52-
include: rule_file
52+
include: os.path.join(os.getcwd(), rule_file)
5353

5454

5555
rule clean:

phylogenetic/defaults/clade-i/config.yaml

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,15 @@
1-
reference: "defaults/clade-i/reference.fasta"
2-
genome_annotation: "defaults/clade-i/genome_annotation.gff3"
3-
genbank_reference: "defaults/clade-i/reference.gb"
4-
include: "defaults/clade-i/include.txt"
5-
exclude: "defaults/exclude.txt"
6-
clades: "defaults/clades.tsv"
7-
lat_longs: "defaults/lat_longs.tsv"
8-
color_ordering: "defaults/color_ordering.tsv"
9-
color_scheme: "defaults/color_schemes.tsv"
10-
auspice_config: "defaults/clade-i/auspice_config.json"
11-
description: "defaults/description.md"
12-
tree_mask: "defaults/clade-i/tree_mask.tsv"
1+
reference: "clade-i/reference.fasta"
2+
genome_annotation: "clade-i/genome_annotation.gff3"
3+
genbank_reference: "clade-i/reference.gb"
4+
include: "clade-i/include.txt"
5+
exclude: "exclude.txt"
6+
clades: "clades.tsv"
7+
lat_longs: "lat_longs.tsv"
8+
color_ordering: "color_ordering.tsv"
9+
color_scheme: "color_schemes.tsv"
10+
auspice_config: "clade-i/auspice_config.json"
11+
description: "description.md"
12+
tree_mask: "clade-i/tree_mask.tsv"
1313

1414
# Use `accession` as the ID column since `strain` currently contains duplicates¹.
1515
# ¹ https://github.com/nextstrain/mpox/issues/33
@@ -59,7 +59,7 @@ recency: true
5959
mask:
6060
from_beginning: 800
6161
from_end: 6422
62-
maskfile: "defaults/clade-i/mask.bed"
62+
maskfile: "clade-i/mask.bed"
6363

6464
colors:
6565
ignore_categories:

phylogenetic/defaults/hmpxv1/config.yaml

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,15 @@
1-
reference: "defaults/reference.fasta"
2-
genome_annotation: "defaults/genome_annotation.gff3"
3-
genbank_reference: "defaults/reference.gb"
4-
include: "defaults/hmpxv1/include.txt"
5-
exclude: "defaults/exclude.txt"
6-
clades: "defaults/clades.tsv"
7-
lat_longs: "defaults/lat_longs.tsv"
8-
color_ordering: "defaults/color_ordering.tsv"
9-
color_scheme: "defaults/color_schemes.tsv"
10-
auspice_config: "defaults/hmpxv1/auspice_config.json"
11-
description: "defaults/description.md"
12-
tree_mask: "defaults/tree_mask.tsv"
1+
reference: "reference.fasta"
2+
genome_annotation: "genome_annotation.gff3"
3+
genbank_reference: "reference.gb"
4+
include: "hmpxv1/include.txt"
5+
exclude: "exclude.txt"
6+
clades: "clades.tsv"
7+
lat_longs: "lat_longs.tsv"
8+
color_ordering: "color_ordering.tsv"
9+
color_scheme: "color_schemes.tsv"
10+
auspice_config: "hmpxv1/auspice_config.json"
11+
description: "description.md"
12+
tree_mask: "tree_mask.tsv"
1313

1414
# Use `accession` as the ID column since `strain` currently contains duplicates¹.
1515
# ¹ https://github.com/nextstrain/mpox/issues/33
@@ -101,4 +101,4 @@ recency: true
101101
mask:
102102
from_beginning: 800
103103
from_end: 6422
104-
maskfile: "defaults/mask.bed"
104+
maskfile: "mask.bed"

phylogenetic/defaults/hmpxv1_big/config.yaml

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,15 @@
1-
reference: "defaults/reference.fasta"
2-
genome_annotation: "defaults/genome_annotation.gff3"
3-
genbank_reference: "defaults/reference.gb"
4-
include: "defaults/hmpxv1_big/include.txt"
5-
exclude: "defaults/exclude.txt"
6-
clades: "defaults/clades.tsv"
7-
lat_longs: "defaults/lat_longs.tsv"
8-
color_ordering: "defaults/color_ordering.tsv"
9-
color_scheme: "defaults/color_schemes.tsv"
10-
auspice_config: "defaults/hmpxv1_big/auspice_config.json"
11-
description: "defaults/description.md"
12-
tree_mask: "defaults/tree_mask.tsv"
1+
reference: "reference.fasta"
2+
genome_annotation: "genome_annotation.gff3"
3+
genbank_reference: "reference.gb"
4+
include: "hmpxv1_big/include.txt"
5+
exclude: "exclude.txt"
6+
clades: "clades.tsv"
7+
lat_longs: "lat_longs.tsv"
8+
color_ordering: "color_ordering.tsv"
9+
color_scheme: "color_schemes.tsv"
10+
auspice_config: "hmpxv1_big/auspice_config.json"
11+
description: "description.md"
12+
tree_mask: "tree_mask.tsv"
1313

1414
# Use `accession` as the ID column since `strain` currently contains duplicates¹.
1515
# ¹ https://github.com/nextstrain/mpox/issues/33
@@ -64,4 +64,4 @@ recency: true
6464
mask:
6565
from_beginning: 800
6666
from_end: 6422
67-
maskfile: "defaults/mask.bed"
67+
maskfile: "mask.bed"

phylogenetic/defaults/mpxv/config.yaml

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,15 @@
1-
auspice_config: "defaults/mpxv/auspice_config.json"
2-
include: "defaults/mpxv/include.txt"
3-
exclude: "defaults/exclude.txt"
4-
reference: "defaults/reference.fasta"
5-
genome_annotation: "defaults/genome_annotation.gff3"
6-
genbank_reference: "defaults/reference.gb"
7-
lat_longs: "defaults/lat_longs.tsv"
8-
color_ordering: "defaults/color_ordering.tsv"
9-
color_scheme: "defaults/color_schemes.tsv"
10-
description: "defaults/description.md"
11-
clades: "defaults/clades.tsv"
12-
tree_mask: "defaults/tree_mask.tsv"
1+
auspice_config: "mpxv/auspice_config.json"
2+
include: "mpxv/include.txt"
3+
exclude: "exclude.txt"
4+
reference: "reference.fasta"
5+
genome_annotation: "genome_annotation.gff3"
6+
genbank_reference: "reference.gb"
7+
lat_longs: "lat_longs.tsv"
8+
color_ordering: "color_ordering.tsv"
9+
color_scheme: "color_schemes.tsv"
10+
description: "description.md"
11+
clades: "clades.tsv"
12+
tree_mask: "tree_mask.tsv"
1313

1414
# Use `accession` as the ID column since `strain` currently contains duplicates¹.
1515
# ¹ https://github.com/nextstrain/mpox/issues/33
@@ -94,4 +94,4 @@ recency: true
9494
mask:
9595
from_beginning: 1350
9696
from_end: 6422
97-
maskfile: "defaults/mask_overview.bed"
97+
maskfile: "mask_overview.bed"

phylogenetic/rules/annotate_phylogeny.smk

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,7 @@ rule translate:
6060
input:
6161
tree=build_dir + "/{build_name}/tree.nwk",
6262
node_data=build_dir + "/{build_name}/nt_muts.json",
63-
genome_annotation=config["genome_annotation"],
63+
genome_annotation=resolve_config_path(config["genome_annotation"]),
6464
output:
6565
node_data=build_dir + "/{build_name}/aa_muts.json",
6666
log:
@@ -120,7 +120,7 @@ rule clades:
120120
tree=build_dir + "/{build_name}/tree.nwk",
121121
aa_muts=build_dir + "/{build_name}/aa_muts.json",
122122
nuc_muts=build_dir + "/{build_name}/nt_muts.json",
123-
clades=config["clades"],
123+
clades=resolve_config_path(config["clades"]),
124124
output:
125125
node_data=build_dir + "/{build_name}/clades_raw.json",
126126
log:
@@ -154,7 +154,7 @@ rule rename_clades:
154154
r"""
155155
exec &> >(tee {log:q})
156156
157-
python scripts/clades_renaming.py \
157+
python {workflow.basedir}/scripts/clades_renaming.py \
158158
--input-node-data {input:q} \
159159
--output-node-data {output.node_data:q}
160160
"""
@@ -180,7 +180,7 @@ rule assign_clades_via_metadata:
180180
r"""
181181
exec &> >(tee {log:q})
182182
183-
python scripts/assign-clades-via-metadata.py \
183+
python {workflow.basedir}/scripts/assign-clades-via-metadata.py \
184184
--metadata {input.metadata:q} \
185185
--tree {input.tree:q} \
186186
--output-node-data {output.node_data:q}
@@ -201,7 +201,7 @@ rule mutation_context:
201201
r"""
202202
exec &> >(tee {log:q})
203203
204-
python3 scripts/mutation_context.py \
204+
python3 {workflow.basedir}/scripts/mutation_context.py \
205205
--tree {input.tree:q} \
206206
--mutations {input.node_data:q} \
207207
--output {output.node_data:q}
@@ -226,7 +226,7 @@ rule recency:
226226
r"""
227227
exec &> >(tee {log:q})
228228
229-
python3 scripts/construct-recency-from-submission-date.py \
229+
python3 {workflow.basedir}/scripts/construct-recency-from-submission-date.py \
230230
--metadata {input.metadata:q} \
231231
--metadata-id-columns {params.strain_id:q} \
232232
--output {output:q} 2>&1

phylogenetic/rules/config.smk

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,9 @@ from textwrap import dedent, indent
77
from typing import Union
88

99

10+
include: "../../shared/vendored/snakemake/config.smk"
11+
12+
1013
def as_list(config_param: Union[list,str]) -> list:
1114
if isinstance(config_param, list):
1215
return config_param

phylogenetic/rules/construct_phylogeny.smk

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ rule tree:
2121
"""
2222
input:
2323
alignment=build_dir + "/{build_name}/masked.fasta",
24-
tree_mask=config["tree_mask"],
24+
tree_mask=resolve_config_path(config["tree_mask"]),
2525
output:
2626
tree=build_dir + "/{build_name}/tree_raw.nwk",
2727
threads: workflow.cores
@@ -64,7 +64,7 @@ rule fix_tree:
6464
r"""
6565
exec &> >(tee {log:q})
6666
67-
python3 scripts/fix_tree.py \
67+
python3 {workflow.basedir}/scripts/fix_tree.py \
6868
--alignment {input.alignment:q} \
6969
--input-tree {input.tree:q} \
7070
{params.root} \

phylogenetic/rules/export.smk

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -43,16 +43,16 @@ rule remove_time:
4343
r"""
4444
exec &> >(tee {log:q})
4545
46-
python3 scripts/remove_timeinfo.py \
46+
python3 {workflow.basedir}/scripts/remove_timeinfo.py \
4747
--input-node-data {input:q} \
4848
--output-node-data {output:q}
4949
"""
5050

5151

5252
rule colors:
5353
input:
54-
ordering=config["color_ordering"],
55-
color_schemes=config["color_scheme"],
54+
ordering=resolve_config_path(config["color_ordering"]),
55+
color_schemes=resolve_config_path(config["color_scheme"]),
5656
metadata=build_dir + "/{build_name}/metadata.tsv",
5757
output:
5858
colors=build_dir + "/{build_name}/colors.tsv",
@@ -66,7 +66,7 @@ rule colors:
6666
r"""
6767
exec &> >(tee {log:q})
6868
69-
python3 scripts/assign-colors.py \
69+
python3 {workflow.basedir}/scripts/assign-colors.py \
7070
--ordering {input.ordering:q} \
7171
--color-schemes {input.color_schemes:q} \
7272
--output {output.colors:q} \
@@ -102,9 +102,9 @@ rule export:
102102
else []
103103
),
104104
colors=build_dir + "/{build_name}/colors.tsv",
105-
lat_longs=config["lat_longs"],
106-
description=config["description"],
107-
auspice_config=config["auspice_config"],
105+
lat_longs=resolve_config_path(config["lat_longs"]),
106+
description=resolve_config_path(config["description"]),
107+
auspice_config=resolve_config_path(config["auspice_config"]),
108108
output:
109109
auspice_json=build_dir + "/{build_name}/tree.json",
110110
root_sequence=build_dir + "/{build_name}/tree_root-sequence.json",

phylogenetic/rules/prepare_sequences.smk

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ rule filter:
6969
input:
7070
sequences="data/sequences.fasta",
7171
metadata="data/metadata.tsv",
72-
exclude=config["exclude"],
72+
exclude=resolve_config_path(config["exclude"]),
7373
output:
7474
sequences=build_dir + "/{build_name}/good_sequences.fasta",
7575
metadata=build_dir + "/{build_name}/good_metadata.tsv",
@@ -128,7 +128,7 @@ rule add_private_data:
128128
r"""
129129
exec &> >(tee {log:q})
130130
131-
python3 scripts/combine_data_sources.py \
131+
python3 {workflow.basedir}/scripts/combine_data_sources.py \
132132
--metadata nextstrain={input.metadata:q} private={input.private_metadata:q} \
133133
--sequences {input.sequences:q} {input.private_sequences:q} \
134134
--output-metadata {output.metadata:q} \
@@ -182,7 +182,7 @@ rule combine_samples:
182182
if config.get("private_metadata", False)
183183
else build_dir + "/{build_name}/good_metadata.tsv"
184184
),
185-
include=config["include"],
185+
include=resolve_config_path(config["include"]),
186186
output:
187187
sequences=build_dir + "/{build_name}/filtered.fasta",
188188
metadata=build_dir + "/{build_name}/metadata.tsv",
@@ -221,7 +221,7 @@ rule reverse_reverse_complements:
221221
r"""
222222
exec &> >(tee {log:q})
223223
224-
python3 scripts/reverse_reversed_sequences.py \
224+
python3 {workflow.basedir}/scripts/reverse_reversed_sequences.py \
225225
--metadata {input.metadata:q} \
226226
--sequences {input.sequences:q} \
227227
--output {output:q}
@@ -234,8 +234,8 @@ rule align:
234234
"""
235235
input:
236236
sequences=build_dir + "/{build_name}/reversed.fasta",
237-
reference=config["reference"],
238-
genome_annotation=config["genome_annotation"],
237+
reference=resolve_config_path(config["reference"]),
238+
genome_annotation=resolve_config_path(config["genome_annotation"]),
239239
output:
240240
alignment=build_dir + "/{build_name}/aligned.fasta",
241241
params:
@@ -279,7 +279,7 @@ rule mask:
279279
"""
280280
input:
281281
sequences=build_dir + "/{build_name}/aligned.fasta",
282-
mask=config["mask"]["maskfile"],
282+
mask=resolve_config_path(config["mask"]["maskfile"]),
283283
output:
284284
build_dir + "/{build_name}/masked.fasta",
285285
params:

0 commit comments

Comments
 (0)