Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,26 @@
*profile*
# Files generated by invoking Julia with --code-coverage
*.jl.cov
*.jl.*.cov

# Files generated by invoking Julia with --track-allocation
*.jl.mem

# System-specific files and directories generated by the BinaryProvider and BinDeps packages
# They contain absolute paths specific to the host computer, and so should not be committed
deps/deps.jl
deps/build.log
deps/downloads/
deps/usr/
deps/src/

# Build artifacts for creating documentation generated by the Documenter package
docs/build/
docs/site/

# File generated by Pkg, the package manager, based on a corresponding Project.toml
# It records a fixed state of all packages used by the project. As such, it should not be
# committed for packages, but should be committed for applications that require a static
# environment.
Manifest.toml

*profile*
3 changes: 3 additions & 0 deletions src/Evolve/Evolve.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,12 @@ using BioSequences
using StatsBase
using SubstitutionModels
using TreeTools
using FASTX
using Dates

export evolve!
export compute_mutations!
export write_seq2fasta

include("main.jl")

Expand Down
74 changes: 73 additions & 1 deletion src/Evolve/main.jl
Original file line number Diff line number Diff line change
@@ -1,22 +1,59 @@
"""
evolve!(t::Tree, L::Int, μ=1.; model = JC69(1.), seqkey=:seq)

initialize a random sequence of nucleotides of length `L` at the root, then simulate evolution
by changing nucleotides at random according to a probability distribution specified
by the chosen `model`, which takes the mutation rate μ and branch lengths of nodes as parameters.

The sequence at a node `n` can be accessed by `n.data.dat[seqkey]`, if a mutation has occured
on the branch between node `n` and its ancestor is given as a boolean in `n.data.dat["evolved"]`.
"""
function evolve!(t::Tree, L::Int, μ=1.; model = JC69(1.), seqkey=:seq)
t.root.data.dat[seqkey] = randseq(DNAAlphabet{4}(), L)
t.root.data.dat["evolved"]= false
for c in t.root.child
evolve!(c, model, μ, seqkey)
end
end

function evolve!(n::TreeNode, model, μ, seqkey=:seq)
n.data.dat["evolved"]= false
w = ProbabilityWeights(SubstitutionModels.P(model, μ * n.tau)[:,1], 1.)
n.data.dat[seqkey] = deepcopy(n.anc.data.dat[seqkey])
evolve!(n.data.dat[seqkey], w)
evolve_seq!(n, w, seqkey)
for c in n.child
evolve!(c, model, μ, seqkey)
end
end

function evolve_seq!(n::TreeNode, w, seqkey=:seq)
seq = n.data.dat[seqkey]
for (i,nt) in enumerate(seq)
newnt = sample(1:4, w)
if newnt != 1
n.data.dat["evolved"] = true
if nt == DNA_A
if newnt == 2; seq[i] = DNA_C
elseif newnt == 3; seq[i] = DNA_G
elseif newnt == 4; seq[i] = DNA_T; end
elseif nt == DNA_C
if newnt == 2; seq[i] = DNA_G
elseif newnt == 3; seq[i] = DNA_T
elseif newnt == 4; seq[i] = DNA_A; end
elseif nt == DNA_G
if newnt == 2; seq[i] = DNA_T
elseif newnt == 3; seq[i] = DNA_A
elseif newnt == 4; seq[i] = DNA_C; end
elseif nt == DNA_T
if newnt == 2; seq[i] = DNA_A
elseif newnt == 3; seq[i] = DNA_C
elseif newnt == 4; seq[i] = DNA_G; end
end
end
end
return nothing
end

function evolve!(seq::NucSeq{4, DNAAlphabet{4}}, Q)
Qt = Q'
for (i, nt) in enumerate(seq)
Expand Down Expand Up @@ -49,3 +86,38 @@ function onehot(c::DNA)
end
return s
end

"""
write_seq2fasta(t::Tree, fasta_name::String, output_dir::String, seqkey=:seq; only_terminals=false, remove_0_mutations=true)

write evolved sequences to a fasta file with name `fasta_name`, `only_terminals` specifies if only terminal sequences should be
written to the file and `remove_0_mutations` if only sequences with mutations on their branches should be written to the file.
"""
function write_seq2fasta(t::Tree, fasta_name::String, output_dir::String, seqkey=:seq;
only_terminals=false, remove_0_mutations=true, write_date=false, year_rate=4.73e-3)
mkpath(output_dir)
open(FASTA.Writer, output_dir *"/"*fasta_name* ".fasta") do w
if only_terminals
iter = POTleaves(t)
else
iter = POT(t)
end
for node in iter
if remove_0_mutations && !node.data.dat["evolved"]
continue
else
x = node.data.dat[seqkey]
if write_date
start_date = Date(2000,1,1)
dist = TreeTools.distance(node, t.root)/year_rate
date = start_date+Dates.Year(round(dist)) + Dates.Day(round((dist-round(dist))/365))
rec = FASTA.Record(node.label*"|"*string(date), x)
else
rec = FASTA.Record(node.label, x)
end
write(w, rec)
end
end
end

end