Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 50 additions & 35 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ repository = "https://github.com/pydna-group/pydna/tree/master"
version = "6.0.0-a.24.post.17+b7b559bd66"
[tool.poetry.dependencies]
appdirs = ">=1.4.4"
biopython = "1.85"
biopython = "^1.86"
cai2 = { version = ">=1.0.5", optional = true }
matplotlib = { version = ">=3.4.3", optional = true }
networkx = ">=2.8.8"
Expand Down
43 changes: 36 additions & 7 deletions src/pydna/alphabet.py
Original file line number Diff line number Diff line change
Expand Up @@ -887,7 +887,7 @@ def regex_ss_melt_factory(length: int) -> re.Pattern:
return re.compile(regex.encode("ascii"))


def regex_ds_melt_factory(length: int) -> re.Pattern:
def regex_ds_melt_factory(length: int, circular: bool) -> re.Pattern:
"""
A regular expression for finding double-stranded regions flanked by single-stranded DNA
that can be melted to shed multiple double stranded fragments.
Expand All @@ -907,11 +907,35 @@ def regex_ds_melt_factory(length: int) -> re.Pattern:
aaaG TTACAttt <-- "TTA" is found by the regex for length <= 3
tttCTAAT Taaa

The name of the capture groups (watson and crick) identifies the ssDNA
strand on the left side of the dsDNA. For example:

::
gCtC CTTCtC CTg
GaGAAGAAGaGAAGA

The first capture group CtC starts with ssDNA on the watson strand,
then dsDNA, then ssDNA on the crick strand and the match will be
``{'watson': b'CtC', 'crick': None}``.

The second capture group CTg starts with ssDNA on the crick strand,
then dsDNA, then ssDNA on the watson strand and the match will be
``{'watson': None, 'crick': b'CTg'}``.

This can be slightly confusing if there are no overhangs, for example:

::
CtC CTTCtC CT
GaGAAGAAGaGAAGA

Will give ``{'watson': b'CtC', 'crick': None}``, because it "could have
started" with the ssDNA on the watson strand, but not on the crick strand.

Examples
--------

>>> from pydna.dseq import Dseq
>>> regex = regex_ds_melt_factory(3)
>>> regex = regex_ds_melt_factory(3, False)
>>> s = Dseq("aaaGFTTAIAttt")
>>> s
Dseq(-13)
Expand All @@ -926,21 +950,26 @@ def regex_ds_melt_factory(length: int) -> re.Pattern:
length : int
Max length of double stranded region flanked by single stranded
regions.
circular : bool
Whether the sequence is circular.

Returns
-------
TYPE
re.Pattern
regular expression object.

"""

start_if_not_circular = "|^" if not circular else ""
end_if_not_circular = "|$" if not circular else ""

regex = (
f"(?P<watson>((?<=[{ss_letters_watson}])|^)"
f"(?P<watson>((?<=[{ss_letters_watson}]){start_if_not_circular})"
f"([{ds_letters}]{{1,{length}}})"
f"((?=[^{ss_letters_watson}{ds_letters}])|$))|"
f"(?P<crick>((?<=[{ss_letters_crick}])|^)"
f"((?=[{ss_letters_crick}]){end_if_not_circular}))|"
f"(?P<crick>((?<=[{ss_letters_crick}]){start_if_not_circular})"
f"([{ds_letters}]{{1,{length}}})"
f"((?=[^{ss_letters_crick}{ds_letters}])|$))"
f"((?=[{ss_letters_watson}]){end_if_not_circular}))"
)

return re.compile(regex.encode("ascii"))
Expand Down
9 changes: 2 additions & 7 deletions src/pydna/cre_lox.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from Bio.Seq import reverse_complement
from pydna.sequence_regex import compute_regex_site, dseqrecord_finditer
from Bio.SeqFeature import Location, SimpleLocation, SeqFeature
from pydna.utils import shift_location
from pydna.utils import shift_location, deduplicate

# We create a dictionary to map ambiguous bases to their consensus base
# For example, ambigous_base_dict['ACGT'] -> 'N'
Expand Down Expand Up @@ -58,12 +58,7 @@ def cre_loxP_overlap(
value_y = match_y.group()
if value_x[13:21] == value_y[13:21]:
out.append((match_x.start() + 13, match_y.start() + 13, 8))
# Unique values (keeping the order)
unique_out = []
for item in out:
if item not in unique_out:
unique_out.append(item)
return unique_out
return deduplicate(out)


loxP_dict = {
Expand Down
Loading