Skip to content
11 changes: 10 additions & 1 deletion src/haddock/libs/libalign.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,7 @@ def load_coords(
numbering_dic=None,
model2ref_chain_dict=None,
add_resname=None,
keep_hetatm: bool = False,
):
"""Load coordinates from PDB.

Expand All @@ -463,6 +464,9 @@ def load_coords(

add_resname : bool
use the residue name in the identifier

keep_hetatm : bool
Should HETATM lines be considered ?

Returns
-------
Expand All @@ -475,14 +479,19 @@ def load_coords(
coord_dic: CoordsDict = {}
chain_dic: ResDict = {}
idx: int = 0
# Set types of coordinates lines to extract
if keep_hetatm:
coordinates_line_to_extract = ("ATOM", "HETATM")
else:
coordinates_line_to_extract = ("ATOM", )
# Check filetype
if isinstance(pdb_f, PDBFile):
pdb_f = pdb_f.rel_path
# Read file
with open(pdb_f, "r") as fh:
for line in fh.readlines():
# Skip non ATOM records lines
if not line.startswith("ATOM"):
if not line.startswith(coordinates_line_to_extract):
continue
# Extract PDB line data
atom_name = line[slc_name].strip()
Expand Down
55 changes: 47 additions & 8 deletions src/haddock/modules/analysis/caprieval/capri.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def load_contacts(
cutoff: float = 5.0,
numbering_dic: Optional[dict[str, dict[int, int]]] = None,
model2ref_chain_dict: Optional[dict[str, str]] = None,
keep_hetatm: bool = False,
) -> set[tuple]:
"""Load residue-based contacts.

Expand All @@ -121,6 +122,8 @@ def load_contacts(
PDB file of the model to have its atoms identified
cutoff : float, optional
Cutoff distance for the interface identification.
keep_hetatm : bool
Should HETATM coordinates be considered ? (default False)

Returns
-------
Expand All @@ -136,6 +139,7 @@ def load_contacts(
atoms,
numbering_dic=numbering_dic,
model2ref_chain_dict=model2ref_chain_dict,
keep_hetatm=keep_hetatm,
)
# create coordinate arrays
coord_arrays: dict[str, NDFloat] = {}
Expand Down Expand Up @@ -217,6 +221,7 @@ def __init__(
self.atoms = self._load_atoms(model, reference, full=self.allatoms)
self.r_chain = params["receptor_chain"]
self.l_chains = params["ligand_chains"]
self.keep_hetatm = params["keep_hetatm"]
self.model2ref_numbering = None
self.model2ref_chain_dict = None
self.output_ss_fname = Path(f"capri_ss_{identificator}.tsv")
Expand All @@ -235,14 +240,21 @@ def calc_irmsd(self, cutoff: float = 5.0) -> None:
The cutoff distance for the intermolecular contacts.
"""
# Identify reference interface
ref_interface_resdic = self.identify_interface(self.reference, cutoff)
ref_interface_resdic = self.identify_interface(
self.reference,
cutoff,
keep_hetatm=self.keep_hetatm,
)

if len(ref_interface_resdic) == 0:
log.warning("No reference interface found")
else:
# Load interface coordinates
ref_coord_dic, _ = load_coords(
self.reference, self.atoms, ref_interface_resdic
self.reference,
self.atoms,
ref_interface_resdic,
keep_hetatm=self.keep_hetatm,
)
try:
mod_coord_dic, _ = load_coords(
Expand All @@ -251,6 +263,7 @@ def calc_irmsd(self, cutoff: float = 5.0) -> None:
ref_interface_resdic,
numbering_dic=self.model2ref_numbering,
model2ref_chain_dict=self.model2ref_chain_dict,
keep_hetatm=self.keep_hetatm,
)
except ALIGNError as alignerror:
log.warning(alignerror)
Expand Down Expand Up @@ -285,13 +298,18 @@ def calc_irmsd(self, cutoff: float = 5.0) -> None:

def calc_lrmsd(self) -> None:
"""Calculate the L-RMSD."""
ref_coord_dic, _ = load_coords(self.reference, self.atoms)
ref_coord_dic, _ = load_coords(
self.reference,
self.atoms,
keep_hetatm=self.keep_hetatm,
)
try:
mod_coord_dic, _ = load_coords(
self.model,
self.atoms,
numbering_dic=self.model2ref_numbering,
model2ref_chain_dict=self.model2ref_chain_dict,
keep_hetatm=self.keep_hetatm,
)
except ALIGNError as alignerror:
log.warning(alignerror)
Expand Down Expand Up @@ -390,11 +408,18 @@ def calc_ilrmsd(self, cutoff: float = 10.0) -> None:
The cutoff distance for the intermolecular contacts.
"""
# Identify interface
ref_interface_resdic = self.identify_interface(self.reference, cutoff)
ref_interface_resdic = self.identify_interface(
self.reference,
cutoff,
keep_hetatm=self.keep_hetatm,
)
# Load interface coordinates

ref_int_coord_dic, _ = load_coords(
self.reference, self.atoms, ref_interface_resdic
self.reference,
self.atoms,
ref_interface_resdic,
keep_hetatm=self.keep_hetatm,
)
try:
mod_int_coord_dic, _ = load_coords(
Expand All @@ -403,6 +428,7 @@ def calc_ilrmsd(self, cutoff: float = 10.0) -> None:
ref_interface_resdic,
numbering_dic=self.model2ref_numbering,
model2ref_chain_dict=self.model2ref_chain_dict,
keep_hetatm=self.keep_hetatm,
)
except ALIGNError as alignerror:
log.warning(alignerror)
Expand Down Expand Up @@ -491,14 +517,19 @@ def calc_fnat(self, cutoff: float = 5.0) -> None:
cutoff : float
The cutoff distance for the intermolecular contacts.
"""
ref_contacts = load_contacts(self.reference, cutoff)
ref_contacts = load_contacts(
self.reference,
cutoff,
keep_hetatm=self.keep_hetatm,
)
if len(ref_contacts) != 0:
try:
model_contacts = load_contacts(
self.model,
cutoff,
numbering_dic=self.model2ref_numbering, # type: ignore
model2ref_chain_dict=self.model2ref_chain_dict, # type: ignore
keep_hetatm=self.keep_hetatm,
)
except ALIGNError as alignerror:
log.warning(alignerror)
Expand All @@ -511,14 +542,19 @@ def calc_fnat(self, cutoff: float = 5.0) -> None:
def calc_global_rmsd(self) -> None:
"""Calculate the full structure RMSD."""
# Load reference atomic coordinates
ref_coord_dic, _ = load_coords(self.reference, self.atoms)
ref_coord_dic, _ = load_coords(
self.reference,
self.atoms,
keep_hetatm=self.keep_hetatm,
)
# Load model atomic coordinates
try:
model_coord_dic, _ = load_coords(
self.model,
self.atoms,
numbering_dic=self.model2ref_numbering,
model2ref_chain_dict=self.model2ref_chain_dict,
keep_hetatm=self.keep_hetatm,
)
except ALIGNError as alignerror:
log.warning(alignerror)
Expand Down Expand Up @@ -688,6 +724,7 @@ def _load_atoms(
def identify_interface(
pdb_f: PDBPath,
cutoff: float = 5.0,
keep_hetatm: bool = False,
) -> dict[str, list[int]]:
"""Identify the interface.

Expand All @@ -697,6 +734,8 @@ def identify_interface(
PDB file of the model to have its atoms identified
cutoff : float, optional
Cutoff distance for the interface identification.
keep_hetatm : bool
Should HETATM coordinates be considered ? (default False)

Returns
-------
Expand All @@ -707,7 +746,7 @@ def identify_interface(
pdb_f = pdb_f.rel_path

interface_resdic: dict[str, list[int]] = {}
contacts = load_contacts(pdb_f, cutoff)
contacts = load_contacts(pdb_f, cutoff, keep_hetatm=keep_hetatm)

for contact in contacts:
first_chain, first_resid, sec_chain, sec_resid = contact
Expand Down
10 changes: 10 additions & 0 deletions src/haddock/modules/analysis/caprieval/defaults.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -197,3 +197,13 @@ allatoms:
backbone atoms will be considered, otherwise all the heavy-atoms.
group: analysis
explevel: easy

keep_hetatm:
default: false
type: boolean
title: HETATM to be considered during the analysis.
short: HETATM to be considered during the analysis.
long: HETATM to be considered during the analysis. If false (default), only
ATOM coordinates lines will be used, otherwise the HETATM from reference will also be used.
group: analysis
explevel: easy
22 changes: 22 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,28 @@ def fixture_protlig_input_list():
yield [pdb_obj_1, pdb_obj_2]


@pytest.fixture(name="prot_HETATMlig_input_list")
def fixture_prot_HETATMlig_input_list():
"""Protein-Ligand with HETATM input."""
with (
tempfile.NamedTemporaryFile(delete=False, suffix=".pdb") as tmp_1,
tempfile.NamedTemporaryFile(delete=False, suffix=".pdb") as tmp_2,
):
dst_prot_1 = tmp_1.name
dst_prot_2 = tmp_2.name

src_prot_1 = Path(golden_data, "protlig_complex_1_HETATM.pdb")
src_prot_2 = Path(golden_data, "protlig_complex_2.pdb")

shutil.copy(src_prot_1, dst_prot_1)
shutil.copy(src_prot_2, dst_prot_2)

pdb_obj_1 = PDBFile(file_name=dst_prot_1, path=Path(dst_prot_1).parent)
pdb_obj_2 = PDBFile(file_name=dst_prot_2, path=Path(dst_prot_2).parent)

yield [pdb_obj_1, pdb_obj_2]


@pytest.fixture(name="protprot_onechain_list")
def fixture_protprot_onechain_list() -> Generator[list[PDBFile], Any, Any]:
"""Protein-Protein complex with a single chain ID."""
Expand Down
Loading