Skip to content

Commit 26f83e6

Browse files
authored
Merge pull request #1279 from haddocking/caprieval-hetatm
adding support for HETATM in caprieval
2 parents 6912875 + 4e1fcfa commit 26f83e6

File tree

6 files changed

+6090
-11
lines changed

6 files changed

+6090
-11
lines changed

src/haddock/libs/libalign.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,7 @@ def load_coords(
445445
numbering_dic=None,
446446
model2ref_chain_dict=None,
447447
add_resname=None,
448+
keep_hetatm: bool = False,
448449
):
449450
"""Load coordinates from PDB.
450451
@@ -463,6 +464,9 @@ def load_coords(
463464
464465
add_resname : bool
465466
use the residue name in the identifier
467+
468+
keep_hetatm : bool
469+
Should HETATM lines be considered ?
466470
467471
Returns
468472
-------
@@ -475,14 +479,19 @@ def load_coords(
475479
coord_dic: CoordsDict = {}
476480
chain_dic: ResDict = {}
477481
idx: int = 0
482+
# Set types of coordinates lines to extract
483+
if keep_hetatm:
484+
coordinates_line_to_extract = ("ATOM", "HETATM")
485+
else:
486+
coordinates_line_to_extract = ("ATOM", )
478487
# Check filetype
479488
if isinstance(pdb_f, PDBFile):
480489
pdb_f = pdb_f.rel_path
481490
# Read file
482491
with open(pdb_f, "r") as fh:
483492
for line in fh.readlines():
484493
# Skip non ATOM records lines
485-
if not line.startswith("ATOM"):
494+
if not line.startswith(coordinates_line_to_extract):
486495
continue
487496
# Extract PDB line data
488497
atom_name = line[slc_name].strip()

src/haddock/modules/analysis/caprieval/capri.py

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ def load_contacts(
112112
cutoff: float = 5.0,
113113
numbering_dic: Optional[dict[str, dict[int, int]]] = None,
114114
model2ref_chain_dict: Optional[dict[str, str]] = None,
115+
keep_hetatm: bool = False,
115116
) -> set[tuple]:
116117
"""Load residue-based contacts.
117118
@@ -121,6 +122,8 @@ def load_contacts(
121122
PDB file of the model to have its atoms identified
122123
cutoff : float, optional
123124
Cutoff distance for the interface identification.
125+
keep_hetatm : bool
126+
Should HETATM coordinates be considered ? (default False)
124127
125128
Returns
126129
-------
@@ -136,6 +139,7 @@ def load_contacts(
136139
atoms,
137140
numbering_dic=numbering_dic,
138141
model2ref_chain_dict=model2ref_chain_dict,
142+
keep_hetatm=keep_hetatm,
139143
)
140144
# create coordinate arrays
141145
coord_arrays: dict[str, NDFloat] = {}
@@ -217,6 +221,7 @@ def __init__(
217221
self.atoms = self._load_atoms(model, reference, full=self.allatoms)
218222
self.r_chain = params["receptor_chain"]
219223
self.l_chains = params["ligand_chains"]
224+
self.keep_hetatm = params["keep_hetatm"]
220225
self.model2ref_numbering = None
221226
self.model2ref_chain_dict = None
222227
self.output_ss_fname = Path(f"capri_ss_{identificator}.tsv")
@@ -235,14 +240,21 @@ def calc_irmsd(self, cutoff: float = 5.0) -> None:
235240
The cutoff distance for the intermolecular contacts.
236241
"""
237242
# Identify reference interface
238-
ref_interface_resdic = self.identify_interface(self.reference, cutoff)
243+
ref_interface_resdic = self.identify_interface(
244+
self.reference,
245+
cutoff,
246+
keep_hetatm=self.keep_hetatm,
247+
)
239248

240249
if len(ref_interface_resdic) == 0:
241250
log.warning("No reference interface found")
242251
else:
243252
# Load interface coordinates
244253
ref_coord_dic, _ = load_coords(
245-
self.reference, self.atoms, ref_interface_resdic
254+
self.reference,
255+
self.atoms,
256+
ref_interface_resdic,
257+
keep_hetatm=self.keep_hetatm,
246258
)
247259
try:
248260
mod_coord_dic, _ = load_coords(
@@ -251,6 +263,7 @@ def calc_irmsd(self, cutoff: float = 5.0) -> None:
251263
ref_interface_resdic,
252264
numbering_dic=self.model2ref_numbering,
253265
model2ref_chain_dict=self.model2ref_chain_dict,
266+
keep_hetatm=self.keep_hetatm,
254267
)
255268
except ALIGNError as alignerror:
256269
log.warning(alignerror)
@@ -285,13 +298,18 @@ def calc_irmsd(self, cutoff: float = 5.0) -> None:
285298

286299
def calc_lrmsd(self) -> None:
287300
"""Calculate the L-RMSD."""
288-
ref_coord_dic, _ = load_coords(self.reference, self.atoms)
301+
ref_coord_dic, _ = load_coords(
302+
self.reference,
303+
self.atoms,
304+
keep_hetatm=self.keep_hetatm,
305+
)
289306
try:
290307
mod_coord_dic, _ = load_coords(
291308
self.model,
292309
self.atoms,
293310
numbering_dic=self.model2ref_numbering,
294311
model2ref_chain_dict=self.model2ref_chain_dict,
312+
keep_hetatm=self.keep_hetatm,
295313
)
296314
except ALIGNError as alignerror:
297315
log.warning(alignerror)
@@ -390,11 +408,18 @@ def calc_ilrmsd(self, cutoff: float = 10.0) -> None:
390408
The cutoff distance for the intermolecular contacts.
391409
"""
392410
# Identify interface
393-
ref_interface_resdic = self.identify_interface(self.reference, cutoff)
411+
ref_interface_resdic = self.identify_interface(
412+
self.reference,
413+
cutoff,
414+
keep_hetatm=self.keep_hetatm,
415+
)
394416
# Load interface coordinates
395417

396418
ref_int_coord_dic, _ = load_coords(
397-
self.reference, self.atoms, ref_interface_resdic
419+
self.reference,
420+
self.atoms,
421+
ref_interface_resdic,
422+
keep_hetatm=self.keep_hetatm,
398423
)
399424
try:
400425
mod_int_coord_dic, _ = load_coords(
@@ -403,6 +428,7 @@ def calc_ilrmsd(self, cutoff: float = 10.0) -> None:
403428
ref_interface_resdic,
404429
numbering_dic=self.model2ref_numbering,
405430
model2ref_chain_dict=self.model2ref_chain_dict,
431+
keep_hetatm=self.keep_hetatm,
406432
)
407433
except ALIGNError as alignerror:
408434
log.warning(alignerror)
@@ -491,14 +517,19 @@ def calc_fnat(self, cutoff: float = 5.0) -> None:
491517
cutoff : float
492518
The cutoff distance for the intermolecular contacts.
493519
"""
494-
ref_contacts = load_contacts(self.reference, cutoff)
520+
ref_contacts = load_contacts(
521+
self.reference,
522+
cutoff,
523+
keep_hetatm=self.keep_hetatm,
524+
)
495525
if len(ref_contacts) != 0:
496526
try:
497527
model_contacts = load_contacts(
498528
self.model,
499529
cutoff,
500530
numbering_dic=self.model2ref_numbering, # type: ignore
501531
model2ref_chain_dict=self.model2ref_chain_dict, # type: ignore
532+
keep_hetatm=self.keep_hetatm,
502533
)
503534
except ALIGNError as alignerror:
504535
log.warning(alignerror)
@@ -511,14 +542,19 @@ def calc_fnat(self, cutoff: float = 5.0) -> None:
511542
def calc_global_rmsd(self) -> None:
512543
"""Calculate the full structure RMSD."""
513544
# Load reference atomic coordinates
514-
ref_coord_dic, _ = load_coords(self.reference, self.atoms)
545+
ref_coord_dic, _ = load_coords(
546+
self.reference,
547+
self.atoms,
548+
keep_hetatm=self.keep_hetatm,
549+
)
515550
# Load model atomic coordinates
516551
try:
517552
model_coord_dic, _ = load_coords(
518553
self.model,
519554
self.atoms,
520555
numbering_dic=self.model2ref_numbering,
521556
model2ref_chain_dict=self.model2ref_chain_dict,
557+
keep_hetatm=self.keep_hetatm,
522558
)
523559
except ALIGNError as alignerror:
524560
log.warning(alignerror)
@@ -688,6 +724,7 @@ def _load_atoms(
688724
def identify_interface(
689725
pdb_f: PDBPath,
690726
cutoff: float = 5.0,
727+
keep_hetatm: bool = False,
691728
) -> dict[str, list[int]]:
692729
"""Identify the interface.
693730
@@ -697,6 +734,8 @@ def identify_interface(
697734
PDB file of the model to have its atoms identified
698735
cutoff : float, optional
699736
Cutoff distance for the interface identification.
737+
keep_hetatm : bool
738+
Should HETATM coordinates be considered ? (default False)
700739
701740
Returns
702741
-------
@@ -707,7 +746,7 @@ def identify_interface(
707746
pdb_f = pdb_f.rel_path
708747

709748
interface_resdic: dict[str, list[int]] = {}
710-
contacts = load_contacts(pdb_f, cutoff)
749+
contacts = load_contacts(pdb_f, cutoff, keep_hetatm=keep_hetatm)
711750

712751
for contact in contacts:
713752
first_chain, first_resid, sec_chain, sec_resid = contact

src/haddock/modules/analysis/caprieval/defaults.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,3 +197,13 @@ allatoms:
197197
backbone atoms will be considered, otherwise all the heavy-atoms.
198198
group: analysis
199199
explevel: easy
200+
201+
keep_hetatm:
202+
default: false
203+
type: boolean
204+
title: HETATM to be considered during the analysis.
205+
short: HETATM to be considered during the analysis.
206+
long: HETATM to be considered during the analysis. If false (default), only
207+
ATOM coordinates lines will be used, otherwise the HETATM from reference will also be used.
208+
group: analysis
209+
explevel: easy

tests/conftest.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,28 @@ def fixture_protlig_input_list():
103103
yield [pdb_obj_1, pdb_obj_2]
104104

105105

106+
@pytest.fixture(name="prot_HETATMlig_input_list")
107+
def fixture_prot_HETATMlig_input_list():
108+
"""Protein-Ligand with HETATM input."""
109+
with (
110+
tempfile.NamedTemporaryFile(delete=False, suffix=".pdb") as tmp_1,
111+
tempfile.NamedTemporaryFile(delete=False, suffix=".pdb") as tmp_2,
112+
):
113+
dst_prot_1 = tmp_1.name
114+
dst_prot_2 = tmp_2.name
115+
116+
src_prot_1 = Path(golden_data, "protlig_complex_1_HETATM.pdb")
117+
src_prot_2 = Path(golden_data, "protlig_complex_2.pdb")
118+
119+
shutil.copy(src_prot_1, dst_prot_1)
120+
shutil.copy(src_prot_2, dst_prot_2)
121+
122+
pdb_obj_1 = PDBFile(file_name=dst_prot_1, path=Path(dst_prot_1).parent)
123+
pdb_obj_2 = PDBFile(file_name=dst_prot_2, path=Path(dst_prot_2).parent)
124+
125+
yield [pdb_obj_1, pdb_obj_2]
126+
127+
106128
@pytest.fixture(name="protprot_onechain_list")
107129
def fixture_protprot_onechain_list() -> Generator[list[PDBFile], Any, Any]:
108130
"""Protein-Protein complex with a single chain ID."""

0 commit comments

Comments
 (0)