haddocking · VGPReys · Aug 5, 2025 · May 28, 2025 · May 28, 2025 · May 29, 2025
diff --git a/src/haddock/libs/libalign.py b/src/haddock/libs/libalign.py
@@ -445,6 +445,7 @@ def load_coords(
     numbering_dic=None,
     model2ref_chain_dict=None,
     add_resname=None,
+    keep_hetatm: bool = False,
 ):
     """Load coordinates from PDB.
 
@@ -463,6 +464,9 @@ def load_coords(
 
     add_resname : bool
         use the residue name in the identifier
+
+    keep_hetatm : bool
+        Should HETATM lines be considered ?
 
     Returns
     -------
@@ -475,14 +479,19 @@ def load_coords(
     coord_dic: CoordsDict = {}
     chain_dic: ResDict = {}
     idx: int = 0
+    # Set types of coordinates lines to extract
+    if keep_hetatm:
+        coordinates_line_to_extract = ("ATOM", "HETATM")
+    else:
+        coordinates_line_to_extract = ("ATOM", )
     # Check filetype
     if isinstance(pdb_f, PDBFile):
         pdb_f = pdb_f.rel_path
     # Read file
     with open(pdb_f, "r") as fh:
         for line in fh.readlines():
             # Skip non ATOM records lines
-            if not line.startswith("ATOM"):
+            if not line.startswith(coordinates_line_to_extract):
                 continue
             # Extract PDB line data
             atom_name = line[slc_name].strip()

diff --git a/src/haddock/modules/analysis/caprieval/capri.py b/src/haddock/modules/analysis/caprieval/capri.py
@@ -112,6 +112,7 @@ def load_contacts(
     cutoff: float = 5.0,
     numbering_dic: Optional[dict[str, dict[int, int]]] = None,
     model2ref_chain_dict: Optional[dict[str, str]] = None,
+    keep_hetatm: bool = False,
 ) -> set[tuple]:
     """Load residue-based contacts.
 
@@ -121,6 +122,8 @@ def load_contacts(
         PDB file of the model to have its atoms identified
     cutoff : float, optional
         Cutoff distance for the interface identification.
+    keep_hetatm : bool
+        Should HETATM coordinates be considered ? (default False)
 
     Returns
     -------
@@ -136,6 +139,7 @@ def load_contacts(
         atoms,
         numbering_dic=numbering_dic,
         model2ref_chain_dict=model2ref_chain_dict,
+        keep_hetatm=keep_hetatm,
     )
     # create coordinate arrays
     coord_arrays: dict[str, NDFloat] = {}
@@ -217,6 +221,7 @@ def __init__(
         self.atoms = self._load_atoms(model, reference, full=self.allatoms)
         self.r_chain = params["receptor_chain"]
         self.l_chains = params["ligand_chains"]
+        self.keep_hetatm = params["keep_hetatm"]
         self.model2ref_numbering = None
         self.model2ref_chain_dict = None
         self.output_ss_fname = Path(f"capri_ss_{identificator}.tsv")
@@ -235,14 +240,21 @@ def calc_irmsd(self, cutoff: float = 5.0) -> None:
             The cutoff distance for the intermolecular contacts.
         """
         # Identify reference interface
-        ref_interface_resdic = self.identify_interface(self.reference, cutoff)
+        ref_interface_resdic = self.identify_interface(
+            self.reference,
+            cutoff,
+            keep_hetatm=self.keep_hetatm,
+            )
 
         if len(ref_interface_resdic) == 0:
             log.warning("No reference interface found")
         else:
             # Load interface coordinates
             ref_coord_dic, _ = load_coords(
-                self.reference, self.atoms, ref_interface_resdic
+                self.reference,
+                self.atoms,
+                ref_interface_resdic,
+                keep_hetatm=self.keep_hetatm,
             )
             try:
                 mod_coord_dic, _ = load_coords(
@@ -251,6 +263,7 @@ def calc_irmsd(self, cutoff: float = 5.0) -> None:
                     ref_interface_resdic,
                     numbering_dic=self.model2ref_numbering,
                     model2ref_chain_dict=self.model2ref_chain_dict,
+                    keep_hetatm=self.keep_hetatm,
                 )
             except ALIGNError as alignerror:
                 log.warning(alignerror)
@@ -285,13 +298,18 @@ def calc_irmsd(self, cutoff: float = 5.0) -> None:
 
     def calc_lrmsd(self) -> None:
         """Calculate the L-RMSD."""
-        ref_coord_dic, _ = load_coords(self.reference, self.atoms)
+        ref_coord_dic, _ = load_coords(
+            self.reference,
+            self.atoms,
+            keep_hetatm=self.keep_hetatm,
+            )
         try:
             mod_coord_dic, _ = load_coords(
                 self.model,
                 self.atoms,
                 numbering_dic=self.model2ref_numbering,
                 model2ref_chain_dict=self.model2ref_chain_dict,
+                keep_hetatm=self.keep_hetatm,
             )
         except ALIGNError as alignerror:
             log.warning(alignerror)
@@ -390,11 +408,18 @@ def calc_ilrmsd(self, cutoff: float = 10.0) -> None:
             The cutoff distance for the intermolecular contacts.
         """
         # Identify interface
-        ref_interface_resdic = self.identify_interface(self.reference, cutoff)
+        ref_interface_resdic = self.identify_interface(
+            self.reference,
+            cutoff,
+            keep_hetatm=self.keep_hetatm,
+            )
         # Load interface coordinates
 
         ref_int_coord_dic, _ = load_coords(
-            self.reference, self.atoms, ref_interface_resdic
+            self.reference,
+            self.atoms,
+            ref_interface_resdic,
+            keep_hetatm=self.keep_hetatm,
         )
         try:
             mod_int_coord_dic, _ = load_coords(
@@ -403,6 +428,7 @@ def calc_ilrmsd(self, cutoff: float = 10.0) -> None:
                 ref_interface_resdic,
                 numbering_dic=self.model2ref_numbering,
                 model2ref_chain_dict=self.model2ref_chain_dict,
+                keep_hetatm=self.keep_hetatm,
             )
         except ALIGNError as alignerror:
             log.warning(alignerror)
@@ -491,14 +517,19 @@ def calc_fnat(self, cutoff: float = 5.0) -> None:
         cutoff : float
             The cutoff distance for the intermolecular contacts.
         """
-        ref_contacts = load_contacts(self.reference, cutoff)
+        ref_contacts = load_contacts(
+            self.reference,
+            cutoff,
+            keep_hetatm=self.keep_hetatm,
+            )
         if len(ref_contacts) != 0:
             try:
                 model_contacts = load_contacts(
                     self.model,
                     cutoff,
                     numbering_dic=self.model2ref_numbering,  # type: ignore
                     model2ref_chain_dict=self.model2ref_chain_dict,  # type: ignore
+                    keep_hetatm=self.keep_hetatm,
                 )
             except ALIGNError as alignerror:
                 log.warning(alignerror)
@@ -511,14 +542,19 @@ def calc_fnat(self, cutoff: float = 5.0) -> None:
     def calc_global_rmsd(self) -> None:
         """Calculate the full structure RMSD."""
         # Load reference atomic coordinates
-        ref_coord_dic, _ = load_coords(self.reference, self.atoms)
+        ref_coord_dic, _ = load_coords(
+            self.reference,
+            self.atoms,
+            keep_hetatm=self.keep_hetatm,
+            )
         # Load model atomic coordinates
         try:
             model_coord_dic, _ = load_coords(
                 self.model,
                 self.atoms,
                 numbering_dic=self.model2ref_numbering,
                 model2ref_chain_dict=self.model2ref_chain_dict,
+                keep_hetatm=self.keep_hetatm,
             )
         except ALIGNError as alignerror:
             log.warning(alignerror)
@@ -688,6 +724,7 @@ def _load_atoms(
     def identify_interface(
         pdb_f: PDBPath,
         cutoff: float = 5.0,
+        keep_hetatm: bool = False,
     ) -> dict[str, list[int]]:
         """Identify the interface.
 
@@ -697,6 +734,8 @@ def identify_interface(
             PDB file of the model to have its atoms identified
         cutoff : float, optional
             Cutoff distance for the interface identification.
+        keep_hetatm : bool
+            Should HETATM coordinates be considered ? (default False)
 
         Returns
         -------
@@ -707,7 +746,7 @@ def identify_interface(
             pdb_f = pdb_f.rel_path
 
         interface_resdic: dict[str, list[int]] = {}
-        contacts = load_contacts(pdb_f, cutoff)
+        contacts = load_contacts(pdb_f, cutoff, keep_hetatm=keep_hetatm)
 
         for contact in contacts:
             first_chain, first_resid, sec_chain, sec_resid = contact

diff --git a/src/haddock/modules/analysis/caprieval/defaults.yaml b/src/haddock/modules/analysis/caprieval/defaults.yaml
@@ -197,3 +197,13 @@ allatoms:
         backbone atoms will be considered, otherwise all the heavy-atoms.
   group: analysis
   explevel: easy
+
+keep_hetatm:
+  default: false
+  type: boolean
+  title: HETATM to be considered during the analysis.
+  short: HETATM to be considered during the analysis.
+  long: HETATM to be considered during the analysis. If false (default), only
+        ATOM coordinates lines will be used, otherwise the HETATM from reference will also be used.
+  group: analysis
+  explevel: easy
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -103,6 +103,28 @@ def fixture_protlig_input_list():
         yield [pdb_obj_1, pdb_obj_2]
 
 
+@pytest.fixture(name="prot_HETATMlig_input_list")
+def fixture_prot_HETATMlig_input_list():
+    """Protein-Ligand with HETATM input."""
+    with (
+        tempfile.NamedTemporaryFile(delete=False, suffix=".pdb") as tmp_1,
+        tempfile.NamedTemporaryFile(delete=False, suffix=".pdb") as tmp_2,
+    ):
+        dst_prot_1 = tmp_1.name
+        dst_prot_2 = tmp_2.name
+
+        src_prot_1 = Path(golden_data, "protlig_complex_1_HETATM.pdb")
+        src_prot_2 = Path(golden_data, "protlig_complex_2.pdb")
+
+        shutil.copy(src_prot_1, dst_prot_1)
+        shutil.copy(src_prot_2, dst_prot_2)
+
+        pdb_obj_1 = PDBFile(file_name=dst_prot_1, path=Path(dst_prot_1).parent)
+        pdb_obj_2 = PDBFile(file_name=dst_prot_2, path=Path(dst_prot_2).parent)
+
+        yield [pdb_obj_1, pdb_obj_2]
+
+
 @pytest.fixture(name="protprot_onechain_list")
 def fixture_protprot_onechain_list() -> Generator[list[PDBFile], Any, Any]:
     """Protein-Protein complex with a single chain ID."""