From 74ac0202b57c35c25629f018e7a0c45bff3e8953 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 11 Jun 2025 12:12:55 +0200 Subject: [PATCH 1/4] adding new restrain_ligand subcommand to cli restraints --- src/haddock/clis/cli_restraints.py | 13 +- .../clis/restraints/restrain_ligand.py | 213 ++++++++++++++++++ 2 files changed, 225 insertions(+), 1 deletion(-) create mode 100644 src/haddock/clis/restraints/restrain_ligand.py diff --git a/src/haddock/clis/cli_restraints.py b/src/haddock/clis/cli_restraints.py index 0848024ef1..30807a7717 100644 --- a/src/haddock/clis/cli_restraints.py +++ b/src/haddock/clis/cli_restraints.py @@ -50,7 +50,10 @@ add_rand_removal_arguments, main as random_removal, ) - +from haddock.clis.restraints.restrain_ligand import ( + add_restrain_ligand_arguments, + main as restrain_ligand, + ) # Command line interface parser ap = argparse.ArgumentParser( @@ -108,6 +111,14 @@ restraints_random_removal_subcommand ) +# restrain_ligand subcommand +restraints_restrain_ligand_subcommand = subparsers.add_parser("restrain_ligand") +restraints_restrain_ligand_subcommand.set_defaults(func=restrain_ligand) +restraints_restrain_ligand_subcommand = add_restrain_ligand_arguments( + restraints_restrain_ligand_subcommand +) + + def _ap(): return ap diff --git a/src/haddock/clis/restraints/restrain_ligand.py b/src/haddock/clis/restraints/restrain_ligand.py new file mode 100644 index 0000000000..0bdf13071e --- /dev/null +++ b/src/haddock/clis/restraints/restrain_ligand.py @@ -0,0 +1,213 @@ +"""haddock3-restraints restrain_ligand subcommand. + +Given an input PDB file and a residue name (ligands), the tool will create +unambiguous restraints to keep this ligand in place during the refinements. + + +Usage: + haddock3-restraints restrain_ligand -r -d -n + +positional arguments: + pdb_file Input PDB file. + ligand_name Residue name. + +options: + -h, --help show this help message and exit + -r RADIUS, --radius RADIUS + Radius used for neighbors search around center of mass of ligand. + (default: 10.0) + -n MAX_RESTRAINTS, --max-restraints MAX_RESTRAINTS + Maximum number of restraints to return. (default: 200) + -d DEVIATION, --deviation DEVIATION + Allowed deviation from actual distance. (default: 1.0) +""" + +import os +import sys +import random +from pathlib import Path + +import numpy as np +from Bio.PDB import PDBParser +from Bio.PDB import NeighborSearch + +from haddock.core.typing import Union + + +def add_restrain_ligand_arguments(restrain_ligand_subcommand): + """Add arguments to the random removal subcommand.""" + restrain_ligand_subcommand.add_argument( + "pdb_file", + type=str, + help="Input PDB file.", + ) + restrain_ligand_subcommand.add_argument( + "ligand_name", + type=str, + help="Name of the residue for which restraints must be generated.", + ) + restrain_ligand_subcommand.add_argument( + "-r", + "--radius", + help=( + "Radius used for neighbors search around " + "center of mass of ligand. (default: %(default)s)" + ), + required=False, + default=10.0, + type=float, + ) + restrain_ligand_subcommand.add_argument( + "-d", + "--deviation", + help="Allowed deviation from actual distance. (default: %(default)s)", + required=False, + default=1.0, + type=float, + ) + restrain_ligand_subcommand.add_argument( + "-n", + "--max-restraints", + help="Maximum number of restraints to return. (default: %(default)s)", + required=False, + default=200, + ) + return restrain_ligand_subcommand + + +def restrain_ligand( + pdbfile: Union[str, Path], + ligand_name: str, + radius: float = 10.0, + deviation: float = 1.0, + max_restraints: int = 20, + ) -> str: + """Generate unambiguous restraints to keep a ligand in place. + + Parameters + ---------- + pdbfile : Union[str, Path] + Path to a PDB file containing a ligand + ligand_name : str + Residue name of the ligand to work with + radius : float, optional + Radius used for neighbors search around center of mass of ligand, by default 10.0 + deviation : float, optional + Allowed deviation from actual distance, by default 1.0 + max_restraints : int, optional + Maximum number of restraints to return, by default 20 + + Returns + ------- + unambig_str : str + The actual unambiguous restraints as a string. + """ + # Read in structure + pdb_parser = PDBParser(QUIET=1) + structure = pdb_parser.get_structure("", pdbfile) + + # Remove hydrogens + atom_lst = list(structure.get_atoms()) + for atom in atom_lst: + if atom.element == 'H': + res = atom.parent + res.detach_child(atom.name) + + # Try to find the ligand in the structure + ligand = None + for residue in structure.get_residues(): + if residue.resname.strip() == ligand_name.strip(): + ligand = residue + break + + if not ligand: + print(f"[!!] Ligand residue '{ligand_name}' not found in structure") + sys.exit(1) + + # Calculate center of mass of the ligand + ligand_com = list(map(lambda x: sum(x)/len(x), zip(*[at.coord for at in ligand]))) + ligand_com = np.asarray(ligand_com, dtype=np.float32) + + # Calculate neighbors considering only aminoacid/nucleotide atoms (excl. waters, other ligands, etc) + sel_atoms = [ + at for at in structure.get_atoms() + if at.parent.id[0] == ' ' and at.parent != ligand + ] + ns = NeighborSearch(sel_atoms) + neighbors = ns.search(ligand_com, radius, level="R") # 10A radius, return residues + + # Calculate residue closer to each ligand atom and the respective distance + ligand_atoms = ligand.child_list + min_dist_list, _seen = [], set() + for l_atm in ligand_atoms: + distances = [] + for residue_atoms in neighbors: + for r_atm in residue_atoms: + distances.append((r_atm, l_atm, r_atm - l_atm)) + # Sort list by distances + distances.sort(key=lambda x: x[-1]) + # Point first entry (shortest distance) + for ind in range(len(distances)): + closest_candidate = distances[ind] + candidate_residue = closest_candidate[0].parent + # One restraint per residue to keep the number of restraints small + if candidate_residue not in _seen: + min_dist_list.append(closest_candidate) + _seen.add(candidate_residue) + break + + # Output + assign_str_template = ( + "assign (segi {receptor_chainid:4s} and resi {receptor_resid:4d} " + "and name {receptor_atname:6s}){linesep:s}" + " (segi {ligand_chainid:4s} and resi {ligand_resid:4d} " + "and name {ligand_atname:6s}) " + "{distance:6.3f} {deviation:.2f} {deviation:.2f}{linesep:s}" + ) + + _unambig_str_list = [ + f"! Restraints to fix {ligand_name} in its initial position{os.linesep}" + ] + + # Loop over all min distances + for dist in min_dist_list: + r_at, l_at, d = dist + # Build assign statement + assign_str = assign_str_template.format( + receptor_chainid=r_at.parent.parent.id, + receptor_resid=r_at.parent.id[1], + receptor_atname='"' + r_at.name + '"', + ligand_chainid=l_at.parent.parent.id, + ligand_resid=l_at.parent.id[1], + ligand_atname='"' + l_at.name + '"', + distance=d, + deviation=deviation, + linesep=os.linesep, + ) + _unambig_str_list.append(assign_str) + + # Limit the number of restraints + if max_restraints < len(_unambig_str_list): + unambig_str_list = random.sample(_unambig_str_list, max_restraints) + else: + unambig_str_list = _unambig_str_list + + unambig_str = "".join(unambig_str_list) + return unambig_str + + +def main( + pdb_file: Union[str, Path], + ligand_name: str, + radius: float = 10.0, + deviation: float = 1.0, + max_restraints: int = 20, + ) -> None: + """Simple wrapper of the restrain_ligand function.""" + unambig_str = restrain_ligand( + pdb_file, ligand_name, + radius=radius, + deviation=deviation, + max_restraints=max_restraints, + ) + print(unambig_str) From 57fcc7eab9e2b16786ae6fb586300b00ba59fbe5 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 11 Jun 2025 12:13:18 +0200 Subject: [PATCH 2/4] testing new subcommand --- tests/test_cli_restraints.py | 44 ++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/test_cli_restraints.py b/tests/test_cli_restraints.py index d15b3c3430..a679a09c02 100644 --- a/tests/test_cli_restraints.py +++ b/tests/test_cli_restraints.py @@ -18,6 +18,7 @@ from haddock.clis.restraints.restrain_bodies import restrain_bodies from haddock.clis.restraints.validate_tbl import validate_tbl from haddock.clis.restraints.random_removal import random_removal +from haddock.clis.restraints.restrain_ligand import restrain_ligand from haddock.clis.restraints.z_surface_restraints import ( compute_barycenter, get_z_coords, @@ -60,6 +61,12 @@ def example_pdb_file(): return Path(golden_data, "protein.pdb") +@pytest.fixture +def example_liganded_pdbfile(): + """Provide example pdb file containing a ligand.""" + return Path(golden_data, "protlig_complex_1.pdb") + + def test_parse_actpass_file(example_actpass_file): exp_active = [97, 98, 115, 116, 117] exp_passive = [91, 93, 95, 118, 119, 120] @@ -363,3 +370,40 @@ def test_random_removal_seed(example_tbl_file): # Check that 2 and 3 are different ! assert rd_rm_tbl2 != rd_rm_tbl3 assert rd_rm_tbl1 != rd_rm_tbl3 + + +def test_restrain_ligand(example_liganded_pdbfile): + """Tests related to the restrain_ligand subcommand.""" + ligand_restraints_default = restrain_ligand(example_liganded_pdbfile, "G39") + assert ligand_restraints_default is not None + assert ligand_restraints_default != "" + + # Reducing the distance deviation + ligand_restraints_small_devi = restrain_ligand( + example_liganded_pdbfile, "G39", + deviation=0.5, + ) + assert ligand_restraints_small_devi is not None + assert ligand_restraints_small_devi != "" + assert ligand_restraints_default != ligand_restraints_small_devi + + # Reducing the radius + ligand_restraints_small_radius = restrain_ligand( + example_liganded_pdbfile, "G39", + radius=2.0, + ) + assert ligand_restraints_small_radius is not None + assert ligand_restraints_small_radius != "" + assert ligand_restraints_default != ligand_restraints_small_radius + assert ligand_restraints_small_radius.count("assi") <= ligand_restraints_default.count("assi") + + # Reducing the number of restaints + ligand_restraints_low_nb_restraints = restrain_ligand( + example_liganded_pdbfile, "G39", + max_restraints=2, + ) + assert ligand_restraints_low_nb_restraints is not None + assert ligand_restraints_low_nb_restraints != "" + assert ligand_restraints_default != ligand_restraints_low_nb_restraints + assert ligand_restraints_low_nb_restraints.count("assi") <= ligand_restraints_default.count("assi") + assert ligand_restraints_low_nb_restraints.count("assi") == 2 From d9353ada35844a08bc14bdd9c62d560ad0489439 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Wed, 11 Jun 2025 12:32:28 +0200 Subject: [PATCH 3/4] fine tuning + code comments --- .../clis/restraints/restrain_ligand.py | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/src/haddock/clis/restraints/restrain_ligand.py b/src/haddock/clis/restraints/restrain_ligand.py index 0bdf13071e..72abf3839d 100644 --- a/src/haddock/clis/restraints/restrain_ligand.py +++ b/src/haddock/clis/restraints/restrain_ligand.py @@ -50,7 +50,7 @@ def add_restrain_ligand_arguments(restrain_ligand_subcommand): "-r", "--radius", help=( - "Radius used for neighbors search around " + "Radius, in Angstrom, used for neighbors search around " "center of mass of ligand. (default: %(default)s)" ), required=False, @@ -60,7 +60,10 @@ def add_restrain_ligand_arguments(restrain_ligand_subcommand): restrain_ligand_subcommand.add_argument( "-d", "--deviation", - help="Allowed deviation from actual distance. (default: %(default)s)", + help=( + "Allowed deviation from actual distance, in Angstrom. " + "(default: %(default)s)" + ), required=False, default=1.0, type=float, @@ -71,6 +74,7 @@ def add_restrain_ligand_arguments(restrain_ligand_subcommand): help="Maximum number of restraints to return. (default: %(default)s)", required=False, default=200, + type=int, ) return restrain_ligand_subcommand @@ -107,9 +111,8 @@ def restrain_ligand( structure = pdb_parser.get_structure("", pdbfile) # Remove hydrogens - atom_lst = list(structure.get_atoms()) - for atom in atom_lst: - if atom.element == 'H': + for atom in structure.get_atoms(): + if atom.element == "H": res = atom.parent res.detach_child(atom.name) @@ -119,7 +122,7 @@ def restrain_ligand( if residue.resname.strip() == ligand_name.strip(): ligand = residue break - + # Case where the ligand is not found in the structure if not ligand: print(f"[!!] Ligand residue '{ligand_name}' not found in structure") sys.exit(1) @@ -128,29 +131,36 @@ def restrain_ligand( ligand_com = list(map(lambda x: sum(x)/len(x), zip(*[at.coord for at in ligand]))) ligand_com = np.asarray(ligand_com, dtype=np.float32) - # Calculate neighbors considering only aminoacid/nucleotide atoms (excl. waters, other ligands, etc) + # Create a selection of aminoacid/nucleotide atoms + # (excl. waters, other ligands, etc) + # also filters atoms that are from the queried ligand sel_atoms = [ at for at in structure.get_atoms() if at.parent.id[0] == ' ' and at.parent != ligand ] + # Perfom neighbor search on this selection ns = NeighborSearch(sel_atoms) neighbors = ns.search(ligand_com, radius, level="R") # 10A radius, return residues # Calculate residue closer to each ligand atom and the respective distance ligand_atoms = ligand.child_list min_dist_list, _seen = [], set() + + # Loop over ligand atoms for l_atm in ligand_atoms: distances = [] + # Loop over neighbors residues and atoms for residue_atoms in neighbors: for r_atm in residue_atoms: + # Compute distance and hold it distances.append((r_atm, l_atm, r_atm - l_atm)) # Sort list by distances distances.sort(key=lambda x: x[-1]) - # Point first entry (shortest distance) - for ind in range(len(distances)): - closest_candidate = distances[ind] + # Loop over sorted distances + for closest_candidate in distances: candidate_residue = closest_candidate[0].parent # One restraint per residue to keep the number of restraints small + # If a residue is already used, take next one if candidate_residue not in _seen: min_dist_list.append(closest_candidate) _seen.add(candidate_residue) @@ -191,7 +201,7 @@ def restrain_ligand( unambig_str_list = random.sample(_unambig_str_list, max_restraints) else: unambig_str_list = _unambig_str_list - + # Concatenate into a single string unambig_str = "".join(unambig_str_list) return unambig_str From fca2c5898a1326cae1b7e3627d56d5cda0bdb560 Mon Sep 17 00:00:00 2001 From: VGPReys Date: Thu, 12 Jun 2025 15:00:19 +0200 Subject: [PATCH 4/4] updating changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c728742e09..fea3338447 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,6 @@ # Changelog +- 2025-05-XX: Added new random_removal sub-command in the haddock3-restraints CLI - Issue #1240 - 2025-06-05: Added support for pyroglutamic acid (PCA) - Issue #1228 - 2025-06-06: Added selection of Nter, Cter and 5'end states at topology generation - Issue #1269 +- 2025-12-06: Added new restrain_ligand sub-command in the haddock3-restraints CLI - Issue #1299