|
| 1 | +"""haddock3-restraints restrain_ligand subcommand. |
| 2 | +
|
| 3 | +Given an input PDB file and a residue name (ligands), the tool will create |
| 4 | +unambiguous restraints to keep this ligand in place during the refinements. |
| 5 | +
|
| 6 | +
|
| 7 | +Usage: |
| 8 | + haddock3-restraints restrain_ligand <pdb_file> <ligand_name> -r <radius> -d <deviation> -n <max_nb_restraints> |
| 9 | +
|
| 10 | +positional arguments: |
| 11 | + pdb_file Input PDB file. |
| 12 | + ligand_name Residue name. |
| 13 | +
|
| 14 | +options: |
| 15 | + -h, --help show this help message and exit |
| 16 | + -r RADIUS, --radius RADIUS |
| 17 | + Radius used for neighbors search around center of mass of ligand. |
| 18 | + (default: 10.0) |
| 19 | + -n MAX_RESTRAINTS, --max-restraints MAX_RESTRAINTS |
| 20 | + Maximum number of restraints to return. (default: 200) |
| 21 | + -d DEVIATION, --deviation DEVIATION |
| 22 | + Allowed deviation from actual distance. (default: 1.0) |
| 23 | +""" |
| 24 | + |
| 25 | +import os |
| 26 | +import sys |
| 27 | +import random |
| 28 | +from pathlib import Path |
| 29 | + |
| 30 | +import numpy as np |
| 31 | +from Bio.PDB import PDBParser |
| 32 | +from Bio.PDB import NeighborSearch |
| 33 | + |
| 34 | +from haddock.core.typing import Union |
| 35 | + |
| 36 | + |
| 37 | +def add_restrain_ligand_arguments(restrain_ligand_subcommand): |
| 38 | + """Add arguments to the random removal subcommand.""" |
| 39 | + restrain_ligand_subcommand.add_argument( |
| 40 | + "pdb_file", |
| 41 | + type=str, |
| 42 | + help="Input PDB file.", |
| 43 | + ) |
| 44 | + restrain_ligand_subcommand.add_argument( |
| 45 | + "ligand_name", |
| 46 | + type=str, |
| 47 | + help="Name of the residue for which restraints must be generated.", |
| 48 | + ) |
| 49 | + restrain_ligand_subcommand.add_argument( |
| 50 | + "-r", |
| 51 | + "--radius", |
| 52 | + help=( |
| 53 | + "Radius, in Angstrom, used for neighbors search around " |
| 54 | + "center of mass of ligand. (default: %(default)s)" |
| 55 | + ), |
| 56 | + required=False, |
| 57 | + default=10.0, |
| 58 | + type=float, |
| 59 | + ) |
| 60 | + restrain_ligand_subcommand.add_argument( |
| 61 | + "-d", |
| 62 | + "--deviation", |
| 63 | + help=( |
| 64 | + "Allowed deviation from actual distance, in Angstrom. " |
| 65 | + "(default: %(default)s)" |
| 66 | + ), |
| 67 | + required=False, |
| 68 | + default=1.0, |
| 69 | + type=float, |
| 70 | + ) |
| 71 | + restrain_ligand_subcommand.add_argument( |
| 72 | + "-n", |
| 73 | + "--max-restraints", |
| 74 | + help="Maximum number of restraints to return. (default: %(default)s)", |
| 75 | + required=False, |
| 76 | + default=200, |
| 77 | + type=int, |
| 78 | + ) |
| 79 | + return restrain_ligand_subcommand |
| 80 | + |
| 81 | + |
| 82 | +def restrain_ligand( |
| 83 | + pdbfile: Union[str, Path], |
| 84 | + ligand_name: str, |
| 85 | + radius: float = 10.0, |
| 86 | + deviation: float = 1.0, |
| 87 | + max_restraints: int = 20, |
| 88 | + ) -> str: |
| 89 | + """Generate unambiguous restraints to keep a ligand in place. |
| 90 | +
|
| 91 | + Parameters |
| 92 | + ---------- |
| 93 | + pdbfile : Union[str, Path] |
| 94 | + Path to a PDB file containing a ligand |
| 95 | + ligand_name : str |
| 96 | + Residue name of the ligand to work with |
| 97 | + radius : float, optional |
| 98 | + Radius used for neighbors search around center of mass of ligand, by default 10.0 |
| 99 | + deviation : float, optional |
| 100 | + Allowed deviation from actual distance, by default 1.0 |
| 101 | + max_restraints : int, optional |
| 102 | + Maximum number of restraints to return, by default 20 |
| 103 | +
|
| 104 | + Returns |
| 105 | + ------- |
| 106 | + unambig_str : str |
| 107 | + The actual unambiguous restraints as a string. |
| 108 | + """ |
| 109 | + # Read in structure |
| 110 | + pdb_parser = PDBParser(QUIET=1) |
| 111 | + structure = pdb_parser.get_structure("", pdbfile) |
| 112 | + |
| 113 | + # Remove hydrogens |
| 114 | + for atom in structure.get_atoms(): |
| 115 | + if atom.element == "H": |
| 116 | + res = atom.parent |
| 117 | + res.detach_child(atom.name) |
| 118 | + |
| 119 | + # Try to find the ligand in the structure |
| 120 | + ligand = None |
| 121 | + for residue in structure.get_residues(): |
| 122 | + if residue.resname.strip() == ligand_name.strip(): |
| 123 | + ligand = residue |
| 124 | + break |
| 125 | + # Case where the ligand is not found in the structure |
| 126 | + if not ligand: |
| 127 | + print(f"[!!] Ligand residue '{ligand_name}' not found in structure") |
| 128 | + sys.exit(1) |
| 129 | + |
| 130 | + # Calculate center of mass of the ligand |
| 131 | + ligand_com = list(map(lambda x: sum(x)/len(x), zip(*[at.coord for at in ligand]))) |
| 132 | + ligand_com = np.asarray(ligand_com, dtype=np.float32) |
| 133 | + |
| 134 | + # Create a selection of aminoacid/nucleotide atoms |
| 135 | + # (excl. waters, other ligands, etc) |
| 136 | + # also filters atoms that are from the queried ligand |
| 137 | + sel_atoms = [ |
| 138 | + at for at in structure.get_atoms() |
| 139 | + if at.parent.id[0] == ' ' and at.parent != ligand |
| 140 | + ] |
| 141 | + # Perfom neighbor search on this selection |
| 142 | + ns = NeighborSearch(sel_atoms) |
| 143 | + neighbors = ns.search(ligand_com, radius, level="R") # 10A radius, return residues |
| 144 | + |
| 145 | + # Calculate residue closer to each ligand atom and the respective distance |
| 146 | + ligand_atoms = ligand.child_list |
| 147 | + min_dist_list, _seen = [], set() |
| 148 | + |
| 149 | + # Loop over ligand atoms |
| 150 | + for l_atm in ligand_atoms: |
| 151 | + distances = [] |
| 152 | + # Loop over neighbors residues and atoms |
| 153 | + for residue_atoms in neighbors: |
| 154 | + for r_atm in residue_atoms: |
| 155 | + # Compute distance and hold it |
| 156 | + distances.append((r_atm, l_atm, r_atm - l_atm)) |
| 157 | + # Sort list by distances |
| 158 | + distances.sort(key=lambda x: x[-1]) |
| 159 | + # Loop over sorted distances |
| 160 | + for closest_candidate in distances: |
| 161 | + candidate_residue = closest_candidate[0].parent |
| 162 | + # One restraint per residue to keep the number of restraints small |
| 163 | + # If a residue is already used, take next one |
| 164 | + if candidate_residue not in _seen: |
| 165 | + min_dist_list.append(closest_candidate) |
| 166 | + _seen.add(candidate_residue) |
| 167 | + break |
| 168 | + |
| 169 | + # Output |
| 170 | + assign_str_template = ( |
| 171 | + "assign (segi {receptor_chainid:4s} and resi {receptor_resid:4d} " |
| 172 | + "and name {receptor_atname:6s}){linesep:s}" |
| 173 | + " (segi {ligand_chainid:4s} and resi {ligand_resid:4d} " |
| 174 | + "and name {ligand_atname:6s}) " |
| 175 | + "{distance:6.3f} {deviation:.2f} {deviation:.2f}{linesep:s}" |
| 176 | + ) |
| 177 | + |
| 178 | + _unambig_str_list = [ |
| 179 | + f"! Restraints to fix {ligand_name} in its initial position{os.linesep}" |
| 180 | + ] |
| 181 | + |
| 182 | + # Loop over all min distances |
| 183 | + for dist in min_dist_list: |
| 184 | + r_at, l_at, d = dist |
| 185 | + # Build assign statement |
| 186 | + assign_str = assign_str_template.format( |
| 187 | + receptor_chainid=r_at.parent.parent.id, |
| 188 | + receptor_resid=r_at.parent.id[1], |
| 189 | + receptor_atname='"' + r_at.name + '"', |
| 190 | + ligand_chainid=l_at.parent.parent.id, |
| 191 | + ligand_resid=l_at.parent.id[1], |
| 192 | + ligand_atname='"' + l_at.name + '"', |
| 193 | + distance=d, |
| 194 | + deviation=deviation, |
| 195 | + linesep=os.linesep, |
| 196 | + ) |
| 197 | + _unambig_str_list.append(assign_str) |
| 198 | + |
| 199 | + # Limit the number of restraints |
| 200 | + if max_restraints < len(_unambig_str_list): |
| 201 | + unambig_str_list = random.sample(_unambig_str_list, max_restraints) |
| 202 | + else: |
| 203 | + unambig_str_list = _unambig_str_list |
| 204 | + # Concatenate into a single string |
| 205 | + unambig_str = "".join(unambig_str_list) |
| 206 | + return unambig_str |
| 207 | + |
| 208 | + |
| 209 | +def main( |
| 210 | + pdb_file: Union[str, Path], |
| 211 | + ligand_name: str, |
| 212 | + radius: float = 10.0, |
| 213 | + deviation: float = 1.0, |
| 214 | + max_restraints: int = 20, |
| 215 | + ) -> None: |
| 216 | + """Simple wrapper of the restrain_ligand function.""" |
| 217 | + unambig_str = restrain_ligand( |
| 218 | + pdb_file, ligand_name, |
| 219 | + radius=radius, |
| 220 | + deviation=deviation, |
| 221 | + max_restraints=max_restraints, |
| 222 | + ) |
| 223 | + print(unambig_str) |
0 commit comments