diff --git a/package/CHANGELOG b/package/CHANGELOG index 0064a159b3..43dc28ba29 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -18,6 +18,7 @@ The rules for this file: yuxuanzhuang, yuyuan871111, tanishy7777, tulga-rdn + * 2.10.0 Fixes @@ -48,6 +49,8 @@ Enhancements (Issue #4948 #2874, PR #4965) * Enables parallelization for analysis.lineardensity.LinearDensity (Issue #4678, PR #5007) + * Improves performance of `between` keyword for HydrogenBondAnalysis. + (Issue #4130, PR #5029) Changes * Removed undocumented and unused attribute diff --git a/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py index 544bd6e4e1..2ea8073cdb 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py @@ -239,7 +239,7 @@ import logging import warnings from collections.abc import Iterable - +from collections import defaultdict import numpy as np from ..base import AnalysisBase, Results, ResultsGroup @@ -679,90 +679,136 @@ def _get_dh_pairs(self): return donors, hydrogens - def _filter_atoms(self, donors, acceptors): - """Create a mask to filter donor, hydrogen and acceptor atoms. - - This can be used to consider only hydrogen bonds between two or more - specified groups. - - Groups are specified with the `between` keyword when creating the - HydrogenBondAnalysis object. - - Returns - ------- - mask: np.ndarray - - - .. versionchanged:: 2.5.0 - Change return value to a mask instead of separate AtomGroups. -`` - """ - - mask = np.full(donors.n_atoms, fill_value=False) - for group1, group2 in self.between_ags: - - # Find donors in G1 and acceptors in G2 - mask[ - np.logical_and( - np.isin(donors.indices, group1.indices), - np.isin(acceptors.indices, group2.indices) - ) - ] = True - - # Find acceptors in G1 and donors in G2 - mask[ - np.logical_and( - np.isin(acceptors.indices, group1.indices), - np.isin(donors.indices, group2.indices) - ) - ] = True - - return mask - - def _prepare(self): self.results.hbonds = [[], [], [], [], [], []] - - + def _single_frame(self): - box = self._ts.dimensions # Update donor-hydrogen pairs if necessary if self.update_selections: self._donors, self._hydrogens = self._get_dh_pairs() - # find D and A within cutoff distance of one another - # min_cutoff = 1.0 as an atom cannot form a hydrogen bond with itself - d_a_indices, d_a_distances = capped_distance( - self._donors.positions, - self._acceptors.positions, - max_cutoff=self.d_a_cutoff, - min_cutoff=1.0, - box=box, - return_distances=True, - ) + if self.between_ags is not None: + # map donor indices to respective hydrogen indices + donor_idx_to_H_indices = defaultdict(list) + + for d, h in zip(self._donors, self._hydrogens): + donor_idx_to_H_indices[d.index].append(h.index) + + tmp_hbond_candidates = [] + + for group1, group2 in self.between_ags: + + tmp_donors_g1_mask = np.isin(self._donors.indices, group1.indices) + donors_for_dist = self._donors[tmp_donors_g1_mask] + tmp_acceptors_g2_mask = np.isin(self._acceptors.indices, group2.indices) + acceptors_for_dist = self._acceptors[tmp_acceptors_g2_mask] + + d_a_indices_12, d_a_distances_12 = capped_distance( + donors_for_dist.positions, + acceptors_for_dist.positions, + max_cutoff=self.d_a_cutoff, + box=box, + return_distances=True, + ) - if np.size(d_a_indices) == 0: - warnings.warn( - "No hydrogen bonds were found given d-a cutoff of " - f"{self.d_a_cutoff} between Donor, {self.donors_sel}, and " - f"Acceptor, {self.acceptors_sel}." + if np.size(d_a_indices_12) == 0: + warnings.warn( + "No hydrogen bonds were found given d-a cutoff of " + f"{self.d_a_cutoff} between Donor, {self.donors_sel}, and " + f"Acceptor, {self.acceptors_sel}." + ) + + + tmp_donors = donors_for_dist[d_a_indices_12.T[0]] + tmp_acceptors = acceptors_for_dist[d_a_indices_12.T[1]] + distance = d_a_distances_12 + + # Fetch hydrogens bonded to the tmp_donorss + # TODO: figure out vectorization + for i in range(len(d_a_indices_12)): + if tmp_donors[i].index in donor_idx_to_H_indices: + possible_H_indices = donor_idx_to_H_indices[tmp_donors[i].index] + for h_idx in possible_H_indices: + tmp_hbond_candidates.append( + (tmp_donors[i].index, h_idx, tmp_acceptors[i].index, distance[i]) + ) + + if np.array_equal(group1.indices, group2.indices): + continue # in case between 2 same residues like ["protein", "protein"] + + tmp_donors_g2_mask = np.isin(self._donors.indices, group2.indices) + donors_for_dist = self._donors[tmp_donors_g2_mask] + tmp_acceptors_g1_mask = np.isin(self._acceptors.indices, group1.indices) + acceptors_for_dist = self._acceptors[tmp_acceptors_g1_mask] + + d_a_indices_21, d_a_distances_21 = capped_distance( + donors_for_dist.positions, + acceptors_for_dist.positions, + max_cutoff=self.d_a_cutoff, + box=box, + return_distances=True, + ) + + if np.size(d_a_indices_12) == 0: + warnings.warn( + "No hydrogen bonds were found given d-a cutoff of " + f"{self.d_a_cutoff} between Donor, {self.donors_sel}, and " + f"Acceptor, {self.acceptors_sel}." + ) + + + tmp_donors = donors_for_dist[d_a_indices_21.T[0]] + tmp_acceptors = acceptors_for_dist[d_a_indices_21.T[1]] + distance = d_a_distances_21 + + for i in range(len(d_a_indices_21)): + if tmp_donors[i].index in donor_idx_to_H_indices: + possible_H_indices = donor_idx_to_H_indices[tmp_donors[i].index] + for h_idx in possible_H_indices: + tmp_hbond_candidates.append( + (tmp_donors[i].index, h_idx, tmp_acceptors[i].index, distance[i]) + ) + + unique_hbond_dict = {} + for d_idx, h_idx, a_idx, dist in tmp_hbond_candidates: + key = (d_idx, h_idx, a_idx) + if key not in unique_hbond_dict: + unique_hbond_dict[key] = dist + + unique_keys = list(unique_hbond_dict.keys()) + unique_d_idx = [k[0] for k in unique_keys] + unique_h_idx = [k[1] for k in unique_keys] + unique_a_idx = [k[2] for k in unique_keys] + unique_distances = np.array(list(unique_hbond_dict.values())) + + tmp_donors = self.u.atoms[unique_d_idx] + tmp_hydrogens = self.u.atoms[unique_h_idx] + tmp_acceptors = self.u.atoms[unique_a_idx] + d_a_distances_filtered = unique_distances + + else: + d_a_indices, d_a_distances = capped_distance( + self._donors.positions, + self._acceptors.positions, + max_cutoff=self.d_a_cutoff, + box=box, + return_distances=True, ) - # Remove D-A pairs more than d_a_cutoff away from one another - tmp_donors = self._donors[d_a_indices.T[0]] - tmp_hydrogens = self._hydrogens[d_a_indices.T[0]] - tmp_acceptors = self._acceptors[d_a_indices.T[1]] + if np.size(d_a_indices) == 0: + warnings.warn( + "No hydrogen bonds were found given d-a cutoff of " + f"{self.d_a_cutoff} between Donor, {self.donors_sel}, and " + f"Acceptor, {self.acceptors_sel}." + ) - # Remove donor-acceptor pairs between pairs of AtomGroups we are not - # interested in - if self.between_ags is not None: - between_mask = self._filter_atoms(tmp_donors, tmp_acceptors) - tmp_donors = tmp_donors[between_mask] - tmp_hydrogens = tmp_hydrogens[between_mask] - tmp_acceptors = tmp_acceptors[between_mask] - d_a_distances = d_a_distances[between_mask] + # Remove D-A pairs more than d_a_cutoff away from one another + tmp_donors = self._donors[d_a_indices.T[0]] + tmp_hydrogens = self._hydrogens[d_a_indices.T[0]] + tmp_acceptors = self._acceptors[d_a_indices.T[1]] + d_a_distances_filtered = d_a_distances # Find D-H-A angles greater than d_h_a_angle_cutoff d_h_a_angles = np.rad2deg( @@ -786,7 +832,7 @@ def _single_frame(self): hbond_donors = tmp_donors[hbond_indices] hbond_hydrogens = tmp_hydrogens[hbond_indices] hbond_acceptors = tmp_acceptors[hbond_indices] - hbond_distances = d_a_distances[hbond_indices] + hbond_distances = d_a_distances_filtered[hbond_indices] hbond_angles = d_h_a_angles[hbond_indices] # Store data on hydrogen bonds found at this frame