MDAnalysis · tanishy7777 · Apr 15, 2025 · Apr 15, 2025 · Apr 16, 2025 · Apr 16, 2025
diff --git a/package/CHANGELOG b/package/CHANGELOG
@@ -18,6 +18,7 @@ The rules for this file:
          yuxuanzhuang, yuyuan871111, tanishy7777, tulga-rdn
 
 
+
  * 2.10.0
 
 Fixes
@@ -48,6 +49,8 @@ Enhancements
    (Issue #4948 #2874, PR #4965)
  * Enables parallelization for analysis.lineardensity.LinearDensity
    (Issue #4678, PR #5007)
+ * Improves performance of `between` keyword for HydrogenBondAnalysis.
+   (Issue #4130, PR #5029)
 
 Changes
  * Removed undocumented and unused attribute

diff --git a/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py
@@ -239,7 +239,7 @@
 import logging
 import warnings
 from collections.abc import Iterable
-
+from collections import defaultdict
 import numpy as np
 
 from ..base import AnalysisBase, Results, ResultsGroup
@@ -679,90 +679,136 @@
 
         return donors, hydrogens
 
-    def _filter_atoms(self, donors, acceptors):
-        """Create a mask to filter donor, hydrogen and acceptor atoms.
-
-        This can be used to consider only hydrogen bonds between two or more
-        specified groups.
-
-        Groups are specified with the `between` keyword when creating the
-        HydrogenBondAnalysis object.
-
-           Returns
-           -------
-           mask: np.ndarray
-
-
-        .. versionchanged:: 2.5.0
-           Change return value to a mask instead of separate AtomGroups.
-``
-        """
-
-        mask = np.full(donors.n_atoms, fill_value=False)
-        for group1, group2 in self.between_ags:
-
-            # Find donors in G1 and acceptors in G2
-            mask[
-                    np.logical_and(
-                        np.isin(donors.indices, group1.indices),
-                        np.isin(acceptors.indices, group2.indices)
-                    )
-            ] = True
-
-            # Find acceptors in G1 and donors in G2
-            mask[
-                np.logical_and(
-                    np.isin(acceptors.indices, group1.indices),
-                    np.isin(donors.indices, group2.indices)
-                )
-            ] = True
-
-        return mask
-
-
     def _prepare(self):
         self.results.hbonds = [[], [], [], [], [], []]
-
-
+
     def _single_frame(self):
-
         box = self._ts.dimensions
 
         # Update donor-hydrogen pairs if necessary
         if self.update_selections:
             self._donors, self._hydrogens = self._get_dh_pairs()
 
-        # find D and A within cutoff distance of one another
-        # min_cutoff = 1.0 as an atom cannot form a hydrogen bond with itself
-        d_a_indices, d_a_distances = capped_distance(
-            self._donors.positions,
-            self._acceptors.positions,
-            max_cutoff=self.d_a_cutoff,
-            min_cutoff=1.0,
-            box=box,
-            return_distances=True,
-        )
+        if self.between_ags is not None:
+            # map donor indices to respective hydrogen indices
+            donor_idx_to_H_indices = defaultdict(list)
+
+            for d, h in zip(self._donors, self._hydrogens):
+                donor_idx_to_H_indices[d.index].append(h.index)
+
+            tmp_hbond_candidates = [] 
+
+            for group1, group2 in self.between_ags:
+
+                tmp_donors_g1_mask = np.isin(self._donors.indices, group1.indices) 
+                donors_for_dist = self._donors[tmp_donors_g1_mask] 
+                tmp_acceptors_g2_mask = np.isin(self._acceptors.indices, group2.indices) 
+                acceptors_for_dist = self._acceptors[tmp_acceptors_g2_mask] 
+
+                d_a_indices_12, d_a_distances_12 = capped_distance(
+                    donors_for_dist.positions,
+                    acceptors_for_dist.positions,
+                    max_cutoff=self.d_a_cutoff,
+                    box=box,
+                    return_distances=True,
+                )
 
-        if np.size(d_a_indices) == 0:
-            warnings.warn(
-                "No hydrogen bonds were found given d-a cutoff of "
-                f"{self.d_a_cutoff} between Donor, {self.donors_sel}, and "
-                f"Acceptor, {self.acceptors_sel}."
+                if np.size(d_a_indices_12) == 0:
+                    warnings.warn(
+                        "No hydrogen bonds were found given d-a cutoff of "
+                        f"{self.d_a_cutoff} between Donor, {self.donors_sel}, and "
+                        f"Acceptor, {self.acceptors_sel}."
+                    )
+
+
+                tmp_donors = donors_for_dist[d_a_indices_12.T[0]]
+                tmp_acceptors = acceptors_for_dist[d_a_indices_12.T[1]]
+                distance = d_a_distances_12
+
+                # Fetch hydrogens bonded to the tmp_donorss
+                # TODO: figure out vectorization
+                for i in range(len(d_a_indices_12)):
+                    if tmp_donors[i].index in donor_idx_to_H_indices:
+                        possible_H_indices = donor_idx_to_H_indices[tmp_donors[i].index]
+                        for h_idx in possible_H_indices:
+                            tmp_hbond_candidates.append(
+                                (tmp_donors[i].index, h_idx, tmp_acceptors[i].index, distance[i])
+                            )
+
+                if np.array_equal(group1.indices, group2.indices):
+                    continue # in case between 2 same residues like ["protein", "protein"]
+
+                tmp_donors_g2_mask = np.isin(self._donors.indices, group2.indices) 
+                donors_for_dist = self._donors[tmp_donors_g2_mask] 
+                tmp_acceptors_g1_mask = np.isin(self._acceptors.indices, group1.indices) 
+                acceptors_for_dist = self._acceptors[tmp_acceptors_g1_mask] 
+
+                d_a_indices_21, d_a_distances_21 = capped_distance(
+                    donors_for_dist.positions,
+                    acceptors_for_dist.positions,
+                    max_cutoff=self.d_a_cutoff,
+                    box=box,
+                    return_distances=True,
+                )
+
+                if np.size(d_a_indices_12) == 0:
+                    warnings.warn(
+                        "No hydrogen bonds were found given d-a cutoff of "
+                        f"{self.d_a_cutoff} between Donor, {self.donors_sel}, and "
+                        f"Acceptor, {self.acceptors_sel}."
+                    )
+
+
+                tmp_donors = donors_for_dist[d_a_indices_21.T[0]]
+                tmp_acceptors = acceptors_for_dist[d_a_indices_21.T[1]]
+                distance = d_a_distances_21
+
+                for i in range(len(d_a_indices_21)):
+                    if tmp_donors[i].index in donor_idx_to_H_indices:
+                        possible_H_indices = donor_idx_to_H_indices[tmp_donors[i].index]
+                        for h_idx in possible_H_indices:
+                            tmp_hbond_candidates.append(
+                                (tmp_donors[i].index, h_idx, tmp_acceptors[i].index, distance[i])
+                            )
+
+            unique_hbond_dict = {}
+            for d_idx, h_idx, a_idx, dist in tmp_hbond_candidates:
+                key = (d_idx, h_idx, a_idx)
+                if key not in unique_hbond_dict:
+                    unique_hbond_dict[key] = dist
+
+            unique_keys = list(unique_hbond_dict.keys())
+            unique_d_idx = [k[0] for k in unique_keys]
+            unique_h_idx = [k[1] for k in unique_keys]
+            unique_a_idx = [k[2] for k in unique_keys]
+            unique_distances = np.array(list(unique_hbond_dict.values()))
+
+            tmp_donors = self.u.atoms[unique_d_idx]
+            tmp_hydrogens = self.u.atoms[unique_h_idx]
+            tmp_acceptors = self.u.atoms[unique_a_idx]
+            d_a_distances_filtered = unique_distances
+
+        else:
+            d_a_indices, d_a_distances = capped_distance(
+                self._donors.positions,
+                self._acceptors.positions,
+                max_cutoff=self.d_a_cutoff,
+                box=box,
+                return_distances=True,
             )
 
-        # Remove D-A pairs more than d_a_cutoff away from one another
-        tmp_donors = self._donors[d_a_indices.T[0]]
-        tmp_hydrogens = self._hydrogens[d_a_indices.T[0]]
-        tmp_acceptors = self._acceptors[d_a_indices.T[1]]
+            if np.size(d_a_indices) == 0:
+                warnings.warn(
+                    "No hydrogen bonds were found given d-a cutoff of "
+                    f"{self.d_a_cutoff} between Donor, {self.donors_sel}, and "
+                    f"Acceptor, {self.acceptors_sel}."
+                )
 
-        # Remove donor-acceptor pairs between pairs of AtomGroups we are not
-        # interested in
-        if self.between_ags is not None:
-            between_mask = self._filter_atoms(tmp_donors, tmp_acceptors)
-            tmp_donors = tmp_donors[between_mask]
-            tmp_hydrogens = tmp_hydrogens[between_mask]
-            tmp_acceptors = tmp_acceptors[between_mask]
-            d_a_distances = d_a_distances[between_mask]
+            # Remove D-A pairs more than d_a_cutoff away from one another
+            tmp_donors = self._donors[d_a_indices.T[0]]
+            tmp_hydrogens = self._hydrogens[d_a_indices.T[0]]
+            tmp_acceptors = self._acceptors[d_a_indices.T[1]]
+            d_a_distances_filtered = d_a_distances
 
         # Find D-H-A angles greater than d_h_a_angle_cutoff
         d_h_a_angles = np.rad2deg(
@@ -786,7 +832,7 @@
         hbond_donors = tmp_donors[hbond_indices]
         hbond_hydrogens = tmp_hydrogens[hbond_indices]
         hbond_acceptors = tmp_acceptors[hbond_indices]
-        hbond_distances = d_a_distances[hbond_indices]
+        hbond_distances = d_a_distances_filtered[hbond_indices]
         hbond_angles = d_h_a_angles[hbond_indices]
 
         # Store data on hydrogen bonds found at this frame