Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ The rules for this file:
yuxuanzhuang, yuyuan871111, tanishy7777, tulga-rdn



* 2.10.0

Fixes
Expand Down Expand Up @@ -48,6 +49,8 @@ Enhancements
(Issue #4948 #2874, PR #4965)
* Enables parallelization for analysis.lineardensity.LinearDensity
(Issue #4678, PR #5007)
* Improves performance of `between` keyword for HydrogenBondAnalysis.
(Issue #4130, PR #5029)

Changes
* Removed undocumented and unused attribute
Expand Down
192 changes: 119 additions & 73 deletions package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@
import logging
import warnings
from collections.abc import Iterable

from collections import defaultdict
import numpy as np

from ..base import AnalysisBase, Results, ResultsGroup
Expand Down Expand Up @@ -679,90 +679,136 @@

return donors, hydrogens

def _filter_atoms(self, donors, acceptors):
"""Create a mask to filter donor, hydrogen and acceptor atoms.
This can be used to consider only hydrogen bonds between two or more
specified groups.
Groups are specified with the `between` keyword when creating the
HydrogenBondAnalysis object.
Returns
-------
mask: np.ndarray
.. versionchanged:: 2.5.0
Change return value to a mask instead of separate AtomGroups.
``
"""

mask = np.full(donors.n_atoms, fill_value=False)
for group1, group2 in self.between_ags:

# Find donors in G1 and acceptors in G2
mask[
np.logical_and(
np.isin(donors.indices, group1.indices),
np.isin(acceptors.indices, group2.indices)
)
] = True

# Find acceptors in G1 and donors in G2
mask[
np.logical_and(
np.isin(acceptors.indices, group1.indices),
np.isin(donors.indices, group2.indices)
)
] = True

return mask


def _prepare(self):
self.results.hbonds = [[], [], [], [], [], []]



def _single_frame(self):

box = self._ts.dimensions

# Update donor-hydrogen pairs if necessary
if self.update_selections:
self._donors, self._hydrogens = self._get_dh_pairs()

# find D and A within cutoff distance of one another
# min_cutoff = 1.0 as an atom cannot form a hydrogen bond with itself
d_a_indices, d_a_distances = capped_distance(
self._donors.positions,
self._acceptors.positions,
max_cutoff=self.d_a_cutoff,
min_cutoff=1.0,
box=box,
return_distances=True,
)
if self.between_ags is not None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please could you move everything in this if block into a separate function to make _single_frame a little easier to read?

# map donor indices to respective hydrogen indices
donor_idx_to_H_indices = defaultdict(list)

for d, h in zip(self._donors, self._hydrogens):
donor_idx_to_H_indices[d.index].append(h.index)

tmp_hbond_candidates = []

for group1, group2 in self.between_ags:

tmp_donors_g1_mask = np.isin(self._donors.indices, group1.indices)
donors_for_dist = self._donors[tmp_donors_g1_mask]
tmp_acceptors_g2_mask = np.isin(self._acceptors.indices, group2.indices)
acceptors_for_dist = self._acceptors[tmp_acceptors_g2_mask]
Comment on lines +703 to +706
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

                tmp_donors_mask = np.logical_or(
                    np.isin(self._donors.indices, group1.indices),
                    np.isin(self._donors.indices, group2.indices),
                 )
                donors_for_dist = self._donors[tmp_donors_mask] 
                tmp_acceptors_mask = np.logical_or(
                    np.isin(self._acceptors.indices, group1.indices),
                    np.isin(self._acceptors.indices, group2.indices),
                ) 
                acceptors_for_dist = self._acceptors[tmp_acceptors_mask] 

I think if you do this you only need to make one call to capped_distance per pair of groups, rather than calculating distances between g1 donors to g2 acceptors and then g2 donors to g1 donors


d_a_indices_12, d_a_distances_12 = capped_distance(
donors_for_dist.positions,
acceptors_for_dist.positions,
max_cutoff=self.d_a_cutoff,
box=box,
return_distances=True,
)

if np.size(d_a_indices) == 0:
warnings.warn(
"No hydrogen bonds were found given d-a cutoff of "
f"{self.d_a_cutoff} between Donor, {self.donors_sel}, and "
f"Acceptor, {self.acceptors_sel}."
if np.size(d_a_indices_12) == 0:
warnings.warn(

Check warning on line 717 in package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py

View check run for this annotation

Codecov / codecov/patch

package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py#L717

Added line #L717 was not covered by tests
"No hydrogen bonds were found given d-a cutoff of "
f"{self.d_a_cutoff} between Donor, {self.donors_sel}, and "
f"Acceptor, {self.acceptors_sel}."
Comment on lines +718 to +720
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this warning is no longer accurate - it should warn that no hydrogen bonds were found between groups 1 and 2 for the given cutoff and atom selections

)


tmp_donors = donors_for_dist[d_a_indices_12.T[0]]
tmp_acceptors = acceptors_for_dist[d_a_indices_12.T[1]]
distance = d_a_distances_12

# Fetch hydrogens bonded to the tmp_donorss
# TODO: figure out vectorization
for i in range(len(d_a_indices_12)):
if tmp_donors[i].index in donor_idx_to_H_indices:
possible_H_indices = donor_idx_to_H_indices[tmp_donors[i].index]
for h_idx in possible_H_indices:
tmp_hbond_candidates.append(
(tmp_donors[i].index, h_idx, tmp_acceptors[i].index, distance[i])
)
Comment on lines +724 to +736
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would this work?

hydrogens_for_dist = self._hydrogens[tmp_donors_g1_mask]
tmp_hydrogens = self._hydrogens[d_a_indices.T[0]]

self._donors and self._hydrogens have the same size, and are paired (i.e. if we want to keep donors tmp_donors_g1_mask we also want to keep hydrogens tmp_donors_g1_mask.


if np.array_equal(group1.indices, group2.indices):
continue # in case between 2 same residues like ["protein", "protein"]

tmp_donors_g2_mask = np.isin(self._donors.indices, group2.indices)
donors_for_dist = self._donors[tmp_donors_g2_mask]
tmp_acceptors_g1_mask = np.isin(self._acceptors.indices, group1.indices)
acceptors_for_dist = self._acceptors[tmp_acceptors_g1_mask]

d_a_indices_21, d_a_distances_21 = capped_distance(
donors_for_dist.positions,
acceptors_for_dist.positions,
max_cutoff=self.d_a_cutoff,
box=box,
return_distances=True,
)

if np.size(d_a_indices_12) == 0:
warnings.warn(

Check warning on line 755 in package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py

View check run for this annotation

Codecov / codecov/patch

package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py#L755

Added line #L755 was not covered by tests
"No hydrogen bonds were found given d-a cutoff of "
f"{self.d_a_cutoff} between Donor, {self.donors_sel}, and "
f"Acceptor, {self.acceptors_sel}."
)


tmp_donors = donors_for_dist[d_a_indices_21.T[0]]
tmp_acceptors = acceptors_for_dist[d_a_indices_21.T[1]]
distance = d_a_distances_21

for i in range(len(d_a_indices_21)):
if tmp_donors[i].index in donor_idx_to_H_indices:
possible_H_indices = donor_idx_to_H_indices[tmp_donors[i].index]
for h_idx in possible_H_indices:
tmp_hbond_candidates.append(
(tmp_donors[i].index, h_idx, tmp_acceptors[i].index, distance[i])
)

unique_hbond_dict = {}
for d_idx, h_idx, a_idx, dist in tmp_hbond_candidates:
key = (d_idx, h_idx, a_idx)
if key not in unique_hbond_dict:
unique_hbond_dict[key] = dist
Comment on lines +774 to +778
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think if you do tmp_hydrogens = self._hydrogens[d_a_indices.T[0]] as suggested above, then all the hydrogen bonds will be unique anyway and you won't need this dictionary


unique_keys = list(unique_hbond_dict.keys())
unique_d_idx = [k[0] for k in unique_keys]
unique_h_idx = [k[1] for k in unique_keys]
unique_a_idx = [k[2] for k in unique_keys]
unique_distances = np.array(list(unique_hbond_dict.values()))

tmp_donors = self.u.atoms[unique_d_idx]
tmp_hydrogens = self.u.atoms[unique_h_idx]
tmp_acceptors = self.u.atoms[unique_a_idx]
d_a_distances_filtered = unique_distances

else:
d_a_indices, d_a_distances = capped_distance(
self._donors.positions,
self._acceptors.positions,
max_cutoff=self.d_a_cutoff,
box=box,
return_distances=True,
)

# Remove D-A pairs more than d_a_cutoff away from one another
tmp_donors = self._donors[d_a_indices.T[0]]
tmp_hydrogens = self._hydrogens[d_a_indices.T[0]]
tmp_acceptors = self._acceptors[d_a_indices.T[1]]
if np.size(d_a_indices) == 0:
warnings.warn(
"No hydrogen bonds were found given d-a cutoff of "
f"{self.d_a_cutoff} between Donor, {self.donors_sel}, and "
f"Acceptor, {self.acceptors_sel}."
)

# Remove donor-acceptor pairs between pairs of AtomGroups we are not
# interested in
if self.between_ags is not None:
between_mask = self._filter_atoms(tmp_donors, tmp_acceptors)
tmp_donors = tmp_donors[between_mask]
tmp_hydrogens = tmp_hydrogens[between_mask]
tmp_acceptors = tmp_acceptors[between_mask]
d_a_distances = d_a_distances[between_mask]
# Remove D-A pairs more than d_a_cutoff away from one another
tmp_donors = self._donors[d_a_indices.T[0]]
tmp_hydrogens = self._hydrogens[d_a_indices.T[0]]
tmp_acceptors = self._acceptors[d_a_indices.T[1]]
d_a_distances_filtered = d_a_distances

# Find D-H-A angles greater than d_h_a_angle_cutoff
d_h_a_angles = np.rad2deg(
Expand All @@ -786,7 +832,7 @@
hbond_donors = tmp_donors[hbond_indices]
hbond_hydrogens = tmp_hydrogens[hbond_indices]
hbond_acceptors = tmp_acceptors[hbond_indices]
hbond_distances = d_a_distances[hbond_indices]
hbond_distances = d_a_distances_filtered[hbond_indices]
hbond_angles = d_h_a_angles[hbond_indices]

# Store data on hydrogen bonds found at this frame
Expand Down
Loading