-
Notifications
You must be signed in to change notification settings - Fork 728
Performance Improvement for hydrogenbond analysis for the between
keyword
#5029
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
15fb3cb
c24198b
e6928f6
1c0a680
5962b13
8ce4165
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -239,7 +239,7 @@ | |
import logging | ||
import warnings | ||
from collections.abc import Iterable | ||
|
||
from collections import defaultdict | ||
import numpy as np | ||
|
||
from ..base import AnalysisBase, Results, ResultsGroup | ||
|
@@ -679,90 +679,136 @@ | |
|
||
return donors, hydrogens | ||
|
||
def _filter_atoms(self, donors, acceptors): | ||
"""Create a mask to filter donor, hydrogen and acceptor atoms. | ||
This can be used to consider only hydrogen bonds between two or more | ||
specified groups. | ||
Groups are specified with the `between` keyword when creating the | ||
HydrogenBondAnalysis object. | ||
Returns | ||
------- | ||
mask: np.ndarray | ||
.. versionchanged:: 2.5.0 | ||
Change return value to a mask instead of separate AtomGroups. | ||
`` | ||
""" | ||
|
||
mask = np.full(donors.n_atoms, fill_value=False) | ||
for group1, group2 in self.between_ags: | ||
|
||
# Find donors in G1 and acceptors in G2 | ||
mask[ | ||
np.logical_and( | ||
np.isin(donors.indices, group1.indices), | ||
np.isin(acceptors.indices, group2.indices) | ||
) | ||
] = True | ||
|
||
# Find acceptors in G1 and donors in G2 | ||
mask[ | ||
np.logical_and( | ||
np.isin(acceptors.indices, group1.indices), | ||
np.isin(donors.indices, group2.indices) | ||
) | ||
] = True | ||
|
||
return mask | ||
|
||
|
||
def _prepare(self): | ||
self.results.hbonds = [[], [], [], [], [], []] | ||
|
||
|
||
|
||
def _single_frame(self): | ||
|
||
box = self._ts.dimensions | ||
|
||
# Update donor-hydrogen pairs if necessary | ||
if self.update_selections: | ||
self._donors, self._hydrogens = self._get_dh_pairs() | ||
|
||
# find D and A within cutoff distance of one another | ||
# min_cutoff = 1.0 as an atom cannot form a hydrogen bond with itself | ||
d_a_indices, d_a_distances = capped_distance( | ||
self._donors.positions, | ||
self._acceptors.positions, | ||
max_cutoff=self.d_a_cutoff, | ||
min_cutoff=1.0, | ||
box=box, | ||
return_distances=True, | ||
) | ||
if self.between_ags is not None: | ||
# map donor indices to respective hydrogen indices | ||
donor_idx_to_H_indices = defaultdict(list) | ||
|
||
for d, h in zip(self._donors, self._hydrogens): | ||
donor_idx_to_H_indices[d.index].append(h.index) | ||
|
||
tmp_hbond_candidates = [] | ||
|
||
for group1, group2 in self.between_ags: | ||
|
||
tmp_donors_g1_mask = np.isin(self._donors.indices, group1.indices) | ||
donors_for_dist = self._donors[tmp_donors_g1_mask] | ||
tmp_acceptors_g2_mask = np.isin(self._acceptors.indices, group2.indices) | ||
acceptors_for_dist = self._acceptors[tmp_acceptors_g2_mask] | ||
Comment on lines
+703
to
+706
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tmp_donors_mask = np.logical_or(
np.isin(self._donors.indices, group1.indices),
np.isin(self._donors.indices, group2.indices),
)
donors_for_dist = self._donors[tmp_donors_mask]
tmp_acceptors_mask = np.logical_or(
np.isin(self._acceptors.indices, group1.indices),
np.isin(self._acceptors.indices, group2.indices),
)
acceptors_for_dist = self._acceptors[tmp_acceptors_mask] I think if you do this you only need to make one call to |
||
|
||
d_a_indices_12, d_a_distances_12 = capped_distance( | ||
donors_for_dist.positions, | ||
acceptors_for_dist.positions, | ||
max_cutoff=self.d_a_cutoff, | ||
box=box, | ||
return_distances=True, | ||
) | ||
|
||
if np.size(d_a_indices) == 0: | ||
warnings.warn( | ||
"No hydrogen bonds were found given d-a cutoff of " | ||
f"{self.d_a_cutoff} between Donor, {self.donors_sel}, and " | ||
f"Acceptor, {self.acceptors_sel}." | ||
if np.size(d_a_indices_12) == 0: | ||
warnings.warn( | ||
"No hydrogen bonds were found given d-a cutoff of " | ||
f"{self.d_a_cutoff} between Donor, {self.donors_sel}, and " | ||
f"Acceptor, {self.acceptors_sel}." | ||
Comment on lines
+718
to
+720
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this warning is no longer accurate - it should warn that no hydrogen bonds were found between groups 1 and 2 for the given cutoff and atom selections |
||
) | ||
|
||
|
||
tmp_donors = donors_for_dist[d_a_indices_12.T[0]] | ||
tmp_acceptors = acceptors_for_dist[d_a_indices_12.T[1]] | ||
distance = d_a_distances_12 | ||
|
||
# Fetch hydrogens bonded to the tmp_donorss | ||
# TODO: figure out vectorization | ||
for i in range(len(d_a_indices_12)): | ||
if tmp_donors[i].index in donor_idx_to_H_indices: | ||
possible_H_indices = donor_idx_to_H_indices[tmp_donors[i].index] | ||
for h_idx in possible_H_indices: | ||
tmp_hbond_candidates.append( | ||
(tmp_donors[i].index, h_idx, tmp_acceptors[i].index, distance[i]) | ||
) | ||
Comment on lines
+724
to
+736
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would this work? hydrogens_for_dist = self._hydrogens[tmp_donors_g1_mask]
tmp_hydrogens = self._hydrogens[d_a_indices.T[0]]
|
||
|
||
if np.array_equal(group1.indices, group2.indices): | ||
continue # in case between 2 same residues like ["protein", "protein"] | ||
|
||
tmp_donors_g2_mask = np.isin(self._donors.indices, group2.indices) | ||
donors_for_dist = self._donors[tmp_donors_g2_mask] | ||
tmp_acceptors_g1_mask = np.isin(self._acceptors.indices, group1.indices) | ||
acceptors_for_dist = self._acceptors[tmp_acceptors_g1_mask] | ||
|
||
d_a_indices_21, d_a_distances_21 = capped_distance( | ||
donors_for_dist.positions, | ||
acceptors_for_dist.positions, | ||
max_cutoff=self.d_a_cutoff, | ||
box=box, | ||
return_distances=True, | ||
) | ||
|
||
if np.size(d_a_indices_12) == 0: | ||
warnings.warn( | ||
"No hydrogen bonds were found given d-a cutoff of " | ||
f"{self.d_a_cutoff} between Donor, {self.donors_sel}, and " | ||
f"Acceptor, {self.acceptors_sel}." | ||
) | ||
|
||
|
||
tmp_donors = donors_for_dist[d_a_indices_21.T[0]] | ||
tmp_acceptors = acceptors_for_dist[d_a_indices_21.T[1]] | ||
distance = d_a_distances_21 | ||
|
||
for i in range(len(d_a_indices_21)): | ||
if tmp_donors[i].index in donor_idx_to_H_indices: | ||
possible_H_indices = donor_idx_to_H_indices[tmp_donors[i].index] | ||
for h_idx in possible_H_indices: | ||
tmp_hbond_candidates.append( | ||
(tmp_donors[i].index, h_idx, tmp_acceptors[i].index, distance[i]) | ||
) | ||
|
||
unique_hbond_dict = {} | ||
for d_idx, h_idx, a_idx, dist in tmp_hbond_candidates: | ||
key = (d_idx, h_idx, a_idx) | ||
if key not in unique_hbond_dict: | ||
unique_hbond_dict[key] = dist | ||
Comment on lines
+774
to
+778
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think if you do |
||
|
||
unique_keys = list(unique_hbond_dict.keys()) | ||
unique_d_idx = [k[0] for k in unique_keys] | ||
unique_h_idx = [k[1] for k in unique_keys] | ||
unique_a_idx = [k[2] for k in unique_keys] | ||
unique_distances = np.array(list(unique_hbond_dict.values())) | ||
|
||
tmp_donors = self.u.atoms[unique_d_idx] | ||
tmp_hydrogens = self.u.atoms[unique_h_idx] | ||
tmp_acceptors = self.u.atoms[unique_a_idx] | ||
d_a_distances_filtered = unique_distances | ||
|
||
else: | ||
d_a_indices, d_a_distances = capped_distance( | ||
self._donors.positions, | ||
self._acceptors.positions, | ||
max_cutoff=self.d_a_cutoff, | ||
box=box, | ||
return_distances=True, | ||
) | ||
|
||
# Remove D-A pairs more than d_a_cutoff away from one another | ||
tmp_donors = self._donors[d_a_indices.T[0]] | ||
tmp_hydrogens = self._hydrogens[d_a_indices.T[0]] | ||
tmp_acceptors = self._acceptors[d_a_indices.T[1]] | ||
if np.size(d_a_indices) == 0: | ||
warnings.warn( | ||
"No hydrogen bonds were found given d-a cutoff of " | ||
f"{self.d_a_cutoff} between Donor, {self.donors_sel}, and " | ||
f"Acceptor, {self.acceptors_sel}." | ||
) | ||
|
||
# Remove donor-acceptor pairs between pairs of AtomGroups we are not | ||
# interested in | ||
if self.between_ags is not None: | ||
between_mask = self._filter_atoms(tmp_donors, tmp_acceptors) | ||
tmp_donors = tmp_donors[between_mask] | ||
tmp_hydrogens = tmp_hydrogens[between_mask] | ||
tmp_acceptors = tmp_acceptors[between_mask] | ||
d_a_distances = d_a_distances[between_mask] | ||
# Remove D-A pairs more than d_a_cutoff away from one another | ||
tmp_donors = self._donors[d_a_indices.T[0]] | ||
tmp_hydrogens = self._hydrogens[d_a_indices.T[0]] | ||
tmp_acceptors = self._acceptors[d_a_indices.T[1]] | ||
d_a_distances_filtered = d_a_distances | ||
|
||
# Find D-H-A angles greater than d_h_a_angle_cutoff | ||
d_h_a_angles = np.rad2deg( | ||
|
@@ -786,7 +832,7 @@ | |
hbond_donors = tmp_donors[hbond_indices] | ||
hbond_hydrogens = tmp_hydrogens[hbond_indices] | ||
hbond_acceptors = tmp_acceptors[hbond_indices] | ||
hbond_distances = d_a_distances[hbond_indices] | ||
hbond_distances = d_a_distances_filtered[hbond_indices] | ||
hbond_angles = d_h_a_angles[hbond_indices] | ||
|
||
# Store data on hydrogen bonds found at this frame | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please could you move everything in this
if
block into a separate function to make_single_frame
a little easier to read?