Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
- 2025-06-06: Added selection of Nter, Cter and 5'end states at topology generation - Issue #1269
- 2025-12-06: Added new restrain_ligand sub-command in the haddock3-restraints CLI - Issue #1299
- 2025-07-23: Added printing of covalent energies to the PDB headers - Issue #1323
- 2025-07-24: Added computation of standard deviation in alascan cluster analyses - Issue #1332
8 changes: 4 additions & 4 deletions integration_tests/test_alascan.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def test_alascan_default(alascan_module, mocker):

expected_csv1 = Path(alascan_module.path, "scan_protprot_complex_1.tsv")
expected_csv2 = Path(alascan_module.path, "scan_protprot_complex_2.tsv")
expected_clt_csv = Path(alascan_module.path, "scan_clt_-.tsv")
expected_clt_csv = Path(alascan_module.path, "scan_clt_unclustered.tsv")

assert expected_csv1.exists(), f"{expected_csv1} does not exist"
assert expected_csv2.exists(), f"{expected_csv2} does not exist"
Expand All @@ -89,9 +89,9 @@ def test_alascan_default(alascan_module, mocker):
# ARG 17 B should have a negative delta_score
assert df.loc[df["ori_resname"] == "ARG"].iloc[0, :]["delta_score"] < 0.0

# check clt csv
# check cluster csv
df_clt = pd.read_csv(expected_clt_csv, sep="\t", comment="#")
assert df_clt.shape == (18, 11), f"{expected_clt_csv} has wrong shape"
assert df_clt.shape == (18, 16), f"{expected_clt_csv} has wrong shape"
# average delta score of A-38-ASP should be negative
assert (
df_clt.loc[df_clt["full_resname"] == "A-38-ASP"].iloc[0, :]["delta_score"] < 0.0
Expand All @@ -106,7 +106,7 @@ def test_alascan_single_model(alascan_module, mocker):
alascan_module.run()

expected_csv = Path(alascan_module.path, "scan_2oob.tsv")
expected_clt_csv = Path(alascan_module.path, "scan_clt_-.tsv")
expected_clt_csv = Path(alascan_module.path, "scan_clt_unclustered.tsv")

assert expected_csv.exists(), f"{expected_csv} does not exist"
assert expected_clt_csv.exists(), f"{expected_clt_csv} does not exist"
Expand Down
48 changes: 33 additions & 15 deletions src/haddock/libs/libplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -1414,7 +1414,7 @@ def make_alascan_plot(
clt_id: int,
scan_res: str = "ALA",
offline: bool = False,
) -> None:
) -> str:
"""
Make a plotly interactive plot.

Expand All @@ -1429,6 +1429,11 @@ def make_alascan_plot(
Cluster ID.
scan_res : str, optional
Residue name used for the scan, by default "ALA"

Returns
-------
html_output_filename : str
Name of the plot generated
"""
plot_name = f"scan_clt_{clt_id}"
log.info(f"Generating {scan_res} scanning plot {plot_name}")
Expand All @@ -1437,18 +1442,21 @@ def make_alascan_plot(
width, height = 2000, 1000
fig = go.Figure(layout={"width": width, "height": height})
# add traces
# Delta HADDOCK score
fig.add_trace(
go.Bar(
x=df["full_resname"],
y=df["delta_score"],
error_y={"type": "data", "array": df["delta_score_std"]},
name="delta_score",
)
)

# Delta VdW
fig.add_trace(
go.Bar(
x=df["full_resname"],
y=df["delta_vdw"],
error_y={"type": "data", "array": df["delta_vdw_std"]},
name="delta_vdw",
)
)
Expand All @@ -1457,37 +1465,46 @@ def make_alascan_plot(
go.Bar(
x=df["full_resname"],
y=0.2 * df["delta_elec"],
error_y={"type": "data", "array": df["delta_elec_std"]},
name="delta_elec",
)
)

# Delta desolvation score
fig.add_trace(
go.Bar(
x=df["full_resname"],
y=df["delta_desolv"],
error_y={"type": "data", "array": df["delta_desolv_std"]},
name="delta_desolv",
)
)
# prettifying layout
fig.update_layout(
title=f"{scan_res} scanning cluster {clt_id}",
xaxis=dict(
title=dict(text="Residue Name", font=dict(size=16)),
tickfont_size=14,
tick0=df["full_resname"],
xaxis={
"title": {"text": "Residue Name", "font": {"size": 16}},
"tickfont_size": 14,
"tick0": df["full_resname"],
# in case we want to show less residues
# dtick=10,
),
yaxis=dict(
title=dict(text="Average Delta (WT - mutant)", font=dict(size=16)),
tickfont_size=14,
),
legend=dict(x=1.01, y=1.0, font_family="Helvetica", font_size=16),
# "dtick": 10,
},
yaxis={
"title": {
"text": "Average Delta (WT - mutant)",
"font": {"size": 16}
},
"tickfont_size": 14,
},
legend={
"x": 1.01, "y": 1.0,
"font_family": "Helvetica",
"font_size": 16
},
barmode="group",
bargap=0.05,
bargroupgap=0.05,
hovermode="x unified",
hoverlabel=dict(font_size=16, font_family="Helvetica"),
hoverlabel={"font_size": 16, "font_family": "Helvetica"},
)
for n in range(df.shape[0] - 1):
fig.add_vline(x=0.5 + n, line_color="gray", opacity=0.2)
Expand All @@ -1501,6 +1518,7 @@ def make_alascan_plot(
figure_width=width,
offline=offline,
)
return html_output_filename


def fig_to_html(
Expand Down
82 changes: 50 additions & 32 deletions src/haddock/modules/analysis/alascan/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,12 +271,13 @@ def alascan_cluster_analysis(models):
cl_id = native.clt_id
# unclustered models have cl_id = None
if cl_id is None:
cl_id = "-"
cl_id = "unclustered"
# Initiate key if this cluster id is encountered for the first time
if cl_id not in clt_scan:
clt_scan[cl_id] = {}
cl_pops[cl_id] = 1
else:
cl_pops[cl_id] += 1
cl_pops[cl_id] = 0
# Increase the population of that cluster
cl_pops[cl_id] += 1
# read the scan file
alascan_fname = f"scan_{native.file_name.removesuffix('.pdb')}.tsv"
df_scan = pd.read_csv(alascan_fname, sep="\t", comment="#")
Expand All @@ -293,44 +294,60 @@ def alascan_cluster_analysis(models):
delta_bsa = row['delta_bsa']
# add to the cluster data with the ident logic
ident = f"{chain}-{res}-{ori_resname}"
if ident not in clt_scan[cl_id]:
# Create variable with appropriate key
if ident not in clt_scan[cl_id].keys():
clt_scan[cl_id][ident] = {
'delta_score': delta_score,
'delta_vdw': delta_vdw,
'delta_elec': delta_elec,
'delta_desolv': delta_desolv,
'delta_bsa': delta_bsa,
'frac_pr': 1
'delta_score': [],
'delta_vdw': [],
'delta_elec': [],
'delta_desolv': [],
'delta_bsa': [],
'frac_pr': 0,
}
else:
clt_scan[cl_id][ident]['delta_score'] += delta_score
clt_scan[cl_id][ident]['delta_vdw'] += delta_vdw
clt_scan[cl_id][ident]['delta_elec'] += delta_elec
clt_scan[cl_id][ident]['delta_desolv'] += delta_desolv
clt_scan[cl_id][ident]['delta_bsa'] += delta_bsa
clt_scan[cl_id][ident]['frac_pr'] += 1
# now average the data
# Add data
clt_scan[cl_id][ident]['delta_score'].append(delta_score)
clt_scan[cl_id][ident]['delta_vdw'].append(delta_vdw)
clt_scan[cl_id][ident]['delta_elec'].append(delta_elec)
clt_scan[cl_id][ident]['delta_desolv'].append(delta_desolv)
clt_scan[cl_id][ident]['delta_bsa'].append(delta_bsa)
clt_scan[cl_id][ident]['frac_pr'] += 1
# now average the data for every cluster
for cl_id in clt_scan:
scan_clt_filename = f"scan_clt_{cl_id}.tsv"
log.info(f"Writing {scan_clt_filename}")
clt_data = []
# Loop over residues
for ident in clt_scan[cl_id]:
# Split identifier to retrieve residue data
chain = ident.split("-")[0]
resnum = int(ident.split("-")[1])
resname = ident.split("-")[2]
frac_pr = clt_scan[cl_id][ident]["frac_pr"]
clt_data.append([chain, resnum, resname, ident,
clt_scan[cl_id][ident]['delta_score'] / frac_pr,
clt_scan[cl_id][ident]['delta_vdw'] / frac_pr,
clt_scan[cl_id][ident]['delta_elec'] / frac_pr,
clt_scan[cl_id][ident]['delta_desolv'] / frac_pr,
clt_scan[cl_id][ident]['delta_bsa'] / frac_pr,
clt_scan[cl_id][ident]['frac_pr'] / cl_pops[cl_id]
]
)
df_cols = ['chain', 'resnum', 'resname', 'full_resname', 'delta_score',
'delta_vdw', 'delta_elec', 'delta_desolv', 'delta_bsa',
'frac_pres']
# Point data for this specific residue
clt_res_dt = clt_scan[cl_id][ident]
# Compute averages and stddev and hold data.
clt_data.append([
chain,
resnum,
resname,
ident,
np.mean(clt_res_dt['delta_score']),
np.std(clt_res_dt['delta_score']),
np.mean(clt_res_dt['delta_vdw']),
np.std(clt_res_dt['delta_vdw']),
np.mean(clt_res_dt['delta_elec']),
np.std(clt_res_dt['delta_elec']),
np.mean(clt_res_dt['delta_desolv']),
np.std(clt_res_dt['delta_desolv']),
np.mean(clt_res_dt['delta_bsa']),
np.std(clt_res_dt['delta_bsa']),
clt_res_dt['frac_pr'] / cl_pops[cl_id],
])
df_cols = [
'chain', 'resnum', 'resname', 'full_resname',
'delta_score', 'delta_score_std', 'delta_vdw', 'delta_vdw_std',
'delta_elec', 'delta_elec_std', 'delta_desolv', 'delta_desolv_std',
'delta_bsa', 'delta_bsa_std', 'frac_pres',
]
df_scan_clt = pd.DataFrame(clt_data, columns=df_cols)
# adding clt-based Z score
df_scan_clt = add_zscores(df_scan_clt, 'delta_score')
Expand All @@ -347,6 +364,7 @@ def alascan_cluster_analysis(models):
with open(scan_clt_filename, 'w') as f:
f.write(f"{'#' * 80}{os.linesep}") # noqa E501
f.write(f"# `alascan` cluster results for cluster {cl_id}{os.linesep}") # noqa E501
f.write(f"# reported values are the average for the cluster{os.linesep}") # noqa E501
f.write(f"#{os.linesep}")
f.write(f"# z_score is calculated with respect to the mean values of all residues{os.linesep}") # noqa E501
f.write(f"{'#' * 80}{os.linesep}") # noqa E501
Expand Down
15 changes: 13 additions & 2 deletions tests/test_libplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,14 @@ def example_capri_ss_dashcluster():
def example_df_scan_clt():
"""Return example alascan clt DataFrame."""
example_clt_data = [
["A", 38, "ASP", "A-38-ASP", -2.0, -1.0, -0.4, -2.3, -0.5, -7.2, 1.0],
["A", 69, "LYS", "A-38-ASP", -0.0, 1.0, -0.4, 0.8, 0.5, -7.2, 1.0],
[
"A", 38, "ASP", "A-38-ASP", -2.0, -1.0, 0.1, -0.4, 0.04, -2.3,
0.23, -0.5, 0.05, -7.2, 0.72, 1.0,
],
[
"A", 69, "LYS", "A-38-ASP", -0.0, 1.0, 0.1, -0.4, 0.04, -2.3,
0.23, -0.5, 0.05, -7.2, 0.72, 1.0,
],
]
columns = [
"chain",
Expand All @@ -242,10 +248,15 @@ def example_df_scan_clt():
"full_resname",
"score",
"delta_score",
"delta_score_std",
"delta_vdw",
"delta_vdw_std",
"delta_elec",
"delta_elec_std",
"delta_desolv",
"delta_desolv_std",
"delta_bsa",
"delta_bsa_std",
"frac_pres",
]

Expand Down
8 changes: 4 additions & 4 deletions tests/test_module_alascan.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,8 @@ def test_run(
)

alascan.run()
assert Path(alascan.path, "scan_clt_-.tsv").exists()
assert Path(alascan.path, "scan_clt_-.html").exists()
assert Path(alascan.path, "scan_clt_unclustered.tsv").exists()
assert Path(alascan.path, "scan_clt_unclustered.html").exists()


def test_scanjob_run(scanjob_obj, mocker):
Expand Down Expand Up @@ -378,13 +378,13 @@ def test_alascan_cluster_analysis(protprot_input_list, scan_file, monkeypatch):
shutil.copy(scan_file, Path("scan_protprot_complex_2.tsv"))
alascan_cluster_analysis(protprot_input_list)

assert Path("scan_clt_-.tsv").exists()
assert Path("scan_clt_unclustered.tsv").exists()

protprot_input_list[1].clt_id = 1
alascan_cluster_analysis(protprot_input_list)

assert Path("scan_clt_1.tsv").exists()
assert Path("scan_clt_-.tsv").exists()
assert Path("scan_clt_unclustered.tsv").exists()


def test_create_alascan_plots(mocker, caplog):
Expand Down