Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
- 2025-06-06: Added selection of Nter, Cter and 5'end states at topology generation - Issue #1269
- 2025-06-12: Added new restrain_ligand sub-command in the haddock3-restraints CLI - Issue #1299
- 2025-07-23: Added printing of covalent energies to the PDB headers - Issue #1323
- 2025-07-24: Added computation of standard deviation in alascan cluster analyses - Issue #1332
8 changes: 4 additions & 4 deletions integration_tests/test_alascan.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def test_alascan_default(alascan_module, mocker):

expected_csv1 = Path(alascan_module.path, "scan_protprot_complex_1.tsv")
expected_csv2 = Path(alascan_module.path, "scan_protprot_complex_2.tsv")
expected_clt_csv = Path(alascan_module.path, "scan_clt_-.tsv")
expected_clt_csv = Path(alascan_module.path, "scan_clt_unclustered.tsv")

assert expected_csv1.exists(), f"{expected_csv1} does not exist"
assert expected_csv2.exists(), f"{expected_csv2} does not exist"
Expand All @@ -89,9 +89,9 @@ def test_alascan_default(alascan_module, mocker):
# ARG 17 B should have a negative delta_score
assert df.loc[df["ori_resname"] == "ARG"].iloc[0, :]["delta_score"] < 0.0

# check clt csv
# check cluster csv
df_clt = pd.read_csv(expected_clt_csv, sep="\t", comment="#")
assert df_clt.shape == (18, 11), f"{expected_clt_csv} has wrong shape"
assert df_clt.shape == (18, 16), f"{expected_clt_csv} has wrong shape"
# average delta score of A-38-ASP should be negative
assert (
df_clt.loc[df_clt["full_resname"] == "A-38-ASP"].iloc[0, :]["delta_score"] < 0.0
Expand All @@ -106,7 +106,7 @@ def test_alascan_single_model(alascan_module, mocker):
alascan_module.run()

expected_csv = Path(alascan_module.path, "scan_2oob.tsv")
expected_clt_csv = Path(alascan_module.path, "scan_clt_-.tsv")
expected_clt_csv = Path(alascan_module.path, "scan_clt_unclustered.tsv")

assert expected_csv.exists(), f"{expected_csv} does not exist"
assert expected_clt_csv.exists(), f"{expected_clt_csv} does not exist"
Expand Down
48 changes: 33 additions & 15 deletions src/haddock/libs/libplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -1414,7 +1414,7 @@ def make_alascan_plot(
clt_id: int,
scan_res: str = "ALA",
offline: bool = False,
) -> None:
) -> str:
"""
Make a plotly interactive plot.

Expand All @@ -1429,6 +1429,11 @@ def make_alascan_plot(
Cluster ID.
scan_res : str, optional
Residue name used for the scan, by default "ALA"

Returns
-------
html_output_filename : str
Name of the plot generated
"""
plot_name = f"scan_clt_{clt_id}"
log.info(f"Generating {scan_res} scanning plot {plot_name}")
Expand All @@ -1437,18 +1442,21 @@ def make_alascan_plot(
width, height = 2000, 1000
fig = go.Figure(layout={"width": width, "height": height})
# add traces
# Delta HADDOCK score
fig.add_trace(
go.Bar(
x=df["full_resname"],
y=df["delta_score"],
error_y={"type": "data", "array": df["delta_score_std"]},
name="delta_score",
)
)

# Delta VdW
fig.add_trace(
go.Bar(
x=df["full_resname"],
y=df["delta_vdw"],
error_y={"type": "data", "array": df["delta_vdw_std"]},
name="delta_vdw",
)
)
Expand All @@ -1457,37 +1465,46 @@ def make_alascan_plot(
go.Bar(
x=df["full_resname"],
y=0.2 * df["delta_elec"],
error_y={"type": "data", "array": df["delta_elec_std"]},
name="delta_elec",
)
)

# Delta desolvation score
fig.add_trace(
go.Bar(
x=df["full_resname"],
y=df["delta_desolv"],
error_y={"type": "data", "array": df["delta_desolv_std"]},
name="delta_desolv",
)
)
# prettifying layout
fig.update_layout(
title=f"{scan_res} scanning cluster {clt_id}",
xaxis=dict(
title=dict(text="Residue Name", font=dict(size=16)),
tickfont_size=14,
tick0=df["full_resname"],
xaxis={
"title": {"text": "Residue Name", "font": {"size": 16}},
"tickfont_size": 14,
"tick0": df["full_resname"],
# in case we want to show less residues
# dtick=10,
),
yaxis=dict(
title=dict(text="Average Delta (WT - mutant)", font=dict(size=16)),
tickfont_size=14,
),
legend=dict(x=1.01, y=1.0, font_family="Helvetica", font_size=16),
# "dtick": 10,
},
yaxis={
"title": {
"text": "Average Delta (WT - mutant)",
"font": {"size": 16}
},
"tickfont_size": 14,
},
legend={
"x": 1.01, "y": 1.0,
"font_family": "Helvetica",
"font_size": 16
},
barmode="group",
bargap=0.05,
bargroupgap=0.05,
hovermode="x unified",
hoverlabel=dict(font_size=16, font_family="Helvetica"),
hoverlabel={"font_size": 16, "font_family": "Helvetica"},
)
for n in range(df.shape[0] - 1):
fig.add_vline(x=0.5 + n, line_color="gray", opacity=0.2)
Expand All @@ -1501,6 +1518,7 @@ def make_alascan_plot(
figure_width=width,
offline=offline,
)
return html_output_filename


def fig_to_html(
Expand Down
82 changes: 50 additions & 32 deletions src/haddock/modules/analysis/alascan/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,12 +271,13 @@ def alascan_cluster_analysis(models):
cl_id = native.clt_id
# unclustered models have cl_id = None
if cl_id is None:
cl_id = "-"
cl_id = "unclustered"
# Initiate key if this cluster id is encountered for the first time
if cl_id not in clt_scan:
clt_scan[cl_id] = {}
cl_pops[cl_id] = 1
else:
cl_pops[cl_id] += 1
cl_pops[cl_id] = 0
# Increase the population of that cluster
cl_pops[cl_id] += 1
# read the scan file
alascan_fname = f"scan_{native.file_name.removesuffix('.pdb')}.tsv"
df_scan = pd.read_csv(alascan_fname, sep="\t", comment="#")
Expand All @@ -293,44 +294,60 @@ def alascan_cluster_analysis(models):
delta_bsa = row['delta_bsa']
# add to the cluster data with the ident logic
ident = f"{chain}-{res}-{ori_resname}"
if ident not in clt_scan[cl_id]:
# Create variable with appropriate key
if ident not in clt_scan[cl_id].keys():
clt_scan[cl_id][ident] = {
'delta_score': delta_score,
'delta_vdw': delta_vdw,
'delta_elec': delta_elec,
'delta_desolv': delta_desolv,
'delta_bsa': delta_bsa,
'frac_pr': 1
'delta_score': [],
'delta_vdw': [],
'delta_elec': [],
'delta_desolv': [],
'delta_bsa': [],
'frac_pr': 0,
}
else:
clt_scan[cl_id][ident]['delta_score'] += delta_score
clt_scan[cl_id][ident]['delta_vdw'] += delta_vdw
clt_scan[cl_id][ident]['delta_elec'] += delta_elec
clt_scan[cl_id][ident]['delta_desolv'] += delta_desolv
clt_scan[cl_id][ident]['delta_bsa'] += delta_bsa
clt_scan[cl_id][ident]['frac_pr'] += 1
# now average the data
# Add data
clt_scan[cl_id][ident]['delta_score'].append(delta_score)
clt_scan[cl_id][ident]['delta_vdw'].append(delta_vdw)
clt_scan[cl_id][ident]['delta_elec'].append(delta_elec)
clt_scan[cl_id][ident]['delta_desolv'].append(delta_desolv)
clt_scan[cl_id][ident]['delta_bsa'].append(delta_bsa)
clt_scan[cl_id][ident]['frac_pr'] += 1
# now average the data for every cluster
for cl_id in clt_scan:
scan_clt_filename = f"scan_clt_{cl_id}.tsv"
log.info(f"Writing {scan_clt_filename}")
clt_data = []
# Loop over residues
for ident in clt_scan[cl_id]:
# Split identifyer to retrieve residue data
chain = ident.split("-")[0]
resnum = int(ident.split("-")[1])
resname = ident.split("-")[2]
frac_pr = clt_scan[cl_id][ident]["frac_pr"]
clt_data.append([chain, resnum, resname, ident,
clt_scan[cl_id][ident]['delta_score'] / frac_pr,
clt_scan[cl_id][ident]['delta_vdw'] / frac_pr,
clt_scan[cl_id][ident]['delta_elec'] / frac_pr,
clt_scan[cl_id][ident]['delta_desolv'] / frac_pr,
clt_scan[cl_id][ident]['delta_bsa'] / frac_pr,
clt_scan[cl_id][ident]['frac_pr'] / cl_pops[cl_id]
]
)
df_cols = ['chain', 'resnum', 'resname', 'full_resname', 'delta_score',
'delta_vdw', 'delta_elec', 'delta_desolv', 'delta_bsa',
'frac_pres']
# Point data for this specific residue
clt_res_dt = clt_scan[cl_id][ident]
# Compute averages and stddev and hold data.
clt_data.append([
chain,
resnum,
resname,
ident,
np.mean(clt_res_dt['delta_score']),
np.std(clt_res_dt['delta_score']),
np.mean(clt_res_dt['delta_vdw']),
np.std(clt_res_dt['delta_vdw']),
np.mean(clt_res_dt['delta_elec']),
np.std(clt_res_dt['delta_elec']),
np.mean(clt_res_dt['delta_desolv']),
np.std(clt_res_dt['delta_desolv']),
np.mean(clt_res_dt['delta_bsa']),
np.std(clt_res_dt['delta_bsa']),
clt_res_dt['frac_pr'] / cl_pops[cl_id],
])
df_cols = [
'chain', 'resnum', 'resname', 'full_resname',
'delta_score', 'delta_score_std', 'delta_vdw', 'delta_vdw_std',
'delta_elec', 'delta_elec_std', 'delta_desolv', 'delta_desolv_std',
'delta_bsa', 'delta_bsa_std', 'frac_pres',
]
df_scan_clt = pd.DataFrame(clt_data, columns=df_cols)
# adding clt-based Z score
df_scan_clt = add_zscores(df_scan_clt, 'delta_score')
Expand All @@ -347,6 +364,7 @@ def alascan_cluster_analysis(models):
with open(scan_clt_filename, 'w') as f:
f.write(f"{'#' * 80}{os.linesep}") # noqa E501
f.write(f"# `alascan` cluster results for cluster {cl_id}{os.linesep}") # noqa E501
f.write(f"# reported values are the average for the cluster{os.linesep}") # noqa E501
f.write(f"#{os.linesep}")
f.write(f"# z_score is calculated with respect to the mean values of all residues{os.linesep}") # noqa E501
f.write(f"{'#' * 80}{os.linesep}") # noqa E501
Expand Down
15 changes: 13 additions & 2 deletions tests/test_libplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,14 @@ def example_capri_ss_dashcluster():
def example_df_scan_clt():
"""Return example alascan clt DataFrame."""
example_clt_data = [
["A", 38, "ASP", "A-38-ASP", -2.0, -1.0, -0.4, -2.3, -0.5, -7.2, 1.0],
["A", 69, "LYS", "A-38-ASP", -0.0, 1.0, -0.4, 0.8, 0.5, -7.2, 1.0],
[
"A", 38, "ASP", "A-38-ASP", -2.0, -1.0, 0.1, -0.4, 0.04, -2.3,
0.23, -0.5, 0.05, -7.2, 0.72, 1.0,
],
[
"A", 69, "LYS", "A-38-ASP", -0.0, 1.0, 0.1, -0.4, 0.04, -2.3,
0.23, -0.5, 0.05, -7.2, 0.72, 1.0,
],
]
columns = [
"chain",
Expand All @@ -242,10 +248,15 @@ def example_df_scan_clt():
"full_resname",
"score",
"delta_score",
"delta_score_std",
"delta_vdw",
"delta_vdw_std",
"delta_elec",
"delta_elec_std",
"delta_desolv",
"delta_desolv_std",
"delta_bsa",
"delta_bsa_std",
"frac_pres",
]

Expand Down
8 changes: 4 additions & 4 deletions tests/test_module_alascan.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,8 @@ def test_run(
)

alascan.run()
assert Path(alascan.path, "scan_clt_-.tsv").exists()
assert Path(alascan.path, "scan_clt_-.html").exists()
assert Path(alascan.path, "scan_clt_unclustered.tsv").exists()
assert Path(alascan.path, "scan_clt_unclustered.html").exists()


def test_scanjob_run(scanjob_obj, mocker):
Expand Down Expand Up @@ -378,13 +378,13 @@ def test_alascan_cluster_analysis(protprot_input_list, scan_file, monkeypatch):
shutil.copy(scan_file, Path("scan_protprot_complex_2.tsv"))
alascan_cluster_analysis(protprot_input_list)

assert Path("scan_clt_-.tsv").exists()
assert Path("scan_clt_unclustered.tsv").exists()

protprot_input_list[1].clt_id = 1
alascan_cluster_analysis(protprot_input_list)

assert Path("scan_clt_1.tsv").exists()
assert Path("scan_clt_-.tsv").exists()
assert Path("scan_clt_unclustered.tsv").exists()


def test_create_alascan_plots(mocker, caplog):
Expand Down