Skip to content

Commit f88fb29

Browse files
committed
refactor(benchmark): move plot generation to analyze_combined, remove per-granularity plots
1 parent 200ff4f commit f88fb29

File tree

2 files changed

+183
-170
lines changed

2 files changed

+183
-170
lines changed

benchmark/bear/analyze_combined.py

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
from pathlib import Path
2+
3+
import matplotlib
4+
matplotlib.use("Agg")
5+
import matplotlib.pyplot as plt
6+
from matplotlib.axes import Axes
7+
from matplotlib.patches import Patch
8+
import numpy as np
9+
from rich.console import Console
10+
11+
from analyze_results import (
12+
DATA_DIR,
13+
_format_log_axis,
14+
_save_plot,
15+
load_measured_ostrich_results,
16+
load_ostrich_dm_by_version,
17+
load_ostrich_vm_by_version,
18+
load_ostrich_vq_median,
19+
load_results,
20+
load_tal_dm_by_version,
21+
load_tal_vm_by_version,
22+
load_tal_vq_median,
23+
)
24+
25+
console = Console()
26+
27+
# Line styles: system distinguished by color, granularity by dash pattern.
# No markers. Dashes for hourly use long on/off segments to stay readable.
# Each entry feeds Axes.plot via _plot_line: "linestyle" is either a solid
# line or a (offset, (on, off)) dash tuple; "label" is the legend text.
STYLES = {
    "tal_daily": {"color": "#0072B2", "linestyle": "-", "label": "TAL daily (89 ver.)"},
    "tal_hourly": {"color": "#0072B2", "linestyle": (0, (8, 4)), "label": "TAL hourly (1,299 ver.)"},
    "ost_daily": {"color": "#D55E00", "linestyle": "-", "label": "OSTRICH daily (89 ver.)"},
    "ost_hourly": {"color": "#D55E00", "linestyle": (0, (8, 4)), "label": "OSTRICH hourly (1,299 ver.)"},
}
35+
36+
37+
def _load_granularity_data(granularity: str) -> tuple[dict, list[Path]]:
    """Load TAL benchmark results and OSTRICH raw-output paths for one granularity.

    Args:
        granularity: Dataset granularity name (e.g. "daily" or "hourly"),
            used to build the input file names under DATA_DIR.

    Returns:
        A ``(data, ostrich_raw_files)`` tuple: ``data`` is the parsed TAL
        results from ``load_results``; ``ostrich_raw_files`` lists the raw
        OSTRICH output files for the "p" and "po" triple patterns.  The raw
        files may not exist on disk — callers check with ``.exists()``.
    """
    results_file = DATA_DIR / f"benchmark_results_{granularity}.json"
    ostrich_results_file = DATA_DIR / f"ostrich_benchmark_results_{granularity}.json"
    ostrich_raw_files = [DATA_DIR / f"ostrich_raw_{pt}_{granularity}.txt" for pt in ["p", "po"]]
    data = load_results(results_file)
    # Return value intentionally discarded — presumably called for its side
    # effects (registering measured OSTRICH results). NOTE(review): confirm
    # against load_measured_ostrich_results in analyze_results.
    load_measured_ostrich_results(ostrich_results_file)
    return data, ostrich_raw_files
44+
45+
46+
def _normalize_keys(data: dict[int, float]) -> tuple[list[float], list[float]]:
47+
versions = sorted(data.keys())
48+
max_v = max(versions)
49+
pct = [v / max_v * 100 for v in versions]
50+
vals = [data[v] for v in versions]
51+
return pct, vals
52+
53+
54+
def _plot_line(ax: Axes, pct: list[float], vals: list[float], style_key: str) -> None:
    """Draw one series on *ax* using the shared style registered in STYLES."""
    style = STYLES[style_key]
    ax.plot(
        pct,
        vals,
        color=style["color"],
        linestyle=style["linestyle"],
        linewidth=1.5,
        label=style["label"],
    )
58+
59+
60+
def _plot_line_chart(ax: Axes,
                     daily_data: dict[int, float], hourly_data: dict[int, float],
                     daily_ost_data: dict[int, float] | None,
                     hourly_ost_data: dict[int, float] | None) -> None:
    """Plot the TAL daily/hourly series plus optional OSTRICH series on *ax*.

    TAL series are always drawn; each OSTRICH series is drawn only when its
    dict is truthy (non-None and non-empty).
    """
    _plot_line(ax, *_normalize_keys(daily_data), "tal_daily")
    _plot_line(ax, *_normalize_keys(hourly_data), "tal_hourly")
    for ost_data, style_key in ((daily_ost_data, "ost_daily"),
                                (hourly_ost_data, "ost_hourly")):
        if ost_data:
            _plot_line(ax, *_normalize_keys(ost_data), style_key)
74+
75+
76+
def plot_vm_combined(daily_data: dict, daily_ost: list[Path],
                     hourly_data: dict, hourly_ost: list[Path],
                     plot_dir: Path) -> None:
    """Render the combined VM comparison plot and save it as ``vm_comparison``.

    Overlays TAL daily/hourly per-version medians with the OSTRICH ones when
    the corresponding raw OSTRICH files exist on disk.
    """
    def measured_or_none(raw_files: list[Path]) -> dict | None:
        # Only parse OSTRICH output when at least one raw file is present.
        if any(f.exists() for f in raw_files):
            return load_ostrich_vm_by_version(raw_files)
        return None

    fig, ax = plt.subplots(figsize=(8, 5))
    tal_daily = load_tal_vm_by_version(daily_data["results"]["vm"])
    tal_hourly = load_tal_vm_by_version(hourly_data["results"]["vm"])
    _plot_line_chart(ax, tal_daily, tal_hourly,
                     measured_or_none(daily_ost), measured_or_none(hourly_ost))
    _format_log_axis(ax)
    ax.set_xlabel("Version (% of total)")
    ax.set_ylabel("Lookup time (ms)")
    ax.set_title("VM: median across all triple patterns")
    ax.legend(fontsize=9, handlelength=3)
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    _save_plot(fig, plot_dir, "vm_comparison")
94+
95+
96+
def plot_dm_combined(daily_data: dict, daily_ost: list[Path],
                     hourly_data: dict, hourly_ost: list[Path],
                     plot_dir: Path) -> None:
    """Render the combined DM comparison plot and save it as ``dm_comparison``.

    Overlays TAL daily/hourly per-target-version medians (deltas from V0)
    with the OSTRICH ones when the corresponding raw OSTRICH files exist.
    """
    def measured_or_none(raw_files: list[Path]) -> dict | None:
        # Only parse OSTRICH output when at least one raw file is present.
        if any(f.exists() for f in raw_files):
            return load_ostrich_dm_by_version(raw_files)
        return None

    fig, ax = plt.subplots(figsize=(8, 5))
    tal_daily = load_tal_dm_by_version(daily_data["results"]["dm"])
    tal_hourly = load_tal_dm_by_version(hourly_data["results"]["dm"])
    _plot_line_chart(ax, tal_daily, tal_hourly,
                     measured_or_none(daily_ost), measured_or_none(hourly_ost))
    _format_log_axis(ax)
    ax.set_xlabel("Delta target version (% of total)")
    ax.set_ylabel("Lookup time (ms)")
    ax.set_title("DM: median across all triple patterns from V0")
    ax.legend(fontsize=9, handlelength=3)
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    _save_plot(fig, plot_dir, "dm_comparison")
114+
115+
116+
def plot_vq_combined(daily_data: dict, daily_ost: list[Path],
                     hourly_data: dict, hourly_ost: list[Path],
                     plot_dir: Path) -> None:
    """Render the combined VQ bar chart and save it as ``vq_comparison``.

    One bar group per system (TAL always; OSTRICH when any raw file exists),
    with a plain bar for daily and a hatched bar for hourly.

    Bug fix vs. the previous version: when OSTRICH raw files existed for
    only ONE granularity, ``groups``/``x`` had two entries but one of the
    value lists had one, so ``ax.bar`` raised a shape-mismatch error.
    Missing OSTRICH values are now padded with NaN, which draws no bar.
    """
    fig, ax = plt.subplots(figsize=(7, 5))

    has_ost_d = any(f.exists() for f in daily_ost)
    has_ost_h = any(f.exists() for f in hourly_ost)

    groups = ["TAL"]
    if has_ost_d or has_ost_h:
        groups.append("OSTRICH")
    x = np.arange(len(groups))
    width = 0.35

    daily_vals = [load_tal_vq_median(daily_data["results"]["vq"])]
    hourly_vals = [load_tal_vq_median(hourly_data["results"]["vq"])]
    if len(groups) == 2:
        # Pad with NaN when one granularity has no OSTRICH data so both
        # value lists always match len(x).
        daily_vals.append(load_ostrich_vq_median(daily_ost) if has_ost_d else float("nan"))
        hourly_vals.append(load_ostrich_vq_median(hourly_ost) if has_ost_h else float("nan"))

    # System color is carried by the bars (blue = TAL, orange = OSTRICH);
    # the legend only distinguishes granularity, so its patches are uncolored.
    colors = list(("#0072B2", "#D55E00")[:len(groups)])
    bars_d = ax.bar(x - width / 2, daily_vals, width,
                    color=colors, edgecolor="black")
    bars_h = ax.bar(x + width / 2, hourly_vals, width,
                    color=colors, edgecolor="black", hatch="//")

    legend_handles = [
        Patch(facecolor="white", edgecolor="black", label="Daily (89 ver.)"),
        Patch(facecolor="white", edgecolor="black", hatch="//", label="Hourly (1,299 ver.)"),
    ]
    ax.legend(handles=legend_handles, fontsize=9)

    # Annotate each bar with its value; skip the NaN padding bars.
    for bars in [bars_d, bars_h]:
        for bar in bars:
            val = bar.get_height()
            if val == val:  # NaN != NaN: only label real measurements
                ax.text(bar.get_x() + bar.get_width() / 2, val, f"{val:.2f}",
                        ha="center", va="bottom", fontsize=8)

    ax.set_xticks(x)
    ax.set_xticklabels(groups)
    _format_log_axis(ax)
    ax.set_ylabel("Lookup time (ms)")
    ax.set_title("VQ: median across all triple patterns")
    ax.grid(True, alpha=0.3, axis="y")
    fig.tight_layout()
    _save_plot(fig, plot_dir, "vq_comparison")
166+
167+
168+
def main() -> None:
    """Entry point: load both granularities and emit the combined plots."""
    plot_dir = DATA_DIR / "analysis" / "combined" / "plots"
    console.rule("[bold]Loading data")

    daily_data, daily_ost = _load_granularity_data("daily")
    hourly_data, hourly_ost = _load_granularity_data("hourly")

    console.rule("[bold]Generating combined plots")
    plot_args = (daily_data, daily_ost, hourly_data, hourly_ost, plot_dir)
    for plot_fn in (plot_vm_combined, plot_dm_combined, plot_vq_combined):
        plot_fn(*plot_args)
    console.print("[bold green]Done.[/bold green]")
180+
181+
182+
# Script entry point: run the combined analysis when executed directly.
if __name__ == "__main__":
    main()

benchmark/bear/analyze_results.py

Lines changed: 0 additions & 170 deletions
Original file line numberDiff line numberDiff line change
@@ -318,171 +318,6 @@ def _save_plot(fig: Figure, plot_dir: Path, name: str) -> None:
318318
console.print(f" Saved: {plot_dir / name}.{{pdf,jpg}}")
319319

320320

321-
def plot_vm_comparison(tal_vm: List[dict], ostrich_raw_files: List[Path], plot_dir: Path) -> None:
    """Plot per-version VM medians for TAL, and OSTRICH when raw files exist.

    Args:
        tal_vm: TAL VM benchmark result records.
        ostrich_raw_files: Raw OSTRICH output files; the OSTRICH line is
            drawn only if at least one of them exists.
        plot_dir: Directory where ``vm_comparison`` is saved via _save_plot.
    """
    tal_data = load_tal_vm_by_version(tal_vm)
    fig, ax = plt.subplots(figsize=(8, 5))
    versions = sorted(tal_data.keys())
    ax.plot(versions, [tal_data[v] for v in versions], label="TAL", marker="", linewidth=1.5)
    if any(f.exists() for f in ostrich_raw_files):
        ost_data = load_ostrich_vm_by_version(ostrich_raw_files)
        ost_versions = sorted(ost_data.keys())
        ax.plot(ost_versions, [ost_data[v] for v in ost_versions], label="OSTRICH", marker="", linewidth=1.5)
    _format_log_axis(ax)
    ax.set_xlabel("Version")
    ax.set_ylabel("Lookup time (ms)")
    ax.set_title("VM: median across all triple patterns")
    ax.legend()
    ax.grid(True, alpha=0.3)
    _save_plot(fig, plot_dir, "vm_comparison")
337-
338-
339-
def plot_dm_comparison(tal_dm: List[dict], ostrich_raw_files: List[Path], plot_dir: Path) -> None:
    """Plot per-target-version DM medians (deltas from V0) for TAL and OSTRICH.

    The OSTRICH line is drawn only if at least one raw file exists.
    NOTE(review): the TAL line uses ``marker="o"`` here, unlike the VM plot's
    ``marker=""`` — possibly intentional, possibly a leftover; confirm.

    Args:
        tal_dm: TAL DM benchmark result records.
        ostrich_raw_files: Raw OSTRICH output files.
        plot_dir: Directory where ``dm_comparison`` is saved via _save_plot.
    """
    tal_data = load_tal_dm_by_version(tal_dm)
    fig, ax = plt.subplots(figsize=(8, 5))
    versions = sorted(tal_data.keys())
    ax.plot(versions, [tal_data[v] for v in versions], label="TAL", marker="o", linewidth=1.5, markersize=4)
    if any(f.exists() for f in ostrich_raw_files):
        ost_data = load_ostrich_dm_by_version(ostrich_raw_files)
        ost_versions = sorted(ost_data.keys())
        ax.plot(ost_versions, [ost_data[v] for v in ost_versions], label="OSTRICH", marker="", linewidth=1.5)
    _format_log_axis(ax)
    ax.set_xlabel("Version (delta from V0)")
    ax.set_ylabel("Lookup time (ms)")
    ax.set_title("DM: median across all triple patterns from V0")
    ax.legend()
    ax.grid(True, alpha=0.3)
    _save_plot(fig, plot_dir, "dm_comparison")
355-
356-
357-
def plot_vq_comparison(tal_vq: List[dict], ostrich_raw_files: List[Path], plot_dir: Path) -> None:
    """Plot VQ medians as one bar per system (TAL, plus OSTRICH when present).

    Args:
        tal_vq: TAL VQ benchmark result records.
        ostrich_raw_files: Raw OSTRICH output files; the OSTRICH bar is added
            only if at least one of them exists.
        plot_dir: Directory where ``vq_comparison`` is saved via _save_plot.
    """
    systems = ["TAL"]
    values = [load_tal_vq_median(tal_vq)]
    if any(f.exists() for f in ostrich_raw_files):
        systems.append("OSTRICH")
        values.append(load_ostrich_vq_median(ostrich_raw_files))
    fig, ax = plt.subplots(figsize=(6, 5))
    bars = ax.bar(systems, values)
    # Annotate each bar with its value just above the bar top.
    for bar, val in zip(bars, values):
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height(), f"{val:.2f}",
                ha="center", va="bottom", fontsize=9)
    _format_log_axis(ax)
    ax.set_ylabel("Lookup time (ms)")
    ax.set_title("VQ: median across all triple patterns")
    ax.grid(True, alpha=0.3, axis="y")
    _save_plot(fig, plot_dir, "vq_comparison")
373-
374-
375-
def plot_by_pattern(tal_results: List[dict], ostrich_raw_files: List[Path],
                    query_type: str, load_tal_fn, load_ost_fn, plot_dir: Path,
                    x_label: str, version_key: str) -> None:
    """Draw side-by-side subplots of medians split by triple-pattern type.

    The left subplot shows "?P?" ("p") patterns and the right "?PO" ("po")
    patterns, sharing the y axis.  ``load_tal_fn`` / ``load_ost_fn`` are
    loader callables that accept a ``pattern_filter`` keyword and return a
    version -> median mapping.  The OSTRICH line is drawn only when at
    least one raw file exists.  Saved as ``{query_type}_by_pattern``.

    NOTE(review): ``version_key`` is accepted but never used in this body —
    likely a leftover parameter; confirm before removing.
    """
    has_ostrich = any(f.exists() for f in ostrich_raw_files)
    fig, axes = plt.subplots(1, 2, figsize=(14, 5), sharey=True)
    for i, pt in enumerate(["p", "po"]):
        ax = axes[i]
        tal_data = load_tal_fn(tal_results, pattern_filter=pt)
        versions = sorted(tal_data.keys())
        ax.plot(versions, [tal_data[v] for v in versions], label="TAL", linewidth=1.5)
        if has_ostrich:
            ost_data = load_ost_fn(ostrich_raw_files, pattern_filter=pt)
            ost_versions = sorted(ost_data.keys())
            ax.plot(ost_versions, [ost_data[v] for v in ost_versions], label="OSTRICH", linewidth=1.5)
        _format_log_axis(ax)
        ax.set_xlabel(x_label)
        ax.set_title(f"?{'P?' if pt == 'p' else 'PO'} patterns")
        ax.legend()
        ax.grid(True, alpha=0.3)
    axes[0].set_ylabel("Lookup time (ms)")
    fig.suptitle(f"{query_type.upper()}: median by pattern type", fontsize=13)
    fig.tight_layout()
    _save_plot(fig, plot_dir, f"{query_type}_by_pattern")
398-
399-
400-
def generate_plots(data: dict, ostrich_raw_files: List[Path], plot_dir: Path,
                   disk_usage: dict[str, int | None] | None = None) -> None:
    """Generate all comparison plots for one benchmark run.

    Args:
        data: Parsed benchmark results; plots are produced only for the
            query types ("vm", "dm", "vq") that have result records.
        ostrich_raw_files: Raw OSTRICH outputs, passed through to each plot.
        plot_dir: Output directory for the figures.
        disk_usage: Optional storage measurements; when truthy, a storage
            comparison plot is also produced.
    """
    results = data.get("results", {})
    vm_results = results.get("vm", [])
    dm_results = results.get("dm", [])
    vq_results = results.get("vq", [])

    if vm_results:
        plot_vm_comparison(vm_results, ostrich_raw_files, plot_dir)
        plot_by_pattern(vm_results, ostrich_raw_files, "vm",
                        load_tal_vm_by_version, load_ostrich_vm_by_version,
                        plot_dir, "Version", "version_index")
    if dm_results:
        plot_dm_comparison(dm_results, ostrich_raw_files, plot_dir)
        plot_by_pattern(dm_results, ostrich_raw_files, "dm",
                        load_tal_dm_by_version, load_ostrich_dm_by_version,
                        plot_dir, "Version (delta from V0)", "version_end")
    if vq_results:
        plot_vq_comparison(vq_results, ostrich_raw_files, plot_dir)

    # Storage and memory
    if disk_usage:
        plot_storage_comparison(disk_usage, plot_dir)
    # NOTE(review): indentation reconstructed — memory data does not depend
    # on disk_usage, so this call is taken to be unconditional; confirm.
    plot_memory_comparison(data, plot_dir)
424-
425-
426-
def plot_storage_comparison(disk_usage: dict[str, int | None], plot_dir: Path) -> None:
    """Plot total on-disk storage of TAL (OCDM + QLever) vs. OSTRICH.

    Args:
        disk_usage: Must contain the keys ``ocdm_dataset_bytes``,
            ``ocdm_provenance_bytes``, ``qlever_index_bytes`` and
            ``ostrich_store_bytes`` (values may be None; a missing key
            raises KeyError).
        plot_dir: Directory where ``storage_comparison`` is saved.

    Skips plotting entirely when no component reported a size.
    """
    ocdm_ds = disk_usage["ocdm_dataset_bytes"]
    ocdm_prov = disk_usage["ocdm_provenance_bytes"]
    qlever = disk_usage["qlever_index_bytes"]
    ostrich = disk_usage["ostrich_store_bytes"]

    if ocdm_ds is None and qlever is None and ostrich is None:
        console.print(" [dim]Skipping storage_comparison (no data)[/dim]")
        return

    fig, ax = plt.subplots(figsize=(6, 5))
    systems = []
    sizes_mb = []

    if ocdm_ds is not None or qlever is not None:
        # TAL storage = OCDM dataset + provenance + QLever index;
        # missing components count as 0 bytes.
        ocdm_total = (ocdm_ds or 0) + (ocdm_prov or 0)
        tal_total = ocdm_total + (qlever or 0)
        systems.append("TAL\n(OCDM + QLever)")
        sizes_mb.append(tal_total / 1048576)  # bytes -> MiB (2**20)

    if ostrich is not None:
        systems.append("OSTRICH")
        sizes_mb.append(ostrich / 1048576)

    bars = ax.bar(systems, sizes_mb, color=["#1f77b4", "#ff7f0e"][:len(systems)])
    # Annotate each bar with its size just above the bar top.
    for bar, val in zip(bars, sizes_mb):
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height(), f"{val:.1f} MB",
                ha="center", va="bottom", fontsize=9)
    ax.set_ylabel("Storage (MB)")
    ax.set_title("Storage comparison")
    ax.grid(True, alpha=0.3, axis="y")
    _save_plot(fig, plot_dir, "storage_comparison")
458-
459-
460-
def plot_memory_comparison(data: dict, plot_dir: Path) -> None:
    """Plot TAL's median peak memory per query type (VM/DM/VQ) as bars.

    Records without a ``median_memory_bytes`` value are ignored; query types
    with no valid records are omitted, and the whole plot is skipped when
    none remain.

    Args:
        data: Parsed benchmark results.
        plot_dir: Directory where ``memory_comparison`` is saved.
    """
    results = data.get("results", {})
    query_types = []
    medians_kb = []
    for qt in ["vm", "dm", "vq"]:
        qt_results = results.get(qt, [])
        valid = [r["median_memory_bytes"] for r in qt_results if r.get("median_memory_bytes") is not None]
        if valid:
            query_types.append(qt.upper())
            medians_kb.append(statistics.median(valid) / 1024)  # bytes -> KiB

    if not query_types:
        console.print(" [dim]Skipping memory_comparison (no data)[/dim]")
        return

    fig, ax = plt.subplots(figsize=(6, 5))
    bars = ax.bar(query_types, medians_kb)
    # Annotate each bar with its median just above the bar top.
    for bar, val in zip(bars, medians_kb):
        ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height(), f"{val:.0f} KB",
                ha="center", va="bottom", fontsize=9)
    ax.set_ylabel("Peak memory (KB)")
    ax.set_title("TAL median peak memory by query type")
    ax.grid(True, alpha=0.3, axis="y")
    _save_plot(fig, plot_dir, "memory_comparison")
484-
485-
486321
def generate_comparison_table(tal_results: dict, ocdm_timing_file: Path, qlever_timing_file: Path) -> List[dict]:
487322
rows = []
488323
for system_name, published in PUBLISHED_RESULTS.items():
@@ -782,11 +617,6 @@ def main():
782617
console.print()
783618
print_comparison_table(comparison)
784619

785-
console.rule("[bold]Generating plots")
786-
ostrich_raw_files = [DATA_DIR / f"ostrich_raw_{pt}_{args.granularity}.txt" for pt in ["p", "po"]]
787-
plot_dir = output_dir / "plots"
788-
generate_plots(data, ostrich_raw_files, plot_dir, disk_usage)
789-
790620

791621
if __name__ == "__main__":
792622
main()

0 commit comments

Comments
 (0)