Skip to content

Commit b9a1bf3

Browse files
committed
feat: add performance benchmarking tools
- Add tools/benchmarking/benchmark_geozarr.py for GeoZarr vs EOPF comparison
- Add tools/benchmarking/benchmark_tile_performance.py for tile rendering benchmarks
- Add workflows/examples/run-benchmark-test.yaml for Argo Workflows integration
- CLI interface with JSON output for automation
1 parent da5ac25 commit b9a1bf3

File tree

3 files changed

+560
-0
lines changed

3 files changed

+560
-0
lines changed
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#!/usr/bin/env python3
2+
"""Automated GeoZarr vs EOPF performance comparison.
3+
4+
Measures dataset open time and reports basic metadata (summed variable size,
variable count, and a sample of chunk layouts), comparing the original EOPF
Zarr format against the optimized GeoZarr format.
6+
7+
Usage:
8+
benchmark_geozarr.py --eopf-url s3://... --geozarr-url s3://... --output results.json
9+
"""
10+
11+
import argparse
12+
import json
13+
import logging
14+
import sys
15+
import time
16+
from dataclasses import asdict, dataclass
17+
from pathlib import Path
18+
19+
import xarray as xr
20+
21+
# Configure root logging at INFO when the module is imported; main() raises
# the root logger to DEBUG when --verbose is passed.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
23+
24+
25+
@dataclass
26+
class BenchmarkResult:
27+
"""Performance measurement result."""
28+
29+
format_type: str # "eopf" or "geozarr"
30+
dataset_url: str
31+
load_time_seconds: float
32+
dataset_size_mb: float
33+
num_variables: int
34+
chunk_sizes: dict[str, tuple[int, ...]]
35+
36+
37+
def benchmark_load_time(dataset_url: str, format_type: str) -> BenchmarkResult:
    """Open a Zarr dataset and record the open time plus basic metrics.

    Args:
        dataset_url: Location handed to ``xarray.open_zarr``.
        format_type: Label stored in the result and used in log messages
            ("eopf" or "geozarr").

    Returns:
        A ``BenchmarkResult`` holding the open time (rounded to 3 decimals),
        the summed ``nbytes`` of all data variables in MiB (rounded to 2
        decimals), the number of data variables, and the ``.chunks`` value of
        the first three data variables.

    Raises:
        Any exception raised while opening the dataset or reading its
        metadata is propagated to the caller.
    """
    logger.info(f"Benchmarking {format_type}: {dataset_url}")

    # Only the open call is timed; metric collection below is excluded.
    start = time.perf_counter()
    ds = xr.open_zarr(dataset_url, consolidated=True)
    load_time = time.perf_counter() - start

    # Close the dataset even when metric collection fails, so a bad variable
    # does not leave the underlying store open.
    try:
        # Sample only the first three variables to keep the report small.
        chunks = {var: ds[var].chunks for var in list(ds.data_vars)[:3]}
        size_mb = sum(var.nbytes for var in ds.data_vars.values()) / 1024 / 1024

        result = BenchmarkResult(
            format_type=format_type,
            dataset_url=dataset_url,
            load_time_seconds=round(load_time, 3),
            dataset_size_mb=round(size_mb, 2),
            num_variables=len(ds.data_vars),
            chunk_sizes=chunks,
        )
    finally:
        ds.close()

    logger.info(f"✓ {format_type} load time: {load_time:.3f}s")
    return result
61+
62+
63+
def compare_results(eopf: BenchmarkResult, geozarr: BenchmarkResult) -> dict:
    """Build the JSON-ready comparison of two benchmark results.

    Args:
        eopf: Result for the original EOPF dataset.
        geozarr: Result for the GeoZarr dataset.

    Returns:
        A dict with the two results converted via ``asdict`` under "eopf" and
        "geozarr", and a "comparison" dict containing:

        * "speedup_factor": EOPF time divided by GeoZarr time, rounded to 2
          decimals; 0 when the GeoZarr time is not positive (ratio undefined).
        * "time_saved_seconds": EOPF time minus GeoZarr time, rounded to 3
          decimals.
        * "faster_format": "geozarr" when its load time is strictly smaller,
          otherwise "eopf" (ties are reported as "eopf").
    """
    eopf_time = eopf.load_time_seconds
    geozarr_time = geozarr.load_time_seconds

    # 0 is a sentinel for "ratio undefined"; it avoids dividing by zero.
    speedup = eopf_time / geozarr_time if geozarr_time > 0 else 0

    # Decide the winner from the times themselves rather than from the
    # speedup, so the 0 sentinel cannot label a zero-time GeoZarr run as slower.
    faster_format = "geozarr" if geozarr_time < eopf_time else "eopf"

    return {
        "eopf": asdict(eopf),
        "geozarr": asdict(geozarr),
        "comparison": {
            "speedup_factor": round(speedup, 2),
            "time_saved_seconds": round(eopf_time - geozarr_time, 3),
            "faster_format": faster_format,
        },
    }
78+
79+
80+
def main(argv: list[str] | None = None) -> int:
    """Run both benchmarks from the command line and report the comparison.

    Args:
        argv: Arguments to parse; ``None`` makes argparse read ``sys.argv``.

    Returns:
        0 when both benchmarks ran and the report was produced, 1 when any
        step raised an exception.
    """
    parser = argparse.ArgumentParser(description="Benchmark GeoZarr vs EOPF performance")
    parser.add_argument("--eopf-url", required=True, help="URL to EOPF Zarr dataset")
    parser.add_argument("--geozarr-url", required=True, help="URL to GeoZarr dataset")
    parser.add_argument("--output", type=Path, help="Output JSON file path")
    parser.add_argument("--verbose", action="store_true")

    args = parser.parse_args(argv)

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    try:
        # Run benchmarks
        eopf_result = benchmark_load_time(args.eopf_url, "eopf")
        geozarr_result = benchmark_load_time(args.geozarr_url, "geozarr")

        # Generate comparison
        results = compare_results(eopf_result, geozarr_result)

        # Serialize once so the file and stdout carry the same text.
        report = json.dumps(results, indent=2)

        # Write output
        if args.output:
            args.output.parent.mkdir(parents=True, exist_ok=True)
            args.output.write_text(report)
            logger.info(f"Results written to: {args.output}")

        # Print summary
        print(report)

        speedup = results["comparison"]["speedup_factor"]
        if speedup > 1:
            logger.info(f"✅ GeoZarr is {speedup}x faster than EOPF")
        elif speedup > 0:
            logger.warning(f"⚠️ EOPF is {1 / speedup:.2f}x faster than GeoZarr")
        else:
            # The factor is 0 when the ratio is undefined or rounds below
            # 0.005; inverting it would raise ZeroDivisionError and turn an
            # already-reported, successful run into a failure.
            logger.warning(
                "⚠️ Speedup factor is 0 (undefined or below 0.005); "
                "compare the load times in the results instead"
            )

        return 0

    except Exception as e:
        # Top-level boundary: report the failure through the exit code and
        # include the traceback only when --verbose was requested.
        logger.error(f"Benchmark failed: {e}", exc_info=args.verbose)
        return 1
120+
121+
122+
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())

0 commit comments

Comments
 (0)