
Performance badges #17

Merged
merged 1 commit into from
Aug 15, 2025
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
@@ -14,7 +14,7 @@ jobs:
   binary:
     runs-on: "ubuntu-latest"
     container:
-      image: "ghcr.io/tarantool/sdvg-ci:0.0.1"
+      image: "ghcr.io/tarantool/sdvg-ci:0.0.2"
     strategy:
       matrix:
         os_family: ["darwin", "linux"]
@@ -38,7 +38,7 @@ jobs:
   docker:
     runs-on: "ubuntu-latest"
     container:
-      image: "ghcr.io/tarantool/sdvg-ci:0.0.1"
+      image: "ghcr.io/tarantool/sdvg-ci:0.0.2"
     strategy:
       matrix:
         os_family: ["linux"]
60 changes: 54 additions & 6 deletions .github/workflows/test.yml
@@ -17,7 +17,7 @@ jobs:
   lint:
     runs-on: "ubuntu-latest"
     container:
-      image: "ghcr.io/tarantool/sdvg-ci:0.0.1"
+      image: "ghcr.io/tarantool/sdvg-ci:0.0.2"
     steps:
       - uses: "actions/checkout@v4"

@@ -34,7 +34,7 @@ jobs:
   unit:
     runs-on: "ubuntu-latest"
     container:
-      image: "ghcr.io/tarantool/sdvg-ci:0.0.1"
+      image: "ghcr.io/tarantool/sdvg-ci:0.0.2"
     steps:
       - uses: "actions/checkout@v4"

@@ -46,7 +46,7 @@ jobs:
   cover:
     runs-on: "ubuntu-latest"
     container:
-      image: "ghcr.io/tarantool/sdvg-ci:0.0.1"
+      image: "ghcr.io/tarantool/sdvg-ci:0.0.2"
     steps:
       - uses: "actions/checkout@v4"

@@ -64,10 +64,58 @@ jobs:

   performance:
     runs-on: "ubuntu-latest"
+    env:
+      BENCH_MASTER_ARTIFACT_KEY: "bench-master"
+      BENCH_MASTER_INFO_DIR: "bench-master-info"
+      BENCH_MASTER_FILE_PATH: "bench-master-info/benchmark-master.txt"
+      BENCH_MASTER_SHA_FILE_PATH: "bench-master-info/benchmark-master-sha.txt"
     container:
-      image: "ghcr.io/tarantool/sdvg-ci:0.0.1"
+      image: "ghcr.io/tarantool/sdvg-ci:0.0.2"

     steps:
       - uses: "actions/checkout@v4"

-      - name: "Run benchmarks"
-        run: "make test/performance | tee performance.out"
+      - name: "Download master benchmark artifact"
+        uses: "dawidd6/action-download-artifact@v11"
+        with:
+          github_token: "${{ secrets.GITHUB_TOKEN }}"
+          branch: "${{ github.event.repository.default_branch }}"
+          if_no_artifact_found: "warn"
+          allow_forks: false
+          name: "${{ env.BENCH_MASTER_ARTIFACT_KEY }}"
+          path: "${{ env.BENCH_MASTER_INFO_DIR }}"
+
+      - name: "Run benchmarks on current branch"
+        run: "make test/performance | tee benchmark.txt; exit ${PIPESTATUS[0]}"
+
+      - name: "Make comparison report"
+        run: |
+          python ./build/ci/compare_benchmarks.py \
+            --old-commit-sha-path "$BENCH_MASTER_SHA_FILE_PATH" \
+            "$BENCH_MASTER_FILE_PATH" \
+            benchmark.txt \
+            >> performance-report.md
+
+          cat performance-report.md >> $GITHUB_STEP_SUMMARY
+
+      - uses: "mshick/add-pr-comment@v2"
+        if: "${{ github.event_name == 'pull_request' }}"
+        with:
+          message-path: "performance-report.md"
+          message-id: "perf-report-pr-${{ github.event.pull_request.number }}"
+          refresh-message-position: true
+
+      - name: "Prepare master benchmark info for uploading as artifact"
+        if: "${{ github.ref_name == github.event.repository.default_branch }}"
+        run: |
+          mkdir -p ${{ env.BENCH_MASTER_INFO_DIR }}
+          mv benchmark.txt "${{ env.BENCH_MASTER_FILE_PATH }}"
+          echo "${GITHUB_SHA:0:7}" > ${{ env.BENCH_MASTER_SHA_FILE_PATH }}
+
+      - name: "Upload master benchmark artifact"
+        if: "${{ github.ref_name == github.event.repository.default_branch }}"
+        uses: "actions/upload-artifact@v4"
+        with:
+          name: "${{ env.BENCH_MASTER_ARTIFACT_KEY }}"
+          path: "${{ env.BENCH_MASTER_INFO_DIR }}"

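The comparison step above pipes the tee'd `benchmark.txt` into `build/ci/compare_benchmarks.py`, added later in this diff. As a rough sketch of what that step consumes, the snippet below runs the script's own docstring sample through its parser; importing the script as a module from `build/ci/` is an assumption made purely for illustration:

```python
# Sketch only: parse a single `go test -bench` line the way the CI step does.
# The sample line is copied from parse_bench_line's docstring. Only metrics
# listed in METRICS ('MB/s', 'values/s') are kept, so ns/op and rows/s are
# dropped by the parser as currently configured.
from compare_benchmarks import parse_bench_line

line = ("BenchmarkPartitioning/CI/cpu-4 2569041 475.5 ns/op "
        "218.73 MB/s 8412793 rows/s 16825587 values/s")
print(parse_bench_line(line))
# ('Partitioning (cpu=4)', {'MB/s': 218.73, 'values/s': 16825587.0})
```
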
2 changes: 1 addition & 1 deletion Makefile
@@ -20,7 +20,7 @@ test/cover:
 	go tool cover -html=coverage.out -o coverage.html
 
 test/performance:
-	go test -run=^$$ -bench=. -cpu 4 ./...
+	go test -run=^$$ -bench=. -count=2 -cpu 4 ./...
 
 include ./build/package/Makefile
 include ./build/ci/Makefile
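The `-count=2` flag makes `go test` run every benchmark twice, so the comparison script added below always has at least two samples per benchmark to aggregate (mean by default). A minimal sketch of that aggregation, with invented numbers:

```python
# Sketch with made-up values: two -count=2 samples for one benchmark collapse
# into a single number, mirroring aggregate_results' default "mean" mode.
import statistics

samples_mbps = [218.73, 222.41]       # MB/s from two runs of the same benchmark
print(statistics.mean(samples_mbps))  # 220.57 -> the value that ends up in the report
```
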
14 changes: 12 additions & 2 deletions build/ci/Dockerfile
@@ -5,7 +5,15 @@ WORKDIR /tmp
 # Install dependencies
 
 RUN apk update \
-    && apk add --update --no-cache bash curl git make gcc musl-dev docker
+    && apk add --update --no-cache bash curl git make gcc musl-dev docker tar
+
+# Configure python
+
+RUN apk add --no-cache python3 py3-pip \
+    && python3 -m venv /venv \
+    && /venv/bin/pip install --upgrade pip setuptools wheel
+
+ENV PATH="/venv/bin:$PATH"
 
 # Configure Go
 
@@ -32,6 +40,8 @@ WORKDIR /sdvg
 
 COPY ./go.mod ./go.mod
 COPY ./go.sum ./go.sum
+COPY ./build/ci/requirements.txt ./requirements.txt
 
 RUN git config --global --add safe.directory /sdvg \
-    && go mod download
+    && go mod download \
+    && pip install -r ./requirements.txt
2 changes: 1 addition & 1 deletion build/ci/Makefile
@@ -1,6 +1,6 @@
 # Arguments
 
-ci_image = ghcr.io/tarantool/sdvg-ci:0.0.1
+ci_image = ghcr.io/tarantool/sdvg-ci:0.0.2
 
 # Targets
 
227 changes: 227 additions & 0 deletions build/ci/compare_benchmarks.py
@@ -0,0 +1,227 @@
import argparse
import re
import statistics
import os
import textwrap
from collections import OrderedDict
from typing import Dict, Tuple, List, Literal, Optional

import pandas as pd

METRICS = {
    'MB/s': {'name': 'B/s', 'good_direction': 'up', 'scale': 2 ** 20},
    'values/s': {'good_direction': 'up'},
    # 'ns/op': {'name': 's/op', 'good_direction': 'down', 'scale': 1e-9},
    # 'rows/s': {'good_direction': 'up'},
}

EMOJIS = {
    'good': '⚡️',
    'bad': '💔'
}


def format_benchmark_name(name: str) -> str:
    name = name.replace("Benchmark", "")
    name = name.replace("/CI/", "/")

    parts = name.split("/")
    if len(parts) == 1:
        return parts[0]

    base_name = " ".join(parts[:-1])
    params_split = parts[-1].split("-")

    params = []
    for i in range(0, len(params_split) - 1, 2):
        params.append(f"{params_split[i]}={params_split[i + 1]}")

    if params:
        return f"{base_name} ({', '.join(params)})"
    else:
        return base_name


def parse_bench_line(line: str) -> Tuple[Optional[str], Optional[Dict[str, float]]]:
    """Parses `go test -bench` results output.
    Example:

    BenchmarkPartitioning/CI/cpu-4 2569041 475.5 ns/op 218.73 MB/s 8412793 rows/s 16825587 values/s

    result:
    ('Partitioning (cpu=4)', {'ns/op': 475.5, 'MB/s': 218.73, 'rows/s': 8412793, 'values/s': 16825587})
    """

    parts = re.split(r'\s+', line.strip())
    if len(parts) < 3 or not parts[0].startswith("Benchmark") or "/CI/" not in parts[0]:
        return None, None

    bench_name = format_benchmark_name(parts[0])

    metrics = {}
    for value, metric in zip(parts[2::2], parts[3::2]):
        if metric not in METRICS:
            continue
        try:
            metrics[metric] = float(value)
        except ValueError:
            raise ValueError(f"Failed to parse value '{value}' for '{metric}'")

    return bench_name, metrics


def parse_metrics_file(path: str) -> Dict[str, Dict[str, List[float]]]:
    results = {}

    with open(path) as f:
        for line in f:
            name_test, metrics = parse_bench_line(line)
            if name_test is None:
                continue

            if not metrics:
                continue

            if name_test not in results:
                results[name_test] = {m: [] for m in METRICS.keys()}

            for metric_name, value in metrics.items():
                results[name_test][metric_name].append(value)

    return results


def aggregate_results(
    parsed_metrics: Dict[str, Dict[str, List[float]]],
    method: Literal["mean", "median"]
) -> OrderedDict[str, Dict[str, float]]:
    aggregated: OrderedDict[str, Dict[str, float]] = OrderedDict()

    for bench_name, metrics in parsed_metrics.items():
        aggregated[bench_name] = {}

        for m, values in metrics.items():
            if method == "median":
                aggregated[bench_name][m] = statistics.median(values)
            elif method == "mean":
                aggregated[bench_name][m] = statistics.mean(values)

    return aggregated


def humanize_number(val: float, scale: float) -> str:
    if val is None:
        return "?"

    val = val * scale
    abs_val = abs(val)
    if abs_val >= 1_000_000:
        return f"{val / 1_000_000:.2f}M"
    elif abs_val >= 1_000:
        return f"{val / 1_000:.2f}K"
    else:
        return f"{val:.2f}"


def format_metric_changes(metric_name: str, old_val: Optional[float], new_val: Optional[float], alert_threshold: float) -> str:
    old_val_str = humanize_number(old_val, METRICS[metric_name].get('scale', 1))
    new_val_str = humanize_number(new_val, METRICS[metric_name].get('scale', 1))

    if old_val is None or new_val is None:
        suffix = " ⚠️"
    else:
        change_pct = (new_val / old_val - 1) * 100
        suffix = f" ({change_pct:+.2f}%)"

        if abs(change_pct) >= alert_threshold:
            is_better = METRICS[metric_name].get('good_direction') == 'up' and change_pct > 0
            suffix += f" {EMOJIS['good'] if is_better else EMOJIS['bad']}"

    return f"{old_val_str} → {new_val_str}{suffix}"


def compare_benchmarks_df(old_metrics, new_metrics, alert_threshold=None):
    if old_metrics is None:
        old_metrics = {}

    if new_metrics is None:
        new_metrics = {}

    all_metrics = OrderedDict()
    all_metrics.update(old_metrics)
    all_metrics.update(new_metrics)

    df = pd.DataFrame(columns=["Benchmark"] + [v.get('name', k) for k, v in METRICS.items()])

    for bench_name in all_metrics.keys():
        row = {"Benchmark": bench_name}

        for metric_name, metric_params in METRICS.items():
            old_val = old_metrics.get(bench_name, {}).get(metric_name, None)
            new_val = new_metrics.get(bench_name, {}).get(metric_name, None)
            row[metric_params.get('name', metric_name)] = format_metric_changes(
                metric_name, old_val, new_val, alert_threshold
            )

        df.loc[len(df)] = row

    return df.to_markdown(index=False)


def build_report_header(old_file, sha_file: str) -> str:
    event_name = os.environ.get("GITHUB_EVENT_NAME", "")
    base_branch = os.environ.get("GITHUB_DEFAULT_BRANCH", "master")

    warning = ""
    if not os.path.exists(old_file):
        warning = textwrap.dedent("""
            > [!WARNING]
            > No test results found for master branch. Please run workflow on master first to compare results.
        """).strip()

    if event_name == "pull_request":
        pr_branch = os.environ.get("GITHUB_HEAD_REF", "")
        header_ending = f"`{pr_branch}`" if not os.path.exists(old_file) else f"`{base_branch}` VS `{pr_branch}`"
    else:
        if not os.path.exists(old_file):
            header_ending = f"`{base_branch}`"
        else:
            prev_master_sha = "(sha not found)"
            if sha_file and os.path.exists(sha_file):
                with open(sha_file) as f:
                    prev_master_sha = f.read().strip()

            commit_sha = os.environ.get("GITHUB_SHA", "")[:7]
            header_ending = f"`{base_branch} {prev_master_sha}` VS `{base_branch} {commit_sha}`"

    header = f"# Perf tests report: {header_ending}\n"
    return f"{warning}\n\n{header}" if warning else header


def main():
    parser = argparse.ArgumentParser(description="Compare go test -bench results in markdown format")
    parser.add_argument(
        "--alert-threshold", type=float, default=7,
        help="Percent change threshold for adding emoji alerts"
    )
    parser.add_argument(
        "--aggregation", choices=["mean", "median"], default="mean",
        help="Aggregation method for multiple runs of the same benchmark"
    )
    parser.add_argument("--old-commit-sha-path", help="Path to file with the commit sha of the old benchmark")
    parser.add_argument("old_file", help="Path to old benchmark results file", nargs='?', default="")
    parser.add_argument("new_file", help="Path to new benchmark results file")
    args = parser.parse_args()

    old_metrics = None
    if args.old_file and os.path.exists(args.old_file):
        old_metrics = aggregate_results(parse_metrics_file(args.old_file), args.aggregation)

    new_metrics = aggregate_results(parse_metrics_file(args.new_file), args.aggregation)

    print(build_report_header(args.old_file, args.old_commit_sha_path))
    print(compare_benchmarks_df(old_metrics, new_metrics, alert_threshold=args.alert_threshold))


if __name__ == "__main__":
    main()
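
For local debugging, the same comparison the CI job performs can be reproduced from two saved benchmark captures. A hedged sketch, assuming the script is importable as a module and using placeholder file names:

```python
# Local sketch: compare two saved `go test -bench` outputs the way main() does,
# minus the report header. File names here are placeholders, not repository paths.
from compare_benchmarks import parse_metrics_file, aggregate_results, compare_benchmarks_df

old = aggregate_results(parse_metrics_file("benchmark-master.txt"), "mean")
new = aggregate_results(parse_metrics_file("benchmark.txt"), "mean")
print(compare_benchmarks_df(old, new, alert_threshold=7))  # markdown table on stdout
```
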
2 changes: 2 additions & 0 deletions build/ci/requirements.txt
@@ -0,0 +1,2 @@
pandas==2.3.1
tabulate==0.9.0
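
Both pins serve the comparison script: `pandas` builds the report table, and `tabulate` is the library `DataFrame.to_markdown` delegates to, so it has to be installed explicitly. A tiny illustration with invented cell values:

```python
# Illustrative only: DataFrame.to_markdown needs the tabulate package at runtime,
# which is why it is pinned next to pandas. The values below are invented.
import pandas as pd

df = pd.DataFrame({"Benchmark": ["Partitioning (cpu=4)"],
                   "B/s": ["210.00M → 215.00M (+2.38%)"]})
print(df.to_markdown(index=False))
```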