Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
732 changes: 457 additions & 275 deletions data/arrow.go

Large diffs are not rendered by default.

794 changes: 794 additions & 0 deletions data/arrow_bench_test.go

Large diffs are not rendered by default.

135 changes: 69 additions & 66 deletions data/arrow_column_builders.go

Large diffs are not rendered by default.

90 changes: 90 additions & 0 deletions data/benchmark-compare.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/bin/bash
set -e

# Compares Go benchmark performance between two branches using benchstat.
# Usage: ./data/benchmark-compare.sh [base-branch] [feature-branch] [count]
# Run it from the repository root: the benchmarks target the ./data package.
#
# Best practices implemented:
# - Uses -count=10 for statistical significance (override with the 3rd argument)
# - Clears the Go build and test caches before each branch's run
# - Prints advice on stabilizing CPU frequency (the script does not change it)
# - Uses -benchtime=1s and -cpu=1 to reduce timing noise
#
# If no arguments are provided:
# - Stashes uncommitted work, including untracked files
# - Switches to main, runs the benchmarks and saves the results to old.txt
# - Switches to the branch that was current at start, runs the benchmarks
#   and saves the results to new.txt
# - Compares old.txt and new.txt with benchstat
#
# If arguments are provided:
# - Uses the specified branches (and run count) for the comparison

# Resolve the two refs to compare and the number of benchmark runs.
# Positional arguments (all optional):
#   $1 - base branch (default: main)
#   $2 - feature branch (default: the branch checked out right now)
#   $3 - value for `go test -count` (default: 10)
CURRENT_BRANCH=$(git branch --show-current)
if [[ -z "$CURRENT_BRANCH" ]]; then
  # Detached HEAD: --show-current prints nothing. Fall back to the commit hash
  # so the FEATURE_BRANCH default is still something git can check out.
  CURRENT_BRANCH=$(git rev-parse --verify HEAD)
fi
BASE_BRANCH=${1:-main}
FEATURE_BRANCH=${2:-$CURRENT_BRANCH}
COUNT=${3:-10} # Default to 10 runs for better statistical significance

# Returns 0 when $1 is a positive decimal integer, non-zero otherwise.
is_positive_int() {
  [[ "$1" =~ ^[1-9][0-9]*$ ]]
}

# Reject a bad count before any work is stashed or any cache is cleared;
# otherwise the mistake only surfaces when `go test` rejects -count.
if ! is_positive_int "$COUNT"; then
  echo "ERROR: count must be a positive integer, got '$COUNT'" >&2
  exit 2
fi

echo "======================================================================"
echo "Benchmark Comparison with Best Practices"
echo "======================================================================"
echo " Base branch: $BASE_BRANCH"
echo " Feature branch: $FEATURE_BRANCH"
echo " Iterations: $COUNT (minimum 6 recommended for confidence intervals)"
echo ""

# Check if benchstat is installed; install it on demand and make sure the
# freshly installed binary can actually be found before any benchmark runs.
if ! command -v benchstat &> /dev/null; then
  echo "benchstat is not installed. Installing..."
  go install golang.org/x/perf/cmd/benchstat@latest

  # `go install` writes to GOBIN, or to the bin directory of the first GOPATH
  # entry when GOBIN is unset. That directory is frequently not on PATH, so
  # add it for the rest of this script.
  go_bin_dir=$(go env GOBIN)
  if [[ -z "$go_bin_dir" ]]; then
    go_path=$(go env GOPATH)
    go_bin_dir="${go_path%%:*}/bin"
  fi
  PATH="$PATH:$go_bin_dir"

  # Fail now rather than after several minutes of benchmarking.
  if ! command -v benchstat &> /dev/null; then
    echo "ERROR: benchstat was installed but is not on PATH (looked in $go_bin_dir)" >&2
    exit 1
  fi
  echo ""
fi

# Remind the user how to reduce measurement noise. This is advice only:
# the script itself does not touch CPU governor or power settings.
printf '%s\n' \
  "NOTE: For most accurate results:" \
  " - Close other applications" \
  " - Disable CPU frequency scaling if possible" \
  " - Run on AC power (laptops)" \
  " - Consider: sudo cpupower frequency-set --governor performance (Linux)" \
  ""

# Save current state
#
# Remember the ref we started on (branch name, or commit hash on a detached
# HEAD) so it can be checked out again when the script exits.
ORIGINAL_REF=$(git symbolic-ref --quiet --short HEAD || git rev-parse --verify HEAD)
STASH_CREATED=0

# EXIT handler: switch back to the starting ref and re-apply the stash made
# below, if one was made. Preserves the script's original exit status.
restore_workspace() {
  local status=$?
  trap - EXIT
  echo ""
  echo "Restoring original state..."
  if ! git checkout --quiet "$ORIGINAL_REF"; then
    echo "WARNING: could not switch back to $ORIGINAL_REF" >&2
  fi
  if (( STASH_CREATED )); then
    if ! git stash pop --quiet; then
      echo "WARNING: could not re-apply stashed work; it is still in 'git stash list'" >&2
    fi
  fi
  exit "$status"
}
trap restore_workspace EXIT

# Results left over from a previous run are overwritten later in this script.
# Removing them first keeps `stash -u` from capturing them as untracked files,
# which would make the final `git stash pop` collide with the new results.
rm -f -- old.txt new.txt

echo "Saving current work..."
# `git stash push` exits 0 even when there is nothing to save, so compare the
# stash ref before and after to learn whether an entry was really created.
# Popping unconditionally could apply an unrelated, older stash.
stash_before=$(git rev-parse --quiet --verify refs/stash || true)
if ! git stash push -u -m "benchmark comparison stash"; then
  # Continuing would carry uncommitted changes into both benchmark runs.
  echo "ERROR: could not stash local changes; aborting" >&2
  exit 1
fi
stash_after=$(git rev-parse --quiet --verify refs/stash || true)
if [[ "$stash_before" != "$stash_after" ]]; then
  STASH_CREATED=1
fi

# Function to run benchmarks with best practices
#
# Clears the Go build and test caches, runs every benchmark in ./data and
# writes the combined stdout/stderr to the terminal and to a results file.
#
# Globals:   COUNT (read) - value passed to `go test -count`
# Arguments: $1 - branch name, used only in progress and error messages
#            $2 - path of the file that receives the benchmark output
# Returns:   0 on success; non-zero if the caches could not be cleared or if
#            the `go test | tee` pipeline failed
run_benchmarks() {
  local branch=$1
  local output=$2
  local bench_status=0

  echo "======================================================================"
  echo "Running benchmarks on $branch..."
  echo "======================================================================"

  # Clear build cache to ensure clean build
  echo "Clearing build cache..."
  go clean -cache -testcache || return

  # Run benchmarks with:
  # - count=$COUNT: Multiple runs for statistical significance
  # - benchmem: Include memory allocation stats
  # - benchtime=1s: Run each benchmark for at least 1 second (reduces timing noise)
  # - cpu=1: Run with GOMAXPROCS=1
  # - run=^$: Don't run any tests, only benchmarks
  echo "Running $COUNT iterations (this may take several minutes)..."
  # Without pipefail the pipeline reports only tee's status, so a failed build
  # or benchmark would go unnoticed and benchstat would compare garbage.
  # pipefail is enabled in a subshell to leave the caller's options untouched.
  (
    set -o pipefail
    go test -bench=. -benchmem -count="$COUNT" -benchtime=1s -cpu=1 '-run=^$' ./data 2>&1 \
      | tee "$output"
  ) || bench_status=$?

  if (( bench_status != 0 )); then
    echo "ERROR: benchmarks failed on $branch (exit status $bench_status); see $output" >&2
    return "$bench_status"
  fi

  echo ""
  echo "Results saved to $output"
}

# Checks out a ref and reports failure instead of hiding it.
# Arguments: $1 - branch (or other ref) to check out
# Outputs:   git's messages, minus the "M <path>" lines it prints for locally
#            modified files
# Returns:   0 on success, 1 if `git checkout` failed
switch_branch() {
  local branch=$1
  local checkout_log
  if ! checkout_log=$(git checkout "$branch" 2>&1); then
    printf '%s\n' "$checkout_log" >&2
    echo "ERROR: could not check out '$branch'; aborting benchmark comparison" >&2
    return 1
  fi
  if [[ -n "$checkout_log" ]]; then
    # grep exits 1 when every line is filtered out; that is not an error here.
    printf '%s\n' "$checkout_log" | grep -v "^M[[:space:]]" || true
  fi
}

# Run benchmarks on base branch
# A failed checkout must stop the script: otherwise the benchmarks would run
# on whatever branch happens to be checked out and the comparison would be
# meaningless.
switch_branch "$BASE_BRANCH" || exit 1
run_benchmarks "$BASE_BRANCH" "old.txt"

# Run benchmarks on feature branch
switch_branch "$FEATURE_BRANCH" || exit 1
run_benchmarks "$FEATURE_BRANCH" "new.txt"

echo ""
echo "======================================================================"
echo "Benchmark Comparison Results"
echo "======================================================================"
benchstat -alpha=0.05 old.txt new.txt
Loading