Skip to content

Commit d66524a

Browse files
committed
scripts: add cherry-pick verification tool with fuzzy matching
This script compares a release branch against a source branch (e.g. master) to verify that all cherry-picked commits are unmodified. It first attempts fast matching using normalized patch hashes. If no exact match is found, it falls back to a fuzzy matching mechanism:
- Filters source commits by matching author and commit subject
- Compares normalized diffs using diff -u
- Selects the closest match based on line difference count

Useful for verifying cherry-picks or rebased commits during release processes. Supports scan and compare limits for performance.
1 parent b3eb9a3 commit d66524a

File tree

1 file changed

+177
-0
lines changed

1 file changed

+177
-0
lines changed

scripts/fuzzy-match-release-branch.sh

Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
#!/usr/bin/env bash
#
# Verify that the commits on a release branch are unmodified cherry-picks of
# commits on a source branch. Each release commit is first compared by the
# hash of its normalized patch; when no identical patch exists, the source
# commits sharing the same author and subject are diffed and the closest one
# is reported for manual inspection.
#
# Usage:
#   fuzzy-match-release-branch.sh --source <branch> --release <branch>
#                                 [--scan-limit N] [--limit N]

# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -euo pipefail

# Defaults; the command-line options parsed further down overwrite them.
SRC_BRANCH=""        # --source: branch the cherry-picks came from
RELEASE_BRANCH=""    # --release: branch the cherry-picks landed on
SRC_SCAN_LIMIT=1000  # --scan-limit: max source commits to index
RELEASE_LIMIT=0      # --limit: max release commits to check (0 = all)
#######################################
# Print the usage text and terminate the script.
# Arguments: none
# Outputs:   usage text on stdout
# Returns:   never returns; exits with status 1
#######################################
show_help() {
  # Unquoted delimiter so that $0 expands to the invoked script name.
  cat <<EOF
Usage: $0 --source <branch> --release <branch> [--scan-limit N] [--limit N]

 --source Branch where cherry-picks originated (e.g. master)
 --release Branch where cherry-picks landed (e.g. release-rc1)
 --scan-limit Max commits to scan in source branch (default: 1000)
 --limit Number of release commits to compare (default: all)
EOF
  exit 1
}
# Parse args

#######################################
# Read the command-line options into the script's configuration variables.
# Globals:   SRC_BRANCH, RELEASE_BRANCH, SRC_SCAN_LIMIT, RELEASE_LIMIT (written)
# Arguments: the script's own arguments ("$@")
# Outputs:   error messages on stderr; usage text via show_help
# Returns:   0 on success; on any usage error show_help terminates the script.
#            -h/--help prints the usage text and exits with status 0.
#######################################
parse_args() {
  local int_re='^-?(0|[1-9][0-9]*)$'
  local value

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --source|--release|--scan-limit|--limit)
        # Checked up front: under `set -u` a bare "$2" would abort the script
        # with an unhelpful "unbound variable" message.
        if [[ $# -lt 2 ]]; then
          echo "❌ Option $1 requires a value." >&2
          show_help
        fi
        case "$1" in
          --source) SRC_BRANCH="$2" ;;
          --release) RELEASE_BRANCH="$2" ;;
          --scan-limit) SRC_SCAN_LIMIT="$2" ;;
          --limit) RELEASE_LIMIT="$2" ;;
        esac
        shift 2
        ;;
      -h|--help)
        # show_help exits by itself, so run it in a subshell to print the
        # usage text and then report success: asking for help is not an error.
        (show_help) || true
        exit 0
        ;;
      *)
        echo "Unknown argument: $1" >&2
        show_help
        ;;
    esac
  done

  if [[ -z "$SRC_BRANCH" || -z "$RELEASE_BRANCH" ]]; then
    echo "❌ Missing required arguments." >&2
    show_help
  fi

  # Both limits are later used in arithmetic tests; a non-integer (or a value
  # with a leading zero, which bash reads as octal) would fail there with a
  # cryptic error, so reject it here.
  for value in "$SRC_SCAN_LIMIT" "$RELEASE_LIMIT"; do
    if [[ ! "$value" =~ $int_re ]]; then
      echo "❌ --scan-limit and --limit must be integers (got '$value')." >&2
      show_help
    fi
  done
}

parse_args "$@"
# Cross-platform hashing

#######################################
# Hash the data arriving on stdin.
# Uses md5sum when it is installed and falls back to md5 otherwise; in both
# cases only the hex digest is printed.
# Arguments: none (input is read from stdin)
# Outputs:   the MD5 hex digest on stdout
# Returns:   0 on success, 1 when no hashing tool is available
#######################################
hash_patch() {
  if command -v md5sum >/dev/null 2>&1; then
    # md5sum prints "<digest>  -"; keep the first field.
    md5sum | awk '{print $1}'
  elif command -v md5 >/dev/null 2>&1; then
    # The digest is the last field of md5's output.
    md5 | awk '{print $NF}'
  else
    echo "❌ Neither md5sum nor md5 is available." >&2
    return 1
  fi
}
# --- Banner -----------------------------------------------------------------
echo "🔍 Preparing comparison:"
echo " Source branch : $SRC_BRANCH"
echo " Release branch : $RELEASE_BRANCH"
echo " Max source scan: $SRC_SCAN_LIMIT"
if [[ "$RELEASE_LIMIT" -gt 0 ]]; then
  echo " Max release compare: $RELEASE_LIMIT"
else
  echo " Max release compare: ALL"
fi
echo ""

echo "🔄 Fetching latest refs..."
# Best effort: a failed fetch (e.g. no network) must not stop the comparison
# of whatever refs are already available locally.
git fetch --all --quiet || true

# --- Release commits ----------------------------------------------------------
# Non-merge commits reachable from the release branch but not from the source
# branch, oldest first. When a limit is set, git selects the newest
# RELEASE_LIMIT commits before reversing them. Letting git do both steps
# avoids `tail -r`, which GNU tail does not provide, and avoids an `echo | head`
# pipeline that can die from SIGPIPE under `pipefail` on long lists.
echo "📥 Collecting release commits..."
release_rev_args=(--no-merges --reverse)
if [[ "$RELEASE_LIMIT" -gt 0 ]]; then
  release_rev_args+=(--max-count="$RELEASE_LIMIT")
fi
RELEASE_COMMITS=$(git rev-list "${release_rev_args[@]}" "$RELEASE_BRANCH" ^"$SRC_BRANCH")
RELEASE_COMMITS_ARRAY=()
while IFS= read -r line; do
  # A here-string always yields at least one (possibly empty) line; skip it so
  # an empty commit list really produces an empty array.
  if [[ -n "$line" ]]; then
    RELEASE_COMMITS_ARRAY+=("$line")
  fi
done <<< "$RELEASE_COMMITS"
echo " → Found ${#RELEASE_COMMITS_ARRAY[@]} release commits."
if [[ ${#RELEASE_COMMITS_ARRAY[@]} -eq 0 ]]; then
  echo " → Nothing to verify."
  exit 0
fi

# --- Source commits -----------------------------------------------------------
# The newest SRC_SCAN_LIMIT non-merge commits of the source branch.
echo "📥 Collecting source commits..."
SRC_COMMITS=$(git rev-list --no-merges --max-count="$SRC_SCAN_LIMIT" "$SRC_BRANCH")
SRC_COMMITS_ARRAY=()
while IFS= read -r line; do
  if [[ -n "$line" ]]; then
    SRC_COMMITS_ARRAY+=("$line")
  fi
done <<< "$SRC_COMMITS"
echo " → Found ${#SRC_COMMITS_ARRAY[@]} source commits to scan."
if [[ ${#SRC_COMMITS_ARRAY[@]} -eq 0 ]]; then
  echo "❌ No source commits to compare against (check --source and --scan-limit)." >&2
  exit 1
fi
echo ""
72+
echo "⚙️ Indexing source commit metadata..."
73+
SRC_COMMIT_META=()
74+
SRC_PATCH_HASHES=()
75+
SRC_PATCHES=()
76+
77+
for commit in "${SRC_COMMITS_ARRAY[@]}"; do
78+
author=$(git log -1 --pretty=format:"%an <%ae>" "$commit")
79+
subject=$(git log -1 --pretty=format:"%s" "$commit")
80+
meta_key="${subject}__${author}"
81+
patch=$(git show --pretty=format:%b "$commit" | tail -n +2 | sed 's/^[[:space:]]*//')
82+
patch_hash=$(echo "$patch" | hash_patch)
83+
84+
SRC_COMMIT_META+=("$meta_key")
85+
SRC_PATCH_HASHES+=("$patch_hash")
86+
SRC_PATCHES+=("$patch")
87+
done
88+
# Compare every release commit against the indexed source commits.
# Counters reported by the summary at the end of the script.
TOTAL=${#RELEASE_COMMITS_ARRAY[@]}
MATCHED=0
UNMATCHED=0

for i in "${!RELEASE_COMMITS_ARRAY[@]}"; do
  rc_commit="${RELEASE_COMMITS_ARRAY[$i]}"
  rc_author=$(git log -1 --pretty=format:"%an <%ae>" "$rc_commit")
  rc_subject=$(git log -1 --pretty=format:"%s" "$rc_commit")
  # Same "<subject>__<author>" key format as the source index.
  meta_key="${rc_subject}__${rc_author}"

  printf '[%d/%d] Checking %s... ' "$((i + 1))" "$TOTAL" "${rc_commit:0:7}"

  # Normalize and hash exactly like the source index so hashes are comparable.
  rc_patch=$(git show --pretty=format:%b "$rc_commit" | tail -n +2 | sed 's/^[[:space:]]*//')
  rc_patch_hash=$(printf '%s\n' "$rc_patch" | hash_patch)

  # --- Pass 1: identical normalized patch -----------------------------------
  found_exact=""
  for j in "${!SRC_PATCH_HASHES[@]}"; do
    if [[ "${SRC_PATCH_HASHES[$j]}" == "$rc_patch_hash" ]]; then
      found_exact="${SRC_COMMITS_ARRAY[$j]}"
      break
    fi
  done

  if [[ -n "$found_exact" ]]; then
    src_subject=$(git log -1 --pretty=format:"%s" "$found_exact")
    src_author=$(git log -1 --pretty=format:"%an <%ae>" "$found_exact")
    echo "✅ MATCHES ${found_exact:0:7}"
    echo " ↪ RELEASE: $rc_commit $rc_author"
    echo " \"$rc_subject\""
    echo " ↪ SOURCE : $found_exact $src_author"
    echo " \"$src_subject\""
    echo ""
    MATCHED=$((MATCHED + 1))
    continue
  fi

  echo "❌ NO MATCH"
  UNMATCHED=$((UNMATCHED + 1))

  echo "🔍 Unmatched Commit:"
  echo " ↪ Commit : $rc_commit"
  echo " ↪ Author : $rc_author"
  echo " ↪ Subject: \"$rc_subject\""
  echo ""

  # --- Pass 2: fuzzy match ---------------------------------------------------
  # Try fuzzy match only within commits that share author+subject
  best_score=""
  best_index=""
  for j in "${!SRC_COMMIT_META[@]}"; do
    if [[ "${SRC_COMMIT_META[$j]}" != "$meta_key" ]]; then
      continue
    fi

    # diff exits 1 when the inputs differ; that is the expected case here.
    patch_diff=$(diff -u <(printf '%s\n' "$rc_patch") <(printf '%s\n' "${SRC_PATCHES[$j]}") || true)

    # Score = number of added/removed lines. The first two lines of a unified
    # diff are its "---"/"+++" file headers, so they are skipped. grep -c
    # exits 1 when it counts zero lines, hence the `|| true` under pipefail.
    score=$(printf '%s\n' "$patch_diff" | tail -n +3 | grep -c '^[-+]' || true)

    # The first candidate always wins; later ones only when strictly closer.
    if [[ -z "$best_index" || "$score" -lt "$best_score" ]]; then
      best_score=$score
      best_index=$j
    fi
  done

  if [[ -n "$best_index" ]]; then
    match_commit="${SRC_COMMITS_ARRAY[$best_index]}"
    match_author=$(git log -1 --pretty=format:"%an <%ae>" "$match_commit")
    match_subject=$(git log -1 --pretty=format:"%s" "$match_commit")

    echo "🤔 Closest fuzzy match: $match_commit ($best_score changed lines)"
    echo " ↪ Author : $match_author"
    echo " ↪ Subject: \"$match_subject\""
    echo ""
    echo "🔧 Check it manually:"
    echo " git diff --histogram $rc_commit $match_commit"
  else
    echo "⚠️ No commits with matching author + subject in source branch."
  fi

  echo ""
  echo "🔍 Diff of release commit:"
  echo "---------------------------------------------"
  git show "$rc_commit" | sed 's/^/ /'
  echo "---------------------------------------------"
  echo ""
done
# Summary
# Final tally of the comparison: exact matches, commits without an exact
# match, and the number of release commits examined.
echo ""
echo "🔎 Summary:"
echo " ✅ Matched : $MATCHED"
echo " ❌ Unmatched : $UNMATCHED"
echo " 📦 Total : $TOTAL"

0 commit comments

Comments
 (0)