#!/usr/bin/env bash
#
# fuzzy-match-release-branch.sh
# (added by the originating patch as scripts/fuzzy-match-release-branch.sh,
# mode 100755)
#
# Compares the commits of a release branch against a source branch and reports,
# for every release commit, either the source commit with an identical
# normalized patch or the closest source commit sharing the same
# subject + author + author date.
#
# Disabled `set -euo pipefail` to prevent premature exit on Linux due to
# process substitution failures. Some commands (e.g. `diff <(...) <(...)`) can
# fail if input is empty or pipes break, which is tolerated logic in this
# script. macOS handles these cases more gracefully, but GNU diff in Linux does
# not - leading to hard script exits mid-match.
#
# set -euo pipefail

SRC_BRANCH=""
RELEASE_BRANCH=""
SRC_SCAN_LIMIT=1000
RELEASE_LIMIT=0

#######################################
# Prints the usage text to stdout and terminates the script.
# Arguments: $1 - exit status (optional, default 1 so that error paths that
#                 call show_help without arguments still fail)
#######################################
show_help() {
  local status="${1:-1}"
  # Unquoted delimiter on purpose: $0 must expand inside the usage line.
  cat <<EOF

๐Ÿ” fuzzy-match-release-branch.sh

 Compares commits in a release branch to those in a source branch (e.g. master) and identifies
 cherry-picked commits based on patch equivalence or fuzzy metadata (subject, author, date).

 โ“ Use this to:
 - Audit cherry-picks in release branches
 - Detect missing or altered backports
 - Spot accidental omissions during cherry-pick workflows

 ๐Ÿ“ฆ Usage:
 $0 --source <branch> --release <branch> [--scan-limit N] [--limit N]

 ๐Ÿ”ง Options:
 --source Source branch where original commits exist (e.g. master)
 --release Release branch to check for matching cherry-picks
 --scan-limit Max commits to scan in source branch (default: 1000)
 --limit Number of release commits to compare (default: all)

 ๐Ÿงช Example: Find the closest matches for the last 92 commits in 0-19-2-branch-rc2 from master (scanning up to 300 commits):

 ./scripts/fuzzy-match-release-branch.sh --source master --release 0-19-2-branch-rc2 --limit 92 --scan-limit 300

 ๐Ÿ“ Notes:
 - Requires git history for both branches to be present locally
 - Patch comparison is normalized (removes index lines, trims whitespace)
 - Fuzzy matching uses subject + author + date if no exact patch match found

EOF
  exit "$status"
}

#######################################
# Filter (stdin -> stdout) that drops git "index <old>..<new>[ <mode>]" header
# lines, since blob ids differ between branches even for identical changes.
# The trailing file mode is optional so that index lines of added/deleted
# files (which carry no mode) are removed as well.
#######################################
normalize_patch() {
  sed '/^index [0-9a-f]\{7,\}\.\.[0-9a-f]\{7,\}\( [0-9]\{6\}\)\{0,1\}$/d'
}

#######################################
# Cross-platform hashing: reads stdin, prints its MD5 digest.
# Uses md5sum when available, otherwise falls back to `md5`.
#######################################
hash_patch() {
  if command -v md5sum >/dev/null 2>&1; then
    md5sum | awk '{print $1}'
  else
    md5 | awk '{print $NF}'
  fi
}

#######################################
# Aborts with the usage text unless the value is a non-negative integer.
# The limits are later compared with `-gt`, which evaluates its operands
# arithmetically, so anything but plain digits must be rejected up front.
# Arguments: $1 - option name (for the message), $2 - value to check
#######################################
require_uint() {
  if [[ ! "$2" =~ ^[0-9]+$ ]]; then
    echo "Error: $1 expects a non-negative integer, got '$2'" >&2
    show_help
  fi
}

#######################################
# Loads author, author date and subject of a commit with one git invocation.
# Arguments: $1 - commit
# Globals:   META_AUTHOR, META_DATE, META_SUBJECT (written)
# Returns:   non-zero when git cannot read the commit
#######################################
load_commit_meta() {
  local raw
  raw=$(git log -1 --pretty=format:'%an <%ae>%n%ai%n%s' "$1" 2>/dev/null) || return 1
  META_AUTHOR=""
  META_DATE=""
  META_SUBJECT=""
  # One field per line; the subject comes last because it may be empty.
  {
    IFS= read -r META_AUTHOR
    IFS= read -r META_DATE
    IFS= read -r META_SUBJECT
  } <<< "$raw"
  return 0
}

#######################################
# Prints the normalized patch of a commit: index lines removed and leading
# whitespace stripped from every line.
# Arguments: $1 - commit
#######################################
commit_patch() {
  git show --format= --unified=3 "$1" | normalize_patch | sed 's/^[[:space:]]*//'
}

# Parse args
while [[ $# -gt 0 ]]; do
  case "$1" in
    --source|--release|--scan-limit|--limit)
      if [[ -z "${2:-}" || "$2" =~ ^- ]]; then
        echo "Error: Missing value for argument $1" >&2
        show_help
      fi
      case "$1" in
        --source) SRC_BRANCH="$2" ;;
        --release) RELEASE_BRANCH="$2" ;;
        --scan-limit) SRC_SCAN_LIMIT="$2" ;;
        --limit) RELEASE_LIMIT="$2" ;;
      esac
      shift 2
      ;;
    -h|--help)
      # An explicit help request is not an error.
      show_help 0
      ;;
    *)
      echo "Unknown argument: $1" >&2
      show_help
      ;;
  esac
done

if [[ -z "$SRC_BRANCH" || -z "$RELEASE_BRANCH" ]]; then
  echo "โŒ Missing required arguments."
  show_help
fi

require_uint --scan-limit "$SRC_SCAN_LIMIT"
require_uint --limit "$RELEASE_LIMIT"
# Force base 10 so values such as "08" are not parsed as invalid octal.
SRC_SCAN_LIMIT=$((10#$SRC_SCAN_LIMIT))
RELEASE_LIMIT=$((10#$RELEASE_LIMIT))

if [[ "$RELEASE_LIMIT" -gt 0 ]]; then
  release_limit_label="$RELEASE_LIMIT"
else
  release_limit_label="ALL"
fi

echo "๐Ÿ” Preparing comparison:"
echo " Source branch : $SRC_BRANCH"
echo " Release branch : $RELEASE_BRANCH"
echo " Max source scan: $SRC_SCAN_LIMIT"
echo " Max release compare: $release_limit_label"
echo ""

echo "๐Ÿ”„ Fetching latest refs..."
# Best effort: the comparison still works on local history when offline.
git fetch --all --quiet || true

echo "๐Ÿ“ฅ Collecting release commits..."
# --max-count limits to the newest N commits first; --reverse then prints the
# selected commits oldest-first.
release_rev_args=(--no-merges --reverse)
if [[ "$RELEASE_LIMIT" -gt 0 ]]; then
  release_rev_args+=(--max-count="$RELEASE_LIMIT")
fi
RELEASE_COMMITS_ARRAY=()
while IFS= read -r line; do
  [[ -n "$line" ]] && RELEASE_COMMITS_ARRAY+=("$line")
done < <(git rev-list "${release_rev_args[@]}" "$RELEASE_BRANCH" ^"$SRC_BRANCH")
echo " โ†’ Found ${#RELEASE_COMMITS_ARRAY[@]} release commits."

if [[ "${#RELEASE_COMMITS_ARRAY[@]}" -eq 0 ]]; then
  echo "โŒ No release commits found. Exiting."
  exit 1
fi

echo "๐Ÿ“ฅ Collecting source commits..."
SRC_COMMITS_ARRAY=()
while IFS= read -r line; do
  [[ -n "$line" ]] && SRC_COMMITS_ARRAY+=("$line")
done < <(git rev-list --no-merges --max-count="$SRC_SCAN_LIMIT" "$SRC_BRANCH")
echo " โ†’ Found ${#SRC_COMMITS_ARRAY[@]} source commits to scan."
echo ""

echo "โš™๏ธ Indexing source commit metadata..."
echo " โ†’ Processing ${#SRC_COMMITS_ARRAY[@]} commits from $SRC_BRANCH..."

# Parallel arrays, all appended together so that one index always refers to
# the same source commit. SRC_COMMITS_ARRAY cannot be used for lookups because
# commits whose metadata is unreadable are skipped below.
SRC_INDEXED_COMMITS=()
SRC_COMMIT_META=()
SRC_SUBJECTS=()
SRC_AUTHORS=()
SRC_DATES=()
SRC_PATCH_HASHES=()
SRC_PATCHES=()

progress=0
for commit in "${SRC_COMMITS_ARRAY[@]}"; do
  progress=$((progress + 1))
  echo -ne "\r [$progress/${#SRC_COMMITS_ARRAY[@]}] Indexing $commit"
  load_commit_meta "$commit" || continue
  patch=$(commit_patch "$commit")
  patch_hash=$(printf '%s\n' "$patch" | hash_patch)

  SRC_INDEXED_COMMITS+=("$commit")
  SRC_COMMIT_META+=("${META_SUBJECT}__${META_AUTHOR}__${META_DATE}")
  SRC_SUBJECTS+=("$META_SUBJECT")
  SRC_AUTHORS+=("$META_AUTHOR")
  SRC_DATES+=("$META_DATE")
  SRC_PATCH_HASHES+=("$patch_hash")
  SRC_PATCHES+=("$patch")
done

echo -e "\n โ†’ Completed source indexing."

TOTAL=${#RELEASE_COMMITS_ARRAY[@]}
MATCHED=0
UNMATCHED=0

for i in "${!RELEASE_COMMITS_ARRAY[@]}"; do
  rc_commit="${RELEASE_COMMITS_ARRAY[$i]}"
  if ! load_commit_meta "$rc_commit"; then
    echo "Warning: could not read metadata for $rc_commit; skipping." >&2
    continue
  fi
  rc_author="$META_AUTHOR"
  rc_subject="$META_SUBJECT"
  rc_authordate="$META_DATE"
  meta_key="${rc_subject}__${rc_author}__${rc_authordate}"

  echo -ne "[$((i + 1))/$TOTAL] Checking ${rc_commit:0:7}... "

  rc_patch=$(commit_patch "$rc_commit")
  rc_patch_hash=$(printf '%s\n' "$rc_patch" | hash_patch)

  # Pass 1: exact match on the normalized patch hash. A linear scan is used
  # (rather than an associative array) to stay compatible with older bash.
  found_exact_index=-1
  for j in "${!SRC_PATCH_HASHES[@]}"; do
    if [[ "${SRC_PATCH_HASHES[$j]}" == "$rc_patch_hash" ]]; then
      found_exact_index=$j
      break
    fi
  done

  if [[ $found_exact_index -ne -1 ]]; then
    found_exact="${SRC_INDEXED_COMMITS[$found_exact_index]}"
    echo "โœ… MATCHES ${found_exact:0:7}"
    echo " โ†ช RELEASE: $rc_commit"
    echo " Author : $rc_author"
    echo " Date : $rc_authordate"
    echo " Subject: \"$rc_subject\""
    echo " โ†ช SOURCE : $found_exact"
    echo " Author : ${SRC_AUTHORS[$found_exact_index]}"
    echo " Date : ${SRC_DATES[$found_exact_index]}"
    echo " Subject: \"${SRC_SUBJECTS[$found_exact_index]}\""
    echo ""
    MATCHED=$((MATCHED + 1))
    continue
  fi

  echo "โŒ NO MATCH"
  UNMATCHED=$((UNMATCHED + 1))

  echo "๐Ÿ” Unmatched Commit:"
  echo " โ†ช Commit : $rc_commit"
  echo " โ†ช Author : $rc_author"
  echo " โ†ช Subject: \"$rc_subject\""
  echo ""

  # Pass 2: among source commits with identical subject + author + date, pick
  # the one whose patch differs from the release patch in the fewest lines.
  best_score=99999
  best_index=""
  fuzzy_candidates=0

  for j in "${!SRC_COMMIT_META[@]}"; do
    if [[ "${SRC_COMMIT_META[$j]}" == "$meta_key" ]]; then
      fuzzy_candidates=$((fuzzy_candidates + 1))
      # diff exits non-zero when the inputs differ; that is the expected case.
      patch_delta=$(diff -u <(echo "$rc_patch") <(echo "${SRC_PATCHES[$j]}") || true)
      score=$(echo "$patch_delta" | grep -vE '^(--- |\+\+\+ )' | grep -c '^[-+]')
      # The first candidate is always accepted so best_index is never empty
      # when at least one candidate exists, however large its score.
      if [[ -z "$best_index" || "$score" -lt "$best_score" ]]; then
        best_score=$score
        best_index=$j
      fi
    fi
  done

  if [[ "$fuzzy_candidates" -eq 0 ]]; then
    echo "โš ๏ธ No commits with matching author + subject + date in source branch."
  else
    match_commit="${SRC_INDEXED_COMMITS[$best_index]}"
    match_author="${SRC_AUTHORS[$best_index]}"
    match_subject="${SRC_SUBJECTS[$best_index]}"

    # NUL-delimited so paths containing whitespace stay intact.
    changed_files=()
    while IFS= read -r -d '' path; do
      [[ -n "$path" ]] && changed_files+=("$path")
    done < <(git show --pretty="" --name-only -z "$rc_commit")

    echo "๐Ÿค” Closest fuzzy match: $match_commit ($best_score changed lines from $fuzzy_candidates candidates)"
    echo " โ†ช Author : $match_author"
    echo " โ†ช Subject: \"$match_subject\""
    echo " โ†ช Files Changed:"
    printf ' - %s\n' "${changed_files[@]}"
    echo ""

    echo "๐Ÿ”ง Check it manually (patch diff):"
    echo " git diff $match_commit $rc_commit -- \$(git show --pretty=\"\" --name-only $rc_commit)"
    echo ""

    echo "๐Ÿ” Diff between release and closest match:"
    echo "---------------------------------------------"
    # Without any pathspec git would diff the complete trees, so the diff is
    # only produced when the release commit actually touched files.
    if [[ "${#changed_files[@]}" -gt 0 ]]; then
      git diff "$match_commit" "$rc_commit" -- "${changed_files[@]}" | sed 's/^/ /' || true
    fi
    echo "---------------------------------------------"
    echo ""
  fi

done

# Summary
echo ""
echo "๐Ÿ”Ž Summary:"
echo " โœ… Matched : $MATCHED"
echo " โŒ Unmatched : $UNMATCHED"
echo " ๐Ÿ“ฆ Total : $TOTAL"