diff --git a/README.adoc b/README.adoc index 2a9e4f1..5097509 100644 --- a/README.adoc +++ b/README.adoc @@ -966,6 +966,46 @@ asciidoc: - '@redpanda-data/docs-extensions-and-macros/macros/rp-connect-components' ``` +== CLI Tools + +This library provides automated documentation generation tools for writers working with Redpanda documentation. + +=== Metrics Documentation + +Generate metrics reference documentation for Redpanda: + +[,bash] +---- +# Extract from a specific GitHub tag/branch (recommended) +npx doc-tools generate source-metrics-docs --tag v25.2.1-rc4 + +# Extract from a local repository +npx doc-tools generate metrics-docs --redpanda-repo /path/to/redpanda + +# Legacy Docker-based extraction +npx doc-tools generate metrics-docs-legacy --tag v25.2.1-rc4 +---- + +All commands generate separate files for internal and external metrics: + +* `autogenerated/internal_metrics_reference.adoc` - Internal metrics for engineering teams +* `autogenerated/public_metrics_reference.adoc` - Public metrics for documentation +* `autogenerated/metrics.json` - Machine-readable metrics data + +=== Other Documentation Tools + +* `property-docs`: Generate configuration property documentation +* `rpk-docs`: Generate RPK CLI documentation +* `helm-spec`: Generate Helm chart specifications +* `crd-spec`: Generate CRD specifications + +For complete command options, run: + +[,bash] +---- +npx doc-tools generate --help +---- + == Development quickstart This section provides information on how to develop this project. diff --git a/bin/doc-tools.js b/bin/doc-tools.js index fa321ce..472c522 100755 --- a/bin/doc-tools.js +++ b/bin/doc-tools.js @@ -264,6 +264,27 @@ function verifyMetricsDependencies() { requireDockerDaemon(); } +/** + * Ensures all dependencies required for generating metrics documentation from source code are installed. + * + * Checks for the presence of `make`, Python 3.10 or newer, Git, and at least one C++ compiler (`gcc` or `clang`). + * Exits the process with an error message if any dependency is missing. 
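+ * @see verifyMetricsDependencies for the Docker-based checks used by the legacy `metrics-docs-legacy` command.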
+ */ +function verifyMetricsExtractorDependencies() { + requireCmd('make', 'Your OS package manager'); + requirePython(); + requireCmd('git', 'Install Git: https://git-scm.com/downloads'); + try { + execSync('gcc --version', { stdio: 'ignore' }); + } catch { + try { + execSync('clang --version', { stdio: 'ignore' }); + } catch { + fail('A C++ compiler (gcc or clang) is required for tree-sitter compilation.'); + } + } +} + // -------------------------------------------------------------------- // Main CLI Definition // -------------------------------------------------------------------- @@ -394,9 +415,19 @@ const commonOptions = { function runClusterDocs(mode, tag, options) { const script = path.join(__dirname, '../cli-utils/generate-cluster-docs.sh'); const args = [mode, tag, options.dockerRepo, options.consoleTag, options.consoleDockerRepo]; - console.log(`⏳ Running ${script} with arguments: ${args.join(' ')}`); + + console.log(`πŸš€ Starting cluster (${mode}/${tag})...`); + + const startTime = Date.now(); const r = spawnSync('bash', [script, ...args], { stdio: 'inherit', shell: true }); - if (r.status !== 0) process.exit(r.status); + const duration = ((Date.now() - startTime) / 1000).toFixed(1); + + if (r.status !== 0) { + console.error(`❌ Script failed with exit code ${r.status}`); + process.exit(r.status); + } else { + console.log(`βœ… Completed ${mode} docs generation (${duration}s)`); + } } // helper to diff two autogenerated directories @@ -429,7 +460,51 @@ function diffDirs(kind, oldTag, newTag) { automation .command('metrics-docs') - .description('Generate JSON and AsciiDoc documentation for Redpanda metrics') + .description('Generate JSON and AsciiDoc documentation for Redpanda metrics from source code') + .requiredOption('-r, --redpanda-repo ', 'Path to the Redpanda repository root directory') + .option('--json-output ', 'Custom path for JSON output file', 'autogenerated/metrics.json') + .option('--internal-asciidoc ', 'Custom path for internal metrics AsciiDoc file', 'autogenerated/internal_metrics_reference.adoc') + .option('--external-asciidoc ', 'Custom path for external/public metrics AsciiDoc file', 'autogenerated/public_metrics_reference.adoc') + .action((options) => { + console.log(`🎯 Starting enhanced metrics extraction from source code`); + + verifyMetricsExtractorDependencies(); + + // Verify Redpanda repository path exists + if (!fs.existsSync(options.redpandaRepo)) { + console.error(`❌ Redpanda repository path does not exist: ${options.redpandaRepo}`); + process.exit(1); + } + + console.log(`⏳ Extracting metrics from ${options.redpandaRepo}...`); + + const startTime = Date.now(); + const result = spawnSync('python3', [ + path.join(__dirname, '../tools/metrics-extractor/metrics_extractor.py'), + '--redpanda-repo', options.redpandaRepo, + '--json-output', options.jsonOutput, + '--internal-asciidoc', options.internalAsciidoc, + '--external-asciidoc', options.externalAsciidoc + ], { stdio: 'inherit' }); + + const duration = ((Date.now() - startTime) / 1000).toFixed(1); + + if (result.status !== 0) { + console.error(`❌ Metrics extraction failed with exit code ${result.status}`); + process.exit(result.status); + } + + console.log(`βœ… Enhanced metrics extraction completed! 
(${duration}s)`); + console.log(`πŸ“„ Generated files:`); + console.log(` JSON: ${options.jsonOutput}`); + console.log(` Internal metrics: ${options.internalAsciidoc}`); + console.log(` Public metrics: ${options.externalAsciidoc}`); + process.exit(0); + }); + +automation + .command('metrics-docs-legacy') + .description('Generate JSON and AsciiDoc documentation for Redpanda metrics using Docker cluster (legacy)') .requiredOption('-t, --tag ', 'Redpanda version to use when starting Redpanda in Docker') .option( '--docker-repo ', @@ -448,6 +523,8 @@ automation ) .option('--diff ', 'Also diff autogenerated metrics from β†’ ') .action((options) => { + console.log(`🎯 Starting legacy metrics docs generation for ${options.tag}`); + verifyMetricsDependencies(); const newTag = options.tag; @@ -456,18 +533,20 @@ automation if (oldTag) { const oldDir = path.join('autogenerated', oldTag, 'metrics'); if (!fs.existsSync(oldDir)) { - console.log(`⏳ Generating metrics docs for old tag ${oldTag}…`); + console.log(`⏳ Generating metrics docs for old tag ${oldTag}...`); runClusterDocs('metrics', oldTag, options); } } - console.log(`⏳ Generating metrics docs for new tag ${newTag}…`); + console.log(`⏳ Generating metrics docs for new tag ${newTag}...`); runClusterDocs('metrics', newTag, options); if (oldTag) { + console.log(`πŸ”„ Diffing ${oldTag} β†’ ${newTag}...`); diffDirs('metrics', oldTag, newTag); } + console.log(`βœ… Legacy metrics docs generation completed!`); process.exit(0); }); @@ -713,6 +792,41 @@ automation process.exit(0); }); +automation + .command('source-metrics-docs') + .description('Generate metrics documentation from Redpanda source code using tree-sitter') + .option('--tag ', 'Git tag or branch to extract from', 'dev') + .option('--diff ', 'Also diff autogenerated metrics from β†’ ') + .action((options) => { + verifyMetricsExtractorDependencies(); + + const newTag = options.tag; + const oldTag = options.diff; + const cwd = path.resolve(__dirname, '../tools/metrics-extractor'); + const make = (tag) => { + console.log(`⏳ Building source-based metrics docs for ${tag}…`); + const r = spawnSync('make', ['build', `TAG=${tag}`], { cwd, stdio: 'inherit' }); + if (r.error) { + console.error(`❌ ${r.error.message}`); + process.exit(1); + } + if (r.status !== 0) process.exit(r.status); + }; + + if (oldTag) { + const oldDir = path.join('autogenerated', oldTag, 'source-metrics'); + if (!fs.existsSync(oldDir)) make(oldTag); + } + + make(newTag); + + if (oldTag) { + diffDirs('source-metrics', oldTag, newTag); + } + + process.exit(0); + }); + automation .command('rpk-docs') .description('Generate AsciiDoc documentation for rpk CLI commands') diff --git a/cli-utils/generate-cluster-docs.sh b/cli-utils/generate-cluster-docs.sh index 2bfa4ac..1e53997 100755 --- a/cli-utils/generate-cluster-docs.sh +++ b/cli-utils/generate-cluster-docs.sh @@ -2,6 +2,13 @@ set -euo pipefail IFS=$'\n\t' +# Function to log with timestamp (only for key operations) +log_step() { + echo "[$(date '+%H:%M:%S')] $1" +} + +log_step "πŸš€ Starting cluster setup..." + ############################################################################### # Pre-flight: Ensure Docker is available and running ############################################################################### @@ -15,6 +22,11 @@ if ! docker info &> /dev/null; then exit 1 fi +if ! command -v curl &> /dev/null; then + echo "❌ curl is not installed or not in PATH. Please install curl to continue." 
+ exit 1 +fi + ############################################################################### # Load overrides from an optional .env file in the current directory ############################################################################### @@ -56,40 +68,63 @@ export REDPANDA_CONSOLE_DOCKER_REPO="$CONSOLE_REPO" ############################################################################### # Start Redpanda cluster ############################################################################### +log_step "οΏ½ Starting Redpanda cluster..." "$SCRIPT_DIR/start-cluster.sh" "$TAG" # Wait for the cluster to settle if [[ "$MODE" == "metrics" ]]; then - echo "⏳ Waiting 300 seconds for metrics to be available…" - sleep 300 + log_step "⏳ Waiting for metrics endpoint..." + + # Wait for metrics endpoint to be responsive + timeout=300 + counter=0 + metrics_url="http://localhost:19644/public_metrics/" + + while ! curl -f -s "$metrics_url" > /dev/null 2>&1; do + if [ $counter -ge $timeout ]; then + echo "❌ Metrics endpoint did not become ready within ${timeout}s" + exit 1 + fi + sleep 10 + counter=$((counter + 10)) + done + + log_step "βœ… Metrics endpoint ready" else - echo "⏳ Waiting 30 seconds for cluster to be ready…" sleep 30 fi ############################################################################### # Python virtual environment setup ############################################################################### +log_step "🐍 Setting up Python environment..." "$SCRIPT_DIR/python-venv.sh" \ "$SCRIPT_DIR/venv" \ - "$SCRIPT_DIR/../tools/metrics/requirements.txt" + "$SCRIPT_DIR/../tools/metrics-extractor/requirements.txt" ############################################################################### # Run documentation generator ############################################################################### +log_step "πŸ“ Generating $MODE documentation..." + if [[ "$MODE" == "metrics" ]]; then + # Use enhanced metrics extractor with separate internal/external docs "$SCRIPT_DIR/venv/bin/python" \ - "$SCRIPT_DIR/../tools/metrics/metrics.py" "$TAG" + "$SCRIPT_DIR/../tools/metrics-extractor/metrics_extractor.py" \ + --json-output "autogenerated/${TAG}/metrics/metrics.json" \ + --internal-asciidoc "autogenerated/${TAG}/metrics/internal_metrics_reference.adoc" \ + --external-asciidoc "autogenerated/${TAG}/metrics/public_metrics_reference.adoc" else "$SCRIPT_DIR/venv/bin/python" \ "$SCRIPT_DIR/../tools/gen-rpk-ascii.py" "$TAG" fi -echo "βœ… $MODE docs generated successfully!" +log_step "βœ… Documentation generated successfully" # Tear down the cluster +log_step "🧹 Cleaning up cluster..." 
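+# Teardown output is discarded below so compose noise does not clutter the docs-generation logs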
cd "$SCRIPT_DIR"/../docker-compose -docker compose -p "$PROJECT_NAME" down --volumes +docker compose -p "$PROJECT_NAME" down --volumes > /dev/null 2>&1 # Return to the original directory cd "$ORIGINAL_PWD" || exit 1 diff --git a/cli-utils/install-test-dependencies.sh b/cli-utils/install-test-dependencies.sh index 0fe1861..6a74dd8 100755 --- a/cli-utils/install-test-dependencies.sh +++ b/cli-utils/install-test-dependencies.sh @@ -13,7 +13,7 @@ install_node() { eval "$(fnm env)" || { echo "Failed to load fnm environment"; exit 1; } fnm install --lts || { echo "Failed to install Node.js"; exit 1; } fnm use --lts || { echo "Failed to use Node.js"; exit 1; } - echo "Node.js version: $(node -v)" + echo "Node.js version: $(node -v)" fi } diff --git a/cli-utils/python-venv.sh b/cli-utils/python-venv.sh index d7d9bac..82ae94f 100755 --- a/cli-utils/python-venv.sh +++ b/cli-utils/python-venv.sh @@ -7,7 +7,7 @@ set -euo pipefail VENV_DIR="${1:-venv}" REQ_FILE="${2:-requirements.txt}" -echo "Recreating Python venv at $VENV_DIR..." +echo "Recreating Python venv at $VENV_DIR..." rm -rf "$VENV_DIR" python3 -m venv "$VENV_DIR" "$VENV_DIR/bin/pip" install --upgrade pip --quiet diff --git a/docker-compose/25.1/docker-compose.yml b/docker-compose/25.1/docker-compose.yml index 0d4352d..2a53d80 100644 --- a/docker-compose/25.1/docker-compose.yml +++ b/docker-compose/25.1/docker-compose.yml @@ -51,13 +51,15 @@ services: - 19092:19092 - 19644:9644 healthcheck: - test: ["CMD", "rpk", "cluster", "info", "-X", "user=superuser", "-X", "pass=secretpassword"] + test: ["CMD", "rpk", "version"] interval: 10s timeout: 15s retries: 10 depends_on: minio: condition: service_healthy + mc: + condition: service_completed_successfully redpanda-1: command: - redpanda @@ -88,8 +90,12 @@ services: - 29092:29092 - 29644:9644 depends_on: - - redpanda-0 - - minio + redpanda-0: + condition: service_started + minio: + condition: service_healthy + mc: + condition: service_completed_successfully redpanda-2: command: - redpanda @@ -120,8 +126,12 @@ services: - 39092:39092 - 39644:9644 depends_on: - - redpanda-0 - - minio + redpanda-0: + condition: service_started + minio: + condition: service_healthy + mc: + condition: service_completed_successfully #################### # Redpanda Console # #################### @@ -132,8 +142,6 @@ services: - redpanda_network entrypoint: /bin/sh command: -c 'echo "$$CONSOLE_CONFIG_FILE" > /tmp/config.yml && /app/console' - volumes: - - ./config:/tmp/config/ environment: CONFIG_FILEPATH: ${CONFIG_FILEPATH:-/tmp/config.yml} CONSOLE_CONFIG_FILE: | @@ -391,10 +399,11 @@ services: - AWS_REGION=local entrypoint: > /bin/sh -c " - until (/usr/bin/mc config host add minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' && sleep 1; done; + until (/usr/bin/mc alias set minio http://minio:9000 minio redpandaTieredStorage7) do echo '...waiting...' 
&& sleep 1; done; /usr/bin/mc mb minio/redpanda; /usr/bin/mc policy set public minio/redpanda; - tail -f /dev/null + echo 'MinIO bucket initialization complete'; + exit 0 " catalog: image: tabulario/iceberg-rest diff --git a/docker-compose/docker-compose.yml b/docker-compose/docker-compose.yml index 0d4352d..44509ee 100644 --- a/docker-compose/docker-compose.yml +++ b/docker-compose/docker-compose.yml @@ -51,7 +51,7 @@ services: - 19092:19092 - 19644:9644 healthcheck: - test: ["CMD", "rpk", "cluster", "info", "-X", "user=superuser", "-X", "pass=secretpassword"] + test: ["CMD", "rpk", "version"] interval: 10s timeout: 15s retries: 10 diff --git a/package.json b/package.json index ee2a911..04070d1 100644 --- a/package.json +++ b/package.json @@ -21,7 +21,9 @@ "get-console-version": "doc-tools get-console-version", "build": "antora --to-dir docs --fetch local-antora-playbook.yml", "serve": "wds --node-resolve --open preview/test/ --watch --root-dir docs", - "test": "jest" + "test": "jest", + "metrics:extract": "doc-tools metrics-docs", + "metrics:legacy": "doc-tools metrics-docs-legacy" }, "contributors": [ { diff --git a/tools/metrics-extractor/Makefile b/tools/metrics-extractor/Makefile new file mode 100644 index 0000000..982cc4c --- /dev/null +++ b/tools/metrics-extractor/Makefile @@ -0,0 +1,154 @@ +# Redpanda Metrics Extractor Makefile +# Extracts metrics from Redpanda source code using tree-sitter + +SHELL := /bin/bash +TAG ?= dev +OUTPUT_DIR := autogenerated/$(TAG)/source-metrics +REDPANDA_REPO := https://github.com/redpanda-data/redpanda.git +REDPANDA_DIR := tmp/redpanda-$(TAG) +TREESITTER_DIR := tree-sitter/tree-sitter-cpp +PYTHON_VENV := venv +PYTHON := $(PYTHON_VENV)/bin/python +PIP := $(PYTHON_VENV)/bin/pip +TREE_SITTER := npx tree-sitter + +.PHONY: all build clean setup-venv install-deps clone-redpanda extract-metrics help + +all: build + +help: + @echo "Redpanda Metrics Extractor" + @echo "" + @echo "Available targets:" + @echo " build - Extract metrics for specified TAG (default: dev)" + @echo " clean - Clean temporary files and output" + @echo " setup-venv - Set up Python virtual environment" + @echo " install-deps - Install Python dependencies" + @echo " clone-redpanda - Clone Redpanda repository" + @echo " extract-metrics - Run metrics extraction" + @echo "" + @echo "Usage examples:" + @echo " make build TAG=v23.3.1" + @echo " make build TAG=dev" + +build: setup-venv install-deps clone-redpanda treesitter extract-metrics + +setup-venv: + @echo "Setting up Python virtual environment..." + python3 -m venv $(PYTHON_VENV) + +install-deps: setup-venv + @echo "Installing Python dependencies..." + $(PIP) install --upgrade pip + $(PIP) install -r requirements.txt + +clone-redpanda: + @echo "Cloning Redpanda repository (tag: $(TAG))..." 
+ @mkdir -p tmp + @if [ -d "$(REDPANDA_DIR)" ]; then \ + echo "Repository already exists, updating..."; \ + cd $(REDPANDA_DIR) && git fetch --all --tags && git checkout $(TAG); \ + if git show-ref --verify --quiet refs/heads/$(TAG); then \ + git pull origin $(TAG); \ + fi; \ + else \ + echo "Checking if $(TAG) is a branch or tag..."; \ + if git ls-remote --heads $(REDPANDA_REPO) $(TAG) | grep -q $(TAG); then \ + echo "$(TAG) is a branch, cloning..."; \ + git clone --depth 1 --branch $(TAG) $(REDPANDA_REPO) $(REDPANDA_DIR); \ + elif git ls-remote --tags $(REDPANDA_REPO) $(TAG) | grep -q $(TAG); then \ + echo "$(TAG) is a tag, cloning and checking out..."; \ + git clone $(REDPANDA_REPO) $(REDPANDA_DIR); \ + cd $(REDPANDA_DIR) && git checkout $(TAG); \ + else \ + echo "Error: $(TAG) not found as branch or tag in $(REDPANDA_REPO)"; \ + exit 1; \ + fi; \ + fi + +treesitter: + @echo "Ensuring tree-sitter-cpp grammar..." + @if [ ! -d "$(TREESITTER_DIR)" ]; then \ + git clone https://github.com/tree-sitter/tree-sitter-cpp.git "$(TREESITTER_DIR)"; \ + fi + @echo "Checking out compatible version v0.20.5..." + @cd "$(TREESITTER_DIR)" && git checkout v0.20.5 + @echo "Generating parser in $(TREESITTER_DIR)..." + @cd "$(TREESITTER_DIR)" && npm install --silent && $(TREE_SITTER) generate + +extract-metrics: + @echo "Extracting metrics from Redpanda source code..." + @mkdir -p $(OUTPUT_DIR) + $(PYTHON) metrics_extractor.py \ + --redpanda-repo $(REDPANDA_DIR) \ + --json-output $(OUTPUT_DIR)/metrics.json \ + --internal-asciidoc $(OUTPUT_DIR)/internal_metrics_reference.adoc \ + --external-asciidoc $(OUTPUT_DIR)/public_metrics_reference.adoc \ + --verbose + +generate-comparison: + @echo "Generating metrics comparison..." + @if [ -f "$(OUTPUT_DIR)/metrics.json" ]; then \ + $(PYTHON) compare_metrics.py $(OUTPUT_DIR)/metrics.json; \ + else \ + echo "No metrics file found. Run 'make build' first."; \ + fi + +clean: + @echo "Cleaning temporary files and output..." + rm -rf tmp/ + rm -rf tree-sitter/ + rm -rf $(PYTHON_VENV)/ + rm -rf autogenerated/ + find . -name "*.pyc" -delete + find . -name "__pycache__" -delete + +clean-cache: + @echo "Cleaning Python cache..." + find . -name "*.pyc" -delete + find . -name "__pycache__" -delete + +install-system-deps: + @echo "Installing system dependencies..." + @echo "Make sure you have the following installed:" + @echo " - Python 3.8+" + @echo " - git" + @echo " - build-essential (on Linux)" + @echo " - tree-sitter CLI (optional)" + +test: + @echo "Running tests..." + $(PYTHON) -m pytest tests/ -v + +lint: + @echo "Running linting..." + $(PYTHON) -m flake8 *.py + $(PYTHON) -m black --check *.py + +format: + @echo "Formatting code..." + $(PYTHON) -m black *.py + +# Development targets +dev-setup: setup-venv install-deps + $(PIP) install pytest flake8 black + +dev-test: dev-setup + make test + +# Quick extraction from local Redpanda directory +extract-local: + @if [ -z "$(REDPANDA_PATH)" ]; then \ + echo "Error: REDPANDA_PATH not set. Usage: make extract-local REDPANDA_PATH=/path/to/redpanda"; \ + exit 1; \ + fi + @echo "Extracting metrics from local Redpanda at $(REDPANDA_PATH)..." 
+ @mkdir -p $(OUTPUT_DIR) + $(PYTHON) metrics_extractor.py \ + --recursive \ + --output $(OUTPUT_DIR)/metrics.json \ + --internal-asciidoc $(OUTPUT_DIR)/internal_metrics_reference.adoc \ + --external-asciidoc $(OUTPUT_DIR)/public_metrics_reference.adoc \ + --filter-namespace redpanda \ + --verbose \ + $(REDPANDA_PATH)/src diff --git a/tools/metrics-extractor/README.adoc b/tools/metrics-extractor/README.adoc new file mode 100644 index 0000000..2e1e1fe --- /dev/null +++ b/tools/metrics-extractor/README.adoc @@ -0,0 +1,72 @@ += Redpanda Metrics Extractor +:description: Automated extraction of metrics from Redpanda source code +:page-categories: Development, Documentation, Automation + +This tool extracts Redpanda metrics from C++ source code and generates documentation in AsciiDoc format. + +== Usage for Writers + +=== Quick Start + +Generate metrics documentation from a GitHub tag or branch: + +[source,bash] +---- +# Extract from specific tag (recommended) +npx doc-tools generate source-metrics-docs --tag v25.2.1-rc4 + +# Extract from development branch +npx doc-tools generate source-metrics-docs --tag dev +---- + +=== Local Development + +If you have a local Redpanda repository: + +[source,bash] +---- +npx doc-tools generate metrics-docs --redpanda-repo /path/to/redpanda +---- + +=== Output Files + +All commands generate three files in the `autogenerated/` directory: + +* `internal_metrics_reference.adoc` - Internal metrics for engineering documentation +* `public_metrics_reference.adoc` - Public metrics for user-facing documentation +* `metrics.json` - Machine-readable metrics data + +== Technical Details + +The tool automatically extracts metrics created with these constructors: + +* `sm::make_gauge`, `sm::make_counter`, `sm::make_histogram` +* `sm::make_total_bytes`, `sm::make_derive` +* `ss::metrics::make_total_operations`, `ss::metrics::make_current_bytes` + +For each metric, it extracts: +* Name and type (gauge, counter, histogram) +* Description and labels +* Classification (internal vs external) +* Source location +== Prerequisites + +* Python 3.8+ and build tools (for tree-sitter compilation) +* Git + +== Development Commands + +For developers working on the tool itself: + +[source,bash] +---- +# Set up development environment +make setup-venv +make install-deps + +# Run tests +make test + +# Manual extraction (development) +python metrics_extractor.py --redpanda-repo /path/to/redpanda/src --verbose +---- diff --git a/tools/metrics-extractor/compare_original.py b/tools/metrics-extractor/compare_original.py new file mode 100644 index 0000000..b3ad4db --- /dev/null +++ b/tools/metrics-extractor/compare_original.py @@ -0,0 +1,818 @@ +#!/usr/bin/env python3 +""" +Dual Metrics Documentation Diff Tool + +This tool compares Prometheus metrics documentation files in AsciiDoc format +for both public and internal metrics, identifying differences in metrics, +descriptions, types, and labels. 
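+Supports both public metrics files (=== metric headings) and internal metrics files (== metric headings).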
+ +Usage: + python metrics_diff.py --original-public orig_pub.adoc --generated-public gen_pub.adoc + python metrics_diff.py --original-internal orig_int.adoc --generated-internal gen_int.adoc + python metrics_diff.py --original-public orig_pub.adoc --generated-public gen_pub.adoc --original-internal orig_int.adoc --generated-internal gen_int.adoc +""" + +import re +import json +import argparse +import sys +from typing import Dict, List, Set, Tuple, Optional +from dataclasses import dataclass, field +from collections import defaultdict + + +@dataclass +class MetricInfo: + """Structure to hold metric information""" + name: str + description: str = "" + metric_type: str = "" + labels: List[str] = field(default_factory=list) + usage: str = "" + section: str = "" + raw_content: str = "" + + +class MetricsParser: + """Parser for AsciiDoc metrics documentation""" + + def __init__(self, metric_header_level: str = "==="): + """ + Initialize parser with specific header level + metric_header_level: "===" for public metrics, "==" for internal metrics + """ + self.metrics = {} + self.current_section = "" + self.metric_header_level = metric_header_level + + def parse_file(self, content: str) -> Dict[str, MetricInfo]: + """Parse the AsciiDoc content and extract metrics information""" + lines = content.split('\n') + current_metric = None + in_metric_block = False + collecting_description = False + collecting_labels = False + collecting_usage = False + raw_start = 0 + + i = 0 + while i < len(lines): + line = lines[i].strip() + + # Skip empty lines + if not line: + i += 1 + continue + + # Handle different header levels based on metric type + if self.metric_header_level == "===": + # PUBLIC METRICS: == is section, === is metric + if line.startswith('== ') and not line.startswith('=== '): + self.current_section = line[3:].strip() + i += 1 + continue + elif line.startswith('=== '): + # Save previous metric if exists + if current_metric and current_metric.name: + raw_end = i + current_metric.raw_content = '\n'.join(lines[raw_start:raw_end]) + self.metrics[current_metric.name] = current_metric + + metric_name = line[4:].strip() + current_metric = MetricInfo( + name=metric_name, + section=self.current_section + ) + in_metric_block = True + collecting_description = True + collecting_labels = False + collecting_usage = False + raw_start = i + i += 1 + continue + + elif self.metric_header_level == "==": + # INTERNAL METRICS: == is metric (no section headers typically) + if line.startswith('== ') and not line.startswith('=== '): + # Check if this looks like a metric name (starts with vectorized_) + potential_metric = line[3:].strip() + if potential_metric.startswith('vectorized_') or len(potential_metric.split()) == 1: + # This is a metric header + # Save previous metric if exists + if current_metric and current_metric.name: + raw_end = i + current_metric.raw_content = '\n'.join(lines[raw_start:raw_end]) + self.metrics[current_metric.name] = current_metric + + metric_name = potential_metric + current_metric = MetricInfo( + name=metric_name, + section=self.current_section or "Internal Metrics" + ) + in_metric_block = True + collecting_description = True + collecting_labels = False + collecting_usage = False + raw_start = i + i += 1 + continue + else: + # This might be a section header (rare in internal metrics) + self.current_section = potential_metric + i += 1 + continue + + if not in_metric_block or not current_metric: + i += 1 + continue + + # Check for Type specification + if line.startswith('*Type*:'): + 
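# Record the declared metric type and stop treating subsequent lines as description text +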
current_metric.metric_type = line.split(':', 1)[1].strip() + collecting_description = False + i += 1 + continue + + # Check for Labels section + if line.startswith('*Labels*:'): + collecting_labels = True + collecting_description = False + collecting_usage = False + i += 1 + continue + + # Check for Usage section + if line.startswith('*Usage*:'): + collecting_usage = True + collecting_description = False + collecting_labels = False + i += 1 + continue + + # Check for end of metric (horizontal rule) + if line.startswith('---'): + collecting_description = False + collecting_labels = False + collecting_usage = False + i += 1 + continue + + # Collect content based on current state + if collecting_description and not line.startswith('*') and not line.startswith('- ') and not line.startswith('* '): + if current_metric.description: + current_metric.description += " " + line + else: + current_metric.description = line + + elif collecting_labels: + # Extract label information + if line.startswith('- ') or line.startswith('* '): + label_text = line[2:].strip() + # Clean up label text by removing backticks and extra formatting + label_text = re.sub(r'`([^`]+)`', r'\1', label_text) + current_metric.labels.append(label_text) + elif line and not line.startswith('*'): + # Continue collecting labels if not a new section + if current_metric.labels: + current_metric.labels[-1] += " " + line + + elif collecting_usage: + if not line.startswith('*'): + if current_metric.usage: + current_metric.usage += " " + line + else: + current_metric.usage = line + + i += 1 + + # Don't forget the last metric + if current_metric and current_metric.name: + current_metric.raw_content = '\n'.join(lines[raw_start:]) + self.metrics[current_metric.name] = current_metric + + return self.metrics + + +class MetricsDiff: + """Class to compare two sets of metrics and generate diff report""" + + def __init__(self, original_metrics: Dict[str, MetricInfo], generated_metrics: Dict[str, MetricInfo], metrics_type: str = ""): + self.original = original_metrics + self.generated = generated_metrics + self.metrics_type = metrics_type + + def get_metric_sets(self) -> Tuple[Set[str], Set[str], Set[str], Set[str]]: + """Get sets of metric names for comparison""" + original_names = set(self.original.keys()) + generated_names = set(self.generated.keys()) + + removed = original_names - generated_names + added = generated_names - original_names + common = original_names & generated_names + + return removed, added, common, original_names | generated_names + + def compare_metrics(self) -> Dict: + """Compare metrics and return comprehensive diff report""" + removed, added, common, all_metrics = self.get_metric_sets() + + report = { + 'metrics_type': self.metrics_type, + 'summary': { + 'total_original': len(self.original), + 'total_generated': len(self.generated), + 'removed_count': len(removed), + 'added_count': len(added), + 'common_count': len(common), + 'modified_count': 0 + }, + 'removed_metrics': sorted(list(removed)), + 'added_metrics': sorted(list(added)), + 'modified_metrics': {}, + 'section_changes': self._analyze_section_changes(), + 'type_changes': self._analyze_type_changes(), + 'label_changes': self._analyze_label_changes() + } + + # Analyze modifications in common metrics + modified_count = 0 + for metric_name in common: + original_metric = self.original[metric_name] + generated_metric = self.generated[metric_name] + + changes = self._compare_single_metric(original_metric, generated_metric) + if changes: + 
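# Only record metrics whose description, type, labels, usage, or section actually differ +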
report['modified_metrics'][metric_name] = changes + modified_count += 1 + + report['summary']['modified_count'] = modified_count + + return report + + def _compare_single_metric(self, original: MetricInfo, generated: MetricInfo) -> Dict: + """Compare two metrics and return differences""" + changes = {} + + # Compare descriptions + orig_desc = original.description.strip() + gen_desc = generated.description.strip() + if orig_desc != gen_desc: + changes['description'] = { + 'original': orig_desc, + 'generated': gen_desc, + 'length_diff': len(gen_desc) - len(orig_desc) + } + + # Compare types + if original.metric_type != generated.metric_type: + changes['type'] = { + 'original': original.metric_type, + 'generated': generated.metric_type + } + + # Compare labels + original_labels = set(original.labels) + generated_labels = set(generated.labels) + + if original_labels != generated_labels: + changes['labels'] = { + 'removed': sorted(list(original_labels - generated_labels)), + 'added': sorted(list(generated_labels - original_labels)), + 'original_count': len(original_labels), + 'generated_count': len(generated_labels), + 'original_labels': sorted(list(original_labels)), + 'generated_labels': sorted(list(generated_labels)) + } + + # Compare usage + orig_usage = original.usage.strip() + gen_usage = generated.usage.strip() + if orig_usage != gen_usage: + changes['usage'] = { + 'original': orig_usage, + 'generated': gen_usage, + 'original_has_usage': bool(orig_usage), + 'generated_has_usage': bool(gen_usage) + } + + # Compare sections + if original.section != generated.section: + changes['section'] = { + 'original': original.section, + 'generated': generated.section + } + + return changes + + def _analyze_section_changes(self) -> Dict: + """Analyze changes in metric organization by sections""" + original_by_section = defaultdict(list) + generated_by_section = defaultdict(list) + + for name, metric in self.original.items(): + original_by_section[metric.section].append(name) + + for name, metric in self.generated.items(): + generated_by_section[metric.section].append(name) + + section_changes = {} + all_sections = set(original_by_section.keys()) | set(generated_by_section.keys()) + + for section in all_sections: + original_metrics = set(original_by_section.get(section, [])) + generated_metrics = set(generated_by_section.get(section, [])) + + if original_metrics != generated_metrics: + section_changes[section] = { + 'original_count': len(original_metrics), + 'generated_count': len(generated_metrics), + 'removed': sorted(list(original_metrics - generated_metrics)), + 'added': sorted(list(generated_metrics - original_metrics)), + 'moved_in': [], + 'moved_out': [] + } + + # Identify metrics that moved between sections + for metric_name in set(self.original.keys()) & set(self.generated.keys()): + orig_section = self.original[metric_name].section + gen_section = self.generated[metric_name].section + if orig_section != gen_section: + if orig_section in section_changes: + section_changes[orig_section]['moved_out'].append(f"{metric_name} -> {gen_section}") + if gen_section in section_changes: + section_changes[gen_section]['moved_in'].append(f"{metric_name} <- {orig_section}") + + return section_changes + + def _analyze_type_changes(self) -> Dict: + """Analyze changes in metric types""" + type_changes = {} + + for metric_name in set(self.original.keys()) & set(self.generated.keys()): + orig_type = self.original[metric_name].metric_type + gen_type = self.generated[metric_name].metric_type + + if orig_type != 
gen_type: + type_changes[metric_name] = { + 'original': orig_type, + 'generated': gen_type + } + + return type_changes + + def _analyze_label_changes(self) -> Dict: + """Analyze changes in metric labels across all metrics""" + label_stats = { + 'metrics_with_labels_removed': 0, + 'metrics_with_labels_added': 0, + 'metrics_with_label_changes': 0, + 'total_labels_removed': 0, + 'total_labels_added': 0, + 'common_labels_removed': set(), + 'common_labels_added': set() + } + + for metric_name in set(self.original.keys()) & set(self.generated.keys()): + orig_labels = set(self.original[metric_name].labels) + gen_labels = set(self.generated[metric_name].labels) + + removed_labels = orig_labels - gen_labels + added_labels = gen_labels - orig_labels + + if removed_labels or added_labels: + label_stats['metrics_with_label_changes'] += 1 + + if removed_labels: + label_stats['metrics_with_labels_removed'] += 1 + label_stats['total_labels_removed'] += len(removed_labels) + label_stats['common_labels_removed'].update(removed_labels) + + if added_labels: + label_stats['metrics_with_labels_added'] += 1 + label_stats['total_labels_added'] += len(added_labels) + label_stats['common_labels_added'].update(added_labels) + + # Convert sets to sorted lists for JSON serialization + label_stats['common_labels_removed'] = sorted(list(label_stats['common_labels_removed'])) + label_stats['common_labels_added'] = sorted(list(label_stats['common_labels_added'])) + + return label_stats + + +class DualMetricsReportGenerator: + """Generate combined reports for both public and internal metrics""" + + def __init__(self, public_diff: Optional[MetricsDiff] = None, internal_diff: Optional[MetricsDiff] = None): + self.public_diff = public_diff + self.internal_diff = internal_diff + + def generate_combined_report(self, output_file: str = None) -> str: + """Generate a comprehensive report for both metric types""" + report_lines = [] + report_lines.append("# Metrics Documentation Diff Report") + report_lines.append("=" * 60) + report_lines.append("") + + # Overall summary + total_original = 0 + total_generated = 0 + total_removed = 0 + total_added = 0 + total_modified = 0 + + if self.public_diff: + public_data = self.public_diff.compare_metrics() + total_original += public_data['summary']['total_original'] + total_generated += public_data['summary']['total_generated'] + total_removed += public_data['summary']['removed_count'] + total_added += public_data['summary']['added_count'] + total_modified += public_data['summary']['modified_count'] + + if self.internal_diff: + internal_data = self.internal_diff.compare_metrics() + total_original += internal_data['summary']['total_original'] + total_generated += internal_data['summary']['total_generated'] + total_removed += internal_data['summary']['removed_count'] + total_added += internal_data['summary']['added_count'] + total_modified += internal_data['summary']['modified_count'] + + report_lines.append("## Overall Summary") + report_lines.append(f"- **Total original metrics**: {total_original}") + report_lines.append(f"- **Total generated metrics**: {total_generated}") + report_lines.append(f"- **Net change**: {total_generated - total_original:+d}") + report_lines.append(f"- **Total removed**: {total_removed}") + report_lines.append(f"- **Total added**: {total_added}") + report_lines.append(f"- **Total modified**: {total_modified}") + report_lines.append("") + + # Individual reports + if self.public_diff: + report_lines.append("# PUBLIC METRICS") + report_lines.append("=" * 40) + 
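# Reuse the single-type report and trim its own title lines to avoid duplicate headings +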
public_report = self._generate_single_report(self.public_diff, "Public") + report_lines.extend(public_report.split('\n')[3:]) # Skip the title + report_lines.append("") + + if self.internal_diff: + report_lines.append("# INTERNAL METRICS") + report_lines.append("=" * 40) + internal_report = self._generate_single_report(self.internal_diff, "Internal") + report_lines.extend(internal_report.split('\n')[3:]) # Skip the title + report_lines.append("") + + report_text = '\n'.join(report_lines) + + if output_file: + with open(output_file, 'w', encoding='utf-8') as f: + f.write(report_text) + print(f"Combined report saved to {output_file}") + + return report_text + + def _generate_single_report(self, diff_tool: MetricsDiff, metrics_type: str) -> str: + """Generate a report for a single metrics type""" + diff_data = diff_tool.compare_metrics() + + report_lines = [] + report_lines.append(f"# {metrics_type} Metrics Report") + report_lines.append("=" * 40) + report_lines.append("") + + # Summary + summary = diff_data['summary'] + report_lines.append("## Summary") + report_lines.append(f"- **Original metrics count**: {summary['total_original']}") + report_lines.append(f"- **Generated metrics count**: {summary['total_generated']}") + report_lines.append(f"- **Net change**: {summary['total_generated'] - summary['total_original']:+d}") + report_lines.append(f"- **Removed metrics**: {summary['removed_count']}") + report_lines.append(f"- **Added metrics**: {summary['added_count']}") + report_lines.append(f"- **Modified metrics**: {summary['modified_count']}") + report_lines.append(f"- **Unchanged metrics**: {summary['common_count'] - summary['modified_count']}") + report_lines.append("") + + # Label changes summary + label_stats = diff_data['label_changes'] + if label_stats['metrics_with_label_changes'] > 0: + report_lines.append("## Label Changes Summary") + report_lines.append(f"- Metrics with label changes: {label_stats['metrics_with_label_changes']}") + report_lines.append(f"- Total labels removed: {label_stats['total_labels_removed']}") + report_lines.append(f"- Total labels added: {label_stats['total_labels_added']}") + if label_stats['common_labels_removed']: + removed_preview = ', '.join(label_stats['common_labels_removed'][:5]) + if len(label_stats['common_labels_removed']) > 5: + removed_preview += "..." + report_lines.append(f"- Common removed labels: {removed_preview}") + if label_stats['common_labels_added']: + added_preview = ', '.join(label_stats['common_labels_added'][:5]) + if len(label_stats['common_labels_added']) > 5: + added_preview += "..." + report_lines.append(f"- Common added labels: {added_preview}") + report_lines.append("") + + # Type changes summary + type_changes = diff_data['type_changes'] + if type_changes: + report_lines.append("## Type Changes Summary") + report_lines.append(f"- Metrics with type changes: {len(type_changes)}") + for metric, change in list(type_changes.items())[:5]: # Show first 5 + report_lines.append(f" - {metric}: {change['original']} β†’ {change['generated']}") + if len(type_changes) > 5: + report_lines.append(f" - ... 
and {len(type_changes) - 5} more") + report_lines.append("") + + # Removed metrics + if diff_data['removed_metrics']: + report_lines.append("## Removed Metrics") + for metric in diff_data['removed_metrics']: + section = diff_tool.original[metric].section if metric in diff_tool.original else "Unknown" + report_lines.append(f"- {metric} (from {section})") + report_lines.append("") + + # Added metrics + if diff_data['added_metrics']: + report_lines.append("## Added Metrics") + for metric in diff_data['added_metrics']: + section = diff_tool.generated[metric].section if metric in diff_tool.generated else "Unknown" + report_lines.append(f"- {metric} (in {section})") + report_lines.append("") + + # Modified metrics (show top 10) + if diff_data['modified_metrics']: + report_lines.append("## Modified Metrics (Top 10)") + count = 0 + for metric_name, changes in diff_data['modified_metrics'].items(): + if count >= 10: + report_lines.append(f"... and {len(diff_data['modified_metrics']) - 10} more modified metrics") + break + + report_lines.append(f"### {metric_name}") + + if 'description' in changes: + desc_change = changes['description'] + report_lines.append("**Description changed:**") + if len(desc_change['original']) > 100 or len(desc_change['generated']) > 100: + report_lines.append(f"- Length change: {desc_change['length_diff']:+d} characters") + report_lines.append(f"- Original: {desc_change['original'][:100]}...") + report_lines.append(f"- Generated: {desc_change['generated'][:100]}...") + else: + report_lines.append(f"- Original: {desc_change['original']}") + report_lines.append(f"- Generated: {desc_change['generated']}") + report_lines.append("") + + if 'type' in changes: + report_lines.append("**Type changed:**") + report_lines.append(f"- {changes['type']['original']} β†’ {changes['type']['generated']}") + report_lines.append("") + + if 'labels' in changes: + label_changes = changes['labels'] + report_lines.append("**Labels changed:**") + report_lines.append(f"- Count: {label_changes['original_count']} β†’ {label_changes['generated_count']}") + if label_changes['removed']: + report_lines.append(f"- Removed: {', '.join(label_changes['removed'])}") + if label_changes['added']: + report_lines.append(f"- Added: {', '.join(label_changes['added'])}") + report_lines.append("") + + if 'usage' in changes: + usage_change = changes['usage'] + report_lines.append("**Usage changed:**") + report_lines.append(f"- Had usage: {usage_change['original_has_usage']} β†’ {usage_change['generated_has_usage']}") + report_lines.append("") + + if 'section' in changes: + report_lines.append("**Section changed:**") + report_lines.append(f"- {changes['section']['original']} β†’ {changes['section']['generated']}") + + report_lines.append("---") + report_lines.append("") + count += 1 + + # Section changes + if diff_data['section_changes']: + report_lines.append("## Section Changes") + for section, changes in diff_data['section_changes'].items(): + report_lines.append(f"### {section}") + report_lines.append(f"- Metric count: {changes['original_count']} β†’ {changes['generated_count']}") + if changes['removed']: + removed_preview = ', '.join(changes['removed'][:5]) + if len(changes['removed']) > 5: + removed_preview += f" ... ({len(changes['removed']) - 5} more)" + report_lines.append(f"- Removed: {removed_preview}") + if changes['added']: + added_preview = ', '.join(changes['added'][:5]) + if len(changes['added']) > 5: + added_preview += f" ... 
({len(changes['added']) - 5} more)" + report_lines.append(f"- Added: {added_preview}") + if changes['moved_out']: + report_lines.append(f"- Moved out: {', '.join(changes['moved_out'][:3])}...") + if changes['moved_in']: + report_lines.append(f"- Moved in: {', '.join(changes['moved_in'][:3])}...") + report_lines.append("") + + return '\n'.join(report_lines) + + +def main(): + """Main function to run the metrics diff tool""" + parser = argparse.ArgumentParser( + description='Compare Redpanda metrics documentation files (public and/or internal)', + epilog=''' +Examples: + %(prog)s --original-public orig_pub.adoc --generated-public gen_pub.adoc + %(prog)s --original-internal orig_int.adoc --generated-internal gen_int.adoc + %(prog)s --original-public orig_pub.adoc --generated-public gen_pub.adoc --original-internal orig_int.adoc --generated-internal gen_int.adoc + ''', + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + # Public metrics arguments + parser.add_argument('--original-public', help='Path to original (published) public metrics file') + parser.add_argument('--generated-public', help='Path to generated (automated) public metrics file') + + # Internal metrics arguments + parser.add_argument('--original-internal', help='Path to original (published) internal metrics file') + parser.add_argument('--generated-internal', help='Path to generated (automated) internal metrics file') + + # Output arguments + parser.add_argument('--output', help='Output file for the combined report (default: metrics_diff_report.md)') + parser.add_argument('--json', help='Output file for JSON data (default: metrics_diff_data.json)') + parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output') + parser.add_argument('--debug', action='store_true', help='Debug parsing (shows first 10 metrics found)') + + args = parser.parse_args() + + # Validate arguments + if not any([args.original_public, args.original_internal]): + print("Error: You must specify at least one type of metrics to compare.") + print("Use --original-public and --generated-public for public metrics,") + print("or --original-internal and --generated-internal for internal metrics,") + print("or both.") + sys.exit(1) + + if args.original_public and not args.generated_public: + print("Error: --original-public requires --generated-public") + sys.exit(1) + + if args.original_internal and not args.generated_internal: + print("Error: --original-internal requires --generated-internal") + sys.exit(1) + + # Initialize diff tools + public_diff = None + internal_diff = None + + # Process public metrics if provided + if args.original_public and args.generated_public: + try: + print(f"Loading public metrics files...") + print(f" Original: {args.original_public}") + with open(args.original_public, 'r', encoding='utf-8') as f: + orig_public_content = f.read() + + print(f" Generated: {args.generated_public}") + with open(args.generated_public, 'r', encoding='utf-8') as f: + gen_public_content = f.read() + + print("Parsing public metrics...") + orig_public_parser = MetricsParser("===") # Public metrics use === + gen_public_parser = MetricsParser("===") + + orig_public_metrics = orig_public_parser.parse_file(orig_public_content) + gen_public_metrics = gen_public_parser.parse_file(gen_public_content) + + print(f"βœ“ Parsed {len(orig_public_metrics)} original public metrics") + print(f"βœ“ Parsed {len(gen_public_metrics)} generated public metrics") + + public_diff = MetricsDiff(orig_public_metrics, gen_public_metrics, "public") + + except 
FileNotFoundError as e: + print(f"Error: Could not find public metrics file '{e.filename}'") + sys.exit(1) + except Exception as e: + print(f"Error processing public metrics: {e}") + sys.exit(1) + + # Process internal metrics if provided + if args.original_internal and args.generated_internal: + try: + print(f"Loading internal metrics files...") + print(f" Original: {args.original_internal}") + with open(args.original_internal, 'r', encoding='utf-8') as f: + orig_internal_content = f.read() + + print(f" Generated: {args.generated_internal}") + with open(args.generated_internal, 'r', encoding='utf-8') as f: + gen_internal_content = f.read() + + print("Parsing internal metrics...") + orig_internal_parser = MetricsParser("==") # Internal metrics use == + gen_internal_parser = MetricsParser("==") + + orig_internal_metrics = orig_internal_parser.parse_file(orig_internal_content) + gen_internal_metrics = gen_internal_parser.parse_file(gen_internal_content) + + print(f"βœ“ Parsed {len(orig_internal_metrics)} original internal metrics") + print(f"βœ“ Parsed {len(gen_internal_metrics)} generated internal metrics") + + if args.debug: + print("\nDEBUG: First 10 original internal metrics found:") + for i, (name, metric) in enumerate(list(orig_internal_metrics.items())[:10]): + print(f" {i+1:2d}. {name} (type: {metric.metric_type}, section: {metric.section})") + + print("\nDEBUG: First 10 generated internal metrics found:") + for i, (name, metric) in enumerate(list(gen_internal_metrics.items())[:10]): + print(f" {i+1:2d}. {name} (type: {metric.metric_type}, section: {metric.section})") + + internal_diff = MetricsDiff(orig_internal_metrics, gen_internal_metrics, "internal") + + except FileNotFoundError as e: + print(f"Error: Could not find internal metrics file '{e.filename}'") + sys.exit(1) + except Exception as e: + print(f"Error processing internal metrics: {e}") + sys.exit(1) + + # Verbose output + if args.verbose: + if public_diff: + print("\nPublic metrics summary:") + pub_data = public_diff.compare_metrics() + print(f" Original: {pub_data['summary']['total_original']}") + print(f" Generated: {pub_data['summary']['total_generated']}") + print(f" Changes: {pub_data['summary']['modified_count']} modified, {pub_data['summary']['added_count']} added, {pub_data['summary']['removed_count']} removed") + + if internal_diff: + print("\nInternal metrics summary:") + int_data = internal_diff.compare_metrics() + print(f" Original: {int_data['summary']['total_original']}") + print(f" Generated: {int_data['summary']['total_generated']}") + print(f" Changes: {int_data['summary']['modified_count']} modified, {int_data['summary']['added_count']} added, {int_data['summary']['removed_count']} removed") + + # Generate reports + print("\nAnalyzing differences...") + report_generator = DualMetricsReportGenerator(public_diff, internal_diff) + + report_file = args.output or "metrics_diff_report.md" + json_file = args.json or "metrics_diff_data.json" + + try: + # Generate combined report + report = report_generator.generate_combined_report(report_file) + + print("\n" + "="*60) + print("METRICS DOCUMENTATION DIFF REPORT") + print("="*60) + print(report[:3000]) # Show first 3000 characters + if len(report) > 3000: + print(f"\n... 
(truncated, full report saved to {report_file})") + + # Save detailed JSON data + combined_data = {} + if public_diff: + combined_data['public'] = public_diff.compare_metrics() + if internal_diff: + combined_data['internal'] = internal_diff.compare_metrics() + + with open(json_file, "w", encoding='utf-8') as f: + json.dump(combined_data, f, indent=2, default=str) + + print(f"\nβœ“ Combined report saved to: {report_file}") + print(f"βœ“ JSON data saved to: {json_file}") + + # Summary stats + total_original = 0 + total_generated = 0 + total_changes = 0 + + if public_diff: + pub_data = public_diff.compare_metrics() + total_original += pub_data['summary']['total_original'] + total_generated += pub_data['summary']['total_generated'] + total_changes += pub_data['summary']['modified_count'] + pub_data['summary']['added_count'] + pub_data['summary']['removed_count'] + + if internal_diff: + int_data = internal_diff.compare_metrics() + total_original += int_data['summary']['total_original'] + total_generated += int_data['summary']['total_generated'] + total_changes += int_data['summary']['modified_count'] + int_data['summary']['added_count'] + int_data['summary']['removed_count'] + + print(f"\nπŸ“Š Overall Summary:") + print(f" Total metrics: {total_original} β†’ {total_generated} ({total_generated - total_original:+d})") + print(f" Total changes: {total_changes}") + + if public_diff and internal_diff: + print(f" Public metrics processed: βœ“") + print(f" Internal metrics processed: βœ“") + elif public_diff: + print(f" Public metrics processed: βœ“") + elif internal_diff: + print(f" Internal metrics processed: βœ“") + + except Exception as e: + print(f"Error generating report: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/tools/metrics-extractor/metrics_bag.py b/tools/metrics-extractor/metrics_bag.py new file mode 100644 index 0000000..15e1a12 --- /dev/null +++ b/tools/metrics-extractor/metrics_bag.py @@ -0,0 +1,211 @@ +import logging +import hashlib +import uuid +from collections import defaultdict + +logger = logging.getLogger("metrics_bag") + + +class MetricsBag: + """Container for storing and managing extracted metrics""" + + def __init__(self): + self._metrics = {} + self._unique_id_counter = 0 + + def _generate_unique_id(self, name, group_name, file_path, line_number): + """Generate a unique ID for a metric based on its properties""" + # Create a deterministic unique ID based on the metric's key properties + key_string = f"{group_name or 'unknown'}::{name}::{file_path}::{line_number}" + # Use SHA256 hash of the key string to create a unique but deterministic ID + hash_object = hashlib.sha256(key_string.encode()) + return hash_object.hexdigest()[:16] # Use first 16 characters for readability + + def add_metric(self, name, metric_type, description="", labels=None, + file="", constructor="", line_number=None, group_name=None, full_name=None, + internal_external_type="external", **kwargs): + """Add a metric to the bag""" + if labels is None: + labels = [] + + # Generate unique ID for this metric but now use full_name as the key + unique_id = self._generate_unique_id(name, group_name, file, line_number) + + # Use full_name as the key, fallback to unique_id if full_name is not available + key = full_name if full_name else unique_id + + # If metric already exists, merge information + if key in self._metrics: + existing = self._metrics[key] + + # Update description if current one is empty + if not existing.get("description") and description: + 
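# Keep the first non-empty description seen for this metric +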
existing["description"] = description + + # Update unique_id if not present (for backward compatibility) + if "unique_id" not in existing: + existing["unique_id"] = unique_id + + # Update group_name and full_name if new values are provided and are not None + # Allow overwriting None values with actual values + if group_name is not None: + existing["group_name"] = group_name + elif "group_name" not in existing: + existing["group_name"] = None + + if full_name is not None: + existing["full_name"] = full_name + elif "full_name" not in existing: + existing["full_name"] = None + + # Update internal_external_type + if internal_external_type is not None: + existing["metric_type"] = internal_external_type + elif "metric_type" not in existing: + existing["metric_type"] = "external" # default + + # Merge labels + existing_labels = set(existing.get("labels", [])) + new_labels = set(labels) + existing["labels"] = sorted(existing_labels | new_labels) + + # Add file location if not already present + files = existing.get("files", []) + file_info = {"file": file, "line": line_number} + if file_info not in files: + files.append(file_info) + existing["files"] = files + else: + # Create new metric entry + metric_data = { + "unique_id": unique_id, # Add unique_id as a field + "name": name, + "type": metric_type, + "description": description, + "labels": sorted(labels) if labels else [], + "constructor": constructor, + "files": [{"file": file, "line": line_number}], + "group_name": group_name, + "full_name": full_name, + "metric_type": internal_external_type # Add the internal/external classification + } + + # Add any additional kwargs + metric_data.update(kwargs) + + self._metrics[key] = metric_data + + def get_metric(self, name): + """Get a specific metric by name""" + return self._metrics.get(name) + + def get_all_metrics(self): + """Get all metrics as a dictionary""" + return self._metrics.copy() + + def get_metrics_by_type(self, metric_type): + """Get all metrics of a specific type""" + return { + name: metric for name, metric in self._metrics.items() + if metric.get("type") == metric_type + } + + def get_metrics_by_constructor(self, constructor): + """Get all metrics created by a specific constructor""" + return { + name: metric for name, metric in self._metrics.items() + if metric.get("constructor") == constructor + } + + def merge(self, other_bag): + """Merge another MetricsBag into this one""" + if not isinstance(other_bag, MetricsBag): + raise ValueError("Can only merge with another MetricsBag instance") + + for name, metric in other_bag.get_all_metrics().items(): + self.add_metric( + name=metric["name"], + metric_type=metric["type"], + description=metric.get("description", ""), + labels=metric.get("labels", []), + file=metric.get("files", [{}])[0].get("file", ""), + constructor=metric.get("constructor", ""), + line_number=metric.get("files", [{}])[0].get("line"), + group_name=metric.get("group_name"), + full_name=metric.get("full_name"), + internal_external_type=metric.get("metric_type", "external") + ) + + def filter_by_prefix(self, prefix): + """Get metrics that start with a specific prefix""" + return { + name: metric for name, metric in self._metrics.items() + if name.startswith(prefix) + } + + def get_statistics(self): + """Get statistics about the metrics in the bag""" + stats = { + "total_metrics": len(self._metrics), + "by_type": defaultdict(int), + "by_constructor": defaultdict(int), + "with_description": 0, + "with_labels": 0 + } + + for metric in self._metrics.values(): + 
stats["by_type"][metric.get("type", "unknown")] += 1 + stats["by_constructor"][metric.get("constructor", "unknown")] += 1 + + if metric.get("description"): + stats["with_description"] += 1 + + if metric.get("labels"): + stats["with_labels"] += 1 + + # Convert defaultdict to regular dict for JSON serialization + stats["by_type"] = dict(stats["by_type"]) + stats["by_constructor"] = dict(stats["by_constructor"]) + + return stats + + def to_dict(self): + """Convert the metrics bag to a dictionary for JSON serialization""" + # Use the full names (or unique IDs as fallback) as JSON keys + return { + "metrics": self._metrics, # Use full names as keys directly + "statistics": self.get_statistics() + } + + def to_prometheus_format(self): + """Convert metrics to a Prometheus-like format""" + prometheus_metrics = [] + + for name, metric in self._metrics.items(): + prometheus_metric = { + "name": name, + "help": metric.get("description", ""), + "type": metric.get("type", "unknown") + } + + if metric.get("labels"): + prometheus_metric["labels"] = metric["labels"] + + prometheus_metrics.append(prometheus_metric) + + return prometheus_metrics + + def __len__(self): + return len(self._metrics) + + def __iter__(self): + return iter(self._metrics.items()) + + def __contains__(self, name): + return name in self._metrics + + def __getitem__(self, name): + return self._metrics[name] + + def __repr__(self): + return f"MetricsBag({len(self._metrics)} metrics)" diff --git a/tools/metrics-extractor/metrics_extractor.py b/tools/metrics-extractor/metrics_extractor.py new file mode 100644 index 0000000..f16a0cd --- /dev/null +++ b/tools/metrics-extractor/metrics_extractor.py @@ -0,0 +1,463 @@ +#!/usr/bin/env python3 +import logging +import sys +import os +import json +import re +import argparse +import warnings +from pathlib import Path +from tree_sitter import Language, Parser +from metrics_parser import build_treesitter_cpp_library, extract_metrics_from_files +from metrics_bag import MetricsBag + +# Suppress tree-sitter deprecation warnings +warnings.filterwarnings("ignore", category=FutureWarning, module="tree_sitter") + +logger = logging.getLogger("metrics_extractor") + + +def validate_paths(options): + path = options.redpanda_repo + + if not os.path.exists(path): + logger.error(f'Path does not exist: "{path}".') + sys.exit(1) + + +def get_cpp_files(options): + """Get all C++ source files from the path""" + path = Path(options.redpanda_repo) + + # If the path is a file, return it directly + if path.is_file() and path.suffix in ['.cc', '.cpp', '.cxx', '.h', '.hpp']: + return [path.resolve()] + + # Otherwise, treat it as a directory + file_patterns = ["*.cc", "*.cpp", "*.cxx"] + cpp_files = [] + + for pattern in file_patterns: + if options.recursive: + cpp_files.extend(path.rglob(pattern)) + else: + cpp_files.extend(path.glob(pattern)) + + return [f.resolve() for f in cpp_files] + + +def get_treesitter_cpp_parser_and_language(treesitter_dir, destination_path): + """Initialize tree-sitter C++ parser and language""" + if not os.path.exists(destination_path): + build_treesitter_cpp_library(treesitter_dir, destination_path) + + cpp_language = Language(destination_path, "cpp") + treesitter_parser = Parser() + treesitter_parser.set_language(cpp_language) + + return treesitter_parser, cpp_language + + +def parse_args(): + parser = argparse.ArgumentParser( + description="Extract Redpanda metrics from C++ source code using tree-sitter" + ) + parser.add_argument( + "--redpanda-repo", + "-r", + required=True, + help="Path to 
the Redpanda source code directory" + ) + parser.add_argument( + "--recursive", + action="store_true", + default=True, + help="Search for C++ files recursively (default: True)" + ) + parser.add_argument( + "--json-output", + default="metrics.json", + help="Output JSON file (default: metrics.json)" + ) + parser.add_argument( + "--internal-asciidoc", + help="Generate AsciiDoc output file for internal metrics" + ) + parser.add_argument( + "--external-asciidoc", + help="Generate AsciiDoc output file for external metrics" + ) + parser.add_argument( + "--asciidoc", + "-a", + help="Generate AsciiDoc output file (deprecated: use --internal-asciidoc and --external-asciidoc)" + ) + parser.add_argument( + "--verbose", + "-v", + action="store_true", + help="Enable verbose logging" + ) + parser.add_argument( + "--filter-namespace", + help="Filter metrics by namespace (e.g., redpanda)" + ) + + return parser.parse_args() + + +def clean_description(description): + """Ensure description ends with appropriate punctuation""" + if not description: + return description + + description = description.strip() + if description and not description.endswith(('.', '!', '?')): + description += '.' + + return description + + +def clean_labels(labels): + """Clean up labels by removing whitespace and deduplicating""" + if not labels: + return [] + + cleaned_labels = set() + simple_labels = set() # Track simple labels to avoid adding redundant braced versions + + for label in labels: + # Remove extra whitespace and newlines + clean_label = ' '.join(label.split()) + + # Skip empty labels + if not clean_label: + continue + + # Handle cases like "{shard}" vs "shard" - prefer the simpler form + if clean_label.startswith('{') and clean_label.endswith('}'): + # Extract the content inside braces + inner_content = clean_label[1:-1].strip() + # If it's a simple label (no comma), prefer the unbrace version + if ',' not in inner_content and inner_content: + simple_label = inner_content.strip() + simple_labels.add(simple_label) + cleaned_labels.add(simple_label) # Add the simple version + else: + # Complex label with commas, keep the braced version + cleaned_labels.add(clean_label) + else: + # Simple label + simple_labels.add(clean_label) + cleaned_labels.add(clean_label) + + # Convert back to sorted list + return sorted(list(cleaned_labels)) + + +def generate_asciidoc_by_type(metrics_bag, internal_output_file, external_output_file): + """Generate separate AsciiDoc documentation for internal and external metrics""" + all_metrics = metrics_bag.get_all_metrics() + + # Separate metrics by type + internal_metrics = {} + external_metrics = {} + + for metric_key, metric_info in all_metrics.items(): + metric_type = metric_info.get('metric_type', 'external') # Default to external if not specified + if metric_type == 'internal': + internal_metrics[metric_key] = metric_info + else: + external_metrics[metric_key] = metric_info + + # Group metrics by category/prefix for better organization + def group_metrics_by_category(metrics_dict): + """Group metrics by their prefix (first part before underscore)""" + groups = {} + for metric_key, metric_info in metrics_dict.items(): + # Extract category from full_name or fallback to metric_key + full_name = metric_info.get('full_name', metric_key) + + # Remove redpanda_ or vectorized_ prefix first + clean_name = full_name + if clean_name.startswith('redpanda_'): + clean_name = clean_name[9:] # Remove 'redpanda_' + elif clean_name.startswith('vectorized_'): + clean_name = clean_name[11:] # Remove 
'vectorized_' + + # Get the category (first part before underscore) + parts = clean_name.split('_') + category = parts[0] if parts else 'other' + + # Create more meaningful category names + category_mapping = { + 'cluster': 'Cluster metrics', + 'kafka': 'Kafka metrics', + 'raft': 'Raft metrics', + 'storage': 'Storage metrics', + 'memory': 'Infrastructure metrics', + 'io': 'Infrastructure metrics', + 'rpc': 'RPC metrics', + 'cloud': 'Cloud storage metrics', + 'application': 'Application metrics', + 'reactor': 'Infrastructure metrics', + 'scheduler': 'Infrastructure metrics', + 'network': 'Infrastructure metrics', + 'internal': 'RPC metrics', + 'pandaproxy': 'REST proxy metrics', + 'rest': 'REST proxy metrics', + 'schema': 'Schema registry metrics', + 'transform': 'Data transforms metrics', + 'wasm': 'Data transforms metrics', + 'security': 'Security metrics', + 'authorization': 'Security metrics', + 'tls': 'Security metrics', + 'debug': 'Debug bundle metrics', + 'alien': 'Infrastructure metrics', + 'archival': 'Cloud storage metrics', + 'ntp': 'Partition metrics', + 'space': 'Storage metrics', + 'chunk': 'Storage metrics', + 'tx': 'Transaction metrics', + 'leader': 'Raft metrics', + 'node': 'Raft metrics', + 'stall': 'Infrastructure metrics', + 'httpd': 'Infrastructure metrics', + 'host': 'Infrastructure metrics', + 'uptime': 'Infrastructure metrics', + 'cpu': 'Infrastructure metrics', + 'iceberg': 'Iceberg metrics' + } + + # Use the mapping, but fallback to a few broad categories instead of creating many + category_name = category_mapping.get(category) + if not category_name: + # Group unmapped categories into broader buckets + if category in ['active', 'adjacent', 'anomalies', 'available', 'backlog', 'batch', 'batches', 'brokers', 'buffer', 'bytes', 'cached', 'certificate', 'chunked', 'cleanly', 'client', 'closed', 'committed', 'compacted', 'compaction', 'complete', 'connection', 'connections', 'connects', 'consumed', 'corrupted']: + category_name = 'Application metrics' + elif category in ['data', 'datalake', 'decompressed', 'dirty', 'disk', 'dispatch', 'dlq', 'end', 'error', 'errors', 'events', 'failed', 'failures', 'fetch', 'files', 'high', 'housekeeping']: + category_name = 'Application metrics' + elif category in ['in', 'inflight', 'invalid', 'lag', 'last', 'latest', 'loaded', 'local', 'log', 'logs', 'max', 'method', 'non', 'num', 'offsets', 'out', 'parquet', 'partition', 'partitions']: + category_name = 'Application metrics' + elif category in ['queued', 'raw', 'read', 'received', 'reclaim', 'records', 'request', 'requests', 'result', 'retention', 'segments', 'sent', 'server', 'service', 'shares', 'start', 'state', 'successful', 'target', 'throttle', 'tombstones', 'topics', 'total', 'traffic', 'translations', 'trust', 'truststore', 'unavailable', 'under', 'urgent', 'write', 'written']: + category_name = 'Application metrics' + else: + category_name = 'Other metrics' + + if category_name not in groups: + groups[category_name] = {} + groups[category_name][metric_key] = metric_info + + return groups + + # Generate internal metrics documentation + if internal_output_file: + with open(internal_output_file, 'w') as f: + f.write("= Internal Metrics\n") + f.write(":description: Redpanda internal metrics for detailed analysis, debugging, and troubleshooting.\n") + f.write(":page-aliases: reference:internal-metrics.adoc\n") + f.write("\n") + f.write("This section provides reference descriptions about the internal metrics exported from Redpanda's `/metrics` endpoint.\n") + f.write("\n") + 
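+            # Remaining page preamble: usage-tip include, IMPORTANT admonition about
+            # live systems, and cardinality guidance; then one "== <category>" section
+            # per metric group with one "=== <metric>" entry per metric.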
f.write("include::shared:partial$metrics-usage-tip.adoc[]\n") + f.write("\n") + f.write("[IMPORTANT]\n") + f.write("====\n") + f.write("In a live system, Redpanda metrics are exported only for features that are in use. For example, a metric for consumer groups is not exported when no groups are registered.\n") + f.write("\n") + f.write("To see the available internal metrics in your system, query the `/metrics` endpoint:\n") + f.write("\n") + f.write("[,bash]\n") + f.write("----\n") + f.write("curl http://:9644/metrics | grep \"[HELP|TYPE]\"\n") + f.write("----\n") + f.write("====\n") + f.write("\n") + f.write("Internal metrics (`/metrics`) can generate thousands of metric series in production environments. Use them judiciously in monitoring systems to avoid performance issues. For alerting and dashboards, prefer public metrics (`/public_metrics`) which are optimized for lower cardinality.\n") + f.write("\n") + f.write("The xref:reference:properties/cluster-properties.adoc#aggregate_metrics[aggregate_metrics] cluster property controls internal metrics cardinality. When you enable this property, internal metrics combine labels (like shard) to reduce the number of series. Public metrics always combine labels, regardless of this setting.\n") + f.write("\n") + + # Group and sort internal metrics + internal_groups = group_metrics_by_category(internal_metrics) + + for group_name in sorted(internal_groups.keys()): + f.write(f"== {group_name}\n\n") + + # Sort metrics within each group + sorted_group_metrics = sorted(internal_groups[group_name].items()) + + for metric_key, metric_info in sorted_group_metrics: + # Use full_name as section header, fallback to metric_key if full_name is not available + section_name = metric_info.get('full_name', metric_key) + f.write(f"=== {section_name}\n\n") + + description = clean_description(metric_info.get('description')) + if description: + f.write(f"{description}\n\n") + else: + f.write("No description available.\n\n") + + f.write(f"*Type*: {metric_info.get('type', 'unknown')}\n\n") + + cleaned_labels = clean_labels(metric_info.get('labels', [])) + if cleaned_labels: + f.write("*Labels*:\n\n") + for label in cleaned_labels: + f.write(f"- `{label}`\n") + f.write("\n") + + f.write("---\n\n") + + # Generate external metrics documentation + if external_output_file: + with open(external_output_file, 'w') as f: + f.write("= Public Metrics\n") + f.write(":description: Public metrics to create your system dashboard.\n") + f.write("// tag::single-source[]\n") + f.write("\n") + f.write("This section provides reference descriptions for the public metrics exported from Redpanda's `/public_metrics` endpoint.\n") + f.write("\n") + f.write("// Cloud does not expose the internal metrics.\n") + f.write("ifndef::env-cloud[]\n") + f.write("include::shared:partial$metrics-usage-tip.adoc[]\n") + f.write("endif::[]\n") + f.write("\n") + f.write("[IMPORTANT]\n") + f.write("====\n") + f.write("In a live system, Redpanda metrics are exported only for features that are in use. 
For example, Redpanda does not export metrics for consumer groups if no groups are registered.\n") + f.write("\n") + f.write("To see the available public metrics in your system, query the `/public_metrics` endpoint:\n") + f.write("\n") + f.write("[,bash]\n") + f.write("----\n") + f.write("curl http://:9644/public_metrics | grep \"[HELP|TYPE]\"\n") + f.write("----\n") + f.write("\n") + f.write("====\n") + f.write("\n") + + # Group and sort external metrics + external_groups = group_metrics_by_category(external_metrics) + + for group_name in sorted(external_groups.keys()): + f.write(f"== {group_name}\n\n") + + # Sort metrics within each group + sorted_group_metrics = sorted(external_groups[group_name].items()) + + for metric_key, metric_info in sorted_group_metrics: + # Use full_name as section header, fallback to metric_key if full_name is not available + section_name = metric_info.get('full_name', metric_key) + f.write(f"=== {section_name}\n\n") + + description = clean_description(metric_info.get('description')) + if description: + f.write(f"{description}\n\n") + else: + f.write("No description available.\n\n") + + f.write(f"*Type*: {metric_info.get('type', 'unknown')}\n\n") + + cleaned_labels = clean_labels(metric_info.get('labels', [])) + if cleaned_labels: + f.write("*Labels*:\n\n") + for label in cleaned_labels: + f.write(f"- `{label}`\n") + f.write("\n") + + f.write("---\n\n") + + f.write("// end::single-source[]\n") + + +def main(): + args = parse_args() + + # Set logging level - only show warnings and errors unless verbose is requested + if args.verbose: + logging.basicConfig(level=logging.DEBUG, format='%(levelname)s: %(message)s') + else: + logging.basicConfig(level=logging.WARNING, format='%(levelname)s: %(message)s') + + validate_paths(args) + + if args.verbose: + logger.info("Initializing tree-sitter C++ parser...") + + # Use the same pattern as property-extractor + treesitter_dir = os.path.join(os.getcwd(), "tree-sitter/tree-sitter-cpp") + destination_path = os.path.join(treesitter_dir, "tree-sitter-cpp.so") + + if not os.path.exists(os.path.join(treesitter_dir, "src/parser.c")): + logger.error("Missing parser.c. 
Ensure Tree-sitter submodules are initialized.") + logger.error("Run 'make treesitter' first to generate the parser.") + sys.exit(1) + + treesitter_parser, cpp_language = get_treesitter_cpp_parser_and_language( + treesitter_dir, destination_path + ) + + if args.verbose: + logger.info("Finding C++ source files...") + cpp_files = get_cpp_files(args) + if args.verbose: + logger.info(f"Found {len(cpp_files)} C++ files") + + if args.verbose: + logger.info("Extracting metrics from source files...") + metrics_bag = extract_metrics_from_files( + cpp_files, treesitter_parser, cpp_language, args.filter_namespace + ) + + # Show clean summary with internal/external breakdown + all_metrics = metrics_bag.get_all_metrics() + total_metrics = len(all_metrics) + + # Count internal vs external metrics + internal_count = sum(1 for metric in all_metrics.values() if metric.get('metric_type') == 'internal') + external_count = sum(1 for metric in all_metrics.values() if metric.get('metric_type') == 'external') + + print(f"βœ… Successfully extracted {total_metrics} metrics from {len(cpp_files)} C++ files.") + print(f"Internal metrics: {internal_count}") + print(f"External metrics: {external_count}") + + with open(args.json_output, 'w') as f: + json.dump(metrics_bag.to_dict(), f, indent=2) + + # Output AsciiDoc if requested + if args.internal_asciidoc or args.external_asciidoc: + generate_asciidoc_by_type(metrics_bag, args.internal_asciidoc, args.external_asciidoc) + + # Handle legacy --asciidoc argument (generate both files) + if args.asciidoc: + if args.verbose: + logger.info(f"Writing legacy AsciiDoc output to {args.asciidoc}") + # For backward compatibility, generate both internal and external in one file + generate_asciidoc_by_type(metrics_bag, args.asciidoc, None) + + # Only show summary messages, not duplicate file outputs + print(f"πŸ“„ JSON output: {args.json_output}") + if args.internal_asciidoc: + print(f"πŸ“„ Internal metrics: {args.internal_asciidoc}") + if args.external_asciidoc: + print(f"πŸ“„ External metrics: {args.external_asciidoc}") + if args.asciidoc: + print(f"πŸ“„ Legacy AsciiDoc: {args.asciidoc}") + + # Show breakdown by type + metrics_by_type = {} + for metric_data in metrics_bag.get_all_metrics().values(): + metric_type = metric_data.get('type', 'unknown') + metrics_by_type[metric_type] = metrics_by_type.get(metric_type, 0) + 1 + + if metrics_by_type: + print(f"πŸ“Š Metrics by type:") + for metric_type, count in sorted(metrics_by_type.items()): + print(f" β€’ {metric_type}: {count}") + + print("πŸŽ‰ Metrics extraction completed successfully!") + + +if __name__ == "__main__": + main() diff --git a/tools/metrics-extractor/metrics_parser.py b/tools/metrics-extractor/metrics_parser.py new file mode 100644 index 0000000..a9d4c81 --- /dev/null +++ b/tools/metrics-extractor/metrics_parser.py @@ -0,0 +1,1165 @@ +import os +import re +import subprocess +import logging +from pathlib import Path +from metrics_bag import MetricsBag + +logger = logging.getLogger("metrics_parser") + +# Tree-sitter queries for different metric constructors +METRICS_QUERIES = { + 'sm_make_gauge': """ + (call_expression + function: (qualified_identifier + scope: (namespace_identifier) @namespace + name: (identifier) @function_name) + arguments: (argument_list + (string_literal) @metric_name + . 
* + (call_expression + function: (qualified_identifier + scope: (namespace_identifier) + name: (identifier)) + arguments: (argument_list + (string_literal) @description))?)) + """, + + 'ss_metrics_make_current_bytes': """ + (call_expression + function: (qualified_identifier + scope: (qualified_identifier + scope: (namespace_identifier) @outer_namespace + name: (namespace_identifier) @inner_namespace) + name: (identifier) @function_name) + arguments: (argument_list + (string_literal) @metric_name + . * + (call_expression + function: (qualified_identifier + scope: (qualified_identifier + scope: (namespace_identifier) + name: (namespace_identifier)) + name: (identifier)) + arguments: (argument_list + (string_literal) @description))?)) + """ +} + +# Map function names to metric types +FUNCTION_TO_TYPE = { + 'make_gauge': 'gauge', + 'make_counter': 'counter', + 'make_histogram': 'histogram', + 'make_total_bytes': 'counter', + 'make_derive': 'counter', + 'make_total_operations': 'counter', + 'make_current_bytes': 'gauge' +} + + +def build_treesitter_cpp_library(treesitter_dir, destination_path): + """Build tree-sitter C++ library - expects parser to be already generated""" + from tree_sitter import Language + Language.build_library(destination_path, [treesitter_dir]) + + +def get_file_contents(path): + """Read file contents as bytes""" + try: + with open(path, "rb") as f: + return f.read() + except Exception as e: + logger.warning(f"Could not read file {path}: {e}") + return b"" + + +def unquote_string(value): + """Remove quotes from string literals and handle escape sequences""" + if not value: + return "" + + # Remove outer quotes and handle raw strings + value = value.strip() + if value.startswith('R"') and value.endswith('"'): + # Raw string literal: R"delimiter(content)delimiter" + match = re.match(r'R"([^(]*)\((.*)\)\1"', value, re.DOTALL) + if match: + return match.group(2) + elif value.startswith('"') and value.endswith('"'): + # Regular string literal + value = value[1:-1] + # Handle basic escape sequences + value = value.replace('\\"', '"') + value = value.replace('\\\\', '\\') + value = value.replace('\\n', '\n') + value = value.replace('\\t', '\t') + + return value + + +def extract_labels_from_code(code_context): + """Extract potential label names from code context around metrics""" + labels = set() + + # Look for common label patterns + label_patterns = [ + r'\.aggregate\s*\(\s*([^)]+)\s*\)', # .aggregate(aggregate_labels) + r'auto\s+(\w*labels\w*)\s*=', # auto aggregate_labels = + r'std::vector<[^>]*>\s*{([^}]+)}', # std::vector{sm::shard_label} + r'sm::([a-z_]*label[a-z_]*)', # sm::shard_label, sm::topic_label, etc. + r'"([^"]+)"\s*:\s*[^,}]+', # key-value pairs + ] + + for pattern in label_patterns: + matches = re.findall(pattern, code_context) + for match in matches: + if isinstance(match, str): + # Clean up label names + cleaned = match.strip().replace('sm::', '').replace('_label', '') + if cleaned and not cleaned.isspace(): + labels.add(cleaned) + elif isinstance(match, tuple): + for submatch in match: + cleaned = submatch.strip().replace('sm::', '').replace('_label', '') + if cleaned and not cleaned.isspace(): + labels.add(cleaned) + + return sorted(list(labels)) + + +def determine_metric_type_from_variable(start_node, variable_name, file_path): + """ + Determine if a metrics variable is internal or external by searching for its declaration. 
+ Looks for patterns like: + - metrics::public_metric_groups _service_metrics; (external) + - ss::metrics::metric_groups _metrics; (internal) + """ + + # Go to the root of the file to search for declarations + root_node = start_node + while root_node.parent: + root_node = root_node.parent + + def search_for_variable_declaration(node): + if node.type == 'declaration': + declaration_text = node.text.decode('utf-8', errors='ignore') + if variable_name in declaration_text: + # Check if it's a public_metric_groups declaration + if 'public_metric_groups' in declaration_text: + return "external" + elif 'metric_groups' in declaration_text and 'public' not in declaration_text: + return "internal" + + # Search children recursively + for child in node.children: + result = search_for_variable_declaration(child) + if result: + return result + + return None + + # First search the current file + result = search_for_variable_declaration(root_node) + if result: + return result + + # If not found in current file, try to search the corresponding header file + if file_path and str(file_path).endswith('.cc'): + header_path = str(file_path).replace('.cc', '.h') + try: + header_content = get_file_contents(header_path) + if header_content and variable_name.encode() in header_content: + header_text = header_content.decode('utf-8', errors='ignore') + logger.debug(f"Searching header file: {header_path}") + if f'public_metric_groups {variable_name}' in header_text: + logger.debug(f"Found {variable_name} as public_metric_groups in header -> external") + return "external" + elif f'metric_groups {variable_name}' in header_text and 'public' not in header_text: + logger.debug(f"Found {variable_name} as metric_groups in header -> internal") + return "internal" + except Exception as e: + logger.debug(f"Could not read header file {header_path}: {e}") + + # Default fallback based on variable name patterns + if variable_name in ['_public_metrics', '_jobs_metrics', '_service_metrics', '_probe_metrics']: + logger.debug(f"Using name-based fallback: {variable_name} -> external") + return "external" + elif variable_name in ['_internal_metrics', '_metrics']: + logger.debug(f"Using name-based fallback: {variable_name} -> internal") + return "internal" + else: + logger.debug(f"Unknown variable pattern: {variable_name}, defaulting to external") + return "external" + + +def find_group_name_and_type_from_ast(metric_call_expr_node, file_path=None): + """ + Traverse up the AST from a metric definition to find the enclosing + add_group call and extract its name and metric type (internal/external). + Returns tuple: (group_name, metric_type) + """ + current_node = metric_call_expr_node + while current_node: + # We are looking for a call expression, e.g., _metrics.add_group(...) or _public_metrics.add_group(...) 
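+        # Illustrative C++ shapes this walk is intended to match (member names are
+        # examples drawn from the patterns described above, not an exhaustive list):
+        #   _metrics.add_group(prometheus_sanitize::metrics_name("kafka"), {...});         -> internal
+        #   _public_metrics.add_group(prometheus_sanitize::metrics_name("kafka"), {...});  -> external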
+ if current_node.type == 'call_expression': + function_node = current_node.child_by_field_name('function') + if function_node and function_node.text.decode('utf-8').endswith('.add_group'): + function_text = function_node.text.decode('utf-8') + + # Extract the variable name from the add_group call (e.g., "_service_metrics" from "_service_metrics.add_group") + variable_name = function_text.replace('.add_group', '') + logger.debug(f"Found add_group call with variable: {variable_name}") + + # Determine metric type by searching for the variable declaration + metric_type = determine_metric_type_from_variable(current_node, variable_name, file_path) + logger.debug(f"Determined metric_type: {metric_type} for variable: {variable_name}") + + # This is an add_group call. Now, get its arguments. + args_node = current_node.child_by_field_name('arguments') + if not args_node or args_node.named_child_count == 0: + continue + + # The first argument should be prometheus_sanitize::metrics_name(...) or a variable + first_arg_node = args_node.named_children[0] + + # Check if this argument is a call to prometheus_sanitize::metrics_name + if first_arg_node.type == 'call_expression': + inner_function = first_arg_node.child_by_field_name('function') + inner_args = first_arg_node.child_by_field_name('arguments') + + if inner_function and '::metrics_name' in inner_function.text.decode('utf-8'): + # Found it. Extract the string literal from its arguments. + if inner_args and inner_args.named_child_count > 0: + group_name_node = inner_args.named_children[0] + if group_name_node.type == 'string_literal': + group_name = unquote_string(group_name_node.text.decode('utf-8')) + return group_name, metric_type + elif group_name_node.type == 'identifier': + # The argument to metrics_name is a variable, resolve it + inner_var_name = group_name_node.text.decode('utf-8') + logger.debug(f"Found variable in metrics_name call: {inner_var_name}") + + # Try all our resolution strategies for this variable + resolved_value = resolve_variable_in_local_scope(current_node, inner_var_name) + if not resolved_value: + resolved_value = resolve_variable_declaration(current_node, inner_var_name) + if not resolved_value: + resolved_value = resolve_variable_forward_in_function(current_node, inner_var_name) + if not resolved_value: + resolved_value = find_any_group_name_in_file(current_node) + + if resolved_value: + logger.debug(f"Resolved metrics_name variable {inner_var_name} to: {resolved_value}") + return resolved_value, metric_type + else: + logger.error(f"Could not resolve metrics_name variable: {inner_var_name}") + # EMERGENCY FALLBACK: Try to guess from common patterns + if inner_var_name == "cluster_metric_prefix": + logger.warning("Using emergency fallback for cluster_metric_prefix -> 'cluster'") + return "cluster", metric_type + # Handle simple string literal as group name + elif first_arg_node.type == 'string_literal': + group_name = unquote_string(first_arg_node.text.decode('utf-8')) + return group_name, metric_type + # Handle variable reference (like group_name) + elif first_arg_node.type == 'identifier': + variable_name = first_arg_node.text.decode('utf-8') + logger.debug(f"Found variable reference: {variable_name} at line {first_arg_node.start_point[0] + 1}") + + # Try multiple strategies to resolve the variable + group_name = None + + # Strategy 1: Search in the immediate local scope first + group_name = resolve_variable_in_local_scope(current_node, variable_name) + if group_name: + logger.debug(f"Resolved variable {variable_name} 
locally to: {group_name}") + return group_name, metric_type + + # Strategy 2: Search in broader scopes + group_name = resolve_variable_declaration(current_node, variable_name) + if group_name: + logger.debug(f"Resolved variable {variable_name} in broader scope to: {group_name}") + return group_name, metric_type + + # Strategy 3: Search the entire function/method + group_name = resolve_variable_in_function_scope(current_node, variable_name) + if group_name: + logger.debug(f"Resolved variable {variable_name} in function scope to: {group_name}") + return group_name, metric_type + + # Strategy 4: Search forward in the function for variable declarations + group_name = resolve_variable_forward_in_function(current_node, variable_name) + if group_name: + logger.debug(f"Found variable {variable_name} declared later in function: {group_name}") + return group_name, metric_type + + # Strategy 5: Last resort - search entire file for any group_name variable + if variable_name == "group_name": + group_name = find_any_group_name_in_file(current_node) + if group_name: + logger.debug(f"Found fallback group_name in file: {group_name}") + return group_name, metric_type + + logger.error(f"CRITICAL: Could not resolve variable '{variable_name}' - this should not happen!") + + # EMERGENCY FALLBACK: Hard-coded common patterns + if variable_name == "group_name": + # Return a placeholder that can be manually reviewed + logger.warning(f"Using emergency fallback for group_name - returning 'unknown'") + return "unknown", metric_type + + # Add debugging to see what scopes we searched + logger.debug(f"Current node type: {current_node.type}, parent: {current_node.parent.type if current_node.parent else 'None'}") + + current_node = current_node.parent + return None, "external" # Default to external if not found + + +def resolve_variable_declaration(start_node, variable_name): + """ + Search for a variable declaration within the current scope and enclosing scopes. 
+ Looks for patterns like: const auto group_name = prometheus_sanitize::metrics_name("..."); + """ + logger.debug(f"Searching for variable '{variable_name}' starting from node type: {start_node.type}") + + # Search from current scope up to the translation unit + scope_node = start_node + + # Keep searching in broader scopes until we find the variable or reach the top + while scope_node: + logger.debug(f"Searching in scope: {scope_node.type}") + + # Search for variable declarations in the current scope + def search_declarations(node, depth=0): + indent = " " * depth + logger.debug(f"{indent}Checking node type: {node.type}") + + if node.type == 'declaration': + logger.debug(f"{indent}Found declaration: {node.text.decode('utf-8')[:100]}...") + # Look for variable declarators + for child in node.children: + if child.type == 'init_declarator': + declarator = child.child_by_field_name('declarator') + initializer = child.child_by_field_name('value') + + if declarator and initializer: + # Check if this is our variable + declarator_text = declarator.text.decode('utf-8') + logger.debug(f"{indent} Declarator: {declarator_text}") + if variable_name in declarator_text: + logger.debug(f"{indent} Found matching variable!") + # Check if the initializer is a call to prometheus_sanitize::metrics_name + if initializer.type == 'call_expression': + func_node = initializer.child_by_field_name('function') + if func_node and '::metrics_name' in func_node.text.decode('utf-8'): + args_node = initializer.child_by_field_name('arguments') + if args_node and args_node.named_child_count > 0: + first_arg = args_node.named_children[0] + if first_arg.type == 'string_literal': + result = unquote_string(first_arg.text.decode('utf-8')) + logger.debug(f"{indent} Resolved to: {result}") + return result + + # Recursively search all child nodes + for child in node.children: + result = search_declarations(child, depth + 1) + if result: + return result + + return None + + # Search in the current scope + result = search_declarations(scope_node) + if result: + return result + + # Move to parent scope + if scope_node.type == 'translation_unit': + # We've reached the top level, stop here + logger.debug("Reached translation unit, stopping search") + break + scope_node = scope_node.parent + if scope_node: + logger.debug(f"Moving to parent scope: {scope_node.type}") + + logger.debug(f"Variable '{variable_name}' not found in any scope") + return None + + +def resolve_variable_in_local_scope(start_node, variable_name): + """ + Search for a variable declaration in the immediate local scope around the add_group call. + This handles cases where the variable is declared just before the add_group call. 
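+    Illustrative shape of the declaration this looks for:
+        auto group_name = prometheus_sanitize::metrics_name("cluster");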
+ """ + logger.debug(f"Searching for variable '{variable_name}' in local scope") + + # First, try to find the enclosing function/method + func_node = start_node + while func_node and func_node.type not in ['function_definition', 'method_definition']: + func_node = func_node.parent + + if not func_node: + logger.debug("No function/method definition found") + return None + + # Get the function body (compound_statement) + body_node = None + for child in func_node.children: + if child.type == 'compound_statement': + body_node = child + break + + if not body_node: + logger.debug("No function body found") + return None + + # Search all declarations in the function body + def search_in_node(node): + if node.type == 'declaration': + # Check for auto group_name = prometheus_sanitize::metrics_name(...); + for child in node.children: + if child.type == 'init_declarator': + declarator = child.child_by_field_name('declarator') + initializer = child.child_by_field_name('value') + + if declarator and initializer: + # Handle 'auto' type declarations + if declarator.type == 'identifier' and declarator.text.decode('utf-8') == variable_name: + # Found our variable! + if initializer.type == 'call_expression': + func_node = initializer.child_by_field_name('function') + if func_node and '::metrics_name' in func_node.text.decode('utf-8'): + args_node = initializer.child_by_field_name('arguments') + if args_node and args_node.named_child_count > 0: + first_arg = args_node.named_children[0] + if first_arg.type == 'string_literal': + return unquote_string(first_arg.text.decode('utf-8')) + + # Recursively search children + for child in node.children: + result = search_in_node(child) + if result: + return result + + return None + + return search_in_node(body_node) + + +def resolve_variable_in_function_scope(start_node, variable_name): + """ + Search for a variable declaration within the entire function scope. + This is the most aggressive search strategy. 
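+    Unlike the local-scope search, this also matches plain assignments of the form
+    `group_name = prometheus_sanitize::metrics_name("...")` anywhere in the function body.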
+ """ + logger.debug(f"Searching for variable '{variable_name}' in function scope") + + # Go up to the function definition + func_node = start_node + while func_node and func_node.type not in ['function_definition', 'lambda_expression', 'method_definition']: + func_node = func_node.parent + + if not func_node: + logger.debug("No function definition found for function scope search") + return None + + logger.debug(f"Found function node: {func_node.type}") + + # Search the entire function body recursively + def search_in_function(node, depth=0): + indent = " " * depth + logger.debug(f"{indent}Searching node type: {node.type}") + + # Print the text for declarations to help debug + if node.type in ['declaration', 'expression_statement']: + text = node.text.decode('utf-8')[:100] + logger.debug(f"{indent}Found {node.type}: {text}...") + + if node.type == 'declaration': + result = extract_variable_from_declaration(node, variable_name) + if result: + logger.debug(f"{indent}Found variable in declaration: {result}") + return result + elif node.type == 'expression_statement': + # Some variable declarations might be parsed as expression statements + for child in node.children: + if child.type == 'assignment_expression': + left = child.child_by_field_name('left') + right = child.child_by_field_name('right') + if left and right and left.text.decode('utf-8') == variable_name: + if right.type == 'call_expression': + func_call = right.child_by_field_name('function') + if func_call and '::metrics_name' in func_call.text.decode('utf-8'): + args_node = right.child_by_field_name('arguments') + if args_node and args_node.named_child_count > 0: + first_arg = args_node.named_children[0] + if first_arg.type == 'string_literal': + result = unquote_string(first_arg.text.decode('utf-8')) + logger.debug(f"{indent}Found variable in assignment: {result}") + return result + + # Search all children + for child in node.children: + result = search_in_function(child, depth + 1) + if result: + return result + + return None + + return search_in_function(func_node) + + +def resolve_variable_forward_in_function(start_node, variable_name): + """ + Search forward from the current position for variable declarations. + This handles cases where variables are declared after they're referenced in metric definitions + but before the add_group call. 
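+    Note: only top-level statements of the enclosing function body are scanned;
+    declarations inside nested blocks are not visited.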
+ """ + logger.debug(f"Searching forward for variable '{variable_name}'") + + # Go up to the function definition + func_node = start_node + while func_node and func_node.type not in ['function_definition', 'lambda_expression', 'method_definition']: + func_node = func_node.parent + + if not func_node: + logger.debug("No function definition found for forward search") + return None + + # Find the function body + body_node = None + for child in func_node.children: + if child.type == 'compound_statement': + body_node = child + break + + if not body_node: + logger.debug("No function body found for forward search") + return None + + # Search through all statements in the function body + for statement in body_node.children: + if statement.type == 'declaration': + result = extract_variable_from_declaration(statement, variable_name) + if result: + logger.debug(f"Found forward declaration: {result}") + return result + elif statement.type == 'expression_statement': + # Check for assignment expressions + for child in statement.children: + if child.type == 'assignment_expression': + left = child.child_by_field_name('left') + right = child.child_by_field_name('right') + if left and right and left.text.decode('utf-8') == variable_name: + if right.type == 'call_expression': + func_call = right.child_by_field_name('function') + if func_call and '::metrics_name' in func_call.text.decode('utf-8'): + args_node = right.child_by_field_name('arguments') + if args_node and args_node.named_child_count > 0: + first_arg = args_node.named_children[0] + if first_arg.type == 'string_literal': + result = unquote_string(first_arg.text.decode('utf-8')) + logger.debug(f"Found forward assignment: {result}") + return result + + return None + + +def extract_variable_from_declaration(declaration_node, variable_name): + """ + Extract the value of a variable from a declaration node if it matches the variable name. + Handles patterns like: + - const auto group_name = prometheus_sanitize::metrics_name("..."); + - constexpr static auto cluster_metric_prefix = "cluster"; + """ + for child in declaration_node.children: + if child.type == 'init_declarator': + declarator = child.child_by_field_name('declarator') + initializer = child.child_by_field_name('value') + + if declarator and initializer: + declarator_text = declarator.text.decode('utf-8') + if variable_name in declarator_text: + # Check if the initializer is a call to prometheus_sanitize::metrics_name + if initializer.type == 'call_expression': + func_node = initializer.child_by_field_name('function') + if func_node and '::metrics_name' in func_node.text.decode('utf-8'): + args_node = initializer.child_by_field_name('arguments') + if args_node and args_node.named_child_count > 0: + first_arg = args_node.named_children[0] + if first_arg.type == 'string_literal': + return unquote_string(first_arg.text.decode('utf-8')) + # Also check for simple string literal assignment (like constexpr static auto cluster_metric_prefix = "cluster") + elif initializer.type == 'string_literal': + return unquote_string(initializer.text.decode('utf-8')) + return None + + +def find_any_group_name_in_file(start_node): + """ + Last resort: search the entire file for any variable that's assigned + a prometheus_sanitize::metrics_name value, regardless of variable name. 
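+    Returns the first metrics_name assignment found in traversal order, so the
+    result is a best-effort guess rather than a guaranteed match for this metric.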
+ """ + logger.debug("Searching entire file for any metrics_name assignment") + + # Go to the root of the file + root_node = start_node + while root_node.parent: + root_node = root_node.parent + + # Search the entire file for any prometheus_sanitize::metrics_name call + def search_entire_file(node): + if node.type == 'declaration': + # Look for any variable declared with prometheus_sanitize::metrics_name + for child in node.children: + if child.type == 'init_declarator': + declarator = child.child_by_field_name('declarator') + initializer = child.child_by_field_name('value') + + if declarator and initializer: + # Check if the initializer is a call to prometheus_sanitize::metrics_name + if initializer.type == 'call_expression': + func_node = initializer.child_by_field_name('function') + if func_node and '::metrics_name' in func_node.text.decode('utf-8'): + args_node = initializer.child_by_field_name('arguments') + if args_node and args_node.named_child_count > 0: + first_arg = args_node.named_children[0] + if first_arg.type == 'string_literal': + result = unquote_string(first_arg.text.decode('utf-8')) + declarator_text = declarator.text.decode('utf-8') + logger.debug(f"Found metrics_name assignment in file: {declarator_text} = {result}") + return result + + # Also check for assignment expressions + if node.type == 'assignment_expression': + left = node.child_by_field_name('left') + right = node.child_by_field_name('right') + if left and right: + if right.type == 'call_expression': + func_node = right.child_by_field_name('function') + if func_node and '::metrics_name' in func_node.text.decode('utf-8'): + args_node = right.child_by_field_name('arguments') + if args_node and args_node.named_child_count > 0: + first_arg = args_node.named_children[0] + if first_arg.type == 'string_literal': + result = unquote_string(first_arg.text.decode('utf-8')) + left_text = left.text.decode('utf-8') + logger.debug(f"Found metrics_name assignment in file: {left_text} = {result}") + return result + + # Search all children recursively + for child in node.children: + result = search_entire_file(child) + if result: + return result + + return None + + return search_entire_file(root_node) + + +def find_any_metrics_name_in_file(start_node, file_path): + """ + Enhanced search: find ANY variable in the file that's assigned a prometheus_sanitize::metrics_name value. + This handles cases where the variable name is not 'group_name' (e.g., 'cluster_metrics_name'). 
+ """ + logger.debug(f"Enhanced file-wide search for metrics_name declarations in {file_path}") + + # Go to the root of the file + root_node = start_node + while root_node.parent: + root_node = root_node.parent + + def search_any_metrics_name(node): + if node.type == 'declaration': + # Look for any variable declared with prometheus_sanitize::metrics_name + for child in node.children: + if child.type == 'init_declarator': + declarator = child.child_by_field_name('declarator') + initializer = child.child_by_field_name('value') + + if declarator and initializer: + if initializer.type == 'call_expression': + func_node = initializer.child_by_field_name('function') + if func_node and '::metrics_name' in func_node.text.decode('utf-8'): + args_node = initializer.child_by_field_name('arguments') + if args_node and args_node.named_child_count > 0: + first_arg = args_node.named_children[0] + if first_arg.type == 'string_literal': + result = unquote_string(first_arg.text.decode('utf-8')) + var_name = declarator.text.decode('utf-8') + logger.debug(f"Found metrics_name declaration: {var_name} = '{result}'") + return result + + # Also check for assignment expressions (not just declarations) + if node.type == 'assignment_expression': + left = node.child_by_field_name('left') + right = node.child_by_field_name('right') + if left and right: + if right.type == 'call_expression': + func_node = right.child_by_field_name('function') + if func_node and '::metrics_name' in func_node.text.decode('utf-8'): + args_node = right.child_by_field_name('arguments') + if args_node and args_node.named_child_count > 0: + first_arg = args_node.named_children[0] + if first_arg.type == 'string_literal': + result = unquote_string(first_arg.text.decode('utf-8')) + var_name = left.text.decode('utf-8') + logger.debug(f"Found metrics_name assignment: {var_name} = '{result}'") + return result + + # Search all children recursively + for child in node.children: + result = search_any_metrics_name(child) + if result: + return result + + return None + + return search_any_metrics_name(root_node) + + +def infer_group_name_from_path(file_path): + """ + Programmatic inference of group names from file paths with common patterns. + """ + path_str = str(file_path).lower() + file_parts = path_str.split('/') + + # Define path-based inference rules + inference_rules = [ + # Pattern: (path_contains, additional_condition, group_name) + (['kafka', 'quota'], lambda p: 'quota' in p, "kafka:quotas"), + (['datalake', 'translation'], lambda p: 'translation' in p, "iceberg:translation"), + (['iceberg', 'rest_client'], lambda p: 'rest_client' in p, "iceberg:rest_client"), + (['cluster', 'partition'], lambda p: 'partition' in p, "cluster:partition"), + (['debug_bundle'], lambda p: True, "debug_bundle"), + (['kafka'], lambda p: True, "kafka"), + (['cluster'], lambda p: True, "cluster"), + (['iceberg'], lambda p: True, "iceberg"), + (['storage'], lambda p: 'cloud' in p, "cloud_storage"), + ] + + # Apply inference rules + for path_keywords, condition, group_name in inference_rules: + if all(keyword in file_parts for keyword in path_keywords) and condition(path_str): + return group_name + + # Default fallback + return "unknown" + + +def find_group_name_from_ast(metric_call_expr_node): + """ + Traverse up the AST from a metric definition to find the enclosing + add_group call and extract its name. This is more reliable than regex. 
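+    Thin wrapper around find_group_name_and_type_from_ast that discards the
+    internal/external classification.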
+ """ + group_name, _ = find_group_name_and_type_from_ast(metric_call_expr_node, None) + return group_name + + +def construct_full_metric_name(group_name, metric_name, metric_type="external"): + """Construct the full Prometheus metric name from group and metric name""" + # Add debug logging + if not group_name or group_name == "unknown": + # Fallback based on metric type + if metric_type == "internal": + result = f"vectorized_{metric_name}" + else: + result = f"redpanda_{metric_name}" + return result + + # Sanitize the group name: replace special characters with underscores. + sanitized_group = group_name.replace(':', '_').replace('-', '_') + + # Ensure the correct prefix is present based on metric type + if metric_type == "internal": + # Internal metrics should have vectorized_ prefix + if not sanitized_group.startswith('vectorized_'): + full_group_name = f"vectorized_{sanitized_group}" + else: + full_group_name = sanitized_group + else: + # External metrics should have redpanda_ prefix + if not sanitized_group.startswith('redpanda_'): + full_group_name = f"redpanda_{sanitized_group}" + else: + full_group_name = sanitized_group + + # The full metric name is: _ + result = f"{full_group_name}_{metric_name}" + return result + + +def parse_seastar_replicated_metrics(tree_root, source_code, file_path): + """Parse seastar replicated metrics from seastar::metrics::replicate_metric_families calls""" + metrics_bag = MetricsBag() + + # Look ONLY for seastar::metrics::replicate_metric_families calls + def find_replicate_calls(node): + if node.type == 'call_expression': + function_node = node.child_by_field_name('function') + if function_node: + function_text = function_node.text.decode('utf-8') + # Be very specific - must be exactly replicate_metric_families + if 'replicate_metric_families' in function_text and 'seastar::metrics::' in function_text: + logger.debug(f"Found seastar replicate_metric_families call in {file_path}") + args_node = node.child_by_field_name('arguments') + if args_node: + # Look for the array of metric names + for child in args_node.children: + if child.type == 'initializer_list': + # This is the array of {"metric_name", handle} pairs + for item in child.children: + if item.type == 'initializer_list': + # Each item is {"metric_name", handle} + metric_items = [c for c in item.children if c.type == 'string_literal'] + if metric_items: + metric_name = unquote_string(metric_items[0].text.decode('utf-8')) + if metric_name: + logger.debug(f"Found replicated seastar metric: {metric_name}") + # Seastar metrics are typically in the "application" group + full_metric_name = f"redpanda_{metric_name}" + + metrics_bag.add_metric( + name=metric_name, + metric_type="gauge", # Most seastar metrics are gauges + description=f"Seastar replicated metric: {metric_name}", + labels=[], + file=str(file_path), + constructor="seastar_replicated", + group_name="application", + full_name=full_metric_name, + internal_external_type="public", + line_number=node.start_point[0] + 1 + ) + + # Search children recursively + for child in node.children: + find_replicate_calls(child) + + find_replicate_calls(tree_root) + return metrics_bag + + +def parse_direct_seastar_metrics(tree_root, source_code, file_path): + """Parse direct ss::metrics calls like sm::make_gauge ONLY in specific contexts""" + metrics_bag = MetricsBag() + + # Look for ss::metrics or sm:: calls but ONLY in the application.cc context + # This is a very specific pattern that should not interfere with regular metrics + if 'application.cc' not in 
str(file_path): + return metrics_bag # Only process application.cc for direct seastar metrics + + def find_direct_seastar_calls(node): + if node.type == 'call_expression': + function_node = node.child_by_field_name('function') + if function_node: + function_text = function_node.text.decode('utf-8') + + # Be very specific - must be sm:: prefix AND in the right context + seastar_type = None + if function_text == 'sm::make_gauge': + seastar_type = 'gauge' + elif function_text == 'sm::make_counter': + seastar_type = 'counter' + elif function_text == 'sm::make_histogram': + seastar_type = 'histogram' + + # Also check for direct ss::metrics calls + if not seastar_type: + if function_text == 'ss::metrics::make_gauge': + seastar_type = 'gauge' + elif function_text == 'ss::metrics::make_counter': + seastar_type = 'counter' + elif function_text == 'ss::metrics::make_histogram': + seastar_type = 'histogram' + + if seastar_type: + # Additional check: must be in a specific function context + # Look for setup_public_metrics or similar function + current = node.parent + in_correct_function = False + while current: + if current.type == 'function_definition': + # Check if this is the setup_public_metrics function + for child in current.children: + if child.type == 'function_declarator': + func_name = child.text.decode('utf-8') + if 'setup_public_metrics' in func_name: + in_correct_function = True + break + break + current = current.parent + + if not in_correct_function: + return # Skip if not in the right function + + args_node = node.child_by_field_name('arguments') + if args_node and args_node.named_child_count > 0: + # First argument is typically the metric name + first_arg = args_node.named_children[0] + if first_arg.type == 'string_literal': + metric_name = unquote_string(first_arg.text.decode('utf-8')) + + # Try to find description from subsequent arguments + description = f"Seastar direct metric: {metric_name}" + for i in range(1, args_node.named_child_count): + arg = args_node.named_children[i] + if arg.type == 'call_expression': + # Look for sm::description() calls + desc_func = arg.child_by_field_name('function') + if desc_func and 'description' in desc_func.text.decode('utf-8'): + desc_args = arg.child_by_field_name('arguments') + if desc_args and desc_args.named_child_count > 0: + desc_arg = desc_args.named_children[0] + if desc_arg.type == 'string_literal': + description = unquote_string(desc_arg.text.decode('utf-8')) + break + + logger.debug(f"Found direct seastar metric: {metric_name}") + full_metric_name = f"redpanda_{metric_name}" + + metrics_bag.add_metric( + name=metric_name, + metric_type=seastar_type, + description=description, + labels=[], + file=str(file_path), + constructor=f"seastar_{seastar_type}", + group_name="application", + full_name=full_metric_name, + internal_external_type="public", + line_number=node.start_point[0] + 1 + ) + + # Search children recursively + for child in node.children: + find_direct_seastar_calls(child) + + find_direct_seastar_calls(tree_root) + return metrics_bag + + +def parse_cpp_file(file_path, treesitter_parser, cpp_language, filter_namespace=None): + """Parse a single C++ file for metrics definitions""" + # Only show debug info in verbose mode + + source_code = get_file_contents(file_path) + if not source_code: + return MetricsBag() + + try: + tree = treesitter_parser.parse(source_code) + except Exception as e: + logger.warning(f"Failed to parse {file_path}: {e}") + return MetricsBag() + + metrics_bag = MetricsBag() + + # TODO: Add seastar metrics 
parsing later - currently disabled to avoid contamination + # First, parse seastar metrics + # seastar_replicated = parse_seastar_replicated_metrics(tree.root_node, source_code, file_path) + # metrics_bag.merge(seastar_replicated) + + # seastar_direct = parse_direct_seastar_metrics(tree.root_node, source_code, file_path) + # metrics_bag.merge(seastar_direct) + + # Then parse regular prometheus metrics + # A general query to find all function calls + simple_query = cpp_language.query("(call_expression) @call") + + try: + captures = simple_query.captures(tree.root_node) + + for node, _ in captures: + call_expr = node + function_identifier_node = call_expr.child_by_field_name("function") + if not function_identifier_node: + continue + + function_text = function_identifier_node.text.decode("utf-8", errors="ignore") + + metric_type = None + constructor = None + + # Check if this is a metrics function we're interested in + for func, m_type in FUNCTION_TO_TYPE.items(): + if func in function_text: + metric_type = m_type + constructor = func + break + + if metric_type: + # Found a metrics function, now extract its details + args_node = call_expr.child_by_field_name("arguments") + if args_node: + metric_name, description = extract_metric_details(args_node, source_code) + + if metric_name: + # Apply namespace filter if specified + if filter_namespace and not metric_name.startswith(filter_namespace): + continue + + # Use robust AST traversal to find the group name and metric type + group_name, internal_external_type = find_group_name_and_type_from_ast(call_expr, file_path) + + full_metric_name = construct_full_metric_name(group_name, metric_name, internal_external_type) + + # Get code context for labels + start_byte = call_expr.start_byte + end_byte = call_expr.end_byte + context_start = max(0, start_byte - 500) + context_end = min(len(source_code), end_byte + 500) + code_context = source_code[context_start:context_end].decode("utf-8", errors="ignore") + + labels = extract_labels_from_code(code_context) + + # CRITICAL SAFEGUARD: Never allow null group names + if group_name is None: + logger.error(f"CRITICAL: group_name is None for metric '{metric_name}' in {file_path}") + logger.error(f"File context: {metric_name} at line {call_expr.start_point[0] + 1}") + + # Enhanced emergency fallback: try to find any metrics_name declaration in the file + group_name = find_any_metrics_name_in_file(call_expr, file_path) + + if not group_name: + # Last resort: programmatic file path inference + group_name = infer_group_name_from_path(file_path) + logger.warning(f"Emergency fallback: inferred group_name='{group_name}' from file path") + else: + logger.warning(f"Emergency fallback: found group_name='{group_name}' via file-wide search") + + # CRITICAL: Recalculate full_metric_name with the corrected group_name + full_metric_name = construct_full_metric_name(group_name, metric_name, internal_external_type) + logger.debug(f"Recalculated full_metric_name after emergency fallback: {full_metric_name}") + + metrics_bag.add_metric( + name=metric_name, + metric_type=metric_type, + description=description, + labels=labels, + file=str(file_path.relative_to(Path.cwd()) if file_path.is_absolute() else file_path), + constructor=constructor, + line_number=call_expr.start_point[0] + 1, + group_name=group_name, + full_name=full_metric_name, + internal_external_type=internal_external_type # Add the new field + ) + + except Exception as e: + logger.warning(f"Query failed on {file_path}: {e}") + + return metrics_bag + + +def 
extract_metric_details(args_node, source_code): + """Extract metric name and description from argument list""" + metric_name = "" + description = "" + + # Find all string literals and their positions + string_literals = [] + + def collect_string_info(node): + """Recursively find all string literals with their positions""" + if node.type == "string_literal": + text = node.text.decode("utf-8", errors="ignore") + unquoted = unquote_string(text) + start_pos = node.start_point + end_pos = node.end_point + string_literals.append({ + 'text': unquoted, + 'start': start_pos, + 'end': end_pos, + 'raw': text + }) + for child in node.children: + collect_string_info(child) + + collect_string_info(args_node) + + # Sort string literals by their position in the source + string_literals.sort(key=lambda x: (x['start'][0], x['start'][1])) + + # First string literal is the metric name + if string_literals: + metric_name = string_literals[0]['text'] + + # Look for description by finding sm::description() calls or consecutive string literals + args_text = args_node.text.decode("utf-8", errors="ignore") + + if "description" in args_text: + # Improved AST-based approach to find all strings in description + description_strings = [] + found_description = False + + for i, str_info in enumerate(string_literals): + # Skip the first string which is the metric name + if i == 0: + continue + + # Get the full args context and find position of this string + str_pos = args_text.find(str_info['raw']) + if str_pos != -1: + context_before = args_text[:str_pos] + + # Check if this string comes after "description" in the context + if "description" in context_before and not found_description: + found_description = True + description_strings.append(str_info['text']) + + # Look ahead to collect all consecutive string literals + # that are part of the same description (C++ auto-concatenation) + for j in range(i + 1, len(string_literals)): + next_str = string_literals[j] + next_pos = args_text.find(next_str['raw']) + + if next_pos != -1: + # Check if there's only whitespace/comments between strings + between_text = args_text[str_pos + len(str_info['raw']):next_pos] + + # Clean up the between text - remove comments and normalize whitespace + between_clean = re.sub(r'//.*?$', '', between_text, flags=re.MULTILINE) + between_clean = re.sub(r'/\*.*?\*/', '', between_clean, flags=re.DOTALL) + between_clean = between_clean.strip() + + # If only whitespace/punctuation between strings, they're concatenated + if not between_clean or all(c in ' \t\n\r,)' for c in between_clean): + description_strings.append(next_str['text']) + str_info = next_str # Update position for next iteration + str_pos = next_pos + else: + # Found something else, stop collecting + break + else: + break + break + + # Join all collected description strings + if description_strings: + description = ''.join(description_strings) + elif len(string_literals) > 1: + # Final fallback: just use the second string literal + description = string_literals[1]['text'] + + # Filter out descriptions with unresolved format placeholders + if description and '{}' in description: + description = "" + + return metric_name, description + + +def extract_metrics_from_files(cpp_files, treesitter_parser, cpp_language, filter_namespace=None): + """Extract metrics from multiple C++ files""" + all_metrics = MetricsBag() + + for file_path in cpp_files: + try: + file_metrics = parse_cpp_file(file_path, treesitter_parser, cpp_language, filter_namespace) + all_metrics.merge(file_metrics) + except 
Exception as e: + logger.warning(f"Failed to process {file_path}: {e}") + continue + + return all_metrics diff --git a/tools/metrics-extractor/requirements.txt b/tools/metrics-extractor/requirements.txt new file mode 100644 index 0000000..030b250 --- /dev/null +++ b/tools/metrics-extractor/requirements.txt @@ -0,0 +1,2 @@ +tree_sitter==0.21.1 +setuptools>=42.0.0 diff --git a/tools/metrics-extractor/tests/test_extraction.py b/tools/metrics-extractor/tests/test_extraction.py new file mode 100644 index 0000000..006b16d --- /dev/null +++ b/tools/metrics-extractor/tests/test_extraction.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +""" +Test sample C++ code to validate metrics extraction +""" + +# Sample C++ code with various metric constructors +SAMPLE_CPP_CODE = ''' +#include + +namespace redpanda { + +class kafka_server { +public: + kafka_server() { + setup_metrics(); + } + +private: + void setup_metrics() { + _metrics.add_group("kafka", { + sm::make_gauge( + "requests_total", + [this] { return _total_requests; }, + sm::description("Total number of Kafka requests processed")), + + sm::make_counter( + "bytes_received_total", + [this] { return _bytes_received; }, + sm::description("Total bytes received from Kafka clients")), + + sm::make_histogram( + "request_latency_seconds", + sm::description("Latency histogram of Kafka requests")), + + sm::make_total_bytes( + "memory_usage_bytes", + [this] { return _memory_used; }, + sm::description("Current memory usage in bytes")), + + ss::metrics::make_total_operations( + "operations_total", + [this] { return _operations; }, + ss::metrics::description("Total operations performed")), + + ss::metrics::make_current_bytes( + "cache_size_bytes", + [this] { return _cache_size; }, + ss::metrics::description("Current cache size in bytes")) + }); + } + + uint64_t _total_requests = 0; + uint64_t _bytes_received = 0; + uint64_t _memory_used = 0; + uint64_t _operations = 0; + uint64_t _cache_size = 0; + ss::metrics::metric_groups _metrics; +}; + +} // namespace redpanda +''' + +def test_sample_extraction(): + """Test that the sample code extracts expected metrics""" + import tempfile + import os + from pathlib import Path + + # Write sample code to temporary file + with tempfile.NamedTemporaryFile(mode='w', suffix='.cc', delete=False) as f: + f.write(SAMPLE_CPP_CODE) + temp_file = f.name + + try: + # Import and test the parser + from metrics_parser import parse_cpp_file, get_treesitter_cpp_parser_and_language + + # Initialize tree-sitter (this will download and compile if needed) + parser, language = get_treesitter_cpp_parser_and_language("tree-sitter", "tree-sitter-cpp.so") + + # Parse the file + metrics_bag = parse_cpp_file(Path(temp_file), parser, language, filter_namespace="redpanda") + + # Check results + all_metrics = metrics_bag.get_all_metrics() + print(f"Found {len(all_metrics)} metrics:") + + expected_metrics = [ + ("requests_total", "gauge"), + ("bytes_received_total", "counter"), + ("request_latency_seconds", "histogram"), + ("memory_usage_bytes", "counter"), + ("operations_total", "counter"), + ("cache_size_bytes", "gauge") + ] + + for metric_name, expected_type in expected_metrics: + if metric_name in all_metrics: + metric = all_metrics[metric_name] + print(f" βœ“ {metric_name} ({metric['type']}) - {metric.get('description', 'No description')}") + assert metric['type'] == expected_type, f"Expected {expected_type}, got {metric['type']}" + else: + print(f" βœ— {metric_name} - NOT FOUND") + + print(f"\nStatistics: {metrics_bag.get_statistics()}") + + 
finally: + # Clean up + os.unlink(temp_file) + + +if __name__ == "__main__": + test_sample_extraction() diff --git a/tools/metrics-extractor/tree-sitter-cpp.so b/tools/metrics-extractor/tree-sitter-cpp.so new file mode 100644 index 0000000..ac87c29 Binary files /dev/null and b/tools/metrics-extractor/tree-sitter-cpp.so differ diff --git a/tools/metrics-extractor/validate.py b/tools/metrics-extractor/validate.py new file mode 100644 index 0000000..c0ac3d9 --- /dev/null +++ b/tools/metrics-extractor/validate.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 +""" +Integration script to validate the complete metrics extraction pipeline +""" +import os +import sys +import json +import subprocess +from pathlib import Path + + +def check_dependencies(): + """Check if all required dependencies are available""" + print("πŸ”§ Checking dependencies...") + + try: + import tree_sitter + print(" βœ“ tree-sitter is available") + except ImportError: + print(" ❌ tree-sitter not found. Install with: pip install tree-sitter") + return False + + # Check if we can import our modules + try: + from metrics_parser import build_treesitter_cpp_library, extract_metrics_from_files + from metrics_bag import MetricsBag + print(" βœ“ All custom modules are available") + except ImportError as e: + print(f" ❌ Import error: {e}") + return False + + return True + + +def test_end_to_end(): + """Test the complete extraction pipeline""" + print("\nπŸ§ͺ Running end-to-end test...") + + try: + # Run the example script + result = subprocess.run([ + sys.executable, "example.py", "--verbose" + ], capture_output=True, text=True) + + if result.returncode == 0: + print(" βœ“ Example script completed successfully") + return True + else: + print(f" ❌ Example script failed:") + print(f" stdout: {result.stdout}") + print(f" stderr: {result.stderr}") + return False + except Exception as e: + print(f" ❌ Error running example: {e}") + return False + + +def validate_cli_integration(): + """Validate that the CLI integration works""" + print("\nπŸ”— Validating CLI integration...") + + # Check if the doc-tools.js file has our new command + doc_tools_path = Path("../../bin/doc-tools.js") + if not doc_tools_path.exists(): + print(" ❌ doc-tools.js not found") + return False + + with open(doc_tools_path, 'r') as f: + content = f.read() + + if 'source-metrics-docs' in content: + print(" βœ“ source-metrics-docs command found in doc-tools.js") + else: + print(" ❌ source-metrics-docs command not found in doc-tools.js") + return False + + if 'verifyMetricsExtractorDependencies' in content: + print(" βœ“ verifyMetricsExtractorDependencies function found") + else: + print(" ❌ verifyMetricsExtractorDependencies function not found") + return False + + return True + + +def generate_usage_summary(): + """Generate a summary of how to use the new automation""" + print("\nπŸ“‹ Usage Summary") + print("================") + print() + print("The new Redpanda metrics automation has been successfully created!") + print() + print("πŸ”§ Setup:") + print(" 1. cd tools/metrics-extractor") + print(" 2. make setup-venv") + print(" 3. 
make install-deps") + print() + print("πŸš€ Usage:") + print(" β€’ Extract from dev branch:") + print(" make build TAG=dev") + print() + print(" β€’ Extract from specific version:") + print(" make build TAG=v23.3.1") + print() + print(" β€’ Extract from local Redpanda repo:") + print(" make extract-local REDPANDA_PATH=/path/to/redpanda") + print() + print(" β€’ CLI integration:") + print(" npx doc-tools generate source-metrics-docs --tag=dev") + print() + print("πŸ“Š Output files:") + print(" β€’ autogenerated/{TAG}/source-metrics/metrics.json") + print(" β€’ autogenerated/{TAG}/source-metrics/metrics.adoc") + print() + print("πŸ†š Comparison with existing metrics:") + print(" python compare_metrics.py autogenerated/dev/source-metrics/metrics.json") + print() + print("πŸ“ Key differences from the current metrics automation:") + print(" β€’ Extracts metrics directly from C++ source code") + print(" β€’ Uses tree-sitter for robust parsing") + print(" β€’ Captures ALL metrics defined in source, not just exposed ones") + print(" β€’ Provides file locations and constructor information") + print(" β€’ Works offline without requiring a running cluster") + print() + print("πŸ” Supported metric constructors:") + print(" β€’ sm::make_gauge") + print(" β€’ sm::make_counter") + print(" β€’ sm::make_histogram") + print(" β€’ sm::make_total_bytes") + print(" β€’ sm::make_derive") + print(" β€’ ss::metrics::make_total_operations") + print(" β€’ ss::metrics::make_current_bytes") + + +def main(): + """Main integration validation""" + print("πŸš€ Redpanda Metrics Extractor Integration Test") + print("===============================================") + + # Change to the metrics-extractor directory + os.chdir(Path(__file__).parent) + + success = True + + # Check dependencies + if not check_dependencies(): + success = False + + # Test end-to-end functionality + if success and not test_end_to_end(): + success = False + + # Validate CLI integration + if not validate_cli_integration(): + print(" ⚠️ CLI integration validation failed, but automation should still work") + + if success: + print("\nπŸŽ‰ All tests passed!") + generate_usage_summary() + return 0 + else: + print("\n❌ Some tests failed. Please check the errors above.") + print("\nFor manual testing:") + print(" python example.py") + return 1 + + +if __name__ == "__main__": + exit(main()) diff --git a/tools/metrics/compare_metrics.py b/tools/metrics/compare_metrics.py new file mode 100644 index 0000000..8e06c8d --- /dev/null +++ b/tools/metrics/compare_metrics.py @@ -0,0 +1,365 @@ +#!/usr/bin/env python3 +""" +AsciiDoc Metrics Comparison Tool + +This script compares two AsciiDoc files containing Redpanda metrics documentation. +It extracts metric information from both files and provides detailed comparison results. +Handles different heading levels (== vs ===) for the same metrics. 
+""" + +import re +import argparse +from typing import Dict, List, Tuple, Optional +from dataclasses import dataclass +from difflib import SequenceMatcher + + +@dataclass +class Metric: + """Represents a single metric with its properties.""" + name: str + description: str + type_info: str + labels: List[str] + usage: str + related_topics: List[str] + raw_content: str + heading_level: str # Added to track original heading level + + +class MetricsParser: + """Parser for extracting metrics from AsciiDoc files.""" + + def __init__(self): + # Updated pattern to match both == and === metric sections + # This pattern captures metrics that start with redpanda_, vectorized_, or similar prefixes + self.metric_pattern = re.compile( + r'^(={2,3})\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*\n\n(.*?)(?=\n={2,3}|\n=(?!=)|\Z)', + re.DOTALL | re.MULTILINE + ) + + def parse_file(self, content: str) -> Dict[str, Metric]: + """Parse AsciiDoc content and extract metrics.""" + metrics = {} + + matches = self.metric_pattern.findall(content) + + for match in matches: + heading_level = match[0] # == or === + metric_name = match[1].strip() + metric_content = match[2].strip() + + # Only process if it looks like a metric name (contains underscore and doesn't start with uppercase) + if '_' in metric_name and not metric_name[0].isupper(): + try: + metric = self._parse_metric_content(metric_name, metric_content, heading_level) + metrics[metric_name] = metric + except Exception as e: + print(f"Warning: Failed to parse metric {metric_name}: {e}") + + return metrics + + def _parse_metric_content(self, name: str, content: str, heading_level: str) -> Metric: + """Parse individual metric content.""" + lines = content.split('\n') + + # Extract description (first non-empty line before *Type*) + description = "" + type_info = "" + labels = [] + usage = "" + related_topics = [] + + i = 0 + # Get description + while i < len(lines): + line = lines[i].strip() + if line and not line.startswith('*Type*'): + description = line + break + i += 1 + + # Extract other fields + current_section = None + section_content = [] + + for line in lines: + line = line.strip() + + if line.startswith('*Type*:'): + if current_section: + self._process_section(current_section, section_content, locals()) + current_section = 'type' + section_content = [line.replace('*Type*:', '').strip()] + + elif line.startswith('*Labels*:'): + if current_section: + self._process_section(current_section, section_content, locals()) + current_section = 'labels' + section_content = [] + + elif line.startswith('*Usage*:'): + if current_section: + self._process_section(current_section, section_content, locals()) + current_section = 'usage' + section_content = [] + + elif line.startswith('*Related topics*:'): + if current_section: + self._process_section(current_section, section_content, locals()) + current_section = 'related' + section_content = [] + + elif line.startswith('---'): + if current_section: + self._process_section(current_section, section_content, locals()) + break + + elif current_section and line: + section_content.append(line) + + # Process final section + if current_section: + self._process_section(current_section, section_content, locals()) + + return Metric( + name=name, + description=description, + type_info=type_info, + labels=labels, + usage=usage, + related_topics=related_topics, + raw_content=content, + heading_level=heading_level + ) + + def _process_section(self, section: str, content: List[str], local_vars: dict): + """Process content for specific sections.""" + if 
section == 'type': + local_vars['type_info'] = ' '.join(content).strip() + elif section == 'labels': + # Extract labels, handling various formats + for line in content: + if line.startswith('*') or line.startswith('-'): + # Remove markdown formatting and extract label + clean_line = re.sub(r'[*`-]', '', line).strip() + if clean_line: + local_vars['labels'].append(clean_line) + elif section == 'usage': + local_vars['usage'] = ' '.join(content).strip() + elif section == 'related': + local_vars['related_topics'] = content.copy() + + +class MetricsComparator: + """Compares two sets of metrics and provides detailed analysis.""" + + def __init__(self): + self.similarity_threshold = 0.8 + + def compare(self, file1_metrics: Dict[str, Metric], file2_metrics: Dict[str, Metric]) -> dict: + """Compare two sets of metrics and return detailed results.""" + + file1_names = set(file1_metrics.keys()) + file2_names = set(file2_metrics.keys()) + + # Find differences + only_in_file1 = file1_names - file2_names + only_in_file2 = file2_names - file1_names + common_metrics = file1_names & file2_names + + # Analyze common metrics for description improvements + improved_descriptions = [] + different_properties = [] + heading_level_differences = [] + + for metric_name in common_metrics: + metric1 = file1_metrics[metric_name] + metric2 = file2_metrics[metric_name] + + # Check for heading level differences + if metric1.heading_level != metric2.heading_level: + heading_level_differences.append({ + 'name': metric_name, + 'file1_level': metric1.heading_level, + 'file2_level': metric2.heading_level + }) + + # Compare descriptions + if metric1.description != metric2.description: + similarity = self._calculate_similarity(metric1.description, metric2.description) + + improved_descriptions.append({ + 'name': metric_name, + 'file1_desc': metric1.description, + 'file2_desc': metric2.description, + 'similarity': similarity, + 'likely_improvement': len(metric1.description) > len(metric2.description) and similarity > 0.5 + }) + + # Compare other properties + differences = self._compare_metric_properties(metric1, metric2) + if differences: + different_properties.append({ + 'name': metric_name, + 'differences': differences + }) + + return { + 'file1_unique': sorted(only_in_file1), + 'file2_unique': sorted(only_in_file2), + 'common_count': len(common_metrics), + 'improved_descriptions': improved_descriptions, + 'different_properties': different_properties, + 'heading_level_differences': heading_level_differences, + 'total_file1': len(file1_metrics), + 'total_file2': len(file2_metrics) + } + + def _calculate_similarity(self, text1: str, text2: str) -> float: + """Calculate similarity between two text strings.""" + return SequenceMatcher(None, text1.lower(), text2.lower()).ratio() + + def _compare_metric_properties(self, metric1: Metric, metric2: Metric) -> List[str]: + """Compare properties of two metrics and return list of differences.""" + differences = [] + + if metric1.type_info != metric2.type_info: + differences.append(f"Type: '{metric1.type_info}' vs '{metric2.type_info}'") + + if set(metric1.labels) != set(metric2.labels): + differences.append(f"Labels differ") + + if metric1.usage != metric2.usage: + differences.append(f"Usage differs") + + return differences + + +def print_comparison_results(results: dict, file1_name: str, file2_name: str): + """Print detailed comparison results.""" + + print(f"\n{'='*60}") + print(f"METRICS COMPARISON REPORT") + print(f"{'='*60}") + print(f"File 1 ({file1_name}): {results['total_file1']} 
metrics") + print(f"File 2 ({file2_name}): {results['total_file2']} metrics") + print(f"Common metrics: {results['common_count']}") + + # Heading level differences + if results['heading_level_differences']: + print(f"\nπŸ“ HEADING LEVEL DIFFERENCES:") + print(f" Count: {len(results['heading_level_differences'])}") + for item in results['heading_level_differences']: + print(f" - {item['name']}: {item['file1_level']} vs {item['file2_level']}") + + # Metrics only in file 1 (should be removed) + if results['file1_unique']: + print(f"\nπŸ—‘οΈ METRICS TO REMOVE (only in {file1_name}):") + print(f" Count: {len(results['file1_unique'])}") + for metric in results['file1_unique']: + print(f" - {metric}") + + # Metrics only in file 2 (missing from file 1) + if results['file2_unique']: + print(f"\nπŸ“ METRICS MISSING FROM {file1_name}:") + print(f" Count: {len(results['file2_unique'])}") + for metric in results['file2_unique']: + print(f" - {metric}") + + # Description improvements + if results['improved_descriptions']: + print(f"\n✨ POTENTIAL DESCRIPTION IMPROVEMENTS:") + print(f" Count: {len(results['improved_descriptions'])}") + + for item in results['improved_descriptions']: + print(f"\n πŸ“Š {item['name']}:") + print(f" Similarity: {item['similarity']:.2f}") + + if item['likely_improvement']: + print(f" πŸ” LIKELY IMPROVEMENT (File 1 has longer description)") + + print(f" File 1: {item['file1_desc'][:100]}{'...' if len(item['file1_desc']) > 100 else ''}") + print(f" File 2: {item['file2_desc'][:100]}{'...' if len(item['file2_desc']) > 100 else ''}") + + # Other property differences + if results['different_properties']: + print(f"\nπŸ”§ OTHER PROPERTY DIFFERENCES:") + print(f" Count: {len(results['different_properties'])}") + + for item in results['different_properties']: + print(f"\n πŸ“Š {item['name']}:") + for diff in item['differences']: + print(f" - {diff}") + + +def main(): + """Main function to run the comparison tool.""" + parser = argparse.ArgumentParser(description='Compare AsciiDoc metrics files') + parser.add_argument('file1', help='First AsciiDoc file (formatted)') + parser.add_argument('file2', help='Second AsciiDoc file (factual)') + parser.add_argument('--output', '-o', help='Output file for results') + parser.add_argument('--debug', action='store_true', help='Enable debug output') + + args = parser.parse_args() + + # Read files + try: + with open(args.file1, 'r', encoding='utf-8') as f: + content1 = f.read() + with open(args.file2, 'r', encoding='utf-8') as f: + content2 = f.read() + except FileNotFoundError as e: + print(f"Error: File not found - {e}") + return 1 + except Exception as e: + print(f"Error reading files: {e}") + return 1 + + # Parse metrics + parser = MetricsParser() + print("Parsing first file...") + metrics1 = parser.parse_file(content1) + print("Parsing second file...") + metrics2 = parser.parse_file(content2) + + if args.debug: + print(f"Debug: Found {len(metrics1)} metrics in file1") + print(f"Debug: Found {len(metrics2)} metrics in file2") + if metrics1: + print(f"Debug: Sample metrics from file1: {list(metrics1.keys())[:5]}") + if metrics2: + print(f"Debug: Sample metrics from file2: {list(metrics2.keys())[:5]}") + + # Compare metrics + comparator = MetricsComparator() + results = comparator.compare(metrics1, metrics2) + + # Print results + print_comparison_results(results, args.file1, args.file2) + + # Save to file if requested + if args.output: + try: + import sys + from io import StringIO + + # Capture output + old_stdout = sys.stdout + sys.stdout = 
captured_output = StringIO() + print_comparison_results(results, args.file1, args.file2) + sys.stdout = old_stdout + + # Write to file + with open(args.output, 'w', encoding='utf-8') as f: + f.write(captured_output.getvalue()) + + print(f"\nResults saved to: {args.output}") + except Exception as e: + print(f"Error saving output: {e}") + + return 0 + + +if __name__ == '__main__': + exit(main()) \ No newline at end of file diff --git a/tools/metrics/metrics.py b/tools/metrics/metrics.py index c6460a9..f9e8bbe 100644 --- a/tools/metrics/metrics.py +++ b/tools/metrics/metrics.py @@ -120,6 +120,27 @@ def parse_metrics(metrics_text): logging.info(f"Extracted {len(metrics)} metrics.") return metrics +def filter_metrics_for_docs(metrics): + """Filter metrics for documentation - remove duplicates and histogram suffixes.""" + filtered = {} + seen_names = set() # Track metric names to detect duplicates + + for name, data in metrics.items(): + # Skip histogram/summary suffixes + if name.endswith(('_bucket', '_count', '_sum')): + continue + + # Check for duplicate metric names + if name in seen_names: + logging.warning(f"Duplicate metric name found: {name}") + continue + + filtered[name] = data + seen_names.add(name) + + logging.info(f"Filtered from {len(metrics)} to {len(filtered)} metrics for documentation.") + return filtered + def output_asciidoc(metrics, adoc_file): """Output metrics as AsciiDoc.""" with open(adoc_file, "w") as f: @@ -159,8 +180,8 @@ def ensure_directory_exists(directory): repo_root = os.getcwd() gen_path = os.path.join(repo_root, "autogenerated") if not os.path.isdir(gen_path): - logging.error(f"autogenerated folder not found at: {gen_path}") - sys.exit(1) + logging.info(f"Creating autogenerated folder at: {gen_path}") + os.makedirs(gen_path, exist_ok=True) # Build the output directory using the already provided tag_modified. output_dir = os.path.join(gen_path, tag_modified, "metrics") @@ -183,7 +204,11 @@ def ensure_directory_exists(directory): logging.error("No internal metrics retrieved.") internal_metrics = {} - # Merge public and internal metrics. + # Filter metrics for documentation + public_metrics_filtered = filter_metrics_for_docs(public_metrics) + internal_metrics_filtered = filter_metrics_for_docs(internal_metrics) + + # Merge public and internal metrics (unfiltered for JSON) merged_metrics = { "public": public_metrics, "internal": internal_metrics @@ -195,5 +220,5 @@ def ensure_directory_exists(directory): INTERNAL_ASCIIDOC_OUTPUT_FILE = os.path.join(output_dir, "internal-metrics.adoc") output_json(merged_metrics, JSON_OUTPUT_FILE) - output_asciidoc(public_metrics, ASCIIDOC_OUTPUT_FILE) - output_asciidoc(internal_metrics, INTERNAL_ASCIIDOC_OUTPUT_FILE) + output_asciidoc(public_metrics_filtered, ASCIIDOC_OUTPUT_FILE) + output_asciidoc(internal_metrics_filtered, INTERNAL_ASCIIDOC_OUTPUT_FILE) \ No newline at end of file
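
As a quick sanity check of the new filtering step in `tools/metrics/metrics.py`, the standalone sketch below mirrors the suffix-skipping and de-duplication behavior of `filter_metrics_for_docs`. The sample metric names are hypothetical, and the inline reimplementation is illustrative only; the real function also logs how many metrics were filtered.

[,python]
----
# Hypothetical sample scrape: one histogram's suffix series plus a plain counter.
sample = {
    "redpanda_kafka_request_latency_seconds_sum": {},
    "redpanda_kafka_request_latency_seconds_count": {},
    "redpanda_kafka_request_latency_seconds_bucket": {},
    "redpanda_rpc_sent_bytes": {},
}

def filter_for_docs(metrics):
    """Drop histogram/summary suffix series and duplicate names (mirrors filter_metrics_for_docs)."""
    filtered = {}
    for name, data in metrics.items():
        if name.endswith(("_bucket", "_count", "_sum")):
            continue  # suffix series are not documented as standalone metrics
        if name in filtered:
            continue  # skip duplicate metric names
        filtered[name] = data
    return filtered

print(sorted(filter_for_docs(sample)))
# Expected output: ['redpanda_rpc_sent_bytes']
----

The JSON output keeps the unfiltered set, so only the generated AsciiDoc references are affected by this filtering.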