diff --git a/.github/requirements.txt b/.github/requirements.txt index 3d731f3cee..b9f780c987 100644 --- a/.github/requirements.txt +++ b/.github/requirements.txt @@ -2,3 +2,4 @@ awkward deepdiff scipy uproot +numpy diff --git a/.github/workflows/linux-eic-shell.yml b/.github/workflows/linux-eic-shell.yml index b46f985bbf..7295428c29 100644 --- a/.github/workflows/linux-eic-shell.yml +++ b/.github/workflows/linux-eic-shell.yml @@ -1277,9 +1277,94 @@ jobs: name: capybara_${{ github.event.pull_request.number }}.md path: capybara_${{ github.event.pull_request.number }}.md + reconstruction-quality-analysis: + runs-on: ubuntu-24.04 + needs: + - eicrecon-gun + - eicrecon-dis + strategy: + matrix: + include: + # Gun reconstruction analysis + - particle: pi + detector_config: craterlake + file_type: gun + - particle: e + detector_config: craterlake + file_type: gun + # DIS reconstruction analysis + - beam: 10x100 + minq2: 1000 + detector_config: craterlake_tracking_only + file_type: dis + - beam: 18x275 + minq2: 1000 + detector_config: craterlake_18x275 + file_type: dis + steps: + - name: Checkout repository + uses: actions/checkout@v5 + with: + sparse-checkout: scripts + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + pip install uproot awkward numpy + - name: Download gun reconstruction artifacts + if: matrix.file_type == 'gun' + uses: actions/download-artifact@v5 + with: + name: rec_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4eic.root + path: ./ + - name: Download DIS reconstruction artifacts + if: matrix.file_type == 'dis' + uses: actions/download-artifact@v5 + with: + name: rec_dis_${{ matrix.beam }}_minQ2=${{ matrix.minq2 }}_${{ matrix.detector_config }}.edm4eic.root + path: ./ + - name: Run reconstruction quality analysis (gun) + if: matrix.file_type == 'gun' + run: | + chmod +x scripts/compare_reconstruction_truth.py + 
echo "Running reconstruction quality analysis for particle gun..." + python scripts/compare_reconstruction_truth.py rec_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4eic.root --output reconstruction_quality_${{ matrix.particle }}_${{ matrix.detector_config }}.txt || echo "Analysis failed for gun file" + - name: Run reconstruction quality analysis (DIS) + if: matrix.file_type == 'dis' + run: | + chmod +x scripts/compare_reconstruction_truth.py + echo "Running reconstruction quality analysis for DIS..." + python scripts/compare_reconstruction_truth.py rec_dis_${{ matrix.beam }}_minQ2=${{ matrix.minq2 }}_${{ matrix.detector_config }}.edm4eic.root --output reconstruction_quality_dis_${{ matrix.beam }}_${{ matrix.detector_config }}.txt || echo "Analysis failed for DIS file" + - name: Display results + run: | + echo "=== RECONSTRUCTION QUALITY ANALYSIS RESULTS ===" + for result_file in reconstruction_quality_*.txt; do + if [ -f "$result_file" ]; then + echo "Results from: $result_file" + echo "----------------------------------------" + cat "$result_file" + echo "" + echo "========================================" + echo "" + fi + done + if ! 
ls reconstruction_quality_*.txt 1> /dev/null 2>&1; then + echo "No analysis results found" + fi + - name: Upload analysis results + uses: actions/upload-artifact@v4 + with: + name: reconstruction-quality-${{ matrix.file_type }}-${{ matrix.particle || matrix.beam }}-${{ matrix.detector_config }} + path: reconstruction_quality_*.txt + if-no-files-found: error + deploy-docs: needs: - collect-docs + - reconstruction-quality-analysis permissions: pages: write id-token: write diff --git a/.gitignore b/.gitignore index 895ed66270..8a28e32160 100644 --- a/.gitignore +++ b/.gitignore @@ -168,4 +168,9 @@ src/tools/default_flags_table/fieldmaps/ src/examples/test_data_generator/__pycache__/ +# Python cache files +__pycache__/ +*.py[cod] +*$py.class + .run/ diff --git a/docs/design/reconstruction_quality.md b/docs/design/reconstruction_quality.md new file mode 100644 index 0000000000..8fcf7d6750 --- /dev/null +++ b/docs/design/reconstruction_quality.md @@ -0,0 +1,127 @@ +# Reconstruction Quality Analysis + +This document describes the automated reconstruction quality analysis that compares reconstructed particle quantities against Monte Carlo truth. + +## Overview + +The EICrecon GitHub Actions workflow includes automated quality assessment that: + +1. **Analyzes reconstruction outputs** from both particle gun and deep inelastic scattering (DIS) events +2. **Compares reconstructed quantities to truth** using MC-reconstruction associations +3. **Calculates performance metrics** including accuracy, precision, and efficiency +4. 
**Provides quality assessments** to help identify reconstruction issues + +## Implementation + +### Analysis Script + +The analysis is performed by `scripts/compare_reconstruction_truth.py`, which: + +- Reads EDM4hep ROOT files produced by EICrecon +- Extracts MC truth and reconstructed particle information +- Uses association collections to match truth and reconstructed particles +- Calculates ratios (reconstructed/truth) for key quantities +- Provides statistical analysis and quality assessments + +### GitHub Actions Integration + +The `reconstruction-quality-analysis` job in the CI workflow: + +- Depends on `eicrecon-gun` and `eicrecon-dis` jobs +- Downloads reconstruction artifacts from those jobs +- Runs analysis for multiple detector configurations +- Uploads results as artifacts for review +- Displays summary in the workflow output + +### Analyzed Collections + +The script analyzes the following data when associations are available: + +1. **Reconstructed Particles**: + - Uses `MCParticles`, `ReconstructedParticles`, and `ReconstructedParticleAssociations` + - Compares momentum components, total momentum, and energy + +2. **Central Tracking**: + - Uses `CentralCKFTracks` and `CentralCKFTrackAssociations` + - Compares track-level momentum measurements + +3. 
**Truth-Seeded Tracking**: + - Uses `CentralCKFTruthSeededTracks` and associations + - Provides baseline performance with perfect seeding + +## Metrics and Quality Assessment + +### Key Metrics + +For each quantity, the analysis reports: + +- **Mean ratio**: Average of reconstructed/truth ratios +- **Resolution**: Standard deviation relative to mean (σ/μ) +- **Efficiency**: Fraction of MC particles successfully reconstructed +- **Percentile ranges**: Distribution characteristics + +### Quality Grades + +The analysis provides qualitative assessments: + +- **EXCELLENT**: Mean ratio 0.9-1.1, resolution < 20% +- **GOOD**: Mean ratio 0.8-1.2, resolution < 30% +- **FAIR**: Mean ratio 0.7-1.3, resolution < 50% +- **POOR**: Outside fair range + +### Example Output + +``` +=== Reconstructed Particles Analysis === +Total MC particles: 1500 +Total reconstructed particles: 1342 +Successfully matched: 1298 +Reconstruction efficiency: 86.5% + +Momentum: + Mean ratio: 0.995 ± 0.087 + Median ratio: 0.998 + RMS: 1.002 + Resolution (σ/μ): 0.087 + N matched: 1298 / 1298 total + 25th-75th percentile: [0.942, 1.047] + Quality assessment: EXCELLENT +``` + +## Usage + +### Manual Analysis + +To run the analysis manually: + +```bash +# Analyze a single file +python scripts/compare_reconstruction_truth.py reconstruction_output.edm4eic.root + +# Analyze multiple files with output to file +python scripts/compare_reconstruction_truth.py *.edm4eic.root --output results.txt +``` + +### Interpreting Results + +- **Ratios near 1.0** indicate accurate reconstruction +- **Small resolution values** indicate precise/consistent reconstruction +- **High efficiency** indicates good particle-finding capability +- **Compare different detector configurations** to assess design choices +- **Compare gun vs DIS events** to understand performance in different environments + +## Integration with CI/CD + +The analysis runs automatically on: + +- Pull requests (for changed code validation) +- Main branch pushes 
(for continuous monitoring) +- Scheduled runs (for regression detection) + +Results are: + +- Displayed in the GitHub Actions output +- Uploaded as downloadable artifacts +- Can be integrated into automated quality gates + +This provides continuous monitoring of reconstruction performance and helps identify regressions or improvements in the codebase. diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 0000000000..2d1df8aaf6 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,68 @@ +# Analysis Scripts + +This directory contains analysis scripts for EICrecon reconstruction quality assessment. + +## compare_reconstruction_truth.py + +This script compares reconstructed particle properties against Monte Carlo truth for collections where truth-reconstruction associations exist. It calculates and prints quantitative comparisons in terms of reconstructed/truth ratios to assess reconstruction performance. + +### Features + +- Reads EDM4hep ROOT files produced by EICrecon +- Analyzes particle-level quantities (momentum, energy) using `ReconstructedParticleAssociations` +- Analyzes track-level quantities using `CentralCKFTrackAssociations` +- Supports both regular and truth-seeded reconstruction +- Provides statistical analysis including mean, median, RMS, and resolution metrics +- Filters outliers to provide meaningful statistics + +### Usage + +```bash +python compare_reconstruction_truth.py [--output output.txt] +``` + +### Example + +```bash +# Analyze a single file +python compare_reconstruction_truth.py rec_e_1GeV_20GeV_craterlake.edm4eic.root + +# Analyze multiple files with output to file +python compare_reconstruction_truth.py *.edm4eic.root --output analysis_results.txt +``` + +### Output + +The script prints analysis results for each collection type: + +1. **Reconstructed Particles**: Momentum and energy ratios +2. **Central CKF Tracks**: Track parameter ratios +3. 
**Truth-Seeded Tracks**: Performance with perfect seeding + +For each quantity, the following statistics are reported: +- Mean ratio ± standard deviation +- Median ratio +- RMS +- Resolution (σ/μ) +- Number of matched particles/tracks +- 25th-75th percentile range + +### Dependencies + +- `uproot`: For reading ROOT files +- `awkward`: For handling jagged arrays +- `numpy`: For numerical calculations + +### Integration with GitHub Actions + +This script is automatically run in the EICrecon GitHub Actions workflow via the `reconstruction-quality-analysis` job, which: + +1. Downloads reconstruction output files from `eicrecon-gun` and `eicrecon-dis` jobs +2. Runs the analysis on various detector configurations +3. Uploads the results as artifacts +4. Displays a summary in the workflow output + +The analysis covers: +- Particle gun events (pions and electrons) +- Deep inelastic scattering events +- Multiple detector configurations (craterlake, craterlake_tracking_only, craterlake_18x275) diff --git a/scripts/compare_reconstruction_truth.py b/scripts/compare_reconstruction_truth.py new file mode 100644 index 0000000000..885319ec2a --- /dev/null +++ b/scripts/compare_reconstruction_truth.py @@ -0,0 +1,393 @@ +#!/usr/bin/env python3 +""" +Reconstruction Quality Analysis Script + +This script compares reconstructed particle properties against MC truth +for collections where truth-reconstruction associations exist. + +It prints quantitative comparisons in terms of reconstructed/truth ratios +to assess how well the reconstruction is performing. 
#!/usr/bin/env python3
"""
Reconstruction Quality Analysis Script

This script compares reconstructed particle properties against MC truth
for collections where truth-reconstruction associations exist.

It prints quantitative comparisons in terms of reconstructed/truth ratios
to assess how well the reconstruction is performing.
"""

import argparse
import contextlib
import sys
from pathlib import Path

import numpy as np

# uproot/awkward are only needed for actual ROOT file I/O.  Importing them
# lazily keeps this module importable (e.g. for unit-testing the statistics
# helpers) on systems without the ROOT I/O stack; main() still aborts early
# with the original error message when they are missing.
try:
    import uproot
    import awkward as ak
except ImportError as _err:
    uproot = None
    ak = None
    _IMPORT_ERROR = _err
else:
    _IMPORT_ERROR = None


def read_collections(file_path, collections):
    """Read the requested collections from an EDM4hep ROOT file.

    Returns a plain dict mapping collection name -> awkward array, or an
    empty dict when the file cannot be read or none of the requested
    collections are present.  (A dict — not the raw ``ak.Array`` returned by
    ``tree.arrays`` — because callers rely on ``col in data`` and
    ``data.get(...)``, which awkward arrays do not support.)
    """
    try:
        with uproot.open(file_path) as file:
            # The event tree name varies between producers; try common names.
            tree = None
            for tree_name in ('events', 'Events', 'tree', 'T'):
                if tree_name in file:
                    tree = file[tree_name]
                    break

            if tree is None:
                print(f"Warning: No recognized event tree found in {file_path}")
                print(f"Available objects: {list(file.keys())}")
                return {}

            available_collections = set(tree.keys())
            requested_collections = set(collections)

            found_collections = requested_collections & available_collections
            missing_collections = requested_collections - available_collections

            if missing_collections:
                print(f"Warning: Collections not found in {file_path}: {missing_collections}")

            if not found_collections:
                print(f"Warning: No requested collections found in {file_path}")
                print(f"Available collections: {sorted(available_collections)[:10]}...")  # Show first 10
                return {}

            print(f"Reading collections from {file_path}: {found_collections}")
            arrays = tree.arrays(list(found_collections), library="ak")
            return {name: arrays[name] for name in found_collections}

    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return {}


def calculate_particle_ratios(mc_particles, reco_particles, associations):
    """Calculate momentum and energy ratios between reconstructed and MC truth particles.

    Returns a dict with per-quantity ratio lists plus matching counters
    (``n_matched``, ``n_mc_total``, ``n_reco_total``).
    """
    ratios = {
        'momentum': [],
        'energy': [],
        'momentum_x': [],
        'momentum_y': [],
        'momentum_z': [],
        'n_matched': 0,
        'n_mc_total': 0,
        'n_reco_total': 0,
    }

    try:
        # Count total particles across all events.
        ratios['n_mc_total'] = ak.sum(ak.num(mc_particles, axis=1))
        ratios['n_reco_total'] = ak.sum(ak.num(reco_particles, axis=1))

        if associations is None:
            print("No associations available for particle matching")
            return ratios

        for event_idx in range(len(associations)):
            if event_idx >= len(mc_particles) or event_idx >= len(reco_particles):
                continue

            mc_event = mc_particles[event_idx]
            reco_event = reco_particles[event_idx]

            for assoc in associations[event_idx]:
                try:
                    # NOTE(review): assumes the association record exposes
                    # `sim`/`rec` index fields — confirm against the EDM4eic
                    # MCRecoParticleAssociation schema as read by uproot.
                    mc_idx = assoc.sim
                    reco_idx = assoc.rec

                    if mc_idx >= len(mc_event) or reco_idx >= len(reco_event):
                        continue

                    mc_p = mc_event[mc_idx]
                    reco_p = reco_event[reco_idx]

                    mc_momentum = np.sqrt(mc_p.momentum.x**2 + mc_p.momentum.y**2 + mc_p.momentum.z**2)
                    reco_momentum = np.sqrt(reco_p.momentum.x**2 + reco_p.momentum.y**2 + reco_p.momentum.z**2)

                    if mc_momentum > 0 and reco_momentum > 0:
                        ratios['momentum'].append(reco_momentum / mc_momentum)
                        ratios['n_matched'] += 1

                        # Per-component ratios only where the truth component
                        # is non-zero (avoid division by zero).
                        if mc_p.momentum.x != 0:
                            ratios['momentum_x'].append(reco_p.momentum.x / mc_p.momentum.x)
                        if mc_p.momentum.y != 0:
                            ratios['momentum_y'].append(reco_p.momentum.y / mc_p.momentum.y)
                        if mc_p.momentum.z != 0:
                            ratios['momentum_z'].append(reco_p.momentum.z / mc_p.momentum.z)

                    # Energy ratio is independent of the momentum cut above.
                    if mc_p.energy > 0 and reco_p.energy > 0:
                        ratios['energy'].append(reco_p.energy / mc_p.energy)

                except Exception as e:
                    print(f"Error processing association: {e}")
                    continue

    except Exception as e:
        print(f"Error calculating particle ratios: {e}")

    return ratios


def calculate_track_ratios(mc_particles, tracks, track_associations):
    """Calculate track-momentum / truth-momentum ratios for associated tracks."""
    ratios = {
        'track_momentum': [],
        'n_matched_tracks': 0,
        'n_total_tracks': 0,
    }

    try:
        if track_associations is None or tracks is None:
            return ratios

        ratios['n_total_tracks'] = ak.sum(ak.num(tracks, axis=1))

        for event_idx in range(len(track_associations)):
            if event_idx >= len(mc_particles) or event_idx >= len(tracks):
                continue

            mc_event = mc_particles[event_idx]
            track_event = tracks[event_idx]

            for assoc in track_associations[event_idx]:
                try:
                    mc_idx = assoc.sim
                    track_idx = assoc.rec

                    if mc_idx >= len(mc_event) or track_idx >= len(track_event):
                        continue

                    mc_p = mc_event[mc_idx]
                    track = track_event[track_idx]

                    mc_momentum = np.sqrt(mc_p.momentum.x**2 + mc_p.momentum.y**2 + mc_p.momentum.z**2)

                    # Track parameter content varies by output format; only
                    # use tracks that expose a momentum vector directly.
                    if hasattr(track, 'momentum') and mc_momentum > 0:
                        track_momentum = np.sqrt(track.momentum.x**2 + track.momentum.y**2 + track.momentum.z**2)
                        if track_momentum > 0:
                            ratios['track_momentum'].append(track_momentum / mc_momentum)
                            ratios['n_matched_tracks'] += 1

                except Exception:
                    continue

    except Exception as e:
        print(f"Error calculating track ratios: {e}")

    return ratios


def _assess_quality(mean_ratio, resolution):
    """Map a mean reco/truth ratio and relative resolution (σ/μ) to a grade."""
    if 0.9 <= mean_ratio <= 1.1 and resolution < 0.2:
        return "EXCELLENT"
    if 0.8 <= mean_ratio <= 1.2 and resolution < 0.3:
        return "GOOD"
    if 0.7 <= mean_ratio <= 1.3 and resolution < 0.5:
        return "FAIR"
    return "POOR"


def print_analysis_results(ratios, collection_name):
    """Print statistical analysis of a ratios dict produced by the calculators."""
    print(f"\n=== {collection_name} Analysis ===")

    # Matching statistics first.
    total_particles = ratios.get('n_mc_total', 0) + ratios.get('n_reco_total', 0)
    matched = ratios.get('n_matched', 0) + ratios.get('n_matched_tracks', 0)

    if total_particles > 0:
        print(f"Total MC particles: {ratios.get('n_mc_total', 0)}")
        print(f"Total reconstructed particles: {ratios.get('n_reco_total', 0)}")
        print(f"Successfully matched: {matched}")
        if ratios.get('n_mc_total', 0) > 0:
            efficiency = matched / ratios.get('n_mc_total', 1) * 100
            print(f"Reconstruction efficiency: {efficiency:.1f}%")

    found_ratios = False
    for ratio_type, values in ratios.items():
        if not (isinstance(values, list) and values):
            continue

        values = np.array(values)
        # Filter out extreme outliers (ratios beyond 0.1 to 10) so the
        # summary statistics stay meaningful.
        filtered_values = values[(values > 0.1) & (values < 10.0)]
        if len(filtered_values) == 0:
            continue

        found_ratios = True
        mean_ratio = float(np.mean(filtered_values))
        std_ratio = float(np.std(filtered_values))
        # filtered values are all > 0.1, so mean_ratio > 0 always holds.
        resolution = std_ratio / mean_ratio

        print(f"\n{ratio_type.replace('_', ' ').title()}:")
        print(f"  Mean ratio: {mean_ratio:.3f} ± {std_ratio:.3f}")
        print(f"  Median ratio: {np.median(filtered_values):.3f}")
        print(f"  RMS: {np.sqrt(np.mean(filtered_values**2)):.3f}")
        print(f"  Resolution (σ/μ): {resolution:.3f}")
        print(f"  N matched: {len(filtered_values)} / {len(values)} total")

        p25, p75 = np.percentile(filtered_values, [25, 75])
        print(f"  25th-75th percentile: [{p25:.3f}, {p75:.3f}]")
        print(f"  Quality assessment: {_assess_quality(mean_ratio, resolution)}")

    if not found_ratios:
        print("No reconstruction ratios could be calculated (no matched particles with associations)")

    print(f"{'='*50}")


def analyze_file(file_path, file_type):
    """Analyze a single reconstruction file; return per-collection ratio dicts."""
    print(f"\n{'='*60}")
    print(f"Analyzing {file_type}: {Path(file_path).name}")
    print(f"{'='*60}")

    collections_to_read = [
        "MCParticles",
        "ReconstructedParticles",
        "ReconstructedParticleAssociations",
        "CentralCKFTracks",
        "CentralCKFTrackAssociations",
        "CentralCKFTruthSeededTracks",
        "CentralCKFTruthSeededTrackAssociations",
    ]

    data = read_collections(file_path, collections_to_read)
    if not data:
        print("No data could be read from file")
        return None

    results = {}

    # Particle-level comparison.
    if "MCParticles" in data and "ReconstructedParticles" in data:
        particle_ratios = calculate_particle_ratios(
            data["MCParticles"],
            data["ReconstructedParticles"],
            data.get("ReconstructedParticleAssociations"),
        )
        print_analysis_results(particle_ratios, "Reconstructed Particles")
        results['particles'] = particle_ratios

    # Regular (realistically seeded) tracking.
    if "MCParticles" in data and "CentralCKFTracks" in data:
        track_ratios = calculate_track_ratios(
            data["MCParticles"],
            data["CentralCKFTracks"],
            data.get("CentralCKFTrackAssociations"),
        )
        print_analysis_results(track_ratios, "Central CKF Tracks")
        results['tracks'] = track_ratios

    # Truth-seeded tracking (baseline with perfect seeding).
    if "MCParticles" in data and "CentralCKFTruthSeededTracks" in data:
        truth_track_ratios = calculate_track_ratios(
            data["MCParticles"],
            data["CentralCKFTruthSeededTracks"],
            data.get("CentralCKFTruthSeededTrackAssociations"),
        )
        print_analysis_results(truth_track_ratios, "Central CKF Truth-Seeded Tracks")
        results['truth_seeded_tracks'] = truth_track_ratios

    return results


def _run_analysis(files):
    """Analyze each file in turn, print an overall summary; return exit code."""
    print("EICRECON RECONSTRUCTION QUALITY ANALYSIS")
    print("=" * 60)
    print(f"Analyzing {len(files)} file(s)")
    print("This analysis compares reconstructed quantities to MC truth")
    print("for collections where truth-reconstruction associations exist.\n")

    overall_results = []

    try:
        for i, file_path in enumerate(files, 1):
            if not Path(file_path).exists():
                print(f"Warning: File not found: {file_path}")
                continue

            file_name = Path(file_path).name
            print(f"[{i}/{len(files)}] Processing: {file_name}")

            # Classify the file from its name for nicer report headers.
            if "gun" in file_name:
                file_type = "Particle Gun"
            elif "dis" in file_name:
                file_type = "Deep Inelastic Scattering"
            else:
                file_type = "Unknown"

            result = analyze_file(file_path, file_type)
            overall_results.append((file_path, file_type, result))

    except KeyboardInterrupt:
        print("\nAnalysis interrupted by user")
    except Exception as e:
        print(f"Error during analysis: {e}")
        import traceback
        traceback.print_exc()
        return 1

    print(f"\n{'='*60}")
    print("SUMMARY")
    print(f"{'='*60}")

    if overall_results:
        print(f"Successfully analyzed {len(overall_results)} files:")
        for file_path, file_type, _ in overall_results:
            print(f"  - {Path(file_path).name} ({file_type})")
    else:
        print("No files were successfully analyzed.")

    print("\nReconstruction quality analysis completed.")
    print("\nKey metrics interpretation:")
    print("- Mean ratio close to 1.0 indicates good reconstruction accuracy")
    print("- Small resolution (σ/μ) indicates good reconstruction precision")
    print("- High efficiency indicates good particle finding capability")
    print("- Quality assessments: EXCELLENT > GOOD > FAIR > POOR")

    return 0


def main():
    """CLI entry point: parse args, set up output redirection, run analysis."""
    parser = argparse.ArgumentParser(description="Compare reconstruction quality against MC truth")
    parser.add_argument("files", nargs="+", help="EDM4hep ROOT files to analyze")
    parser.add_argument("--output", help="Output file for results (default: stdout)")

    args = parser.parse_args()

    # Fail here (not at import time) when the optional I/O deps are missing.
    if uproot is None or ak is None:
        print(f"Error: Required Python packages not available: {_IMPORT_ERROR}")
        print("Please install: pip install uproot awkward")
        return 1

    # Context-managed redirection: guarantees the output file is flushed,
    # closed, and stdout restored (the previous version leaked the handle).
    stream_cm = open(args.output, 'w') if args.output else contextlib.nullcontext(sys.stdout)
    with stream_cm as stream, contextlib.redirect_stdout(stream):
        return _run_analysis(args.files)


if __name__ == "__main__":
    sys.exit(main())