Skip to content
Draft
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions .github/workflows/linux-eic-shell.yml
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,65 @@ jobs:
path: rec_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}_${{ matrix.test_plugins }}.hists.root
if-no-files-found: error

# Exercises the podio:input_collections parameter of EICrecon.
# The job reconstructs the same simulated file twice: once reading every input
# collection (baseline) and once with the input restricted to a pattern list
# that leaves out MCParticles. Both output files are uploaded as artifacts.
eicrecon-input-collections-test:
  runs-on: ubuntu-24.04
  needs:
  - build
  - npsim-gun
  strategy:
    matrix:
      CXX: [g++]
      particle: [e]
      detector_config: [craterlake]
  steps:
  - name: Checkout .github
    uses: actions/checkout@v5
    with:
      sparse-checkout: .github
  - name: Download install directory
    uses: actions/download-artifact@v5
    with:
      name: install-${{ matrix.CXX }}-eic-shell-Release-${{ env.platform }}-${{ env.release }}-ASAN
  - name: Unarchive install directory
    run: tar -xaf install.tar.zst
  # Simulated input file used by both reconstruction passes below
  - uses: actions/download-artifact@v5
    with:
      name: sim_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4hep.root
  - uses: cvmfs-contrib/github-action-cvmfs@v5
  - name: Run EICrecon with all collections (baseline)
    uses: eic/run-cvmfs-osg-eic-shell@main
    with:
      platform-release: "${{ env.platform }}:${{ env.release }}"
      setup: "/opt/detector/epic-${{ env.detector-version }}/bin/thisepic.sh"
      run: |
        export DETECTOR_CONFIG=${DETECTOR}_${{ matrix.detector_config }}
        export LD_LIBRARY_PATH=$PWD/install/lib:$LD_LIBRARY_PATH
        export JANA_PLUGIN_PATH=$PWD/install/lib/EICrecon/plugins${JANA_PLUGIN_PATH:+:${JANA_PLUGIN_PATH}}
        $PWD/install/bin/eicrecon ${{env.JANA_OPTIONS}} ${{env.JANA_OPTIONS_GUN}} -Ppodio:output_file=rec_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}_all_collections.edm4eic.root sim_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4hep.root -Ppodio:output_collections=EventHeader,EcalBarrelScFiRawHits,EcalBarrelImagingRawHits,CentralTrackingRecHits
  - name: Run EICrecon without MCParticles (test input filtering)
    uses: eic/run-cvmfs-osg-eic-shell@main
    with:
      platform-release: "${{ env.platform }}:${{ env.release }}"
      setup: "/opt/detector/epic-${{ env.detector-version }}/bin/thisepic.sh"
      run: |
        export DETECTOR_CONFIG=${DETECTOR}_${{ matrix.detector_config }}
        export LD_LIBRARY_PATH=$PWD/install/lib:$LD_LIBRARY_PATH
        export JANA_PLUGIN_PATH=$PWD/install/lib/EICrecon/plugins${JANA_PLUGIN_PATH:+:${JANA_PLUGIN_PATH}}
        # Test that our input_collections parameter works to exclude MCParticles - simulating real data without MC truth
        # The pattern list is quoted so the shell never treats the '*' characters as filename globs
        $PWD/install/bin/eicrecon ${{env.JANA_OPTIONS}} ${{env.JANA_OPTIONS_GUN}} "-Ppodio:input_collections=EventHeader,EcalBarrel.*,HcalBarrel.*,SiBarrel.*" -Ppodio:output_file=rec_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}_no_mc.edm4eic.root sim_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4hep.root -Ppodio:output_collections=EventHeader,EcalBarrelScFiRawHits,EcalBarrelImagingRawHits,CentralTrackingRecHits
        # List the output collections that were created from the limited input collections
        podio-dump --category events rec_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}_no_mc.edm4eic.root
  - uses: actions/upload-artifact@v4
    with:
      name: rec_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}_all_collections.edm4eic.root
      path: rec_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}_all_collections.edm4eic.root
      if-no-files-found: error
  - uses: actions/upload-artifact@v4
    with:
      name: rec_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}_no_mc.edm4eic.root
      path: rec_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}_no_mc.edm4eic.root
      if-no-files-found: error

eicrecon-benchmarks-plugins:
runs-on: ubuntu-24.04
needs:
Expand Down
83 changes: 82 additions & 1 deletion src/services/io/podio/JEventSourcePODIO.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@
#include <algorithm>
#include <exception>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <mutex>
#include <regex>
#include <sstream>
#include <utility>
#include <vector>
Expand Down Expand Up @@ -89,6 +92,26 @@ JEventSourcePODIO::JEventSourcePODIO(std::string resource_name, JApplication* ap
GetApplication()->SetDefaultParameter("podio:print_type_table", print_type_table,
"Print list of collection names and their types");

// Get the list of input collections to include
std::vector<std::string> input_collections;
GetApplication()->SetDefaultParameter(
"podio:input_collections", input_collections,
"Comma separated list of collection names to read from input. If not set, all collections "
"will be "
"read from input file. Setting this allows filtering which collections are loaded.");

m_input_collections = std::set<std::string>(input_collections.begin(), input_collections.end());

// Log input collections configuration for debugging
if (!input_collections.empty()) {
m_log->info("podio:input_collections parameter set - will only load specified collections:");
for (const auto& coll : input_collections) {
m_log->info(" - {}", coll);
}
} else {
m_log->debug("podio:input_collections not set - will load all available collections");
}

// Hopefully we won't need to reimplement background event merging. Using podio frames, it looks like we would
// have to do a deep copy of all data in order to insert it into the same frame, which would probably be
// quite inefficient.
Expand Down Expand Up @@ -217,9 +240,29 @@ void JEventSourcePODIO::GetEvent(std::shared_ptr<JEvent> _event) {
}
}

// Insert contents odf frame into JFactories
// Insert contents of frame into JFactories
VisitPodioCollection<InsertingVisitor> visit;

// Resolve input collections patterns on first event only (thread-safe)
static std::once_flag resolve_once;
if (!m_input_collections.empty()) {
std::call_once(resolve_once, [this, &frame]() {
ResolveInputCollections(frame->getAvailableCollections());
m_log->info("Filtering input collections - loading {} of {} available collections",
m_resolved_input_collections.size(), frame->getAvailableCollections().size());
});
}

for (const std::string& coll_name : frame->getAvailableCollections()) {
// Filter collections based on resolved input_collections parameter
// If input_collections is not set (empty), load all collections (default behavior)
// If input_collections is set, only load collections that match the resolved patterns
if (!m_input_collections.empty() &&
m_resolved_input_collections.find(coll_name) == m_resolved_input_collections.end()) {
// Skip this collection as it's not in the resolved input_collections list
continue;
}

const podio::CollectionBase* collection = frame->get(coll_name);
InsertingVisitor visitor(event, coll_name);
visit(visitor, *collection);
Expand Down Expand Up @@ -278,6 +321,44 @@ double JEventSourceGeneratorT<JEventSourcePODIO>::CheckOpenable(std::string reso
return 0.03;
}

//------------------------------------------------------------------------------
// ResolveInputCollections
//
/// Resolve regex patterns in m_input_collections to actual collection names
/// from the available collections in the input file
//------------------------------------------------------------------------------
void JEventSourcePODIO::ResolveInputCollections(
const std::vector<std::string>& available_collections) {

// Clear any previously resolved collections
m_resolved_input_collections.clear();

if (m_input_collections.empty()) {
// If no input collections specified, load all available collections
return;
}

// Convert available collections to a set for efficient lookup
std::set<std::string> all_collections_set(available_collections.begin(),
available_collections.end());

// Turn regexes among input collections into actual collection names
std::vector<std::regex> input_collections_regex(m_input_collections.size());
std::transform(m_input_collections.begin(), m_input_collections.end(),
input_collections_regex.begin(),
[](const std::string& r) { return std::regex(r); });

std::copy_if(all_collections_set.begin(), all_collections_set.end(),
std::inserter(m_resolved_input_collections, m_resolved_input_collections.end()),
[&](const std::string& c) {
return std::any_of(input_collections_regex.begin(), input_collections_regex.end(),
[&](const std::regex& r) { return std::regex_match(c, r); });
});

m_log->debug("Resolved {} input collection patterns to {} actual collections",
m_input_collections.size(), m_resolved_input_collections.size());
}

//------------------------------------------------------------------------------
// PrintCollectionTypeTable
//
Expand Down
7 changes: 7 additions & 0 deletions src/services/io/podio/JEventSourcePODIO.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
#include <spdlog/logger.h>
#include <cstddef>
#include <memory>
#include <set>
#include <string>
#include <vector>

#if ((JANA_VERSION_MAJOR == 2) && (JANA_VERSION_MINOR >= 3)) || (JANA_VERSION_MAJOR > 2)
#define JANA_NEW_CALLBACK_STYLE 1
Expand Down Expand Up @@ -40,6 +42,8 @@ class JEventSourcePODIO : public JEventSource {

void PrintCollectionTypeTable(void);

void ResolveInputCollections(const std::vector<std::string>& available_collections);

protected:
podio::ROOTReader m_reader;

Expand All @@ -49,6 +53,9 @@ class JEventSourcePODIO : public JEventSource {
bool m_run_forever = false;
bool m_use_event_headers = true;

std::set<std::string> m_input_collections; // config. parameter
std::set<std::string> m_resolved_input_collections; // resolved from regex patterns

private:
std::shared_ptr<spdlog::logger> m_log;
};
Expand Down
Loading