# Job (nested under the workflow's top-level `jobs:` key): two-pass check of the
# `podio:input_collections` option of eicrecon.
#   pass 1 writes an output file restricted to collections matching `.*RawHits`;
#   pass 2 reads that file back with input limited to `EventHeader,.*RawHits`.
  eicrecon-input-collections-test:
    runs-on: ubuntu-24.04
    # Runs after these jobs; presumably they publish the install and sim artifacts
    # downloaded below - TODO confirm against the job definitions.
    needs:
      - build
      - npsim-gun
    # Single-entry matrix: one compiler, one particle, one detector configuration.
    strategy:
      matrix:
        CXX: [g++]
        particle: [e]
        detector_config: [craterlake]
    steps:
      # Only the .github directory is checked out.
      - name: Checkout .github
        uses: actions/checkout@v5
        with:
          sparse-checkout: .github
      - name: Download install directory
        uses: actions/download-artifact@v5
        with:
          name: install-${{ matrix.CXX }}-eic-shell-Release-${{ env.platform }}-${{ env.release }}-ASAN
      - name: Unarchive install directory
        run: tar -xaf install.tar.zst
      # Simulated events used as input for the first eicrecon pass.
      - uses: actions/download-artifact@v5
        with:
          name: sim_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4hep.root
      - uses: cvmfs-contrib/github-action-cvmfs@v5
      # Pass 1: sim file in, rawhits_*.edm4eic.root out (output limited to .*RawHits).
      - name: Run EICrecon (create RawHits collections)
        uses: eic/run-cvmfs-osg-eic-shell@main
        with:
          platform-release: "${{ env.platform }}:${{ env.release }}"
          setup: "/opt/detector/epic-${{ env.detector-version }}/bin/thisepic.sh"
          run: |
            export DETECTOR_CONFIG=${DETECTOR}_${{ matrix.detector_config }}
            export LD_LIBRARY_PATH=$PWD/install/lib:$LD_LIBRARY_PATH
            export JANA_PLUGIN_PATH=$PWD/install/lib/EICrecon/plugins${JANA_PLUGIN_PATH:+:${JANA_PLUGIN_PATH}}
            $PWD/install/bin/eicrecon ${{env.JANA_OPTIONS}} ${{env.JANA_OPTIONS_GUN}} -Ppodio:output_collections=.*RawHits -Ppodio:output_file=rawhits_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4eic.root sim_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4hep.root
      # Pass 2: rawhits file in, rec_*_from_rawhits.edm4eic.root out, then dump its contents.
      - name: Run EICrecon (reconstruction from RawHits only)
        uses: eic/run-cvmfs-osg-eic-shell@main
        with:
          platform-release: "${{ env.platform }}:${{ env.release }}"
          setup: "/opt/detector/epic-${{ env.detector-version }}/bin/thisepic.sh"
          run: |
            export DETECTOR_CONFIG=${DETECTOR}_${{ matrix.detector_config }}
            export LD_LIBRARY_PATH=$PWD/install/lib:$LD_LIBRARY_PATH
            export JANA_PLUGIN_PATH=$PWD/install/lib/EICrecon/plugins${JANA_PLUGIN_PATH:+:${JANA_PLUGIN_PATH}}
            # Test that our input_collections parameter works with RawHits-only input - simulating real data processing pipeline
            $PWD/install/bin/eicrecon ${{env.JANA_OPTIONS}} ${{env.JANA_OPTIONS_GUN}} -Ppodio:input_collections=EventHeader,.*RawHits -Ppodio:output_file=rec_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}_from_rawhits.edm4eic.root rawhits_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4eic.root
            # List the output collections that were created from the RawHits-only input collections
            podio-dump --category events rec_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}_from_rawhits.edm4eic.root
      # Publish the intermediate RawHits file.
      - uses: actions/upload-artifact@v4
        with:
          name: rawhits_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4eic.root
          path: rawhits_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}.edm4eic.root
          if-no-files-found: error
      # Publish the reconstruction output produced from the RawHits-only input.
      - uses: actions/upload-artifact@v4
        with:
          name: rec_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}_from_rawhits.edm4eic.root
          path: rec_${{ matrix.particle }}_1GeV_20GeV_${{ matrix.detector_config }}_from_rawhits.edm4eic.root
          if-no-files-found: error
JEventSourcePODIO::JEventSourcePODIO(std::string resource_name, JApplication* ap GetApplication()->SetDefaultParameter("podio:print_type_table", print_type_table, "Print list of collection names and their types"); + // Get the list of input collections to include + std::vector input_collections; + GetApplication()->SetDefaultParameter( + "podio:input_collections", input_collections, + "Comma separated list of collection names to read from input. If not set, all collections " + "will be " + "read from input file. Setting this allows filtering which collections are loaded."); + + m_input_collections = std::set(input_collections.begin(), input_collections.end()); + + // Log input collections configuration for debugging + if (!input_collections.empty()) { + m_log->info("podio:input_collections parameter set - will only load specified collections:"); + for (const auto& coll : input_collections) { + m_log->info(" - {}", coll); + } + } else { + m_log->debug("podio:input_collections not set - will load all available collections"); + } + // Hopefully we won't need to reimplement background event merging. Using podio frames, it looks like we would // have to do a deep copy of all data in order to insert it into the same frame, which would probably be // quite inefficient. 
@@ -217,9 +240,29 @@ void JEventSourcePODIO::GetEvent(std::shared_ptr _event) { } } - // Insert contents odf frame into JFactories + // Insert contents of frame into JFactories VisitPodioCollection visit; + + // Resolve input collections patterns on first event only (thread-safe) + static std::once_flag resolve_once; + if (!m_input_collections.empty()) { + std::call_once(resolve_once, [this, &frame]() { + ResolveInputCollections(frame->getAvailableCollections()); + m_log->info("Filtering input collections - loading {} of {} available collections", + m_resolved_input_collections.size(), frame->getAvailableCollections().size()); + }); + } + for (const std::string& coll_name : frame->getAvailableCollections()) { + // Filter collections based on resolved input_collections parameter + // If input_collections is not set (empty), load all collections (default behavior) + // If input_collections is set, only load collections that match the resolved patterns + if (!m_input_collections.empty() && + m_resolved_input_collections.find(coll_name) == m_resolved_input_collections.end()) { + // Skip this collection as it's not in the resolved input_collections list + continue; + } + const podio::CollectionBase* collection = frame->get(coll_name); InsertingVisitor visitor(event, coll_name); visit(visitor, *collection); @@ -278,6 +321,44 @@ double JEventSourceGeneratorT::CheckOpenable(std::string reso return 0.03; } +//------------------------------------------------------------------------------ +// ResolveInputCollections +// +/// Resolve regex patterns in m_input_collections to actual collection names +/// from the available collections in the input file +//------------------------------------------------------------------------------ +void JEventSourcePODIO::ResolveInputCollections( + const std::vector& available_collections) { + + // Clear any previously resolved collections + m_resolved_input_collections.clear(); + + if (m_input_collections.empty()) { + // If no input 
collections specified, load all available collections + return; + } + + // Convert available collections to a set for efficient lookup + std::set all_collections_set(available_collections.begin(), + available_collections.end()); + + // Turn regexes among input collections into actual collection names + std::vector input_collections_regex(m_input_collections.size()); + std::transform(m_input_collections.begin(), m_input_collections.end(), + input_collections_regex.begin(), + [](const std::string& r) { return std::regex(r); }); + + std::copy_if(all_collections_set.begin(), all_collections_set.end(), + std::inserter(m_resolved_input_collections, m_resolved_input_collections.end()), + [&](const std::string& c) { + return std::any_of(input_collections_regex.begin(), input_collections_regex.end(), + [&](const std::regex& r) { return std::regex_match(c, r); }); + }); + + m_log->debug("Resolved {} input collection patterns to {} actual collections", + m_input_collections.size(), m_resolved_input_collections.size()); +} + //------------------------------------------------------------------------------ // PrintCollectionTypeTable // diff --git a/src/services/io/podio/JEventSourcePODIO.h b/src/services/io/podio/JEventSourcePODIO.h index 261c600d3d..8a6672a25f 100644 --- a/src/services/io/podio/JEventSourcePODIO.h +++ b/src/services/io/podio/JEventSourcePODIO.h @@ -11,7 +11,9 @@ #include #include #include +#include #include +#include #if ((JANA_VERSION_MAJOR == 2) && (JANA_VERSION_MINOR >= 3)) || (JANA_VERSION_MAJOR > 2) #define JANA_NEW_CALLBACK_STYLE 1 @@ -40,6 +42,8 @@ class JEventSourcePODIO : public JEventSource { void PrintCollectionTypeTable(void); + void ResolveInputCollections(const std::vector& available_collections); + protected: podio::ROOTReader m_reader; @@ -49,6 +53,9 @@ class JEventSourcePODIO : public JEventSource { bool m_run_forever = false; bool m_use_event_headers = true; + std::set m_input_collections; // config. 
parameter + std::set m_resolved_input_collections; // resolved from regex patterns + private: std::shared_ptr m_log; };