71 changes: 41 additions & 30 deletions CLDConfig/CLDReconstruction.py
@@ -19,32 +19,37 @@
import os
from Gaudi.Configuration import INFO, WARNING, DEBUG

from Configurables import k4DataSvc, MarlinProcessorWrapper
from k4MarlinWrapper.inputReader import create_reader, attach_edm4hep2lcio_conversion
from Gaudi.Configurables import EventDataSvc, MarlinProcessorWrapper, GeoSvc, TrackingCellIDEncodingSvc
from k4FWCore import ApplicationMgr, IOSvc
from k4FWCore.parseArgs import parser
from py_utils import SequenceLoader, attach_lcio2edm4hep_conversion, create_writer, parse_collection_patch_file
from py_utils import SequenceLoader, parse_collection_patch_file
from k4MarlinWrapper.io_helpers import IOHandlerHelper

import ROOT
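# run ROOT in batch mode so that no graphics windows are opened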
ROOT.gROOT.SetBatch(True)


parser_group = parser.add_argument_group("CLDReconstruction.py custom options")
parser_group.add_argument("--inputFiles", action="extend", nargs="+", metavar=("file1", "file2"), help="One or multiple input files")
# A dummy default input is needed so that the IOHandlerHelper.add_reader call below does not crash when called with --help
parser_group.add_argument("--inputFiles", action="store", nargs="+", metavar=("file1", "file2"), help="One or multiple input files", default=["dummy_input.edm4hep.root"])
parser_group.add_argument("--outputBasename", help="Basename of the output file(s)", default="output")
parser_group.add_argument("--trackingOnly", action="store_true", help="Run only track reconstruction", default=False)
parser_group.add_argument("--enableLCFIJet", action="store_true", help="Enable LCFIPlus jet clustering parts", default=False)
parser_group.add_argument("--enableMLJetTagger", action="store_true", help="Enable ML-based jet flavor tagging", default=False)
parser_group.add_argument("--MLJetTaggerModel", action="store", help="Type of ML model to use for inference", type=str, default="model_ParT_ecm240_cld_o2_v5")
parser_group.add_argument("--cms", action="store", help="Choose a Centre-of-Mass energy", default=240, choices=(91, 160, 240, 365), type=int)
parser_group.add_argument("--compactFile", help="Compact detector file to use", type=str, default=os.environ["K4GEO"] + "/FCCee/CLD/compact/CLD_o2_v07/CLD_o2_v07.xml")
tracking_group = parser_group.add_mutually_exclusive_group()
tracking_group.add_argument("--conformalTracking", action="store_true", default=True, help="Use conformal tracking pattern recognition")
tracking_group.add_argument("--truthTracking", action="store_true", default=False, help="Cheat tracking pattern recognition")
reco_args = parser.parse_known_args()[0]
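# Hypothetical guard (not part of this diff): warn when only the placeholder default is in
# use, since reading "dummy_input.edm4hep.root" will fail at run time. This mirrors the
# warning that the removed reader code below used to print.
if reco_args.inputFiles == ["dummy_input.edm4hep.root"]:
    print("WARNING: No input files specified, the CLD Reconstruction will fail")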

algList = []
svcList = []

evtsvc = k4DataSvc("EventDataSvc")
svcList.append(evtsvc)
evtsvc = EventDataSvc("EventDataSvc")
iosvc = IOSvc()

svcList = [evtsvc, iosvc]
algList = []

CONFIG = {
"CalorimeterIntegrationTimeWindow": "10ns",
@@ -59,7 +64,6 @@

REC_COLLECTION_CONTENTS_FILE = "collections_rec_level.txt" # file with the collections to be patched in when writing from LCIO to EDM4hep

from Configurables import GeoSvc, TrackingCellIDEncodingSvc, Lcio2EDM4hepTool
geoservice = GeoSvc("GeoSvc")
geoservice.detectors = [reco_args.compactFile]
geoservice.OutputLevel = INFO
@@ -92,13 +96,8 @@
},
)

if reco_args.inputFiles:
read = create_reader(reco_args.inputFiles, evtsvc)
read.OutputLevel = INFO
algList.append(read)
else:
print('WARNING: No input files specified, the CLD Reconstruction will fail')
read = None
io_handler = IOHandlerHelper(algList, iosvc)
io_handler.add_reader(reco_args.inputFiles)

MyAIDAProcessor = MarlinProcessorWrapper("MyAIDAProcessor")
MyAIDAProcessor.OutputLevel = WARNING
@@ -144,19 +143,38 @@
sequenceLoader.load("HighLevelReco/PFOSelector")
sequenceLoader.load("HighLevelReco/JetClusteringOrRenaming")
sequenceLoader.load("HighLevelReco/JetAndVertex")
sequenceLoader.load("HighLevelReco/MLJetTagger")
# Event number processor; placed down here so that the conversion back to EDM4hep can be attached to it
algList.append(EventNumber)
from Configurables import EDM4hep2LcioTool
input_conv = EDM4hep2LcioTool("EventNumber_InputConverter")
input_conv.convertAll = True
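# map the EDM4hep collection name "MCParticles" to the name "MCParticle" expected on the LCIO side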
input_conv.collNameMapping = {"MCParticles": "MCParticle"}
input_conv.OutputLevel = DEBUG
EventNumber.EDM4hep2LcioTool = input_conv

DST_KEEPLIST = ["MCParticlesSkimmed", "MCPhysicsParticles", "RecoMCTruthLink", "SiTracks", "SiTracks_Refitted", "PandoraClusters", "PandoraPFOs", "SelectedPandoraPFOs", "LooseSelectedPandoraPFOs", "TightSelectedPandoraPFOs", "RefinedVertexJets", "RefinedVertexJets_rel", "RefinedVertexJets_vtx", "RefinedVertexJets_vtx_RP", "BuildUpVertices", "BuildUpVertices_res", "BuildUpVertices_RP", "BuildUpVertices_res_RP", "BuildUpVertices_V0", "BuildUpVertices_V0_res", "BuildUpVertices_V0_RP", "BuildUpVertices_V0_res_RP", "PrimaryVertices", "PrimaryVertices_res", "PrimaryVertices_RP", "PrimaryVertices_res_RP", "RefinedVertices", "RefinedVertices_RP"]

DST_SUBSETLIST = ["EfficientMCParticles", "InefficientMCParticles", "MCPhysicsParticles"]

# TODO: replace all the ugly strings with something sensible like an Enum (see the sketch below)
if CONFIG["OutputMode"] == "LCIO":
Output_REC = create_writer("lcio", "Output_REC", f"{reco_args.outputBasename}_REC")
algList.append(Output_REC)
Output_REC = io_handler.add_lcio_writer("Output_REC")
Output_REC.Parameters = {
"LCIOOutputFile": [f"{reco_args.outputBasename}_REC.slcio"],
"LCIOWriteMode": ["WRITE_NEW"],
}

Output_DST = create_writer("lcio", "Output_DST", f"{reco_args.outputBasename}_DST", DST_KEEPLIST, DST_SUBSETLIST)
Output_DST = io_handler.add_lcio_writer("Output_DST")
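# collection types dropped from the DST output; collections named in DST_KEEPLIST are kept regardless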
dropped_types = ["MCParticle", "LCRelation", "SimCalorimeterHit", "CalorimeterHit", "SimTrackerHit", "TrackerHit", "TrackerHitPlane", "Track", "ReconstructedParticle", "LCFloatVec"]
Output_DST.Parameters = {
"LCIOOutputFile": [f"{reco_args.outputBasename}_DST.slcio"],
"LCIOWriteMode": ["WRITE_NEW"],
"DropCollectionNames": [],
"DropCollectionTypes": dropped_types,
"FullSubsetCollections": DST_SUBSETLIST,
"KeepCollectionNames": DST_KEEPLIST,
}
algList.append(Output_DST)

if CONFIG["OutputMode"] == "EDM4Hep":
@@ -169,21 +187,14 @@
}
algList.append(collPatcherRec)

Output_REC = create_writer("edm4hep", "Output_REC", f"{reco_args.outputBasename}_REC")
algList.append(Output_REC)

io_handler.add_edm4hep_writer(f"{reco_args.outputBasename}_REC.edm4hep.root", ["keep *"])
# FIXME: needs https://github.com/key4hep/k4FWCore/issues/226
# Output_DST = create_writer("edm4hep", "Output_DST", f"{reco_args.outputBasename}_DST", DST_KEEPLIST)
# algList.append(Output_DST)

# <DST output for edm4hep>
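# A minimal sketch of the Enum that the TODO above hints at (not part of this diff),
# covering only the two mode strings used in this file:
from enum import Enum

class OutputMode(Enum):
    LCIO = "LCIO"
    EDM4HEP = "EDM4Hep"

# The string comparisons above would then read, e.g.,
# if CONFIG["OutputMode"] == OutputMode.LCIO.value: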

# We need to convert the inputs in case we have EDM4hep input
attach_edm4hep2lcio_conversion(algList, read)

# We need to convert the outputs in case we have EDM4hep output
attach_lcio2edm4hep_conversion(algList)
# We need to attach all the necessary converters
io_handler.finalize_converters()

from Configurables import ApplicationMgr
ApplicationMgr( TopAlg = algList,
EvtSel = 'NONE',
EvtMax = 3, # Overridden by the --num-events switch to k4run
11 changes: 9 additions & 2 deletions CLDConfig/HighLevelReco/JetAndVertex.py
@@ -16,7 +16,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from Gaudi.Configuration import WARNING
from Gaudi.Configuration import WARNING, INFO
from Configurables import MarlinProcessorWrapper


@@ -135,6 +135,12 @@
"UpdateVertexRPDaughters": ["0"],
"UseMCP": ["0"]
}
JetClusteringAndRefinerPatcher = MarlinProcessorWrapper(
"JetClusteringAndRefinerPatcher", OutputLevel=INFO, ProcessorType="PatchCollections"
)
JetClusteringAndRefinerPatcher.Parameters = {
"PatchCollections": ["yth", "VertexJets|y01,y12,y23,y34,y45,y56,y67,y78,y89,y910"]
}

if CONFIG["VertexUnconstrained"] == "ON":
VertexFinderUnconstrained = MarlinProcessorWrapper("VertexFinderUnconstrained")
@@ -192,13 +198,14 @@
}

JetAndVertexSequence = [
VertexFinder,
VertexFinder
]

# FIXME: LCFIPlus causes occasional breakage: https://github.com/lcfiplus/LCFIPlus/issues/69
# due to not adding the jet clustering parameters to every event as PID information
if reco_args.enableLCFIJet:
JetAndVertexSequence.append(JetClusteringAndRefiner)
JetAndVertexSequence.append(JetClusteringAndRefinerPatcher)

if CONFIG["VertexUnconstrained"] == "ON":
JetAndVertexSequence.append(VertexFinderUnconstrained)
73 changes: 73 additions & 0 deletions CLDConfig/HighLevelReco/MLJetTagger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#
# Copyright (c) 2014-2024 Key4hep-Project.
#
# This file is part of Key4hep.
# See https://key4hep.github.io/key4hep-doc/ for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from Configurables import JetTagger
import yaml
import os

if reco_args.enableMLJetTagger:
# check if jet clustering is also enabled (prerequisite for jet flavor tagging)
if not reco_args.enableLCFIJet:
raise ValueError("MLJetTagger requires LCFIPlus jet clustering to be enabled. Please add --enableLCFIJet to the command or disable --enableMLJetTagger.")

# Get the directory of the current script
script_dir = os.path.dirname(os.path.abspath(__file__))
# Construct the path to the YAML file
yaml_path = os.path.join(script_dir, "models_MLJetTagger.yaml")

# Load YAML config
with open(yaml_path, "r") as file:
model_config = yaml.safe_load(file)

# check if the model type is valid
if reco_args.MLJetTaggerModel not in model_config:
raise ValueError(f"Invalid model type '{reco_args.MLJetTaggerModel}'. Valid options are: {', '.join(model_config.keys())}.")

# load the model configuration
onnx_model = model_config[reco_args.MLJetTaggerModel]["onnx_model"]
json_onnx_config = model_config[reco_args.MLJetTaggerModel]["json_onnx_config"]
flavor_collection_names = model_config[reco_args.MLJetTaggerModel]["flavor_collection_names"]

# print out the model configuration
print("RUNNING JET TAGGING WITH MLJETTAGGER")

print(f"Using MLJetTagger model: \t\t {reco_args.MLJetTaggerModel}\n",
f"The model uses the architecture: \t {model_config[reco_args.MLJetTaggerModel]['model']}\n",
f"was trained on the kinematics: \t {model_config[reco_args.MLJetTaggerModel]['kinematics']}\n",
f"and the detector version: \t\t {model_config[reco_args.MLJetTaggerModel]['detector']}\n",
f"at a center-of-mass energy of: \t {model_config[reco_args.MLJetTaggerModel]['ecm']} GeV\n",
f"Comment: \t\t\t\t {model_config[reco_args.MLJetTaggerModel]['comment']}\n",
f"Appending collections to the event: \t {', '.join(flavor_collection_names)}\n",)

# create the MLJetTagger algorithm

k4MLJetTagger = JetTagger("JetTagger",
model_path=onnx_model,
json_path=json_onnx_config,
flavor_collection_names = flavor_collection_names, # to make sure the order and naming is correct
InputJets=["RefinedVertexJets"],
InputPrimaryVertices=["PrimaryVertices"],
OutputIDCollections=flavor_collection_names,
)

# append sequence to the algorithm list
MLJetTaggerSequence = [
k4MLJetTagger,
]
else:
MLJetTaggerSequence = []
18 changes: 18 additions & 0 deletions CLDConfig/HighLevelReco/models_MLJetTagger.yaml
@@ -0,0 +1,18 @@
# This YAML file stores the information on how each jet-flavor tagger was trained, together with the settings needed to run inference; add an entry here for every new model

model_ParT_ecm240_cld_o2_v5:
model: "ParticleTransformer"
ecm: 240
detector: "CLD_o2_v5"
kinematics: "Z(vv)H(jj)"
onnx_model: "/eos/experiment/fcc/ee/jet_flavour_tagging/fullsim_test_spring2024/fullsimCLD240_2mio.onnx"
json_onnx_config: "/eos/experiment/fcc/ee/jet_flavour_tagging/fullsim_test_spring2024/preprocess_fullsimCLD240_2mio.json"
flavor_collection_names:
- "RefinedJetTag_G"
- "RefinedJetTag_U"
- "RefinedJetTag_S"
- "RefinedJetTag_C"
- "RefinedJetTag_B"
- "RefinedJetTag_D"
- "RefinedJetTag_TAU"
comment: "The model was trained on 1.9 mio/jets per flavor. First implementation of ML tagging for full sim."
Empty file added CLDConfig/cdb.log
6 changes: 6 additions & 0 deletions test/CMakeLists.txt
@@ -61,6 +61,12 @@ add_test(NAME LCFIJet
)
set_property(TEST LCFIJet APPEND PROPERTY DEPENDS ddsim_edm4hep)

add_test(NAME MLJetTagger
WORKING_DIRECTORY ${CLDConfig_DIR}
COMMAND k4run --enableLCFIJet --enableMLJetTagger --inputFiles=test.edm4hep.root --outputBasename=mlJetTagger_test_edm4hep CLDReconstruction.py --GeoSvc.detectors=${DETECTOR}
)
set_property(TEST MLJetTagger APPEND PROPERTY DEPENDS ddsim_edm4hep)

add_test(NAME tracking_truth
WORKING_DIRECTORY ${CLDConfig_DIR}
COMMAND k4run --trackingOnly --truthTracking --inputFiles=test.edm4hep.root --outputBasename=trkOnly_truth_test_edm4hep CLDReconstruction.py --GeoSvc.detectors=${DETECTOR}