diff --git a/ControlRegions/WZ/2022EE_v12/aliases.py b/ControlRegions/WZ/2022EE_v12/aliases.py index f06c74c9..b2f06dd0 100644 --- a/ControlRegions/WZ/2022EE_v12/aliases.py +++ b/ControlRegions/WZ/2022EE_v12/aliases.py @@ -17,7 +17,8 @@ # LepCut3l__ele_wp90iso__mu_cut_TightID_POG eleWP = 'wp90iso' -muWP = 'cut_TightID_POG' +muWP = 'cut_Tight_HWW' +#muWP = 'cut_TightID_POG' aliases['LepWPCut'] = { 'expr': 'LepCut3l__ele_'+eleWP+'__mu_'+muWP, @@ -177,6 +178,8 @@ 'samples': mc } +# The following aliases have already been defined. +""" # WH3l_mOSll for data aliases['WH3l_mOSll'] = { 'linesToAdd' : [f'#include "{configurations}/ControlRegions/WZ/macros/mOS_ll.cc"'], @@ -216,3 +219,4 @@ 'args' : 'nLepton,Lepton_pt,Lepton_eta,Lepton_phi,Lepton_pdgId', 'samples' : ['DATA'], } +""" diff --git a/ControlRegions/WZ/2023_v12/aliases.py b/ControlRegions/WZ/2023_v12/aliases.py new file mode 100644 index 00000000..d909ad70 --- /dev/null +++ b/ControlRegions/WZ/2023_v12/aliases.py @@ -0,0 +1,223 @@ +import os +import copy +import inspect + +# /afs/cern.ch/user/n/ntrevisa/work/latinos/Run3/PlotsConfigurationsRun3/ControlRegions/WZ/2023_v12 +configurations = os.path.realpath(inspect.getfile(inspect.currentframe())) # this file +configurations = os.path.dirname(configurations) # 2023_v12 +configurations = os.path.dirname(configurations) # WZ +configurations = os.path.dirname(configurations) # ControlRegions +configurations = os.path.dirname(configurations) # PlotsConfigurationsRun3 + +aliases = {} +aliases = OrderedDict() + +mc = [skey for skey in samples if skey not in ('Fake', 'DATA', 'Dyemb', 'DATA_EG', 'DATA_Mu', 'DATA_EMu', 'Fake_EG', 'Fake_Mu', 'Fake_EMu')] +mc_emb = [skey for skey in samples if skey not in ('Fake', 'DATA', 'DATA_Mu', 'DATA_EMu', 'Fake_EG', 'Fake_Mu', 'Fake_EMu')] + +# LepCut3l__ele_wp90iso__mu_cut_TightID_POG +eleWP = 'wp90iso' +muWP = 'cut_Tight_HWW' +#muWP = 'cut_TightID_POG' + +aliases['LepWPCut'] = { + 'expr': 'LepCut3l__ele_'+eleWP+'__mu_'+muWP, + 
'samples': mc + ['DATA'], +} + +aliases['LepWPSF'] = { + 'expr': 'LepSF3l__ele_'+eleWP+'__mu_'+muWP, + 'samples': mc +} + +# gen-matching to prompt only (GenLepMatch3l matches to *any* gen lepton) +aliases['PromptGenLepMatch3l'] = { + 'expr': 'Alt(Lepton_promptgenmatched, 0, 0) * Alt(Lepton_promptgenmatched, 1, 0) * Alt(Lepton_promptgenmatched, 2, 0)', + 'samples': mc +} + +# Jet bins +# using Alt(CleanJet_pt, n, 0) instead of Sum(CleanJet_pt >= 30) because jet pt ordering is not strictly followed in JES-varied samples + +# No jet with pt > 30 GeV +aliases['zeroJet'] = { + 'expr': 'Alt(CleanJet_pt, 0, 0) < 30.' +} + +aliases['oneJet'] = { + 'expr': 'Alt(CleanJet_pt, 0, 0) > 30.' +} + +aliases['multiJet'] = { + 'expr': 'Alt(CleanJet_pt, 1, 0) > 30.' +} + +aliases['noJetInHorn'] = { + 'expr' : 'Sum(CleanJet_pt > 30 && CleanJet_pt < 50 && abs(CleanJet_eta) > 2.6 && abs(CleanJet_eta) < 3.1) == 0', +} + +######################################################################## +# B-Tagging WP: https://btv-wiki.docs.cern.ch/ScaleFactors/Run3Summer23/ +######################################################################## + +# Algo / WP / WP cut +btagging_WPs = { + "DeepFlavB" : { + "loose" : "0.0479", + "medium" : "0.2431", + "tight" : "0.6553", + "xtight" : "0.7667", + "xxtight" : "0.9459", + }, + "RobustParTAK4B" : { + "loose" : "0.0358", + "medium" : "0.1917", + "tight" : "0.6172", + "xtight" : "0.7515", + "xxtight" : "0.9659", + }, + "PNetB" : { + "loose" : "0.0681", + "medium" : "0.3487", + "tight" : "0.7969", + "xtight" : "0.8882", + "xxtight" : "0.9883", + } +} + +# Algo / SF name +btagging_SFs = { + "DeepFlavB" : "deepjet", + "RobustParTAK4B" : "partTransformer", + "PNetB" : "deepjet", +} + +# Algorithm and WP selection +bAlgo = 'DeepFlavB' # ['DeepFlavB','RobustParTAK4B','PNetB'] +WP = 'loose' # ['loose','medium','tight','xtight','xxtight'] + +# Access information from dictionaries +bWP = btagging_WPs[bAlgo][WP] +bSF = btagging_SFs[bAlgo] + +# B tagging 
selections and scale factors +aliases['bVeto'] = { + 'expr': f'Sum(CleanJet_pt > 20. && abs(CleanJet_eta) < 2.5 && Take(Jet_btag{bAlgo}, CleanJet_jetIdx) > {bWP}) == 0' +} + +aliases['bVetoSF'] = { + 'expr': 'TMath::Exp(Sum(LogVec((CleanJet_pt>20 && abs(CleanJet_eta)<2.5)*Take(Jet_btagSF_{}_shape, CleanJet_jetIdx)+1*(CleanJet_pt<20 || abs(CleanJet_eta)>2.5))))'.format(bSF), + 'samples': mc +} + +aliases['bReq'] = { + 'expr': f'Sum(CleanJet_pt > 30. && abs(CleanJet_eta) < 2.5 && Take(Jet_btag{bAlgo}, CleanJet_jetIdx) > {bWP}) >= 1' +} + +aliases['bReqSF'] = { + 'expr': 'TMath::Exp(Sum(LogVec((CleanJet_pt>30 && abs(CleanJet_eta)<2.5)*Take(Jet_btagSF_{}_shape, CleanJet_jetIdx)+1*(CleanJet_pt<30 || abs(CleanJet_eta)>2.5))))'.format(bSF), + 'samples': mc +} + +# Top control region +aliases['topcr'] = { + 'expr': 'mtw2>30 && mll>50 && ((zeroJet && !bVeto) || bReq)' +} + +# WW control region +aliases['wwcr'] = { + 'expr': 'mth>60 && mtw2>30 && mll>100 && bVeto' +} + +# Overall b tag SF +aliases['btagSF'] = { + 'expr': '(bVeto || (topcr && zeroJet))*bVetoSF + (topcr && !zeroJet)*bReqSF', + 'samples': mc +} + +# Systematic uncertainty variations (the SF branch pattern is built from bSF so it always matches the nominal bVetoSF/bReqSF expressions) +for shift in ['jes','lf','hf','lfstats1','lfstats2','hfstats1','hfstats2','cferr1','cferr2']: + + for targ in ['bVeto', 'bReq']: + alias = aliases['%sSF%sup' % (targ, shift)] = copy.deepcopy(aliases['%sSF' % targ]) + alias['expr'] = alias['expr'].replace('btagSF_%s_shape' % bSF, 'btagSF_%s_shape_up_%s' % (bSF, shift)) + + alias = aliases['%sSF%sdown' % (targ, shift)] = copy.deepcopy(aliases['%sSF' % targ]) + alias['expr'] = alias['expr'].replace('btagSF_%s_shape' % bSF, 'btagSF_%s_shape_down_%s' % (bSF, shift)) + + aliases['btagSF%sup' % shift] = { + 'expr': aliases['btagSF']['expr'].replace('SF', 'SF' + shift + 'up'), + 'samples': mc + } + + aliases['btagSF%sdown' % shift] = { + 'expr': aliases['btagSF']['expr'].replace('SF', 'SF' + shift + 'down'), + 'samples': mc + } + 
+########################################################################## +# End of b tagging +########################################################################## + +# Data/MC scale factors and systematic uncertainties +aliases['SFweight'] = { + 'expr': ' * '.join(['SFweight3l', 'LepWPCut', 'LepWPSF','btagSF']), + 'samples': mc +} + +aliases['SFweightEleUp'] = { + 'expr': 'LepSF3l__ele_'+eleWP+'__Up', + 'samples': mc +} +aliases['SFweightEleDown'] = { + 'expr': 'LepSF3l__ele_'+eleWP+'__Down', + 'samples': mc +} +aliases['SFweightMuUp'] = { + 'expr': 'LepSF3l__mu_'+muWP+'__Up', + 'samples': mc +} +aliases['SFweightMuDown'] = { + 'expr': 'LepSF3l__mu_'+muWP+'__Down', + 'samples': mc +} + +# # WH3l_mOSll for data +# aliases['WH3l_mOSll'] = { +# 'linesToAdd' : [f'#include "{configurations}/ControlRegions/WZ/macros/mOS_ll.cc"'], +# 'class' : 'mOS_ll', +# 'args' : 'nLepton,Lepton_pt,Lepton_eta,Lepton_phi,Lepton_pdgId', +# 'samples' : ['DATA'], +# } + +# # WH3l_mlll for data +# aliases['WH3l_mlll'] = { +# 'linesToAdd' : [f'#include "{configurations}/ControlRegions/WZ/macros/m_lll.cc"'], +# 'class' : 'm_lll', +# 'args' : 'nLepton,Lepton_pt,Lepton_eta,Lepton_phi,Lepton_pdgId', +# 'samples' : ['DATA'], +# } + +# # WH3l_ZVeto for data +# aliases['WH3l_ZVeto'] = { +# 'linesToAdd' : [f'#include "{configurations}/ControlRegions/WZ/macros/z_veto.cc"'], +# 'class' : 'z_veto', +# 'args' : 'nLepton,Lepton_pt,Lepton_eta,Lepton_phi,Lepton_pdgId', +# 'samples' : ['DATA'], +# } + +# # WH3l_flagOSSF for data +# aliases['WH3l_flagOSSF'] = { +# 'linesToAdd' : [f'#include "{configurations}/ControlRegions/WZ/macros/flag_ossf.cc"'], +# 'class' : 'flag_ossf', +# 'args' : 'nLepton,Lepton_pt,Lepton_eta,Lepton_phi,Lepton_pdgId', +# 'samples' : ['DATA'], +# } + +# # WH3l_flagOSSF for data +# aliases['WH3l_chlll'] = { +# 'linesToAdd' : [f'#include "{configurations}/ControlRegions/WZ/macros/ch_lll.cc"'], +# 'class' : 'ch_lll', +# 'args' : 
'nLepton,Lepton_pt,Lepton_eta,Lepton_phi,Lepton_pdgId', +# 'samples' : ['DATA'], +# } diff --git a/ControlRegions/WZ/2023_v12/configuration.py b/ControlRegions/WZ/2023_v12/configuration.py new file mode 100644 index 00000000..788b8cb5 --- /dev/null +++ b/ControlRegions/WZ/2023_v12/configuration.py @@ -0,0 +1,94 @@ +import sys,os + +# tag used to identify the configuration folder version +tag = "WZ2023_v12" + +# file to use as runner script, default uses mkShapesRDF.shapeAnalysis.runner, otherwise specify path to script +runnerFile = "default" + +# output file name +outputFile = "mkShapes__{}.root".format(tag) + +# path to output folder +outputFolder = "/eos/user/" + os.getlogin()[0] + "/" + os.getlogin() + "/mkShapesRDF_rootfiles/" + tag + "/rootFile/" + +# path to batch folder (used for condor submission) +batchFolder = "condor" + +# path to configuration folder (will contain all the compiled configuration files) +configsFolder = "configs" + +# luminosity to normalize to (in 1/fb) +# https://github.com/latinos/mkShapesRDF/blob/master/mkShapesRDF/processor/data/TrigMaker_cfg.py#L1151 +lumi = 17.794 + +# file with dict of aliases to define +aliasesFile = "aliases.py" + +# file with dict of variables +variablesFile = "variables.py" + +# file with dict of cuts +cutsFile = "cuts.py" + +# file with dict of samples +samplesFile = "samples.py" + +# file with dict of plots +plotFile = "plot.py" + +# file with dict of structure (used to define combine processes) +structureFile = "structure.py" + +# nuisances file for mkDatacards and for mkShape +nuisancesFile = "nuisances.py" + +# minRatio = 0.5 +# maxRatio = 1.5 + +# path to folder where to save plots +plotPath = "plots_" + tag + +# these lines are executed right before the runner on the condor node +mountEOS = [ + # "export KRB5CCNAME=/home/gpizzati/krb5\n", +] + +# list of imports to import when compiling the whole configuration folder, it should not contain imports used by configuration.py +imports = ["os", "glob", 
("collections", "OrderedDict"), "ROOT"] + +# list of files to compile +filesToExec = [ + samplesFile, + aliasesFile, + variablesFile, + cutsFile, + plotFile, + nuisancesFile, + structureFile, +] + +# list of variables to keep in the compiled configuration folder +varsToKeep = [ + "batchVars", + "outputFolder", + "batchFolder", + "configsFolder", + "outputFile", + "runnerFile", + "tag", + "samples", + "aliases", + "variables", + ("cuts", {"cuts": "cuts", "preselections": "preselections"}), + ("plot", {"plot": "plot", "groupPlot": "groupPlot", "legend": "legend"}), + "nuisances", + "structure", + "lumi", +] + +# list of variables to keep in the batch submission script (script.py) +batchVars = varsToKeep[varsToKeep.index("samples") :] + + +varsToKeep += ['plotPath'] diff --git a/ControlRegions/WZ/2023_v12/cuts.py b/ControlRegions/WZ/2023_v12/cuts.py new file mode 100644 index 00000000..e271e33a --- /dev/null +++ b/ControlRegions/WZ/2023_v12/cuts.py @@ -0,0 +1,28 @@ +cuts = {} + +# Preselections - applied to all the cuts +preselections = 'Alt(Lepton_pt,0,0)>25 \ + && Alt(Lepton_pt,1,0)>10 \ + && (abs(Lepton_pdgId[1])==13 || Lepton_pt[1]>13) \ + && Alt(Lepton_pt,2,0)>10 \ + && (nLepton>=3 && Alt(Lepton_pt,3,0)<10) \ + && (WH3l_mOSll[0] < 0 || WH3l_mOSll[0] > 12) \ + && (WH3l_mOSll[1] < 0 || WH3l_mOSll[1] > 12) \ + && (WH3l_mOSll[2] < 0 || WH3l_mOSll[2] > 12) \ + && abs(WH3l_chlll) == 1 \ + && bVeto \ + && noJetInHorn \ +' + +# Inclusive +cuts['wh3l_wz_13TeV_incl'] = 'WH3l_flagOSSF == 1 && PuppiMET_pt > 45 && WH3l_ZVeto < 20 && WH3l_mlll > 100' + +# Jet bins +cuts['wh3l_wz_13TeV'] = { + 'expr' : 'WH3l_flagOSSF == 1 && PuppiMET_pt > 45 && WH3l_ZVeto < 20 && WH3l_mlll > 100', + 'categories' : { + '0j' : 'zeroJet', + '1j' : 'oneJet && Alt(CleanJet_pt,1,0)<30', + '2j' : 'multiJet', + } +} diff --git a/ControlRegions/WZ/2023_v12/nuisances.py b/ControlRegions/WZ/2023_v12/nuisances.py new file mode 100644 index 00000000..bd87d720 --- /dev/null +++ 
b/ControlRegions/WZ/2023_v12/nuisances.py @@ -0,0 +1,52 @@ +print(treeBaseDir) +def makeMCDirectory(var=''): + _treeBaseDir = treeBaseDir + '' + if useXROOTD: + _treeBaseDir = redirector + treeBaseDir + if var== '': + return '/'.join([_treeBaseDir, mcProduction, mcSteps]) + else: + return '/'.join([_treeBaseDir, mcProduction, mcSteps + '__' + var]) + + +# merge cuts +_mergedCuts = [] +for cut in list(cuts.keys()): + __cutExpr = '' + if isinstance(cuts[cut], dict): + __cutExpr = cuts[cut]['expr'] + for cat in list(cuts[cut]['categories'].keys()): + _mergedCuts.append(cut + '_' + cat) + elif isinstance(cuts[cut], str): + _mergedCuts.append(cut) + + +# Definitions of groups of samples (one-element tuple: a bare ('DATA') is a string and would turn this into a substring test) +mc = [skey for skey in samples if skey not in ('DATA',)] + +nuisances = {} + + +################################ EXPERIMENTAL UNCERTAINTIES ################################# + +#### Luminosity + +# https://twiki.cern.ch/twiki/bin/view/CMS/LumiRecommendationsRun3 +nuisances['lumi_2023'] = { + 'name' : 'lumi_2023', + 'type' : 'lnN', + 'samples' : dict((skey, '1.013') for skey in mc) +} + +### MC statistical uncertainty +autoStats = True +if autoStats: + ## Use the following if you want to apply the automatic combine MC stat nuisances. 
+ nuisances['stat'] = { + 'type': 'auto', + 'maxPoiss': '10', + 'includeSignal': '0', + # nuisance ['maxPoiss'] = Number of threshold events for Poisson modelling + # nuisance ['includeSignal'] = Include MC stat nuisances on signal processes (1=True, 0=False) + 'samples': {} + } diff --git a/ControlRegions/WZ/2023_v12/plot.py b/ControlRegions/WZ/2023_v12/plot.py new file mode 100644 index 00000000..5490a0d9 --- /dev/null +++ b/ControlRegions/WZ/2023_v12/plot.py @@ -0,0 +1,79 @@ +# Group of plots + +groupPlot = {} + +groupPlot['DY'] = { + 'nameHR' : 'DY', + 'isSignal' : 0, + 'color' : 418, # kGreen + 2 + 'samples' : ['DY'] +} + +groupPlot['Top'] = { + 'nameHR' : 't#bar{t}', + 'isSignal' : 0, + 'color' : 400, # kYellow + 'samples' : ['Top'] +} + +groupPlot['WW'] = { + 'nameHR' : 'WW', + 'isSignal' : 0, + 'color' : 851, # kAzure - 9 + 'samples' : ['WW'] +} + +groupPlot['WZ'] = { + 'nameHR' : 'WZ', + 'isSignal' : 0, + 'color' : 619, # kMagenta + 3 + 'samples' : ['WZ'] +} + +# Plots + +plot = {} + +plot['DY'] = { + 'color' : 418, # kGreen + 2 + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0, +} + +plot['Top'] = { + 'color' : 400, # kYellow + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0, +} + +plot['WW'] = { + 'color' : 851, # kAzure - 9 + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0, +} + +plot['WZ'] = { + 'color' : 619, # kMagenta + 3 + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0, +} + +# Data + +plot['DATA'] = { + 'nameHR' : 'Data', + 'color' : 1 , + 'isSignal' : 0, + 'isData' : 1 , + 'isBlind' : 0 +} + + +# Legend definition +legend = {} +legend['lumi'] = 'L = 17.8 fb^{-1}' +legend['sqrt'] = '#sqrt{s} = 13.6 TeV' diff --git a/ControlRegions/WZ/2023_v12/samples.py b/ControlRegions/WZ/2023_v12/samples.py new file mode 100644 index 00000000..421dfe11 --- /dev/null +++ b/ControlRegions/WZ/2023_v12/samples.py @@ -0,0 +1,201 @@ +from mkShapesRDF.lib.search_files import SearchFiles + +searchFiles = SearchFiles() + +redirector = "" +useXROOTD = False + +# MC: 
/eos/cms/store/group/phys_higgs/cmshww/amassiro/HWWNano/Summer23_130x_nAODv12_Full2023v12/MCl2loose2023v12__MCCorr2023v12JetScaling__l2tight +# DATA: /eos/cms/store/group/phys_higgs/cmshww/amassiro/HWWNano/Run2023_Prompt_nAODv12_Full2023v12/DATAl2loose2023v12__l2tight + +mcProduction = 'Summer23_130x_nAODv12_Full2023v12' +mcSteps = 'MCl2loose2023v12__MCCorr2023v12JetScaling__l2tight' +dataReco = 'Run2023_Prompt_nAODv12_Full2023v12' +dataSteps = 'DATAl2loose2023v12__l2tight' +# fakeSteps = 'DATAl1loose2022EFGv12__fakeW' + +############################################## +###### Tree base directory for the site ###### +############################################## +treeBaseDir = '/eos/cms/store/group/phys_higgs/cmshww/amassiro/HWWNano' +limitFiles = -1 + +def makeMCDirectory(var=""): + _treeBaseDir = treeBaseDir + "" + if redirector != "": + _treeBaseDir = redirector + treeBaseDir + if var == "": + return "/".join([_treeBaseDir, mcProduction, mcSteps]) + else: + return "/".join([_treeBaseDir, mcProduction, mcSteps + "__" + var]) + + +mcDirectory = makeMCDirectory() +# fakeDirectory = os.path.join(treeBaseDir, dataReco, fakeSteps) +dataDirectory = os.path.join(treeBaseDir, dataReco, dataSteps) + +samples = {} + + +def nanoGetSampleFiles(path, name): + _files = searchFiles.searchFiles(path, name, redirector=redirector) + if limitFiles != -1 and len(_files) > limitFiles: + return [(name, _files[:limitFiles])] + else: + return [(name, _files)] + + +def CombineBaseW(samples, proc, samplelist): + _filtFiles = list(filter(lambda k: k[0] in samplelist, samples[proc]["name"])) + _files = list(map(lambda k: k[1], _filtFiles)) + _l = list(map(lambda k: len(k), _files)) + leastFiles = _files[_l.index(min(_l))] + dfSmall = ROOT.RDataFrame("Runs", leastFiles) + s = dfSmall.Sum("genEventSumw").GetValue() + f = ROOT.TFile(leastFiles[0]) + t = f.Get("Events") + t.GetEntry(1) + xs = t.baseW * s + + __files = [] + for f in _files: + __files += f + df = ROOT.RDataFrame("Runs", __files) 
+ s = df.Sum("genEventSumw").GetValue() + newbaseW = str(xs / s) + weight = newbaseW + "/baseW" + + for iSample in samplelist: + addSampleWeight(samples, proc, iSample, weight) + + +def addSampleWeight(samples, sampleName, sampleNameType, weight): + obj = list(filter(lambda k: k[0] == sampleNameType, samples[sampleName]["name"]))[0] + samples[sampleName]["name"] = list( + filter(lambda k: k[0] != sampleNameType, samples[sampleName]["name"]) + ) + if len(obj) > 2: + samples[sampleName]["name"].append( + (obj[0], obj[1], obj[2] + "*(" + weight + ")") + ) + else: + samples[sampleName]["name"].append((obj[0], obj[1], "(" + weight + ")")) + + +################################################ +############ DATA DECLARATION ################## +################################################ + +DataRun = [ + ['Cv1','Run2023C-Prompt-v1'], + ['Cv2','Run2023C-Prompt-v2'], + ['Cv3','Run2023C-Prompt-v3'], + ['Cv4','Run2023C-Prompt-v4'], +] + + +DataSets = ['MuonEG','Muon0','Muon1','EGamma0','EGamma1'] + +DataTrig = { + 'MuonEG' : 'Trigger_ElMu' , + #'SingleMuon' : '!Trigger_ElMu && Trigger_sngMu' , + 'Muon0' : '!Trigger_ElMu && (Trigger_sngMu || Trigger_dblMu)', + 'Muon1' : '!Trigger_ElMu && (Trigger_sngMu || Trigger_dblMu)', + 'EGamma0' : '!Trigger_ElMu && !Trigger_sngMu && !Trigger_dblMu && (Trigger_sngEl || Trigger_dblEl)', + 'EGamma1' : '!Trigger_ElMu && !Trigger_sngMu && !Trigger_dblMu && (Trigger_sngEl || Trigger_dblEl)' +} + + + +#DataRun = [ +# ['C','Run2023C-Prompt-v1'], +# ['C','Run2023C-Prompt-v2'], +# ['C','Run2023C-Prompt-v3'], +# ['C','Run2023C-Prompt-v4'], +#] +# +#DataSets = ['MuonEG','Muon0','EGamma0'] +#DataSets = ['MuonEG','SingleMuon','Muon','EGamma'] +# +#DataTrig = { +# 'MuonEG' : ' Trigger_ElMu' , +# 'SingleMuon' : '!Trigger_ElMu && Trigger_sngMu' , +# 'Muon' : '!Trigger_ElMu && (Trigger_sngMu || Trigger_dblMu)', +# 'EGamma' : '!Trigger_ElMu && !Trigger_sngMu && !Trigger_dblMu && (Trigger_sngEl || Trigger_dblEl)' +#} + + 
+######################################### +############ MC COMMON ################## +######################################### + +# SFweight does not include btag weights +mcCommonWeightNoMatch = 'XSWeight*METFilter_Common*SFweight' +mcCommonWeight = 'XSWeight*METFilter_Common*PromptGenLepMatch3l*SFweight' + +#mcCommonWeight = 'XSWeight*METFilter_Common*SFweight' + +########################################### +############# BACKGROUNDS ############### +########################################### + +# DY +files = nanoGetSampleFiles(mcDirectory, 'DYto2L-2Jets_MLL-50') + +samples['DY'] = { + 'name': files, + 'weight': mcCommonWeight, + 'FilesPerJob': 2, +} + +# Top +files = nanoGetSampleFiles(mcDirectory, 'TTTo2L2Nu') + +samples['Top'] = { + 'name': files, + 'weight': mcCommonWeight, + 'FilesPerJob': 2, +} + +# WW +files = nanoGetSampleFiles(mcDirectory, 'WWTo2L2Nu') + +samples['WW'] = { + 'name': files, + 'weight': mcCommonWeight, + 'FilesPerJob': 2, +} + +# WZ +files = nanoGetSampleFiles(mcDirectory, 'WZTo3LNu') + +samples['WZ'] = { + 'name': files, + 'weight': mcCommonWeight, + 'FilesPerJob': 2, +} + +########################################### +################## DATA ################### +########################################### + +samples['DATA'] = { + 'name': [], + 'weight': 'LepWPCut*METFilter_DATA', + 'weights': [], + 'isData': ['all'], + 'FilesPerJob': 15 +} + +for _, sd in DataRun: + for pd in DataSets: + datatag = pd + '_' + sd + + if (pd == "SingleMuon" and _ in ["D"]) or (pd == "Muon" and _ == "B"): + continue + files = nanoGetSampleFiles(dataDirectory, datatag) + + print(datatag) + + samples['DATA']['name'].extend(files) + addSampleWeight(samples, 'DATA', datatag, DataTrig[pd]) diff --git a/ControlRegions/WZ/2023_v12/structure.py b/ControlRegions/WZ/2023_v12/structure.py new file mode 100644 index 00000000..ae0c2ae8 --- /dev/null +++ b/ControlRegions/WZ/2023_v12/structure.py @@ -0,0 +1,33 @@ +# structure configuration for datacard + +structure = {} 
+ +# keys here must match keys in samples.py +# + +structure['DY'] = { + 'isSignal' : 0, + 'isData' : 0 +} + +structure['Top'] = { + 'isSignal' : 0, + 'isData' : 0 +} + +structure['WW'] = { + 'isSignal' : 0, + 'isData' : 0 +} + +structure['WZ'] = { + 'isSignal' : 0, + 'isData' : 0 +} + + +# data +structure['DATA'] = { + 'isSignal' : 0, + 'isData' : 1 +} diff --git a/ControlRegions/WZ/2023_v12/variables.py b/ControlRegions/WZ/2023_v12/variables.py new file mode 100644 index 00000000..bde2e953 --- /dev/null +++ b/ControlRegions/WZ/2023_v12/variables.py @@ -0,0 +1,158 @@ +# variables +variables = {} + +variables['events'] = { + 'name' : '1', + 'range' : (1,0,2), + 'xaxis' : 'events', + 'fold' : 3 +} + +variables['nvtx'] = { + 'name' : 'PV_npvsGood', + 'range' : (100, 0, 100), + 'xaxis' : 'number of vertices', + 'fold' : 3 +} + +variables['mll'] = { + 'name': 'mll', + 'range' : (60,60,120), + 'xaxis' : 'm_{ll} [GeV]', + 'fold' : 0 +} + +variables['mlll'] = { + 'name': 'WH3l_mlll', + 'range' : (50,0,200), + 'xaxis' : 'm_{lll} [GeV]', + 'fold' : 0 +} + +variables['ptll'] = { + 'name': 'ptll', + 'range' : (20, 0,200), + 'xaxis' : 'p_{T}^{ll} [GeV]', + 'fold' : 0 +} + +variables['drll'] = { + 'name': 'drll', + 'range' : (50, 0,5), + 'xaxis' : '#Delta R_{ll}', + 'fold' : 0 +} + +variables['dphill'] = { + 'name': 'dphill', + 'range' : (50, 0,5), + 'xaxis' : '#Delta #phi_{ll}', + 'fold' : 0 +} + +variables['ptll_more'] = { + 'name': 'ptll', + 'range' : (50, 0,100), + 'xaxis' : 'p_{T}^{ll} [GeV]', + 'fold' : 0 +} + +variables['pt1'] = { + 'name': 'Lepton_pt[0]', + 'range' : (20,0,200), + 'xaxis' : 'p_{T} 1st lep', + 'fold' : 3 +} + +variables['pt2'] = { + 'name': 'Lepton_pt[1]', + 'range' : (20,0,140), + 'xaxis' : 'p_{T} 2nd lep', + 'fold' : 3 +} + +variables['pt3'] = { + 'name': 'Lepton_pt[2]', + 'range' : (20,0,100), + 'xaxis' : 'p_{T} 3rd lep', + 'fold' : 3 +} + +variables['eta1'] = { + 'name': 'Lepton_eta[0]', + 'range' : (40,-3,3), + 'xaxis' : '#eta 1st lep', + 'fold' : 3 
+} + +variables['eta2'] = { + 'name': 'Lepton_eta[1]', + 'range' : (40,-3,3), + 'xaxis' : '#eta 2nd lep', + 'fold' : 3 +} + +variables['eta3'] = { + 'name': 'Lepton_eta[2]', + 'range' : (40,-3,3), + 'xaxis' : '#eta 3rd lep', + 'fold' : 3 +} + +variables['puppimet'] = { + 'name': 'PuppiMET_pt', + 'range' : (20,0,200), + 'xaxis' : 'Puppi MET p_{T} [GeV]', + 'fold' : 3 +} + +############# New Jet processing (the pt<30 mask goes through Alt so events with fewer jets never index CleanJet_pt out of range) + +variables['njet'] = { + 'name': 'Sum(CleanJet_pt>30)', + 'range' : (5,0,5), + 'xaxis' : 'Number of jets', + 'fold' : 2 +} + +variables['jetpt1'] = { + 'name': 'Alt(CleanJet_pt, 0, -99) - 9999.9*(Alt(CleanJet_pt, 0, 0)<30)', + 'range' : (40,0,200), + 'xaxis' : 'p_{T} 1st jet', + 'fold' : 0 +} + +variables['jetpt2'] = { + 'name': 'Alt(CleanJet_pt, 1, -99) - 9999.9*(Alt(CleanJet_pt, 1, 0)<30)', + 'range' : (40,0,200), + 'xaxis' : 'p_{T} 2nd jet', + 'fold' : 0 +} + +variables['jeteta1'] = { + 'name': 'Alt(CleanJet_eta, 0, -99) - 9999.9*(Alt(CleanJet_pt, 0, 0)<30)', + 'range' : (30,-4.7,4.7), + 'xaxis' : '#eta 1st jet', + 'fold' : 0 +} + +variables['jeteta1_fine_binning'] = { + 'name': 'Alt(CleanJet_eta, 0, -99) - 9999.9*(Alt(CleanJet_pt, 0, 0)<30)', + 'range' : (94,-4.7,4.7), + 'xaxis' : '#eta 1st jet', + 'fold' : 0 +} + +variables['jeteta2'] = { + 'name': 'Alt(CleanJet_eta, 1, -99) - 9999.9*(Alt(CleanJet_pt, 1, 0)<30)', + 'range' : (30,-4.7,4.7), + 'xaxis' : '#eta 2nd jet', + 'fold' : 0 +} + +variables['jeteta2_fine_binning'] = { + 'name': 'Alt(CleanJet_eta, 1, -99) - 9999.9*(Alt(CleanJet_pt, 1, 0)<30)', + 'range' : (94,-4.7,4.7), + 'xaxis' : '#eta 2nd jet', + 'fold' : 0 +} diff --git a/ZH3l_BDT/readme.md b/ZH3l_BDT/readme.md new file mode 100644 index 00000000..cd139cb5 --- /dev/null +++ b/ZH3l_BDT/readme.md @@ -0,0 +1,10 @@ +Building towards a set of codes for the ZH3l analysis using the PlotsConfigurationsRun3/RDF framework. Among the many changes we desire to implement, the following are the ones we're starting out with: +1. 
Make the Run2 codes of ZH3l analysis compatible with the Run3 framework (Relevant folder: zh3l_run2_rdf). +2. Train a sample BDT in Run3 framework using Run2 data, weights, and scale-factors (Relevant folder: zh3l_run2_bdt_rdf). +3. Setup a full working pipeline enabling a classification neural network in PlotsConfigurationsRun3 / mkShapesRDF for the ZH3l analysis. + +Notes related to point 1 of the above list: +Most of the changes include switching to RDF compliant expressions, commands and functions. +Useful resource related to Run2->Run3 translation: https://indico.cern.ch/event/1201818/contributions/5180441/attachments/2567971/4427668/mkShapesRDF.pdf +Converting some functions in Run3 framework is pending and this has been put off for later. This means some nuisances (QCDscale and pdf) and base weights (for DY and ZZ samples) are not accurate. +10 out of 271 sample files were not processed owing to some segmentation faults (probably from corrupt/incorrect MC files). \ No newline at end of file diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/README.md b/ZH3l_BDT/zh3l_run2_bdt_rdf/README.md new file mode 100644 index 00000000..08d946c5 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/README.md @@ -0,0 +1,13 @@ +# Configuration for ZH3l BDT training + +The configurations and files to train BDT(s) for the ZH3l analysis are located in './bdt_train/'. The instructions to run the trainings follow. + python Classification_BDT.py +Currently no parameter/hyper-parameter optimization has been done. The model architecture and input parameters are also not designed/optimized for best performance. The codes simply establish the pipeline required to train and evaluate a BDT in the mkShapesRDF/PlotsConfigurationsRun3 framework. + +### Plot training results + +TBA + +# Evaluating trained BDT + +Work in progress. The configurations and files to run the analysis using the trained BDT are stored in './'. 
\ No newline at end of file diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/aliases.py b/ZH3l_BDT/zh3l_run2_bdt_rdf/aliases.py new file mode 100644 index 00000000..0aeb3148 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/aliases.py @@ -0,0 +1,256 @@ +#Aliases (mostly btag) +import os +import copy +import inspect + +mc = [skey for skey in samples if skey not in ('Fake', 'DATA')] + +#2018 +#bWP = '0.1241' #Loose +bWP = '0.4184' +# DS - ideally from https://btv-wiki.docs.cern.ch/ScaleFactors/Run2UL2018/#general-remarks, but seems like this is calculated from another dataset: https://indico.cern.ch/event/826738/contributions/3464989/attachments/1860911/3058344/BTag_190612_PtRel.pdf. +configurations = os.path.realpath(inspect.getfile(inspect.currentframe())) # this file +configurations = os.path.dirname(configurations) # 2022_v12 +configurations = os.path.dirname(configurations) # WZ +configurations = os.path.dirname(configurations) # ControlRegions +configurations = os.path.dirname(configurations) # PlotsConfigurationsRun3 + +aliases = {} +aliases = OrderedDict() + +aliases['bVeto'] = { + 'expr': '(Sum((CleanJet_pt > 20.0 && abs(CleanJet_eta) < 2.5) * Take(Jet_btagDeepB, CleanJet_jetIdx) > ' + bWP + ') == 0)' + +} + +aliases['btagSF'] = { +'expr': 'TMath::Exp(Sum(LogVec((CleanJet_pt>20 && abs(CleanJet_eta)<2.5)*Take(Jet_btagSF_deepcsv_shape, CleanJet_jetIdx)+1*(CleanJet_pt<20 || abs(CleanJet_eta)>2.5))))', +'samples': mc +} + +systs = ['jes','lf','hf','lfstats1','lfstats2','hfstats1','hfstats2','cferr1','cferr2'] +# DS -(?)- jes - Jet Energy Scale uncertainty, lf/hf - light/heavy flavor, stats - statistical uncertainty, cferr - charm flavour + +for s in systs: + aliases['btagSF'+s+'up'] = { + 'expr': aliases['btagSF']['expr'].replace('shape','shape_up_'+s), + 'samples':mc + } + aliases['btagSF'+s+'down'] = { + 'expr': aliases['btagSF']['expr'].replace('shape','shape_down_'+s), + 'samples':mc + } + +aliases['Jet_PUIDSF'] = { + 'expr': 'TMath::Exp(Sum((Jet_jetId >= 2) * 
LogVec(Jet_PUIDSF_loose)))', + 'samples': mc +} + +aliases['Jet_PUIDSF_up'] = { + 'expr': 'TMath::Exp(Sum((Jet_jetId >= 2) * LogVec(Jet_PUIDSF_loose_up)))', + 'samples': mc +} + +aliases['Jet_PUIDSF_down'] = { + 'expr': 'TMath::Exp(Sum((Jet_jetId >= 2) * LogVec(Jet_PUIDSF_loose_down)))', + 'samples': mc +} + +aliases['PromptGenLepMatch3l'] = { + 'expr': 'Alt(Lepton_promptgenmatched, 0, 0) * Alt(Lepton_promptgenmatched, 1, 0) * Alt(Lepton_promptgenmatched, 2, 0)', + 'samples': mc +} + +#aliases['LepWPCutNew'] = { 'expr': '(((abs(Lepton_pdgId[0])==13 && Muon_mvaTTH[Lepton_muonIdx[0]]>0.8) || (abs(Lepton_pdgId[0])==11 && Electron_mvaTTH[Lepton_electronIdx[0]]>0.7)) && ((abs(Lepton_pdgId[1])==13 && Muon_mvaTTH[Lepton_muonIdx[1]]>0.8) || (abs(Lepton_pdgId[1])==11 && Electron_mvaTTH[Lepton_electronIdx[1]]>0.7)) && ((abs(Lepton_pdgId[2])==13 && Muon_mvaTTH[Lepton_muonIdx[2]]>0.8) || (abs(Lepton_pdgId[2])==11 && Electron_mvaTTH[Lepton_electronIdx[2]]>0.7)))', + # 'samples': mc + ['DATA'] +#} + +aliases['Top_pTrw'] = { + 'expr': '(topGenPt * antitopGenPt > 0.) 
* (TMath::Sqrt(TMath::Exp(0.0615 - 0.0005 * topGenPt) * TMath::Exp(0.0615 - 0.0005 * antitopGenPt))) + (topGenPt * antitopGenPt <= 0.)', + 'samples': ['top'] +} + +#aliases['ZH3l_dphilmetjj_test'] = { +# 'linesToAdd': [ +# '.L %s/src/PlotsConfigurations/Configurations/ZH3l/scripts/ZH3l_patch.cc+' % os.getenv('CMSSW_BASE') +# ], +# 'class': 'ZH3l_patch', +# 'args': ("dphilmetjj") +#} + +#aliases['ZH3l_dphilmetj_test'] = { +# 'class': 'ZH3l_patch', +# 'args': ("dphilmetj") +#} + +#aliases['ZH3l_mTlmet_test'] = { +# 'class': 'ZH3l_patch', +# 'args': ("mTlmet") +#} + +#aliases['ZH3l_mTlmetj_test'] = { +# 'class': 'ZH3l_patch', +# 'args': ("mTlmetj") +#} + +#aliases['ZH3l_mTlmetjj_test'] = { +# 'class': 'ZH3l_patch', +# 'args': ("mTlmetjj") +#} + +####################### +### SFs for tthMVA ### +####################### + +aliases['SFweightEleUp'] = { + 'expr': 'LepSF3l__ele_'+eleWP_new+'__Up', + 'samples': mc +} + +aliases['SFweightEleDown'] = { + 'expr': 'LepSF3l__ele_'+eleWP_new+'__Do', + 'samples': mc +} + +aliases['SFweightMuUp'] = { + 'expr': 'LepSF3l__mu_'+muWP_new+'__Up', + 'samples': mc +} + +aliases['SFweightMuDown'] = { + 'expr': 'LepSF3l__mu_'+muWP_new+'__Do', + 'samples': mc +} + + +#aliases['ttHMVA_SF_3l'] = { +# 'linesToAdd': ['.L %s/src/PlotsConfigurations/Configurations/patches/compute_SF_BETA.C+' % os.getenv('CMSSW_BASE')], +# 'class': 'compute_SF', +# 'args' : ('2018', 3, 'total_SF'), +# 'samples': mc +#} + +#aliases['ttHMVA_SF_Up_0'] = { +# 'class': 'compute_SF', +# 'args' : ('2018', 3, 'single_SF_up', 0), +# 'nominalOnly' : True, +# 'samples': mc +#} + +#aliases['ttHMVA_SF_Up_1'] = { +# 'class': 'compute_SF', +# 'args' : ('2018', 3, 'single_SF_up', 1), +# 'nominalOnly' : True, +# 'samples': mc +#} + +#aliases['ttHMVA_SF_Up_2'] = { +# 'class': 'compute_SF', +# 'args' : ('2018', 3, 'single_SF_up', 2), +# 'nominalOnly' : True, +# 'samples': mc +#} + +#aliases['ttHMVA_SF_Down_0'] = { +# 'class': 'compute_SF', +# 'args' : ('2018', 3, 'single_SF_down', 0), 
+# 'nominalOnly' : True, +# 'samples': mc +#} + +#aliases['ttHMVA_SF_Down_1'] = { +# 'class': 'compute_SF', +# 'args' : ('2018', 3, 'single_SF_down', 1), +# 'nominalOnly' : True, +# 'samples': mc +#} + +#aliases['ttHMVA_SF_Down_2'] = { +# 'class': 'compute_SF', +# 'args' : ('2018', 3, 'single_SF_down', 2), +# 'nominalOnly' : True, +# 'samples': mc +#} + +#aliases['ttHMVA_3l_ele_SF_Up'] = { +# 'expr' : '(ttHMVA_SF_Up_0[0]*(abs(Lepton_pdgId[0]) == 11) + (abs(Lepton_pdgId[0]) == 13)) *\ +# (ttHMVA_SF_Up_1[0]*(abs(Lepton_pdgId[1]) == 11) + (abs(Lepton_pdgId[1]) == 13)) *\ +# (ttHMVA_SF_Up_2[0]*(abs(Lepton_pdgId[2]) == 11) + (abs(Lepton_pdgId[2]) == 13))', +# 'nominalOnly' : True, +# 'samples' : mc +#} + +#aliases['ttHMVA_3l_ele_SF_Down'] = { +# 'expr' : '(ttHMVA_SF_Down_0[0]*(abs(Lepton_pdgId[0]) == 11) + (abs(Lepton_pdgId[0]) == 13)) *\ +# (ttHMVA_SF_Down_1[0]*(abs(Lepton_pdgId[1]) == 11) + (abs(Lepton_pdgId[1]) == 13)) *\ +# (ttHMVA_SF_Down_2[0]*(abs(Lepton_pdgId[2]) == 11) + (abs(Lepton_pdgId[2]) == 13))', +# 'nominalOnly' : True, +# 'samples' : mc +#} + +#aliases['ttHMVA_3l_mu_SF_Up'] = { +# 'expr' : '(ttHMVA_SF_Up_0[0]*(abs(Lepton_pdgId[0]) == 13) + (abs(Lepton_pdgId[0]) == 11)) *\ +# (ttHMVA_SF_Up_1[0]*(abs(Lepton_pdgId[1]) == 13) + (abs(Lepton_pdgId[1]) == 11)) *\ +# (ttHMVA_SF_Up_2[0]*(abs(Lepton_pdgId[2]) == 13) + (abs(Lepton_pdgId[2]) == 11))', +# 'nominalOnly' : True, +# 'samples' : mc +#} + +#aliases['ttHMVA_3l_mu_SF_Down'] = { +# 'expr' : '(ttHMVA_SF_Down_0[0]*(abs(Lepton_pdgId[0]) == 13) + (abs(Lepton_pdgId[0]) == 11)) *\ +# (ttHMVA_SF_Down_1[0]*(abs(Lepton_pdgId[1]) == 13) + (abs(Lepton_pdgId[1]) == 11)) *\ +# (ttHMVA_SF_Down_2[0]*(abs(Lepton_pdgId[2]) == 13) + (abs(Lepton_pdgId[2]) == 11))', +# 'nominalOnly' : True, +# 'samples' : mc +#} + +# In WpWmJJ_EWK events, partons [0] and [1] are always the decay products of the first W +aliases['lhe_mW1'] = { + 'expr': 'TMath::Sqrt(2. 
* Take(LHEPart_pt, 0) * Take(LHEPart_pt, 1) * (TMath::CosH(Take(LHEPart_eta, 0) - Take(LHEPart_eta, 1)) - TMath::Cos(Take(LHEPart_phi, 0) - Take(LHEPart_phi, 1))))', + 'samples': ['WWewk'] +} + +# and [2] [3] are the second W +aliases['lhe_mW2'] = { + 'expr': 'TMath::Sqrt(2. * Take(LHEPart_pt, 2) * Take(LHEPart_pt, 3) * (TMath::CosH(Take(LHEPart_eta, 2) - Take(LHEPart_eta, 3)) - TMath::Cos(Take(LHEPart_phi, 2) - Take(LHEPart_phi, 3))))', + 'samples': ['WWewk'] +} + +aliases['gstarHigh'] = { + 'expr': 'Gen_ZGstar_mass <0 || Gen_ZGstar_mass > 4', + 'samples': ['WZ'] +} + +# aliases['nCleanGenJet'] = { +# 'linesToAdd': ['.L %s/src/PlotsConfigurations/Configurations/Differential/ngenjet.cc+' % os.getenv('CMSSW_BASE') +# ], +# 'class': 'CountGenJet', +# 'samples': mc +# } + +# configurations = os.path.abspath('.') + '/' +# print(configurations) +# print('\n\n\n') + +# aliases['nCleanGenJet'] = { +# 'linesToAdd': ['.L %sngenjet.cc+' % configurations], +# 'class': 'CountGenJet', +# 'args': 'nLeptonGen, LeptonGen_isPrompt,\ +# LeptonGen_pdgId, LeptonGen_pt, LeptonGen_eta, LeptonGen_phi, \ +# LeptonGen_mass, nPhotonGen, PhotonGen_pt, PhotonGen_eta,PhotonGen_phi, \ +# PhotonGen_mass, nGenJet, GenJet_pt, GenJet_eta, GenJet_phi', +# 'samples': mc +# } + +#puidSFSource = '%s/src/LatinoAnalysis/NanoGardener/python/data/JetPUID_effcyandSF.root' % os.getenv('CMSSW_BASE') +#puidSFSource = '%s/src/PlotsConfigurations/Configurations/patches/PUID_80XTraining_EffSFandUncties.root' % os.getenv('CMSSW_BASE') + +#aliases['PUJetIdSF'] = { +# 'linesToAdd': [ +# 'gSystem->AddIncludePath("-I%s/src");' % os.getenv('CMSSW_BASE'), +# '.L %s/src/PlotsConfigurations/Configurations/patches/pujetidsf_event_new.cc+' % os.getenv('CMSSW_BASE') +# ], +# 'class': 'PUJetIdEventSF', +# 'args': (puidSFSource, '2018', 'loose'), +# 'samples': mc +#} diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/Classification_BDT.py b/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/Classification_BDT.py new file mode 100644 index 
00000000..bfe2a09e
--- /dev/null
+++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/Classification_BDT.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+from ROOT import TMVA, TFile, TTree, TCut, TChain
+from subprocess import call
+from os.path import isfile
+
+import sys
+
+import config_BDT as config
+
+# Setup TMVA
+def runJob(output_and_dataset_name = ""):
+    """Train, test and evaluate the BDT booked below with TMVA.
+
+    ``output_and_dataset_name`` is appended to the output file name
+    (``TMVA<suffix>.root``, recreated on every call) and to the
+    DataLoader name (``dataset<suffix>``).
+
+    Every entry of ``config.samples`` that ``config.structure`` does
+    not flag as data is chained from its "Events" trees and added as
+    a signal or background tree (tree weight 1.0) depending on
+    ``isSignal``.
+    """
+    TMVA.Tools.Instance()
+    TMVA.PyMethodBase.PyInitialize()
+
+    output = TFile.Open('TMVA{}.root'.format(output_and_dataset_name), 'RECREATE')
+    factory = TMVA.Factory('TMVAClassification', output,'!V:!Silent:Color:DrawProgressBar:AnalysisType=Classification')
+    # factory = TMVA.Factory('TMVAClassification', output,'!V:!Silent:Color:DrawProgressBar:Transformations=D,G:AnalysisType=Classification')
+
+    dataloader = TMVA.DataLoader("dataset{}".format(output_and_dataset_name))
+
+    for br in config.mvaVariables:
+        dataloader.AddVariable(br)
+
+    for sampleName, sample in config.samples.items():
+        isData = config.structure[sampleName]['isData']
+        # isData is either an int flag or a container such as ['all']; data is skipped either way
+        if (isinstance(isData, int) and isData == 1) or (not isinstance(isData, int) and 'all' in isData):
+            continue
+
+        sample['tree'] = TChain("Events")
+        print("Sample name: ", sampleName)
+        for name, *location_weights in sample['name']:
+            print("Sub-sample: ", name)
+            locations = location_weights[0]
+            # weights = location_weights[1] if len(location_weights) > 1 else None
+            for loc in locations:
+                print("file: ", loc)
+                sample['tree'].Add(loc)
+
+        if config.structure[sampleName]['isSignal']==1:
+            dataloader.AddSignalTree(sample['tree'], 1.0)
+        else:
+            dataloader.AddBackgroundTree(sample['tree'], 1.0)
+        # output_dim += 1
+    # Reference: https://root.cern.ch/download/doc/tmva/TMVAUsersGuide.pdf
+    # Train test dataset will contain less/equal events compared to signal and background trees. How these events are chosen is given by the next line. Event weights are given by Monte Carlo generators, and may turn out to be overall very small or large. To avoid artifacts due to this, TMVA can internally renormalise the signal and background training using NormMode.
+    dataloader.PrepareTrainingAndTestTree(TCut(config.cut),'SplitMode=Random:NormMode=NumEvents:!V')
+    # dataloader.PrepareTrainingAndTestTree(TCut(config.cut),'nTrain_Signal=100000:nTrain_Background=100000:SplitMode=Random:NormMode=NumEvents:!V')#SSSF
+
+    factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTG4", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=2" )
+    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTG4D3", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=3" )
+    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTG4D4", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=4" )
+    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTG4D5", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=5" )
+    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTG4D6", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=6" )
+    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTG4C3", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=300:MaxDepth=2" )
+    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTG4SK01", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.01:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=2" )
+    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTG4F07" , "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.7:nCuts=500:MaxDepth=2" )
+    # factory.BookMethod(dataloader, TMVA.Types.kBDT, "BDTG4SK01F07", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.01:UseBaggedBoost:GradBaggingFraction=0.7:nCuts=500:MaxDepth=2" )
+    # Run training, test and evaluation
+    factory.TrainAllMethods()
+    factory.TestAllMethods()
+    factory.EvaluateAllMethods()
+
+    output.Close()
+
+if __name__ == "__main__":
+
+    print("Input arguments: {}".format(sys.argv))
+    if len(sys.argv) > 1:
+        print("Suffix is: {}".format(sys.argv[1]))
+        output_and_dataset = sys.argv[1]
+        runJob(output_and_dataset)
+        # Argument-list form: no shell parsing of the user-supplied suffix.
+        # NOTE(review): the DataLoader above is already named dataset<suffix>,
+        # so this move looks redundant - confirm it is still needed.
+        call(["mv", "dataset", "dataset{}".format(output_and_dataset)])
+    else:
+        print("No suffix, running with standard output name")
+        runJob()
diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/config_BDT.py b/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/config_BDT.py
new file mode 100644
index 00000000..bba9cc4c
--- /dev/null
+++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/config_BDT.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+import os
+from ROOT import gROOT, TFile, TChain, TCut
+
+# import models
+import preselections
+
+isDEV=False
+
+# Load configuration
+with open("configuration_BDT.py") as handle:
+    exec(handle.read()) # Read the file content as a string
+
+samples={}
+structure={}
+cuts={}
+for f in [samplesFile, structureFile, cutsFile]:
+    with open(f) as handle:
+        exec(handle.read())
+
+
+# Reduce sample files for fast dev
+if isDEV:
+    for sampleName, sample in list(samples.items()):  # snapshot: entries are popped inside the loop
+        if sampleName not in ['DY', 'top', 'ttV', 'WW', 'Zg', 'ZgS', 'WZ', 'ZZ', 'VVV', 'ZH_hww','ggZH_hww','WH_hww','ttH_hww', 'ZH_htt', 'WH_htt', 'Fake_e', 'Fake_m']:
+        # if sampleName not in ['Wg','Zg','WgS','ZgS','ZZ','WZ','top','DY','WH_hww_plus','WH_hww_minus','WH_htt_plus','WH_htt_minus']:
+            samples.pop(sampleName)
+            continue
+
+# Define data to be loaded
+with open("./preselections.py") as handle:
+    exec(handle.read())
+
+cut="(({0}) && ({1}))".format(cuts['NONE'],preselections['ALL'])
+
+# mvaVariables = [
+#     'CleanJet_pt[0][0]',
+#     'ZH3l_dphilmetjj',
+#     'PuppiMET_pt',
+#
'Lepton_pt[0][0]', +# 'Lepton_pt[1][0]', +# 'Lepton_pt[2][0]' +# ] + +mvaVariables = [ + 'Alt$( CleanJet_pt[0], 0)', + 'ZH3l_dphilmetjj', + 'PuppiMET_pt', + 'Alt$( Lepton_pt[0], 0)', + 'Alt$( Lepton_pt[1], 0)', + 'Alt$( Lepton_pt[2], 0)' +] + +# : 1 : WH3l_ZVeto : 8.013e-02 +# : 2 : MinIf$(WH3l_mOSll[],WH3l_mOSll[Iteration$]>0) : 6.387e-02 +# : 3 : MinIf$(WH3l_ptOSll[],WH3l_ptOSll[Iteration$]>0) : 6.168e-02 +# : 4 : WH3l_dphilmet[0] : 5.700e-02 +# : 5 : Alt$(Lepton_pt[0],0) : 5.583e-02 +# : 6 : WH3l_dphilmet[2] : 5.460e-02 +# : 7 : WH3l_ptlll : 5.448e-02 +# : 8 : WH3l_dphilllmet : 5.393e-02 +# : 9 : WH3l_mtWWW : 5.162e-02 +# : 10 : WH3l_mlll : 5.103e-02 +# : 11 : WH3l_ptWWW : 5.083e-02 +# : 12 : MinIf$(WH3l_drOSll[],WH3l_drOSll[Iteration$]>0) : 4.966e-02 +# : 13 : PuppiMET_pt : 4.907e-02 +# : 14 : WH3l_mtlmet[1] : 4.747e-02 +# : 15 : WH3l_dphilmet[1] : 4.527e-02 +# : 16 : Alt$(Lepton_pt[1],0) : 4.456e-02 +# : 17 : WH3l_mtlmet[2] : 4.388e-02 +# : 18 : WH3l_mtlmet[0] : 4.260e-02 +# : 19 : Alt$(Lepton_pt[2],0) : 4.250e-02 diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/configuration_BDT.py b/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/configuration_BDT.py new file mode 100644 index 00000000..7dc6dfd2 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/configuration_BDT.py @@ -0,0 +1,17 @@ +# example of configuration file + +import os + +tagName = '' + +# luminosity to normalize to (in 1/fb) +lumi = 59.83 + +# file with list of cuts +cutsFile = 'cuts_BDT.py' + +# file with list of samples +samplesFile = 'samples_BDT.py' + +# structure file for datacard +structureFile = 'structure.py' diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/cuts_BDT.py b/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/cuts_BDT.py new file mode 100644 index 00000000..7281a38a --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/cuts_BDT.py @@ -0,0 +1,144 @@ +# cuts + +cuts = {} +# 'MinIf$( WH3l_mOSll[], WH3l_mOSll[Iteration$] > 0) > 12 \ +# && Alt$(Lepton_pt[0],0)>25 \ +# && Alt$(Lepton_pt[1],0)>10 \ 
+# && Alt$(Lepton_pt[2],0)>10 \ +# && (nLepton>=3 && Alt$(Lepton_pt[3],0)<10) \ +# && abs(WH3l_chlll) == 1 \ +# && Alt$(CleanJet_pt[0], 0) < 30 \ +# && WH3l_flagOSSF == 1 \ +# && WH3l_ZVeto > 20 \ +# && PuppiMET_pt > 40 \ +# ' + +cuts['NONE'] = '1' # No cuts for BDT training + +cuts['zmass_cut'] = 'WH3l_ZVeto < 25' + +cuts['jet_cut_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + ' + +cuts['bveto_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + ' + +cuts['z4lveto_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + ' + +cuts['zh3l_SR_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && ZH3l_dphilmetjj < 3.14159/2 \ + ' + +cuts['zh3l_WZ_CR_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && ZH3l_dphilmetjj > 3.14159/2 \ + ' + +cuts['met_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + ' + +cuts['zh3l_SR_2j_met'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + && ZH3l_dphilmetjj < 3.14159/2 \ + ' + +cuts['zh3l_WZ_CR_2j_met'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + && ZH3l_dphilmetjj > 3.14159/2 \ + ' + +cuts['jet_cut_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + ' + +cuts['bveto_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + ' + 
+cuts['z4lveto_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + ' + +cuts['zh3l_SR_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && ZH3l_dphilmetj < 3.14159/2 \ + ' + +cuts['zh3l_WZ_CR_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && ZH3l_dphilmetj > 3.14159/2 \ + ' + +cuts['met_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + ' + +cuts['zh3l_SR_1j_met'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + && ZH3l_dphilmetj < 3.14159/2 \ + ' + +cuts['zh3l_WZ_CR_1j_met'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + && ZH3l_dphilmetj > 3.14159/2 \ + ' + +''' + #11 = e +# 13 = mu +# 15 = tau +''' diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/preselections.py b/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/preselections.py new file mode 100644 index 00000000..5b66af1a --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/preselections.py @@ -0,0 +1,21 @@ +preselections = {} + +# preselections['ALL'] = 'Lepton_pt[0][0]>25 \ +# && Lepton_pt[1][0]>20 \ +# && Lepton_pt[2][0]>15 \ +# && Lepton_pt[3][0]<10 \ +# && (WH3l_mOSll[0] < 0 || WH3l_mOSll[0] > 12) \ +# && (WH3l_mOSll[1] < 0 || WH3l_mOSll[1] > 12) \ +# && (WH3l_mOSll[2] < 0 || WH3l_mOSll[2] > 12) \ +# && abs(WH3l_chlll) == 1 \ +# ' + +preselections['ALL'] = 'Alt$( Lepton_pt[0], 0) > 25 \ + && Alt$( Lepton_pt[1], 0) > 20 \ + && Alt$( Lepton_pt[2], 0) > 15 \ + && Alt$( Lepton_pt[3], 0) < 10 \ + && (WH3l_mOSll[0] < 0 || 
WH3l_mOSll[0] > 12) \
+ && (WH3l_mOSll[1] < 0 || WH3l_mOSll[1] > 12) \
+ && (WH3l_mOSll[2] < 0 || WH3l_mOSll[2] > 12) \
+ && abs(WH3l_chlll) == 1 \
+ '
\ No newline at end of file
diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/samples_BDT.py b/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/samples_BDT.py
new file mode 100644
index 00000000..0d7178db
--- /dev/null
+++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/samples_BDT.py
@@ -0,0 +1,441 @@
+import os
+import subprocess
+
+# global getSampleFiles
+# from LatinoAnalysis.Tools.commonTools import getSampleFiles, addSampleWeight, getBaseWnAOD
+# It looks like getBaseWnAOD and addSampleWeight from Latinos does the same job as CombineBaseW function from makeShapesRDF. Similarly getSampleFiles function from Latinos is the same as nanoGetSampleFiles from makeShapesRDF.
+
+from mkShapesRDF.lib.search_files import SearchFiles
+searchFiles = SearchFiles()
+redirector = ""
+limitFiles = 1
+samples = {}
+
+def nanoGetSampleFiles(path, name):
+    """Return ``[(name, files)]`` for sample *name* found under *path*.
+
+    At most ``limitFiles`` files are kept, unless ``limitFiles`` is -1.
+    """
+    _files = searchFiles.searchFiles(path, name, redirector=redirector)
+    if limitFiles != -1 and len(_files) > limitFiles:
+        return [(name, _files[:limitFiles])]
+    else:
+        return [(name, _files)]
+
+def getSampleFilesNano(inputDir,Sample,absPath=False):
+    """Thin wrapper around nanoGetSampleFiles; ``absPath`` is accepted but unused."""
+    # return getSampleFiles(inputDir,Sample,absPath,'nanoLatino_')
+    return nanoGetSampleFiles(inputDir, Sample)
+
+def CombineBaseW(samples, proc, samplelist):
+    """Return (as a string) a common baseW for the sub-samples in *samplelist*.
+
+    baseW is read from the "Events" tree of the listed sub-sample with the
+    fewest files, multiplied by that sub-sample's summed genEventSumw and
+    divided by the genEventSumw summed over all listed sub-samples.
+    """
+    # Local import: this file does not import ROOT itself, so the name is
+    # unbound when it is exec()'d from a namespace that lacks it.
+    import ROOT
+
+    _filtFiles = list(filter(lambda k: k[0] in samplelist, samples[proc]["name"]))
+    _files = list(map(lambda k: k[1], _filtFiles))
+    _l = list(map(lambda k: len(k), _files))
+    leastFiles = _files[_l.index(min(_l))]
+    dfSmall = ROOT.RDataFrame("Runs", leastFiles)
+    s = dfSmall.Sum("genEventSumw").GetValue()
+    f = ROOT.TFile(leastFiles[0])
+    t = f.Get("Events")
+    t.GetEntry(1)
+    xs = t.baseW * s
+
+    __files = []
+    for f in _files:
+        __files += f
+    df = ROOT.RDataFrame("Runs", __files)
+    s = df.Sum("genEventSumw").GetValue()
+    newbaseW = str(xs / s)
+    return newbaseW # "/baseW is used after getBaseWnAOD/CombineBaseW calls in this code"
+    # weight = newbaseW + "/baseW"
+
+    # for iSample in samplelist:
+    #     addSampleWeight(samples, proc, iSample, weight)
+
+
+def addSampleWeight(samples, sampleName, sampleNameType, weight):
+    """Attach *weight* to sub-sample *sampleNameType* of ``samples[sampleName]``.
+
+    All entries named *sampleNameType* are removed from the "name" list and
+    the first of them is re-appended at its end with ``(weight)`` as weight
+    string, multiplied onto its previous weight string when it had one.
+    Raises IndexError when no entry matches.
+    """
+    obj = list(filter(lambda k: k[0] == sampleNameType, samples[sampleName]["name"]))[0]
+    samples[sampleName]["name"] = list(
+        filter(lambda k: k[0] != sampleNameType, samples[sampleName]["name"])
+    )
+    if len(obj) > 2:
+        samples[sampleName]["name"].append(
+            (obj[0], obj[1], obj[2] + "*(" + weight + ")")
+        )
+    else:
+        samples[sampleName]["name"].append((obj[0], obj[1], "(" + weight + ")"))
+
+
+##############################################
+###### Tree Directory according to site ######
+##############################################
+
+SITE=os.uname()[1]
+xrootdPath=''
+if 'iihe' in SITE :
+    xrootdPath = 'dcap://maite.iihe.ac.be/'
+    treeBaseDir = '/pnfs/iihe/cms/store/user/xjanssen/HWW2015/'
+elif 'cern' in SITE :
+    #xrootdPath='root://eoscms.cern.ch/'
+    treeBaseDir = '/eos/cms/store/group/phys_higgs/cmshww/amassiro/HWWNano/'
+else :
+    raise RuntimeError("samples_BDT.py: unknown site '%s', cannot set treeBaseDir" % SITE)
+
+directory = treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7'
+
+################################################
+############ NUMBER OF LEPTONS #################
+################################################
+
+#Nlep='2'
+Nlep='3'
+#Nlep='4'
+
+################################################
+############### Lepton WP ######################
+################################################
+
+eleWP='mvaFall17V1Iso_WP90'
+#eleWP='mvaFall17V1Iso_WP90_SS'
+#eleWP='mvaFall17V2Iso_WP90'
+#eleWP='mvaFall17V2Iso_WP90_SS'
+muWP ='cut_Tight_HWWW'
+eleWP_new = 'mvaFall17V1Iso_WP90_tthmva_70'
+muWP_new = 'cut_Tight_HWWW_tthmva_80'
+
+LepWPCut = 'LepCut'+Nlep+'l__ele_'+eleWP_new+'__mu_'+muWP_new
+#LepWPCut = 'LepCut'+Nlep+'l__ele_'+eleWP+'__mu_'+muWP
+#LepWPweight = 'ttHMVA_SF_3l[0]' #SF for new WPs, defined in aliases
+LepWPweight =
'LepSF'+Nlep+'l__ele_'+eleWP_new+'__mu_'+muWP_new + +################################################ +############ BASIC MC WEIGHTS ################## +################################################ + +XSWeight = 'XSWeight' +SFweight = 'SFweight'+Nlep+'l*'+LepWPweight+'*'+LepWPCut+'*Jet_PUIDSF' +PromptGenLepMatch = 'PromptGenLepMatch'+Nlep+'l' + +################################################ +############## FAKE WEIGHTS #################### +################################################ + +#eleWP_new = 'mvaFall17V1Iso_WP90_tthmva_70' +#muWP_new = 'cut_Tight_HWWW_tthmva_80' + +if Nlep == '2' : + fakeW = 'fakeW2l_ele_'+eleWP_new+'_mu_'+muWP_new + #fakeW = 'fakeW2l_ele_'+eleWP+'_mu_'+muWP +else: + fakeW = 'fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_'+Nlep+'l' + #fakeW = 'fakeW_ele_'+eleWP+'_mu_'+muWP+'_'+Nlep+'l' + +################################################ +############### B-Tag WP ###################### +################################################ + +SFweight += '*btagSF' #define in aliases.py + +################################################ +############ MET FILTERS ################### +################################################ + +METFilter_MC = 'METFilter_MC' +METFilter_DATA = 'METFilter_DATA' + +################################################ +############ DATA DECLARATION ################## +################################################ + +DataRun = [ + ['A','Run2018A-02Apr2020-v1'] , + ['B','Run2018B-02Apr2020-v1'] , + ['C','Run2018C-02Apr2020-v1'] , + ['D','Run2018D-02Apr2020-v1'] , + ] + +DataSets = ['MuonEG','DoubleMuon','SingleMuon','EGamma'] + +DataTrig = { + 'MuonEG' : 'Trigger_ElMu' , + 'DoubleMuon' : '!Trigger_ElMu && Trigger_dblMu' , + 'SingleMuon' : '!Trigger_ElMu && !Trigger_dblMu && Trigger_sngMu' , + 'EGamma' : '!Trigger_ElMu && !Trigger_dblMu && !Trigger_sngMu && (Trigger_sngEl || Trigger_dblEl)' , + } + +########################################### +############# BACKGROUNDS ############### 
+########################################### + +############ DY ############ + +ptllDYW_NLO = '(0.87*(gen_ptll<10)+(0.379119+0.099744*gen_ptll-0.00487351*gen_ptll**2+9.19509e-05*gen_ptll**3-6.0212e-07*gen_ptll**4)*(gen_ptll>=10 && gen_ptll<45)+(9.12137e-01+1.11957e-04*gen_ptll-3.15325e-06*gen_ptll**2-4.29708e-09*gen_ptll**3+3.35791e-11*gen_ptll**4)*(gen_ptll>=45 && gen_ptll<200) + 1*(gen_ptll>200))' +ptllDYW_LO = '((0.632927+0.0456956*gen_ptll-0.00154485*gen_ptll*gen_ptll+2.64397e-05*gen_ptll*gen_ptll*gen_ptll-2.19374e-07*gen_ptll*gen_ptll*gen_ptll*gen_ptll+6.99751e-10*gen_ptll*gen_ptll*gen_ptll*gen_ptll*gen_ptll)*(gen_ptll>0)*(gen_ptll<100)+(1.41713-0.00165342*gen_ptll)*(gen_ptll>=100)*(gen_ptll<300)+1*(gen_ptll>=300))' +Zgfilter = '( !(Sum(PhotonGen_isPrompt==1 && PhotonGen_pt>15 && abs(PhotonGen_eta)<2.6) > 0 && Sum(LeptonGen_isPrompt==1 && LeptonGen_pt>15)>=2) )' #Zg sample uses photon pt > 15, lepton pt > 15 + +samples['DY'] = { 'name' : getSampleFilesNano(directory,'DYJetsToLL_M-10to50-LO') + + getSampleFilesNano(directory,'DYJetsToLL_M-10to50-LO_ext1') + + getSampleFilesNano(directory,'DYJetsToLL_M-50-LO') + + getSampleFilesNano(directory, 'DYJetsToLL_M-50') + + getSampleFilesNano(directory, 'DYJetsToLL_M-50_ext2') + + getSampleFilesNano(directory,'DYJetsToLL_M-4to50_HT-100to200') + + getSampleFilesNano(directory,'DYJetsToLL_M-4to50_HT-200to400') + + getSampleFilesNano(directory,'DYJetsToLL_M-4to50_HT-400to600') + + getSampleFilesNano(directory,'DYJetsToLL_M-4to50_HT-600toInf') + + getSampleFilesNano(directory,'DYJetsToLL_M-50_HT-70to100') + + getSampleFilesNano(directory,'DYJetsToLL_M-50_HT-100to200') + + getSampleFilesNano(directory,'DYJetsToLL_M-50_HT-200to400') + # + getSampleFilesNano(directory,'DYJetsToLL_M-50_HT-400to600') + # + getSampleFilesNano(directory,'DYJetsToLL_M-50_HT-400to600_ext2') + + getSampleFilesNano(directory,'DYJetsToLL_M-50_HT-600to800') + + getSampleFilesNano(directory,'DYJetsToLL_M-50_HT-800to1200') + + 
getSampleFilesNano(directory,'DYJetsToLL_M-50_HT-1200to2500') + + getSampleFilesNano(directory,'DYJetsToLL_M-50_HT-2500toInf'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC +'*'+Zgfilter, + 'FilesPerJob' : 5, + 'suppressNegative' :['all'], + 'suppressNegativeNuisances' :['all'], + } + +M10baseW = '1'#CombineBaseW(directory,'Autumn18_102X_nAODv7_Full2018v7',['DYJetsToLL_M-10to50-LO','DYJetsToLL_M-10to50-LO_ext1']) +#HT400baseW = getBaseWnAOD(directory,'Autumn18_102X_nAODv7_Full2018v7',['DYJetsToLL_M-50_HT-400to600']) + +addSampleWeight(samples,'DY','DYJetsToLL_M-10to50-LO' ,ptllDYW_LO+'*(LHE_HT<100.0)*'+M10baseW+'/baseW') +addSampleWeight(samples,'DY','DYJetsToLL_M-10to50-LO_ext1' ,ptllDYW_LO+'*(LHE_HT<100.0)*'+M10baseW+'/baseW') +addSampleWeight(samples,'DY','DYJetsToLL_M-50-LO' ,ptllDYW_LO+'*(LHE_HT<70.0)') +addSampleWeight(samples,'DY','DYJetsToLL_M-4to50_HT-100to200' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-4to50_HT-200to400' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-4to50_HT-400to600' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-4to50_HT-600toInf' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-50_HT-70to100' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-50_HT-100to200' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-50_HT-200to400' ,ptllDYW_LO) +#addSampleWeight(samples,'DY','DYJetsToLL_M-50_HT-400to600' ,ptllDYW_LO+'*'+HT400baseW+'/baseW') +#addSampleWeight(samples,'DY','DYJetsToLL_M-50_HT-400to600_ext2',ptllDYW_LO+'*'+HT400baseW+'/baseW') +addSampleWeight(samples,'DY','DYJetsToLL_M-50_HT-600to800' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-50_HT-800to1200' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-50_HT-1200to2500' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-50_HT-2500toInf' ,ptllDYW_LO) +addSampleWeight(samples,'DY', 'DYJetsToLL_M-50' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-50_ext2' ,ptllDYW_LO) + 
+############ Top ############ + +samples['top'] = { 'name' : getSampleFilesNano(directory,'TTTo2L2Nu') + + getSampleFilesNano(directory,'ST_s-channel_ext1') + # + getSampleFilesNano(directory,'ST_t-channel_antitop') + + getSampleFilesNano(directory,'ST_t-channel_top') + + getSampleFilesNano(directory,'ST_tW_antitop_ext1') + + getSampleFilesNano(directory,'ST_tW_top_ext1'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + 'FilesPerJob' : 6, + } + +addSampleWeight(samples,'top','TTTo2L2Nu','Top_pTrw') + +samples['ttV'] = { 'name' : getSampleFilesNano(directory,'TTWJetsToLNu') + + getSampleFilesNano(directory,'TTZjets'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + 'FilesPerJob' : 5, + } + + +############ WW ############ + + +samples['WW'] = { 'name' : getSampleFilesNano(directory,'WWTo2L2Nu'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC+'*nllW', + } + +#samples['WWewk'] = { 'name' : getSampleFilesNano(directory,'WpWmJJ_EWK_QCD_noTop_noHiggs') +# + getSampleFilesNano(directory,'WpWpJJ_EWK_QCD'), +# 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC+'*(Sum$(abs(GenPart_pdgId)==6 || GenPart_pdgId==25)==0)*(lhe_mW1[0] > 60. && lhe_mW1[0] < 100. && lhe_mW2[0] > 60. 
&& lhe_mW2[0] < 100.)', +# } + +#samples['ggWW'] = { 'name' : getSampleFilesNano(directory,'GluGluToWWToENEN') +# + getSampleFilesNano(directory,'GluGluToWWToENMN') +# + getSampleFilesNano(directory,'GluGluToWWToENTN') +# + getSampleFilesNano(directory,'GluGluToWWToMNEN') +# + getSampleFilesNano(directory,'GluGluToWWToMNMN') +# + getSampleFilesNano(directory,'GluGluToWWToMNTN') +# + getSampleFilesNano(directory,'GluGluToWWToTNEN') +# + getSampleFilesNano(directory,'GluGluToWWToTNMN') +# + getSampleFilesNano(directory,'GluGluToWWToTNTN'), +# 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC+'*1.53/1.4', +# } + +###### Zg + +samples['Zg'] = { 'name' : getSampleFilesNano(directory,'ZGToLLG'), + 'weight' : XSWeight+'*'+SFweight+'*'+METFilter_MC + '*(Gen_ZGstar_mass <= 0)', + 'FilesPerJob' : 6 , + } + +###### Zg* + +samples['ZgS'] = { 'name' : getSampleFilesNano(directory,'ZGToLLG'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC+'*(Gen_ZGstar_mass > 0)', + 'FilesPerJob' : 4 , + } + +##### WZ + +samples['WZ'] = { 'name': getSampleFilesNano(directory,'WZTo3LNu_mllmin01') + + getSampleFilesNano(directory,'WZTo2L2Q'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC+'*(gstarHigh)' , + 'FilesPerJob' : 5 , + } + +##### ZZ + +samples['ZZ'] = { 'name' : getSampleFilesNano(directory,'ZZTo2L2Nu_ext1') + + getSampleFilesNano(directory,'ZZTo2L2Nu_ext2') + + getSampleFilesNano(directory,'ZZTo2L2Q') + + getSampleFilesNano(directory,'ZZTo4L_ext1') + + getSampleFilesNano(directory,'ZZTo4L_ext2') + #+ getSampleFilesNano(directory,'ggZZ4m') #Missing file for ElepTup + + getSampleFilesNano(directory,'ggZZ4m_ext1') + + getSampleFilesNano(directory,'ggZZ4t') + + getSampleFilesNano(directory,'ggZZ2e2t') + + getSampleFilesNano(directory,'ggZZ2m2t') + + getSampleFilesNano(directory,'ggZZ2e2m'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC, + 'FilesPerJob' : 3, + } + +ZZ2LbaseW = 
'1'#CombineBaseW(directory,'Autumn18_102X_nAODv7_Full2018v7',['ZZTo2L2Nu_ext1','ZZTo2L2Nu_ext2']) +ZZ4LbaseW = '1'#CombineBaseW(directory,'Autumn18_102X_nAODv7_Full2018v7',['ZZTo4L_ext1', 'ZZTo4L_ext2']) +#ggZZbaseW = getBaseWnAOD(directory,'Autumn18_102X_nAODv7_Full2018v7',['ggZZ4m', 'ggZZ4m_ext1']) + +addSampleWeight(samples,'ZZ','ZZTo2L2Nu_ext1',"1.07*"+ZZ2LbaseW+"/baseW") ## The non-ggZZ NNLO/NLO k-factor, cited from https://arxiv.org/abs/1405.2219v1 +addSampleWeight(samples,'ZZ','ZZTo2L2Nu_ext2',"1.07*"+ZZ2LbaseW+"/baseW") +addSampleWeight(samples,'ZZ','ZZTo2L2Q', "1.07") +addSampleWeight(samples,'ZZ','ZZTo4L_ext1', "1.07*"+ZZ4LbaseW+"/baseW") +addSampleWeight(samples,'ZZ','ZZTo4L_ext2', "1.07*"+ZZ4LbaseW+"/baseW") +addSampleWeight(samples,'ZZ','ggZZ2e2t', "1.68") ## The NLO/LO k-factor, cited from https://arxiv.org/abs/1509.06734v1 +addSampleWeight(samples,'ZZ','ggZZ2m2t', "1.68") +addSampleWeight(samples,'ZZ','ggZZ2e2m', "1.68") +#addSampleWeight(samples,'ZZ','ggZZ4m', "1.68*"+ggZZbaseW+"/baseW") +addSampleWeight(samples,'ZZ','ggZZ4m_ext1', "1.68") +addSampleWeight(samples,'ZZ','ggZZ4t', "1.68") + +############ VVV ############ + +samples['VVV'] = { 'name' : getSampleFilesNano(directory,'ZZZ') + + getSampleFilesNano(directory,'WZZ') + + getSampleFilesNano(directory,'WWZ') + + getSampleFilesNano(directory,'WWW'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + } + +########################################## +################ SIGNALS ################# +########################################## + +############ ZH H->WW ############ + +samples['ZH_hww'] = { 'name' : getSampleFilesNano(directory,'HZJ_HToWW_M125'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + } + +samples['ggZH_hww'] = { 'name' : getSampleFilesNano(directory,'GluGluZH_HToWW_M125'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + } + +############ WH H->WW ############ + +samples['WH_hww'] = { 'name' : 
getSampleFilesNano(directory,'HWplusJ_HToWW_M125') + + getSampleFilesNano(directory,'HWminusJ_HToWW_M125'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + } + +############ ttH ############ + +samples['ttH_hww'] = { 'name' : getSampleFilesNano(directory,'ttHToNonbb_M125'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + } + +############ bbH ############ +# Not available for Latinos 2016 v6 + +############ H->TauTau ############ + +#samples['ggH_htt'] = { 'name' : getSampleFilesNano(directory,'GluGluHToTauTau_M125'), +# 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , +# } + +#samples['qqH_htt'] = { 'name' : getSampleFilesNano(directory,'VBFHToTauTau_M125'), +# 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , +# } + +samples['ZH_htt'] = { 'name' : getSampleFilesNano(directory,'HZJ_HToTauTau_M125'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + } + +samples['WH_htt'] = { 'name' : getSampleFilesNano(directory,'HWplusJ_HToTauTau_M125') + + getSampleFilesNano(directory,'HWminusJ_HToTauTau_M125'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + } + +########################################### +################## FAKE ################### +########################################### + +samples['Fake'] = { 'name': [ ] , + 'weight' : fakeW+'*'+METFilter_DATA, + 'weights' : [ ] , + 'isData': ['all'], + 'FilesPerJob' : 500 , + 'suppressNegativeNuisances' :['all'], + } + +directory = treeBaseDir+'Run2018_102X_nAODv7_Full2018v7/DATAl1loose2018v7__l2loose__fakeW/' +#directory = treeBaseDir+'Run2018_102X_nAODv6_Full2018v6/DATAl1loose2018v6__l2loose__fakeW/' +for Run in DataRun : + for DataSet in DataSets : + FileTarget = getSampleFilesNano(directory,DataSet+'_'+Run[1],True) + for iFile in FileTarget: + samples['Fake']['name'].append(iFile) + samples['Fake']['weights'].append(DataTrig[DataSet]) + +# Commented 
out by DS to help facilitate BDT training, 18Dec25 +# samples['Fake']['subsamples'] = { +# 'e': 'abs(ZH3l_pdgid_l) == 11', +# 'm': 'abs(ZH3l_pdgid_l) == 13' +# } + +########################################### +################## DATA ################### +########################################### + +samples['DATA'] = { 'name': [ ] , + 'weight' : METFilter_DATA+'*'+LepWPCut, + 'weights' : [ ], + 'isData': ['all'], + 'FilesPerJob' : 500 , + } + +directory = treeBaseDir+'/Run2018_102X_nAODv7_Full2018v7/DATAl1loose2018v7__l2loose__l2tightOR2018v7/' +for Run in DataRun : + for DataSet in DataSets : + FileTarget = getSampleFilesNano(directory,DataSet+'_'+Run[1],True) + for iFile in FileTarget: + samples['DATA']['name'].append(iFile) + samples['DATA']['weights'].append(DataTrig[DataSet]) + diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/structure.py b/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/structure.py new file mode 100644 index 00000000..e6836144 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/bdt_train/structure.py @@ -0,0 +1,119 @@ +# structure configuration for datacard +# keys here must match keys in samples.py +structure = {} + +# Backgrounds + +# structure['Fake_e'] = { +# 'isSignal' : 0, +# 'isData' : 0, +# } + +# structure['Fake_m'] = { +# 'isSignal' : 0, +# 'isData' : 0, +# } +# Both fake subsamples merged into one for BDT training (DS, 18Dec25) +structure['Fake'] = { + 'isSignal' : 0, + 'isData' : 0, + } + +structure['DY'] = { + 'isSignal' : 0, + 'isData' : 0, + 'removeFromCuts' : ['zh3l_SR_1j'] + } + +structure['Zg'] = { + 'isSignal' : 0, + 'isData' : 0 + } + +structure['ZgS'] = { + 'isSignal' : 0, + 'isData' : 0 + } + +structure['WW'] = { + 'isSignal' : 0, + 'isData' : 0, + 'removeFromCuts' : ['zh3l_SR_1j','zh3l_SR_2j'] + } + +#structure['WWewk'] = { +# 'isSignal' : 0, +# 'isData' : 0 +# } + +#structure['ggWW'] = { +# 'isSignal' : 0, +# 'isData' : 0 +# } + +structure['WZ'] = { + 'isSignal' : 0, + 'isData' : 0 + } + +structure['VVV'] = { + 'isSignal' : 0, + 
'isData' : 0 + } + +structure['ZZ'] = { + 'isSignal' : 0, + 'isData' : 0 + } + +structure['ttV'] = { + 'isSignal' : 0, + 'isData' : 0 + } + +structure['top'] = { + 'isSignal' : 0, + 'isData' : 0, + 'removeFromCuts' : ['zh3l_WZ_CR_1j','zh3l_SR_1j'] + } + +# Signal +structure['WH_hww'] = { + 'isSignal' : 1, + 'isData' : 0 + } + +structure['ZH_hww'] = { + 'isSignal' : 1, + 'isData' : 0 + } + +structure['ggZH_hww'] = { + 'isSignal' : 1, + 'isData' : 0 + } + +structure['ttH_hww'] = { + 'isSignal' : 1, + 'isData' : 0 + } + +structure['WH_htt'] = { + 'isSignal' : 1, + 'isData' : 0 + } + +structure['ZH_htt'] = { + 'isSignal' : 1, + 'isData' : 0 + } + +# Data +structure['DATA'] = { + 'isSignal' : 0, + 'isData' : 1 + } + + + + diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/configuration.py b/ZH3l_BDT/zh3l_run2_bdt_rdf/configuration.py new file mode 100644 index 00000000..51d4dfda --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/configuration.py @@ -0,0 +1,92 @@ +import sys,os + +# tag used to identify the configuration folder version +tag = 'ZH3l_BDTrun2' + +# file to use as runner script, default uses mkShapesRDF.shapeAnalysis.runner, otherwise specify path to script +runnerFile = "default" + +# output file name +outputFile = "mkShapes__{}.root".format(tag) + +# path to output folder +outputFolder = "/eos/user/" + os.getlogin()[0] + "/" + os.getlogin() + "/mkShapesRDF_rootfiles/" + tag + "/rootFile/" + +# path to batch folder (used for condor submission) +batchFolder = "condor" + +# path to configuration folder (will contain all the compiled configuration files) +configsFolder = "configs" + +# luminosity to normalize to (in 1/fb) +# https://github.com/latinos/mkShapesRDF/blob/Run3/mkShapesRDF/processor/data/TrigMaker_cfg.py#L1016 +lumi = 8.174732641 + +# file with list of variables +variablesFile = 'variables.py' + +# file with TTree aliases +aliasesFile = 'aliases.py' + +# file with list of cuts +cutsFile = 'cuts.py' + +# file with list of samples +samplesFile = 'samples.py' + +# 
file with plot configuration +plotFile = 'plot.py' + +# structure file for datacard +structureFile = 'structure.py' + +# nuisances file for mkDatacards and for mkShape +nuisancesFile = 'nuisances.py' + +# path to folder where to save plots +plotPath = "plots_" + tag + +# these lines are executed right before the runner on the condor node +mountEOS = [ + # "export KRB5CCNAME=/home/gpizzati/krb5\n", +] + +# list of imports to import when compiling the whole configuration folder, it should not contain imports used by configuration.py +imports = ["os", "glob", ("collections", "OrderedDict"), "ROOT"] + +# list of files to compile +filesToExec = [ + samplesFile, + aliasesFile, + cutsFile, + variablesFile, + plotFile, + nuisancesFile, + structureFile, +] + +# list of variables to keep in the compiled configuration folder +varsToKeep = [ + "batchVars", + "outputFolder", + "batchFolder", + "configsFolder", + "outputFile", + "runnerFile", + "tag", + "samples", + "aliases", + "variables", + ("cuts", {"cuts": "cuts", "preselections": "preselections"}), + ("plot", {"plot": "plot", "groupPlot": "groupPlot", "legend": "legend"}), + "nuisances", + "structure", + "lumi", +] + +# list of variables to keep in the batch submission script (script.py) +batchVars = varsToKeep[varsToKeep.index("samples") :] + + +varsToKeep += ['plotPath'] + diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/cuts.py b/ZH3l_BDT/zh3l_run2_bdt_rdf/cuts.py new file mode 100644 index 00000000..a620ab34 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/cuts.py @@ -0,0 +1,139 @@ +cuts = {} + +preselections = 'Alt( Lepton_pt, 0, 0)>25 \ + && Alt( Lepton_pt, 1, 0)>20 \ + && Alt( Lepton_pt, 2, 0)>15 \ + && Alt( Lepton_pt, 3, 0)<10 \ + && (WH3l_mOSll[0] < 0 || WH3l_mOSll[0] > 12) \ + && (WH3l_mOSll[1] < 0 || WH3l_mOSll[1] > 12) \ + && (WH3l_mOSll[2] < 0 || WH3l_mOSll[2] > 12) \ + && abs(WH3l_chlll) == 1 \ + ' + +cuts['zmass_cut'] = 'WH3l_ZVeto < 25' + +cuts['jet_cut_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) 
> 30 \ + && WH3l_ZVeto < 25 \ + ' + +cuts['bveto_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + ' + +cuts['z4lveto_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + ' + +cuts['zh3l_SR_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && ZH3l_dphilmetjj < 3.14159/2 \ + ' + +cuts['zh3l_WZ_CR_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && ZH3l_dphilmetjj > 3.14159/2 \ + ' + +cuts['met_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + ' + +cuts['zh3l_SR_2j_met'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + && ZH3l_dphilmetjj < 3.14159/2 \ + ' + +cuts['zh3l_WZ_CR_2j_met'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + && ZH3l_dphilmetjj > 3.14159/2 \ + ' + +cuts['jet_cut_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + ' + +cuts['bveto_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + ' + +cuts['z4lveto_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + ' + +cuts['zh3l_SR_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && ZH3l_dphilmetj < 3.14159/2 \ + ' + +cuts['zh3l_WZ_CR_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + 
&& Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && ZH3l_dphilmetj > 3.14159/2 \ + ' + +cuts['met_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + ' + +cuts['zh3l_SR_1j_met'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + && ZH3l_dphilmetj < 3.14159/2 \ + ' + +cuts['zh3l_WZ_CR_1j_met'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + && ZH3l_dphilmetj > 3.14159/2 \ + ' + +''' + #11 = e +# 13 = mu +# 15 = tau +''' diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/macros/ch_lll.cc b/ZH3l_BDT/zh3l_run2_bdt_rdf/macros/ch_lll.cc new file mode 100644 index 00000000..de6ef009 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/macros/ch_lll.cc @@ -0,0 +1,41 @@ +#ifndef whchlll +#define whchlll + + +#include + +#include "TVector2.h" +#include "Math/Vector4Dfwd.h" +#include "Math/GenVector/LorentzVector.h" +#include "Math/GenVector/PtEtaPhiM4D.h" + +#include +#include "ROOT/RVec.hxx" +#include "TLorentzVector.h" + +using namespace ROOT; +using namespace ROOT::VecOps; + +float ch_lll(int nLepton, + RVecF Lepton_pt, + RVecF Lepton_eta, + RVecF Lepton_phi, + RVecF Lepton_pdgId){ + + + // Create default value + float ch_lll = -9999.0; + + // Check that we have at least 3 good leptons + if (nLepton < 3) return ch_lll; + + bool WH3l_ok = abs( abs(Lepton_pdgId[0])/Lepton_pdgId[0] + abs(Lepton_pdgId[1])/Lepton_pdgId[1] + abs(Lepton_pdgId[2])/Lepton_pdgId[2] ) <= 1; + if (!WH3l_ok) return ch_lll; + + ch_lll = abs(Lepton_pdgId[0])/Lepton_pdgId[0] + abs(Lepton_pdgId[1])/Lepton_pdgId[1] + abs(Lepton_pdgId[2])/Lepton_pdgId[2]; + + return ch_lll; +} + + +#endif diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/macros/flag_ossf.cc 
b/ZH3l_BDT/zh3l_run2_bdt_rdf/macros/flag_ossf.cc new file mode 100644 index 00000000..a46acea2 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/macros/flag_ossf.cc @@ -0,0 +1,57 @@ +#ifndef flagossf +#define flagossf + + +#include + +#include "TVector2.h" +#include "Math/Vector4Dfwd.h" +#include "Math/GenVector/LorentzVector.h" +#include "Math/GenVector/PtEtaPhiM4D.h" + +#include +#include "ROOT/RVec.hxx" +#include "TLorentzVector.h" + +using namespace ROOT; +using namespace ROOT::VecOps; + +bool flag_ossf(int nLepton, + RVecF Lepton_pt, + RVecF Lepton_eta, + RVecF Lepton_phi, + RVecF Lepton_pdgId){ + + // Create default value + bool flag_OSSF = false; + + // Check that we have at least 3 good leptons + if (nLepton < 3) return flag_OSSF; + + bool WH3l_ok = abs( abs(Lepton_pdgId[0])/Lepton_pdgId[0] + abs(Lepton_pdgId[1])/Lepton_pdgId[1] + abs(Lepton_pdgId[2])/Lepton_pdgId[2] ) <= 1; + if (!WH3l_ok) return flag_OSSF; + + // Initialize leptons 4-vectors + std::vector leptons_vector = { + ROOT::Math::PtEtaPhiMVector(Lepton_pt[0],Lepton_eta[0],Lepton_phi[0],0), + ROOT::Math::PtEtaPhiMVector(Lepton_pt[1],Lepton_eta[1],Lepton_phi[1],0), + ROOT::Math::PtEtaPhiMVector(Lepton_pt[2],Lepton_eta[2],Lepton_phi[2],0) + }; + + // Compute minimum difference |mll - mZ| + float minmllDiffToZ = 9999.0; + for (uint i = 0; i < 3; i++){ + for (uint j = i+1; j < 3; j++){ + if ( Lepton_pdgId[i] + Lepton_pdgId[j] != 0 ) continue; + float mllDiffToZ = abs( (leptons_vector[i] + leptons_vector[j]).M() - 91.1876 ); + if ( mllDiffToZ < minmllDiffToZ ) minmllDiffToZ = mllDiffToZ; + } + } + + if (minmllDiffToZ != 9999.0) flag_OSSF = true; + + return flag_OSSF; +} + + +#endif diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/macros/mOS_ll.cc b/ZH3l_BDT/zh3l_run2_bdt_rdf/macros/mOS_ll.cc new file mode 100644 index 00000000..1ec3c638 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/macros/mOS_ll.cc @@ -0,0 +1,54 @@ +#ifndef mosll +#define mosll + + +#include + +#include "TVector2.h" +#include "Math/Vector4Dfwd.h" 
+#include "Math/GenVector/LorentzVector.h" +#include "Math/GenVector/PtEtaPhiM4D.h" + +#include +#include "ROOT/RVec.hxx" +#include "TLorentzVector.h" + +using namespace ROOT; +using namespace ROOT::VecOps; + +ROOT::RVecF mOS_ll(int nLepton, + RVecF Lepton_pt, + RVecF Lepton_eta, + RVecF Lepton_phi, + RVecF Lepton_pdgId){ + + // Default: one slot per lepton pair (01, 02, 12); -9999 means no opposite-sign pair + ROOT::RVecF mOSll_vector = {-9999.0, -9999.0, -9999.0}; + + // Check that we have at least 3 good leptons + if (nLepton < 3) return mOSll_vector; + + bool WH3l_ok = abs( abs(Lepton_pdgId[0])/Lepton_pdgId[0] + abs(Lepton_pdgId[1])/Lepton_pdgId[1] + abs(Lepton_pdgId[2])/Lepton_pdgId[2] ) <= 1; + if (!WH3l_ok) return mOSll_vector; + + // Initialize leptons 4-vectors + std::vector leptons_vector = { + ROOT::Math::PtEtaPhiMVector(Lepton_pt[0],Lepton_eta[0],Lepton_phi[0],0), + ROOT::Math::PtEtaPhiMVector(Lepton_pt[1],Lepton_eta[1],Lepton_phi[1],0), + ROOT::Math::PtEtaPhiMVector(Lepton_pt[2],Lepton_eta[2],Lepton_phi[2],0) + }; + + // Fill slot i+j-1 in place for each pair (i,j): (0,1)->0, (0,2)->1, (1,2)->2 + for (uint i = 0; i < 3; i++){ + for (uint j = i+1; j < 3; j++){ + if (Lepton_pdgId[i]*Lepton_pdgId[j] < 0) + mOSll_vector[i+j-1] = (leptons_vector[i]+leptons_vector[j]).M(); + else + mOSll_vector[i+j-1] = -9999.0; + } + } + return mOSll_vector; +} + + +#endif diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/macros/m_lll.cc b/ZH3l_BDT/zh3l_run2_bdt_rdf/macros/m_lll.cc new file mode 100644 index 00000000..46ce151b --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/macros/m_lll.cc @@ -0,0 +1,49 @@ +#ifndef whmlll +#define whmlll + + +#include + +#include "TVector2.h" +#include "Math/Vector4Dfwd.h" +#include "Math/GenVector/LorentzVector.h" +#include "Math/GenVector/PtEtaPhiM4D.h" + +#include +#include "ROOT/RVec.hxx" +#include "TLorentzVector.h" + +using namespace ROOT; +using namespace ROOT::VecOps; + +float m_lll(int nLepton, + RVecF Lepton_pt, + RVecF Lepton_eta, + RVecF Lepton_phi, + RVecF Lepton_pdgId){ + + + // Create default value + float mlll = -9999.0; + + // Check 
that we have at least 3 good leptons + if (nLepton < 3) return mlll; + + bool WH3l_ok = abs( abs(Lepton_pdgId[0])/Lepton_pdgId[0] + abs(Lepton_pdgId[1])/Lepton_pdgId[1] + abs(Lepton_pdgId[2])/Lepton_pdgId[2] ) <= 1; + if (!WH3l_ok) return mlll; + + // Initialize leptons 4-vectors + std::vector leptons_vector = { + ROOT::Math::PtEtaPhiMVector(Lepton_pt[0],Lepton_eta[0],Lepton_phi[0],0), + ROOT::Math::PtEtaPhiMVector(Lepton_pt[1],Lepton_eta[1],Lepton_phi[1],0), + ROOT::Math::PtEtaPhiMVector(Lepton_pt[2],Lepton_eta[2],Lepton_phi[2],0) + }; + + mlll = (leptons_vector[0] + leptons_vector[1] + leptons_vector[2]).M(); + + return mlll; + +} + + +#endif diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/macros/z_veto.cc b/ZH3l_BDT/zh3l_run2_bdt_rdf/macros/z_veto.cc new file mode 100644 index 00000000..a527c0c4 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/macros/z_veto.cc @@ -0,0 +1,56 @@ +#ifndef zveto +#define zveto + + +#include + +#include "TVector2.h" +#include "Math/Vector4Dfwd.h" +#include "Math/GenVector/LorentzVector.h" +#include "Math/GenVector/PtEtaPhiM4D.h" + +#include +#include "ROOT/RVec.hxx" +#include "TLorentzVector.h" + +using namespace ROOT; +using namespace ROOT::VecOps; + +float z_veto(int nLepton, + RVecF Lepton_pt, + RVecF Lepton_eta, + RVecF Lepton_phi, + RVecF Lepton_pdgId){ + + + // Create default value + float z_veto = 9999.0; + + // Check that we have at least 3 good leptons + if (nLepton < 3) return z_veto; + + bool WH3l_ok = abs( abs(Lepton_pdgId[0])/Lepton_pdgId[0] + abs(Lepton_pdgId[1])/Lepton_pdgId[1] + abs(Lepton_pdgId[2])/Lepton_pdgId[2] ) <= 1; + if (!WH3l_ok) return z_veto; + + // Initialize leptons 4-vectors + std::vector leptons_vector = { + ROOT::Math::PtEtaPhiMVector(Lepton_pt[0],Lepton_eta[0],Lepton_phi[0],0), + ROOT::Math::PtEtaPhiMVector(Lepton_pt[1],Lepton_eta[1],Lepton_phi[1],0), + ROOT::Math::PtEtaPhiMVector(Lepton_pt[2],Lepton_eta[2],Lepton_phi[2],0) + }; + + // Compute minimum difference |mll - mZ| + float minmllDiffToZ = 9999.0; + for 
(uint i = 0; i < 3; i++){ + for (uint j = i+1; j < 3; j++){ + if ( Lepton_pdgId[i] + Lepton_pdgId[j] != 0 ) continue; + float mllDiffToZ = abs( (leptons_vector[i] + leptons_vector[j]).M() - 91.1876 ); + if ( mllDiffToZ < minmllDiffToZ ) minmllDiffToZ = mllDiffToZ; + } + } + + return minmllDiffToZ; +} + + +#endif diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/ngenjet.cc b/ZH3l_BDT/zh3l_run2_bdt_rdf/ngenjet.cc new file mode 100644 index 00000000..99893569 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/ngenjet.cc @@ -0,0 +1,121 @@ + +#ifndef ngenjet +#define ngenjet + +#include + +#include "TVector2.h" +#include "Math/Vector4Dfwd.h" +#include "Math/GenVector/LorentzVector.h" +#include "Math/GenVector/PtEtaPhiM4D.h" + +#include +#include "ROOT/RVec.hxx" + +using namespace ROOT; +using namespace ROOT::VecOps; +double CountGenJet( + int nLeptonGen, + RVecB LeptonGen_isPrompt, + RVecI LeptonGen_pdgId, + RVecF LeptonGen_pt, + RVecF LeptonGen_eta, + RVecF LeptonGen_phi, + RVecF LeptonGen_mass, + int nPhotonGen, + RVecF PhotonGen_pt, + RVecF PhotonGen_eta, + RVecF PhotonGen_phi, + RVecF PhotonGen_mass, + int nGenJet, + RVecF GenJet_pt, + RVecF GenJet_eta, + RVecF GenJet_phi + ){ + unsigned nJ = nGenJet; + + unsigned nL = nLeptonGen; + + std::vector iPromptL{}; + iPromptL.reserve(nL); + + for (unsigned iL{0}; iL != nL; ++iL) { + if (!LeptonGen_isPrompt[iL]) + continue; + + unsigned absId{static_cast(std::abs(LeptonGen_pdgId[iL]))}; + if (absId != 11 && absId != 13) + continue; + + iPromptL.push_back(iL); + } + + if (iPromptL.size() == 0) { + unsigned n{0}; + for (unsigned iJ{0}; iJ != nJ; ++iJ) { + if (GenJet_pt[iJ] > 30.) 
+ ++n; + } + return n; + } + + std::vector dressedLeptons{}; + for (unsigned iL : iPromptL) { + dressedLeptons.push_back( + ROOT::Math::PtEtaPhiMVector( + LeptonGen_pt[iL], + LeptonGen_eta[iL], + LeptonGen_phi[iL], + LeptonGen_mass[iL] + ) + ); + } + + unsigned nP = nPhotonGen; + + for (unsigned iP{0}; iP != nP; ++iP) { + double minDR2{1000.}; + int iDMin{-1}; + for (unsigned iD{0}; iD != iPromptL.size(); ++iD) { + unsigned iL{iPromptL[iD]}; + double dEta{LeptonGen_eta[iL] - PhotonGen_eta[iP]}; + double dPhi{TVector2::Phi_mpi_pi(LeptonGen_phi[iL] - PhotonGen_phi[iP])}; + double dR2{dEta * dEta + dPhi * dPhi}; + if (dR2 < minDR2) { + minDR2 = dR2; + iDMin = iD; + } + } + + if (minDR2 < 0.09) + dressedLeptons[iDMin] += ROOT::Math::PtEtaPhiMVector( + PhotonGen_pt[iP], + PhotonGen_eta[iP], + PhotonGen_phi[iP], + PhotonGen_mass[iP]); + } + + unsigned n{0}; + for (unsigned iJ{0}; iJ != nJ; ++iJ) { + if (GenJet_pt[iJ] < 30.) + continue; + + bool overlap{false}; + for (auto& p4 : dressedLeptons) { + if (p4.pt() < 10.) 
+ continue; + + double dEta{p4.eta() - GenJet_eta[iJ]}; + double dPhi{TVector2::Phi_mpi_pi(p4.phi() - GenJet_phi[iJ])}; + if (dEta * dEta + dPhi * dPhi < 0.016) { + overlap = true; + break; + } + } + if (!overlap) + ++n; + } + return n; +} + +#endif \ No newline at end of file diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/nuisances.py b/ZH3l_BDT/zh3l_run2_bdt_rdf/nuisances.py new file mode 100644 index 00000000..2dc2d2b2 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/nuisances.py @@ -0,0 +1,580 @@ +# nuisances +# name of samples here must match keys in samples.py + +try: + mc = [skey for skey in samples if skey != 'DATA' and not skey.startswith('Fake')] +except NameError: + mc = [] + +try: + fitcuts = [cut for cut in cuts if 'SR' in cut or 'CR' in cut] +except NameError: + fitcuts = [] + +nuisances = {} + +#### Luminosity + +nuisances['lumi_Uncorrelated'] = { + 'name': 'lumi_13TeV_2018', + 'type': 'lnN', + 'samples': dict((skey, '1.015') for skey in mc if skey not in ['WZ']), + 'cuts' : fitcuts +} + +nuisances['lumi_XYFact'] = { + 'name': 'lumi_13TeV_XYFact', + 'type': 'lnN', + 'samples': dict((skey, '1.02') for skey in mc if skey not in ['WZ']), + 'cuts' : fitcuts +} + +nuisances['lumi_LScale'] = { + 'name': 'lumi_13TeV_LSCale', + 'type': 'lnN', + 'samples': dict((skey, '1.002') for skey in mc if skey not in ['WZ']), + 'cuts' : fitcuts +} + +nuisances['lumi_CurrCalib'] = { + 'name': 'lumi_13TeV_CurrCalib', + 'type': 'lnN', + 'samples': dict((skey, '1.002') for skey in mc if skey not in ['WZ']), + 'cuts' : fitcuts +} + +#### Theoretical Systematics +# Commenting out as mkShapesRDF does not have LatinoAnalysis tools' HiggsXSection module (DS, 03Nov25). 
+# Scale +# from LatinoAnalysis.Tools.HiggsXSection import * +# HiggsXS = HiggsXSection() + +# nuisances['QCDscale_VH'] = { +# 'name' : 'QCDscale_VH', +# 'samples' : { +# 'WH_hww' : HiggsXS.GetHiggsProdXSNP('YR4','13TeV','WH','125.09','scale','sm'), +# 'ZH_hww' : HiggsXS.GetHiggsProdXSNP('YR4','13TeV','ZH','125.09','scale','sm'), +# 'WH_htt' : HiggsXS.GetHiggsProdXSNP('YR4','13TeV','WH','125.09','scale','sm'), +# 'ZH_htt' : HiggsXS.GetHiggsProdXSNP('YR4','13TeV','ZH','125.09','scale','sm') +# }, +# 'type' : 'lnN', +# 'cuts' : fitcuts +# } + +# nuisances['QCDscale_ggZH'] = { +# 'name' : 'QCDscale_ggZH', +# 'samples' : { +# 'ggZH_hww': HiggsXS.GetHiggsProdXSNP('YR4','13TeV','ggZH','125.09','scale','sm'), +# }, +# 'type' : 'lnN', +# 'cuts' : fitcuts +# } + +# nuisances['QCDscale_ttH'] = { +# 'name' : 'QCDscale_ttH', +# 'samples' : { +# 'ttH_hww': HiggsXS.GetHiggsProdXSNP('YR4','13TeV','ttH','125.09','scale','sm'), +# }, +# 'type' : 'lnN', +# 'cuts' : fitcuts +# } + +variations = ['Alt(LHEScaleWeight, 0, 1)', 'Alt(LHEScaleWeight, 1, 1)', 'Alt(LHEScaleWeight, 3, 1)', 'Alt(LHEScaleWeight, 5, 1)', 'Alt(LHEScaleWeight, 7, 1)', 'Alt(LHEScaleWeight, 8, 1)'] +# variations = ['Alt$(LHEScaleWeight[0],1)', 'Alt$(LHEScaleWeight[1],1)', 'Alt$(LHEScaleWeight[3],1)', 'Alt$(LHEScaleWeight[5],1)', 'Alt$(LHEScaleWeight[7],1)', 'Alt$(LHEScaleWeight[8],1)'] + +# Commenting out as mkShapesRDF could not find variations for the QCDscale nuisances (DS, 05Nov25). 
+# nuisances['QCDscale_V'] = { +# 'name': 'QCDscale_V', +# 'kind': 'weight_envelope', +# 'type': 'shape', +# 'samples': {'DY': variations}, +# 'AsLnN': '1', +# 'cuts' : fitcuts +# } + +# nuisances['QCDscale_VV'] = { +# 'name': 'QCDscale_VV', +# 'kind': 'weight_envelope', +# 'type': 'shape', +# 'samples': { +# 'WW' : variations, +# 'Zg' : variations, +# 'ZgS' : variations, +# 'WZ' : variations, +# 'ZZ' : variations +# }, +# 'cuts' : fitcuts +# } + +nuisances['QCDscale_WWewk'] = { + 'name': 'QCDscale_WWewk', + 'samples': { + 'WWewk': '1.11', + }, + 'type': 'lnN', + 'cuts' : fitcuts +} + + +# ggww and interference +nuisances['QCDscale_ggVV'] = { + 'name': 'QCDscale_ggVV', + 'type': 'lnN', + 'samples': { + 'ggWW': '1.15', + }, + 'cuts' : fitcuts +} + +nuisances['QCDscale_qqbar_ACCEPT'] = { + 'name' : 'QCDscale_qqbar_ACCEPT', + 'type' : 'lnN', + 'samples' : { + 'WH_hww' : '1.010', + 'ZH_hww' : '1.015', + 'WH_htt' : '1.010', + 'ZH_htt' : '1.015', + }, + 'cuts' : fitcuts +} + +nuisances['QCDscale_gg_ACCEPT'] = { + 'name' : 'QCDscale_gg_ACCEPT', + 'samples' : { + 'ggZH_hww': '1.012', + 'ggWW' : '1.012' + }, + 'type' : 'lnN', + 'cuts' : fitcuts +} + +# Commenting out as mkShapesRDF does not have LatinoAnalysis tools' HiggsXSection module (DS, 03Nov25). 
+# pdf +# nuisances['pdf_Higgs_gg'] = { +# 'name' : 'pdf_Higgs_gg', +# 'samples' : { +# 'ggZH_hww': HiggsXS.GetHiggsProdXSNP('YR4','13TeV','ggZH','125.09','pdf','sm'), +# }, +# 'type' : 'lnN', +# 'cuts' : fitcuts +# } + +# nuisances['pdf_Higgs_qqbar'] = { +# 'name' : 'pdf_Higgs_qqbar', +# 'type' : 'lnN', +# 'samples' : { +# 'WH_hww' : HiggsXS.GetHiggsProdXSNP('YR4','13TeV','WH' ,'125.09','pdf','sm'), +# 'ZH_hww' : HiggsXS.GetHiggsProdXSNP('YR4','13TeV','ZH' ,'125.09','pdf','sm'), +# 'WH_htt' : HiggsXS.GetHiggsProdXSNP('YR4','13TeV','WH' ,'125.09','pdf','sm'), +# 'ZH_htt' : HiggsXS.GetHiggsProdXSNP('YR4','13TeV','ZH' ,'125.09','pdf','sm'), +# }, +# 'cuts' : fitcuts +# } + +# nuisances['pdf_Higgs_ttH'] = { +# 'name': 'pdf_Higgs_ttH', +# 'type': 'lnN', +# 'samples': { +# 'ttH_hww' : HiggsXS.GetHiggsProdXSNP('YR4','13TeV','ttH','125.09','pdf','sm') +# }, +# 'cuts' : fitcuts +# } + +nuisances['pdf_qqbar'] = { + 'name': 'pdf_qqbar', + 'type': 'lnN', + 'samples': { + 'Zg': '1.04', + 'ZgS': '1.04', + 'WZ': '1.04', + 'ZZ': '1.04', + }, + 'cuts' : fitcuts + } + +nuisances['pdf_Higgs_gg_ACCEPT'] = { + 'name' : 'pdf_Higgs_gg_ACCEPT', + 'samples' : { + 'ggZH_hww': '1.006', + }, + 'type' : 'lnN', + 'cuts' : fitcuts + } + +nuisances['pdf_Higgs_qqbar_ACCEPT'] = { + 'name' : 'pdf_Higgs_qqbar_ACCEPT', + 'type' : 'lnN', + 'samples' : { + 'WH_hww' : '1.003', + 'ZH_hww' : '1.002', + 'WH_htt' : '1.003', + 'ZH_htt' : '1.002', + }, + 'cuts' : fitcuts + } + +nuisances['pdf_qqbar_ACCEPT'] = { + 'name' : 'pdf_qqbar_ACCEPT', + 'type' : 'lnN', + 'samples' : { + 'WZ' : '1.001', + 'ZZ' : '1.001', + }, + 'cuts' : fitcuts + } + +nuisances['pdf_gg_ACCEPT'] = { + 'name': 'pdf_gg_ACCEPT', + 'samples': { + 'ggWW': '1.006', + }, + 'type': 'lnN', + 'cuts' : fitcuts +} + +# nuisances['PS_ISR'] = { +# 'name': 'PS_ISR', +# 'kind': 'weight', +# 'type': 'shape', +# 'samples': { +# 'ggZH_hww': ['1.066107*(nCleanGenJet==0) + 1.047857*(nCleanGenJet==1) + 1.030005*(nCleanGenJet==2) + 1.005028*(nCleanGenJet>=3)', 
'0.921874*(nCleanGenJet==0) + 0.941939*(nCleanGenJet==1) + 0.962282*(nCleanGenJet==2) + 0.991580*(nCleanGenJet>=3)'], +# 'ZH_hww': ['1.000684*(nCleanGenJet==0) + 1.000924*(nCleanGenJet==1) + 1.001683*(nCleanGenJet==2) + 1.002104*(nCleanGenJet>=3)', '0.999150*(nCleanGenJet==0) + 0.998821*(nCleanGenJet==1) + 0.997859*(nCleanGenJet==2) + 0.997316*(nCleanGenJet>=3)'], +# 'WZ': ['1.002552*(nCleanGenJet==0) + 1.010286*(nCleanGenJet==1) + 1.014420*(nCleanGenJet==2) + 1.006226*(nCleanGenJet>=3)', '0.996802*(nCleanGenJet==0) + 0.987227*(nCleanGenJet==1) + 0.982005*(nCleanGenJet==2) + 0.992030*(nCleanGenJet>=3)'], +# 'ZZ': ['1.003210*(nCleanGenJet==0) + 1.005480*(nCleanGenJet==1) + 1.004674*(nCleanGenJet==2) + 0.987845*(nCleanGenJet>=3)', '0.995997*(nCleanGenJet==0) + 0.993056*(nCleanGenJet==1) + 0.993659*(nCleanGenJet==2) + 1.014695*(nCleanGenJet>=3)'], +# }, +# 'cuts' : fitcuts +# } + +# nuisances['PS_FSR'] = { +# 'name': 'PS_FSR', +# 'kind': 'weight', +# 'type': 'shape', +# 'samples': { +# 'ggZH_hww': ['0.987316*(nCleanGenJet==0) + 0.986764*(nCleanGenJet==1) + 0.996498*(nCleanGenJet==2) + 1.004161*(nCleanGenJet>=3)', '1.019871*(nCleanGenJet==0) + 1.013853*(nCleanGenJet==1) + 1.005229*(nCleanGenJet==2) + 0.998573*(nCleanGenJet>=3)'], +# 'ZH_hww': ['0.992867*(nCleanGenJet==0) + 0.992845*(nCleanGenJet==1) + 0.999470*(nCleanGenJet==2) + 1.007245*(nCleanGenJet>=3)', '1.012465*(nCleanGenJet==0) + 1.012743*(nCleanGenJet==1) + 1.003215*(nCleanGenJet==2) + 0.991286*(nCleanGenJet>=3)'], +# 'WZ': ['0.992987*(nCleanGenJet==0) + 0.993725*(nCleanGenJet==1) + 1.000617*(nCleanGenJet==2) + 1.010869*(nCleanGenJet>=3)', '1.011267*(nCleanGenJet==0) + 1.010097*(nCleanGenJet==1) + 0.999445*(nCleanGenJet==2) + 0.983609*(nCleanGenJet>=3)'], +# 'ZZ': ['0.997245*(nCleanGenJet==0) + 0.998689*(nCleanGenJet==1) + 1.004475*(nCleanGenJet==2) + 1.011440*(nCleanGenJet>=3)', '1.004482*(nCleanGenJet==0) + 1.002081*(nCleanGenJet==1) + 0.992617*(nCleanGenJet==2) + 0.981314*(nCleanGenJet>=3)'], +# }, +# 
'cuts' : fitcuts +# } + +nuisances['PU'] = { + 'name': 'CMS_PU_2018', + 'kind': 'weight', + 'type': 'shape', + 'samples': { + 'WZ': ['1.00103945*(puWeightUp/puWeight)', '0.99898154*(puWeightDown/puWeight)'], + 'ZZ': ['1.00125684*(puWeightUp/puWeight)', '0.99878368*(puWeightDown/puWeight)'], + 'ZH_hww': ['1.00129463*(puWeightUp/puWeight)', '0.99847264*(puWeightDown/puWeight)'], + 'ggZH_hww': ['0.99973837*(puWeightUp/puWeight)', '1.0001526*(puWeightDown/puWeight)'], + }, + 'AsLnN': '1', + 'cuts' : fitcuts +} + +### PU ID SF uncertainty +puid_syst = ['Jet_PUIDSF_up/Jet_PUIDSF', 'Jet_PUIDSF_down/Jet_PUIDSF'] + +nuisances['jetPUID'] = { + 'name': 'CMS_PUID_2018', + 'kind': 'weight', + 'type': 'shape', + 'samples': dict((skey, puid_syst) for skey in mc) +} + +nuisances['UE_whss'] = { + 'name' : 'UE_whss', + 'skipCMS' : 1, + 'type' : 'lnN', + 'samples' : { + 'WH_hww' : '1.015', + 'ZH_hww' : '1.015', + 'ggZH_hww' : '1.015', + 'WH_htt' : '1.015', + 'ZH_htt' : '1.015', + }, + 'cuts' : fitcuts +} + +nuisances['WZ3l2jnorm'] = { + 'name' : 'CMS_hww_WZ3l2jnorm', + 'samples' : { + 'WZ' : '1.00', + }, + 'type' : 'rateParam', + 'cuts' : [ + 'zh3l_WZ_CR_2j', + 'zh3l_WZ_CR_2j_met', + 'zh3l_SR_2j', + 'zh3l_SR_2j_met', + ] + } + +nuisances['WZ3l1jnorm'] = { + 'name' : 'CMS_hww_WZ3l1jnorm', + 'samples' : { + 'WZ' : '1.00', + }, + 'type' : 'rateParam', + 'cuts' : [ + 'zh3l_WZ_CR_1j', + 'zh3l_WZ_CR_1j_met', + 'zh3l_SR_1j', + 'zh3l_SR_1j_met', + ] + } + +#### Top + +apply_on = { + 'top': [ + '(topGenPt * antitopGenPt <= 0.) * 1.0816 + (topGenPt * antitopGenPt > 0.)', + '(topGenPt * antitopGenPt <= 0.) 
* 0.9184 + (topGenPt * antitopGenPt > 0.)' + ] +} + +nuisances['singleTopToTTbar'] = { + 'name': 'singleTopToTTbar', + 'skipCMS': 1, + 'kind': 'weight', + 'type': 'shape', + 'samples': apply_on, + 'cuts' : fitcuts +} + +nuisances['TopPtRew'] = { + 'name': 'CMS_topPtRew', # Theory uncertainty + 'kind': 'weight', + 'type': 'shape', + 'samples': {'top': ["1.", "1./Top_pTrw"]}, + 'symmetrize': True, + 'cuts' : fitcuts +} + +#### FAKES + +fakeW_EleUp = '( fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3lElUp / fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3l )' +fakeW_EleDown = '( fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3lElDown / fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3l )' +fakeW_MuUp = '( fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3lMuUp / fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3l )' +fakeW_MuDown = '( fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3lMuDown / fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3l )' +fakeW_statEleUp = '( fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3lstatElUp / fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3l )' +fakeW_statEleDown = '( fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3lstatElDown / fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3l )' +fakeW_statMuUp = '( fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3lstatMuUp / fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3l )' +fakeW_statMuDown = '( fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3lstatMuDown / fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3l )' + +nuisances['fake_syst_e'] = { + 'name' : 'CMS_fake_syst_e', + 'type' : 'lnN', + 'samples' : { + 'Fake_e' : '1.30', + }, + 'cuts' : fitcuts +} + +nuisances['fake_syst_m'] = { + 'name' : 'CMS_fake_syst_m', + 'type' : 'lnN', + 'samples' : { + 'Fake_m' : '1.30', + }, + 'cuts' : fitcuts +} + +nuisances['fake_ele'] = { + 'name' : 'CMS_fake_e_2018', + 'kind' : 'weight', + 'type' : 'shape', + 'samples' : { + 'Fake' : [ fakeW_EleUp , fakeW_EleDown ], + }, + 'cuts' : fitcuts +} + +nuisances['fake_ele_stat'] = { + 'name' : 'CMS_fake_stat_e_2018', + 'kind' : 'weight', + 'type' : 'shape', + 'samples' : { + 'Fake' : [ 
# ---------------------------------------------------------------------------
# Experimental nuisances (2018 naming): fake rate, b-tagging, trigger,
# lepton efficiency/scale, jet energy scale/resolution, MET and MC statistics.
# ---------------------------------------------------------------------------

def _weightNuisance(name, samples):
    """Return a 'weight'-kind shape nuisance restricted to ``fitcuts``."""
    return {
        'name'    : name,
        'kind'    : 'weight',
        'type'    : 'shape',
        'samples' : samples,
        'cuts'    : fitcuts,
    }


def _variedFolder(tag):
    """Return the tree directory of the ``<tag>_suffix`` production under ``treeBaseDir``."""
    return (treeBaseDir
            + 'Autumn18_102X_nAODv7_Full2018v7/'
              'MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__'
            + tag + '_suffix')


def _suffixNuisance(name, mapTag, folderUp, folderDown, withCuts=True):
    """Return a 'suffix'-kind shape nuisance for every sample in ``mc``.

    ``mapTag`` is the branch suffix without the trailing ``up``/``do``.
    ``withCuts=False`` leaves out the 'cuts' key altogether.
    """
    nuisance = {
        'name'       : name,
        'kind'       : 'suffix',
        'type'       : 'shape',
        'mapUp'      : mapTag + 'up',
        'mapDown'    : mapTag + 'do',
        'samples'    : {skey: ['1', '1'] for skey in mc},
        'folderUp'   : folderUp,
        'folderDown' : folderDown,
        'AsLnN'      : '1',
    }
    if withCuts:
        nuisance['cuts'] = fitcuts
    return nuisance


###### Fake rate (muons)

nuisances['fake_mu'] = _weightNuisance(
    'CMS_fake_m_2018', {'Fake': [fakeW_MuUp, fakeW_MuDown]})

nuisances['fake_mu_stat'] = _weightNuisance(
    'CMS_fake_stat_m_2018', {'Fake': [fakeW_statMuUp, fakeW_statMuDown]})

###### B-tagger

for shift in ['jes', 'lf', 'hf', 'hfstats1', 'hfstats2', 'lfstats1', 'lfstats2', 'cferr1', 'cferr2']:
    btag_syst = ['(btagSF%sup)/(btagSF)' % shift, '(btagSF%sdown)/(btagSF)' % shift]

    # Only the 'stats' components carry the year in their name.
    name = ('CMS_btag_%s' % shift) + ('_2018' if 'stats' in shift else '')

    nuisances['btag_shape_%s' % shift] = _weightNuisance(
        name, {skey: btag_syst for skey in mc})

#### Trigger Efficiency

trig_syst = ['((TriggerEffWeight_3l_u)/(TriggerEffWeight_3l))*(TriggerEffWeight_3l>0.02) + (TriggerEffWeight_3l<=0.02)', '(TriggerEffWeight_3l_d)/(TriggerEffWeight_3l)']

nuisances['trigg'] = _weightNuisance(
    'CMS_eff_hwwtrigger_2018', {skey: trig_syst for skey in mc})

##### Electron Efficiency and energy scale

id_syst_ele = ['SFweightEleUp', 'SFweightEleDown']

nuisances['eff_e'] = _weightNuisance(
    'CMS_eff_e_2018', {skey: id_syst_ele for skey in mc})

nuisances['electronpt'] = _suffixNuisance(
    'CMS_scale_e_2018', 'ElepT', _variedFolder('ElepTup'), _variedFolder('ElepTdo'))

###### Muon Efficiency and energy scale

id_syst_mu = ['SFweightMuUp', 'SFweightMuDown']

nuisances['eff_m'] = _weightNuisance(
    'CMS_eff_m_2018', {skey: id_syst_mu for skey in mc})

nuisances['muonpt'] = _suffixNuisance(
    'CMS_scale_m_2018', 'MupT', _variedFolder('MupTup'), _variedFolder('MupTdo'))

####### Jet energy scale

jes_systs = ['JESAbsolute','JESAbsolute_2018','JESBBEC1','JESBBEC1_2018','JESEC2','JESEC2_2018','JESFlavorQCD','JESHF','JESHF_2018','JESRelativeBal','JESRelativeSample_2018']
folderup = ""
folderdo = ""

# (substring looked for in the source name, folder tag); the first match wins.
# Several sources share one folder, e.g. JESAbsolute and JESAbsolute_2018.
_jesFolderTags = (
    ('Absolute',  'JESAbsolute'),
    ('BBEC1',     'JESBBEC1'),
    ('EC2',       'JESEC2'),
    ('HF',        'JESHF'),
    ('Relative',  'JESRelative'),
    ('FlavorQCD', 'JESFlavorQCD'),
)

for js in jes_systs:
    for _key, _tag in _jesFolderTags:
        if _key in js:
            folderup = _variedFolder(_tag + 'up')
            folderdo = _variedFolder(_tag + 'do')
            break
    # With no match, folderup/folderdo keep the values of the previous source.

    nuisances[js] = _suffixNuisance('CMS_scale_' + js, js, folderup, folderdo)

###### Jet energy resolution
# NOTE(review): unlike every other nuisance here, JER has no 'cuts' key — confirm this is intended.
nuisances['JER'] = _suffixNuisance(
    'CMS_res_j_2018', 'JER', _variedFolder('JERup'), _variedFolder('JERdo'),
    withCuts=False)

##### MET energy scale

nuisances['met'] = _suffixNuisance(
    'CMS_scale_met_2018', 'MET', _variedFolder('METup'), _variedFolder('METdo'))

# Use the following if you want to apply the automatic combine MC stat nuisances.
#   maxPoiss      = Number of threshold events for Poisson modelling
#   includeSignal = Include MC stat nuisances on signal processes (1=True, 0=False)
nuisances['stat'] = {
    'type'          : 'auto',
    'maxPoiss'      : '10',
    'includeSignal' : '1',
    'samples'       : {},
    'cuts'          : fitcuts,
}

# Set the skipCMS flag on every nuisance defined so far.
for n in nuisances.values():
    n['skipCMS'] = 1
# plot configuration
groupPlot = {}

# Groups of samples to improve the plots.
# If not defined, normal plots are used.
# Colours are ROOT colour indices; the symbolic name is given in the comment.

groupPlot['DY']  = {'nameHR': 'DY',  'color': 616, 'isSignal': 0, 'samples': ['DY']}    # kMagenta
groupPlot['WW']  = {'nameHR': 'WW',  'color': 632, 'isSignal': 0, 'samples': ['WW']}    # kRed
groupPlot['Zg']  = {'nameHR': 'Zg',  'color': 859, 'isSignal': 0, 'samples': ['Zg']}    # kAzure-1
groupPlot['ZgS'] = {'nameHR': 'ZgS', 'color': 432, 'isSignal': 0, 'samples': ['ZgS']}   # kCyan
groupPlot['WZ']  = {'nameHR': "WZ",  'isSignal': 0, 'color': 400, 'samples': ['WZ']}    # kYellow
groupPlot['ZZ']  = {'nameHR': "ZZ",  'isSignal': 0, 'color': 617, 'samples': ['ZZ']}    # kMagenta+1
groupPlot['VVV'] = {'nameHR': 'VVV', 'isSignal': 0, 'color': 806, 'samples': ['VVV']}   # kOrange+6
groupPlot['top'] = {'nameHR': 'top', 'color': 416, 'isSignal': 0, 'samples': ['top']}   # kGreen
groupPlot['ttV'] = {'nameHR': 'ttV', 'color': 419, 'isSignal': 0, 'samples': ['ttV']}   # kGreen+3

groupPlot['Fake'] = {
    'nameHR'   : 'Non-prompt',
    'isSignal' : 0,
    'color'    : 921,  # kGray+1
    'samples'  : ['Fake_e', 'Fake_m'],
}

groupPlot['Higgs'] = {
    'nameHR'   : 'Higgs',
    'isSignal' : 1,
    'color'    : 632,  # kRed
    'scaleMultiplicativeOverlaid' : 10.0,
    'samples'  : ['WH_hww', 'ZH_hww', 'ggZH_hww', 'ZH_htt', 'ttH_hww', 'WH_htt'],
}


# Individual plots
plot = {}


def _mcPlot(color, nameHR=None, isSignal=0, scale=1.0):
    """Return the plot entry of one simulated (non-data) sample.

    'nameHR' is only stored when given, so entries without a human-readable
    name stay without that key.
    """
    entry = {} if nameHR is None else {'nameHR': nameHR}
    entry.update({'color': color, 'isSignal': isSignal, 'isData': 0, 'scale': scale})
    return entry


# Backgrounds.  WWewk and ggWW have no entry: both samples are disabled.
plot['DY']  = _mcPlot(858, 'DY')    # kAzure-2
plot['WW']  = _mcPlot(858, 'WW')    # kAzure-2
plot['Zg']  = _mcPlot(859, 'Zg')    # kAzure-1
plot['ZgS'] = _mcPlot(859, 'ZgS')   # kAzure-1
plot['WZ']  = _mcPlot(858, 'WZ')    # kAzure-2; alternative scales: 0.89 (1j norm), 1.22 (2j norm)
plot['ZZ']  = _mcPlot(856, 'ZZ')    # kAzure-4
plot['VVV'] = _mcPlot(857)          # kAzure-3
plot['top'] = _mcPlot(419)          # kGreen+3; scale: ele/mu trigger efficiency datadriven
plot['ttV'] = _mcPlot(419)          # kGreen+3; scale: ele/mu trigger efficiency datadriven
plot['Fake_e'] = _mcPlot(921)       # kGray+1
plot['Fake_m'] = _mcPlot(921)       # kGray+1

# Signals
plot['ttH_hww']  = _mcPlot(632+3, 'ttH',    isSignal=1, scale=1)   # kRed+3
plot['ZH_hww']   = _mcPlot(632+3, 'ZH',     isSignal=1, scale=1)   # kRed+3
plot['ggZH_hww'] = _mcPlot(632+4, 'ggZH',   isSignal=1, scale=1)   # kRed+4
plot['WH_hww']   = _mcPlot(632+2, 'WH',     isSignal=1, scale=1)   # kRed+2
# WH_htt is listed in groupPlot['Higgs'], so it needs an entry here as well.
plot['WH_htt']   = _mcPlot(632+1, 'WH htt', isSignal=1, scale=1)   # kRed+1
plot['ZH_htt']   = _mcPlot(632+1, 'ZH htt', isSignal=1, scale=1)   # kRed+1

# Data
plot['DATA'] = {
    'nameHR'   : 'Data',
    'color'    : 1,
    'isSignal' : 0,
    'isData'   : 1,
    'isBlind'  : 0,
}

# additional options
legend = {}
legend['lumi'] = 'L = 59.7/fb'
legend['sqrt'] = '#sqrt{s} = 13 TeV'
def nanoGetSampleFiles(path, name):
    """Return ``[(name, files)]`` for the dataset *name* found under *path*.

    The file list is obtained from the module-level ``searchFiles`` object
    using the module-level ``redirector``.  Unless ``limitFiles`` is -1, the
    list is truncated to its first ``limitFiles`` entries.
    """
    _files = searchFiles.searchFiles(path, name, redirector=redirector)
    if limitFiles != -1 and len(_files) > limitFiles:
        _files = _files[:limitFiles]
    return [(name, _files)]


def getSampleFilesNano(inputDir, Sample, absPath=False):
    """Thin wrapper around :func:`nanoGetSampleFiles` with the older call signature.

    *absPath* is accepted so existing calls keep working, but it is ignored.
    """
    return nanoGetSampleFiles(inputDir, Sample)


def CombineBaseW(samples, proc, samplelist):
    """Return, as a string, a common ``baseW`` for the datasets in *samplelist*.

    The value is ``baseW * sum(genEventSumw)`` of the dataset with the fewest
    files, divided by ``sum(genEventSumw)`` over all files of all listed
    datasets.  Callers are expected to append ``"/baseW"`` to the result when
    building the weight expression.

    ``ROOT`` must be available in the namespace this file is executed in.
    NOTE(review): it is not among the imports visible in this file — confirm
    it is provided by the framework.

    Raises:
        KeyError: if *proc* is not a key of *samples*.
        ValueError: if no dataset of *proc* is named in *samplelist*.
    """
    _files = [entry[1] for entry in samples[proc]["name"] if entry[0] in samplelist]
    if not _files:
        raise ValueError(
            "CombineBaseW: none of %r found in samples[%r]" % (list(samplelist), proc)
        )

    # First dataset with the smallest number of files.
    leastFiles = min(_files, key=len)
    sumwSmall = ROOT.RDataFrame("Runs", leastFiles).Sum("genEventSumw").GetValue()

    rootFile = ROOT.TFile(leastFiles[0])
    try:
        events = rootFile.Get("Events")
        # Read the first entry: index 1 does not exist in a single-event tree.
        events.GetEntry(0)
        xs = events.baseW * sumwSmall
    finally:
        rootFile.Close()

    allFiles = [path for files in _files for path in files]
    sumwAll = ROOT.RDataFrame("Runs", allFiles).Sum("genEventSumw").GetValue()
    return str(xs / sumwAll)


def addSampleWeight(samples, sampleName, sampleNameType, weight):
    """Multiply the weight of dataset *sampleNameType* of *sampleName* by *weight*.

    ``samples[sampleName]["name"]`` is a list of ``(name, files)`` or
    ``(name, files, weight)`` tuples.  Every entry named *sampleNameType* is
    moved to the end of the list with ``"(<weight>)"`` as its weight, or
    ``"<old>*(<weight>)"`` when it already had one.  The list is rebound to a
    new list object; the input list itself is not mutated.

    Raises:
        IndexError: if no entry is named *sampleNameType*; ``samples`` is
            left untouched in that case.
    """
    entries = samples[sampleName]["name"]
    matched = [entry for entry in entries if entry[0] == sampleNameType]
    if not matched:
        raise IndexError(
            "addSampleWeight: dataset %r not found in samples[%r]"
            % (sampleNameType, sampleName)
        )

    updated = [entry for entry in entries if entry[0] != sampleNameType]
    for obj in matched:
        if len(obj) > 2:
            updated.append((obj[0], obj[1], obj[2] + "*(" + weight + ")"))
        else:
            updated.append((obj[0], obj[1], "(" + weight + ")"))
    samples[sampleName]["name"] = updated
##############################################
###### Tree Directory according to site ######
##############################################

SITE = os.uname()[1]
xrootdPath = ''
if 'iihe' in SITE:
    xrootdPath = 'dcap://maite.iihe.ac.be/'
    treeBaseDir = '/pnfs/iihe/cms/store/user/xjanssen/HWW2015/'
elif 'cern' in SITE:
    #xrootdPath='root://eoscms.cern.ch/'
    treeBaseDir = '/eos/cms/store/group/phys_higgs/cmshww/amassiro/HWWNano/'
else:
    # Without this branch treeBaseDir would be undefined and the next
    # statement would fail with an uninformative NameError.
    raise RuntimeError("samples.py: unknown site %r, cannot choose treeBaseDir" % SITE)

directory = treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7'

################################################
############ NUMBER OF LEPTONS #################
################################################

Nlep = '3'   # alternatives: '2', '4'

################################################
############### Lepton WP ######################
################################################

eleWP = 'mvaFall17V1Iso_WP90'   # alternatives: mvaFall17V1Iso_WP90_SS, mvaFall17V2Iso_WP90, mvaFall17V2Iso_WP90_SS
muWP = 'cut_Tight_HWWW'
eleWP_new = 'mvaFall17V1Iso_WP90_tthmva_70'
muWP_new = 'cut_Tight_HWWW_tthmva_80'

# The *_new working points are the ones used below; eleWP/muWP are kept for reference.
LepWPCut = 'LepCut'+Nlep+'l__ele_'+eleWP_new+'__mu_'+muWP_new
LepWPweight = 'LepSF'+Nlep+'l__ele_'+eleWP_new+'__mu_'+muWP_new

################################################
############ BASIC MC WEIGHTS ##################
################################################

XSWeight = 'XSWeight'
SFweight = 'SFweight'+Nlep+'l*'+LepWPweight+'*'+LepWPCut+'*Jet_PUIDSF'
PromptGenLepMatch = 'PromptGenLepMatch'+Nlep+'l'

################################################
############## FAKE WEIGHTS ####################
################################################

if Nlep == '2':
    fakeW = 'fakeW2l_ele_'+eleWP_new+'_mu_'+muWP_new
else:
    fakeW = 'fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_'+Nlep+'l'

################################################
############### B-Tag WP ######################
################################################

SFweight += '*btagSF'   # defined in aliases.py

################################################
############ MET FILTERS ###################
################################################

METFilter_MC = 'METFilter_MC'
METFilter_DATA = 'METFilter_DATA'

################################################
############ DATA DECLARATION ##################
################################################

DataRun = [
    ['A', 'Run2018A-02Apr2020-v1'],
    ['B', 'Run2018B-02Apr2020-v1'],
    ['C', 'Run2018C-02Apr2020-v1'],
    ['D', 'Run2018D-02Apr2020-v1'],
]

DataSets = ['MuonEG', 'DoubleMuon', 'SingleMuon', 'EGamma']

# Trigger requirement per primary dataset; each one vetoes the triggers of the
# datasets listed before it.
DataTrig = {
    'MuonEG'     : 'Trigger_ElMu',
    'DoubleMuon' : '!Trigger_ElMu && Trigger_dblMu',
    'SingleMuon' : '!Trigger_ElMu && !Trigger_dblMu && Trigger_sngMu',
    'EGamma'     : '!Trigger_ElMu && !Trigger_dblMu && !Trigger_sngMu && (Trigger_sngEl || Trigger_dblEl)',
}


def _sampleFiles(*names):
    """Concatenate getSampleFilesNano(directory, name) over *names*, in order."""
    files = []
    for name in names:
        files += getSampleFilesNano(directory, name)
    return files


def _addDataFiles(sampleKey):
    """Append every (run, dataset) file entry and its trigger weight to samples[sampleKey]."""
    for Run in DataRun:
        for DataSet in DataSets:
            for iFile in getSampleFilesNano(directory, DataSet+'_'+Run[1], True):
                samples[sampleKey]['name'].append(iFile)
                samples[sampleKey]['weights'].append(DataTrig[DataSet])


# Weight shared by most simulated samples.
_mcWeight = XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC

###########################################
#############  BACKGROUNDS  ###############
###########################################

############ DY ############

# The last term uses >=200 so that gen_ptll == 200 does not get weight 0.
# NOTE(review): this expression is not used below and relies on the '**'
# operator — check that the expression evaluator accepts it before using it.
ptllDYW_NLO = '(0.87*(gen_ptll<10)+(0.379119+0.099744*gen_ptll-0.00487351*gen_ptll**2+9.19509e-05*gen_ptll**3-6.0212e-07*gen_ptll**4)*(gen_ptll>=10 && gen_ptll<45)+(9.12137e-01+1.11957e-04*gen_ptll-3.15325e-06*gen_ptll**2-4.29708e-09*gen_ptll**3+3.35791e-11*gen_ptll**4)*(gen_ptll>=45 && gen_ptll<200) + 1*(gen_ptll>=200))'
ptllDYW_LO = '((0.632927+0.0456956*gen_ptll-0.00154485*gen_ptll*gen_ptll+2.64397e-05*gen_ptll*gen_ptll*gen_ptll-2.19374e-07*gen_ptll*gen_ptll*gen_ptll*gen_ptll+6.99751e-10*gen_ptll*gen_ptll*gen_ptll*gen_ptll*gen_ptll)*(gen_ptll>0)*(gen_ptll<100)+(1.41713-0.00165342*gen_ptll)*(gen_ptll>=100)*(gen_ptll<300)+1*(gen_ptll>=300))'
# Zg sample uses photon pt > 15, lepton pt > 15
Zgfilter = '( !(Sum(PhotonGen_isPrompt==1 && PhotonGen_pt>15 && abs(PhotonGen_eta)<2.6) > 0 && Sum(LeptonGen_isPrompt==1 && LeptonGen_pt>15)>=2) )'

# DYJetsToLL_M-50_HT-400to600 and its _ext2 are disabled.
samples['DY'] = {
    'name' : _sampleFiles(
        'DYJetsToLL_M-10to50-LO',
        'DYJetsToLL_M-10to50-LO_ext1',
        'DYJetsToLL_M-50-LO',
        'DYJetsToLL_M-50',
        'DYJetsToLL_M-50_ext2',
        'DYJetsToLL_M-4to50_HT-100to200',
        'DYJetsToLL_M-4to50_HT-200to400',
        'DYJetsToLL_M-4to50_HT-400to600',
        'DYJetsToLL_M-4to50_HT-600toInf',
        'DYJetsToLL_M-50_HT-70to100',
        'DYJetsToLL_M-50_HT-100to200',
        'DYJetsToLL_M-50_HT-200to400',
        'DYJetsToLL_M-50_HT-600to800',
        'DYJetsToLL_M-50_HT-800to1200',
        'DYJetsToLL_M-50_HT-1200to2500',
        'DYJetsToLL_M-50_HT-2500toInf',
    ),
    'weight' : _mcWeight+'*'+Zgfilter,
    'FilesPerJob' : 5,
    'suppressNegative' : ['all'],
    'suppressNegativeNuisances' : ['all'],
}

# NOTE(review): placeholder.  The CombineBaseW call for the two M-10to50-LO
# datasets is disabled, so the factor below reduces to '1/baseW' — confirm
# this is intended before using the yields.
M10baseW = '1'

for _name, _weight in (
    ('DYJetsToLL_M-10to50-LO',         ptllDYW_LO+'*(LHE_HT<100.0)*'+M10baseW+'/baseW'),
    ('DYJetsToLL_M-10to50-LO_ext1',    ptllDYW_LO+'*(LHE_HT<100.0)*'+M10baseW+'/baseW'),
    ('DYJetsToLL_M-50-LO',             ptllDYW_LO+'*(LHE_HT<70.0)'),
    ('DYJetsToLL_M-4to50_HT-100to200', ptllDYW_LO),
    ('DYJetsToLL_M-4to50_HT-200to400', ptllDYW_LO),
    ('DYJetsToLL_M-4to50_HT-400to600', ptllDYW_LO),
    ('DYJetsToLL_M-4to50_HT-600toInf', ptllDYW_LO),
    ('DYJetsToLL_M-50_HT-70to100',     ptllDYW_LO),
    ('DYJetsToLL_M-50_HT-100to200',    ptllDYW_LO),
    ('DYJetsToLL_M-50_HT-200to400',    ptllDYW_LO),
    ('DYJetsToLL_M-50_HT-600to800',    ptllDYW_LO),
    ('DYJetsToLL_M-50_HT-800to1200',   ptllDYW_LO),
    ('DYJetsToLL_M-50_HT-1200to2500',  ptllDYW_LO),
    ('DYJetsToLL_M-50_HT-2500toInf',   ptllDYW_LO),
    ('DYJetsToLL_M-50',                ptllDYW_LO),
    ('DYJetsToLL_M-50_ext2',           ptllDYW_LO),
):
    addSampleWeight(samples, 'DY', _name, _weight)

############ Top ############

# ST_t-channel_antitop is disabled.
samples['top'] = {
    'name' : _sampleFiles(
        'TTTo2L2Nu',
        'ST_s-channel_ext1',
        'ST_t-channel_top',
        'ST_tW_antitop_ext1',
        'ST_tW_top_ext1',
    ),
    'weight' : _mcWeight,
    'FilesPerJob' : 6,
}

addSampleWeight(samples, 'top', 'TTTo2L2Nu', 'Top_pTrw')

samples['ttV'] = {
    'name' : _sampleFiles('TTWJetsToLNu', 'TTZjets'),
    'weight' : _mcWeight,
    'FilesPerJob' : 5,
}

############ WW ############

# The WWewk and ggWW samples are disabled.
samples['WW'] = {
    'name' : _sampleFiles('WWTo2L2Nu'),
    'weight' : _mcWeight+'*nllW',
}

###### Zg (no prompt gen-lepton matching in the weight)

samples['Zg'] = {
    'name' : _sampleFiles('ZGToLLG'),
    'weight' : XSWeight+'*'+SFweight+'*'+METFilter_MC + '*(Gen_ZGstar_mass <= 0)',
    'FilesPerJob' : 6,
}

###### Zg*

samples['ZgS'] = {
    'name' : _sampleFiles('ZGToLLG'),
    'weight' : _mcWeight+'*(Gen_ZGstar_mass > 0)',
    'FilesPerJob' : 4,
}

##### WZ

samples['WZ'] = {
    'name' : _sampleFiles('WZTo3LNu_mllmin01', 'WZTo2L2Q'),
    'weight' : _mcWeight+'*(gstarHigh)',
    'FilesPerJob' : 5,
}

##### ZZ

# ggZZ4m is disabled (missing file for ElepTup); ggZZ4m_ext1 is used alone.
samples['ZZ'] = {
    'name' : _sampleFiles(
        'ZZTo2L2Nu_ext1',
        'ZZTo2L2Nu_ext2',
        'ZZTo2L2Q',
        'ZZTo4L_ext1',
        'ZZTo4L_ext2',
        'ggZZ4m_ext1',
        'ggZZ4t',
        'ggZZ2e2t',
        'ggZZ2m2t',
        'ggZZ2e2m',
    ),
    'weight' : _mcWeight,
    'FilesPerJob' : 3,
}

# NOTE(review): placeholders, same caveat as M10baseW above.
ZZ2LbaseW = '1'
ZZ4LbaseW = '1'

# 1.07: the non-ggZZ NNLO/NLO k-factor, cited from https://arxiv.org/abs/1405.2219v1
# 1.68: the ggZZ NLO/LO k-factor, cited from https://arxiv.org/abs/1509.06734v1
for _name, _weight in (
    ('ZZTo2L2Nu_ext1', "1.07*"+ZZ2LbaseW+"/baseW"),
    ('ZZTo2L2Nu_ext2', "1.07*"+ZZ2LbaseW+"/baseW"),
    ('ZZTo2L2Q',       "1.07"),
    ('ZZTo4L_ext1',    "1.07*"+ZZ4LbaseW+"/baseW"),
    ('ZZTo4L_ext2',    "1.07*"+ZZ4LbaseW+"/baseW"),
    ('ggZZ2e2t',       "1.68"),
    ('ggZZ2m2t',       "1.68"),
    ('ggZZ2e2m',       "1.68"),
    ('ggZZ4m_ext1',    "1.68"),
    ('ggZZ4t',         "1.68"),
):
    addSampleWeight(samples, 'ZZ', _name, _weight)

############ VVV ############

samples['VVV'] = {
    'name' : _sampleFiles('ZZZ', 'WZZ', 'WWZ', 'WWW'),
    'weight' : _mcWeight,
}

##########################################
################ SIGNALS #################
##########################################

############ ZH H->WW ############

samples['ZH_hww'] = {
    'name' : _sampleFiles('HZJ_HToWW_M125'),
    'weight' : _mcWeight,
}

samples['ggZH_hww'] = {
    'name' : _sampleFiles('GluGluZH_HToWW_M125'),
    'weight' : _mcWeight,
}

############ WH H->WW ############

samples['WH_hww'] = {
    'name' : _sampleFiles('HWplusJ_HToWW_M125', 'HWminusJ_HToWW_M125'),
    'weight' : _mcWeight,
}

############ ttH ############

samples['ttH_hww'] = {
    'name' : _sampleFiles('ttHToNonbb_M125'),
    'weight' : _mcWeight,
}

############ bbH ############
# Not available for Latinos 2016 v6

############ H->TauTau ############
# The ggH_htt and qqH_htt samples are disabled.

samples['ZH_htt'] = {
    'name' : _sampleFiles('HZJ_HToTauTau_M125'),
    'weight' : _mcWeight,
}

samples['WH_htt'] = {
    'name' : _sampleFiles('HWplusJ_HToTauTau_M125', 'HWminusJ_HToTauTau_M125'),
    'weight' : _mcWeight,
}

###########################################
################## FAKE ###################
###########################################

# 'weights' is filled in parallel with 'name': one trigger expression per entry.
samples['Fake'] = {
    'name' : [],
    'weight' : fakeW+'*'+METFilter_DATA,
    'weights' : [],
    'isData' : ['all'],
    'FilesPerJob' : 500,
    'suppressNegativeNuisances' : ['all'],
}

directory = treeBaseDir+'Run2018_102X_nAODv7_Full2018v7/DATAl1loose2018v7__l2loose__fakeW/'
_addDataFiles('Fake')

# The 'e' and 'm' subsamples select on the flavour stored in ZH3l_pdgid_l.
samples['Fake']['subsamples'] = {
    'e': 'abs(ZH3l_pdgid_l) == 11',
    'm': 'abs(ZH3l_pdgid_l) == 13'
}

###########################################
################## DATA ###################
###########################################

samples['DATA'] = {
    'name' : [],
    'weight' : METFilter_DATA+'*'+LepWPCut,
    'weights' : [],
    'isData' : ['all'],
    'FilesPerJob' : 500,
}

# NOTE(review): both treeBaseDir values end with '/', so this path contains '//'.
directory = treeBaseDir+'/Run2018_102X_nAODv7_Full2018v7/DATAl1loose2018v7__l2loose__l2tightOR2018v7/'
_addDataFiles('DATA')
# structure configuration for datacard
# keys here must match keys in samples.py
structure = {}

# Cuts from which a background is removed; backgrounds not listed here
# get no 'removeFromCuts' key at all.
_removedFromCuts = {
    'DY'  : ['zh3l_SR_1j'],
    'WW'  : ['zh3l_SR_1j', 'zh3l_SR_2j'],
    'top' : ['zh3l_WZ_CR_1j', 'zh3l_SR_1j'],
}

# Backgrounds (WWewk and ggWW are disabled)
for _bkg in ('Fake_e', 'Fake_m', 'DY', 'Zg', 'ZgS', 'WW', 'WZ', 'VVV', 'ZZ', 'ttV', 'top'):
    structure[_bkg] = {'isSignal': 0, 'isData': 0}
    if _bkg in _removedFromCuts:
        structure[_bkg]['removeFromCuts'] = _removedFromCuts[_bkg]

# Signal
for _sig in ('WH_hww', 'ZH_hww', 'ggZH_hww', 'ttH_hww', 'WH_htt', 'ZH_htt'):
    structure[_sig] = {'isSignal': 1, 'isData': 0}

# Data
structure['DATA'] = {'isSignal': 0, 'isData': 1}
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_11.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_12.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_13.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_14.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_15.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_16.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_17.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_18.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_19.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_20.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_21.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_22.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_23.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_24.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_25.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_26.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_27.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_28.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_29.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_31.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_32.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_33.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_34.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_35.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_36.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_37.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_38.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_39.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_40.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_41.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_42.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_43.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_44.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_45.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_46.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_47.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_48.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_50.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_51.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_52.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_53.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_54.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_55.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_56.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_57.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_58.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_59.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_60.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_61.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_62.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_63.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_64.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_65.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_66.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_67.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_68.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_70.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_71.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_72.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_74.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_75.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_76.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_77.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_78.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_79.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_80.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_81.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_82.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_83.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_84.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_85.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_86.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_87.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_88.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_89.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_90.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_91.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_1.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_2.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_3.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_4.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_5.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_6.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_7.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_8.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_9.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_10.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_11.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_12.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_13.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_14.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_15.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_16.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_17.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_18.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_19.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_21.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_22.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_23.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_24.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_25.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_26.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_27.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_28.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_29.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_30.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_31.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_32.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_33.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_1.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_2.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_3.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_4.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_5.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_6.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_7.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_8.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_9.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_10.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_11.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_12.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_13.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_14.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_15.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_16.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WW_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__Zg_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__Zg_1.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__Zg_2.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZgS_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZgS_1.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZgS_2.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZgS_3.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_1.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_2.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_3.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_4.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_5.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_6.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_7.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_8.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_9.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_10.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_11.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_12.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_13.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_15.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_16.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_17.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_1.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_2.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_3.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_4.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_5.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_6.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_7.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_8.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_9.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_10.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_11.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_12.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_13.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_14.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_15.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_16.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_17.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_18.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_19.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_20.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_21.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_22.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_23.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_24.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_25.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_26.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_27.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_28.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_29.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_30.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_31.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_32.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_33.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_34.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_35.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_36.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_37.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_38.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_39.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_40.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_41.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_42.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_43.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_44.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_45.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_46.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_47.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_48.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_49.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_50.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_51.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_52.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_53.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_54.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_55.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_56.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_57.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_58.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_59.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_60.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_61.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_62.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_63.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_64.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_65.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_66.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_67.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_68.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_69.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_70.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_71.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_72.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_74.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_75.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_76.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_77.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_78.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_80.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_81.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_82.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_83.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_84.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_85.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_86.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_87.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_88.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_89.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_90.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZH_hww_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ggZH_hww_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WH_hww_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttH_hww_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZH_htt_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__Fake_0.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__Fake_1.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DATA_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DATA_1.root \ No newline at end of file diff --git a/ZH3l_BDT/zh3l_run2_bdt_rdf/variables.py b/ZH3l_BDT/zh3l_run2_bdt_rdf/variables.py new file mode 100644 index 00000000..0ec4ca8a --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_bdt_rdf/variables.py @@ -0,0 +1,129 @@ +# variables + +variables = {} + +#'fold' : # 0 = not fold (default), 1 = fold underflowbin, 2 = fold overflow bin, 3 = fold underflow and overflow +# The following is needed if combining plots between years: +#'doWeight' : 1 +#'binX' : 1 +#'binY' : <# of bins> + +variables['events'] = { 'name': '1', + 'range' : (1,0,2), + 'xaxis' : 'events', + 'fold' : 3 + } + +variables['njet'] = { 'name' : 'ZH3l_njet', + 'range' : (10,0,10), + 'xaxis' : 'N_{jet}', + 'fold' : 0 + } + +variables['met'] = { 'name' : 'PuppiMET_pt', + 'range' : (20,0,100), + 'xaxis' : 'Puppi MET', + 'fold' : 0 + } + +variables['pt1'] = { 'name': 'Lepton_pt[0]', + 'range' : (10,0.,200), + 'xaxis' : 'lept1_p_{T} [GeV]', + 'fold' : 0 + } + +variables['dphilmetj'] = { 'name' : 'ZH3l_dphilmetj', #FIXME + 'range' : (16,0,3.14159), + 'xaxis' : 'dphilmetj', + 'fold' : 0 + } + +variables['dphilmetjj'] = { 'name' : 'ZH3l_dphilmetjj', #FIXME + 'range' : (16,0,3.14159), + 'xaxis' : 'dphilmetjj', + 'fold' : 0, + 'cuts' : [cut for cut in cuts if '1j' not in cut] + } + +variables['pTlmetj'] = { 'name' : 'ZH3l_pTlmetj', + 'range' : (20,0,400), + 'xaxis' : 'pTlmetj', + 'fold' : 0 + } + +variables['pTlmetjj'] = { 'name' : 'ZH3l_pTlmetjj', + 'range' : (20,0,400), + 'xaxis' : 'pTlmetjj', + 'fold' : 0, + 'cuts' : [cut for cut in cuts if '1j' not in cut] + } + +variables['mTlmetjj'] = { 'name' : 'ZH3l_mTlmetjj', #FIXME + 'range' : (16,50,450), + 'xaxis' : 'mTlmetjj', + 
'fold' : 3, + 'cuts' : [cut for cut in cuts if '1j' not in cut] + } + +variables['mTlmetj'] = { 'name' : 'ZH3l_mTlmetj', #FIXME + 'range' : (10,0,250), + 'xaxis' : 'mTlmetj', + 'fold' : 2, + } + +variables['ptz'] = { 'name' : 'ZH3l_pTZ', + 'range' : (20,0,400), + 'xaxis' : 'ptz', + 'fold' : 0 + } + +variables['mtw_notZ'] = { 'name' : 'ZH3l_mTlmet', #FIXME + 'range' : (20,0,200), + 'xaxis' : 'mTlmet', + 'fold' : 0 + } + +variables['mtw_fit'] = { 'name' : 'ZH3l_mTlmet', #FIXME + 'range' : (8,0,160), + 'xaxis' : 'mTlmet', + 'fold' : 2 + } + +variables['checkmZ'] = { 'name' : 'ZH3l_checkmZ', + 'range' : (20,0,200), + 'xaxis' : 'checkmZ', + 'fold' : 0 + } + +variables['ptjet0'] = { 'name' : 'CleanJet_pt[0]', + 'range' : (20,0,200), + 'xaxis' : 'Leading jet p_{T}', + 'fold' : 0 + } + +variables['ptjet1'] = { 'name' : 'CleanJet_pt[1]', + 'range' : (20,0,200), + 'xaxis' : 'Subleading jet p_{T}', + 'cuts' : [cut for cut in cuts if '1j' not in cut], + 'fold' : 0 + } + +variables['WlepId'] = { 'name' : 'ZH3l_pdgid_l', + 'range' : (31,-15.5,15.5), + 'xaxis' : 'W lepton ID', + 'fold' : 0 + } + +variables['flavor3l'] = { 'name' : '0*(abs(Lepton_pdgId[0])==13 && abs(Lepton_pdgId[1])==13 && abs(Lepton_pdgId[2])==13)+ \ + 1*(abs(Lepton_pdgId[0])==13 && abs(Lepton_pdgId[1])==13 && abs(Lepton_pdgId[2])==11)+ \ + 2*(abs(Lepton_pdgId[0])==13 && abs(Lepton_pdgId[1])==11 && abs(Lepton_pdgId[2])==13)+ \ + 3*(abs(Lepton_pdgId[0])==11 && abs(Lepton_pdgId[1])==13 && abs(Lepton_pdgId[2])==13)+ \ + 4*(abs(Lepton_pdgId[0])==13 && abs(Lepton_pdgId[1])==11 && abs(Lepton_pdgId[2])==11)+ \ + 5*(abs(Lepton_pdgId[0])==11 && abs(Lepton_pdgId[1])==13 && abs(Lepton_pdgId[2])==11)+ \ + 6*(abs(Lepton_pdgId[0])==11 && abs(Lepton_pdgId[1])==11 && abs(Lepton_pdgId[2])==13)+ \ + 7*(abs(Lepton_pdgId[0])==11 && abs(Lepton_pdgId[1])==11 && abs(Lepton_pdgId[2])==11)', + 'range' : (8,-0.5,7.5), + 'xaxis' : 'Trilepton flavor', + 'fold' : 0 + } + diff --git a/ZH3l_BDT/zh3l_run2_rdf/aliases.py 
b/ZH3l_BDT/zh3l_run2_rdf/aliases.py new file mode 100644 index 00000000..0aeb3148 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_rdf/aliases.py @@ -0,0 +1,256 @@ +#Aliases (mostly btag) +import os +import copy +import inspect + +mc = [skey for skey in samples if skey not in ('Fake', 'DATA')] + +#2018 +#bWP = '0.1241' #Loose +bWP = '0.4184' +# DS - ideally from https://btv-wiki.docs.cern.ch/ScaleFactors/Run2UL2018/#general-remarks, but seems like this is calculated from another dataset: https://indico.cern.ch/event/826738/contributions/3464989/attachments/1860911/3058344/BTag_190612_PtRel.pdf. +configurations = os.path.realpath(inspect.getfile(inspect.currentframe())) # this file +configurations = os.path.dirname(configurations) # 2022_v12 +configurations = os.path.dirname(configurations) # WZ +configurations = os.path.dirname(configurations) # ControlRegions +configurations = os.path.dirname(configurations) # PlotsConfigurationsRun3 + +aliases = {} +aliases = OrderedDict() + +aliases['bVeto'] = { + 'expr': '(Sum((CleanJet_pt > 20.0 && abs(CleanJet_eta) < 2.5) * Take(Jet_btagDeepB, CleanJet_jetIdx) > ' + bWP + ') == 0)' + +} + +aliases['btagSF'] = { +'expr': 'TMath::Exp(Sum(LogVec((CleanJet_pt>20 && abs(CleanJet_eta)<2.5)*Take(Jet_btagSF_deepcsv_shape, CleanJet_jetIdx)+1*(CleanJet_pt<20 || abs(CleanJet_eta)>2.5))))', +'samples': mc +} + +systs = ['jes','lf','hf','lfstats1','lfstats2','hfstats1','hfstats2','cferr1','cferr2'] +# DS -(?)- jes - Jet Energy Scale uncertainty, lf/hf - light/heavy flavor, stats - statistical uncertainty, cferr - charm flavour + +for s in systs: + aliases['btagSF'+s+'up'] = { + 'expr': aliases['btagSF']['expr'].replace('shape','shape_up_'+s), + 'samples':mc + } + aliases['btagSF'+s+'down'] = { + 'expr': aliases['btagSF']['expr'].replace('shape','shape_down_'+s), + 'samples':mc + } + +aliases['Jet_PUIDSF'] = { + 'expr': 'TMath::Exp(Sum((Jet_jetId >= 2) * LogVec(Jet_PUIDSF_loose)))', + 'samples': mc +} + +aliases['Jet_PUIDSF_up'] = { + 'expr': 
'TMath::Exp(Sum((Jet_jetId >= 2) * LogVec(Jet_PUIDSF_loose_up)))', + 'samples': mc +} + +aliases['Jet_PUIDSF_down'] = { + 'expr': 'TMath::Exp(Sum((Jet_jetId >= 2) * LogVec(Jet_PUIDSF_loose_down)))', + 'samples': mc +} + +aliases['PromptGenLepMatch3l'] = { + 'expr': 'Alt(Lepton_promptgenmatched, 0, 0) * Alt(Lepton_promptgenmatched, 1, 0) * Alt(Lepton_promptgenmatched, 2, 0)', + 'samples': mc +} + +#aliases['LepWPCutNew'] = { 'expr': '(((abs(Lepton_pdgId[0])==13 && Muon_mvaTTH[Lepton_muonIdx[0]]>0.8) || (abs(Lepton_pdgId[0])==11 && Electron_mvaTTH[Lepton_electronIdx[0]]>0.7)) && ((abs(Lepton_pdgId[1])==13 && Muon_mvaTTH[Lepton_muonIdx[1]]>0.8) || (abs(Lepton_pdgId[1])==11 && Electron_mvaTTH[Lepton_electronIdx[1]]>0.7)) && ((abs(Lepton_pdgId[2])==13 && Muon_mvaTTH[Lepton_muonIdx[2]]>0.8) || (abs(Lepton_pdgId[2])==11 && Electron_mvaTTH[Lepton_electronIdx[2]]>0.7)))', + # 'samples': mc + ['DATA'] +#} + +aliases['Top_pTrw'] = { + 'expr': '(topGenPt * antitopGenPt > 0.) * (TMath::Sqrt(TMath::Exp(0.0615 - 0.0005 * topGenPt) * TMath::Exp(0.0615 - 0.0005 * antitopGenPt))) + (topGenPt * antitopGenPt <= 0.)', + 'samples': ['top'] +} + +#aliases['ZH3l_dphilmetjj_test'] = { +# 'linesToAdd': [ +# '.L %s/src/PlotsConfigurations/Configurations/ZH3l/scripts/ZH3l_patch.cc+' % os.getenv('CMSSW_BASE') +# ], +# 'class': 'ZH3l_patch', +# 'args': ("dphilmetjj") +#} + +#aliases['ZH3l_dphilmetj_test'] = { +# 'class': 'ZH3l_patch', +# 'args': ("dphilmetj") +#} + +#aliases['ZH3l_mTlmet_test'] = { +# 'class': 'ZH3l_patch', +# 'args': ("mTlmet") +#} + +#aliases['ZH3l_mTlmetj_test'] = { +# 'class': 'ZH3l_patch', +# 'args': ("mTlmetj") +#} + +#aliases['ZH3l_mTlmetjj_test'] = { +# 'class': 'ZH3l_patch', +# 'args': ("mTlmetjj") +#} + +####################### +### SFs for tthMVA ### +####################### + +aliases['SFweightEleUp'] = { + 'expr': 'LepSF3l__ele_'+eleWP_new+'__Up', + 'samples': mc +} + +aliases['SFweightEleDown'] = { + 'expr': 'LepSF3l__ele_'+eleWP_new+'__Do', + 'samples': mc +} + 
+aliases['SFweightMuUp'] = { + 'expr': 'LepSF3l__mu_'+muWP_new+'__Up', + 'samples': mc +} + +aliases['SFweightMuDown'] = { + 'expr': 'LepSF3l__mu_'+muWP_new+'__Do', + 'samples': mc +} + + +#aliases['ttHMVA_SF_3l'] = { +# 'linesToAdd': ['.L %s/src/PlotsConfigurations/Configurations/patches/compute_SF_BETA.C+' % os.getenv('CMSSW_BASE')], +# 'class': 'compute_SF', +# 'args' : ('2018', 3, 'total_SF'), +# 'samples': mc +#} + +#aliases['ttHMVA_SF_Up_0'] = { +# 'class': 'compute_SF', +# 'args' : ('2018', 3, 'single_SF_up', 0), +# 'nominalOnly' : True, +# 'samples': mc +#} + +#aliases['ttHMVA_SF_Up_1'] = { +# 'class': 'compute_SF', +# 'args' : ('2018', 3, 'single_SF_up', 1), +# 'nominalOnly' : True, +# 'samples': mc +#} + +#aliases['ttHMVA_SF_Up_2'] = { +# 'class': 'compute_SF', +# 'args' : ('2018', 3, 'single_SF_up', 2), +# 'nominalOnly' : True, +# 'samples': mc +#} + +#aliases['ttHMVA_SF_Down_0'] = { +# 'class': 'compute_SF', +# 'args' : ('2018', 3, 'single_SF_down', 0), +# 'nominalOnly' : True, +# 'samples': mc +#} + +#aliases['ttHMVA_SF_Down_1'] = { +# 'class': 'compute_SF', +# 'args' : ('2018', 3, 'single_SF_down', 1), +# 'nominalOnly' : True, +# 'samples': mc +#} + +#aliases['ttHMVA_SF_Down_2'] = { +# 'class': 'compute_SF', +# 'args' : ('2018', 3, 'single_SF_down', 2), +# 'nominalOnly' : True, +# 'samples': mc +#} + +#aliases['ttHMVA_3l_ele_SF_Up'] = { +# 'expr' : '(ttHMVA_SF_Up_0[0]*(abs(Lepton_pdgId[0]) == 11) + (abs(Lepton_pdgId[0]) == 13)) *\ +# (ttHMVA_SF_Up_1[0]*(abs(Lepton_pdgId[1]) == 11) + (abs(Lepton_pdgId[1]) == 13)) *\ +# (ttHMVA_SF_Up_2[0]*(abs(Lepton_pdgId[2]) == 11) + (abs(Lepton_pdgId[2]) == 13))', +# 'nominalOnly' : True, +# 'samples' : mc +#} + +#aliases['ttHMVA_3l_ele_SF_Down'] = { +# 'expr' : '(ttHMVA_SF_Down_0[0]*(abs(Lepton_pdgId[0]) == 11) + (abs(Lepton_pdgId[0]) == 13)) *\ +# (ttHMVA_SF_Down_1[0]*(abs(Lepton_pdgId[1]) == 11) + (abs(Lepton_pdgId[1]) == 13)) *\ +# (ttHMVA_SF_Down_2[0]*(abs(Lepton_pdgId[2]) == 11) + (abs(Lepton_pdgId[2]) == 
13))', +# 'nominalOnly' : True, +# 'samples' : mc +#} + +#aliases['ttHMVA_3l_mu_SF_Up'] = { +# 'expr' : '(ttHMVA_SF_Up_0[0]*(abs(Lepton_pdgId[0]) == 13) + (abs(Lepton_pdgId[0]) == 11)) *\ +# (ttHMVA_SF_Up_1[0]*(abs(Lepton_pdgId[1]) == 13) + (abs(Lepton_pdgId[1]) == 11)) *\ +# (ttHMVA_SF_Up_2[0]*(abs(Lepton_pdgId[2]) == 13) + (abs(Lepton_pdgId[2]) == 11))', +# 'nominalOnly' : True, +# 'samples' : mc +#} + +#aliases['ttHMVA_3l_mu_SF_Down'] = { +# 'expr' : '(ttHMVA_SF_Down_0[0]*(abs(Lepton_pdgId[0]) == 13) + (abs(Lepton_pdgId[0]) == 11)) *\ +# (ttHMVA_SF_Down_1[0]*(abs(Lepton_pdgId[1]) == 13) + (abs(Lepton_pdgId[1]) == 11)) *\ +# (ttHMVA_SF_Down_2[0]*(abs(Lepton_pdgId[2]) == 13) + (abs(Lepton_pdgId[2]) == 11))', +# 'nominalOnly' : True, +# 'samples' : mc +#} + +# In WpWmJJ_EWK events, partons [0] and [1] are always the decay products of the first W +aliases['lhe_mW1'] = { + 'expr': 'TMath::Sqrt(2. * Take(LHEPart_pt, 0) * Take(LHEPart_pt, 1) * (TMath::CosH(Take(LHEPart_eta, 0) - Take(LHEPart_eta, 1)) - TMath::Cos(Take(LHEPart_phi, 0) - Take(LHEPart_phi, 1))))', + 'samples': ['WWewk'] +} + +# and [2] [3] are the second W +aliases['lhe_mW2'] = { + 'expr': 'TMath::Sqrt(2. 
* Take(LHEPart_pt, 2) * Take(LHEPart_pt, 3) * (TMath::CosH(Take(LHEPart_eta, 2) - Take(LHEPart_eta, 3)) - TMath::Cos(Take(LHEPart_phi, 2) - Take(LHEPart_phi, 3))))', + 'samples': ['WWewk'] +} + +aliases['gstarHigh'] = { + 'expr': 'Gen_ZGstar_mass <0 || Gen_ZGstar_mass > 4', + 'samples': ['WZ'] +} + +# aliases['nCleanGenJet'] = { +# 'linesToAdd': ['.L %s/src/PlotsConfigurations/Configurations/Differential/ngenjet.cc+' % os.getenv('CMSSW_BASE') +# ], +# 'class': 'CountGenJet', +# 'samples': mc +# } + +# configurations = os.path.abspath('.') + '/' +# print(configurations) +# print('\n\n\n') + +# aliases['nCleanGenJet'] = { +# 'linesToAdd': ['.L %sngenjet.cc+' % configurations], +# 'class': 'CountGenJet', +# 'args': 'nLeptonGen, LeptonGen_isPrompt,\ +# LeptonGen_pdgId, LeptonGen_pt, LeptonGen_eta, LeptonGen_phi, \ +# LeptonGen_mass, nPhotonGen, PhotonGen_pt, PhotonGen_eta,PhotonGen_phi, \ +# PhotonGen_mass, nGenJet, GenJet_pt, GenJet_eta, GenJet_phi', +# 'samples': mc +# } + +#puidSFSource = '%s/src/LatinoAnalysis/NanoGardener/python/data/JetPUID_effcyandSF.root' % os.getenv('CMSSW_BASE') +#puidSFSource = '%s/src/PlotsConfigurations/Configurations/patches/PUID_80XTraining_EffSFandUncties.root' % os.getenv('CMSSW_BASE') + +#aliases['PUJetIdSF'] = { +# 'linesToAdd': [ +# 'gSystem->AddIncludePath("-I%s/src");' % os.getenv('CMSSW_BASE'), +# '.L %s/src/PlotsConfigurations/Configurations/patches/pujetidsf_event_new.cc+' % os.getenv('CMSSW_BASE') +# ], +# 'class': 'PUJetIdEventSF', +# 'args': (puidSFSource, '2018', 'loose'), +# 'samples': mc +#} diff --git a/ZH3l_BDT/zh3l_run2_rdf/configuration.py b/ZH3l_BDT/zh3l_run2_rdf/configuration.py new file mode 100644 index 00000000..de531ef0 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_rdf/configuration.py @@ -0,0 +1,92 @@ +import sys,os + +# tag used to identify the configuration folder version +tag = 'ZH3l_BDTrun2' + +# file to use as runner script, default uses mkShapesRDF.shapeAnalysis.runner, otherwise specify path to script 
+runnerFile = "default" + +# output file name +outputFile = "mkShapes__{}.root".format(tag) + +# path to output folder +outputFolder = "/eos/user/" + os.getlogin()[0] + "/" + os.getlogin() + "/mkShapesRDF_rootfiles/" + tag + "/rootFile/" + +# path to batch folder (used for condor submission) +batchFolder = "condor" + +# path to configuration folder (will contain all the compiled configuration files) +configsFolder = "configs" + +# luminosity to normalize to (in 1/fb) -- NOTE(review): the value and link below come from the Run3 TrigMaker config while this folder is tagged run2 and its nuisances are named *_2018; confirm +# https://github.com/latinos/mkShapesRDF/blob/Run3/mkShapesRDF/processor/data/TrigMaker_cfg.py#L1016 +lumi = 8.174732641 + +# file with TTree aliases +aliasesFile = 'aliases.py' + +# file with list of variables +variablesFile = 'variables.py' + +# file with list of cuts +cutsFile = 'cuts.py' + +# file with list of samples +samplesFile = 'samples.py' + +# file with plot configuration (plot, groupPlot, legend) +plotFile = 'plot.py' + +# structure file for datacard +structureFile = 'structure.py' + +# nuisances file for mkDatacards and for mkShape +nuisancesFile = 'nuisances.py' + +# path to folder where to save plots +plotPath = "plots_" + tag + +# these lines are executed right before the runner on the condor node +mountEOS = [ + # "export KRB5CCNAME=/home/gpizzati/krb5\n", +] + +# list of modules to import when compiling the whole configuration folder; it should not contain imports used by configuration.py +imports = ["os", "glob", ("collections", "OrderedDict"), "ROOT"] + +# list of files to compile +filesToExec = [ + samplesFile, + aliasesFile, + cutsFile, + variablesFile, + plotFile, + nuisancesFile, + structureFile, +] + +# list of variables to keep in the compiled configuration folder +varsToKeep = [ + "batchVars", + "outputFolder", + "batchFolder", + "configsFolder", + "outputFile", + "runnerFile", + "tag", + "samples", + "aliases", + "variables", + ("cuts", {"cuts": "cuts", "preselections": "preselections"}), + ("plot", {"plot": "plot", "groupPlot": "groupPlot", "legend": "legend"}), + "nuisances", + "structure", + "lumi", 
+] + +# list of variables to keep in the batch submission script (script.py) +batchVars = varsToKeep[varsToKeep.index("samples") :] + + +varsToKeep += ['plotPath'] + diff --git a/ZH3l_BDT/zh3l_run2_rdf/cuts.py b/ZH3l_BDT/zh3l_run2_rdf/cuts.py new file mode 100644 index 00000000..a620ab34 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_rdf/cuts.py @@ -0,0 +1,139 @@ +cuts = {} + +preselections = 'Alt( Lepton_pt, 0, 0)>25 \ + && Alt( Lepton_pt, 1, 0)>20 \ + && Alt( Lepton_pt, 2, 0)>15 \ + && Alt( Lepton_pt, 3, 0)<10 \ + && (WH3l_mOSll[0] < 0 || WH3l_mOSll[0] > 12) \ + && (WH3l_mOSll[1] < 0 || WH3l_mOSll[1] > 12) \ + && (WH3l_mOSll[2] < 0 || WH3l_mOSll[2] > 12) \ + && abs(WH3l_chlll) == 1 \ + ' + +cuts['zmass_cut'] = 'WH3l_ZVeto < 25' + +cuts['jet_cut_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + ' + +cuts['bveto_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + ' + +cuts['z4lveto_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + ' + +cuts['zh3l_SR_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && ZH3l_dphilmetjj < 3.14159/2 \ + ' + +cuts['zh3l_WZ_CR_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && ZH3l_dphilmetjj > 3.14159/2 \ + ' + +cuts['met_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + ' + +cuts['zh3l_SR_2j_met'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + && ZH3l_dphilmetjj < 3.14159/2 \ + ' + +cuts['zh3l_WZ_CR_2j_met'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && 
Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + && ZH3l_dphilmetjj > 3.14159/2 \ + ' + +cuts['jet_cut_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + ' + +cuts['bveto_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + ' + +cuts['z4lveto_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + ' + +cuts['zh3l_SR_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && ZH3l_dphilmetj < 3.14159/2 \ + ' + +cuts['zh3l_WZ_CR_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && ZH3l_dphilmetj > 3.14159/2 \ + ' + +cuts['met_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + ' + +cuts['zh3l_SR_1j_met'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + && ZH3l_dphilmetj < 3.14159/2 \ + ' + +cuts['zh3l_WZ_CR_1j_met'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + && ZH3l_dphilmetj > 3.14159/2 \ + ' + +''' + #11 = e +# 13 = mu +# 15 = tau +''' diff --git a/ZH3l_BDT/zh3l_run2_rdf/macros/ch_lll.cc b/ZH3l_BDT/zh3l_run2_rdf/macros/ch_lll.cc new file mode 100644 index 00000000..de6ef009 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_rdf/macros/ch_lll.cc @@ -0,0 +1,41 @@ +#ifndef whchlll +#define whchlll + + +#include + +#include "TVector2.h" +#include "Math/Vector4Dfwd.h" +#include "Math/GenVector/LorentzVector.h" +#include 
"Math/GenVector/PtEtaPhiM4D.h" + +#include +#include "ROOT/RVec.hxx" +#include "TLorentzVector.h" + +using namespace ROOT; +using namespace ROOT::VecOps; + +float ch_lll(int nLepton, + RVecF Lepton_pt, + RVecF Lepton_eta, + RVecF Lepton_phi, + RVecF Lepton_pdgId){ + + + // Create default value + float ch_lll = -9999.0; + + // Check that we have at least 3 good leptons + if (nLepton < 3) return ch_lll; + + bool WH3l_ok = abs( abs(Lepton_pdgId[0])/Lepton_pdgId[0] + abs(Lepton_pdgId[1])/Lepton_pdgId[1] + abs(Lepton_pdgId[2])/Lepton_pdgId[2] ) <= 1; + if (!WH3l_ok) return ch_lll; + + ch_lll = abs(Lepton_pdgId[0])/Lepton_pdgId[0] + abs(Lepton_pdgId[1])/Lepton_pdgId[1] + abs(Lepton_pdgId[2])/Lepton_pdgId[2]; + + return ch_lll; +} + + +#endif diff --git a/ZH3l_BDT/zh3l_run2_rdf/macros/flag_ossf.cc b/ZH3l_BDT/zh3l_run2_rdf/macros/flag_ossf.cc new file mode 100644 index 00000000..a46acea2 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_rdf/macros/flag_ossf.cc @@ -0,0 +1,57 @@ +#ifndef flagossf +#define flagossf + + +#include + +#include "TVector2.h" +#include "Math/Vector4Dfwd.h" +#include "Math/GenVector/LorentzVector.h" +#include "Math/GenVector/PtEtaPhiM4D.h" + +#include +#include "ROOT/RVec.hxx" +#include "TLorentzVector.h" + +using namespace ROOT; +using namespace ROOT::VecOps; + +bool flag_ossf(int nLepton, + RVecF Lepton_pt, + RVecF Lepton_eta, + RVecF Lepton_phi, + RVecF Lepton_pdgId){ + + // Create default value + bool flag_OSSF = false; + + // Check that we have at least 3 good leptons + if (nLepton < 3) return flag_OSSF; + + bool WH3l_ok = abs( abs(Lepton_pdgId[0])/Lepton_pdgId[0] + abs(Lepton_pdgId[1])/Lepton_pdgId[1] + abs(Lepton_pdgId[2])/Lepton_pdgId[2] ) <= 1; + if (!WH3l_ok) return flag_OSSF; + + // Initialize leptons 4-vectors + std::vector leptons_vector = { + ROOT::Math::PtEtaPhiMVector(Lepton_pt[0],Lepton_eta[0],Lepton_phi[0],0), + ROOT::Math::PtEtaPhiMVector(Lepton_pt[1],Lepton_eta[1],Lepton_phi[1],0), + 
ROOT::Math::PtEtaPhiMVector(Lepton_pt[2],Lepton_eta[2],Lepton_phi[2],0) + }; + + // Compute minimum difference |mll - mZ| + float minmllDiffToZ = 9999.0; + for (uint i = 0; i < 3; i++){ + for (uint j = i+1; j < 3; j++){ + if ( Lepton_pdgId[i] + Lepton_pdgId[j] != 0 ) continue; + float mllDiffToZ = abs( (leptons_vector[i] + leptons_vector[j]).M() - 91.1876 ); + if ( mllDiffToZ < minmllDiffToZ ) minmllDiffToZ = mllDiffToZ; + } + } + + if (minmllDiffToZ != 9999.0) flag_OSSF = true; + + return flag_OSSF; +} + + +#endif diff --git a/ZH3l_BDT/zh3l_run2_rdf/macros/mOS_ll.cc b/ZH3l_BDT/zh3l_run2_rdf/macros/mOS_ll.cc new file mode 100644 index 00000000..1ec3c638 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_rdf/macros/mOS_ll.cc @@ -0,0 +1,54 @@ +#ifndef mosll +#define mosll + + +#include + +#include "TVector2.h" +#include "Math/Vector4Dfwd.h" +#include "Math/GenVector/LorentzVector.h" +#include "Math/GenVector/PtEtaPhiM4D.h" + +#include +#include "ROOT/RVec.hxx" +#include "TLorentzVector.h" + +using namespace ROOT; +using namespace ROOT::VecOps; + +ROOT::RVecF mOS_ll(int nLepton, + RVecF Lepton_pt, + RVecF Lepton_eta, + RVecF Lepton_phi, + RVecF Lepton_pdgId){ + + // Create default value: exactly one slot per lepton pair, -9999 meaning "no opposite-sign pair" + ROOT::RVecF mOSll_vector = {-9999.0, -9999.0, -9999.0}; + + // Check that we have at least 3 good leptons + if (nLepton < 3) return mOSll_vector; + + bool WH3l_ok = abs( abs(Lepton_pdgId[0])/Lepton_pdgId[0] + abs(Lepton_pdgId[1])/Lepton_pdgId[1] + abs(Lepton_pdgId[2])/Lepton_pdgId[2] ) <= 1; + if (!WH3l_ok) return mOSll_vector; + + // Initialize leptons 4-vectors + std::vector leptons_vector = { + ROOT::Math::PtEtaPhiMVector(Lepton_pt[0],Lepton_eta[0],Lepton_phi[0],0), + ROOT::Math::PtEtaPhiMVector(Lepton_pt[1],Lepton_eta[1],Lepton_phi[1],0), + ROOT::Math::PtEtaPhiMVector(Lepton_pt[2],Lepton_eta[2],Lepton_phi[2],0) + }; + + // Loop over the three lepton pairs; pair (i,j) fills slot i+j-1, i.e. (0,1)->0, (0,2)->1, (1,2)->2 (overwrite the defaults instead of appending, so the result keeps exactly 3 entries) + for (uint i = 0; i < 3; i++){ + for (uint j = i+1; j < 3; j++){ + if (Lepton_pdgId[i]*Lepton_pdgId[j] < 0) + mOSll_vector[i+j-1] = 
(leptons_vector[i]+leptons_vector[j]).M(); + else + mOSll_vector[i+j-1] = -9999.0; + } + } + return mOSll_vector; +} + + +#endif diff --git a/ZH3l_BDT/zh3l_run2_rdf/macros/m_lll.cc b/ZH3l_BDT/zh3l_run2_rdf/macros/m_lll.cc new file mode 100644 index 00000000..46ce151b --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_rdf/macros/m_lll.cc @@ -0,0 +1,49 @@ +#ifndef whmlll +#define whmlll + + +#include + +#include "TVector2.h" +#include "Math/Vector4Dfwd.h" +#include "Math/GenVector/LorentzVector.h" +#include "Math/GenVector/PtEtaPhiM4D.h" + +#include +#include "ROOT/RVec.hxx" +#include "TLorentzVector.h" + +using namespace ROOT; +using namespace ROOT::VecOps; + +float m_lll(int nLepton, + RVecF Lepton_pt, + RVecF Lepton_eta, + RVecF Lepton_phi, + RVecF Lepton_pdgId){ + + + // Create default value + float mlll = -9999.0; + + // Check that we have at least 3 good leptons + if (nLepton < 3) return mlll; + + bool WH3l_ok = abs( abs(Lepton_pdgId[0])/Lepton_pdgId[0] + abs(Lepton_pdgId[1])/Lepton_pdgId[1] + abs(Lepton_pdgId[2])/Lepton_pdgId[2] ) <= 1; + if (!WH3l_ok) return mlll; + + // Initialize leptons 4-vectors + std::vector leptons_vector = { + ROOT::Math::PtEtaPhiMVector(Lepton_pt[0],Lepton_eta[0],Lepton_phi[0],0), + ROOT::Math::PtEtaPhiMVector(Lepton_pt[1],Lepton_eta[1],Lepton_phi[1],0), + ROOT::Math::PtEtaPhiMVector(Lepton_pt[2],Lepton_eta[2],Lepton_phi[2],0) + }; + + mlll = (leptons_vector[0] + leptons_vector[1] + leptons_vector[2]).M(); + + return mlll; + +} + + +#endif diff --git a/ZH3l_BDT/zh3l_run2_rdf/macros/z_veto.cc b/ZH3l_BDT/zh3l_run2_rdf/macros/z_veto.cc new file mode 100644 index 00000000..a527c0c4 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_rdf/macros/z_veto.cc @@ -0,0 +1,56 @@ +#ifndef zveto +#define zveto + + +#include + +#include "TVector2.h" +#include "Math/Vector4Dfwd.h" +#include "Math/GenVector/LorentzVector.h" +#include "Math/GenVector/PtEtaPhiM4D.h" + +#include +#include "ROOT/RVec.hxx" +#include "TLorentzVector.h" + +using namespace ROOT; +using namespace 
ROOT::VecOps; + +float z_veto(int nLepton, + RVecF Lepton_pt, + RVecF Lepton_eta, + RVecF Lepton_phi, + RVecF Lepton_pdgId){ + + + // Create default value + float z_veto = 9999.0; + + // Check that we have at least 3 good leptons + if (nLepton < 3) return z_veto; + + bool WH3l_ok = abs( abs(Lepton_pdgId[0])/Lepton_pdgId[0] + abs(Lepton_pdgId[1])/Lepton_pdgId[1] + abs(Lepton_pdgId[2])/Lepton_pdgId[2] ) <= 1; + if (!WH3l_ok) return z_veto; + + // Initialize leptons 4-vectors + std::vector leptons_vector = { + ROOT::Math::PtEtaPhiMVector(Lepton_pt[0],Lepton_eta[0],Lepton_phi[0],0), + ROOT::Math::PtEtaPhiMVector(Lepton_pt[1],Lepton_eta[1],Lepton_phi[1],0), + ROOT::Math::PtEtaPhiMVector(Lepton_pt[2],Lepton_eta[2],Lepton_phi[2],0) + }; + + // Compute minimum difference |mll - mZ| + float minmllDiffToZ = 9999.0; + for (uint i = 0; i < 3; i++){ + for (uint j = i+1; j < 3; j++){ + if ( Lepton_pdgId[i] + Lepton_pdgId[j] != 0 ) continue; + float mllDiffToZ = abs( (leptons_vector[i] + leptons_vector[j]).M() - 91.1876 ); + if ( mllDiffToZ < minmllDiffToZ ) minmllDiffToZ = mllDiffToZ; + } + } + + return minmllDiffToZ; +} + + +#endif diff --git a/ZH3l_BDT/zh3l_run2_rdf/ngenjet.cc b/ZH3l_BDT/zh3l_run2_rdf/ngenjet.cc new file mode 100644 index 00000000..99893569 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_rdf/ngenjet.cc @@ -0,0 +1,121 @@ + +#ifndef ngenjet +#define ngenjet + +#include + +#include "TVector2.h" +#include "Math/Vector4Dfwd.h" +#include "Math/GenVector/LorentzVector.h" +#include "Math/GenVector/PtEtaPhiM4D.h" + +#include +#include "ROOT/RVec.hxx" + +using namespace ROOT; +using namespace ROOT::VecOps; +double CountGenJet( + int nLeptonGen, + RVecB LeptonGen_isPrompt, + RVecI LeptonGen_pdgId, + RVecF LeptonGen_pt, + RVecF LeptonGen_eta, + RVecF LeptonGen_phi, + RVecF LeptonGen_mass, + int nPhotonGen, + RVecF PhotonGen_pt, + RVecF PhotonGen_eta, + RVecF PhotonGen_phi, + RVecF PhotonGen_mass, + int nGenJet, + RVecF GenJet_pt, + RVecF GenJet_eta, + RVecF GenJet_phi + ){ + 
unsigned nJ = nGenJet; + + unsigned nL = nLeptonGen; + + std::vector iPromptL{}; + iPromptL.reserve(nL); + + for (unsigned iL{0}; iL != nL; ++iL) { + if (!LeptonGen_isPrompt[iL]) + continue; + + unsigned absId{static_cast(std::abs(LeptonGen_pdgId[iL]))}; + if (absId != 11 && absId != 13) + continue; + + iPromptL.push_back(iL); + } + + if (iPromptL.size() == 0) { + unsigned n{0}; + for (unsigned iJ{0}; iJ != nJ; ++iJ) { + if (GenJet_pt[iJ] > 30.) + ++n; + } + return n; + } + + std::vector dressedLeptons{}; + for (unsigned iL : iPromptL) { + dressedLeptons.push_back( + ROOT::Math::PtEtaPhiMVector( + LeptonGen_pt[iL], + LeptonGen_eta[iL], + LeptonGen_phi[iL], + LeptonGen_mass[iL] + ) + ); + } + + unsigned nP = nPhotonGen; + + for (unsigned iP{0}; iP != nP; ++iP) { + double minDR2{1000.}; + int iDMin{-1}; + for (unsigned iD{0}; iD != iPromptL.size(); ++iD) { + unsigned iL{iPromptL[iD]}; + double dEta{LeptonGen_eta[iL] - PhotonGen_eta[iP]}; + double dPhi{TVector2::Phi_mpi_pi(LeptonGen_phi[iL] - PhotonGen_phi[iP])}; + double dR2{dEta * dEta + dPhi * dPhi}; + if (dR2 < minDR2) { + minDR2 = dR2; + iDMin = iD; + } + } + + if (minDR2 < 0.09) + dressedLeptons[iDMin] += ROOT::Math::PtEtaPhiMVector( + PhotonGen_pt[iP], + PhotonGen_eta[iP], + PhotonGen_phi[iP], + PhotonGen_mass[iP]); + } + + unsigned n{0}; + for (unsigned iJ{0}; iJ != nJ; ++iJ) { + if (GenJet_pt[iJ] < 30.) + continue; + + bool overlap{false}; + for (auto& p4 : dressedLeptons) { + if (p4.pt() < 10.) 
+ continue; + + double dEta{p4.eta() - GenJet_eta[iJ]}; + double dPhi{TVector2::Phi_mpi_pi(p4.phi() - GenJet_phi[iJ])}; + if (dEta * dEta + dPhi * dPhi < 0.016) { + overlap = true; + break; + } + } + if (!overlap) + ++n; + } + return n; +} + +#endif \ No newline at end of file diff --git a/ZH3l_BDT/zh3l_run2_rdf/nuisances.py b/ZH3l_BDT/zh3l_run2_rdf/nuisances.py new file mode 100644 index 00000000..2dc2d2b2 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_rdf/nuisances.py @@ -0,0 +1,580 @@ +# nuisances +# name of samples here must match keys in samples.py + +try: + mc = [skey for skey in samples if skey != 'DATA' and not skey.startswith('Fake')] +except NameError: + mc = [] + +try: + fitcuts = [cut for cut in cuts if 'SR' in cut or 'CR' in cut] +except NameError: + fitcuts = [] + +nuisances = {} + +#### Luminosity + +nuisances['lumi_Uncorrelated'] = { + 'name': 'lumi_13TeV_2018', + 'type': 'lnN', + 'samples': dict((skey, '1.015') for skey in mc if skey not in ['WZ']), + 'cuts' : fitcuts +} + +nuisances['lumi_XYFact'] = { + 'name': 'lumi_13TeV_XYFact', + 'type': 'lnN', + 'samples': dict((skey, '1.02') for skey in mc if skey not in ['WZ']), + 'cuts' : fitcuts +} + +nuisances['lumi_LScale'] = { + 'name': 'lumi_13TeV_LSCale', + 'type': 'lnN', + 'samples': dict((skey, '1.002') for skey in mc if skey not in ['WZ']), + 'cuts' : fitcuts +} + +nuisances['lumi_CurrCalib'] = { + 'name': 'lumi_13TeV_CurrCalib', + 'type': 'lnN', + 'samples': dict((skey, '1.002') for skey in mc if skey not in ['WZ']), + 'cuts' : fitcuts +} + +#### Theoretical Systematics +# Commenting out as mkShapesRDF does not have LatinoAnalysis tools' HiggsXSection module (DS, 03Nov25). 
+# Scale +# from LatinoAnalysis.Tools.HiggsXSection import * +# HiggsXS = HiggsXSection() + +# nuisances['QCDscale_VH'] = { +# 'name' : 'QCDscale_VH', +# 'samples' : { +# 'WH_hww' : HiggsXS.GetHiggsProdXSNP('YR4','13TeV','WH','125.09','scale','sm'), +# 'ZH_hww' : HiggsXS.GetHiggsProdXSNP('YR4','13TeV','ZH','125.09','scale','sm'), +# 'WH_htt' : HiggsXS.GetHiggsProdXSNP('YR4','13TeV','WH','125.09','scale','sm'), +# 'ZH_htt' : HiggsXS.GetHiggsProdXSNP('YR4','13TeV','ZH','125.09','scale','sm') +# }, +# 'type' : 'lnN', +# 'cuts' : fitcuts +# } + +# nuisances['QCDscale_ggZH'] = { +# 'name' : 'QCDscale_ggZH', +# 'samples' : { +# 'ggZH_hww': HiggsXS.GetHiggsProdXSNP('YR4','13TeV','ggZH','125.09','scale','sm'), +# }, +# 'type' : 'lnN', +# 'cuts' : fitcuts +# } + +# nuisances['QCDscale_ttH'] = { +# 'name' : 'QCDscale_ttH', +# 'samples' : { +# 'ttH_hww': HiggsXS.GetHiggsProdXSNP('YR4','13TeV','ttH','125.09','scale','sm'), +# }, +# 'type' : 'lnN', +# 'cuts' : fitcuts +# } + +variations = ['Alt(LHEScaleWeight, 0, 1)', 'Alt(LHEScaleWeight, 1, 1)', 'Alt(LHEScaleWeight, 3, 1)', 'Alt(LHEScaleWeight, 5, 1)', 'Alt(LHEScaleWeight, 7, 1)', 'Alt(LHEScaleWeight, 8, 1)'] +# variations = ['Alt$(LHEScaleWeight[0],1)', 'Alt$(LHEScaleWeight[1],1)', 'Alt$(LHEScaleWeight[3],1)', 'Alt$(LHEScaleWeight[5],1)', 'Alt$(LHEScaleWeight[7],1)', 'Alt$(LHEScaleWeight[8],1)'] + +# Commenting out as mkShapesRDF could not find variations for the QCDscale nuisances (DS, 05Nov25). 
+# nuisances['QCDscale_V'] = { +# 'name': 'QCDscale_V', +# 'kind': 'weight_envelope', +# 'type': 'shape', +# 'samples': {'DY': variations}, +# 'AsLnN': '1', +# 'cuts' : fitcuts +# } + +# nuisances['QCDscale_VV'] = { +# 'name': 'QCDscale_VV', +# 'kind': 'weight_envelope', +# 'type': 'shape', +# 'samples': { +# 'WW' : variations, +# 'Zg' : variations, +# 'ZgS' : variations, +# 'WZ' : variations, +# 'ZZ' : variations +# }, +# 'cuts' : fitcuts +# } + +nuisances['QCDscale_WWewk'] = { + 'name': 'QCDscale_WWewk', + 'samples': { + 'WWewk': '1.11', + }, + 'type': 'lnN', + 'cuts' : fitcuts +} + + +# ggww and interference +nuisances['QCDscale_ggVV'] = { + 'name': 'QCDscale_ggVV', + 'type': 'lnN', + 'samples': { + 'ggWW': '1.15', + }, + 'cuts' : fitcuts +} + +nuisances['QCDscale_qqbar_ACCEPT'] = { + 'name' : 'QCDscale_qqbar_ACCEPT', + 'type' : 'lnN', + 'samples' : { + 'WH_hww' : '1.010', + 'ZH_hww' : '1.015', + 'WH_htt' : '1.010', + 'ZH_htt' : '1.015', + }, + 'cuts' : fitcuts +} + +nuisances['QCDscale_gg_ACCEPT'] = { + 'name' : 'QCDscale_gg_ACCEPT', + 'samples' : { + 'ggZH_hww': '1.012', + 'ggWW' : '1.012' + }, + 'type' : 'lnN', + 'cuts' : fitcuts +} + +# Commenting out as mkShapesRDF does not have LatinoAnalysis tools' HiggsXSection module (DS, 03Nov25). 
+# pdf +# nuisances['pdf_Higgs_gg'] = { +# 'name' : 'pdf_Higgs_gg', +# 'samples' : { +# 'ggZH_hww': HiggsXS.GetHiggsProdXSNP('YR4','13TeV','ggZH','125.09','pdf','sm'), +# }, +# 'type' : 'lnN', +# 'cuts' : fitcuts +# } + +# nuisances['pdf_Higgs_qqbar'] = { +# 'name' : 'pdf_Higgs_qqbar', +# 'type' : 'lnN', +# 'samples' : { +# 'WH_hww' : HiggsXS.GetHiggsProdXSNP('YR4','13TeV','WH' ,'125.09','pdf','sm'), +# 'ZH_hww' : HiggsXS.GetHiggsProdXSNP('YR4','13TeV','ZH' ,'125.09','pdf','sm'), +# 'WH_htt' : HiggsXS.GetHiggsProdXSNP('YR4','13TeV','WH' ,'125.09','pdf','sm'), +# 'ZH_htt' : HiggsXS.GetHiggsProdXSNP('YR4','13TeV','ZH' ,'125.09','pdf','sm'), +# }, +# 'cuts' : fitcuts +# } + +# nuisances['pdf_Higgs_ttH'] = { +# 'name': 'pdf_Higgs_ttH', +# 'type': 'lnN', +# 'samples': { +# 'ttH_hww' : HiggsXS.GetHiggsProdXSNP('YR4','13TeV','ttH','125.09','pdf','sm') +# }, +# 'cuts' : fitcuts +# } + +nuisances['pdf_qqbar'] = { + 'name': 'pdf_qqbar', + 'type': 'lnN', + 'samples': { + 'Zg': '1.04', + 'ZgS': '1.04', + 'WZ': '1.04', + 'ZZ': '1.04', + }, + 'cuts' : fitcuts + } + +nuisances['pdf_Higgs_gg_ACCEPT'] = { + 'name' : 'pdf_Higgs_gg_ACCEPT', + 'samples' : { + 'ggZH_hww': '1.006', + }, + 'type' : 'lnN', + 'cuts' : fitcuts + } + +nuisances['pdf_Higgs_qqbar_ACCEPT'] = { + 'name' : 'pdf_Higgs_qqbar_ACCEPT', + 'type' : 'lnN', + 'samples' : { + 'WH_hww' : '1.003', + 'ZH_hww' : '1.002', + 'WH_htt' : '1.003', + 'ZH_htt' : '1.002', + }, + 'cuts' : fitcuts + } + +nuisances['pdf_qqbar_ACCEPT'] = { + 'name' : 'pdf_qqbar_ACCEPT', + 'type' : 'lnN', + 'samples' : { + 'WZ' : '1.001', + 'ZZ' : '1.001', + }, + 'cuts' : fitcuts + } + +nuisances['pdf_gg_ACCEPT'] = { + 'name': 'pdf_gg_ACCEPT', + 'samples': { + 'ggWW': '1.006', + }, + 'type': 'lnN', + 'cuts' : fitcuts +} + +# nuisances['PS_ISR'] = { +# 'name': 'PS_ISR', +# 'kind': 'weight', +# 'type': 'shape', +# 'samples': { +# 'ggZH_hww': ['1.066107*(nCleanGenJet==0) + 1.047857*(nCleanGenJet==1) + 1.030005*(nCleanGenJet==2) + 1.005028*(nCleanGenJet>=3)', 
'0.921874*(nCleanGenJet==0) + 0.941939*(nCleanGenJet==1) + 0.962282*(nCleanGenJet==2) + 0.991580*(nCleanGenJet>=3)'], +# 'ZH_hww': ['1.000684*(nCleanGenJet==0) + 1.000924*(nCleanGenJet==1) + 1.001683*(nCleanGenJet==2) + 1.002104*(nCleanGenJet>=3)', '0.999150*(nCleanGenJet==0) + 0.998821*(nCleanGenJet==1) + 0.997859*(nCleanGenJet==2) + 0.997316*(nCleanGenJet>=3)'], +# 'WZ': ['1.002552*(nCleanGenJet==0) + 1.010286*(nCleanGenJet==1) + 1.014420*(nCleanGenJet==2) + 1.006226*(nCleanGenJet>=3)', '0.996802*(nCleanGenJet==0) + 0.987227*(nCleanGenJet==1) + 0.982005*(nCleanGenJet==2) + 0.992030*(nCleanGenJet>=3)'], +# 'ZZ': ['1.003210*(nCleanGenJet==0) + 1.005480*(nCleanGenJet==1) + 1.004674*(nCleanGenJet==2) + 0.987845*(nCleanGenJet>=3)', '0.995997*(nCleanGenJet==0) + 0.993056*(nCleanGenJet==1) + 0.993659*(nCleanGenJet==2) + 1.014695*(nCleanGenJet>=3)'], +# }, +# 'cuts' : fitcuts +# } + +# nuisances['PS_FSR'] = { +# 'name': 'PS_FSR', +# 'kind': 'weight', +# 'type': 'shape', +# 'samples': { +# 'ggZH_hww': ['0.987316*(nCleanGenJet==0) + 0.986764*(nCleanGenJet==1) + 0.996498*(nCleanGenJet==2) + 1.004161*(nCleanGenJet>=3)', '1.019871*(nCleanGenJet==0) + 1.013853*(nCleanGenJet==1) + 1.005229*(nCleanGenJet==2) + 0.998573*(nCleanGenJet>=3)'], +# 'ZH_hww': ['0.992867*(nCleanGenJet==0) + 0.992845*(nCleanGenJet==1) + 0.999470*(nCleanGenJet==2) + 1.007245*(nCleanGenJet>=3)', '1.012465*(nCleanGenJet==0) + 1.012743*(nCleanGenJet==1) + 1.003215*(nCleanGenJet==2) + 0.991286*(nCleanGenJet>=3)'], +# 'WZ': ['0.992987*(nCleanGenJet==0) + 0.993725*(nCleanGenJet==1) + 1.000617*(nCleanGenJet==2) + 1.010869*(nCleanGenJet>=3)', '1.011267*(nCleanGenJet==0) + 1.010097*(nCleanGenJet==1) + 0.999445*(nCleanGenJet==2) + 0.983609*(nCleanGenJet>=3)'], +# 'ZZ': ['0.997245*(nCleanGenJet==0) + 0.998689*(nCleanGenJet==1) + 1.004475*(nCleanGenJet==2) + 1.011440*(nCleanGenJet>=3)', '1.004482*(nCleanGenJet==0) + 1.002081*(nCleanGenJet==1) + 0.992617*(nCleanGenJet==2) + 0.981314*(nCleanGenJet>=3)'], +# }, +# 
'cuts' : fitcuts +# } + +nuisances['PU'] = { + 'name': 'CMS_PU_2018', + 'kind': 'weight', + 'type': 'shape', + 'samples': { + 'WZ': ['1.00103945*(puWeightUp/puWeight)', '0.99898154*(puWeightDown/puWeight)'], + 'ZZ': ['1.00125684*(puWeightUp/puWeight)', '0.99878368*(puWeightDown/puWeight)'], + 'ZH_hww': ['1.00129463*(puWeightUp/puWeight)', '0.99847264*(puWeightDown/puWeight)'], + 'ggZH_hww': ['0.99973837*(puWeightUp/puWeight)', '1.0001526*(puWeightDown/puWeight)'], + }, + 'AsLnN': '1', + 'cuts' : fitcuts +} + +### PU ID SF uncertainty +puid_syst = ['Jet_PUIDSF_up/Jet_PUIDSF', 'Jet_PUIDSF_down/Jet_PUIDSF'] + +nuisances['jetPUID'] = { + 'name': 'CMS_PUID_2018', + 'kind': 'weight', + 'type': 'shape', + 'samples': dict((skey, puid_syst) for skey in mc) +} + +nuisances['UE_whss'] = { + 'name' : 'UE_whss', + 'skipCMS' : 1, + 'type' : 'lnN', + 'samples' : { + 'WH_hww' : '1.015', + 'ZH_hww' : '1.015', + 'ggZH_hww' : '1.015', + 'WH_htt' : '1.015', + 'ZH_htt' : '1.015', + }, + 'cuts' : fitcuts +} + +nuisances['WZ3l2jnorm'] = { + 'name' : 'CMS_hww_WZ3l2jnorm', + 'samples' : { + 'WZ' : '1.00', + }, + 'type' : 'rateParam', + 'cuts' : [ + 'zh3l_WZ_CR_2j', + 'zh3l_WZ_CR_2j_met', + 'zh3l_SR_2j', + 'zh3l_SR_2j_met', + ] + } + +nuisances['WZ3l1jnorm'] = { + 'name' : 'CMS_hww_WZ3l1jnorm', + 'samples' : { + 'WZ' : '1.00', + }, + 'type' : 'rateParam', + 'cuts' : [ + 'zh3l_WZ_CR_1j', + 'zh3l_WZ_CR_1j_met', + 'zh3l_SR_1j', + 'zh3l_SR_1j_met', + ] + } + +#### Top + +apply_on = { + 'top': [ + '(topGenPt * antitopGenPt <= 0.) * 1.0816 + (topGenPt * antitopGenPt > 0.)', + '(topGenPt * antitopGenPt <= 0.) 
* 0.9184 + (topGenPt * antitopGenPt > 0.)' + ] +} + +nuisances['singleTopToTTbar'] = { + 'name': 'singleTopToTTbar', + 'skipCMS': 1, + 'kind': 'weight', + 'type': 'shape', + 'samples': apply_on, + 'cuts' : fitcuts +} + +nuisances['TopPtRew'] = { + 'name': 'CMS_topPtRew', # Theory uncertainty + 'kind': 'weight', + 'type': 'shape', + 'samples': {'top': ["1.", "1./Top_pTrw"]}, + 'symmetrize': True, + 'cuts' : fitcuts +} + +#### FAKES + +fakeW_EleUp = '( fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3lElUp / fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3l )' +fakeW_EleDown = '( fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3lElDown / fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3l )' +fakeW_MuUp = '( fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3lMuUp / fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3l )' +fakeW_MuDown = '( fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3lMuDown / fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3l )' +fakeW_statEleUp = '( fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3lstatElUp / fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3l )' +fakeW_statEleDown = '( fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3lstatElDown / fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3l )' +fakeW_statMuUp = '( fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3lstatMuUp / fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3l )' +fakeW_statMuDown = '( fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3lstatMuDown / fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_3l )' + +nuisances['fake_syst_e'] = { + 'name' : 'CMS_fake_syst_e', + 'type' : 'lnN', + 'samples' : { + 'Fake_e' : '1.30', + }, + 'cuts' : fitcuts +} + +nuisances['fake_syst_m'] = { + 'name' : 'CMS_fake_syst_m', + 'type' : 'lnN', + 'samples' : { + 'Fake_m' : '1.30', + }, + 'cuts' : fitcuts +} + +nuisances['fake_ele'] = { + 'name' : 'CMS_fake_e_2018', + 'kind' : 'weight', + 'type' : 'shape', + 'samples' : { + 'Fake' : [ fakeW_EleUp , fakeW_EleDown ], + }, + 'cuts' : fitcuts +} + +nuisances['fake_ele_stat'] = { + 'name' : 'CMS_fake_stat_e_2018', + 'kind' : 'weight', + 'type' : 'shape', + 'samples' : { + 'Fake' : [ 
fakeW_statEleUp , fakeW_statEleDown ], + }, + 'cuts' : fitcuts +} + +nuisances['fake_mu'] = { + 'name' : 'CMS_fake_m_2018', + 'kind' : 'weight', + 'type' : 'shape', + 'samples' : { + 'Fake' : [ fakeW_MuUp , fakeW_MuDown ], + }, + 'cuts' : fitcuts +} + +nuisances['fake_mu_stat'] = { + 'name' : 'CMS_fake_stat_m_2018', + 'kind' : 'weight', + 'type' : 'shape', + 'samples' : { + 'Fake' : [ fakeW_statMuUp , fakeW_statMuDown ], + }, + 'cuts' : fitcuts +} + +###### B-tagger + +for shift in ['jes', 'lf', 'hf', 'hfstats1', 'hfstats2', 'lfstats1', 'lfstats2', 'cferr1', 'cferr2']: + btag_syst = ['(btagSF%sup)/(btagSF)' % shift, '(btagSF%sdown)/(btagSF)' % shift] + + name = 'CMS_btag_%s' % shift + if 'stats' in shift: + name += '_2018' + + nuisances['btag_shape_%s' % shift] = { + 'name': name, + 'kind': 'weight', + 'type': 'shape', + 'samples': dict((skey, btag_syst) for skey in mc), + 'cuts' : fitcuts + } + +#### Trigger Efficiency + +trig_syst = ['((TriggerEffWeight_3l_u)/(TriggerEffWeight_3l))*(TriggerEffWeight_3l>0.02) + (TriggerEffWeight_3l<=0.02)', '(TriggerEffWeight_3l_d)/(TriggerEffWeight_3l)'] + +nuisances['trigg'] = { + 'name' : 'CMS_eff_hwwtrigger_2018', + 'kind' : 'weight', + 'type' : 'shape', + 'samples' : dict((skey, trig_syst) for skey in mc), + 'cuts' : fitcuts +} + +##### Electron Efficiency and energy scale + +id_syst_ele = [ 'SFweightEleUp', 'SFweightEleDown'] + +nuisances['eff_e'] = { + 'name' : 'CMS_eff_e_2018', + 'kind' : 'weight', + 'type' : 'shape', + 'samples' : dict((skey, id_syst_ele) for skey in mc), + 'cuts' : fitcuts +} + +nuisances['electronpt'] = { + 'name' : 'CMS_scale_e_2018', + 'kind' : 'suffix', + 'type' : 'shape', + 'mapUp' : 'ElepTup', + 'mapDown' : 'ElepTdo', + 'samples' : dict((skey, ['1', '1']) for skey in mc), + 'folderUp' : treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__ElepTup_suffix', + 'folderDown' : 
treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__ElepTdo_suffix', + 'AsLnN' : '1', + 'cuts' : fitcuts +} + +###### Muon Efficiency and energy scale + +id_syst_mu = [ 'SFweightMuUp', 'SFweightMuDown'] + +nuisances['eff_m'] = { + 'name' : 'CMS_eff_m_2018', + 'kind' : 'weight', + 'type' : 'shape', + 'samples' : dict((skey, id_syst_mu) for skey in mc), + 'cuts' : fitcuts +} + +nuisances['muonpt'] = { + 'name' : 'CMS_scale_m_2018', + 'kind' : 'suffix', + 'type' : 'shape', + 'mapUp' : 'MupTup', + 'mapDown' : 'MupTdo', + 'samples' : dict((skey, ['1', '1']) for skey in mc), + 'folderUp' : treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__MupTup_suffix', + 'folderDown' : treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__MupTdo_suffix', + 'AsLnN' : '1', + 'cuts' : fitcuts +} + +####### Jet energy scale + +jes_systs = ['JESAbsolute','JESAbsolute_2018','JESBBEC1','JESBBEC1_2018','JESEC2','JESEC2_2018','JESFlavorQCD','JESHF','JESHF_2018','JESRelativeBal','JESRelativeSample_2018'] +folderup = "" +folderdo = "" + +for js in jes_systs: + if 'Absolute' in js: + folderup = treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__JESAbsoluteup_suffix' + folderdo = treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__JESAbsolutedo_suffix' + elif 'BBEC1' in js: + folderup = treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__JESBBEC1up_suffix' + folderdo = treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__JESBBEC1do_suffix' + elif 'EC2' in js: + folderup = treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__JESEC2up_suffix' + folderdo = 
treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__JESEC2do_suffix' + elif 'HF' in js: + folderup = treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__JESHFup_suffix' + folderdo = treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__JESHFdo_suffix' + elif 'Relative' in js: + folderup = treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__JESRelativeup_suffix' + folderdo = treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__JESRelativedo_suffix' + elif 'FlavorQCD' in js: + folderup = treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__JESFlavorQCDup_suffix' + folderdo = treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__JESFlavorQCDdo_suffix' + + nuisances[js] = { + 'name': 'CMS_scale_'+js, + 'kind': 'suffix', + 'type': 'shape', + 'mapUp': js+'up', + 'mapDown': js+'do', + 'samples': dict((skey, ['1', '1']) for skey in mc), + 'folderUp' : folderup, + 'folderDown' : folderdo, + 'AsLnN': '1', + 'cuts' : fitcuts + } + +###### Jet energy resolution +nuisances['JER'] = { + 'name' : 'CMS_res_j_2018', + 'kind': 'suffix', + 'type': 'shape', + 'mapUp': 'JERup', + 'mapDown': 'JERdo', + 'samples': dict((skey, ['1', '1']) for skey in mc), + 'folderUp': treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__JERup_suffix', + 'folderDown': treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__JERdo_suffix', + 'AsLnN' : '1' +} +##### MET energy scale + +nuisances['met'] = { + 'name' : 'CMS_scale_met_2018', + 'kind' : 'suffix', + 'type' : 'shape', + 'mapUp' : 'METup', + 'mapDown' : 'METdo', + 'samples' : dict((skey, ['1', '1']) for skey in mc), + 
'folderUp' : treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__METup_suffix', + 'folderDown' : treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7__METdo_suffix', + 'AsLnN' : '1', + 'cuts' : fitcuts +} + +# Use the following if you want to apply the automatic combine MC stat nuisances. +nuisances['stat'] = { + 'type' : 'auto', + 'maxPoiss' : '10', + 'includeSignal' : '1', + # nuisance ['maxPoiss'] = Number of threshold events for Poisson modelling + # nuisance ['includeSignal'] = Include MC stat nuisances on signal processes (1=True, 0=False) + 'samples' : {}, + 'cuts' : fitcuts + } + +for n in nuisances.values(): + n['skipCMS'] = 1 diff --git a/ZH3l_BDT/zh3l_run2_rdf/plot.py b/ZH3l_BDT/zh3l_run2_rdf/plot.py new file mode 100644 index 00000000..e53e2f8a --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_rdf/plot.py @@ -0,0 +1,249 @@ +# plot configuration +groupPlot = {} + +# Groups of samples to improve the plots. 
+# If not defined, normal plots are used + +groupPlot['DY'] = { + 'nameHR' : 'DY', + 'color': 616, # kMagenta + 'isSignal' : 0, + 'samples' : ['DY'] +} + +groupPlot['WW'] = { + 'nameHR' : 'WW', + 'color': 632, # kRed + 'isSignal' : 0, + 'samples' : ['WW'] +} + +groupPlot['Zg'] = { + 'nameHR' : 'Zg', + 'color': 859, # kAzure -1 + 'isSignal' : 0, + 'samples' : ['Zg'] + } + +groupPlot['ZgS'] = { + 'nameHR' : 'ZgS', + 'color': 432, # kCyan + 'isSignal' : 0, + 'samples' : ['ZgS'] + } + +groupPlot['WZ'] = { + 'nameHR' : "WZ", + 'isSignal' : 0, + 'color' : 400, # Yellow + 'samples' : ['WZ'] + } + +groupPlot['ZZ'] = { + 'nameHR' : "ZZ", + 'isSignal' : 0, + 'color' : 617, # kMagenta + 1 + 'samples' : ['ZZ'] + } + +groupPlot['VVV'] = { + 'nameHR' : 'VVV', + 'isSignal' : 0, + 'color': 806, # kOrange + 6 + 'samples' : ['VVV'] + } + + +groupPlot['top'] = { + 'nameHR' : 'top', + 'color': 416, # kGreen + 'isSignal' : 0, + 'samples' : ['top'] +} + +groupPlot['ttV'] = { + 'nameHR' : 'ttV', + 'color': 419, # kGreen+3 + 'isSignal' : 0, + 'samples' : ['ttV'] +} + +groupPlot['Fake'] = { + 'nameHR' : 'Non-prompt', + 'isSignal' : 0, + 'color': 921, # kGray + 1 + 'samples' : ['Fake_e', 'Fake_m'] + } + +groupPlot['Higgs'] = { + 'nameHR' : 'Higgs', + 'isSignal' : 1, + 'color': 632, # kRed + 'scaleMultiplicativeOverlaid' : 10.0, + 'samples' : ['WH_hww', 'ZH_hww', 'ggZH_hww', 'ZH_htt', 'ttH_hww', 'WH_htt'] + } + +# Individual plots +plot = {} + +plot['DY'] = { + 'nameHR' : 'DY', + 'color': 858, # kAzure -2 + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0 + } + +plot['WW'] = { + 'nameHR' : 'WW', + 'color': 858, # kAzure -2 + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0 + } + +#plot['WWewk'] = { +# 'nameHR' : 'WWewk', +# 'color': 858, # kAzure -2 +# 'isSignal' : 0, +# 'isData' : 0, +# 'scale' : 1.0 +# } + +#plot['ggWW'] = { +# 'nameHR' : 'ggWW', +# 'color': 858, # kAzure -2 +# 'isSignal' : 0, +# 'isData' : 0, +# 'scale' : 1.0 +# } + +plot['Zg'] = { + 'nameHR' : 'Zg', + 'color': 859, # kAzure
-1 + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0 + } + +plot['ZgS'] = { + 'nameHR' : 'ZgS', + 'color': 859, # kAzure -1 + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0 + } + +plot['WZ'] = { + 'nameHR' : 'WZ', + 'color': 858, # kAzure -2 + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0 + #'scale' : 0.89 #1j norm + #'scale' : 1.22 #2j norm + } + +plot['ZZ'] = { + 'nameHR' : 'ZZ', + 'color': 856, # kAzure -4 + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0 + } + +plot['VVV'] = { + 'color': 857, # kAzure -3 + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0 + } + +plot['top'] = { + 'color': 419, # kGreen+3 + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0 # ele/mu trigger efficiency datadriven +} + +plot['ttV'] = { + 'color': 419, # kGreen+3 + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0 # ele/mu trigger efficiency datadriven +} + +plot['Fake_e'] = { + 'color': 921, # kGray + 1 + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0 + } + +plot['Fake_m'] = { + 'color': 921, # kGray + 1 + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0 + } + +plot['ttH_hww'] = { + 'nameHR' : 'ttH', + 'color': 632+3, # kRed+3 + 'isSignal' : 1, + 'isData' : 0, + 'scale' : 1 + } + +plot['ZH_hww'] = { + 'nameHR' : 'ZH', + 'color': 632+3, # kRed+3 + 'isSignal' : 1, + 'isData' : 0, + 'scale' : 1 + } + +plot['ggZH_hww'] = { + 'nameHR' : 'ggZH', + 'color': 632+4, # kRed+4 + 'isSignal' : 1, + 'isData' : 0, + 'scale' : 1 + } + +plot['WH_hww'] = { + 'nameHR' : 'WH', + 'color': 632+2, # kRed+2 + 'isSignal' : 1, + 'isData' : 0, + 'scale' : 1 + } + +# plot['WH_htt'] = { +# 'nameHR' : 'WH htt', +# 'color': 632+1, # kRed+1 +# 'isSignal' : 1, +# 'isData' : 0, +# 'scale' : 1 +# } + +plot['ZH_htt'] = { + 'nameHR' : 'ZH htt', + 'color': 632+1, # kRed+1 + 'isSignal' : 1, + 'isData' : 0, + 'scale' : 1 + } + +plot['DATA'] = { + 'nameHR' : 'Data', + 'color': 1 , + 'isSignal' : 0, + 'isData' : 1 , + 'isBlind' : 0 + } + +# additional options +legend = {} +legend['lumi'] = 'L = 59.7/fb' +legend['sqrt'] =
'#sqrt{s} = 13 TeV' diff --git a/ZH3l_BDT/zh3l_run2_rdf/samples.py b/ZH3l_BDT/zh3l_run2_rdf/samples.py new file mode 100644 index 00000000..3425ab65 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_rdf/samples.py @@ -0,0 +1,416 @@ +import os +import subprocess + +# global getSampleFiles +# from LatinoAnalysis.Tools.commonTools import getSampleFiles, addSampleWeight, getBaseWnAOD +# It looks like getBaseWnAOD and addSampleWeight from Latinos does the same job as CombineBaseW function from makeShapesRDF. Similarly getSampleFiles function from Latinos is the same as nanoGetSampleFiles from makeShapesRDF. + +from mkShapesRDF.lib.search_files import SearchFiles +searchFiles = SearchFiles() +redirector = "" +limitFiles = -1 +samples = {} + +def nanoGetSampleFiles(path, name): + _files = searchFiles.searchFiles(path, name, redirector=redirector) + if limitFiles != -1 and len(_files) > limitFiles: + return [(name, _files[:limitFiles])] + else: + return [(name, _files)] + +def getSampleFilesNano(inputDir,Sample,absPath=False): + # return getSampleFiles(inputDir,Sample,absPath,'nanoLatino_') + return nanoGetSampleFiles(inputDir, Sample) # getSampleFiles replaced with nanoGetSampleFiles(path, name) (DS, 03Nov25) + +def CombineBaseW(samples, proc, samplelist): + _filtFiles = list(filter(lambda k: k[0] in samplelist, samples[proc]["name"])) + _files = list(map(lambda k: k[1], _filtFiles)) + _l = list(map(lambda k: len(k), _files)) + leastFiles = _files[_l.index(min(_l))] + dfSmall = ROOT.RDataFrame("Runs", leastFiles) + s = dfSmall.Sum("genEventSumw").GetValue() + f = ROOT.TFile(leastFiles[0]) + t = f.Get("Events") + t.GetEntry(1) + xs = t.baseW * s + + __files = [] + for f in _files: + __files += f + df = ROOT.RDataFrame("Runs", __files) + s = df.Sum("genEventSumw").GetValue() + newbaseW = str(xs / s) + return newbaseW # "/baseW is used after getBaseWnAOD/CombineBaseW calls in this code" + # weight = newbaseW + "/baseW" + + # for iSample in samplelist: + # addSampleWeight(samples, proc, 
iSample, weight) + + +def addSampleWeight(samples, sampleName, sampleNameType, weight): + obj = list(filter(lambda k: k[0] == sampleNameType, samples[sampleName]["name"]))[0] + samples[sampleName]["name"] = list( + filter(lambda k: k[0] != sampleNameType, samples[sampleName]["name"]) + ) + if len(obj) > 2: + samples[sampleName]["name"].append( + (obj[0], obj[1], obj[2] + "*(" + weight + ")") + ) + else: + samples[sampleName]["name"].append((obj[0], obj[1], "(" + weight + ")")) + + +############################################## +###### Tree Directory according to site ###### +############################################## + +SITE=os.uname()[1] +xrootdPath='' +if 'iihe' in SITE : + xrootdPath = 'dcap://maite.iihe.ac.be/' + treeBaseDir = '/pnfs/iihe/cms/store/user/xjanssen/HWW2015/' +elif 'cern' in SITE : + #xrootdPath='root://eoscms.cern.ch/' + treeBaseDir = '/eos/cms/store/group/phys_higgs/cmshww/amassiro/HWWNano/' + +directory = treeBaseDir+'Autumn18_102X_nAODv7_Full2018v7/MCl1loose2018v7__MCCorr2018v7__l2loose__l2tightOR2018v7' + +################################################ +############ NUMBER OF LEPTONS ################# +################################################ + +#Nlep='2' +Nlep='3' +#Nlep='4' + +################################################ +############### Lepton WP ###################### +################################################ + +eleWP='mvaFall17V1Iso_WP90' +#eleWP='mvaFall17V1Iso_WP90_SS' +#eleWP='mvaFall17V2Iso_WP90' +#eleWP='mvaFall17V2Iso_WP90_SS' +muWP ='cut_Tight_HWWW' +eleWP_new = 'mvaFall17V1Iso_WP90_tthmva_70' +muWP_new = 'cut_Tight_HWWW_tthmva_80' + +LepWPCut = 'LepCut'+Nlep+'l__ele_'+eleWP_new+'__mu_'+muWP_new +#LepWPCut = 'LepCut'+Nlep+'l__ele_'+eleWP+'__mu_'+muWP +#LepWPweight = 'ttHMVA_SF_3l[0]' #SF for new WPs, defined in aliases +LepWPweight = 'LepSF'+Nlep+'l__ele_'+eleWP_new+'__mu_'+muWP_new + +################################################ +############ BASIC MC WEIGHTS ################## 
+################################################ + +XSWeight = 'XSWeight' +SFweight = 'SFweight'+Nlep+'l*'+LepWPweight+'*'+LepWPCut+'*Jet_PUIDSF' +PromptGenLepMatch = 'PromptGenLepMatch'+Nlep+'l' + +################################################ +############## FAKE WEIGHTS #################### +################################################ + +#eleWP_new = 'mvaFall17V1Iso_WP90_tthmva_70' +#muWP_new = 'cut_Tight_HWWW_tthmva_80' + +if Nlep == '2' : + fakeW = 'fakeW2l_ele_'+eleWP_new+'_mu_'+muWP_new + #fakeW = 'fakeW2l_ele_'+eleWP+'_mu_'+muWP +else: + fakeW = 'fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_'+Nlep+'l' + #fakeW = 'fakeW_ele_'+eleWP+'_mu_'+muWP+'_'+Nlep+'l' + +################################################ +############### B-Tag WP ###################### +################################################ + +SFweight += '*btagSF' #define in aliases.py + +################################################ +############ MET FILTERS ################### +################################################ + +METFilter_MC = 'METFilter_MC' +METFilter_DATA = 'METFilter_DATA' + +################################################ +############ DATA DECLARATION ################## +################################################ + +DataRun = [ + ['A','Run2018A-02Apr2020-v1'] , + ['B','Run2018B-02Apr2020-v1'] , + ['C','Run2018C-02Apr2020-v1'] , + ['D','Run2018D-02Apr2020-v1'] , + ] + +DataSets = ['MuonEG','DoubleMuon','SingleMuon','EGamma'] + +DataTrig = { + 'MuonEG' : 'Trigger_ElMu' , + 'DoubleMuon' : '!Trigger_ElMu && Trigger_dblMu' , + 'SingleMuon' : '!Trigger_ElMu && !Trigger_dblMu && Trigger_sngMu' , + 'EGamma' : '!Trigger_ElMu && !Trigger_dblMu && !Trigger_sngMu && (Trigger_sngEl || Trigger_dblEl)' , + } + +########################################### +############# BACKGROUNDS ############### +########################################### + +############ DY ############ + +ptllDYW_NLO = 
'(0.87*(gen_ptll<10)+(0.379119+0.099744*gen_ptll-0.00487351*gen_ptll**2+9.19509e-05*gen_ptll**3-6.0212e-07*gen_ptll**4)*(gen_ptll>=10 && gen_ptll<45)+(9.12137e-01+1.11957e-04*gen_ptll-3.15325e-06*gen_ptll**2-4.29708e-09*gen_ptll**3+3.35791e-11*gen_ptll**4)*(gen_ptll>=45 && gen_ptll<200) + 1*(gen_ptll>200))' +ptllDYW_LO = '((0.632927+0.0456956*gen_ptll-0.00154485*gen_ptll*gen_ptll+2.64397e-05*gen_ptll*gen_ptll*gen_ptll-2.19374e-07*gen_ptll*gen_ptll*gen_ptll*gen_ptll+6.99751e-10*gen_ptll*gen_ptll*gen_ptll*gen_ptll*gen_ptll)*(gen_ptll>0)*(gen_ptll<100)+(1.41713-0.00165342*gen_ptll)*(gen_ptll>=100)*(gen_ptll<300)+1*(gen_ptll>=300))' +Zgfilter = '( !(Sum(PhotonGen_isPrompt==1 && PhotonGen_pt>15 && abs(PhotonGen_eta)<2.6) > 0 && Sum(LeptonGen_isPrompt==1 && LeptonGen_pt>15)>=2) )' #Zg sample uses photon pt > 15, lepton pt > 15 + +samples['DY'] = { 'name' : getSampleFilesNano(directory,'DYJetsToLL_M-10to50-LO') + + getSampleFilesNano(directory,'DYJetsToLL_M-10to50-LO_ext1') + + getSampleFilesNano(directory,'DYJetsToLL_M-50-LO') + + getSampleFilesNano(directory, 'DYJetsToLL_M-50') + + getSampleFilesNano(directory, 'DYJetsToLL_M-50_ext2') + + getSampleFilesNano(directory,'DYJetsToLL_M-4to50_HT-100to200') + + getSampleFilesNano(directory,'DYJetsToLL_M-4to50_HT-200to400') + + getSampleFilesNano(directory,'DYJetsToLL_M-4to50_HT-400to600') + + getSampleFilesNano(directory,'DYJetsToLL_M-4to50_HT-600toInf') + + getSampleFilesNano(directory,'DYJetsToLL_M-50_HT-70to100') + + getSampleFilesNano(directory,'DYJetsToLL_M-50_HT-100to200') + + getSampleFilesNano(directory,'DYJetsToLL_M-50_HT-200to400') + # + getSampleFilesNano(directory,'DYJetsToLL_M-50_HT-400to600') + # + getSampleFilesNano(directory,'DYJetsToLL_M-50_HT-400to600_ext2') + + getSampleFilesNano(directory,'DYJetsToLL_M-50_HT-600to800') + + getSampleFilesNano(directory,'DYJetsToLL_M-50_HT-800to1200') + + getSampleFilesNano(directory,'DYJetsToLL_M-50_HT-1200to2500') + + 
getSampleFilesNano(directory,'DYJetsToLL_M-50_HT-2500toInf'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC +'*'+Zgfilter, + 'FilesPerJob' : 5, + 'suppressNegative' :['all'], + 'suppressNegativeNuisances' :['all'], + } + +M10baseW = '1'#CombineBaseW(directory,'Autumn18_102X_nAODv7_Full2018v7',['DYJetsToLL_M-10to50-LO','DYJetsToLL_M-10to50-LO_ext1']) +#HT400baseW = getBaseWnAOD(directory,'Autumn18_102X_nAODv7_Full2018v7',['DYJetsToLL_M-50_HT-400to600']) + +addSampleWeight(samples,'DY','DYJetsToLL_M-10to50-LO' ,ptllDYW_LO+'*(LHE_HT<100.0)*'+M10baseW+'/baseW') +addSampleWeight(samples,'DY','DYJetsToLL_M-10to50-LO_ext1' ,ptllDYW_LO+'*(LHE_HT<100.0)*'+M10baseW+'/baseW') +addSampleWeight(samples,'DY','DYJetsToLL_M-50-LO' ,ptllDYW_LO+'*(LHE_HT<70.0)') +addSampleWeight(samples,'DY','DYJetsToLL_M-4to50_HT-100to200' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-4to50_HT-200to400' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-4to50_HT-400to600' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-4to50_HT-600toInf' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-50_HT-70to100' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-50_HT-100to200' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-50_HT-200to400' ,ptllDYW_LO) +#addSampleWeight(samples,'DY','DYJetsToLL_M-50_HT-400to600' ,ptllDYW_LO+'*'+HT400baseW+'/baseW') +#addSampleWeight(samples,'DY','DYJetsToLL_M-50_HT-400to600_ext2',ptllDYW_LO+'*'+HT400baseW+'/baseW') +addSampleWeight(samples,'DY','DYJetsToLL_M-50_HT-600to800' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-50_HT-800to1200' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-50_HT-1200to2500' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-50_HT-2500toInf' ,ptllDYW_LO) +addSampleWeight(samples,'DY', 'DYJetsToLL_M-50' ,ptllDYW_LO) +addSampleWeight(samples,'DY','DYJetsToLL_M-50_ext2' ,ptllDYW_LO) + +############ Top ############ + +samples['top'] = { 'name' : 
getSampleFilesNano(directory,'TTTo2L2Nu') + + getSampleFilesNano(directory,'ST_s-channel_ext1') + # + getSampleFilesNano(directory,'ST_t-channel_antitop') + + getSampleFilesNano(directory,'ST_t-channel_top') + + getSampleFilesNano(directory,'ST_tW_antitop_ext1') + + getSampleFilesNano(directory,'ST_tW_top_ext1'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + 'FilesPerJob' : 6, + } + +addSampleWeight(samples,'top','TTTo2L2Nu','Top_pTrw') + +samples['ttV'] = { 'name' : getSampleFilesNano(directory,'TTWJetsToLNu') + + getSampleFilesNano(directory,'TTZjets'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + 'FilesPerJob' : 5, + } + + +############ WW ############ + + +samples['WW'] = { 'name' : getSampleFilesNano(directory,'WWTo2L2Nu'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC+'*nllW', + } + +#samples['WWewk'] = { 'name' : getSampleFilesNano(directory,'WpWmJJ_EWK_QCD_noTop_noHiggs') +# + getSampleFilesNano(directory,'WpWpJJ_EWK_QCD'), +# 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC+'*(Sum$(abs(GenPart_pdgId)==6 || GenPart_pdgId==25)==0)*(lhe_mW1[0] > 60. && lhe_mW1[0] < 100. && lhe_mW2[0] > 60. 
&& lhe_mW2[0] < 100.)', +# } + +#samples['ggWW'] = { 'name' : getSampleFilesNano(directory,'GluGluToWWToENEN') +# + getSampleFilesNano(directory,'GluGluToWWToENMN') +# + getSampleFilesNano(directory,'GluGluToWWToENTN') +# + getSampleFilesNano(directory,'GluGluToWWToMNEN') +# + getSampleFilesNano(directory,'GluGluToWWToMNMN') +# + getSampleFilesNano(directory,'GluGluToWWToMNTN') +# + getSampleFilesNano(directory,'GluGluToWWToTNEN') +# + getSampleFilesNano(directory,'GluGluToWWToTNMN') +# + getSampleFilesNano(directory,'GluGluToWWToTNTN'), +# 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC+'*1.53/1.4', +# } + +###### Zg + +samples['Zg'] = { 'name' : getSampleFilesNano(directory,'ZGToLLG'), + 'weight' : XSWeight+'*'+SFweight+'*'+METFilter_MC + '*(Gen_ZGstar_mass <= 0)', + 'FilesPerJob' : 6 , + } + +###### Zg* + +samples['ZgS'] = { 'name' : getSampleFilesNano(directory,'ZGToLLG'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC+'*(Gen_ZGstar_mass > 0)', + 'FilesPerJob' : 4 , + } + +##### WZ + +samples['WZ'] = { 'name': getSampleFilesNano(directory,'WZTo3LNu_mllmin01') + + getSampleFilesNano(directory,'WZTo2L2Q'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC+'*(gstarHigh)' , + 'FilesPerJob' : 5 , + } + +##### ZZ + +samples['ZZ'] = { 'name' : getSampleFilesNano(directory,'ZZTo2L2Nu_ext1') + + getSampleFilesNano(directory,'ZZTo2L2Nu_ext2') + + getSampleFilesNano(directory,'ZZTo2L2Q') + + getSampleFilesNano(directory,'ZZTo4L_ext1') + + getSampleFilesNano(directory,'ZZTo4L_ext2') + #+ getSampleFilesNano(directory,'ggZZ4m') #Missing file for ElepTup + + getSampleFilesNano(directory,'ggZZ4m_ext1') + + getSampleFilesNano(directory,'ggZZ4t') + + getSampleFilesNano(directory,'ggZZ2e2t') + + getSampleFilesNano(directory,'ggZZ2m2t') + + getSampleFilesNano(directory,'ggZZ2e2m'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC, + 'FilesPerJob' : 3, + } + +ZZ2LbaseW = 
'1'#CombineBaseW(directory,'Autumn18_102X_nAODv7_Full2018v7',['ZZTo2L2Nu_ext1','ZZTo2L2Nu_ext2']) +ZZ4LbaseW = '1'#CombineBaseW(directory,'Autumn18_102X_nAODv7_Full2018v7',['ZZTo4L_ext1', 'ZZTo4L_ext2']) +#ggZZbaseW = getBaseWnAOD(directory,'Autumn18_102X_nAODv7_Full2018v7',['ggZZ4m', 'ggZZ4m_ext1']) + +addSampleWeight(samples,'ZZ','ZZTo2L2Nu_ext1',"1.07*"+ZZ2LbaseW+"/baseW") ## The non-ggZZ NNLO/NLO k-factor, cited from https://arxiv.org/abs/1405.2219v1 +addSampleWeight(samples,'ZZ','ZZTo2L2Nu_ext2',"1.07*"+ZZ2LbaseW+"/baseW") +addSampleWeight(samples,'ZZ','ZZTo2L2Q', "1.07") +addSampleWeight(samples,'ZZ','ZZTo4L_ext1', "1.07*"+ZZ4LbaseW+"/baseW") +addSampleWeight(samples,'ZZ','ZZTo4L_ext2', "1.07*"+ZZ4LbaseW+"/baseW") +addSampleWeight(samples,'ZZ','ggZZ2e2t', "1.68") ## The NLO/LO k-factor, cited from https://arxiv.org/abs/1509.06734v1 +addSampleWeight(samples,'ZZ','ggZZ2m2t', "1.68") +addSampleWeight(samples,'ZZ','ggZZ2e2m', "1.68") +#addSampleWeight(samples,'ZZ','ggZZ4m', "1.68*"+ggZZbaseW+"/baseW") +addSampleWeight(samples,'ZZ','ggZZ4m_ext1', "1.68") +addSampleWeight(samples,'ZZ','ggZZ4t', "1.68") + +############ VVV ############ + +samples['VVV'] = { 'name' : getSampleFilesNano(directory,'ZZZ') + + getSampleFilesNano(directory,'WZZ') + + getSampleFilesNano(directory,'WWZ') + + getSampleFilesNano(directory,'WWW'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + } + +########################################## +################ SIGNALS ################# +########################################## + +############ ZH H->WW ############ + +samples['ZH_hww'] = { 'name' : getSampleFilesNano(directory,'HZJ_HToWW_M125'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + } + +samples['ggZH_hww'] = { 'name' : getSampleFilesNano(directory,'GluGluZH_HToWW_M125'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + } + +############ WH H->WW ############ + +samples['WH_hww'] = { 'name' : 
getSampleFilesNano(directory,'HWplusJ_HToWW_M125') + + getSampleFilesNano(directory,'HWminusJ_HToWW_M125'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + } + +############ ttH ############ + +samples['ttH_hww'] = { 'name' : getSampleFilesNano(directory,'ttHToNonbb_M125'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + } + +############ bbH ############ +# Not available for Latinos 2016 v6 + +############ H->TauTau ############ + +#samples['ggH_htt'] = { 'name' : getSampleFilesNano(directory,'GluGluHToTauTau_M125'), +# 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , +# } + +#samples['qqH_htt'] = { 'name' : getSampleFilesNano(directory,'VBFHToTauTau_M125'), +# 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , +# } + +samples['ZH_htt'] = { 'name' : getSampleFilesNano(directory,'HZJ_HToTauTau_M125'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + } + +samples['WH_htt'] = { 'name' : getSampleFilesNano(directory,'HWplusJ_HToTauTau_M125') + + getSampleFilesNano(directory,'HWminusJ_HToTauTau_M125'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + } + +########################################### +################## FAKE ################### +########################################### + +samples['Fake'] = { 'name': [ ] , + 'weight' : fakeW+'*'+METFilter_DATA, + 'weights' : [ ] , + 'isData': ['all'], + 'FilesPerJob' : 500 , + 'suppressNegativeNuisances' :['all'], + } + +directory = treeBaseDir+'Run2018_102X_nAODv7_Full2018v7/DATAl1loose2018v7__l2loose__fakeW/' +#directory = treeBaseDir+'Run2018_102X_nAODv6_Full2018v6/DATAl1loose2018v6__l2loose__fakeW/' +for Run in DataRun : + for DataSet in DataSets : + FileTarget = getSampleFilesNano(directory,DataSet+'_'+Run[1],True) + for iFile in FileTarget: + samples['Fake']['name'].append(iFile) + samples['Fake']['weights'].append(DataTrig[DataSet]) + 
+samples['Fake']['subsamples'] = { + 'e': 'abs(ZH3l_pdgid_l) == 11', + 'm': 'abs(ZH3l_pdgid_l) == 13' +} + +########################################### +################## DATA ################### +########################################### + +samples['DATA'] = { 'name': [ ] , + 'weight' : METFilter_DATA+'*'+LepWPCut, + 'weights' : [ ], + 'isData': ['all'], + 'FilesPerJob' : 500 , + } + +directory = treeBaseDir+'/Run2018_102X_nAODv7_Full2018v7/DATAl1loose2018v7__l2loose__l2tightOR2018v7/' +for Run in DataRun : + for DataSet in DataSets : + FileTarget = getSampleFilesNano(directory,DataSet+'_'+Run[1],True) + for iFile in FileTarget: + samples['DATA']['name'].append(iFile) + samples['DATA']['weights'].append(DataTrig[DataSet]) + diff --git a/ZH3l_BDT/zh3l_run2_rdf/structure.py b/ZH3l_BDT/zh3l_run2_rdf/structure.py new file mode 100644 index 00000000..44147676 --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_rdf/structure.py @@ -0,0 +1,113 @@ +# structure configuration for datacard +# keys here must match keys in samples.py +structure = {} + +# Backgrounds +structure['Fake_e'] = { + 'isSignal' : 0, + 'isData' : 0, + } + +structure['Fake_m'] = { + 'isSignal' : 0, + 'isData' : 0, + } + +structure['DY'] = { + 'isSignal' : 0, + 'isData' : 0, + 'removeFromCuts' : ['zh3l_SR_1j'] + } + +structure['Zg'] = { + 'isSignal' : 0, + 'isData' : 0 + } + +structure['ZgS'] = { + 'isSignal' : 0, + 'isData' : 0 + } + +structure['WW'] = { + 'isSignal' : 0, + 'isData' : 0, + 'removeFromCuts' : ['zh3l_SR_1j','zh3l_SR_2j'] + } + +#structure['WWewk'] = { +# 'isSignal' : 0, +# 'isData' : 0 +# } + +#structure['ggWW'] = { +# 'isSignal' : 0, +# 'isData' : 0 +# } + +structure['WZ'] = { + 'isSignal' : 0, + 'isData' : 0 + } + +structure['VVV'] = { + 'isSignal' : 0, + 'isData' : 0 + } + +structure['ZZ'] = { + 'isSignal' : 0, + 'isData' : 0 + } + +structure['ttV'] = { + 'isSignal' : 0, + 'isData' : 0 + } + +structure['top'] = { + 'isSignal' : 0, + 'isData' : 0, + 'removeFromCuts' : 
['zh3l_WZ_CR_1j','zh3l_SR_1j'] + } + +# Signal +structure['WH_hww'] = { + 'isSignal' : 1, + 'isData' : 0 + } + +structure['ZH_hww'] = { + 'isSignal' : 1, + 'isData' : 0 + } + +structure['ggZH_hww'] = { + 'isSignal' : 1, + 'isData' : 0 + } + +structure['ttH_hww'] = { + 'isSignal' : 1, + 'isData' : 0 + } + +structure['WH_htt'] = { + 'isSignal' : 1, + 'isData' : 0 + } + +structure['ZH_htt'] = { + 'isSignal' : 1, + 'isData' : 0 + } + +# Data +structure['DATA'] = { + 'isSignal' : 0, + 'isData' : 1 + } + + + + diff --git a/ZH3l_BDT/zh3l_run2_rdf/valid_data_samples.txt b/ZH3l_BDT/zh3l_run2_rdf/valid_data_samples.txt new file mode 100644 index 00000000..dfc044df --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_rdf/valid_data_samples.txt @@ -0,0 +1 @@ +/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2_manualHADD2.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_1.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_2.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_3.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_4.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_5.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_6.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_7.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_8.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_9.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_10.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_11.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_12.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_13.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_14.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_15.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_16.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_17.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_18.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_19.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_20.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_21.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_22.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_23.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_24.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_25.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_26.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_27.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_28.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_29.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_31.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_32.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_33.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_34.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_35.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_36.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_37.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_38.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_39.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_40.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_41.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_42.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_43.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_44.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_45.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_46.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_47.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_48.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_50.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_51.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_52.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_53.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_54.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_55.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_56.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_57.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_58.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_59.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_60.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_61.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_62.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_63.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_64.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_65.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_66.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_67.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_68.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_70.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_71.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_72.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_74.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_75.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_76.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_77.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_78.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_79.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_80.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_81.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_82.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_83.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_84.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_85.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_86.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_87.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_88.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_89.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_90.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DY_91.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_1.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_2.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_3.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_4.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_5.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_6.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_7.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_8.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_9.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_10.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_11.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_12.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_13.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_14.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_15.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_16.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_17.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_18.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_19.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_21.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_22.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_23.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_24.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_25.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_26.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_27.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_28.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_29.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_30.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_31.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_32.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__top_33.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_1.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_2.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_3.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_4.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_5.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_6.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_7.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_8.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_9.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_10.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_11.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_12.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_13.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_14.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_15.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttV_16.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WW_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__Zg_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__Zg_1.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__Zg_2.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZgS_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZgS_1.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZgS_2.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZgS_3.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_1.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_2.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_3.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_4.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_5.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_6.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_7.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_8.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_9.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_10.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_11.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_12.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_13.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_15.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_16.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WZ_17.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_1.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_2.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_3.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_4.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_5.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_6.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_7.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_8.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_9.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_10.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_11.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_12.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_13.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_14.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_15.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_16.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_17.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_18.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_19.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_20.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_21.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_22.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_23.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_24.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_25.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_26.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_27.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_28.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_29.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_30.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_31.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_32.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_33.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_34.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_35.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_36.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_37.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_38.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_39.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_40.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_41.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_42.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_43.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_44.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_45.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_46.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_47.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_48.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_49.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_50.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_51.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_52.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_53.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_54.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_55.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_56.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_57.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_58.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_59.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_60.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_61.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_62.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_63.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_64.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_65.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_66.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_67.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_68.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_69.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_70.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_71.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_72.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_74.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_75.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_76.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_77.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_78.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_80.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_81.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_82.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_83.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_84.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_85.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_86.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_87.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_88.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_89.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZZ_90.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZH_hww_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ggZH_hww_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__WH_hww_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ttH_hww_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__ZH_htt_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__Fake_0.root 
/eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__Fake_1.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DATA_0.root /eos/user/d/dshekar/mkShapesRDF_rootfiles/ZH3l_BDTrun2/rootFile/mkShapes__ZH3l_BDTrun2__ALL__DATA_1.root \ No newline at end of file diff --git a/ZH3l_BDT/zh3l_run2_rdf/variables.py b/ZH3l_BDT/zh3l_run2_rdf/variables.py new file mode 100644 index 00000000..0ec4ca8a --- /dev/null +++ b/ZH3l_BDT/zh3l_run2_rdf/variables.py @@ -0,0 +1,129 @@ +# variables + +variables = {} + +#'fold' : # 0 = not fold (default), 1 = fold underflowbin, 2 = fold overflow bin, 3 = fold underflow and overflow +# The following is needed if combining plots between years: +#'doWeight' : 1 +#'binX' : 1 +#'binY' : <# of bins> + +variables['events'] = { 'name': '1', + 'range' : (1,0,2), + 'xaxis' : 'events', + 'fold' : 3 + } + +variables['njet'] = { 'name' : 'ZH3l_njet', + 'range' : (10,0,10), + 'xaxis' : 'N_{jet}', + 'fold' : 0 + } + +variables['met'] = { 'name' : 'PuppiMET_pt', + 'range' : (20,0,100), + 'xaxis' : 'Puppi MET', + 'fold' : 0 + } + +variables['pt1'] = { 'name': 'Lepton_pt[0]', + 'range' : (10,0.,200), + 'xaxis' : 'lept1_p_{T} [GeV]', + 'fold' : 0 + } + +variables['dphilmetj'] = { 'name' : 'ZH3l_dphilmetj', #FIXME + 'range' : (16,0,3.14159), + 'xaxis' : 'dphilmetj', + 'fold' : 0 + } + +variables['dphilmetjj'] = { 'name' : 'ZH3l_dphilmetjj', #FIXME + 'range' : (16,0,3.14159), + 'xaxis' : 'dphilmetjj', + 'fold' : 0, + 'cuts' : [cut for cut in cuts if '1j' not in cut] + } + +variables['pTlmetj'] = { 'name' : 'ZH3l_pTlmetj', + 'range' : (20,0,400), + 'xaxis' : 'pTlmetj', + 'fold' : 0 + } + +variables['pTlmetjj'] = { 'name' : 'ZH3l_pTlmetjj', + 'range' : (20,0,400), + 'xaxis' : 'pTlmetjj', + 'fold' : 0, + 'cuts' : [cut for cut in cuts if '1j' not in cut] + } + +variables['mTlmetjj'] = { 'name' : 'ZH3l_mTlmetjj', #FIXME + 'range' : (16,50,450), + 'xaxis' : 'mTlmetjj', + 'fold' : 3, + 
'cuts' : [cut for cut in cuts if '1j' not in cut] + } + +variables['mTlmetj'] = { 'name' : 'ZH3l_mTlmetj', #FIXME + 'range' : (10,0,250), + 'xaxis' : 'mTlmetj', + 'fold' : 2, + } + +variables['ptz'] = { 'name' : 'ZH3l_pTZ', + 'range' : (20,0,400), + 'xaxis' : 'ptz', + 'fold' : 0 + } + +variables['mtw_notZ'] = { 'name' : 'ZH3l_mTlmet', #FIXME + 'range' : (20,0,200), + 'xaxis' : 'mTlmet', + 'fold' : 0 + } + +variables['mtw_fit'] = { 'name' : 'ZH3l_mTlmet', #FIXME + 'range' : (8,0,160), + 'xaxis' : 'mTlmet', + 'fold' : 2 + } + +variables['checkmZ'] = { 'name' : 'ZH3l_checkmZ', + 'range' : (20,0,200), + 'xaxis' : 'checkmZ', + 'fold' : 0 + } + +variables['ptjet0'] = { 'name' : 'CleanJet_pt[0]', + 'range' : (20,0,200), + 'xaxis' : 'Leading jet p_{T}', + 'fold' : 0 + } + +variables['ptjet1'] = { 'name' : 'CleanJet_pt[1]', + 'range' : (20,0,200), + 'xaxis' : 'Subleading jet p_{T}', + 'cuts' : [cut for cut in cuts if '1j' not in cut], + 'fold' : 0 + } + +variables['WlepId'] = { 'name' : 'ZH3l_pdgid_l', + 'range' : (31,-15.5,15.5), + 'xaxis' : 'W lepton ID', + 'fold' : 0 + } + +variables['flavor3l'] = { 'name' : '0*(abs(Lepton_pdgId[0])==13 && abs(Lepton_pdgId[1])==13 && abs(Lepton_pdgId[2])==13)+ \ + 1*(abs(Lepton_pdgId[0])==13 && abs(Lepton_pdgId[1])==13 && abs(Lepton_pdgId[2])==11)+ \ + 2*(abs(Lepton_pdgId[0])==13 && abs(Lepton_pdgId[1])==11 && abs(Lepton_pdgId[2])==13)+ \ + 3*(abs(Lepton_pdgId[0])==11 && abs(Lepton_pdgId[1])==13 && abs(Lepton_pdgId[2])==13)+ \ + 4*(abs(Lepton_pdgId[0])==13 && abs(Lepton_pdgId[1])==11 && abs(Lepton_pdgId[2])==11)+ \ + 5*(abs(Lepton_pdgId[0])==11 && abs(Lepton_pdgId[1])==13 && abs(Lepton_pdgId[2])==11)+ \ + 6*(abs(Lepton_pdgId[0])==11 && abs(Lepton_pdgId[1])==11 && abs(Lepton_pdgId[2])==13)+ \ + 7*(abs(Lepton_pdgId[0])==11 && abs(Lepton_pdgId[1])==11 && abs(Lepton_pdgId[2])==11)', + 'range' : (8,-0.5,7.5), + 'xaxis' : 'Trilepton flavor', + 'fold' : 0 + } + diff --git a/ZH3l_BDT/zh3l_run3_bdt/bdt_train/Classification_BDT.py 
def make_model(input_dim):
    """Build and compile the dense binary classifier.

    Architecture (in order): an input of shape ``(input_dim,)``; three ReLU
    hidden layers of 128, 64 and 32 units, the first two followed by batch
    normalisation, with dropout rates 0.2, 0.2 and 0.1 respectively; and a
    single sigmoid output unit.

    The model is compiled with Adam (learning rate 1e-3), binary
    cross-entropy loss, and two metrics named ``"auc"`` and ``"accuracy"``.

    Args:
        input_dim: Number of input features per event.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    keras = tf.keras

    # (units, use batch normalisation, dropout rate) for each hidden block.
    hidden_blocks = (
        (128, True, 0.2),
        (64, True, 0.2),
        (32, False, 0.1),
    )

    stack = [keras.layers.Input(shape=(input_dim,))]
    for units, with_batch_norm, drop_rate in hidden_blocks:
        stack.append(keras.layers.Dense(units, activation="relu"))
        if with_batch_norm:
            stack.append(keras.layers.BatchNormalization())
        stack.append(keras.layers.Dropout(drop_rate))
    stack.append(keras.layers.Dense(1, activation="sigmoid"))

    network = keras.Sequential(stack)
    network.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        loss="binary_crossentropy",
        metrics=[
            keras.metrics.AUC(name="auc"),
            keras.metrics.BinaryAccuracy(name="accuracy"),
        ],
    )
    return network
def alt_to_rdf(expr):
    """Rewrite ``Alt$(X[n], d)`` calls into bounds-checked C++ ternaries.

    Every occurrence of ``Alt$(X[n], d)`` in *expr* becomes
    ``(X.size() > n ? X[n] : d)``; text that does not match is returned
    untouched.

    Args:
        expr: Selection/expression string that may contain ``Alt$`` calls.

    Returns:
        The expression with all matching ``Alt$`` calls replaced.
    """
    # group 1: column name, group 2: integer index, group 3: default value
    alt_call = re.compile(
        r'Alt\$\(\s*([A-Za-z0-9_]+)\s*\[\s*(\d+)\s*\]\s*,\s*([^\)]+)\)'
    )
    guarded_access = r'(\1.size() > \2 ? \1[\2] : \3)'
    return alt_call.sub(guarded_access, expr)
def runJob_TF(output_and_dataset_name=""):
    """Train and evaluate the Keras classifier, writing all artefacts to disk.

    Reads the module-level names ``config_mvaVariables_TF``, ``config_cut``,
    ``samples`` and ``structure`` (not defined in this function; presumably
    injected by the driver code before the call -- TODO confirm).

    Steps:
      1. Collect the input files of every non-data sample and turn them into
         feature/label arrays with ``build_dataframe``.
      2. Split 80/20 into train/test (stratified, fixed seed) and standardise
         the features with statistics fitted on the training part only.
      3. Train the network from ``make_model`` with early stopping, learning
         rate reduction and best-model checkpointing.
      4. Evaluate on the test part and save metrics, history and plots.

    Files written to ``dataset<output_and_dataset_name>/``:
    ``feature_names.npy``, ``X_train.npy``, ``X_test.npy``, ``y_train.npy``,
    ``y_test.npy``, ``scaler_mean.npy``, ``scaler_scale.npy``,
    ``best_model.keras``, ``final_model.keras``, ``y_score.npy``,
    ``metrics.json``, ``training_history.npz``, ``summary.txt`` and the plots
    produced by ``plot_and_save``.

    Args:
        output_and_dataset_name: Suffix appended to ``"dataset"`` to form the
            output directory name.

    Raises:
        RuntimeError: If no sample yields any training event.
    """
    outdir = f"dataset{output_and_dataset_name}"
    os.makedirs(outdir, exist_ok=True)

    # Load data
    Xs, ys = [], []
    branches = list(config_mvaVariables_TF)
    cuts = str(config_cut)
    for sample_name, sample in samples.items():
        isData = structure[sample_name]["isData"]
        # Don't train on data: isData is either an int flag or a container
        # in which the entry "all" marks the whole sample as data.
        if (isinstance(isData, int) and isData == 1) or (not isinstance(isData, int) and "all" in isData):
            continue

        # Create file list. Each usable entry is (name, locations, ...);
        # entries of any other shape are skipped.
        files = []
        for entry in sample["name"]:
            if isinstance(entry, (list, tuple)) and len(entry) >= 2:
                files.extend(entry[1])

        if not files:
            continue

        label = 1 if structure[sample_name]["isSignal"] == 1 else 0
        X, y = build_dataframe(files, branches, cuts, label)
        if X.shape[0] > 0:
            Xs.append(X)
            ys.append(y)

    if not Xs:
        raise RuntimeError("No training data found. Check samples and file paths.")

    X = np.concatenate(Xs, axis=0).astype(np.float32)
    y = np.concatenate(ys, axis=0).astype(np.int32).reshape(-1)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Standardise each feature to zero mean and unit variance. The statistics
    # are fitted on the training events only and then re-used for the test
    # events, so no information from the test set leaks into the
    # preprocessing. Test values may fall outside the range seen in training.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # NOTE(review): build_dataframe in this file extracts a fixed set of six
    # columns, so these names match the feature columns only if
    # config_mvaVariables_TF lists the same variables in the same order.
    np.save(os.path.join(outdir, "feature_names.npy"), np.array(branches, dtype=object))
    np.save(os.path.join(outdir, "X_train.npy"), X_train)
    np.save(os.path.join(outdir, "X_test.npy"), X_test)
    np.save(os.path.join(outdir, "y_train.npy"), y_train)
    np.save(os.path.join(outdir, "y_test.npy"), y_test)
    # Persist the scaling constants: the trained model expects standardised
    # inputs, so they are needed to apply it to any new, unscaled events.
    np.save(os.path.join(outdir, "scaler_mean.npy"), scaler.mean_)
    np.save(os.path.join(outdir, "scaler_scale.npy"), scaler.scale_)

    model = make_model(X_train.shape[1])

    callbacks = [
        tf.keras.callbacks.EarlyStopping(
            monitor="val_auc",
            patience=20,
            mode="max",
            restore_best_weights=True
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor="val_loss",
            factor=0.5,
            patience=10,
            min_lr=1e-6
        ),
        tf.keras.callbacks.ModelCheckpoint(
            filepath=os.path.join(outdir, "best_model.keras"),
            monitor="val_auc",
            mode="max",
            save_best_only=True
        )
    ]

    history = model.fit(
        X_train, y_train,
        validation_split=0.2,
        epochs=500,
        batch_size=1024,
        callbacks=callbacks,
        verbose=2
    )

    model.save(os.path.join(outdir, "final_model.keras"))

    y_score = model.predict(X_test, batch_size=4096).reshape(-1)
    # Classify as signal when the network output is at least 0.5.
    y_pred = (y_score >= 0.5).astype(np.int32)

    acc = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_score)
    cm = confusion_matrix(y_test, y_pred)

    np.save(os.path.join(outdir, "y_score.npy"), y_score)

    metrics = {
        "accuracy": float(acc),
        "auc": float(auc),
        "confusion_matrix": cm.tolist(),
        "n_train": int(len(y_train)),
        "n_test": int(len(y_test)),
        "n_features": int(X_train.shape[1])
    }

    with open(os.path.join(outdir, "metrics.json"), "w") as f:
        json.dump(metrics, f, indent=2)

    hist = history.history
    np.savez(
        os.path.join(outdir, "training_history.npz"),
        loss=np.array(hist.get("loss", [])),
        val_loss=np.array(hist.get("val_loss", [])),
        auc=np.array(hist.get("auc", [])),
        val_auc=np.array(hist.get("val_auc", [])),
        accuracy=np.array(hist.get("accuracy", [])),
        val_accuracy=np.array(hist.get("val_accuracy", []))
    )

    # Only the plots written to outdir are needed; the returned curve is unused.
    plot_and_save(y_test, y_score, outdir)

    summary_path = os.path.join(outdir, "summary.txt")
    with open(summary_path, "w") as f:
        f.write(f"Accuracy: {acc:.6f}\n")
        f.write(f"AUC: {auc:.6f}\n")
        f.write(f"Train events: {len(y_train)}\n")
        f.write(f"Test events: {len(y_test)}\n")
        f.write(f"Features: {X_train.shape[1]}\n")
        f.write("Confusion matrix:\n")
        f.write(np.array2string(cm))

    print(f"Saved outputs to: {outdir}")
    print(f"Accuracy = {acc:.6f}")
    print(f"AUC = {auc:.6f}")
TChain("Events") + print("Sample name: ", sampleName) + for name, *location_weights in sample['name']: + print("Sub-sample: ", name) + locations = location_weights[0] + # weights = location_weights[1] if len(location_weights) > 1 else None + for loc in locations: + print("file: ", loc) + sample['tree'].Add(loc) + + if structure[sampleName]['isSignal']==1: + dataloader.AddSignalTree(sample['tree'], 1.0) + else: + dataloader.AddBackgroundTree(sample['tree'], 1.0) + # output_dim += 1 + # Reference: https://root.cern.ch/download/doc/tmva/TMVAUsersGuide.pdf + # Train test dataset will contain less/equal events compared to signal and background trees. How these events are chosen is given by the next line. Event weights are given by Monte Carlo generators, and may turn out to be overall very small or large. To avoid artifacts due to this, TMVA can internally renormalise the signal and background training using NormMode. + dataloader.PrepareTrainingAndTestTree(TCut(config_cut),'SplitMode=Random:NormMode=NumEvents:!V') + # dataloader.PrepareTrainingAndTestTree(TCut(config.cut),'nTrain_Signal=100000:nTrain_Background=100000:SplitMode=Random:NormMode=NumEvents:!V')#SSSF + # Table 25 in TMVA UG explains all parameters, but to summarize: + # - NTrees: number of trees in forest, + # - nCuts: Number of grid points in variable range used in finding optimal cut in node splitting + # - MaxDepth: maximum depth of a tree allowed + # - MinNodeSize: Minimum percentage of training events required in a leaf node + # - BoostType: Boosting algorithm, here Gradient boosting + # - Shrinkage: learning rate + # - UseBaggedBoost: use bagging (Bootstrap AGGregatING) ... 
+ factory.BookMethod(dataloader, TMVA.Types.kBDT, "gBDT_D2", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=2" ) + factory.BookMethod(dataloader, TMVA.Types.kBDT, "gBDT_D2_S01", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.01:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=2" ) + factory.BookMethod(dataloader, TMVA.Types.kBDT, "gBDT_D2_C300", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=300:MaxDepth=2" ) + factory.BookMethod(dataloader, TMVA.Types.kBDT, "gBDT_D3", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=3" ) + factory.BookMethod(dataloader, TMVA.Types.kBDT, "gBDT_D4", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=4" ) + factory.BookMethod(dataloader, TMVA.Types.kBDT, "gBDT_D5", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=5" ) + factory.BookMethod(dataloader, TMVA.Types.kBDT, "gBDT_D6", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.5:nCuts=500:MaxDepth=6" ) + # factory.BookMethod(dataloader, TMVA.Types.kBDT, "gBDT_D2_F07" , "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.05:UseBaggedBoost:GradBaggingFraction=0.7:nCuts=500:MaxDepth=2" ) + # factory.BookMethod(dataloader, TMVA.Types.kBDT, "gBDT_D2_S01_F07", "!H:!V:NTrees=500:MinNodeSize=1.5%:BoostType=Grad:Shrinkage=0.01:UseBaggedBoost:GradBaggingFraction=0.7:nCuts=500:MaxDepth=2" ) + # Run training, test and evaluation + factory.TrainAllMethods() + factory.TestAllMethods() + factory.EvaluateAllMethods() + + output.Close() + +if __name__ == "__main__": + + print("Input arguments: {}".format(sys.argv)) + framework = "TF" # or "TMVA" + + isDEV=False + 
# Load configuration + with open("configuration_BDT.py") as handle: + exec(handle.read()) # Read the file content as a string + samples={} + structure={} + cuts={} + for f in [samplesFile, structureFile, cutsFile]: + with open(f) as handle: + exec(handle.read()) + + # Reduce sample files for fast dev + if isDEV: + for sampleName, sample in samples.items(): + if sampleName not in ['DY', 'top', 'ttV', 'WW', 'Zg', 'ZgS', 'WZ', 'ZZ', 'VVV', 'ZH_hww','ggZH_hww','WH_hww','ttH_hww', 'ZH_htt', 'WH_htt', 'Fake_e', 'Fake_m']: + # if sampleName not in ['Wg','Zg','WgS','ZgS','ZZ','WZ','top','DY','WH_hww_plus','WH_hww_minus','WH_htt_plus','WH_htt_minus']: + samples.pop(sampleName) + continue + + # Define data to be loaded + with open("./preselections.py") as handle: + exec(handle.read()) + + config_cut="(({0}) && ({1}))".format(cuts['NONE'],preselections['ALL']) + + config_mvaVariables = [ + 'Alt$( CleanJet_pt[0], 0)', + 'ZH3l_dphilmetjj', + 'PuppiMET_pt', + 'Alt$( Lepton_pt[0], 0)', + 'Alt$( Lepton_pt[1], 0)', + 'Alt$( Lepton_pt[2], 0)' + ] + + config_mvaVariables_TF = [ + "CleanJet_pt_0", + "ZH3l_dphilmetjj", + "PuppiMET_pt", + "Lepton_pt_0", + "Lepton_pt_1", + "Lepton_pt_2" + ] + + if len(sys.argv) > 1: + print("Suffix is: {}".format(sys.argv[1])) + output_and_dataset = sys.argv[1] + if framework == "TF": + runJob_TF(output_and_dataset) + elif framework == "TMVA": + runJob(output_and_dataset) + os.system("mv dataset dataset{}".format(output_and_dataset)) + else: + print("No suffix, running with standard output name") + if framework == "TF": + runJob_TF() + elif framework == "TMVA": + runJob() diff --git a/ZH3l_BDT/zh3l_run3_bdt/bdt_train/config_BDT.py b/ZH3l_BDT/zh3l_run3_bdt/bdt_train/config_BDT.py new file mode 100644 index 00000000..58bc42a7 --- /dev/null +++ b/ZH3l_BDT/zh3l_run3_bdt/bdt_train/config_BDT.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python + +from __future__ import print_function +import os +from ROOT import gROOT, TFile, TChain, TCut + + + +# mvaVariables = [ +# 
'CleanJet_pt[0][0]', +# 'ZH3l_dphilmetjj', +# 'PuppiMET_pt', +# 'Lepton_pt[0][0]', +# 'Lepton_pt[1][0]', +# 'Lepton_pt[2][0]' +# ] + +# : 1 : WH3l_ZVeto : 8.013e-02 +# : 2 : MinIf$(WH3l_mOSll[],WH3l_mOSll[Iteration$]>0) : 6.387e-02 +# : 3 : MinIf$(WH3l_ptOSll[],WH3l_ptOSll[Iteration$]>0) : 6.168e-02 +# : 4 : WH3l_dphilmet[0] : 5.700e-02 +# : 5 : Alt$(Lepton_pt[0],0) : 5.583e-02 +# : 6 : WH3l_dphilmet[2] : 5.460e-02 +# : 7 : WH3l_ptlll : 5.448e-02 +# : 8 : WH3l_dphilllmet : 5.393e-02 +# : 9 : WH3l_mtWWW : 5.162e-02 +# : 10 : WH3l_mlll : 5.103e-02 +# : 11 : WH3l_ptWWW : 5.083e-02 +# : 12 : MinIf$(WH3l_drOSll[],WH3l_drOSll[Iteration$]>0) : 4.966e-02 +# : 13 : PuppiMET_pt : 4.907e-02 +# : 14 : WH3l_mtlmet[1] : 4.747e-02 +# : 15 : WH3l_dphilmet[1] : 4.527e-02 +# : 16 : Alt$(Lepton_pt[1],0) : 4.456e-02 +# : 17 : WH3l_mtlmet[2] : 4.388e-02 +# : 18 : WH3l_mtlmet[0] : 4.260e-02 +# : 19 : Alt$(Lepton_pt[2],0) : 4.250e-02 diff --git a/ZH3l_BDT/zh3l_run3_bdt/bdt_train/configuration_BDT.py b/ZH3l_BDT/zh3l_run3_bdt/bdt_train/configuration_BDT.py new file mode 100644 index 00000000..2c0c1027 --- /dev/null +++ b/ZH3l_BDT/zh3l_run3_bdt/bdt_train/configuration_BDT.py @@ -0,0 +1,17 @@ +# example of configuration file + +import os + +tagName = '' + +# luminosity to normalize to (in 1/fb) +lumi = 8.0 + +# file with list of cuts +cutsFile = 'cuts_BDT.py' + +# file with list of samples +samplesFile = 'samples_BDT.py' + +# structure file for datacard +structureFile = 'structure.py' diff --git a/ZH3l_BDT/zh3l_run3_bdt/bdt_train/cuts_BDT.py b/ZH3l_BDT/zh3l_run3_bdt/bdt_train/cuts_BDT.py new file mode 100644 index 00000000..7281a38a --- /dev/null +++ b/ZH3l_BDT/zh3l_run3_bdt/bdt_train/cuts_BDT.py @@ -0,0 +1,144 @@ +# cuts + +cuts = {} +# 'MinIf$( WH3l_mOSll[], WH3l_mOSll[Iteration$] > 0) > 12 \ +# && Alt$(Lepton_pt[0],0)>25 \ +# && Alt$(Lepton_pt[1],0)>10 \ +# && Alt$(Lepton_pt[2],0)>10 \ +# && (nLepton>=3 && Alt$(Lepton_pt[3],0)<10) \ +# && abs(WH3l_chlll) == 1 \ +# && Alt$(CleanJet_pt[0], 
0) < 30 \ +# && WH3l_flagOSSF == 1 \ +# && WH3l_ZVeto > 20 \ +# && PuppiMET_pt > 40 \ +# ' + +cuts['NONE'] = '1' # No cuts for BDT training + +cuts['zmass_cut'] = 'WH3l_ZVeto < 25' + +cuts['jet_cut_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + ' + +cuts['bveto_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + ' + +cuts['z4lveto_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + ' + +cuts['zh3l_SR_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && ZH3l_dphilmetjj < 3.14159/2 \ + ' + +cuts['zh3l_WZ_CR_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && ZH3l_dphilmetjj > 3.14159/2 \ + ' + +cuts['met_2j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + ' + +cuts['zh3l_SR_2j_met'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + && ZH3l_dphilmetjj < 3.14159/2 \ + ' + +cuts['zh3l_WZ_CR_2j_met'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) > 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + && ZH3l_dphilmetjj > 3.14159/2 \ + ' + +cuts['jet_cut_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + ' + +cuts['bveto_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + ' + +cuts['z4lveto_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto 
> 20 \ + ' + +cuts['zh3l_SR_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && ZH3l_dphilmetj < 3.14159/2 \ + ' + +cuts['zh3l_WZ_CR_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && ZH3l_dphilmetj > 3.14159/2 \ + ' + +cuts['met_1j'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + ' + +cuts['zh3l_SR_1j_met'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + && ZH3l_dphilmetj < 3.14159/2 \ + ' + +cuts['zh3l_WZ_CR_1j_met'] = ' Alt( CleanJet_pt, 0, 0) > 30 \ + && Alt( CleanJet_pt, 1, 0) < 30 \ + && WH3l_ZVeto < 25 \ + && bVeto \ + && ZH3l_Z4lveto > 20 \ + && PuppiMET_pt > 25 \ + && ZH3l_dphilmetj > 3.14159/2 \ + ' + +''' + #11 = e +# 13 = mu +# 15 = tau +''' diff --git a/ZH3l_BDT/zh3l_run3_bdt/bdt_train/preselections.py b/ZH3l_BDT/zh3l_run3_bdt/bdt_train/preselections.py new file mode 100644 index 00000000..1eed05a0 --- /dev/null +++ b/ZH3l_BDT/zh3l_run3_bdt/bdt_train/preselections.py @@ -0,0 +1,17 @@ +preselections = {} + +# preselections['ALL'] = 'Lepton_pt[0][0]>25 \ +# && Lepton_pt[1][0]>20 \ +# && Lepton_pt[2][0]>15 \ +# && Lepton_pt[3][0]<10 \ +# && (WH3l_mOSll[0] < 0 || WH3l_mOSll[0] > 12) \ +# && (WH3l_mOSll[1] < 0 || WH3l_mOSll[1] > 12) \ +# && (WH3l_mOSll[2] < 0 || WH3l_mOSll[2] > 12) \ +# && abs(WH3l_chlll) == 1 \ +# ' + +preselections['ALL'] = 'Alt$( Lepton_pt[0], 0) > 25 \ + && Alt$( Lepton_pt[1], 0) > 20 \ + && Alt$( Lepton_pt[2], 0) > 15 \ + && Alt$( Lepton_pt[3], 0) < 10 \ + ' \ No newline at end of file diff --git a/ZH3l_BDT/zh3l_run3_bdt/bdt_train/samples_BDT.py b/ZH3l_BDT/zh3l_run3_bdt/bdt_train/samples_BDT.py new file mode 100644 index 00000000..a1327ae7 
--- /dev/null +++ b/ZH3l_BDT/zh3l_run3_bdt/bdt_train/samples_BDT.py @@ -0,0 +1,223 @@ +import os +import subprocess + +# global getSampleFiles +# from LatinoAnalysis.Tools.commonTools import getSampleFiles, addSampleWeight, getBaseWnAOD +# It looks like getBaseWnAOD and addSampleWeight from Latinos does the same job as CombineBaseW function from makeShapesRDF. Similarly getSampleFiles function from Latinos is the same as nanoGetSampleFiles from makeShapesRDF. + +from mkShapesRDF.lib.search_files import SearchFiles +searchFiles = SearchFiles() +redirector = "" +limitFiles = 1 +samples = {} + +def nanoGetSampleFiles(path, name): + _files = searchFiles.searchFiles(path, name, redirector=redirector) + if limitFiles != -1 and len(_files) > limitFiles: + print("Found {} files for sample {}".format(len(_files), name)) + return [(name, _files[:limitFiles])] + else: + print("Found {} files for sample {}".format(len(_files), name)) + return [(name, _files)] + +def getSampleFilesNano(inputDir,Sample,absPath=False): + # return getSampleFiles(inputDir,Sample,absPath,'nanoLatino_') + return nanoGetSampleFiles(inputDir, Sample) + +def CombineBaseW(samples, proc, samplelist): + _filtFiles = list(filter(lambda k: k[0] in samplelist, samples[proc]["name"])) + _files = list(map(lambda k: k[1], _filtFiles)) + _l = list(map(lambda k: len(k), _files)) + leastFiles = _files[_l.index(min(_l))] + dfSmall = ROOT.RDataFrame("Runs", leastFiles) + s = dfSmall.Sum("genEventSumw").GetValue() + f = ROOT.TFile(leastFiles[0]) + t = f.Get("Events") + t.GetEntry(1) + xs = t.baseW * s + + __files = [] + for f in _files: + __files += f + df = ROOT.RDataFrame("Runs", __files) + s = df.Sum("genEventSumw").GetValue() + newbaseW = str(xs / s) + return newbaseW # "/baseW is used after getBaseWnAOD/CombineBaseW calls in this code" + # weight = newbaseW + "/baseW" + + # for iSample in samplelist: + # addSampleWeight(samples, proc, iSample, weight) + + +def addSampleWeight(samples, sampleName, sampleNameType, 
weight): + obj = list(filter(lambda k: k[0] == sampleNameType, samples[sampleName]["name"]))[0] + samples[sampleName]["name"] = list( + filter(lambda k: k[0] != sampleNameType, samples[sampleName]["name"]) + ) + if len(obj) > 2: + samples[sampleName]["name"].append( + (obj[0], obj[1], obj[2] + "*(" + weight + ")") + ) + else: + samples[sampleName]["name"].append((obj[0], obj[1], "(" + weight + ")")) + + +############################################## +###### Tree Directory according to site ###### +############################################## + +SITE=os.uname()[1] +xrootdPath='' +# xrootdPath='root://eoscms.cern.ch/' +treeBaseDir = '/eos/user/d/dshekar/MCsamplesForBDTzh3l/' +# treeBaseDir = '/eos/cms/store/group/phys_higgs/cmshww/amassiro/HWWNano/' + +directory = treeBaseDir+'Summer22_130x_nAODv12_Full2022v12/MCl2loose2022v12__MCCorr2022v12JetScaling__l2tight' + +################################################ +############ NUMBER OF LEPTONS ################# +################################################ + +#Nlep='2' +Nlep='3' +#Nlep='4' + +################################################ +############### Lepton WP ###################### +################################################ + +eleWP='mvaFall17V1Iso_WP90' +#eleWP='mvaFall17V1Iso_WP90_SS' +#eleWP='mvaFall17V2Iso_WP90' +#eleWP='mvaFall17V2Iso_WP90_SS' +muWP ='cut_Tight_HWWW' +eleWP_new = 'cutBased_MediumID_tthMVA_Run3' +muWP_new = 'cut_TightID_pfIsoTight_HWW_tthmva_67' + + +LepWPCut = 'LepCut'+Nlep+'l__ele_'+eleWP_new+'__mu_'+muWP_new +#LepWPCut = 'LepCut'+Nlep+'l__ele_'+eleWP+'__mu_'+muWP +#LepWPweight = 'ttHMVA_SF_3l[0]' #SF for new WPs, defined in aliases +LepWPweight = 'LepSF'+Nlep+'l__ele_'+eleWP_new+'__mu_'+muWP_new + +################################################ +############ BASIC MC WEIGHTS ################## +################################################ + +XSWeight = 'XSWeight' +SFweight = 'SFweight'+Nlep+'l*'+LepWPweight+'*'+LepWPCut#+'*Jet_PUIDSF' +PromptGenLepMatch = 
'PromptGenLepMatch'+Nlep+'l' + +################################################ +############## FAKE WEIGHTS #################### +################################################ + +#eleWP_new = 'mvaFall17V1Iso_WP90_tthmva_70' +#muWP_new = 'cut_Tight_HWWW_tthmva_80' + +if Nlep == '2' : + fakeW = 'fakeW2l_ele_'+eleWP_new+'_mu_'+muWP_new + #fakeW = 'fakeW2l_ele_'+eleWP+'_mu_'+muWP +else: + fakeW = 'fakeW_ele_'+eleWP_new+'_mu_'+muWP_new+'_'+Nlep+'l' + #fakeW = 'fakeW_ele_'+eleWP+'_mu_'+muWP+'_'+Nlep+'l' + +################################################ +############### B-Tag WP ###################### +################################################ + +SFweight += '*btagSF' #define in aliases.py + +################################################ +############ MET FILTERS ################### +################################################ + +METFilter_MC = 'METFilter_MC' +METFilter_DATA = 'METFilter_DATA' + +################################################ +############ DATA DECLARATION ################## +################################################ + +DataRun = [ + ['C','Run2022C-ReReco-v1'], + ['D','Run2022D-ReReco-v1'], +] + +DataSets = ['MuonEG','Muon','EGamma'] + +DataTrig = { + 'MuonEG' : 'Trigger_ElMu' , + 'Muon' : '!Trigger_ElMu && (Trigger_sngMu || Trigger_dblMu)', + 'EGamma' : '!Trigger_ElMu && !Trigger_sngMu && !Trigger_dblMu && (Trigger_sngEl || Trigger_dblEl)' +} + +########################################### +############# BACKGROUNDS ############### +########################################### + +############ DY ############ + +ptllDYW_NLO = '1' +ptllDYW_LO = '1' + +##### WZ + +samples['WZ'] = { 'name': getSampleFilesNano(directory,'WZTo3LNu') + + getSampleFilesNano(directory,'WZTo2L2Q') + + getSampleFilesNano(directory,'WZ'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC+'*(gstarHigh)' , + 'FilesPerJob' : 5 , + } + +samples['ZZ'] = { 'name' : getSampleFilesNano(directory,'ZZ'), + 'weight' : 
XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC, + 'FilesPerJob' : 3, + } + +########################################## +################ SIGNALS ################# +########################################## + +############ ZH H->WW ############ + +# samples['ZH_hww'] = { 'name' : getSampleFilesNano(directory,'HZJ_HToWW_M125'), +# 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , +# } + +samples['ggZH_hww'] = { 'name' : getSampleFilesNano(directory,'GluGluZH_Zto2L_Hto2WtoLNu2Q'), + 'weight' : XSWeight+'*'+SFweight+'*'+PromptGenLepMatch+'*'+METFilter_MC , + } + +# directory = '/eos/cms/store/group/phys_higgs/cmshww/amassiro/HWWNano/Run2018_102X_nAODv7_Full2018v7/DATAl1loose2018v7__l2loose__fakeW/' + +############ +### DATA ### +############ + +samples['DATA'] = { + 'name': [], + 'weight': 'LepWPCut*METFilter_DATA', + 'weights': [], + 'isData': ['all'], + 'FilesPerJob': 15 +} + + +mcProduction = 'Summer22_130x_nAODv12_Full2022v12' +dataReco = 'Run2022_ReReco_nAODv12_Full2022v12' +dataSteps = 'DATAl2loose2022v12__l2loose' +treeBaseDir = '/eos/cms/store/group/phys_higgs/cmshww/amassiro/HWWNano' +dataDirectory = os.path.join(treeBaseDir, dataReco, dataSteps) + + +for _, sd in DataRun: + for pd in DataSets: + datatag = pd + '_' + sd + + files = nanoGetSampleFiles(dataDirectory, datatag) + + print(datatag) + + samples['DATA']['name'].extend(files) + addSampleWeight(samples, 'DATA', datatag, DataTrig[pd]) \ No newline at end of file diff --git a/ZH3l_BDT/zh3l_run3_bdt/bdt_train/structure.py b/ZH3l_BDT/zh3l_run3_bdt/bdt_train/structure.py new file mode 100644 index 00000000..1291e9f2 --- /dev/null +++ b/ZH3l_BDT/zh3l_run3_bdt/bdt_train/structure.py @@ -0,0 +1,38 @@ +# structure configuration for datacard +# keys here must match keys in samples.py +structure = {} + +# Backgrounds + + +structure['WZ'] = { + 'isSignal' : 0, + 'isData' : 0 + } + +structure['ZZ'] = { + 'isSignal' : 0, + 'isData' : 0 + } + + +# Signal +# structure['ZH_hww'] = 
{ +# 'isSignal' : 1, +# 'isData' : 0 +# } + +structure['ggZH_hww'] = { + 'isSignal' : 1, + 'isData' : 0 + } + +# Data +structure['DATA'] = { + 'isSignal' : 0, + 'isData' : 1 + } + + + + diff --git a/ZpTreweighting/2022_v12/aliases.py b/ZpTreweighting/2022_v12/aliases.py new file mode 100644 index 00000000..ac657ceb --- /dev/null +++ b/ZpTreweighting/2022_v12/aliases.py @@ -0,0 +1,279 @@ +import os +import copy +import inspect +import json + +configurations = os.path.realpath(inspect.getfile(inspect.currentframe())) # this file + +aliases = {} +aliases = OrderedDict() + +mc = [skey for skey in samples if skey not in ('Fake', 'DATA', 'Dyemb', 'DATA_EG', 'DATA_Mu', 'DATA_EMu', 'Fake_EG', 'Fake_Mu', 'Fake_EMu')] +# Commented out as not used (DS, 19Nov25) +# mc_emb = [skey for skey in samples if skey not in ('Fake', 'DATA', 'DATA_Mu', 'DATA_EMu', 'Fake_EG', 'Fake_Mu', 'Fake_EMu')] + +# Using LepSF2l__ele_cutBased_LooseID_tthMVA_Run3__mu_cut_TightID_pfIsoTight_HWW_tthmva_67 from latest git repo push (https://github.com/latinos/PlotsConfigurationsRun3/blob/f8a0f50dfe6301543203d9d260ad721204a2739f/ControlRegions/DY/2022_v12/aliases.py#L14-L15) +eleWP = 'cutBased_LooseID_tthMVA_Run3' +muWP = 'cut_TightID_pfIsoTight_HWW_tthmva_67' + +aliases['LepWPCut'] = { + 'expr': 'LepCut2l__ele_'+eleWP+'__mu_'+muWP, + 'samples': mc + ['DATA'], +} + +aliases['LepWPSF'] = { + 'expr': 'LepSF2l__ele_'+eleWP+'__mu_'+muWP, + 'samples': mc +} + +# gen-matching to prompt only (GenLepMatch2l matches to *any* gen lepton) +aliases['PromptGenLepMatch2l'] = { + 'expr': 'Alt(Lepton_promptgenmatched, 0, 0) * Alt(Lepton_promptgenmatched, 1, 0)', + 'samples': mc +} + +aliases['gen_Zpt'] = { + # 'linesToAdd': [".L /afs/cern.ch/user/d/dshekar/public/RDF/PlotsConfigurationsRun3/HWW_polarization/Extended/getGenZpt.cc+"], + # 'linesToAdd': ['.L /eos/user/d/dshekar/public/RDF/PlotsConfigurationsRun3/HWW_polarization/Extended/getGenZpt.cc+'], + 'linesToAdd': [ + """ +#ifndef getGenZpt +#define getGenZpt + 
+#include + +#include "TVector2.h" +#include "Math/Vector4Dfwd.h" +#include "Math/GenVector/LorentzVector.h" +#include "Math/GenVector/PtEtaPhiM4D.h" + +#include +#include "ROOT/RVec.hxx" + +using namespace ROOT; +using namespace ROOT::VecOps; + +double GetGenZpt( + int nGenPart, + RVecF GenPart_pt, + RVecI GenPart_pdgId, + RVecI GenPart_genPartIdxMother, + RVecI GenPart_statusFlags, + float gen_ptll + ){ + + + + // Find Gen pT of Z decaying into leptons + unsigned nGen = nGenPart; + std::vector LepCands{}; + std::vector MotherIdx{}; + std::vector MotherPdgId{}; + int pdgId, sFlag, MIdx; + bool hasZ = false; + //std::cout << "==========" << std::endl; + for (unsigned iGen{0}; iGen != nGen; ++iGen){ + pdgId = std::abs(GenPart_pdgId[iGen]); + sFlag = GenPart_statusFlags[iGen]; + //std::cout << pdgId << " ; " << sFlag << " ; " << GenPart_pt->At(iGen) << " ; " << GenPart_genPartIdxMother->At(iGen) << std::endl; + if (((pdgId == 11) || (pdgId == 13) || (pdgId == 15)) && ((sFlag >> 0 & 1) || (sFlag >> 2 & 1) || (sFlag >> 3 & 1) || (sFlag >> 4 & 1))){ + LepCands.push_back(iGen); + MIdx = GenPart_genPartIdxMother[iGen]; + MotherIdx.push_back(MIdx); + if (MIdx > -1){ + MotherPdgId.push_back(GenPart_pdgId[MIdx]); + if (GenPart_pdgId[MIdx]==23) hasZ = true; + }else{ + MotherPdgId.push_back(0); + } + } + } + + //std::cout << "Check:" << std::endl; + for (unsigned iGen{0}; iGen != LepCands.size(); ++iGen){ + for (unsigned jGen{0}; jGen != LepCands.size(); ++jGen){ + if (jGen <= iGen) continue; + //std::cout << iGen << " ; " << MotherIdx[iGen] << " ; " << jGen << " ; " << MotherIdx[jGen] << " ; " << MotherPdgId[iGen] << " ; " << hasZ << std::endl; + // Some DY samples generate the Z; others have the two leptons produced directly -> motherId is 0 for those events + if (hasZ){ + if (MotherIdx[iGen] == MotherIdx[jGen] && MotherPdgId[iGen] == 23) return GenPart_pt[MotherIdx[iGen]]; + }else{ + if (MotherIdx[iGen] == MotherIdx[jGen] && MotherIdx[iGen] == 0) return 
GenPart_pt[MotherIdx[iGen]]; + } + } + } + //std::cout << "Falling back!" << std::endl; + return gen_ptll; + +} + +#endif + """], + 'class': 'GetGenZpt', + 'args': 'nGenPart, GenPart_pt, GenPart_pdgId, GenPart_genPartIdxMother, GenPart_statusFlags, gen_ptll', + # 'expr': 'gen_ptll', + 'samples': mc +} + +# Jet bins +# using Alt(CleanJet_pt, n, 0) instead of Sum(CleanJet_pt >= 30) because jet pt ordering is not strictly followed in JES-varied samples + +# No jet with pt > 30 GeV +aliases['zeroJet'] = { + 'expr': 'Alt(CleanJet_pt, 0, 0) < 30.' +} + +aliases['oneJet'] = { + 'expr': 'Alt(CleanJet_pt, 0, 0) > 30.' +} + +aliases['multiJet'] = { + 'expr': 'Alt(CleanJet_pt, 1, 0) > 30.' +} + +aliases['noJetInHorn'] = { + 'expr' : 'Sum(CleanJet_pt > 30 && CleanJet_pt < 50 && abs(CleanJet_eta) > 2.6 && abs(CleanJet_eta) < 3.1) == 0', +} + +# exec(open('dyZpTrw.py', "r").read()) +# aliases['DY_LO_ZpTrw'] = { +# 'expr': '('+DYrew['2022_NLO']['w'].replace('x', 'gen_Zpt')+')*(zeroJet)*(ptll < 50) + 1*(zeroJet)*(ptll >= 50)', +# 'samples': ['DY'] +# } +_dyzptrw_json = os.path.join(os.path.dirname(configurations), 'dyZpTrw.json') +with open(_dyzptrw_json) as _fj: + DYrew = json.load(_fj) +aliases['DY_NLO_ZpTrw'] = { + 'expr': '(' + DYrew['2022']['NLO_0j'].replace('x', 'gen_Zpt') + ')*(zeroJet)' + + ' + (' + DYrew['2022']['NLO_1j'].replace('x', 'gen_Zpt') + ')*(oneJet&& Alt(CleanJet_pt,1,0)<30)' + + ' + (' + DYrew['2022']['NLO_2j'].replace('x', 'gen_Zpt') + ')*(multiJet)', + 'samples': ['DY'] +} + +######################################################################## +# B-Tagging WP: https://btv-wiki.docs.cern.ch/ScaleFactors/Run3Summer22/ +######################################################################## + +# Algo / WP / WP cut +btagging_WPs = { + "DeepFlavB" : { + "loose" : "0.0583", + "medium" : "0.3086", + "tight" : "0.7183", + "xtight" : "0.8111", + "xxtight" : "0.9512", + }, + "RobustParTAK4B" : { + "loose" : "0.0849", + "medium" : "0.4319", + "tight" : "0.8482", + 
"xtight" : "0.9151", + "xxtight" : "0.9874", + }, + "PNetB" : { + "loose" : "0.0470", + "medium" : "0.2450", + "tight" : "0.6734", + "xtight" : "0.7862", + "xxtight" : "0.9610", + } +} + +# Algo / SF name +btagging_SFs = { + "DeepFlavB" : "deepjet", + "RobustParTAK4B" : "partTransformer", + "PNetB" : "partNet", +} + +# Algorithm and WP selection +bAlgo = 'PNetB' # ['DeepFlavB','RobustParTAK4B','PNetB'] +WP = 'loose' # ['loose','medium','tight','xtight','xxtight'] + +# Access information from dictionaries +bWP = btagging_WPs[bAlgo][WP] +bSF = btagging_SFs[bAlgo] + +# # B tagging selections and scale factors +aliases['bVeto'] = { + 'expr': f'Sum(CleanJet_pt > 20. && abs(CleanJet_eta) < 2.5 && Take(Jet_btag{bAlgo}, CleanJet_jetIdx) > {bWP}) == 0' +} + +aliases['bReq'] = { + 'expr': f'Sum(CleanJet_pt > 30. && abs(CleanJet_eta) < 2.5 && Take(Jet_btag{bAlgo}, CleanJet_jetIdx) > {bWP}) >= 1' +} + +# Commenting out, as this was not included in latest git repo (https://github.com/latinos/PlotsConfigurationsRun3/blob/f8a0f50dfe6301543203d9d260ad721204a2739f/ControlRegions/DY/2022_v12/aliases.py#L89-L104) +# aliases['bVetoSF'] = { +# 'expr': 'TMath::Exp(Sum(LogVec((CleanJet_pt>20 && abs(CleanJet_eta)<2.5)*Take(Jet_btagSF_{}_shape, CleanJet_jetIdx)+1*(CleanJet_pt<20 || abs(CleanJet_eta)>2.5))))'.format(bSF), +# 'samples': mc +# } + +# aliases['bReqSF'] = { +# 'expr': 'TMath::Exp(Sum(LogVec((CleanJet_pt>30 && abs(CleanJet_eta)<2.5)*Take(Jet_btagSF_{}_shape, CleanJet_jetIdx)+1*(CleanJet_pt<30 || abs(CleanJet_eta)>2.5))))'.format(bSF), +# 'samples': mc +# } + +# # Top control region +# aliases['topcr'] = { +# 'expr': 'mtw2>30 && mll>50 && ((zeroJet && !bVeto) || bReq)' +# } + +# # WW control region +# aliases['wwcr'] = { +# 'expr': 'mth>60 && mtw2>30 && mll>100 && bVeto' +# } + +# # Overall b tag SF +# aliases['btagSF'] = { +# 'expr': '(bVeto || (topcr && zeroJet))*bVetoSF + (topcr && !zeroJet)*bReqSF', +# 'samples': mc +# } + +# # Systematic uncertainty variations +# for shift 
in ['jes','lf','hf','lfstats1','lfstats2','hfstats1','hfstats2','cferr1','cferr2']: + +# for targ in ['bVeto', 'bReq']: +# alias = aliases['%sSF%sup' % (targ, shift)] = copy.deepcopy(aliases['%sSF' % targ]) +# alias['expr'] = alias['expr'].replace('btagSF_deepjet_shape', 'btagSF_deepjet_shape_up_%s' % shift) + +# alias = aliases['%sSF%sdown' % (targ, shift)] = copy.deepcopy(aliases['%sSF' % targ]) +# alias['expr'] = alias['expr'].replace('btagSF_deepjet_shape', 'btagSF_deepjet_shape_down_%s' % shift) + +# aliases['btagSF%sup' % shift] = { +# 'expr': aliases['btagSF']['expr'].replace('SF', 'SF' + shift + 'up'), +# 'samples': mc +# } + +# aliases['btagSF%sdown' % shift] = { +# 'expr': aliases['btagSF']['expr'].replace('SF', 'SF' + shift + 'down'), +# 'samples': mc +# } + +########################################################################## +# End of b tagging +########################################################################## + +# Data/MC scale factors and systematic uncertainties +aliases['SFweight'] = { + # 'expr': ' * '.join(['SFweight2l', 'LepWPCut', 'LepWPSF','btagSF']), + 'expr': ' * '.join(['SFweight2l', 'LepWPCut', 'LepWPSF']), + 'samples': mc +} + +aliases['SFweightEleUp'] = { + 'expr': 'LepSF2l__ele_'+eleWP+'__Up', + 'samples': mc +} +aliases['SFweightEleDown'] = { + 'expr': 'LepSF2l__ele_'+eleWP+'__Down', + 'samples': mc +} +aliases['SFweightMuUp'] = { + 'expr': 'LepSF2l__mu_'+muWP+'__Up', + 'samples': mc +} +aliases['SFweightMuDown'] = { + 'expr': 'LepSF2l__mu_'+muWP+'__Down', + 'samples': mc +} diff --git a/ZpTreweighting/2022_v12/aliases_withBtagSF.py b/ZpTreweighting/2022_v12/aliases_withBtagSF.py new file mode 100644 index 00000000..6ae664a1 --- /dev/null +++ b/ZpTreweighting/2022_v12/aliases_withBtagSF.py @@ -0,0 +1,245 @@ +import os +import copy +import inspect +import ROOT + +ROOT.gSystem.Load("libGpad.so") +ROOT.gSystem.Load("libGraf.so") + +configurations = os.path.realpath(inspect.getfile(inspect.currentframe())) # this file 
+macros = os.path.dirname(configurations) + '/macros/' +btagmaps = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(configurations)))) + '/utils/data/btag' +print(macros) +print(btagmaps) + +aliases = {} +aliases = OrderedDict() + +mc = [skey for skey in samples if skey not in ('Fake', 'DATA', 'Dyemb', 'DATA_EG', 'DATA_Mu', 'DATA_EMu', 'Fake_EG', 'Fake_Mu', 'Fake_EMu')] +# Commented out as not used (DS, 19Nov25) +# mc_emb = [skey for skey in samples if skey not in ('Fake', 'DATA', 'DATA_Mu', 'DATA_EMu', 'Fake_EG', 'Fake_Mu', 'Fake_EMu')] + +# Using LepSF2l__ele_cutBased_LooseID_tthMVA_Run3__mu_cut_TightID_pfIsoTight_HWW_tthmva_67 from latest git repo push (https://github.com/latinos/PlotsConfigurationsRun3/blob/f8a0f50dfe6301543203d9d260ad721204a2739f/ControlRegions/DY/2022_v12/aliases.py#L14-L15) +eleWP = 'cutBased_LooseID_tthMVA_Run3' +muWP = 'cut_TightID_pfIsoTight_HWW_tthmva_67' + +aliases['LepWPCut'] = { + 'expr': 'LepCut2l__ele_'+eleWP+'__mu_'+muWP, + 'samples': mc + ['DATA'], +} + +aliases['LepWPSF'] = { + 'expr': 'LepSF2l__ele_'+eleWP+'__mu_'+muWP, + 'samples': mc +} + +# gen-matching to prompt only (GenLepMatch2l matches to *any* gen lepton) +aliases['PromptGenLepMatch2l'] = { + 'expr': 'Alt(Lepton_promptgenmatched, 0, 0) * Alt(Lepton_promptgenmatched, 1, 0)', + 'samples': mc +} + +aliases['gen_Zpt'] = { + # 'linesToAdd': [".L /afs/cern.ch/user/d/dshekar/public/RDF/PlotsConfigurationsRun3/HWW_polarization/Extended/getGenZpt.cc+"], + # 'linesToAdd': ['.L /eos/user/d/dshekar/public/RDF/PlotsConfigurationsRun3/HWW_polarization/Extended/getGenZpt.cc+'], + 'linesToAdd': [ + """ +#ifndef getGenZpt +#define getGenZpt + +#include + +#include "TVector2.h" +#include "Math/Vector4Dfwd.h" +#include "Math/GenVector/LorentzVector.h" +#include "Math/GenVector/PtEtaPhiM4D.h" + +#include +#include "ROOT/RVec.hxx" + +using namespace ROOT; +using namespace ROOT::VecOps; + +double GetGenZpt( + int nGenPart, + RVecF GenPart_pt, + RVecI GenPart_pdgId, + RVecI 
GenPart_genPartIdxMother, + RVecI GenPart_statusFlags, + float gen_ptll + ){ + + + + // Find Gen pT of Z decaying into leptons + unsigned nGen = nGenPart; + std::vector LepCands{}; + std::vector MotherIdx{}; + std::vector MotherPdgId{}; + int pdgId, sFlag, MIdx; + bool hasZ = false; + //std::cout << "==========" << std::endl; + for (unsigned iGen{0}; iGen != nGen; ++iGen){ + pdgId = std::abs(GenPart_pdgId[iGen]); + sFlag = GenPart_statusFlags[iGen]; + //std::cout << pdgId << " ; " << sFlag << " ; " << GenPart_pt->At(iGen) << " ; " << GenPart_genPartIdxMother->At(iGen) << std::endl; + if (((pdgId == 11) || (pdgId == 13) || (pdgId == 15)) && ((sFlag >> 0 & 1) || (sFlag >> 2 & 1) || (sFlag >> 3 & 1) || (sFlag >> 4 & 1))){ + LepCands.push_back(iGen); + MIdx = GenPart_genPartIdxMother[iGen]; + MotherIdx.push_back(MIdx); + if (MIdx > -1){ + MotherPdgId.push_back(GenPart_pdgId[MIdx]); + if (GenPart_pdgId[MIdx]==23) hasZ = true; + }else{ + MotherPdgId.push_back(0); + } + } + } + + //std::cout << "Check:" << std::endl; + for (unsigned iGen{0}; iGen != LepCands.size(); ++iGen){ + for (unsigned jGen{0}; jGen != LepCands.size(); ++jGen){ + if (jGen <= iGen) continue; + //std::cout << iGen << " ; " << MotherIdx[iGen] << " ; " << jGen << " ; " << MotherIdx[jGen] << " ; " << MotherPdgId[iGen] << " ; " << hasZ << std::endl; + // Some DY samples generate the Z; others have the two leptons produced directly -> motherId is 0 for those events + if (hasZ){ + if (MotherIdx[iGen] == MotherIdx[jGen] && MotherPdgId[iGen] == 23) return GenPart_pt[MotherIdx[iGen]]; + }else{ + if (MotherIdx[iGen] == MotherIdx[jGen] && MotherIdx[iGen] == 0) return GenPart_pt[MotherIdx[iGen]]; + } + } + } + //std::cout << "Falling back!" 
<< std::endl; + return gen_ptll; + +} + +#endif + """], + 'class': 'GetGenZpt', + 'args': 'nGenPart, GenPart_pt, GenPart_pdgId, GenPart_genPartIdxMother, GenPart_statusFlags, gen_ptll', + # 'expr': 'gen_ptll', + 'samples': mc +} + +# Jet bins +# using Alt(CleanJet_pt, n, 0) instead of Sum(CleanJet_pt >= 30) because jet pt ordering is not strictly followed in JES-varied samples + +# No jet with pt > 30 GeV +aliases['zeroJet'] = { + 'expr': 'Alt(CleanJet_pt, 0, 0) < 30.' +} + +aliases['oneJet'] = { + 'expr': 'Alt(CleanJet_pt, 0, 0) > 30.' +} + +aliases['multiJet'] = { + 'expr': 'Alt(CleanJet_pt, 1, 0) > 30.' +} + +aliases['noJetInHorn'] = { + 'expr' : 'Sum(CleanJet_pt > 30 && CleanJet_pt < 50 && abs(CleanJet_eta) > 2.6 && abs(CleanJet_eta) < 3.1) == 0', +} + +######################################################################## +# B-Tagging WP: https://btv-wiki.docs.cern.ch/ScaleFactors/Run3Summer22/ +######################################################################## + +# Algo / WP / WP cut +btagging_WPs = { + "DeepFlavB" : { + "loose" : "0.0583", + "medium" : "0.3086", + "tight" : "0.7183", + "xtight" : "0.8111", + "xxtight" : "0.9512", + }, + "RobustParTAK4B" : { + "loose" : "0.0849", + "medium" : "0.4319", + "tight" : "0.8482", + "xtight" : "0.9151", + "xxtight" : "0.9874", + }, + "PNetB" : { + "loose" : "0.0470", + "medium" : "0.2450", + "tight" : "0.6734", + "xtight" : "0.7862", + "xxtight" : "0.9610", + } +} + +# Algo / SF name +btagging_SFs = { + "DeepFlavB" : "deepjet", + "RobustParTAK4B" : "partTransformer", + "PNetB" : "partNet", +} + +# Algorithm and WP selection +bAlgo = 'PNetB' # ['DeepFlavB','RobustParTAK4B','PNetB'] +WP = 'loose' # ['loose','medium','tight','xtight','xxtight'] + +# Access information from dictionaries +bWP = btagging_WPs[bAlgo][WP] +bSF = btagging_SFs[bAlgo] + +# # B tagging selections and scale factors +aliases['bVeto'] = { + 'expr': f'Sum(CleanJet_pt > 20. 
&& abs(CleanJet_eta) < 2.5 && Take(Jet_btag{bAlgo}, CleanJet_jetIdx) > {bWP}) == 0' +} + +aliases['bReq'] = { + 'expr': f'Sum(CleanJet_pt > 30. && abs(CleanJet_eta) < 2.5 && Take(Jet_btag{bAlgo}, CleanJet_jetIdx) > {bWP}) >= 1' +} + +########################################################################## +# End of b tagging +########################################################################## + +# Top control region +aliases['topcr'] = { + 'expr': 'mtw2>30 && mll>50 && ((zeroJet && !bVeto) || bReq)' +} + +WP_eval = 'L' # ['L', 'M', 'T', 'XT', 'XXT'] +tagger = 'particleNet' +eff_map_year = '2022' # ['2022', '2022EE', '2023', '2023BPix', '2024] +year = 'Run3-22CDSep23-Summer22-NanoAODv12' # ['Run3-22CDSep23-Summer22-NanoAODv12', 'Run3-22EFGSep23-Summer22EE-NanoAODv12, 'Run3-23CSep23-Summer23-NanoAODv12', 'Run3-23DSep23-Summer23BPix-NanoAODv12', 'Run3-24CDEReprocessingFGHIPrompt-Summer24-NanoAODv15'] + +for flavour in ['bc', 'light']: + for shift in ['central', 'up_uncorrelated', 'down_uncorrelated', 'up_correlated', 'down_correlated']: + btagsf = 'btagSF' + flavour + if shift != 'central': + btagsf += '_' + shift + aliases[btagsf] = { + 'linesToAdd': [f'#include "{macros}evaluate_btagSF{flavour}.cc"'], + 'linesToProcess': [f"ROOT.gInterpreter.Declare('btagSF{flavour} btagSF{flavour}_{shift} = btagSF{flavour}(\"{btagmaps}/{eff_map_year}/bTagEff_{eff_map_year}_ttbar_{bAlgo}_loose.root\", \"{year}\");')"], + 'expr': f'btagSF{flavour}_{shift}(CleanJet_pt, CleanJet_eta, CleanJet_jetIdx, nCleanJet, Jet_hadronFlavour, Jet_btag{bAlgo}, "{WP_eval}", "{shift}", "{tagger}","{eff_map_year}")', + 'samples' : mc, + } + +# Data/MC scale factors and systematic uncertainties +aliases['SFweight'] = { + 'expr': ' * '.join(['SFweight2l', 'LepWPCut', 'LepWPSF', 'btagSFbc', 'btagSFlight']), + 'samples': mc +} + +aliases['SFweightEleUp'] = { + 'expr': 'LepSF2l__ele_'+eleWP+'__Up', + 'samples': mc +} +aliases['SFweightEleDown'] = { + 'expr': 'LepSF2l__ele_'+eleWP+'__Down', + 
'samples': mc +} +aliases['SFweightMuUp'] = { + 'expr': 'LepSF2l__mu_'+muWP+'__Up', + 'samples': mc +} +aliases['SFweightMuDown'] = { + 'expr': 'LepSF2l__mu_'+muWP+'__Down', + 'samples': mc +} diff --git a/ZpTreweighting/2022_v12/aliases_woBtagSF.py b/ZpTreweighting/2022_v12/aliases_woBtagSF.py new file mode 100644 index 00000000..4f882261 --- /dev/null +++ b/ZpTreweighting/2022_v12/aliases_woBtagSF.py @@ -0,0 +1,263 @@ +import os +import copy +import inspect + +configurations = os.path.realpath(inspect.getfile(inspect.currentframe())) # this file + +aliases = {} +aliases = OrderedDict() + +mc = [skey for skey in samples if skey not in ('Fake', 'DATA', 'Dyemb', 'DATA_EG', 'DATA_Mu', 'DATA_EMu', 'Fake_EG', 'Fake_Mu', 'Fake_EMu')] +# Commented out as not used (DS, 19Nov25) +# mc_emb = [skey for skey in samples if skey not in ('Fake', 'DATA', 'DATA_Mu', 'DATA_EMu', 'Fake_EG', 'Fake_Mu', 'Fake_EMu')] + +# Using LepSF2l__ele_cutBased_LooseID_tthMVA_Run3__mu_cut_TightID_pfIsoTight_HWW_tthmva_67 from latest git repo push (https://github.com/latinos/PlotsConfigurationsRun3/blob/f8a0f50dfe6301543203d9d260ad721204a2739f/ControlRegions/DY/2022_v12/aliases.py#L14-L15) +eleWP = 'cutBased_LooseID_tthMVA_Run3' +muWP = 'cut_TightID_pfIsoTight_HWW_tthmva_67' + +aliases['LepWPCut'] = { + 'expr': 'LepCut2l__ele_'+eleWP+'__mu_'+muWP, + 'samples': mc + ['DATA'], +} + +aliases['LepWPSF'] = { + 'expr': 'LepSF2l__ele_'+eleWP+'__mu_'+muWP, + 'samples': mc +} + +# gen-matching to prompt only (GenLepMatch2l matches to *any* gen lepton) +aliases['PromptGenLepMatch2l'] = { + 'expr': 'Alt(Lepton_promptgenmatched, 0, 0) * Alt(Lepton_promptgenmatched, 1, 0)', + 'samples': mc +} + +aliases['gen_Zpt'] = { + # 'linesToAdd': [".L /afs/cern.ch/user/d/dshekar/public/RDF/PlotsConfigurationsRun3/HWW_polarization/Extended/getGenZpt.cc+"], + # 'linesToAdd': ['.L /eos/user/d/dshekar/public/RDF/PlotsConfigurationsRun3/HWW_polarization/Extended/getGenZpt.cc+'], + 'linesToAdd': [ + """ +#ifndef getGenZpt 
+#define getGenZpt + +#include + +#include "TVector2.h" +#include "Math/Vector4Dfwd.h" +#include "Math/GenVector/LorentzVector.h" +#include "Math/GenVector/PtEtaPhiM4D.h" + +#include +#include "ROOT/RVec.hxx" + +using namespace ROOT; +using namespace ROOT::VecOps; + +double GetGenZpt( + int nGenPart, + RVecF GenPart_pt, + RVecI GenPart_pdgId, + RVecI GenPart_genPartIdxMother, + RVecI GenPart_statusFlags, + float gen_ptll + ){ + + + + // Find Gen pT of Z decaying into leptons + unsigned nGen = nGenPart; + std::vector LepCands{}; + std::vector MotherIdx{}; + std::vector MotherPdgId{}; + int pdgId, sFlag, MIdx; + bool hasZ = false; + //std::cout << "==========" << std::endl; + for (unsigned iGen{0}; iGen != nGen; ++iGen){ + pdgId = std::abs(GenPart_pdgId[iGen]); + sFlag = GenPart_statusFlags[iGen]; + //std::cout << pdgId << " ; " << sFlag << " ; " << GenPart_pt->At(iGen) << " ; " << GenPart_genPartIdxMother->At(iGen) << std::endl; + if (((pdgId == 11) || (pdgId == 13) || (pdgId == 15)) && ((sFlag >> 0 & 1) || (sFlag >> 2 & 1) || (sFlag >> 3 & 1) || (sFlag >> 4 & 1))){ + LepCands.push_back(iGen); + MIdx = GenPart_genPartIdxMother[iGen]; + MotherIdx.push_back(MIdx); + if (MIdx > -1){ + MotherPdgId.push_back(GenPart_pdgId[MIdx]); + if (GenPart_pdgId[MIdx]==23) hasZ = true; + }else{ + MotherPdgId.push_back(0); + } + } + } + + //std::cout << "Check:" << std::endl; + for (unsigned iGen{0}; iGen != LepCands.size(); ++iGen){ + for (unsigned jGen{0}; jGen != LepCands.size(); ++jGen){ + if (jGen <= iGen) continue; + //std::cout << iGen << " ; " << MotherIdx[iGen] << " ; " << jGen << " ; " << MotherIdx[jGen] << " ; " << MotherPdgId[iGen] << " ; " << hasZ << std::endl; + // Some DY samples generate the Z; others have the two leptons produced directly -> motherId is 0 for those events + if (hasZ){ + if (MotherIdx[iGen] == MotherIdx[jGen] && MotherPdgId[iGen] == 23) return GenPart_pt[MotherIdx[iGen]]; + }else{ + if (MotherIdx[iGen] == MotherIdx[jGen] && MotherIdx[iGen] == 0) return 
GenPart_pt[MotherIdx[iGen]]; + } + } + } + //std::cout << "Falling back!" << std::endl; + return gen_ptll; + +} + +#endif + """], + 'class': 'GetGenZpt', + 'args': 'nGenPart, GenPart_pt, GenPart_pdgId, GenPart_genPartIdxMother, GenPart_statusFlags, gen_ptll', + # 'expr': 'gen_ptll', + 'samples': mc +} + +# Jet bins +# using Alt(CleanJet_pt, n, 0) instead of Sum(CleanJet_pt >= 30) because jet pt ordering is not strictly followed in JES-varied samples + +# No jet with pt > 30 GeV +aliases['zeroJet'] = { + 'expr': 'Alt(CleanJet_pt, 0, 0) < 30.' +} + +aliases['oneJet'] = { + 'expr': 'Alt(CleanJet_pt, 0, 0) > 30.' +} + +aliases['multiJet'] = { + 'expr': 'Alt(CleanJet_pt, 1, 0) > 30.' +} + +aliases['noJetInHorn'] = { + 'expr' : 'Sum(CleanJet_pt > 30 && CleanJet_pt < 50 && abs(CleanJet_eta) > 2.6 && abs(CleanJet_eta) < 3.1) == 0', +} + +######################################################################## +# B-Tagging WP: https://btv-wiki.docs.cern.ch/ScaleFactors/Run3Summer22/ +######################################################################## + +# Algo / WP / WP cut +btagging_WPs = { + "DeepFlavB" : { + "loose" : "0.0583", + "medium" : "0.3086", + "tight" : "0.7183", + "xtight" : "0.8111", + "xxtight" : "0.9512", + }, + "RobustParTAK4B" : { + "loose" : "0.0849", + "medium" : "0.4319", + "tight" : "0.8482", + "xtight" : "0.9151", + "xxtight" : "0.9874", + }, + "PNetB" : { + "loose" : "0.0470", + "medium" : "0.2450", + "tight" : "0.6734", + "xtight" : "0.7862", + "xxtight" : "0.9610", + } +} + +# Algo / SF name +btagging_SFs = { + "DeepFlavB" : "deepjet", + "RobustParTAK4B" : "partTransformer", + "PNetB" : "partNet", +} + +# Algorithm and WP selection +bAlgo = 'PNetB' # ['DeepFlavB','RobustParTAK4B','PNetB'] +WP = 'loose' # ['loose','medium','tight','xtight','xxtight'] + +# Access information from dictionaries +bWP = btagging_WPs[bAlgo][WP] +bSF = btagging_SFs[bAlgo] + +# # B tagging selections and scale factors +aliases['bVeto'] = { + 'expr': f'Sum(CleanJet_pt > 
20. && abs(CleanJet_eta) < 2.5 && Take(Jet_btag{bAlgo}, CleanJet_jetIdx) > {bWP}) == 0' +} + +aliases['bReq'] = { + 'expr': f'Sum(CleanJet_pt > 30. && abs(CleanJet_eta) < 2.5 && Take(Jet_btag{bAlgo}, CleanJet_jetIdx) > {bWP}) >= 1' +} + +# Commenting out, as this was not included in latest git repo (https://github.com/latinos/PlotsConfigurationsRun3/blob/f8a0f50dfe6301543203d9d260ad721204a2739f/ControlRegions/DY/2022_v12/aliases.py#L89-L104) +# aliases['bVetoSF'] = { +# 'expr': 'TMath::Exp(Sum(LogVec((CleanJet_pt>20 && abs(CleanJet_eta)<2.5)*Take(Jet_btagSF_{}_shape, CleanJet_jetIdx)+1*(CleanJet_pt<20 || abs(CleanJet_eta)>2.5))))'.format(bSF), +# 'samples': mc +# } + +# aliases['bReqSF'] = { +# 'expr': 'TMath::Exp(Sum(LogVec((CleanJet_pt>30 && abs(CleanJet_eta)<2.5)*Take(Jet_btagSF_{}_shape, CleanJet_jetIdx)+1*(CleanJet_pt<30 || abs(CleanJet_eta)>2.5))))'.format(bSF), +# 'samples': mc +# } + +# # Top control region +# aliases['topcr'] = { +# 'expr': 'mtw2>30 && mll>50 && ((zeroJet && !bVeto) || bReq)' +# } + +# # WW control region +# aliases['wwcr'] = { +# 'expr': 'mth>60 && mtw2>30 && mll>100 && bVeto' +# } + +# # Overall b tag SF +# aliases['btagSF'] = { +# 'expr': '(bVeto || (topcr && zeroJet))*bVetoSF + (topcr && !zeroJet)*bReqSF', +# 'samples': mc +# } + +# # Systematic uncertainty variations +# for shift in ['jes','lf','hf','lfstats1','lfstats2','hfstats1','hfstats2','cferr1','cferr2']: + +# for targ in ['bVeto', 'bReq']: +# alias = aliases['%sSF%sup' % (targ, shift)] = copy.deepcopy(aliases['%sSF' % targ]) +# alias['expr'] = alias['expr'].replace('btagSF_deepjet_shape', 'btagSF_deepjet_shape_up_%s' % shift) + +# alias = aliases['%sSF%sdown' % (targ, shift)] = copy.deepcopy(aliases['%sSF' % targ]) +# alias['expr'] = alias['expr'].replace('btagSF_deepjet_shape', 'btagSF_deepjet_shape_down_%s' % shift) + +# aliases['btagSF%sup' % shift] = { +# 'expr': aliases['btagSF']['expr'].replace('SF', 'SF' + shift + 'up'), +# 'samples': mc +# } + +# aliases['btagSF%sdown' 
% shift] = { +# 'expr': aliases['btagSF']['expr'].replace('SF', 'SF' + shift + 'down'), +# 'samples': mc +# } + +########################################################################## +# End of b tagging +########################################################################## + +# Data/MC scale factors and systematic uncertainties +aliases['SFweight'] = { + # 'expr': ' * '.join(['SFweight2l', 'LepWPCut', 'LepWPSF','btagSF']), + 'expr': ' * '.join(['SFweight2l', 'LepWPCut', 'LepWPSF']), + 'samples': mc +} + +aliases['SFweightEleUp'] = { + 'expr': 'LepSF2l__ele_'+eleWP+'__Up', + 'samples': mc +} +aliases['SFweightEleDown'] = { + 'expr': 'LepSF2l__ele_'+eleWP+'__Down', + 'samples': mc +} +aliases['SFweightMuUp'] = { + 'expr': 'LepSF2l__mu_'+muWP+'__Up', + 'samples': mc +} +aliases['SFweightMuDown'] = { + 'expr': 'LepSF2l__mu_'+muWP+'__Down', + 'samples': mc +} diff --git a/ZpTreweighting/2022_v12/automate.py b/ZpTreweighting/2022_v12/automate.py new file mode 100644 index 00000000..76e49d4d --- /dev/null +++ b/ZpTreweighting/2022_v12/automate.py @@ -0,0 +1,744 @@ +#!/usr/bin/env python3 +""" +run_ZpTrw_workflow.py +===================== +Automated Z pT reweighting extraction workflow. + +The script orchestrates the following steps: + + Pre-Phase 1 — prepare configuration files + ------------------------------------------ + 0a. Patch configuration.py: set tag = "{year}_{sample_type}_{original_tag}" + 0b. Comment out the addSampleWeight line for DY pT reweighting in samples.py + (weights cannot be applied before they are derived) + + Round 1 — ZpTreweighting analysis + ---------------------------------- + 1a. mkShapesRDF -c 1 compile configuration + 1b. mkShapesRDF -o 0 -f . -b 1 submit HTCondor jobs + 1c. (wait) poll condor until all jobs finish + 1d. mkShapesRDF -o 2 -f . merge individual ROOT files (hadd) + 2. extract_Zptrw.py extract the Z pT reweighting function + and overwrite dyZpTrw.json + 2b. 
Move extract_Zptrw.py plots to plots_{year}_{sample_type}_obtainWeights/ + 2c. mkPlot --onlyPlot cratio create comparison plots on the merged file + + Round 2 — main analysis (optional, --second-analysis DIR) + ---------------------------------------------------------- + 3a. Update DYrew key in aliases.py with the correct year/sample-type + 3b. Uncomment addSampleWeight for DY pT reweighting in samples.py + 3c. mkShapesRDF -c 1 compile second analysis + 3d. mkShapesRDF -o 0 -f . -b 1 submit second-round condor jobs + 3e. (wait) poll condor until all jobs finish + 3f. mkShapesRDF -o 2 -f . merge second-round ROOT files + 3g. mkPlot --onlyPlot cratio create comparison plots for second round + +Prerequisites +------------- + * Source mkShapesRDF setup first + +Typical usage +------------- + # Run from inside ZpTreweighting/ or give the folder explicitly: + python automate.py --year 2022 --sample-type LO + + # Also kick off the second-round analysis immediately after: + python automate.py --year 2022 --sample-type LO --second-analysis ./ +""" + +import argparse +import glob +import os +import re +import shutil +import subprocess +import sys +import time + + +def banner(msg): + width = max(60, len(msg) + 4) + print("\n" + "=" * width) + print(f" {msg}") + print("=" * width) + + +def info(msg): + print(f" {msg}") + +# Run commands +def run_cmd(cmd, dry_run=False, cwd=None): + """Print and (optionally) execute *cmd*. + + *cmd* may be a list of strings or a single shell string. + Returns the process exit code (always 0 in dry-run mode). 
+ """ + display = " ".join(cmd) if isinstance(cmd, list) else cmd + info(f"$ {display}") + if dry_run: + return 0 + result = subprocess.run(cmd, cwd=cwd, shell=isinstance(cmd, str)) + return result.returncode + + +def run_cmd_output(cmd, cwd=None): + """Run *cmd* and return *(returncode, stdout, stderr)* as strings.""" + result = subprocess.run( + cmd, + cwd=cwd, + capture_output=True, + text=True, + shell=isinstance(cmd, str), + ) + return result.returncode, result.stdout, result.stderr + + +# HTCondor job tracking +# Return the set of condor cluster IDs found in the job log files. Log files are expected at ``{batch_dir}/{tag}/**/log.txt`` +def _get_cluster_ids_from_logs(batch_dir, tag): + log_pattern = os.path.join(batch_dir, tag, "**", "log.txt") + log_files = glob.glob(log_pattern, recursive=True) + cluster_ids = set() + for log_file in log_files: + try: + with open(log_file) as fh: + for line in fh: + m = re.match(r"000 \((\d+)\.\d+\.\d+\)", line) + if m: + cluster_ids.add(m.group(1)) + except OSError: + pass + return cluster_ids + +# Return the number of jobs still in the condor queue for *cluster_id*. +def _count_condor_jobs_in_cluster(cluster_id): + rc, stdout, _ = run_cmd_output(["condor_q", str(cluster_id)]) + if rc != 0: + # cluster is gone (all jobs completed or removed) + return 0 + m = re.search(r"(\d+) jobs?", stdout) + return int(m.group(1)) if m else 0 + +# Hold until all HTCondor jobs for *tag* are no longer queued. +def wait_for_condor_jobs(batch_dir, tag, poll_interval=120, dry_run=False): + CONDOR_REGISTRATION_DELAY = 10 + + if dry_run: + info("[dry-run] Skipping condor wait.") + return + + # Give HTCondor a few seconds to register newly submitted jobs. + time.sleep(CONDOR_REGISTRATION_DELAY) + + cluster_ids = _get_cluster_ids_from_logs(batch_dir, tag) + if not cluster_ids: + info("WARNING: No condor cluster IDs found in log files. 
" + "Waiting 60 s and retrying once...") + time.sleep(60) + cluster_ids = _get_cluster_ids_from_logs(batch_dir, tag) + + if not cluster_ids: + info("WARNING: Still no cluster IDs found. " + "Assuming jobs have already completed or were submitted " + "outside HTCondor.") + return + + info(f"Tracking condor cluster(s): {', '.join(sorted(cluster_ids))}") + + while True: + remaining = { + cid for cid in cluster_ids + if _count_condor_jobs_in_cluster(cid) > 0 + } + if not remaining: + info("All condor jobs have completed.") + return + + total = sum(_count_condor_jobs_in_cluster(c) for c in remaining) + info( + f"[{time.strftime('%H:%M:%S')}] " + f"{total} job(s) still queued in " + f"cluster(s) {', '.join(sorted(remaining))}. " + f"Polling again in {poll_interval} s..." + ) + time.sleep(poll_interval) + + +# Configuration reader +# Execute *cfg_file* and return a dict with analysis settings. +def read_configuration(cfg_file): + ns = { + "__file__": os.path.abspath(cfg_file), + "os": os, + "sys": sys, + } + try: + with open(cfg_file) as fh: + exec(compile(fh.read(), cfg_file, "exec"), ns) + except Exception as exc: + # configuration.py may call os.getlogin() which can fail in some + # environments; fall through with whatever was captured so far. + info(f"WARNING: Error while parsing {cfg_file}: {exc}") + + tag = ns.get("tag", "ZpTreweighting") + output_folder = ns.get( + "outputFolder", + os.path.join( + "/eos/user", + os.environ.get("USER", "unknown")[0], + os.environ.get("USER", "unknown"), + "mkShapesRDF_rootfiles", + tag, + "rootFile", + ), + ) + return { + "tag": tag, + "outputFolder": output_folder.rstrip("/"), + "outputFile": ns.get("outputFile", f"mkShapes__{tag}.root"), + "batchFolder": ns.get("batchFolder", "condor"), + } + + +# Workflow phases +# Phase 1a+1b — compile and submit condor jobs. 
def phase1_submit(zptrw_dir, dry_run=False):
    """Phase 1a+1b: compile the configuration and submit the batch jobs.

    Runs ``mkShapesRDF -c 1`` and then ``mkShapesRDF -o 0 -f . -b 1`` with
    *zptrw_dir* as working directory.  The script terminates through
    ``sys.exit`` as soon as one of the two commands returns a non-zero code.
    With *dry_run* the commands are only printed.
    """
    banner("Phase 1a: Compile ZpTreweighting configuration")
    rc = run_cmd(["mkShapesRDF", "-c", "1"], dry_run=dry_run, cwd=zptrw_dir)
    if rc != 0:
        sys.exit(f"ERROR: mkShapesRDF -c 1 failed (exit code {rc})")

    banner("Phase 1b: Submit ZpTreweighting condor jobs")
    rc = run_cmd(
        ["mkShapesRDF", "-o", "0", "-f", ".", "-b", "1"],
        dry_run=dry_run,
        cwd=zptrw_dir,
    )
    if rc != 0:
        sys.exit(f"ERROR: mkShapesRDF -o 0 failed (exit code {rc})")


def phase1_wait(zptrw_dir, cfg, poll_interval=120, dry_run=False):
    """Phase 1c: block until the submitted HTCondor jobs have left the queue.

    *cfg* must provide the keys ``"batchFolder"`` (joined onto *zptrw_dir* to
    locate the job logs) and ``"tag"``.  The actual polling is delegated to
    ``wait_for_condor_jobs``.
    """
    banner("Phase 1c: Waiting for HTCondor jobs to complete")
    batch_dir = os.path.join(zptrw_dir, cfg["batchFolder"])
    wait_for_condor_jobs(
        batch_dir=batch_dir,
        tag=cfg["tag"],
        poll_interval=poll_interval,
        dry_run=dry_run,
    )


def phase1_merge(zptrw_dir, dry_run=False):
    """Phase 1d: merge the per-job ROOT files with ``mkShapesRDF -o 2 -f .``.

    Terminates the script through ``sys.exit`` when the merge command returns
    a non-zero code.
    """
    banner("Phase 1d: Merge ROOT files")
    rc = run_cmd(
        ["mkShapesRDF", "-o", "2", "-f", "."],
        dry_run=dry_run,
        cwd=zptrw_dir,
    )
    if rc != 0:
        sys.exit(f"ERROR: Phase 1d - mkShapesRDF -o 2 (merge) failed (exit code {rc})")


# Configuration / samples file patching helpers

def patch_configuration_tag(cfg_file, year, sample_type, suffix, dry_run=False):
    """Overwrite the ``tag`` assignment in *cfg_file*.

    The first ``tag = "..."`` / ``tag = '...'`` assignment found is replaced
    by ``tag = "{year}_{sample_type}_ZpTreweighting_{suffix}"``.  The previous
    value is discarded rather than extended, so running the patch repeatedly
    always yields the same tag.

    Nothing is written when *cfg_file* does not exist, when no ``tag``
    assignment is found, or when *dry_run* is true; each case is reported
    through ``info``.
    """
    banner("Patching configuration.py: prepending year/sample-type to tag")

    if not os.path.exists(cfg_file):
        info(f"WARNING: {cfg_file} not found; skipping tag patch.")
        return

    with open(cfg_file) as fh:
        content = fh.read()

    # Match: tag = "..." or tag = '...', whatever the current value is.
    # Group 1 keeps the text up to the opening quote so the original spacing
    # around '=' survives the substitution.
    pattern = re.compile(
        r"""^(\s*tag\s*=\s*)["'][^"']*["']""",
        re.MULTILINE,
    )

    def _replace(m):
        new_tag = f'{year}_{sample_type}_ZpTreweighting_{suffix}'
        info(f" tag: → '{new_tag}'")
        return f'{m.group(1)}"{new_tag}"'

    # count=1: only the first assignment is patched.
    new_content, count = pattern.subn(_replace, content, count=1)
    if count == 0:
        info("WARNING: Could not find 'tag = ...' line in configuration.py; "
             "skipping tag patch. Verify that configuration.py contains a "
             "tag = \"\" assignment at module level.")
        return

    if not dry_run:
        with open(cfg_file, "w") as fh:
            fh.write(new_content)
        info(f" Updated: {cfg_file}")
    else:
        info("[dry-run] Would update configuration.py tag.")


def comment_addsampleweight_dy(samples_file, dry_run=False):
    """Comment out the DY pT-reweighting ``addSampleWeight`` call(s).

    Every not-yet-commented ``addSampleWeight(...)`` call in *samples_file*
    whose arguments contain a quoted ``DY_<name>_ZpTrw`` weight gets a ``# ``
    marker.  The marker is inserted *after* the line's indentation, so that
    ``uncomment_addsampleweight_dy`` restores the line exactly, including
    calls that sit inside an indented block.

    Only the line on which the call starts receives the marker.
    NOTE(review): a call spread over several lines would be left
    half-commented; confirm the calls in samples.py are single-line.

    Nothing is written when the file is missing, when no matching call is
    found, or when *dry_run* is true.
    """
    banner("Commenting out addSampleWeight for DY pT reweighting in samples.py")

    if not os.path.exists(samples_file):
        info(f"WARNING: {samples_file} not found; skipping.")
        return

    with open(samples_file) as fh:
        content = fh.read()

    # Pattern breakdown:
    #   ^([ \t]*)              - group 1: indentation (a leading '#' cannot match)
    #   (addSampleWeight\s*\(  - group 2: function name and opening paren
    #   [^)]*                  - any leading arguments
    #   ['"]DY_..._ZpTrw['"]   - the DY ZpTrw weight argument
    #   [^)]*\))               - remaining arguments and closing paren
    pattern = re.compile(
        r"^([ \t]*)(addSampleWeight\s*\([^)]*['\"]DY_[A-Za-z0-9]+_ZpTrw['\"][^)]*\))",
        re.MULTILINE,
    )

    if not pattern.search(content):
        info("WARNING: No uncommented addSampleWeight DY ZpTrw line found; skipping.")
        return

    # Keep the indentation in front of the marker; placing '# ' before it
    # would make the later uncomment step drop the indentation.
    new_content = pattern.sub(r"\1# \2", content)
    info(" Commented out addSampleWeight DY ZpTrw line.")

    if not dry_run:
        with open(samples_file, "w") as fh:
            fh.write(new_content)
        info(f" Updated: {samples_file}")
    else:
        info("[dry-run] Would comment out addSampleWeight DY ZpTrw line.")


def uncomment_addsampleweight_dy(samples_file, dry_run=False):
    """Re-enable the DY pT-reweighting ``addSampleWeight`` call(s).

    Every commented ``addSampleWeight(...)`` call in *samples_file* whose
    arguments contain a quoted ``DY_<name>_ZpTrw`` weight has its ``#`` marker
    and the blanks following it removed; whitespace in front of the marker is
    kept as the line's indentation.

    Nothing is written when the file is missing, when no matching line is
    found, or when *dry_run* is true.
    """
    banner("Uncommenting addSampleWeight for DY pT reweighting in samples.py")

    if not os.path.exists(samples_file):
        info(f"WARNING: {samples_file} not found; skipping.")
        return

    with open(samples_file) as fh:
        content = fh.read()

    # Group 1: indentation before '#'; group 2: the call itself.
    pattern = re.compile(
        r"^([ \t]*)#[ \t]*(addSampleWeight\s*\([^)]*['\"]DY_[A-Za-z0-9]+_ZpTrw['\"][^)]*\))",
        re.MULTILINE,
    )

    if not pattern.search(content):
        info("WARNING: No commented addSampleWeight DY ZpTrw line found; skipping.")
        return

    new_content = pattern.sub(r"\1\2", content)
    info(" Uncommented addSampleWeight DY ZpTrw line.")

    if not dry_run:
        with open(samples_file, "w") as fh:
            fh.write(new_content)
        info(f" Updated: {samples_file}")
    else:
        info("[dry-run] Would uncomment addSampleWeight DY ZpTrw line.")


# Replace DYrew['old_year']['old_type'] with DYrew['{year}']['{sample_type}'] in aliases.py.
def update_aliases_dyrew_key(aliases_file, year, sample_type, dry_run=False):
    """Point every ``DYrew[...][..._<jetbin>]`` lookup at *year* / *sample_type*.

    References of the form ``DYrew['<year>']['<type>_<jetbin>']`` (single or
    double quotes, jet bin one of ``0j``, ``1j``, ``2j``) are rewritten to
    ``DYrew['{year}']['{sample_type}_<jetbin>']``.  The quote characters and
    the jet-bin suffix of each reference are preserved.

    Nothing is written when *aliases_file* is missing, when it contains no
    such reference, or when *dry_run* is true.
    """
    banner(f"Updating DYrew key in {os.path.basename(aliases_file)}")

    if not os.path.exists(aliases_file):
        info(f"WARNING: {aliases_file} not found; skipping DYrew key update.")
        return

    with open(aliases_file) as handle:
        original_text = handle.read()

    # Groups: (1) quote around the year key, (2) quote around the type key,
    # (3) jet-bin suffix.  The backreferences force matching quote pairs.
    dyrew_ref = re.compile(
        r"""DYrew\[\s*(['"])[^'"]+\1\s*\]\[\s*(['"])[^'"]+_(0j|1j|2j)\2\s*\]"""
    )

    def _rekey(hit):
        q_year, q_type, jet_bin = hit.groups()
        return f"DYrew[{q_year}{year}{q_year}][{q_type}{sample_type}_{jet_bin}{q_type}]"

    # One pass yields both the rewritten text and the number of references.
    patched_text, n_refs = dyrew_ref.subn(_rekey, original_text)
    if n_refs == 0:
        info(f"No DYrew['...']['...'] references found in {aliases_file}; skipping.")
        return

    info(f" Updated {n_refs} DYrew key reference(s) → ['{year}']['{sample_type}_']")

    if dry_run:
        info(f"[dry-run] Would update DYrew key in {aliases_file}.")
        return

    with open(aliases_file, "w") as handle:
        handle.write(patched_text)
    info(f" Updated: {aliases_file}")


# Plot helpers
# Move plots produced by extract_Zptrw.py into extractPlots_{year}_{sample_type}_{folder_suffix}/.
def move_zptrw_plots(zptrw_dir, year, sample_type, folder_suffix="", dry_run=False):
    """Archive the plots written by extract_Zptrw.py.

    All ``ZpTreweighting_*.pdf`` and ``ZpTreweighting_*.png`` files found
    directly in *zptrw_dir* are moved to
    ``{zptrw_dir}/extractPlots_{year}_{sample_type}_{folder_suffix}/``, which
    is created on demand.  With *dry_run* the planned moves are only printed.
    """
    banner("Moving extract_Zptrw.py plots to archive folder")

    target_dir = os.path.join(zptrw_dir, f"extractPlots_{year}_{sample_type}_{folder_suffix}")
    plot_files = (
        glob.glob(os.path.join(zptrw_dir, "ZpTreweighting_*.pdf"))
        + glob.glob(os.path.join(zptrw_dir, "ZpTreweighting_*.png"))
    )

    if not plot_files:
        info("No ZpTreweighting_*.pdf/png files found to move.")
        return

    info(f"Target folder: {target_dir}")
    if not dry_run:
        # Create the archive folder once instead of once per moved file.
        os.makedirs(target_dir, exist_ok=True)

    for src in plot_files:
        dest = os.path.join(target_dir, os.path.basename(src))
        info(f" {os.path.basename(src)} → {os.path.relpath(dest, zptrw_dir)}")
        if not dry_run:
            shutil.move(src, dest)

    if dry_run:
        info("[dry-run] Would create target folder and move plot files.")


def run_mkplot(analysis_dir, dry_run=False):
    """Run ``mkPlot --onlyPlot cratio --showIntegralLegend 1 --fileFormats png``.

    The command is executed with *analysis_dir* as working directory.  A
    non-zero exit code is only reported as a warning; the workflow goes on.
    """
    banner(f"Running mkPlot in {analysis_dir}")
    rc = run_cmd(
        ["mkPlot", "--onlyPlot", "cratio", "--showIntegralLegend", "1",
         "--fileFormats", "png"],
        dry_run=dry_run,
        cwd=analysis_dir,
    )
    if rc != 0:
        info(f"WARNING: mkPlot exited with code {rc}. Continuing workflow.")


def phase2_extract(zptrw_dir, cfg, year="2022", sample_type="LO", run_fit = "", dry_run=False):
    """Phase 2: run extract_Zptrw.py for the jet bins 0, 1 and 2.

    For each jet bin two commands are executed in *zptrw_dir*:

    1. the ``mm`` channel, which is handed ``--write-json dyZpTrw.json``
       together with ``--year`` / ``--sample-type``; when *run_fit* equals
       ``"-f"`` the options ``-f -n 2`` are passed in addition;
    2. the ``ee`` channel, which only receives the merged input file.

    The merged input is ``cfg["outputFolder"]/cfg["outputFile"]``.  The script
    terminates through ``sys.exit`` when that file is missing (unless
    *dry_run*) or as soon as either command returns a non-zero exit code.
    """
    banner("Phase 2: Extract Z pT reweighting function → update dyZpTrw.json")

    merged_file = os.path.join(cfg["outputFolder"], cfg["outputFile"])
    dyzptrw_json = os.path.join(zptrw_dir, "dyZpTrw.json")
    extract_script = os.path.join(zptrw_dir, "extract_Zptrw.py")

    if not dry_run and not os.path.exists(merged_file):
        sys.exit(
            f"ERROR: Merged ROOT file not found:\n"
            f" {merged_file}\n"
            f"Run 'mkShapesRDF -o 2 -f .' in {zptrw_dir} first."
        )

    # Fit options are only added to the mm command when a fit is requested.
    fit_args = ["-f", "-n", "2"] if run_fit == "-f" else []

    for njet in [0, 1, 2]:
        # Z->MuMu channel for this jet bin: the command that writes the JSON.
        cmd = (
            [sys.executable, extract_script]
            + fit_args
            + ["-c", "mm", "-nj", str(njet), "--input", merged_file,
               "--write-json", dyzptrw_json, "--year", year,
               "--sample-type", sample_type]
        )
        rc = run_cmd(cmd, dry_run=dry_run, cwd=zptrw_dir)
        # Check right away: this exit code must not be overwritten by the
        # ee run below, otherwise a failed mm run would go unnoticed.
        if rc != 0:
            sys.exit(f"ERROR: extract_Zptrw.py failed (exit code {rc})")

        # Z->ee channel for this jet bin (no JSON output requested).
        cmd = [sys.executable, extract_script, "-c", "ee", "-nj", str(njet), "--input", merged_file]
        rc = run_cmd(cmd, dry_run=dry_run, cwd=zptrw_dir)
        if rc != 0:
            sys.exit(f"ERROR: extract_Zptrw.py failed (exit code {rc})")

        info(f"dyZpTrw.json updated for nJet={njet} → {dyzptrw_json}")


# Phase 3 — prepare, compile, submit, wait, merge, and plot second-round analysis.
+def phase3_second_round(second_dir, year, sample_type, poll_interval=120, + skip_second_wait=False, skip_second_merge=False, + dry_run=False): + banner(f"Phase 3: Second-round mkShapesRDF in\n {second_dir}") + + # Update aliases.py and samples.py before compiling + aliases_file = os.path.join(second_dir, "aliases.py") + update_aliases_dyrew_key(aliases_file, year, sample_type, dry_run=dry_run) + + samples_file = os.path.join(second_dir, "samples.py") + uncomment_addsampleweight_dy(samples_file, dry_run=dry_run) + + rc = run_cmd(["mkShapesRDF", "-c", "1"], dry_run=dry_run, cwd=second_dir) + if rc != 0: + sys.exit( + f"ERROR: mkShapesRDF -c 1 failed in {second_dir} (exit code {rc})" + ) + + rc = run_cmd( + ["mkShapesRDF", "-o", "0", "-f", ".", "-b", "1"], + dry_run=dry_run, + cwd=second_dir, + ) + if rc != 0: + sys.exit( + f"ERROR: mkShapesRDF -o 0 failed in {second_dir} (exit code {rc})" + ) + + # Wait for second-round condor jobs + second_cfg_file = os.path.join(second_dir, "configuration.py") + has_cfg = os.path.exists(second_cfg_file) + + if not skip_second_wait: + if has_cfg or dry_run: + banner("Phase 3c: Waiting for second-round HTCondor jobs to complete") + second_cfg = ( + read_configuration(second_cfg_file) + if has_cfg + else {"tag": "unknown", "batchFolder": "condor", + "outputFolder": ".", "outputFile": "output.root"} + ) + batch_dir = os.path.join(second_dir, second_cfg["batchFolder"]) + wait_for_condor_jobs( + batch_dir=batch_dir, + tag=second_cfg["tag"], + poll_interval=poll_interval, + dry_run=dry_run, + ) + else: + info(f"WARNING: No configuration.py found in {second_dir}; " + "skipping second-round condor wait.") + else: + info("\n[skip-second-wait] Skipping second-round condor wait.") + + # Merge second-round ROOT files + if not skip_second_merge: + banner("Phase 3d: Merge second-round ROOT files") + rc = run_cmd( + ["mkShapesRDF", "-o", "2", "-f", "."], + dry_run=dry_run, + cwd=second_dir, + ) + if rc != 0: + sys.exit( + f"ERROR: mkShapesRDF -o 
# CLI
def _positive_int(text):
    """argparse ``type=`` callable that accepts only integers greater than zero."""
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}")
    if value <= 0:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {value}")
    return value


def parse_args():
    """Build the command-line parser and return the parsed ``sys.argv`` namespace.

    The namespace carries the folder paths, the year / sample-type keys
    written to dyZpTrw.json, the condor polling interval, the ``--skip-*``
    switches used to resume a partially completed workflow, and ``--dry-run``.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # ---- paths ----
    parser.add_argument(
        "--zptrw-folder",
        default=".",
        metavar="DIR",
        help="Path to the ZpTreweighting analysis folder "
             "(default: current directory).",
    )
    parser.add_argument(
        "--second-analysis",
        default=None,
        metavar="DIR",
        help="Path to the second-round analysis folder. "
             "When given, Phase 3 updates aliases/samples, compiles, submits, "
             "waits, merges, and runs mkPlot there after dyZpTrw.py has been updated.",
    )

    # ---- physics ----
    parser.add_argument(
        "--year",
        default="2022",
        help="Year key written to DYrew in dyZpTrw.json (default: '2022').",
    )
    parser.add_argument(
        "--sample-type",
        default="LO",
        help="Sample-type key written to DYrew in dyZpTrw.json "
             "(default: 'LO').",
    )

    # ---- condor polling ----
    parser.add_argument(
        "--poll-interval",
        # Reject 0 / negative values at the CLI instead of passing them on
        # to the polling code.
        type=_positive_int,
        default=120,
        metavar="SECONDS",
        help="Seconds between condor_q polls while waiting for jobs "
             "(default: 120).",
    )

    # ---- skip flags ----
    skip = parser.add_argument_group("skip flags (for re-running partial workflow)")
    skip.add_argument(
        "--skip-submit",
        action="store_true",
        help="Skip Phase 1a+1b (assume jobs are already running or done).",
    )
    skip.add_argument(
        "--skip-wait",
        action="store_true",
        help="Skip Phase 1c (assume all jobs have already finished).",
    )
    skip.add_argument(
        "--skip-merge",
        action="store_true",
        help="Skip Phase 1d (assume the merged ROOT file already exists).",
    )
    skip.add_argument(
        "--skip-extract",
        action="store_true",
        help="Skip Phase 2 (assume dyZpTrw.json is already up to date).",
    )
    skip.add_argument(
        "--skip-second-wait",
        action="store_true",
        help="Skip waiting for second-round HTCondor jobs (Phase 3c).",
    )
    skip.add_argument(
        "--skip-second-merge",
        action="store_true",
        help="Skip merging second-round ROOT files (Phase 3d).",
    )

    # ---- misc ----
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Print every command that would be run without executing it.",
    )

    return parser.parse_args()


def _report_configuration(cfg):
    """Log the configuration entries that drive the submission and merge steps."""
    info(f"tag : {cfg['tag']}")
    info(f"outputFolder : {cfg['outputFolder']}")
    info(f"outputFile : {cfg['outputFile']}")
    info(f"batchFolder : {cfg['batchFolder']}")


def _archive_work_dirs(zptrw_dir, year, sample_type, suffix, dry_run=False):
    """Rename ``condor/`` and ``configs/`` to ``<name>_<year>_<sample_type>_<suffix>``.

    In dry-run mode the ``mv`` command is only reported: the original code
    called ``run_cmd`` without any dry-run handling, so a dry run would have
    renamed the directories for real (or aborted when they did not exist).
    Exits the program if a rename fails.
    """
    for folder in ("condor", "configs"):
        src = os.path.join(zptrw_dir, folder + "/")
        dst = os.path.join(zptrw_dir, f"{folder}_{year}_{sample_type}_{suffix}")
        if dry_run:
            info(f"[dry-run] would run: mv {src} {dst}")
            continue
        rc = run_cmd(["mv", src, dst], cwd=zptrw_dir)
        if rc != 0:
            sys.exit(f"ERROR: failed to rename {folder} directory (exit code {rc})")


def main():
    """Run the Z pT reweighting workflow end to end.

    Steps, each individually skippable through the ``--skip-*`` flags:
    patch configuration.py / samples.py, submit (Phase 1a+1b), wait (1c),
    merge (1d), extract the weights and archive plots and work directories
    (Phase 2) and, when ``--second-analysis`` is given, run the second-round
    analysis and the post-reweighting extraction (Phase 3).
    """
    args = parse_args()

    zptrw_dir = os.path.abspath(args.zptrw_folder)
    if not os.path.isdir(zptrw_dir):
        sys.exit(f"ERROR: ZpTreweighting folder not found: {zptrw_dir}")

    cfg_file = os.path.join(zptrw_dir, "configuration.py")
    if not os.path.exists(cfg_file):
        sys.exit(f"ERROR: configuration.py not found in {zptrw_dir}")

    banner("Z pT reweighting workflow")
    info(f"ZpTreweighting folder : {zptrw_dir}")
    if args.dry_run:
        info("*** DRY-RUN mode — no commands will be executed ***")

    # ---- Pre-Phase 1: Patch configuration.py tag and comment out DY pT rw weight ----
    patch_configuration_tag(cfg_file, args.year, args.sample_type, suffix="obtainWeights", dry_run=args.dry_run)

    cfg = read_configuration(cfg_file)
    _report_configuration(cfg)

    samples_file = os.path.join(zptrw_dir, "samples.py")
    comment_addsampleweight_dy(samples_file, dry_run=args.dry_run)

    # ---- Phase 1a+1b: Submit ----
    if not args.skip_submit:
        phase1_submit(zptrw_dir, dry_run=args.dry_run)
    else:
        info("\n[skip-submit] Skipping job submission.")

    # ---- Phase 1c: Wait ----
    # Waiting only happens for jobs submitted by this run (same condition as
    # before); the --skip-submit-only case used to be skipped silently.
    if args.skip_wait:
        info("\n[skip-wait] Skipping condor wait.")
    elif args.skip_submit:
        info("\n[skip-submit] No jobs were submitted by this run; not waiting on condor.")
    else:
        phase1_wait(
            zptrw_dir,
            cfg,
            poll_interval=args.poll_interval,
            dry_run=args.dry_run,
        )

    # ---- Phase 1d: Merge ----
    if not args.skip_merge:
        phase1_merge(zptrw_dir, dry_run=args.dry_run)
    else:
        info("\n[skip-merge] Skipping ROOT file merge.")

    # ---- Phase 2: Extract + Update ----
    if not args.skip_extract:
        phase2_extract(zptrw_dir, cfg, year=args.year, sample_type=args.sample_type, run_fit="-f", dry_run=args.dry_run)
        # Move plots produced by extract_Zptrw.py to archive folder
        move_zptrw_plots(zptrw_dir, args.year, args.sample_type, folder_suffix="obtainWeights", dry_run=args.dry_run)
        # Create RDF plots from the merged ROOT file
        run_mkplot(zptrw_dir, dry_run=args.dry_run)
        # Rename condor and config folders
        _archive_work_dirs(zptrw_dir, args.year, args.sample_type, "obtainWeights", dry_run=args.dry_run)
    else:
        info("\n[skip-extract] Skipping weight extraction, plot archiving, and mkPlot "
             "(assumes dyZpTrw.json and plots are already up to date).")

    # ---- Phase 3 (optional): Second analysis ----
    if args.second_analysis:
        second_dir = os.path.abspath(args.second_analysis)
        if not os.path.isdir(second_dir):
            sys.exit(f"ERROR: Second-analysis folder not found: {second_dir}")

        patch_configuration_tag(cfg_file, args.year, args.sample_type, suffix="afterReweighting", dry_run=args.dry_run)
        cfg = read_configuration(cfg_file)
        _report_configuration(cfg)

        phase3_second_round(
            second_dir,
            year=args.year,
            sample_type=args.sample_type,
            poll_interval=args.poll_interval,
            skip_second_wait=args.skip_second_wait,
            skip_second_merge=args.skip_second_merge,
            dry_run=args.dry_run,
        )
        # run_fit="" : re-run the extraction without the fit option
        phase2_extract(zptrw_dir, cfg, year=args.year, sample_type=args.sample_type, run_fit="", dry_run=args.dry_run)
        # Move plots produced by extract_Zptrw.py to folder
        move_zptrw_plots(zptrw_dir, args.year, args.sample_type, folder_suffix="afterReweighting", dry_run=args.dry_run)
        # Create comparison plots from the merged ROOT file
        run_mkplot(zptrw_dir, dry_run=args.dry_run)
        # Rename condor and config folders
        _archive_work_dirs(zptrw_dir, args.year, args.sample_type, "afterReweighting", dry_run=args.dry_run)

    banner("Workflow complete!")
    if args.second_analysis:
        info("Second-round analysis finished. Plots are in the analysis folder.")
    else:
        info("dyZpTrw.json has been updated.")
        info("To run the second-round analysis:")
        info(" cd ")
        info(" mkShapesRDF -c 1")
        info(" mkShapesRDF -o 0 -f . -b 1")


if __name__ == "__main__":
    main()
files to compile +filesToExec = [ + samplesFile, + aliasesFile, + variablesFile, + cutsFile, + plotFile, + nuisancesFile, + structureFile, +] + +# list of variables to keep in the compiled configuration folder +varsToKeep = [ + "batchVars", + "outputFolder", + "batchFolder", + "configsFolder", + "outputFile", + "runnerFile", + "tag", + "samples", + "aliases", + "variables", + ("cuts", {"cuts": "cuts", "preselections": "preselections"}), + ("plot", {"plot": "plot", "groupPlot": "groupPlot", "legend": "legend"}), + "nuisances", + "structure", + "lumi", +] + +# list of variables to keep in the batch submission script (script.py) +batchVars = varsToKeep[varsToKeep.index("samples") :] + + +varsToKeep += ['plotPath'] diff --git a/ZpTreweighting/2022_v12/cuts.py b/ZpTreweighting/2022_v12/cuts.py new file mode 100644 index 00000000..41a5ca00 --- /dev/null +++ b/ZpTreweighting/2022_v12/cuts.py @@ -0,0 +1,59 @@ +cuts = {} + +# Preselections - applied to all the cuts, noJetInHorn replaced by zeroJet +preselections = 'Lepton_pt[0] > 25 \ + && Lepton_pt[1] > 13 \ + && (nLepton >= 2 && Alt(Lepton_pt,2,0) < 10) \ + && abs(Lepton_eta[0]) < 2.5 \ + && abs(Lepton_eta[1]) < 2.5 \ + && mll > 60 \ + && mll < 120 \ +' +# Remove zeroJet preselection as weights are being derived in nJet bins (DS, 18Mar26) + # && zeroJet \ + +# Individual cuts and categories + +# Commenting out the inclusive cuts (DS, 19Nov25) +# cuts['Zee_incl'] = '(Lepton_pdgId[0] * Lepton_pdgId[1] == -11*11)' +# cuts['Zmm_incl'] = '(Lepton_pdgId[0] * Lepton_pdgId[1] == -13*13)' + +cuts['Zee'] = { + 'expr' : '(Lepton_pdgId[0] * Lepton_pdgId[1] == -11*11)', + 'categories' : { + '0j' : 'zeroJet', + '1j' : 'oneJet && Alt(CleanJet_pt,1,0)<30', + '2j' : 'multiJet', + } +} + +cuts['Zmm'] = { + 'expr' : '(Lepton_pdgId[0] * Lepton_pdgId[1] == -13*13)', + 'categories' : { + '0j' : 'zeroJet', + '1j' : 'oneJet && Alt(CleanJet_pt,1,0)<30', + '2j' : 'multiJet', + } +} + +# cuts['Zee_noJetInHorn_incl'] = '(Lepton_pdgId[0] * 
Lepton_pdgId[1] == -11*11) && Sum(CleanJet_pt > 30 && CleanJet_pt < 50 && abs(CleanJet_eta) > 2.6 && abs(CleanJet_eta) < 3.1) == 0' + +# cuts['Zmm_noJetInHorn_incl'] = '(Lepton_pdgId[0] * Lepton_pdgId[1] == -13*13) && Sum(CleanJet_pt > 30 && CleanJet_pt < 50 && abs(CleanJet_eta) > 2.6 && abs(CleanJet_eta) < 3.1) == 0' + +# cuts['Zee_noJetInHorn'] = { +# 'expr' : '(Lepton_pdgId[0] * Lepton_pdgId[1] == -11*11) && Sum(CleanJet_pt > 30 && CleanJet_pt < 50 && abs(CleanJet_eta) > 2.6 && abs(CleanJet_eta) < 3.1) == 0', +# 'categories' : { +# '0j' : 'zeroJet', +# '1j' : 'oneJet && Alt(CleanJet_pt,1,0)<30', +# '2j' : 'multiJet', +# } +# } + +# cuts['Zmm_noJetInHorn'] = { +# 'expr' : '(Lepton_pdgId[0] * Lepton_pdgId[1] == -13*13) && Sum(CleanJet_pt > 30 && CleanJet_pt < 50 && abs(CleanJet_eta) > 2.6 && abs(CleanJet_eta) < 3.1) == 0', +# 'categories' : { +# '0j' : 'zeroJet', +# '1j' : 'oneJet && Alt(CleanJet_pt,1,0)<30', +# '2j' : 'multiJet', +# } +# } diff --git a/ZpTreweighting/2022_v12/dyZpTrw.json b/ZpTreweighting/2022_v12/dyZpTrw.json new file mode 100644 index 00000000..3a247a9b --- /dev/null +++ b/ZpTreweighting/2022_v12/dyZpTrw.json @@ -0,0 +1,17 @@ +{ + "2022": { + "NLO_0j": "0.843196*(0.211634*TMath::Erf((x-9.120552)/3.728196)-0.024628*x+0.000400*TMath::Sq(x)+1.361984)", + "NLO_1j": "0.843196*(0.211634*TMath::Erf((x-9.120552)/3.728196)-0.024628*x+0.000400*TMath::Sq(x)+1.361984)", + "NLO_2j": "0.843196*(0.211634*TMath::Erf((x-9.120552)/3.728196)-0.024628*x+0.000400*TMath::Sq(x)+1.361984)" + }, + "2023": { + "NLO_0j": "1.0*(1.0*TMath::Erf((x-1.0)/1.0)-1.0*x+1.0*TMath::Sq(x)+1.0)", + "NLO_1j": "1.0*(1.0*TMath::Erf((x-1.0)/1.0)-1.0*x+1.0*TMath::Sq(x)+1.0)", + "NLO_2j": "1.0*(1.0*TMath::Erf((x-1.0)/1.0)-1.0*x+1.0*TMath::Sq(x)+1.0)" + }, + "2024": { + "NLO_0j": "1.0*(1.0*TMath::Erf((x-1.0)/1.0)-1.0*x+1.0*TMath::Sq(x)+1.0)", + "NLO_1j": "1.0*(1.0*TMath::Erf((x-1.0)/1.0)-1.0*x+1.0*TMath::Sq(x)+1.0)", + "NLO_2j": "1.0*(1.0*TMath::Erf((x-1.0)/1.0)-1.0*x+1.0*TMath::Sq(x)+1.0)" 
+ } +} \ No newline at end of file diff --git a/ZpTreweighting/2022_v12/extract_Zptrw.py b/ZpTreweighting/2022_v12/extract_Zptrw.py new file mode 100644 index 00000000..ef8bff4c --- /dev/null +++ b/ZpTreweighting/2022_v12/extract_Zptrw.py @@ -0,0 +1,311 @@ +# ================================= +# Danush Shekar (UIC), 9Dec25 +# ================================= +import json +import os +import ROOT +import mplhep as hep +import matplotlib.pyplot as plt +import numpy as np +from ROOT import TFitResultPtr +import argparse +# CMS plot style +hep.style.use("CMS") +style = hep.style.CMS +style["font.size"] = 18 +plt.style.use(style) + +parser = argparse.ArgumentParser(description='Extract data and fit with Gaussian.') +parser.add_argument('-f', action='store_true', help='Fit the ratio plot using Erf.') +parser.add_argument('-n', type=int, default=0, help='Normalization method:\n1: Ratio of integral of MC over weights*MC.\n2: Normalize MC to data integral before calculating rw factor.') +parser.add_argument('-c', type=str, default="mm", help='Z decay channel.') +parser.add_argument('-nj', type=int, default=0, help='Jet bin category(number of jet bins).') +parser.add_argument('-i', '--input', default='mkShapes__ZpTreweighting.root', help='Path to the merged ROOT file (default: mkShapes__ZpTreweighting.root)') +parser.add_argument('--write-json', default=None, help='If given, write the updated dyZpTrw.json to this path after a successful fit (requires -f). 
def calc_norm_factor(dy_hist, fitting_function, fit_params, x_range=None):
    """Return sum(MC) / sum(weight * MC) over the bins inside the fit range.

    Parameters
    ----------
    dy_hist : histogram-like
        Object providing ``FindBin``, ``GetBinContent`` and
        ``GetXaxis().GetBinCenter`` (the DY histogram in this script).
    fitting_function : str
        Formula of the fit; only echoed in the log message.
    fit_params : sequence of float
        The six parameters p0..p5 of
        ``p0*Erf((x-p1)/p2) + p3*x + p4*x^2 + p5``, evaluated at bin centres.
    x_range : (low, high), optional
        Range to normalise over; defaults to the module-level ``fit_range``.

    Returns
    -------
    float
        The normalisation factor, or 1.0 when the weighted sum is zero.
    """
    import math  # function-scope import: the file's import header is left untouched

    low, high = fit_range if x_range is None else x_range
    first_bin = dy_hist.FindBin(low)
    # FindBin(high) is the bin that starts at the upper edge. The script's
    # integrals all stop at FindBin(high) - 1, and the written weight is a
    # constant above `high`, so that bin must not be evaluated with the fit.
    last_bin = dy_hist.FindBin(high) - 1
    print("\nNOTE: Using fit function (", fitting_function,") to calculate normalization factor.\n")  # [0]*TMath::Erf((x-[1])/[2]) + [3]*x + [4]*x**2 + [5]

    axis = dy_hist.GetXaxis()
    p0, p1, p2, p3, p4, p5 = fit_params[:6]
    numerator = 0.0
    denominator = 0.0
    for bin_idx in range(first_bin, last_bin + 1):
        mc_events = dy_hist.GetBinContent(bin_idx)
        bin_center = axis.GetBinCenter(bin_idx)
        weight = p0 * math.erf((bin_center - p1) / p2) + p3 * bin_center + p4 * bin_center**2 + p5
        numerator += mc_events
        denominator += mc_events * weight
    norm_factor = numerator / denominator if denominator != 0 else 1.0
    print("Normalization factor:", norm_factor)
    return norm_factor
histo_DY.SetLineColor(ROOT.kBlue) + histo_DY.SetStats(False) + histo_DY.SetFillStyle(3344) + histo_DY.SetFillColorAlpha(ROOT.kBlue, 0.1) + histo_DY.Draw("HIST") + histo_trueData.SetMarkerStyle(8) + histo_trueData.SetMarkerColor(ROOT.kBlack) + histo_trueData.SetMarkerSize(0.8) + histo_trueData.GetXaxis().SetRangeUser(plot_range[0], plot_range[1]) + histo_trueData.Draw("E1 SAME") + + # CMS label + label = ROOT.TLatex() + label.SetNDC(True) + label.SetTextSize(0.040) + label.DrawLatex(0.12, 0.92, "#bf{CMS} #it{Preliminary}") + label.DrawLatex(0.55, 0.92, "L = 8.2 fb^{-1} (#sqrt{s} = 13.6 TeV)") + label.DrawLatex(0.15, 0.2, f"num(DY) events in ({fit_range[0]},{fit_range[1]}) GeV = {integral_histo_DY:.3f}") + label.DrawLatex(0.15, 0.15, f"num(DATA) events in ({fit_range[0]},{fit_range[1]}) GeV = {integral_histo_DATA:.3f}") + if args.n == 2: + label.DrawLatex(0.15, 0.1, f"num(DY normalized) events in ({fit_range[0],fit_range[1]}) GeV = {integral_histo_DYscaled:.3f}") + + + leg = ROOT.TLegend(0.60, 0.70, 0.88, 0.88) + leg.SetBorderSize(0) + leg.SetFillStyle(0) + leg.AddEntry(histo_trueData, "Data - BG", "pe") + leg.AddEntry(histo_DY, "DY", "f") + leg.Draw() + + # Bottom pad: ratio + pad2 = c.cd(2) + pad2.SetPad(0.0, 0.0, 1.0, 0.30) + pad2.SetTopMargin(0.02) + pad2.SetBottomMargin(0.35) + + h_ratio_points = histo_ratio.Clone("h_ratio_points") + h_ratio_points.SetTitle("") + h_ratio_points.SetMarkerStyle(20) + h_ratio_points.SetMarkerSize(0.8) + h_ratio_points.SetLineColor(ROOT.kBlack) + h_ratio_points.SetMarkerColor(ROOT.kBlack) + + h_ratio_points.GetYaxis().SetTitle("(Data-BG)/DY") + h_ratio_points.GetYaxis().SetNdivisions(505) + h_ratio_points.GetYaxis().SetTitleSize(0.12) + h_ratio_points.GetYaxis().SetTitleOffset(0.30) + h_ratio_points.GetYaxis().SetLabelSize(0.08) + h_ratio_points.GetXaxis().SetTitle("p_{T}^{ll} [GeV]") + h_ratio_points.GetXaxis().SetTitleSize(0.12) + h_ratio_points.GetXaxis().SetLabelSize(0.10) + h_ratio_points.GetXaxis().SetRangeUser(plot_range[0], 
plot_range[1]) + + h_ratio_points.SetMinimum(0.0) + h_ratio_points.SetMaximum(2.0) + + h_ratio_points.Draw("E1") + h_ratio_points.SetStats(False) + + # horizontal line at 1 + line = ROOT.TLine(h_ratio_points.GetXaxis().GetXmin(), 1.0, + h_ratio_points.GetXaxis().GetXmax(), 1.0) + line.SetLineColor(ROOT.kGray+2) + line.SetLineStyle(2) + line.Draw("SAME") + if args.f: + # --- Fit the ratio with an error function --- + # erf(x; p0, p1, p2, p3) = p0 + p1 * TMath::Erf((x - p2)/p3) + fit_func = ROOT.TF1("fit_erf", + fitfunc, + fit_range[0],#histo_ratio.GetXaxis().GetXmin(), + fit_range[1])#histo_ratio.GetXaxis().GetXmax()) + fit_func.SetParameters(*initguess) # reasonable starting values + fit_func.SetLineColor(ROOT.kRed) + + fit_result = h_ratio_points.Fit(fit_func, "RVS") # fit only in the visible x-range + if fit_result and fit_result.IsValid(): + chi2 = fit_result.Chi2() + ndf = fit_result.Ndf() + chi2_ndf = chi2 / ndf if ndf != 0 else float('inf') + print(f" ============ Chi2/NDF: {chi2_ndf:.3f} ============") + latex = ROOT.TLatex() + latex.SetNDC(True) + latex.SetTextSize(0.08) + latex.SetTextColor(ROOT.kBlack) + latex.DrawLatex(0.15, 0.55, f"#chi^{{2}}/ndf = {chi2_ndf:.2f}") + func_formula = fit_func.GetTitle() # or fit_func.GetExpFormula() for TF1 + param_values = [fit_func.GetParameter(i) for i in range(fit_func.GetNpar())] + param_str = ", ".join([f"p{i}={v:.2f}" for i, v in enumerate(param_values)]) + # # Display fit function and parameters + # latex.DrawLatex(0.15, 0.65, f"f(x) = {func_formula}") + # latex.DrawLatex(0.15, 0.75, param_str) + else: + print("Fit failed or invalid - cannot compute Chi2/NDF") + + # Plot only until 50 GeV, the value after 50 GeV will be the fit function's value at 50 GeV + fit_func.Draw("SAME") + const_val = fit_func.Eval(fit_range[1]) + const_func = ROOT.TF1("const_func", f"{const_val}", fit_range[1], plot_range[1]) # NOTE - assuming plot_range[1] is greater than fit_range[1] + const_func.SetLineColor(ROOT.kRed) + # 
const_func.SetLineStyle(ROOT.kDashed) + const_func.Draw("L SAME") + fit_func.Print("V") + c.SaveAs(f"ZpTreweighting_with_ratio_{savename}.pdf") + c.Close() # Close the canvas after saving + # c.SaveAs(f"ZpTreweighting_with_ratio_{savename}.png") + + # Commented out as plot quality is very bad + c_ratio_only = ROOT.TCanvas("c_ratio_only", "c_ratio_only", 800, 800) + h_ratio_points.Draw("E1") + h_ratio_points.GetYaxis().SetTitle("(Data-BG)/DY") + h_ratio_points.GetYaxis().SetTitleSize(0.05) + h_ratio_points.GetYaxis().SetTitleOffset(0.8) + h_ratio_points.GetYaxis().SetLabelSize(0.04) + h_ratio_points.GetXaxis().SetTitle("p_{T}^{ll} [GeV]") + h_ratio_points.GetXaxis().SetTitleSize(0.05) + h_ratio_points.GetXaxis().SetLabelSize(0.03) + line.Draw("SAME") + if args.f: + # Plot only until 50 GeV, the value after 50 GeV will be the fit function's value at 50 GeV + fit_func.Draw("SAME") + # Draw constant for x > 50 GeV + const_val = fit_func.Eval(fit_range[1]) + const_func = ROOT.TF1("const_func", f"{const_val}", fit_range[1], plot_range[1]) # NOTE - assuming plot_range[1] is greater than fit_range[1] + const_func.SetLineColor(ROOT.kRed) + const_func.Draw("SAME") + if fit_result and fit_result.IsValid(): + latex = ROOT.TLatex() + latex.SetNDC(True) + latex.SetTextSize(0.025) + latex.SetTextColor(ROOT.kBlack) + latex.DrawLatex(0.15, 0.15, f"#chi^{{2}}/ndf = {chi2_ndf:.2f}") + func_formula = fit_func.GetTitle() # or fit_func.GetExpFormula() for TF1 + param_values = [fit_func.GetParameter(i) for i in range(fit_func.GetNpar())] + param_str = ", ".join([f"p{i}={v:.2f}" for i, v in enumerate(param_values)]) + # Display fit function and parameters + latex.DrawLatex(0.15, 0.35, f"f(x) = {func_formula}") + latex.DrawLatex(0.15, 0.3, param_str) + if args.n == 1: + norm_factor = calc_norm_factor(histo_DY, fitfunc, param_values) + latex.DrawLatex(0.15, 0.25, f"Normalization factor = {norm_factor:.2f}") + print(f"Normalization factor = {norm_factor}") + formula = fit_func.GetTitle() # 
e.g., "[0]*x + [1]" + n_params = fit_func.GetNpar() + params = [fit_func.GetParameter(i) for i in range(n_params)] + # Replace [i] with parameter values + for i, p in enumerate(params): + formula = formula.replace(f"[{i}]", f"{p:.3f}") + print(f"Fit function with parameters: {formula}") + + c_ratio_only.SaveAs(f"ZpTreweighting_ratio_fit_{savename}.pdf") + # c_ratio_only.SaveAs(f"ZpTreweighting_ratio_fit_{savename}.png") + c_ratio_only.Close() # Close the canvas after saving +print(f"Integral of DY histogram from {fit_range[0]} to {fit_range[1]} GeV: {integral_histo_DY}") +print(f"Integral of ratio histogram from {fit_range[0]} to {fit_range[1]} GeV: {integral_histo_ratio}") + +# Update dyZpTrw.json +if args.write_json is not None and args.f: + wrote = False + # 'fit_func', 'fit_result', 'fitfunc' are in scope from the last for-loop + # iteration (Python loop variables persist after the loop). + if fit_result and fit_result.IsValid(): + # Build a ROOT / C++ compatible formula string with full precision. + root_formula = fitfunc # e.g. "[0]*TMath::Erf(...) + [3]*x + [4]*x**2 + [5]" + n_params = fit_func.GetNpar() + params = [fit_func.GetParameter(i) for i in range(n_params)] + const_val = fit_func.Eval(fit_range[1]) + for i, p in enumerate(params): + root_formula = root_formula.replace(f"[{i}]", f"{p:.6f}") + # Convert Python-style x**2 to ROOT / C++ TMath::Sq(x) + # root_formula = root_formula.replace("x**2", "TMath::Sq(x)") + # Tidy up double signs that can appear after parameter substitution + root_formula = root_formula.replace("+ -", "- ") + root_formula = root_formula.replace("- -", "+ ") + piecewise_formula = f"({root_formula})*(x<{fit_range[1]}) + ({const_val:.6f})*(x>={fit_range[1]})" + + # Prepend the integral normalization factor if methodology 2 is chosen + if args.n == 1: + full_expr = f"{norm_factor}*{piecewise_formula}" + else: + full_expr = piecewise_formula + + # Read the existing JSON so other years/types are preserved. 
print(treeBaseDir)


def makeMCDirectory(var=''):
    """Return the MC tree directory, optionally for a systematic variation.

    The path is ``<base>/<mcProduction>/<mcSteps>`` with ``__<var>`` appended
    to the last component when ``var`` is non-empty. ``<base>`` is
    ``treeBaseDir``, prefixed by ``redirector`` when ``useXROOTD`` is true.
    """
    _treeBaseDir = treeBaseDir + ''
    if useXROOTD:
        _treeBaseDir = redirector + treeBaseDir
    if var == '':
        return '/'.join([_treeBaseDir, mcProduction, mcSteps])
    return '/'.join([_treeBaseDir, mcProduction, mcSteps + '__' + var])


# merge cuts: one entry per plain cut, one entry per (cut, category) pair
_mergedCuts = []
for cut in list(cuts.keys()):
    __cutExpr = ''
    if isinstance(cuts[cut], dict):
        __cutExpr = cuts[cut]['expr']
        for cat in list(cuts[cut]['categories'].keys()):
            _mergedCuts.append(cut + '_' + cat)
    elif isinstance(cuts[cut], str):
        _mergedCuts.append(cut)


# Definitions of groups of samples
# NOTE: the previous `skey not in ('DATA')` tested against the *string*
# 'DATA' (parentheses alone do not make a tuple), i.e. a substring check that
# would also have dropped any sample whose name is a substring of 'DATA'.
mc = [skey for skey in samples if skey != 'DATA']

nuisances = {}


################################ EXPERIMENTAL UNCERTAINTIES #################################

#### Luminosity

# https://twiki.cern.ch/twiki/bin/view/CMS/LumiRecommendationsRun3
nuisances['lumi_2022'] = {
    'name'    : 'lumi_2022',
    'type'    : 'lnN',
    'samples' : dict((skey, '1.014') for skey in mc)
}

### MC statistical uncertainty
autoStats = True
if autoStats:
    ## Use the following if you want to apply the automatic combine MC stat nuisances.
    nuisances['stat'] = {
        'type': 'auto',
        'maxPoiss': '10',
        'includeSignal': '0',
        # nuisance ['maxPoiss'] = Number of threshold events for Poisson modelling
        # nuisance ['includeSignal'] = Include MC stat nuisances on signal processes (1=True, 0=False)
        'samples': {}
    }
https://github.com/latinos/PlotsConfigurationsRun3/blob/2040efccdab8bc42d670a5e2cae8676d7b0f106c/ControlRegions/DY/2022_v12/samples.py + + +from mkShapesRDF.lib.search_files import SearchFiles + +searchFiles = SearchFiles() + +redirector = "" +useXROOTD = False +dataset_samples = 'amassiro' + +# MC: /eos/cms/store/group/phys_higgs/cmshww/amassiro/HWWNano/Summer22_130x_nAODv12_Full2022v12/MCl2loose2022v12__MCCorr2022v12JetScaling__l2tight +# DATA: /eos/cms/store/group/phys_higgs/cmshww/amassiro/HWWNano/Run2022_ReReco_nAODv12_Full2022v12/DATAl2loose2022v12__l2tight + +if dataset_samples == 'calderon': + mcProduction = 'Summer22_130x_nAODv12_Full2022v12' + mcSteps = 'MCl2loose2022v12__MCCorr2022v12JetScaling__sblancof__l2tight' # Using DYJetsToLL_M-50-LO from Calderon (DS, 22Nov25) + dataReco = 'Run2022_ReReco_nAODv12_Full2022v12' + dataSteps = 'DATAl2loose2022v12__sblancof__l2loose' +elif dataset_samples == 'amassiro': + mcProduction = 'Summer22_130x_nAODv12_Full2022v12_OLD' # new datasets were produced around 11Apr26, using old datasets to compare with prior results (DS, 13Apr26) + mcSteps = 'MCl2loose2022v12__MCCorr2022v12JetScaling__l2tight' # Using DYto2L-2Jets_MLL-50 from Amassiro (DS, 21Nov25) + dataReco = 'Run2022_ReReco_nAODv12_Full2022v12_OLD' # new datasets were produced around 11Apr26, using old datasets to compare with prior results (DS, 13Apr26) + dataSteps = 'DATAl2loose2022v12__l2loose' # Choose l2loose sample but apply tight selections in analysis (eleWP and muWP) + +# fakeSteps = 'DATAl1loose2022EFGv12__fakeW' + +############################################## +###### Tree base directory for the site ###### +############################################## +treeBaseDir = f'/eos/cms/store/group/phys_higgs/cmshww/{dataset_samples}/HWWNano' +limitFiles = -1 # For running on smaller set of samples (DS, 21Nov25) + +def makeMCDirectory(var=""): + _treeBaseDir = treeBaseDir + "" + if redirector != "": + _treeBaseDir = redirector + treeBaseDir + if var == "": + 
def addSampleWeight(samples, sampleName, sampleNameType, weight):
    """Multiply the per-file-group weight of one sub-sample by ``weight``.

    ``samples[sampleName]["name"]`` is a list of ``(name, files)`` or
    ``(name, files, weight_expr)`` tuples. Every entry whose name equals
    ``sampleNameType`` is removed from its position and re-appended at the
    end of the list with ``(weight)`` as its weight expression, or with
    ``old_expr*(weight)`` when it already had one.

    Raises
    ------
    IndexError
        If no entry named ``sampleNameType`` exists. The exception type is
        unchanged from before, but the message now names the missing
        sub-sample and lists the available ones.
    """
    entries = samples[sampleName]["name"]
    matched = [entry for entry in entries if entry[0] == sampleNameType]
    if not matched:
        available = ", ".join(sorted({str(entry[0]) for entry in entries}))
        raise IndexError(
            f"addSampleWeight: no sub-sample '{sampleNameType}' in "
            f"samples['{sampleName}'] (available: {available})"
        )

    factor = "(" + weight + ")"
    # Re-weight every match: previously only the first match was kept and
    # any further entries with the same name were silently dropped.
    reweighted = [
        (entry[0], entry[1], entry[2] + "*" + factor) if len(entry) > 2
        else (entry[0], entry[1], factor)
        for entry in matched
    ]
    untouched = [entry for entry in entries if entry[0] != sampleNameType]
    samples[sampleName]["name"] = untouched + reweighted
['E','Run2022E-Prompt-v1'], +# ['F','Run2022F-Prompt-v1'], +# ['G','Run2022G-Prompt-v1'], + +DataSets = ['MuonEG','SingleMuon','Muon','EGamma'] + +# Putting for later: HLT selections (DS, 19Nov25) +DataTrig = { + 'MuonEG' : ' Trigger_ElMu' , + 'SingleMuon' : '!Trigger_ElMu && Trigger_sngMu' , + 'Muon' : '!Trigger_ElMu && (Trigger_sngMu || Trigger_dblMu)', + 'EGamma' : '!Trigger_ElMu && !Trigger_sngMu && !Trigger_dblMu && (Trigger_sngEl || Trigger_dblEl)' +} + + +######################################### +############ MC COMMON ################## +######################################### + +# SFweight does not include btag weights +mcCommonWeightNoMatch = 'XSWeight*METFilter_Common*SFweight' +mcCommonWeight = 'XSWeight*METFilter_Common*PromptGenLepMatch2l*SFweight' + +#mcCommonWeight = 'XSWeight*METFilter_Common*SFweight' + +########################################### +############# BACKGROUNDS ############### +########################################### + +# DY +if dataset_samples == 'calderon': + files = nanoGetSampleFiles(mcDirectory, 'DYJetsToLL_M-50-LO') +elif dataset_samples == 'amassiro': + files = nanoGetSampleFiles(mcDirectory, 'DYto2L-2Jets_MLL-50') + + +samples['DY'] = { + 'name': files, + 'weight': mcCommonWeight, + 'FilesPerJob': 2, +} + +addSampleWeight(samples,'DY','DYto2L-2Jets_MLL-50','DY_NLO_ZpTrw') + +# remove backgrounds from data for ZpT reweighting: +top_samples = ['TTTo2L2Nu', 'TWminusto2L2Nu', 'TbarWplusto2L2Nu']#, 'ST_tW_top'] +diboson_samples = ['WWTo2L2Nu', 'WZTo3LNu', 'GluGlutoContintoWWtoENuENu', 'GluGlutoContintoWWtoENuMuNu', 'GluGlutoContintoWWtoENuTauNu', 'GluGlutoContintoWWtoMuNuENu', 'GluGlutoContintoWWtoMuNuMuNu', 'GluGlutoContintoWWtoMuNuTauNu', 'GluGlutoContintoWWtoTauNuENu', 'GluGlutoContintoWWtoTauNuMuNu', 'GluGlutoContintoWWtoTauNuTauNu', 'WGtoLNuG-1J_PTG10to100', 'WGtoLNuG-1J_PTG100to200', 'WGtoLNuG-1J_PTG200to400', 'WGtoLNuG-1J_PTG400to600', 'WGtoLNuG-1J_PTG600'] +higgs_samples = ['GluGluHToWWTo2L2Nu_M125', 
'VBFHToWWTo2L2Nu_M125'] + +samples['top'] = { + 'name': + nanoGetSampleFiles(mcDirectory, 'ST_t-channel_top') + \ + nanoGetSampleFiles(mcDirectory, 'ST_t-channel_antitop') + \ + nanoGetSampleFiles(mcDirectory, 'ST_s-channel_plus') + \ + nanoGetSampleFiles(mcDirectory, 'ST_s-channel_minus') + \ + nanoGetSampleFiles(mcDirectory, 'ST_tW_top') + \ + nanoGetSampleFiles(mcDirectory, 'ST_tW_antitop') + \ + nanoGetSampleFiles(mcDirectory, 'TTTo2L2Nu') + \ + nanoGetSampleFiles(mcDirectory, 'TWminusto2L2Nu') + \ + nanoGetSampleFiles(mcDirectory, 'TbarWplusto2L2Nu'), + 'weight': mcCommonWeight, + 'FilesPerJob': 2, +} + +samples['diboson'] = { + 'name': nanoGetSampleFiles(mcDirectory, 'WWTo2L2Nu') + \ + nanoGetSampleFiles(mcDirectory, 'WZTo3LNu') + \ + nanoGetSampleFiles(mcDirectory, 'GluGlutoContintoWWtoENuENu') + \ + nanoGetSampleFiles(mcDirectory, 'GluGlutoContintoWWtoENuMuNu') + \ + nanoGetSampleFiles(mcDirectory, 'GluGlutoContintoWWtoENuTauNu') + \ + nanoGetSampleFiles(mcDirectory, 'GluGlutoContintoWWtoMuNuENu') + \ + nanoGetSampleFiles(mcDirectory, 'GluGlutoContintoWWtoMuNuMuNu') + \ + nanoGetSampleFiles(mcDirectory, 'GluGlutoContintoWWtoMuNuTauNu') + \ + nanoGetSampleFiles(mcDirectory, 'GluGlutoContintoWWtoTauNuENu') + \ + nanoGetSampleFiles(mcDirectory, 'GluGlutoContintoWWtoTauNuMuNu') + \ + nanoGetSampleFiles(mcDirectory, 'GluGlutoContintoWWtoTauNuTauNu') + \ + nanoGetSampleFiles(mcDirectory, 'WGtoLNuG-1J_PTG10to100') + \ + nanoGetSampleFiles(mcDirectory, 'WGtoLNuG-1J_PTG100to200') + \ + nanoGetSampleFiles(mcDirectory, 'WGtoLNuG-1J_PTG200to400') + \ + nanoGetSampleFiles(mcDirectory, 'WGtoLNuG-1J_PTG400to600') + \ + nanoGetSampleFiles(mcDirectory, 'WGtoLNuG-1J_PTG600'), + 'weight': mcCommonWeight, + 'FilesPerJob': 2, +} + +samples['SMhiggs'] = { + 'name': nanoGetSampleFiles(mcDirectory, 'GluGluHToWWTo2L2Nu_M125') + \ + nanoGetSampleFiles(mcDirectory, 'VBFHToWWTo2L2Nu_M125'), + 'weight': mcCommonWeight, + 'FilesPerJob': 2, +} + + 
+########################################### +################## DATA ################### +########################################### + +samples['DATA'] = { + 'name': [], + 'weight': 'LepWPCut*METFilter_DATA', + 'weights': [], + 'isData': ['all'], + 'FilesPerJob': 15 +} + +for _, sd in DataRun: + for pd in DataSets: + datatag = pd + '_' + sd + + if (pd == "SingleMuon" and _ in ["D"]) or (pd == "Muon" and _ == "B"): + continue + files = nanoGetSampleFiles(dataDirectory, datatag) + + print(datatag) + + samples['DATA']['name'].extend(files) + addSampleWeight(samples, 'DATA', datatag, DataTrig[pd]) diff --git a/ZpTreweighting/2022_v12/structure.py b/ZpTreweighting/2022_v12/structure.py new file mode 100644 index 00000000..4c0f5428 --- /dev/null +++ b/ZpTreweighting/2022_v12/structure.py @@ -0,0 +1,30 @@ +# structure configuration for datacard + +structure = {} + +# keys here must match keys in samples.py +structure['DY'] = { + 'isSignal' : 0, + 'isData' : 0 +} + +structure['top'] = { + 'isSignal' : 0, + 'isData' : 0 +} + +structure['diboson'] = { + 'isSignal' : 0, + 'isData' : 0 +} + +structure['SMhiggs'] = { + 'isSignal' : 0, + 'isData' : 0 +} + +# data +structure['DATA'] = { + 'isSignal' : 0, + 'isData' : 1 +} diff --git a/ZpTreweighting/2022_v12/variables.py b/ZpTreweighting/2022_v12/variables.py new file mode 100644 index 00000000..a00ceb55 --- /dev/null +++ b/ZpTreweighting/2022_v12/variables.py @@ -0,0 +1,176 @@ +# variables +variables = {} + +variables['events'] = { + 'name' : '1', + 'range' : (1,0,2), + 'xaxis' : 'events', + 'fold' : 3 +} + +variables['nvtx'] = { + 'name' : 'PV_npvsGood', + 'range' : (100, 0, 100), + 'xaxis' : 'number of vertices', + 'fold' : 3 +} + +variables['mll'] = { + 'name': 'mll', + 'range' : (50,50,150), + 'xaxis' : 'm_{ll} [GeV]', + 'fold' : 0 +} + +variables['ptll'] = { + 'name': 'ptll', + 'range' : (40,0,80), + 'xaxis' : 'p_{T}^{ll} [GeV]', + 'fold' : 0 +} + +# Data samples do not have gen-level information +# variables['gen_ptll'] 
= { +# 'name': 'gen_ptll', +# 'range' : (40,0,200), +# 'xaxis' : 'gen p_{T}^{ll} [GeV]', +# 'fold' : 0 +# } + +# variables['gen_Zpt'] = { +# 'name': 'gen_Zpt(nGenPart, GenPart_pt, GenPart_pdgId, GenPart_genPartIdxMother, GenPart_statusFlags, gen_ptll)', +# 'range' : (40,0,200), +# 'xaxis' : 'Gen p_{T}^{Z} [GeV]', +# 'fold' : 0 +# } + +variables['drll'] = { + 'name': 'drll', + 'range' : (50, 0,5), + 'xaxis' : '#Delta R_{ll}', + 'fold' : 0 +} + +variables['dphill'] = { + 'name': 'dphill', + 'range' : (50,0,5), + 'xaxis' : '#Delta #phi_{ll}', + 'fold' : 0 +} + +variables['pt1'] = { + 'name': 'Lepton_pt[0]', + 'range' : (40,0,200), + 'xaxis' : 'p_{T} 1st lep', + 'fold' : 3 +} + +variables['pt2'] = { + 'name': 'Lepton_pt[1]', + 'range' : (40,0,160), + 'xaxis' : 'p_{T} 2nd lep', + 'fold' : 3 +} + +variables['eta1'] = { + 'name': 'Lepton_eta[0]', + 'range' : (50,-2.5,2.5), + 'xaxis' : '#eta 1st lep', + 'fold' : 3 +} + +variables['eta2'] = { + 'name': 'Lepton_eta[1]', + 'range' : (50,-2.5,2.5), + 'xaxis' : '#eta 2nd lep', + 'fold' : 3 +} + + +# B Tag +variables['jetdeepb'] = { + 'name': 'Alt(Take(Jet_btagDeepFlavB, CleanJet_jetIdx), 0, -99)', + 'range' : (40,-1,1), + 'xaxis' : 'B tagger 1st jet (DeepB)', + 'fold' : 2 +} + +variables['jetParT'] = { + 'name': 'Alt(Take(Jet_btagRobustParTAK4B, CleanJet_jetIdx), 0, -99)', + 'range' : (40,-1,1), + 'xaxis' : 'B tagger 1st jet (RobustParT AK4B)', + 'fold' : 2 +} + +variables['jetPNetB'] = { + 'name': 'Alt(Take(Jet_btagPNetB, CleanJet_jetIdx), 0, -99)', + 'range' : (40,-1,1), + 'xaxis' : 'B tagger 1st jet (ParticleNet B)', + 'fold' : 2 +} + + +# MET +variables['trkMet'] = { + 'name': 'TkMET_pt', + 'range' : (20,0,200), + 'xaxis' : 'trk met [GeV]', + 'fold' : 3 +} + +variables['puppimet'] = { + 'name': 'PuppiMET_pt', + 'range' : (20,0,200), + 'xaxis' : 'Puppi MET p_{T} [GeV]', + 'fold' : 3 +} + +############# New Jet processing +variables['njet'] = { + 'name': 'Sum(CleanJet_pt>30)', + 'range' : (5,0,5), + 'xaxis' : 'Number of 
jets', + 'fold' : 2 +} + +variables['jetpt1'] = { + 'name': 'Alt(CleanJet_pt, 0, -99) - 9999.9*(CleanJet_pt[0]<30)', + 'range' : (40,0,200), + 'xaxis' : 'p_{T} 1st jet', + 'fold' : 0 +} + +variables['jetpt2'] = { + 'name': 'Alt(CleanJet_pt, 1, -99) - 9999.9*(CleanJet_pt[1]<30)', + 'range' : (40,0,200), + 'xaxis' : 'p_{T} 2nd jet', + 'fold' : 0 +} + +variables['jeteta1'] = { + 'name': 'Alt(CleanJet_eta, 0, -99) - 9999.9*(CleanJet_pt[0]<30)', + 'range' : (30,-4.7,4.7), + 'xaxis' : '#eta 1st jet', + 'fold' : 0 +} + +variables['jeteta1_fine_binning'] = { + 'name': 'Alt(CleanJet_eta, 0, -99) - 9999.9*(CleanJet_pt[0]<30)', + 'range' : (94,-4.7,4.7), + 'xaxis' : '#eta 1st jet', + 'fold' : 0 +} + +variables['jeteta2'] = { + 'name': 'Alt(CleanJet_eta, 1, -99) - 9999.9*(CleanJet_pt[1]<30)', + 'range' : (30,-4.7,4.7), + 'xaxis' : '#eta 2nd jet', + 'fold' : 0 +} + +variables['jeteta2_fine_binning'] = { + 'name': 'Alt(CleanJet_eta, 1, -99) - 9999.9*(CleanJet_pt[1]<30)', + 'range' : (94,-4.7,4.7), + 'xaxis' : '#eta 2nd jet', + 'fold' : 0 +} diff --git a/ZpTreweighting/2023_v12/aliases.py b/ZpTreweighting/2023_v12/aliases.py new file mode 100644 index 00000000..cb46c4ed --- /dev/null +++ b/ZpTreweighting/2023_v12/aliases.py @@ -0,0 +1,261 @@ +import os +import copy +import inspect +import json + +configurations = os.path.realpath(inspect.getfile(inspect.currentframe())) # this file + +aliases = {} +aliases = OrderedDict() + +mc = [skey for skey in samples if skey not in ('Fake', 'DATA', 'Dyemb', 'DATA_EG', 'DATA_Mu', 'DATA_EMu', 'Fake_EG', 'Fake_Mu', 'Fake_EMu')] +# Commented out as not used (DS, 19Nov25) +# mc_emb = [skey for skey in samples if skey not in ('Fake', 'DATA', 'DATA_Mu', 'DATA_EMu', 'Fake_EG', 'Fake_Mu', 'Fake_EMu')] + +# Using LepSF2l__ele_cutBased_LooseID_tthMVA_Run3__mu_cut_TightID_pfIsoTight_HWW_tthmva_67 from latest git repo push 
(https://github.com/latinos/PlotsConfigurationsRun3/blob/f8a0f50dfe6301543203d9d260ad721204a2739f/ControlRegions/DY/2022_v12/aliases.py#L14-L15) +eleWP = 'cutBased_LooseID_tthMVA_Run3' +muWP = 'cut_TightID_pfIsoTight_HWW_tthmva_67' + +aliases['LepWPCut'] = { + 'expr': 'LepCut2l__ele_'+eleWP+'__mu_'+muWP, + 'samples': mc + ['DATA'], +} + +aliases['LepWPSF'] = { + 'expr': 'LepSF2l__ele_'+eleWP+'__mu_'+muWP, + 'samples': mc +} + +# gen-matching to prompt only (GenLepMatch2l matches to *any* gen lepton) +aliases['PromptGenLepMatch2l'] = { + 'expr': 'Alt(Lepton_promptgenmatched, 0, 0) * Alt(Lepton_promptgenmatched, 1, 0)', + 'samples': mc +} + +aliases['gen_Zpt'] = { + # 'linesToAdd': [".L /afs/cern.ch/user/d/dshekar/public/RDF/PlotsConfigurationsRun3/HWW_polarization/Extended/getGenZpt.cc+"], + # 'linesToAdd': ['.L /eos/user/d/dshekar/public/RDF/PlotsConfigurationsRun3/HWW_polarization/Extended/getGenZpt.cc+'], + 'linesToAdd': [ + """ +#ifndef getGenZpt +#define getGenZpt + +#include + +#include "TVector2.h" +#include "Math/Vector4Dfwd.h" +#include "Math/GenVector/LorentzVector.h" +#include "Math/GenVector/PtEtaPhiM4D.h" + +#include +#include "ROOT/RVec.hxx" + +using namespace ROOT; +using namespace ROOT::VecOps; + +double GetGenZpt( + int nGenPart, + RVecF GenPart_pt, + RVecI GenPart_pdgId, + RVecI GenPart_genPartIdxMother, + RVecI GenPart_statusFlags, + float gen_ptll + ){ + + + + // Find Gen pT of Z decaying into leptons + unsigned nGen = nGenPart; + std::vector LepCands{}; + std::vector MotherIdx{}; + std::vector MotherPdgId{}; + int pdgId, sFlag, MIdx; + bool hasZ = false; + //std::cout << "==========" << std::endl; + for (unsigned iGen{0}; iGen != nGen; ++iGen){ + pdgId = std::abs(GenPart_pdgId[iGen]); + sFlag = GenPart_statusFlags[iGen]; + //std::cout << pdgId << " ; " << sFlag << " ; " << GenPart_pt->At(iGen) << " ; " << GenPart_genPartIdxMother->At(iGen) << std::endl; + if (((pdgId == 11) || (pdgId == 13) || (pdgId == 15)) && ((sFlag >> 0 & 1) || (sFlag >> 2 & 
1) || (sFlag >> 3 & 1) || (sFlag >> 4 & 1))){ + LepCands.push_back(iGen); + MIdx = GenPart_genPartIdxMother[iGen]; + MotherIdx.push_back(MIdx); + if (MIdx > -1){ + MotherPdgId.push_back(GenPart_pdgId[MIdx]); + if (GenPart_pdgId[MIdx]==23) hasZ = true; + }else{ + MotherPdgId.push_back(0); + } + } + } + + //std::cout << "Check:" << std::endl; + for (unsigned iGen{0}; iGen != LepCands.size(); ++iGen){ + for (unsigned jGen{0}; jGen != LepCands.size(); ++jGen){ + if (jGen <= iGen) continue; + //std::cout << iGen << " ; " << MotherIdx[iGen] << " ; " << jGen << " ; " << MotherIdx[jGen] << " ; " << MotherPdgId[iGen] << " ; " << hasZ << std::endl; + // Some DY samples generate the Z; others have the two leptons produced directly -> motherId is 0 for those events + if (hasZ){ + if (MotherIdx[iGen] == MotherIdx[jGen] && MotherPdgId[iGen] == 23) return GenPart_pt[MotherIdx[iGen]]; + }else{ + if (MotherIdx[iGen] == MotherIdx[jGen] && MotherIdx[iGen] == 0) return GenPart_pt[MotherIdx[iGen]]; + } + } + } + //std::cout << "Falling back!" << std::endl; + return gen_ptll; + +} + +#endif + """], + 'class': 'GetGenZpt', + 'args': 'nGenPart, GenPart_pt, GenPart_pdgId, GenPart_genPartIdxMother, GenPart_statusFlags, gen_ptll', + # 'expr': 'gen_ptll', + 'samples': mc +} + +# Jet bins +# using Alt(CleanJet_pt, n, 0) instead of Sum(CleanJet_pt >= 30) because jet pt ordering is not strictly followed in JES-varied samples + +# No jet with pt > 30 GeV +aliases['zeroJet'] = { + 'expr': 'Alt(CleanJet_pt, 0, 0) < 30.' +} + +aliases['oneJet'] = { + 'expr': 'Alt(CleanJet_pt, 0, 0) > 30.' +} + +aliases['multiJet'] = { + 'expr': 'Alt(CleanJet_pt, 1, 0) > 30.' 
+} + +aliases['noJetInHorn'] = { + 'expr' : 'Sum(CleanJet_pt > 30 && CleanJet_pt < 50 && abs(CleanJet_eta) > 2.6 && abs(CleanJet_eta) < 3.1) == 0', +} + +# exec(open('dyZpTrw.py', "r").read()) +# aliases['DY_LO_ZpTrw'] = { +# 'expr': '('+DYrew['2022_NLO']['w'].replace('x', 'gen_Zpt')+')*(zeroJet)*(ptll < 50) + 1*(zeroJet)*(ptll >= 50)', +# 'samples': ['DY'] +# } +_dyzptrw_json = os.path.join(os.path.dirname(configurations), 'dyZpTrw.json') +with open(_dyzptrw_json) as _fj: + DYrew = json.load(_fj) +aliases['DY_NLO_ZpTrw'] = { + 'expr': '(' + DYrew['2023']['NLO_0j'].replace('x', 'gen_Zpt') + ')*(zeroJet)' + + ' + (' + DYrew['2023']['NLO_1j'].replace('x', 'gen_Zpt') + ')*(oneJet&& Alt(CleanJet_pt,1,0)<30)' + + ' + (' + DYrew['2023']['NLO_2j'].replace('x', 'gen_Zpt') + ')*(multiJet)', + 'samples': ['DY'] +} + +######################################################################## +# B-Tagging WP: https://btv-wiki.docs.cern.ch/ScaleFactors/Run3Summer22/ +######################################################################## + +# Algo / WP / WP cut +btagging_WPs = { + "DeepFlavB" : {"loose":"0.0479", "medium":"0.2431", "tight":"0.6553", "xtight":"0.7667", "xxtight":"0.9459"}, + "RobustParTAK4B" : {"loose":"0.0681", "medium":"0.3487", "tight":"0.7969", "xtight":"0.8882", "xxtight":"0.9883"}, + "PNetB" : {"loose":"0.0358", "medium":"0.1917", "tight":"0.6172", "xtight":"0.7515", "xxtight":"0.9659"} +} + +# Algo / SF name +btagging_SFs = { + "DeepFlavB" : "deepjet", + "RobustParTAK4B" : "partTransformer", + "PNetB" : "partNet", +} + +# Algorithm and WP selection +bAlgo = 'PNetB' # ['DeepFlavB','RobustParTAK4B','PNetB'] +WP = 'loose' # ['loose','medium','tight','xtight','xxtight'] + +# Access information from dictionaries +bWP = btagging_WPs[bAlgo][WP] +bSF = btagging_SFs[bAlgo] + +# # B tagging selections and scale factors +aliases['bVeto'] = { + 'expr': f'Sum(CleanJet_pt > 20. 
&& abs(CleanJet_eta) < 2.5 && Take(Jet_btag{bAlgo}, CleanJet_jetIdx) > {bWP}) == 0' +} + +aliases['bReq'] = { + 'expr': f'Sum(CleanJet_pt > 30. && abs(CleanJet_eta) < 2.5 && Take(Jet_btag{bAlgo}, CleanJet_jetIdx) > {bWP}) >= 1' +} + +# Commenting out, as this was not included in latest git repo (https://github.com/latinos/PlotsConfigurationsRun3/blob/f8a0f50dfe6301543203d9d260ad721204a2739f/ControlRegions/DY/2022_v12/aliases.py#L89-L104) +# aliases['bVetoSF'] = { +# 'expr': 'TMath::Exp(Sum(LogVec((CleanJet_pt>20 && abs(CleanJet_eta)<2.5)*Take(Jet_btagSF_{}_shape, CleanJet_jetIdx)+1*(CleanJet_pt<20 || abs(CleanJet_eta)>2.5))))'.format(bSF), +# 'samples': mc +# } + +# aliases['bReqSF'] = { +# 'expr': 'TMath::Exp(Sum(LogVec((CleanJet_pt>30 && abs(CleanJet_eta)<2.5)*Take(Jet_btagSF_{}_shape, CleanJet_jetIdx)+1*(CleanJet_pt<30 || abs(CleanJet_eta)>2.5))))'.format(bSF), +# 'samples': mc +# } + +# # Top control region +# aliases['topcr'] = { +# 'expr': 'mtw2>30 && mll>50 && ((zeroJet && !bVeto) || bReq)' +# } + +# # WW control region +# aliases['wwcr'] = { +# 'expr': 'mth>60 && mtw2>30 && mll>100 && bVeto' +# } + +# # Overall b tag SF +# aliases['btagSF'] = { +# 'expr': '(bVeto || (topcr && zeroJet))*bVetoSF + (topcr && !zeroJet)*bReqSF', +# 'samples': mc +# } + +# # Systematic uncertainty variations +# for shift in ['jes','lf','hf','lfstats1','lfstats2','hfstats1','hfstats2','cferr1','cferr2']: + +# for targ in ['bVeto', 'bReq']: +# alias = aliases['%sSF%sup' % (targ, shift)] = copy.deepcopy(aliases['%sSF' % targ]) +# alias['expr'] = alias['expr'].replace('btagSF_deepjet_shape', 'btagSF_deepjet_shape_up_%s' % shift) + +# alias = aliases['%sSF%sdown' % (targ, shift)] = copy.deepcopy(aliases['%sSF' % targ]) +# alias['expr'] = alias['expr'].replace('btagSF_deepjet_shape', 'btagSF_deepjet_shape_down_%s' % shift) + +# aliases['btagSF%sup' % shift] = { +# 'expr': aliases['btagSF']['expr'].replace('SF', 'SF' + shift + 'up'), +# 'samples': mc +# } + +# aliases['btagSF%sdown' % 
shift] = { +# 'expr': aliases['btagSF']['expr'].replace('SF', 'SF' + shift + 'down'), +# 'samples': mc +# } + +########################################################################## +# End of b tagging +########################################################################## + +# Data/MC scale factors and systematic uncertainties +aliases['SFweight'] = { + # 'expr': ' * '.join(['SFweight2l', 'LepWPCut', 'LepWPSF','btagSF']), + 'expr': ' * '.join(['SFweight2l', 'LepWPCut', 'LepWPSF']), + 'samples': mc +} + +aliases['SFweightEleUp'] = { + 'expr': 'LepSF2l__ele_'+eleWP+'__Up', + 'samples': mc +} +aliases['SFweightEleDown'] = { + 'expr': 'LepSF2l__ele_'+eleWP+'__Down', + 'samples': mc +} +aliases['SFweightMuUp'] = { + 'expr': 'LepSF2l__mu_'+muWP+'__Up', + 'samples': mc +} +aliases['SFweightMuDown'] = { + 'expr': 'LepSF2l__mu_'+muWP+'__Down', + 'samples': mc +} diff --git a/ZpTreweighting/2023_v12/automate.py b/ZpTreweighting/2023_v12/automate.py new file mode 100644 index 00000000..76e49d4d --- /dev/null +++ b/ZpTreweighting/2023_v12/automate.py @@ -0,0 +1,744 @@ +#!/usr/bin/env python3 +""" +run_ZpTrw_workflow.py +===================== +Automated Z pT reweighting extraction workflow. + +The script orchestrates the following steps: + + Pre-Phase 1 — prepare configuration files + ------------------------------------------ + 0a. Patch configuration.py: set tag = "{year}_{sample_type}_{original_tag}" + 0b. Comment out the addSampleWeight line for DY pT reweighting in samples.py + (weights cannot be applied before they are derived) + + Round 1 — ZpTreweighting analysis + ---------------------------------- + 1a. mkShapesRDF -c 1 compile configuration + 1b. mkShapesRDF -o 0 -f . -b 1 submit HTCondor jobs + 1c. (wait) poll condor until all jobs finish + 1d. mkShapesRDF -o 2 -f . merge individual ROOT files (hadd) + 2. extract_Zptrw.py extract the Z pT reweighting function + and overwrite dyZpTrw.json + 2b. 
import argparse
import glob
import os
import re
import shutil
import subprocess
import sys
import time


def banner(msg):
    """Print *msg* between two rules of ``=`` that are at least 60 columns wide."""
    width = max(60, len(msg) + 4)
    print("\n" + "=" * width)
    print(f" {msg}")
    print("=" * width)


def info(msg):
    """Print a single indented status line."""
    print(f" {msg}")


# Run commands
def run_cmd(cmd, dry_run=False, cwd=None):
    """Print and (optionally) execute *cmd*.

    *cmd* may be a list of strings or a single shell string; a string is run
    through the shell, a list is not.  Returns the process exit code (always 0
    in dry-run mode, where nothing is executed).
    """
    display = " ".join(cmd) if isinstance(cmd, list) else cmd
    info(f"$ {display}")
    if dry_run:
        return 0
    result = subprocess.run(cmd, cwd=cwd, shell=isinstance(cmd, str))
    return result.returncode


def run_cmd_output(cmd, cwd=None):
    """Run *cmd* and return *(returncode, stdout, stderr)* as strings."""
    result = subprocess.run(
        cmd,
        cwd=cwd,
        capture_output=True,
        text=True,
        shell=isinstance(cmd, str),
    )
    return result.returncode, result.stdout, result.stderr


# HTCondor job tracking
def _get_cluster_ids_from_logs(batch_dir, tag):
    """Return the set of condor cluster IDs (as strings) found in the job logs.

    Log files are expected at ``{batch_dir}/{tag}/**/log.txt``.  A cluster ID
    is taken from every line that starts with ``000 (<cluster>.<proc>.<sub>)``.
    Unreadable log files are skipped on purpose (best effort).
    """
    log_pattern = os.path.join(batch_dir, tag, "**", "log.txt")
    submit_line = re.compile(r"000 \((\d+)\.\d+\.\d+\)")
    cluster_ids = set()
    for log_file in glob.glob(log_pattern, recursive=True):
        try:
            with open(log_file) as fh:
                for line in fh:
                    m = submit_line.match(line)
                    if m:
                        cluster_ids.add(m.group(1))
        except OSError:
            # Deliberately ignored: a missing/unreadable log must not abort the wait.
            pass
    return cluster_ids


def _count_condor_jobs_in_cluster(cluster_id):
    """Return the number of jobs ``condor_q`` still reports for *cluster_id*.

    The count is the first ``<N> job(s)`` figure in the ``condor_q`` output.
    A non-zero exit code is interpreted as "cluster is gone" and yields 0.
    """
    rc, stdout, _ = run_cmd_output(["condor_q", str(cluster_id)])
    if rc != 0:
        # NOTE(review): any condor_q failure is treated as "no jobs left";
        # a transient scheduler error would therefore end the wait early.
        return 0
    m = re.search(r"(\d+) jobs?", stdout)
    return int(m.group(1)) if m else 0


def wait_for_condor_jobs(batch_dir, tag, poll_interval=120, dry_run=False):
    """Block until no HTCondor job belonging to *tag* is queued any more.

    Cluster IDs are collected from the log files under ``batch_dir/tag``; if
    none are found the lookup is retried once after 60 s, and the function
    returns if there are still none.  Each poll queries every cluster exactly
    once, so the "remaining" set and the reported total come from the same
    snapshot.  Returns ``None``; in dry-run mode it returns immediately.
    """
    CONDOR_REGISTRATION_DELAY = 10

    if dry_run:
        info("[dry-run] Skipping condor wait.")
        return

    # Give HTCondor a few seconds to register newly submitted jobs.
    time.sleep(CONDOR_REGISTRATION_DELAY)

    cluster_ids = _get_cluster_ids_from_logs(batch_dir, tag)
    if not cluster_ids:
        info("WARNING: No condor cluster IDs found in log files. "
             "Waiting 60 s and retrying once...")
        time.sleep(60)
        cluster_ids = _get_cluster_ids_from_logs(batch_dir, tag)

    if not cluster_ids:
        info("WARNING: Still no cluster IDs found. "
             "Assuming jobs have already completed or were submitted "
             "outside HTCondor.")
        return

    info(f"Tracking condor cluster(s): {', '.join(sorted(cluster_ids))}")

    while True:
        # One condor_q call per cluster and per poll.
        counts = {cid: _count_condor_jobs_in_cluster(cid) for cid in cluster_ids}
        remaining = {cid: n for cid, n in counts.items() if n > 0}
        if not remaining:
            info("All condor jobs have completed.")
            return

        total = sum(remaining.values())
        info(
            f"[{time.strftime('%H:%M:%S')}] "
            f"{total} job(s) still queued in "
            f"cluster(s) {', '.join(sorted(remaining))}. "
            f"Polling again in {poll_interval} s..."
        )
        time.sleep(poll_interval)


# Configuration reader
def read_configuration(cfg_file):
    """Execute *cfg_file* and return a dict with the analysis settings.

    Returned keys: ``tag``, ``outputFolder`` (trailing ``/`` removed),
    ``outputFile`` and ``batchFolder``.  Values missing from the executed
    namespace fall back to defaults; the default output folder is built from
    ``$USER`` (``"unknown"`` when the variable is unset *or empty*).

    NOTE: the file is run with ``exec``; only point this at trusted
    configuration files.
    """
    ns = {
        "__file__": os.path.abspath(cfg_file),
        "os": os,
        "sys": sys,
    }
    try:
        with open(cfg_file) as fh:
            exec(compile(fh.read(), cfg_file, "exec"), ns)
    except Exception as exc:
        # configuration.py may call os.getlogin() which can fail in some
        # environments; fall through with whatever was captured so far.
        info(f"WARNING: Error while parsing {cfg_file}: {exc}")

    tag = ns.get("tag", "ZpTreweighting")
    # `or` also covers USER="" which would otherwise break the [0] below.
    user = os.environ.get("USER") or "unknown"
    default_output_folder = os.path.join(
        "/eos/user",
        user[0],
        user,
        "mkShapesRDF_rootfiles",
        tag,
        "rootFile",
    )
    output_folder = ns.get("outputFolder", default_output_folder)
    return {
        "tag": tag,
        "outputFolder": output_folder.rstrip("/"),
        "outputFile": ns.get("outputFile", f"mkShapes__{tag}.root"),
        "batchFolder": ns.get("batchFolder", "condor"),
    }


# Workflow phases
def phase1_submit(zptrw_dir, dry_run=False):
    """Phase 1a+1b: compile the configuration and submit the condor jobs.

    Exits the interpreter (``sys.exit``) if either mkShapesRDF call fails.
    """
    banner("Phase 1a: Compile ZpTreweighting configuration")
    rc = run_cmd(["mkShapesRDF", "-c", "1"], dry_run=dry_run, cwd=zptrw_dir)
    if rc != 0:
        sys.exit(f"ERROR: mkShapesRDF -c 1 failed (exit code {rc})")

    banner("Phase 1b: Submit ZpTreweighting condor jobs")
    rc = run_cmd(
        ["mkShapesRDF", "-o", "0", "-f", ".", "-b", "1"],
        dry_run=dry_run,
        cwd=zptrw_dir,
    )
    if rc != 0:
        sys.exit(f"ERROR: mkShapesRDF -o 0 failed (exit code {rc})")


def phase1_wait(zptrw_dir, cfg, poll_interval=120, dry_run=False):
    """Phase 1c: wait for all condor jobs of ``cfg["tag"]`` to finish."""
    banner("Phase 1c: Waiting for HTCondor jobs to complete")
    batch_dir = os.path.join(zptrw_dir, cfg["batchFolder"])
    wait_for_condor_jobs(
        batch_dir=batch_dir,
        tag=cfg["tag"],
        poll_interval=poll_interval,
        dry_run=dry_run,
    )


def phase1_merge(zptrw_dir, dry_run=False):
    """Phase 1d: merge the per-job ROOT files (hadd); exit on failure."""
    banner("Phase 1d: Merge ROOT files")
    rc = run_cmd(
        ["mkShapesRDF", "-o", "2", "-f", "."],
        dry_run=dry_run,
        cwd=zptrw_dir,
    )
    if rc != 0:
        sys.exit(f"ERROR: Phase 1d - mkShapesRDF -o 2 (merge) failed (exit code {rc})")


# Configuration / samples file patching helpers

def patch_configuration_tag(cfg_file, year, sample_type, suffix, dry_run=False):
    """Set the ``tag`` in configuration.py to ``{year}_{sample_type}_ZpTreweighting_{suffix}``.

    Only the first ``tag = "..."`` / ``tag = '...'`` assignment is rewritten and
    its previous value is discarded, so running this repeatedly is idempotent.
    Nothing is written in dry-run mode or when no such assignment exists.
    """
    banner("Patching configuration.py: prepending year/sample-type to tag")

    if not os.path.exists(cfg_file):
        info(f"WARNING: {cfg_file} not found; skipping tag patch.")
        return

    with open(cfg_file) as fh:
        content = fh.read()

    # Match: tag = "..." or tag = '...'; group 1 keeps the left-hand side as written.
    pattern = re.compile(
        r"""^(\s*tag\s*=\s*)["'][^"']*["']""",
        re.MULTILINE,
    )

    def _replace(m):
        new_tag = f'{year}_{sample_type}_ZpTreweighting_{suffix}'
        info(f" tag: → '{new_tag}'")
        return f'{m.group(1)}"{new_tag}"'

    new_content, count = pattern.subn(_replace, content, count=1)
    if count == 0:
        info("WARNING: Could not find 'tag = ...' line in configuration.py; "
             "skipping tag patch. Verify that configuration.py contains a "
             "tag = \"\" assignment at module level.")
        return

    if not dry_run:
        with open(cfg_file, "w") as fh:
            fh.write(new_content)
        info(f" Updated: {cfg_file}")
    else:
        info("[dry-run] Would update configuration.py tag.")


def comment_addsampleweight_dy(samples_file, dry_run=False):
    """Comment out the ``addSampleWeight(..., 'DY_*_ZpTrw')`` call in samples.py.

    The ``#`` is inserted *after* the call's indentation so that
    :func:`uncomment_addsampleweight_dy` can restore the line exactly; for an
    unindented call the result is ``# addSampleWeight(...)``.  If the call
    spans several lines, every continuation line is prefixed with ``# `` too.
    Already commented calls are left alone.
    """
    banner("Commenting out addSampleWeight for DY pT reweighting in samples.py")

    if not os.path.exists(samples_file):
        info(f"WARNING: {samples_file} not found; skipping.")
        return

    with open(samples_file) as fh:
        content = fh.read()

    # group 1: indentation, group 2: the whole call up to its first ')'.
    # A commented line cannot match because only blanks may precede the name.
    pattern = re.compile(
        r"^([ \t]*)(addSampleWeight\s*\([^)]*['\"]DY_[A-Za-z0-9]+_ZpTrw['\"][^)]*\))",
        re.MULTILINE,
    )

    def _comment_out(m):
        indent, call = m.group(1), m.group(2)
        first, *rest = call.split("\n")
        return "\n".join([f"{indent}# {first}"] + [f"# {line}" for line in rest])

    new_content, count = pattern.subn(_comment_out, content)
    if count == 0:
        info("WARNING: No uncommented addSampleWeight DY ZpTrw line found; skipping.")
        return

    info(" Commented out addSampleWeight DY ZpTrw line.")

    if not dry_run:
        with open(samples_file, "w") as fh:
            fh.write(new_content)
        info(f" Updated: {samples_file}")
    else:
        info("[dry-run] Would comment out addSampleWeight DY ZpTrw line.")


def uncomment_addsampleweight_dy(samples_file, dry_run=False):
    """Re-enable a commented ``addSampleWeight(..., 'DY_*_ZpTrw')`` call in samples.py.

    The indentation found in front of the ``#`` is kept, the ``#`` and the
    blanks directly after it are dropped, and a leading ``# `` is removed from
    continuation lines of a multi-line call.
    """
    banner("Uncommenting addSampleWeight for DY pT reweighting in samples.py")

    if not os.path.exists(samples_file):
        info(f"WARNING: {samples_file} not found; skipping.")
        return

    with open(samples_file) as fh:
        content = fh.read()

    # Match a commented addSampleWeight call referencing a DY_*_ZpTrw weight.
    pattern = re.compile(
        r"^([ \t]*)#[ \t]*(addSampleWeight\s*\([^)]*['\"]DY_[A-Za-z0-9]+_ZpTrw['\"][^)]*\))",
        re.MULTILINE,
    )

    def _restore(m):
        indent, call = m.group(1), m.group(2)
        first, *rest = call.split("\n")
        restored = [line[2:] if line.startswith("# ") else line for line in rest]
        return "\n".join([indent + first] + restored)

    new_content, count = pattern.subn(_restore, content)
    if count == 0:
        info("WARNING: No commented addSampleWeight DY ZpTrw line found; skipping.")
        return

    info(" Uncommented addSampleWeight DY ZpTrw line.")

    if not dry_run:
        with open(samples_file, "w") as fh:
            fh.write(new_content)
        info(f" Updated: {samples_file}")
    else:
        info("[dry-run] Would uncomment addSampleWeight DY ZpTrw line.")


def update_aliases_dyrew_key(aliases_file, year, sample_type, dry_run=False):
    """Point every ``DYrew[...][..._<0j|1j|2j>]`` lookup in aliases.py at *year*/*sample_type*.

    The jet-bin suffix and the quote style of each reference are preserved;
    only the year key and the part before ``_0j``/``_1j``/``_2j`` are replaced.
    """
    banner(f"Updating DYrew key in {os.path.basename(aliases_file)}")

    if not os.path.exists(aliases_file):
        info(f"WARNING: {aliases_file} not found; skipping DYrew key update.")
        return

    with open(aliases_file) as fh:
        content = fh.read()

    # Match DYrew['oldyear']['oldtype_jetbin'] or DYrew["oldyear"]["oldtype_jetbin"]
    pattern = re.compile(
        r"""DYrew\[\s*(['"])[^'"]+\1\s*\]\[\s*(['"])[^'"]+_(0j|1j|2j)\2\s*\]"""
    )

    def replacer(match):
        quote1, quote2, jetbin = match.group(1), match.group(2), match.group(3)
        return f"DYrew[{quote1}{year}{quote1}][{quote2}{sample_type}_{jetbin}{quote2}]"

    new_content, count = pattern.subn(replacer, content)
    if count == 0:
        info(f"No DYrew['...']['...'] references found in {aliases_file}; skipping.")
        return

    info(f" Updated {count} DYrew key reference(s) → ['{year}']['{sample_type}_']")

    if not dry_run:
        with open(aliases_file, "w") as fh:
            fh.write(new_content)
        info(f" Updated: {aliases_file}")
    else:
        info(f"[dry-run] Would update DYrew key in {aliases_file}.")


# Plot helpers
def move_zptrw_plots(zptrw_dir, year, sample_type, folder_suffix="", dry_run=False):
    """Move ``ZpTreweighting_*.pdf``/``*.png`` from *zptrw_dir* into an archive folder.

    The target is ``{zptrw_dir}/extractPlots_{year}_{sample_type}_{folder_suffix}``;
    it is created only when there is something to move and not in dry-run mode.
    """
    banner("Moving extract_Zptrw.py plots to archive folder")

    target_dir = os.path.join(zptrw_dir, f"extractPlots_{year}_{sample_type}_{folder_suffix}")
    plot_files = (
        glob.glob(os.path.join(zptrw_dir, "ZpTreweighting_*.pdf"))
        + glob.glob(os.path.join(zptrw_dir, "ZpTreweighting_*.png"))
    )

    if not plot_files:
        info("No ZpTreweighting_*.pdf/png files found to move.")
        return

    info(f"Target folder: {target_dir}")
    if not dry_run:
        # Create the folder once instead of once per file.
        os.makedirs(target_dir, exist_ok=True)
    for src in plot_files:
        dest = os.path.join(target_dir, os.path.basename(src))
        info(f" {os.path.basename(src)} → {os.path.relpath(dest, zptrw_dir)}")
        if not dry_run:
            shutil.move(src, dest)

    if dry_run:
        info("[dry-run] Would create target folder and move plot files.")


def run_mkplot(analysis_dir, dry_run=False):
    """Run ``mkPlot --onlyPlot cratio --showIntegralLegend 1 --fileFormats png``.

    A failing mkPlot only produces a warning; the workflow continues.
    """
    banner(f"Running mkPlot in {analysis_dir}")
    rc = run_cmd(
        ["mkPlot", "--onlyPlot", "cratio", "--showIntegralLegend", "1",
         "--fileFormats", "png"],
        dry_run=dry_run,
        cwd=analysis_dir,
    )
    if rc != 0:
        info(f"WARNING: mkPlot exited with code {rc}. Continuing workflow.")


def phase2_extract(zptrw_dir, cfg, year="2022", sample_type="LO", run_fit = "", dry_run=False):
    """Phase 2: run extract_Zptrw.py for the 0/1/2-jet bins.

    For every jet bin the script is run twice: once in the Z->mumu channel
    (with ``--write-json``; additionally ``-f -n 2`` when *run_fit* is
    ``"-f"``) and once in the Z->ee channel for plots only.  The exit code of
    **each** run is checked, so a failing mumu run is no longer masked by a
    succeeding ee run.  Exits the interpreter if the merged ROOT file is
    missing (unless *dry_run*) or if any run fails.
    """
    banner("Phase 2: Extract Z pT reweighting function → update dyZpTrw.json")

    merged_file = os.path.join(cfg["outputFolder"], cfg["outputFile"])
    dyzptrw_json = os.path.join(zptrw_dir, "dyZpTrw.json")
    extract_script = os.path.join(zptrw_dir, "extract_Zptrw.py")

    if not dry_run and not os.path.exists(merged_file):
        sys.exit(
            f"ERROR: Merged ROOT file not found:\n"
            f" {merged_file}\n"
            f"Run 'mkShapesRDF -o 2 -f .' in {zptrw_dir} first."
        )

    # Fit options are only passed when fitting was requested; otherwise the
    # script just plots.
    fit_args = ["-f", "-n", "2"] if run_fit == "-f" else []

    for njet in [0, 1, 2]:
        # Z->mumu channel: derives the weights / writes the JSON for this jet bin.
        cmd = [sys.executable, extract_script] + fit_args + [
            "-c", "mm", "-nj", str(njet), "--input", merged_file,
            "--write-json", dyzptrw_json,
            "--year", str(year), "--sample-type", sample_type,
        ]
        rc = run_cmd(cmd, dry_run=dry_run, cwd=zptrw_dir)
        if rc != 0:
            sys.exit(f"ERROR: extract_Zptrw.py failed (exit code {rc})")

        # Z->ee channel: plots only, same jet bin.
        cmd = [sys.executable, extract_script, "-c", "ee", "-nj", str(njet), "--input", merged_file]
        rc = run_cmd(cmd, dry_run=dry_run, cwd=zptrw_dir)
        if rc != 0:
            sys.exit(f"ERROR: extract_Zptrw.py failed (exit code {rc})")
        info(f"dyZpTrw.json updated for nJet={njet} → {dyzptrw_json}")

# Phase 3 — prepare, compile, submit, wait, merge, and plot second-round analysis.
+def phase3_second_round(second_dir, year, sample_type, poll_interval=120, + skip_second_wait=False, skip_second_merge=False, + dry_run=False): + banner(f"Phase 3: Second-round mkShapesRDF in\n {second_dir}") + + # Update aliases.py and samples.py before compiling + aliases_file = os.path.join(second_dir, "aliases.py") + update_aliases_dyrew_key(aliases_file, year, sample_type, dry_run=dry_run) + + samples_file = os.path.join(second_dir, "samples.py") + uncomment_addsampleweight_dy(samples_file, dry_run=dry_run) + + rc = run_cmd(["mkShapesRDF", "-c", "1"], dry_run=dry_run, cwd=second_dir) + if rc != 0: + sys.exit( + f"ERROR: mkShapesRDF -c 1 failed in {second_dir} (exit code {rc})" + ) + + rc = run_cmd( + ["mkShapesRDF", "-o", "0", "-f", ".", "-b", "1"], + dry_run=dry_run, + cwd=second_dir, + ) + if rc != 0: + sys.exit( + f"ERROR: mkShapesRDF -o 0 failed in {second_dir} (exit code {rc})" + ) + + # Wait for second-round condor jobs + second_cfg_file = os.path.join(second_dir, "configuration.py") + has_cfg = os.path.exists(second_cfg_file) + + if not skip_second_wait: + if has_cfg or dry_run: + banner("Phase 3c: Waiting for second-round HTCondor jobs to complete") + second_cfg = ( + read_configuration(second_cfg_file) + if has_cfg + else {"tag": "unknown", "batchFolder": "condor", + "outputFolder": ".", "outputFile": "output.root"} + ) + batch_dir = os.path.join(second_dir, second_cfg["batchFolder"]) + wait_for_condor_jobs( + batch_dir=batch_dir, + tag=second_cfg["tag"], + poll_interval=poll_interval, + dry_run=dry_run, + ) + else: + info(f"WARNING: No configuration.py found in {second_dir}; " + "skipping second-round condor wait.") + else: + info("\n[skip-second-wait] Skipping second-round condor wait.") + + # Merge second-round ROOT files + if not skip_second_merge: + banner("Phase 3d: Merge second-round ROOT files") + rc = run_cmd( + ["mkShapesRDF", "-o", "2", "-f", "."], + dry_run=dry_run, + cwd=second_dir, + ) + if rc != 0: + sys.exit( + f"ERROR: mkShapesRDF -o 
2 (merge) failed in {second_dir} " + f"(exit code {rc})" + ) + else: + info("\n[skip-second-merge] Skipping second-round ROOT file merge.") + + # Create RDF plots for the second-round merged output + run_mkplot(second_dir, dry_run=dry_run) + + +# CLI +def parse_args(): + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + # ---- paths ---- + parser.add_argument( + "--zptrw-folder", + default=".", + metavar="DIR", + help="Path to the ZpTreweighting analysis folder " + "(default: current directory).", + ) + parser.add_argument( + "--second-analysis", + default=None, + metavar="DIR", + help="Path to the second-round analysis folder. " + "When given, Phase 3 updates aliases/samples, compiles, submits, " + "waits, merges, and runs mkPlot there after dyZpTrw.py has been updated.", + ) + + # ---- physics ---- + parser.add_argument( + "--year", + default="2022", + help="Year key written to DYrew in dyZpTrw.json (default: '2022').", + ) + parser.add_argument( + "--sample-type", + default="LO", + help="Sample-type key written to DYrew in dyZpTrw.json " + "(default: 'LO').", + ) + + # ---- condor polling ---- + parser.add_argument( + "--poll-interval", + type=int, + default=120, + metavar="SECONDS", + help="Seconds between condor_q polls while waiting for jobs " + "(default: 120).", + ) + + # ---- skip flags ---- + skip = parser.add_argument_group("skip flags (for re-running partial workflow)") + skip.add_argument( + "--skip-submit", + action="store_true", + help="Skip Phase 1a+1b (assume jobs are already running or done).", + ) + skip.add_argument( + "--skip-wait", + action="store_true", + help="Skip Phase 1c (assume all jobs have already finished).", + ) + skip.add_argument( + "--skip-merge", + action="store_true", + help="Skip Phase 1d (assume the merged ROOT file already exists).", + ) + skip.add_argument( + "--skip-extract", + action="store_true", + help="Skip Phase 2 (assume dyZpTrw.json is already up 
to date).", + ) + skip.add_argument( + "--skip-second-wait", + action="store_true", + help="Skip waiting for second-round HTCondor jobs (Phase 3c).", + ) + skip.add_argument( + "--skip-second-merge", + action="store_true", + help="Skip merging second-round ROOT files (Phase 3d).", + ) + + # ---- misc ---- + parser.add_argument( + "--dry-run", + action="store_true", + help="Print every command that would be run without executing it.", + ) + + return parser.parse_args() + + +def main(): + args = parse_args() + + zptrw_dir = os.path.abspath(args.zptrw_folder) + if not os.path.isdir(zptrw_dir): + sys.exit(f"ERROR: ZpTreweighting folder not found: {zptrw_dir}") + + cfg_file = os.path.join(zptrw_dir, "configuration.py") + if not os.path.exists(cfg_file): + sys.exit(f"ERROR: configuration.py not found in {zptrw_dir}") + + banner("Z pT reweighting workflow") + info(f"ZpTreweighting folder : {zptrw_dir}") + if args.dry_run: + info("*** DRY-RUN mode — no commands will be executed ***") + + # ---- Pre-Phase 1: Patch configuration.py tag and comment out DY pT rw weight ---- + patch_configuration_tag(cfg_file, args.year, args.sample_type, suffix="obtainWeights", dry_run=args.dry_run) + + cfg = read_configuration(cfg_file) + info(f"tag : {cfg['tag']}") + info(f"outputFolder : {cfg['outputFolder']}") + info(f"outputFile : {cfg['outputFile']}") + info(f"batchFolder : {cfg['batchFolder']}") + + samples_file = os.path.join(zptrw_dir, "samples.py") + comment_addsampleweight_dy(samples_file, dry_run=args.dry_run) + + # ---- Phase 1a+1b: Submit ---- + if not args.skip_submit: + phase1_submit(zptrw_dir, dry_run=args.dry_run) + else: + info("\n[skip-submit] Skipping job submission.") + + # ---- Phase 1c: Wait ---- + if not args.skip_wait and not args.skip_submit: + phase1_wait( + zptrw_dir, + cfg, + poll_interval=args.poll_interval, + dry_run=args.dry_run, + ) + elif args.skip_wait: + info("\n[skip-wait] Skipping condor wait.") + + # ---- Phase 1d: Merge ---- + if not args.skip_merge: + 
phase1_merge(zptrw_dir, dry_run=args.dry_run) + else: + info("\n[skip-merge] Skipping ROOT file merge.") + + # ---- Phase 2: Extract + Update ---- + if not args.skip_extract: + phase2_extract(zptrw_dir, cfg, year=args.year, sample_type=args.sample_type, run_fit="-f", dry_run=args.dry_run) + # Move plots produced by extract_Zptrw.py to archive folder + move_zptrw_plots(zptrw_dir, args.year, args.sample_type, folder_suffix = "obtainWeights", dry_run=args.dry_run) + # Create RDF plots from the merged ROOT file + run_mkplot(zptrw_dir, dry_run=args.dry_run) + # Rename condor and config folders: + cmd = ["mv", os.path.join(zptrw_dir, "condor/"), os.path.join(zptrw_dir, f"condor_{args.year}_{args.sample_type}_obtainWeights")] + rc = run_cmd(cmd, cwd=zptrw_dir) + if rc != 0: + sys.exit(f"ERROR: failed to rename condor directory (exit code {rc})") + cmd = ["pwd"] + rc = run_cmd(cmd, cwd=zptrw_dir) + cmd = ["mv", os.path.join(zptrw_dir, "configs/"), os.path.join(zptrw_dir, f"configs_{args.year}_{args.sample_type}_obtainWeights")] + rc = run_cmd(cmd, cwd=zptrw_dir) + if rc != 0: + sys.exit(f"ERROR: failed to rename configs directory (exit code {rc})") + + else: + info("\n[skip-extract] Skipping weight extraction, plot archiving, and mkPlot " + "(assumes dyZpTrw.json and plots are already up to date).") + + # ---- Phase 3 (optional): Second analysis ---- + if args.second_analysis: + second_dir = os.path.abspath(args.second_analysis) + if not os.path.isdir(second_dir): + sys.exit(f"ERROR: Second-analysis folder not found: {second_dir}") + + patch_configuration_tag(cfg_file, args.year, args.sample_type, suffix="afterReweighting", dry_run=args.dry_run) + cfg = read_configuration(cfg_file) + info(f"tag : {cfg['tag']}") + info(f"outputFolder : {cfg['outputFolder']}") + info(f"outputFile : {cfg['outputFile']}") + info(f"batchFolder : {cfg['batchFolder']}") + + phase3_second_round( + second_dir, + year=args.year, + sample_type=args.sample_type, + poll_interval=args.poll_interval, + 
skip_second_wait=args.skip_second_wait, + skip_second_merge=args.skip_second_merge, + dry_run=args.dry_run, + ) + phase2_extract(zptrw_dir, cfg, year=args.year, sample_type=args.sample_type, run_fit="", dry_run=args.dry_run) + # Move plots produced by extract_Zptrw.py to folder + move_zptrw_plots(zptrw_dir, args.year, args.sample_type, folder_suffix = "afterReweighting", dry_run=args.dry_run) + # Create comparison plots from the merged ROOT file + run_mkplot(zptrw_dir, dry_run=args.dry_run) + # Rename log, condor, and config folders: + cmd = ["mv", os.path.join(zptrw_dir, "condor/"), os.path.join(zptrw_dir, f"condor_{args.year}_{args.sample_type}_afterReweighting")] + rc = run_cmd(cmd, cwd=zptrw_dir) + if rc != 0: + sys.exit(f"ERROR: failed to rename condor directory (exit code {rc})") + cmd = ["mv", os.path.join(zptrw_dir, "configs/"), os.path.join(zptrw_dir, f"configs_{args.year}_{args.sample_type}_afterReweighting")] + rc = run_cmd(cmd, cwd=zptrw_dir) + if rc != 0: + sys.exit(f"ERROR: failed to rename configs directory (exit code {rc})") + + banner("Workflow complete!") + if args.second_analysis: + info("Second-round analysis finished. Plots are in the analysis folder.") + else: + info("dyZpTrw.json has been updated.") + info("To run the second-round analysis:") + info(" cd ") + info(" mkShapesRDF -c 1") + info(" mkShapesRDF -o 0 -f . 
-b 1") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/ZpTreweighting/2023_v12/configuration.py b/ZpTreweighting/2023_v12/configuration.py new file mode 100644 index 00000000..4beaa2de --- /dev/null +++ b/ZpTreweighting/2023_v12/configuration.py @@ -0,0 +1,90 @@ +import sys,os + +# tag used to identify the configuration folder version +tag = "ZpTreweighting" + +# file to use as runner script, default uses mkShapesRDF.shapeAnalysis.runner, otherwise specify path to script +runnerFile = "default" + +# output file name +outputFile = "mkShapes__{}.root".format(tag) + +# path to ouput folder +outputFolder = "/eos/user/" + os.getlogin()[0] + "/" + os.getlogin() + "/mkShapesRDF_rootfiles/" + tag + "/rootFile/" + +# path to batch folder (used for condor submission) +batchFolder = "condor" + +# path to configuration folder (will contain all the compiled configuration files) +configsFolder = "configs" + +# luminosity to normalize to (in 1/fb) +lumi = 17.794 + +# file with dict of aliases to define +aliasesFile = "aliases.py" + +# file with dict of variables +variablesFile = "variables.py" + +# file with dict of cuts +cutsFile = "cuts.py" + +# file with dict of samples +samplesFile = "samples.py" + +# file with dict of samples +plotFile = "plot.py" + +# file with dict of structure (used to define combine processes) +structureFile = "structure.py" + +# nuisances file for mkDatacards and for mkShape +nuisancesFile = "nuisances.py" + +# path to folder where to save plots +plotPath = "plots_" + tag + +# this lines are executed right before the runner on the condor node +mountEOS = [ + # "export KRB5CCNAME=/home/gpizzati/krb5\n", +] + +# list of imports to import when compiling the whole configuration folder, it should not contain imports used by configuration.py +imports = ["os", "glob", ("collections", "OrderedDict"), "ROOT"] + +# list of files to compile +filesToExec = [ + samplesFile, + aliasesFile, + variablesFile, + cutsFile, + plotFile, + 
nuisancesFile, + structureFile, +] + +# list of variables to keep in the compiled configuration folder +varsToKeep = [ + "batchVars", + "outputFolder", + "batchFolder", + "configsFolder", + "outputFile", + "runnerFile", + "tag", + "samples", + "aliases", + "variables", + ("cuts", {"cuts": "cuts", "preselections": "preselections"}), + ("plot", {"plot": "plot", "groupPlot": "groupPlot", "legend": "legend"}), + "nuisances", + "structure", + "lumi", +] + +# list of variables to keep in the batch submission script (script.py) +batchVars = varsToKeep[varsToKeep.index("samples") :] + + +varsToKeep += ['plotPath'] diff --git a/ZpTreweighting/2023_v12/cuts.py b/ZpTreweighting/2023_v12/cuts.py new file mode 100644 index 00000000..41a5ca00 --- /dev/null +++ b/ZpTreweighting/2023_v12/cuts.py @@ -0,0 +1,59 @@ +cuts = {} + +# Preselections - applied to all the cuts, noJetInHorn replaced by zeroJet +preselections = 'Lepton_pt[0] > 25 \ + && Lepton_pt[1] > 13 \ + && (nLepton >= 2 && Alt(Lepton_pt,2,0) < 10) \ + && abs(Lepton_eta[0]) < 2.5 \ + && abs(Lepton_eta[1]) < 2.5 \ + && mll > 60 \ + && mll < 120 \ +' +# Remove zeroJet preselection as weights are being derived in nJet bins (DS, 18Mar26) + # && zeroJet \ + +# Individual cuts and categories + +# Commenting out the inclusive cuts (DS, 19Nov25) +# cuts['Zee_incl'] = '(Lepton_pdgId[0] * Lepton_pdgId[1] == -11*11)' +# cuts['Zmm_incl'] = '(Lepton_pdgId[0] * Lepton_pdgId[1] == -13*13)' + +cuts['Zee'] = { + 'expr' : '(Lepton_pdgId[0] * Lepton_pdgId[1] == -11*11)', + 'categories' : { + '0j' : 'zeroJet', + '1j' : 'oneJet && Alt(CleanJet_pt,1,0)<30', + '2j' : 'multiJet', + } +} + +cuts['Zmm'] = { + 'expr' : '(Lepton_pdgId[0] * Lepton_pdgId[1] == -13*13)', + 'categories' : { + '0j' : 'zeroJet', + '1j' : 'oneJet && Alt(CleanJet_pt,1,0)<30', + '2j' : 'multiJet', + } +} + +# cuts['Zee_noJetInHorn_incl'] = '(Lepton_pdgId[0] * Lepton_pdgId[1] == -11*11) && Sum(CleanJet_pt > 30 && CleanJet_pt < 50 && abs(CleanJet_eta) > 2.6 && 
abs(CleanJet_eta) < 3.1) == 0' + +# cuts['Zmm_noJetInHorn_incl'] = '(Lepton_pdgId[0] * Lepton_pdgId[1] == -13*13) && Sum(CleanJet_pt > 30 && CleanJet_pt < 50 && abs(CleanJet_eta) > 2.6 && abs(CleanJet_eta) < 3.1) == 0' + +# cuts['Zee_noJetInHorn'] = { +# 'expr' : '(Lepton_pdgId[0] * Lepton_pdgId[1] == -11*11) && Sum(CleanJet_pt > 30 && CleanJet_pt < 50 && abs(CleanJet_eta) > 2.6 && abs(CleanJet_eta) < 3.1) == 0', +# 'categories' : { +# '0j' : 'zeroJet', +# '1j' : 'oneJet && Alt(CleanJet_pt,1,0)<30', +# '2j' : 'multiJet', +# } +# } + +# cuts['Zmm_noJetInHorn'] = { +# 'expr' : '(Lepton_pdgId[0] * Lepton_pdgId[1] == -13*13) && Sum(CleanJet_pt > 30 && CleanJet_pt < 50 && abs(CleanJet_eta) > 2.6 && abs(CleanJet_eta) < 3.1) == 0', +# 'categories' : { +# '0j' : 'zeroJet', +# '1j' : 'oneJet && Alt(CleanJet_pt,1,0)<30', +# '2j' : 'multiJet', +# } +# } diff --git a/ZpTreweighting/2023_v12/dyZpTrw.json b/ZpTreweighting/2023_v12/dyZpTrw.json new file mode 100644 index 00000000..3a247a9b --- /dev/null +++ b/ZpTreweighting/2023_v12/dyZpTrw.json @@ -0,0 +1,17 @@ +{ + "2022": { + "NLO_0j": "0.843196*(0.211634*TMath::Erf((x-9.120552)/3.728196)-0.024628*x+0.000400*TMath::Sq(x)+1.361984)", + "NLO_1j": "0.843196*(0.211634*TMath::Erf((x-9.120552)/3.728196)-0.024628*x+0.000400*TMath::Sq(x)+1.361984)", + "NLO_2j": "0.843196*(0.211634*TMath::Erf((x-9.120552)/3.728196)-0.024628*x+0.000400*TMath::Sq(x)+1.361984)" + }, + "2023": { + "NLO_0j": "1.0*(1.0*TMath::Erf((x-1.0)/1.0)-1.0*x+1.0*TMath::Sq(x)+1.0)", + "NLO_1j": "1.0*(1.0*TMath::Erf((x-1.0)/1.0)-1.0*x+1.0*TMath::Sq(x)+1.0)", + "NLO_2j": "1.0*(1.0*TMath::Erf((x-1.0)/1.0)-1.0*x+1.0*TMath::Sq(x)+1.0)" + }, + "2024": { + "NLO_0j": "1.0*(1.0*TMath::Erf((x-1.0)/1.0)-1.0*x+1.0*TMath::Sq(x)+1.0)", + "NLO_1j": "1.0*(1.0*TMath::Erf((x-1.0)/1.0)-1.0*x+1.0*TMath::Sq(x)+1.0)", + "NLO_2j": "1.0*(1.0*TMath::Erf((x-1.0)/1.0)-1.0*x+1.0*TMath::Sq(x)+1.0)" + } +} \ No newline at end of file diff --git a/ZpTreweighting/2023_v12/dyZpTrw.py 
b/ZpTreweighting/2023_v12/dyZpTrw.py new file mode 100644 index 00000000..ebeea418 --- /dev/null +++ b/ZpTreweighting/2023_v12/dyZpTrw.py @@ -0,0 +1,6 @@ +{ + "2022_NLO": { + "w2": "1.077825*(0.167840*TMath::Erf((x-9.120662)/3.728448)-0.019531*x+0.000317*TMath::Sq(x)+1.080110)" + "w": "1.0133492539254507*0.843196*(0.211634*TMath::Erf((x-9.120552)/3.728196)-0.024628*x+0.000400*TMath::Sq(x)+1.361984)", + } +} \ No newline at end of file diff --git a/ZpTreweighting/2023_v12/extract_Zptrw.py b/ZpTreweighting/2023_v12/extract_Zptrw.py new file mode 100644 index 00000000..ef8bff4c --- /dev/null +++ b/ZpTreweighting/2023_v12/extract_Zptrw.py @@ -0,0 +1,311 @@ +# ================================= +# Danush Shekar (UIC), 9Dec25 +# ================================= +import json +import os +import ROOT +import mplhep as hep +import matplotlib.pyplot as plt +import numpy as np +from ROOT import TFitResultPtr +import argparse +# CMS plot style +hep.style.use("CMS") +style = hep.style.CMS +style["font.size"] = 18 +plt.style.use(style) + +parser = argparse.ArgumentParser(description='Extract data and fit with Gaussian.') +parser.add_argument('-f', action='store_true', help='Fit the ratio plot using Erf.') +parser.add_argument('-n', type=int, default=0, help='Normalization method:\n1: Ratio of integral of MC over weights*MC.\n2: Normalize MC to data integral before calculating rw factor.') +parser.add_argument('-c', type=str, default="mm", help='Z decay channel.') +parser.add_argument('-nj', type=int, default=0, help='Jet bin category(number of jet bins).') +parser.add_argument('-i', '--input', default='mkShapes__ZpTreweighting.root', help='Path to the merged ROOT file (default: mkShapes__ZpTreweighting.root)') +parser.add_argument('--write-json', default=None, help='If given, write the updated dyZpTrw.json to this path after a successful fit (requires -f). 
The file is overwritten.') +parser.add_argument('--year', default='2022', help="Year key in the DYrew dict written to dyZpTrw.json (default: '2022')") +parser.add_argument('--sample-type', default='LO', help="Sample-type key in the DYrew dict written to dyZpTrw.json (default: 'LO')") +parser.add_argument('--plot-xrange', type=float, default=80, help='maximum X-axis range for the plots (default: 80)') +parser.add_argument('--fit-xrange', type=float, default=50, help='maximum X-axis range for the fits (default: 50)') +args = parser.parse_args() + +root_file = ROOT.TFile(args.input) +channel = f"Z{args.c}_{args.nj}j" +zee_dir = root_file.Get(channel) +ptll_dir = zee_dir.Get("ptll") + +histo_DY = ptll_dir.Get("histo_DY") +histo_DATA = ptll_dir.Get("histo_DATA") +histo_top = ptll_dir.Get("histo_top") +histo_diboson = ptll_dir.Get("histo_diboson") +histo_SMhiggs = ptll_dir.Get("histo_SMhiggs") + +# Subtract DY backgrounds from DATA +histo_trueData = histo_DATA.Clone("histo_trueData") # Create a clone for the result +histo_trueData.Add(histo_top, -1) +histo_trueData.Add(histo_diboson, -1) +histo_trueData.Add(histo_SMhiggs, -1) + +plot_range = [0, args.plot_xrange] +fit_range = [0, args.fit_xrange] + +def calc_norm_factor(dy_hist, fitting_function, fit_params): + numerator = 0.0 + denominator = 0.0 + first_bin = dy_hist.FindBin(fit_range[0]) + last_bin = dy_hist.FindBin(fit_range[1]) + print("\nNOTE: Using fit function (", fitting_function,") to calculate normalization factor.\n") # [0]*TMath::Erf((x-[1])/[2]) + [3]*x + [4]*x**2 + [5] + for bin_idx in range(first_bin, last_bin + 1): + mc_events = dy_hist.GetBinContent(bin_idx) + # weight = histo_ratio.GetBinContent(bin_idx) + binCenter = dy_hist.GetXaxis().GetBinCenter(bin_idx) + weight = fit_params[0]*ROOT.TMath.Erf((binCenter - fit_params[1])/fit_params[2]) + fit_params[3]*binCenter + fit_params[4]*binCenter**2 + fit_params[5] + numerator += mc_events + denominator += mc_events * weight + norm_factor = numerator / 
denominator if denominator != 0 else 1.0 + print("Normalization factor:", norm_factor) + return norm_factor + +# # Rebin hists +# histo_trueData.Rebin(4) +# histo_DY.Rebin(4) + +# Calculate the integral/sum of histo_DY and histo_ratio for x axis in [0, fit_range[1]) +integral_histo_DY = histo_DY.Integral(histo_DY.FindBin(fit_range[0]), histo_DY.FindBin(fit_range[1]) - 1) +integral_histo_DATA = histo_trueData.Integral(histo_trueData.FindBin(fit_range[0]), histo_trueData.FindBin(fit_range[1]) - 1) +norm_factor2 = integral_histo_DATA/integral_histo_DY +print("Normalization factor 2:", norm_factor2) +if args.n == 2: + histo_DY.Scale(norm_factor2) + integral_histo_DYscaled = histo_DY.Integral(histo_DY.FindBin(fit_range[0]), histo_DY.FindBin(fit_range[1]) - 1) + +# Create a ratio plot of DATA to DY +histo_ratio = histo_trueData.Clone("histo_ratio") +histo_ratio.Divide(histo_DY) + +integral_histo_ratio = histo_ratio.Integral(histo_ratio.FindBin(fit_range[0]), histo_ratio.FindBin(fit_range[1]) - 1) + +# fitting_functions = ["[0]*x**6 + [1]*x**5 + [2]*x**4 + [3]*x**3 + [4]*x**2 + [5]*x + [6]", "[0]*([1]*TMath::Erf((x-[2])/[3]) + [4]*x + [5]*x**2)"] +fitting_functions = ["([0]*TMath::Erf((x-[1])/[2]) + [3]*x + [4]*TMath::Sq(x) + [5])"] +initial_guesses = [[0.0, 5.0, 10.0, 0.0, 0.0, 1.0]] +save_name_suffixes = [channel] +for fitfunc, initguess, savename in zip(fitting_functions, initial_guesses, save_name_suffixes): + c = ROOT.TCanvas("c", "c", 1000, 1000) + c.Divide(1,2) + + # Top pad: main plot + pad1 = c.cd(1) + pad1.SetPad(0.0, 0.30, 1.0, 1.0) + pad1.SetBottomMargin(0.02) + pad1.SetLogy() # if you want log-y + + histo_DY.SetTitle("") + histo_DY.GetYaxis().SetTitle("Events / 5 GeV") + histo_DY.GetXaxis().SetLabelSize(0) + histo_DY.GetYaxis().SetTitleSize(0.06) + histo_DY.GetYaxis().SetTitleOffset(0.8) + histo_DY.GetYaxis().SetLabelSize(0.05) + histo_DY.GetXaxis().SetRangeUser(plot_range[0], plot_range[1]) + # Draw DY as reference, then data points + # 
histo_DY.SetLineColor(ROOT.kBlue) + histo_DY.SetStats(False) + histo_DY.SetFillStyle(3344) + histo_DY.SetFillColorAlpha(ROOT.kBlue, 0.1) + histo_DY.Draw("HIST") + histo_trueData.SetMarkerStyle(8) + histo_trueData.SetMarkerColor(ROOT.kBlack) + histo_trueData.SetMarkerSize(0.8) + histo_trueData.GetXaxis().SetRangeUser(plot_range[0], plot_range[1]) + histo_trueData.Draw("E1 SAME") + + # CMS label + label = ROOT.TLatex() + label.SetNDC(True) + label.SetTextSize(0.040) + label.DrawLatex(0.12, 0.92, "#bf{CMS} #it{Preliminary}") + label.DrawLatex(0.55, 0.92, "L = 8.2 fb^{-1} (#sqrt{s} = 13.6 TeV)") + label.DrawLatex(0.15, 0.2, f"num(DY) events in ({fit_range[0]},{fit_range[1]}) GeV = {integral_histo_DY:.3f}") + label.DrawLatex(0.15, 0.15, f"num(DATA) events in ({fit_range[0]},{fit_range[1]}) GeV = {integral_histo_DATA:.3f}") + if args.n == 2: + label.DrawLatex(0.15, 0.1, f"num(DY normalized) events in ({fit_range[0],fit_range[1]}) GeV = {integral_histo_DYscaled:.3f}") + + + leg = ROOT.TLegend(0.60, 0.70, 0.88, 0.88) + leg.SetBorderSize(0) + leg.SetFillStyle(0) + leg.AddEntry(histo_trueData, "Data - BG", "pe") + leg.AddEntry(histo_DY, "DY", "f") + leg.Draw() + + # Bottom pad: ratio + pad2 = c.cd(2) + pad2.SetPad(0.0, 0.0, 1.0, 0.30) + pad2.SetTopMargin(0.02) + pad2.SetBottomMargin(0.35) + + h_ratio_points = histo_ratio.Clone("h_ratio_points") + h_ratio_points.SetTitle("") + h_ratio_points.SetMarkerStyle(20) + h_ratio_points.SetMarkerSize(0.8) + h_ratio_points.SetLineColor(ROOT.kBlack) + h_ratio_points.SetMarkerColor(ROOT.kBlack) + + h_ratio_points.GetYaxis().SetTitle("(Data-BG)/DY") + h_ratio_points.GetYaxis().SetNdivisions(505) + h_ratio_points.GetYaxis().SetTitleSize(0.12) + h_ratio_points.GetYaxis().SetTitleOffset(0.30) + h_ratio_points.GetYaxis().SetLabelSize(0.08) + h_ratio_points.GetXaxis().SetTitle("p_{T}^{ll} [GeV]") + h_ratio_points.GetXaxis().SetTitleSize(0.12) + h_ratio_points.GetXaxis().SetLabelSize(0.10) + h_ratio_points.GetXaxis().SetRangeUser(plot_range[0], 
plot_range[1]) + + h_ratio_points.SetMinimum(0.0) + h_ratio_points.SetMaximum(2.0) + + h_ratio_points.Draw("E1") + h_ratio_points.SetStats(False) + + # horizontal line at 1 + line = ROOT.TLine(h_ratio_points.GetXaxis().GetXmin(), 1.0, + h_ratio_points.GetXaxis().GetXmax(), 1.0) + line.SetLineColor(ROOT.kGray+2) + line.SetLineStyle(2) + line.Draw("SAME") + if args.f: + # --- Fit the ratio with an error function --- + # erf(x; p0, p1, p2, p3) = p0 + p1 * TMath::Erf((x - p2)/p3) + fit_func = ROOT.TF1("fit_erf", + fitfunc, + fit_range[0],#histo_ratio.GetXaxis().GetXmin(), + fit_range[1])#histo_ratio.GetXaxis().GetXmax()) + fit_func.SetParameters(*initguess) # reasonable starting values + fit_func.SetLineColor(ROOT.kRed) + + fit_result = h_ratio_points.Fit(fit_func, "RVS") # fit only in the visible x-range + if fit_result and fit_result.IsValid(): + chi2 = fit_result.Chi2() + ndf = fit_result.Ndf() + chi2_ndf = chi2 / ndf if ndf != 0 else float('inf') + print(f" ============ Chi2/NDF: {chi2_ndf:.3f} ============") + latex = ROOT.TLatex() + latex.SetNDC(True) + latex.SetTextSize(0.08) + latex.SetTextColor(ROOT.kBlack) + latex.DrawLatex(0.15, 0.55, f"#chi^{{2}}/ndf = {chi2_ndf:.2f}") + func_formula = fit_func.GetTitle() # or fit_func.GetExpFormula() for TF1 + param_values = [fit_func.GetParameter(i) for i in range(fit_func.GetNpar())] + param_str = ", ".join([f"p{i}={v:.2f}" for i, v in enumerate(param_values)]) + # # Display fit function and parameters + # latex.DrawLatex(0.15, 0.65, f"f(x) = {func_formula}") + # latex.DrawLatex(0.15, 0.75, param_str) + else: + print("Fit failed or invalid - cannot compute Chi2/NDF") + + # Plot only until 50 GeV, the value after 50 GeV will be the fit function's value at 50 GeV + fit_func.Draw("SAME") + const_val = fit_func.Eval(fit_range[1]) + const_func = ROOT.TF1("const_func", f"{const_val}", fit_range[1], plot_range[1]) # NOTE - assuming plot_range[1] is greater than fit_range[1] + const_func.SetLineColor(ROOT.kRed) + # 
const_func.SetLineStyle(ROOT.kDashed) + const_func.Draw("L SAME") + fit_func.Print("V") + c.SaveAs(f"ZpTreweighting_with_ratio_{savename}.pdf") + c.Close() # Close the canvas after saving + # c.SaveAs(f"ZpTreweighting_with_ratio_{savename}.png") + + # Commented out as plot quality is very bad + c_ratio_only = ROOT.TCanvas("c_ratio_only", "c_ratio_only", 800, 800) + h_ratio_points.Draw("E1") + h_ratio_points.GetYaxis().SetTitle("(Data-BG)/DY") + h_ratio_points.GetYaxis().SetTitleSize(0.05) + h_ratio_points.GetYaxis().SetTitleOffset(0.8) + h_ratio_points.GetYaxis().SetLabelSize(0.04) + h_ratio_points.GetXaxis().SetTitle("p_{T}^{ll} [GeV]") + h_ratio_points.GetXaxis().SetTitleSize(0.05) + h_ratio_points.GetXaxis().SetLabelSize(0.03) + line.Draw("SAME") + if args.f: + # Plot only until 50 GeV, the value after 50 GeV will be the fit function's value at 50 GeV + fit_func.Draw("SAME") + # Draw constant for x > 50 GeV + const_val = fit_func.Eval(fit_range[1]) + const_func = ROOT.TF1("const_func", f"{const_val}", fit_range[1], plot_range[1]) # NOTE - assuming plot_range[1] is greater than fit_range[1] + const_func.SetLineColor(ROOT.kRed) + const_func.Draw("SAME") + if fit_result and fit_result.IsValid(): + latex = ROOT.TLatex() + latex.SetNDC(True) + latex.SetTextSize(0.025) + latex.SetTextColor(ROOT.kBlack) + latex.DrawLatex(0.15, 0.15, f"#chi^{{2}}/ndf = {chi2_ndf:.2f}") + func_formula = fit_func.GetTitle() # or fit_func.GetExpFormula() for TF1 + param_values = [fit_func.GetParameter(i) for i in range(fit_func.GetNpar())] + param_str = ", ".join([f"p{i}={v:.2f}" for i, v in enumerate(param_values)]) + # Display fit function and parameters + latex.DrawLatex(0.15, 0.35, f"f(x) = {func_formula}") + latex.DrawLatex(0.15, 0.3, param_str) + if args.n == 1: + norm_factor = calc_norm_factor(histo_DY, fitfunc, param_values) + latex.DrawLatex(0.15, 0.25, f"Normalization factor = {norm_factor:.2f}") + print(f"Normalization factor = {norm_factor}") + formula = fit_func.GetTitle() # 
e.g., "[0]*x + [1]" + n_params = fit_func.GetNpar() + params = [fit_func.GetParameter(i) for i in range(n_params)] + # Replace [i] with parameter values + for i, p in enumerate(params): + formula = formula.replace(f"[{i}]", f"{p:.3f}") + print(f"Fit function with parameters: {formula}") + + c_ratio_only.SaveAs(f"ZpTreweighting_ratio_fit_{savename}.pdf") + # c_ratio_only.SaveAs(f"ZpTreweighting_ratio_fit_{savename}.png") + c_ratio_only.Close() # Close the canvas after saving +print(f"Integral of DY histogram from {fit_range[0]} to {fit_range[1]} GeV: {integral_histo_DY}") +print(f"Integral of ratio histogram from {fit_range[0]} to {fit_range[1]} GeV: {integral_histo_ratio}") + +# Update dyZpTrw.json +if args.write_json is not None and args.f: + wrote = False + # 'fit_func', 'fit_result', 'fitfunc' are in scope from the last for-loop + # iteration (Python loop variables persist after the loop). + if fit_result and fit_result.IsValid(): + # Build a ROOT / C++ compatible formula string with full precision. + root_formula = fitfunc # e.g. "[0]*TMath::Erf(...) + [3]*x + [4]*x**2 + [5]" + n_params = fit_func.GetNpar() + params = [fit_func.GetParameter(i) for i in range(n_params)] + const_val = fit_func.Eval(fit_range[1]) + for i, p in enumerate(params): + root_formula = root_formula.replace(f"[{i}]", f"{p:.6f}") + # Convert Python-style x**2 to ROOT / C++ TMath::Sq(x) + # root_formula = root_formula.replace("x**2", "TMath::Sq(x)") + # Tidy up double signs that can appear after parameter substitution + root_formula = root_formula.replace("+ -", "- ") + root_formula = root_formula.replace("- -", "+ ") + piecewise_formula = f"({root_formula})*(x<{fit_range[1]}) + ({const_val:.6f})*(x>={fit_range[1]})" + + # Prepend the integral normalization factor if methodology 2 is chosen + if args.n == 1: + full_expr = f"{norm_factor}*{piecewise_formula}" + else: + full_expr = piecewise_formula + + # Read the existing JSON so other years/types are preserved. 
+ existing = {} + if os.path.exists(args.write_json): + try: + with open(args.write_json) as _fj: + existing = json.load(_fj) + except json.JSONDecodeError as _e: + print(f"WARNING: Existing JSON file '{args.write_json}' is malformed " + f"({_e}); it will be overwritten.") + # Update only the requested year / sample-type key. + sample_key = f"{args.sample_type}_{args.nj}j" + existing.setdefault(args.year, {})[sample_key] = full_expr + + with open(args.write_json, "w") as _fj: + json.dump(existing, _fj, indent=4) + _fj.write("\n") + print(f"\nWrote updated dyZpTrw.json → {args.write_json}") + print(f" [{args.year}][{sample_key}]: {full_expr}") + wrote = True + else: + print("\nWARNING: Fit did not converge; dyZpTrw.json was NOT updated.") \ No newline at end of file diff --git a/ZpTreweighting/2023_v12/nuisances.py b/ZpTreweighting/2023_v12/nuisances.py new file mode 100644 index 00000000..bd87d720 --- /dev/null +++ b/ZpTreweighting/2023_v12/nuisances.py @@ -0,0 +1,52 @@ +print(treeBaseDir) +def makeMCDirectory(var=''): + _treeBaseDir = treeBaseDir + '' + if useXROOTD: + _treeBaseDir = redirector + treeBaseDir + if var== '': + return '/'.join([_treeBaseDir, mcProduction, mcSteps]) + else: + return '/'.join([_treeBaseDir, mcProduction, mcSteps + '__' + var]) + + +# merge cuts +_mergedCuts = [] +for cut in list(cuts.keys()): + __cutExpr = '' + if type(cuts[cut]) == dict: + __cutExpr = cuts[cut]['expr'] + for cat in list(cuts[cut]['categories'].keys()): + _mergedCuts.append(cut + '_' + cat) + elif type(cuts[cut]) == str: + _mergedCuts.append(cut) + + +# Dfinitions of groups of samples +mc = [skey for skey in samples if skey not in ('DATA')] + +nuisances = {} + + +################################ EXPERIMENTAL UNCERTAINTIES ################################# + +#### Luminosity + +# https://twiki.cern.ch/twiki/bin/view/CMS/LumiRecommendationsRun3 +nuisances['lumi_2023'] = { + 'name' : 'lumi_2023', + 'type' : 'lnN', + 'samples' : dict((skey, '1.013') for skey in mc) +} + 
+### MC statistical uncertainty +autoStats = True +if autoStats: + ## Use the following if you want to apply the automatic combine MC stat nuisances. + nuisances['stat'] = { + 'type': 'auto', + 'maxPoiss': '10', + 'includeSignal': '0', + # nuisance ['maxPoiss'] = Number of threshold events for Poisson modelling + # nuisance ['includeSignal'] = Include MC stat nuisances on signal processes (1=True, 0=False) + 'samples': {} + } diff --git a/ZpTreweighting/2023_v12/plot.py b/ZpTreweighting/2023_v12/plot.py new file mode 100644 index 00000000..c2bef728 --- /dev/null +++ b/ZpTreweighting/2023_v12/plot.py @@ -0,0 +1,67 @@ +# Group plot +# Groups of samples to improve the plots. +# If not defined, normal plots is used + +groupPlot = {} + +groupPlot['DY'] = { + 'nameHR' : 'DY', + 'isSignal' : 0, + 'color' : 420, # kGreen+4 + 'samples' : ['DY'] +} + +groupPlot['background'] = { + 'nameHR' : 'background', + 'isSignal' : 0, + 'color' : 851, # kAzure -9 + 'samples' : ['top', 'diboson', 'SMhiggs'] +} + + +# Plot +# keys here must match keys in samples.py + +plot = {} + +plot['DY'] = { + 'color' : 420, # kGreen+4 + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0, +} + +plot['top'] = { + 'color' : 400, # kYellow + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0, +} + +plot['diboson'] = { + 'color' : 851, # kAzure -9 + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0, +} + +plot['SMhiggs'] = { + 'color' : 632+3, # kRed+3 + 'isSignal' : 0, + 'isData' : 0, + 'scale' : 1.0, +} + +# Data +plot['DATA'] = { + 'nameHR' : 'Data', + 'color' : 1 , + 'isSignal' : 0, + 'isData' : 1 , + 'isBlind' : 0 +} + +# Legend definition +legend = {} +legend['lumi'] = 'L = 17.8 fb^{-1}' +legend['sqrt'] = '#sqrt{s} = 13.6 TeV' diff --git a/ZpTreweighting/2023_v12/samples.py b/ZpTreweighting/2023_v12/samples.py new file mode 100644 index 00000000..ccbdfaae --- /dev/null +++ b/ZpTreweighting/2023_v12/samples.py @@ -0,0 +1,205 @@ +from mkShapesRDF.lib.search_files import SearchFiles + +searchFiles = 
SearchFiles() +redirector = "" + +useXROOTD = False + +# MC: /eos/cms/store/group/phys_higgs/cmshww/amassiro/HWWNano/Summer22_130x_nAODv12_Full2022v12/MCl2loose2022v12__MCCorr2022v12JetScaling__l2tight +# DATA: /eos/cms/store/group/phys_higgs/cmshww/amassiro/HWWNano/Run2022_ReReco_nAODv12_Full2022v12/DATAl2loose2022v12__l2tight +mcProduction = 'Summer23_130x_nAODv12_Full2023v12_OLD' # new datasets were produced around 11Apr26, using old datasets to compare with prior results (DS, 13Apr26) +mcSteps = 'MCl2loose2023v12__MCCorr2023v12JetScaling__l2tight' # Using DYto2L-2Jets_MLL-50 from Amassiro (DS, 21Nov25) +dataReco = 'Run2023_Prompt_nAODv12_Full2023v12_OLD' # new datasets were produced around 11Apr26, using old datasets to compare with prior results (DS, 13Apr26) +dataSteps = 'DATAl2loose2023v12__l2loose' # Choose l2loose sample but apply tight selections in analysis (eleWP and muWP) + +# fakeSteps = 'DATAl1loose2022EFGv12__fakeW' + +############################################## +###### Tree base directory for the site ###### +############################################## +treeBaseDir = f'/eos/cms/store/group/phys_higgs/cmshww/amassiro/HWWNano' +limitFiles = -1 # For running on smaller set of samples (DS, 21Nov25) + +def makeMCDirectory(var=""): + _treeBaseDir = treeBaseDir + "" + if redirector != "": + _treeBaseDir = redirector + treeBaseDir + if var == "": + return "/".join([_treeBaseDir, mcProduction, mcSteps]) + else: + return "/".join([_treeBaseDir, mcProduction, mcSteps + "__" + var]) + + +mcDirectory = makeMCDirectory() +# fakeDirectory = os.path.join(treeBaseDir, dataReco, fakeSteps) +dataDirectory = os.path.join(treeBaseDir, dataReco, dataSteps) + +samples = {} + + +def nanoGetSampleFiles(path, name): + _files = searchFiles.searchFiles(path, name, redirector=redirector) + if limitFiles != -1 and len(_files) > limitFiles: + return [(name, _files[:limitFiles])] + else: + return [(name, _files)] + + +def CombineBaseW(samples, proc, samplelist): + 
_filtFiles = list(filter(lambda k: k[0] in samplelist, samples[proc]["name"])) + _files = list(map(lambda k: k[1], _filtFiles)) + _l = list(map(lambda k: len(k), _files)) + leastFiles = _files[_l.index(min(_l))] + dfSmall = ROOT.RDataFrame("Runs", leastFiles) + s = dfSmall.Sum("genEventSumw").GetValue() + f = ROOT.TFile(leastFiles[0]) + t = f.Get("Events") + t.GetEntry(1) + xs = t.baseW * s + + __files = [] + for f in _files: + __files += f + df = ROOT.RDataFrame("Runs", __files) + s = df.Sum("genEventSumw").GetValue() + newbaseW = str(xs / s) + weight = newbaseW + "/baseW" + + for iSample in samplelist: + addSampleWeight(samples, proc, iSample, weight) + + +def addSampleWeight(samples, sampleName, sampleNameType, weight): + obj = list(filter(lambda k: k[0] == sampleNameType, samples[sampleName]["name"]))[0] + samples[sampleName]["name"] = list( + filter(lambda k: k[0] != sampleNameType, samples[sampleName]["name"]) + ) + if len(obj) > 2: + samples[sampleName]["name"].append( + (obj[0], obj[1], obj[2] + "*(" + weight + ")") + ) + else: + samples[sampleName]["name"].append((obj[0], obj[1], "(" + weight + ")")) + + +################################################ +############ DATA DECLARATION ################## +################################################ + +# Putting for later: HLT selections (DS, 19Nov25) +DataRun = [ + ['Cv1','Run2023C-Prompt-v1'], + ['Cv2','Run2023C-Prompt-v2'], + ['Cv3','Run2023C-Prompt-v3'], + ['Cv4','Run2023C-Prompt-v4'], +] + + +DataSets = ['MuonEG','Muon0','Muon1','EGamma0','EGamma1'] + +DataTrig = { + 'MuonEG' : 'Trigger_ElMu' , + 'Muon0' : '!Trigger_ElMu && (Trigger_sngMu || Trigger_dblMu)', + 'Muon1' : '!Trigger_ElMu && (Trigger_sngMu || Trigger_dblMu)', + 'EGamma0' : '!Trigger_ElMu && !Trigger_sngMu && !Trigger_dblMu && (Trigger_sngEl || Trigger_dblEl)', + 'EGamma1' : '!Trigger_ElMu && !Trigger_sngMu && !Trigger_dblMu && (Trigger_sngEl || Trigger_dblEl)' +} + +######################################### +############ MC COMMON 
################## +######################################### + +# SFweight does not include btag weights +mcCommonWeightNoMatch = 'XSWeight*METFilter_Common*SFweight' +mcCommonWeight = 'XSWeight*METFilter_Common*PromptGenLepMatch2l*SFweight' + +#mcCommonWeight = 'XSWeight*METFilter_Common*SFweight' + +########################################### +############# BACKGROUNDS ############### +########################################### + +# DY +files = nanoGetSampleFiles(mcDirectory, 'DYto2L-2Jets_MLL-50') + + +samples['DY'] = { + 'name': files, + 'weight': mcCommonWeight, + 'FilesPerJob': 2, +} + +addSampleWeight(samples,'DY','DYto2L-2Jets_MLL-50','DY_NLO_ZpTrw') + +# remove backgrounds from data for ZpT reweighting: +top_samples = ['TTTo2L2Nu', 'TWminusto2L2Nu', 'TbarWplusto2L2Nu']#, 'ST_tW_top'] +diboson_samples = ['WWTo2L2Nu', 'WZTo3LNu', 'GluGlutoContintoWWtoENuENu', 'GluGlutoContintoWWtoENuMuNu', 'GluGlutoContintoWWtoENuTauNu', 'GluGlutoContintoWWtoMuNuENu', 'GluGlutoContintoWWtoMuNuMuNu', 'GluGlutoContintoWWtoMuNuTauNu', 'GluGlutoContintoWWtoTauNuENu', 'GluGlutoContintoWWtoTauNuMuNu', 'GluGlutoContintoWWtoTauNuTauNu', 'WGtoLNuG-1J_PTG10to100', 'WGtoLNuG-1J_PTG100to200', 'WGtoLNuG-1J_PTG200to400', 'WGtoLNuG-1J_PTG400to600', 'WGtoLNuG-1J_PTG600'] +higgs_samples = ['GluGluHToWWTo2L2Nu_M125', 'VBFHToWWTo2L2Nu_M125'] + +samples['top'] = { + 'name': + nanoGetSampleFiles(mcDirectory, 'ST_t-channel_top') + \ + nanoGetSampleFiles(mcDirectory, 'ST_t-channel_antitop') + \ + nanoGetSampleFiles(mcDirectory, 'ST_s-channel_plus') + \ + nanoGetSampleFiles(mcDirectory, 'ST_s-channel_minus') + \ + nanoGetSampleFiles(mcDirectory, 'ST_tW_top') + \ + nanoGetSampleFiles(mcDirectory, 'ST_tW_antitop') + \ + nanoGetSampleFiles(mcDirectory, 'TTTo2L2Nu') + \ + nanoGetSampleFiles(mcDirectory, 'TWminusto2L2Nu') + \ + nanoGetSampleFiles(mcDirectory, 'TbarWplusto2L2Nu'), + 'weight': mcCommonWeight, + 'FilesPerJob': 2, +} + +samples['diboson'] = { + 'name': nanoGetSampleFiles(mcDirectory, 
'WWTo2L2Nu') + \ + nanoGetSampleFiles(mcDirectory, 'WZTo3LNu') + \ + nanoGetSampleFiles(mcDirectory, 'GluGlutoContintoWWtoENuENu') + \ + nanoGetSampleFiles(mcDirectory, 'GluGlutoContintoWWtoENuMuNu') + \ + nanoGetSampleFiles(mcDirectory, 'GluGlutoContintoWWtoENuTauNu') + \ + nanoGetSampleFiles(mcDirectory, 'GluGlutoContintoWWtoMuNuENu') + \ + nanoGetSampleFiles(mcDirectory, 'GluGlutoContintoWWtoMuNuMuNu') + \ + nanoGetSampleFiles(mcDirectory, 'GluGlutoContintoWWtoMuNuTauNu') + \ + nanoGetSampleFiles(mcDirectory, 'GluGlutoContintoWWtoTauNuENu') + \ + nanoGetSampleFiles(mcDirectory, 'GluGlutoContintoWWtoTauNuMuNu') + \ + nanoGetSampleFiles(mcDirectory, 'GluGlutoContintoWWtoTauNuTauNu') + \ + nanoGetSampleFiles(mcDirectory, 'WGtoLNuG-1J_PTG10to100') + \ + nanoGetSampleFiles(mcDirectory, 'WGtoLNuG-1J_PTG100to200') + \ + nanoGetSampleFiles(mcDirectory, 'WGtoLNuG-1J_PTG200to400') + \ + nanoGetSampleFiles(mcDirectory, 'WGtoLNuG-1J_PTG400to600') + \ + nanoGetSampleFiles(mcDirectory, 'WGtoLNuG-1J_PTG600'), + 'weight': mcCommonWeight, + 'FilesPerJob': 2, +} + +samples['SMhiggs'] = { + 'name': nanoGetSampleFiles(mcDirectory, 'GluGluHToWWTo2L2Nu_M125') + \ + nanoGetSampleFiles(mcDirectory, 'VBFHToWWTo2L2Nu_M125'), + 'weight': mcCommonWeight, + 'FilesPerJob': 2, +} + + +########################################### +################## DATA ################### +########################################### + +samples['DATA'] = { + 'name': [], + 'weight': 'LepWPCut*METFilter_DATA', + 'weights': [], + 'isData': ['all'], + 'FilesPerJob': 15 +} + +for _, sd in DataRun: + for pd in DataSets: + datatag = pd + '_' + sd + + files = nanoGetSampleFiles(dataDirectory, datatag) + + print(datatag) + + samples['DATA']['name'].extend(files) + addSampleWeight(samples, 'DATA', datatag, DataTrig[pd]) + diff --git a/ZpTreweighting/2023_v12/structure.py b/ZpTreweighting/2023_v12/structure.py new file mode 100644 index 00000000..4c0f5428 --- /dev/null +++ b/ZpTreweighting/2023_v12/structure.py @@ -0,0 
+1,30 @@ +# structure configuration for datacard + +structure = {} + +# keys here must match keys in samples.py +structure['DY'] = { + 'isSignal' : 0, + 'isData' : 0 +} + +structure['top'] = { + 'isSignal' : 0, + 'isData' : 0 +} + +structure['diboson'] = { + 'isSignal' : 0, + 'isData' : 0 +} + +structure['SMhiggs'] = { + 'isSignal' : 0, + 'isData' : 0 +} + +# data +structure['DATA'] = { + 'isSignal' : 0, + 'isData' : 1 +} diff --git a/ZpTreweighting/2023_v12/variables.py b/ZpTreweighting/2023_v12/variables.py new file mode 100644 index 00000000..a00ceb55 --- /dev/null +++ b/ZpTreweighting/2023_v12/variables.py @@ -0,0 +1,176 @@ +# variables +variables = {} + +variables['events'] = { + 'name' : '1', + 'range' : (1,0,2), + 'xaxis' : 'events', + 'fold' : 3 +} + +variables['nvtx'] = { + 'name' : 'PV_npvsGood', + 'range' : (100, 0, 100), + 'xaxis' : 'number of vertices', + 'fold' : 3 +} + +variables['mll'] = { + 'name': 'mll', + 'range' : (50,50,150), + 'xaxis' : 'm_{ll} [GeV]', + 'fold' : 0 +} + +variables['ptll'] = { + 'name': 'ptll', + 'range' : (40,0,80), + 'xaxis' : 'p_{T}^{ll} [GeV]', + 'fold' : 0 +} + +# Data samples do not have gen-level information +# variables['gen_ptll'] = { +# 'name': 'gen_ptll', +# 'range' : (40,0,200), +# 'xaxis' : 'gen p_{T}^{ll} [GeV]', +# 'fold' : 0 +# } + +# variables['gen_Zpt'] = { +# 'name': 'gen_Zpt(nGenPart, GenPart_pt, GenPart_pdgId, GenPart_genPartIdxMother, GenPart_statusFlags, gen_ptll)', +# 'range' : (40,0,200), +# 'xaxis' : 'Gen p_{T}^{Z} [GeV]', +# 'fold' : 0 +# } + +variables['drll'] = { + 'name': 'drll', + 'range' : (50, 0,5), + 'xaxis' : '#Delta R_{ll}', + 'fold' : 0 +} + +variables['dphill'] = { + 'name': 'dphill', + 'range' : (50,0,5), + 'xaxis' : '#Delta #phi_{ll}', + 'fold' : 0 +} + +variables['pt1'] = { + 'name': 'Lepton_pt[0]', + 'range' : (40,0,200), + 'xaxis' : 'p_{T} 1st lep', + 'fold' : 3 +} + +variables['pt2'] = { + 'name': 'Lepton_pt[1]', + 'range' : (40,0,160), + 'xaxis' : 'p_{T} 2nd lep', + 'fold' : 3 +} + 
+variables['eta1'] = { + 'name': 'Lepton_eta[0]', + 'range' : (50,-2.5,2.5), + 'xaxis' : '#eta 1st lep', + 'fold' : 3 +} + +variables['eta2'] = { + 'name': 'Lepton_eta[1]', + 'range' : (50,-2.5,2.5), + 'xaxis' : '#eta 2nd lep', + 'fold' : 3 +} + + +# B Tag +variables['jetdeepb'] = { + 'name': 'Alt(Take(Jet_btagDeepFlavB, CleanJet_jetIdx), 0, -99)', + 'range' : (40,-1,1), + 'xaxis' : 'B tagger 1st jet (DeepB)', + 'fold' : 2 +} + +variables['jetParT'] = { + 'name': 'Alt(Take(Jet_btagRobustParTAK4B, CleanJet_jetIdx), 0, -99)', + 'range' : (40,-1,1), + 'xaxis' : 'B tagger 1st jet (RobustParT AK4B)', + 'fold' : 2 +} + +variables['jetPNetB'] = { + 'name': 'Alt(Take(Jet_btagPNetB, CleanJet_jetIdx), 0, -99)', + 'range' : (40,-1,1), + 'xaxis' : 'B tagger 1st jet (ParticleNet B)', + 'fold' : 2 +} + + +# MET +variables['trkMet'] = { + 'name': 'TkMET_pt', + 'range' : (20,0,200), + 'xaxis' : 'trk met [GeV]', + 'fold' : 3 +} + +variables['puppimet'] = { + 'name': 'PuppiMET_pt', + 'range' : (20,0,200), + 'xaxis' : 'Puppi MET p_{T} [GeV]', + 'fold' : 3 +} + +############# New Jet processing +variables['njet'] = { + 'name': 'Sum(CleanJet_pt>30)', + 'range' : (5,0,5), + 'xaxis' : 'Number of jets', + 'fold' : 2 +} + +variables['jetpt1'] = { + 'name': 'Alt(CleanJet_pt, 0, -99) - 9999.9*(CleanJet_pt[0]<30)', + 'range' : (40,0,200), + 'xaxis' : 'p_{T} 1st jet', + 'fold' : 0 +} + +variables['jetpt2'] = { + 'name': 'Alt(CleanJet_pt, 1, -99) - 9999.9*(CleanJet_pt[1]<30)', + 'range' : (40,0,200), + 'xaxis' : 'p_{T} 2nd jet', + 'fold' : 0 +} + +variables['jeteta1'] = { + 'name': 'Alt(CleanJet_eta, 0, -99) - 9999.9*(CleanJet_pt[0]<30)', + 'range' : (30,-4.7,4.7), + 'xaxis' : '#eta 1st jet', + 'fold' : 0 +} + +variables['jeteta1_fine_binning'] = { + 'name': 'Alt(CleanJet_eta, 0, -99) - 9999.9*(CleanJet_pt[0]<30)', + 'range' : (94,-4.7,4.7), + 'xaxis' : '#eta 1st jet', + 'fold' : 0 +} + +variables['jeteta2'] = { + 'name': 'Alt(CleanJet_eta, 1, -99) - 9999.9*(CleanJet_pt[1]<30)', + 'range' : 
(30,-4.7,4.7), + 'xaxis' : '#eta 2nd jet', + 'fold' : 0 +} + +variables['jeteta2_fine_binning'] = { + 'name': 'Alt(CleanJet_eta, 1, -99) - 9999.9*(CleanJet_pt[1]<30)', + 'range' : (94,-4.7,4.7), + 'xaxis' : '#eta 2nd jet', + 'fold' : 0 +} diff --git a/ZpTreweighting/readme.md b/ZpTreweighting/readme.md new file mode 100644 index 00000000..6d643852 --- /dev/null +++ b/ZpTreweighting/readme.md @@ -0,0 +1,12 @@ +## Deriving and applying ZpT reweights for all jet-bin categories of a given year +Go into the config folder for the year of interest (there is one folder per year) and run automate.py with the required parameter values. Example: +`python3 automate.py --second-analysis ./ --year 2023 --sample-type NLO | tee automation_terminal_output.txt` + +It is advisable, though not strictly necessary, to run the above command in a persistent terminal session (tmux or screen), since it takes ~40 minutes (about 20 for each mkShapesRDF run). + +To produce the 2D histograms of gen_pTll vs pTll, run the following from the ZpTreweighting/ folder: +``` +root +.x twoDhists.cc(2022, "DY", "DeepFlavB", "loose") +``` +Every argument has a default in twoDhists.cc (2022, "DY", "PNetB", "loose"), so trailing arguments can be omitted when the defaults are wanted; note that the default tagger is "PNetB", so "DeepFlavB" has to be passed explicitly as in the example above. 
\ No newline at end of file diff --git a/ZpTreweighting/twoDhists.cc b/ZpTreweighting/twoDhists.cc new file mode 100644 index 00000000..5a4d3e11 --- /dev/null +++ b/ZpTreweighting/twoDhists.cc @@ -0,0 +1,362 @@ +#include <algorithm> +#include <cmath> +#include <cstdlib> +#include <fstream> +#include <iomanip> +#include <iostream> +#include <sstream> +#include <utility> + +#include <map> +#include <string> +#include <vector> + +// NOTE(review): the <...> header names in this file were missing and are restored from what the code uses (std::cout, std::map, std::string, std::sqrt, gStyle) -- confirm against the original. + +#include "TH1.h" +#include "TH2.h" +#include "TFile.h" +#include "TTree.h" +#include "TString.h" +#include "TSystem.h" +#include "TROOT.h" +#include "TObjString.h" +#include "TChain.h" +#include "TCut.h" +#include "TGraphAsymmErrors.h" +#include "TLorentzVector.h" +#include "TLine.h" +#include "TCanvas.h" +#include <TStyle.h> +#include <TPaveStats.h> + +#include "ROOT/RVec.hxx" +using namespace ROOT; +using namespace ROOT::VecOps; + +struct dataset { + int year; + std::map<std::string, std::string> samples; // process name -> input file glob + std::map<std::string, std::map<std::string, double>> algo; // tagger -> working point -> discriminator cut +}; + +dataset mkDataset(int year) { + TH1::SetDefaultSumw2(true); + dataset d; + d.year = year; + if (year == 2022) { + d.samples = { + {"DY" , "/eos/cms/store/group/phys_higgs/cmshww/amassiro/HWWNano/Summer22_130x_nAODv12_Full2022v12/MCl2loose2022v12__MCCorr2022v12JetScaling__l2tight/nanoLatino_DYto2L-2Jets_MLL-50__part*.root"} + // The DYto2L-2Jets_MLL-50 sample contains in input cards: + // set run_card ptj 10 (minimum jet pT = 10 GeV) + // set run_card mll_sf 50.0 (minimum ll invariant mass = 50 GeV) + }; + d.algo = { + {"DeepFlavB", {{"loose" , 0.0583}, {"medium" , 0.3086}, {"tight" , 0.7183}}}, + {"RobustParTAK4B", {{"loose", 0.0849}, {"medium", 0.4319}, {"tight", 0.8482}}}, + {"PNetB", {{"loose", 0.0470}, {"medium", 0.2450}, {"tight", 0.6734}}} + }; + } + + for (auto const &elem : d.algo) { + std::cout << elem.first << "\n"; + auto const &inner_map = elem.second; + for (auto const &[key, value] : inner_map) + { + std::cout << key << value << "\n"; + } + } + return d; +} + +// Count the number of jets above a defined threshold (used for counting number of clean jets) +int CountJetsAbovePt(float* pts, Int_t njet, float threshold) { + int count = 0; + for 
(Int_t i = 0; i < njet; ++i) { + if (pts[i] > threshold) ++count; + } + return count; +} + +// Calculate delta R between two vectors +float deltaR(float GenJet_eta, float GenJet_phi, float Lepton_eta, float Lepton_phi) { + float dEta = GenJet_eta - Lepton_eta; + float dPhi = GenJet_phi - Lepton_phi; + while (dPhi > M_PI) dPhi -= 2 * M_PI; + while (dPhi <= -M_PI) dPhi += 2 * M_PI; + return std::sqrt(dEta * dEta + dPhi * dPhi); +} + +// Count number of clean jets (pT above a defined threshold) not matched to any lepton +int CountJetsAbovePtNoLeps(float* pts, Int_t njet, float threshold, Int_t nLepton, float* Lepton_phi, float* Lepton_eta, float* GenJet_phi, float* GenJet_eta) { + int count = 0; + for (Int_t i = 0; i < njet; ++i) { + if (pts[i] > threshold) { + bool matched = false; + for (Int_t l = 0; l < nLepton; ++l) { + if (deltaR(GenJet_eta[i], GenJet_phi[i], Lepton_eta[l], Lepton_phi[l]) < 0.4) { + matched = true; + break; + } + } + if (!matched) { + ++count; + } + } + } + return count; +} + +void twoDhists( + int year = 2022, + std::string process = "DY", + std::string algo = "PNetB", + std::string const WP = "loose" +){ + dataset d = mkDataset(year); + std::cout << d.samples[process] << "\n"; + std::cout << d.algo[algo][WP] << "\n"; + double wp = d.algo[algo][WP]; + + TH1::SetDefaultSumw2(true); + TString fname = "pT2Dhist_" + std::to_string(year) + "_" + process + "_" + algo + "_" + WP + ".root"; + TString samples = d.samples[process]; + TFile* outfile = new TFile(fname, "RECREATE"); + + TChain *Events = new TChain("Events"); + Events->Add(samples); + + Events->SetBranchStatus("*", 0); + Events->SetBranchStatus("XSWeight", 1); + // Gen variables + Events->SetBranchStatus("nGenJet", 1); + Events->SetBranchStatus("GenJet_pt", 1); + Events->SetBranchStatus("GenJet_eta", 1); + Events->SetBranchStatus("GenJet_phi", 1); + Events->SetBranchStatus("nLeptonGen", 1); + Events->SetBranchStatus("LeptonGen_pt", 1); + Events->SetBranchStatus("LeptonGen_eta", 1); + 
Events->SetBranchStatus("LeptonGen_phi", 1); + Events->SetBranchStatus("gen_ptll", 1); + // Reco variables + Events->SetBranchStatus("nCleanJet", 1); + Events->SetBranchStatus("CleanJet_pt", 1); + Events->SetBranchStatus("CleanJet_phi", 1); + Events->SetBranchStatus("CleanJet_eta", 1); + Events->SetBranchStatus("nLepton", 1); + Events->SetBranchStatus("Lepton_pt", 1); + Events->SetBranchStatus("Lepton_eta", 1); + Events->SetBranchStatus("Lepton_phi", 1); + Events->SetBranchStatus("Lepton_pdgId", 1); + Events->SetBranchStatus("mll", 1); + Events->SetBranchStatus("ptll", 1); + + double XSWeight; + // Gen variables + float GenJet_pt[200]; + float GenJet_eta[200]; + float GenJet_phi[200]; + Int_t nGenJet; + Int_t nLeptonGen; + float LeptonGen_pt[100]; + float LeptonGen_eta[100]; + float LeptonGen_phi[100]; + float gen_ptll; + // Reco variables + Int_t nCleanJet; + float CleanJet_pt[200]; + float CleanJet_phi[200]; + float CleanJet_eta[200]; + Int_t nLepton; + float Lepton_pt[100]; + float Lepton_eta[100]; + float Lepton_phi[100]; + int Lepton_pdgId[100]; + Double_t mll, ptll; + + Events->SetBranchAddress("XSWeight", &XSWeight); + Events->SetBranchAddress("nGenJet", &nGenJet); + Events->SetBranchAddress("GenJet_pt", &GenJet_pt); + Events->SetBranchAddress("GenJet_eta", &GenJet_eta); + Events->SetBranchAddress("GenJet_phi", &GenJet_phi); + Events->SetBranchAddress("nLeptonGen", &nLeptonGen); + Events->SetBranchAddress("LeptonGen_pt", LeptonGen_pt); + Events->SetBranchAddress("LeptonGen_eta", LeptonGen_eta); + Events->SetBranchAddress("LeptonGen_phi", LeptonGen_phi); + Events->SetBranchAddress("gen_ptll", &gen_ptll); + + Events->SetBranchAddress("nCleanJet", &nCleanJet); + Events->SetBranchAddress("CleanJet_pt", CleanJet_pt); + Events->SetBranchAddress("CleanJet_phi", CleanJet_phi); + Events->SetBranchAddress("CleanJet_eta", CleanJet_eta); + Events->SetBranchAddress("nLepton", &nLepton); + Events->SetBranchAddress("Lepton_pt", Lepton_pt); + 
Events->SetBranchAddress("Lepton_eta", Lepton_eta); + Events->SetBranchAddress("Lepton_phi", Lepton_phi); + Events->SetBranchAddress("Lepton_pdgId", Lepton_pdgId); + Events->SetBranchAddress("mll", &mll); + Events->SetBranchAddress("ptll", &ptll); + + + /* + ptbins follows the BTV recommendation + https://btv-wiki.docs.cern.ch/PerformanceCalibration/fixedWPSFRecommendations/#b-tagging-efficiencies-in-simulation + etabins can be changed to match one's needs + */ + + Float_t ptbins[11] = {0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100}; + Float_t jetbins[11] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + + TH1F *ptll_pull = new TH1F{"ptll_pull", "ptll_pull", 100, -2, 2}; + ptll_pull->GetXaxis()->SetTitle("(reco ptll - gen ptll)/gen ptll"); + + TH1F *gen_jet_pt = new TH1F{"gen_jet_pt", "gen_jet_pt", 50, 0, 100}; + gen_jet_pt->GetXaxis()->SetTitle("pT [GeV]"); + + TH2F *genReco_pT_0j = new TH2F{"gen_vs_reco_pt_0j", "gen_vs_reco_pt_0j", 10, ptbins, 10, ptbins}; + genReco_pT_0j->GetYaxis()->SetTitle("p_{T} [GeV]"); + genReco_pT_0j->GetXaxis()->SetTitle("gen p_{T} [GeV]"); + + TH2F *genReco_pT_1j = new TH2F{"gen_vs_reco_pt_1j", "gen_vs_reco_pt_1j", 10, ptbins, 10, ptbins}; + genReco_pT_1j->GetYaxis()->SetTitle("p_{T} [GeV]"); + genReco_pT_1j->GetXaxis()->SetTitle("gen p_{T} [GeV]"); + + TH2F *genReco_pT_2j = new TH2F{"gen_vs_reco_pt_2j", "gen_vs_reco_pt_2j", 10, ptbins, 10, ptbins}; + genReco_pT_2j->GetYaxis()->SetTitle("p_{T} [GeV]"); + genReco_pT_2j->GetXaxis()->SetTitle("gen p_{T} [GeV]"); + + TH2F *genReco_pT_3pj = new TH2F{"gen_vs_reco_pt_3pj", "gen_vs_reco_pt_3pj", 10, ptbins, 10, ptbins}; + genReco_pT_3pj->GetYaxis()->SetTitle("p_{T} [GeV]"); + genReco_pT_3pj->GetXaxis()->SetTitle("gen p_{T} [GeV]"); + + TH2F *genReco_pT_inclJets = new TH2F{"gen_vs_reco_pt_inclJets", "gen_vs_reco_pt_inclJets", 10, ptbins, 10, ptbins}; + genReco_pT_inclJets->GetYaxis()->SetTitle("p_{T} [GeV]"); + genReco_pT_inclJets->GetXaxis()->SetTitle("gen p_{T} [GeV]"); + + TH2F *gen_vs_reco_nJet = new 
TH2F{"gen_vs_reco_nJet", "gen_vs_reco_nJet", 10, jetbins, 10, jetbins}; + gen_vs_reco_nJet->GetYaxis()->SetTitle("Reco nJet"); + gen_vs_reco_nJet->GetXaxis()->SetTitle("Gen nJet"); + + TH2F *gen_vs_reco_nJetNoLep = new TH2F{"gen_vs_reco_nJet_noLeptons", "gen_vs_reco_nJet_noLeptons", 10, jetbins, 10, jetbins}; + gen_vs_reco_nJetNoLep->GetYaxis()->SetTitle("Reco nJet"); + gen_vs_reco_nJetNoLep->GetXaxis()->SetTitle("Gen nJet"); + TH2F *gen_vs_reco_nJetNoLepBoth = new TH2F{"gen_vs_reco_nJet_noLeptonsBoth", "gen_vs_reco_nJet_noLeptonsBoth", 10, jetbins, 10, jetbins}; + gen_vs_reco_nJetNoLepBoth->GetYaxis()->SetTitle("Reco nJet"); + gen_vs_reco_nJetNoLepBoth->GetXaxis()->SetTitle("Gen nJet"); + + const Long64_t entries = Events->GetEntries(); // query the chain once; Long64_t matches what GetEntries() returns + for (Long64_t i = 0; i < entries; ++i) + { + Events->GetEntry(i); + if (i%100000 == 0) + { + std::cout << "Processing entry # " << i << " : " << ((float)i+1)*100/entries << " %\n"; + } + // Preselection of the HWW analysis, change it if needed. The third-lepton veto only looks at Lepton_pt[2] when nLepton > 2, because that slot is not filled for entries with just two leptons. + if (nLepton < 2 || Lepton_pt[0] < 25 || Lepton_pt[1] < 13 || (nLepton > 2 && Lepton_pt[2] > 10) || std::abs(Lepton_eta[0]) > 2.5 || std::abs(Lepton_eta[1]) > 2.5 || + mll < 60 || mll > 120 || Lepton_pdgId[0] * Lepton_pdgId[1] != -11 * 13) + continue; + + ptll_pull->Fill((ptll - gen_ptll)/gen_ptll, XSWeight); + genReco_pT_inclJets->Fill(gen_ptll, ptll, XSWeight); + if (nCleanJet == 0) + {genReco_pT_0j->Fill(gen_ptll, ptll, XSWeight);} + else if (nCleanJet == 1) + {genReco_pT_1j->Fill(gen_ptll, ptll, XSWeight);} + else if (nCleanJet == 2) + {genReco_pT_2j->Fill(gen_ptll, ptll, XSWeight);} + else if (nCleanJet >= 3) + {genReco_pT_3pj->Fill(gen_ptll, ptll, XSWeight);} + + int nRecoJet = CountJetsAbovePt(CleanJet_pt, nCleanJet, 30.); + int nCleanGenJet = CountJetsAbovePt(GenJet_pt, nGenJet, 30.); + gen_vs_reco_nJet->Fill(nCleanGenJet, nRecoJet, XSWeight); // Sum(CleanJet_pt>30) = nRecoJet + // gen_vs_reco_nJet->Fill(nGenJet, Sum(CleanJet_pt>30), XSWeight); + + int nCleanGenJetNoLep = 
CountJetsAbovePtNoLeps(GenJet_pt, nGenJet, 30., nLeptonGen, LeptonGen_phi, LeptonGen_eta, GenJet_phi, GenJet_eta); + gen_vs_reco_nJetNoLep->Fill(nCleanGenJetNoLep, nRecoJet, XSWeight); // Sum(CleanJet_pt>30) = nRecoJet + int nRecoJetNoLep = CountJetsAbovePtNoLeps(CleanJet_pt, nCleanJet, 30., nLepton, Lepton_phi, Lepton_eta, CleanJet_phi, CleanJet_eta); + gen_vs_reco_nJetNoLepBoth->Fill(nCleanGenJetNoLep, nRecoJetNoLep, XSWeight); // Sum(CleanJet_pt>30) = nRecoJet + + for (int gj = 0; gj < nGenJet; gj++) + {gen_jet_pt->Fill(GenJet_pt[gj]);} + } + ptll_pull->Write(); + gen_jet_pt->Write(); + genReco_pT_inclJets->Write(); + genReco_pT_0j->Write(); + genReco_pT_1j->Write(); + genReco_pT_2j->Write(); + genReco_pT_3pj->Write(); + gen_vs_reco_nJet->Write(); + gen_vs_reco_nJetNoLep->Write(); + gen_vs_reco_nJetNoLepBoth->Write(); + + auto normalize_columns = [](TH2F *h) { + int nx = h->GetNbinsX(); + int ny = h->GetNbinsY(); + for (int ix = 1; ix <= nx; ++ix) { // ROOT bins start at 1 + double col_sum = 0.0; + for (int iy = 1; iy <= ny; ++iy) { + col_sum += h->GetBinContent(ix, iy); + } + if (col_sum > 0) { + for (int iy = 1; iy <= ny; ++iy) { + double val = h->GetBinContent(ix, iy); + h->SetBinContent(ix, iy, val / col_sum); + } + } + } + }; + + auto draw_and_save = [&](TH2F *h, const char *qty, const char *tag) { + gStyle->SetOptStat(0); + TCanvas *c = new TCanvas(Form("c_%s",tag), "",800,600); + normalize_columns(h); + c->cd(); + h->Draw("COLZ"); + gPad->Update(); // Ensure stat box is created + // TPaveStats *st = (TPaveStats*)h->GetListOfFunctions()->FindObject("stats"); + // if (st) { + // std::cout<<" Stat box found! 
"<SetX1NDC(0.15); // left + // st->SetX2NDC(0.45); // right + // st->SetY1NDC(0.7); // top + // st->SetY2NDC(0.9); // bottom + // st->Draw(); + // } + // gPad->Update(); // Ensure stat box is created + TString cname = Form("genReco_%s2Dhist_%s_%d_%s_%s_%s.png", + qty, tag, year, process.c_str(), algo.c_str(), WP.c_str()); + c->SaveAs(cname); + delete c; + }; + + TCanvas *c1 = new TCanvas("c1", "",800,600); + c1->cd(); + ptll_pull->Draw(); + ptll_pull->Fit("gaus"); // Fit with Gaussian + TString cname1 = "ptll_pull_" + std::to_string(year) + "_" + process + "_" + algo + "_" + WP + ".png"; + c1->SaveAs(cname1); + delete c1; + + TCanvas *c2 = new TCanvas("c2", "",800,600); + c2->cd(); + gen_jet_pt->Draw(); + // gen_jet_pt->Fit("gaus"); // Fit with Gaussian + TString cname2 = "gen_jet_pt" + std::to_string(year) + "_" + process + "_" + algo + "_" + WP + ".png"; + c2->SaveAs(cname2); + delete c2; + + // draw_and_save(genReco_pT_inclJets, "pT", "inclJets"); + // draw_and_save(genReco_pT_0j, "pT", "0j"); + // draw_and_save(genReco_pT_1j, "pT", "1j"); + // draw_and_save(genReco_pT_2j, "pT", "2j"); + // draw_and_save(genReco_pT_3pj, "pT", "3pj"); + draw_and_save(gen_vs_reco_nJet, "nJet", ""); + draw_and_save(gen_vs_reco_nJetNoLep, "nJetNoLeptons", ""); + draw_and_save(gen_vs_reco_nJetNoLepBoth, "nJetNoLeptonsBoth", ""); + + + +}