diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3a80a27
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+**/__pycache__/**
+/automation/logs/**
+/automation/queue.txt
+**.log
\ No newline at end of file
diff --git a/automation/README.md b/automation/README.md
new file mode 100644
index 0000000..b27124f
--- /dev/null
+++ b/automation/README.md
@@ -0,0 +1,46 @@
+# Automation Tool Kit
+Tool kit to (semi-)automate the production of DQM plots from NanoAODs stored on Tier 0. The automation is based on the cron job scheduler, which executes bash scripts and commands periodically. The following commands will be useful:
+
+- show the list of scheduled cron jobs: `acrontab -l`
+- remove all scheduled cron jobs: `acrontab -r`
+- open the cron job editor: `acrontab -e`
+
+last acron command:
+```
+0 * * * * lxplus cd /afs/cern.ch/user/l/lebeling/MacrosNtuples/automation && sh cron_job.sh >>cron.log 2>&1
+```
+
+
+Inside the cron job editor:
+- save changes via ctrl+o
+- close the editor via ctrl+x
+
+Before running the automation tool kit, adjust the output path (i.e. the directory in which all plots and histograms are deployed) in: `utils.py` -> `dqm_prefix`
+
+To run the automation tool kit, open the cron editor and paste the following command (replace *PATH* with the actual installation path on lxplus):
+```
+* */1 * * * lxplus cd /PATH/MacrosNtuples/automation && sh cron_job.sh >>cron.log 2>&1
+```
+Cron will execute the command once every hour, saving the output messages (and errors) into the *cron.log* logfile. More details on how to configure the timing of a cron job can be found [here](https://crontab.guru).
+
+
+## automation steps
+The different processing steps are summarized as:
+
+1) histograms
+Run `python3 make_hists.py` to produce `.root` files containing histograms for all data types (i.e. EGamma, Muon, JetMET). Which selections are run is specified in `config.yaml` (see `scripts`). If the respective output file already exists, the histogram production is skipped.
+
+2) merge per run
+Run `python3 merge_per_run.py` to merge (i.e. hadd) the histogram files per run. If the respective output file already exists and is newer than all base histogram files, the merging is skipped (as sketched below).
+
+3) merge per era/week
+Run `python3 merge_per_era.py` to further merge (i.e. hadd) the histograms per era (e.g. Run2024H) and per week, using the merged histograms per run. If the respective output file already exists and is newer than all base histogram files, the merging is skipped.
+
+4) merge per type
+Run `python3 merge_total.py` to merge (i.e. hadd) all histograms of one data type (i.e. EGamma, Muon, JetMET) using the merged histograms per era. If the respective output file already exists and is newer than all base histogram files, the merging is skipped.
+
+5) plotting
+Run `python3 make_plots.py` to produce png/pdf plots from all merged histograms (merged per run/era/week/total). The respective plotting scripts are specified in `config.yaml`. If the png/pdf files already exist and are newer than the histogram files, the plotting is skipped.
+
+## htcondor setup
+All production steps listed above can be run on HTCondor. With the flag `--htcondor`, the respective commands are not executed directly but instead written into the `queue.txt` file. With `condor_submit submit.txt`, all commands in the queue are submitted to HTCondor. This mode is highly recommended to (re-)run all files currently stored on Tier 0.
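Steps 2-5 above all apply the same freshness check: skip the work when the target file is newer than every input. A minimal standalone sketch of that logic, assuming ctime comparison as in `utils.py`; the helper name `is_up_to_date` and the paths `histos/*.root` / `merged.root` are illustrative only:

```python
import os
from glob import glob

def is_up_to_date(target, inputs):
    """Hypothetical helper mirroring the skip logic in utils.hadd():
    True if target exists and is newer than every input file."""
    if not os.path.exists(target):
        return False
    target_time = os.path.getctime(target)
    newest_input = max(os.path.getctime(f) for f in inputs)
    return target_time > newest_input

# usage sketch: skip re-merging if merged.root is newer than all inputs
inputs = glob("histos/*.root")
if inputs and is_up_to_date("merged.root", inputs):
    print("skipping merged.root - newer than all base files")
```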
diff --git a/automation/config.yaml b/automation/config.yaml
new file mode 100644
index 0000000..407c12f
--- /dev/null
+++ b/automation/config.yaml
@@ -0,0 +1,45 @@
+JetMET:
+  datasets:
+    - 'JetMET0'
+    - 'JetMET1'
+  eras:
+    - 'Run2025*'
+  scripts:
+    - 'python3 ../l1macros/performances_nano.py -i $INFILE -o $OUTDIR/all_DiJet.root -c DiJet'
+  plotting:
+    - 'python3 ../plotting/make_DiJet_plots.py --dir $OUTDIR --config ../config_cards/full_DiJet.yaml'
+
+EGamma:
+  datasets:
+    - 'EGamma0'
+    - 'EGamma1'
+    - 'EGamma2'
+    - 'EGamma3'
+  eras:
+    - 'Run2025*'
+  scripts:
+    - 'python3 ../l1macros/performances_nano.py -i $INFILE -o $OUTDIR/all_PhotonJet.root -c PhotonJet'
+    - 'python3 ../l1macros/performances_nano.py -i $INFILE -o $OUTDIR/all_ZToEE.root -c ZToEE'
+    - 'python3 ../l1macros/performances_nano_dqmoff.py -i $INFILE -o $OUTDIR/out_zee_dqmoff.root -c ZToEEDQMOff'
+  plotting:
+    - 'python3 ../plotting/make_ZToEE_plots.py --dir $OUTDIR --config ../config_cards/full_ZToEE.yaml'
+    - 'python3 ../plotting/make_PhotonJet_plots.py --dir $OUTDIR --config ../config_cards/full_PhotonJet.yaml'
+
+Muon:
+  datasets:
+    - 'Muon0'
+    - 'Muon1'
+  eras:
+    - 'Run2025*'
+  scripts:
+    - 'python3 ../l1macros/performances_nano.py -i $INFILE -o $OUTDIR/all_ZToMuMu.root -c ZToMuMu'
+    - 'python3 ../l1macros/performances_nano.py -i $INFILE -o $OUTDIR/all_MuonJet.root -c MuonJet' #TODO not working
+    - 'python3 ../l1macros/performances_nano.py -i $INFILE -o $OUTDIR/all_ZToTauTau.root -c ZToTauTau'
+    - 'python3 ../l1macros/performances_nano_dqmoff.py -i $INFILE -o $OUTDIR/out_zmumu_dqmoff.root -c ZToMuMuDQMOff'
+    - 'python3 ../l1macros/performances_nano_dqmoff.py -i $INFILE -o $OUTDIR/out_jets_dqmoff.root -c JetsDQMOff'
+    - 'python3 ../l1macros/performances_nano_dqmoff.py -i $INFILE -o $OUTDIR/out_ztautau_dqmoff.root -c ZToTauTauDQMOff'
+    - 'python3 ../l1macros/performances_nano_dqmoff.py -i $INFILE -o $OUTDIR/out_etsum_dqmoff.root -c EtSumDQMOff'
+  plotting:
+    - 'python3 ../plotting/make_ZToMuMu_plots.py --dir $OUTDIR --config ../config_cards/full_ZToMuMu.yaml'
+    - 'python3 ../plotting/make_ZToTauTau_plots.py --dir $OUTDIR --config ../config_cards/full_ZToTauTau.yaml'
+    - 'python3 ../plotting/make_MuonJet_plots.py --dir $OUTDIR --config ../config_cards/full_MuonJet.yaml'
diff --git a/automation/cron_job.sh b/automation/cron_job.sh
new file mode 100755
index 0000000..53ae864
--- /dev/null
+++ b/automation/cron_job.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# python3 make_hists.py
+# python3 merge_per_run.py
+# python3 merge_per_era.py
+# python3 merge_total.py
+# python3 make_plots.py
+date
+echo "All done!"
\ No newline at end of file
diff --git a/automation/make_hists.py b/automation/make_hists.py
new file mode 100644
index 0000000..510f084
--- /dev/null
+++ b/automation/make_hists.py
@@ -0,0 +1,52 @@
+#!/bin/python3
+import yaml, os
+from glob import glob
+from utils import htcondor_flag, parse_file, run_command, write_queue, tier0, dqm_prefix
+
+config_file = yaml.safe_load(open('config.yaml', 'r'))
+
+htcondor = htcondor_flag()
+
+
+for label, config in config_file.items():
+
+    # step 1 - find all files on tier 0
+    fnames = []
+    for era in config["eras"]:
+        for dataset in config["datasets"]:
+            files = glob(f"{tier0}/{era}/{dataset}/NANOAOD/PromptReco-v*/*/*/*/*/*.root")
+            for f in files:
+                parts = f.split("/")
+                run_str = parts[-4] + parts[-3]
+                run_num = int(run_str)
+                if run_num >= 392241:
+                    fnames.append(f)
+
+    # step 2 - keep only files that have not been processed yet
+    # (build a new list instead of removing from fnames while iterating over it)
+    unprocessed = []
+    for file in fnames:
+        output_path = dqm_prefix + parse_file(file)
+        num_root_files = len(glob(f"{output_path}/*.root"))
+        if num_root_files > 0:
+            print(file + " already processed - skipping")
+        else:
+            unprocessed.append(file)
+    fnames = unprocessed
+
+    # step 3 - run the scripts on all remaining files
+    for i, file in enumerate(fnames):
+
+        print(f"Processing file {file}")
+
+        output_path = dqm_prefix + parse_file(file)
+
+        for cmd in config["scripts"]:
+            cmd = cmd.replace("$OUTDIR", output_path)
+            cmd = cmd.replace("$INFILE", file)
+
+            os.makedirs(output_path, exist_ok=True)
+
+            if htcondor: write_queue(cmd)
+            else: run_command(cmd, output_path + "/log.txt")
diff --git a/automation/make_plots.py b/automation/make_plots.py
new file mode 100644
index 0000000..fd2237c
--- /dev/null
+++ b/automation/make_plots.py
@@ -0,0 +1,35 @@
+#!/bin/python3
+
+import os, argparse, yaml
+from glob import glob
+from utils import run_command, write_queue, htcondor_flag, dqm_prefix
+
+# load config
+config_dict = yaml.safe_load(open('config.yaml', 'r'))
+
+htcondor = htcondor_flag()
+
+# main logic: glob the merged root files and make plots
+for label, config in config_dict.items():
+    pattern = os.path.join(dqm_prefix, '**', label, '**', 'merged')
+    merged_dirs = glob(pattern, recursive=True)
+
+    for merged_dir in merged_dirs:
+
+        # skip plotting if all .png files are newer than all .root files
+        newest_root, oldest_png = 0, 0
+        root_files = glob(f"{merged_dir}/*.root")
+        png_files = glob(f"{merged_dir}/plotsL1Run3/*.png")
+        if len(root_files) > 0: newest_root = max(os.path.getctime(f) for f in root_files)
+        if len(png_files) > 0: oldest_png = min(os.path.getctime(f) for f in png_files)
+        if oldest_png > newest_root:
+            print('skipping: ' + merged_dir)
+            continue
+
+        for cmd in config["plotting"]:
+            print(80*"#" + '\n' + f"plotting for {merged_dir}")
+            os.makedirs(merged_dir + '/plotsL1Run3', exist_ok=True)
+            cmd = cmd.replace("$OUTDIR", merged_dir)
+            print(cmd)
+            if htcondor: write_queue(cmd)
+            else: run_command(cmd, merged_dir + '/log.txt')
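The run-number cut in make_hists.py relies on the Tier-0 directory layout, where the zero-padded run number is spread over consecutive path segments and the last two are concatenated. A minimal sketch with a made-up path (run number, era, and dataset are illustrative only):

```python
# hypothetical Tier-0 NanoAOD path; the layout matters, not the values
f = "/eos/cms/tier0/store/data/Run2025C/Muon0/NANOAOD/PromptReco-v1/000/392/358/00000/abc.root"
parts = f.split("/")
run_str = parts[-4] + parts[-3]   # "392" + "358"
run_num = int(run_str)            # 392358
print(run_num >= 392241)          # True -> this file would be kept
```

The same segments are what `utils.parse_file` reads via `fname.split("/")[11:13]`, so the two decodings stay consistent.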
diff --git a/automation/merge_per_era.py b/automation/merge_per_era.py
new file mode 100644
index 0000000..87acafa
--- /dev/null
+++ b/automation/merge_per_era.py
@@ -0,0 +1,39 @@
+#!/bin/python3
+
+from glob import glob
+from utils import hadd, get_weeks, htcondor_flag, dqm_prefix
+
+htcondor = htcondor_flag()
+
+# collect all histogram root files merged by run
+all_files = glob(f"{dqm_prefix}/*/*/*/*/*/merged/*.root")
+
+weeks = get_weeks()
+
+# group files by week and era
+file_groups = {}
+for file in all_files:
+    parts = file.split('/')
+    filename = parts[-1]
+    run = int(parts[-3])
+    era = parts[-6]
+    label = parts[-7]
+
+    # group by week (runs missing from the OMS list are not assigned to any week)
+    if run in weeks:
+        week = weeks[run]
+        target = f"{dqm_prefix}/Weekly/{week}/{label}/merged/{filename}"
+        file_groups.setdefault(target, []).append(file)
+
+    # group by era
+    target = f"{dqm_prefix}/{label}/{era}/merged/{filename}"
+    file_groups.setdefault(target, []).append(file)
+
+
+# hadd the grouped files
+for target, files in file_groups.items():
+    hadd(target, files, htcondor)
diff --git a/automation/merge_per_run.py b/automation/merge_per_run.py
new file mode 100644
index 0000000..15e2ef6
--- /dev/null
+++ b/automation/merge_per_run.py
@@ -0,0 +1,33 @@
+#!/bin/python3
+
+from glob import glob
+from utils import hadd, clean, htcondor_flag, dqm_prefix
+
+# parse arguments
+htcondor = htcondor_flag()
+
+# collect all base histogram root files
+all_files = glob(f"{dqm_prefix}/*/*/*/*/*/*/*.root")
+all_files = [f for f in all_files if 'merged' not in f]
+cleaned_files = clean(all_files)
+
+# group files by run number: replace the per-file hash directory by 'merged'
+file_groups = {}
+for file in cleaned_files:
+    parts = file.split('/')
+    filehash = parts[-2]
+
+    target = file.replace(filehash, "merged")
+    file_groups.setdefault(target, []).append(file)
+
+# hadd the grouped files
+for target, files in file_groups.items():
+    hadd(target, files, htcondor)
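All three merge scripts build the same structure: a dict mapping each target path to the list of input files to be hadded into it. A minimal standalone sketch of that grouping pattern, with made-up file names and a hypothetical target rule:

```python
# group input files by a derived target path; setdefault keeps one list per target
files = ["a/run1/x.root", "a/run1/y.root", "a/run2/x.root"]  # illustrative paths
file_groups = {}
for f in files:
    target = f.rsplit("/", 1)[0] + "/merged.root"  # hypothetical target rule
    file_groups.setdefault(target, []).append(f)

print(file_groups)
# {'a/run1/merged.root': ['a/run1/x.root', 'a/run1/y.root'],
#  'a/run2/merged.root': ['a/run2/x.root']}
```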
diff --git a/automation/merge_total.py b/automation/merge_total.py
new file mode 100644
index 0000000..908ad1a
--- /dev/null
+++ b/automation/merge_total.py
@@ -0,0 +1,29 @@
+#!/bin/python3
+
+from glob import glob
+from utils import hadd, clean, htcondor_flag, dqm_prefix
+
+htcondor = htcondor_flag()
+
+# collect all histogram root files merged by era
+all_files = glob(f'{dqm_prefix}/*/*/merged/*.root')
+cleaned_files = clean(all_files)
+
+# group by type (i.e. Muon, EGamma, JetMET, etc.)
+file_groups = {}
+for file in cleaned_files:
+    parts = file.split('/')
+    filename = parts[-1]
+    era = parts[-3]
+    label = parts[-4]
+
+    target = f"{dqm_prefix}/{label}/merged/{filename}"
+    file_groups.setdefault(target, []).append(file)
+
+
+# hadd the grouped files
+for target, files in file_groups.items():
+    hadd(target, files, htcondor)
diff --git a/automation/submit.txt b/automation/submit.txt
new file mode 100644
index 0000000..3f973fb
--- /dev/null
+++ b/automation/submit.txt
@@ -0,0 +1,11 @@
+executable = wrapper.py
+arguments = $(cmd)
+output = logs/$(ClusterId).$(ProcId).out
+error = logs/$(ClusterId).$(ProcId).err
+log = logs/$(ClusterId).$(ProcId).log
++JobFlavour = espresso
+
+should_transfer_files = yes
+when_to_transfer_output = on_exit
+
+queue cmd from queue.txt
\ No newline at end of file
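HTCondor hands `$(cmd)` to `wrapper.py` as command-line arguments, so each queued command must survive word splitting. `utils.write_queue` therefore encodes a command by replacing spaces with `___`, and `wrapper.py` reverses this. A minimal round-trip sketch of that convention (the command string is illustrative):

```python
# encode as in utils.write_queue: one command per queue.txt line, spaces -> ___
cmd = "python3 ../l1macros/performances_nano.py -i in.root -o out.root -c DiJet"
encoded = cmd.replace(" ", "___")

# decode as in wrapper.py: the argv tokens are joined, then ___ -> spaces
decoded = " ".join([encoded]).replace("___", " ")
assert decoded == cmd
print(encoded)
```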
diff --git a/automation/utils.py b/automation/utils.py
new file mode 100644
index 0000000..66cc5b5
--- /dev/null
+++ b/automation/utils.py
@@ -0,0 +1,86 @@
+import os, subprocess, argparse, uproot
+import pandas as pd
+
+#dqm_prefix = '/eos/cms/store/group/dpg_trigger/comm_trigger/L1Trigger/cmsl1dpg/www/DQM/T0PromptNanoMonit'
+dqm_prefix = "/eos/user/l/lebeling/www/DQM"
+tier0 = "/eos/cms/tier0/store/data"
+
+
+def run_command(cmd, log_file="log.txt"):
+    with open(log_file, "a") as f:
+        subprocess.run(cmd, shell=True, stdout=f, stderr=f)
+
+
+def parse_file(fname):
+    # decode a Tier-0 path into dataset, run, era, and reco version
+    dataset = fname.split("/")[7]
+    run = int("".join(fname.split("/")[11:13]))
+    base_fname = fname.split("/")[-1].replace(".root", "")
+    era = fname.split("/")[6]
+    reco_version = fname.split("/")[9]
+
+    year = ''.join([char for char in era if char.isdigit()])
+    label = ''.join([char for char in dataset if not char.isdigit()])
+
+    #return f"{year}/{label}/{era}/{run}/{base_fname}"
+    return f"/{label}/{era}/{dataset}/{reco_version}/{run}/{base_fname}"
+
+
+def write_queue(script, infile="", outdir=""):
+    cmd = script.replace("$INFILE", infile).replace("$OUTDIR", outdir)
+    cmd = cmd.replace(" ", "___")
+    with open("queue.txt", "a") as f:
+        f.write(cmd + "\n")
+
+
+# return weeks as dict with run number as key -> weeks[runx] = 'Week42_...'
+def get_weeks(year=2024):
+    oms_path = f"/eos/cms/store/group/tsg/STEAM/OMSRateNtuple/{year}/physics.root"
+    with uproot.open(oms_path) as f:
+        df = f["tree"].arrays(
+            filter_name=['run', 'year', 'month', 'day'],
+            library="pd"
+        )
+    df['date'] = pd.to_datetime(df[['year', 'month', 'day']])
+    df['week'] = df['date'].dt.isocalendar().week
+
+    min_run = df.groupby('week')['run'].min()
+    max_run = df.groupby('week')['run'].max()
+
+    weeks = {}
+    for _, row in df.iterrows():
+        w = row['week']
+        r = row['run']
+        weeks[r] = f'Week{w}_{min_run[w]}-{max_run[w]}'
+
+    return weeks
+
+
+def hadd(target, files, htcondor=False):
+    os.makedirs(os.path.dirname(target), exist_ok=True)
+
+    # skip if the merged file already exists and is newer than all base files
+    if os.path.exists(target):
+        target_time = os.path.getctime(target)
+        files_time = max([os.path.getctime(file) for file in files])
+        if target_time > files_time:
+            print(f"skipping {target} - newer than all base files")
+            return
+
+    print(f"Hadding files with target {target}")
+    cmd = f'hadd -f {target} ' + ' '.join(files)
+    if htcondor: write_queue(cmd)
+    else: run_command(cmd, os.path.dirname(target) + "/log.txt")
+
+
+def htcondor_flag():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--htcondor', action='store_true', help='run on HTCondor')
+    args = parser.parse_args()
+    if args.htcondor: os.system('rm -rf queue.txt')
+    return args.htcondor
+
+
+def clean(files):
+    # drop files too small to contain histograms (below ~1.6 kB)
+    return [file for file in files if os.path.getsize(file) >= 1600]
diff --git a/automation/wrapper.py b/automation/wrapper.py
new file mode 100755
index 0000000..67030d3
--- /dev/null
+++ b/automation/wrapper.py
@@ -0,0 +1,18 @@
+#!/bin/python3
+
+import argparse
+import os
+
+automation_path = os.path.dirname(os.path.abspath(__file__))
+
+# parse the commands to be executed as arguments
+parser = argparse.ArgumentParser(description="wrapper running scripts on HTCondor")
+parser.add_argument('cmd', nargs='+', type=str, help='commands to be executed')
+args = parser.parse_args()
+
+concatenated_cmd = ' '.join(args.cmd)
+concatenated_cmd = concatenated_cmd.replace("___", " ")
+concatenated_cmd = f'cd {automation_path}; ' + concatenated_cmd
+
+print('command executed: ' + concatenated_cmd)
+os.system(concatenated_cmd)
diff --git a/config_cards/full_MuonJet.yaml b/config_cards/full_MuonJet.yaml
index c15e440..6302c72 100644
--- a/config_cards/full_MuonJet.yaml
+++ b/config_cards/full_MuonJet.yaml
@@ -27,7 +27,7 @@ Prefiring: true
 # Efficiency vs Run Nb and vs Eta Phi
 Efficiency: true
 # MET plots
-MET_plots: true
+MET_plots: false
 # HF noise
 HF_noise: true
 ### Settings for the plots in bins of nvtx
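`get_weeks` returns one entry per run, mapping the run number to a week label that already encodes the first and last run of that week; merge_per_era.py uses it to build the `Weekly/...` targets. A usage sketch with invented run numbers and labels (the real mapping comes from the OMS rate ntuple):

```python
# illustrative output shape of utils.get_weeks(); all values are made up
weeks = {383100: 'Week28_383050-383199',
         383150: 'Week28_383050-383199'}

run = 383150
if run in weeks:
    # merged-per-week histograms land under {dqm_prefix}/Weekly/<week>/...
    print(f"run {run} belongs to {weeks[run]}")
```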
diff --git a/helpers/helper_nano.py b/helpers/helper_nano.py
index 0c57026..9d2a2a2 100644
--- a/helpers/helper_nano.py
+++ b/helpers/helper_nano.py
@@ -61,7 +61,7 @@ def make_filter(golden_json):
     }
     for(unsigned int i = 0;i< (Jet_pt).size();i++ ){
         cout << "jet Pt, Eta, Phi: " << (Jet_pt)[i]<<", "<<(Jet_eta)[i]<<", "<<(Jet_phi)[i]<<endl;
@@ ... @@
-    df = df.Define('isHighPtJet','Jet_jetId>=6&&Jet_pt>500&&Jet_muEF<0.5&&Jet_chEmEF<0.5&&Jet_neEmEF<0.8')
-
+    #df = df.Define('isHighPtJet','Jet_jetId>=6&&Jet_pt>500&&Jet_muEF<0.5&&Jet_chEmEF<0.5&&Jet_neEmEF<0.8')
+    df = df.Define('isHighPtJet', 'passPFJetID && Jet_pt > 500 && Jet_muEF < 0.5 && Jet_chEmEF < 0.5 && Jet_neEmEF < 0.8')
+
     df = df.Filter('Sum(isHighPtJet)==2','=2 jets with pt>500 GeV')
     df = df.Filter('isHighPtJet[0]&&isHighPtJet[1]','First 2 jets are the cleaned jets')
     df = df.Define('highPtJet_Pt','Jet_pt[isHighPtJet]')
@@ -634,11 +635,30 @@
     return df
 
+
+def PassPFJetID(df):
+    # Jet ID based on energy fractions and multiplicities
+    df = df.Define("absJetEta", "abs(Jet_eta)")
+    df = df.Define("passPFJetID",
+        """
+        (absJetEta <= 2.6 && Jet_neHEF < 0.90 && Jet_neEmEF < 0.90 && Jet_nConstituents > 1 &&
+        Jet_muEF < 0.80 && Jet_chHEF > 0.01 && Jet_chMultiplicity > 0 && Jet_chEmEF < 0.80) ||
+
+        (absJetEta > 2.6 && absJetEta <= 2.7 && Jet_neHEF < 0.90 && Jet_neEmEF < 0.99 &&
+        Jet_muEF < 0.80 && Jet_chEmEF < 0.80) ||
+
+        (absJetEta > 2.7 && absJetEta <= 3.0 && Jet_neHEF < 0.9999) ||
+
+        (absJetEta > 3.0 && absJetEta <= 5.0 && Jet_neEmEF < 0.90 && Jet_neMultiplicity > 2)
+        """
+    )
+
+    return df
 
 def CleanJets(df):
     #List of cleaned jets (noise cleaning + lepton/photon overlap removal)
-    df = df.Define('_jetPassID', 'Jet_jetId>=4')
-    df = df.Define('isCleanJet','_jetPassID&&Jet_pt>30&&Jet_muEF<0.5&&Jet_chEmEF<0.5')
+    #df = df.Define('_jetPassID', 'Jet_jetId>=4')
+    #df = df.Define('isCleanJet','_jetPassID&&Jet_pt>30&&Jet_muEF<0.5&&Jet_chEmEF<0.5')
+    df = df.Define('isCleanJet', 'passPFJetID && Jet_pt > 30 && Jet_muEF < 0.5 && Jet_chEmEF < 0.5')
     df = df.Define('cleanJet_Pt','Jet_pt[isCleanJet]')
     df = df.Define('cleanJet_Eta','Jet_eta[isCleanJet]')
     df = df.Define('cleanJet_Phi','Jet_phi[isCleanJet]')
@@ -731,8 +751,8 @@ def EtSum(df, suffix = ''):
     # Normal (MET only) trigger
     histos['HLT_PFMETNoMu120_PFMHTNoMu120_IDTight_DiJet140_70_Mjj900'+suffix] = df.Filter('HLT_PFMETNoMu120_PFMHTNoMu120_IDTight&&vbf_selection').Histo1D(ROOT.RDF.TH1DModel('h_HLT_PFMETNoMu120_PFMHTNoMu120_IDTight_DiJet140_70_Mjj900'+suffix, '', len(jetmetpt_bins)-1, array('d',jetmetpt_bins)), 'MetNoMu')
 
-    # VBF (Met + jet) trigger
-    histos['HLT_DiJet110_35_Mjj650_PFMET110_DiJet140_70_Mjj900'+suffix] = df.Filter('HLT_DiJet110_35_Mjj650_PFMET110&&vbf_selection').Histo1D(ROOT.RDF.TH1DModel('h_HLT_DiJet110_35_Mjj650_PFMET110_DiJet140_70_Mjj900'+suffix, '', len(jetmetpt_bins)-1, array('d',jetmetpt_bins)), 'MetNoMu')
+    ## VBF (Met + jet) trigger
+    # histos['HLT_DiJet110_35_Mjj650_PFMET110_DiJet140_70_Mjj900'+suffix] = df.Filter('HLT_DiJet110_35_Mjj650_PFMET110&&vbf_selection').Histo1D(ROOT.RDF.TH1DModel('h_HLT_DiJet110_35_Mjj650_PFMET110_DiJet140_70_Mjj900'+suffix, '', len(jetmetpt_bins)-1, array('d',jetmetpt_bins)), 'MetNoMu')
 
     # VBF trigger
     if max(runnb_bins) > 367661:
diff --git a/helpers/helper_nano_dqmoff.py b/helpers/helper_nano_dqmoff.py
index 4157e40..80a9a64 100644
--- a/helpers/helper_nano_dqmoff.py
+++ b/helpers/helper_nano_dqmoff.py
@@ -196,9 +196,10 @@ def DQMOff_JetSelection(df):
             return false;
             ''')
 
-    df = df.Define('isGoodJet', 'Jet_jetId>=4')
-    df = df.Filter('Sum(isGoodJet)>0')
-    df = df.Define('isLead', 'isLeadJet(Jet_pt, isGoodJet)')
+    #df = df.Define('isGoodJet', 'Jet_jetId>=4')
+    #df = df.Filter('Sum(isGoodJet)>0')
+    df = df.Filter('Sum(passPFJetID)>0')
+    df = df.Define('isLead', 'isLeadJet(Jet_pt, passPFJetID)')
 
     df = df.Define('leadJetPt', 'Jet_pt[isLead]')
     df = df.Define('leadJetEta','Jet_eta[isLead]')
@@ -225,6 +226,25 @@ def DQMOff_EtSumSelection(df):
     return df
 
+def PassPFJetID(df):
+    # Jet ID based on energy fractions and multiplicities
+    df = df.Define("absJetEta", "abs(Jet_eta)")
+    df = df.Define("passPFJetID",
+        """
+        (absJetEta <= 2.6 && Jet_neHEF < 0.90 && Jet_neEmEF < 0.90 && Jet_nConstituents > 1 &&
+        Jet_muEF < 0.80 && Jet_chHEF > 0.01 && Jet_chMultiplicity > 0 && Jet_chEmEF < 0.80) ||
+
+        (absJetEta > 2.6 && absJetEta <= 2.7 && Jet_neHEF < 0.90 && Jet_neEmEF < 0.99 &&
+        Jet_muEF < 0.80 && Jet_chEmEF < 0.80) ||
+
+        (absJetEta > 2.7 && absJetEta <= 3.0 && Jet_neHEF < 0.9999) ||
+
+        (absJetEta > 3.0 && absJetEta <= 5.0 && Jet_neEmEF < 0.90 && Jet_neMultiplicity > 2)
+        """
+    )
+
+    return df
+
 
 def ZEE_DQMOff_Plots(df, suffix = ''):
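`passPFJetID` is a per-jet column, so `PassPFJetID` has to be booked on the RDataFrame before any selection that references the column; this is why the performances_nano scripts below call it ahead of the channel selections. A minimal sketch of the ordering, assuming ROOT is available and a NanoAOD input (`nano.root` is a placeholder, and the cut is shortened for the sketch):

```python
import ROOT

# illustrative: Events tree from a NanoAOD file; requires the Jet_* columns
df = ROOT.RDataFrame("Events", "nano.root")

# book the per-jet ID column first ...
df = df.Define("absJetEta", "abs(Jet_eta)")
df = df.Define("passPFJetID", "absJetEta <= 2.6 && Jet_neHEF < 0.90")  # shortened cut

# ... only then can selections reference it
df = df.Filter("Sum(passPFJetID) > 0", "at least one jet passing the PF jet ID")
print(df.Count().GetValue())
```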
diff --git a/l1macros/cron_job_runner.py b/l1macros/cron_job_runner.py
new file mode 100644
index 0000000..7e99329
--- /dev/null
+++ b/l1macros/cron_job_runner.py
@@ -0,0 +1,133 @@
+#!/bin/python3
+
+import os, sys, time, subprocess
+import random
+from glob import glob
+
+path_prefix = "/eos/cms/tier0/store/data"
+
+config_dict = {
+    "JetMET" :  # for JetMET plots
+    {
+        "datasets" : ["JetMET0","JetMET1"],
+        "eras" : ["Run2024*"],
+        "scripts": [
+            "python3 performances_nano.py -i $INFILE -o $OUTDIR/all_DiJet.root -c DiJet",
+            "python3 ../plotting/make_DiJet_plots.py --dir $OUTDIR --config ../config_cards/full_DiJet.yaml",
+        ]
+    },
+    "EGamma" :  # for EGamma plots
+    {
+        "datasets" : ["EGamma0","EGamma1"],
+        "eras" : ["Run2024*"],
+        "scripts": [
+            "python3 performances_nano.py -i $INFILE -o $OUTDIR/all_PhotonJet.root -c PhotonJet",
+            "python3 performances_nano.py -i $INFILE -o $OUTDIR/all_ZToEE.root -c ZToEE",
+
+            ## OFF DQM
+            "python3 performances_nano_dqmoff.py -i $INFILE -o $OUTDIR/out_zee_dqmoff.root -c ZToEEDQMOff",
+            ## Plot
+            "python3 ../plotting/make_ZToEE_plots.py --dir $OUTDIR --config ../config_cards/full_ZToEE.yaml",
+            "python3 ../plotting/make_PhotonJet_plots.py --dir $OUTDIR --config ../config_cards/full_PhotonJet.yaml",
+        ]
+    },
+    "Muon" :  # for Muon plots
+    {
+        "datasets" : ["Muon0","Muon1"],
+        "eras" : ["Run2024*"],
+        "scripts" : [
+            "/bin/python3 performances_nano.py -i $INFILE -o $OUTDIR/all_ZToMuMu.root -c ZToMuMu",
+            "/bin/python3 performances_nano.py -i $INFILE -o $OUTDIR/all_MuonJet.root -c MuonJet",
+            "/bin/python3 performances_nano.py -i $INFILE -o $OUTDIR/all_ZToTauTau.root -c ZToTauTau",
+            ## OFF DQM
+            "/bin/python3 performances_nano_dqmoff.py -i $INFILE -o $OUTDIR/out_zmumu_dqmoff.root -c ZToMuMuDQMOff",
+            "/bin/python3 performances_nano_dqmoff.py -i $INFILE -o $OUTDIR/out_jets_dqmoff.root -c JetsDQMOff",
+            "/bin/python3 performances_nano_dqmoff.py -i $INFILE -o $OUTDIR/out_ztautau_dqmoff.root -c ZToTauTauDQMOff",
+            "/bin/python3 performances_nano_dqmoff.py -i $INFILE -o $OUTDIR/out_etsum_dqmoff.root -c EtSumDQMOff",
+            ## plotting
+            "/bin/python3 ../plotting/make_ZToMuMu_plots.py --dir $OUTDIR --config ../config_cards/full_ZToMuMu.yaml",
+            "/bin/python3 ../plotting/make_ZToTauTau_plots.py --dir $OUTDIR --config ../config_cards/full_ZToTauTau.yaml",
+            "/bin/python3 ../plotting/make_MuonJet_plots.py --dir $OUTDIR --config ../config_cards/full_MuonJet.yaml",
+        ]
+    }
+}
+
+
+for label, config in config_dict.items():
+    print(80*"#")
+    print(80*"#")
+    print(f" Running plots for {label}")
+    print(80*"#")
+
+    fnames = []
+    for dataset in config["datasets"]:
+        for era in config["eras"]:
+            fnames += glob(f"{path_prefix}/{era}/{dataset}/NANOAOD/PromptReco-v*/*/*/*/*/*.root")
+
+    if len(fnames) > 0:
+        # pick a random file for now
+        fname = random.choice(fnames)
+        ## alternative: take the latest file from T0 eos
+        #fname = fnames[-1]
+    else:
+        continue
+
+    ## decode the file path into run, era etc.
+    dataset = fname.split("/")[7]
+    run = int("".join(fname.split("/")[11:13]))
+    base_fname = fname.split("/")[-1].replace(".root","")
+    era = fname.split("/")[6]
+    reco_version = fname.split("/")[9]
+
+    outdir = f"{era}/{dataset}/{reco_version}/{run}/{base_fname}"
+
+    # check whether the output already exists
+    #out_web_path = "/eos/cms/store/group/dpg_trigger/comm_trigger/L1Trigger/cmsl1dpg/www/DQM/T0PromptNanoMonit/" + outdir
+    out_web_path = "/eos/home-l/lebeling/www/DQM/" + outdir
+
+    if os.path.exists(out_web_path):
+        print("Output already exists!")
+        print(out_web_path)
+        continue
+    else:
+        os.makedirs(out_web_path)
+        os.makedirs(out_web_path + "/plotsL1Run3")  # for plots
+
+    ### Main part: run the performance code
+    #script_dir = "/eos/cms/store/group/dpg_trigger/comm_trigger/L1Trigger/cmsl1dpg/MacrosNtuples/l1macros"
+    script_dir = "/eos/home-l/lebeling/projects/MacrosNtuples/l1macros"
+    os.chdir(script_dir)
+
+    for script in config["scripts"]:
+        print(80*"#")
+        print(80*"#")
+        print(script)
+        print(80*"#")
+
+        script = script.replace("$INFILE", fname).replace("$OUTDIR", out_web_path)
+
+        print(f"Going to process {fname} and store output here: {out_web_path}")
+        if "/" in script.split(" ")[-1]:
+            log_fname = out_web_path + "/" + os.path.basename(script.split(" ")[-1]) + ".log"
+        else:
+            log_fname = out_web_path + "/" + script.split(" ")[-1] + ".log"
+        print(f"Writing logs to {log_fname}")
+        with open(log_fname, "w") as f:
+            # run the full command string through the shell rather than split(" ")
+            ret = subprocess.run(
+                script,
+                shell = True,
+                stdout = f,
+                stderr = f
+            )
+
+    ### Hadd the outputs of a full run?
+    # break
diff --git a/l1macros/performances_nano.py b/l1macros/performances_nano.py
index a69fdd3..f6fb37a 100644
--- a/l1macros/performances_nano.py
+++ b/l1macros/performances_nano.py
@@ -121,7 +121,8 @@ def main():
     h.set_runnb_bins(df)
 
     #Define ETMHF
-    df = h.L1ETMHF(df)
+    #df = h.L1ETMHF(df)
+
     if args.outputFile == '':
         args.outputFile = 'output_'+args.channel+'.root'
     out = ROOT.TFile(args.outputFile, "recreate")
@@ -134,7 +135,9 @@ def main():
     # add nvtx histo
     nvtx_histo = df.Histo1D(ROOT.RDF.TH1DModel("h_nvtx" , "Number of reco vertices;N_{vtx};Events" , 100, 0., 100.), "PV_npvs")
-
+
+    # Define PF JetID
+    df = h.PassPFJetID(df)
+
     if args.channel == 'PhotonJet':
         df = h.SinglePhotonSelection(df)
diff --git a/l1macros/performances_nano_dqmoff.py b/l1macros/performances_nano_dqmoff.py
index 4afb420..dbf3e8d 100644
--- a/l1macros/performances_nano_dqmoff.py
+++ b/l1macros/performances_nano_dqmoff.py
@@ -126,6 +126,8 @@ def main():
     nvtx_histo = df.Histo1D(ROOT.RDF.TH1DModel("h_nvtx" , "Number of reco vertices;N_{vtx};Events" , 100, 0., 100.), "PV_npvs")
     nvtx_histo.GetValue().Write()
 
+    # Define PF JetID
+    df = h.PassPFJetID(df)
 
     if args.channel == 'ZToEEDQMOff':
diff --git a/plotting/make_DiJet_plots.py b/plotting/make_DiJet_plots.py
index 7901955..3a4f369 100644
--- a/plotting/make_DiJet_plots.py
+++ b/plotting/make_DiJet_plots.py
@@ -1,10 +1,11 @@
 eventselection='dijet'
-subfolder='/plotsL1Run3'
+subfolder='/plots_dijet'
 channelname='DiJet'
 
 import yaml
 import drawplots
 import argparse
+import os
 
 def main():
     parser = argparse.ArgumentParser(
@@ -30,6 +31,8 @@ def main():
         bins = config['PU_plots']['nvtx_bins']
         suffixes += ['_nvtx{}to{}'.format(bins[i], bins[i+1]) for i in range(len(bins) - 1)]
 
+    os.makedirs(args.dir + subfolder, exist_ok=True)
+
     drawplots.makedist(
         inputFiles_list = [input_file],
         saveplot = True,
diff --git a/plotting/make_MuonJet_plots.py b/plotting/make_MuonJet_plots.py
index 27e84ac..9349fd9 100644
--- a/plotting/make_MuonJet_plots.py
+++ b/plotting/make_MuonJet_plots.py
@@ -1,10 +1,11 @@
 eventselection='#mu+jet'
-subfolder='/plotsL1Run3'
+subfolder='/plots_muonjet'
 channelname='MuonJet'
 
 import yaml
 import drawplots
 import argparse
+import os
 
 def main():
     parser = argparse.ArgumentParser(
@@ -30,6 +31,8 @@ def main():
         bins = config['PU_plots']['nvtx_bins']
         suffixes += ['_nvtx{}to{}'.format(bins[i], bins[i+1]) for i in range(len(bins) - 1)]
 
+    os.makedirs(args.dir + subfolder, exist_ok=True)
+
     # NVTX distribution:
     drawplots.makedist(
diff --git a/plotting/make_PhotonJet_plots.py b/plotting/make_PhotonJet_plots.py
index 072f6db..c869e98 100644
--- a/plotting/make_PhotonJet_plots.py
+++ b/plotting/make_PhotonJet_plots.py
@@ -1,10 +1,11 @@
 eventselection='#gamma+jet'
-subfolder='/plotsL1Run3'
+subfolder='/plots_photonjet'
 channelname='PhotonJet'
 
 import yaml
 import drawplots
 import argparse
+import os
 
 def main():
     parser = argparse.ArgumentParser(
@@ -30,6 +31,8 @@ def main():
         bins = config['PU_plots']['nvtx_bins']
         suffixes += ['_nvtx{}to{}'.format(bins[i], bins[i+1]) for i in range(len(bins) - 1)]
 
+    os.makedirs(args.dir + subfolder, exist_ok=True)
+
     # NVTX distribution:
     drawplots.makedist(
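cron_job_runner.py above deliberately passes the full command string with `shell=True` rather than using `script.split(" ")`: several entries in `config_dict` carry stray (trailing or repeated) spaces, and naive splitting then produces empty argv tokens, while the shell collapses the whitespace. A small sketch of the difference (the echo command is illustrative):

```python
import subprocess

cmd = "echo hello  world "   # note the repeated and trailing spaces

print(cmd.split(" "))        # ['echo', 'hello', '', 'world', ''] -> empty argv tokens
subprocess.run(cmd, shell=True)   # the shell collapses the whitespace instead
```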
diff --git a/plotting/make_ZToEE_plots.py b/plotting/make_ZToEE_plots.py
index f9e210c..eddd13d 100644
--- a/plotting/make_ZToEE_plots.py
+++ b/plotting/make_ZToEE_plots.py
@@ -1,11 +1,12 @@
 # make_ZToEE_plots.py, a program to draw the L1Studies plots obtained from the histograms extracted from NanoAOD
 eventselection='Z#rightarrow ee'
-subfolder='/plotsL1Run3'
+subfolder='/plots_ztoee'
 channelname='ZToEE'
 
 import yaml
 import drawplots
 import argparse
+import os
 
 def main():
     parser = argparse.ArgumentParser(
@@ -31,6 +32,8 @@ def main():
         bins = config['PU_plots']['nvtx_bins']
         suffixes += ['_nvtx{}to{}'.format(bins[i], bins[i+1]) for i in range(len(bins) - 1)]
 
+    os.makedirs(args.dir + subfolder, exist_ok=True)
+
     # NVTX distribution:
     drawplots.makedist(
         inputFiles_list = [input_file],
diff --git a/plotting/make_ZToMuMu_plots.py b/plotting/make_ZToMuMu_plots.py
index 849cd4b..d679bdb 100644
--- a/plotting/make_ZToMuMu_plots.py
+++ b/plotting/make_ZToMuMu_plots.py
@@ -1,11 +1,12 @@
 # make_mu_plots.py, a program to draw the L1Studies plots obtained from the histograms extracted from NanoAOD
 eventselection='Z#rightarrow #mu#mu'
-subfolder='/plotsL1Run3'
+subfolder='/plots_ztomumu'
 channelname='ZToMuMu'
 
 import yaml
 import drawplots
 import argparse
+import os
 
 def main():
     parser = argparse.ArgumentParser(
@@ -31,6 +32,8 @@ def main():
         bins = config['PU_plots']['nvtx_bins']
         suffixes += ['_nvtx{}to{}'.format(bins[i], bins[i+1]) for i in range(len(bins) - 1)]
 
+    os.makedirs(args.dir + subfolder, exist_ok=True)
+
     # NVTX distribution:
     drawplots.makedist(
         inputFiles_list = [input_file],
diff --git a/plotting/make_ZToTauTau_plots.py b/plotting/make_ZToTauTau_plots.py
index 5d10935..d5ae8fb 100644
--- a/plotting/make_ZToTauTau_plots.py
+++ b/plotting/make_ZToTauTau_plots.py
@@ -1,9 +1,11 @@
 eventselection='#mu+#tau_{h}'
-subfolder='/plotsL1Run3'
+subfolder='/plots_ztotautau'
 channelname='ZToTauTau'
+
 import yaml
 import drawplots
 import argparse
+import os
 
 def main():
     parser = argparse.ArgumentParser(
@@ -28,6 +30,8 @@ def main():
     if config['PU_plots']['make_histos']:
         bins = config['PU_plots']['nvtx_bins']
         suffixes += ['_nvtx{}to{}'.format(bins[i], bins[i+1]) for i in range(len(bins) - 1)]
+
+    os.makedirs(args.dir + subfolder, exist_ok=True)
 
     # NVTX distribution: