LDMX-Software · tomeichlersmith · May 15, 2025 · May 5, 2025 · May 5, 2025 · May 5, 2025
diff --git a/.github/validation_samples/ecal_pn/config.py b/.github/validation_samples/ecal_pn/config.py
@@ -32,6 +32,7 @@
 import LDMX.Ecal.ecal_hardcoded_conditions
 import LDMX.Ecal.digi as ecal_digi
 import LDMX.Ecal.vetos as ecal_vetos
+import LDMX.Ecal.ecalClusters as ecal_cluster
 
 # Load the HCAL modules
 import LDMX.Hcal.HcalGeometry
@@ -83,6 +84,7 @@
 p.sequence.extend([
         ecal_digi.EcalDigiProducer(),
         ecal_digi.EcalRecProducer(), 
+        ecal_cluster.EcalClusterProducer(),
         ecal_veto,
         hcal_digi_reco,
         hcal_veto,
@@ -91,6 +93,7 @@
         trigScintTrack,
         count, TriggerProcessor('trigger', 8000.),
         dqm.PhotoNuclearDQM(),
+        dqm.EcalClusterAnalyzer()
         ])
 
 p.sequence.extend(dqm.all_dqm)

diff --git a/.github/validation_samples/inclusive/config.py b/.github/validation_samples/inclusive/config.py
@@ -32,6 +32,7 @@
 import LDMX.Ecal.ecal_hardcoded_conditions
 import LDMX.Ecal.digi as ecal_digi
 import LDMX.Ecal.vetos as ecal_vetos
+import LDMX.Ecal.ecalClusters as ecal_cluster
 
 # Load the HCAL modules
 import LDMX.Hcal.HcalGeometry
@@ -83,6 +84,7 @@
 p.sequence.extend([
         ecal_digi.EcalDigiProducer(),
         ecal_digi.EcalRecProducer(), 
+        ecal_cluster.EcalClusterProducer(),
         ecalVeto,
         hcal_digi_reco,
         hcal_veto,
@@ -91,6 +93,7 @@
         trigScintTrack,
         count, TriggerProcessor('trigger', 8000.),
         dqm.PhotoNuclearDQM(),
+        dqm.EcalClusterAnalyzer()
         ])
 
 p.sequence.extend(dqm.all_dqm)
diff --git a/.github/validation_samples/it_pileup/config.py b/.github/validation_samples/it_pileup/config.py
@@ -48,6 +48,7 @@
 
 from LDMX.Ecal import digi as eDigi
 from LDMX.Ecal import vetos
+import LDMX.Ecal.ecalClusters as ecal_cluster
 from LDMX.Hcal import digi as hDigi
 
 # this is hardwired into the code to be appended to the sim hits collections
@@ -194,14 +195,15 @@
 p.sequence.extend(full_tracking_sequence.dqm_sequence)
 
 p.sequence.extend([
-    ecalDigi, ecalReco, ecalVeto,
+    ecalDigi, ecalReco, ecalVeto, ecal_cluster.EcalClusterProducer(),
     hcal_digi_reco, 
     hcal_veto,
     *ts_digis,
     *ts_clusters,
     trigScintTrack,
     count, TriggerProcessor('trigger', 8000.),
     dqm.PhotoNuclearDQM(),
+    dqm.EcalClusterAnalyzer()
 ])
 
 p.sequence.extend(dqm_with_overlay)

diff --git a/DQM/include/DQM/EcalClusterAnalyzer.h b/DQM/include/DQM/EcalClusterAnalyzer.h
@@ -0,0 +1,52 @@
+/**
+ * @file EcalClusterAnalyzer.h
+ * @brief Analysis of cluster performance
+ * @author Ella Viirola, Lund University
+ */
+
+#ifndef DQM_ECALCLUSTERANALYZER_H
+#define DQM_ECALCLUSTERANALYZER_H
+
+// LDMX Framework
+#include "Framework/Configure/Parameters.h"
+#include "Framework/EventProcessor.h"
+
+namespace dqm {
+
+/**
+ * @class EcalClusterAnalyzer
+ * @brief Fills DQM histograms describing the performance of ECal clustering.
+ *
+ * The analyzer is configured with the number of electrons expected per event
+ * and with the collection/pass names of the ECal sim hits, ECal rec hits and
+ * ECal clusters it reads.
+ */
+class EcalClusterAnalyzer : public framework::Analyzer {
+ public:
+  /**
+   * Construct the analyzer.
+   *
+   * @param name    instance name, forwarded to the Analyzer base class
+   * @param process process handle, forwarded to the Analyzer base class
+   */
+  EcalClusterAnalyzer(const std::string& name, framework::Process& process)
+      : Analyzer(name, process) {}
+  ~EcalClusterAnalyzer() override = default;
+
+  /**
+   * Read the configuration parameters into the private members.
+   *
+   * @param ps parameter set for this analyzer instance
+   */
+  void configure(framework::config::Parameters& ps) override;
+
+  /**
+   * Analyze a single event.
+   *
+   * @param event event to read the hit and cluster collections from
+   */
+  void analyze(const framework::Event& event) override;
+
+ private:
+  // Number of electrons expected per event. Value-initialized so it is never
+  // read while indeterminate; the configured value is assigned in configure().
+  int nbr_of_electrons_{0};
+
+  // Collection name for ECal sim hits
+  std::string ecal_sim_hit_coll_;
+
+  // Pass name for ECal sim hits
+  std::string ecal_sim_hit_pass_;
+
+  // Collection name for ECal rec hits
+  std::string rec_hit_coll_name_;
+
+  // Pass name for ECal rec hits
+  std::string rec_hit_pass_name_;
+
+  // Collection name for ECal clusters
+  std::string cluster_coll_name_;
+
+  // Pass name for ECal clusters
+  std::string cluster_pass_name_;
+};
+
+}  // namespace dqm
+
+#endif
diff --git a/DQM/python/dqm.py b/DQM/python/dqm.py
@@ -804,6 +804,38 @@ def __init__(self, name='SampleValidation') :
         self.build1DHistogram("pdgid_hardbremdaughters", "ID of hard brem daughters", 20, 0, 20)
         self.build1DHistogram("startZ_hardbremdaughters", "Start z position of hard brem daughters  [mm]", 200, -1000, 1000)
 
+class EcalClusterAnalyzer(ldmxcfg.Analyzer) :
+    """Analyze clustering"""
+
+    def __init__(self,name='EcalClusterAnalyzer') :
+        super().__init__(name, "dqm::EcalClusterAnalyzer", 'DQM')
+
+        self.nbr_of_electrons = 2
+
+        self.ecal_sim_hit_coll = "EcalSimHits"
+        self.ecal_sim_hit_pass = "" #use whatever pass is available
+
+        # Pass name for ecal digis and rec hits
+        self.rec_hit_coll_name = 'EcalRecHits'
+        self.rec_hit_pass_name = ''
+
+        self.cluster_coll_name = 'ecalClusters'
+        self.cluster_pass_name = ''
+
+        # Need to mod for more than two electrons
+        self.build1DHistogram("ancestors", "Ancestors of particles", 4, 0, 3)
+
+        self.build1DHistogram("same_ancestor", "Percentage of hits in cluster coming from the electron that produced most hits", 21, 0, 105)
+        self.build1DHistogram("energy_percentage", "Percentage of energy in cluster coming from the electron that produced most of energy", 21, 0, 105)
+        self.build1DHistogram("mixed_hit_energy", "Percentage of total energy coming from hits with energy contributions from more than one electron", 21, 0, 105)
+        self.build1DHistogram("clusterless_hits", "Number of hits not in a cluster", 10, 0, 200)
+        self.build1DHistogram("clusterless_hits_percentage", "Percentage of hits not in a cluster", 21, 0, 105)
+        self.build1DHistogram("total_rechits_in_event", "Rechits per event", 20, 0, 500)
+        self.build1DHistogram("correctly_predicted_events", "Correctly predicted events", 3, 0, 3)
+
+        self.build2DHistogram("total_energy_vs_hits", "Total energy (edep)", 30, 0, 150, "Hits in cluster", 20, 0, 200)
+        self.build2DHistogram("total_energy_vs_purity", "Total energy (edep)", 30, 0, 150, "Energy purity %", 21, 0, 105)
+        self.build2DHistogram("distance_energy_purity", "Distance in xy-plane", 20, 0, 220, "Energy purity %", 21, 0, 105)
 
 ecal_dqm = [
         EcalDigiVerify(),
@@ -855,4 +887,4 @@ def __init__(self, name='SampleValidation') :
         ]
 
 
-all_dqm = ecal_dqm + hcal_dqm + trigScint_dqm + trigger_dqm
+all_dqm = ecal_dqm + hcal_dqm + trigScint_dqm + trigger_dqm
diff --git a/DQM/src/DQM/EcalClusterAnalyzer.cxx b/DQM/src/DQM/EcalClusterAnalyzer.cxx
@@ -0,0 +1,185 @@
+/**
+ * @file EcalClusterAnalyzer.cxx
+ * @brief Analysis of cluster performance
+ * @author Ella Viirola, Lund University
+ */
+
+#include "DQM/EcalClusterAnalyzer.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <fstream>
+#include <iostream>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "DetDescr/SimSpecialID.h"
+#include "Ecal/Event/EcalCluster.h"
+#include "Ecal/Event/EcalHit.h"
+#include "SimCore/Event/SimCalorimeterHit.h"
+#include "SimCore/Event/SimTrackerHit.h"
+
+namespace dqm {
+
+/**
+ * Copy the configured values out of the parameter set into the members.
+ *
+ * @param ps parameter set for this analyzer instance
+ */
+void EcalClusterAnalyzer::configure(framework::config::Parameters& ps) {
+  // All collection and pass names are strings, so read them through one
+  // small accessor instead of spelling out the template argument each time.
+  const auto name_of = [&ps](const char* key) {
+    return ps.getParameter<std::string>(key);
+  };
+
+  // expected electron multiplicity
+  nbr_of_electrons_ = ps.getParameter<int>("nbr_of_electrons");
+
+  // ECal sim hits
+  ecal_sim_hit_coll_ = name_of("ecal_sim_hit_coll");
+  ecal_sim_hit_pass_ = name_of("ecal_sim_hit_pass");
+
+  // ECal rec hits
+  rec_hit_coll_name_ = name_of("rec_hit_coll_name");
+  rec_hit_pass_name_ = name_of("rec_hit_pass_name");
+
+  // ECal clusters
+  cluster_coll_name_ = name_of("cluster_coll_name");
+  cluster_pass_name_ = name_of("cluster_pass_name");
+}
+
+/**
+ * Fill the cluster-performance histograms for one event.
+ *
+ * Steps:
+ *  1. compare the number of clusters with the expected number of electrons,
+ *  2. for two-electron running, measure the xy distance between the
+ *     scoring-plane hits of track 1 and track 2,
+ *  3. tag every rec hit that has a matching sim hit with its origin electron
+ *     (0 when the contributions come from more than one origin) and record
+ *     the energy deposited per origin electron,
+ *  4. per cluster, fill hit purity, energy purity and mixed-hit energy,
+ *  5. fill the per-event counts of rec hits and of hits outside any cluster.
+ *
+ * @param event event providing the rec hit, sim hit, cluster and (for two
+ *              electrons) scoring-plane hit collections
+ */
+void EcalClusterAnalyzer::analyze(const framework::Event& event) {
+  const auto& ecal_rec_hits{event.getCollection<ldmx::EcalHit>(
+      rec_hit_coll_name_, rec_hit_pass_name_)};
+  const auto& ecal_sim_hits{event.getCollection<ldmx::SimCalorimeterHit>(
+      ecal_sim_hit_coll_, ecal_sim_hit_pass_)};
+  const auto& ecal_clusters{event.getCollection<ldmx::EcalCluster>(
+      cluster_coll_name_, cluster_pass_name_)};
+
+  // Signed counts: comparing or subtracting size_t against int silently
+  // converts to unsigned and can wrap around.
+  const int n_clusters = static_cast<int>(ecal_clusters.size());
+  const int n_rec_hits = static_cast<int>(ecal_rec_hits.size());
+
+  // 0 = undercounting, 1 = correct, 2 = overcounting
+  int prediction = 1;
+  if (n_clusters < nbr_of_electrons_) {
+    prediction = 0;
+  } else if (n_clusters > nbr_of_electrons_) {
+    prediction = 2;
+  }
+  histograms_.fill("correctly_predicted_events", prediction);
+
+  // Per-origin vectors have one slot per electron ID plus slot 0.
+  const std::size_t n_slots =
+      nbr_of_electrons_ > 0 ? static_cast<std::size_t>(nbr_of_electrons_) + 1
+                            : 1;
+  // True when an origin ID can be used as an index into those vectors;
+  // negative IDs and IDs above the expected electron count are not tracked.
+  const auto is_tracked = [n_slots](int id) {
+    return id >= 0 && static_cast<std::size_t>(id) < n_slots;
+  };
+
+  // Distance between the two electrons in the ECal scoring plane.
+  // TODO: generalize for n electrons
+  bool have_dist = false;
+  double dist = 0.;
+  if (nbr_of_electrons_ == 2) {
+    std::vector<float> pos1;
+    std::vector<float> pos2;
+
+    const auto& ecal_sp_hits{
+        event.getCollection<ldmx::SimTrackerHit>("EcalScoringPlaneHits")};
+    for (const ldmx::SimTrackerHit& sp_hit : ecal_sp_hits) {
+      // the last scoring-plane hit seen for each track wins
+      if (sp_hit.getTrackID() == 1) {
+        pos1 = sp_hit.getPosition();
+      } else if (sp_hit.getTrackID() == 2) {
+        pos2 = sp_hit.getPosition();
+      }
+    }
+    // Only defined when both tracks left a hit with x and y coordinates.
+    if (pos1.size() >= 2 && pos2.size() >= 2) {
+      const double dx = pos1[0] - pos2[0];
+      const double dy = pos1[1] - pos2[1];
+      dist = std::sqrt(dx * dx + dy * dy);
+      have_dist = true;
+    }
+  }
+
+  // Index the sim hits by ID once instead of scanning the collection for
+  // every rec hit. emplace keeps the first hit seen for an ID, which is the
+  // same hit a linear first-match search would return.
+  std::unordered_map<int, const ldmx::SimCalorimeterHit*> sim_hit_by_id;
+  sim_hit_by_id.reserve(ecal_sim_hits.size());
+  for (const auto& sim_hit : ecal_sim_hits) {
+    sim_hit_by_id.emplace(sim_hit.getID(), &sim_hit);
+  }
+
+  // rec hit ID -> (origin tag, energy deposited per origin electron)
+  std::unordered_map<int, std::pair<int, std::vector<double>>> hit_info;
+  hit_info.reserve(ecal_rec_hits.size());
+
+  for (const auto& hit : ecal_rec_hits) {
+    const auto match = sim_hit_by_id.find(hit.getID());
+    if (match == sim_hit_by_id.end()) continue;  // no sim hit for this rec hit
+    const auto& sim_hit = *match->second;
+
+    std::vector<double> edep(n_slots, 0.);
+    bool mixed = false;
+    int prev_ancestor = 0;
+    for (int i = 0; i < sim_hit.getNumberOfContribs(); i++) {
+      const auto& c = sim_hit.getContrib(i);
+      const int ancestor = c.originID;
+      // store energy from this contrib at index = origin electron ID
+      if (is_tracked(ancestor)) edep[ancestor] += c.edep;
+      // a change of origin between consecutive contribs marks a mixed hit
+      if (i != 0 && ancestor != prev_ancestor) mixed = true;
+      prev_ancestor = ancestor;
+    }
+    // 0 for mixed hits, otherwise the single origin ID of the contribs
+    const int tag = mixed ? 0 : prev_ancestor;
+
+    histograms_.fill("ancestors", tag);
+    hit_info.emplace(hit.getID(), std::make_pair(tag, std::move(edep)));
+  }
+
+  // Counts cluster hits that have an entry in hit_info, i.e. a sim-hit match.
+  int clustered_hits = 0;
+
+  for (const auto& cl : ecal_clusters) {
+    // number of hits per origin, index = electron ID (0 = mixed)
+    std::vector<double> n_from(n_slots, 0.);
+    // energy per origin, index = electron ID (0 = energy of mixed hits)
+    std::vector<double> e_from(n_slots, 0.);
+    double e_sum = 0.;
+    double n_sum = 0.;
+
+    const auto& hit_ids = cl.getHitIDs();
+    for (const auto& id : hit_ids) {
+      const auto info = hit_info.find(id);
+      if (info == hit_info.end()) continue;
+
+      const int origin = info->second.first;
+      // bind by reference: the energy vector does not need to be copied
+      const std::vector<double>& energies = info->second.second;
+
+      // A tag outside the tracked range has no slot; the hit still counts
+      // towards the cluster total.
+      if (is_tracked(origin)) n_from[origin]++;
+      n_sum++;
+
+      double hit_e_sum = 0.;
+      for (std::size_t i = 1; i < n_slots; i++) {
+        if (energies[i] > 0.) {
+          hit_e_sum += energies[i];
+          // energy from electron i in this hit adds to electron i's total
+          e_from[i] += energies[i];
+        }
+      }
+      // a mixed hit also adds its whole energy to the mixed-hit counter
+      if (origin == 0) e_from[0] += hit_e_sum;
+      e_sum += hit_e_sum;
+
+      clustered_hits++;
+    }
+
+    if (e_sum > 0) {
+      // energy purity = largest contribution / all energy
+      const double e_max = *std::max_element(e_from.begin(), e_from.end());
+      const double purity = 100. * (e_max / e_sum);
+      histograms_.fill("energy_percentage", purity);
+      if (e_from[0] > 0.) {
+        histograms_.fill("mixed_hit_energy", 100. * (e_from[0] / e_sum));
+      }
+
+      histograms_.fill("total_energy_vs_hits", e_sum, hit_ids.size());
+      histograms_.fill("total_energy_vs_purity", e_sum, purity);
+
+      // skip the distance plot when the distance could not be measured
+      if (have_dist) histograms_.fill("distance_energy_purity", dist, purity);
+    }
+    if (n_sum > 0) {
+      const double n_max = *std::max_element(n_from.begin(), n_from.end());
+      histograms_.fill("same_ancestor", 100. * (n_max / n_sum));
+    }
+  }
+
+  const int clusterless_hits = n_rec_hits - clustered_hits;
+  histograms_.fill("clusterless_hits", clusterless_hits);
+  histograms_.fill("total_rechits_in_event", n_rec_hits);
+  // the percentage is undefined for an event without rec hits
+  if (n_rec_hits > 0) {
+    histograms_.fill("clusterless_hits_percentage",
+                     100. * clusterless_hits / n_rec_hits);
+  }
+}
+
+}  // namespace dqm
+
+// NOTE(review): presumably registers the class under the name
+// "dqm::EcalClusterAnalyzer" that the Python configuration refers to; the
+// macro is defined outside this file, so confirm against the framework.
+DECLARE_ANALYZER(dqm::EcalClusterAnalyzer)
diff --git a/Ecal/exampleConfigs/cluster.py b/Ecal/exampleConfigs/cluster.py
@@ -0,0 +1,13 @@
+from LDMX.Framework import ldmxcfg
+p = ldmxcfg.Process('cluster')
+import sys
+p.inputFiles = sys.argv[1:]
+p.outputFiles = [ 'clusters.root' ]
+p.histogramFile = 'h_clusters.root'
+
+from LDMX.Ecal.ecalClusters import *
+p.sequence = [
+    EcalClusterProducer(),
+    EcalClusterAnalyzer()
+]
+