diff --git a/device/common/include/traccc/clusterization/device/clusterization_algorithm.hpp b/device/common/include/traccc/clusterization/device/clusterization_algorithm.hpp index b8a060c230..7ac4ea46ad 100644 --- a/device/common/include/traccc/clusterization/device/clusterization_algorithm.hpp +++ b/device/common/include/traccc/clusterization/device/clusterization_algorithm.hpp @@ -110,6 +110,11 @@ class clusterization_algorithm virtual bool input_is_valid( const edm::silicon_cell_collection::const_view& cells) const = 0; + virtual void sort_cells(const unsigned int num_cells, + const edm::silicon_cell_collection::const_view& cells, + edm::silicon_cell_collection::view& new_cells) const {}; + + /// Payload for the @c ccl_kernel function struct ccl_kernel_payload { /// Number of cells in the event diff --git a/device/common/src/clusterization/clusterization_algorithm.cpp b/device/common/src/clusterization/clusterization_algorithm.cpp index d86139fa2e..661da75784 100644 --- a/device/common/src/clusterization/clusterization_algorithm.cpp +++ b/device/common/src/clusterization/clusterization_algorithm.cpp @@ -87,6 +87,9 @@ clusterization_algorithm::execute_impl( num_cells = copy().get_size(cells); } + edm::silicon_cell_collection::buffer new_cells{num_cells, mr().main}; + copy().setup(new_cells)->ignore(); + // If there are no cells, return right away. if (num_cells == 0) { if (keep_disjoint_set) { @@ -119,8 +122,10 @@ clusterization_algorithm::execute_impl( cluster_sizes = {num_cells, mr().main}; } + sort_cells(num_cells, cells, new_cells); + // Launch the CCL kernel. - ccl_kernel({num_cells, m_config, cells, det_descr, measurements, cell_links, + ccl_kernel({num_cells, m_config, new_cells, det_descr, measurements, cell_links, m_f_backup, m_gf_backup, m_adjc_backup, m_adjv_backup, m_backup_mutex.get(), disjoint_set, cluster_sizes}); diff --git a/device/cuda/include/traccc/cuda/clusterization/clusterization_algorithm.hpp b/device/cuda/include/traccc/cuda/clusterization/clusterization_algorithm.hpp index e4e92c4820..65d07210d3 100644 --- a/device/cuda/include/traccc/cuda/clusterization/clusterization_algorithm.hpp +++ b/device/cuda/include/traccc/cuda/clusterization/clusterization_algorithm.hpp @@ -55,6 +55,12 @@ class clusterization_algorithm : public device::clusterization_algorithm, /// Main CCL kernel launcher void ccl_kernel(const ccl_kernel_payload& payload) const override; + + void sort_cells(const unsigned int num_cells, + const edm::silicon_cell_collection::const_view& cells, + edm::silicon_cell_collection::view& new_cells) const override; + + /// Cluster data reification kernel launcher /// diff --git a/device/cuda/src/clusterization/clusterization_algorithm.cu b/device/cuda/src/clusterization/clusterization_algorithm.cu index 39cfad82e4..429172784d 100644 --- a/device/cuda/src/clusterization/clusterization_algorithm.cu +++ b/device/cuda/src/clusterization/clusterization_algorithm.cu @@ -18,9 +18,82 @@ // Vecmem include(s). #include +#include #include +#define SORT_THREADS_PER_BLOCK 128 + namespace traccc::cuda { +namespace kernels { +__global__ __launch_bounds__(SORT_THREADS_PER_BLOCK) void sort_cells( + const unsigned int cells_per_thread, const unsigned int num_cells, + const edm::silicon_cell_collection::const_view cells, + edm::silicon_cell_collection::view new_cells) { + using index_t = unsigned long; + const unsigned int PER_THREAD_MAX = 32; + using sort_t = + cub::BlockRadixSort; + + unsigned int partition_target_size = cells_per_thread * blockDim.x; + __shared__ typename sort_t::TempStorage tmp; + __shared__ unsigned int partition_start, partition_end; + + const edm::silicon_cell_collection::const_device cells_device(cells); + edm::silicon_cell_collection::device new_cells_device(new_cells); + + if (threadIdx.x == 0) { + unsigned int start = blockIdx.x * partition_target_size; + unsigned int end = std::min(num_cells, start + partition_target_size); + + while (start != 0 && start < num_cells && + cells_device.module_index().at(start - 1) == + cells_device.module_index().at(start)) { + ++start; + } + + while (end < num_cells && cells_device.module_index().at(end - 1) == + cells_device.module_index().at(end)) { + ++end; + } + partition_start = start; + partition_end = end; + assert(partition_start <= partition_end); + } + + __syncthreads(); + + const unsigned int partition_size = partition_end - partition_start; + + index_t keys[PER_THREAD_MAX]; + + for (unsigned int i = 0; i < PER_THREAD_MAX; ++i) { + unsigned int eff = i * blockDim.x + threadIdx.x; + keys[i] = + eff < partition_size + ? (static_cast( + cells_device.at(partition_start + eff).module_index()) + << 35 | + static_cast( + cells_device.at(partition_start + eff).channel1()) + << 24 | + static_cast( + cells_device.at(partition_start + eff).channel0()) + << 13 | + static_cast(eff)) + : std::numeric_limits::max(); + } + + sort_t(tmp).SortBlockedToStriped(keys); + + for (unsigned int i = 0; i < PER_THREAD_MAX; ++i) { + unsigned int eff = i * blockDim.x + threadIdx.x; + if (eff < partition_size) { + new_cells_device.at(partition_start + eff) = + cells_device.at(partition_start + (keys[i] & 0b1111111111111)); + } + } +} +} // namespace kernels clusterization_algorithm::clusterization_algorithm( const traccc::memory_resource& mr, vecmem::copy& copy, cuda::stream& str, @@ -38,6 +111,23 @@ bool clusterization_algorithm::input_is_valid( stream(), cells)); } +void clusterization_algorithm::sort_cells( + const unsigned int num_cells, + const edm::silicon_cell_collection::const_view& cells, + edm::silicon_cell_collection::view& new_cells) const { + + const unsigned blockSize = SORT_THREADS_PER_BLOCK; + const unsigned int cellsPerThread = 16; + const unsigned int numBlocks = + (num_cells + (blockSize * cellsPerThread) - 1) / + (blockSize * cellsPerThread); + + kernels:: + sort_cells<<>>( + cellsPerThread, num_cells, cells, new_cells); + TRACCC_CUDA_ERROR_CHECK(cudaGetLastError()); +} + void clusterization_algorithm::ccl_kernel( const ccl_kernel_payload& payload) const { diff --git a/examples/run/common/throughput_mt.ipp b/examples/run/common/throughput_mt.ipp index c72fa10255..32c97a499e 100644 --- a/examples/run/common/throughput_mt.ipp +++ b/examples/run/common/throughput_mt.ipp @@ -1,6 +1,6 @@ /** TRACCC library, part of the ACTS project (R&D line) * - * (c) 2022-2025 CERN for the benefit of the ACTS project + * (c) 2022-2026 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -134,10 +134,11 @@ int throughput_mt(std::string_view description, int argc, char* argv[]) { for (std::size_t event = event_range.begin(); event != event_range.end(); ++event) { static constexpr bool DEDUPLICATE = true; + static constexpr bool RANDOMIZE = true; io::read_cells(input.at(event - input_opts.skip), event, input_opts.directory, logger().clone(), &det_descr, input_opts.format, DEDUPLICATE, - input_opts.use_acts_geom_source); + input_opts.use_acts_geom_source, RANDOMIZE); } }); } @@ -275,10 +276,9 @@ int throughput_mt(std::string_view description, int argc, char* argv[]) { // Launch the processing of the event. arena.execute([&, event]() { group.run([&, event]() { - rec_track_params.fetch_add(algs.at(static_cast( - tbb::this_task_arena::current_thread_index()))( - input[event]) - .size()); + rec_track_params.fetch_add(process_event( + tbb::this_task_arena::current_thread_index(), + input[event])); progress_bar.tick(); }); }); diff --git a/examples/run/common/throughput_st.ipp b/examples/run/common/throughput_st.ipp index 7fec0685e9..7f5bc23c31 100644 --- a/examples/run/common/throughput_st.ipp +++ b/examples/run/common/throughput_st.ipp @@ -1,6 +1,6 @@ /** TRACCC library, part of the ACTS project (R&D line) * - * (c) 2022-2025 CERN for the benefit of the ACTS project + * (c) 2022-2026 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -114,9 +114,11 @@ int throughput_st(std::string_view description, int argc, char* argv[]) { i < input_opts.skip + input_opts.events; ++i) { input.emplace_back(host_mr); static constexpr bool DEDUPLICATE = true; + static constexpr bool RANDOMIZE = true; io::read_cells(input.back(), i, input_opts.directory, logger().clone(), &det_descr, input_opts.format, - DEDUPLICATE, input_opts.use_acts_geom_source); + DEDUPLICATE, input_opts.use_acts_geom_source, + RANDOMIZE); } } @@ -227,7 +229,7 @@ int throughput_st(std::string_view description, int argc, char* argv[]) { input_opts.events; // Process one event. - rec_track_params += (*alg)(input[event]).size(); + rec_track_params += process_event(input[event]); progress_bar.tick(); } } diff --git a/io/include/traccc/io/read_cells.hpp b/io/include/traccc/io/read_cells.hpp index cf41473365..b1d5e1e1e9 100644 --- a/io/include/traccc/io/read_cells.hpp +++ b/io/include/traccc/io/read_cells.hpp @@ -1,6 +1,6 @@ /** TRACCC library, part of the ACTS project (R&D line) * - * (c) 2022-2024 CERN for the benefit of the ACTS project + * (c) 2022-2026 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -35,13 +35,14 @@ namespace traccc::io { /// @param[in] use_acts_geometry_id Whether to treat the geometry ID as an /// "Acts geometry ID", or a /// "Detray geometry ID" +/// @param[in] randomize Whether to randomize the order of the cells or not /// void read_cells(edm::silicon_cell_collection::host& cells, std::size_t event, std::string_view directory, std::unique_ptr logger = getDummyLogger().clone(), const silicon_detector_description::host* dd = nullptr, data_format format = data_format::csv, bool deduplicate = true, - bool use_acts_geometry_id = true); + bool use_acts_geometry_id = true, bool randomize = false); /// Read cell data into memory /// @@ -55,12 +56,13 @@ void read_cells(edm::silicon_cell_collection::host& cells, std::size_t event, /// @param[in] use_acts_geometry_id Whether to treat the geometry ID as an /// "Acts geometry ID", or a /// "Detray geometry ID" +/// @param[in] randomize Whether to randomize the order of the cells or not /// void read_cells(edm::silicon_cell_collection::host& cells, std::string_view filename, std::unique_ptr logger = getDummyLogger().clone(), const silicon_detector_description::host* dd = nullptr, data_format format = data_format::csv, bool deduplicate = true, - bool use_acts_geometry_id = true); + bool use_acts_geometry_id = true, bool randomize = false); } // namespace traccc::io diff --git a/io/src/csv/read_cells.cpp b/io/src/csv/read_cells.cpp index ec0d3779cd..00a43b509e 100644 --- a/io/src/csv/read_cells.cpp +++ b/io/src/csv/read_cells.cpp @@ -1,6 +1,6 @@ /** TRACCC library, part of the ACTS project (R&D line) * - * (c) 2022-2024 CERN for the benefit of the ACTS project + * (c) 2022-2026 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -115,7 +116,7 @@ void read_cells(edm::silicon_cell_collection::host& cells, std::string_view filename, std::unique_ptr ilogger, const silicon_detector_description::host* dd, bool deduplicate, - bool use_acts_geometry_id) { + bool use_acts_geometry_id, bool randomize) { // Clear the output container. cells.resize(0u); @@ -140,8 +141,12 @@ void read_cells(edm::silicon_cell_collection::host& cells, } } + // Make a random number generator. In case we need to shuffle the cells. + std::mt19937 rng; + rng.seed(static_cast(0u)); + // Fill the output containers with the ordered cells and modules. - for (const auto& [geometry_id, cellz] : cellsMap) { + for (auto& [geometry_id, cellz] : cellsMap) { // Figure out the index of the detector description object, for this // group of cells. @@ -156,6 +161,13 @@ void read_cells(edm::silicon_cell_collection::host& cells, ddIndex = it->second; } + // If the user asked for the cells to be randomized, then let's do so. + // But still, in a deterministic way, so that we could compare results + // across runs. + if (randomize) { + std::shuffle(cellz.begin(), cellz.end(), rng); + } + // Add the cells to the output. for (const csv::cell& cell : cellz) { cells.push_back({cell.channel0, cell.channel1, cell.value, diff --git a/io/src/csv/read_cells.hpp b/io/src/csv/read_cells.hpp index 979fd72bdb..bd0f4b2860 100644 --- a/io/src/csv/read_cells.hpp +++ b/io/src/csv/read_cells.hpp @@ -1,6 +1,6 @@ /** TRACCC library, part of the ACTS project (R&D line) * - * (c) 2022-2024 CERN for the benefit of the ACTS project + * (c) 2022-2026 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -26,11 +26,14 @@ namespace traccc::io::csv { /// @param[in] use_acts_geometry_id Whether to treat the geometry ID as an /// "Acts geometry ID", or a /// "Detray geometry ID" +/// @param[in] randomize Whether to randomize the order of the cells or not +/// /// void read_cells(edm::silicon_cell_collection::host& cells, std::string_view filename, std::unique_ptr logger = getDummyLogger().clone(), const silicon_detector_description::host* dd = nullptr, - bool deduplicate = true, bool use_acts_geometry_id = true); + bool deduplicate = true, bool use_acts_geometry_id = true, + bool randomize = false); } // namespace traccc::io::csv diff --git a/io/src/read_cells.cpp b/io/src/read_cells.cpp index 2286d48be8..a050c7d9cf 100644 --- a/io/src/read_cells.cpp +++ b/io/src/read_cells.cpp @@ -1,6 +1,6 @@ /** TRACCC library, part of the ACTS project (R&D line) * - * (c) 2022-2024 CERN for the benefit of the ACTS project + * (c) 2022-2026 CERN for the benefit of the ACTS project * * Mozilla Public License Version 2.0 */ @@ -21,8 +21,8 @@ void read_cells(edm::silicon_cell_collection::host& cells, std::size_t event, std::string_view directory, std::unique_ptr ilogger, const silicon_detector_description::host* dd, - data_format format, bool deduplicate, - bool use_acts_geometry_id) { + data_format format, bool deduplicate, bool use_acts_geometry_id, + bool randomize) { switch (format) { case data_format::csv: @@ -32,8 +32,8 @@ void read_cells(edm::silicon_cell_collection::host& cells, std::size_t event, std::filesystem::path( get_event_filename(event, "-cells.csv"))) .native()), - ilogger->clone(), dd, format, deduplicate, - use_acts_geometry_id); + ilogger->clone(), dd, format, deduplicate, use_acts_geometry_id, + randomize); break; case data_format::binary: @@ -43,7 +43,7 @@ void read_cells(edm::silicon_cell_collection::host& cells, std::size_t event, std::filesystem::path( get_event_filename(event, "-cells.dat"))) .native()), - ilogger->clone(), dd, format, deduplicate); + ilogger->clone(), dd, format, deduplicate, randomize); break; default: @@ -55,13 +55,13 @@ void read_cells(edm::silicon_cell_collection::host& cells, std::string_view filename, std::unique_ptr ilogger, const silicon_detector_description::host* dd, - data_format format, bool deduplicate, - bool use_acts_geometry_id) { + data_format format, bool deduplicate, bool use_acts_geometry_id, + bool randomize) { switch (format) { case data_format::csv: csv::read_cells(cells, filename, ilogger->clone(), dd, deduplicate, - use_acts_geometry_id); + use_acts_geometry_id, randomize); break; case data_format::binary: