From b6a4f45d769b353ddcb31ede2e748c95a7f7a14b Mon Sep 17 00:00:00 2001 From: nataraj2 Date: Fri, 24 Apr 2026 12:38:34 -0700 Subject: [PATCH 1/5] Adding routines for matter spectrum coputation with amrex fft --- Source/DerivedQuantities/ParticleDerive.cpp | 171 ++++++++++++++++++++ Source/Driver/Nyx.H | 10 ++ 2 files changed, 181 insertions(+) diff --git a/Source/DerivedQuantities/ParticleDerive.cpp b/Source/DerivedQuantities/ParticleDerive.cpp index 365d8e96..f6aafb4f 100644 --- a/Source/DerivedQuantities/ParticleDerive.cpp +++ b/Source/DerivedQuantities/ParticleDerive.cpp @@ -466,3 +466,174 @@ extern "C" #ifdef __cplusplus } #endif + +void +Nyx::compute_overdensity (const MultiFab& mf_pmd, MultiFab& mf_od) +{ + AMREX_ASSERT(mf_pmd.nComp() == 1); + AMREX_ASSERT(mf_od.nComp() == 1); + AMREX_ASSERT(mf_pmd.boxArray() == mf_od.boxArray()); + AMREX_ASSERT(mf_pmd.DistributionMap() == mf_od.DistributionMap()); + + mf_od.define(mf_pmd.boxArray(), mf_pmd.DistributionMap(), 1, 0); + + // -------------------------------------------------- + // 1. Compute global mean density + // -------------------------------------------------- + + // FIX 1: correct signature is sum(comp, local) — no int nghost argument + Real rho_sum = mf_pmd.sum(0, false); + + // FIX 2: BoxArray is fully replicated on every rank, so this is already + // the global cell count. ReduceLongSum is both wrong (returns void) and + // unnecessary (would multiply the count by nprocs). + Long ncells = mf_pmd.boxArray().numPts(); + + Real rho_mean = rho_sum / static_cast(ncells); + + // -------------------------------------------------- + // 2. Compute overdensity on GPU + // δ = ρ / ρ̄ - 1 + // -------------------------------------------------- + for (MFIter mfi(mf_pmd, TilingIfNotGPU()); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox(); + auto const& rho = mf_pmd.const_array(mfi); + auto const& delta = mf_od.array(mfi); + Real inv_mean = 1.0_rt / rho_mean; + + ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) + { + delta(i,j,k) = rho(i,j,k) * inv_mean - 1.0_rt; + }); + } + if (mf_od.contains_nan()) { + std::cout << "Value of rho_mean is " << rho_mean << " " << rho_sum << std::endl; + amrex::Abort("NaNs detected in MultiFab"); + } +} + +void +Nyx::compute_matter_power_spectrum(const MultiFab& mf_od) +{ + Geometry geom = parent->Geom(0); + cMultiFab c_fft_pmd; + { + // Note that the complex Hermitian output array Y has (nx/2+1,ny,nz) elements. + // Y[nx-i,j,k] = Y[i,j,k]* + FFT::R2C fft(geom.Domain()); + auto const& [ba, dm] = fft.getSpectralDataLayout(); + c_fft_pmd.define(ba,dm,1,0); + fft.forward(mf_od, c_fft_pmd); + } + + // For simplicity, we assume the domain is a cube, and we are not + // going to worry about scaling. + + int nx = geom.Domain().length(0); + int ny = geom.Domain().length(1); + int nz = geom.Domain().length(2); + int nk = int(std::sqrt((nx/2)*(nx/2) + + (ny/2)*(ny/2) + + (nz/2)*(nz/2))) + 1; + Gpu::DeviceVector power_spec_mf_od_d(nk,Real(0.0)); + Gpu::DeviceVector counts_d(nk, 0); + Real* power_spec_mf_od_d_ptr = power_spec_mf_od_d.data(); + int* counts_d_ptr = counts_d.data(); + auto const& c_fft_pmd_arr = c_fft_pmd.const_arrays(); + Real Lx = geom.ProbLength(0); + Real Ly = geom.ProbLength(1); + Real Lz = geom.ProbLength(2); + Real dx = geom.CellSize(0); + Real dy = geom.CellSize(1); + Real dz = geom.CellSize(2); + Real kfund = 2.0 * M_PI / Lx; + ParallelFor(c_fft_pmd, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) + { + int ki = i; + int kj = (j <= ny/2) ? j : ny-j; + int kk = (k <= nz/2) ? k : nz-k; + Real kmag = kfund * std::sqrt(ki*ki + kj*kj + kk*kk); + int di = int(kmag / kfund); + Real kx = 2.0 * M_PI * ki / Lx; + Real ky = 2.0 * M_PI * kj / Ly; + Real kz = 2.0 * M_PI * kk / Lz; + + Real wx = (kx == 0.0) ? 1.0 : sin(0.5*kx*dx)/(0.5*kx*dx); + Real wy = (ky == 0.0) ? 1.0 : sin(0.5*ky*dy)/(0.5*ky*dy); + Real wz = (kz == 0.0) ? 1.0 : sin(0.5*kz*dz)/(0.5*kz*dz); + + // CIC window (note the square per direction) + Real W_CIC = (wx*wx) * (wy*wy) * (wz*wz); + + if (di < nk) { + Real value = amrex::norm(c_fft_pmd_arr[b](i,j,k)); + // Account for Hermitian symmetry in x-direction + // Hermitian symmetry Y[nx-i,j,k] = Y[i,j,k]* + if ((i > 0) && (2*i != nx)) { + // Multiply by 2 because we have +ki and -ki + value *= Real(2.0); + } + value /= (W_CIC * W_CIC); + int weight = ((i > 0) && (2*i != nx)) ? 2 : 1; + HostDevice::Atomic::Add(power_spec_mf_od_d_ptr+di, value); + HostDevice::Atomic::Add(counts_d_ptr + di, weight); + } + }); + + Real* power_spec_mf_od_h_ptr = nullptr; + int* counts_h_ptr = nullptr; + #ifdef AMREX_USE_GPU + Gpu::HostVector power_spec_mf_od_h(counts_d.size()); + Gpu::HostVector counts_h(counts_d.size()); + Gpu::copyAsync(Gpu::deviceToHost, power_spec_mf_od_d.begin(), power_spec_mf_od_d.end(), power_spec_mf_od_h.begin()); + Gpu::copyAsync(Gpu::deviceToHost, counts_d.begin(), counts_d.end(), counts_h.begin()); + Gpu::streamSynchronize(); + power_spec_mf_od_h_ptr = power_spec_mf_od_h.data(); + counts_h_ptr = counts_h.data(); + #else + power_spec_mf_od_h_ptr = power_spec_mf_od_d.data(); + counts_h_ptr = counts_d.data(); + #endif + + ParallelDescriptor::ReduceRealSum(power_spec_mf_od_h_ptr, nk); + ParallelDescriptor::ReduceIntSum(counts_h_ptr, nk); + + for (int i = 0; i < nk; ++i) { + if (counts_h_ptr[i] > 0) { + power_spec_mf_od_h_ptr[i] /= Real(counts_h_ptr[i]); + } + } + + if (ParallelDescriptor::IOProcessor()) { + // --- Step 1: format redshift --- + char zbuf[32]; + const Real prev_time = state[State_Type].prevTime(); + const Real a_old = get_comoving_a(prev_time); + Real z_old = 1/a_old-1; + sprintf(zbuf, "%07d", int(z_old * 10000 + 0.5)); // → "0028165" + + // --- Step 2: ensure directory exists --- + fs::path dir = "MatterPk"; + if (!fs::exists(dir)) { + fs::create_directories(dir); + } + // --- Step 3: build full filename --- + fs::path filepath = dir / ("spectrum_" + std::string(zbuf) + ".txt"); + + // --- Step 4: open file --- + std::ofstream ofs(filepath); + + if (!ofs) { + throw std::runtime_error("Failed to open file: " + filepath.string()); + } + + Real dV = dx*dy*dz; + Real dom_vol = Lx*Ly*Lz; + Real dk = 2.0 * M_PI / Lx; + for (int i = 0; i < nk; ++i) { + Real k = dk * (i + 0.5); + ofs << k << " " << power_spec_mf_od_h_ptr[i]* dV*dV/dom_vol << "\n"; + } + } +} diff --git a/Source/Driver/Nyx.H b/Source/Driver/Nyx.H index f2f073b8..aed01773 100644 --- a/Source/Driver/Nyx.H +++ b/Source/Driver/Nyx.H @@ -436,6 +436,16 @@ public: // std::unique_ptr particle_derive (const std::string& name, amrex::Real time, int ngrow); + // + // Compute the overdensity field from the particle mass density field + // + void compute_overdensity (const MultiFab& mf_pmd, MultiFab& mf_od); + + // + // Compute the matter power spectrum grom the overdensity field + // + void compute_matter_power_spectrum(const MultiFab& mf_od); + // //Set time levels of state data. // From f44e91f8f4fca83c21cbfe6d366c953abbf2b8b0 Mon Sep 17 00:00:00 2001 From: nataraj2 Date: Fri, 24 Apr 2026 12:42:44 -0700 Subject: [PATCH 2/5] Including missing headers --- Source/DerivedQuantities/ParticleDerive.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Source/DerivedQuantities/ParticleDerive.cpp b/Source/DerivedQuantities/ParticleDerive.cpp index f6aafb4f..07e91299 100644 --- a/Source/DerivedQuantities/ParticleDerive.cpp +++ b/Source/DerivedQuantities/ParticleDerive.cpp @@ -1,5 +1,12 @@ #include #include +#include +#include +#include +#include + +#include +#include // for sprintf using namespace amrex; From 11ddf3f437998752565175a69b7a27b4d421ce33 Mon Sep 17 00:00:00 2001 From: nataraj2 Date: Fri, 24 Apr 2026 15:25:37 -0700 Subject: [PATCH 3/5] Adding missing namespace --- Source/DerivedQuantities/ParticleDerive.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/DerivedQuantities/ParticleDerive.cpp b/Source/DerivedQuantities/ParticleDerive.cpp index 07e91299..6f312379 100644 --- a/Source/DerivedQuantities/ParticleDerive.cpp +++ b/Source/DerivedQuantities/ParticleDerive.cpp @@ -9,6 +9,7 @@ #include // for sprintf using namespace amrex; +namespace fs = std::filesystem; std::unique_ptr Nyx::particle_derive (const std::string& name, Real time, int ngrow) From c510f7edee83fa8d0c57eb9b62b016d2a19216e6 Mon Sep 17 00:00:00 2001 From: nataraj2 Date: Sat, 25 Apr 2026 00:13:29 -0700 Subject: [PATCH 4/5] Correcting compilation error --- Source/Driver/Nyx.H | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Driver/Nyx.H b/Source/Driver/Nyx.H index aed01773..818b541b 100644 --- a/Source/Driver/Nyx.H +++ b/Source/Driver/Nyx.H @@ -439,12 +439,12 @@ public: // // Compute the overdensity field from the particle mass density field // - void compute_overdensity (const MultiFab& mf_pmd, MultiFab& mf_od); + void compute_overdensity (const amrex::MultiFab& mf_pmd, amrex::MultiFab& mf_od); // // Compute the matter power spectrum grom the overdensity field // - void compute_matter_power_spectrum(const MultiFab& mf_od); + void compute_matter_power_spectrum(const amrex::MultiFab& mf_od); // //Set time levels of state data. From c222ed535bcf33f6284f48335462843fba06bf06 Mon Sep 17 00:00:00 2001 From: nataraj2 Date: Sat, 2 May 2026 19:58:44 -0700 Subject: [PATCH 5/5] Update submodule to latest development --- subprojects/amrex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subprojects/amrex b/subprojects/amrex index d9c57608..4ceb814d 160000 --- a/subprojects/amrex +++ b/subprojects/amrex @@ -1 +1 @@ -Subproject commit d9c576086f99ac213154b2c5ab382af135f24f01 +Subproject commit 4ceb814db7d79c5858b13dab153fb04ecc3b129d