Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 179 additions & 0 deletions Source/DerivedQuantities/ParticleDerive.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
#include <Nyx.H>
#include <Gravity.H>
#include <AMReX_FFT.H>
#include <AMReX_MultiFab.H>
#include <AMReX_ParallelDescriptor.H>
#include <AMReX_Reduce.H>

#include <filesystem>
#include <cstdio> // for sprintf

using namespace amrex;
namespace fs = std::filesystem;

std::unique_ptr<MultiFab>
Nyx::particle_derive (const std::string& name, Real time, int ngrow)
Expand Down Expand Up @@ -466,3 +474,174 @@ extern "C"
#ifdef __cplusplus
}
#endif

void
Nyx::compute_overdensity (const MultiFab& mf_pmd, MultiFab& mf_od)
{
AMREX_ASSERT(mf_pmd.nComp() == 1);
AMREX_ASSERT(mf_od.nComp() == 1);
AMREX_ASSERT(mf_pmd.boxArray() == mf_od.boxArray());
AMREX_ASSERT(mf_pmd.DistributionMap() == mf_od.DistributionMap());

mf_od.define(mf_pmd.boxArray(), mf_pmd.DistributionMap(), 1, 0);

// --------------------------------------------------
// 1. Compute global mean density
// --------------------------------------------------

// FIX 1: correct signature is sum(comp, local) — no int nghost argument
Real rho_sum = mf_pmd.sum(0, false);

// FIX 2: BoxArray is fully replicated on every rank, so this is already
// the global cell count. ReduceLongSum is both wrong (returns void) and
// unnecessary (would multiply the count by nprocs).
Long ncells = mf_pmd.boxArray().numPts();

Real rho_mean = rho_sum / static_cast<Real>(ncells);

// --------------------------------------------------
// 2. Compute overdensity on GPU
// δ = ρ / ρ̄ - 1
// --------------------------------------------------
for (MFIter mfi(mf_pmd, TilingIfNotGPU()); mfi.isValid(); ++mfi)
{
const Box& bx = mfi.tilebox();
auto const& rho = mf_pmd.const_array(mfi);
auto const& delta = mf_od.array(mfi);
Real inv_mean = 1.0_rt / rho_mean;

ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k)
{
delta(i,j,k) = rho(i,j,k) * inv_mean - 1.0_rt;
});
}
if (mf_od.contains_nan()) {
std::cout << "Value of rho_mean is " << rho_mean << " " << rho_sum << std::endl;
amrex::Abort("NaNs detected in MultiFab");
}
}

void
Nyx::compute_matter_power_spectrum(const MultiFab& mf_od)
{
Geometry geom = parent->Geom(0);
cMultiFab c_fft_pmd;
{
// Note that the complex Hermitian output array Y has (nx/2+1,ny,nz) elements.
// Y[nx-i,j,k] = Y[i,j,k]*
FFT::R2C<Real,FFT::Direction::forward> fft(geom.Domain());
auto const& [ba, dm] = fft.getSpectralDataLayout();
c_fft_pmd.define(ba,dm,1,0);
fft.forward(mf_od, c_fft_pmd);
}

// For simplicity, we assume the domain is a cube, and we are not
// going to worry about scaling.

int nx = geom.Domain().length(0);
int ny = geom.Domain().length(1);
int nz = geom.Domain().length(2);
int nk = int(std::sqrt((nx/2)*(nx/2) +
(ny/2)*(ny/2) +
(nz/2)*(nz/2))) + 1;
Gpu::DeviceVector<Real> power_spec_mf_od_d(nk,Real(0.0));
Gpu::DeviceVector<int> counts_d(nk, 0);
Real* power_spec_mf_od_d_ptr = power_spec_mf_od_d.data();
int* counts_d_ptr = counts_d.data();
auto const& c_fft_pmd_arr = c_fft_pmd.const_arrays();
Real Lx = geom.ProbLength(0);
Real Ly = geom.ProbLength(1);
Real Lz = geom.ProbLength(2);
Real dx = geom.CellSize(0);
Real dy = geom.CellSize(1);
Real dz = geom.CellSize(2);
Real kfund = 2.0 * M_PI / Lx;
ParallelFor(c_fft_pmd, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k)
{
int ki = i;
int kj = (j <= ny/2) ? j : ny-j;
int kk = (k <= nz/2) ? k : nz-k;
Real kmag = kfund * std::sqrt(ki*ki + kj*kj + kk*kk);
int di = int(kmag / kfund);
Real kx = 2.0 * M_PI * ki / Lx;
Real ky = 2.0 * M_PI * kj / Ly;
Real kz = 2.0 * M_PI * kk / Lz;

Real wx = (kx == 0.0) ? 1.0 : sin(0.5*kx*dx)/(0.5*kx*dx);
Real wy = (ky == 0.0) ? 1.0 : sin(0.5*ky*dy)/(0.5*ky*dy);
Real wz = (kz == 0.0) ? 1.0 : sin(0.5*kz*dz)/(0.5*kz*dz);

// CIC window (note the square per direction)
Real W_CIC = (wx*wx) * (wy*wy) * (wz*wz);

if (di < nk) {
Real value = amrex::norm(c_fft_pmd_arr[b](i,j,k));
// Account for Hermitian symmetry in x-direction
// Hermitian symmetry Y[nx-i,j,k] = Y[i,j,k]*
if ((i > 0) && (2*i != nx)) {
// Multiply by 2 because we have +ki and -ki
value *= Real(2.0);
}
value /= (W_CIC * W_CIC);
int weight = ((i > 0) && (2*i != nx)) ? 2 : 1;
HostDevice::Atomic::Add(power_spec_mf_od_d_ptr+di, value);
HostDevice::Atomic::Add(counts_d_ptr + di, weight);
}
});

Real* power_spec_mf_od_h_ptr = nullptr;
int* counts_h_ptr = nullptr;
#ifdef AMREX_USE_GPU
Gpu::HostVector<Real> power_spec_mf_od_h(counts_d.size());
Gpu::HostVector<int> counts_h(counts_d.size());
Gpu::copyAsync(Gpu::deviceToHost, power_spec_mf_od_d.begin(), power_spec_mf_od_d.end(), power_spec_mf_od_h.begin());
Gpu::copyAsync(Gpu::deviceToHost, counts_d.begin(), counts_d.end(), counts_h.begin());
Gpu::streamSynchronize();
power_spec_mf_od_h_ptr = power_spec_mf_od_h.data();
counts_h_ptr = counts_h.data();
#else
power_spec_mf_od_h_ptr = power_spec_mf_od_d.data();
counts_h_ptr = counts_d.data();
#endif

ParallelDescriptor::ReduceRealSum(power_spec_mf_od_h_ptr, nk);
ParallelDescriptor::ReduceIntSum(counts_h_ptr, nk);

for (int i = 0; i < nk; ++i) {
if (counts_h_ptr[i] > 0) {
power_spec_mf_od_h_ptr[i] /= Real(counts_h_ptr[i]);
}
}

if (ParallelDescriptor::IOProcessor()) {
// --- Step 1: format redshift ---
char zbuf[32];
const Real prev_time = state[State_Type].prevTime();
const Real a_old = get_comoving_a(prev_time);
Real z_old = 1/a_old-1;
sprintf(zbuf, "%07d", int(z_old * 10000 + 0.5)); // → "0028165"

// --- Step 2: ensure directory exists ---
fs::path dir = "MatterPk";
if (!fs::exists(dir)) {
fs::create_directories(dir);
}
// --- Step 3: build full filename ---
fs::path filepath = dir / ("spectrum_" + std::string(zbuf) + ".txt");

// --- Step 4: open file ---
std::ofstream ofs(filepath);

if (!ofs) {
throw std::runtime_error("Failed to open file: " + filepath.string());
}

Real dV = dx*dy*dz;
Real dom_vol = Lx*Ly*Lz;
Real dk = 2.0 * M_PI / Lx;
for (int i = 0; i < nk; ++i) {
Real k = dk * (i + 0.5);
ofs << k << " " << power_spec_mf_od_h_ptr[i]* dV*dV/dom_vol << "\n";
}
}
}
10 changes: 10 additions & 0 deletions Source/Driver/Nyx.H
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,16 @@ public:
//
std::unique_ptr<amrex::MultiFab> particle_derive (const std::string& name, amrex::Real time, int ngrow);

//
// Compute the overdensity field from the particle mass density field
//
void compute_overdensity (const amrex::MultiFab& mf_pmd, amrex::MultiFab& mf_od);

//
// Compute the matter power spectrum grom the overdensity field
//
void compute_matter_power_spectrum(const amrex::MultiFab& mf_od);

//
//Set time levels of state data.
//
Expand Down
2 changes: 1 addition & 1 deletion subprojects/amrex
Submodule amrex updated 936 files
Loading