Skip to content

Clarifying the purpose of "poisson2d", choosing clear convergence criteria, adding tests #24

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 14 additions & 10 deletions poisson2d/README.md
Original file line number Diff line number Diff line change
@@ -15,17 +15,21 @@ The grid is a 2-dimensional array of size MxM. The timings for different values

| Language | M=100 | M=200 | M=300 |
|---------------------|-----------------|-----------------------|------------------------|
| Python (pure) | 276 | n/a | n/a |
| Cython | 1.02 | 32.8 | 229 |
| Fortran (naive) | 0.34 | 13.2 | 69.7 |
| Fortran (optimized) | 0.18 | 6.25 | 31.4 |
| C (naive) | 0.42* | 7.25 | 33.7 |
| C (optimized) | 0.37* | 6.80 | 32.8 |

* For all of these results, the amount of iterations performed by the respective codes was approximately
the same, with the exception of the 100x100 grid C codes, which did nearly a double amount of iterations
compared to the rest, for some reason. The timings are on AMD Ryzen 5 3600 @3.6GHz, using WSL2 on Windows 10.
| Python (pure) | 198 | n/a | n/a |
| Cython | 0.46 | 5.77 | 43.9 |
| Fortran (trad1) | 0.23 | 3.59 | 18.3 |
| Fortran (trad2) | 0.19 | 4.29 | 19.6 |
| C (naive) | 0.18 | 2.46 | 11.6 |
| C (optimized) | 0.13 | 1.83 | 8.26 |
| Fortran (modern) | 0.19 | n/a | 8.08 |
| Python (vectorized) | 0.92 | 10.8 | 65.7 |

* For all of these results, the amount of iterations performed was exactly the same.
The timings are on AMD Ryzen 5 3600 @3.6GHz, using WSL2 on Windows 10.

Some thoughts on the code at https://runningcrocodile.fi/articles/pythonperformance.html . Also, there was
discussion about this problem at Discourse: https://fortran-lang.discourse.group/t/performance-c-vs-fortran/1461
with important contributions from numerous users (Mason, Beliavsky, septc, implicitall, nncarlson, han190 and pmk)

Codes written by a running crocodile (traditional1.f90, traditional2.f90, naive.c, optimized.c, poisson.py, poissonvectorized.py, poisson.pyx)
and Damian Rouson (modern.f90) with the aforementioned help from Discourse.
97 changes: 97 additions & 0 deletions poisson2d/modern.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
module modern_func
!! Code by Damian Rouson, @rouson.
!! Modified by ARC, @arunningcroc

!! Define a scalar function over 2-space.
implicit none

private
public :: dp, run_modern

integer, parameter :: precision = 15, range = 50
integer, parameter :: dp = selected_real_kind(precision, range)
contains
pure real(dp) function rho(x,y)
real(dp), intent(in) :: x,y
rho = -(6.0_dp*x*y*(1.0_dp-y)-2.0_dp*x**3.0_dp)
end function

pure elemental real(dp) function boundary(y)
real(dp),intent(in) :: y
boundary = y*(1-y)
end function

pure real(dp) function analytical_sol(x,y)
real(dp), intent(in) :: x,y
analytical_sol = y*(1-y)*x**3
end function

integer function run_modern(M)
real(dp) :: dx, rho_sampled(M,M)
integer :: M,i,j

dx = 1.0/(M-1)
associate( dy => (dx) ) ! Associating with an expression provides immutable state so dy cannot be inadvertently redefined.

do concurrent(i=1:M, j=1:M)
rho_sampled(i,j) = rho(i*dx,j*dy)
end do

block ! Tighten the scoping to declutter the code above.
real(dp) :: delta_phi, avgerror
real(dp), parameter :: tolerance=1E-8_dp, analytical_tol = 5.0e-3_dp
real(dp), dimension(0:M-1,0:M-1) :: phi_prime, phi, analytical
integer :: iteration

phi = 0.
phi(M-1,:) = boundary([(i*dx,i=0,M-1)])

do concurrent(i=0:M-1, j=0:M-1)
analytical(i,j) = analytical_sol(i*dx,j*dx)
end do

phi_prime([0,M-1], 1:M-2) = phi([0,M-1], 1:M-2) ! Initialize only boundary values except corners. (Internal values will
phi_prime(1:M-2, [0,M-1]) = phi(1:M-2, [0,M-1]) ! be overwritten in the first iteration. Corners will never be used.)

delta_phi = 1.0_dp !tolerance + epsilon(tolerance) ! Ensure at least 1 iteration.
iteration = 0
do while (delta_phi > tolerance )
iteration = iteration + 1
do concurrent(i=1:M-2, j=1:M-2) ! Compute updated solution estimate at internal points.
phi_prime(i,j) = (phi(i+1,j) + phi(i-1,j) + phi(i,j+1) + phi(i,j-1))/4._dp &
+ (dx/2._dp)*(dy/2._dp)*rho_sampled(i,j)
end do
delta_phi = maxval(abs(phi_prime - phi))
phi(1:M-2, 1:M-2) = phi_prime(1:M-2, 1:M-2) ! Update internal values.
!print *, iteration
end do
phi = phi/maxval(abs(phi))
analytical = analytical/maxval(abs(analytical))
avgerror = sum(abs(phi-analytical))/(M*M)
if(avgerror > analytical_tol) then
print *, "Failed to match analytical solution", avgerror
end if
run_modern = iteration
end block


end associate
end function
end module


program poisson
!! Solve the 2D Poisson equation using a smoothing operation to iterate.
use modern_func, only: run_modern, dp
implicit none

integer :: iter
integer, parameter :: M=170
real(dp) :: t_start, t_end


call cpu_time(t_start)
iter = run_modern(M)
call cpu_time(t_end)
print *, t_end-t_start, iter
end program
85 changes: 63 additions & 22 deletions poisson2d/naive.c
Original file line number Diff line number Diff line change
@@ -3,7 +3,14 @@
#include <string.h>
#include <math.h>
#include <time.h>
#define M 300
#define M 100
#define analytical_tol 5.0e-3

/*
*
* Code by ARC, @arunningcroc
*
*/
void swap(double (*a)[M], double (*b)[M], int n)
{
double swp;
@@ -28,38 +35,69 @@ double mmax(double (*phi)[M], double (*phip)[M], int n)
}
return max;
}
void normalize(double (*phi)[M]) {
double max = -50000;
for(int i=0; i<M; i++) {
for(int j=0; j<M; j++) {
if(fabs(phi[i][j]) > max) max = fabs(phi[i][j]);
}
}
for(int i=0; i<M; i++) {
for(int j=0; j<M; j++) {
phi[i][j] = phi[i][j]/max;
}
}

}
double rho(double x, double y)
{
double s1 = 0.6;
double e1 = 0.8;
double s2 = 0.2;
double e2 = 0.4;

if (x > s1 && x < e1 && y > s1 && y < e1) {
return 1.0;
} else if (x > s2 && x < e2 && y > s2 && y < e2 ) {
return -1.0;
} else {
return 0.0;
}
return -(6.0*x*y*(1.0-y)-2.0*pow(x,3.0));
}
void run(double toler, double a)
double boundary(double y)
{
return y*(1.0-y);
}
double analytical_sol(double x,double y)
{
return y*(1.0-y)*pow(x,3.0);
}
double get_average_error(double (*sol)[M], double (*analytical)[M]) {
double sum = 0.0;
normalize(sol);
normalize(analytical);
for(int i=0; i<M; i++) {
for(int j=0; j<M; j++) {
sum += fabs(sol[i][j]-analytical[i][j]);
}
}
return sum/(M*M);
}
int run(double toler, double a)
{
double epsilon0 = 8.85e-12;
double a2;

double (*phi)[M];
double (*phip)[M];
double (*rhoa)[M];
double (*rhoa)[M];
double (*analytical)[M];
phi = malloc(sizeof(double[M][M]));
phip = malloc(sizeof(double[M][M]));
rhoa = malloc(sizeof(double[M][M]));

analytical = malloc(sizeof(double[M][M]));
for(int i=0; i<M; i++) {
for (int j=0; j<M; j++) {
analytical[i][j] = analytical_sol(i*a,j*a);
}
}
int iter = 0;

memset(phip, 0, sizeof(phip[0][0])*M*M);
memset(phi, 0, sizeof(phi[0][0])*M*M);

for (int j=0; j < M; j++) {
phip[M-1][j] = j*a*(1-j*a);
phi[M-1][j] = j*a*(1-j*a);
}
double delta = 1.0;
a2 = pow(a,2.0);
while (delta > toler) {
@@ -69,22 +107,25 @@ void run(double toler, double a)
for (int j=1; j < M-1; j++) {
phip[i][j] = (phi[i+1][j] + phi[i-1][j] +
phi[i][j+1] + phi[i][j-1])/4.0 +
a2/(4.0 * epsilon0)*rho(i*a,j*a);
a2*rho(i*a,j*a)/4.0;
}
}
delta = mmax(phi, phip, M);
swap(phi, phip, M);

}
printf("iters %d", iter);

if(get_average_error(phi, analytical) > analytical_tol) {
printf("Failed to reach analytical solution, error %f \n", get_average_error(phi, analytical));
}
return iter;
}

int main(int argc, char *argv[])
{
int iter;
clock_t start = clock();
run(1e-6, 0.01);
iter = run(1e-8, 1.0/(M-1));
clock_t end = clock();
double total = ((double)(end - start)) / CLOCKS_PER_SEC;
printf("Execution time: %f\n",total);
printf("Execution time %f, iters: %d\n",total,iter);
}
51 changes: 0 additions & 51 deletions poisson2d/naive.f90

This file was deleted.

Loading