Skip to content

Commit 5184dda

Browse files
committed
mnt: added optimize1d.c
Also benchmarks should not rely on convergence, rather a fixed number of iterations is more appropriate. The problem of inconsistent iterations between the C and Fortran versions was also fixed because of an indexing error when calculating the cell coordinates (C is 0-based, F is 1-based). So they now run the same number of iterations, regardless of M. Moved timings about to make them more comparable. Ensured that all examples now use allocate/malloc which is much more realistic. Even though it does not have the same performance it is much more appropriate for benchmark cases. Lastly, made calls more similar, C and F both call a function for the iteration step.
1 parent 2cffc7b commit 5184dda

File tree

4 files changed

+168
-48
lines changed

4 files changed

+168
-48
lines changed

poisson2d/README.md

+2
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,5 @@ compared to the rest, for some reason. The timings are on AMD Ryzen 5 3600 @3.6G
2929
Some thoughts on the code at https://runningcrocodile.fi/articles/pythonperformance.html . Also, there was
3030
discussion about this problem at Discourse: https://fortran-lang.discourse.group/t/performance-c-vs-fortran/1461
3131
with important contributions from numerous users (Mason, Beliavsky, septc, implicitall, nncarlson, han190 and pmk)
32+
33+
The codes were rewritten for more realistic cases by zerothi.

poisson2d/optimized.f90

+52-44
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,26 @@
1-
module rhofunc
2-
implicit none
3-
public
4-
integer, parameter :: dp=kind(0.d0)
5-
6-
contains
7-
8-
pure real(dp) function rho(x,y)
9-
real(dp), intent(in) :: x,y
10-
if ( 0.6_dp < x .and. x < 0.8_dp .and. 0.6_dp < y .and. y < 0.8_dp ) then
11-
rho = 1.0_dp
12-
else if ( 0.2_dp < x .and. x < 0.4_dp .and. 0.2_dp < y .and. y < 0.4_dp ) then
13-
rho = -1.0_dp
14-
else
15-
rho = 0.0_dp
16-
end if
17-
end function rho
18-
19-
end module rhofunc
20-
211
program poisson
22-
use rhofunc, only: rho
2+
233
implicit none
4+
5+
integer, parameter :: M = 1000
246
integer, parameter :: dp=kind(0.d0)
25-
integer, parameter :: M = 300
7+
integer, parameter :: N_ITER = 10000
268
real(dp),parameter :: target=1E-1_dp
279
real(dp),parameter :: a=0.01_dp
28-
29-
integer :: i,j, iter
3010
real(dp),parameter :: epsilon0=8.85E-12_dp
31-
real(dp) :: delta, b, e, a2
11+
12+
integer :: i,j, iter
13+
real(dp) :: delta, b, e, a2
3214

3315
real(dp), allocatable :: rhoarr(:,:)
3416
real(dp), allocatable, target :: phiprime(:,:), phi(:,:)
35-
real(dp), pointer :: p(:,:), pnew(:,:)
17+
real(dp), pointer :: p(:,:), pnew(:,:), tmp(:,:)
18+
19+
call cpu_time(b)
3620

3721
allocate(phiprime(M,M), phi(M,M))
3822
allocate(rhoarr(M,M))
3923

40-
call cpu_time(b)
41-
4224
! Fortran doesn't care too much about pow
4325
! since it figures out if the exponent is an integer or not
4426
a2 = a*a / epsilon0
@@ -48,21 +30,43 @@ program poisson
4830
end do
4931
end do
5032

51-
! We only need to initialize 1 array
5233
phi(:,:) = 0.0_dp
34+
phiprime(:,:) = 0.0_dp
35+
p => phi
36+
pnew => phiprime
5337
iter = 0
5438
delta = target + 1._dp
55-
do while ( delta > target )
39+
do while ( iter < N_ITER )
5640
iter = iter + 1
5741

42+
call iterate(rhoarr, p, pnew, delta)
43+
5844
! Easer to swap pointer than copying stuff around
59-
if ( mod(iter, 2) == 1 ) then
60-
p => phi
61-
pnew => phiprime
62-
else
63-
p => phiprime
64-
pnew => phi
65-
end if
45+
! In fortran we could even use pointers
46+
! But we can just call
47+
tmp => p
48+
p => pnew
49+
pnew => tmp
50+
51+
end do
52+
53+
deallocate(phi, phiprime, rhoarr)
54+
55+
call cpu_time(e)
56+
57+
write(*,'(a)') 'fortran version'
58+
write(*,'(a,f20.10)') 'delta = ',delta
59+
write(*,'(a,i0)') 'Iterations = ',iter
60+
write(*,'(a,f20.10)') 'Time = ',e - b
61+
62+
contains
63+
64+
subroutine iterate(rhoarr, p, pnew, delta)
65+
real(dp), intent(in) :: p(:,:), rhoarr(:,:)
66+
real(dp), intent(out) :: pnew(:,:)
67+
real(dp), intent(out) :: delta
68+
69+
integer :: i, j
6670

6771
delta = 0._dp
6872
do j=2, M-1
@@ -72,13 +76,17 @@ program poisson
7276
end do
7377
end do
7478

75-
end do
76-
77-
call cpu_time(e)
79+
end subroutine iterate
7880

79-
deallocate(phi, phiprime, rhoarr)
80-
81-
write(*,'(a,i0)') 'Iterations = ',iter
82-
write(*,'(a,f20.10)') 'Time = ',e - b
81+
pure real(dp) function rho(x,y)
82+
real(dp), intent(in) :: x,y
83+
if ( 0.6_dp < x .and. x < 0.8_dp .and. 0.6_dp < y .and. y < 0.8_dp ) then
84+
rho = 1.0_dp
85+
else if ( 0.2_dp < x .and. x < 0.4_dp .and. 0.2_dp < y .and. y < 0.4_dp ) then
86+
rho = -1.0_dp
87+
else
88+
rho = 0.0_dp
89+
end if
90+
end function rho
8391

8492
end program poisson

poisson2d/optimized1d.c

+106
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#include <stdio.h>
2+
#include <stdlib.h>
3+
#include <string.h>
4+
#include <math.h>
5+
#include <time.h>
6+
#define M 1000
7+
#define N_ITER 10000
8+
9+
#define IDX(i,j) (i)*M+j
10+
11+
12+
double rho(const double x, const double y) {
13+
const double s1 = 0.6;
14+
const double e1 = 0.8;
15+
const double s2 = 0.2;
16+
const double e2 = 0.4;
17+
18+
if (s1 < x && x < e1 && s1 < y && y < e1) {
19+
return 1.0;
20+
} else if ( s2 < x && x < e2 && s2 < y && y < e2 ) {
21+
return -1.0;
22+
} else {
23+
return 0.0;
24+
}
25+
}
26+
27+
double iterate(double *restrict phi, double *restrict phinew, double *restrict rhoa) {
28+
double delta = 0, err;
29+
for (int i=1; i < M-1; i++) {
30+
for (int j=1; j < M-1; j++) {
31+
phinew[IDX(i,j)] = (phi[IDX(i+1,j)] + phi[IDX(i-1,j)] + phi[IDX(i,j+1)] + phi[IDX(i,j-1)] + rhoa[IDX(i,j)])*0.25;
32+
err = fabs(phinew[IDX(i,j)] - phi[IDX(i,j)]);
33+
if ( err > delta ) delta = err;
34+
}
35+
}
36+
return delta;
37+
}
38+
39+
void init_rho(double *restrict rhoa, const double epsilon, const double a) {
40+
const double a2 = a * a / epsilon;
41+
for (int i=1; i<M-1; i++) {
42+
for (int j=1; j<M-1; j++) {
43+
rhoa[IDX(i,j)] = rho(i*a,j*a) * a2;
44+
}
45+
}
46+
}
47+
48+
49+
void run(const double toler, const double a)
50+
{
51+
double epsilon0 = 8.85e-12;
52+
53+
double *phi;
54+
double *phip;
55+
double *rhoa;
56+
double *tmp;
57+
58+
// A real world program will definitely use malloc
59+
phi = malloc(M*M*sizeof(double));
60+
phip = malloc(M*M*sizeof(double));
61+
rhoa = malloc(M*M*sizeof(double));
62+
63+
// Only need to initialize one of them!
64+
for (int i = 0 ; i < M*M ; i++ )
65+
phi[i] = 0.;
66+
for (int i = 0 ; i < M*M ; i++ )
67+
phip[i] = 0.;
68+
69+
// In C one tries to avoid using pow because
70+
// it assumes floating point powers (integers are faster)
71+
// So better do it directly
72+
init_rho(rhoa, epsilon0, a);
73+
74+
int iter = 0;
75+
double delta;
76+
while ( iter < N_ITER ) {
77+
iter += 1;
78+
79+
delta = iterate(phi, phip, rhoa);
80+
81+
// swap pointers (no copies)
82+
tmp = phi;
83+
phi = phip;
84+
phip = tmp;
85+
}
86+
87+
free(phi);
88+
free(phip);
89+
free(rhoa);
90+
91+
printf("delta = %20.10f\n",delta);
92+
printf("Iterations = %d\n", iter);
93+
}
94+
95+
int main(int argc, char *argv[])
96+
{
97+
const double target = 1e-1;
98+
const double a = 0.01;
99+
100+
clock_t start = clock();
101+
printf("c version [1d]\n");
102+
run(target, a);
103+
clock_t end = clock();
104+
double total = ((double)(end - start)) / CLOCKS_PER_SEC;
105+
printf("Time = %20.10f\n",total);
106+
}

poisson2d/optimized.c poisson2d/optimized2d.c

+8-4
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
#include <string.h>
44
#include <math.h>
55
#include <time.h>
6-
#define M 300
6+
#define M 1000
7+
#define N_ITER 10000
78

89

910
double ** malloc_2d(int m, int n) {
@@ -99,7 +100,7 @@ void run(double toler, double a)
99100

100101
int iter = 0;
101102
double delta;
102-
do {
103+
while ( iter < N_ITER ) {
103104
iter += 1;
104105

105106
delta = iterate(phi, phip, rhoa);
@@ -108,20 +109,23 @@ void run(double toler, double a)
108109
tmp = phi;
109110
phi = phip;
110111
phip = tmp;
111-
} while ( delta > toler );
112+
}
112113

113114
free_2d(phi);
114115
free_2d(phip);
115116
free_2d(rhoa);
116117

118+
printf("delta = %20.10f\n",delta);
117119
printf("Iterations = %d\n", iter);
118120
}
119121

120122
int main(int argc, char *argv[])
121123
{
122-
clock_t start = clock();
123124
const double target = 1e-1;
124125
const double a = 0.01;
126+
127+
clock_t start = clock();
128+
printf("c version [2d]\n");
125129
run(target, a);
126130
clock_t end = clock();
127131
double total = ((double)(end - start)) / CLOCKS_PER_SEC;

0 commit comments

Comments
 (0)