Skip to content

Commit

Permalink
Strengthen a few OpenACC test cases
Browse files Browse the repository at this point in the history
Rather than rubber-stamping whatever device kernel launch configuration
actually results from the requested one, (again) verify the requested values
(modulo expected variations).

This better highlights that "AMD GCN has an upper limit of 'num_workers(16)'",
and the deficiency that "AMD GCN uses the autovectorizer for the vector
dimension: the use of a function call in vector-partitioned code [...] is not
currently supported".

This also removes several race conditions in which variables were
concurrently written to in OpenACC gang-redundant mode.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c: Strengthen.
	* testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-v-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-w-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-v-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-w-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c: Likewise.
  • Loading branch information
tschwinge committed Jan 21, 2022
1 parent 23be9f8 commit 087e545
Show file tree
Hide file tree
Showing 15 changed files with 202 additions and 57 deletions.
25 changes: 18 additions & 7 deletions libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@ int main ()

for (ix = 0; ix < N;ix++)
ary[ix] = -1;

#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize)

#define NG 32
#define NW 32
#define VL 32
#pragma acc parallel num_gangs(NG) num_workers(NW) vector_length(VL) \
copy(ary) copy(ondev)
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
{
#pragma acc loop gang worker vector
Expand All @@ -45,11 +48,19 @@ int main ()
else
ary[ix] = ix;
}

gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
gangsize = NG;
workersize = NW;
vectorsize = VL;
#ifdef ACC_DEVICE_TYPE_radeon
/* AMD GCN has an upper limit of 'num_workers(16)'. */
if (workersize > 16)
workersize = 16;
/* AMD GCN uses the autovectorizer for the vector dimension: the use
of a function call in vector-partitioned code in this test is not
currently supported. */
vectorsize = 1;
#endif

for (ix = 0; ix < N; ix++)
{
Expand Down
34 changes: 27 additions & 7 deletions libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,17 @@ int main ()
int ary[N];
int ix;
int exit = 0;
int gangsize = 0, workersize = 0, vectorsize = 0;
int gangsize, workersize, vectorsize;
int *gangdist, *workerdist, *vectordist;

for (ix = 0; ix < N;ix++)
ary[ix] = -1;

#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
copy(ary) copyout(gangsize, workersize, vectorsize)
#define NG 32
#define NW 32
#define VL 32
#pragma acc parallel num_gangs(NG) num_workers(NW) vector_length(VL) \
copy(ary)
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
{
#pragma acc loop gang worker vector
Expand All @@ -71,11 +74,23 @@ int main ()

ary[ix] = (g << 16) | (w << 8) | v;
}

gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
gangsize = NG;
workersize = NW;
vectorsize = VL;
#if defined ACC_DEVICE_TYPE_host
gangsize = 1;
workersize = 1;
vectorsize = 1;
#elif defined ACC_DEVICE_TYPE_radeon
/* AMD GCN has an upper limit of 'num_workers(16)'. */
if (workersize > 16)
workersize = 16;
/* AMD GCN uses the autovectorizer for the vector dimension: the use
of a function call in vector-partitioned code in this test is not
currently supported. */
vectorsize = 1;
#endif

gangdist = (int *) __builtin_alloca (gangsize * sizeof (int));
workerdist = (int *) __builtin_alloca (workersize * sizeof (int));
Expand All @@ -92,6 +107,11 @@ int main ()
int w = (ary[ix] >> 8) & 255;
int v = ary[ix] & 255;

if (g >= gangsize
|| w >= workersize
|| v >= vectorsize)
__builtin_abort ();

gangdist[g]++;
workerdist[w]++;
vectordist[v]++;
Expand Down
22 changes: 17 additions & 5 deletions libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@ int main ()
int t = 0, h = 0;
int gangsize, workersize, vectorsize;

#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
copy(ondev) copyout(gangsize, workersize, vectorsize)
#define NG 32
#define NW 32
#define VL 32
#pragma acc parallel num_gangs(NG) num_workers(NW) vector_length(VL) \
copy(ondev)
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
{
#pragma acc loop gang worker vector reduction(+:t)
Expand All @@ -42,10 +45,19 @@ int main ()
}
t += val;
}
gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
gangsize = NG;
workersize = NW;
vectorsize = VL;
#ifdef ACC_DEVICE_TYPE_radeon
/* AMD GCN has an upper limit of 'num_workers(16)'. */
if (workersize > 16)
workersize = 16;
/* AMD GCN uses the autovectorizer for the vector dimension: the use
of a function call in vector-partitioned code in this test is not
currently supported. */
vectorsize = 1;
#endif

for (ix = 0; ix < N; ix++)
{
Expand Down
11 changes: 9 additions & 2 deletions libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ int main ()
int t = 0, h = 0;
int vectorsize;

#pragma acc parallel vector_length(32) copy(ondev) copyout(vectorsize)
#define VL 32
#pragma acc parallel vector_length(VL) copy(ondev)
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */
{
#pragma acc loop vector reduction (+:t)
Expand All @@ -42,8 +43,14 @@ int main ()
}
t += val;
}
vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
vectorsize = VL;
#ifdef ACC_DEVICE_TYPE_radeon
/* AMD GCN uses the autovectorizer for the vector dimension: the use
of a function call in vector-partitioned code in this test is not
currently supported. */
vectorsize = 1;
#endif

for (ix = 0; ix < N; ix++)
{
Expand Down
11 changes: 9 additions & 2 deletions libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ int main ()
int q = 0, h = 0;
int vectorsize;

#pragma acc parallel vector_length(32) copy(q) copy(ondev) copyout(vectorsize)
#define VL 32
#pragma acc parallel vector_length(VL) copy(q) copy(ondev)
/* { dg-note {variable 't' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-1 } */
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
{
Expand Down Expand Up @@ -46,8 +47,14 @@ int main ()
t += val;
}
q = t;
vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
vectorsize = VL;
#ifdef ACC_DEVICE_TYPE_radeon
/* AMD GCN uses the autovectorizer for the vector dimension: the use
of a function call in vector-partitioned code in this test is not
currently supported. */
vectorsize = 1;
#endif

for (ix = 0; ix < N; ix++)
{
Expand Down
13 changes: 10 additions & 3 deletions libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@ int main ()
int t = 0, h = 0;
int workersize;

#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \
copyout(workersize)
#define NW 32
#define VL 32
#pragma acc parallel num_workers(NW) vector_length(VL) \
copy(ondev)
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-3 } */
{
Expand All @@ -46,8 +48,13 @@ int main ()
}
t += val;
}
workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
}
workersize = NW;
#ifdef ACC_DEVICE_TYPE_radeon
/* AMD GCN has an upper limit of 'num_workers(16)'. */
if (workersize > 16)
workersize = 16;
#endif

for (ix = 0; ix < N; ix++)
{
Expand Down
13 changes: 10 additions & 3 deletions libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@ int main ()
int q = 0, h = 0;
int workersize;

#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev) \
copyout(workersize)
#define NW 32
#define VL 32
#pragma acc parallel num_workers(NW) vector_length(VL) \
copy(q) copy(ondev)
/* { dg-note {variable 't' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-3 } */
/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-4 } */
Expand Down Expand Up @@ -50,8 +52,13 @@ int main ()
t += val;
}
q = t;
workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
}
workersize = NW;
#ifdef ACC_DEVICE_TYPE_radeon
/* AMD GCN has an upper limit of 'num_workers(16)'. */
if (workersize > 16)
workersize = 16;
#endif

for (ix = 0; ix < N; ix++)
{
Expand Down
19 changes: 15 additions & 4 deletions libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ int main ()
int t = 0, h = 0;
int workersize, vectorsize;

#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \
copyout(workersize, vectorsize)
#define NW 32
#define VL 32
#pragma acc parallel num_workers(NW) vector_length(VL) \
copy(ondev)
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
{
#pragma acc loop worker vector reduction (+:t)
Expand All @@ -42,9 +44,18 @@ int main ()
}
t += val;
}
workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
workersize = NW;
vectorsize = VL;
#ifdef ACC_DEVICE_TYPE_radeon
/* AMD GCN has an upper limit of 'num_workers(16)'. */
if (workersize > 16)
workersize = 16;
/* AMD GCN uses the autovectorizer for the vector dimension: the use
of a function call in vector-partitioned code in this test is not
currently supported. */
vectorsize = 1;
#endif

for (ix = 0; ix < N; ix++)
{
Expand Down
13 changes: 10 additions & 3 deletions libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ int main ()
for (ix = 0; ix < N;ix++)
ary[ix] = -1;

#pragma acc parallel vector_length(32) copy(ary) copy(ondev) \
copyout(vectorsize)
#define VL 32
#pragma acc parallel vector_length(VL) \
copy(ary) copy(ondev)
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
{
#pragma acc loop vector
Expand All @@ -44,8 +45,14 @@ int main ()
else
ary[ix] = ix;
}
vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
vectorsize = VL;
#ifdef ACC_DEVICE_TYPE_radeon
/* AMD GCN uses the autovectorizer for the vector dimension: the use
of a function call in vector-partitioned code in this test is not
currently supported. */
vectorsize = 1;
#endif

for (ix = 0; ix < N; ix++)
{
Expand Down
13 changes: 10 additions & 3 deletions libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ int main ()
for (ix = 0; ix < N;ix++)
ary[ix] = -1;

#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
copyout(workersize)
#define NW 32
#define VL 32
#pragma acc parallel num_workers(NW) vector_length(VL) \
copy(ary) copy(ondev)
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
/* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "vector" { target *-*-* } .-3 } */
{
Expand All @@ -48,8 +50,13 @@ int main ()
else
ary[ix] = ix;
}
workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
}
workersize = NW;
#ifdef ACC_DEVICE_TYPE_radeon
/* AMD GCN has an upper limit of 'num_workers(16)'. */
if (workersize > 16)
workersize = 16;
#endif

for (ix = 0; ix < N; ix++)
{
Expand Down
19 changes: 15 additions & 4 deletions libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ int main ()
for (ix = 0; ix < N;ix++)
ary[ix] = -1;

#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
copyout(workersize, vectorsize)
#define NW 32
#define VL 32
#pragma acc parallel num_workers(NW) vector_length(VL) \
copy(ary) copy(ondev)
/* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
{
#pragma acc loop worker vector
Expand All @@ -44,9 +46,18 @@ int main ()
else
ary[ix] = ix;
}
workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
}
workersize = NW;
vectorsize = VL;
#ifdef ACC_DEVICE_TYPE_radeon
/* AMD GCN has an upper limit of 'num_workers(16)'. */
if (workersize > 16)
workersize = 16;
/* AMD GCN uses the autovectorizer for the vector dimension: the use
of a function call in vector-partitioned code in this test is not
currently supported. */
vectorsize = 1;
#endif

for (ix = 0; ix < N; ix++)
{
Expand Down
Loading

0 comments on commit 087e545

Please sign in to comment.