From 724da2a676f6dde14ebd0c5f621d849c5febccf9 Mon Sep 17 00:00:00 2001 From: Ryan Padrone Date: Sun, 12 Apr 2026 15:19:01 -0700 Subject: [PATCH 1/2] Add async-argument and wait-argument grammar tests on compute constructs --- Tests/acc_async_noval.F90 | 72 +++++++++++++ Tests/acc_async_noval.c | 72 +++++++++++++ Tests/acc_async_noval.cpp | 72 +++++++++++++ Tests/acc_async_sync.F90 | 72 +++++++++++++ Tests/acc_async_sync.c | 72 +++++++++++++ Tests/acc_async_sync.cpp | 72 +++++++++++++ Tests/async_int_expr.F90 | 76 ++++++++++++++ Tests/async_int_expr.c | 73 ++++++++++++++ Tests/async_int_expr.cpp | 75 ++++++++++++++ Tests/wait_argument.F90 | 206 ++++++++++++++++++++++++++++++++++++++ Tests/wait_argument.c | 199 ++++++++++++++++++++++++++++++++++++ Tests/wait_argument.cpp | 197 ++++++++++++++++++++++++++++++++++++ 12 files changed, 1258 insertions(+) create mode 100644 Tests/acc_async_noval.F90 create mode 100644 Tests/acc_async_noval.c create mode 100644 Tests/acc_async_noval.cpp create mode 100644 Tests/acc_async_sync.F90 create mode 100644 Tests/acc_async_sync.c create mode 100644 Tests/acc_async_sync.cpp create mode 100644 Tests/async_int_expr.F90 create mode 100644 Tests/async_int_expr.c create mode 100644 Tests/async_int_expr.cpp create mode 100644 Tests/wait_argument.F90 create mode 100644 Tests/wait_argument.c create mode 100644 Tests/wait_argument.cpp diff --git a/Tests/acc_async_noval.F90 b/Tests/acc_async_noval.F90 new file mode 100644 index 0000000..1ad058d --- /dev/null +++ b/Tests/acc_async_noval.F90 @@ -0,0 +1,72 @@ +! acc_async_noval.F90 +! +! Feature under test (OpenACC 3.4, Sections 2.5 and 2.16, March 2026): +! Compute constructs now use the async-argument and wait-argument +! grammar consistently with the rest of the specification. +! +! Test: +! T1 – async(acc_async_noval). 
+ +#ifndef T1 +!T1:async-argument,special-value,compute-constructs,acc_async_noval,V:3.4- + LOGICAL FUNCTION test1() + USE OPENACC + IMPLICIT NONE + INCLUDE "acc_testsuite.Fh" + INTEGER :: i, errors + REAL(8), DIMENSION(LOOPCOUNT) :: a, b, c + + errors = 0 + + SEEDDIM(1) = 1 +# ifdef SEED + SEEDDIM(1) = SEED +# endif + CALL RANDOM_SEED(PUT=SEEDDIM) + CALL RANDOM_NUMBER(a) + CALL RANDOM_NUMBER(b) + c = 0.0D0 ! result starts at zero; the kernel must overwrite it to pass + + !$acc data copyin(a(1:LOOPCOUNT), b(1:LOOPCOUNT)) copy(c(1:LOOPCOUNT)) + !$acc parallel loop async(acc_async_noval) + DO i = 1, LOOPCOUNT + c(i) = a(i) - b(i) + END DO + !$acc end parallel loop + + !$acc wait + !$acc end data + + DO i = 1, LOOPCOUNT + IF (ABS(c(i) - (a(i) - b(i))) .GT. PRECISION) errors = errors + 1 + END DO + + test1 = (errors .NE. 0) ! .TRUE. means at least one element mismatched + END FUNCTION +#endif + + PROGRAM main + IMPLICIT NONE + INTEGER :: failcode, testrun + LOGICAL :: failed + INCLUDE "acc_testsuite.Fh" + +#ifndef T1 + LOGICAL :: test1 +#endif + + failcode = 0 + failed = .FALSE. + +#ifndef T1 + DO testrun = 1, NUM_TEST_CALLS + failed = failed .OR. test1() + END DO + IF (failed) THEN + failcode = failcode + 2 ** 0 ! bit 0 of the exit code records a T1 failure + failed = .FALSE. + END IF +#endif + + CALL EXIT(failcode) + END PROGRAM diff --git a/Tests/acc_async_noval.c b/Tests/acc_async_noval.c new file mode 100644 index 0000000..538ca21 --- /dev/null +++ b/Tests/acc_async_noval.c @@ -0,0 +1,72 @@ +// acc_async_noval.c +// +// Feature under test (OpenACC 3.4, Sections 2.5 and 2.16, March 2026): +// Compute constructs were updated to use the async-argument and +// wait-argument grammar consistently with the rest of the specification. +// +// Test: +// T1 – Explicit acc_async_noval: Uses async(acc_async_noval). 
+ +#include "acc_testsuite.h" +#include +#include +#include + +#ifndef T1 +//T1:async-argument,special-value,compute-constructs,acc_async_noval,V:3.4- +int test1() { /* returns 0 on success, non-zero on failure */ + int err = 0; + srand(SEED); + + real_t *a = (real_t*)malloc(n * sizeof(real_t)); + real_t *b = (real_t*)malloc(n * sizeof(real_t)); + real_t *c = (real_t*)malloc(n * sizeof(real_t)); + if (!a || !b || !c){ + free(a); free(b); free(c); return 1; /* free(NULL) is a no-op, so this releases any partial allocation */ + } + + for (int i = 0; i < n; ++i) { + a[i] = rand() / (real_t)(RAND_MAX / 10); + b[i] = rand() / (real_t)(RAND_MAX / 10); + c[i] = 0; + } + + #pragma acc data copyin(a[0:n], b[0:n]) copy(c[0:n]) + { + #pragma acc parallel loop async(acc_async_noval) + for (int i = 0; i < n; ++i){ + c[i] = a[i] - b[i]; + } + + #pragma acc wait + } + + for (int i = 0; i < n; ++i) { + if (fabs(c[i] - (a[i] - b[i])) > PRECISION){ + err++; + } + } + + free(a); + free(b); + free(c); + return err; +} +#endif + +int main() { + int failcode = 0; + int failed; + +#ifndef T1 + failed = 0; + for (int i = 0; i < NUM_TEST_CALLS; ++i){ + failed += test1(); + } + if (failed){ + failcode |= (1 << 0); /* bit 0 of the exit status flags T1 */ + } +#endif + + return failcode; +} diff --git a/Tests/acc_async_noval.cpp b/Tests/acc_async_noval.cpp new file mode 100644 index 0000000..2ff5622 --- /dev/null +++ b/Tests/acc_async_noval.cpp @@ -0,0 +1,72 @@ +// acc_async_noval.cpp +// +// Feature under test (OpenACC 3.4, Sections 2.5 and 2.16, March 2026): +// Compute constructs now use the async-argument and wait-argument +// grammar consistently with the rest of the specification. +// +// Test: +// T1 – async(acc_async_noval). 
+ +#include "acc_testsuite.h" +#include +#include +#include + +#ifndef T1 +//T1:async-argument,special-value,compute-constructs,acc_async_noval,V:3.4- +int test1() { /* returns 0 on success, non-zero on failure */ + int err = 0; + std::srand(SEED); + + real_t *a = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *b = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *c = (real_t*)std::malloc(n * sizeof(real_t)); + if (!a || !b || !c){ + std::free(a); std::free(b); std::free(c); return 1; /* free(NULL) is a no-op, so this releases any partial allocation */ + } + + for (int i = 0; i < n; ++i) { + a[i] = std::rand() / (real_t)(RAND_MAX / 10); + b[i] = std::rand() / (real_t)(RAND_MAX / 10); + c[i] = 0; + } + + #pragma acc data copyin(a[0:n], b[0:n]) copy(c[0:n]) + { + #pragma acc parallel loop async(acc_async_noval) + for (int i = 0; i < n; ++i){ + c[i] = a[i] - b[i]; + } + + #pragma acc wait + } + + for (int i = 0; i < n; ++i) { + if (std::fabs(c[i] - (a[i] - b[i])) > PRECISION){ + err++; + } + } + + std::free(a); + std::free(b); + std::free(c); + return err; +} +#endif + +int main() { + int failcode = 0; + int failed; + +#ifndef T1 + failed = 0; + for (int i = 0; i < NUM_TEST_CALLS; ++i){ + failed += test1(); + } + if (failed){ + failcode |= (1 << 0); /* bit 0 of the exit status flags T1 */ + } +#endif + + return failcode; +} diff --git a/Tests/acc_async_sync.F90 b/Tests/acc_async_sync.F90 new file mode 100644 index 0000000..272f34e --- /dev/null +++ b/Tests/acc_async_sync.F90 @@ -0,0 +1,72 @@ +! acc_async_sync.F90 +! +! Feature under test (OpenACC 3.4, Sections 2.5 and 2.16, March 2026): +! Compute constructs now use the async-argument and wait-argument +! grammar consistently with the rest of the specification. +! +! Test: +! T1 – async(acc_async_sync). 
+ +#ifndef T1 +!T1:async-argument,special-value,compute-constructs,acc_async_sync,V:3.4- + LOGICAL FUNCTION test1() + USE OPENACC + IMPLICIT NONE + INCLUDE "acc_testsuite.Fh" + INTEGER :: i, errors + REAL(8), DIMENSION(LOOPCOUNT) :: a, b, c + + errors = 0 + + SEEDDIM(1) = 1 +# ifdef SEED + SEEDDIM(1) = SEED +# endif + CALL RANDOM_SEED(PUT=SEEDDIM) + CALL RANDOM_NUMBER(a) + CALL RANDOM_NUMBER(b) + c = 0.0D0 ! result starts at zero; the kernel must overwrite it to pass + + !$acc data copyin(a(1:LOOPCOUNT), b(1:LOOPCOUNT)) copy(c(1:LOOPCOUNT)) + !$acc parallel loop async(acc_async_sync) + DO i = 1, LOOPCOUNT + c(i) = a(i) + b(i) + END DO + !$acc end parallel loop + + !$acc wait + !$acc end data + + DO i = 1, LOOPCOUNT + IF (ABS(c(i) - (a(i) + b(i))) .GT. PRECISION) errors = errors + 1 + END DO + + test1 = (errors .NE. 0) ! .TRUE. means at least one element mismatched + END FUNCTION +#endif + + PROGRAM main + IMPLICIT NONE + INTEGER :: failcode, testrun + LOGICAL :: failed + INCLUDE "acc_testsuite.Fh" + +#ifndef T1 + LOGICAL :: test1 +#endif + + failcode = 0 + failed = .FALSE. + +#ifndef T1 + DO testrun = 1, NUM_TEST_CALLS + failed = failed .OR. test1() + END DO + IF (failed) THEN + failcode = failcode + 2 ** 0 ! bit 0 of the exit code records a T1 failure + failed = .FALSE. + END IF +#endif + + CALL EXIT(failcode) + END PROGRAM diff --git a/Tests/acc_async_sync.c b/Tests/acc_async_sync.c new file mode 100644 index 0000000..38fc09a --- /dev/null +++ b/Tests/acc_async_sync.c @@ -0,0 +1,72 @@ +// acc_async_sync.c +// +// Feature under test (OpenACC 3.4, Sections 2.5 and 2.16, March 2026): +// Compute constructs were updated to use the async-argument and +// wait-argument grammar consistently with the rest of the specification. +// +// Test: +// T1 – Explicit acc_async_sync: Uses async(acc_async_sync). 
+ +#include "acc_testsuite.h" +#include +#include +#include + +#ifndef T1 +//T1:async-argument,special-value,compute-constructs,acc_async_sync,V:3.4- +int test1() { /* returns 0 on success, non-zero on failure */ + int err = 0; + srand(SEED); + + real_t *a = (real_t*)malloc(n * sizeof(real_t)); + real_t *b = (real_t*)malloc(n * sizeof(real_t)); + real_t *c = (real_t*)malloc(n * sizeof(real_t)); + if (!a || !b || !c){ + free(a); free(b); free(c); return 1; /* free(NULL) is a no-op, so this releases any partial allocation */ + } + + for (int i = 0; i < n; ++i) { + a[i] = rand() / (real_t)(RAND_MAX / 10); + b[i] = rand() / (real_t)(RAND_MAX / 10); + c[i] = 0; + } + + #pragma acc data copyin(a[0:n], b[0:n]) copy(c[0:n]) + { + #pragma acc parallel loop async(acc_async_sync) + for (int i = 0; i < n; ++i){ + c[i] = a[i] + b[i]; + } + + #pragma acc wait + } + + for (int i = 0; i < n; ++i) { + if (fabs(c[i] - (a[i] + b[i])) > PRECISION){ + err++; + } + } + + free(a); + free(b); + free(c); + return err; +} +#endif + +int main() { + int failcode = 0; + int failed; + +#ifndef T1 + failed = 0; + for (int i = 0; i < NUM_TEST_CALLS; ++i){ + failed += test1(); + } + if (failed){ + failcode |= (1 << 0); /* bit 0 of the exit status flags T1 */ + } +#endif + + return failcode; +} diff --git a/Tests/acc_async_sync.cpp b/Tests/acc_async_sync.cpp new file mode 100644 index 0000000..160c231 --- /dev/null +++ b/Tests/acc_async_sync.cpp @@ -0,0 +1,72 @@ +// acc_async_sync.cpp +// +// Feature under test (OpenACC 3.4, Sections 2.5 and 2.16, March 2026): +// Compute constructs now use the async-argument and wait-argument +// grammar consistently with the rest of the specification. +// +// Test: +// T1 – async(acc_async_sync). 
+ +#include "acc_testsuite.h" +#include +#include +#include + +#ifndef T1 +//T1:async-argument,special-value,compute-constructs,acc_async_sync,V:3.4- +int test1() { /* returns 0 on success, non-zero on failure */ + int err = 0; + std::srand(SEED); + + real_t *a = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *b = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *c = (real_t*)std::malloc(n * sizeof(real_t)); + if (!a || !b || !c){ + std::free(a); std::free(b); std::free(c); return 1; /* free(NULL) is a no-op, so this releases any partial allocation */ + } + + for (int i = 0; i < n; ++i) { + a[i] = std::rand() / (real_t)(RAND_MAX / 10); + b[i] = std::rand() / (real_t)(RAND_MAX / 10); + c[i] = 0; + } + + #pragma acc data copyin(a[0:n], b[0:n]) copy(c[0:n]) + { + #pragma acc parallel loop async(acc_async_sync) + for (int i = 0; i < n; ++i){ + c[i] = a[i] + b[i]; + } + + #pragma acc wait + } + + for (int i = 0; i < n; ++i) { + if (std::fabs(c[i] - (a[i] + b[i])) > PRECISION){ + err++; + } + } + + std::free(a); + std::free(b); + std::free(c); + return err; +} +#endif + +int main() { + int failcode = 0; + int failed; + +#ifndef T1 + failed = 0; + for (int i = 0; i < NUM_TEST_CALLS; ++i){ + failed += test1(); + } + if (failed){ + failcode |= (1 << 0); /* bit 0 of the exit status flags T1 */ + } +#endif + + return failcode; +} diff --git a/Tests/async_int_expr.F90 b/Tests/async_int_expr.F90 new file mode 100644 index 0000000..189dd55 --- /dev/null +++ b/Tests/async_int_expr.F90 @@ -0,0 +1,76 @@ +! async_int_expr.F90 +! +! Feature under test (OpenACC 3.4, Sections 2.5 and 2.16, March 2026): +! Compute constructs now use the async-argument and wait-argument +! grammar consistently with the rest of the specification. +! +! Test: +! T1 – Basic async expression and wait. 
+ +#ifndef T1 +!T1:async,wait,runtime,compute-constructs,baseline,V:3.4- + LOGICAL FUNCTION test1() + USE OPENACC + IMPLICIT NONE + INCLUDE "acc_testsuite.Fh" + INTEGER :: i, errors + INTEGER :: q, q0 + REAL(8), DIMENSION(LOOPCOUNT) :: a, b, c + + errors = 0 + + SEEDDIM(1) = 1 +# ifdef SEED + SEEDDIM(1) = SEED +# endif + CALL RANDOM_SEED(PUT=SEEDDIM) + CALL RANDOM_NUMBER(a) + CALL RANDOM_NUMBER(b) + c = 0.0D0 ! result starts at zero; the kernel must overwrite it to pass + + q = 4 + q0 = q - q ! evaluates to 0 at run time; passed below as the async-argument + + !$acc data copyin(a(1:LOOPCOUNT), b(1:LOOPCOUNT)) copy(c(1:LOOPCOUNT)) + !$acc parallel loop present(a(1:LOOPCOUNT), b(1:LOOPCOUNT), c(1:LOOPCOUNT)) async(q0) + DO i = 1, LOOPCOUNT + c(i) = a(i) + b(i) + END DO + !$acc end parallel loop + + !$acc wait + !$acc end data + + DO i = 1, LOOPCOUNT + IF (ABS(c(i) - (a(i) + b(i))) .GT. PRECISION) errors = errors + 1 + END DO + + test1 = (errors .NE. 0) ! .TRUE. means at least one element mismatched + END FUNCTION +#endif + + PROGRAM main + IMPLICIT NONE + INTEGER :: failcode, testrun + LOGICAL :: failed + INCLUDE "acc_testsuite.Fh" + +#ifndef T1 + LOGICAL :: test1 +#endif + + failcode = 0 + failed = .FALSE. + +#ifndef T1 + DO testrun = 1, NUM_TEST_CALLS + failed = failed .OR. test1() + END DO + IF (failed) THEN + failcode = failcode + 2 ** 0 ! bit 0 of the exit code records a T1 failure + failed = .FALSE. + END IF +#endif + + CALL EXIT(failcode) + END PROGRAM diff --git a/Tests/async_int_expr.c b/Tests/async_int_expr.c new file mode 100644 index 0000000..dda8f72 --- /dev/null +++ b/Tests/async_int_expr.c @@ -0,0 +1,73 @@ +// async_int_expr.c +// +// Feature under test (OpenACC 3.4, Sections 2.5 and 2.16, March 2026): +// Compute constructs were updated to use the async-argument and +// wait-argument grammar consistently with the rest of the specification. +// +// Test: +// T1 – Basic async usage: Runs a parallel loop using async(q0) where q0 +// is an integer expression, followed by wait with no argument. 
+ +#include "acc_testsuite.h" +#include +#include +#include + +#ifndef T1 +//T1:async,wait,runtime,compute-constructs,baseline,V:3.4- +int test1() { /* returns 0 on success, non-zero on failure */ + int err = 0; + srand(SEED); + + real_t *a = (real_t*)malloc(n * sizeof(real_t)); + real_t *b = (real_t*)malloc(n * sizeof(real_t)); + real_t *c = (real_t*)malloc(n * sizeof(real_t)); + if (!a || !b || !c){ + free(a); free(b); free(c); return 1; /* free(NULL) is a no-op, so this releases any partial allocation */ + } + + for (int i = 0; i < n; ++i) { + a[i] = rand() / (real_t)(RAND_MAX / 10); + b[i] = rand() / (real_t)(RAND_MAX / 10); + c[i] = 0; + } + + int q = 4; + int q0 = q - q; /* int-expr evaluating to 0 => nonnegative scalar integer expression */ + + #pragma acc data copyin(a[0:n], b[0:n]) copy(c[0:n]) + { + #pragma acc parallel loop present(a[0:n], b[0:n], c[0:n]) async(q0) + for (int i = 0; i < n; ++i) { + c[i] = a[i] + b[i]; + } + + #pragma acc wait + } + + for (int i = 0; i < n; ++i) { + if (fabs(c[i] - (a[i] + b[i])) > PRECISION){ + err++; + } + } + + free(a); + free(b); + free(c); + return err; +} +#endif + +int main() { + int failcode = 0; + int failed = 0; +#ifndef T1 + for (int i = 0; i < NUM_TEST_CALLS; ++i){ + failed += test1(); + } + if (failed){ + failcode |= (1 << 0); /* bit 0 of the exit status flags T1 */ + } +#endif + return failcode; +} diff --git a/Tests/async_int_expr.cpp b/Tests/async_int_expr.cpp new file mode 100644 index 0000000..fd38822 --- /dev/null +++ b/Tests/async_int_expr.cpp @@ -0,0 +1,75 @@ +// async_int_expr.cpp +// +// Feature under test (OpenACC 3.4, Sections 2.5 and 2.16, March 2026): +// Compute constructs now use the async-argument and wait-argument +// grammar consistently with the rest of the specification. +// +// Test: +// T1 – Basic async expression and wait. 
+ +#include "acc_testsuite.h" +#include +#include +#include + +#ifndef T1 +//T1:async,wait,runtime,compute-constructs,baseline,V:3.4- +int test1() { /* returns 0 on success, non-zero on failure */ + int err = 0; + std::srand(SEED); + + real_t *a = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *b = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *c = (real_t*)std::malloc(n * sizeof(real_t)); + if (!a || !b || !c){ + std::free(a); std::free(b); std::free(c); return 1; /* free(NULL) is a no-op, so this releases any partial allocation */ + } + + for (int i = 0; i < n; ++i) { + a[i] = std::rand() / (real_t)(RAND_MAX / 10); + b[i] = std::rand() / (real_t)(RAND_MAX / 10); + c[i] = 0; + } + + int q = 4; + int q0 = q - q; /* evaluates to 0 at run time; passed below as the async-argument */ + + #pragma acc data copyin(a[0:n], b[0:n]) copy(c[0:n]) + { + #pragma acc parallel loop present(a[0:n], b[0:n], c[0:n]) async(q0) + for (int i = 0; i < n; ++i){ + c[i] = a[i] + b[i]; + } + + #pragma acc wait + } + + for (int i = 0; i < n; ++i) { + if (std::fabs(c[i] - (a[i] + b[i])) > PRECISION){ + err++; + } + } + + std::free(a); + std::free(b); + std::free(c); + return err; +} +#endif + +int main() { + int failcode = 0; + int failed; + +#ifndef T1 + failed = 0; + for (int i = 0; i < NUM_TEST_CALLS; ++i){ + failed += test1(); + } + if (failed){ + failcode |= (1 << 0); /* bit 0 of the exit status flags T1 */ + } +#endif + + return failcode; +} diff --git a/Tests/wait_argument.F90 b/Tests/wait_argument.F90 new file mode 100644 index 0000000..c2d94d4 --- /dev/null +++ b/Tests/wait_argument.F90 @@ -0,0 +1,206 @@ +! wait_argument.F90 +! +! Feature under test (OpenACC 3.4, Sections 2.5 and 2.16, March 2026): +! Compute constructs now use the async-argument and wait-argument +! grammar consistently with the rest of the specification. +! +! Tests: +! T1 – wait(queues: ...) syntax. +! T2 – wait(devnum: ...) syntax. +! T3 – wait(devnum: ... : queues: ...) syntax. 
+ + +#ifndef T1 +!T1:wait-argument,syntax,compute-constructs,queues-modifier,V:3.4- + LOGICAL FUNCTION test1() + USE OPENACC + IMPLICIT NONE + INCLUDE "acc_testsuite.Fh" + INTEGER :: i, errors + REAL(8), DIMENSION(LOOPCOUNT) :: a, b, c, d, e + + errors = 0 + + SEEDDIM(1) = 1 +# ifdef SEED + SEEDDIM(1) = SEED +# endif + CALL RANDOM_SEED(PUT=SEEDDIM) + CALL RANDOM_NUMBER(a) + CALL RANDOM_NUMBER(b) + c = 0.0D0 + d = 0.0D0 + e = 0.0D0 ! final result; stays zero unless the third kernel runs + + !$acc enter data copyin(a(1:LOOPCOUNT), b(1:LOOPCOUNT)) create(c(1:LOOPCOUNT), d(1:LOOPCOUNT), e(1:LOOPCOUNT)) + + !$acc parallel loop present(a(1:LOOPCOUNT), c(1:LOOPCOUNT)) async(1) + DO i = 1, LOOPCOUNT + c(i) = a(i) * 2.0D0 + END DO + !$acc end parallel loop + + !$acc parallel loop present(b(1:LOOPCOUNT), d(1:LOOPCOUNT)) async(2) + DO i = 1, LOOPCOUNT + d(i) = b(i) * 3.0D0 + END DO + !$acc end parallel loop + + ! Form under test: the queue-3 kernel reads c and d, which queues 1 and 2 produce. + !$acc parallel loop present(c(1:LOOPCOUNT), d(1:LOOPCOUNT), e(1:LOOPCOUNT)) async(3) wait(queues: 1, 2) + DO i = 1, LOOPCOUNT + e(i) = c(i) + d(i) + END DO + !$acc end parallel loop + + !$acc update self(e(1:LOOPCOUNT)) async(3) + DO WHILE (.NOT. acc_async_test(3)) ! spin until acc_async_test reports queue 3 complete + END DO + + DO i = 1, LOOPCOUNT + IF (ABS(e(i) - (a(i)*2.0D0 + b(i)*3.0D0)) .GT. PRECISION) errors = errors + 1 + END DO + + !$acc exit data delete(a(1:LOOPCOUNT), b(1:LOOPCOUNT), c(1:LOOPCOUNT), d(1:LOOPCOUNT), e(1:LOOPCOUNT)) + + test1 = (errors .NE. 0) ! .TRUE. means at least one element mismatched + END FUNCTION +#endif + +#ifndef T2 +!T2:wait-argument,syntax,compute-constructs,devnum-prefix,V:3.4- + LOGICAL FUNCTION test2() + USE OPENACC + IMPLICIT NONE + INCLUDE "acc_testsuite.Fh" + INTEGER :: i, errors + REAL(8), DIMENSION(LOOPCOUNT) :: a, b + + errors = 0 + a = 1.0D0 + b = 0.0D0 + + !$acc data copy(a(1:LOOPCOUNT), b(1:LOOPCOUNT)) + !$acc parallel loop async(1) + DO i = 1, LOOPCOUNT + b(i) = a(i) * 2.0D0 + END DO + !$acc end parallel loop + + ! 
Spec-valid OpenACC 3.4 wait-argument form under test: + !$acc parallel loop async(2) wait(devnum: 0 : 1) + DO i = 1, LOOPCOUNT + a(i) = b(i) + 1.0D0 + END DO + !$acc end parallel loop + + !$acc update self(a(1:LOOPCOUNT)) async(2) + DO WHILE (.NOT. acc_async_test(2)) ! spin until acc_async_test reports queue 2 complete + END DO + !$acc end data + + DO i = 1, LOOPCOUNT + IF (ABS(a(i) - 3.0D0) .GT. PRECISION) errors = errors + 1 ! expected (1*2)+1 = 3 + END DO + + test2 = (errors .NE. 0) ! .TRUE. means at least one element mismatched + END FUNCTION +#endif + +#ifndef T3 +!T3:wait-argument,syntax,compute-constructs,devnum-queues-prefix,V:3.4- + LOGICAL FUNCTION test3() + USE OPENACC + IMPLICIT NONE + INCLUDE "acc_testsuite.Fh" + INTEGER :: i, errors + REAL(8), DIMENSION(LOOPCOUNT) :: a, b, c + + errors = 0 + + SEEDDIM(1) = 1 +# ifdef SEED + SEEDDIM(1) = SEED +# endif + CALL RANDOM_SEED(PUT=SEEDDIM) + CALL RANDOM_NUMBER(a) + CALL RANDOM_NUMBER(b) + c = 0.0D0 + + !$acc data copyin(a(1:LOOPCOUNT), b(1:LOOPCOUNT)) copy(c(1:LOOPCOUNT)) + !$acc parallel loop async(1) + DO i = 1, LOOPCOUNT + c(i) = a(i) + b(i) + END DO + !$acc end parallel loop + + ! Spec-valid OpenACC 3.4 wait-argument combined form under test: + !$acc parallel loop async(2) wait(devnum: 0 : queues: 1) + DO i = 1, LOOPCOUNT + c(i) = c(i) * 2.0D0 + END DO + !$acc end parallel loop + + !$acc update self(c(1:LOOPCOUNT)) async(2) + DO WHILE (.NOT. acc_async_test(2)) ! spin until acc_async_test reports queue 2 complete + END DO + !$acc end data + + DO i = 1, LOOPCOUNT + IF (ABS(c(i) - 2.0D0*(a(i) + b(i))) .GT. PRECISION) errors = errors + 1 + END DO + + test3 = (errors .NE. 0) ! .TRUE. means at least one element mismatched + END FUNCTION +#endif + + + PROGRAM main + IMPLICIT NONE + INTEGER :: failcode, testrun + LOGICAL :: failed + INCLUDE "acc_testsuite.Fh" + +#ifndef T1 + LOGICAL :: test1 +#endif +#ifndef T2 + LOGICAL :: test2 +#endif +#ifndef T3 + LOGICAL :: test3 +#endif + + failcode = 0 + failed = .FALSE. + +#ifndef T1 + DO testrun = 1, NUM_TEST_CALLS + failed = failed .OR. test1() + END DO + IF (failed) THEN + failcode = failcode + 2 ** 0 ! bit 0 of the exit code records a T1 failure + failed = .FALSE. 
+ END IF +#endif +#ifndef T2 + DO testrun = 1, NUM_TEST_CALLS + failed = failed .OR. test2() + END DO + IF (failed) THEN + failcode = failcode + 2 ** 1 + failed = .FALSE. + END IF +#endif +#ifndef T3 + DO testrun = 1, NUM_TEST_CALLS + failed = failed .OR. test3() + END DO + IF (failed) THEN + failcode = failcode + 2 ** 2 + failed = .FALSE. + END IF +#endif + + CALL EXIT(failcode) + END PROGRAM diff --git a/Tests/wait_argument.c b/Tests/wait_argument.c new file mode 100644 index 0000000..10a2505 --- /dev/null +++ b/Tests/wait_argument.c @@ -0,0 +1,202 @@ +// wait_argument.c +// +// Feature under test (OpenACC 3.4, Sections 2.5 and 2.16, March 2026): +// Compute constructs were updated to use the async-argument and +// wait-argument grammar consistently with the rest of the specification. +// +// Tests: +// T1 – queues modifier: Uses wait(queues: 1,2) on a compute construct. +// T2 – devnum modifier: Uses wait(devnum: 0 : 1). +// T3 – devnum + queues: Uses wait(devnum: 0 : queues: 1). + +#include "acc_testsuite.h" +#include +#include +#include + +#ifndef T1 +//T1:wait-argument,syntax,compute-constructs,queues-modifier,V:3.4- +int test1() { /* returns 0 on success, non-zero on failure */ + int err = 0; + srand(SEED); + + real_t *a = (real_t*)malloc(n * sizeof(real_t)); + real_t *b = (real_t*)malloc(n * sizeof(real_t)); + real_t *c = (real_t*)malloc(n * sizeof(real_t)); + real_t *d = (real_t*)malloc(n * sizeof(real_t)); + real_t *e = (real_t*)malloc(n * sizeof(real_t)); + if (!a || !b || !c || !d || !e){ + free(a); free(b); free(c); free(d); free(e); return 1; /* free(NULL) is a no-op, so this releases any partial allocation */ + } + + for (int i = 0; i < n; ++i) { + a[i] = rand() / (real_t)(RAND_MAX / 10); + b[i] = rand() / (real_t)(RAND_MAX / 10); + c[i] = d[i] = e[i] = 0; + } + + #pragma acc enter data copyin(a[0:n], b[0:n]) create(c[0:n], d[0:n], e[0:n]) + + #pragma acc parallel loop present(a[0:n], c[0:n]) async(1) + for (int i = 0; i < n; ++i){ + c[i] = a[i] * (real_t)2; + } + + #pragma acc parallel loop present(b[0:n], d[0:n]) async(2) + for (int i = 0; i < n; ++i){ + d[i] = b[i] * (real_t)3; + } + + /* Spec-valid wait-argument form under test */ + #pragma acc parallel loop 
present(c[0:n], d[0:n], e[0:n]) async(3) wait(queues: 1, 2) + for (int i = 0; i < n; ++i){ + e[i] = c[i] + d[i]; + } + + #pragma acc update self(e[0:n]) async(3) + while (!acc_async_test(3)) { } /* spin until acc_async_test reports queue 3 complete */ + + for (int i = 0; i < n; ++i) { + if (fabs(e[i] - (a[i]*(real_t)2 + b[i]*(real_t)3)) > PRECISION){ + err++; + } + } + + #pragma acc exit data delete(a[0:n], b[0:n], c[0:n], d[0:n], e[0:n]) + + free(a); + free(b); + free(c); + free(d); + free(e); + return err; +} +#endif + +#ifndef T2 +//T2:wait-argument,syntax,compute-constructs,devnum-prefix,V:3.4- +int test2() { /* returns 0 on success, non-zero on failure */ + int err = 0; + srand(SEED); + + real_t *a = (real_t*)malloc(n * sizeof(real_t)); + real_t *b = (real_t*)malloc(n * sizeof(real_t)); + if (!a || !b){ + free(a); free(b); return 1; /* free(NULL) is a no-op, so this releases any partial allocation */ + } + + for (int i = 0; i < n; ++i){ + a[i] = (real_t)1; b[i] = (real_t)0; + } + + #pragma acc data copy(a[0:n], b[0:n]) + { + #pragma acc parallel loop async(1) + for (int i = 0; i < n; ++i){ + b[i] = a[i] * (real_t)2; + } + + /* wait(devnum: 0 : 1) => wait on queue 1 for device 0 */ + #pragma acc parallel loop async(2) wait(devnum: 0 : 1) + for (int i = 0; i < n; ++i){ + a[i] = b[i] + (real_t)1; + } + + #pragma acc update self(a[0:n]) async(2) + while (!acc_async_test(2)) { } /* spin until acc_async_test reports queue 2 complete */ + } + + for (int i = 0; i < n; ++i) { + if (fabs(a[i] - (real_t)3) > PRECISION){ /* expected (1*2)+1 = 3 */ + err++; + } + } + + free(a); + free(b); + return err; +} +#endif + +#ifndef T3 +//T3:wait-argument,syntax,compute-constructs,devnum-queues-prefix,V:3.4- +int test3() { /* returns 0 on success, non-zero on failure */ + int err = 0; + srand(SEED); + + real_t *a = (real_t*)malloc(n * sizeof(real_t)); + real_t *b = (real_t*)malloc(n * sizeof(real_t)); + real_t *c = (real_t*)malloc(n * sizeof(real_t)); + if (!a || !b || !c){ + free(a); free(b); free(c); return 1; /* free(NULL) is a no-op, so this releases any partial allocation */ + } + + for (int i = 0; i < n; ++i) { + a[i] = rand() / (real_t)(RAND_MAX / 10); + b[i] = rand() / (real_t)(RAND_MAX / 10); + c[i] = 0; + } + + #pragma acc data copyin(a[0:n], b[0:n]) copy(c[0:n]) + { + #pragma acc parallel loop async(1) + for (int i = 0; i < n; ++i){ + c[i] = a[i] + b[i]; + } + + #pragma acc parallel loop async(2) wait(devnum: 0 : queues: 1) + for (int i = 0; i < n; ++i){ + c[i] = c[i] * (real_t)2; + } + + #pragma acc update self(c[0:n]) 
async(2) + while (!acc_async_test(2)) { } + } + + for (int i = 0; i < n; ++i) { + if (fabs(c[i] - (real_t)2*(a[i] + b[i])) > PRECISION){ + err++; + } + } + + free(a); + free(b); + free(c); + return err; +} +#endif + +int main() { + int failcode = 0; + int failed; + +#ifndef T1 + failed = 0; + for (int i = 0; i < NUM_TEST_CALLS; ++i){ + failed += test1(); + } + if (failed){ + failcode |= (1 << 0); + } +#endif +#ifndef T2 + failed = 0; + for (int i = 0; i < NUM_TEST_CALLS; ++i){ + failed += test2(); + } + if (failed){ + failcode |= (1 << 1); + } +#endif +#ifndef T3 + failed = 0; + for (int i = 0; i < NUM_TEST_CALLS; ++i){ + failed += test3(); + } + if (failed){ + failcode |= (1 << 2); + } +#endif + + return failcode; +} diff --git a/Tests/wait_argument.cpp b/Tests/wait_argument.cpp new file mode 100644 index 0000000..3fdb20b --- /dev/null +++ b/Tests/wait_argument.cpp @@ -0,0 +1,200 @@ +// wait_argument.cpp +// +// Feature under test (OpenACC 3.4, Sections 2.5 and 2.16, March 2026): +// Compute constructs now use the async-argument and wait-argument +// grammar consistently with the rest of the specification. +// +// Tests: +// T1 – wait(queues: ...) syntax. +// T2 – wait(devnum: ...) syntax. +// T3 – wait(devnum: ... : queues: ...) syntax. 
+ +#include "acc_testsuite.h" +#include +#include +#include + +#ifndef T1 +//T1:wait-argument,syntax,compute-constructs,queues-modifier,V:3.4- +int test1() { /* returns 0 on success, non-zero on failure */ + int err = 0; + std::srand(SEED); + + real_t *a = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *b = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *c = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *d = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *e = (real_t*)std::malloc(n * sizeof(real_t)); + if (!a || !b || !c || !d || !e){ + std::free(a); std::free(b); std::free(c); std::free(d); std::free(e); return 1; /* free(NULL) is a no-op, so this releases any partial allocation */ + } + + for (int i = 0; i < n; ++i) { + a[i] = std::rand() / (real_t)(RAND_MAX / 10); + b[i] = std::rand() / (real_t)(RAND_MAX / 10); + c[i] = d[i] = e[i] = 0; + } + + #pragma acc enter data copyin(a[0:n], b[0:n]) create(c[0:n], d[0:n], e[0:n]) + + #pragma acc parallel loop present(a[0:n], c[0:n]) async(1) + for (int i = 0; i < n; ++i){ + c[i] = a[i] * (real_t)2; + } + + #pragma acc parallel loop present(b[0:n], d[0:n]) async(2) + for (int i = 0; i < n; ++i){ + d[i] = b[i] * (real_t)3; + } + + #pragma acc parallel loop present(c[0:n], d[0:n], e[0:n]) async(3) wait(queues: 1, 2) + for (int i = 0; i < n; ++i){ + e[i] = c[i] + d[i]; + } + + #pragma acc update self(e[0:n]) async(3) + while (!acc_async_test(3)) { } /* spin until acc_async_test reports queue 3 complete */ + + for (int i = 0; i < n; ++i) { + if (std::fabs(e[i] - (a[i]*(real_t)2 + b[i]*(real_t)3)) > PRECISION){ + err++; + } + } + + #pragma acc exit data delete(a[0:n], b[0:n], c[0:n], d[0:n], e[0:n]) + + std::free(a); + std::free(b); + std::free(c); + std::free(d); + std::free(e); + return err; +} +#endif + +#ifndef T2 +//T2:wait-argument,syntax,compute-constructs,devnum-prefix,V:3.4- +int test2() { /* returns 0 on success, non-zero on failure */ + int err = 0; + std::srand(SEED); + + real_t *a = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *b = (real_t*)std::malloc(n * sizeof(real_t)); + if (!a || !b){ + std::free(a); std::free(b); return 1; /* free(NULL) is a no-op, so this releases any partial allocation */ + } + + for (int i = 0; i < n; ++i){ + a[i] = (real_t)1; + b[i] = (real_t)0; + } + + #pragma acc data copy(a[0:n], b[0:n]) + { + #pragma acc parallel loop async(1) + for (int i = 0; i < n; ++i){ + b[i] = a[i] * (real_t)2; + } + + #pragma acc parallel loop async(2) wait(devnum: 0 : 1) + for (int i = 0; i < n; ++i){ 
+ a[i] = b[i] + (real_t)1; + } + + #pragma acc update self(a[0:n]) async(2) + while (!acc_async_test(2)) { } /* spin until acc_async_test reports queue 2 complete */ + } + + for (int i = 0; i < n; ++i) { + if (std::fabs(a[i] - (real_t)3) > PRECISION){ /* expected (1*2)+1 = 3 */ + err++; + } + } + + std::free(a); + std::free(b); + return err; +} +#endif + +#ifndef T3 +//T3:wait-argument,syntax,compute-constructs,devnum-queues-prefix,V:3.4- +int test3() { /* returns 0 on success, non-zero on failure */ + int err = 0; + std::srand(SEED); + + real_t *a = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *b = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *c = (real_t*)std::malloc(n * sizeof(real_t)); + if (!a || !b || !c){ + std::free(a); std::free(b); std::free(c); return 1; /* free(NULL) is a no-op, so this releases any partial allocation */ + } + + for (int i = 0; i < n; ++i) { + a[i] = std::rand() / (real_t)(RAND_MAX / 10); + b[i] = std::rand() / (real_t)(RAND_MAX / 10); + c[i] = 0; + } + + #pragma acc data copyin(a[0:n], b[0:n]) copy(c[0:n]) + { + #pragma acc parallel loop async(1) + for (int i = 0; i < n; ++i){ + c[i] = a[i] + b[i]; + } + + #pragma acc parallel loop async(2) wait(devnum: 0 : queues: 1) + for (int i = 0; i < n; ++i){ + c[i] = c[i] * (real_t)2; + } + + #pragma acc update self(c[0:n]) async(2) + while (!acc_async_test(2)) { } /* spin until acc_async_test reports queue 2 complete */ + } + + for (int i = 0; i < n; ++i) { + if (std::fabs(c[i] - (real_t)2*(a[i] + b[i])) > PRECISION){ + err++; + } + } + + std::free(a); + std::free(b); + std::free(c); + return err; +} +#endif + +int main() { + int failcode = 0; + int failed; + +#ifndef T1 + failed = 0; + for (int i = 0; i < NUM_TEST_CALLS; ++i){ + failed += test1(); + } + if (failed){ + failcode |= (1 << 0); /* bit 0 of the exit status flags T1 */ + } +#endif +#ifndef T2 + failed = 0; + for (int i = 0; i < NUM_TEST_CALLS; ++i){ + failed += test2(); + } + if (failed){ + failcode |= (1 << 1); /* bit 1 flags T2 */ + } +#endif +#ifndef T3 + failed = 0; + for (int i = 0; i < NUM_TEST_CALLS; ++i){ + failed += test3(); + } + if (failed){ + failcode |= (1 << 2); /* bit 2 flags T3 */ + } +#endif + return failcode; +} From b50533cab1502f3af33958bfbf25a7f5750e1004 Mon Sep 17 00:00:00 2001 From: Ryan Padrone Date: Sun, 26 Apr 2026 16:54:15 -0700 Subject: [PATCH 2/2] Add async-argument and wait-argument grammar tests on compute constructs --- 
Tests/acc_async_noval.F90 | 73 +++++++++++++++++++++++++++++++++++- Tests/acc_async_noval.c | 79 +++++++++++++++++++++++++++++++++++++++ Tests/acc_async_noval.cpp | 78 +++++++++++++++++++++++++++++++++++++- Tests/acc_async_sync.F90 | 57 ++++++++++++++++++++++++++++ Tests/acc_async_sync.c | 67 +++++++++++++++++++++++++++++++++ Tests/acc_async_sync.cpp | 65 ++++++++++++++++++++++++++++++++ 6 files changed, 417 insertions(+), 2 deletions(-) diff --git a/Tests/acc_async_noval.F90 b/Tests/acc_async_noval.F90 index 1ad058d..0113e09 100644 --- a/Tests/acc_async_noval.F90 +++ b/Tests/acc_async_noval.F90 @@ -6,6 +6,7 @@ ! ! Test: ! T1 – async(acc_async_noval). +! T2 – Behavior check: verifies async with no argument behaves like async(acc_async_noval). #ifndef T1 !T1:async-argument,special-value,compute-constructs,acc_async_noval,V:3.4- @@ -44,6 +45,65 @@ LOGICAL FUNCTION test1() test1 = (errors .NE. 0) END FUNCTION #endif +#ifndef T2 +!T2:async-argument,special-value,compute-constructs,acc_async_noval,equivalence,V:3.4- + LOGICAL FUNCTION test2() + USE OPENACC + IMPLICIT NONE + INCLUDE "acc_testsuite.Fh" + INTEGER :: i, errors + INTEGER :: q + REAL(8), DIMENSION(LOOPCOUNT) :: a, b, c, d + + errors = 0 + q = 7 ! queue id installed as the default async queue on the next line + CALL acc_set_default_async(q) + + SEEDDIM(1) = 1 +# ifdef SEED + SEEDDIM(1) = SEED +# endif + CALL RANDOM_SEED(PUT=SEEDDIM) + CALL RANDOM_NUMBER(a) + CALL RANDOM_NUMBER(b) + c = 0.0D0 ! written by the bare async kernel + d = 0.0D0 ! written by the async(acc_async_noval) kernel + + !$acc enter data copyin(a(1:LOOPCOUNT), b(1:LOOPCOUNT)) create(c(1:LOOPCOUNT), d(1:LOOPCOUNT)) + + !$acc parallel loop present(a(1:LOOPCOUNT), b(1:LOOPCOUNT), c(1:LOOPCOUNT)) async + DO i = 1, LOOPCOUNT + c(i) = a(i) - b(i) + END DO + !$acc end parallel loop + + !$acc update self(c(1:LOOPCOUNT)) async(q) + DO WHILE (.NOT. 
acc_async_test(q)) + END DO + + !$acc parallel loop present(a(1:LOOPCOUNT), b(1:LOOPCOUNT), d(1:LOOPCOUNT)) async(acc_async_noval) + DO i = 1, LOOPCOUNT + d(i) = a(i) - b(i) + END DO + !$acc end parallel loop + + !$acc update self(d(1:LOOPCOUNT)) async(q) + DO WHILE (.NOT. acc_async_test(q)) ! spin until acc_async_test reports queue q complete + END DO + + IF (acc_get_default_async() .NE. q) errors = errors + 1 ! default queue must still be q + + DO i = 1, LOOPCOUNT + IF (ABS(c(i) - (a(i) - b(i))) .GT. PRECISION) errors = errors + 1 + IF (ABS(d(i) - (a(i) - b(i))) .GT. PRECISION) errors = errors + 1 + IF (ABS(c(i) - d(i)) .GT. PRECISION) errors = errors + 1 + END DO + + !$acc exit data delete(a(1:LOOPCOUNT), b(1:LOOPCOUNT), c(1:LOOPCOUNT), d(1:LOOPCOUNT)) + + test2 = (errors .NE. 0) ! .TRUE. means at least one check failed + END FUNCTION +#endif PROGRAM main IMPLICIT NONE @@ -54,7 +114,9 @@ PROGRAM main #ifndef T1 LOGICAL :: test1 #endif - +#ifndef T2 + LOGICAL :: test2 +#endif failcode = 0 failed = .FALSE. @@ -67,6 +129,15 @@ PROGRAM main failed = .FALSE. END IF #endif +#ifndef T2 + DO testrun = 1, NUM_TEST_CALLS + failed = failed .OR. test2() + END DO + IF (failed) THEN + failcode = failcode + 2 ** 1 ! bit 1 of the exit code records a T2 failure + failed = .FALSE. + END IF +#endif CALL EXIT(failcode) END PROGRAM diff --git a/Tests/acc_async_noval.c b/Tests/acc_async_noval.c index 538ca21..e3fd5fd 100644 --- a/Tests/acc_async_noval.c +++ b/Tests/acc_async_noval.c @@ -6,6 +6,7 @@ // // Test: // T1 – Explicit acc_async_noval: Uses async(acc_async_noval). +// T2 – Behavior check: verifies async with no argument behaves like async(acc_async_noval). 
#include "acc_testsuite.h" #include @@ -53,6 +54,75 @@ int test1() { return err; } #endif +#ifndef T2 +//T2:async-argument,special-value,compute-constructs,acc_async_noval,equivalence,V:3.4- +int test2() { + int err = 0; + srand(SEED); + + const int q = 7; + acc_set_default_async(q); + + real_t *a = (real_t*)malloc(n * sizeof(real_t)); + real_t *b = (real_t*)malloc(n * sizeof(real_t)); + real_t *c = (real_t*)malloc(n * sizeof(real_t)); + real_t *d = (real_t*)malloc(n * sizeof(real_t)); + if (!a || !b || !c || !d){ + return 1; + } + + for (int i = 0; i < n; ++i){ + a[i] = rand() / (real_t)(RAND_MAX / 10); + b[i] = rand() / (real_t)(RAND_MAX / 10); + c[i] = 0; + d[i] = 0; + } + + #pragma acc enter data copyin(a[0:n], b[0:n]) create(c[0:n], d[0:n]) + + /* async with no argument: should behave as acc_async_noval */ + #pragma acc parallel loop present(a[0:n], b[0:n], c[0:n]) async + for (int i = 0; i < n; ++i){ + c[i] = a[i] - b[i]; + } + + #pragma acc update self(c[0:n]) async(q) + while (!acc_async_test(q)){ } + + /* Explicit acc_async_noval form */ + #pragma acc parallel loop present(a[0:n], b[0:n], d[0:n]) async(acc_async_noval) + for (int i = 0; i < n; ++i){ + d[i] = a[i] - b[i]; + } + + #pragma acc update self(d[0:n]) async(q) + while (!acc_async_test(q)){ } + + if (acc_get_default_async() != q){ + err++; + } + + for (int i = 0; i < n; ++i){ + if (fabs(c[i] - (a[i] - b[i])) > PRECISION){ + err++; + } + if (fabs(d[i] - (a[i] - b[i])) > PRECISION){ + err++; + } + if (fabs(c[i] - d[i]) > PRECISION){ + err++; + } + } + + #pragma acc exit data delete(a[0:n], b[0:n], c[0:n], d[0:n]) + + free(a); + free(b); + free(c); + free(d); + return err; +} +#endif int main() { int failcode = 0; @@ -68,5 +138,14 @@ int main() { } #endif +#ifndef T2 + failed = 0; + for (int i = 0; i < NUM_TEST_CALLS; ++i){ + failed += test2(); + } + if (failed){ + failcode |= (1 << 1); + } +#endif return failcode; } diff --git a/Tests/acc_async_noval.cpp b/Tests/acc_async_noval.cpp index 
2ff5622..1fe8f4b 100644 --- a/Tests/acc_async_noval.cpp +++ b/Tests/acc_async_noval.cpp @@ -6,6 +6,7 @@ // // Test: // T1 – async(acc_async_noval). +// T2 – Behavior check: verifies async with no argument behaves like async(acc_async_noval). #include "acc_testsuite.h" #include @@ -53,6 +54,73 @@ int test1() { return err; } #endif +#ifndef T2 +//T2:async-argument,special-value,compute-constructs,acc_async_noval,equivalence,V:3.4- +int test2() { + int err = 0; + std::srand(SEED); + + const int q = 7; + acc_set_default_async(q); + + real_t *a = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *b = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *c = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *d = (real_t*)std::malloc(n * sizeof(real_t)); + if (!a || !b || !c || !d){ + return 1; + } + + for (int i = 0; i < n; ++i){ + a[i] = std::rand() / (real_t)(RAND_MAX / 10); + b[i] = std::rand() / (real_t)(RAND_MAX / 10); + c[i] = 0; + d[i] = 0; + } + + #pragma acc enter data copyin(a[0:n], b[0:n]) create(c[0:n], d[0:n]) + + #pragma acc parallel loop present(a[0:n], b[0:n], c[0:n]) async + for (int i = 0; i < n; ++i){ + c[i] = a[i] - b[i]; + } + + #pragma acc update self(c[0:n]) async(q) + while (!acc_async_test(q)){ } + + #pragma acc parallel loop present(a[0:n], b[0:n], d[0:n]) async(acc_async_noval) + for (int i = 0; i < n; ++i){ + d[i] = a[i] - b[i]; + } + + #pragma acc update self(d[0:n]) async(q) + while (!acc_async_test(q)){ } + + if (acc_get_default_async() != q){ + err++; + } + + for (int i = 0; i < n; ++i){ + if (std::fabs(c[i] - (a[i] - b[i])) > PRECISION){ + err++; + } + if (std::fabs(d[i] - (a[i] - b[i])) > PRECISION){ + err++; + } + if (std::fabs(c[i] - d[i]) > PRECISION){ + err++; + } + } + + #pragma acc exit data delete(a[0:n], b[0:n], c[0:n], d[0:n]) + + std::free(a); + std::free(b); + std::free(c); + std::free(d); + return err; +} +#endif int main() { int failcode = 0; @@ -67,6 +135,14 @@ int main() { failcode |= (1 << 0); } #endif - +#ifndef T2 + 
failed = 0; + for (int i = 0; i < NUM_TEST_CALLS; ++i){ + failed += test2(); + } + if (failed){ + failcode |= (1 << 1); + } +#endif return failcode; } diff --git a/Tests/acc_async_sync.F90 b/Tests/acc_async_sync.F90 index 272f34e..006c43e 100644 --- a/Tests/acc_async_sync.F90 +++ b/Tests/acc_async_sync.F90 @@ -6,6 +6,7 @@ ! ! Test: ! T1 – async(acc_async_sync). +! T2 – Behavior check: verifies no async clause behaves like async(acc_async_sync). #ifndef T1 !T1:async-argument,special-value,compute-constructs,acc_async_sync,V:3.4- @@ -44,6 +45,50 @@ LOGICAL FUNCTION test1() test1 = (errors .NE. 0) END FUNCTION #endif +#ifndef T2 +!T2:async-argument,special-value,compute-constructs,acc_async_sync,equivalence,V:3.4- + LOGICAL FUNCTION test2() + USE OPENACC + IMPLICIT NONE + INCLUDE "acc_testsuite.Fh" + INTEGER :: i, errors + REAL(8), DIMENSION(LOOPCOUNT) :: a, b, c, d + + errors = 0 + + SEEDDIM(1) = 1 +# ifdef SEED + SEEDDIM(1) = SEED +# endif + CALL RANDOM_SEED(PUT=SEEDDIM) + CALL RANDOM_NUMBER(a) + CALL RANDOM_NUMBER(b) + c = 0.0D0 + d = 0.0D0 + + !$acc data copyin(a(1:LOOPCOUNT), b(1:LOOPCOUNT)) copy(c(1:LOOPCOUNT), d(1:LOOPCOUNT)) + !$acc parallel loop + DO i = 1, LOOPCOUNT + c(i) = a(i) + b(i) + END DO + !$acc end parallel loop + + !$acc parallel loop async(acc_async_sync) + DO i = 1, LOOPCOUNT + d(i) = a(i) + b(i) + END DO + !$acc end parallel loop + !$acc end data + + DO i = 1, LOOPCOUNT + IF (ABS(c(i) - (a(i) + b(i))) .GT. PRECISION) errors = errors + 1 + IF (ABS(d(i) - (a(i) + b(i))) .GT. PRECISION) errors = errors + 1 + IF (ABS(c(i) - d(i)) .GT. PRECISION) errors = errors + 1 + END DO + + test2 = (errors .NE. 0) + END FUNCTION +#endif PROGRAM main IMPLICIT NONE @@ -54,6 +99,9 @@ PROGRAM main #ifndef T1 LOGICAL :: test1 #endif +#ifndef T2 + LOGICAL :: test2 +#endif failcode = 0 failed = .FALSE. @@ -67,6 +115,15 @@ PROGRAM main failed = .FALSE. END IF #endif +#ifndef T2 + DO testrun = 1, NUM_TEST_CALLS + failed = failed .OR. 
test2() + END DO + IF (failed) THEN + failcode = failcode + 2 ** 1 + failed = .FALSE. + END IF +#endif CALL EXIT(failcode) END PROGRAM diff --git a/Tests/acc_async_sync.c b/Tests/acc_async_sync.c index 38fc09a..c7b0fc7 100644 --- a/Tests/acc_async_sync.c +++ b/Tests/acc_async_sync.c @@ -6,6 +6,7 @@ // // Test: // T1 – Explicit acc_async_sync: Uses async(acc_async_sync). +// T2 – Behavior check: verifies no async clause behaves like async(acc_async_sync). #include "acc_testsuite.h" #include @@ -54,6 +55,62 @@ int test1() { } #endif +#ifndef T2 +//T2:async-argument,special-value,compute-constructs,acc_async_sync,equivalence,V:3.4- +int test2() { + int err = 0; + srand(SEED); + + real_t *a = (real_t*)malloc(n * sizeof(real_t)); + real_t *b = (real_t*)malloc(n * sizeof(real_t)); + real_t *c = (real_t*)malloc(n * sizeof(real_t)); + real_t *d = (real_t*)malloc(n * sizeof(real_t)); + if (!a || !b || !c || !d){ + return 1; + } + + for (int i = 0; i < n; ++i){ + a[i] = rand() / (real_t)(RAND_MAX / 10); + b[i] = rand() / (real_t)(RAND_MAX / 10); + c[i] = 0; + d[i] = 0; + } + + #pragma acc data copyin(a[0:n], b[0:n]) copy(c[0:n], d[0:n]) + { + /* No async clause: should behave as if async argument is acc_async_sync */ + #pragma acc parallel loop + for (int i = 0; i < n; ++i){ + c[i] = a[i] + b[i]; + } + + /* Explicit acc_async_sync form */ + #pragma acc parallel loop async(acc_async_sync) + for (int i = 0; i < n; ++i){ + d[i] = a[i] + b[i]; + } + } + + for (int i = 0; i < n; ++i){ + if (fabs(c[i] - (a[i] + b[i])) > PRECISION){ + err++; + } + if (fabs(d[i] - (a[i] + b[i])) > PRECISION){ + err++; + } + if (fabs(c[i] - d[i]) > PRECISION){ + err++; + } + } + + free(a); + free(b); + free(c); + free(d); + return err; +} +#endif + int main() { int failcode = 0; int failed; @@ -68,5 +125,15 @@ int main() { } #endif +#ifndef T2 + failed = 0; + for (int i = 0; i < NUM_TEST_CALLS; ++i){ + failed += test2(); + } + if (failed){ + failcode |= (1 << 1); + } +#endif + return failcode; } diff 
--git a/Tests/acc_async_sync.cpp b/Tests/acc_async_sync.cpp index 160c231..00c2ca0 100644 --- a/Tests/acc_async_sync.cpp +++ b/Tests/acc_async_sync.cpp @@ -6,6 +6,7 @@ // // Test: // T1 – async(acc_async_sync). +// T2 – Behavior check: verifies no async clause behaves like async(acc_async_sync). #include "acc_testsuite.h" #include @@ -54,6 +55,60 @@ int test1() { } #endif +#ifndef T2 +//T2:async-argument,special-value,compute-constructs,acc_async_sync,equivalence,V:3.4- +int test2() { + int err = 0; + std::srand(SEED); + + real_t *a = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *b = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *c = (real_t*)std::malloc(n * sizeof(real_t)); + real_t *d = (real_t*)std::malloc(n * sizeof(real_t)); + if (!a || !b || !c || !d){ + return 1; + } + + for (int i = 0; i < n; ++i){ + a[i] = std::rand() / (real_t)(RAND_MAX / 10); + b[i] = std::rand() / (real_t)(RAND_MAX / 10); + c[i] = 0; + d[i] = 0; + } + + #pragma acc data copyin(a[0:n], b[0:n]) copy(c[0:n], d[0:n]) + { + #pragma acc parallel loop + for (int i = 0; i < n; ++i){ + c[i] = a[i] + b[i]; + } + + #pragma acc parallel loop async(acc_async_sync) + for (int i = 0; i < n; ++i){ + d[i] = a[i] + b[i]; + } + } + + for (int i = 0; i < n; ++i){ + if (std::fabs(c[i] - (a[i] + b[i])) > PRECISION){ + err++; + } + if (std::fabs(d[i] - (a[i] + b[i])) > PRECISION){ + err++; + } + if (std::fabs(c[i] - d[i]) > PRECISION){ + err++; + } + } + + std::free(a); + std::free(b); + std::free(c); + std::free(d); + return err; +} +#endif + int main() { int failcode = 0; int failed; @@ -68,5 +123,15 @@ int main() { } #endif +#ifndef T2 + failed = 0; + for (int i = 0; i < NUM_TEST_CALLS; ++i){ + failed += test2(); + } + if (failed){ + failcode |= (1 << 1); + } +#endif + return failcode; }