Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
195 changes: 195 additions & 0 deletions Tests/acc_nested_internal_procedure.F90
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
! acc_nested_internal_procedure.F90
!
! Feature under test (OpenACC 3.4; clarified procedure-boundary analysis):
! - Clarified analysis of implicit data attributes and parallelism across
! boundaries of procedures that can appear within other procedures,
! including Fortran internal procedures (nested subprograms).
!
! Notes
! - T1 (internal procedure called from parallel loop): Enters an OpenACC
! data region, then executes a parallel loop that calls a contained
! subroutine which references host-associated variables (outer-scope
! arrays/scalars). The contained subroutine is marked device-callable
! via '!$acc routine seq'. Exercises implicit data analysis across an
! internal-procedure boundary.
! - T2 (acc loop inside internal procedure): Enters an OpenACC 'parallel'
! region and calls a contained subroutine that contains an '!$acc loop'
! directive. Exercises loop/parallelism association and “orphaned loop”
! handling across an internal-procedure boundary.

#ifndef T1
!T1:runtime,data,implicit-data,procedure-boundary,fortran-internal-procedure,construct-independent,V:3.4-
LOGICAL FUNCTION test1()
USE OPENACC
IMPLICIT NONE
INCLUDE "acc_testsuite.Fh"

INTEGER :: i
INTEGER :: errors
REAL(8), DIMENSION(LOOPCOUNT) :: a, b, c
REAL(8), DIMENSION(LOOPCOUNT) :: c_ref
REAL(8) :: alpha

errors = 0

SEEDDIM(1) = 1
# ifdef SEED
SEEDDIM(1) = SEED
# endif
CALL RANDOM_SEED(PUT=SEEDDIM)
CALL RANDOM_NUMBER(a)
CALL RANDOM_NUMBER(b)
c = 0.0D0

alpha = 3.0D0

! Reference result on host
DO i = 1, LOOPCOUNT
c_ref(i) = a(i) + b(i) + alpha
END DO

!$acc data copyin(a(1:LOOPCOUNT), b(1:LOOPCOUNT)) copy(c(1:LOOPCOUNT))
! Call an INTERNAL procedure from inside a parallel loop.
! This stresses that data/attribute analysis remains correct across
! the internal-procedure boundary, including host-associated scalars.
!$acc parallel loop default(present) firstprivate(alpha)
DO i = 1, LOOPCOUNT
CALL do_point(i)
END DO
!$acc end parallel loop
!$acc end data

DO i = 1, LOOPCOUNT
IF (ABS(c(i) - c_ref(i)) .GT. PRECISION) THEN
errors = errors + 1
END IF
END DO

IF (errors .EQ. 0) THEN
test1 = .FALSE.
ELSE
test1 = .TRUE.
END IF

RETURN

CONTAINS

SUBROUTINE do_point(idx)
IMPLICIT NONE
INTEGER, INTENT(IN) :: idx
! Mark internal procedure as device-callable.
!$acc routine seq
c(idx) = a(idx) + b(idx) + alpha
END SUBROUTINE do_point

END FUNCTION
#endif

#ifndef T2
!T2:runtime,data,parallelism,procedure-boundary,fortran-internal-procedure,construct-independent,V:3.4-
LOGICAL FUNCTION test2()
USE OPENACC
IMPLICIT NONE
INCLUDE "acc_testsuite.Fh"

INTEGER :: i
INTEGER :: errors
REAL(8), DIMENSION(LOOPCOUNT) :: a, b, c
REAL(8), DIMENSION(LOOPCOUNT) :: c_ref
REAL(8) :: beta

errors = 0

SEEDDIM(1) = 1
# ifdef SEED
SEEDDIM(1) = SEED
# endif
CALL RANDOM_SEED(PUT=SEEDDIM)
CALL RANDOM_NUMBER(a)
CALL RANDOM_NUMBER(b)
c = 0.0D0

beta = 1.5D0

! Reference result on host
DO i = 1, LOOPCOUNT
c_ref(i) = beta * a(i) - b(i)
END DO

!$acc data copyin(a(1:LOOPCOUNT), b(1:LOOPCOUNT)) copy(c(1:LOOPCOUNT))
! Enter a parallel region, then run an internal procedure that
! contains the ACC LOOP directive (parallelism/loop context crossing
! an internal-procedure boundary).
!$acc parallel default(present) firstprivate(beta)
CALL do_loop()
!$acc end parallel
!$acc end data

DO i = 1, LOOPCOUNT
IF (ABS(c(i) - c_ref(i)) .GT. PRECISION) THEN
errors = errors + 1
END IF
END DO

IF (errors .EQ. 0) THEN
test2 = .FALSE.
ELSE
test2 = .TRUE.
END IF

RETURN

CONTAINS

SUBROUTINE do_loop()
IMPLICIT NONE
INTEGER :: j
!$acc routine seq
!$acc loop
DO j = 1, LOOPCOUNT
c(j) = beta * a(j) - b(j)
END DO
END SUBROUTINE do_loop

END FUNCTION
#endif

PROGRAM main
IMPLICIT NONE
INTEGER :: failcode, testrun
LOGICAL :: failed
INCLUDE "acc_testsuite.Fh"

#ifndef T1
LOGICAL :: test1
#endif
#ifndef T2
LOGICAL :: test2
#endif

failcode = 0
failed = .FALSE.

#ifndef T1
DO testrun = 1, NUM_TEST_CALLS
failed = failed .OR. test1()
END DO
IF (failed) THEN
failcode = failcode + 2 ** 0
failed = .FALSE.
END IF
#endif

#ifndef T2
DO testrun = 1, NUM_TEST_CALLS
failed = failed .OR. test2()
END DO
IF (failed) THEN
failcode = failcode + 2 ** 1
failed = .FALSE.
END IF
#endif

CALL EXIT(failcode)
END PROGRAM
201 changes: 201 additions & 0 deletions Tests/acc_nested_internal_procedure.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
// acc_nested_procedure_boundaries.cpp
//
// Feature under test (OpenACC 3.4; clarified procedure-boundary analysis):
// - Clarified analysis of implicit data attributes and parallelism across
// boundaries of procedures that can appear within other procedures
// (e.g., C++ lambdas, C++ class member functions).
//
// Notes:
// - T1 (C++ lambda in parallel loop): Calls a lambda from within an
// OpenACC 'parallel loop' region while relying on the surrounding data
// region + default(present) and a captured scalar. Exercises implicit
// data attribute analysis for variables referenced through a lambda.
// - T2 (C++ member function in parallel loop): Calls a class member
// function from within an OpenACC 'parallel loop' region using a
// firstprivate object containing pointer members and a scalar member.
// Exercises procedure-boundary analysis for class/struct objects and
// pointer members used on the device.
// - T3 (acc loop inside a lambda): Places an OpenACC 'loop' directive
// inside a lambda body and invokes it within an OpenACC 'parallel'
// region. Exercises loop/parallelism assoc

#include "acc_testsuite.h"

#include <cstdlib>
#include <cmath>

#ifndef T1
//T1:runtime,data,implicit-data,procedure-boundary,cxx-lambda,construct-independent,V:3.4-
int test1() {
int err = 0;
srand(SEED);

real_t *a = (real_t*)malloc(n * sizeof(real_t));
real_t *b = (real_t*)malloc(n * sizeof(real_t));
real_t *c = (real_t*)malloc(n * sizeof(real_t));
if (!a || !b || !c) return 1;

for (int i = 0; i < n; ++i) {
a[i] = rand() / (real_t)(RAND_MAX / 10);
b[i] = rand() / (real_t)(RAND_MAX / 10);
c[i] = 0;
}

// Capture-by-value scalar should behave like firstprivate in device code.
const real_t scale = (real_t)3.0;

#pragma acc data copyin(a[0:n], b[0:n]) copy(c[0:n])
{
// Lambda defined inside the region; called from within parallel loop.
auto op = [=](int i) {
// Uses a, b, c from the surrounding scope; compiler must analyze
// those references correctly across the lambda boundary.
c[i] = a[i] + b[i] + scale;
};

#pragma acc parallel loop default(present)
for (int i = 0; i < n; ++i) {
op(i);
}
}

for (int i = 0; i < n; ++i) {
real_t expect = a[i] + b[i] + scale;
if (fabs(c[i] - expect) > PRECISION) err++;
}

free(a);
free(b);
free(c);
return err;
}
#endif

#ifndef T2
//T2:runtime,data,implicit-data,procedure-boundary,cxx-member-function,construct-independent,V:3.4-
int test2() {
int err = 0;
srand(SEED);

real_t *a = (real_t*)malloc(n * sizeof(real_t));
real_t *b = (real_t*)malloc(n * sizeof(real_t));
real_t *c = (real_t*)malloc(n * sizeof(real_t));
if (!a || !b || !c) return 1;

for (int i = 0; i < n; ++i) {
a[i] = rand() / (real_t)(RAND_MAX / 10);
b[i] = rand() / (real_t)(RAND_MAX / 10);
c[i] = 0;
}

struct Worker {
real_t *a;
real_t *b;
real_t *c;
real_t bias; // scalar member should transfer with firstprivate-like behavior

// Member function called on device; references through member pointers.
void work(int i) const {
c[i] = a[i] * bias + b[i];
}
};

Worker w;
w.a = a; w.b = b; w.c = c;
w.bias = (real_t)2.0;

#pragma acc data copyin(a[0:n], b[0:n]) copy(c[0:n])
{
// w is used inside device region; it should be handled as firstprivate
// and its pointer members should refer to present device data.
#pragma acc parallel loop default(present) firstprivate(w)
for (int i = 0; i < n; ++i) {
w.work(i);
}
}

for (int i = 0; i < n; ++i) {
real_t expect = a[i] * w.bias + b[i];
if (fabs(c[i] - expect) > PRECISION) err++;
}

free(a);
free(b);
free(c);
return err;
}
#endif

#ifndef T3
//T3:runtime,data,parallelism,procedure-boundary,cxx-lambda-loop,construct-independent,V:3.4-
int test3() {
int err = 0;
srand(SEED);

real_t *a = (real_t*)malloc(n * sizeof(real_t));
real_t *b = (real_t*)malloc(n * sizeof(real_t));
real_t *c = (real_t*)malloc(n * sizeof(real_t));
if (!a || !b || !c) return 1;

for (int i = 0; i < n; ++i) {
a[i] = rand() / (real_t)(RAND_MAX / 10);
b[i] = rand() / (real_t)(RAND_MAX / 10);
c[i] = 0;
}

const real_t alpha = (real_t)1.5;

#pragma acc data copyin(a[0:n], b[0:n]) copy(c[0:n])
{
// We place the OpenACC loop directive inside a lambda body.
// This stresses that loop/parallel analysis remains valid across
// the lambda boundary (i.e., directives in nested procedures).
auto do_loop = [=]() {
#pragma acc loop
for (int i = 0; i < n; ++i) {
c[i] = alpha * a[i] - b[i];
}
};

#pragma acc parallel default(present)
{
do_loop();
}
}

for (int i = 0; i < n; ++i) {
real_t expect = alpha * a[i] - b[i];
if (fabs(c[i] - expect) > PRECISION) err++;
}

free(a);
free(b);
free(c);
return err;
}
#endif

int main() {
int failcode = 0;
int failed;

#ifndef T1
failed = 0;
for (int i = 0; i < NUM_TEST_CALLS; ++i) failed += test1();
if (failed != 0) failcode += (1 << 0);
#endif

#ifndef T2
failed = 0;
for (int i = 0; i < NUM_TEST_CALLS; ++i) failed += test2();
if (failed != 0) failcode += (1 << 1);
#endif

#ifndef T3
failed = 0;
for (int i = 0; i < NUM_TEST_CALLS; ++i) failed += test3();
if (failed != 0) failcode += (1 << 2);
#endif

return failcode;
}