1 ! Offloading test checking the use of the depend clause on the target construct
2 ! REQUIRES: flang, amdgcn-amd-amdhsa
3 ! UNSUPPORTED: nvptx64-nvidia-cuda
4 ! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
5 ! UNSUPPORTED: aarch64-unknown-linux-gnu
6 ! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
7 ! UNSUPPORTED: x86_64-unknown-linux-gnu
8 ! UNSUPPORTED: x86_64-unknown-linux-gnu-LTO
10 ! RUN: %libomptarget-compile-fortran-run-and-check-generic
! C-interoperable interface to the OpenMP routine omp_get_device_num.
! The routine takes no arguments. Its result is declared with the
! interoperable kind C_INT (instead of default integer) so that it matches
! the C 'int' return type independently of the compiler's default integer
! kind, e.g. when a default-integer-8 option is in effect.
FUNCTION omp_get_device_num() BIND(C)
  USE, INTRINSIC :: iso_c_binding, ONLY: C_INT
  integer(C_INT) :: omp_get_device_num
END FUNCTION omp_get_device_num
22 print*, "======= FORTRAN Test passed! ======="
23 print*, "foo(5) returned ", a
, ", expected 6\n"
28 integer, intent(in
) :: N
29 integer, intent(out
) :: r
30 integer :: z
, i
, accumulator
34 !$omp parallel num_threads(3)
36 ! A single thread will then create two tasks - one is the 'producer' and
37 ! potentially slower task that updates 'z' to 'N'. The second is an
38 ! offloaded target task that increments 'z'. If the depend clauses work
39 ! properly, the target task should wait for the 'producer' task to
40 ! complete before incrementing 'z'. We use 'omp single' here because the
41 ! depend clause establishes dependencies between sibling tasks only.
42 ! This is the easiest way of creating two sibling tasks.
44 !$omp task depend(out: z) shared(z)
46 ! dumb loop nest to slow down the update of 'z'.
47 ! Adding a function call slows down the producer to the point
48 ! that removing the depend clause from the target construct below
49 ! frequently results in the wrong answer.
50 accumulator
= accumulator
+ omp_get_device_num()
55 ! z is 5 now. Increment z to 6.
56 !$omp target map(tofrom: z) depend(in:z)
61 ! Use 'accumulator' so it is not optimized away by the compiler.
66 !CHECK: ======= FORTRAN Test passed! =======
67 !CHECK: foo(5) returned 6 , expected 6