! Offloading test checking the use of the depend clause on the target construct.
3 ! REQUIRES: flang, amdgcn-amd-amdhsa
4 ! UNSUPPORTED: nvptx64-nvidia-cuda
5 ! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
6 ! UNSUPPORTED: aarch64-unknown-linux-gnu
7 ! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
8 ! UNSUPPORTED: x86_64-pc-linux-gnu
9 ! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
11 ! RUN: %libomptarget-compile-fortran-run-and-check-generic
! Explicit interface for the OpenMP runtime routine omp_get_device_num,
! bound to its C symbol. Per the OpenMP API it takes no arguments and
! returns the number of the device on which the calling thread executes.
function omp_get_device_num() bind(c)
  use, intrinsic :: iso_c_binding, only: c_int
  implicit none
  ! The result of a bind(c) function must have an interoperable type, so
  ! it is declared with the c_int kind (matching the C 'int' return)
  ! rather than relying on default integer happening to have that kind.
  integer(c_int) :: omp_get_device_num
end function omp_get_device_num
23 print*, "======= FORTRAN Test passed! ======="
24 print*, "foo(5) returned ", a
, ", expected 6\n"
29 integer, intent(in
) :: N
30 integer, intent(out
) :: r
31 integer :: z
, i
, accumulator
35 !$omp parallel num_threads(3)
! A single thread will then create two tasks. The first is the 'producer',
! a potentially slower task that sets 'z' to 'N'. The second is an
! offloaded target task that increments 'z'. If the depend clauses work
! properly, the target task waits for the 'producer' task to complete
! before incrementing 'z'. We use 'omp single' here because the depend
! clause establishes dependencies between sibling tasks only, and having
! one thread create both tasks is the easiest way to make them siblings.
45 !$omp task depend(out: z) shared(z)
! Deliberately wasteful loop nest that delays the update of 'z'.
! Adding a function call slows the producer down to the point
! that removing the depend clause from the target construct below
! frequently results in the wrong answer.
51 accumulator
= accumulator
+ omp_get_device_num()
! z is N at this point (5 in this test). Increment z to N + 1 (6).
57 !$omp target map(tofrom: z) depend(in:z)
62 ! Use 'accumulator' so it is not optimized away by the compiler.
67 !CHECK: ======= FORTRAN Test passed! =======
68 !CHECK: foo(5) returned 6 , expected 6