1 /* Test 'acc enter/exit data' regions with 'acc update'. */
8 main (int argc
, char **argv
)
10 int N
= 128; //1024 * 1024;
11 float *a
, *b
, *c
, *d
, *e
;
15 nbytes
= N
* sizeof (float);
17 a
= (float *) malloc (nbytes
);
18 b
= (float *) malloc (nbytes
);
19 c
= (float *) malloc (nbytes
);
20 d
= (float *) malloc (nbytes
);
21 e
= (float *) malloc (nbytes
);
23 for (i
= 0; i
< N
; i
++)
29 #pragma acc enter data copyin (a[0:N]) copyin (b[0:N]) copyin (N) async
30 #pragma acc parallel present (a[0:N], b[0:N]) async wait
32 for (i
= 0; i
< N
; i
++)
35 #pragma acc update host (a[0:N], b[0:N]) async wait
38 for (i
= 0; i
< N
; i
++)
47 for (i
= 0; i
< N
; i
++)
53 #pragma acc update device (a[0:N], b[0:N]) async (1)
54 #pragma acc parallel present (a[0:N], b[0:N]) async (1)
56 for (i
= 0; i
< N
; i
++)
59 #pragma acc update host (a[0:N], b[0:N]) async (1) wait (1)
62 for (i
= 0; i
< N
; i
++)
71 for (i
= 0; i
< N
; i
++)
79 #pragma acc update device (a[0:N]) async (1)
80 #pragma acc update device (b[0:N]) async (2)
81 #pragma acc enter data copyin (c[0:N], d[0:N]) async (3)
83 #pragma acc parallel present (a[0:N], b[0:N]) async (1) wait (1,2)
85 for (i
= 0; i
< N
; i
++)
86 b
[i
] = (a
[i
] * a
[i
] * a
[i
]) / a
[i
];
88 #pragma acc parallel present (a[0:N], c[0:N]) async (2) wait (1,3)
90 for (i
= 0; i
< N
; i
++)
91 c
[i
] = (a
[i
] + a
[i
] + a
[i
] + a
[i
]) / a
[i
];
93 #pragma acc parallel present (a[0:N], d[0:N]) async (3) wait (1,3)
95 for (i
= 0; i
< N
; i
++)
96 d
[i
] = ((a
[i
] * a
[i
] + a
[i
]) / a
[i
]) - a
[i
];
98 #pragma acc update host (a[0:N], b[0:N], c[0:N], d[0:N]) async (1) wait (1,2,3)
101 for (i
= 0; i
< N
; i
++)
116 for (i
= 0; i
< N
; i
++)
125 #pragma acc update device (a[0:N], b[0:N], c[0:N], d[0:N]) async (1)
126 #pragma acc enter data copyin (e[0:N]) async (5)
128 #pragma acc parallel present (a[0:N], b[0:N]) async (1) wait (1)
129 for (int ii
= 0; ii
< N
; ii
++)
130 b
[ii
] = (a
[ii
] * a
[ii
] * a
[ii
]) / a
[ii
];
132 #pragma acc parallel present (a[0:N], c[0:N]) async (2) wait (1)
133 for (int ii
= 0; ii
< N
; ii
++)
134 c
[ii
] = (a
[ii
] + a
[ii
] + a
[ii
] + a
[ii
]) / a
[ii
];
136 #pragma acc parallel present (a[0:N], d[0:N]) async (3) wait (1)
137 for (int ii
= 0; ii
< N
; ii
++)
138 d
[ii
] = ((a
[ii
] * a
[ii
] + a
[ii
]) / a
[ii
]) - a
[ii
];
140 #pragma acc parallel present (a[0:N], b[0:N], c[0:N], d[0:N], e[0:N]) \
141 wait (1, 2, 3, 5) async (4)
142 for (int ii
= 0; ii
< N
; ii
++)
143 e
[ii
] = a
[ii
] + b
[ii
] + c
[ii
] + d
[ii
];
145 #pragma acc exit data copyout (a[0:N]) copyout (b[0:N]) copyout (c[0:N]) \
146 copyout (d[0:N]) copyout (e[0:N]) wait (1, 2, 3, 4) async (1)
147 #pragma acc exit data delete (N) wait(1) async(2)
150 for (i
= 0; i
< N
; i
++)