1 /* { dg-additional-options "-fopenacc-dim=16:16" } */
6 #include <gomp-constants.h>
9 static int __attribute__ ((noinline
)) coord ()
13 if (acc_on_device (acc_device_not_host
))
17 g
= __builtin_goacc_parlevel_id (GOMP_DIM_GANG
);
18 w
= __builtin_goacc_parlevel_id (GOMP_DIM_WORKER
);
19 v
= __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR
);
20 res
= (1 << 24) | (g
<< 16) | (w
<< 8) | v
;
26 int check (const int *ary
, int size
, int gp
, int wp
, int vp
)
30 int *gangs
= (int *)__builtin_alloca (gp
* sizeof (int));
31 int *workers
= (int *)__builtin_alloca (wp
* sizeof (int));
32 int *vectors
= (int *)__builtin_alloca (vp
* sizeof (int));
35 memset (gangs
, 0, gp
* sizeof (int));
36 memset (workers
, 0, wp
* sizeof (int));
37 memset (vectors
, 0, vp
* sizeof (int));
39 for (ix
= 0; ix
< size
; ix
++)
41 int g
= (ary
[ix
] >> 16) & 0xff;
42 int w
= (ary
[ix
] >> 8) & 0xff;
43 int v
= (ary
[ix
] >> 0) & 0xff;
45 if (g
>= gp
|| w
>= wp
|| v
>= vp
)
47 printf ("unexpected cpu %#x used\n", ary
[ix
]);
56 offloaded
+= ary
[ix
] >> 24;
62 if (offloaded
!= size
)
64 printf ("offloaded %d times, expected %d\n", offloaded
, size
);
68 for (ix
= 0; ix
< gp
; ix
++)
69 if (gangs
[ix
] != gangs
[0])
71 printf ("gang %d not used %d times\n", ix
, gangs
[0]);
75 for (ix
= 0; ix
< wp
; ix
++)
76 if (workers
[ix
] != workers
[0])
78 printf ("worker %d not used %d times\n", ix
, workers
[0]);
82 for (ix
= 0; ix
< vp
; ix
++)
83 if (vectors
[ix
] != vectors
[0])
85 printf ("vector %d not used %d times\n", ix
, vectors
[0]);
94 int test_1 (int gp
, int wp
, int vp
)
99 #pragma acc parallel copyout (ary)
101 #pragma acc loop gang (static:1)
102 for (int ix
= 0; ix
< N
; ix
++)
106 exit
|= check (ary
, N
, gp
, 1, 1);
108 #pragma acc parallel copyout (ary)
110 #pragma acc loop worker
111 for (int ix
= 0; ix
< N
; ix
++)
115 exit
|= check (ary
, N
, 1, wp
, 1);
117 #pragma acc parallel copyout (ary)
119 #pragma acc loop vector
120 for (int ix
= 0; ix
< N
; ix
++)
124 exit
|= check (ary
, N
, 1, 1, vp
);
131 #ifdef ACC_DEVICE_TYPE_radeon
132 /* AMD GCN uses the autovectorizer for the vector dimension: the use
133 of a function call in vector-partitioned code in this test is not
134 currently supported. */
135 return test_1 (16, 16, 1);
137 return test_1 (16, 16, 32);