1 // REQUIRES: nvptx-registered-target
3 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -target-cpu sm_30 %s -o - | FileCheck %s --check-prefix=NO_SYNC
4 // RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -target-cpu sm_30 -target-feature +ptx70 -DSYNC -DCUDA_VERSION=9000 %s -o - | FileCheck %s --check-prefix=SYNC
6 #include "Inputs/cuda.h"
// Device-side forward declaration of memcpy, placed ahead of the intrinsics
// header include below. NOTE(review): presumably the header's 64-bit shuffle
// wrappers rely on it -- confirm against the header.
8 __device__ void *memcpy(void *dest, const void *src, size_t n);
11 #include <__clang_cuda_intrinsics.h>
// Device function declared without a body in this file; takes one
// unsigned long long and one long long. NOTE(review): presumably a sink that
// keeps the shuffled values live -- its call site is not visible in this view.
13 __device__ void use(unsigned long long, long long);
15 // Test function: each RUN line checks two 64-bit shuffle calls (unsigned long long, then long long).
16 // NO_SYNC: define{{.*}} @_Z14test_long_longv
17 // NO_SYNC: call noundef i64 @_Z6__shflyii(
18 // NO_SYNC: call noundef i64 @_Z6__shflxii(
20 // SYNC: define{{.*}} @_Z14test_long_longv
21 // SYNC: call noundef i64 @_Z11__shfl_syncjyii(
22 // SYNC: call noundef i64 @_Z11__shfl_syncjxii(
24 // The unsigned long long overload of __shfl forwards to the long long overload.
25 // NO_SYNC: define{{.*}} @_Z6__shflyii
26 // NO_SYNC: call noundef i64 @_Z6__shflxii(
28 // The long long overload of __shfl is implemented with two calls to the int overload.
29 // NO_SYNC: define{{.*}} @_Z6__shflxii
30 // NO_SYNC: call noundef i32 @_Z6__shfliii(
31 // NO_SYNC: call noundef i32 @_Z6__shfliii(
33 // NO_SYNC: define{{.*}} @_Z6__shfliii
34 // NO_SYNC: call i32 @llvm.nvvm.shfl.idx.i32
36 // The unsigned long long overload of __shfl_sync forwards to the long long overload.
37 // SYNC: define{{.*}} @_Z11__shfl_syncjyii
38 // SYNC: call noundef i64 @_Z11__shfl_syncjxii(
40 // The long long overload of __shfl_sync is implemented with two calls to the int overload.
41 // SYNC: define{{.*}} @_Z11__shfl_syncjxii
42 // SYNC: call noundef i32 @_Z11__shfl_syncjiii(
43 // SYNC: call noundef i32 @_Z11__shfl_syncjiii(
45 // SYNC: define{{.*}} @_Z11__shfl_syncjiii
46 // SYNC: call i32 @llvm.nvvm.shfl.sync.idx.i32
48 __device__ void test_long_long() {
49 unsigned long long ull = 13;
52 ull = __shfl(ull, 7, 32);
53 ll = __shfl(ll, 7, 32);
56 ull = __shfl_sync(0x11, ull, 7, 32);
57 ll = __shfl_sync(0x11, ll, 7, 32);