1 // REQUIRES: x86-registered-target
2 // REQUIRES: amdgpu-registered-target
4 // RUN: %clang -### -x hip --gpu-max-threads-per-block=1024 %s 2>&1 | FileCheck %s
6 // Check that --gpu-max-threads-per-block=1024 is passed to the device-side cc1 command.
8 // CHECK: "-cc1" {{.*}} "-fcuda-is-device"
9 // CHECK-SAME: "--gpu-max-threads-per-block=1024"
11 // RUN: %clang -### -nogpuinc -nogpulib -fgpu-allow-device-init \
12 // RUN: %s 2>&1 | FileCheck -check-prefix=DEVINIT %s
13 // DEVINIT: "-cc1" {{.*}}"-fgpu-allow-device-init"
14 // DEVINIT: "-cc1" {{.*}}"-fgpu-allow-device-init"
16 // Check -fgpu-default-stream=per-thread.
17 // RUN: %clang -### -nogpuinc -nogpulib -fgpu-default-stream=per-thread \
18 // RUN: %s -save-temps 2>&1 | FileCheck -check-prefix=PTH %s
19 // PTH: "-cc1"{{.*}} "-E" {{.*}}"-fgpu-default-stream=per-thread"
20 // PTH: "-cc1"{{.*}} "-fgpu-default-stream=per-thread" {{.*}}"-x" "hip-cpp-output"
21 // PTH: "-cc1"{{.*}} "-E" {{.*}}"-fgpu-default-stream=per-thread"
22 // PTH: "-cc1"{{.*}} "-fgpu-default-stream=per-thread" {{.*}}"-x" "hip-cpp-output"
24 // RUN: %clang -### -x hip --target=x86_64-pc-windows-msvc -fms-extensions \
25 // RUN: -mllvm -amdgpu-early-inline-all=true %s 2>&1 | \
26 // RUN: FileCheck -check-prefix=MLLVM %s
27 // MLLVM-NOT: "-mllvm"{{.*}}"-amdgpu-early-inline-all=true"{{.*}}"-mllvm"{{.*}}"-amdgpu-early-inline-all=true"
29 // RUN: %clang -### -Xarch_device -g -nogpulib --cuda-gpu-arch=gfx900 \
30 // RUN: -Xarch_device -fcf-protection=branch \
31 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=DEV %s
32 // DEV: "-cc1"{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch"
33 // DEV: "-cc1"{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch"
34 // DEV-NOT: clang{{.*}} {{.*}} "-debug-info-kind={{.*}}"
36 // RUN: %clang -### -Xarch_host -g -nogpulib --cuda-gpu-arch=gfx900 \
37 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=HOST %s
38 // HOST-NOT: "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
39 // HOST-NOT: "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
40 // HOST: clang{{.*}} "-debug-info-kind={{.*}}"
42 // RUN: %clang -### -nogpuinc -nogpulib -munsafe-fp-atomics \
43 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=UNSAFE-FP-ATOMICS %s
44 // UNSAFE-FP-ATOMICS: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-munsafe-fp-atomics"
46 // RUN: %clang -### -nogpuinc -nogpulib \
47 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=DEFAULT-UNSAFE-FP-ATOMICS %s
48 // DEFAULT-UNSAFE-FP-ATOMICS-NOT: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-munsafe-fp-atomics"
50 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fgpu-exclude-wrong-side-overloads \
51 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=FIX-OVERLOAD %s
52 // FIX-OVERLOAD: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fgpu-exclude-wrong-side-overloads" "-fgpu-defer-diag"
53 // FIX-OVERLOAD: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fgpu-exclude-wrong-side-overloads" "-fgpu-defer-diag"
55 // Check -mconstructor-aliases is not passed to device compilation.
57 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
58 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=CTA %s
59 // CTA: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-mconstructor-aliases"
60 // CTA-NOT: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mconstructor-aliases"
62 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
63 // RUN: --offload-arch=gfx906 -fgpu-inline-threshold=1000 %s 2>&1 | FileCheck -check-prefix=THRESH %s
64 // THRESH: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mllvm" "-inline-threshold=1000"
65 // THRESH-NOT: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-inline-threshold=1000"
67 // Check -foffload-lto=thin translated correctly.
69 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
70 // RUN: --cuda-gpu-arch=gfx906 -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \
71 // RUN: | FileCheck -check-prefix=HIPTHINLTO %s
73 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
74 // RUN: --cuda-gpu-arch=gfx906 -fgpu-rdc -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \
75 // RUN: | FileCheck -check-prefix=HIPTHINLTO %s
77 // Ensure we don't error about -fwhole-program-vtables for the non-device offload compile.
78 // HIPTHINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
79 // HIPTHINLTO-NOT: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit"
80 // HIPTHINLTO: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-flto=thin" "-flto-unit" {{.*}} "-fwhole-program-vtables"
81 // HIPTHINLTO-NOT: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit"
82 // HIPTHINLTO: lld{{.*}}"-plugin-opt=mcpu=gfx906" "-plugin-opt=thinlto" "-plugin-opt=-force-import-all"
84 // Check that -flto=thin is handled correctly, particularly with -fwhole-program-vtables.
86 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
87 // RUN: --cuda-gpu-arch=gfx906 -flto=thin -fwhole-program-vtables %s 2>&1 \
88 // RUN: | FileCheck -check-prefix=THINLTO %s
90 // Ensure we don't error about -fwhole-program-vtables for the device offload compile. We should
91 // drop -fwhole-program-vtables for the device offload compile and pass it through for the
92 // non-device offload compile along with -flto=thin.
93 // THINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
94 // THINLTO-NOT: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fwhole-program-vtables"
95 // THINLTO: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto=thin" {{.*}} "-fwhole-program-vtables"
96 // THINLTO-NOT: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fwhole-program-vtables"
98 // Check -fopenmp is allowed with HIP but -fopenmp-targets= is not allowed.
100 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
101 // RUN: --offload-arch=gfx906 -fopenmp %s 2>&1 | FileCheck -check-prefix=OMP %s
102 // OMP-NOT: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fopenmp"
103 // OMP: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fopenmp"
105 // RUN: not %clang --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
106 // RUN: --offload-arch=gfx906 -fopenmp -fopenmp-targets=amdgcn %s 2>&1 \
107 // RUN: | FileCheck -check-prefix=OMPTGT %s
108 // OMPTGT: unsupported option '-fopenmp-targets=' for language mode 'HIP'
110 // Check -Xoffload-linker option is passed to lld.
112 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
113 // RUN: --cuda-gpu-arch=gfx906 -fgpu-rdc -Xoffload-linker --build-id=md5 %s 2>&1 \
114 // RUN: | FileCheck -check-prefix=OFL-LINK %s
115 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
116 // RUN: --cuda-gpu-arch=gfx906 -Xoffload-linker --build-id=md5 %s 2>&1 \
117 // RUN: | FileCheck -check-prefix=OFL-LINK %s
118 // OFL-LINK: lld{{.*}}"--build-id=md5"
120 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
121 // RUN: --offload-arch=gfx906 -fhip-kernel-arg-name %s 2>&1 \
122 // RUN: | FileCheck -check-prefix=KAN %s
123 // KAN: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fhip-kernel-arg-name"
125 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
126 // RUN: --offload-arch=gfx906 %s 2>&1 \
127 // RUN: | FileCheck -check-prefix=KANNEG %s
128 // KANNEG-NOT: "-fhip-kernel-arg-name"