1 // REQUIRES: x86-registered-target
2 // REQUIRES: amdgpu-registered-target
4 // RUN: %clang -### -x hip -nogpulib -nogpuinc --gpu-max-threads-per-block=1024 %s 2>&1 | FileCheck %s
6 // Check that --gpu-max-threads-per-block=1024 is forwarded to the device-side -cc1 command.
8 // CHECK: "-cc1" {{.*}} "-fcuda-is-device"
9 // CHECK-SAME: "--gpu-max-threads-per-block=1024"
// Check that -fgpu-allow-device-init shows up on two -cc1 command lines
// (presumably the device-side and host-side compilations; the patterns do not pin a triple).
11 // RUN: %clang -### -nogpuinc -nogpulib -fgpu-allow-device-init \
12 // RUN: %s 2>&1 | FileCheck -check-prefix=DEVINIT %s
13 // DEVINIT: "-cc1" {{.*}}"-fgpu-allow-device-init"
14 // DEVINIT: "-cc1" {{.*}}"-fgpu-allow-device-init"
16 // Check -fgpu-default-stream=per-thread.
// With -save-temps, the option must appear on a preprocessing (-E) command and then on
// the command that takes hip-cpp-output as input; that pair must occur twice, in order.
17 // RUN: %clang -### -nogpuinc -nogpulib -fgpu-default-stream=per-thread \
18 // RUN: %s -save-temps 2>&1 | FileCheck -check-prefix=PTH %s
19 // PTH: "-cc1"{{.*}} "-E" {{.*}}"-fgpu-default-stream=per-thread"
20 // PTH: "-cc1"{{.*}} "-fgpu-default-stream=per-thread" {{.*}}"-x" "hip-cpp-output"
21 // PTH: "-cc1"{{.*}} "-E" {{.*}}"-fgpu-default-stream=per-thread"
22 // PTH: "-cc1"{{.*}} "-fgpu-default-stream=per-thread" {{.*}}"-x" "hip-cpp-output"
24 // Check -mprintf-kind=hostcall
// With -save-temps, each of the three amdgcn -cc1 steps (-E, hip-cpp-output input, ir input)
// must carry -mprintf-kind=hostcall directly followed by -Werror=format-invalid-specifier.
// The x86_64 patterns only require the -E and ir steps to appear afterwards; they do not
// mention the printf option.
25 // RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-unknown-linux-gnu -mprintf-kind=hostcall %s -save-temps 2>&1 | FileCheck -check-prefix=HOSTC %s
26 // HOSTC: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-mprintf-kind=hostcall" "-Werror=format-invalid-specifier"{{.*}}"-E" {{.*}}
27 // HOSTC: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}} "-mprintf-kind=hostcall" "-Werror=format-invalid-specifier" {{.*}}"-x" "hip-cpp-output"
28 // HOSTC: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}} "-mprintf-kind=hostcall" "-Werror=format-invalid-specifier" {{.*}}"-x" "ir"
29 // HOSTC: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}} "-E" {{.*}}
30 // HOSTC: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"
32 // Check -mprintf-kind=buffered
// With -save-temps, each of the three amdgcn -cc1 steps (-E, hip-cpp-output input, ir input)
// must carry -mprintf-kind=buffered directly followed by -Werror=format-invalid-specifier.
// The x86_64 patterns only require the -E and ir steps to appear afterwards; they do not
// mention the printf option.
33 // RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-unknown-linux-gnu -mprintf-kind=buffered %s -save-temps 2>&1 | FileCheck -check-prefix=BUFF %s
34 // BUFF: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-mprintf-kind=buffered" "-Werror=format-invalid-specifier"{{.*}}"-E" {{.*}}
35 // BUFF: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}} "-mprintf-kind=buffered" "-Werror=format-invalid-specifier" {{.*}}"-x" "hip-cpp-output"
36 // BUFF: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}} "-mprintf-kind=buffered" "-Werror=format-invalid-specifier" {{.*}}"-x" "ir"
37 // BUFF: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}} "-E" {{.*}}
38 // BUFF: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"
// Check that a single "-mllvm -amdgpu-early-inline-all=true" on the driver command line
// is not emitted twice within any one generated command line.
40 // RUN: %clang -### -x hip -nogpulib -nogpuinc --target=x86_64-pc-windows-msvc -fms-extensions \
41 // RUN: -mllvm -amdgpu-early-inline-all=true %s 2>&1 | \
42 // RUN: FileCheck -check-prefix=MLLVM %s
43 // MLLVM-NOT: "-mllvm"{{.*}}"-amdgpu-early-inline-all=true"{{.*}}"-mllvm"{{.*}}"-amdgpu-early-inline-all=true"
// Check that options wrapped in -Xarch_device (-g, -fcf-protection=branch and
// -mllvm=--inline-threshold=100) reach a -fcuda-is-device -cc1 command twice (two GPU archs
// are requested), and that no clang command after those carries -debug-info-kind.
45 // RUN: %clang -### -Xarch_device -g -nogpulib -nogpuinc --cuda-gpu-arch=gfx900 \
46 // RUN: -Xarch_device -fcf-protection=branch -Xarch_device -mllvm=--inline-threshold=100 \
47 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=DEV %s
48 // DEV: "-cc1"{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch" {{.*}}"-mllvm" "--inline-threshold=100"
49 // DEV: "-cc1"{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch" {{.*}}"-mllvm" "--inline-threshold=100"
50 // DEV-NOT: clang{{.*}} {{.*}} "-debug-info-kind={{.*}}"
// Check that -Xarch_host -g adds -debug-info-kind to a clang command, and that no
// -fcuda-is-device command ahead of it carries -debug-info-kind.
// NOTE(review): the two negative patterns below are identical, so the second looks redundant.
52 // RUN: %clang -### -Xarch_host -g -nogpulib -nogpuinc --cuda-gpu-arch=gfx900 \
53 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=HOST %s
54 // HOST-NOT: "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
55 // HOST-NOT: "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
56 // HOST: clang{{.*}} "-debug-info-kind={{.*}}"
// Check that -munsafe-fp-atomics is passed to the amdgcn compilation when requested.
58 // RUN: %clang -### -nogpuinc -nogpulib -munsafe-fp-atomics \
59 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=UNSAFE-FP-ATOMICS %s
60 // UNSAFE-FP-ATOMICS: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-munsafe-fp-atomics"
// Without the option on the driver command line, it must not reach the amdgcn compilation.
62 // RUN: %clang -### -nogpuinc -nogpulib \
63 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=DEFAULT-UNSAFE-FP-ATOMICS %s
64 // DEFAULT-UNSAFE-FP-ATOMICS-NOT: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-munsafe-fp-atomics"
// Check that -fgpu-exclude-wrong-side-overloads reaches both the amdgcn and the x86_64
// -cc1 commands, each time directly followed by -fgpu-defer-diag.
66 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fgpu-exclude-wrong-side-overloads \
67 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=FIX-OVERLOAD %s
68 // FIX-OVERLOAD: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fgpu-exclude-wrong-side-overloads" "-fgpu-defer-diag"
69 // FIX-OVERLOAD: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fgpu-exclude-wrong-side-overloads" "-fgpu-defer-diag"
71 // Check -mconstructor-aliases is not passed to device compilation.
// The negative pattern is placed both before and after the host match: a negative
// directive only inspects the output between its neighbouring positive matches, and the
// other ordered checks in this file expect the amdgcn -cc1 command ahead of the x86_64 one.
73 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
74 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=CTA %s
// CTA-NOT: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mconstructor-aliases"
75 // CTA: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-mconstructor-aliases"
76 // CTA-NOT: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mconstructor-aliases"
// Check that -fgpu-inline-threshold=1000 becomes "-mllvm -inline-threshold=1000" on the
// amdgcn -cc1 command, and that no later x86_64 -cc1 command carries -inline-threshold=1000.
78 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
79 // RUN: --offload-arch=gfx906 -fgpu-inline-threshold=1000 %s 2>&1 | FileCheck -check-prefix=THRESH %s
80 // THRESH: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mllvm" "-inline-threshold=1000"
81 // THRESH-NOT: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-inline-threshold=1000"
83 // Check -foffload-lto=thin translated correctly.
// The two RUN lines below (without and with -fgpu-rdc) share the same expectations.
85 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
86 // RUN: --cuda-gpu-arch=gfx906 -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \
87 // RUN: | FileCheck -check-prefix=HIPTHINLTO %s
89 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
90 // RUN: --cuda-gpu-arch=gfx906 -fgpu-rdc -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \
91 // RUN: | FileCheck -check-prefix=HIPTHINLTO %s
93 // Ensure we don't error about -fwhole-program-vtables for the non-device offload compile.
// The amdgcn -cc1 command must get -flto=thin -flto-unit and -fwhole-program-vtables, no
// x86_64 -cc1 command around it may get -flto-unit, and lld must receive the thinlto
// plugin options.
94 // HIPTHINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
95 // HIPTHINLTO-NOT: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit"
96 // HIPTHINLTO: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-flto=thin" "-flto-unit" {{.*}} "-fwhole-program-vtables"
97 // HIPTHINLTO-NOT: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit"
98 // HIPTHINLTO: lld{{.*}}"-plugin-opt=mcpu=gfx906" "-plugin-opt=thinlto" "-plugin-opt=-force-import-all"
100 // Check that -flto=thin is handled correctly, particularly with -fwhole-program-vtables.
102 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
103 // RUN: --cuda-gpu-arch=gfx906 -flto=thin -fwhole-program-vtables %s 2>&1 \
104 // RUN: | FileCheck -check-prefix=THINLTO %s
106 // Ensure we don't error about -fwhole-program-vtables for the device offload compile. We should
107 // drop -fwhole-program-vtables for the device offload compile and pass it through for the
108 // non-device offload compile along with -flto=thin.
// The negative amdgcn patterns must not have a quote between the wildcard and "-triple":
// -triple directly follows -cc1, so a pattern demanding an extra quote there never matches
// and the negative check would pass vacuously.
109 // THINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
110 // THINLTO-NOT: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fwhole-program-vtables"
111 // THINLTO: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto=thin" {{.*}} "-fwhole-program-vtables"
112 // THINLTO-NOT: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fwhole-program-vtables"
114 // Check -fopenmp=libomp is allowed with HIP but -fopenmp-targets= is not allowed.
// -fopenmp must reach the x86_64 -cc1 command and must not appear on an amdgcn -cc1
// command ahead of it.
116 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
117 // RUN: --offload-arch=gfx906 -fopenmp=libomp %s 2>&1 | FileCheck -check-prefix=OMP %s
118 // OMP-NOT: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fopenmp"
119 // OMP: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fopenmp"
// This invocation is run without -### and is expected to fail (note the leading "not")
// with the diagnostic below.
121 // RUN: not %clang --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
122 // RUN: --offload-arch=gfx906 -fopenmp=libomp -fopenmp-targets=amdgcn %s 2>&1 \
123 // RUN: | FileCheck -check-prefix=OMPTGT %s
124 // OMPTGT: unsupported option '-fopenmp-targets=' for language mode 'HIP'
126 // Check -Xoffload-linker option is passed to lld.
// Both RUN lines (with and without -fgpu-rdc) expect --build-id=md5 on the lld command.
128 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
129 // RUN: --cuda-gpu-arch=gfx906 -fgpu-rdc -Xoffload-linker --build-id=md5 %s 2>&1 \
130 // RUN: | FileCheck -check-prefix=OFL-LINK %s
131 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
132 // RUN: --cuda-gpu-arch=gfx906 -Xoffload-linker --build-id=md5 %s 2>&1 \
133 // RUN: | FileCheck -check-prefix=OFL-LINK %s
134 // OFL-LINK: lld{{.*}}"--build-id=md5"
// Check that -fhip-kernel-arg-name is passed to the amdgcn -cc1 command when requested.
136 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
137 // RUN: --offload-arch=gfx906 -fhip-kernel-arg-name %s 2>&1 \
138 // RUN: | FileCheck -check-prefix=KAN %s
139 // KAN: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fhip-kernel-arg-name"
// Without the option on the driver command line, it must not appear anywhere in the output.
141 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
142 // RUN: --offload-arch=gfx906 %s 2>&1 \
143 // RUN: | FileCheck -check-prefix=KANNEG %s
144 // KANNEG-NOT: "-fhip-kernel-arg-name"
// -mamdgpu-ieee given last: neither -mamdgpu-ieee nor -mno-amdgpu-ieee is expected on the
// amdgcn command line.
146 // RUN: %clang -### -nogpuinc -nogpulib -mno-amdgpu-ieee -mamdgpu-ieee \
147 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=IEEE-ON %s
148 // IEEE-ON-NOT: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-m{{(no-)?}}amdgpu-ieee"
// -mno-amdgpu-ieee given last together with -ffast-math: -mno-amdgpu-ieee is expected on
// the amdgcn command line.
150 // RUN: %clang -### -nogpuinc -nogpulib -mamdgpu-ieee -mno-amdgpu-ieee -ffast-math \
151 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefixes=IEEE-OFF %s
152 // IEEE-OFF: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mno-amdgpu-ieee"
// Same driver invocation as the previous one: the positive form -mamdgpu-ieee must not be
// present on the amdgcn command line.
154 // RUN: %clang -### -nogpuinc -nogpulib -mamdgpu-ieee -mno-amdgpu-ieee -ffast-math \
155 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefixes=IEEE-OFF-NEG %s
156 // IEEE-OFF-NEG-NOT: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mamdgpu-ieee"
158 // Check -fno-hip-fp32-correctly-rounded-divide-sqrt is passed to -cc1 but
159 // (default) -fhip-fp32-correctly-rounded-divide-sqrt is not.
161 // RUN: %clang -### -nogpuinc -nogpulib -fno-hip-fp32-correctly-rounded-divide-sqrt \
162 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefixes=NOCRDS %s
163 // NOCRDS: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fno-hip-fp32-correctly-rounded-divide-sqrt"
// With no option given, neither form may appear in the output.
165 // RUN: %clang -### -nogpuinc -nogpulib \
166 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefixes=CRDS %s
167 // CRDS-NOT: "-f{{(no-)?}}hip-fp32-correctly-rounded-divide-sqrt"
// With the positive form given explicitly, neither form may appear in the output either.
// NOTE(review): this invocation reuses the same prefix as the previous one, so both
// identical negative patterns are evaluated for each of the two invocations.
169 // RUN: %clang -### -nogpuinc -nogpulib -fhip-fp32-correctly-rounded-divide-sqrt \
170 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefixes=CRDS %s
171 // CRDS-NOT: "-f{{(no-)?}}hip-fp32-correctly-rounded-divide-sqrt"
173 // Check -fgpu-approx-transcendentals is passed to clang -cc1 but
174 // (default) -fno-gpu-approx-transcendentals is not.
175 // -ffast-math implies -fgpu-approx-transcendentals, which can be overridden
176 // by -fno-gpu-approx-transcendentals.
178 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fgpu-approx-transcendentals \
179 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=APPROX %s
181 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -ffast-math \
182 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=APPROX %s
// Shared expectation for the two invocations above: the option is present on the amdgcn
// -cc1 command and then on the x86_64 -cc1 command.
184 // APPROX: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fgpu-approx-transcendentals"
185 // APPROX: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fgpu-approx-transcendentals"
187 // RUN: %clang -### -nogpuinc -nogpulib -fno-gpu-approx-transcendentals \
188 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=NOAPPROX %s
190 // RUN: %clang -### -nogpuinc -nogpulib \
191 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=NOAPPROX %s
193 // RUN: %clang -### -nogpuinc -nogpulib -ffast-math -fno-fast-math \
194 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=NOAPPROX %s
196 // RUN: %clang -### -nogpuinc -nogpulib -ffast-math -fno-gpu-approx-transcendentals \
197 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=NOAPPROX %s
// Shared expectation for the four invocations above: neither form of the option appears
// anywhere in the output.
199 // NOAPPROX-NOT: "-f{{(no-)?}}gpu-approx-transcendentals"
201 // Check no warnings for -fgpu-approx-transcendentals.
// With -Werror any warning would become an error; "count 0" requires empty output.
203 // RUN: %clang -fdriver-only -Werror --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fgpu-approx-transcendentals \
204 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | count 0
// Same expectation when the input is treated as plain C++ (-x c++) with no GPU arch given.
206 // RUN: %clang -fdriver-only -Werror --target=x86_64-unknown-linux-gnu -nostdinc -nostdlib -fgpu-approx-transcendentals \
207 // RUN: -x c++ %s 2>&1 | count 0
208 // Check -f[no-]offload-uniform-block is passed to clang -cc1 when given
209 // explicitly, but neither form is passed by default.
// An explicit -fno-offload-uniform-block must reach both the amdgcn and the x86_64 -cc1 commands.
211 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fno-offload-uniform-block \
212 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=NOUNIBLK %s
214 // NOUNIBLK: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fno-offload-uniform-block"
215 // NOUNIBLK: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fno-offload-uniform-block"
// An explicit -foffload-uniform-block must likewise reach both -cc1 commands.
217 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -foffload-uniform-block \
218 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=UNIBLK %s
220 // UNIBLK: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-foffload-uniform-block"
221 // UNIBLK: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-foffload-uniform-block"
// With neither form on the driver command line, neither may appear in the output.
223 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
224 // RUN: --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=DEFUNIBLK %s
226 // DEFUNIBLK-NOT: "-f{{(no-)?}}offload-uniform-block"
228 // Check no warnings for -f[no-]offload-uniform-block.
// With -Werror any warning would become an error; "count 0" requires empty output.
230 // RUN: %clang -fdriver-only -c -Werror --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fno-offload-uniform-block \
231 // RUN: -foffload-uniform-block --cuda-gpu-arch=gfx906 %s 2>&1 | count 0