1 // Tests CUDA compilation pipeline construction in Driver.
2 // REQUIRES: x86-registered-target
3 // REQUIRES: nvptx-registered-target
5 // Simple compilation case (-c). Compile device-side to PTX assembly and make
6 // sure we use it on the host side; no link step is expected.
7 // RUN: %clang -### --target=x86_64-linux-gnu -c --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
8 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
9 // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
10 // RUN: -check-prefix NOLINK %s
12 // Typical compilation + link case.
13 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
14 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
15 // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
16 // RUN: -check-prefix LINK %s
18 // Verify that --cuda-host-only disables device-side compilation, but doesn't
19 // disable host-side compilation/linking.
20 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
21 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
22 // RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
24 // Verify that --cuda-device-only disables host-side compilation and linking.
25 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
26 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
27 // RUN: -check-prefix NOHOST -check-prefix NOLINK %s
29 // Check that the last of --cuda-compile-host-device, --cuda-host-only, and
30 // --cuda-device-only wins.
31 // a) --cuda-host-only given last: host-side compilation and linking only.
32 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
33 // RUN: --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
34 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
35 // RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
37 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-compile-host-device \
38 // RUN: --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
39 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
40 // RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
41 // b) --cuda-device-only given last: device-side compilation only, no linking.
42 // RUN: not %clang -### --target=x86_64-linux-gnu --cuda-host-only \
43 // RUN: --cuda-device-only %s 2>&1 \
44 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
45 // RUN: -check-prefix NOHOST -check-prefix NOLINK %s
47 // RUN: not %clang -### --target=x86_64-linux-gnu --cuda-compile-host-device \
48 // RUN: --cuda-device-only %s 2>&1 \
49 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
50 // RUN: -check-prefix NOHOST -check-prefix NOLINK %s
51 // c) --cuda-compile-host-device given last: both sides are compiled, then linked.
52 // RUN: not %clang -### --target=x86_64-linux-gnu --cuda-host-only \
53 // RUN: --cuda-compile-host-device %s 2>&1 \
54 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
55 // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
56 // RUN: -check-prefix LINK %s
58 // RUN: not %clang -### --target=x86_64-linux-gnu --cuda-device-only \
59 // RUN: --cuda-compile-host-device %s 2>&1 \
60 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
61 // RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
62 // RUN: -check-prefix LINK %s
64 // A single --cuda-gpu-arch value must reach the device-side compilation as
65 // its target GPU architecture.
66 // RUN: not %clang -### --target=x86_64-linux-gnu --cuda-gpu-arch=sm_30 -c %s 2>&1 \
67 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE-SM30 \
68 // RUN: -check-prefixes HOST,INCLUDES-DEVICE,NOLINK %s
71 // Each --cuda-gpu-arch argument gets its own device-side compilation, and
72 // every result is included on the host side.
73 // RUN: not %clang -### --target=x86_64-linux-gnu \
74 // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 -c %s 2>&1 \
75 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE-SM30 \
76 // RUN: -check-prefixes DEVICE2,DEVICE2-SM35 \
77 // RUN: -check-prefixes INCLUDES-DEVICE,INCLUDES-DEVICE2 \
78 // RUN: -check-prefixes HOST,HOST-NOSAVE,NOLINK %s
80 // With -save-temps, device-side results must still be handed to the
81 // correct tool.
82 // RUN: not %clang -### --target=x86_64-linux-gnu -save-temps -c %s 2>&1 \
83 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-SAVE \
84 // RUN: -check-prefixes HOST,HOST-SAVE,NOLINK %s
86 // With -fno-integrated-as, device-side results must still be handed to the
87 // correct tool.
88 // RUN: not %clang -### --target=x86_64-linux-gnu -fno-integrated-as -c %s 2>&1 \
89 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE \
90 // RUN: -check-prefixes HOST,HOST-NOSAVE,HOST-AS,NOLINK %s
92 // NOTE: each run needs `-c %s 2>&1` so FileCheck sees the driver output.
93 // Verify that --[no-]cuda-gpu-arch arguments are handled correctly.
94 // a) --no-cuda-gpu-arch=X negates preceding --cuda-gpu-arch=X
95 // RUN: not %clang -### --target=x86_64-linux-gnu --cuda-device-only \
96 // RUN: --cuda-gpu-arch=sm_50 --cuda-gpu-arch=sm_30 \
97 // RUN: --no-cuda-gpu-arch=sm_50 \
98 // RUN: -c %s 2>&1 \
99 // RUN: | FileCheck -check-prefixes ARCH-SM30,NOARCH-SM35,NOARCH-SM50 %s
101 // b) --no-cuda-gpu-arch=X negates more than one preceding --cuda-gpu-arch=X
102 // RUN: not %clang -### --target=x86_64-linux-gnu --cuda-device-only \
103 // RUN: --cuda-gpu-arch=sm_50 --cuda-gpu-arch=sm_50 --cuda-gpu-arch=sm_30 \
104 // RUN: --no-cuda-gpu-arch=sm_50 \
105 // RUN: -c %s 2>&1 \
106 // RUN: | FileCheck -check-prefixes ARCH-SM30,NOARCH-SM35,NOARCH-SM50 %s
108 // c) if --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X
109 // we default to sm_35 -- same as if no --cuda-gpu-arch were passed.
110 // RUN: not %clang -### --target=x86_64-linux-gnu --cuda-device-only \
111 // RUN: --cuda-gpu-arch=sm_50 --cuda-gpu-arch=sm_30 \
112 // RUN: --no-cuda-gpu-arch=sm_50 --no-cuda-gpu-arch=sm_30 \
113 // RUN: -c %s 2>&1 \
114 // RUN: | FileCheck -check-prefixes NOARCH-SM30,ARCH-SM35,NOARCH-SM50 %s
116 // d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X
117 // RUN: not %clang -### --target=x86_64-linux-gnu --cuda-device-only \
118 // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
119 // RUN: --no-cuda-gpu-arch=sm_50 \
120 // RUN: -c %s 2>&1 \
121 // RUN: | FileCheck -check-prefixes ARCH-SM30,ARCH-SM35,NOARCH-SM50 %s
123 // e) --no-cuda-gpu-arch=X does not affect following --cuda-gpu-arch=X
124 // RUN: not %clang -### --target=x86_64-linux-gnu --cuda-device-only \
125 // RUN: --no-cuda-gpu-arch=sm_50 --no-cuda-gpu-arch=sm_30 \
126 // RUN: --cuda-gpu-arch=sm_50 --cuda-gpu-arch=sm_30 \
127 // RUN: -c %s 2>&1 \
128 // RUN: | FileCheck -check-prefixes ARCH-SM30,NOARCH-SM35,ARCH-SM50 %s
130 // f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
131 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
132 // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
133 // RUN: --no-cuda-gpu-arch=all \
134 // RUN: --cuda-gpu-arch=sm_50 \
135 // RUN: -c --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
136 // RUN: | FileCheck -check-prefixes NOARCH-SM30,NOARCH-SM35,ARCH-SM50 %s
138 // g) There's no --cuda-gpu-arch=all
139 // RUN: not %clang -### --target=x86_64-linux-gnu --cuda-device-only \
140 // RUN: --cuda-gpu-arch=all \
141 // RUN: -c %s 2>&1 \
142 // RUN: | FileCheck -check-prefix ARCHALLERROR %s
144 // NOTE: each run needs `-c %s 2>&1` so FileCheck sees the driver output.
145 // Verify that --[no-]cuda-include-ptx arguments are handled correctly.
146 // a) by default we're including PTX for all GPUs.
147 // RUN: not %clang -### --target=x86_64-linux-gnu \
148 // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
149 // RUN: -c %s 2>&1 \
150 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
152 // b) --no-cuda-include-ptx=all disables PTX inclusion for all GPUs
153 // RUN: not %clang -### --target=x86_64-linux-gnu \
154 // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
155 // RUN: --no-cuda-include-ptx=all \
156 // RUN: -c %s 2>&1 \
157 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,NOPTX-SM30 %s
159 // c) --no-cuda-include-ptx=sm_XX disables PTX inclusion for that GPU only.
160 // RUN: not %clang -### --target=x86_64-linux-gnu \
161 // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
162 // RUN: --no-cuda-include-ptx=sm_35 \
163 // RUN: -c %s 2>&1 \
164 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,PTX-SM30 %s
165 // RUN: not %clang -### --target=x86_64-linux-gnu \
166 // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
167 // RUN: --no-cuda-include-ptx=sm_30 \
168 // RUN: -c %s 2>&1 \
169 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,NOPTX-SM30 %s
171 // d) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=all
172 // RUN: not %clang -### --target=x86_64-linux-gnu \
173 // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
174 // RUN: --no-cuda-include-ptx=all --cuda-include-ptx=all \
175 // RUN: -c %s 2>&1 \
176 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
178 // e) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=sm_XX
179 // RUN: not %clang -### --target=x86_64-linux-gnu \
180 // RUN: --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
181 // RUN: --no-cuda-include-ptx=sm_30 --cuda-include-ptx=all \
182 // RUN: -c %s 2>&1 \
183 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
185 // Verify -flto=thin -fwhole-program-vtables handling. Both options should be
186 // passed to the host compilation and neither to the device compilation, and
187 // the driver must not report the "only allowed with '-flto'" error.
188 // RUN: not %clang -### --target=x86_64-linux-gnu -c -flto=thin -fwhole-program-vtables %s 2>&1 \
189 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,INCLUDES-DEVICE,NOLINK,THINLTOWPD %s
190 // THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
191 // Patterns for the --[no-]cuda-gpu-arch runs: which -target-cpu values appear.
192 // ARCH-SM30: "-cc1"{{.*}}"-target-cpu" "sm_30"
193 // NOARCH-SM30-NOT: "-cc1"{{.*}}"-target-cpu" "sm_30"
194 // ARCH-SM35: "-cc1"{{.*}}"-target-cpu" "sm_35"
195 // NOARCH-SM35-NOT: "-cc1"{{.*}}"-target-cpu" "sm_35"
196 // ARCH-SM50: "-cc1"{{.*}}"-target-cpu" "sm_50"
197 // NOARCH-SM50-NOT: "-cc1"{{.*}}"-target-cpu" "sm_50"
198 // ARCHALLERROR: error: unsupported CUDA gpu architecture: all
200 // Match device-side preprocessor and compiler phases with -save-temps.
201 // DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
202 // DEVICE-SAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
203 // DEVICE-SAVE-SAME: "-fcuda-is-device"
204 // DEVICE-SAVE-SAME: "-x" "cuda"
206 // DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
207 // DEVICE-SAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
208 // DEVICE-SAVE-SAME: "-fcuda-is-device"
209 // DEVICE-SAVE-SAME: "-x" "cuda-cpp-output"
211 // Match the job that produces PTX assembly.
212 // DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
213 // DEVICE-NOSAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
214 // THINLTOWPD-NOT: "-flto=thin"
215 // DEVICE-SAME: "-fcuda-is-device"
216 // DEVICE-SM30-SAME: "-target-cpu" "sm_30"
217 // THINLTOWPD-NOT: "-fwhole-program-vtables"
218 // DEVICE-SAME: "-o" "[[PTXFILE:[^"]*]]"
219 // DEVICE-NOSAVE-SAME: "-x" "cuda"
220 // DEVICE-SAVE-SAME: "-x" "ir"
222 // Match the call to ptxas (which assembles PTX to SASS). Anchor on the tool
222 // name first so the unordered argument checks start at the ptxas job.
223 // DEVICE: ptxas
224 // DEVICE-SM30-DAG: "--gpu-name" "sm_30"
225 // DEVICE-DAG: "--output-file" "[[CUBINFILE:[^"]*]]"
226 // DEVICE-DAG: "[[PTXFILE]]"
228 // Match another device-side compilation.
229 // DEVICE2: "-cc1" "-triple" "nvptx64-nvidia-cuda"
230 // DEVICE2-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
231 // DEVICE2-SAME: "-fcuda-is-device"
232 // DEVICE2-SM35-SAME: "-target-cpu" "sm_35"
233 // DEVICE2-SAME: "-o" "[[PTXFILE2:[^"]*]]"
234 // DEVICE2-SAME: "-x" "cuda"
236 // Match another call to ptxas, again anchored on the tool name.
237 // DEVICE2: ptxas
238 // DEVICE2-SM35-DAG: "--gpu-name" "sm_35"
239 // DEVICE2-DAG: "--output-file" "[[CUBINFILE2:[^"]*]]"
240 // DEVICE2-DAG: "[[PTXFILE2]]"
242 // Match no device-side compilation.
243 // NODEVICE-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda"
244 // NODEVICE-NOT: "-fcuda-is-device"
245 // Match the fatbinary job that is handed the device-side outputs.
246 // INCLUDES-DEVICE:fatbinary
247 // INCLUDES-DEVICE-DAG: "--create" "[[FATBINARY:[^"]*]]"
248 // INCLUDES-DEVICE-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE]]"
249 // INCLUDES-DEVICE-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE]]"
250 // INCLUDES-DEVICE2-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE2]]"
251 // INCLUDES-DEVICE2-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE2]]"
253 // Match host-side preprocessor job with -save-temps.
254 // HOST-SAVE: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
255 // HOST-SAVE-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
256 // HOST-SAVE-NOT: "-fcuda-is-device"
257 // HOST-SAVE-SAME: "-x" "cuda"
259 // Match host-side compilation.
260 // HOST: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
261 // HOST-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
262 // THINLTOWPD-SAME: "-flto=thin"
263 // HOST-NOT: "-fcuda-is-device"
264 // There is only one GPU binary after combining it with fatbinary.
265 // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
266 // INCLUDES-DEVICE-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]"
267 // There is only one GPU binary after combining it with fatbinary.
268 // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
269 // THINLTOWPD-SAME: "-fwhole-program-vtables"
270 // HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]"
271 // HOST-NOSAVE-SAME: "-x" "cuda"
272 // HOST-SAVE-SAME: "-x" "cuda-cpp-output"
274 // Match external assembler that uses compilation output.
275 // HOST-AS: "-o" "{{.*}}.o" "[[HOSTOUTPUT]]"
277 // Match no GPU code inclusion.
278 // NOINCLUDES-DEVICE-NOT: "-fcuda-include-gpubinary"
280 // Match no host compilation.
281 // NOHOST-NOT: "-cc1" "-triple"
282 // NOHOST-NOT: "-x" "cuda"
284 // Match linker.
285 // LINK: "{{.*}}{{ld|link}}{{(.exe)?}}"
286 // LINK-SAME: "[[HOSTOUTPUT]]"
288 // Match no linker.
289 // NOLINK-NOT: "{{.*}}{{ld|link}}{{(.exe)?}}"
290 // Match the fatbinary job and which per-GPU PTX (compute_XX) images it is given.
291 // FATBIN-COMMON:fatbinary
292 // FATBIN-COMMON: "--create" "[[FATBINARY:[^"]*]]"
293 // FATBIN-COMMON: "--image=profile=sm_30,file=
294 // PTX-SM30: "--image=profile=compute_30,file=
295 // NOPTX-SM30-NOT: "--image=profile=compute_30,file=
296 // FATBIN-COMMON: "--image=profile=sm_35,file=
297 // PTX-SM35: "--image=profile=compute_35,file=
298 // NOPTX-SM35-NOT: "--image=profile=compute_35,file=