1 // Tests the bindings generated for a CUDA offloading target for different combinations of:
3 // - Number of gpu architectures;
4 // - Host/device-only compilation;
5 // - User-requested final phase - binary or assembly.
6 // It parallels the cuda-phases.cu test, but verifies whether the output file is temporary or not.
8 // It's hard to check whether file name is temporary in a portable
9 // way. Instead we check whether we've generated a permanent name on
10 // device side, which appends '-device-cuda-<triple>' suffix.
12 // REQUIRES: powerpc-registered-target
13 // REQUIRES: nvptx-registered-target
16 // Test single gpu architecture with complete compilation.
17 // No intermediary device files should have "-device-cuda..." in the name.
19 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s 2>&1 \
20 // RUN: | FileCheck -check-prefix=BIN %s
21 // BIN: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
22 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
23 // BIN: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output:
24 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
25 // BIN: # "nvptx64-nvidia-cuda" - "NVPTX::Linker",{{.*}} output:
26 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
27 // BIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output:
28 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
29 // BIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
32 // Test single gpu architecture up to the assemble phase.
34 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s -S 2>&1 \
35 // RUN: | FileCheck -check-prefix=ASM %s
36 // ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
37 // ASM-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
40 // Test two gpu architectures with complete compilation.
42 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
43 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
44 // RUN: | FileCheck -check-prefixes=BIN2,AOUT %s
45 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
46 // RUN: --offload-arch=sm_30,sm_35 %s 2>&1 \
47 // RUN: | FileCheck -check-prefixes=BIN2,AOUT %s
48 // .. same, but with explicitly specified output.
49 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
50 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -o %t/out 2>&1 \
51 // RUN: | FileCheck -check-prefixes=BIN2,TOUT %s
52 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
53 // RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
54 // RUN: | FileCheck -check-prefixes=BIN2,TOUT %s
55 // BIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
56 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
57 // BIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output:
58 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
59 // BIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
60 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
61 // BIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output:
62 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
63 // BIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Linker",{{.*}} output:
64 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
65 // BIN2: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output:
66 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
67 // AOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
68 // TOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "{{.*}}/out"
70 // .. same, but with -fsyntax-only
71 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
72 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
73 // RUN: | FileCheck -check-prefix=SYN %s
74 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
75 // RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
76 // RUN: | FileCheck -check-prefix=SYN %s
77 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
78 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
79 // RUN: | FileCheck -check-prefix=SYN %s
80 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
81 // RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
82 // RUN: | FileCheck -check-prefix=SYN %s
84 // SYN: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
85 // SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
86 // SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
89 // .. and with --offload-new-driver
90 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
91 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 --offload-new-driver %s 2>&1 \
92 // RUN: | FileCheck -check-prefix=NDSYN %s
93 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
94 // RUN: --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \
95 // RUN: | FileCheck -check-prefix=NDSYN %s
96 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
97 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --offload-new-driver 2>&1 \
98 // RUN: | FileCheck -check-prefix=NDSYN %s
99 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
100 // RUN: --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \
101 // RUN: | FileCheck -check-prefix=NDSYN %s
102 // NDSYN-NOT: inputs:
103 // NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
104 // NDSYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
105 // NDSYN-NEXT: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
106 // NDSYN-NOT: inputs:
110 // Test two gpu architectures up to the assemble phase.
112 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
113 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
114 // RUN: | FileCheck -check-prefix=ASM2 %s
115 // ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
116 // ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s"
117 // ASM2-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
120 // Test one or more gpu architectures with complete compilation in host-only mode.
123 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
124 // RUN: --cuda-gpu-arch=sm_30 %s --cuda-host-only 2>&1 \
125 // RUN: | FileCheck -check-prefix=HBIN %s
126 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
127 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only 2>&1 \
128 // RUN: | FileCheck -check-prefix=HBIN %s
129 // HBIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output:
130 // HBIN-NOT: cuda-bindings-device-cuda-nvptx64
131 // HBIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
134 // Test one or more gpu architectures up to the assemble phase in host-only mode.
137 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
138 // RUN: --cuda-gpu-arch=sm_30 %s --cuda-host-only -S 2>&1 \
139 // RUN: | FileCheck -check-prefix=HASM %s
140 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
141 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only -S 2>&1 \
142 // RUN: | FileCheck -check-prefix=HASM %s
143 // HASM: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
146 // Test single gpu architecture with complete compilation in device-only mode.
149 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
150 // RUN: --cuda-gpu-arch=sm_30 %s --cuda-device-only 2>&1 \
151 // RUN: | FileCheck -check-prefix=DBIN %s
152 // DBIN: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
153 // DBIN-NOT: cuda-bindings-device-cuda-nvptx64
154 // DBIN: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.o"
157 // Test single gpu architecture up to the assemble phase in device-only mode.
160 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
161 // RUN: --cuda-gpu-arch=sm_30 %s --cuda-device-only -S 2>&1 \
162 // RUN: | FileCheck -check-prefix=DASM %s
163 // DASM: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
166 // Test two gpu architectures with complete compilation in device-only mode.
169 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
170 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \
171 // RUN: | FileCheck -check-prefix=DBIN2 %s
172 // DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
173 // DBIN2-NOT: cuda-bindings-device-cuda-nvptx64
174 // DBIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.o"
175 // DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
176 // DBIN2-NOT: cuda-bindings-device-cuda-nvptx64
177 // DBIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.o"
180 // Test two gpu architectures up to the assemble phase in device-only mode.
183 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
184 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S 2>&1 \
185 // RUN: | FileCheck -check-prefix=DASM2 %s
186 // DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
187 // DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s"
190 // Ensure we output the user's specified name in device-only mode.
192 // RUN: %clang -target powerpc64le-ibm-linux-gnu -### \
193 // RUN: --cuda-gpu-arch=sm_52 --cuda-device-only -c -o foo.o --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
194 // RUN: | FileCheck -check-prefix=D_ONLY %s
195 // RUN: %clang -target powerpc64le-ibm-linux-gnu -### --offload-new-driver \
196 // RUN: --cuda-gpu-arch=sm_52 --cuda-device-only -c -o foo.o --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
197 // RUN: | FileCheck -check-prefix=D_ONLY %s
201 // Check to make sure we can generate multiple outputs for device-only
202 // compilation, and that specifying '-o' in that case is rejected with an error.
204 // RUN: %clang -### -target powerpc64le-ibm-linux-gnu --offload-new-driver -ccc-print-bindings \
205 // RUN: --offload-arch=sm_70 --offload-arch=sm_52 --offload-device-only -c %s 2>&1 \
206 // RUN: | FileCheck -check-prefix=MULTI-D-ONLY %s
207 // MULTI-D-ONLY: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_70:.+]]"
208 // MULTI-D-ONLY-NEXT: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_70]]"], output: "[[CUBIN_70:.+]]"
209 // MULTI-D-ONLY-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_52:.+]]"
210 // MULTI-D-ONLY-NEXT: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_52]]"], output: "[[CUBIN_52:.+]]"
212 // RUN: not %clang -### --target=powerpc64le-ibm-linux-gnu --offload-new-driver -ccc-print-bindings \
213 // RUN: --offload-arch=sm_70 --offload-arch=sm_52 --offload-device-only -c -o %t %s 2>&1 \
214 // RUN: | FileCheck -check-prefix=MULTI-D-ONLY-O %s
215 // MULTI-D-ONLY-O: error: cannot specify -o when generating multiple output files
218 // Check to ensure that we can use '-fsyntax-only' for CUDA output with the new driver.
221 // RUN: %clang -### -target powerpc64le-ibm-linux-gnu --offload-new-driver \
222 // RUN: -fsyntax-only --offload-arch=sm_70 --offload-arch=sm_52 -c --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda %s 2>&1 \
223 // RUN: | FileCheck -check-prefix=SYNTAX-ONLY %s
224 // SYNTAX-ONLY: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-fsyntax-only"
225 // SYNTAX-ONLY: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-fsyntax-only"
226 // SYNTAX-ONLY: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu"{{.*}}"-fsyntax-only"
229 // Check to ensure that we can use '-save-temps' when operating in RDC-mode.
231 // RUN: %clang -### -target powerpc64le-ibm-linux-gnu -save-temps --offload-new-driver \
232 // RUN: -fgpu-rdc --offload-arch=sm_70 --offload-arch=sm_52 -c --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda %s 2>&1 \
233 // RUN: | FileCheck -check-prefix=SAVE-TEMPS %s
234 // SAVE-TEMPS: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_52"
235 // SAVE-TEMPS: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
236 // SAVE-TEMPS: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu"
239 // Check to ensure that we cannot use '-foffload-lto' when not operating in RDC-mode.
241 // RUN: not %clang -### --target=powerpc64le-ibm-linux-gnu -fno-gpu-rdc --offload-new-driver \
242 // RUN: -foffload-lto --offload-arch=sm_70 --offload-arch=sm_52 -c %s 2>&1 \
243 // RUN: | FileCheck -check-prefix=LTO-NO-RDC %s
244 // LTO-NO-RDC: error: unsupported option '-foffload-lto' for language mode '-fno-gpu-rdc'