1 // Tests the phases generated for a HIP offloading target for different combinations of:
3 // - Number of gpu architectures;
4 // - Host/device-only compilation;
5 // - User-requested final phase - binary or assembly.
7 // REQUIRES: x86-registered-target
8 // REQUIRES: amdgpu-registered-target
10 // Test single gpu architecture with complete compilation.
12 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
13 // RUN: --cuda-gpu-arch=gfx803 %s 2>&1 \
14 // RUN: | FileCheck -check-prefixes=BIN,NRD,OLD %s
15 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
16 // RUN: --offload-new-driver --cuda-gpu-arch=gfx803 %s 2>&1 \
17 // RUN: | FileCheck -check-prefixes=BIN,NRD,NEW %s
19 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
20 // RUN: --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \
21 // RUN: | FileCheck -check-prefixes=BIN,RDC %s
23 // BIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
24 // BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
25 // BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
26 // RDC-DAG: [[P12:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
27 // RDC-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
29 // BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH:gfx803]])
30 // BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
31 // BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]])
32 // NRD-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]])
33 // NRD-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]])
34 // RDC-DAG: [[P7:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]])
35 // BIN-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH]])
36 // BIN-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image
37 // NRD-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, hip-fatbin, (device-[[T]])
38 // RDC-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, object, (device-[[T]])
40 // NRD-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, ir
41 // RDC-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, object
42 // NRD-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
43 // NRD-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
44 // OLD-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]])
45 // NEW-DAG: [[P14:[0-9]+]]: clang-linker-wrapper, {[[P13]]}, image, (host-[[T]])
46 // RDC-DAG: [[P14:[0-9]+]]: linker, {[[P13]], [[P11]]}, image, (host-[[T]])
49 // Test single gpu architecture up to the assemble phase.
51 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
52 // RUN: --cuda-gpu-arch=gfx803 %s -S 2>&1 \
53 // RUN: | FileCheck -check-prefixes=ASM %s
54 // ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
55 // ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
56 // ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
58 // ASM-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (host-[[T]])
59 // ASM-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (host-[[T]])
60 // ASM-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (host-[[T]])
61 // ASM-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (host-[[T]])
64 // Test two gpu architectures with complete compilation with -fno-gpu-rdc.
66 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
67 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s 2>&1 \
68 // RUN: | FileCheck -check-prefixes=NRD2,NCL2 %s
70 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
71 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -c 2>&1 \
72 // RUN: | FileCheck -check-prefixes=NRD2 %s
74 // NRD2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
75 // NRD2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
76 // NRD2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
78 // NRD2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH1:gfx803]])
79 // NRD2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
80 // NRD2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]])
81 // NRD2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]])
82 // NRD2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]])
83 // NRD2-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH1]])
84 // NRD2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image
86 // NRD2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
87 // NRD2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
88 // NRD2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]])
89 // NRD2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]])
90 // NRD2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]])
91 // NRD2-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (device-[[T]], [[ARCH2]])
92 // NRD2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image
93 // NRD2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, hip-fatbin, (device-[[T]])
94 // NRD2-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P17]]}, ir
95 // NRD2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]])
96 // NRD2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]])
97 // NCL2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]])
100 // Test two gpu architectures with complete compilation with -fgpu-rdc.
102 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
103 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -fgpu-rdc 2>&1 \
104 // RUN: | FileCheck -check-prefixes=RDC2,RCL2 %s
106 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
107 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -fgpu-rdc -c 2>&1 \
108 // RUN: | FileCheck -check-prefixes=RDC2,RC2 %s
110 // RCL2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
111 // RCL2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
112 // RCL2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
113 // RCL2-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
114 // RCL2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]])
116 // RDC2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH1:gfx803]])
117 // RDC2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
118 // RDC2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]])
119 // RDC2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH1]])
120 // RCL2-DAG: [[P8:[0-9]+]]: linker, {[[P6]]}, image, (device-[[T]], [[ARCH1]])
121 // RCL2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image
122 // RC2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P6]]}, ir
124 // RDC2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
125 // RDC2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
126 // RDC2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]])
127 // RDC2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, ir, (device-[[T]], [[ARCH2]])
128 // RCL2-DAG: [[P15:[0-9]+]]: linker, {[[P13]]}, image, (device-[[T]], [[ARCH2]])
129 // RCL2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image
130 // RC2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P13]]}, ir
132 // RC2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
133 // RC2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
134 // RC2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
135 // RC2-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
136 // RC2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]])
138 // RCL2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, object, (device-[[T]])
139 // RCL2-DAG: [[P22:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P17]]}, object
140 // RCL2-DAG: [[P23:[0-9]+]]: linker, {[[P20]], [[P22]]}, image, (host-[[T]])
141 // RC2-DAG: [[P23:[0-9]+]]: clang-offload-bundler, {[[P9]], [[P16]], [[P20]]}, object, (host-[[T]])
144 // Test two gpu architectures up to the assemble phase.
146 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
147 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -S 2>&1 \
148 // RUN: | FileCheck -check-prefixes=ASM2 %s
149 // ASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH1:gfx803]])
150 // ASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
151 // ASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH1]])
152 // ASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:sm_35|gfx900]])
153 // ASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
154 // ASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
155 // ASM2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (host-[[T]])
156 // ASM2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (host-[[T]])
157 // ASM2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (host-[[T]])
158 // ASM2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (host-[[T]])
161 // Test single gpu architecture with complete compilation in host-only compilation mode.
164 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
165 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-host-only 2>&1 \
166 // RUN: | FileCheck -check-prefixes=HBIN %s
167 // HBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
168 // HBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
169 // HBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
170 // HBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
171 // HBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]])
172 // HBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]])
175 // Test single gpu architecture up to the assemble phase in host-only compilation mode.
178 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
179 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-host-only -S 2>&1 \
180 // RUN: | FileCheck -check-prefixes=HASM %s
181 // HASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
182 // HASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
183 // HASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
184 // HASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
188 // Test two gpu architectures with complete compilation in host-only compilation mode.
191 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
192 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only 2>&1 \
193 // RUN: | FileCheck -check-prefixes=HBIN2 %s
194 // HBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
195 // HBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
196 // HBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
197 // HBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
198 // HBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]])
199 // HBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]])
203 // Test two gpu architectures up to the assemble phase in host-only compilation mode.
206 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
207 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only -S \
208 // RUN: 2>&1 | FileCheck -check-prefixes=HASM2 %s
209 // HASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
210 // HASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
211 // HASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
212 // HASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
216 // Test single gpu architecture with complete compilation in device-only compilation mode.
219 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
220 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only 2>&1 \
221 // RUN: | FileCheck -check-prefixes=DBIN %s
223 // Test single gpu architecture with complete compilation in device-only
224 // compilation mode with an unused host linker flag.
226 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
227 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -Wl,--disable-new-dtags 2>&1 \
228 // RUN: | FileCheck -check-prefixes=DBIN %s
230 // DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
231 // DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
232 // DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
233 // DBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
234 // DBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
235 // DBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]])
236 // DBIN-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, image
237 // DBIN-DAG: [[P7:[0-9]+]]: linker, {[[P6]]}, hip-fatbin, (device-hip, )
238 // DBIN-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P7]]}, hip-fatbin
242 // Test single gpu architecture up to the assemble phase in device-only compilation mode.
245 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
246 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -S --no-gpu-bundle-output 2>&1 \
247 // RUN: | FileCheck -check-prefixes=DASM %s
248 // DASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
249 // DASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
250 // DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
251 // DASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
252 // DASM-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, assembler
253 // DASM-NOT: clang-offload-bundler
257 // Test single gpu architecture with compile to relocatable in device-only compilation mode.
260 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
261 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -fhip-emit-relocatable 2>&1 \
262 // RUN: | FileCheck -check-prefixes=RELOC %s
264 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
265 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -fhip-emit-relocatable -Wl,--disable-new-dtags \
266 // RUN: 2>&1 | FileCheck -check-prefixes=RELOC %s
268 // RELOC-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
269 // RELOC-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
270 // RELOC-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
271 // RELOC-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
272 // RELOC-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
274 // RELOC-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P4]]}, object
278 // Test two gpu architectures with compile to relocatable in device-only compilation mode.
281 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
282 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only -fhip-emit-relocatable 2>&1 \
283 // RUN: | FileCheck -check-prefixes=RELOC2 %s
285 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
286 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only -fhip-emit-relocatable \
287 // RUN: -Wl,--disable-new-dtags 2>&1 | FileCheck -check-prefixes=RELOC2 %s
289 // RELOC2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
290 // RELOC2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
291 // RELOC2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
292 // RELOC2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
293 // RELOC2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
294 // RELOC2-NOT: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]])
295 // RELOC2-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P4]]}, object
296 // RELOC2-DAG: [[P6:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH2:gfx900]])
297 // RELOC2-DAG: [[P7:[0-9]+]]: preprocessor, {[[P6]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
298 // RELOC2-DAG: [[P8:[0-9]+]]: compiler, {[[P7]]}, ir, (device-[[T]], [[ARCH2]])
299 // RELOC2-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (device-[[T]], [[ARCH2]])
300 // RELOC2-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-[[T]], [[ARCH2]])
301 // RELOC2-NOT: linker
302 // RELOC2-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P10]]}, object
306 // Test two gpu architectures with complete compilation in device-only compilation mode.
309 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
310 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only \
311 // RUN: 2>&1 | FileCheck -check-prefixes=DBIN2 %s
313 // Test two gpu architectures with complete compilation in device-only
314 // compilation mode with an unused host linker flag.
316 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
317 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only \
318 // RUN: -Wl,--disable-new-dtags 2>&1 | FileCheck -check-prefixes=DBIN2 %s
320 // DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
321 // DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
322 // DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
323 // DBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
324 // DBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
325 // DBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]])
326 // DBIN2-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, image
327 // DBIN2-DAG: [[P7:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
328 // DBIN2-DAG: [[P8:[0-9]+]]: preprocessor, {[[P7]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
329 // DBIN2-DAG: [[P9:[0-9]+]]: compiler, {[[P8]]}, ir, (device-[[T]], [[ARCH2]])
330 // DBIN2-DAG: [[P10:[0-9]+]]: backend, {[[P9]]}, assembler, (device-[[T]], [[ARCH2]])
331 // DBIN2-DAG: [[P11:[0-9]+]]: assembler, {[[P10]]}, object, (device-[[T]], [[ARCH2]])
332 // DBIN2-DAG: [[P12:[0-9]+]]: linker, {[[P11]]}, image, (device-[[T]], [[ARCH2]])
333 // DBIN2-DAG: [[P13:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P12]]}, image
334 // DBIN2-DAG: [[P14:[0-9]+]]: linker, {[[P6]], [[P13]]}, hip-fatbin, (device-hip, )
335 // DBIN2-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P14]]}, hip-fatbin
339 // Test two gpu architectures up to the assemble phase in device-only compilation mode.
342 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
343 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
344 // RUN: --cuda-device-only -S -o %t.s 2>&1 \
345 // RUN: | FileCheck -check-prefixes=DASM2,DASM2-NOBUNDLE %s
346 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
347 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
348 // RUN: --cuda-device-only -S -o %t.s --no-gpu-bundle-output 2>&1 \
349 // RUN: | FileCheck -check-prefixes=DASM2,DASM2-NOBUNDLE %s
350 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
351 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
352 // RUN: --cuda-device-only -S 2>&1 \
353 // RUN: | FileCheck -check-prefixes=DASM2,DASM2-NOBUNDLE %s
354 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
355 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
356 // RUN: --cuda-device-only -S --gpu-bundle-output 2>&1 \
357 // RUN: | FileCheck -check-prefixes=DASM2,DASM2-BUNDLE %s
358 // DASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
359 // DASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
360 // DASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
361 // DASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
362 // DASM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, assembler
363 // DASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
364 // DASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
365 // DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
366 // DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
367 // DASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P8]]}, assembler
368 // DASM2-BUNDLE: [[P10:[0-9]+]]: clang-offload-bundler, {[[P4]], [[P9]]}, assembler, (device-hip, )
369 // DASM2-NOBUNDLE-NOT: clang-offload-bundler, {[[P4]], [[P9]]}, assembler, (device-hip, )
373 // Test linking two objects with two gpu architectures.
375 // RUN: rm -rf %t && mkdir %t
376 // RUN: touch %t/obj1.o %t/obj2.o
378 // RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
379 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o 2>&1 \
380 // RUN: | FileCheck -check-prefixes=L2,NL2 %s
382 // RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
383 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
384 // RUN: -fgpu-rdc 2>&1 | FileCheck -check-prefixes=L2,RL2,RL2-EM %s
386 // RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
387 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
388 // RUN: -fgpu-rdc --cuda-device-only 2>&1 | FileCheck -check-prefixes=L2,RL2,RL2-DEV %s
390 // RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
391 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
392 // RUN: -fgpu-rdc --cuda-device-only -Wl,--disable-new-dtags 2>&1 \
393 // RUN: | FileCheck -check-prefixes=L2,RL2,RL2-DEV %s
395 // RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
396 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
397 // RUN: -fgpu-rdc --cuda-device-only --no-gpu-bundle-output 2>&1 \
398 // RUN: | FileCheck -check-prefixes=L2,RL2,RL2-NB %s
400 // RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
401 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
402 // RUN: -fgpu-rdc --cuda-device-only --no-gpu-bundle-output -Wl,--disable-new-dtags 2>&1 \
403 // RUN: | FileCheck -check-prefixes=L2,RL2,RL2-NB %s
405 // L2-DAG: [[P0:[0-9]+]]: input, "{{.*}}obj1.o", object
406 // RL2-DAG: [[P1:[0-9]+]]: clang-offload-unbundler, {[[P0]]}, object
407 // L2-DAG: [[P2:[0-9]+]]: input, "{{.*}}obj2.o", object
408 // RL2-DAG: [[P3:[0-9]+]]: clang-offload-unbundler, {[[P2]]}, object
410 // RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T:hip]], [[ARCH1:gfx803]])
411 // RL2-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P4]]}, image
412 // RL2-DAG: [[P6:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T]], [[ARCH2:gfx900]])
413 // RL2-DAG: [[P7:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P6]]}, image
414 // RL2-DEV-DAG: [[P8:[0-9]+]]: linker, {[[P5]], [[P7]]}, hip-fatbin, (device-[[T]])
415 // RL2-DEV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, hip-fatbin
416 // RL2-EM-DAG: [[P8:[0-9]+]]: linker, {[[P5]], [[P7]]}, object, (device-[[T]])
417 // RL2-EM-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, object
418 // RL2-NB-NOT: linker
419 // RL2-NB-NOT: offload
421 // NL2-DAG: [[P4:[0-9]+]]: linker, {[[P0]], [[P2]]}, image
422 // RL2-EM-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]], [[P9]]}, image, (host-[[T]])
423 // RL2-DEV-NOT: linker
426 // Test one gpu architecture up to the preprocessor expansion output phase in device-only
427 // compilation mode. no bundle.
429 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
430 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 %s \
431 // RUN: --cuda-device-only -E 2>&1 \
432 // RUN: | FileCheck -check-prefixes=PPE,PPEN %s
434 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
435 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 %s \
436 // RUN: --cuda-device-only -E --no-gpu-bundle-output 2>&1 \
437 // RUN: | FileCheck -check-prefixes=PPE,PPEN %s
439 // Test one gpu architecture up to the preprocessor expansion output phase in device-only
440 // compilation mode. bundle.
442 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
443 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 %s \
444 // RUN: --cuda-device-only -E --gpu-bundle-output 2>&1 \
445 // RUN: | FileCheck -check-prefixes=PPE,PPEB %s
447 // Test two gpu architectures up to the preprocessor expansion output phase in device-only
448 // compilation mode. no bundle.
450 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
451 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
452 // RUN: --cuda-device-only -E 2>&1 \
453 // RUN: | FileCheck -check-prefixes=PPE2,PPE2N %s
455 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
456 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
457 // RUN: --cuda-device-only -E --no-gpu-bundle-output 2>&1 \
458 // RUN: | FileCheck -check-prefixes=PPE2,PPE2N %s
460 // Test two gpu architectures up to the preprocessor expansion output phase in device-only
461 // compilation mode. bundle.
463 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
464 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
465 // RUN: --cuda-device-only -E --gpu-bundle-output 2>&1 \
466 // RUN: | FileCheck -check-prefixes=PPE2,PPE2B %s
468 // Test one gpu architecture up to the LLVM IR output phase in device-only
469 // compilation mode. no bundle.
471 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
472 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 %s \
473 // RUN: --cuda-device-only -c -emit-llvm 2>&1 \
474 // RUN: | FileCheck -check-prefixes=LLVM %s
476 // Test two gpu architectures up to the LLVM IR output phase in device-only
477 // compilation mode. bundle.
479 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
480 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
481 // RUN: --cuda-device-only -c -emit-llvm -o %t.bc --gpu-bundle-output 2>&1 \
482 // RUN: | FileCheck -check-prefixes=LLVM2 %s
484 // Test two gpu architectures up to the LLVM IR output phase in device-only
485 // compilation mode with bundled preprocessor expansion as input. bundle.
487 // RUN: %clang -x hip-cpp-output --target=x86_64-unknown-linux-gnu \
488 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
489 // RUN: --cuda-device-only -c -emit-llvm -o %t.bc --gpu-bundle-output 2>&1 \
490 // RUN: | FileCheck -check-prefixes=PPELLVM2 %s
492 // PPE-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
493 // PPE-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
494 // PPE-DAG: [[P2:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P1]]}, [[T]]-cpp-output
495 // PPEB-DAG: [[P3:[0-9]+]]: clang-offload-bundler, {[[P2]]}, [[T]]-cpp-output, (device-hip, )
496 // PPEN-NOT: clang-offload-bundler, {{.*}}, [[T]]-cpp-output, (device-hip, )
499 // PPE2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
500 // PPE2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
501 // PPE2-DAG: [[P2:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P1]]}, [[T]]-cpp-output
502 // PPE2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
503 // PPE2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
504 // PPE2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P6]]}, [[T]]-cpp-output
505 // PPE2B-DAG: [[P10:[0-9]+]]: clang-offload-bundler, {[[P2]], [[P9]]}, [[T]]-cpp-output, (device-hip, )
506 // PPE2N-NOT: clang-offload-bundler, {{.*}}, [[T]]-cpp-output, (device-hip, )
509 // LLVM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
510 // LLVM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
511 // LLVM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
512 // LLVM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, ir, (device-[[T]], [[ARCH]])
513 // LLVM-NOT: clang-offload-bundler
516 // LLVM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
517 // LLVM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
518 // LLVM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
519 // LLVM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, ir, (device-[[T]], [[ARCH]])
520 // LLVM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, ir
521 // LLVM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
522 // LLVM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
523 // LLVM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
524 // LLVM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, ir, (device-[[T]], [[ARCH2]])
525 // LLVM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P8]]}, ir
526 // LLVM2-DAG: [[P10:[0-9]+]]: clang-offload-bundler, {[[P4]], [[P9]]}, ir, (device-hip, )
// Expected phase graph when the input is already hip-cpp-output: the input is
// passed through clang-offload-unbundler, there is no preprocessor phase, and
// the unbundled result is compiled separately for gfx803 and gfx900 before the
// two offload results are bundled again.
529 // PPELLVM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]]-cpp-output
530 // PPELLVM2-DAG: [[P1:[0-9]+]]: clang-offload-unbundler, {[[P0]]}, hip-cpp-output
531 // PPELLVM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH:gfx803]])
532 // PPELLVM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, ir, (device-[[T]], [[ARCH]])
533 // PPELLVM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, ir
534 // PPELLVM2-DAG: [[P7:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH2:gfx900]])
535 // PPELLVM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, ir, (device-[[T]], [[ARCH2]])
536 // PPELLVM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P8]]}, ir
537 // PPELLVM2-DAG: [[P10:[0-9]+]]: clang-offload-bundler, {[[P4]], [[P9]]}, ir, (device-hip, )
// The text "host" must not appear in the output that follows the phases above.
538 // PPELLVM2-NOT: host
540 // Test mixed HIP and C++ compilation. HIP program should have HIP offload kind.
541 // C++ program should have no offload kind.
// Each test below has two runs: one is matched against the positive patterns
// and one against the corresponding -NEG patterns, so the negative patterns
// are checked by their own FileCheck invocation.
543 // Test compile empty.hip and empty.cpp.
544 // RUN: %clang --target=x86_64-unknown-linux-gnu \
545 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
546 // RUN: -c %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED %s
547 // RUN: %clang --target=x86_64-unknown-linux-gnu \
548 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
549 // RUN: -c %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED-NEG %s
551 // Test compile and link empty.hip and empty.cpp.
// Same command as the compile-only case but without -c; the same input
// classification patterns are expected to hold.
552 // RUN: %clang --target=x86_64-unknown-linux-gnu \
553 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
554 // RUN: %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED %s
555 // RUN: %clang --target=x86_64-unknown-linux-gnu \
556 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
557 // RUN: %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED-NEG %s
559 // Test compile and link empty.hip and empty.cpp with --hip-link -fgpu-rdc.
// Only the classification of the input phases is checked here; the patterns
// are the same ones used without --hip-link -fgpu-rdc.
560 // RUN: %clang --target=x86_64-unknown-linux-gnu --hip-link -fgpu-rdc \
561 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
562 // RUN: %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED %s
563 // RUN: %clang --target=x86_64-unknown-linux-gnu --hip-link -fgpu-rdc \
564 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
565 // RUN: %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED-NEG %s
567 // Test compile and link -x hip empty.hip and -x c++ empty.cpp.
// Spelling out the language of each file with -x is expected to give the same
// input classification as relying on the file extensions.
568 // RUN: %clang --target=x86_64-unknown-linux-gnu \
569 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
570 // RUN: -x hip %S/Inputs/empty.hip -x c++ %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED %s
571 // RUN: %clang --target=x86_64-unknown-linux-gnu \
572 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
573 // RUN: -x hip %S/Inputs/empty.hip -x c++ %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED-NEG %s
575 // Test compile and link -x hip empty.hip and empty.cpp.
// -x hip precedes both files and no later -x resets it, so empty.cpp is
// expected to show up as a hip input (host and both device variants) and
// never as a c++ input.
576 // RUN: %clang --target=x86_64-unknown-linux-gnu \
577 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
578 // RUN: -x hip %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED2 %s
579 // RUN: %clang --target=x86_64-unknown-linux-gnu \
580 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
581 // RUN: -x hip %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED2-NEG %s
583 // Test compile and link empty.hip and -x hip empty.cpp.
// Here -x hip appears only before empty.cpp; empty.hip is recognised by its
// extension. Both the positive and the negative run must use this same
// argument order, otherwise the negative patterns would be checked against
// the output of a different command.
584 // RUN: %clang --target=x86_64-unknown-linux-gnu \
585 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
586 // RUN: %S/Inputs/empty.hip -x hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED2 %s
587 // RUN: %clang --target=x86_64-unknown-linux-gnu \
588 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
589 // RUN: %S/Inputs/empty.hip -x hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED2-NEG %s
// empty.hip must be listed as a host input and as a device input for each of
// gfx803 and gfx900; empty.cpp must be listed as a c++ input.
591 // MIXED-DAG: input, "{{.*}}empty.hip", hip, (host-hip)
592 // MIXED-DAG: input, "{{.*}}empty.hip", hip, (device-hip, gfx803)
593 // MIXED-DAG: input, "{{.*}}empty.hip", hip, (device-hip, gfx900)
594 // MIXED-DAG: input, "{{.*}}empty.cpp", c++
// The c++ pattern above is a substring match and would also accept a line that
// carries an offload annotation, so the negative patterns below reject any
// empty.cpp input tagged with host-hip or device-hip.
595 // MIXED-NEG-NOT: input, "{{.*}}empty.cpp", c++, (host-hip)
596 // MIXED-NEG-NOT: input, "{{.*}}empty.cpp", c++, (device-hip
// When both files are treated as HIP, each of empty.hip and empty.cpp must be
// listed as a hip input for the host and for each of gfx803 and gfx900.
598 // MIXED2-DAG: input, "{{.*}}empty.hip", hip, (host-hip)
599 // MIXED2-DAG: input, "{{.*}}empty.hip", hip, (device-hip, gfx803)
600 // MIXED2-DAG: input, "{{.*}}empty.hip", hip, (device-hip, gfx900)
601 // MIXED2-DAG: input, "{{.*}}empty.cpp", hip, (host-hip)
602 // MIXED2-DAG: input, "{{.*}}empty.cpp", hip, (device-hip, gfx803)
603 // MIXED2-DAG: input, "{{.*}}empty.cpp", hip, (device-hip, gfx900)
// empty.cpp must never be listed as a c++ input in this configuration.
604 // MIXED2-NEG-NOT: input, "{{.*}}empty.cpp", c++
606 // Test HIP bitcode to bitcode linking. Input should be bundled or unbundled bitcode, and
607 // output should be unbundled linked bitcode.
// %t is used as a directory below, so make sure it exists before creating the
// empty placeholder inputs; mkdir -p does nothing if it is already there.
// RUN: mkdir -p %t
609 // RUN: touch %t/bitcodeA.bc
610 // RUN: touch %t/bitcodeB.bc
611 // RUN: %clang -ccc-print-phases --hip-link -emit-llvm --cuda-device-only \
612 // RUN: --offload-arch=gfx906 %t/bitcodeA.bc %t/bitcodeB.bc 2>&1 \
613 // RUN: | FileCheck -check-prefixes=CHECK %s
// First input: unbundled, then compiler and backend phases producing ir for
// gfx906.
615 // CHECK: [[A0:[0-9]+]]: input, "{{.*}}bitcodeA.bc", ir
616 // CHECK: [[A1:[0-9]+]]: clang-offload-unbundler, {[[A0]]}, ir
617 // CHECK: [[A2:[0-9]+]]: compiler, {[[A1]]}, ir, (device-hip, [[ARCH:gfx906]])
618 // CHECK: [[A3:[0-9]+]]: backend, {[[A2]]}, ir, (device-hip, [[ARCH]])
// Second input: the same chain, for the same architecture.
620 // CHECK: [[B0:[0-9]+]]: input, "{{.*}}bitcodeB.bc", ir
621 // CHECK: [[B1:[0-9]+]]: clang-offload-unbundler, {[[B0]]}, ir
622 // CHECK: [[B2:[0-9]+]]: compiler, {[[B1]]}, ir, (device-hip, [[ARCH]])
623 // CHECK: [[B3:[0-9]+]]: backend, {[[B2]]}, ir, (device-hip, [[ARCH]])
// Both backend results are linked into a single ir output, which is then
// wrapped in the device offload action.
625 // CHECK: [[L0:[0-9]+]]: linker, {[[A3]], [[B3]]}, ir, (device-hip, [[ARCH]])
626 // CHECK: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH]])" {[[L0]]}, ir
629 // Test the phases using the new driver in LTO mode.
631 // RUN: %clang -### --target=x86_64-linux-gnu --offload-new-driver -ccc-print-phases \
632 // RUN: --offload-arch=gfx90a --offload-arch=gfx908 -foffload-lto -fgpu-rdc -c %s 2>&1 \
633 // RUN: | FileCheck -check-prefix=LTO %s
634 // LTO: 0: input, "[[INPUT:.+]]", hip, (host-hip)
635 // LTO-NEXT: 1: preprocessor, {0}, hip-cpp-output, (host-hip)
636 // LTO-NEXT: 2: compiler, {1}, ir, (host-hip)
637 // LTO-NEXT: 3: input, "[[INPUT]]", hip, (device-hip, gfx908)
638 // LTO-NEXT: 4: preprocessor, {3}, hip-cpp-output, (device-hip, gfx908)
639 // LTO-NEXT: 5: compiler, {4}, ir, (device-hip, gfx908)
640 // LTO-NEXT: 6: backend, {5}, lto-bc, (device-hip, gfx908)
641 // LTO-NEXT: 7: offload, "device-hip (amdgcn-amd-amdhsa:gfx908)" {6}, lto-bc
642 // LTO-NEXT: 8: input, "[[INPUT]]", hip, (device-hip, gfx90a)
643 // LTO-NEXT: 9: preprocessor, {8}, hip-cpp-output, (device-hip, gfx90a)
644 // LTO-NEXT: 10: compiler, {9}, ir, (device-hip, gfx90a)
645 // LTO-NEXT: 11: backend, {10}, lto-bc, (device-hip, gfx90a)
646 // LTO-NEXT: 12: offload, "device-hip (amdgcn-amd-amdhsa:gfx90a)" {11}, lto-bc
647 // LTO-NEXT: 13: clang-offload-packager, {7, 12}, image, (device-hip)
648 // LTO-NEXT: 14: offload, "host-hip (x86_64-unknown-linux-gnu)" {2}, "device-hip (x86_64-unknown-linux-gnu)" {13}, ir
649 // LTO-NEXT: 15: backend, {14}, assembler, (host-hip)
650 // LTO-NEXT: 16: assembler, {15}, object, (host-hip)