1 // Tests the phases generated for a HIP offloading target for different combinations of:
3 // - Number of gpu architectures;
4 // - Host/device-only compilation;
5 // - User-requested final phase - binary or assembly.
7 // Test single gpu architecture with complete compilation.
9 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
10 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 %s 2>&1 \
11 // RUN: | FileCheck -check-prefixes=BIN,NRD,OLD %s
12 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
13 // RUN: --offload-new-driver --cuda-gpu-arch=gfx803 %s 2>&1 \
14 // RUN: | FileCheck -check-prefixes=BIN,NRD,NEW %s
16 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
17 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \
18 // RUN: | FileCheck -check-prefixes=BIN,RDC %s
20 // BIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
21 // BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
22 // BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
23 // RDC-DAG: [[P12:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
24 // RDC-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
26 // BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH:gfx803]])
27 // BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
28 // BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]])
29 // NRD-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]])
30 // NRD-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]])
31 // RDC-DAG: [[P7:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]])
32 // BIN-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH]])
33 // BIN-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image
34 // NRD-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, hip-fatbin, (device-[[T]])
35 // RDC-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, object, (device-[[T]])
37 // NRD-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, ir
38 // RDC-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, object
39 // NRD-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
40 // NRD-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
41 // OLD-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]])
42 // NEW-DAG: [[P14:[0-9]+]]: clang-linker-wrapper, {[[P13]]}, image, (host-[[T]])
43 // RDC-DAG: [[P14:[0-9]+]]: linker, {[[P13]], [[P11]]}, image, (host-[[T]])
46 // Test single gpu architecture up to the assemble phase.
48 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
49 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 %s -S 2>&1 \
50 // RUN: | FileCheck -check-prefixes=ASM %s
51 // ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
52 // ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
53 // ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
55 // ASM-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (host-[[T]])
56 // ASM-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (host-[[T]])
57 // ASM-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (host-[[T]])
58 // ASM-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (host-[[T]])
61 // Test two gpu architectures with complete compilation with -fno-gpu-rdc.
63 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
64 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s 2>&1 \
65 // RUN: | FileCheck -check-prefixes=NRD2,NCL2 %s
67 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
68 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -c 2>&1 \
69 // RUN: | FileCheck -check-prefixes=NRD2 %s
71 // NRD2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
72 // NRD2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
73 // NRD2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
75 // NRD2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH1:gfx803]])
76 // NRD2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
77 // NRD2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]])
78 // NRD2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]])
79 // NRD2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]])
80 // NRD2-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH1]])
81 // NRD2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image
83 // NRD2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
84 // NRD2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
85 // NRD2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]])
86 // NRD2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]])
87 // NRD2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]])
88 // NRD2-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (device-[[T]], [[ARCH2]])
89 // NRD2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image
90 // NRD2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, hip-fatbin, (device-[[T]])
91 // NRD2-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P17]]}, ir
92 // NRD2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]])
93 // NRD2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]])
94 // NCL2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]])
97 // Test two gpu architectures with complete compilation with -fgpu-rdc.
99 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
100 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -fgpu-rdc 2>&1 \
101 // RUN: | FileCheck -check-prefixes=RDC2,RCL2 %s
103 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
104 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -fgpu-rdc -c 2>&1 \
105 // RUN: | FileCheck -check-prefixes=RDC2,RC2 %s
107 // RCL2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
108 // RCL2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
109 // RCL2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
110 // RCL2-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
111 // RCL2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]])
113 // RDC2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH1:gfx803]])
114 // RDC2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
115 // RDC2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]])
116 // RDC2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH1]])
117 // RCL2-DAG: [[P8:[0-9]+]]: linker, {[[P6]]}, image, (device-[[T]], [[ARCH1]])
118 // RCL2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image
119 // RC2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P6]]}, ir
121 // RDC2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
122 // RDC2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
123 // RDC2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]])
124 // RDC2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, ir, (device-[[T]], [[ARCH2]])
125 // RCL2-DAG: [[P15:[0-9]+]]: linker, {[[P13]]}, image, (device-[[T]], [[ARCH2]])
126 // RCL2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image
127 // RC2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P13]]}, ir
129 // RC2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
130 // RC2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
131 // RC2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
132 // RC2-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
133 // RC2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]])
135 // RCL2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, object, (device-[[T]])
136 // RCL2-DAG: [[P22:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P17]]}, object
137 // RCL2-DAG: [[P23:[0-9]+]]: linker, {[[P20]], [[P22]]}, image, (host-[[T]])
138 // RC2-DAG: [[P23:[0-9]+]]: clang-offload-bundler, {[[P9]], [[P16]], [[P20]]}, object, (host-[[T]])
141 // Test two gpu architectures up to the assemble phase.
143 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
144 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -S 2>&1 \
145 // RUN: | FileCheck -check-prefixes=ASM2 %s
146 // ASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH1:gfx803]])
147 // ASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
148 // ASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH1]])
149 // ASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:sm_35|gfx900]])
150 // ASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
151 // ASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
152 // ASM2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (host-[[T]])
153 // ASM2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (host-[[T]])
154 // ASM2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (host-[[T]])
155 // ASM2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (host-[[T]])
158 // Test single gpu architecture with complete compilation in host-only compilation mode.
161 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
162 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 %s --cuda-host-only 2>&1 \
163 // RUN: | FileCheck -check-prefixes=HBIN %s
164 // HBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
165 // HBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
166 // HBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
167 // HBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
168 // HBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]])
169 // HBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]])
172 // Test single gpu architecture up to the assemble phase in host-only compilation mode.
175 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
176 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 %s --cuda-host-only -S 2>&1 \
177 // RUN: | FileCheck -check-prefixes=HASM %s
178 // HASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
179 // HASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
180 // HASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
181 // HASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
185 // Test two gpu architectures with complete compilation in host-only compilation mode.
188 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
189 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only 2>&1 \
190 // RUN: | FileCheck -check-prefixes=HBIN2 %s
191 // HBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
192 // HBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
193 // HBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
194 // HBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
195 // HBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]])
196 // HBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]])
200 // Test two gpu architectures up to the assemble phase in host-only compilation mode.
203 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
204 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only -S \
205 // RUN: 2>&1 | FileCheck -check-prefixes=HASM2 %s
206 // HASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
207 // HASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
208 // HASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
209 // HASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
213 // Test single gpu architecture with complete compilation in device-only compilation mode.
216 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
217 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 %s --cuda-device-only 2>&1 \
218 // RUN: | FileCheck -check-prefixes=DBIN %s
220 // Test single gpu architecture with complete compilation in device-only
221 // compilation mode with an unused host linker flag.
223 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
224 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 %s --cuda-device-only -Wl,--disable-new-dtags 2>&1 \
225 // RUN: | FileCheck -check-prefixes=DBIN %s
227 // DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
228 // DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
229 // DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
230 // DBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
231 // DBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
232 // DBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]])
233 // DBIN-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, image
234 // DBIN-DAG: [[P7:[0-9]+]]: linker, {[[P6]]}, hip-fatbin, (device-hip, )
235 // DBIN-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P7]]}, hip-fatbin
239 // Test single gpu architecture up to the assemble phase in device-only compilation mode.
242 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases --no-offload-new-driver \
243 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -S --no-gpu-bundle-output 2>&1 \
244 // RUN: | FileCheck -check-prefixes=DASM %s
245 // DASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
246 // DASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
247 // DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
248 // DASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
249 // DASM-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, assembler
250 // DASM-NOT: clang-offload-bundler
254 // Test single gpu architecture with compile to relocatable in device-only compilation mode.
257 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases --no-offload-new-driver \
258 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -fhip-emit-relocatable 2>&1 \
259 // RUN: | FileCheck -check-prefixes=RELOC %s
261 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases --no-offload-new-driver \
262 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -fhip-emit-relocatable -Wl,--disable-new-dtags \
263 // RUN: 2>&1 | FileCheck -check-prefixes=RELOC %s
265 // RELOC-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
266 // RELOC-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
267 // RELOC-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
268 // RELOC-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
269 // RELOC-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
271 // RELOC-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P4]]}, object
275 // Test two gpu architectures with compile to relocatable in device-only compilation mode.
278 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases --no-offload-new-driver \
279 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only -fhip-emit-relocatable 2>&1 \
280 // RUN: | FileCheck -check-prefixes=RELOC2 %s
282 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases --no-offload-new-driver \
283 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only -fhip-emit-relocatable \
284 // RUN: -Wl,--disable-new-dtags 2>&1 | FileCheck -check-prefixes=RELOC2 %s
286 // RELOC2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
287 // RELOC2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
288 // RELOC2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
289 // RELOC2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
290 // RELOC2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
291 // RELOC2-NOT: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]])
292 // RELOC2-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P4]]}, object
293 // RELOC2-DAG: [[P6:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH2:gfx900]])
294 // RELOC2-DAG: [[P7:[0-9]+]]: preprocessor, {[[P6]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
295 // RELOC2-DAG: [[P8:[0-9]+]]: compiler, {[[P7]]}, ir, (device-[[T]], [[ARCH2]])
296 // RELOC2-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (device-[[T]], [[ARCH2]])
297 // RELOC2-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-[[T]], [[ARCH2]])
298 // RELOC2-NOT: linker
299 // RELOC2-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P10]]}, object
303 // Test two gpu architectures with complete compilation in device-only compilation mode.
306 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases --no-offload-new-driver \
307 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only \
308 // RUN: 2>&1 | FileCheck -check-prefixes=DBIN2 %s
310 // Test two gpu architectures with complete compilation in device-only
311 // compilation mode with an unused host linker flag.
313 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases --no-offload-new-driver \
314 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only \
315 // RUN: -Wl,--disable-new-dtags 2>&1 | FileCheck -check-prefixes=DBIN2 %s
317 // DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
318 // DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
319 // DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
320 // DBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
321 // DBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
322 // DBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]])
323 // DBIN2-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, image
324 // DBIN2-DAG: [[P7:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
325 // DBIN2-DAG: [[P8:[0-9]+]]: preprocessor, {[[P7]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
326 // DBIN2-DAG: [[P9:[0-9]+]]: compiler, {[[P8]]}, ir, (device-[[T]], [[ARCH2]])
327 // DBIN2-DAG: [[P10:[0-9]+]]: backend, {[[P9]]}, assembler, (device-[[T]], [[ARCH2]])
328 // DBIN2-DAG: [[P11:[0-9]+]]: assembler, {[[P10]]}, object, (device-[[T]], [[ARCH2]])
329 // DBIN2-DAG: [[P12:[0-9]+]]: linker, {[[P11]]}, image, (device-[[T]], [[ARCH2]])
330 // DBIN2-DAG: [[P13:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P12]]}, image
331 // DBIN2-DAG: [[P14:[0-9]+]]: linker, {[[P6]], [[P13]]}, hip-fatbin, (device-hip, )
332 // DBIN2-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P14]]}, hip-fatbin
336 // Test two gpu architectures up to the assemble phase in device-only compilation mode.
339 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu --no-offload-new-driver \
340 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
341 // RUN: --cuda-device-only -S -o %t.s 2>&1 \
342 // RUN: | FileCheck -check-prefixes=DASM2,DASM2-NOBUNDLE %s
343 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu --no-offload-new-driver \
344 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
345 // RUN: --cuda-device-only -S -o %t.s --no-gpu-bundle-output 2>&1 \
346 // RUN: | FileCheck -check-prefixes=DASM2,DASM2-NOBUNDLE %s
347 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu --no-offload-new-driver \
348 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
349 // RUN: --cuda-device-only -S 2>&1 \
350 // RUN: | FileCheck -check-prefixes=DASM2,DASM2-NOBUNDLE %s
351 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu --no-offload-new-driver \
352 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
353 // RUN: --cuda-device-only -S --gpu-bundle-output 2>&1 \
354 // RUN: | FileCheck -check-prefixes=DASM2,DASM2-BUNDLE %s
355 // DASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
356 // DASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
357 // DASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
358 // DASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
359 // DASM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, assembler
360 // DASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
361 // DASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
362 // DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
363 // DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
364 // DASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P8]]}, assembler
365 // DASM2-BUNDLE: [[P10:[0-9]+]]: clang-offload-bundler, {[[P4]], [[P9]]}, assembler, (device-hip, )
366 // DASM2-NOBUNDLE-NOT: clang-offload-bundler, {[[P4]], [[P9]]}, assembler, (device-hip, )
370 // Test linking two objects with two gpu architectures.
372 // RUN: rm -rf %t && mkdir %t
373 // RUN: touch %t/obj1.o %t/obj2.o
375 // RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
376 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o 2>&1 \
377 // RUN: | FileCheck -check-prefixes=L2,NL2 %s
379 // RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
380 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
381 // RUN: -fgpu-rdc 2>&1 | FileCheck -check-prefixes=L2,RL2,RL2-EM %s
383 // RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
384 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
385 // RUN: -fgpu-rdc --cuda-device-only 2>&1 | FileCheck -check-prefixes=L2,RL2,RL2-DEV %s
387 // RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
388 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
389 // RUN: -fgpu-rdc --cuda-device-only -Wl,--disable-new-dtags 2>&1 \
390 // RUN: | FileCheck -check-prefixes=L2,RL2,RL2-DEV %s
392 // RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
393 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
394 // RUN: -fgpu-rdc --cuda-device-only --no-gpu-bundle-output 2>&1 \
395 // RUN: | FileCheck -check-prefixes=L2,RL2,RL2-NB %s
397 // RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
398 // RUN: --no-offload-new-driver --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
399 // RUN: -fgpu-rdc --cuda-device-only --no-gpu-bundle-output -Wl,--disable-new-dtags 2>&1 \
400 // RUN: | FileCheck -check-prefixes=L2,RL2,RL2-NB %s
402 // L2-DAG: [[P0:[0-9]+]]: input, "{{.*}}obj1.o", object
403 // RL2-DAG: [[P1:[0-9]+]]: clang-offload-unbundler, {[[P0]]}, object
404 // L2-DAG: [[P2:[0-9]+]]: input, "{{.*}}obj2.o", object
405 // RL2-DAG: [[P3:[0-9]+]]: clang-offload-unbundler, {[[P2]]}, object
407 // RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T:hip]], [[ARCH1:gfx803]])
408 // RL2-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P4]]}, image
409 // RL2-DAG: [[P6:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T]], [[ARCH2:gfx900]])
410 // RL2-DAG: [[P7:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P6]]}, image
411 // RL2-DEV-DAG: [[P8:[0-9]+]]: linker, {[[P5]], [[P7]]}, hip-fatbin, (device-[[T]])
412 // RL2-DEV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, hip-fatbin
413 // RL2-EM-DAG: [[P8:[0-9]+]]: linker, {[[P5]], [[P7]]}, object, (device-[[T]])
414 // RL2-EM-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, object
415 // RL2-NB-NOT: linker
416 // RL2-NB-NOT: offload
418 // NL2-DAG: [[P4:[0-9]+]]: linker, {[[P0]], [[P2]]}, image
419 // RL2-EM-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]], [[P9]]}, image, (host-[[T]])
420 // RL2-DEV-NOT: linker
423 // Test one gpu architecture up to the preprocessor expansion output phase in device-only
424 // compilation mode. no bundle.
426 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
427 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 %s \
428 // RUN: --no-offload-new-driver --cuda-device-only -E 2>&1 \
429 // RUN: | FileCheck -check-prefixes=PPE,PPEN %s
431 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
432 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 %s \
433 // RUN: --no-offload-new-driver --cuda-device-only -E --no-gpu-bundle-output 2>&1 \
434 // RUN: | FileCheck -check-prefixes=PPE,PPEN %s
436 // Test one gpu architecture up to the preprocessor expansion output phase in device-only
437 // compilation mode. bundle.
439 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
440 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 %s \
441 // RUN: --no-offload-new-driver --cuda-device-only -E --gpu-bundle-output 2>&1 \
442 // RUN: | FileCheck -check-prefixes=PPE,PPEB %s
444 // Test two gpu architectures up to the preprocessor expansion output phase in device-only
445 // compilation mode. no bundle.
447 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
448 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
449 // RUN: --no-offload-new-driver --cuda-device-only -E 2>&1 \
450 // RUN: | FileCheck -check-prefixes=PPE2,PPE2N %s
452 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
453 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
454 // RUN: --no-offload-new-driver --cuda-device-only -E --no-gpu-bundle-output 2>&1 \
455 // RUN: | FileCheck -check-prefixes=PPE2,PPE2N %s
457 // Test two gpu architectures up to the preprocessor expansion output phase in device-only
458 // compilation mode. bundle.
460 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
461 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
462 // RUN: --no-offload-new-driver --cuda-device-only -E --gpu-bundle-output 2>&1 \
463 // RUN: | FileCheck -check-prefixes=PPE2,PPE2B %s
465 // Test one gpu architecture up to the LLVM IR output phase in device-only
466 // compilation mode. no bundle.
468 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
469 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 %s \
470 // RUN: --no-offload-new-driver --cuda-device-only -c -emit-llvm 2>&1 \
471 // RUN: | FileCheck -check-prefixes=LLVM %s
473 // Test two gpu architectures up to the LLVM IR output phase in device-only
474 // compilation mode. bundle.
476 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
477 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
478 // RUN: --no-offload-new-driver --cuda-device-only -c -emit-llvm -o %t.bc --gpu-bundle-output 2>&1 \
479 // RUN: | FileCheck -check-prefixes=LLVM2 %s
481 // Test two gpu architectures up to the LLVM IR output phase in device-only
482 // compilation mode with bundled preprocessor expansion as input. bundle.
484 // RUN: %clang -x hip-cpp-output --target=x86_64-unknown-linux-gnu \
485 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
486 // RUN: --no-offload-new-driver --cuda-device-only -c -emit-llvm -o %t.bc --gpu-bundle-output 2>&1 \
487 // RUN: | FileCheck -check-prefixes=PPELLVM2 %s
489 // PPE-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
490 // PPE-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
491 // PPE-DAG: [[P2:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P1]]}, [[T]]-cpp-output
492 // PPEB-DAG: [[P3:[0-9]+]]: clang-offload-bundler, {[[P2]]}, [[T]]-cpp-output, (device-hip, )
493 // PPEN-NOT: clang-offload-bundler, {{.*}}, [[T]]-cpp-output, (device-hip, )
496 // PPE2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
497 // PPE2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
498 // PPE2-DAG: [[P2:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P1]]}, [[T]]-cpp-output
499 // PPE2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
500 // PPE2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
501 // PPE2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P6]]}, [[T]]-cpp-output
502 // PPE2B-DAG: [[P10:[0-9]+]]: clang-offload-bundler, {[[P2]], [[P9]]}, [[T]]-cpp-output, (device-hip, )
503 // PPE2N-NOT: clang-offload-bundler, {{.*}}, [[T]]-cpp-output, (device-hip, )
506 // LLVM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
507 // LLVM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
508 // LLVM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
509 // LLVM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, ir, (device-[[T]], [[ARCH]])
510 // LLVM-NOT: clang-offload-bundler
513 // LLVM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
514 // LLVM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
515 // LLVM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
516 // LLVM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, ir, (device-[[T]], [[ARCH]])
517 // LLVM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, ir
518 // LLVM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
519 // LLVM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
520 // LLVM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
521 // LLVM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, ir, (device-[[T]], [[ARCH2]])
522 // LLVM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P8]]}, ir
523 // LLVM2-DAG: [[P10:[0-9]+]]: clang-offload-bundler, {[[P4]], [[P9]]}, ir, (device-hip, )
526 // PPELLVM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]]-cpp-output
527 // PPELLVM2-DAG: [[P1:[0-9]+]]: clang-offload-unbundler, {[[P0]]}, hip-cpp-output
528 // PPELLVM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH:gfx803]])
529 // PPELLVM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, ir, (device-[[T]], [[ARCH]])
530 // PPELLVM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, ir
531 // PPELLVM2-DAG: [[P7:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH2:gfx900]])
532 // PPELLVM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, ir, (device-[[T]], [[ARCH2]])
533 // PPELLVM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P8]]}, ir
534 // PPELLVM2-DAG: [[P10:[0-9]+]]: clang-offload-bundler, {[[P4]], [[P9]]}, ir, (device-hip, )
535 // PPELLVM2-NOT: host
537 // Test mixed HIP and C++ compilation. HIP program should have HIP offload kind.
538 // C++ program should have no offload kind.
540 // Test compile empty.hip and empty.cpp.
541 // RUN: %clang --target=x86_64-unknown-linux-gnu --no-offload-new-driver \
542 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
543 // RUN: -c %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED %s
544 // RUN: %clang --target=x86_64-unknown-linux-gnu --no-offload-new-driver \
545 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
546 // RUN: -c %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED-NEG %s
548 // Test compile and link empty.hip and empty.cpp.
549 // RUN: %clang --target=x86_64-unknown-linux-gnu --no-offload-new-driver \
550 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
551 // RUN: %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED %s
552 // RUN: %clang --target=x86_64-unknown-linux-gnu --no-offload-new-driver \
553 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
554 // RUN: %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED-NEG %s
556 // Test compile and link empty.hip and empty.cpp with --hip-link -fgpu-rdc.
557 // RUN: %clang --target=x86_64-unknown-linux-gnu --hip-link -fgpu-rdc --no-offload-new-driver \
558 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
559 // RUN: %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED %s
560 // RUN: %clang --target=x86_64-unknown-linux-gnu --hip-link -fgpu-rdc --no-offload-new-driver \
561 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
562 // RUN: %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED-NEG %s
564 // Test compile and link -x hip empty.hip and -x c++ empty.cpp.
565 // RUN: %clang --target=x86_64-unknown-linux-gnu --no-offload-new-driver \
566 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
567 // RUN: -x hip %S/Inputs/empty.hip -x c++ %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED %s
568 // RUN: %clang --target=x86_64-unknown-linux-gnu --no-offload-new-driver \
569 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
570 // RUN: -x hip %S/Inputs/empty.hip -x c++ %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED-NEG %s
572 // Test compile and link -x hip empty.hip and empty.cpp.
573 // RUN: %clang --target=x86_64-unknown-linux-gnu --no-offload-new-driver \
574 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
575 // RUN: -x hip %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED2 %s
576 // RUN: %clang --target=x86_64-unknown-linux-gnu --no-offload-new-driver \
577 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
578 // RUN: -x hip %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED2-NEG %s
580 // Test compile and link empty.hip and -x hip empty.cpp. Here -x hip precedes only empty.cpp, so the positive and the negative check must both run this same command line.
581 // RUN: %clang --target=x86_64-unknown-linux-gnu --no-offload-new-driver \
582 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
583 // RUN: %S/Inputs/empty.hip -x hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED2 %s
584 // RUN: %clang --target=x86_64-unknown-linux-gnu --no-offload-new-driver \
585 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
586 // RUN: %S/Inputs/empty.hip -x hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED2-NEG %s
588 // MIXED-DAG: input, "{{.*}}empty.hip", hip, (host-hip)
589 // MIXED-DAG: input, "{{.*}}empty.hip", hip, (device-hip, gfx803)
590 // MIXED-DAG: input, "{{.*}}empty.hip", hip, (device-hip, gfx900)
591 // MIXED-DAG: input, "{{.*}}empty.cpp", c++
592 // MIXED-NEG-NOT: input, "{{.*}}empty.cpp", c++, (host-hip)
593 // MIXED-NEG-NOT: input, "{{.*}}empty.cpp", c++, (device-hip
595 // MIXED2-DAG: input, "{{.*}}empty.hip", hip, (host-hip)
596 // MIXED2-DAG: input, "{{.*}}empty.hip", hip, (device-hip, gfx803)
597 // MIXED2-DAG: input, "{{.*}}empty.hip", hip, (device-hip, gfx900)
598 // MIXED2-DAG: input, "{{.*}}empty.cpp", hip, (host-hip)
599 // MIXED2-DAG: input, "{{.*}}empty.cpp", hip, (device-hip, gfx803)
600 // MIXED2-DAG: input, "{{.*}}empty.cpp", hip, (device-hip, gfx900)
601 // MIXED2-NEG-NOT: input, "{{.*}}empty.cpp", c++
603 // Test HIP bitcode to bitcode linking. Input should be bundled or unbundled bitcode, and
604 // output should be unbundled linked bitcode
606 // RUN: touch %t/bitcodeA.bc
607 // RUN: touch %t/bitcodeB.bc
608 // RUN: %clang -ccc-print-phases --hip-link -emit-llvm --cuda-device-only \
609 // RUN: --no-offload-new-driver --offload-arch=gfx906 %t/bitcodeA.bc %t/bitcodeB.bc 2>&1 \
610 // RUN: | FileCheck -check-prefixes=CHECK %s
612 // CHECK: [[A0:[0-9]+]]: input, "{{.*}}bitcodeA.bc", ir
613 // CHECK: [[A1:[0-9]+]]: clang-offload-unbundler, {[[A0]]}, ir
614 // CHECK: [[A2:[0-9]+]]: compiler, {[[A1]]}, ir, (device-hip, [[ARCH:gfx906]])
615 // CHECK: [[A3:[0-9]+]]: backend, {[[A2]]}, ir, (device-hip, [[ARCH]])
617 // CHECK: [[B0:[0-9]+]]: input, "{{.*}}bitcodeB.bc", ir
618 // CHECK: [[B1:[0-9]+]]: clang-offload-unbundler, {[[B0]]}, ir
619 // CHECK: [[B2:[0-9]+]]: compiler, {[[B1]]}, ir, (device-hip, [[ARCH]])
620 // CHECK: [[B3:[0-9]+]]: backend, {[[B2]]}, ir, (device-hip, [[ARCH]])
622 // CHECK: [[L0:[0-9]+]]: linker, {[[A3]], [[B3]]}, ir, (device-hip, [[ARCH]])
623 // CHECK: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH]])" {[[L0]]}, ir
626 // Test the phases using the new driver in LTO mode.
628 // RUN: %clang -### --target=x86_64-linux-gnu --offload-new-driver -ccc-print-phases \
629 // RUN: --offload-arch=gfx90a --offload-arch=gfx908 -foffload-lto -fgpu-rdc -c %s 2>&1 \
630 // RUN: | FileCheck -check-prefix=LTO %s
631 // LTO: 0: input, "[[INPUT:.+]]", hip, (host-hip)
632 // LTO-NEXT: 1: preprocessor, {0}, hip-cpp-output, (host-hip)
633 // LTO-NEXT: 2: compiler, {1}, ir, (host-hip)
634 // LTO-NEXT: 3: input, "[[INPUT]]", hip, (device-hip, gfx908)
635 // LTO-NEXT: 4: preprocessor, {3}, hip-cpp-output, (device-hip, gfx908)
636 // LTO-NEXT: 5: compiler, {4}, ir, (device-hip, gfx908)
637 // LTO-NEXT: 6: backend, {5}, lto-bc, (device-hip, gfx908)
638 // LTO-NEXT: 7: offload, "device-hip (amdgcn-amd-amdhsa:gfx908)" {6}, lto-bc
639 // LTO-NEXT: 8: input, "[[INPUT]]", hip, (device-hip, gfx90a)
640 // LTO-NEXT: 9: preprocessor, {8}, hip-cpp-output, (device-hip, gfx90a)
641 // LTO-NEXT: 10: compiler, {9}, ir, (device-hip, gfx90a)
642 // LTO-NEXT: 11: backend, {10}, lto-bc, (device-hip, gfx90a)
643 // LTO-NEXT: 12: offload, "device-hip (amdgcn-amd-amdhsa:gfx90a)" {11}, lto-bc
644 // LTO-NEXT: 13: clang-offload-packager, {7, 12}, image, (device-hip)
645 // LTO-NEXT: 14: offload, "host-hip (x86_64-unknown-linux-gnu)" {2}, "device-hip (x86_64-unknown-linux-gnu)" {13}, ir
646 // LTO-NEXT: 15: backend, {14}, assembler, (host-hip)
647 // LTO-NEXT: 16: assembler, {15}, object, (host-hip)
650 // Test the new driver when not bundling (device-only compilation emitting LLVM IR).
652 // RUN: %clang -### --target=x86_64-linux-gnu --offload-new-driver -ccc-print-phases \
653 // RUN: --offload-device-only --offload-arch=gfx90a -emit-llvm -c %s 2>&1 \
654 // RUN: | FileCheck -check-prefix=DEVICE-ONLY %s
655 // DEVICE-ONLY: 0: input, "[[INPUT:.+]]", hip, (device-hip, gfx90a)
656 // DEVICE-ONLY-NEXT: 1: preprocessor, {0}, hip-cpp-output, (device-hip, gfx90a)
657 // DEVICE-ONLY-NEXT: 2: compiler, {1}, ir, (device-hip, gfx90a)
658 // DEVICE-ONLY-NEXT: 3: backend, {2}, ir, (device-hip, gfx90a)
659 // DEVICE-ONLY-NEXT: 4: offload, "device-hip (amdgcn-amd-amdhsa:gfx90a)" {3}, none