clang/test/Driver/hip-phases.hip

   1 // Tests the phases generated for a HIP offloading target for different
   2 // combinations of:
   3 // - Number of gpu architectures;
   4 // - Host/device-only compilation;
   5 // - User-requested final phase - binary or assembly.
   6
   7 // REQUIRES: x86-registered-target
   8 // REQUIRES: amdgpu-registered-target
   9 //
  10 // Test single gpu architecture with complete compilation.
  11 //
  12 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
  13 // RUN: --cuda-gpu-arch=gfx803 %s 2>&1 \
  14 // RUN: | FileCheck -check-prefixes=BIN,NRD,OLD %s
  15 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
  16 // RUN: --offload-new-driver --cuda-gpu-arch=gfx803 %s 2>&1 \
  17 // RUN: | FileCheck -check-prefixes=BIN,NRD,NEW %s
  18 //
  19 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
  20 // RUN: --cuda-gpu-arch=gfx803 -fgpu-rdc %s 2>&1 \
  21 // RUN: | FileCheck -check-prefixes=BIN,RDC %s
  22 //
  23 // BIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
  24 // BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
  25 // BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
  26 // RDC-DAG: [[P12:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
  27 // RDC-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
  28
  29 // BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH:gfx803]])
  30 // BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
  31 // BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]])
  32 // NRD-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]])
  33 // NRD-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]])
  34 // RDC-DAG: [[P7:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH]])
  35 // BIN-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH]])
  36 // BIN-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image
  37 // NRD-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, hip-fatbin, (device-[[T]])
  38 // RDC-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, object, (device-[[T]])
  39
  40 // NRD-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, ir
  41 // RDC-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, object
  42 // NRD-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
  43 // NRD-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
  44 // OLD-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]])
  45 // NEW-DAG: [[P14:[0-9]+]]: clang-linker-wrapper, {[[P13]]}, image, (host-[[T]])
  46 // RDC-DAG: [[P14:[0-9]+]]: linker, {[[P13]], [[P11]]}, image, (host-[[T]])
  47
  48 //
  49 // Test single gpu architecture up to the assemble phase.
  50 //
  51 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
  52 // RUN: --cuda-gpu-arch=gfx803 %s -S 2>&1 \
  53 // RUN: | FileCheck -check-prefixes=ASM %s
  54 // ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
  55 // ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
  56 // ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
  57
  58 // ASM-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (host-[[T]])
  59 // ASM-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (host-[[T]])
  60 // ASM-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (host-[[T]])
  61 // ASM-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (host-[[T]])
  62
  63 //
  64 // Test two gpu architectures with complete compilation with -fno-gpu-rdc.
  65 //
  66 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
  67 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s 2>&1 \
  68 // RUN: | FileCheck -check-prefixes=NRD2,NCL2 %s
  69
  70 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
  71 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -c 2>&1 \
  72 // RUN: | FileCheck -check-prefixes=NRD2 %s
  73
  74 // NRD2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
  75 // NRD2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
  76 // NRD2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
  77
  78 // NRD2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH1:gfx803]])
  79 // NRD2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
  80 // NRD2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]])
  81 // NRD2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]])
  82 // NRD2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]])
  83 // NRD2-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH1]])
  84 // NRD2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image
  85
  86 // NRD2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
  87 // NRD2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
  88 // NRD2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]])
  89 // NRD2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]])
  90 // NRD2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]])
  91 // NRD2-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (device-[[T]], [[ARCH2]])
  92 // NRD2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image
  93 // NRD2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, hip-fatbin, (device-[[T]])
  94 // NRD2-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P17]]}, ir
  95 // NRD2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]])
  96 // NRD2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]])
  97 // NCL2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]])
  98
  99 //
 100 // Test two gpu architectures with complete compilation with -fgpu-rdc.
 101 //
 102 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 103 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -fgpu-rdc 2>&1 \
 104 // RUN: | FileCheck -check-prefixes=RDC2,RCL2 %s
 105
 106 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 107 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -fgpu-rdc -c 2>&1 \
 108 // RUN: | FileCheck -check-prefixes=RDC2,RC2 %s
 109
 110 // RCL2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
 111 // RCL2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
 112 // RCL2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
 113 // RCL2-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
 114 // RCL2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]])
 115
 116 // RDC2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH1:gfx803]])
 117 // RDC2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
 118 // RDC2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]])
 119 // RDC2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, ir, (device-[[T]], [[ARCH1]])
 120 // RCL2-DAG: [[P8:[0-9]+]]: linker, {[[P6]]}, image, (device-[[T]], [[ARCH1]])
 121 // RCL2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image
 122 // RC2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P6]]}, ir
 123
 124 // RDC2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
 125 // RDC2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
 126 // RDC2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]])
 127 // RDC2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, ir, (device-[[T]], [[ARCH2]])
 128 // RCL2-DAG: [[P15:[0-9]+]]: linker, {[[P13]]}, image, (device-[[T]], [[ARCH2]])
 129 // RCL2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image
 130 // RC2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P13]]}, ir
 131
 132 // RC2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
 133 // RC2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
 134 // RC2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
 135 // RC2-DAG: [[P19:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
 136 // RC2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]])
 137
 138 // RCL2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, object, (device-[[T]])
 139 // RCL2-DAG: [[P22:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P17]]}, object
 140 // RCL2-DAG: [[P23:[0-9]+]]: linker, {[[P20]], [[P22]]}, image, (host-[[T]])
 141 // RC2-DAG: [[P23:[0-9]+]]: clang-offload-bundler, {[[P9]], [[P16]], [[P20]]}, object, (host-[[T]])
 142
 143 //
 144 // Test two gpu architectures up to the assemble phase.
 145 //
 146 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 147 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s -S 2>&1 \
 148 // RUN: | FileCheck -check-prefixes=ASM2 %s
 149 // ASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH1:gfx803]])
 150 // ASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
 151 // ASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH1]])
 152 // ASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:sm_35|gfx900]])
 153 // ASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
 154 // ASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
 155 // ASM2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (host-[[T]])
 156 // ASM2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (host-[[T]])
 157 // ASM2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (host-[[T]])
 158 // ASM2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (host-[[T]])
 159
 160 //
 161 // Test single gpu architecture with complete compilation in host-only
 162 // compilation mode.
 163 //
 164 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 165 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-host-only 2>&1 \
 166 // RUN: | FileCheck -check-prefixes=HBIN %s
 167 // HBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
 168 // HBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
 169 // HBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
 170 // HBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
 171 // HBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]])
 172 // HBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]])
 173 // HBIN-NOT: device
 174 //
 175 // Test single gpu architecture up to the assemble phase in host-only
 176 // compilation mode.
 177 //
 178 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 179 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-host-only -S 2>&1 \
 180 // RUN: | FileCheck -check-prefixes=HASM %s
 181 // HASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
 182 // HASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
 183 // HASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
 184 // HASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
 185 // HASM-NOT: device
 186
 187 //
 188 // Test two gpu architectures with complete compilation in host-only
 189 // compilation mode.
 190 //
 191 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 192 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only 2>&1 \
 193 // RUN: | FileCheck -check-prefixes=HBIN2 %s
 194 // HBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
 195 // HBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
 196 // HBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
 197 // HBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
 198 // HBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]])
 199 // HBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]])
 200 // HBIN2-NOT: device
 201
 202 //
 203 // Test two gpu architectures up to the assemble phase in host-only
 204 // compilation mode.
 205 //
 206 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 207 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-host-only -S \
 208 // RUN: 2>&1 | FileCheck -check-prefixes=HASM2 %s
 209 // HASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (host-[[T]])
 210 // HASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
 211 // HASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
 212 // HASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
 213 // HASM2-NOT: device
 214
 215 //
 216 // Test single gpu architecture with complete compilation in device-only
 217 // compilation mode.
 218 //
 219 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 220 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only 2>&1 \
 221 // RUN: | FileCheck -check-prefixes=DBIN %s
 222 //
 223 // Test single gpu architecture with complete compilation in device-only
 224 // compilation mode with an unused host linker flag.
 225 //
 226 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 227 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -Wl,--disable-new-dtags 2>&1 \
 228 // RUN: | FileCheck -check-prefixes=DBIN %s
 229
 230 // DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
 231 // DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
 232 // DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
 233 // DBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
 234 // DBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
 235 // DBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]])
 236 // DBIN-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, image
 237 // DBIN-DAG: [[P7:[0-9]+]]: linker, {[[P6]]}, hip-fatbin, (device-hip, )
 238 // DBIN-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P7]]}, hip-fatbin
 239 // DBIN-NOT: host
 240
 241 //
 242 // Test single gpu architecture up to the assemble phase in device-only
 243 // compilation mode.
 244 //
 245 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 246 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -S --no-gpu-bundle-output 2>&1 \
 247 // RUN: | FileCheck -check-prefixes=DASM %s
 248 // DASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
 249 // DASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
 250 // DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
 251 // DASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
 252 // DASM-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, assembler
 253 // DASM-NOT: clang-offload-bundler
 254 // DASM-NOT: host
 255
 256 //
 257 // Test single gpu architecture with compile to relocatable in device-only
 258 // compilation mode.
 259 //
 260 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 261 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -fhip-emit-relocatable 2>&1 \
 262 // RUN: | FileCheck -check-prefixes=RELOC %s
 263 //
 264 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 265 // RUN: --cuda-gpu-arch=gfx803 %s --cuda-device-only -fhip-emit-relocatable -Wl,--disable-new-dtags \
 266 // RUN: 2>&1 | FileCheck -check-prefixes=RELOC %s
 267 //
 268 // RELOC-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
 269 // RELOC-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
 270 // RELOC-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
 271 // RELOC-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
 272 // RELOC-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
 273 // RELOC-NOT: linker
 274 // RELOC-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P4]]}, object
 275 // RELOC-NOT: host
 276
 277 //
 278 // Test two gpu architectures with compile to relocatable in device-only
 279 // compilation mode.
 280 //
 281 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 282 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only -fhip-emit-relocatable 2>&1 \
 283 // RUN: | FileCheck -check-prefixes=RELOC2 %s
 284 //
 285 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 286 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only -fhip-emit-relocatable \
 287 // RUN: -Wl,--disable-new-dtags 2>&1 | FileCheck -check-prefixes=RELOC2 %s
 288 //
 289 // RELOC2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
 290 // RELOC2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
 291 // RELOC2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
 292 // RELOC2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
 293 // RELOC2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
 294 // RELOC2-NOT: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]])
 295 // RELOC2-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P4]]}, object
 296 // RELOC2-DAG: [[P6:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH2:gfx900]])
 297 // RELOC2-DAG: [[P7:[0-9]+]]: preprocessor, {[[P6]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
 298 // RELOC2-DAG: [[P8:[0-9]+]]: compiler, {[[P7]]}, ir, (device-[[T]], [[ARCH2]])
 299 // RELOC2-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (device-[[T]], [[ARCH2]])
 300 // RELOC2-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-[[T]], [[ARCH2]])
 301 // RELOC2-NOT: linker
 302 // RELOC2-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P10]]}, object
 303 // RELOC2-NOT: host
 304
 305 //
 306 // Test two gpu architectures with complete compilation in device-only
 307 // compilation mode.
 308 //
 309 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 310 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only \
 311 // RUN: 2>&1 | FileCheck -check-prefixes=DBIN2 %s
 312 //
 313 // Test two gpu architectures with complete compilation in device-only
 314 // compilation mode with an unused host linker flag.
 315 //
 316 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu -ccc-print-phases \
 317 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s --cuda-device-only \
 318 // RUN: -Wl,--disable-new-dtags 2>&1 | FileCheck -check-prefixes=DBIN2 %s
 319
 320 // DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
 321 // DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
 322 // DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
 323 // DBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
 324 // DBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
 325 // DBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]])
 326 // DBIN2-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, image
 327 // DBIN2-DAG: [[P7:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
 328 // DBIN2-DAG: [[P8:[0-9]+]]: preprocessor, {[[P7]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
 329 // DBIN2-DAG: [[P9:[0-9]+]]: compiler, {[[P8]]}, ir, (device-[[T]], [[ARCH2]])
 330 // DBIN2-DAG: [[P10:[0-9]+]]: backend, {[[P9]]}, assembler, (device-[[T]], [[ARCH2]])
 331 // DBIN2-DAG: [[P11:[0-9]+]]: assembler, {[[P10]]}, object, (device-[[T]], [[ARCH2]])
 332 // DBIN2-DAG: [[P12:[0-9]+]]: linker, {[[P11]]}, image, (device-[[T]], [[ARCH2]])
 333 // DBIN2-DAG: [[P13:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P12]]}, image
 334 // DBIN2-DAG: [[P14:[0-9]+]]: linker, {[[P6]], [[P13]]}, hip-fatbin, (device-hip, )
 335 // DBIN2-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P14]]}, hip-fatbin
 336 // DBIN2-NOT: host
 337
 338 //
 339 // Test two gpu architectures up to the assemble phase in device-only
 340 // compilation mode.
 341 //
 342 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
 343 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
 344 // RUN: --cuda-device-only -S -o %t.s 2>&1 \
 345 // RUN: | FileCheck -check-prefixes=DASM2,DASM2-NOBUNDLE %s
 346 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
 347 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
 348 // RUN: --cuda-device-only -S -o %t.s --no-gpu-bundle-output 2>&1 \
 349 // RUN: | FileCheck -check-prefixes=DASM2,DASM2-NOBUNDLE %s
 350 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
 351 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
 352 // RUN: --cuda-device-only -S 2>&1 \
 353 // RUN: | FileCheck -check-prefixes=DASM2,DASM2-NOBUNDLE %s
 354 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
 355 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
 356 // RUN: --cuda-device-only -S --gpu-bundle-output 2>&1 \
 357 // RUN: | FileCheck -check-prefixes=DASM2,DASM2-BUNDLE %s
 358 // DASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
 359 // DASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
 360 // DASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
 361 // DASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
 362 // DASM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, assembler
 363 // DASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
 364 // DASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
 365 // DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
 366 // DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
 367 // DASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P8]]}, assembler
 368 // DASM2-BUNDLE: [[P10:[0-9]+]]: clang-offload-bundler, {[[P4]], [[P9]]}, assembler, (device-hip, )
 369 // DASM2-NOBUNDLE-NOT: clang-offload-bundler, {[[P4]], [[P9]]}, assembler, (device-hip, )
 370 // DASM2-NOT: host
 371
 372 //
 373 // Test linking two objects with two gpu architectures.
 374 //
 375 // RUN: rm -rf %t && mkdir %t
 376 // RUN: touch %t/obj1.o %t/obj2.o
 377
 378 // RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
 379 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o 2>&1 \
 380 // RUN: | FileCheck -check-prefixes=L2,NL2 %s
 381 //
 382 // RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
 383 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
 384 // RUN: -fgpu-rdc 2>&1 | FileCheck -check-prefixes=L2,RL2,RL2-EM %s
 385 //
 386 // RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
 387 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
 388 // RUN: -fgpu-rdc --cuda-device-only 2>&1 | FileCheck -check-prefixes=L2,RL2,RL2-DEV %s
 389
 390 // RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
 391 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
 392 // RUN: -fgpu-rdc --cuda-device-only -Wl,--disable-new-dtags 2>&1 \
 393 // RUN: | FileCheck -check-prefixes=L2,RL2,RL2-DEV %s
 394
 395 // RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
 396 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
 397 // RUN: -fgpu-rdc --cuda-device-only --no-gpu-bundle-output 2>&1 \
 398 // RUN: | FileCheck -check-prefixes=L2,RL2,RL2-NB %s
 399
 400 // RUN: %clang --target=x86_64-unknown-linux-gnu -ccc-print-phases --hip-link \
 401 // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %t/obj1.o %t/obj2.o \
 402 // RUN: -fgpu-rdc --cuda-device-only --no-gpu-bundle-output -Wl,--disable-new-dtags 2>&1 \
 403 // RUN: | FileCheck -check-prefixes=L2,RL2,RL2-NB %s
 404
 405 // L2-DAG: [[P0:[0-9]+]]: input, "{{.*}}obj1.o", object
 406 // RL2-DAG: [[P1:[0-9]+]]: clang-offload-unbundler, {[[P0]]}, object
 407 // L2-DAG: [[P2:[0-9]+]]: input, "{{.*}}obj2.o", object
 408 // RL2-DAG: [[P3:[0-9]+]]: clang-offload-unbundler, {[[P2]]}, object
 409
 410 // RL2-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T:hip]], [[ARCH1:gfx803]])
 411 // RL2-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P4]]}, image
 412 // RL2-DAG: [[P6:[0-9]+]]: linker, {[[P1]], [[P3]]}, image, (device-[[T]], [[ARCH2:gfx900]])
 413 // RL2-DAG: [[P7:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P6]]}, image
 414 // RL2-DEV-DAG: [[P8:[0-9]+]]: linker, {[[P5]], [[P7]]}, hip-fatbin, (device-[[T]])
 415 // RL2-DEV-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, hip-fatbin
 416 // RL2-EM-DAG: [[P8:[0-9]+]]: linker, {[[P5]], [[P7]]}, object, (device-[[T]])
 417 // RL2-EM-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, object
 418 // RL2-NB-NOT: linker
 419 // RL2-NB-NOT: offload
 420
 421 // NL2-DAG: [[P4:[0-9]+]]: linker, {[[P0]], [[P2]]}, image
 422 // RL2-EM-DAG: [[P4:[0-9]+]]: linker, {[[P1]], [[P3]], [[P9]]}, image, (host-[[T]])
 423 // RL2-DEV-NOT: linker
 424 // RL2-NB-NOT: host
 425
 426 // Test one gpu architecture up to the preprocessor expansion output phase in device-only
 427 // compilation mode. no bundle.
 428 //
 429 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
 430 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 %s \
 431 // RUN: --cuda-device-only -E 2>&1 \
 432 // RUN: | FileCheck -check-prefixes=PPE,PPEN %s
 433
 434 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
 435 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 %s \
 436 // RUN: --cuda-device-only -E --no-gpu-bundle-output 2>&1 \
 437 // RUN: | FileCheck -check-prefixes=PPE,PPEN %s
 438
 439 // Test one gpu architecture up to the preprocessor expansion output phase in device-only
 440 // compilation mode. bundle.
 441
 442 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
 443 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 %s \
 444 // RUN: --cuda-device-only -E --gpu-bundle-output 2>&1 \
 445 // RUN: | FileCheck -check-prefixes=PPE,PPEB %s
 446
 447 // Test two gpu architectures up to the preprocessor expansion output phase in device-only
 448 // compilation mode. no bundle.
 449
 450 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
 451 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
 452 // RUN: --cuda-device-only -E 2>&1 \
 453 // RUN: | FileCheck -check-prefixes=PPE2,PPE2N %s
 454
 455 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
 456 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
 457 // RUN: --cuda-device-only -E --no-gpu-bundle-output 2>&1 \
 458 // RUN: | FileCheck -check-prefixes=PPE2,PPE2N %s
 459
 460 // Test two gpu architectures up to the preprocessor expansion output phase in device-only
 461 // compilation mode. bundle.
 462
 463 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
 464 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
 465 // RUN: --cuda-device-only -E --gpu-bundle-output 2>&1 \
 466 // RUN: | FileCheck -check-prefixes=PPE2,PPE2B %s
 467
 468 // Test one gpu architecture up to the LLVM IR output phase in device-only
 469 // compilation mode. no bundle.
 470 //
 471 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
 472 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 %s \
 473 // RUN: --cuda-device-only -c -emit-llvm 2>&1 \
 474 // RUN: | FileCheck -check-prefixes=LLVM %s
 475
 476 // Test two gpu architectures up to the LLVM IR output phase in device-only
 477 // compilation mode. bundle.
 478 //
 479 // RUN: %clang -x hip --target=x86_64-unknown-linux-gnu \
 480 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
 481 // RUN: --cuda-device-only -c -emit-llvm -o %t.bc --gpu-bundle-output 2>&1 \
 482 // RUN: | FileCheck -check-prefixes=LLVM2 %s
 483
 484 // Test two gpu architectures up to the LLVM IR output phase in device-only
 485 // compilation mode with bundled preprocessor expansion as input. bundle.
 486 //
 487 // RUN: %clang -x hip-cpp-output --target=x86_64-unknown-linux-gnu \
 488 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \
 489 // RUN: --cuda-device-only -c -emit-llvm -o %t.bc --gpu-bundle-output 2>&1 \
 490 // RUN: | FileCheck -check-prefixes=PPELLVM2 %s
 491
 492 // PPE-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
 493 // PPE-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
 494 // PPE-DAG: [[P2:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P1]]}, [[T]]-cpp-output
 495 // PPEB-DAG: [[P3:[0-9]+]]: clang-offload-bundler, {[[P2]]}, [[T]]-cpp-output, (device-hip, )
 496 // PPEN-NOT: clang-offload-bundler, {{.*}}, [[T]]-cpp-output, (device-hip, )
 497 // PPE-NOT: host
 498
 499 // PPE2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
 500 // PPE2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
 501 // PPE2-DAG: [[P2:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P1]]}, [[T]]-cpp-output
 502 // PPE2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
 503 // PPE2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
 504 // PPE2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P6]]}, [[T]]-cpp-output
 505 // PPE2B-DAG: [[P10:[0-9]+]]: clang-offload-bundler, {[[P2]], [[P9]]}, [[T]]-cpp-output, (device-hip, )
 506 // PPE2N-NOT: clang-offload-bundler, {{.*}}, [[T]]-cpp-output, (device-hip, )
 507 // PPE2-NOT: host
 508 // Device phases for one GPU arch (gfx803) ending in a backend action that produces ir: input, preprocessor, compiler, backend. Neither a clang-offload-bundler action nor the text "host" may appear afterwards.
 509 // LLVM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
 510 // LLVM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
 511 // LLVM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
 512 // LLVM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, ir, (device-[[T]], [[ARCH]])
 513 // LLVM-NOT: clang-offload-bundler
 514 // LLVM-NOT: host
 515 // Two GPU archs (gfx803 and gfx900) with ir output: each arch runs input, preprocessor, compiler and backend, is wrapped in an offload action, and a single clang-offload-bundler action combines the two offload actions. The text "host" must not appear afterwards.
 516 // LLVM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]])
 517 // LLVM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
 518 // LLVM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
 519 // LLVM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, ir, (device-[[T]], [[ARCH]])
 520 // LLVM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, ir
 521 // LLVM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]])
 522 // LLVM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
 523 // LLVM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
 524 // LLVM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, ir, (device-[[T]], [[ARCH2]])
 525 // LLVM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P8]]}, ir
 526 // LLVM2-DAG: [[P10:[0-9]+]]: clang-offload-bundler, {[[P4]], [[P9]]}, ir, (device-hip, )
 527 // LLVM2-NOT: host
 528 // Preprocessed (hip-cpp-output) input with two GPU archs and bundled ir output: the input passes through clang-offload-unbundler, each arch then gets compiler, backend and offload actions fed from the unbundled input, and clang-offload-bundler combines both. The text "host" must not appear afterwards.
 529 // PPELLVM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]]-cpp-output
 530 // PPELLVM2-DAG: [[P1:[0-9]+]]: clang-offload-unbundler, {[[P0]]}, hip-cpp-output
 531 // PPELLVM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH:gfx803]])
 532 // PPELLVM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, ir, (device-[[T]], [[ARCH]])
 533 // PPELLVM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, ir
 534 // PPELLVM2-DAG: [[P7:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH2:gfx900]])
 535 // PPELLVM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, ir, (device-[[T]], [[ARCH2]])
 536 // PPELLVM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P8]]}, ir
 537 // PPELLVM2-DAG: [[P10:[0-9]+]]: clang-offload-bundler, {[[P4]], [[P9]]}, ir, (device-hip, )
 538 // PPELLVM2-NOT: host
 539
 540 // Test mixed HIP and C++ compilation. HIP program should have HIP offload kind.
 541 // C++ program should have no offload kind.
 542
 543 // Test compile empty.hip and empty.cpp.
 544 // RUN: %clang --target=x86_64-unknown-linux-gnu \
 545 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
 546 // RUN: -c %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED %s
 547 // RUN: %clang --target=x86_64-unknown-linux-gnu \
 548 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
 549 // RUN: -c %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED-NEG %s
 550
 551 // Test compile and link empty.hip and empty.cpp.
 552 // RUN: %clang --target=x86_64-unknown-linux-gnu \
 553 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
 554 // RUN: %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED %s
 555 // RUN: %clang --target=x86_64-unknown-linux-gnu \
 556 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
 557 // RUN: %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED-NEG %s
 558
 559 // Test compile and link empty.hip and empty.cpp with --hip-link -fgpu-rdc.
 560 // RUN: %clang --target=x86_64-unknown-linux-gnu --hip-link -fgpu-rdc \
 561 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
 562 // RUN: %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED %s
 563 // RUN: %clang --target=x86_64-unknown-linux-gnu --hip-link -fgpu-rdc \
 564 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
 565 // RUN: %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED-NEG %s
 566
 567 // Test compile and link -x hip empty.hip and -x c++ empty.cpp.
 568 // RUN: %clang --target=x86_64-unknown-linux-gnu \
 569 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
 570 // RUN: -x hip %S/Inputs/empty.hip -x c++ %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED %s
 571 // RUN: %clang --target=x86_64-unknown-linux-gnu \
 572 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
 573 // RUN: -x hip %S/Inputs/empty.hip -x c++ %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED-NEG %s
 574
 575 // Test compile and link -x hip empty.hip and empty.cpp.
 576 // RUN: %clang --target=x86_64-unknown-linux-gnu \
 577 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
 578 // RUN: -x hip %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED2 %s
 579 // RUN: %clang --target=x86_64-unknown-linux-gnu \
 580 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
 581 // RUN: -x hip %S/Inputs/empty.hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED2-NEG %s
 582
 583 // Test compile and link empty.hip and -x hip empty.cpp. The positive and the negative run must use the same command line, with -x hip placed only before empty.cpp.
 584 // RUN: %clang --target=x86_64-unknown-linux-gnu \
 585 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
 586 // RUN: %S/Inputs/empty.hip -x hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED2 %s
 587 // RUN: %clang --target=x86_64-unknown-linux-gnu \
 588 // RUN: -ccc-print-phases --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 \
 589 // RUN: %S/Inputs/empty.hip -x hip %S/Inputs/empty.cpp 2>&1 | FileCheck -check-prefixes=MIXED2-NEG %s
 590 // Expectations for a HIP file mixed with a C++ file: empty.hip appears as a host-hip input and as a device-hip input for gfx803 and gfx900, while empty.cpp appears as a c++ input. The negative prefix verifies that empty.cpp is never tagged with a host-hip or device-hip offload kind.
 591 // MIXED-DAG: input, "{{.*}}empty.hip", hip, (host-hip)
 592 // MIXED-DAG: input, "{{.*}}empty.hip", hip, (device-hip, gfx803)
 593 // MIXED-DAG: input, "{{.*}}empty.hip", hip, (device-hip, gfx900)
 594 // MIXED-DAG: input, "{{.*}}empty.cpp", c++
 595 // MIXED-NEG-NOT: input, "{{.*}}empty.cpp", c++, (host-hip)
 596 // MIXED-NEG-NOT: input, "{{.*}}empty.cpp", c++, (device-hip
 597 // Expectations when empty.cpp is also treated as HIP: both empty.hip and empty.cpp appear as hip inputs for host-hip and for device-hip on gfx803 and gfx900. The negative prefix verifies that empty.cpp never appears as a c++ input.
 598 // MIXED2-DAG: input, "{{.*}}empty.hip", hip, (host-hip)
 599 // MIXED2-DAG: input, "{{.*}}empty.hip", hip, (device-hip, gfx803)
 600 // MIXED2-DAG: input, "{{.*}}empty.hip", hip, (device-hip, gfx900)
 601 // MIXED2-DAG: input, "{{.*}}empty.cpp", hip, (host-hip)
 602 // MIXED2-DAG: input, "{{.*}}empty.cpp", hip, (device-hip, gfx803)
 603 // MIXED2-DAG: input, "{{.*}}empty.cpp", hip, (device-hip, gfx900)
 604 // MIXED2-NEG-NOT: input, "{{.*}}empty.cpp", c++
 605
 606 // Test HIP bitcode to bitcode linking. Input should be bundled or unbundled bitcode, and
 607 // output should be unbundled linked bitcode. The inputs live under %t, so create that directory first.
 608 // RUN: mkdir -p %t
 609 // RUN: touch %t/bitcodeA.bc
 610 // RUN: touch %t/bitcodeB.bc
 611 // RUN: %clang -ccc-print-phases --hip-link -emit-llvm --cuda-device-only \
 612 // RUN: --offload-arch=gfx906 %t/bitcodeA.bc %t/bitcodeB.bc 2>&1 \
 613 // RUN: | FileCheck -check-prefixes=CHECK %s
 614
 615 // CHECK: [[A0:[0-9]+]]: input, "{{.*}}bitcodeA.bc", ir
 616 // CHECK: [[A1:[0-9]+]]: clang-offload-unbundler, {[[A0]]}, ir
 617 // CHECK: [[A2:[0-9]+]]: compiler, {[[A1]]}, ir, (device-hip, [[ARCH:gfx906]])
 618 // CHECK: [[A3:[0-9]+]]: backend, {[[A2]]}, ir, (device-hip, [[ARCH]])
 619
 620 // CHECK: [[B0:[0-9]+]]: input, "{{.*}}bitcodeB.bc", ir
 621 // CHECK: [[B1:[0-9]+]]: clang-offload-unbundler, {[[B0]]}, ir
 622 // CHECK: [[B2:[0-9]+]]: compiler, {[[B1]]}, ir, (device-hip, [[ARCH]])
 623 // CHECK: [[B3:[0-9]+]]: backend, {[[B2]]}, ir, (device-hip, [[ARCH]])
 624
 625 // CHECK: [[L0:[0-9]+]]: linker, {[[A3]], [[B3]]}, ir, (device-hip, [[ARCH]])
 626 // CHECK: offload, "device-hip (amdgcn-amd-amdhsa:[[ARCH]])" {[[L0]]}, ir
 627
 628 //
 629 // Test the phases using the new driver in LTO-mode.
 630 //
 631 // RUN: %clang -### --target=x86_64-linux-gnu --offload-new-driver -ccc-print-phases \
 632 // RUN:        --offload-arch=gfx90a --offload-arch=gfx908 -foffload-lto -fgpu-rdc -c %s 2>&1 \
 633 // RUN: | FileCheck -check-prefix=LTO %s
 634 //      LTO: 0: input, "[[INPUT:.+]]", hip, (host-hip)
 635 // LTO-NEXT: 1: preprocessor, {0}, hip-cpp-output, (host-hip)
 636 // LTO-NEXT: 2: compiler, {1}, ir, (host-hip)
 637 // LTO-NEXT: 3: input, "[[INPUT]]", hip, (device-hip, gfx908)
 638 // LTO-NEXT: 4: preprocessor, {3}, hip-cpp-output, (device-hip, gfx908)
 639 // LTO-NEXT: 5: compiler, {4}, ir, (device-hip, gfx908)
 640 // LTO-NEXT: 6: backend, {5}, lto-bc, (device-hip, gfx908)
 641 // LTO-NEXT: 7: offload, "device-hip (amdgcn-amd-amdhsa:gfx908)" {6}, lto-bc
 642 // LTO-NEXT: 8: input, "[[INPUT]]", hip, (device-hip, gfx90a)
 643 // LTO-NEXT: 9: preprocessor, {8}, hip-cpp-output, (device-hip, gfx90a)
 644 // LTO-NEXT: 10: compiler, {9}, ir, (device-hip, gfx90a)
 645 // LTO-NEXT: 11: backend, {10}, lto-bc, (device-hip, gfx90a)
 646 // LTO-NEXT: 12: offload, "device-hip (amdgcn-amd-amdhsa:gfx90a)" {11}, lto-bc
 647 // LTO-NEXT: 13: clang-offload-packager, {7, 12}, image, (device-hip)
 648 // LTO-NEXT: 14: offload, "host-hip (x86_64-unknown-linux-gnu)" {2}, "device-hip (x86_64-unknown-linux-gnu)" {13}, ir
 649 // LTO-NEXT: 15: backend, {14}, assembler, (host-hip)
 650 // LTO-NEXT: 16: assembler, {15}, object, (host-hip)