clang/test/Driver/cuda-phases.cu

   1 // Tests the phases generated for a CUDA offloading target for different
   2 // combinations of:
   3 // - Number of gpu architectures;
   4 // - Host/device-only compilation;
   5 // - User-requested final phase - binary or assembly.
   6
   7 // Test single gpu architecture with complete compilation.
   8 //
   9 // Test CUDA NVPTX phases.
  10 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
  11 // RUN:   --no-offload-new-driver --cuda-gpu-arch=sm_30 %s 2>&1 \
  12 // RUN: | FileCheck -check-prefixes=BIN %s
  13 //
  14 // BIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
  15 // BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
  16 // BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
  17 // BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH:sm_30]])
  18 // BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
  19 // BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]])
  20 // BIN-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]])
  21 // BIN-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]])
  22 // BIN-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P7]]}, object
  23 // BIN-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH]])" {[[P6]]}, assembler
  24 // BIN-DAG: [[P10:[0-9]+]]: linker, {[[P8]], [[P9]]}, cuda-fatbin, (device-[[T]])
  25 // BIN-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-[[T]] ([[TRIPLE]])" {[[P10]]}, ir
  26 // BIN-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]])
  27 // BIN-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]])
  28 // BIN-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]])
  29
  30 //
  31 // Test single gpu architecture up to the assemble phase.
  32 //
  33 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
  34 // RUN:   --no-offload-new-driver --cuda-gpu-arch=sm_30 %s -S 2>&1 \
  35 // RUN: | FileCheck -check-prefixes=ASM %s
  36 // ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
  37 // ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
  38 // ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
  39 // ASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
  40 // ASM-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P3]]}, assembler
  41 // ASM-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (host-[[T]])
  42 // ASM-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (host-[[T]])
  43 // ASM-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (host-[[T]])
  44 // ASM-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (host-[[T]])
  45
  46 //
  47 // Test two gpu architectures with complete compilation.
  48 //
  49 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
  50 // RUN:   --no-offload-new-driver --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
  51 // RUN: | FileCheck -check-prefixes=BIN2 %s
  52 // BIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
  53 // BIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
  54 // BIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
  55 // BIN2-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH1:sm_30]])
  56 // BIN2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
  57 // BIN2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]])
  58 // BIN2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]])
  59 // BIN2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]])
  60 // BIN2-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH1]])" {[[P7]]}, object
  61 // BIN2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH1]])" {[[P6]]}, assembler
  62 // BIN2-DAG: [[P10:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35]])
  63 // BIN2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
  64 // BIN2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]])
  65 // BIN2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]])
  66 // BIN2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]])
  67 // BIN2-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P14]]}, object
  68 // BIN2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P13]]}, assembler
  69 // BIN2-DAG: [[P17:[0-9]+]]: linker, {[[P8]], [[P9]], [[P15]], [[P16]]}, cuda-fatbin, (device-[[T]])
  70 // BIN2-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-[[T]] ([[TRIPLE]])" {[[P17]]}, ir
  71 // BIN2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]])
  72 // BIN2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]])
  73 // BIN2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]])
  74
  75 //
  76 // Test two gpu architecturess up to the assemble phase.
  77 //
  78 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
  79 // RUN:   --no-offload-new-driver --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
  80 // RUN: | FileCheck -check-prefixes=ASM2 %s
  81 // ASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH1:sm_30]])
  82 // ASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]])
  83 // ASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH1]])
  84 // ASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH1]])
  85 // ASM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH1]])" {[[P3]]}, assembler
  86 // ASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35]])
  87 // ASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
  88 // ASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
  89 // ASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
  90 // ASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler
  91 // ASM2-DAG: [[P10:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (host-[[T]])
  92 // ASM2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (host-[[T]])
  93 // ASM2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (host-[[T]])
  94 // ASM2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (host-[[T]])
  95
  96 //
  97 // Test single gpu architecture with complete compilation in host-only
  98 // compilation mode.
  99 //
 100 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
 101 // RUN:   --no-offload-new-driver --cuda-gpu-arch=sm_30 %s --cuda-host-only 2>&1 \
 102 // RUN: | FileCheck -check-prefixes=HBIN %s
 103 // HBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
 104 // HBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
 105 // HBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
 106 // HBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
 107 // HBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]])
 108 // HBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]])
 109 // HBIN-NOT: device
 110 //
 111 // Test single gpu architecture up to the assemble phase in host-only
 112 // compilation mode.
 113 //
 114 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
 115 // RUN:   --no-offload-new-driver --cuda-gpu-arch=sm_30 %s --cuda-host-only -S 2>&1 \
 116 // RUN: | FileCheck -check-prefixes=HASM %s
 117 // HASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
 118 // HASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
 119 // HASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
 120 // HASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
 121 // HASM-NOT: device
 122
 123 //
 124 // Test two gpu architectures with complete compilation in host-only
 125 // compilation mode.
 126 //
 127 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
 128 // RUN:   --no-offload-new-driver --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only 2>&1 \
 129 // RUN: | FileCheck -check-prefixes=HBIN2 %s
 130 // HBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
 131 // HBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
 132 // HBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
 133 // HBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
 134 // HBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]])
 135 // HBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]])
 136 // HBIN2-NOT: device
 137
 138 //
 139 // Test two gpu architectures up to the assemble phase in host-only
 140 // compilation mode.
 141 //
 142 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
 143 // RUN:   --no-offload-new-driver --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only -S \
 144 // RUN: 2>&1 | FileCheck -check-prefixes=HASM2 %s
 145 // HASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]])
 146 // HASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]])
 147 // HASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]])
 148 // HASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]])
 149 // HASM2-NOT: device
 150
 151 //
 152 // Test single gpu architecture with complete compilation in device-only
 153 // compilation mode.
 154 //
 155 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
 156 // RUN:   --no-offload-new-driver --cuda-gpu-arch=sm_30 %s --cuda-device-only 2>&1 \
 157 // RUN: | FileCheck -check-prefixes=DBIN %s
 158 // DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
 159 // DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
 160 // DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
 161 // DBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
 162 // DBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
 163 // DBIN-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (nvptx64-nvidia-cuda:[[ARCH]])" {[[P4]]}, object
 164 // DBIN-NOT: host
 165 //
 166 // Test single gpu architecture up to the assemble phase in device-only
 167 // compilation mode.
 168 //
 169 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
 170 // RUN:   --no-offload-new-driver --cuda-gpu-arch=sm_30 %s --cuda-device-only -S 2>&1 \
 171 // RUN: | FileCheck -check-prefixes=DASM %s
 172 // DASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
 173 // DASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
 174 // DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
 175 // DASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
 176 // DASM-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P3]]}, assembler
 177 // DASM-NOT: host
 178
 179 //
 180 // Test two gpu architectures with complete compilation in device-only
 181 // compilation mode.
 182 //
 183 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
 184 // RUN:   --no-offload-new-driver --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \
 185 // RUN: | FileCheck -check-prefixes=DBIN2 %s
 186 // DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
 187 // DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
 188 // DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
 189 // DBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
 190 // DBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]])
 191 // DBIN2-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P4]]}, object
 192 // DBIN2-DAG: [[P6:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35]])
 193 // DBIN2-DAG: [[P7:[0-9]+]]: preprocessor, {[[P6]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
 194 // DBIN2-DAG: [[P8:[0-9]+]]: compiler, {[[P7]]}, ir, (device-[[T]], [[ARCH2]])
 195 // DBIN2-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (device-[[T]], [[ARCH2]])
 196 // DBIN2-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-[[T]], [[ARCH2]])
 197 // DBIN2-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P10]]}, object
 198 // DBIN2-NOT: host
 199 //
 200 // Test two gpu architectures up to the assemble phase in device-only
 201 // compilation mode.
 202 //
 203 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \
 204 // RUN:   --no-offload-new-driver --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S \
 205 // RUN: 2>&1 | FileCheck -check-prefixes=DASM2 %s
 206 // DASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]])
 207 // DASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]])
 208 // DASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]])
 209 // DASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]])
 210 // DASM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P3]]}, assembler
 211 // DASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35]])
 212 // DASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]])
 213 // DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]])
 214 // DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]])
 215 // DASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler
 216 // DASM2-NOT: host
 217
 218 //
 219 // Test the phases generated when using the new offloading driver.
 220 //
 221 // RUN: %clang -### --target=powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver -fgpu-rdc \
 222 // RUN:   --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW-DRIVER-RDC %s
 223 //      NEW-DRIVER-RDC: 0: input, "[[INPUT:.+]]", cuda
 224 // NEW-DRIVER-RDC-NEXT: 1: preprocessor, {0}, cuda-cpp-output
 225 // NEW-DRIVER-RDC-NEXT: 2: compiler, {1}, ir
 226 // NEW-DRIVER-RDC-NEXT: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52)
 227 // NEW-DRIVER-RDC-NEXT: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
 228 // NEW-DRIVER-RDC-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52)
 229 // NEW-DRIVER-RDC-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52)
 230 // NEW-DRIVER-RDC-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52)
 231 // NEW-DRIVER-RDC-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, object
 232 // NEW-DRIVER-RDC-NEXT: 9: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
 233 // NEW-DRIVER-RDC-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70)
 234 // NEW-DRIVER-RDC-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70)
 235 // NEW-DRIVER-RDC-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70)
 236 // NEW-DRIVER-RDC-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70)
 237 // NEW-DRIVER-RDC-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, object
 238 // NEW-DRIVER-RDC-NEXT: 15: clang-offload-packager, {8, 14}, image, (device-cuda)
 239 // NEW-DRIVER-RDC-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (powerpc64le-ibm-linux-gnu)" {15}, ir
 240 // NEW-DRIVER-RDC-NEXT: 17: backend, {16}, assembler, (host-cuda)
 241 // NEW-DRIVER-RDC-NEXT: 18: assembler, {17}, object, (host-cuda)
 242 // NEW-DRIVER-RDC-NEXT: 19: clang-linker-wrapper, {18}, image, (host-cuda)
 243
 244 // RUN: %clang -### -target powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \
 245 // RUN:   --offload-arch=sm_52 --offload-arch=sm_70 %s 2>&1 | FileCheck --check-prefix=NEW-DRIVER %s
 246 //      NEW-DRIVER: 0: input, "[[CUDA:.+]]", cuda, (host-cuda)
 247 // NEW-DRIVER-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
 248 // NEW-DRIVER-NEXT: 2: compiler, {1}, ir, (host-cuda)
 249 // NEW-DRIVER-NEXT: 3: input, "[[CUDA]]", cuda, (device-cuda, sm_52)
 250 // NEW-DRIVER-NEXT: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
 251 // NEW-DRIVER-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52)
 252 // NEW-DRIVER-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52)
 253 // NEW-DRIVER-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52)
 254 // NEW-DRIVER-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, object
 255 // NEW-DRIVER-NEXT: 9: input, "[[CUDA]]", cuda, (device-cuda, sm_70)
 256 // NEW-DRIVER-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70)
 257 // NEW-DRIVER-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70)
 258 // NEW-DRIVER-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70)
 259 // NEW-DRIVER-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70)
 260 // NEW-DRIVER-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {12}, object
 261 // NEW-DRIVER-NEXT: 15: linker, {8, 14}, cuda-fatbin, (device-cuda)
 262 // NEW-DRIVER-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {15}, ir
 263 // NEW-DRIVER-NEXT: 17: backend, {16}, assembler, (host-cuda)
 264 // NEW-DRIVER-NEXT: 18: assembler, {17}, object, (host-cuda)
 265 // NEW-DRIVER-NEXT: 19: clang-linker-wrapper, {18}, image, (host-cuda)
 266
 267 // RUN: %clang -### --target=powerpc64le-ibm-linux-gnu -ccc-print-phases --offload-new-driver \
 268 // RUN:   --offload-arch=sm_52 --offload-arch=sm_70 %s %S/Inputs/empty.cpp 2>&1 | FileCheck --check-prefix=NON-CUDA-INPUT %s
 269
 270 //      NON-CUDA-INPUT: 0: input, "[[CUDA:.+]]", cuda, (host-cuda)
 271 // NON-CUDA-INPUT-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
 272 // NON-CUDA-INPUT-NEXT: 2: compiler, {1}, ir, (host-cuda)
 273 // NON-CUDA-INPUT-NEXT: 3: input, "[[CUDA]]", cuda, (device-cuda, sm_52)
 274 // NON-CUDA-INPUT-NEXT: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
 275 // NON-CUDA-INPUT-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52)
 276 // NON-CUDA-INPUT-NEXT: 6: backend, {5}, assembler, (device-cuda, sm_52)
 277 // NON-CUDA-INPUT-NEXT: 7: assembler, {6}, object, (device-cuda, sm_52)
 278 // NON-CUDA-INPUT-NEXT: 8: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {7}, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, object
 279 // NON-CUDA-INPUT-NEXT: 9: input, "[[CUDA]]", cuda, (device-cuda, sm_70)
 280 // NON-CUDA-INPUT-NEXT: 10: preprocessor, {9}, cuda-cpp-output, (device-cuda, sm_70)
 281 // NON-CUDA-INPUT-NEXT: 11: compiler, {10}, ir, (device-cuda, sm_70)
 282 // NON-CUDA-INPUT-NEXT: 12: backend, {11}, assembler, (device-cuda, sm_70)
 283 // NON-CUDA-INPUT-NEXT: 13: assembler, {12}, object, (device-cuda, sm_70)
 284 // NON-CUDA-INPUT-NEXT: 14: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {13}, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {12}, object
 285 // NON-CUDA-INPUT-NEXT: 15: linker, {8, 14}, cuda-fatbin, (device-cuda)
 286 // NON-CUDA-INPUT-NEXT: 16: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (nvptx64-nvidia-cuda)" {15}, ir
 287 // NON-CUDA-INPUT-NEXT: 17: backend, {16}, assembler, (host-cuda)
 288 // NON-CUDA-INPUT-NEXT: 18: assembler, {17}, object, (host-cuda)
 289 // NON-CUDA-INPUT-NEXT: 19: input, "[[CPP:.+]]", c++, (host-cuda)
 290 // NON-CUDA-INPUT-NEXT: 20: preprocessor, {19}, c++-cpp-output, (host-cuda)
 291 // NON-CUDA-INPUT-NEXT: 21: compiler, {20}, ir, (host-cuda)
 292 // NON-CUDA-INPUT-NEXT: 22: backend, {21}, assembler, (host-cuda)
 293 // NON-CUDA-INPUT-NEXT: 23: assembler, {22}, object, (host-cuda)
 294 // NON-CUDA-INPUT-NEXT: 24: clang-linker-wrapper, {18, 23}, image, (host-cuda)
 295
 296 //
 297 // Test the phases using the new driver in LTO-mode.
 298 //
 299 // RUN: %clang -### -target powerpc64le-ibm-linux-gnu --offload-new-driver -ccc-print-phases \
 300 // RUN:        --offload-arch=sm_70 --offload-arch=sm_52 -foffload-lto -fgpu-rdc -c %s 2>&1 \
 301 // RUN: | FileCheck -check-prefix=LTO %s
 302 //      LTO: 0: input, "[[INPUT:.+]]", cuda, (host-cuda)
 303 // LTO-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
 304 // LTO-NEXT: 2: compiler, {1}, ir, (host-cuda)
 305 // LTO-NEXT: 3: input, "[[INPUT]]", cuda, (device-cuda, sm_52)
 306 // LTO-NEXT: 4: preprocessor, {3}, cuda-cpp-output, (device-cuda, sm_52)
 307 // LTO-NEXT: 5: compiler, {4}, ir, (device-cuda, sm_52)
 308 // LTO-NEXT: 6: backend, {5}, lto-bc, (device-cuda, sm_52)
 309 // LTO-NEXT: 7: offload, "device-cuda (nvptx64-nvidia-cuda:sm_52)" {6}, lto-bc
 310 // LTO-NEXT: 8: input, "[[INPUT]]", cuda, (device-cuda, sm_70)
 311 // LTO-NEXT: 9: preprocessor, {8}, cuda-cpp-output, (device-cuda, sm_70)
 312 // LTO-NEXT: 10: compiler, {9}, ir, (device-cuda, sm_70)
 313 // LTO-NEXT: 11: backend, {10}, lto-bc, (device-cuda, sm_70)
 314 // LTO-NEXT: 12: offload, "device-cuda (nvptx64-nvidia-cuda:sm_70)" {11}, lto-bc
 315 // LTO-NEXT: 13: clang-offload-packager, {7, 12}, image, (device-cuda)
 316 // LTO-NEXT: 14: offload, "host-cuda (powerpc64le-ibm-linux-gnu)" {2}, "device-cuda (powerpc64le-ibm-linux-gnu)" {13}, ir
 317 // LTO-NEXT: 15: backend, {14}, assembler, (host-cuda)
 318 // LTO-NEXT: 16: assembler, {15}, object, (host-cuda)
 319
 320 //
 321 // Test that the new driver does not create actions for invalid architectures.
 322 //
 323 // RUN: not %clang -### --target=powerpc64le-ibm-linux-gnu --offload-new-driver \
 324 // RUN:        -ccc-print-phases --offload-arch=sm_999 -fgpu-rdc -c %s 2>&1 \
 325 // RUN: | FileCheck -check-prefix=INVALID-ARCH %s
 326 //      INVALID-ARCH: error: unsupported CUDA gpu architecture: sm_999
 327 // INVALID-ARCH-NEXT: 0: input, "[[INPUT:.+]]", cuda, (host-cuda)
 328 // INVALID-ARCH-NEXT: 1: preprocessor, {0}, cuda-cpp-output, (host-cuda)
 329 // INVALID-ARCH-NEXT: 2: compiler, {1}, ir, (host-cuda)
 330 // INVALID-ARCH-NEXT: 3: backend, {2}, assembler, (host-cuda)
 331 // INVALID-ARCH-NEXT: 4: assembler, {3}, object, (host-cuda)