clang/test/Driver/cuda-bindings.cu

   1 // Tests the bindings generated for a CUDA offloading target for different
   2 // combinations of:
   3 // - Number of gpu architectures;
   4 // - Host/device-only compilation;
   5 // - User-requested final phase - binary or assembly.
   6 // It parallels the cuda-phases.cu test, but verifies whether each output file is temporary or not.
   7
   8 // It's hard to check whether a file name is temporary in a portable
   9 // way. Instead we check whether we've generated a permanent name on the
  10 // device side, which appends a '-cuda-<triple>-<gpu arch>' suffix.
  11
  12 // REQUIRES: powerpc-registered-target
  13 // REQUIRES: nvptx-registered-target
  14
  15 //
  16 // Test single gpu architecture with complete compilation.
  17 // No intermediary device files should have "-cuda-nvptx64..." in the name.
  18 //
  19 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s 2>&1 \
  20 // RUN: | FileCheck -check-prefix=BIN %s
  21 // BIN: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
  22 // BIN-NOT: cuda-bindings-cuda-nvptx64
  23 // BIN: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output:
  24 // BIN-NOT: cuda-bindings-cuda-nvptx64
  25 // BIN: # "nvptx64-nvidia-cuda" - "NVPTX::Linker",{{.*}} output:
  26 // BIN-NOT: cuda-bindings-cuda-nvptx64
  27 // BIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}}  output:
  28 // BIN-NOT: cuda-bindings-cuda-nvptx64
  29 // BIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
  30
  31 //
  32 // Test single gpu architecture up to the assemble phase.
  33 //
  34 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s -S 2>&1 \
  35 // RUN: | FileCheck -check-prefix=ASM %s
  36 // ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
  37 // ASM-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
  38
  39 //
  40 // Test two gpu architectures with complete compilation.
  41 //
  42 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
  43 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
  44 // RUN: | FileCheck -check-prefixes=BIN2,AOUT %s
  45 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
  46 // RUN:       --offload-arch=sm_30,sm_35 %s 2>&1 \
  47 // RUN: | FileCheck -check-prefixes=BIN2,AOUT %s
  48 // .. same, but with explicitly specified output.
  49 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
  50 // RUN:       --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -o %t/out 2>&1 \
  51 // RUN: | FileCheck -check-prefixes=BIN2,TOUT %s
  52 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
  53 // RUN:        --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
  54 // RUN: | FileCheck -check-prefixes=BIN2,TOUT %s
  55 // BIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
  56 // BIN2-NOT: cuda-bindings-cuda-nvptx64
  57 // BIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output:
  58 // BIN2-NOT: cuda-bindings-cuda-nvptx64
  59 // BIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
  60 // BIN2-NOT: cuda-bindings-cuda-nvptx64
  61 // BIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output:
  62 // BIN2-NOT: cuda-bindings-cuda-nvptx64
  63 // BIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Linker",{{.*}} output:
  64 // BIN2-NOT: cuda-bindings-cuda-nvptx64
  65 // BIN2: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}}  output:
  66 // BIN2-NOT: cuda-bindings-cuda-nvptx64
  67 // AOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
  68 // TOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "{{.*}}/out"
  69
  70 // .. same, but with -fsyntax-only (with and without an explicit '-o')
  71 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
  72 // RUN:       --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
  73 // RUN: | FileCheck -check-prefix=SYN %s
  74 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
  75 // RUN:        --offload-arch=sm_30,sm_35 %s 2>&1 \
  76 // RUN: | FileCheck -check-prefix=SYN %s
  77 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
  78 // RUN:       --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -o %t/out 2>&1 \
  79 // RUN: | FileCheck -check-prefix=SYN %s
  80 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
  81 // RUN:        --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
  82 // RUN: | FileCheck -check-prefix=SYN %s
  83 // SYN-NOT: inputs:
  84 // SYN: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
  85 // SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
  86 // SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
  87 // SYN-NOT: inputs
  88
  89 // .. and with --offload-new-driver (with and without an explicit '-o')
  90 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
  91 // RUN:       --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 --offload-new-driver %s 2>&1 \
  92 // RUN: | FileCheck -check-prefix=NDSYN %s
  93 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
  94 // RUN:        --offload-arch=sm_30,sm_35 %s --offload-new-driver 2>&1 \
  95 // RUN: | FileCheck -check-prefix=NDSYN %s
  96 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
  97 // RUN:       --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --offload-new-driver -o %t/out 2>&1 \
  98 // RUN: | FileCheck -check-prefix=NDSYN %s
  99 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
 100 // RUN:        --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \
 101 // RUN: | FileCheck -check-prefix=NDSYN %s
 102 // NDSYN-NOT: inputs:
 103 // NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
 104 // NDSYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
 105 // NDSYN-NEXT: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
 106 // NDSYN-NOT: inputs:
 107
 108
 109 //
 110 // Test two gpu architectures up to the assemble phase.
 111 //
 112 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 113 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
 114 // RUN: | FileCheck -check-prefix=ASM2 %s
 115 // ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
 116 // ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s"
 117 // ASM2-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
 118
 119 //
 120 // Test one or more gpu architecture with complete compilation in host-only
 121 // compilation mode.
 122 //
 123 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 124 // RUN:        --cuda-gpu-arch=sm_30 %s --cuda-host-only 2>&1 \
 125 // RUN: | FileCheck -check-prefix=HBIN %s
 126 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 127 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only 2>&1 \
 128 // RUN: | FileCheck -check-prefix=HBIN %s
 129 // HBIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}}  output:
 130 // HBIN-NOT: cuda-bindings-cuda-nvptx64
 131 // HBIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
 132
 133 //
 134 // Test one or more gpu architecture up to the assemble phase in host-only
 135 // compilation mode.
 136 //
 137 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 138 // RUN:        --cuda-gpu-arch=sm_30 %s --cuda-host-only -S 2>&1 \
 139 // RUN: | FileCheck -check-prefix=HASM %s
 140 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 141 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only -S 2>&1 \
 142 // RUN: | FileCheck -check-prefix=HASM %s
 143 // HASM: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
 144
 145 //
 146 // Test single gpu architecture with complete compilation in device-only
 147 // compilation mode.
 148 //
 149 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 150 // RUN:        --cuda-gpu-arch=sm_30 %s --cuda-device-only 2>&1 \
 151 // RUN: | FileCheck -check-prefix=DBIN %s
 152 // DBIN: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
 153 // DBIN-NOT: cuda-bindings-cuda-nvptx64
 154 // DBIN: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.o"
 155
 156 //
 157 // Test single gpu architecture up to the assemble phase in device-only
 158 // compilation mode.
 159 //
 160 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 161 // RUN:        --cuda-gpu-arch=sm_30 %s --cuda-device-only -S 2>&1 \
 162 // RUN: | FileCheck -check-prefix=DASM %s
 163 // DASM: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
 164
 165 //
 166 // Test two gpu architectures with complete compilation in device-only
 167 // compilation mode.
 168 //
 169 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 170 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \
 171 // RUN: | FileCheck -check-prefix=DBIN2 %s
 172 // DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
 173 // DBIN2-NOT: cuda-bindings-cuda-nvptx64
 174 // DBIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.o"
 175 // DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
 176 // DBIN2-NOT: cuda-bindings-cuda-nvptx64
 177 // DBIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.o"
 178
 179 //
 180 // Test two gpu architectures up to the assemble phase in device-only
 181 // compilation mode.
 182 //
 183 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 184 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S 2>&1 \
 185 // RUN: | FileCheck -check-prefix=DASM2 %s
 186 // DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
 187 // DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s"
 188
 189 //
 190 // Ensure we output the user's specified name in device-only mode.
 191 //
 192 // RUN: %clang -target powerpc64le-ibm-linux-gnu -### \
 193 // RUN:        --cuda-gpu-arch=sm_52 --cuda-device-only -c -o foo.o --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
 194 // RUN: | FileCheck -check-prefix=D_ONLY %s
 195 // RUN: %clang -target powerpc64le-ibm-linux-gnu -### --offload-new-driver \
 196 // RUN:        --cuda-gpu-arch=sm_52 --cuda-device-only -c -o foo.o --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
 197 // RUN: | FileCheck -check-prefix=D_ONLY %s
 198 // D_ONLY: "foo.o"
 199
 200 //
 201 // Check to make sure we can generate multiple outputs for device-only
 202 // compilation and fail with '-o'.
 203 //
 204 // RUN: %clang -### -target powerpc64le-ibm-linux-gnu --offload-new-driver -ccc-print-bindings \
 205 // RUN:        --offload-arch=sm_70 --offload-arch=sm_52 --offload-device-only -c %s 2>&1 \
 206 // RUN: | FileCheck -check-prefix=MULTI-D-ONLY %s
 207 //      MULTI-D-ONLY: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_70:.+]]"
 208 // MULTI-D-ONLY-NEXT: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_70]]"], output: "[[CUBIN_70:.+]]"
 209 // MULTI-D-ONLY-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_52:.+]]"
 210 // MULTI-D-ONLY-NEXT: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_52]]"], output: "[[CUBIN_52:.+]]"
 211 //
 212 // RUN: not %clang -### --target=powerpc64le-ibm-linux-gnu --offload-new-driver -ccc-print-bindings \
 213 // RUN:        --offload-arch=sm_70 --offload-arch=sm_52 --offload-device-only -c -o %t %s 2>&1 \
 214 // RUN: | FileCheck -check-prefix=MULTI-D-ONLY-O %s
 215 // MULTI-D-ONLY-O: error: cannot specify -o when generating multiple output files
 216
 217 //
 218 // Check to ensure that we can use '-fsyntax-only' for CUDA output with the new
 219 // driver.
 220 //
 221 // RUN: %clang -### -target powerpc64le-ibm-linux-gnu --offload-new-driver \
 222 // RUN:        -fsyntax-only --offload-arch=sm_70 --offload-arch=sm_52 -c --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda %s 2>&1 \
 223 // RUN: | FileCheck -check-prefix=SYNTAX-ONLY %s
 224 // SYNTAX-ONLY: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-fsyntax-only"
 225 // SYNTAX-ONLY: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-fsyntax-only"
 226 // SYNTAX-ONLY: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu"{{.*}}"-fsyntax-only"
 227
 228 //
 229 // Check to ensure that we can use '-save-temps' when operating in RDC-mode.
 230 //
 231 // RUN: %clang -### -target powerpc64le-ibm-linux-gnu -save-temps --offload-new-driver \
 232 // RUN:        -fgpu-rdc --offload-arch=sm_70 --offload-arch=sm_52 -c --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda %s 2>&1 \
 233 // RUN: | FileCheck -check-prefix=SAVE-TEMPS %s
 234 // SAVE-TEMPS: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_52"
 235 // SAVE-TEMPS: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
 236 // SAVE-TEMPS: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu"
 237
 238 //
 239 // Check to ensure that we cannot use '-foffload-lto' when not operating in RDC-mode.
 240 //
 241 // RUN: not %clang -### --target=powerpc64le-ibm-linux-gnu -fno-gpu-rdc --offload-new-driver \
 242 // RUN:        -foffload-lto --offload-arch=sm_70 --offload-arch=sm_52 -c %s 2>&1 \
 243 // RUN: | FileCheck -check-prefix=LTO-NO-RDC %s
 244 // LTO-NO-RDC: error: unsupported option '-foffload-lto' for language mode '-fno-gpu-rdc'