clang/test/Driver/cuda-bindings.cu

   1 // Tests the bindings generated for a CUDA offloading target for different
   2 // combinations of:
   3 // - Number of gpu architectures;
   4 // - Host/device-only compilation;
   5 // - User-requested final phase - binary or assembly.
   6 // It parallels the cuda-phases.cu test, but verifies whether the output file is temporary or not.
   7
   8 // It's hard to check whether a file name is temporary in a portable
   9 // way. Instead we check whether we've generated a permanent name on
  10 // the device side, which appends a '-device-cuda-<triple>' suffix.
  11
  12 //
  13 // Test single gpu architecture with complete compilation.
  14 // No intermediary device files should have "-device-cuda..." in the name.
  15 //
  16 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s 2>&1 \
  17 // RUN: | FileCheck -check-prefix=BIN %s
  18 // BIN: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
  19 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
  20 // BIN: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output:
  21 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
  22 // BIN: # "nvptx64-nvidia-cuda" - "NVPTX::Linker",{{.*}} output:
  23 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
  24 // BIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}}  output:
  25 // BIN-NOT: cuda-bindings-device-cuda-nvptx64
  26 // BIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
  27
  28 //
  29 // Test single gpu architecture up to the assemble phase.
  30 //
  31 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s -S 2>&1 \
  32 // RUN: | FileCheck -check-prefix=ASM %s
  33 // ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
  34 // ASM-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
  35
  36 //
  37 // Test two gpu architectures with complete compilation.
  38 //
  39 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
  40 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
  41 // RUN: | FileCheck -check-prefixes=BIN2,AOUT %s
  42 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
  43 // RUN:       --offload-arch=sm_30,sm_35 %s 2>&1 \
  44 // RUN: | FileCheck -check-prefixes=BIN2,AOUT %s
  45 // .. same, but with explicitly specified output.
  46 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
  47 // RUN:       --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -o %t/out 2>&1 \
  48 // RUN: | FileCheck -check-prefixes=BIN2,TOUT %s
  49 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
  50 // RUN:        --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
  51 // RUN: | FileCheck -check-prefixes=BIN2,TOUT %s
  52 // BIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
  53 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
  54 // BIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output:
  55 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
  56 // BIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
  57 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
  58 // BIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output:
  59 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
  60 // BIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Linker",{{.*}} output:
  61 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
  62 // BIN2: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}}  output:
  63 // BIN2-NOT: cuda-bindings-device-cuda-nvptx64
  64 // AOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
  65 // TOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "{{.*}}/out"
  66
  67 // .. same, but with -fsyntax-only
  68 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
  69 // RUN:       --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
  70 // RUN: | FileCheck -check-prefix=SYN %s
  71 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
  72 // RUN:        --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
  73 // RUN: | FileCheck -check-prefix=SYN %s
  74 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
  75 // RUN:       --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \
  76 // RUN: | FileCheck -check-prefix=SYN %s
  77 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
  78 // RUN:        --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
  79 // RUN: | FileCheck -check-prefix=SYN %s
  80 // SYN-NOT: inputs:
  81 // SYN: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
  82 // SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
  83 // SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
  84 // SYN-NOT: inputs
  85
  86 // .. and with --offload-new-driver
  87 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
  88 // RUN:       --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 --offload-new-driver %s 2>&1 \
  89 // RUN: | FileCheck -check-prefix=NDSYN %s
  90 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
  91 // RUN:        --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \
  92 // RUN: | FileCheck -check-prefix=NDSYN %s
  93 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
  94 // RUN:       --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --offload-new-driver 2>&1 \
  95 // RUN: | FileCheck -check-prefix=NDSYN %s
  96 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
  97 // RUN:        --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \
  98 // RUN: | FileCheck -check-prefix=NDSYN %s
  99 // NDSYN-NOT: inputs:
 100 // NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
 101 // NDSYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
 102 // NDSYN-NEXT: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
 103 // NDSYN-NOT: inputs:
 104
 105
 106 //
 107 // Test two gpu architectures up to the assemble phase.
 108 //
 109 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 110 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
 111 // RUN: | FileCheck -check-prefix=ASM2 %s
 112 // ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
 113 // ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s"
 114 // ASM2-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
 115
 116 //
 117 // Test one or more gpu architectures with complete compilation in host-only
 118 // compilation mode.
 119 //
 120 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 121 // RUN:        --cuda-gpu-arch=sm_30 %s --cuda-host-only 2>&1 \
 122 // RUN: | FileCheck -check-prefix=HBIN %s
 123 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 124 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only 2>&1 \
 125 // RUN: | FileCheck -check-prefix=HBIN %s
 126 // HBIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}}  output:
 127 // HBIN-NOT: cuda-bindings-device-cuda-nvptx64
 128 // HBIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
 129
 130 //
 131 // Test one or more gpu architectures up to the assemble phase in host-only
 132 // compilation mode.
 133 //
 134 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 135 // RUN:        --cuda-gpu-arch=sm_30 %s --cuda-host-only -S 2>&1 \
 136 // RUN: | FileCheck -check-prefix=HASM %s
 137 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 138 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only -S 2>&1 \
 139 // RUN: | FileCheck -check-prefix=HASM %s
 140 // HASM: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
 141
 142 //
 143 // Test single gpu architecture with complete compilation in device-only
 144 // compilation mode.
 145 //
 146 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 147 // RUN:        --cuda-gpu-arch=sm_30 %s --cuda-device-only 2>&1 \
 148 // RUN: | FileCheck -check-prefix=DBIN %s
 149 // DBIN: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
 150 // DBIN-NOT: cuda-bindings-device-cuda-nvptx64
 151 // DBIN: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.o"
 152
 153 //
 154 // Test single gpu architecture up to the assemble phase in device-only
 155 // compilation mode.
 156 //
 157 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 158 // RUN:        --cuda-gpu-arch=sm_30 %s --cuda-device-only -S 2>&1 \
 159 // RUN: | FileCheck -check-prefix=DASM %s
 160 // DASM: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
 161
 162 //
 163 // Test two gpu architectures with complete compilation in device-only
 164 // compilation mode.
 165 //
 166 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 167 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \
 168 // RUN: | FileCheck -check-prefix=DBIN2 %s
 169 // DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
 170 // DBIN2-NOT: cuda-bindings-device-cuda-nvptx64
 171 // DBIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.o"
 172 // DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
 173 // DBIN2-NOT: cuda-bindings-device-cuda-nvptx64
 174 // DBIN2: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.o"
 175
 176 //
 177 // Test two gpu architectures up to the assemble phase in device-only
 178 // compilation mode.
 179 //
 180 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
 181 // RUN:        --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S 2>&1 \
 182 // RUN: | FileCheck -check-prefix=DASM2 %s
 183 // DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
 184 // DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s"
 185
 186 //
 187 // Ensure we output the user's specified name in device-only mode.
 188 //
 189 // RUN: %clang -target powerpc64le-ibm-linux-gnu -### \
 190 // RUN:        --cuda-gpu-arch=sm_52 --cuda-device-only -c -o foo.o --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
 191 // RUN: | FileCheck -check-prefix=D_ONLY %s
 192 // RUN: %clang -target powerpc64le-ibm-linux-gnu -### --offload-new-driver \
 193 // RUN:        --cuda-gpu-arch=sm_52 --cuda-device-only -c -o foo.o --cuda-path=%S/Inputs/CUDA_80/usr/local/cuda %s 2>&1 \
 194 // RUN: | FileCheck -check-prefix=D_ONLY %s
 195 // D_ONLY: "foo.o"
 196
 197 //
 198 // Check to make sure we can generate multiple outputs for device-only
 199 // compilation, and that specifying '-o' in that case is rejected with an error.
 200 //
 201 // RUN: %clang -### -target powerpc64le-ibm-linux-gnu --offload-new-driver -ccc-print-bindings \
 202 // RUN:        --offload-arch=sm_70 --offload-arch=sm_52 --offload-device-only -c %s 2>&1 \
 203 // RUN: | FileCheck -check-prefix=MULTI-D-ONLY %s
 204 //      MULTI-D-ONLY: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT:.+]]"], output: "[[PTX_70:.+]]"
 205 // MULTI-D-ONLY-NEXT: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_70]]"], output: "[[CUBIN_70:.+]]"
 206 // MULTI-D-ONLY-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: ["[[INPUT]]"], output: "[[PTX_52:.+]]"
 207 // MULTI-D-ONLY-NEXT: # "nvptx64-nvidia-cuda" - "NVPTX::Assembler", inputs: ["[[PTX_52]]"], output: "[[CUBIN_52:.+]]"
 208 //
 209 // RUN: not %clang -### --target=powerpc64le-ibm-linux-gnu --offload-new-driver -ccc-print-bindings \
 210 // RUN:        --offload-arch=sm_70 --offload-arch=sm_52 --offload-device-only -c -o %t %s 2>&1 \
 211 // RUN: | FileCheck -check-prefix=MULTI-D-ONLY-O %s
 212 // MULTI-D-ONLY-O: error: cannot specify -o when generating multiple output files
 213
 214 //
 215 // Check to ensure that we can use '-fsyntax-only' for CUDA output with the new
 216 // driver.
 217 //
 218 // RUN: %clang -### -target powerpc64le-ibm-linux-gnu --offload-new-driver \
 219 // RUN:        -fsyntax-only --offload-arch=sm_70 --offload-arch=sm_52 -c --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda %s 2>&1 \
 220 // RUN: | FileCheck -check-prefix=SYNTAX-ONLY %s
 221 // SYNTAX-ONLY: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-fsyntax-only"
 222 // SYNTAX-ONLY: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-fsyntax-only"
 223 // SYNTAX-ONLY: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu"{{.*}}"-fsyntax-only"
 224
 225 //
 226 // Check to ensure that we can use '-save-temps' when operating in RDC-mode.
 227 //
 228 // RUN: %clang -### -target powerpc64le-ibm-linux-gnu -save-temps --offload-new-driver \
 229 // RUN:        -fgpu-rdc --offload-arch=sm_70 --offload-arch=sm_52 -c --cuda-path=%S/Inputs/CUDA_111/usr/local/cuda %s 2>&1 \
 230 // RUN: | FileCheck -check-prefix=SAVE-TEMPS %s
 231 // SAVE-TEMPS: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_52"
 232 // SAVE-TEMPS: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
 233 // SAVE-TEMPS: "-cc1" "-triple" "powerpc64le-ibm-linux-gnu"
 234
 235 //
 236 // Check to ensure that we cannot use '-foffload-lto' when not operating in RDC-mode.
 237 //
 238 // RUN: not %clang -### --target=powerpc64le-ibm-linux-gnu -fno-gpu-rdc --offload-new-driver \
 239 // RUN:        -foffload-lto --offload-arch=sm_70 --offload-arch=sm_52 -c %s 2>&1 \
 240 // RUN: | FileCheck -check-prefix=LTO-NO-RDC %s
 241 // LTO-NO-RDC: error: unsupported option '-foffload-lto' for language mode '-fno-gpu-rdc'