clang/test/Driver/cuda-options.cu

   1 // Tests CUDA compilation pipeline construction in Driver.
   2
   3 // Simple compilation case. Compile device-side to PTX assembly and make sure
   4 // we use it on the host side.
   5 // RUN: %clang -### --target=x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
   6 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
   7 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
   8 // RUN:    -check-prefix NOLINK %s
   9
  10 // Typical compilation + link case.
  11 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
  12 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
  13 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
  14 // RUN:    -check-prefix LINK %s
  15
  16 // Verify that --cuda-host-only disables device-side compilation, but doesn't
  17 // disable host-side compilation/linking.
  18 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-host-only -nogpulib -nogpuinc %s 2>&1 \
  19 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
  20 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
  21
  22 // Verify that --cuda-device-only disables host-side compilation and linking.
  23 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only -nogpulib -nogpuinc %s 2>&1 \
  24 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
  25 // RUN:    -check-prefix NOHOST -check-prefix NOLINK %s
  26
  27 // Check that the last of --cuda-compile-host-device, --cuda-host-only, and
  28 // --cuda-device-only wins.
  29
  30 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
  31 // RUN:    --cuda-host-only -nogpulib -nogpuinc %s 2>&1 \
  32 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
  33 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
  34
  35 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-compile-host-device \
  36 // RUN:    --cuda-host-only -nogpulib -nogpuinc %s 2>&1 \
  37 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
  38 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
  39
  40 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-host-only \
  41 // RUN:    -nogpulib -nogpuinc --cuda-device-only %s 2>&1 \
  42 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
  43 // RUN:    -check-prefix NOHOST -check-prefix NOLINK %s
  44
  45 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-compile-host-device \
  46 // RUN:    -nogpulib -nogpuinc --cuda-device-only %s 2>&1 \
  47 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
  48 // RUN:    -check-prefix NOHOST -check-prefix NOLINK %s
  49
  50 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-host-only \
  51 // RUN:   -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
  52 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
  53 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
  54 // RUN:    -check-prefix LINK %s
  55
  56 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
  57 // RUN:   -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
  58 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
  59 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
  60 // RUN:    -check-prefix LINK %s
  61
  62 // Verify that --cuda-gpu-arch option passes the correct GPU architecture to
  63 // device compilation.
  64 // RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
  65 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
  66 // RUN:    -check-prefix DEVICE-SM52 -check-prefix HOST \
  67 // RUN:    -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s
  68
  69 // Verify that there is one device-side compilation per --cuda-gpu-arch arg
  70 // and that all results are included on the host side.
  71 // RUN: %clang -### --target=x86_64-linux-gnu \
  72 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 -c %s 2>&1 \
  73 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \
  74 // RUN:             -check-prefixes DEVICE-SM52,DEVICE2-SM60 \
  75 // RUN:             -check-prefixes INCLUDES-DEVICE,INCLUDES-DEVICE2 \
  76 // RUN:             -check-prefixes HOST,HOST-NOSAVE,NOLINK %s
  77
  78 // Verify that device-side results are passed to the correct tool when
  79 // -save-temps is used.
  80 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc -save-temps -c %s 2>&1 \
  81 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-SAVE \
  82 // RUN:    -check-prefix HOST -check-prefix HOST-SAVE -check-prefix NOLINK %s
  83
  84 // Verify that device-side results are passed to the correct tool when
  85 // -fno-integrated-as is used.
  86 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc -fno-integrated-as -c %s 2>&1 \
  87 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
  88 // RUN:    -check-prefix HOST -check-prefix HOST-NOSAVE \
  89 // RUN:    -check-prefix HOST-AS -check-prefix NOLINK %s
  90
  91 // Verify that --[no-]cuda-gpu-arch arguments are handled correctly.
  92 // a) --no-cuda-gpu-arch=X negates preceding --cuda-gpu-arch=X
  93 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
  94 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_52 \
  95 // RUN:   --no-cuda-gpu-arch=sm_70 \
  96 // RUN:   -c %s 2>&1 \
  97 // RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s
  98
  99 // b) --no-cuda-gpu-arch=X negates more than one preceding --cuda-gpu-arch=X
 100 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
 101 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_52 \
 102 // RUN:   --no-cuda-gpu-arch=sm_70 \
 103 // RUN:   -c %s 2>&1 \
 104 // RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s
 105
 106 // c) if --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X
 107 //    we default to sm_52 -- same as if no --cuda-gpu-arch were passed.
 108 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
 109 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_60 \
 110 // RUN:   --no-cuda-gpu-arch=sm_70 --no-cuda-gpu-arch=sm_60 \
 111 // RUN:   -c %s 2>&1 \
 112 // RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s
 113
 114 // d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X
 115 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
 116 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
 117 // RUN:   --no-cuda-gpu-arch=sm_70 \
 118 // RUN:   -c %s 2>&1 \
 119 // RUN: | FileCheck -check-prefixes ARCH-SM52,ARCH-SM60,NOARCH-SM70 %s
 120
 121 // e) --no-cuda-gpu-arch=X does not affect following --cuda-gpu-arch=X
 122 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
 123 // RUN:   -nogpulib -nogpuinc --no-cuda-gpu-arch=sm_70 --no-cuda-gpu-arch=sm_52 \
 124 // RUN:   --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_52 \
 125 // RUN:   -c %s 2>&1 \
 126 // RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,ARCH-SM70 %s
 127
 128 // f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
 129 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
 130 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
 131 // RUN:   --no-cuda-gpu-arch=all \
 132 // RUN:   --cuda-gpu-arch=sm_70 \
 133 // RUN:   -c %s 2>&1 \
 134 // RUN: | FileCheck -check-prefixes NOARCH-SM52,NOARCH-SM60,ARCH-SM70 %s
 135
 136 // g) There's no --cuda-gpu-arch=all
 137 // RUN: not %clang -### --target=x86_64-linux-gnu --cuda-device-only \
 138 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=all \
 139 // RUN:   -c %s 2>&1 \
 140 // RUN: | FileCheck -check-prefix ARCHALLERROR %s
 141
 142
 143 // Verify that --[no-]cuda-include-ptx arguments are handled correctly.
 144 // a) by default we're including PTX for all GPUs.
 145 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
 146 // RUN:   --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
 147 // RUN:   -c %s 2>&1 \
 148 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM60,PTX-SM52 %s
 149
 150 // b) --no-cuda-include-ptx=all disables PTX inclusion for all GPUs
 151 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
 152 // RUN:   --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
 153 // RUN:   --no-cuda-include-ptx=all \
 154 // RUN:   -c %s 2>&1 \
 155 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM60,NOPTX-SM52 %s
 156
 157 // c) --no-cuda-include-ptx=sm_XX disables PTX inclusion for that GPU only.
 158 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
 159 // RUN:   --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
 160 // RUN:   --no-cuda-include-ptx=sm_60 \
 161 // RUN:   -c %s 2>&1 \
 162 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM60,PTX-SM52 %s
 163 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
 164 // RUN:   --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
 165 // RUN:   --no-cuda-include-ptx=sm_52 \
 166 // RUN:   -c %s 2>&1 \
 167 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM60,NOPTX-SM52 %s
 168
 169 // d) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=all
 170 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
 171 // RUN:   --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
 172 // RUN:   --no-cuda-include-ptx=all --cuda-include-ptx=all \
 173 // RUN:   -c %s 2>&1 \
 174 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM60,PTX-SM52 %s
 175
 176 // e) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=sm_XX
 177 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
 178 // RUN:   --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
 179 // RUN:   --no-cuda-include-ptx=sm_52 --cuda-include-ptx=all \
 180 // RUN:   -c %s 2>&1 \
 181 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM60,PTX-SM52 %s
 182
 183 // Verify -flto=thin -fwhole-program-vtables handling. This should result in
 184 // both options being passed to the host compilation, with neither passed to
 185 // the device compilation.
 186 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \
 187 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,INCLUDES-DEVICE,NOLINK,THINLTOWPD %s
 188 // THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
 189
 190 // ARCH-SM52: "-cc1"{{.*}}"-target-cpu" "sm_52"
 191 // NOARCH-SM52-NOT: "-cc1"{{.*}}"-target-cpu" "sm_52"
 192 // ARCH-SM60: "-cc1"{{.*}}"-target-cpu" "sm_60"
 193 // NOARCH-SM60-NOT: "-cc1"{{.*}}"-target-cpu" "sm_60"
 194 // ARCH-SM70: "-cc1"{{.*}}"-target-cpu" "sm_70"
 195 // NOARCH-SM70-NOT: "-cc1"{{.*}}"-target-cpu" "sm_70"
 196 // ARCHALLERROR: error: unsupported CUDA gpu architecture: all
 197
 198 // Match device-side preprocessor and compiler phases with -save-temps.
 199 // DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 200 // DEVICE-SAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 201 // DEVICE-SAVE-SAME: "-fcuda-is-device"
 202 // DEVICE-SAVE-SAME: "-x" "cuda"
 203
 204 // DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 205 // DEVICE-SAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 206 // DEVICE-SAVE-SAME: "-fcuda-is-device"
 207 // DEVICE-SAVE-SAME: "-x" "cuda-cpp-output"
 208
 209 // Match the job that produces PTX assembly.
 210 // DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 211 // DEVICE-NOSAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 212 // THINLTOWPD-NOT: "-flto=thin"
 213 // DEVICE-SAME: "-fcuda-is-device"
 214 // DEVICE-SM52-SAME: "-target-cpu" "sm_52"
 215 // THINLTOWPD-NOT: "-fwhole-program-vtables"
 216 // DEVICE-SAME: "-o" "[[PTXFILE:[^"]*]]"
 217 // DEVICE-NOSAVE-SAME: "-x" "cuda"
 218 // DEVICE-SAVE-SAME: "-x" "ir"
 219
 220 // Match the call to ptxas (which assembles PTX to SASS).
 221 // DEVICE: ptxas
 222 // DEVICE-SM52-DAG: "--gpu-name" "sm_52"
 223 // DEVICE-DAG: "--output-file" "[[CUBINFILE:[^"]*]]"
 224 // DEVICE-DAG: "[[PTXFILE]]"
 225
 226 // Match another device-side compilation.
 227 // DEVICE2: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 228 // DEVICE2-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
 229 // DEVICE2-SAME: "-fcuda-is-device"
 230 // DEVICE2-SM60-SAME: "-target-cpu" "sm_60"
 231 // DEVICE2-SAME: "-o" "[[PTXFILE2:[^"]*]]"
 232 // DEVICE2-SAME: "-x" "cuda"
 233
 234 // Match another call to ptxas.
 235 // DEVICE2: ptxas
 236 // DEVICE2-SM60-DAG: "--gpu-name" "sm_60"
 237 // DEVICE2-DAG: "--output-file" "[[CUBINFILE2:[^"]*]]"
 238 // DEVICE2-DAG: "[[PTXFILE2]]"
 239
 240 // Match no device-side compilation.
 241 // NODEVICE-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda"
 242 // NODEVICE-NOT: "-fcuda-is-device"
 243
 244 // INCLUDES-DEVICE: fatbinary
 245 // INCLUDES-DEVICE-DAG: "--create" "[[FATBINARY:[^"]*]]"
 246 // INCLUDES-DEVICE-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE]]"
 247 // INCLUDES-DEVICE-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE]]"
 248 // INCLUDES-DEVICE2-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE2]]"
 249 // INCLUDES-DEVICE2-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE2]]"
 250
 251 // Match host-side preprocessor job with -save-temps.
 252 // HOST-SAVE: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
 253 // HOST-SAVE-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
 254 // HOST-SAVE-NOT: "-fcuda-is-device"
 255 // HOST-SAVE-SAME: "-x" "cuda"
 256
 257 // Match host-side compilation.
 258 // HOST: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
 259 // HOST-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
 260 // THINLTOWPD-SAME: "-flto=thin"
 261 // HOST-NOT: "-fcuda-is-device"
 262 // There is only one GPU binary after combining it with fatbinary.
 263 // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
 264 // INCLUDES-DEVICE-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]"
 265 // There is only one GPU binary after combining it with fatbinary.
 266 // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
 267 // THINLTOWPD-SAME: "-fwhole-program-vtables"
 268 // HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]"
 269 // HOST-NOSAVE-SAME: "-x" "cuda"
 270 // HOST-SAVE-SAME: "-x" "cuda-cpp-output"
 271
 272 // Match external assembler that uses compilation output.
 273 // HOST-AS: "-o" "{{.*}}.o" "[[HOSTOUTPUT]]"
 274
 275 // Match no GPU code inclusion.
 276 // NOINCLUDES-DEVICE-NOT: "-fcuda-include-gpubinary"
 277
 278 // Match no host compilation.
 279 // NOHOST-NOT: "-cc1" "-triple"
 280 // NOHOST-NOT: "-x" "cuda"
 281
 282 // Match linker.
 283 // LINK: "{{.*}}{{ld|link}}{{(.exe)?}}"
 284 // LINK-SAME: "[[HOSTOUTPUT]]"
 285
 286 // Match no linker.
 287 // NOLINK-NOT: "{{.*}}{{ld|link}}{{(.exe)?}}"
 288
 289 // FATBIN-COMMON: fatbinary
 290 // FATBIN-COMMON: "--create" "[[FATBINARY:[^"]*]]"
 291 // FATBIN-COMMON: "--image=profile=sm_52,file=
 292 // PTX-SM52: "--image=profile=compute_52,file=
 293 // NOPTX-SM52-NOT: "--image=profile=compute_52,file=
 294 // FATBIN-COMMON: "--image=profile=sm_60,file=
 295 // PTX-SM60: "--image=profile=compute_60,file=
 296 // NOPTX-SM60-NOT: "--image=profile=compute_60,file=