clang/test/Driver/hip-options.hip

   1 // RUN: %clang -### -x hip -nogpulib -nogpuinc --gpu-max-threads-per-block=1024 %s 2>&1 | FileCheck %s
   2
    3 // Check that the device-side compilation receives --gpu-max-threads-per-block.
   4 //
   5 // CHECK: "-cc1" {{.*}} "-fcuda-is-device"
   6 // CHECK-SAME: "--gpu-max-threads-per-block=1024"
   7
   8 // RUN: %clang -### -nogpuinc -nogpulib -fgpu-allow-device-init \
   9 // RUN:   %s 2>&1 | FileCheck -check-prefix=DEVINIT %s
  10 // DEVINIT: "-cc1" {{.*}}"-fgpu-allow-device-init"
  11 // DEVINIT: "-cc1" {{.*}}"-fgpu-allow-device-init"
  12
  13 // Check -fgpu-default-stream=per-thread.
  14 // RUN: %clang -### -nogpuinc -nogpulib -fgpu-default-stream=per-thread \
  15 // RUN:   %s -save-temps 2>&1 | FileCheck -check-prefix=PTH %s
  16 // PTH: "-cc1"{{.*}} "-E" {{.*}}"-fgpu-default-stream=per-thread"
  17 // PTH: "-cc1"{{.*}} "-fgpu-default-stream=per-thread" {{.*}}"-x" "hip-cpp-output"
  18 // PTH: "-cc1"{{.*}} "-E" {{.*}}"-fgpu-default-stream=per-thread"
  19 // PTH: "-cc1"{{.*}} "-fgpu-default-stream=per-thread" {{.*}}"-x" "hip-cpp-output"
  20
  21 // Check -mprintf-kind=hostcall
  22 // RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-unknown-linux-gnu -mprintf-kind=hostcall  %s -save-temps 2>&1 | FileCheck -check-prefix=HOSTC %s
  23 // HOSTC: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-mprintf-kind=hostcall" "-Werror=format-invalid-specifier"{{.*}}"-E" {{.*}}
  24 // HOSTC: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}} "-mprintf-kind=hostcall" "-Werror=format-invalid-specifier" {{.*}}"-x" "hip-cpp-output"
  25 // HOSTC: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}} "-mprintf-kind=hostcall" "-Werror=format-invalid-specifier" {{.*}}"-x" "ir"
  26 // HOSTC: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}} "-E" {{.*}}
  27 // HOSTC: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"
  28
  29 // Check -mprintf-kind=buffered
  30 // RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-unknown-linux-gnu -mprintf-kind=buffered  %s -save-temps 2>&1 | FileCheck -check-prefix=BUFF %s
  31 // BUFF: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-mprintf-kind=buffered" "-Werror=format-invalid-specifier"{{.*}}"-E" {{.*}}
  32 // BUFF: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}} "-mprintf-kind=buffered" "-Werror=format-invalid-specifier" {{.*}}"-x" "hip-cpp-output"
  33 // BUFF: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}} "-mprintf-kind=buffered" "-Werror=format-invalid-specifier" {{.*}}"-x" "ir"
  34 // BUFF: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}} "-E" {{.*}}
  35 // BUFF: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-x" "ir"
  36
  37 // RUN: %clang -### -x hip -nogpulib -nogpuinc --target=x86_64-pc-windows-msvc -fms-extensions \
  38 // RUN:   -mllvm -amdgpu-early-inline-all=true  %s 2>&1 | \
  39 // RUN:   FileCheck -check-prefix=MLLVM %s
  40 // MLLVM-NOT: "-mllvm"{{.*}}"-amdgpu-early-inline-all=true"{{.*}}"-mllvm"{{.*}}"-amdgpu-early-inline-all=true"
  41
  42 // RUN: %clang -### -Xarch_device -g -nogpulib -nogpuinc --cuda-gpu-arch=gfx900 \
  43 // RUN:   -Xarch_device -fcf-protection=branch -Xarch_device -mllvm=--inline-threshold=100 \
  44 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefix=DEV %s
  45 // DEV: "-cc1"{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch" {{.*}}"-mllvm" "--inline-threshold=100"
  46 // DEV: "-cc1"{{.*}} "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}" {{.*}} "-fcf-protection=branch" {{.*}}"-mllvm" "--inline-threshold=100"
  47 // DEV-NOT: clang{{.*}} {{.*}} "-debug-info-kind={{.*}}"
  48
  49 // RUN: %clang -### -Xarch_host -g -nogpulib -nogpuinc --cuda-gpu-arch=gfx900 \
  50 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefix=HOST %s
  51 // HOST-NOT: "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
  52 // HOST-NOT: "-fcuda-is-device" {{.*}} "-debug-info-kind={{.*}}"
  53 // HOST: clang{{.*}} "-debug-info-kind={{.*}}"
  54
  55 // RUN: %clang -### -nogpuinc -nogpulib -munsafe-fp-atomics \
  56 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefix=UNSAFE-FP-ATOMICS %s
  57 // UNSAFE-FP-ATOMICS: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-munsafe-fp-atomics"
  58
  59 // RUN: %clang -### -nogpuinc -nogpulib \
  60 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefix=DEFAULT-UNSAFE-FP-ATOMICS %s
  61 // DEFAULT-UNSAFE-FP-ATOMICS-NOT: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-munsafe-fp-atomics"
  62
  63 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fgpu-exclude-wrong-side-overloads \
  64 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefix=FIX-OVERLOAD %s
  65 // FIX-OVERLOAD: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fgpu-exclude-wrong-side-overloads" "-fgpu-defer-diag"
  66 // FIX-OVERLOAD: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fgpu-exclude-wrong-side-overloads" "-fgpu-defer-diag"
  67
  68 // Check -mconstructor-aliases is not passed to device compilation.
  69
  70 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
  71 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefix=CTA %s
  72 // CTA: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-mconstructor-aliases"
  73 // CTA-NOT: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mconstructor-aliases"
  74
  75 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
  76 // RUN:   --offload-arch=gfx906 -fgpu-inline-threshold=1000 %s 2>&1 | FileCheck -check-prefix=THRESH %s
  77 // THRESH: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mllvm" "-inline-threshold=1000"
  78 // THRESH-NOT: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-inline-threshold=1000"
  79
  80 // Check -foffload-lto=thin translated correctly.
  81
  82 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
  83 // RUN:   --cuda-gpu-arch=gfx906 -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \
  84 // RUN:   | FileCheck -check-prefix=HIPTHINLTO %s
  85
  86 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
  87 // RUN:   --cuda-gpu-arch=gfx906 -fgpu-rdc -foffload-lto=thin -fwhole-program-vtables %s 2>&1 \
  88 // RUN:   | FileCheck -check-prefix=HIPTHINLTO %s
  89
  90 // Ensure we don't error about -fwhole-program-vtables for the non-device offload compile.
  91 // HIPTHINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
  92 // HIPTHINLTO-NOT: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit"
  93 // HIPTHINLTO: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-flto=thin" "-flto-unit" {{.*}} "-fwhole-program-vtables"
  94 // HIPTHINLTO-NOT: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto-unit"
  95 // HIPTHINLTO: lld{{.*}}"-plugin-opt=mcpu=gfx906" "-plugin-opt=thinlto" "-plugin-opt=-force-import-all"
  96
  97 // Check that -flto=thin is handled correctly, particularly with -fwhole-program-vtables.
  98 //
  99 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
 100 // RUN:   --cuda-gpu-arch=gfx906 -flto=thin -fwhole-program-vtables %s 2>&1 \
 101 // RUN:   | FileCheck -check-prefix=THINLTO %s
 102
 103 // Ensure we don't error about -fwhole-program-vtables for the device offload compile. We should
 104 // drop -fwhole-program-vtables for the device offload compile and pass it through for the
 105 // non-device offload compile along with -flto=thin.
 106 // THINLTO-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
 107 // THINLTO-NOT: "-cc1"{{.*}}" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fwhole-program-vtables"
 108 // THINLTO: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-flto=thin" {{.*}} "-fwhole-program-vtables"
 109 // THINLTO-NOT: "-cc1"{{.*}}" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fwhole-program-vtables"
 110
 111 // Check -fopenmp=libomp is allowed with HIP but -fopenmp-targets= is not allowed.
 112
 113 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
 114 // RUN:   --offload-arch=gfx906 -fopenmp=libomp %s 2>&1 | FileCheck -check-prefix=OMP %s
 115 // OMP-NOT: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fopenmp"
 116 // OMP: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fopenmp"
 117
 118 // RUN: not %clang --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
 119 // RUN:   --offload-arch=gfx906 -fopenmp=libomp -fopenmp-targets=amdgcn %s 2>&1 \
 120 // RUN:   | FileCheck -check-prefix=OMPTGT %s
 121 // OMPTGT: unsupported option '-fopenmp-targets=' for language mode 'HIP'
 122
 123 // Check -Xoffload-linker option is passed to lld.
 124
 125 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
 126 // RUN:   --cuda-gpu-arch=gfx906 -fgpu-rdc -Xoffload-linker --build-id=md5 %s 2>&1 \
 127 // RUN:   | FileCheck -check-prefix=OFL-LINK %s
 128 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
 129 // RUN:   --cuda-gpu-arch=gfx906 -Xoffload-linker --build-id=md5 %s 2>&1 \
 130 // RUN:   | FileCheck -check-prefix=OFL-LINK %s
 131 // OFL-LINK: lld{{.*}}"--build-id=md5"
 132
 133 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
 134 // RUN:   --offload-arch=gfx906 -fhip-kernel-arg-name %s 2>&1 \
 135 // RUN:   | FileCheck -check-prefix=KAN %s
 136 // KAN: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fhip-kernel-arg-name"
 137
 138 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
 139 // RUN:   --offload-arch=gfx906 %s 2>&1 \
 140 // RUN:   | FileCheck -check-prefix=KANNEG %s
 141 // KANNEG-NOT: "-fhip-kernel-arg-name"
 142
 143 // RUN: %clang -### -nogpuinc -nogpulib -mno-amdgpu-ieee -mamdgpu-ieee \
 144 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefix=IEEE-ON %s
 145 // IEEE-ON-NOT: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-m{{(no-)?}}amdgpu-ieee"
 146
 147 // RUN: %clang -### -nogpuinc -nogpulib -mamdgpu-ieee -mno-amdgpu-ieee -ffast-math \
 148 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefixes=IEEE-OFF %s
 149 // IEEE-OFF: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mno-amdgpu-ieee"
 150
 151 // RUN: %clang -### -nogpuinc -nogpulib -mamdgpu-ieee -mno-amdgpu-ieee -ffast-math \
 152 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefixes=IEEE-OFF-NEG %s
 153 // IEEE-OFF-NEG-NOT: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-mamdgpu-ieee"
 154
 155 // Check -fno-hip-fp32-correctly-rounded-divide-sqrt is passed to -cc1 but
 156 // (default) -fhip-fp32-correctly-rounded-divide-sqrt is not.
 157
 158 // RUN: %clang -### -nogpuinc -nogpulib -fno-hip-fp32-correctly-rounded-divide-sqrt \
 159 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefixes=NOCRDS %s
 160 // NOCRDS: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fno-hip-fp32-correctly-rounded-divide-sqrt"
 161
 162 // RUN: %clang -### -nogpuinc -nogpulib \
 163 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefixes=CRDS %s
 164 // CRDS-NOT: "-f{{(no-)?}}hip-fp32-correctly-rounded-divide-sqrt"
 165
 166 // RUN: %clang -### -nogpuinc -nogpulib -fhip-fp32-correctly-rounded-divide-sqrt \
 167 // RUN:   --cuda-gpu-arch=gfx906  %s 2>&1 | FileCheck -check-prefixes=CRDS %s
 168 // CRDS-NOT: "-f{{(no-)?}}hip-fp32-correctly-rounded-divide-sqrt"
 169
 170 // Check -fgpu-approx-transcendentals is passed to clang -cc1 but
 171 // (default) -fno-gpu-approx-transcendentals is not.
 172 // -ffast-math implies -fgpu-approx-transcendentals, which can be overridden
 173 // by -fno-gpu-approx-transcendentals.
 174
 175 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fgpu-approx-transcendentals \
 176 // RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=APPROX %s
 177
 178 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -ffast-math \
 179 // RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=APPROX %s
 180
 181 // APPROX: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fgpu-approx-transcendentals"
 182 // APPROX: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fgpu-approx-transcendentals"
 183
 184 // RUN: %clang -### -nogpuinc -nogpulib -fno-gpu-approx-transcendentals \
 185 // RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=NOAPPROX %s
 186
 187 // RUN: %clang -### -nogpuinc -nogpulib \
 188 // RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=NOAPPROX %s
 189
 190 // RUN: %clang -### -nogpuinc -nogpulib -ffast-math -fno-fast-math \
 191 // RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=NOAPPROX %s
 192
 193 // RUN: %clang -### -nogpuinc -nogpulib -ffast-math -fno-gpu-approx-transcendentals \
 194 // RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=NOAPPROX %s
 195
 196 // NOAPPROX-NOT: "-f{{(no-)?}}gpu-approx-transcendentals"
 197
 198 // Check no warnings for -fgpu-approx-transcendentals.
 199
 200 // RUN: %clang -fdriver-only -Werror --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fgpu-approx-transcendentals \
 201 // RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | count 0
 202
 203 // RUN: %clang -fdriver-only -Werror --target=x86_64-unknown-linux-gnu -nostdinc -nostdlib -fgpu-approx-transcendentals \
 204 // RUN:   -x c++ %s 2>&1 | count 0
  205 // Check -f[no-]offload-uniform-block is passed to clang -cc1 when given
  206 // explicitly, and that neither form is passed by default.
 207
 208 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fno-offload-uniform-block \
 209 // RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=NOUNIBLK %s
 210
 211 // NOUNIBLK: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-fno-offload-uniform-block"
 212 // NOUNIBLK: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-fno-offload-uniform-block"
 213
 214 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -foffload-uniform-block \
 215 // RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=UNIBLK %s
 216
 217 // UNIBLK: "-cc1"{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-foffload-uniform-block"
 218 // UNIBLK: "-cc1"{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-foffload-uniform-block"
 219
 220 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
 221 // RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=DEFUNIBLK %s
 222
 223 // DEFUNIBLK-NOT: "-f{{(no-)?}}offload-uniform-block"
 224
 225 // Check no warnings for -f[no-]offload-uniform-block.
 226
 227 // RUN: %clang -fdriver-only -c -Werror --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib -fno-offload-uniform-block \
 228 // RUN:   -foffload-uniform-block --cuda-gpu-arch=gfx906 %s 2>&1 | count 0
 229
  230 // Check -Werror=atomic-alignment is passed for amdgpu by default.
 231
 232 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
 233 // RUN:   --cuda-gpu-arch=gfx906 %s 2>&1 | FileCheck -check-prefix=WARN-ATOMIC %s
 234 // WARN-ATOMIC: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-Werror=atomic-alignment"
 235 // WARN-ATOMIC-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-Werror=atomic-alignment"
 236
 237 // Check the default -Werror=atomic-alignment can be overridden.
 238
 239 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
 240 // RUN:   --cuda-gpu-arch=gfx906 -Xarch_device -Wno-error=atomic-alignment %s \
 241 // RUN:   2>&1 | FileCheck -check-prefix=NO-WARN-ATOMIC %s
 242 // NO-WARN-ATOMIC: clang{{.*}} "-triple" "amdgcn-amd-amdhsa" {{.*}} "-Werror=atomic-alignment" {{.*}} "-Wno-error=atomic-alignment"
 243 // NO-WARN-ATOMIC-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-Werror=atomic-alignment"
 244 // NO-WARN-ATOMIC-NOT: clang{{.*}} "-triple" "x86_64-unknown-linux-gnu" {{.*}} "-Wno-error=atomic-alignment"