; RUN: llc -mtriple=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,FP16 %s
; RUN: llc -mtriple=amdgcn -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,FP16 %s
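
; Divergence-driven instruction selection should lower fneg, fabs and
; fneg(fabs) to sign-bit operations: VALU V_XOR/V_AND/V_OR for divergent
; inputs and SALU S_XOR/S_AND/S_OR for uniform inputs.
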
define amdgpu_kernel void @divergent_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_XOR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fneg = fneg float %val
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fneg = fneg float %val
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: V_AND_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  store float %fabs, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name: uniform_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  store float %fabs, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_OR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  %fneg = fneg float %fabs
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  %fneg = fneg float %fabs
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

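; For f16 the sign bit is bit 15, so the masks are 0x8000 (32768) for fneg
; and 0x7fff (32767) for fabs; the divergent cases are only checked on
; targets with 16-bit instructions (FP16).
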
define amdgpu_kernel void @divergent_fabs_f16(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GCN-LABEL: name: divergent_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32767
; FP16: V_AND_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %tid.ext
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  store half %fabs, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @uniform_fabs_f16(ptr addrspace(1) %in, ptr addrspace(1) %out, i64 %idx) {
; GCN-LABEL: name: uniform_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32767
; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %idx
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  store half %fabs, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @divergent_fneg_f16(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GCN-LABEL: name: divergent_fneg_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; FP16: V_XOR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %tid.ext
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fneg = fneg half %val
  store half %fneg, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @uniform_fneg_f16(ptr addrspace(1) %in, ptr addrspace(1) %out, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %idx
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fneg = fneg half %val
  store half %fneg, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_f16(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GCN-LABEL: name: divergent_fneg_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; FP16: V_OR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %tid.ext
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  %fneg = fneg half %fabs
  store half %fneg, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_f16(ptr addrspace(1) %in, ptr addrspace(1) %out, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %idx
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  %fneg = fneg half %fabs
  store half %fneg, ptr addrspace(1) %out
  ret void
}

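; For <2 x half> both lanes are handled by one 32-bit op with a packed mask:
; 0x80008000 (-2147450880) for the sign bits, 0x7fff7fff (2147450879) for fabs.
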
define amdgpu_kernel void @divergent_fneg_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; FP16: V_XOR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fneg = fneg <2 x half> %val
  store <2 x half> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name: uniform_fneg_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fneg = fneg <2 x half> %val
  store <2 x half> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
; FP16: V_AND_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  store <2 x half> %fabs, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name: uniform_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  store <2 x half> %fabs, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; FP16: V_OR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %fneg = fneg <2 x half> %fabs
  store <2 x half> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %fneg = fneg <2 x half> %fabs
  store <2 x half> %fneg, ptr addrspace(1) %gep.out
  ret void
}

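; For <2 x float> each element gets its own 32-bit op; the mask register is
; shared by both, so the first use does not kill it.
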
define amdgpu_kernel void @divergent_fneg_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_XOR_B32_e64 %[[REG]]
; GCN: V_XOR_B32_e64 %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fneg = fneg <2 x float> %val
  store <2 x float> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name: uniform_fneg_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_XOR_B32 killed %{{[0-9]+}}, %[[REG]]
; GCN: S_XOR_B32 killed %{{[0-9]+}}, %[[REG]]

  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fneg = fneg <2 x float> %val
  store <2 x float> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fabs_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: V_AND_B32_e64 %[[REG]]
; GCN: V_AND_B32_e64 %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  store <2 x float> %fabs, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fabs_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name: uniform_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: S_AND_B32 killed %{{[0-9]+}}, %[[REG]]
; GCN: S_AND_B32 killed %{{[0-9]+}}, %[[REG]]

  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  store <2 x float> %fabs, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_OR_B32_e64 %[[REG]]
; GCN: V_OR_B32_e64 %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  %fneg = fneg <2 x float> %fabs
  store <2 x float> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_OR_B32 killed %{{[0-9]+}}, %[[REG]]
; GCN: S_OR_B32 killed %{{[0-9]+}}, %[[REG]]

  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  %fneg = fneg <2 x float> %fabs
  store <2 x float> %fneg, ptr addrspace(1) %gep.out
  ret void
}

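; For f64 the sign bit lives in the high dword, so only the sub1 half is
; modified with a 32-bit op while sub0 is copied through unchanged.
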
define amdgpu_kernel void @divergent_fneg_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[XOR:[0-9]+]]:vgpr_32 = V_XOR_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR]], %subreg.sub1

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fneg = fneg double %val
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_f64(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[XOR:[0-9]+]]:sreg_32 = S_XOR_B32 killed %[[HI32]], killed %[[SREG_MASK]]
; GCN: %[[XOR_COPY:[0-9]+]]:sreg_32 = COPY %[[XOR]]
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR_COPY]], %subreg.sub1

  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %idx
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fneg = fneg double %val
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND]], %subreg.sub1

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  store double %fabs, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name: uniform_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %[[HI32]], killed %[[SREG_MASK]]
; GCN: %[[AND_COPY:[0-9]+]]:sreg_32 = COPY %[[AND]]
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND_COPY]], %subreg.sub1

  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %idx
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  store double %fabs, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[OR:[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR]], %subreg.sub1

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  %fneg = fneg double %fabs
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[OR:[0-9]+]]:sreg_32 = S_OR_B32 killed %[[HI32]], killed %[[SREG_MASK]]
; GCN: %[[OR_COPY:[0-9]+]]:sreg_32 = COPY %[[OR]]
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR_COPY]], %subreg.sub1

  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %idx
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  %fneg = fneg double %fabs
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

declare float @llvm.fabs.f32(float)
declare half @llvm.fabs.f16(half)
declare double @llvm.fabs.f64(double)
declare <2 x half> @llvm.fabs.v2f16(<2 x half>)
declare <2 x float> @llvm.fabs.v2f32(<2 x float>)

declare i32 @llvm.amdgcn.workitem.id.x()