test/CodeGen/AMDGPU/and.ll

   1 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
   2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
   3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
   4
   5 declare i32 @llvm.r600.read.tidig.x() #0 ; no arguments, returns i32; the v_* tests below use the result as a GEP index
   6
   7 ; FUNC-LABEL: {{^}}test2:
   8 ; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
   9 ; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  10
  11 ; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
  12 ; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
  13
     ; Loads two consecutive <2 x i32> values starting at %in, ANDs them
     ; element-wise and stores the result to %out. The checks above expect one
     ; 32-bit AND instruction per vector element on each target.
  14 define amdgpu_kernel void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  15   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
  16   %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
  17   %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
  18   %result = and <2 x i32> %a, %b
  19   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  20   ret void
  21 }
  22
  23 ; FUNC-LABEL: {{^}}test4:
  24 ; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  25 ; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  26 ; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  27 ; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
  28
  29
  30 ; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
  31 ; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
  32 ; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
  33 ; SI: s_and_b32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
  34
     ; Four-element variant of test2: ANDs two consecutive <4 x i32> values
     ; loaded from %in and stores the result to %out. The checks above expect
     ; four 32-bit AND instructions on each target.
  35 define amdgpu_kernel void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  36   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
  37   %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
  38   %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
  39   %result = and <4 x i32> %a, %b
  40   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  41   ret void
  42 }
  43
  44 ; FUNC-LABEL: {{^}}s_and_i32:
  45 ; SI: s_and_b32
     ; ANDs two i32 kernel arguments and stores the result; the check above
     ; expects the scalar s_and_b32 instruction.
  46 define amdgpu_kernel void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
  47   %and = and i32 %a, %b
  48   store i32 %and, i32 addrspace(1)* %out, align 4
  49   ret void
  50 }
  51
  52 ; FUNC-LABEL: {{^}}s_and_constant_i32:
  53 ; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687
     ; ANDs an i32 kernel argument with the constant 1234567 (0x12d687); the
     ; check above expects the constant to appear directly as an operand of
     ; s_and_b32.
  54 define amdgpu_kernel void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
  55   %and = and i32 %a, 1234567
  56   store i32 %and, i32 addrspace(1)* %out, align 4
  57   ret void
  58 }
  59
  60 ; FIXME: We should really duplicate the constant so that the SALU use
  61 ; can fold into the s_and_b32 and the VALU one is materialized
  62 ; directly without copying from the SGPR.
  63
  64 ; Second use is a VGPR use of the constant.
  65 ; FUNC-LABEL: {{^}}s_and_multi_use_constant_i32_0:
  66 ; SI: s_mov_b32 [[K:s[0-9]+]], 0x12d687
  67 ; SI-DAG: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, [[K]]
  68 ; SI-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]]
  69 ; SI: buffer_store_dword [[VK]]
     ; The constant 1234567 (0x12d687) is used both as an AND operand and as a
     ; directly stored value. The checks above expect it to be materialized once
     ; into an SGPR, consumed by s_and_b32, and copied to a VGPR for the store.
  70 define amdgpu_kernel void @s_and_multi_use_constant_i32_0(i32 addrspace(1)* %out, i32 %a, i32 %b) {
  71   %and = and i32 %a, 1234567
  72
  73   ; Just to stop future replacement of copy to vgpr + store with VALU op.
  74   %foo = add i32 %and, %b
  75   store volatile i32 %foo, i32 addrspace(1)* %out
  76   store volatile i32 1234567, i32 addrspace(1)* %out
  77   ret void
  78 }
  79
  80 ; Second use is another SGPR use of the constant.
  81 ; FUNC-LABEL: {{^}}s_and_multi_use_constant_i32_1:
  82 ; SI: s_mov_b32 [[K:s[0-9]+]], 0x12d687
  83 ; SI: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, [[K]]
  84 ; SI: s_add_i32
  85 ; SI: s_add_i32 [[ADD:s[0-9]+]], s{{[0-9]+}}, [[K]]
  86 ; SI: buffer_store_dword v{{[0-9]+}}
     ; The constant 1234567 (0x12d687) feeds both the AND and an ADD. The checks
     ; above expect a single s_mov_b32 of the constant whose SGPR is reused by
     ; s_and_b32 and by the second s_add_i32. The final store check matches any
     ; VGPR: no VGPR pattern variable is defined within this function's checks,
     ; so it must not reference one bound by an earlier function.
  87 define amdgpu_kernel void @s_and_multi_use_constant_i32_1(i32 addrspace(1)* %out, i32 %a, i32 %b) {
  88   %and = and i32 %a, 1234567
  89   %foo = add i32 %and, 1234567
  90   %bar = add i32 %foo, %b
  91   store volatile i32 %bar, i32 addrspace(1)* %out
  92   ret void
  93 }
  94
  95 ; FUNC-LABEL: {{^}}v_and_i32_vgpr_vgpr:
  96 ; SI: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
     ; Both AND operands are loaded from addresses indexed by the tidig.x
     ; intrinsic result; the check above expects v_and_b32_e32 with VGPRs for
     ; the destination and both sources.
  97 define amdgpu_kernel void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
  98   %tid = call i32 @llvm.r600.read.tidig.x() #0
  99   %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
 100   %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
 101   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
 102   %a = load i32, i32 addrspace(1)* %gep.a
 103   %b = load i32, i32 addrspace(1)* %gep.b
 104   %and = and i32 %a, %b
 105   store i32 %and, i32 addrspace(1)* %gep.out
 106   ret void
 107 }
 108
 109 ; FUNC-LABEL: {{^}}v_and_i32_sgpr_vgpr:
 110 ; SI-DAG: s_load_dword [[SA:s[0-9]+]]
 111 ; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
 112 ; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
     ; First AND operand is the kernel argument %a, second is loaded from a
     ; tid-indexed address. The checks above expect the scalar-loaded value as
     ; the first source of v_and_b32_e32 and the loaded VGPR as the second.
 113 define amdgpu_kernel void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i32 addrspace(1)* %bptr) {
 114   %tid = call i32 @llvm.r600.read.tidig.x() #0
 115   %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
 116   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
 117   %b = load i32, i32 addrspace(1)* %gep.b
 118   %and = and i32 %a, %b
 119   store i32 %and, i32 addrspace(1)* %gep.out
 120   ret void
 121 }
 122
 123 ; FUNC-LABEL: {{^}}v_and_i32_vgpr_sgpr:
 124 ; SI-DAG: s_load_dword [[SA:s[0-9]+]]
 125 ; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]]
 126 ; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]]
     ; Mirror of v_and_i32_sgpr_vgpr: here the loaded value is the first IR
     ; operand and the kernel argument %b the second. The checks above still
     ; expect the scalar-loaded value as the first source of v_and_b32_e32.
 127 define amdgpu_kernel void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 %b) {
 128   %tid = call i32 @llvm.r600.read.tidig.x() #0
 129   %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
 130   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
 131   %a = load i32, i32 addrspace(1)* %gep.a
 132   %and = and i32 %a, %b
 133   store i32 %and, i32 addrspace(1)* %gep.out
 134   ret void
 135 }
 136
 137 ; FUNC-LABEL: {{^}}v_and_constant_i32:
 138 ; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}}
     ; ANDs a value loaded from a tid-indexed address with 1234567 (0x12d687);
     ; the check above expects the constant as the first source operand of
     ; v_and_b32_e32. The label pattern ends with ':' so it matches only this
     ; function's symbol line.
 139 define amdgpu_kernel void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
 140   %tid = call i32 @llvm.r600.read.tidig.x() #0
 141   %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
 142   %a = load i32, i32 addrspace(1)* %gep, align 4
 143   %and = and i32 %a, 1234567
 144   store i32 %and, i32 addrspace(1)* %out, align 4
 145   ret void
 146 }
 147
 148 ; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32:
 149 ; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}}
     ; ANDs a value loaded from a tid-indexed address with 64; the check above
     ; expects 64 to be printed as a plain decimal operand of v_and_b32_e32.
     ; The label pattern ends with ':' so it matches only this function's
     ; symbol line.
 150 define amdgpu_kernel void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
 151   %tid = call i32 @llvm.r600.read.tidig.x() #0
 152   %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
 153   %a = load i32, i32 addrspace(1)* %gep, align 4
 154   %and = and i32 %a, 64
 155   store i32 %and, i32 addrspace(1)* %out, align 4
 156   ret void
 157 }
 158
 159 ; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32:
 160 ; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}}
     ; ANDs a value loaded from a tid-indexed address with -16; the check above
     ; expects -16 to be printed as a plain decimal operand of v_and_b32_e32.
     ; The label pattern ends with ':' so it matches only this function's
     ; symbol line.
 161 define amdgpu_kernel void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
 162   %tid = call i32 @llvm.r600.read.tidig.x() #0
 163   %gep = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
 164   %a = load i32, i32 addrspace(1)* %gep, align 4
 165   %and = and i32 %a, -16
 166   store i32 %and, i32 addrspace(1)* %out, align 4
 167   ret void
 168 }
 169
 170 ; FUNC-LABEL: {{^}}s_and_i64:
 171 ; SI: s_and_b64
     ; ANDs two i64 kernel arguments and stores the result; the check above
     ; expects a single 64-bit scalar s_and_b64. The label pattern ends with
     ; ':' so it matches only this function's symbol line.
 172 define amdgpu_kernel void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
 173   %and = and i64 %a, %b
 174   store i64 %and, i64 addrspace(1)* %out, align 8
 175   ret void
 176 }
 177
 178 ; FIXME: Should use SGPRs
 179 ; FUNC-LABEL: {{^}}s_and_i1:
 180 ; SI: v_and_b32
     ; ANDs two i1 kernel arguments and stores the i1 result. The check above
     ; records the current output, a vector v_and_b32, which the FIXME marks as
     ; not the desired form.
 181 define amdgpu_kernel void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
 182   %and = and i1 %a, %b
 183   store i1 %and, i1 addrspace(1)* %out
 184   ret void
 185 }
 186
 187 ; FUNC-LABEL: {{^}}s_and_constant_i64:
 188 ; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000{{$}}
 189 ; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80{{$}}
 190 ; SI: buffer_store_dwordx2
     ; ANDs an i64 kernel argument with 549756338176 (0x0000008000080000). The
     ; checks above expect the operation to be split into two 32-bit scalar
     ; ANDs, one with 0x80000 (low half) and one with 0x80 (high half), in
     ; either order.
 191 define amdgpu_kernel void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
 192   %and = and i64 %a, 549756338176
 193   store i64 %and, i64 addrspace(1)* %out, align 8
 194   ret void
 195 }
 196
 197 ; FUNC-LABEL: {{^}}s_and_multi_use_constant_i64:
 198 ; XSI-DAG: s_mov_b32 s[[KLO:[0-9]+]], 0x80000{{$}}
 199 ; XSI-DAG: s_mov_b32 s[[KHI:[0-9]+]], 0x80{{$}}
 200 ; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[KLO]]:[[KHI]]{{\]}}
     ; ANDs two i64 kernel arguments with the same constant 549756338176 and
     ; stores both results with volatile stores. The instruction checks above
     ; use the XSI prefix, which none of the RUN lines in this file enable, so
     ; only the label is verified for this function.
 201 define amdgpu_kernel void @s_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
 202   %and0 = and i64 %a, 549756338176
 203   %and1 = and i64 %b, 549756338176
 204   store volatile i64 %and0, i64 addrspace(1)* %out
 205   store volatile i64 %and1, i64 addrspace(1)* %out
 206   ret void
 207 }
 208
 209 ; FUNC-LABEL: {{^}}s_and_32_bit_constant_i64:
 210 ; SI: s_load_dwordx2
 211 ; SI-NOT: and
 212 ; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687{{$}}
 213 ; SI-NOT: and
 214 ; SI: buffer_store_dwordx2
     ; ANDs an i64 kernel argument with 1234567 (0x12d687), a constant whose
     ; upper 32 bits are zero. The checks above expect exactly one 32-bit
     ; scalar AND between the load and the store, with no other instruction
     ; containing "and".
 215 define amdgpu_kernel void @s_and_32_bit_constant_i64(i64 addrspace(1)* %out, i64 %a) {
 216   %and = and i64 %a, 1234567
 217   store i64 %and, i64 addrspace(1)* %out, align 8
 218   ret void
 219 }
 220
 221 ; FUNC-LABEL: {{^}}s_and_multi_use_inline_imm_i64:
 222 ; SI: s_load_dwordx2
 223 ; SI: s_load_dword [[A:s[0-9]+]]
 224 ; SI: s_load_dword [[B:s[0-9]+]]
 225 ; SI: s_load_dwordx2
 226 ; SI-NOT: and
 227 ; SI: s_lshl_b32 [[A]], [[A]], 1
 228 ; SI: s_lshl_b32 [[B]], [[B]], 1
 229 ; SI: s_and_b32 s{{[0-9]+}}, [[A]], 62
 230 ; SI: s_and_b32 s{{[0-9]+}}, [[B]], 62
 231 ; SI-NOT: and
 232 ; SI: buffer_store_dwordx2
     ; Shifts two i64 kernel arguments left by 1, ANDs each with 62, adds %c to
     ; both results and stores them with volatile stores. The checks above
     ; expect %a and %b to be loaded as single dwords and the shift and AND to
     ; be done as 32-bit scalar operations, with 62 printed as a plain operand.
 233 define amdgpu_kernel void @s_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i64 %a, i64 %b, i64 %c) {
 234   %shl.a = shl i64 %a, 1
 235   %shl.b = shl i64 %b, 1
 236   %and0 = and i64 %shl.a, 62
 237   %and1 = and i64 %shl.b, 62
 238   %add0 = add i64 %and0, %c
 239   %add1 = add i64 %and1, %c
 240   store volatile i64 %add0, i64 addrspace(1)* %out
 241   store volatile i64 %add1, i64 addrspace(1)* %out
 242   ret void
 243 }
 244
 245 ; FUNC-LABEL: {{^}}v_and_i64:
 246 ; SI: v_and_b32
 247 ; SI: v_and_b32
     ; ANDs two i64 values loaded from tid-indexed addresses; the checks above
     ; expect two 32-bit vector AND instructions.
 248 define amdgpu_kernel void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
 249   %tid = call i32 @llvm.r600.read.tidig.x() #0
 250   %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
 251   %a = load i64, i64 addrspace(1)* %gep.a, align 8
 252   %gep.b = getelementptr i64, i64 addrspace(1)* %bptr, i32 %tid
 253   %b = load i64, i64 addrspace(1)* %gep.b, align 8
 254   %and = and i64 %a, %b
 255   store i64 %and, i64 addrspace(1)* %out, align 8
 256   ret void
 257 }
 258
 259 ; FUNC-LABEL: {{^}}v_and_constant_i64:
 260 ; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0xab19b207, {{v[0-9]+}}
 261 ; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, 0x11e, {{v[0-9]+}}
 262 ; SI: buffer_store_dwordx2
     ; ANDs a loaded i64 with 1231231234567 (0x0000011eab19b207). The checks
     ; above expect two 32-bit vector ANDs, one with the low half 0xab19b207
     ; and one with the high half 0x11e, in either order.
 263 define amdgpu_kernel void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
 264   %tid = call i32 @llvm.r600.read.tidig.x() #0
 265   %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
 266   %a = load i64, i64 addrspace(1)* %gep.a, align 8
 267   %and = and i64 %a, 1231231234567
 268   store i64 %and, i64 addrspace(1)* %out, align 8
 269   ret void
 270 }
 271
 272 ; FUNC-LABEL: {{^}}v_and_multi_use_constant_i64:
 273 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO0:[0-9]+]]:[[HI0:[0-9]+]]{{\]}}
 274 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO1:[0-9]+]]:[[HI1:[0-9]+]]{{\]}}
 275 ; SI-DAG: s_movk_i32 [[KHI:s[0-9]+]], 0x11e{{$}}
 276 ; SI-DAG: s_mov_b32 [[KLO:s[0-9]+]], 0xab19b207{{$}}
 277 ; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KLO]], v[[LO0]]
 278 ; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KHI]], v[[HI0]]
 279 ; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KLO]], v[[LO1]]
 280 ; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KHI]], v[[HI1]]
 281 ; SI: buffer_store_dwordx2
 282 ; SI: buffer_store_dwordx2
     ; Performs two volatile i64 loads from %aptr and ANDs each with the same
     ; constant 1231231234567 (0x0000011eab19b207). The checks above expect
     ; each half of the constant to be materialized once into an SGPR and that
     ; SGPR to be shared by the matching halves of both ANDs.
 283 define amdgpu_kernel void @v_and_multi_use_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
 284   %a = load volatile i64, i64 addrspace(1)* %aptr
 285   %b = load volatile i64, i64 addrspace(1)* %aptr
 286   %and0 = and i64 %a, 1231231234567
 287   %and1 = and i64 %b, 1231231234567
 288   store volatile i64 %and0, i64 addrspace(1)* %out
 289   store volatile i64 %and1, i64 addrspace(1)* %out
 290   ret void
 291 }
 292
 293 ; FUNC-LABEL: {{^}}v_and_multi_use_inline_imm_i64:
 294 ; SI: buffer_load_dwordx2 v{{\[}}[[LO0:[0-9]+]]:[[HI0:[0-9]+]]{{\]}}
 295 ; SI-NOT: and
 296 ; SI: buffer_load_dwordx2 v{{\[}}[[LO1:[0-9]+]]:[[HI1:[0-9]+]]{{\]}}
 297 ; SI-NOT: and
 298 ; SI: v_and_b32_e32 v[[RESLO0:[0-9]+]], 63, v[[LO0]]
 299 ; SI: v_and_b32_e32 v[[RESLO1:[0-9]+]], 63, v[[LO1]]
 300 ; SI-NOT: and
 301 ; SI: buffer_store_dwordx2 v{{\[}}[[RESLO0]]
 302 ; SI: buffer_store_dwordx2 v{{\[}}[[RESLO1]]
     ; Performs two volatile i64 loads from %aptr and ANDs each with 63. The
     ; checks above expect exactly one vector AND per value, applied to the
     ; low dword with 63 as a plain operand, and no AND on the high dwords.
 303 define amdgpu_kernel void @v_and_multi_use_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
 304   %a = load volatile i64, i64 addrspace(1)* %aptr
 305   %b = load volatile i64, i64 addrspace(1)* %aptr
 306   %and0 = and i64 %a, 63
 307   %and1 = and i64 %b, 63
 308   store volatile i64 %and0, i64 addrspace(1)* %out
 309   store volatile i64 %and1, i64 addrspace(1)* %out
 310   ret void
 311 }
 312
 313 ; FUNC-LABEL: {{^}}v_and_i64_32_bit_constant:
 314 ; SI: {{buffer|flat}}_load_dword [[VAL:v[0-9]+]]
 315 ; SI-NOT: and
 316 ; SI: v_and_b32_e32 {{v[0-9]+}}, 0x12d687, [[VAL]]
 317 ; SI-NOT: and
 318 ; SI: buffer_store_dwordx2
     ; ANDs a loaded i64 with 1234567 (0x12d687), whose upper 32 bits are zero.
     ; The checks above expect a load into a single VGPR, exactly one vector
     ; AND on that register, and a 64-bit store of the result.
 319 define amdgpu_kernel void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
 320   %tid = call i32 @llvm.r600.read.tidig.x() #0
 321   %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
 322   %a = load i64, i64 addrspace(1)* %gep.a, align 8
 323   %and = and i64 %a, 1234567
 324   store i64 %and, i64 addrspace(1)* %out, align 8
 325   ret void
 326 }
 327
 328 ; FUNC-LABEL: {{^}}v_and_inline_imm_i64:
 329 ; SI: {{buffer|flat}}_load_dword v{{[0-9]+}}
 330 ; SI-NOT: and
 331 ; SI: v_and_b32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
 332 ; SI-NOT: and
 333 ; SI: buffer_store_dwordx2
     ; ANDs a loaded i64 with 64. The checks above expect exactly one vector
     ; AND with 64 printed as a plain operand, followed by a 64-bit store.
 334 define amdgpu_kernel void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
 335   %tid = call i32 @llvm.r600.read.tidig.x() #0
 336   %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
 337   %a = load i64, i64 addrspace(1)* %gep.a, align 8
 338   %and = and i64 %a, 64
 339   store i64 %and, i64 addrspace(1)* %out, align 8
 340   ret void
 341 }
 342
 343 ; FIXME: Should be able to reduce load width
 344 ; FUNC-LABEL: {{^}}v_and_inline_neg_imm_i64:
 345 ; SI: {{buffer|flat}}_load_dwordx2 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
 346 ; SI-NOT: and
 347 ; SI: v_and_b32_e32 v[[VAL_LO]], -8, v[[VAL_LO]]
 348 ; SI-NOT: and
 349 ; SI: buffer_store_dwordx2 v{{\[}}[[VAL_LO]]:[[VAL_HI]]{{\]}}
     ; ANDs a loaded i64 with -8, a mask whose upper 32 bits are all ones. The
     ; checks above expect a full 64-bit load, a single in-place vector AND on
     ; the low register only, and a store of the same register pair.
 350 define amdgpu_kernel void @v_and_inline_neg_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
 351   %tid = call i32 @llvm.r600.read.tidig.x() #0
 352   %gep.a = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
 353   %a = load i64, i64 addrspace(1)* %gep.a, align 8
 354   %and = and i64 %a, -8
 355   store i64 %and, i64 addrspace(1)* %out, align 8
 356   ret void
 357 }
 358
 359 ; FUNC-LABEL: {{^}}s_and_inline_imm_64_i64:
 360 ; SI: s_load_dword
 361 ; SI-NOT: and
 362 ; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 64
 363 ; SI-NOT: and
 364 ; SI: buffer_store_dword
     ; ANDs an i64 kernel argument with 64; %aptr is unused. The checks above
     ; expect exactly one 32-bit scalar AND with 64 as a plain operand. The
     ; label pattern ends with ':' because this name is a prefix of
     ; s_and_inline_imm_64_i64_noshrink, which follows in this file.
 365 define amdgpu_kernel void @s_and_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
 366   %and = and i64 %a, 64
 367   store i64 %and, i64 addrspace(1)* %out, align 8
 368   ret void
 369 }
 370
 371 ; FUNC-LABEL: {{^}}s_and_inline_imm_64_i64_noshrink:
 372 ; SI: s_load_dword [[A:s[0-9]+]]
 373 ; SI: s_lshl_b32 [[A]], [[A]], 1{{$}}
 374 ; SI-NOT: and
 375 ; SI: s_and_b32 s{{[0-9]+}}, [[A]], 64
 376 ; SI-NOT: and
 377 ; SI: s_add_u32
 378 ; SI-NEXT: s_addc_u32
     ; Shifts an i64 kernel argument left by 1, ANDs it with 64 and adds %b;
     ; %aptr is unused. The checks above expect a 32-bit shift and a single
     ; 32-bit scalar AND, then a 64-bit add emitted as s_add_u32 immediately
     ; followed by s_addc_u32.
 379 define amdgpu_kernel void @s_and_inline_imm_64_i64_noshrink(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a, i64 %b) {
 380   %shl = shl i64 %a, 1
 381   %and = and i64 %shl, 64
 382   %add = add i64 %and, %b
 383   store i64 %add, i64 addrspace(1)* %out, align 8
 384   ret void
 385 }
 386
 387 ; FUNC-LABEL: {{^}}s_and_inline_imm_1_i64:
 388 ; SI: s_load_dwordx2
 389 ; SI-NOT: and
 390 ; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 1
 391 ; SI-NOT: and
 392 ; SI: buffer_store_dwordx2
     ; ANDs an i64 kernel argument with 1; %aptr is unused. The checks above
     ; expect exactly one 32-bit scalar AND with operand 1. The label pattern
     ; ends with ':' so it matches only this function's symbol line.
 393 define amdgpu_kernel void @s_and_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
 394   %and = and i64 %a, 1
 395   store i64 %and, i64 addrspace(1)* %out, align 8
 396   ret void
 397 }
 398
 399 ; FUNC-LABEL: {{^}}s_and_inline_imm_1.0_i64:
 400 ; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1.0
 401
 402 ; SI: s_load_dwordx2
 403 ; SI: s_load_dwordx2
 404 ; SI-NOT: and
 405 ; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x3ff00000
 406 ; SI-NOT: and
 407 ; SI: buffer_store_dwordx2
     ; ANDs an i64 kernel argument with 4607182418800017408
     ; (0x3ff0000000000000, the bit pattern of double 1.0); %aptr is unused.
     ; The active checks expect exactly one 32-bit scalar AND with the literal
     ; 0x3ff00000. The XSI line is not enabled by any RUN line in this file.
     ; The label pattern ends with ':' so it matches only this function's
     ; symbol line.
 408 define amdgpu_kernel void @s_and_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
 409   %and = and i64 %a, 4607182418800017408
 410   store i64 %and, i64 addrspace(1)* %out, align 8
 411   ret void
 412 }
 413
 414 ; FUNC-LABEL: {{^}}s_and_inline_imm_neg_1.0_i64:
 415 ; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -1.0
 416
 417 ; SI: s_load_dwordx2
 418 ; SI: s_load_dwordx2
 419 ; SI-NOT: and
 420 ; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xbff00000
 421 ; SI-NOT: and
 422 ; SI: buffer_store_dwordx2
     ; ANDs an i64 kernel argument with 13830554455654793216
     ; (0xbff0000000000000, the bit pattern of double -1.0); %aptr is unused.
     ; The active checks expect exactly one 32-bit scalar AND with the literal
     ; 0xbff00000. The XSI line is not enabled by any RUN line in this file.
     ; The label pattern ends with ':' so it matches only this function's
     ; symbol line.
 423 define amdgpu_kernel void @s_and_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
 424   %and = and i64 %a, 13830554455654793216
 425   store i64 %and, i64 addrspace(1)* %out, align 8
 426   ret void
 427 }
 428
 429 ; FUNC-LABEL: {{^}}s_and_inline_imm_0.5_i64:
 430 ; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0.5
 431
 432 ; SI: s_load_dwordx2
 433 ; SI: s_load_dwordx2
 434 ; SI-NOT: and
 435 ; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x3fe00000
 436 ; SI-NOT: and
 437 ; SI: buffer_store_dwordx2
     ; ANDs an i64 kernel argument with 4602678819172646912
     ; (0x3fe0000000000000, the bit pattern of double 0.5); %aptr is unused.
     ; The active checks expect exactly one 32-bit scalar AND with the literal
     ; 0x3fe00000. The XSI line is not enabled by any RUN line in this file.
     ; The label pattern ends with ':' so it matches only this function's
     ; symbol line.
 438 define amdgpu_kernel void @s_and_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
 439   %and = and i64 %a, 4602678819172646912
 440   store i64 %and, i64 addrspace(1)* %out, align 8
 441   ret void
 442 }
 443
 444 ; FUNC-LABEL: {{^}}s_and_inline_imm_neg_0.5_i64:
 445 ; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -0.5
 446
 447 ; SI: s_load_dwordx2
 448 ; SI: s_load_dwordx2
 449 ; SI-NOT: and
 450 ; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xbfe00000
 451 ; SI-NOT: and
 452 ; SI: buffer_store_dwordx2
     ; ANDs an i64 kernel argument with 13826050856027422720
     ; (0xbfe0000000000000, the bit pattern of double -0.5); %aptr is unused.
     ; The active checks expect exactly one 32-bit scalar AND with the literal
     ; 0xbfe00000. The XSI line is not enabled by any RUN line in this file.
 453 define amdgpu_kernel void @s_and_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
 454   %and = and i64 %a, 13826050856027422720
 455   store i64 %and, i64 addrspace(1)* %out, align 8
 456   ret void
 457 }
 458
 459 ; FUNC-LABEL: {{^}}s_and_inline_imm_2.0_i64:
 460 ; SI: s_load_dwordx2
 461 ; SI: s_load_dwordx2
 462 ; SI-NOT: and
 463 ; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 2.0
 464 ; SI-NOT: and
 465 ; SI: buffer_store_dwordx2
     ; ANDs an i64 kernel argument with 4611686018427387904
     ; (0x4000000000000000); %aptr is unused. The upper 32 bits, 0x40000000,
     ; are the bit pattern of the 32-bit float 2.0, and the checks above expect
     ; exactly one 32-bit scalar AND whose operand is printed as 2.0.
 466 define amdgpu_kernel void @s_and_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
 467   %and = and i64 %a, 4611686018427387904
 468   store i64 %and, i64 addrspace(1)* %out, align 8
 469   ret void
 470 }
 471
 472 ; FUNC-LABEL: {{^}}s_and_inline_imm_neg_2.0_i64:
 473 ; SI: s_load_dwordx2
 474 ; SI: s_load_dwordx2
 475 ; SI-NOT: and
 476 ; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, -2.0
 477 ; SI-NOT: and
 478 ; SI: buffer_store_dwordx2
     ; ANDs an i64 kernel argument with 13835058055282163712
     ; (0xc000000000000000); %aptr is unused. The upper 32 bits, 0xc0000000,
     ; are the bit pattern of the 32-bit float -2.0, and the checks above
     ; expect exactly one 32-bit scalar AND whose operand is printed as -2.0.
 479 define amdgpu_kernel void @s_and_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
 480   %and = and i64 %a, 13835058055282163712
 481   store i64 %and, i64 addrspace(1)* %out, align 8
 482   ret void
 483 }
 484
 485 ; FUNC-LABEL: {{^}}s_and_inline_imm_4.0_i64:
 486 ; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 4.0
 487
 488 ; SI: s_load_dwordx2
 489 ; SI: s_load_dwordx2
 490 ; SI-NOT: and
 491 ; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0x40100000
 492 ; SI-NOT: and
 493 ; SI: buffer_store_dwordx2
     ; ANDs an i64 kernel argument with 4616189618054758400
     ; (0x4010000000000000, the bit pattern of double 4.0); %aptr is unused.
     ; The active checks expect exactly one 32-bit scalar AND with the literal
     ; 0x40100000. The XSI line is not enabled by any RUN line in this file.
 494 define amdgpu_kernel void @s_and_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
 495   %and = and i64 %a, 4616189618054758400
 496   store i64 %and, i64 addrspace(1)* %out, align 8
 497   ret void
 498 }
 499
 500 ; FUNC-LABEL: {{^}}s_and_inline_imm_neg_4.0_i64:
 501 ; XSI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -4.0
 502
 503 ; SI: s_load_dwordx2
 504 ; SI: s_load_dwordx2
 505 ; SI-NOT: and
 506 ; SI: s_and_b32 {{s[0-9]+}}, {{s[0-9]+}}, 0xc0100000
 507 ; SI-NOT: and
 508 ; SI: buffer_store_dwordx2
     ; ANDs an i64 kernel argument with 13839561654909534208
     ; (0xc010000000000000, the bit pattern of double -4.0); %aptr is unused.
     ; The active checks expect exactly one 32-bit scalar AND with the literal
     ; 0xc0100000. The XSI line is not enabled by any RUN line in this file.
 509 define amdgpu_kernel void @s_and_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
 510   %and = and i64 %a, 13839561654909534208
 511   store i64 %and, i64 addrspace(1)* %out, align 8
 512   ret void
 513 }
 514
 515
 516 ; Test with the 64-bit integer bit pattern for a 32-bit float in the
 517 ; low 32 bits, which is not a valid 64-bit inline immediate.
 518
 519 ; FUNC-LABEL: {{^}}s_and_inline_imm_f32_4.0_i64:
 520 ; SI: s_load_dwordx2
 521 ; SI: s_load_dword s
 522 ; SI-NOT: and
 523 ; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, 4.0
 524 ; SI-NOT: and
 525 ; SI: buffer_store_dwordx2
     ; ANDs an i64 kernel argument with 1082130432 (0x40800000, the bit pattern
     ; of the 32-bit float 4.0, in the low 32 bits); %aptr is unused. The
     ; checks above expect a single-dword load and exactly one 32-bit scalar
     ; AND whose operand is printed as 4.0.
 526 define amdgpu_kernel void @s_and_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
 527   %and = and i64 %a, 1082130432
 528   store i64 %and, i64 addrspace(1)* %out, align 8
 529   ret void
 530 }
 531
 532 ; FUNC-LABEL: {{^}}s_and_inline_imm_f32_neg_4.0_i64:
 533 ; SI: s_load_dwordx2
 534 ; SI: s_load_dwordx2
 535 ; SI-NOT: and
 536 ; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, -4.0
 537 ; SI-NOT: and
 538 ; SI: buffer_store_dwordx2
     ; ANDs an i64 kernel argument with -1065353216 (0xffffffffc0800000 as
     ; i64); %aptr is unused. The low 32 bits, 0xc0800000, are the bit pattern
     ; of the 32-bit float -4.0 and the high 32 bits are all ones. The checks
     ; above expect exactly one 32-bit scalar AND whose operand is printed as
     ; -4.0.
 539 define amdgpu_kernel void @s_and_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
 540   %and = and i64 %a, -1065353216
 541   store i64 %and, i64 addrspace(1)* %out, align 8
 542   ret void
 543 }
 544
 545 ; Shift into upper 32-bits
     ; FUNC-LABEL: {{^}}s_and_inline_high_imm_f32_4.0_i64:
 546 ; SI: s_load_dwordx2
 547 ; SI: s_load_dwordx2
 548 ; SI-NOT: and
 549 ; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, 4.0
 550 ; SI-NOT: and
 551 ; SI: buffer_store_dwordx2
     ; ANDs an i64 kernel argument with 4647714815446351872
     ; (0x4080000000000000); %aptr is unused. The upper 32 bits, 0x40800000,
     ; are the bit pattern of the 32-bit float 4.0, and the checks above expect
     ; exactly one 32-bit scalar AND whose operand is printed as 4.0. The label
     ; directive anchors these checks to this function's own output.
 552 define amdgpu_kernel void @s_and_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
 553   %and = and i64 %a, 4647714815446351872
 554   store i64 %and, i64 addrspace(1)* %out, align 8
 555   ret void
 556 }
 557
 558 ; FUNC-LABEL: {{^}}s_and_inline_high_imm_f32_neg_4.0_i64:
 559 ; SI: s_load_dwordx2
 560 ; SI: s_load_dwordx2
 561 ; SI-NOT: and
 562 ; SI: s_and_b32 s[[K_HI:[0-9]+]], s{{[0-9]+}}, -4.0
 563 ; SI-NOT: and
 564 ; SI: buffer_store_dwordx2
     ; ANDs an i64 kernel argument with 13871086852301127680
     ; (0xc080000000000000); %aptr is unused. The upper 32 bits, 0xc0800000,
     ; are the bit pattern of the 32-bit float -4.0, and the checks above
     ; expect exactly one 32-bit scalar AND whose operand is printed as -4.0.
 565 define amdgpu_kernel void @s_and_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
 566   %and = and i64 %a, 13871086852301127680
 567   store i64 %and, i64 addrspace(1)* %out, align 8
 568   ret void
 569 }
 570 attributes #0 = { nounwind readnone } ; referenced by the tidig.x intrinsic declaration and by each call to it