test/CodeGen/AMDGPU/permute.ll

   1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
   2
   3 ; GCN-LABEL: {{^}}lsh8_or_and:
   4 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x6050400
   5 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
   6 define amdgpu_kernel void @lsh8_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
   7 bb:
   8   %lane = tail call i32 @llvm.amdgcn.workitem.id.x()
   9   %slot = getelementptr i32, i32 addrspace(1)* %arg, i32 %lane ; per-work-item element of %arg
  10   %mem = load i32, i32 addrspace(1)* %slot, align 4
  11   %mem.shl8 = shl i32 %mem, 8 ; loaded value moves up into bits 31:8
  12   %arg.b0 = and i32 %arg1, 255 ; 0x000000ff: keep only bits 7:0 of %arg1
  13   %merged = or i32 %mem.shl8, %arg.b0 ; operands occupy disjoint bytes, so this is a byte merge
  14   store i32 %merged, i32 addrspace(1)* %slot, align 4
  15   ret void
  16 }
  17
  18 ; GCN-LABEL: {{^}}lsr24_or_and:
  19 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503
  20 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
  21 define amdgpu_kernel void @lsr24_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
  22 bb:
  23   %lane = tail call i32 @llvm.amdgcn.workitem.id.x()
  24   %slot = getelementptr i32, i32 addrspace(1)* %arg, i32 %lane ; per-work-item element of %arg
  25   %mem = load i32, i32 addrspace(1)* %slot, align 4
  26   %mem.hi = lshr i32 %mem, 24 ; top byte of the loaded value lands in bits 7:0
  27   %arg.up = and i32 %arg1, 4294967040 ; 0xffffff00: keep bits 31:8 of %arg1
  28   %merged = or i32 %mem.hi, %arg.up ; operands occupy disjoint bytes, so this is a byte merge
  29   store i32 %merged, i32 addrspace(1)* %slot, align 4
  30   ret void
  31 }
  32
  33 ; GCN-LABEL: {{^}}and_or_lsr24:
  34 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503
  35 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
  36 define amdgpu_kernel void @and_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
  37 bb:
  38   %lane = tail call i32 @llvm.amdgcn.workitem.id.x()
  39   %slot = getelementptr i32, i32 addrspace(1)* %arg, i32 %lane ; per-work-item element of %arg
  40   %mem = load i32, i32 addrspace(1)* %slot, align 4
  41   %mem.up = and i32 %mem, 4294967040 ; 0xffffff00: keep bits 31:8 of the loaded value
  42   %arg.hi = lshr i32 %arg1, 24 ; top byte of %arg1 lands in bits 7:0
  43   %merged = or i32 %mem.up, %arg.hi ; operands occupy disjoint bytes, so this is a byte merge
  44   %flipped = xor i32 %merged, -2147483648 ; 0x80000000: toggle bit 31 of the merged value
  45   store i32 %flipped, i32 addrspace(1)* %slot, align 4
  46   ret void
  47 }
  48
  49 ; GCN-LABEL: {{^}}and_or_and:
  50 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020500
  51 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
  52 define amdgpu_kernel void @and_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
  53 bb:
  54   %lane = tail call i32 @llvm.amdgcn.workitem.id.x()
  55   %slot = getelementptr i32, i32 addrspace(1)* %arg, i32 %lane ; per-work-item element of %arg
  56   %mem = load i32, i32 addrspace(1)* %slot, align 4
  57   %mem.odd = and i32 %mem, -16711936 ; 0xff00ff00: bytes 1 and 3 of the loaded value
  58   %arg.even = and i32 %arg1, 16711935 ; 0x00ff00ff: bytes 0 and 2 of %arg1
  59   %merged = or i32 %mem.odd, %arg.even ; complementary masks, so the bytes interleave
  60   store i32 %merged, i32 addrspace(1)* %slot, align 4
  61   ret void
  62 }
  63
  64 ; GCN-LABEL: {{^}}lsh8_or_lsr24:
  65 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x6050403
  66 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
  67 define amdgpu_kernel void @lsh8_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
  68 bb:
  69   %lane = tail call i32 @llvm.amdgcn.workitem.id.x()
  70   %slot = getelementptr i32, i32 addrspace(1)* %arg, i32 %lane ; per-work-item element of %arg
  71   %mem = load i32, i32 addrspace(1)* %slot, align 4
  72   %mem.shl8 = shl i32 %mem, 8 ; loaded value moves up into bits 31:8
  73   %arg.hi = lshr i32 %arg1, 24 ; top byte of %arg1 lands in bits 7:0
  74   %merged = or i32 %mem.shl8, %arg.hi ; operands occupy disjoint bytes, so this is a byte merge
  75   store i32 %merged, i32 addrspace(1)* %slot, align 4
  76   ret void
  77 }
  78
  79 ; GCN-LABEL: {{^}}lsh16_or_lsr24:
  80 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x5040c03
  81 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
  82 define amdgpu_kernel void @lsh16_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
  83 bb:
  84   %lane = tail call i32 @llvm.amdgcn.workitem.id.x()
  85   %slot = getelementptr i32, i32 addrspace(1)* %arg, i32 %lane ; per-work-item element of %arg
  86   %mem = load i32, i32 addrspace(1)* %slot, align 4
  87   %mem.shl16 = shl i32 %mem, 16 ; low half of the loaded value moves into bits 31:16
  88   %arg.hi = lshr i32 %arg1, 24 ; top byte of %arg1 lands in bits 7:0
  89   %merged = or i32 %mem.shl16, %arg.hi ; neither operand sets bits 15:8, so that byte stays zero
  90   store i32 %merged, i32 addrspace(1)* %slot, align 4
  91   ret void
  92 }
  93
  94 ; GCN-LABEL: {{^}}and_xor_and:
  95 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104
  96 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
  97 define amdgpu_kernel void @and_xor_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
  98 bb:
  99   %lane = tail call i32 @llvm.amdgcn.workitem.id.x()
 100   %slot = getelementptr i32, i32 addrspace(1)* %arg, i32 %lane ; per-work-item element of %arg
 101   %mem = load i32, i32 addrspace(1)* %slot, align 4
 102   %mem.outer = and i32 %mem, -16776961 ; 0xff0000ff: bytes 0 and 3 of the loaded value
 103   %arg.inner = and i32 %arg1, 16776960 ; 0x00ffff00: bytes 1 and 2 of %arg1
 104   %merged = xor i32 %mem.outer, %arg.inner ; masks are disjoint, so xor gives the same bits as or
 105   store i32 %merged, i32 addrspace(1)* %slot, align 4
 106   ret void
 107 }
 108
 109 ; GCN-LABEL: {{^}}and_or_or_and:
 110 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
 111 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
 112 define amdgpu_kernel void @and_or_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
 113 bb:
 114   %lane = tail call i32 @llvm.amdgcn.workitem.id.x()
 115   %slot = getelementptr i32, i32 addrspace(1)* %arg, i32 %lane ; per-work-item element of %arg
 116   %mem = load i32, i32 addrspace(1)* %slot, align 4
 117   %mem.even = and i32 %mem, 16711935 ; 0x00ff00ff: bytes 0 and 2 of the loaded value
 118   %arg.up = and i32 %arg1, 4294967040 ; 0xffffff00: bits 31:8 of %arg1
 119   %arg.ones = or i32 %arg.up, -65536 ; 0xffff0000: force bits 31:16 to all ones
 120   %merged = or i32 %arg.ones, %mem.even ; upper half is already all ones; low half mixes both inputs
 121   store i32 %merged, i32 addrspace(1)* %slot, align 4
 122   ret void
 123 }
 124
 125 ; GCN-LABEL: {{^}}and_or_and_shl:
 126 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00
 127 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
 128 define amdgpu_kernel void @and_or_and_shl(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
 129 bb:
 130   %lane = tail call i32 @llvm.amdgcn.workitem.id.x()
 131   %slot = getelementptr i32, i32 addrspace(1)* %arg, i32 %lane ; per-work-item element of %arg
 132   %mem = load i32, i32 addrspace(1)* %slot, align 4
 133   %mem.shl16 = shl i32 %mem, 16 ; low half of the loaded value moves into bits 31:16
 134   %arg.lo = and i32 %arg1, 65535 ; 0x0000ffff: low half of %arg1
 135   %merged = or i32 %mem.shl16, %arg.lo
 136   %outer = and i32 %merged, 4278190335 ; 0xff0000ff: clear the two middle bytes
 137   store i32 %outer, i32 addrspace(1)* %slot, align 4
 138   ret void
 139 }
 140
 141 ; GCN-LABEL: {{^}}or_and_or:
 142 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104
 143 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
 144 define amdgpu_kernel void @or_and_or(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
 145 bb:
 146   %lane = tail call i32 @llvm.amdgcn.workitem.id.x()
 147   %slot = getelementptr i32, i32 addrspace(1)* %arg, i32 %lane ; per-work-item element of %arg
 148   %mem = load i32, i32 addrspace(1)* %slot, align 4
 149   %mem.fill = or i32 %mem, 16776960 ; 0x00ffff00: bytes 1 and 2 become all ones
 150   %arg.fill = or i32 %arg1, 4278190335 ; 0xff0000ff: bytes 0 and 3 become all ones
 151   %picked = and i32 %mem.fill, %arg.fill ; all-ones bytes pass the other operand's byte through
 152   store i32 %picked, i32 addrspace(1)* %slot, align 4
 153   ret void
 154 }
 155
 156 ; GCN-LABEL: {{^}}known_ffff0500:
 157 ; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
 158 ; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
 159 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
 160 ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
 161 define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
 162 bb:
 163   %lane = tail call i32 @llvm.amdgcn.workitem.id.x()
 164   %slot = getelementptr i32, i32 addrspace(1)* %arg, i32 %lane ; per-work-item element of %arg
 165   %mem = load i32, i32 addrspace(1)* %slot, align 4
 166   %arg.bit15 = or i32 %arg1, 32768 ; 0x8000: bit 15 of the %arg1 side is known set
 167   %mem.bit2 = or i32 %mem, 4 ; bit 2 of the loaded side is known set
 168   %mem.even = and i32 %mem.bit2, 16711935 ; 0x00ff00ff: bytes 0 and 2, bit 2 survives
 169   %arg.up = and i32 %arg.bit15, 4294967040 ; 0xffffff00: bits 31:8, bit 15 survives
 170   %arg.ones = or i32 %arg.up, 4294901760 ; 0xffff0000: force bits 31:16 to all ones
 171   %merged = or i32 %arg.ones, %mem.even
 172   store i32 %merged, i32 addrspace(1)* %slot, align 4
 173   %known = and i32 %merged, 4294934532 ; 0xffff8004: every bit kept here was forced to one above
 174   store i32 %known, i32 addrspace(1)* %arg, align 4
 175   ret void
 176 }
 177
 178 ; GCN-LABEL: {{^}}known_050c0c00:
 179 ; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00
 180 ; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 4{{$}}
 181 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
 182 ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
 183 define amdgpu_kernel void @known_050c0c00(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
 184 bb:
 185   %lane = tail call i32 @llvm.amdgcn.workitem.id.x()
 186   %slot = getelementptr i32, i32 addrspace(1)* %arg, i32 %lane ; per-work-item element of %arg
 187   %mem = load i32, i32 addrspace(1)* %slot, align 4
 188   %mem.shl16 = shl i32 %mem, 16 ; low half of the loaded value moves into bits 31:16
 189   %arg.bit2 = or i32 %arg1, 4 ; bit 2 of the %arg1 side is known set
 190   %arg.lo = and i32 %arg.bit2, 65535 ; 0x0000ffff: low half, bit 2 survives
 191   %merged = or i32 %mem.shl16, %arg.lo
 192   %outer = and i32 %merged, 4278190335 ; 0xff0000ff: clear the two middle bytes
 193   store i32 %outer, i32 addrspace(1)* %slot, align 4
 194   %known = and i32 %outer, 16776964 ; 0x00ffff04: middle bytes are already zero, so only the set bit 2 remains
 195   store i32 %known, i32 addrspace(1)* %arg, align 4
 196   ret void
 197 }
 198
 199 ; GCN-LABEL: {{^}}known_ffff8004:
 200 ; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
 201 ; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
 202 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
 203 ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
 204 define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
 205 bb:
 206   %lane = tail call i32 @llvm.amdgcn.workitem.id.x()
 207   %slot = getelementptr i32, i32 addrspace(1)* %arg, i32 %lane ; per-work-item element of %arg
 208   %mem = load i32, i32 addrspace(1)* %slot, align 4
 209   %arg.bit2 = or i32 %arg1, 4 ; bit 2 of the %arg1 side is known set
 210   %mem.bit15 = or i32 %mem, 32768 ; 0x8000: bit 15 of the loaded side is known set
 211   %arg.even = and i32 %arg.bit2, 16711935 ; 0x00ff00ff: bytes 0 and 2, bit 2 survives
 212   %mem.up = and i32 %mem.bit15, 4294967040 ; 0xffffff00: bits 31:8, bit 15 survives
 213   %mem.ones = or i32 %mem.up, 4294901760 ; 0xffff0000: force bits 31:16 to all ones
 214   %merged = or i32 %mem.ones, %arg.even
 215   store i32 %merged, i32 addrspace(1)* %slot, align 4
 216   %known = and i32 %merged, 4294934532 ; 0xffff8004: every bit kept here was forced to one above
 217   store i32 %known, i32 addrspace(1)* %arg, align 4
 218   ret void
 219 }
 220
 221 declare i32 @llvm.amdgcn.workitem.id.x() ; called first in every kernel in this file; its i32 result indexes %arg