1 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
3 ; GCN-LABEL: {{^}}lsh8_or_and:
4 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x6050400
5 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
; Computes (%tmp << 8) | (%arg1 & 0xff). Every byte of the result is a whole
; byte of exactly one source, so the checks above expect the shl/and/or chain
; to be combined into a single v_perm_b32 with byte-select mask 0x06050400.
6 define amdgpu_kernel void @lsh8_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
8 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
9 %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
10 %tmp = load i32, i32 addrspace(1)* %gep, align 4
11 %tmp2 = shl i32 %tmp, 8 ; bytes 0..2 of %tmp move to byte lanes 1..3
12 %tmp3 = and i32 %arg1, 255 ; keeps byte 0 of %arg1
13 %tmp4 = or i32 %tmp2, %tmp3 ; operands occupy disjoint byte lanes
14 store i32 %tmp4, i32 addrspace(1)* %gep, align 4
18 ; GCN-LABEL: {{^}}lsr24_or_and:
19 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503
20 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
; Computes (%tmp >> 24) | (%arg1 & 0xffffff00): byte 0 of the result comes
; from byte 3 of %tmp, bytes 1..3 come from %arg1. Expected to fold into one
; v_perm_b32 with select mask 0x07060503 (checked above).
21 define amdgpu_kernel void @lsr24_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
23 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
24 %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
25 %tmp = load i32, i32 addrspace(1)* %gep, align 4
26 %tmp2 = lshr i32 %tmp, 24 ; byte 3 of %tmp moves to byte lane 0
27 %tmp3 = and i32 %arg1, 4294967040 ; 0xffffff00
28 %tmp4 = or i32 %tmp2, %tmp3 ; operands occupy disjoint byte lanes
29 store i32 %tmp4, i32 addrspace(1)* %gep, align 4
33 ; GCN-LABEL: {{^}}and_or_lsr24:
34 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503
35 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
; Same byte-select shape as lsr24_or_and but with the and/lshr roles swapped
; between the two sources: (%tmp & 0xffffff00) | (%arg1 >> 24). The trailing
; xor with 0x80000000 flips the stored value's sign bit; the perm (mask
; 0x07060503) must still be formed for the or portion.
36 define amdgpu_kernel void @and_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
38 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
39 %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
40 %tmp = load i32, i32 addrspace(1)* %gep, align 4
41 %tmp2 = and i32 %tmp, 4294967040 ; 0xffffff00
42 %tmp3 = lshr i32 %arg1, 24 ; byte 3 of %arg1 moves to byte lane 0
43 %tmp4 = or i32 %tmp2, %tmp3 ; operands occupy disjoint byte lanes
44 %tmp5 = xor i32 %tmp4, -2147483648 ; flip bit 31 (0x80000000)
45 store i32 %tmp5, i32 addrspace(1)* %gep, align 4
49 ; GCN-LABEL: {{^}}and_or_and:
50 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020500
51 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
; Merges complementary per-byte masks: (%tmp & 0xff00ff00) | (%arg1 & 0x00ff00ff).
; The two masks together cover all four bytes with no overlap, so the result
; is a pure byte interleave — expected to become v_perm_b32, mask 0x07020500.
52 define amdgpu_kernel void @and_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
54 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
55 %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
56 %tmp = load i32, i32 addrspace(1)* %gep, align 4
57 %tmp2 = and i32 %tmp, -16711936 ; 0xff00ff00: keep bytes 1 and 3
58 %tmp3 = and i32 %arg1, 16711935 ; 0x00ff00ff: keep bytes 0 and 2
59 %tmp4 = or i32 %tmp2, %tmp3 ; operands occupy disjoint byte lanes
60 store i32 %tmp4, i32 addrspace(1)* %gep, align 4
64 ; GCN-LABEL: {{^}}lsh8_or_lsr24:
65 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x6050403
66 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
; A byte rotate built from two sources: (%tmp << 8) | (%arg1 >> 24). Bytes
; 1..3 come from %tmp's low three bytes, byte 0 from %arg1's top byte —
; expected to fold into v_perm_b32 with mask 0x06050403.
67 define amdgpu_kernel void @lsh8_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
69 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
70 %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
71 %tmp = load i32, i32 addrspace(1)* %gep, align 4
72 %tmp2 = shl i32 %tmp, 8 ; bytes 0..2 of %tmp move to byte lanes 1..3
73 %tmp3 = lshr i32 %arg1, 24 ; byte 3 of %arg1 moves to byte lane 0
74 %tmp4 = or i32 %tmp2, %tmp3 ; operands occupy disjoint byte lanes
75 store i32 %tmp4, i32 addrspace(1)* %gep, align 4
79 ; GCN-LABEL: {{^}}lsh16_or_lsr24:
80 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x5040c03
81 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
; (%tmp << 16) | (%arg1 >> 24): byte lane 1 of the result is always zero.
; The expected mask 0x05040c03 uses a 0x0c select for that lane — per the GCN
; ISA, perm selects >= 0x0c produce a constant byte (0x0c yields 0x00).
82 define amdgpu_kernel void @lsh16_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
84 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
85 %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
86 %tmp = load i32, i32 addrspace(1)* %gep, align 4
87 %tmp2 = shl i32 %tmp, 16 ; bytes 0..1 of %tmp move to byte lanes 2..3
88 %tmp3 = lshr i32 %arg1, 24 ; byte 3 of %arg1 moves to byte lane 0
89 %tmp4 = or i32 %tmp2, %tmp3 ; byte lane 1 of the result is known zero
90 store i32 %tmp4, i32 addrspace(1)* %gep, align 4
94 ; GCN-LABEL: {{^}}and_xor_and:
95 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104
96 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
; Like and_or_and but combining with xor: (%tmp & 0xff0000ff) ^ (%arg1 & 0x00ffff00).
; Because the two and-masks are disjoint, the xor is equivalent to an or, and
; the combine should still produce v_perm_b32 with mask 0x07020104.
97 define amdgpu_kernel void @and_xor_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
99 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
100 %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
101 %tmp = load i32, i32 addrspace(1)* %gep, align 4
102 %tmp2 = and i32 %tmp, -16776961 ; 0xff0000ff: keep bytes 0 and 3
103 %tmp3 = and i32 %arg1, 16776960 ; 0x00ffff00: keep bytes 1 and 2
104 %tmp4 = xor i32 %tmp2, %tmp3 ; disjoint lanes: xor behaves as or here
105 store i32 %tmp4, i32 addrspace(1)* %gep, align 4
109 ; GCN-LABEL: {{^}}and_or_or_and:
110 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
111 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
; The top two result bytes are forced to all-ones by or'ing 0xffff0000, the
; low two bytes interleave %tmp and %arg1. The expected mask 0xffff0500 uses
; 0xff selects for the constant lanes — per the GCN ISA, a perm select of
; 0xff produces a constant 0xff byte.
112 define amdgpu_kernel void @and_or_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
114 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
115 %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
116 %tmp = load i32, i32 addrspace(1)* %gep, align 4
117 %and = and i32 %tmp, 16711935 ; 0x00ff00ff
118 %tmp1 = and i32 %arg1, 4294967040 ; 0xffffff00
119 %tmp2 = or i32 %tmp1, -65536 ; 0xffff0000: bytes 2..3 become 0xff
120 %tmp3 = or i32 %tmp2, %and
121 store i32 %tmp3, i32 addrspace(1)* %gep, align 4
125 ; GCN-LABEL: {{^}}and_or_and_shl:
126 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00
127 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
; ((%tmp << 16) | (%arg1 & 0xffff)) & 0xff0000ff: only byte 0 (from %arg1)
; and byte 3 (byte 1 of %tmp) survive the final mask; lanes 1..2 are known
; zero. Expected mask 0x050c0c00 uses 0x0c selects for the zeroed lanes
; (per the GCN ISA a 0x0c select yields 0x00).
128 define amdgpu_kernel void @and_or_and_shl(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
130 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
131 %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
132 %tmp = load i32, i32 addrspace(1)* %gep, align 4
133 %tmp2 = shl i32 %tmp, 16 ; bytes 0..1 of %tmp move to byte lanes 2..3
134 %tmp3 = and i32 %arg1, 65535 ; 0x0000ffff
135 %tmp4 = or i32 %tmp2, %tmp3
136 %and = and i32 %tmp4, 4278190335 ; 0xff0000ff: zero byte lanes 1..2
137 store i32 %and, i32 addrspace(1)* %gep, align 4
141 ; GCN-LABEL: {{^}}or_and_or:
142 ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104
143 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
; The dual of and_xor_and using or/and: (%tmp | 0x00ffff00) & (%arg1 | 0xff0000ff).
; The or'd constants make each byte of the result come from exactly one source
; (bytes 0,3 from %tmp, bytes 1,2 from %arg1), so the same perm mask
; 0x07020104 is expected.
144 define amdgpu_kernel void @or_and_or(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
146 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
147 %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
148 %tmp = load i32, i32 addrspace(1)* %gep, align 4
149 %or1 = or i32 %tmp, 16776960 ; 0x00ffff00: bytes 1..2 forced to 0xff
150 %or2 = or i32 %arg1, 4278190335 ; 0xff0000ff: bytes 0 and 3 forced to 0xff
151 %and = and i32 %or1, %or2 ; each lane takes the non-forced source byte
152 store i32 %and, i32 addrspace(1)* %gep, align 4
156 ; GCN-LABEL: {{^}}known_ffff0500:
157 ; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
158 ; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
159 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
160 ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
; Same perm pattern as and_or_or_and (mask 0xffff0500), but the inputs have
; known-set bits (%arg1 | 0x8000, %load | 4). The test checks that known
; bits are propagated through the formed v_perm_b32: the second store's
; value (%tmp3 & 0xffff8004) must fold to the constant 0xffff8004.
161 define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
163 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
164 %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
165 %load = load i32, i32 addrspace(1)* %gep, align 4
166 %mask1 = or i32 %arg1, 32768 ; 0x8000: bit 15 known set
167 %mask2 = or i32 %load, 4 ; bit 2 known set
168 %and = and i32 %mask2, 16711935 ; 0x00ff00ff
169 %tmp1 = and i32 %mask1, 4294967040 ; 0xffffff00
170 %tmp2 = or i32 %tmp1, 4294901760 ; 0xffff0000
171 %tmp3 = or i32 %tmp2, %and
172 store i32 %tmp3, i32 addrspace(1)* %gep, align 4
173 %v = and i32 %tmp3, 4294934532 ; 0xffff8004: all bits known set in %tmp3
174 store i32 %v, i32 addrspace(1)* %arg, align 4
178 ; GCN-LABEL: {{^}}known_050c0c00:
179 ; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00
180 ; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 4{{$}}
181 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
182 ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
; Same perm pattern as and_or_and_shl (mask 0x050c0c00), with bit 2 of %arg1
; known set. The test checks known-bits propagation through the perm: the
; second store's value (%and & 0x00fffff04... see 0xfffff04 below) must fold
; to the constant 4, since bit 2 is the only overlapping known-set bit.
183 define amdgpu_kernel void @known_050c0c00(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
185 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
186 %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
187 %tmp = load i32, i32 addrspace(1)* %gep, align 4
188 %tmp2 = shl i32 %tmp, 16 ; bytes 0..1 of %tmp move to byte lanes 2..3
189 %mask = or i32 %arg1, 4 ; bit 2 known set
190 %tmp3 = and i32 %mask, 65535 ; 0x0000ffff
191 %tmp4 = or i32 %tmp2, %tmp3
192 %and = and i32 %tmp4, 4278190335 ; 0xff0000ff: zero byte lanes 1..2
193 store i32 %and, i32 addrspace(1)* %gep, align 4
194 %v = and i32 %and, 16776964 ; 0x00fffF04: only bit 2 can be set here
195 store i32 %v, i32 addrspace(1)* %arg, align 4
199 ; GCN-LABEL: {{^}}known_ffff8004:
200 ; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
201 ; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
202 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
203 ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
; Mirror of known_ffff0500 with the known-set bits swapped between the two
; sources (%arg1 | 4, %load | 0x8000). The same perm mask 0xffff0500 is
; expected, and the second store's masked value must again fold to the
; constant 0xffff8004 via known-bits propagation through the perm.
204 define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
206 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
207 %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
208 %load = load i32, i32 addrspace(1)* %gep, align 4
209 %mask1 = or i32 %arg1, 4 ; bit 2 known set
210 %mask2 = or i32 %load, 32768 ; 0x8000: bit 15 known set
211 %and = and i32 %mask1, 16711935 ; 0x00ff00ff
212 %tmp1 = and i32 %mask2, 4294967040 ; 0xffffff00
213 %tmp2 = or i32 %tmp1, 4294901760 ; 0xffff0000
214 %tmp3 = or i32 %tmp2, %and
215 store i32 %tmp3, i32 addrspace(1)* %gep, align 4
216 %v = and i32 %tmp3, 4294934532 ; 0xffff8004: all bits known set in %tmp3
217 store i32 %v, i32 addrspace(1)* %arg, align 4
221 declare i32 @llvm.amdgcn.workitem.id.x()