; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
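; Checks that byte-wise shift/and/or/xor patterns on i32 values are combined
; into a single v_perm_b32 with an immediate selector mask (or v_alignbit_b32
; where the pattern is a funnel shift).
; The notes on the selector encoding in the comments below are a summary of
; the GCN ISA documentation, not something asserted by the tests themselves:
; each selector byte 0x00-0x07 picks one byte out of the concatenation of the
; two source operands, 0x0c produces a constant 0x00 byte, and 0x0d and above
; produce a constant 0xff byte.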

; GCN-LABEL: {{^}}lsh8_or_and:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x6050400
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @lsh8_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = shl i32 %tmp, 8
  %tmp3 = and i32 %arg1, 255
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
  ret void
}
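; For lsh8_or_and above, the result is (%tmp << 8) | (%arg1 & 0xff), i.e. the
; bytes {tmp[2], tmp[1], tmp[0], arg1[0]}; assuming the selector encoding
; summarized at the top of the file, the mask 0x6050400 picks exactly those
; four bytes out of the two operands.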

; GCN-LABEL: {{^}}lsr24_or_and:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @lsr24_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = lshr i32 %tmp, 24
  %tmp3 = and i32 %arg1, 4294967040 ; 0xffffff00
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}and_or_lsr24:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @and_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = and i32 %tmp, 4294967040 ; 0xffffff00
  %tmp3 = lshr i32 %arg1, 24
  %tmp4 = or i32 %tmp2, %tmp3
  %tmp5 = xor i32 %tmp4, -2147483648
  store i32 %tmp5, i32 addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}and_or_and:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020500
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @and_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = and i32 %tmp, -16711936
  %tmp3 = and i32 %arg1, 16711935
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}lsh8_or_lsr24:
; GCN: v_alignbit_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, 24
define amdgpu_kernel void @lsh8_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = shl i32 %tmp, 8
  %tmp3 = lshr i32 %arg1, 24
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
  ret void
}
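; lsh8_or_lsr24 above computes (%tmp << 8) | (%arg1 >> 24), a funnel shift of
; the two operands rather than a byte permute, so it is expected to select
; v_alignbit_b32 with a shift of 24 instead of v_perm_b32.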

; GCN-LABEL: {{^}}lsh16_or_lsr24:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x5040c03
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @lsh16_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = shl i32 %tmp, 16
  %tmp3 = lshr i32 %arg1, 24
  %tmp4 = or i32 %tmp2, %tmp3
  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}and_xor_and:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @and_xor_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = and i32 %tmp, -16776961
  %tmp3 = and i32 %arg1, 16776960
  %tmp4 = xor i32 %tmp2, %tmp3
  store i32 %tmp4, i32 addrspace(1)* %gep, align 4
  ret void
}

; GCN-LABEL: {{^}}and_or_or_and:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @and_or_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %and = and i32 %tmp, 16711935 ; 0x00ff00ff
  %tmp1 = and i32 %arg1, 4294967040 ; 0xffffff00
  %tmp2 = or i32 %tmp1, -65536
  %tmp3 = or i32 %tmp2, %and
  store i32 %tmp3, i32 addrspace(1)* %gep, align 4
  ret void
}
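; In and_or_or_and above the two high result bytes are forced to 0xff by the
; or with -65536 (0xffff0000), so the two high selector bytes of the
; 0xffff0500 mask are expected to be constant-0xff selectors (assuming the
; encoding noted at the top of the file); the low selectors pick byte 1 of
; %arg1 and byte 0 of the loaded value.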

; GCN-LABEL: {{^}}and_or_and_shl:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @and_or_and_shl(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = shl i32 %tmp, 16
  %tmp3 = and i32 %arg1, 65535
  %tmp4 = or i32 %tmp2, %tmp3
  %and = and i32 %tmp4, 4278190335
  store i32 %and, i32 addrspace(1)* %gep, align 4
  ret void
}
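; In and_or_and_shl above the final and with 4278190335 (0xff0000ff) makes the
; middle two result bytes known zero, so the 0x50c0c00 mask is expected to use
; the 0x0c zero-byte selector for them (assuming the encoding noted at the top
; of the file).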

; GCN-LABEL: {{^}}or_and_or:
; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
define amdgpu_kernel void @or_and_or(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %or1 = or i32 %tmp, 16776960 ; 0x00ffff00
  %or2 = or i32 %arg1, 4278190335 ; 0xff0000ff
  %and = and i32 %or1, %or2
  store i32 %and, i32 addrspace(1)* %gep, align 4
  ret void
}
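; The known_* tests below additionally check that known-bits analysis sees
; through the permute: every bit kept by the and feeding the second store is
; already a known constant after the or/and chain, so that store is expected
; to fold to the immediate checked against the RES register.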

; GCN-LABEL: {{^}}known_ffff0500:
; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %load = load i32, i32 addrspace(1)* %gep, align 4
  %mask1 = or i32 %arg1, 32768 ; 0x8000
  %mask2 = or i32 %load, 4
  %and = and i32 %mask2, 16711935 ; 0x00ff00ff
  %tmp1 = and i32 %mask1, 4294967040 ; 0xffffff00
  %tmp2 = or i32 %tmp1, 4294901760 ; 0xffff0000
  %tmp3 = or i32 %tmp2, %and
  store i32 %tmp3, i32 addrspace(1)* %gep, align 4
  %v = and i32 %tmp3, 4294934532 ; 0xffff8004
  store i32 %v, i32 addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}known_050c0c00:
; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00
; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 4{{$}}
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
define amdgpu_kernel void @known_050c0c00(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %tmp = load i32, i32 addrspace(1)* %gep, align 4
  %tmp2 = shl i32 %tmp, 16
  %mask = or i32 %arg1, 4
  %tmp3 = and i32 %mask, 65535
  %tmp4 = or i32 %tmp2, %tmp3
  %and = and i32 %tmp4, 4278190335
  store i32 %and, i32 addrspace(1)* %gep, align 4
  %v = and i32 %and, 16776964
  store i32 %v, i32 addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}known_ffff8004:
; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
bb:
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id
  %load = load i32, i32 addrspace(1)* %gep, align 4
  %mask1 = or i32 %arg1, 4
  %mask2 = or i32 %load, 32768 ; 0x8000
  %and = and i32 %mask1, 16711935 ; 0x00ff00ff
  %tmp1 = and i32 %mask2, 4294967040 ; 0xffffff00
  %tmp2 = or i32 %tmp1, 4294901760 ; 0xffff0000
  %tmp3 = or i32 %tmp2, %and
  store i32 %tmp3, i32 addrspace(1)* %gep, align 4
  %v = and i32 %tmp3, 4294934532 ; 0xffff8004
  store i32 %v, i32 addrspace(1)* %arg, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()