1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel < %s | FileCheck -check-prefix=GISEL %s
; csh_16: applies shl, lshr and ashr to the i16 value %a, all with the same
; shift amount %and, and sums the three results into %ret.
; Expected code for the run without -global-isel contains no masking
; instruction ahead of the three 16-bit shifts; the -global-isel run expects a
; v_and_b32 with 15 on the shift amount first.
; NOTE(review): the definition of %and is not visible in this excerpt;
; presumably `and i16 %b, 15`, judging by the mask in the -global-isel
; output -- confirm it is present in the real file.
; The assertion lines are autogenerated; regenerate them instead of editing.
5 define i16 @csh_16(i16 %a, i16 %b) {
8 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9 ; CHECK-NEXT: v_lshlrev_b16_e32 v2, v1, v0
10 ; CHECK-NEXT: v_lshrrev_b16_e32 v3, v1, v0
11 ; CHECK-NEXT: v_ashrrev_i16_e32 v0, v1, v0
12 ; CHECK-NEXT: v_add_u16_e32 v1, v2, v3
13 ; CHECK-NEXT: v_add_u16_e32 v0, v1, v0
14 ; CHECK-NEXT: s_setpc_b64 s[30:31]
16 ; GISEL-LABEL: csh_16:
18 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19 ; GISEL-NEXT: v_and_b32_e32 v1, 15, v1
20 ; GISEL-NEXT: v_lshlrev_b16_e32 v2, v1, v0
21 ; GISEL-NEXT: v_lshrrev_b16_e32 v3, v1, v0
22 ; GISEL-NEXT: v_ashrrev_i16_e32 v0, v1, v0
23 ; GISEL-NEXT: v_add_u16_e32 v1, v2, v3
24 ; GISEL-NEXT: v_add_u16_e32 v0, v1, v0
25 ; GISEL-NEXT: s_setpc_b64 s[30:31]
27 %shl = shl i16 %a, %and
28 %lshr = lshr i16 %a, %and
29 %ashr = ashr i16 %a, %and
30 %ret.0 = add i16 %shl, %lshr
31 %ret = add i16 %ret.0, %ashr
; csh_32: i32 variant -- shl, lshr and ashr of %a by the same amount %and,
; with the three results added together into %ret.
; Without -global-isel the expected code shifts by v1 directly and folds the
; two adds into one v_add3_u32; with -global-isel a v_and_b32 with 31 on the
; shift amount is expected before the shifts.
; NOTE(review): the definition of %and is not visible in this excerpt;
; presumably `and i32 %b, 31` -- confirm it is present in the real file.
35 define i32 @csh_32(i32 %a, i32 %b) {
36 ; CHECK-LABEL: csh_32:
38 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39 ; CHECK-NEXT: v_lshlrev_b32_e32 v2, v1, v0
40 ; CHECK-NEXT: v_lshrrev_b32_e32 v3, v1, v0
41 ; CHECK-NEXT: v_ashrrev_i32_e32 v0, v1, v0
42 ; CHECK-NEXT: v_add3_u32 v0, v2, v3, v0
43 ; CHECK-NEXT: s_setpc_b64 s[30:31]
45 ; GISEL-LABEL: csh_32:
47 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48 ; GISEL-NEXT: v_and_b32_e32 v1, 31, v1
49 ; GISEL-NEXT: v_lshlrev_b32_e32 v2, v1, v0
50 ; GISEL-NEXT: v_lshrrev_b32_e32 v3, v1, v0
51 ; GISEL-NEXT: v_ashrrev_i32_e32 v0, v1, v0
52 ; GISEL-NEXT: v_add3_u32 v0, v2, v3, v0
53 ; GISEL-NEXT: s_setpc_b64 s[30:31]
55 %shl = shl i32 %a, %and
56 %lshr = lshr i32 %a, %and
57 %ashr = ashr i32 %a, %and
58 %ret.0 = add i32 %shl, %lshr
59 %ret = add i32 %ret.0, %ashr
; s_csh_32_0: amdgpu_ps function with both i32 arguments marked inreg; the
; expected code uses scalar instructions (s_lshl_b32 / s_lshr_b32 /
; s_ashr_i32 followed by two s_add_i32).
; Both runs (with and without -global-isel) expect identical code with no
; masking instruction on the shift amount s1.
; NOTE(review): the definition of %and is not visible in this excerpt --
; confirm the masking `and` of %b is present in the real file.
63 define amdgpu_ps i32 @s_csh_32_0(i32 inreg %a, i32 inreg %b) {
64 ; CHECK-LABEL: s_csh_32_0:
66 ; CHECK-NEXT: s_lshl_b32 s2, s0, s1
67 ; CHECK-NEXT: s_lshr_b32 s3, s0, s1
68 ; CHECK-NEXT: s_ashr_i32 s0, s0, s1
69 ; CHECK-NEXT: s_add_i32 s1, s2, s3
70 ; CHECK-NEXT: s_add_i32 s0, s1, s0
71 ; CHECK-NEXT: ; return to shader part epilog
73 ; GISEL-LABEL: s_csh_32_0:
75 ; GISEL-NEXT: s_lshl_b32 s2, s0, s1
76 ; GISEL-NEXT: s_lshr_b32 s3, s0, s1
77 ; GISEL-NEXT: s_ashr_i32 s0, s0, s1
78 ; GISEL-NEXT: s_add_i32 s1, s2, s3
79 ; GISEL-NEXT: s_add_i32 s0, s1, s0
80 ; GISEL-NEXT: ; return to shader part epilog
82 %shl = shl i32 %a, %and
83 %lshr = lshr i32 %a, %and
84 %ashr = ashr i32 %a, %and
85 %ret.0 = add i32 %shl, %lshr
86 %ret = add i32 %ret.0, %ashr
; s_csh_32_1: same scalar i32 shl/lshr/ashr-and-sum pattern as the other
; 32-bit cases, but the shift amount is masked with 127, i.e. a mask wider
; than the five low bits.
; Both runs (with and without -global-isel) expect no s_and instruction: the
; shifts consume s1 unmodified.
90 define amdgpu_ps i32 @s_csh_32_1(i32 inreg %a, i32 inreg %b) {
91 ; CHECK-LABEL: s_csh_32_1:
93 ; CHECK-NEXT: s_lshl_b32 s2, s0, s1
94 ; CHECK-NEXT: s_lshr_b32 s3, s0, s1
95 ; CHECK-NEXT: s_ashr_i32 s0, s0, s1
96 ; CHECK-NEXT: s_add_i32 s1, s2, s3
97 ; CHECK-NEXT: s_add_i32 s0, s1, s0
98 ; CHECK-NEXT: ; return to shader part epilog
100 ; GISEL-LABEL: s_csh_32_1:
102 ; GISEL-NEXT: s_lshl_b32 s2, s0, s1
103 ; GISEL-NEXT: s_lshr_b32 s3, s0, s1
104 ; GISEL-NEXT: s_ashr_i32 s0, s0, s1
105 ; GISEL-NEXT: s_add_i32 s1, s2, s3
106 ; GISEL-NEXT: s_add_i32 s0, s1, s0
107 ; GISEL-NEXT: ; return to shader part epilog
108 %and = and i32 %b, 127
109 %shl = shl i32 %a, %and
110 %lshr = lshr i32 %a, %and
111 %ashr = ashr i32 %a, %and
112 %ret.0 = add i32 %shl, %lshr
113 %ret = add i32 %ret.0, %ashr
; csh_v4i32: <4 x i32> variant. Every lane of %b is masked with 31 and used
; as the per-lane shift amount for shl, lshr and ashr of %a; the three vector
; results are added together.
; Without -global-isel the expected code has twelve per-lane shifts and four
; v_add3_u32 with no masking; with -global-isel four v_and_b32 with 31 (one
; per shift-amount register v4..v7) are expected first.
117 define <4 x i32> @csh_v4i32(<4 x i32> %a, <4 x i32> %b) {
118 ; CHECK-LABEL: csh_v4i32:
120 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121 ; CHECK-NEXT: v_lshlrev_b32_e32 v8, v7, v3
122 ; CHECK-NEXT: v_lshlrev_b32_e32 v9, v6, v2
123 ; CHECK-NEXT: v_lshlrev_b32_e32 v10, v5, v1
124 ; CHECK-NEXT: v_lshlrev_b32_e32 v11, v4, v0
125 ; CHECK-NEXT: v_lshrrev_b32_e32 v12, v7, v3
126 ; CHECK-NEXT: v_lshrrev_b32_e32 v13, v6, v2
127 ; CHECK-NEXT: v_lshrrev_b32_e32 v14, v5, v1
128 ; CHECK-NEXT: v_lshrrev_b32_e32 v15, v4, v0
129 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, v7, v3
130 ; CHECK-NEXT: v_ashrrev_i32_e32 v2, v6, v2
131 ; CHECK-NEXT: v_ashrrev_i32_e32 v1, v5, v1
132 ; CHECK-NEXT: v_ashrrev_i32_e32 v0, v4, v0
133 ; CHECK-NEXT: v_add3_u32 v0, v11, v15, v0
134 ; CHECK-NEXT: v_add3_u32 v1, v10, v14, v1
135 ; CHECK-NEXT: v_add3_u32 v2, v9, v13, v2
136 ; CHECK-NEXT: v_add3_u32 v3, v8, v12, v3
137 ; CHECK-NEXT: s_setpc_b64 s[30:31]
139 ; GISEL-LABEL: csh_v4i32:
141 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142 ; GISEL-NEXT: v_and_b32_e32 v4, 31, v4
143 ; GISEL-NEXT: v_and_b32_e32 v5, 31, v5
144 ; GISEL-NEXT: v_and_b32_e32 v6, 31, v6
145 ; GISEL-NEXT: v_and_b32_e32 v7, 31, v7
146 ; GISEL-NEXT: v_lshlrev_b32_e32 v8, v4, v0
147 ; GISEL-NEXT: v_lshlrev_b32_e32 v9, v5, v1
148 ; GISEL-NEXT: v_lshlrev_b32_e32 v10, v6, v2
149 ; GISEL-NEXT: v_lshlrev_b32_e32 v11, v7, v3
150 ; GISEL-NEXT: v_lshrrev_b32_e32 v12, v4, v0
151 ; GISEL-NEXT: v_lshrrev_b32_e32 v13, v5, v1
152 ; GISEL-NEXT: v_lshrrev_b32_e32 v14, v6, v2
153 ; GISEL-NEXT: v_lshrrev_b32_e32 v15, v7, v3
154 ; GISEL-NEXT: v_ashrrev_i32_e32 v0, v4, v0
155 ; GISEL-NEXT: v_ashrrev_i32_e32 v1, v5, v1
156 ; GISEL-NEXT: v_ashrrev_i32_e32 v2, v6, v2
157 ; GISEL-NEXT: v_ashrrev_i32_e32 v3, v7, v3
158 ; GISEL-NEXT: v_add3_u32 v0, v8, v12, v0
159 ; GISEL-NEXT: v_add3_u32 v1, v9, v13, v1
160 ; GISEL-NEXT: v_add3_u32 v2, v10, v14, v2
161 ; GISEL-NEXT: v_add3_u32 v3, v11, v15, v3
162 ; GISEL-NEXT: s_setpc_b64 s[30:31]
163 %and = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
164 %shl = shl <4 x i32> %a, %and
165 %lshr = lshr <4 x i32> %a, %and
166 %ashr = ashr <4 x i32> %a, %and
167 %ret.0 = add <4 x i32> %shl, %lshr
168 %ret = add <4 x i32> %ret.0, %ashr
; s_csh_v4i32: amdgpu_ps <4 x i32> variant with inreg arguments, so the
; expected code is scalar. Each lane of %b is masked with 31 and used as the
; shift amount for shl, lshr and ashr of %a; the results are summed per lane.
; Without -global-isel no masking instruction is expected. With -global-isel
; the mask is expected to be materialised as a 64-bit pair (s_mov_b32 s8, 31
; then s_mov_b32 s9, s8) and applied with two s_and_b64 before the shifts.
172 define amdgpu_ps <4 x i32> @s_csh_v4i32(<4 x i32> inreg %a, <4 x i32> inreg %b) {
173 ; CHECK-LABEL: s_csh_v4i32:
175 ; CHECK-NEXT: s_lshl_b32 s8, s0, s4
176 ; CHECK-NEXT: s_lshl_b32 s9, s1, s5
177 ; CHECK-NEXT: s_lshl_b32 s10, s2, s6
178 ; CHECK-NEXT: s_lshl_b32 s11, s3, s7
179 ; CHECK-NEXT: s_lshr_b32 s12, s0, s4
180 ; CHECK-NEXT: s_lshr_b32 s13, s1, s5
181 ; CHECK-NEXT: s_lshr_b32 s14, s2, s6
182 ; CHECK-NEXT: s_lshr_b32 s15, s3, s7
183 ; CHECK-NEXT: s_ashr_i32 s3, s3, s7
184 ; CHECK-NEXT: s_ashr_i32 s2, s2, s6
185 ; CHECK-NEXT: s_ashr_i32 s1, s1, s5
186 ; CHECK-NEXT: s_ashr_i32 s0, s0, s4
187 ; CHECK-NEXT: s_add_i32 s4, s11, s15
188 ; CHECK-NEXT: s_add_i32 s5, s10, s14
189 ; CHECK-NEXT: s_add_i32 s6, s9, s13
190 ; CHECK-NEXT: s_add_i32 s7, s8, s12
191 ; CHECK-NEXT: s_add_i32 s0, s7, s0
192 ; CHECK-NEXT: s_add_i32 s1, s6, s1
193 ; CHECK-NEXT: s_add_i32 s2, s5, s2
194 ; CHECK-NEXT: s_add_i32 s3, s4, s3
195 ; CHECK-NEXT: ; return to shader part epilog
197 ; GISEL-LABEL: s_csh_v4i32:
199 ; GISEL-NEXT: s_mov_b32 s8, 31
200 ; GISEL-NEXT: s_mov_b32 s9, s8
201 ; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], s[8:9]
202 ; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], s[8:9]
203 ; GISEL-NEXT: s_lshl_b32 s8, s0, s4
204 ; GISEL-NEXT: s_lshl_b32 s9, s1, s5
205 ; GISEL-NEXT: s_lshl_b32 s10, s2, s6
206 ; GISEL-NEXT: s_lshl_b32 s11, s3, s7
207 ; GISEL-NEXT: s_lshr_b32 s12, s0, s4
208 ; GISEL-NEXT: s_lshr_b32 s13, s1, s5
209 ; GISEL-NEXT: s_lshr_b32 s14, s2, s6
210 ; GISEL-NEXT: s_lshr_b32 s15, s3, s7
211 ; GISEL-NEXT: s_ashr_i32 s0, s0, s4
212 ; GISEL-NEXT: s_ashr_i32 s1, s1, s5
213 ; GISEL-NEXT: s_ashr_i32 s2, s2, s6
214 ; GISEL-NEXT: s_ashr_i32 s3, s3, s7
215 ; GISEL-NEXT: s_add_i32 s4, s8, s12
216 ; GISEL-NEXT: s_add_i32 s5, s9, s13
217 ; GISEL-NEXT: s_add_i32 s6, s10, s14
218 ; GISEL-NEXT: s_add_i32 s7, s11, s15
219 ; GISEL-NEXT: s_add_i32 s0, s4, s0
220 ; GISEL-NEXT: s_add_i32 s1, s5, s1
221 ; GISEL-NEXT: s_add_i32 s2, s6, s2
222 ; GISEL-NEXT: s_add_i32 s3, s7, s3
223 ; GISEL-NEXT: ; return to shader part epilog
224 %and = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
225 %shl = shl <4 x i32> %a, %and
226 %lshr = lshr <4 x i32> %a, %and
227 %ashr = ashr <4 x i32> %a, %and
228 %ret.0 = add <4 x i32> %shl, %lshr
229 %ret = add <4 x i32> %ret.0, %ashr
; csh_64: i64 variant. %b is masked with 63 and used as the shift amount for
; shl, lshr and ashr of %a; the three 64-bit results are summed.
; Without -global-isel the expected code shifts by v2 directly with no mask.
; With -global-isel a v_and_b32 with 63 into v6 is expected, and v6 is then
; the shift amount. Both runs expect each 64-bit add as a
; v_add_co_u32 / v_addc_co_u32 pair.
233 define i64 @csh_64(i64 %a, i64 %b) {
234 ; CHECK-LABEL: csh_64:
236 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
237 ; CHECK-NEXT: v_lshlrev_b64 v[3:4], v2, v[0:1]
238 ; CHECK-NEXT: v_lshrrev_b64 v[5:6], v2, v[0:1]
239 ; CHECK-NEXT: v_ashrrev_i64 v[0:1], v2, v[0:1]
240 ; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, v3, v5
241 ; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, v4, v6, vcc
242 ; CHECK-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
243 ; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
244 ; CHECK-NEXT: s_setpc_b64 s[30:31]
246 ; GISEL-LABEL: csh_64:
248 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
249 ; GISEL-NEXT: v_and_b32_e32 v6, 63, v2
250 ; GISEL-NEXT: v_lshlrev_b64 v[2:3], v6, v[0:1]
251 ; GISEL-NEXT: v_lshrrev_b64 v[4:5], v6, v[0:1]
252 ; GISEL-NEXT: v_ashrrev_i64 v[0:1], v6, v[0:1]
253 ; GISEL-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
254 ; GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
255 ; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0
256 ; GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
257 ; GISEL-NEXT: s_setpc_b64 s[30:31]
258 %and = and i64 %b, 63
259 %shl = shl i64 %a, %and
260 %lshr = lshr i64 %a, %and
261 %ashr = ashr i64 %a, %and
262 %ret.0 = add i64 %shl, %lshr
263 %ret = add i64 %ret.0, %ashr
; s_csh_64_0: amdgpu_ps i64 variant with inreg arguments (scalar code
; expected). %b is masked with 63 and used as the shift amount for shl, lshr
; and ashr of %a; the results are summed with s_add_u32 / s_addc_u32 pairs.
; Without -global-isel no masking instruction is expected; with -global-isel
; an s_and_b64 of s[2:3] with 63 is expected before the shifts.
267 define amdgpu_ps i64 @s_csh_64_0(i64 inreg %a, i64 inreg %b) {
268 ; CHECK-LABEL: s_csh_64_0:
270 ; CHECK-NEXT: s_lshl_b64 s[4:5], s[0:1], s2
271 ; CHECK-NEXT: s_lshr_b64 s[6:7], s[0:1], s2
272 ; CHECK-NEXT: s_ashr_i64 s[0:1], s[0:1], s2
273 ; CHECK-NEXT: s_add_u32 s2, s4, s6
274 ; CHECK-NEXT: s_addc_u32 s3, s5, s7
275 ; CHECK-NEXT: s_add_u32 s0, s2, s0
276 ; CHECK-NEXT: s_addc_u32 s1, s3, s1
277 ; CHECK-NEXT: ; return to shader part epilog
279 ; GISEL-LABEL: s_csh_64_0:
281 ; GISEL-NEXT: s_and_b64 s[2:3], s[2:3], 63
282 ; GISEL-NEXT: s_lshl_b64 s[4:5], s[0:1], s2
283 ; GISEL-NEXT: s_lshr_b64 s[6:7], s[0:1], s2
284 ; GISEL-NEXT: s_ashr_i64 s[0:1], s[0:1], s2
285 ; GISEL-NEXT: s_add_u32 s2, s4, s6
286 ; GISEL-NEXT: s_addc_u32 s3, s5, s7
287 ; GISEL-NEXT: s_add_u32 s0, s2, s0
288 ; GISEL-NEXT: s_addc_u32 s1, s3, s1
289 ; GISEL-NEXT: ; return to shader part epilog
290 %and = and i64 %b, 63
291 %shl = shl i64 %a, %and
292 %lshr = lshr i64 %a, %and
293 %ashr = ashr i64 %a, %and
294 %ret.0 = add i64 %shl, %lshr
295 %ret = add i64 %ret.0, %ashr
; s_csh_64_1: same scalar i64 shl/lshr/ashr-and-sum pattern as the other
; 64-bit cases, but the shift amount is masked with 255, i.e. a mask wider
; than the six low bits.
; Without -global-isel no masking instruction is expected; with -global-isel
; an s_and_b64 of s[2:3] with 0xff is expected before the shifts.
299 define amdgpu_ps i64 @s_csh_64_1(i64 inreg %a, i64 inreg %b) {
300 ; CHECK-LABEL: s_csh_64_1:
302 ; CHECK-NEXT: s_lshl_b64 s[4:5], s[0:1], s2
303 ; CHECK-NEXT: s_lshr_b64 s[6:7], s[0:1], s2
304 ; CHECK-NEXT: s_ashr_i64 s[0:1], s[0:1], s2
305 ; CHECK-NEXT: s_add_u32 s2, s4, s6
306 ; CHECK-NEXT: s_addc_u32 s3, s5, s7
307 ; CHECK-NEXT: s_add_u32 s0, s2, s0
308 ; CHECK-NEXT: s_addc_u32 s1, s3, s1
309 ; CHECK-NEXT: ; return to shader part epilog
311 ; GISEL-LABEL: s_csh_64_1:
313 ; GISEL-NEXT: s_and_b64 s[2:3], s[2:3], 0xff
314 ; GISEL-NEXT: s_lshl_b64 s[4:5], s[0:1], s2
315 ; GISEL-NEXT: s_lshr_b64 s[6:7], s[0:1], s2
316 ; GISEL-NEXT: s_ashr_i64 s[0:1], s[0:1], s2
317 ; GISEL-NEXT: s_add_u32 s2, s4, s6
318 ; GISEL-NEXT: s_addc_u32 s3, s5, s7
319 ; GISEL-NEXT: s_add_u32 s0, s2, s0
320 ; GISEL-NEXT: s_addc_u32 s1, s3, s1
321 ; GISEL-NEXT: ; return to shader part epilog
322 %and = and i64 %b, 255
323 %shl = shl i64 %a, %and
324 %lshr = lshr i64 %a, %and
325 %ashr = ashr i64 %a, %and
326 %ret.0 = add i64 %shl, %lshr
327 %ret = add i64 %ret.0, %ashr
; cshl_or: shifts %a left by (%b & 31) and ORs the result with %a.
; Both runs expect the shift and the OR to be combined into a single
; v_lshl_or_b32. Without -global-isel the shift-amount operand is the
; unmasked v1; with -global-isel a v_and_b32 with 31 on v1 is expected first.
331 define i32 @cshl_or(i32 %a, i32 %b) {
332 ; CHECK-LABEL: cshl_or:
334 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335 ; CHECK-NEXT: v_lshl_or_b32 v0, v0, v1, v0
336 ; CHECK-NEXT: s_setpc_b64 s[30:31]
338 ; GISEL-LABEL: cshl_or:
340 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
341 ; GISEL-NEXT: v_and_b32_e32 v1, 31, v1
342 ; GISEL-NEXT: v_lshl_or_b32 v0, v0, v1, v0
343 ; GISEL-NEXT: s_setpc_b64 s[30:31]
344 %and = and i32 %b, 31
345 %shl = shl i32 %a, %and
346 %or = or i32 %shl, %a
; cshl_add: shifts %a left by (%b & 31) and adds %c to the result.
; Both runs expect the shift and the add to be combined into a single
; v_lshl_add_u32. Without -global-isel the shift-amount operand is the
; unmasked v1; with -global-isel a v_and_b32 with 31 on v1 is expected first.
350 define i32 @cshl_add(i32 %a, i32 %b, i32 %c) {
351 ; CHECK-LABEL: cshl_add:
353 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
354 ; CHECK-NEXT: v_lshl_add_u32 v0, v0, v1, v2
355 ; CHECK-NEXT: s_setpc_b64 s[30:31]
357 ; GISEL-LABEL: cshl_add:
359 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
360 ; GISEL-NEXT: v_and_b32_e32 v1, 31, v1
361 ; GISEL-NEXT: v_lshl_add_u32 v0, v0, v1, v2
362 ; GISEL-NEXT: s_setpc_b64 s[30:31]
363 %and = and i32 %b, 31
364 %shl = shl i32 %a, %and
365 %add = add i32 %shl, %c
; add_cshl: computes %a + %b, then shifts the sum left by (%b & 31).
; Both runs expect the add and the shift to be combined into a single
; v_add_lshl_u32. Without -global-isel the shift-amount operand is the
; unmasked v1 (the same register as the second addend). With -global-isel the
; masked amount is expected in a separate register v2 (v_and_b32 v2, 31, v1),
; so v1 stays intact as the addend.
; NOTE(review): the end of this function lies beyond the visible excerpt;
; this comment describes only the lines shown.
369 define i32 @add_cshl(i32 %a, i32 %b) {
370 ; CHECK-LABEL: add_cshl:
372 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
373 ; CHECK-NEXT: v_add_lshl_u32 v0, v0, v1, v1
374 ; CHECK-NEXT: s_setpc_b64 s[30:31]
376 ; GISEL-LABEL: add_cshl:
378 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
379 ; GISEL-NEXT: v_and_b32_e32 v2, 31, v1
380 ; GISEL-NEXT: v_add_lshl_u32 v0, v0, v1, v2
381 ; GISEL-NEXT: s_setpc_b64 s[30:31]
382 %add = add i32 %a, %b
383 %and = and i32 %b, 31
384 %shl = shl i32 %add, %and