1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -o - %s | FileCheck -check-prefixes=GCN,GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s
; s_fshl_i7: funnel shift left (@llvm.fshl.i7) of two i7 values by a variable
; i7 amount, with all three operands passed inreg to an amdgpu_ps function.
; On every target the checks contain a reciprocal-based sequence against the
; constant 7 (v_cvt_f32_ubyte0 / v_rcp_iflag_f32 / v_mul_hi_u32, then two
; v_cmp_le_u32 + v_cndmask_b32 correction steps) ahead of the shifts.
; NOTE(review): presumably this is the shift amount reduced modulo 7, which
; cannot be done with a mask because 7 is not a power of two -- confirm.
; That sequence uses vector instructions, so the final value is moved back to
; s0 with v_readfirstlane_b32.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
7 define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
8 ; GFX6-LABEL: s_fshl_i7:
10 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
11 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
12 ; GFX6-NEXT: s_movk_i32 s3, 0x7f
13 ; GFX6-NEXT: s_and_b32 s2, s2, s3
14 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x60001
15 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
16 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
17 ; GFX6-NEXT: v_mul_lo_u32 v1, -7, v0
18 ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
19 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
20 ; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
21 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, 7
22 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
23 ; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
24 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
25 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
26 ; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
27 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
28 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
29 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 6, v0
30 ; GFX6-NEXT: v_and_b32_e32 v0, s3, v0
31 ; GFX6-NEXT: v_and_b32_e32 v1, s3, v1
32 ; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
33 ; GFX6-NEXT: v_lshr_b32_e32 v1, s1, v1
34 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
35 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
36 ; GFX6-NEXT: ; return to shader part epilog
38 ; GFX8-LABEL: s_fshl_i7:
40 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
41 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
42 ; GFX8-NEXT: s_movk_i32 s3, 0x7f
43 ; GFX8-NEXT: s_and_b32 s2, s2, s3
44 ; GFX8-NEXT: s_and_b32 s1, s1, s3
45 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
46 ; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
47 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
48 ; GFX8-NEXT: s_lshr_b32 s1, s1, 1
49 ; GFX8-NEXT: v_mul_lo_u32 v1, -7, v0
50 ; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
51 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
52 ; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
53 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, 7
54 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
55 ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
56 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
57 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
58 ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
59 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
60 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
61 ; GFX8-NEXT: v_sub_u16_e32 v1, 6, v0
62 ; GFX8-NEXT: v_and_b32_e32 v0, s3, v0
63 ; GFX8-NEXT: v_and_b32_e32 v1, s3, v1
64 ; GFX8-NEXT: v_lshlrev_b16_e64 v0, v0, s0
65 ; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s1
66 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
67 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
68 ; GFX8-NEXT: ; return to shader part epilog
70 ; GFX9-LABEL: s_fshl_i7:
72 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
73 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
74 ; GFX9-NEXT: s_movk_i32 s3, 0x7f
75 ; GFX9-NEXT: s_and_b32 s2, s2, s3
76 ; GFX9-NEXT: s_and_b32 s1, s1, s3
77 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
78 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
79 ; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
80 ; GFX9-NEXT: s_lshr_b32 s1, s1, 1
81 ; GFX9-NEXT: v_mul_lo_u32 v1, -7, v0
82 ; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
83 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
84 ; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0
85 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, 7
86 ; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
87 ; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
88 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
89 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
90 ; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
91 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
92 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
93 ; GFX9-NEXT: v_sub_u16_e32 v1, 6, v0
94 ; GFX9-NEXT: v_and_b32_e32 v0, s3, v0
95 ; GFX9-NEXT: v_and_b32_e32 v1, s3, v1
96 ; GFX9-NEXT: v_lshlrev_b16_e64 v0, v0, s0
97 ; GFX9-NEXT: v_lshrrev_b16_e64 v1, v1, s1
98 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
99 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
100 ; GFX9-NEXT: ; return to shader part epilog
102 ; GFX10-LABEL: s_fshl_i7:
104 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
105 ; GFX10-NEXT: s_movk_i32 s3, 0x7f
106 ; GFX10-NEXT: s_and_b32 s2, s2, s3
107 ; GFX10-NEXT: s_and_b32 s1, s1, s3
108 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
109 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
110 ; GFX10-NEXT: s_lshr_b32 s1, s1, 1
111 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
112 ; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
113 ; GFX10-NEXT: v_mul_lo_u32 v1, -7, v0
114 ; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
115 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
116 ; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0
117 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, 7
118 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0
119 ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
120 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
121 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
122 ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
123 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
124 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
125 ; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0
126 ; GFX10-NEXT: v_and_b32_e32 v0, s3, v0
127 ; GFX10-NEXT: v_and_b32_e32 v1, s3, v1
128 ; GFX10-NEXT: v_lshlrev_b16 v0, v0, s0
129 ; GFX10-NEXT: v_lshrrev_b16 v1, v1, s1
130 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
131 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
132 ; GFX10-NEXT: ; return to shader part epilog
133 %result = call i7 @llvm.fshl.i7(i7 %lhs, i7 %rhs, i7 %amt)
; v_fshl_i7: funnel shift left (@llvm.fshl.i7) of two i7 values by a variable
; i7 amount, using the default calling convention (operands arrive in v0-v2 and
; every target's checks end with s_setpc_b64 s[30:31]).
; As in the scalar i7 test, each target's checks contain a reciprocal-based
; sequence against the constant 7 followed by two v_cmp_le_u32 + v_cndmask_b32
; correction steps before the two shifts are OR-ed together.
; NOTE(review): presumably the shift amount reduced modulo 7 -- confirm.
; GFX10 additionally expects "s_waitcnt_vscnt null, 0x0" at function entry.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
137 define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
138 ; GFX6-LABEL: v_fshl_i7:
140 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
142 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3
143 ; GFX6-NEXT: v_and_b32_e32 v2, 0x7f, v2
144 ; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 6
145 ; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
146 ; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3
147 ; GFX6-NEXT: v_mul_lo_u32 v4, -7, v3
148 ; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4
149 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4
150 ; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
151 ; GFX6-NEXT: v_mov_b32_e32 v4, 0x7f
152 ; GFX6-NEXT: v_mul_lo_u32 v3, v3, 7
153 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
154 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
155 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
156 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
157 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
158 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
159 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
160 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 6, v2
161 ; GFX6-NEXT: v_and_b32_e32 v2, v2, v4
162 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
163 ; GFX6-NEXT: v_and_b32_e32 v2, v3, v4
164 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
165 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
166 ; GFX6-NEXT: s_setpc_b64 s[30:31]
168 ; GFX8-LABEL: v_fshl_i7:
170 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
172 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3
173 ; GFX8-NEXT: v_and_b32_e32 v2, 0x7f, v2
174 ; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
175 ; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3
176 ; GFX8-NEXT: v_mul_lo_u32 v4, -7, v3
177 ; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4
178 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4
179 ; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3
180 ; GFX8-NEXT: v_mov_b32_e32 v4, 0x7f
181 ; GFX8-NEXT: v_and_b32_e32 v1, v1, v4
182 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 1, v1
183 ; GFX8-NEXT: v_mul_lo_u32 v3, v3, 7
184 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
185 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
186 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
187 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
188 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
189 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
190 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
191 ; GFX8-NEXT: v_sub_u16_e32 v3, 6, v2
192 ; GFX8-NEXT: v_and_b32_e32 v2, v2, v4
193 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0
194 ; GFX8-NEXT: v_and_b32_e32 v2, v3, v4
195 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1
196 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
197 ; GFX8-NEXT: s_setpc_b64 s[30:31]
199 ; GFX9-LABEL: v_fshl_i7:
201 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
202 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
203 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3
204 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2
205 ; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
206 ; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3
207 ; GFX9-NEXT: v_mul_lo_u32 v4, -7, v3
208 ; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4
209 ; GFX9-NEXT: v_add_u32_e32 v3, v3, v4
210 ; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3
211 ; GFX9-NEXT: v_mov_b32_e32 v4, 0x7f
212 ; GFX9-NEXT: v_and_b32_e32 v1, v1, v4
213 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, 1, v1
214 ; GFX9-NEXT: v_mul_lo_u32 v3, v3, 7
215 ; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
216 ; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
217 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
218 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
219 ; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
220 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
221 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
222 ; GFX9-NEXT: v_sub_u16_e32 v3, 6, v2
223 ; GFX9-NEXT: v_and_b32_e32 v2, v2, v4
224 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0
225 ; GFX9-NEXT: v_and_b32_e32 v2, v3, v4
226 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1
227 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
228 ; GFX9-NEXT: s_setpc_b64 s[30:31]
230 ; GFX10-LABEL: v_fshl_i7:
232 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
233 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
234 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
235 ; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2
236 ; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1
237 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3
238 ; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1
239 ; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
240 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3
241 ; GFX10-NEXT: v_mul_lo_u32 v4, -7, v3
242 ; GFX10-NEXT: v_mul_hi_u32 v4, v3, v4
243 ; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4
244 ; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3
245 ; GFX10-NEXT: v_mul_lo_u32 v3, v3, 7
246 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
247 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
248 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
249 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
250 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
251 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
252 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
253 ; GFX10-NEXT: v_mov_b32_e32 v3, 0x7f
254 ; GFX10-NEXT: v_sub_nc_u16 v4, 6, v2
255 ; GFX10-NEXT: v_and_b32_e32 v2, v2, v3
256 ; GFX10-NEXT: v_and_b32_e32 v3, v4, v3
257 ; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0
258 ; GFX10-NEXT: v_lshrrev_b16 v1, v3, v1
259 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
260 ; GFX10-NEXT: s_setpc_b64 s[30:31]
261 %result = call i7 @llvm.fshl.i7(i7 %lhs, i7 %rhs, i7 %amt)
; s_fshl_i8: funnel shift left (@llvm.fshl.i8) of two i8 values by a variable
; i8 amount, with all three operands passed inreg to an amdgpu_ps function.
; The expected code is purely scalar on every target: the left-shift amount is
; "amt & 7" (s_and_b32 ..., 7), the right-shift amount comes from
; s_andn2_b32 s2, 7, s2, and the two shifted halves are merged with s_or_b32.
; GFX6 prepares %rhs with a single s_bfe_u32, while GFX8/GFX9/GFX10 expect an
; s_and_b32 with 0xff, an s_bfe_u32 and an s_lshr_b32 by 1.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
265 define amdgpu_ps i8 @s_fshl_i8(i8 inreg %lhs, i8 inreg %rhs, i8 inreg %amt) {
266 ; GFX6-LABEL: s_fshl_i8:
268 ; GFX6-NEXT: s_and_b32 s3, s2, 7
269 ; GFX6-NEXT: s_andn2_b32 s2, 7, s2
270 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x70001
271 ; GFX6-NEXT: s_lshl_b32 s0, s0, s3
272 ; GFX6-NEXT: s_lshr_b32 s1, s1, s2
273 ; GFX6-NEXT: s_or_b32 s0, s0, s1
274 ; GFX6-NEXT: ; return to shader part epilog
276 ; GFX8-LABEL: s_fshl_i8:
278 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
279 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
280 ; GFX8-NEXT: s_and_b32 s3, s2, 7
281 ; GFX8-NEXT: s_andn2_b32 s2, 7, s2
282 ; GFX8-NEXT: s_lshr_b32 s1, s1, 1
283 ; GFX8-NEXT: s_lshl_b32 s0, s0, s3
284 ; GFX8-NEXT: s_lshr_b32 s1, s1, s2
285 ; GFX8-NEXT: s_or_b32 s0, s0, s1
286 ; GFX8-NEXT: ; return to shader part epilog
288 ; GFX9-LABEL: s_fshl_i8:
290 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
291 ; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
292 ; GFX9-NEXT: s_and_b32 s3, s2, 7
293 ; GFX9-NEXT: s_andn2_b32 s2, 7, s2
294 ; GFX9-NEXT: s_lshr_b32 s1, s1, 1
295 ; GFX9-NEXT: s_lshl_b32 s0, s0, s3
296 ; GFX9-NEXT: s_lshr_b32 s1, s1, s2
297 ; GFX9-NEXT: s_or_b32 s0, s0, s1
298 ; GFX9-NEXT: ; return to shader part epilog
300 ; GFX10-LABEL: s_fshl_i8:
302 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
303 ; GFX10-NEXT: s_and_b32 s3, s2, 7
304 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
305 ; GFX10-NEXT: s_andn2_b32 s2, 7, s2
306 ; GFX10-NEXT: s_lshr_b32 s1, s1, 1
307 ; GFX10-NEXT: s_lshl_b32 s0, s0, s3
308 ; GFX10-NEXT: s_lshr_b32 s1, s1, s2
309 ; GFX10-NEXT: s_or_b32 s0, s0, s1
310 ; GFX10-NEXT: ; return to shader part epilog
311 %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 %amt)
; v_fshl_i8: funnel shift left (@llvm.fshl.i8) of two i8 values by a variable
; i8 amount, using the default calling convention (operands in v0-v2, return
; via s_setpc_b64 s[30:31]).
; Every target masks the amount with 7 for the left shift and masks the
; bitwise-inverted amount (v_xor_b32 with -1) with 7 for the right shift.
; Targets differ in how %rhs is shifted right by one first: GFX6 uses
; v_bfe_u32, GFX8/GFX9 use an SDWA v_lshrrev_b16 selecting BYTE_0 of the
; source, and GFX10 masks with 0xff and then uses a plain v_lshrrev_b16.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
315 define i8 @v_fshl_i8(i8 %lhs, i8 %rhs, i8 %amt) {
316 ; GFX6-LABEL: v_fshl_i8:
318 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
319 ; GFX6-NEXT: v_and_b32_e32 v3, 7, v2
320 ; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
321 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v2
322 ; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 7
323 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0
324 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
325 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
326 ; GFX6-NEXT: s_setpc_b64 s[30:31]
328 ; GFX8-LABEL: v_fshl_i8:
330 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
331 ; GFX8-NEXT: v_and_b32_e32 v3, 7, v2
332 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
333 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0
334 ; GFX8-NEXT: v_mov_b32_e32 v3, 1
335 ; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
336 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
337 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1
338 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
339 ; GFX8-NEXT: s_setpc_b64 s[30:31]
341 ; GFX9-LABEL: v_fshl_i8:
343 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
344 ; GFX9-NEXT: v_and_b32_e32 v3, 7, v2
345 ; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
346 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0
347 ; GFX9-NEXT: v_mov_b32_e32 v3, 1
348 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
349 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
350 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1
351 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
352 ; GFX9-NEXT: s_setpc_b64 s[30:31]
354 ; GFX10-LABEL: v_fshl_i8:
356 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
357 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
358 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2
359 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
360 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
361 ; GFX10-NEXT: v_and_b32_e32 v3, 7, v3
362 ; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1
363 ; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0
364 ; GFX10-NEXT: v_lshrrev_b16 v1, v3, v1
365 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
366 ; GFX10-NEXT: s_setpc_b64 s[30:31]
367 %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 %amt)
; s_fshl_i8_4: funnel shift left (@llvm.fshl.i8) of two inreg i8 values by the
; constant amount 4 in an amdgpu_ps function.
; With a constant amount no masking of the amount is expected: %lhs is shifted
; left by 4, the upper part of %rhs is brought down (GFX6: one s_bfe_u32;
; GFX8/GFX9/GFX10: mask with 0xff, s_bfe_u32, then s_lshr_b32 by 4), and the
; halves are combined with s_or_b32.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
371 define amdgpu_ps i8 @s_fshl_i8_4(i8 inreg %lhs, i8 inreg %rhs) {
372 ; GFX6-LABEL: s_fshl_i8_4:
374 ; GFX6-NEXT: s_lshl_b32 s0, s0, 4
375 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x40004
376 ; GFX6-NEXT: s_or_b32 s0, s0, s1
377 ; GFX6-NEXT: ; return to shader part epilog
379 ; GFX8-LABEL: s_fshl_i8_4:
381 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
382 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
383 ; GFX8-NEXT: s_lshl_b32 s0, s0, 4
384 ; GFX8-NEXT: s_lshr_b32 s1, s1, 4
385 ; GFX8-NEXT: s_or_b32 s0, s0, s1
386 ; GFX8-NEXT: ; return to shader part epilog
388 ; GFX9-LABEL: s_fshl_i8_4:
390 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
391 ; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
392 ; GFX9-NEXT: s_lshl_b32 s0, s0, 4
393 ; GFX9-NEXT: s_lshr_b32 s1, s1, 4
394 ; GFX9-NEXT: s_or_b32 s0, s0, s1
395 ; GFX9-NEXT: ; return to shader part epilog
397 ; GFX10-LABEL: s_fshl_i8_4:
399 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
400 ; GFX10-NEXT: s_lshl_b32 s0, s0, 4
401 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
402 ; GFX10-NEXT: s_lshr_b32 s1, s1, 4
403 ; GFX10-NEXT: s_or_b32 s0, s0, s1
404 ; GFX10-NEXT: ; return to shader part epilog
405 %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 4)
; v_fshl_i8_4: funnel shift left (@llvm.fshl.i8) of two i8 values by the
; constant amount 4, using the default calling convention (operands in v0/v1,
; return via s_setpc_b64 s[30:31]).
; %lhs is shifted left by 4 and %rhs right by 4, then OR-ed. For the right
; shift, GFX6 uses v_bfe_u32, GFX8 and GFX9 use an SDWA v_lshrrev_b16 reading
; BYTE_0 (the amount is materialized in v2 on GFX8 but in s4 on GFX9), and
; GFX10 masks with 0xff before a plain v_lshrrev_b16.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
409 define i8 @v_fshl_i8_4(i8 %lhs, i8 %rhs) {
410 ; GFX6-LABEL: v_fshl_i8_4:
412 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
413 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0
414 ; GFX6-NEXT: v_bfe_u32 v1, v1, 4, 4
415 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
416 ; GFX6-NEXT: s_setpc_b64 s[30:31]
418 ; GFX8-LABEL: v_fshl_i8_4:
420 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
421 ; GFX8-NEXT: v_mov_b32_e32 v2, 4
422 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0
423 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
424 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
425 ; GFX8-NEXT: s_setpc_b64 s[30:31]
427 ; GFX9-LABEL: v_fshl_i8_4:
429 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
430 ; GFX9-NEXT: s_mov_b32 s4, 4
431 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 4, v0
432 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
433 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
434 ; GFX9-NEXT: s_setpc_b64 s[30:31]
436 ; GFX10-LABEL: v_fshl_i8_4:
438 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
439 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
440 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
441 ; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0
442 ; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1
443 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
444 ; GFX10-NEXT: s_setpc_b64 s[30:31]
445 %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 4)
; s_fshl_i8_5: funnel shift left (@llvm.fshl.i8) of two inreg i8 values by the
; constant amount 5 in an amdgpu_ps function.
; Unlike the amount-4 test the two shift distances differ: %lhs is shifted
; left by 5 while %rhs is shifted right by 3 (GFX6 folds the right shift into
; one s_bfe_u32; GFX8/GFX9/GFX10 mask with 0xff, apply s_bfe_u32, then
; s_lshr_b32 by 3). The halves are combined with s_or_b32.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
449 define amdgpu_ps i8 @s_fshl_i8_5(i8 inreg %lhs, i8 inreg %rhs) {
450 ; GFX6-LABEL: s_fshl_i8_5:
452 ; GFX6-NEXT: s_lshl_b32 s0, s0, 5
453 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x50003
454 ; GFX6-NEXT: s_or_b32 s0, s0, s1
455 ; GFX6-NEXT: ; return to shader part epilog
457 ; GFX8-LABEL: s_fshl_i8_5:
459 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
460 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
461 ; GFX8-NEXT: s_lshl_b32 s0, s0, 5
462 ; GFX8-NEXT: s_lshr_b32 s1, s1, 3
463 ; GFX8-NEXT: s_or_b32 s0, s0, s1
464 ; GFX8-NEXT: ; return to shader part epilog
466 ; GFX9-LABEL: s_fshl_i8_5:
468 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
469 ; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
470 ; GFX9-NEXT: s_lshl_b32 s0, s0, 5
471 ; GFX9-NEXT: s_lshr_b32 s1, s1, 3
472 ; GFX9-NEXT: s_or_b32 s0, s0, s1
473 ; GFX9-NEXT: ; return to shader part epilog
475 ; GFX10-LABEL: s_fshl_i8_5:
477 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
478 ; GFX10-NEXT: s_lshl_b32 s0, s0, 5
479 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
480 ; GFX10-NEXT: s_lshr_b32 s1, s1, 3
481 ; GFX10-NEXT: s_or_b32 s0, s0, s1
482 ; GFX10-NEXT: ; return to shader part epilog
483 %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 5)
; v_fshl_i8_5: funnel shift left (@llvm.fshl.i8) of two i8 values by the
; constant amount 5, using the default calling convention (operands in v0/v1,
; return via s_setpc_b64 s[30:31]).
; %lhs is shifted left by 5 and %rhs right by 3, then OR-ed. For the right
; shift, GFX6 uses v_bfe_u32, GFX8 and GFX9 both materialize 3 in v2 for an
; SDWA v_lshrrev_b16 reading BYTE_0, and GFX10 masks with 0xff before a plain
; v_lshrrev_b16.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
487 define i8 @v_fshl_i8_5(i8 %lhs, i8 %rhs) {
488 ; GFX6-LABEL: v_fshl_i8_5:
490 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
491 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 5, v0
492 ; GFX6-NEXT: v_bfe_u32 v1, v1, 3, 5
493 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
494 ; GFX6-NEXT: s_setpc_b64 s[30:31]
496 ; GFX8-LABEL: v_fshl_i8_5:
498 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
499 ; GFX8-NEXT: v_mov_b32_e32 v2, 3
500 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 5, v0
501 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
502 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
503 ; GFX8-NEXT: s_setpc_b64 s[30:31]
505 ; GFX9-LABEL: v_fshl_i8_5:
507 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
508 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
509 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 5, v0
510 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
511 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
512 ; GFX9-NEXT: s_setpc_b64 s[30:31]
514 ; GFX10-LABEL: v_fshl_i8_5:
516 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
517 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
518 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
519 ; GFX10-NEXT: v_lshlrev_b16 v0, 5, v0
520 ; GFX10-NEXT: v_lshrrev_b16 v1, 3, v1
521 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
522 ; GFX10-NEXT: s_setpc_b64 s[30:31]
523 %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 5)
; s_fshl_v2i8: funnel shift left on <2 x i8> (@llvm.fshl.v2i8) in an amdgpu_ps
; function. The vectors are passed and returned as inreg i16 values and
; bitcast to/from <2 x i8> in the IR body.
; The expected code handles the two byte lanes separately: the high byte of
; each operand is split off with a shift right by 8, each lane is shifted with
; an amount masked by 7 (s_and_b32 / s_andn2_b32 with 7), and the two lane
; results are masked with 0xff and repacked with a shift left by 8 plus
; s_or_b32. Everything stays on the scalar unit on all four targets.
; GFX8/GFX9/GFX10 expect the repacking shift amount to be produced by
; "s_bfe_u32 s2, 8, 0x100000" rather than used as an inline constant.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
527 define amdgpu_ps i16 @s_fshl_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 inreg %amt.arg) {
528 ; GFX6-LABEL: s_fshl_v2i8:
530 ; GFX6-NEXT: s_and_b32 s5, s2, 7
531 ; GFX6-NEXT: s_lshr_b32 s3, s0, 8
532 ; GFX6-NEXT: s_lshr_b32 s4, s2, 8
533 ; GFX6-NEXT: s_andn2_b32 s2, 7, s2
534 ; GFX6-NEXT: s_lshl_b32 s0, s0, s5
535 ; GFX6-NEXT: s_bfe_u32 s5, s1, 0x70001
536 ; GFX6-NEXT: s_lshr_b32 s2, s5, s2
537 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80008
538 ; GFX6-NEXT: s_or_b32 s0, s0, s2
539 ; GFX6-NEXT: s_and_b32 s2, s4, 7
540 ; GFX6-NEXT: s_andn2_b32 s4, 7, s4
541 ; GFX6-NEXT: s_lshr_b32 s1, s1, 1
542 ; GFX6-NEXT: s_lshl_b32 s2, s3, s2
543 ; GFX6-NEXT: s_lshr_b32 s1, s1, s4
544 ; GFX6-NEXT: s_movk_i32 s6, 0xff
545 ; GFX6-NEXT: s_or_b32 s1, s2, s1
546 ; GFX6-NEXT: s_and_b32 s1, s1, s6
547 ; GFX6-NEXT: s_and_b32 s0, s0, s6
548 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8
549 ; GFX6-NEXT: s_or_b32 s0, s0, s1
550 ; GFX6-NEXT: ; return to shader part epilog
552 ; GFX8-LABEL: s_fshl_v2i8:
554 ; GFX8-NEXT: s_and_b32 s6, s2, 7
555 ; GFX8-NEXT: s_lshr_b32 s3, s0, 8
556 ; GFX8-NEXT: s_lshl_b32 s0, s0, s6
557 ; GFX8-NEXT: s_movk_i32 s6, 0xff
558 ; GFX8-NEXT: s_lshr_b32 s4, s1, 8
559 ; GFX8-NEXT: s_and_b32 s1, s1, s6
560 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
561 ; GFX8-NEXT: s_lshr_b32 s5, s2, 8
562 ; GFX8-NEXT: s_andn2_b32 s2, 7, s2
563 ; GFX8-NEXT: s_lshr_b32 s1, s1, 1
564 ; GFX8-NEXT: s_lshr_b32 s1, s1, s2
565 ; GFX8-NEXT: s_or_b32 s0, s0, s1
566 ; GFX8-NEXT: s_and_b32 s1, s5, 7
567 ; GFX8-NEXT: s_lshl_b32 s1, s3, s1
568 ; GFX8-NEXT: s_and_b32 s3, s4, s6
569 ; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
570 ; GFX8-NEXT: s_andn2_b32 s2, 7, s5
571 ; GFX8-NEXT: s_lshr_b32 s3, s3, 1
572 ; GFX8-NEXT: s_lshr_b32 s2, s3, s2
573 ; GFX8-NEXT: s_or_b32 s1, s1, s2
574 ; GFX8-NEXT: s_and_b32 s1, s1, s6
575 ; GFX8-NEXT: s_bfe_u32 s2, 8, 0x100000
576 ; GFX8-NEXT: s_and_b32 s0, s0, s6
577 ; GFX8-NEXT: s_lshl_b32 s1, s1, s2
578 ; GFX8-NEXT: s_or_b32 s0, s0, s1
579 ; GFX8-NEXT: ; return to shader part epilog
581 ; GFX9-LABEL: s_fshl_v2i8:
583 ; GFX9-NEXT: s_and_b32 s6, s2, 7
584 ; GFX9-NEXT: s_lshr_b32 s3, s0, 8
585 ; GFX9-NEXT: s_lshl_b32 s0, s0, s6
586 ; GFX9-NEXT: s_movk_i32 s6, 0xff
587 ; GFX9-NEXT: s_lshr_b32 s4, s1, 8
588 ; GFX9-NEXT: s_and_b32 s1, s1, s6
589 ; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
590 ; GFX9-NEXT: s_lshr_b32 s5, s2, 8
591 ; GFX9-NEXT: s_andn2_b32 s2, 7, s2
592 ; GFX9-NEXT: s_lshr_b32 s1, s1, 1
593 ; GFX9-NEXT: s_lshr_b32 s1, s1, s2
594 ; GFX9-NEXT: s_or_b32 s0, s0, s1
595 ; GFX9-NEXT: s_and_b32 s1, s5, 7
596 ; GFX9-NEXT: s_lshl_b32 s1, s3, s1
597 ; GFX9-NEXT: s_and_b32 s3, s4, s6
598 ; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000
599 ; GFX9-NEXT: s_andn2_b32 s2, 7, s5
600 ; GFX9-NEXT: s_lshr_b32 s3, s3, 1
601 ; GFX9-NEXT: s_lshr_b32 s2, s3, s2
602 ; GFX9-NEXT: s_or_b32 s1, s1, s2
603 ; GFX9-NEXT: s_and_b32 s1, s1, s6
604 ; GFX9-NEXT: s_bfe_u32 s2, 8, 0x100000
605 ; GFX9-NEXT: s_and_b32 s0, s0, s6
606 ; GFX9-NEXT: s_lshl_b32 s1, s1, s2
607 ; GFX9-NEXT: s_or_b32 s0, s0, s1
608 ; GFX9-NEXT: ; return to shader part epilog
610 ; GFX10-LABEL: s_fshl_v2i8:
612 ; GFX10-NEXT: s_lshr_b32 s4, s1, 8
613 ; GFX10-NEXT: s_movk_i32 s6, 0xff
614 ; GFX10-NEXT: s_lshr_b32 s5, s2, 8
615 ; GFX10-NEXT: s_and_b32 s4, s4, s6
616 ; GFX10-NEXT: s_and_b32 s7, s2, 7
617 ; GFX10-NEXT: s_and_b32 s1, s1, s6
618 ; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000
619 ; GFX10-NEXT: s_lshr_b32 s3, s0, 8
620 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
621 ; GFX10-NEXT: s_lshl_b32 s0, s0, s7
622 ; GFX10-NEXT: s_and_b32 s7, s5, 7
623 ; GFX10-NEXT: s_andn2_b32 s5, 7, s5
624 ; GFX10-NEXT: s_lshr_b32 s4, s4, 1
625 ; GFX10-NEXT: s_andn2_b32 s2, 7, s2
626 ; GFX10-NEXT: s_lshr_b32 s1, s1, 1
627 ; GFX10-NEXT: s_lshl_b32 s3, s3, s7
628 ; GFX10-NEXT: s_lshr_b32 s4, s4, s5
629 ; GFX10-NEXT: s_lshr_b32 s1, s1, s2
630 ; GFX10-NEXT: s_or_b32 s2, s3, s4
631 ; GFX10-NEXT: s_or_b32 s0, s0, s1
632 ; GFX10-NEXT: s_and_b32 s1, s2, s6
633 ; GFX10-NEXT: s_bfe_u32 s2, 8, 0x100000
634 ; GFX10-NEXT: s_and_b32 s0, s0, s6
635 ; GFX10-NEXT: s_lshl_b32 s1, s1, s2
636 ; GFX10-NEXT: s_or_b32 s0, s0, s1
637 ; GFX10-NEXT: ; return to shader part epilog
638 %lhs = bitcast i16 %lhs.arg to <2 x i8>
639 %rhs = bitcast i16 %rhs.arg to <2 x i8>
640 %amt = bitcast i16 %amt.arg to <2 x i8>
641 %result = call <2 x i8> @llvm.fshl.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt)
642 %cast.result = bitcast <2 x i8> %result to i16
; v_fshl_v2i8: funnel shift left on <2 x i8> (@llvm.fshl.v2i8) using the
; default calling convention. The vectors are passed and returned as i16
; values (v0-v2 in, v0 out) and bitcast to/from <2 x i8> in the IR body.
; The expected code handles the two byte lanes separately: the high byte of
; each operand is split off with v_lshrrev_b32 by 8, each lane uses "amt & 7"
; for the left shift and "(amt ^ -1) & 7" for the right shift, and the lane
; results are repacked into one register.
; Repacking differs per target: GFX6 masks both lanes with 0xff and shifts the
; high lane left by 8 before v_or_b32; GFX8/GFX9 finish with v_or_b32_sdwa
; selecting BYTE_0 of the low lane; GFX10 writes the high lane directly into
; BYTE_1 with v_and_b32_sdwa before the final v_or_b32_sdwa.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
646 define i16 @v_fshl_v2i8(i16 %lhs.arg, i16 %rhs.arg, i16 %amt.arg) {
647 ; GFX6-LABEL: v_fshl_v2i8:
649 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
650 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 8, v2
651 ; GFX6-NEXT: v_and_b32_e32 v5, 7, v2
652 ; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
653 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0
654 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v2
655 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v5, v0
656 ; GFX6-NEXT: v_bfe_u32 v5, v1, 1, 7
657 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v5
658 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
659 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v4
660 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
661 ; GFX6-NEXT: v_bfe_u32 v1, v1, 8, 8
662 ; GFX6-NEXT: v_and_b32_e32 v4, 7, v4
663 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
664 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, v2, v3
665 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v4, v1
666 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
667 ; GFX6-NEXT: v_mov_b32_e32 v2, 0xff
668 ; GFX6-NEXT: v_and_b32_e32 v1, v1, v2
669 ; GFX6-NEXT: v_and_b32_e32 v0, v0, v2
670 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
671 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
672 ; GFX6-NEXT: s_setpc_b64 s[30:31]
674 ; GFX8-LABEL: v_fshl_v2i8:
676 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
677 ; GFX8-NEXT: v_and_b32_e32 v6, 7, v2
678 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0
679 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2
680 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
681 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v6, v0
682 ; GFX8-NEXT: v_mov_b32_e32 v6, 1
683 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1
684 ; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
685 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
686 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1
687 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
688 ; GFX8-NEXT: v_and_b32_e32 v1, 7, v5
689 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v5
690 ; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
691 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, v1, v3
692 ; GFX8-NEXT: v_lshrrev_b16_sdwa v3, v6, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
693 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v2, v3
694 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
695 ; GFX8-NEXT: v_and_b32_e32 v1, 0xff, v1
696 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
697 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
698 ; GFX8-NEXT: s_setpc_b64 s[30:31]
700 ; GFX9-LABEL: v_fshl_v2i8:
702 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
703 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2
704 ; GFX9-NEXT: v_and_b32_e32 v6, 7, v2
705 ; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
706 ; GFX9-NEXT: s_mov_b32 s4, 1
707 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1
708 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
709 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
710 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0
711 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v6, v0
712 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1
713 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
714 ; GFX9-NEXT: v_and_b32_e32 v1, 7, v5
715 ; GFX9-NEXT: v_xor_b32_e32 v2, -1, v5
716 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
717 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, v1, v3
718 ; GFX9-NEXT: v_lshrrev_b16_sdwa v3, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
719 ; GFX9-NEXT: v_lshrrev_b16_e32 v2, v2, v3
720 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v2
721 ; GFX9-NEXT: v_and_b32_e32 v1, 0xff, v1
722 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
723 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
724 ; GFX9-NEXT: s_setpc_b64 s[30:31]
726 ; GFX10-LABEL: v_fshl_v2i8:
728 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
729 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
730 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v2
731 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v1
732 ; GFX10-NEXT: s_movk_i32 s4, 0xff
733 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v0
734 ; GFX10-NEXT: v_xor_b32_e32 v7, -1, v2
735 ; GFX10-NEXT: v_xor_b32_e32 v6, -1, v3
736 ; GFX10-NEXT: v_and_b32_e32 v4, s4, v4
737 ; GFX10-NEXT: v_and_b32_e32 v1, s4, v1
738 ; GFX10-NEXT: v_and_b32_e32 v3, 7, v3
739 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
740 ; GFX10-NEXT: v_and_b32_e32 v6, 7, v6
741 ; GFX10-NEXT: v_lshrrev_b16 v4, 1, v4
742 ; GFX10-NEXT: v_and_b32_e32 v7, 7, v7
743 ; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1
744 ; GFX10-NEXT: v_lshlrev_b16 v3, v3, v5
745 ; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0
746 ; GFX10-NEXT: v_lshrrev_b16 v4, v6, v4
747 ; GFX10-NEXT: v_lshrrev_b16 v1, v7, v1
748 ; GFX10-NEXT: v_or_b32_e32 v2, v3, v4
749 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
750 ; GFX10-NEXT: v_and_b32_sdwa v1, v2, s4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
751 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
752 ; GFX10-NEXT: s_setpc_b64 s[30:31]
753 %lhs = bitcast i16 %lhs.arg to <2 x i8>
754 %rhs = bitcast i16 %rhs.arg to <2 x i8>
755 %amt = bitcast i16 %amt.arg to <2 x i8>
756 %result = call <2 x i8> @llvm.fshl.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt)
757 %cast.result = bitcast <2 x i8> %result to i16
761 define amdgpu_ps i32 @s_fshl_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 inreg %amt.arg) {
762 ; GFX6-LABEL: s_fshl_v4i8:
764 ; GFX6-NEXT: s_and_b32 s9, s2, 7
765 ; GFX6-NEXT: s_lshr_b32 s3, s0, 8
766 ; GFX6-NEXT: s_lshr_b32 s4, s0, 16
767 ; GFX6-NEXT: s_lshr_b32 s5, s0, 24
768 ; GFX6-NEXT: s_lshr_b32 s6, s2, 8
769 ; GFX6-NEXT: s_lshr_b32 s7, s2, 16
770 ; GFX6-NEXT: s_lshr_b32 s8, s2, 24
771 ; GFX6-NEXT: s_andn2_b32 s2, 7, s2
772 ; GFX6-NEXT: s_lshl_b32 s0, s0, s9
773 ; GFX6-NEXT: s_bfe_u32 s9, s1, 0x70001
774 ; GFX6-NEXT: s_lshr_b32 s2, s9, s2
775 ; GFX6-NEXT: s_or_b32 s0, s0, s2
776 ; GFX6-NEXT: s_and_b32 s2, s6, 7
777 ; GFX6-NEXT: s_lshl_b32 s2, s3, s2
778 ; GFX6-NEXT: s_bfe_u32 s3, s1, 0x80008
779 ; GFX6-NEXT: s_andn2_b32 s6, 7, s6
780 ; GFX6-NEXT: s_lshr_b32 s3, s3, 1
781 ; GFX6-NEXT: s_lshr_b32 s3, s3, s6
782 ; GFX6-NEXT: s_or_b32 s2, s2, s3
783 ; GFX6-NEXT: s_and_b32 s3, s7, 7
784 ; GFX6-NEXT: s_lshl_b32 s3, s4, s3
785 ; GFX6-NEXT: s_bfe_u32 s4, s1, 0x80010
786 ; GFX6-NEXT: s_andn2_b32 s6, 7, s7
787 ; GFX6-NEXT: s_lshr_b32 s4, s4, 1
788 ; GFX6-NEXT: s_movk_i32 s10, 0xff
789 ; GFX6-NEXT: s_lshr_b32 s4, s4, s6
790 ; GFX6-NEXT: s_or_b32 s3, s3, s4
791 ; GFX6-NEXT: s_and_b32 s4, s8, 7
792 ; GFX6-NEXT: s_andn2_b32 s6, 7, s8
793 ; GFX6-NEXT: s_lshr_b32 s1, s1, 25
794 ; GFX6-NEXT: s_and_b32 s2, s2, s10
795 ; GFX6-NEXT: s_lshl_b32 s4, s5, s4
796 ; GFX6-NEXT: s_lshr_b32 s1, s1, s6
797 ; GFX6-NEXT: s_and_b32 s0, s0, s10
798 ; GFX6-NEXT: s_lshl_b32 s2, s2, 8
799 ; GFX6-NEXT: s_or_b32 s1, s4, s1
800 ; GFX6-NEXT: s_or_b32 s0, s0, s2
801 ; GFX6-NEXT: s_and_b32 s2, s3, s10
802 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
803 ; GFX6-NEXT: s_and_b32 s1, s1, s10
804 ; GFX6-NEXT: s_or_b32 s0, s0, s2
805 ; GFX6-NEXT: s_lshl_b32 s1, s1, 24
806 ; GFX6-NEXT: s_or_b32 s0, s0, s1
807 ; GFX6-NEXT: ; return to shader part epilog
809 ; GFX8-LABEL: s_fshl_v4i8:
811 ; GFX8-NEXT: s_movk_i32 s13, 0xff
812 ; GFX8-NEXT: s_lshr_b32 s6, s1, 8
813 ; GFX8-NEXT: s_lshr_b32 s7, s1, 16
814 ; GFX8-NEXT: s_lshr_b32 s8, s1, 24
815 ; GFX8-NEXT: s_and_b32 s1, s1, s13
816 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
817 ; GFX8-NEXT: s_lshr_b32 s9, s2, 8
818 ; GFX8-NEXT: s_lshr_b32 s10, s2, 16
819 ; GFX8-NEXT: s_lshr_b32 s11, s2, 24
820 ; GFX8-NEXT: s_and_b32 s12, s2, 7
821 ; GFX8-NEXT: s_andn2_b32 s2, 7, s2
822 ; GFX8-NEXT: s_lshr_b32 s1, s1, 1
823 ; GFX8-NEXT: s_lshr_b32 s3, s0, 8
824 ; GFX8-NEXT: s_lshr_b32 s4, s0, 16
825 ; GFX8-NEXT: s_lshr_b32 s5, s0, 24
826 ; GFX8-NEXT: s_lshl_b32 s0, s0, s12
827 ; GFX8-NEXT: s_lshr_b32 s1, s1, s2
828 ; GFX8-NEXT: s_or_b32 s0, s0, s1
829 ; GFX8-NEXT: s_and_b32 s1, s9, 7
830 ; GFX8-NEXT: s_lshl_b32 s1, s3, s1
831 ; GFX8-NEXT: s_and_b32 s3, s6, s13
832 ; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
833 ; GFX8-NEXT: s_andn2_b32 s2, 7, s9
834 ; GFX8-NEXT: s_lshr_b32 s3, s3, 1
835 ; GFX8-NEXT: s_lshr_b32 s2, s3, s2
836 ; GFX8-NEXT: s_or_b32 s1, s1, s2
837 ; GFX8-NEXT: s_and_b32 s2, s10, 7
838 ; GFX8-NEXT: s_lshl_b32 s2, s4, s2
839 ; GFX8-NEXT: s_and_b32 s4, s7, s13
840 ; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
841 ; GFX8-NEXT: s_andn2_b32 s3, 7, s10
842 ; GFX8-NEXT: s_lshr_b32 s4, s4, 1
843 ; GFX8-NEXT: s_lshr_b32 s3, s4, s3
844 ; GFX8-NEXT: s_or_b32 s2, s2, s3
845 ; GFX8-NEXT: s_and_b32 s3, s11, 7
846 ; GFX8-NEXT: s_and_b32 s1, s1, s13
847 ; GFX8-NEXT: s_andn2_b32 s4, 7, s11
848 ; GFX8-NEXT: s_lshl_b32 s3, s5, s3
849 ; GFX8-NEXT: s_lshr_b32 s5, s8, 1
850 ; GFX8-NEXT: s_and_b32 s0, s0, s13
851 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8
852 ; GFX8-NEXT: s_lshr_b32 s4, s5, s4
853 ; GFX8-NEXT: s_or_b32 s0, s0, s1
854 ; GFX8-NEXT: s_and_b32 s1, s2, s13
855 ; GFX8-NEXT: s_or_b32 s3, s3, s4
856 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
857 ; GFX8-NEXT: s_or_b32 s0, s0, s1
858 ; GFX8-NEXT: s_and_b32 s1, s3, s13
859 ; GFX8-NEXT: s_lshl_b32 s1, s1, 24
860 ; GFX8-NEXT: s_or_b32 s0, s0, s1
861 ; GFX8-NEXT: ; return to shader part epilog
863 ; GFX9-LABEL: s_fshl_v4i8:
865 ; GFX9-NEXT: s_movk_i32 s13, 0xff
866 ; GFX9-NEXT: s_lshr_b32 s6, s1, 8
867 ; GFX9-NEXT: s_lshr_b32 s7, s1, 16
868 ; GFX9-NEXT: s_lshr_b32 s8, s1, 24
869 ; GFX9-NEXT: s_and_b32 s1, s1, s13
870 ; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
871 ; GFX9-NEXT: s_lshr_b32 s9, s2, 8
872 ; GFX9-NEXT: s_lshr_b32 s10, s2, 16
873 ; GFX9-NEXT: s_lshr_b32 s11, s2, 24
874 ; GFX9-NEXT: s_and_b32 s12, s2, 7
875 ; GFX9-NEXT: s_andn2_b32 s2, 7, s2
876 ; GFX9-NEXT: s_lshr_b32 s1, s1, 1
877 ; GFX9-NEXT: s_lshr_b32 s3, s0, 8
878 ; GFX9-NEXT: s_lshr_b32 s4, s0, 16
879 ; GFX9-NEXT: s_lshr_b32 s5, s0, 24
880 ; GFX9-NEXT: s_lshl_b32 s0, s0, s12
881 ; GFX9-NEXT: s_lshr_b32 s1, s1, s2
882 ; GFX9-NEXT: s_or_b32 s0, s0, s1
883 ; GFX9-NEXT: s_and_b32 s1, s9, 7
884 ; GFX9-NEXT: s_lshl_b32 s1, s3, s1
885 ; GFX9-NEXT: s_and_b32 s3, s6, s13
886 ; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000
887 ; GFX9-NEXT: s_andn2_b32 s2, 7, s9
888 ; GFX9-NEXT: s_lshr_b32 s3, s3, 1
889 ; GFX9-NEXT: s_lshr_b32 s2, s3, s2
890 ; GFX9-NEXT: s_or_b32 s1, s1, s2
891 ; GFX9-NEXT: s_and_b32 s2, s10, 7
892 ; GFX9-NEXT: s_lshl_b32 s2, s4, s2
893 ; GFX9-NEXT: s_and_b32 s4, s7, s13
894 ; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000
895 ; GFX9-NEXT: s_andn2_b32 s3, 7, s10
896 ; GFX9-NEXT: s_lshr_b32 s4, s4, 1
897 ; GFX9-NEXT: s_lshr_b32 s3, s4, s3
898 ; GFX9-NEXT: s_or_b32 s2, s2, s3
899 ; GFX9-NEXT: s_and_b32 s3, s11, 7
900 ; GFX9-NEXT: s_and_b32 s1, s1, s13
901 ; GFX9-NEXT: s_andn2_b32 s4, 7, s11
902 ; GFX9-NEXT: s_lshl_b32 s3, s5, s3
903 ; GFX9-NEXT: s_lshr_b32 s5, s8, 1
904 ; GFX9-NEXT: s_and_b32 s0, s0, s13
905 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8
906 ; GFX9-NEXT: s_lshr_b32 s4, s5, s4
907 ; GFX9-NEXT: s_or_b32 s0, s0, s1
908 ; GFX9-NEXT: s_and_b32 s1, s2, s13
909 ; GFX9-NEXT: s_or_b32 s3, s3, s4
910 ; GFX9-NEXT: s_lshl_b32 s1, s1, 16
911 ; GFX9-NEXT: s_or_b32 s0, s0, s1
912 ; GFX9-NEXT: s_and_b32 s1, s3, s13
913 ; GFX9-NEXT: s_lshl_b32 s1, s1, 24
914 ; GFX9-NEXT: s_or_b32 s0, s0, s1
915 ; GFX9-NEXT: ; return to shader part epilog
917 ; GFX10-LABEL: s_fshl_v4i8:
919 ; GFX10-NEXT: s_movk_i32 s11, 0xff
920 ; GFX10-NEXT: s_lshr_b32 s6, s1, 8
921 ; GFX10-NEXT: s_lshr_b32 s7, s1, 16
922 ; GFX10-NEXT: s_lshr_b32 s8, s1, 24
923 ; GFX10-NEXT: s_and_b32 s1, s1, s11
924 ; GFX10-NEXT: s_lshr_b32 s9, s2, 8
925 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
926 ; GFX10-NEXT: s_lshr_b32 s10, s2, 16
927 ; GFX10-NEXT: s_lshr_b32 s12, s2, 24
928 ; GFX10-NEXT: s_and_b32 s13, s2, 7
929 ; GFX10-NEXT: s_andn2_b32 s2, 7, s2
930 ; GFX10-NEXT: s_lshr_b32 s1, s1, 1
931 ; GFX10-NEXT: s_lshr_b32 s3, s0, 8
932 ; GFX10-NEXT: s_lshr_b32 s1, s1, s2
933 ; GFX10-NEXT: s_and_b32 s2, s6, s11
934 ; GFX10-NEXT: s_and_b32 s6, s9, 7
935 ; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000
936 ; GFX10-NEXT: s_andn2_b32 s9, 7, s9
937 ; GFX10-NEXT: s_lshr_b32 s2, s2, 1
938 ; GFX10-NEXT: s_lshr_b32 s4, s0, 16
939 ; GFX10-NEXT: s_lshr_b32 s5, s0, 24
940 ; GFX10-NEXT: s_lshl_b32 s0, s0, s13
941 ; GFX10-NEXT: s_lshl_b32 s3, s3, s6
942 ; GFX10-NEXT: s_lshr_b32 s2, s2, s9
943 ; GFX10-NEXT: s_or_b32 s0, s0, s1
944 ; GFX10-NEXT: s_or_b32 s1, s3, s2
945 ; GFX10-NEXT: s_and_b32 s2, s7, s11
946 ; GFX10-NEXT: s_and_b32 s3, s10, 7
947 ; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000
948 ; GFX10-NEXT: s_andn2_b32 s6, 7, s10
949 ; GFX10-NEXT: s_lshr_b32 s2, s2, 1
950 ; GFX10-NEXT: s_lshl_b32 s3, s4, s3
951 ; GFX10-NEXT: s_lshr_b32 s2, s2, s6
952 ; GFX10-NEXT: s_and_b32 s4, s12, 7
953 ; GFX10-NEXT: s_andn2_b32 s6, 7, s12
954 ; GFX10-NEXT: s_lshr_b32 s7, s8, 1
955 ; GFX10-NEXT: s_lshl_b32 s4, s5, s4
956 ; GFX10-NEXT: s_lshr_b32 s5, s7, s6
957 ; GFX10-NEXT: s_or_b32 s2, s3, s2
958 ; GFX10-NEXT: s_and_b32 s1, s1, s11
959 ; GFX10-NEXT: s_or_b32 s3, s4, s5
960 ; GFX10-NEXT: s_and_b32 s0, s0, s11
961 ; GFX10-NEXT: s_lshl_b32 s1, s1, 8
962 ; GFX10-NEXT: s_and_b32 s2, s2, s11
963 ; GFX10-NEXT: s_or_b32 s0, s0, s1
964 ; GFX10-NEXT: s_lshl_b32 s1, s2, 16
965 ; GFX10-NEXT: s_and_b32 s2, s3, s11
966 ; GFX10-NEXT: s_or_b32 s0, s0, s1
967 ; GFX10-NEXT: s_lshl_b32 s1, s2, 24
968 ; GFX10-NEXT: s_or_b32 s0, s0, s1
969 ; GFX10-NEXT: ; return to shader part epilog
970 %lhs = bitcast i32 %lhs.arg to <4 x i8>
971 %rhs = bitcast i32 %rhs.arg to <4 x i8>
972 %amt = bitcast i32 %amt.arg to <4 x i8>
973 %result = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt)
974 %cast.result = bitcast <4 x i8> %result to i32
; v_fshl_v4i8: funnel shift left on <4 x i8>, where all three operands and the
; result travel as packed i32 values. The function has no amdgpu_ps attribute
; and no inreg arguments; the expected code below reads lhs from v0, rhs from
; v1 and the shift amounts from v2, leaves the packed result in v0 and ends
; with s_setpc_b64 s[30:31].
; For each 8-bit lane the expected sequence computes
;   (lhs << (amt & 7)) | ((rhs >> 1) >> (~amt & 7))
; and then masks the four lane results with 0xff and re-packs them into one
; dword (shifts by 8, 16 and 24).
; The GFX6/GFX8/GFX9/GFX10 check blocks are autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them with that script rather than editing them by hand.
978 define i32 @v_fshl_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) {
979 ; GFX6-LABEL: v_fshl_v4i8:
981 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
982 ; GFX6-NEXT: v_lshrrev_b32_e32 v6, 8, v2
983 ; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v2
984 ; GFX6-NEXT: v_lshrrev_b32_e32 v8, 24, v2
985 ; GFX6-NEXT: v_and_b32_e32 v9, 7, v2
986 ; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
987 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0
988 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v0
989 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 24, v0
990 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v2
991 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v9, v0
992 ; GFX6-NEXT: v_bfe_u32 v9, v1, 1, 7
993 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v9
994 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
995 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v6
996 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
997 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, v2, v3
998 ; GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8
999 ; GFX6-NEXT: v_and_b32_e32 v6, 7, v6
1000 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 1, v3
1001 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3
1002 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
1003 ; GFX6-NEXT: v_and_b32_e32 v3, 7, v7
1004 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7
1005 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, v3, v4
1006 ; GFX6-NEXT: v_bfe_u32 v4, v1, 16, 8
1007 ; GFX6-NEXT: v_and_b32_e32 v6, 7, v6
1008 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 1, v4
1009 ; GFX6-NEXT: s_movk_i32 s4, 0xff
1010 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4
1011 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v8
1012 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
1013 ; GFX6-NEXT: v_and_b32_e32 v4, 7, v8
1014 ; GFX6-NEXT: v_and_b32_e32 v6, 7, v6
1015 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 25, v1
1016 ; GFX6-NEXT: v_and_b32_e32 v2, s4, v2
1017 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, v4, v5
1018 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v6, v1
1019 ; GFX6-NEXT: v_and_b32_e32 v0, s4, v0
1020 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2
1021 ; GFX6-NEXT: v_or_b32_e32 v1, v4, v1
1022 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
1023 ; GFX6-NEXT: v_and_b32_e32 v2, s4, v3
1024 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1025 ; GFX6-NEXT: v_and_b32_e32 v1, s4, v1
1026 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
1027 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
1028 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1029 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1031 ; GFX8-LABEL: v_fshl_v4i8:
1033 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1034 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2
1035 ; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1036 ; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v2
1037 ; GFX8-NEXT: v_and_b32_e32 v8, 7, v2
1038 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
1039 ; GFX8-NEXT: v_mov_b32_e32 v10, 1
1040 ; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
1041 ; GFX8-NEXT: v_lshrrev_b16_sdwa v11, v10, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1042 ; GFX8-NEXT: v_lshlrev_b16_e32 v8, v8, v0
1043 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v2, v11
1044 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1
1045 ; GFX8-NEXT: v_or_b32_e32 v2, v8, v2
1046 ; GFX8-NEXT: v_and_b32_e32 v8, 7, v5
1047 ; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5
1048 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0
1049 ; GFX8-NEXT: v_and_b32_e32 v5, 7, v5
1050 ; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1051 ; GFX8-NEXT: v_mov_b32_e32 v9, 0xff
1052 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, v8, v3
1053 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, v5, v4
1054 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v4
1055 ; GFX8-NEXT: v_and_b32_e32 v4, 7, v6
1056 ; GFX8-NEXT: v_xor_b32_e32 v5, -1, v6
1057 ; GFX8-NEXT: v_and_b32_sdwa v6, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1058 ; GFX8-NEXT: v_and_b32_e32 v5, 7, v5
1059 ; GFX8-NEXT: v_lshrrev_b16_e32 v6, 1, v6
1060 ; GFX8-NEXT: v_lshlrev_b16_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1061 ; GFX8-NEXT: v_lshrrev_b16_e32 v5, v5, v6
1062 ; GFX8-NEXT: v_or_b32_e32 v4, v4, v5
1063 ; GFX8-NEXT: v_and_b32_e32 v5, 7, v7
1064 ; GFX8-NEXT: v_xor_b32_e32 v6, -1, v7
1065 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1066 ; GFX8-NEXT: v_mov_b32_e32 v5, 1
1067 ; GFX8-NEXT: v_and_b32_e32 v6, 7, v6
1068 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1069 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, v6, v1
1070 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
1071 ; GFX8-NEXT: v_mov_b32_e32 v1, 8
1072 ; GFX8-NEXT: s_movk_i32 s4, 0xff
1073 ; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1074 ; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1075 ; GFX8-NEXT: v_and_b32_e32 v2, s4, v4
1076 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1077 ; GFX8-NEXT: v_and_b32_e32 v0, s4, v0
1078 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
1079 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 24, v0
1080 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
1081 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1083 ; GFX9-LABEL: v_fshl_v4i8:
1085 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1086 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2
1087 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1088 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v2
1089 ; GFX9-NEXT: v_and_b32_e32 v8, 7, v2
1090 ; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
1091 ; GFX9-NEXT: s_mov_b32 s5, 1
1092 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
1093 ; GFX9-NEXT: v_lshrrev_b16_sdwa v10, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1094 ; GFX9-NEXT: v_lshlrev_b16_e32 v8, v8, v0
1095 ; GFX9-NEXT: v_lshrrev_b16_e32 v2, v2, v10
1096 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1
1097 ; GFX9-NEXT: v_or_b32_e32 v2, v8, v2
1098 ; GFX9-NEXT: v_and_b32_e32 v8, 7, v5
1099 ; GFX9-NEXT: v_xor_b32_e32 v5, -1, v5
1100 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0
1101 ; GFX9-NEXT: v_and_b32_e32 v5, 7, v5
1102 ; GFX9-NEXT: v_lshrrev_b16_sdwa v4, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1103 ; GFX9-NEXT: v_mov_b32_e32 v9, 0xff
1104 ; GFX9-NEXT: v_lshlrev_b16_e32 v3, v8, v3
1105 ; GFX9-NEXT: v_lshrrev_b16_e32 v4, v5, v4
1106 ; GFX9-NEXT: v_or_b32_e32 v3, v3, v4
1107 ; GFX9-NEXT: v_and_b32_e32 v4, 7, v6
1108 ; GFX9-NEXT: v_xor_b32_e32 v5, -1, v6
1109 ; GFX9-NEXT: v_and_b32_sdwa v6, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1110 ; GFX9-NEXT: v_and_b32_e32 v5, 7, v5
1111 ; GFX9-NEXT: v_lshrrev_b16_e32 v6, 1, v6
1112 ; GFX9-NEXT: v_lshlrev_b16_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1113 ; GFX9-NEXT: v_lshrrev_b16_e32 v5, v5, v6
1114 ; GFX9-NEXT: v_or_b32_e32 v4, v4, v5
1115 ; GFX9-NEXT: v_and_b32_e32 v5, 7, v7
1116 ; GFX9-NEXT: v_xor_b32_e32 v6, -1, v7
1117 ; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1118 ; GFX9-NEXT: v_mov_b32_e32 v5, 1
1119 ; GFX9-NEXT: v_and_b32_e32 v6, 7, v6
1120 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1121 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, v6, v1
1122 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
1123 ; GFX9-NEXT: v_mov_b32_e32 v1, 8
1124 ; GFX9-NEXT: s_movk_i32 s4, 0xff
1125 ; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1126 ; GFX9-NEXT: v_and_or_b32 v1, v2, s4, v1
1127 ; GFX9-NEXT: v_and_b32_e32 v2, s4, v4
1128 ; GFX9-NEXT: v_and_b32_e32 v0, s4, v0
1129 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1130 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 24, v0
1131 ; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0
1132 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1134 ; GFX10-LABEL: v_fshl_v4i8:
1136 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1137 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1138 ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v2
1139 ; GFX10-NEXT: v_and_b32_e32 v11, 7, v2
1140 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0
1141 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v0
1142 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0
1143 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v1
1144 ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 16, v2
1145 ; GFX10-NEXT: v_xor_b32_e32 v10, -1, v2
1146 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v2
1147 ; GFX10-NEXT: v_lshlrev_b16 v0, v11, v0
1148 ; GFX10-NEXT: v_xor_b32_e32 v11, -1, v8
1149 ; GFX10-NEXT: v_and_b32_e32 v8, 7, v8
1150 ; GFX10-NEXT: v_mov_b32_e32 v13, 0xff
1151 ; GFX10-NEXT: s_movk_i32 s4, 0xff
1152 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1
1153 ; GFX10-NEXT: v_and_b32_e32 v12, s4, v1
1154 ; GFX10-NEXT: v_and_b32_e32 v6, s4, v6
1155 ; GFX10-NEXT: v_lshlrev_b16 v3, v8, v3
1156 ; GFX10-NEXT: v_xor_b32_e32 v8, -1, v9
1157 ; GFX10-NEXT: v_and_b32_sdwa v1, v1, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1158 ; GFX10-NEXT: v_xor_b32_e32 v13, -1, v2
1159 ; GFX10-NEXT: v_and_b32_e32 v11, 7, v11
1160 ; GFX10-NEXT: v_lshrrev_b16 v6, 1, v6
1161 ; GFX10-NEXT: v_and_b32_e32 v9, 7, v9
1162 ; GFX10-NEXT: v_and_b32_e32 v8, 7, v8
1163 ; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1
1164 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
1165 ; GFX10-NEXT: v_and_b32_e32 v13, 7, v13
1166 ; GFX10-NEXT: v_lshrrev_b16 v7, 1, v7
1167 ; GFX10-NEXT: v_and_b32_e32 v10, 7, v10
1168 ; GFX10-NEXT: v_lshrrev_b16 v12, 1, v12
1169 ; GFX10-NEXT: v_lshrrev_b16 v6, v11, v6
1170 ; GFX10-NEXT: v_lshlrev_b16 v4, v9, v4
1171 ; GFX10-NEXT: v_lshrrev_b16 v1, v8, v1
1172 ; GFX10-NEXT: v_lshlrev_b16 v2, v2, v5
1173 ; GFX10-NEXT: v_lshrrev_b16 v5, v13, v7
1174 ; GFX10-NEXT: v_lshrrev_b16 v7, v10, v12
1175 ; GFX10-NEXT: v_or_b32_e32 v3, v3, v6
1176 ; GFX10-NEXT: v_mov_b32_e32 v6, 8
1177 ; GFX10-NEXT: v_or_b32_e32 v1, v4, v1
1178 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v5
1179 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v7
1180 ; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1181 ; GFX10-NEXT: v_and_b32_e32 v1, s4, v1
1182 ; GFX10-NEXT: v_and_b32_e32 v2, s4, v2
1183 ; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v3
1184 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1185 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2
1186 ; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2
1187 ; GFX10-NEXT: s_setpc_b64 s[30:31]
; IR under test: reinterpret each i32 argument as <4 x i8>, apply
; llvm.fshl.v4i8 lane-wise, and return the result reinterpreted as i32.
1188 %lhs = bitcast i32 %lhs.arg to <4 x i8>
1189 %rhs = bitcast i32 %rhs.arg to <4 x i8>
1190 %amt = bitcast i32 %amt.arg to <4 x i8>
1191 %result = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt)
1192 %cast.result = bitcast <4 x i8> %result to i32
1193 ret i32 %cast.result
; s_fshl_i24: funnel shift left on i24 in an amdgpu_ps function whose three
; operands are all marked inreg. In the expected code lhs is read from s0, rhs
; from s1 and the shift amount from s2.
; 24 is not a power of two, so the shift amount is not reduced with a simple
; bit mask. Every target instead computes (amt & 0xffffff) % 24 with a
; reciprocal-based unsigned remainder sequence: v_rcp_iflag_f32 of 24.0,
; scaling by 0x4f7ffffe, one refinement step using -24 (0xffffffe8), a
; v_mul_hi_u32 for the quotient estimate, and two compare/select corrections.
; With r = that remainder, the value produced is
;   (lhs << r) | (((rhs & 0xffffff) >> 1) >> (23 - r))
; (s_bfe_u32 with 0x170001 extracts 23 bits of rhs starting at bit 1). Both
; shift amounts are additionally ANDed with 0xffffff. The VALU result is moved
; back to s0 with v_readfirstlane_b32.
; The check blocks are autogenerated by utils/update_llc_test_checks.py (see
; the NOTE at the top of the file); regenerate them rather than hand-editing.
1196 define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt) {
1197 ; GFX6-LABEL: s_fshl_i24:
1199 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1200 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
1201 ; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1202 ; GFX6-NEXT: s_mov_b32 s3, 0xffffff
1203 ; GFX6-NEXT: s_and_b32 s2, s2, s3
1204 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1205 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
1206 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x170001
1207 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, v0
1208 ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
1209 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
1210 ; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
1211 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
1212 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
1213 ; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0
1214 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1215 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1216 ; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0
1217 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1218 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1219 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 23, v0
1220 ; GFX6-NEXT: v_and_b32_e32 v0, s3, v0
1221 ; GFX6-NEXT: v_and_b32_e32 v1, s3, v1
1222 ; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
1223 ; GFX6-NEXT: v_lshr_b32_e32 v1, s1, v1
1224 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1225 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
1226 ; GFX6-NEXT: ; return to shader part epilog
1228 ; GFX8-LABEL: s_fshl_i24:
1230 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1231 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
1232 ; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1233 ; GFX8-NEXT: s_mov_b32 s3, 0xffffff
1234 ; GFX8-NEXT: s_and_b32 s2, s2, s3
1235 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1236 ; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
1237 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x170001
1238 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, v0
1239 ; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
1240 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
1241 ; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
1242 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
1243 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
1244 ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0
1245 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1246 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1247 ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0
1248 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1249 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1250 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0
1251 ; GFX8-NEXT: v_and_b32_e32 v0, s3, v0
1252 ; GFX8-NEXT: v_and_b32_e32 v1, s3, v1
1253 ; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s0
1254 ; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s1
1255 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
1256 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
1257 ; GFX8-NEXT: ; return to shader part epilog
1259 ; GFX9-LABEL: s_fshl_i24:
1261 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1262 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
1263 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1264 ; GFX9-NEXT: s_mov_b32 s3, 0xffffff
1265 ; GFX9-NEXT: s_and_b32 s2, s2, s3
1266 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1267 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
1268 ; GFX9-NEXT: s_bfe_u32 s1, s1, 0x170001
1269 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, v0
1270 ; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
1271 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
1272 ; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0
1273 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
1274 ; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
1275 ; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0
1276 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1277 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1278 ; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0
1279 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1280 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1281 ; GFX9-NEXT: v_sub_u32_e32 v1, 23, v0
1282 ; GFX9-NEXT: v_and_b32_e32 v1, s3, v1
1283 ; GFX9-NEXT: v_and_b32_e32 v0, s3, v0
1284 ; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s1
1285 ; GFX9-NEXT: v_lshl_or_b32 v0, s0, v0, v1
1286 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
1287 ; GFX9-NEXT: ; return to shader part epilog
1289 ; GFX10-LABEL: s_fshl_i24:
1291 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1292 ; GFX10-NEXT: s_mov_b32 s3, 0xffffff
1293 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x170001
1294 ; GFX10-NEXT: s_and_b32 s2, s2, s3
1295 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
1296 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1297 ; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
1298 ; GFX10-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0
1299 ; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
1300 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
1301 ; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0
1302 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24
1303 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0
1304 ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
1305 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
1306 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1307 ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
1308 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
1309 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1310 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, 23, v0
1311 ; GFX10-NEXT: v_and_b32_e32 v0, s3, v0
1312 ; GFX10-NEXT: v_and_b32_e32 v1, s3, v1
1313 ; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s1
1314 ; GFX10-NEXT: v_lshl_or_b32 v0, s0, v0, v1
1315 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
1316 ; GFX10-NEXT: ; return to shader part epilog
; IR under test: a single call to llvm.fshl.i24 on the three arguments.
1317 %result = call i24 @llvm.fshl.i24(i24 %lhs, i24 %rhs, i24 %amt)
; v_fshl_i24: funnel shift left on i24 with no amdgpu_ps attribute and no
; inreg arguments. In the expected code lhs is read from v0, rhs from v1 and
; the shift amount from v2; the result is left in v0 and each block ends with
; s_setpc_b64 s[30:31].
; 24 is not a power of two, so the shift amount is not reduced with a simple
; bit mask. Every target instead computes (amt & 0xffffff) % 24 with a
; reciprocal-based unsigned remainder sequence: v_rcp_iflag_f32 of 24.0,
; scaling by 0x4f7ffffe, one refinement step using -24 (0xffffffe8), a
; v_mul_hi_u32 for the quotient estimate, and two compare/select corrections.
; With r = that remainder, the value produced is
;   (lhs << r) | (((rhs & 0xffffff) >> 1) >> (23 - r))
; (v_bfe_u32 v1, v1, 1, 23 extracts 23 bits of rhs starting at bit 1). Both
; shift amounts are additionally ANDed with 0xffffff.
; The check blocks are autogenerated by utils/update_llc_test_checks.py (see
; the NOTE at the top of the file); regenerate them rather than hand-editing.
1321 define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
1322 ; GFX6-LABEL: v_fshl_i24:
1324 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1325 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1326 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3
1327 ; GFX6-NEXT: v_mov_b32_e32 v4, 0xffffffe8
1328 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1329 ; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 23
1330 ; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1331 ; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3
1332 ; GFX6-NEXT: v_mul_lo_u32 v4, v4, v3
1333 ; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4
1334 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4
1335 ; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
1336 ; GFX6-NEXT: v_mov_b32_e32 v4, 0xffffff
1337 ; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
1338 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
1339 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2
1340 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1341 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1342 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2
1343 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1344 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1345 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2
1346 ; GFX6-NEXT: v_and_b32_e32 v2, v2, v4
1347 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
1348 ; GFX6-NEXT: v_and_b32_e32 v2, v3, v4
1349 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
1350 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1351 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1353 ; GFX8-LABEL: v_fshl_i24:
1355 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1356 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1357 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3
1358 ; GFX8-NEXT: v_mov_b32_e32 v4, 0xffffffe8
1359 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1360 ; GFX8-NEXT: v_bfe_u32 v1, v1, 1, 23
1361 ; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1362 ; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3
1363 ; GFX8-NEXT: v_mul_lo_u32 v4, v4, v3
1364 ; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4
1365 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4
1366 ; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3
1367 ; GFX8-NEXT: v_mov_b32_e32 v4, 0xffffff
1368 ; GFX8-NEXT: v_mul_lo_u32 v3, v3, 24
1369 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
1370 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2
1371 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1372 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1373 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2
1374 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1375 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1376 ; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2
1377 ; GFX8-NEXT: v_and_b32_e32 v2, v2, v4
1378 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v2, v0
1379 ; GFX8-NEXT: v_and_b32_e32 v2, v3, v4
1380 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, v2, v1
1381 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
1382 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1384 ; GFX9-LABEL: v_fshl_i24:
1386 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1387 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1388 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3
1389 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffffe8
1390 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1391 ; GFX9-NEXT: v_bfe_u32 v1, v1, 1, 23
1392 ; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1393 ; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3
1394 ; GFX9-NEXT: v_mul_lo_u32 v4, v4, v3
1395 ; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4
1396 ; GFX9-NEXT: v_add_u32_e32 v3, v3, v4
1397 ; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3
1398 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffff
1399 ; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24
1400 ; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
1401 ; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2
1402 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1403 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1404 ; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2
1405 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1406 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1407 ; GFX9-NEXT: v_sub_u32_e32 v3, 23, v2
1408 ; GFX9-NEXT: v_and_b32_e32 v3, v3, v4
1409 ; GFX9-NEXT: v_and_b32_e32 v2, v2, v4
1410 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, v3, v1
1411 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v2, v1
1412 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1414 ; GFX10-LABEL: v_fshl_i24:
1416 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1417 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
1418 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1419 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1420 ; GFX10-NEXT: v_bfe_u32 v1, v1, 1, 23
1421 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3
1422 ; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1423 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3
1424 ; GFX10-NEXT: v_mul_lo_u32 v4, 0xffffffe8, v3
1425 ; GFX10-NEXT: v_mul_hi_u32 v4, v3, v4
1426 ; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4
1427 ; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3
1428 ; GFX10-NEXT: v_mul_lo_u32 v3, v3, 24
1429 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
1430 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
1431 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
1432 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1433 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
1434 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
1435 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1436 ; GFX10-NEXT: v_mov_b32_e32 v3, 0xffffff
1437 ; GFX10-NEXT: v_sub_nc_u32_e32 v4, 23, v2
1438 ; GFX10-NEXT: v_and_b32_e32 v2, v2, v3
1439 ; GFX10-NEXT: v_and_b32_e32 v4, v4, v3
1440 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, v4, v1
1441 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, v2, v1
1442 ; GFX10-NEXT: s_setpc_b64 s[30:31]
; IR under test: a single call to llvm.fshl.i24 on the three arguments.
1443 %result = call i24 @llvm.fshl.i24(i24 %lhs, i24 %rhs, i24 %amt)
1447 define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) {
1448 ; GFX6-LABEL: s_fshl_v2i24:
1450 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1451 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
1452 ; GFX6-NEXT: s_movk_i32 s9, 0xff
1453 ; GFX6-NEXT: s_mov_b32 s11, 0x80008
1454 ; GFX6-NEXT: s_lshr_b32 s6, s0, 16
1455 ; GFX6-NEXT: s_lshr_b32 s7, s0, 24
1456 ; GFX6-NEXT: s_and_b32 s10, s0, s9
1457 ; GFX6-NEXT: s_bfe_u32 s0, s0, s11
1458 ; GFX6-NEXT: s_lshl_b32 s0, s0, 8
1459 ; GFX6-NEXT: s_and_b32 s6, s6, s9
1460 ; GFX6-NEXT: s_or_b32 s0, s10, s0
1461 ; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000
1462 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1463 ; GFX6-NEXT: s_lshr_b32 s8, s1, 8
1464 ; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000
1465 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1466 ; GFX6-NEXT: s_and_b32 s1, s1, s9
1467 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
1468 ; GFX6-NEXT: s_or_b32 s0, s0, s6
1469 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8
1470 ; GFX6-NEXT: s_and_b32 s6, s8, s9
1471 ; GFX6-NEXT: s_or_b32 s1, s7, s1
1472 ; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000
1473 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000
1474 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1475 ; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1476 ; GFX6-NEXT: s_or_b32 s1, s1, s6
1477 ; GFX6-NEXT: s_lshr_b32 s6, s2, 16
1478 ; GFX6-NEXT: s_lshr_b32 s7, s2, 24
1479 ; GFX6-NEXT: s_and_b32 s10, s2, s9
1480 ; GFX6-NEXT: s_bfe_u32 s2, s2, s11
1481 ; GFX6-NEXT: v_mul_lo_u32 v2, v1, v0
1482 ; GFX6-NEXT: s_lshl_b32 s2, s2, 8
1483 ; GFX6-NEXT: s_and_b32 s6, s6, s9
1484 ; GFX6-NEXT: s_or_b32 s2, s10, s2
1485 ; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000
1486 ; GFX6-NEXT: s_lshr_b32 s8, s3, 8
1487 ; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
1488 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1489 ; GFX6-NEXT: s_and_b32 s3, s3, s9
1490 ; GFX6-NEXT: s_or_b32 s2, s2, s6
1491 ; GFX6-NEXT: s_lshl_b32 s3, s3, 8
1492 ; GFX6-NEXT: s_and_b32 s6, s8, s9
1493 ; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2
1494 ; GFX6-NEXT: s_or_b32 s3, s7, s3
1495 ; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000
1496 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x100000
1497 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1498 ; GFX6-NEXT: s_or_b32 s3, s3, s6
1499 ; GFX6-NEXT: s_lshr_b32 s6, s4, 16
1500 ; GFX6-NEXT: s_lshr_b32 s7, s4, 24
1501 ; GFX6-NEXT: s_and_b32 s10, s4, s9
1502 ; GFX6-NEXT: s_bfe_u32 s4, s4, s11
1503 ; GFX6-NEXT: s_lshl_b32 s4, s4, 8
1504 ; GFX6-NEXT: s_and_b32 s6, s6, s9
1505 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
1506 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
1507 ; GFX6-NEXT: s_or_b32 s4, s10, s4
1508 ; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000
1509 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2
1510 ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
1511 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1512 ; GFX6-NEXT: s_or_b32 s4, s4, s6
1513 ; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0
1514 ; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
1515 ; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2
1516 ; GFX6-NEXT: s_lshr_b32 s8, s5, 8
1517 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
1518 ; GFX6-NEXT: s_and_b32 s5, s5, s9
1519 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, v2
1520 ; GFX6-NEXT: s_lshl_b32 s5, s5, 8
1521 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0
1522 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0
1523 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1524 ; GFX6-NEXT: v_mul_hi_u32 v1, v2, v1
1525 ; GFX6-NEXT: s_and_b32 s6, s8, s9
1526 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1527 ; GFX6-NEXT: s_or_b32 s5, s7, s5
1528 ; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000
1529 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0
1530 ; GFX6-NEXT: s_bfe_u32 s5, s5, 0x100000
1531 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1532 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1533 ; GFX6-NEXT: s_or_b32 s5, s5, s6
1534 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1535 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
1536 ; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1
1537 ; GFX6-NEXT: s_mov_b32 s6, 0xffffff
1538 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v0
1539 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24
1540 ; GFX6-NEXT: v_and_b32_e32 v0, s6, v0
1541 ; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
1542 ; GFX6-NEXT: s_lshr_b32 s0, s2, 1
1543 ; GFX6-NEXT: v_and_b32_e32 v2, s6, v3
1544 ; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2
1545 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v1
1546 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
1547 ; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
1548 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
1549 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1550 ; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
1551 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
1552 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1553 ; GFX6-NEXT: v_mov_b32_e32 v4, 0xffffff
1554 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1
1555 ; GFX6-NEXT: v_and_b32_e32 v1, v1, v4
1556 ; GFX6-NEXT: s_lshr_b32 s0, s3, 1
1557 ; GFX6-NEXT: v_and_b32_e32 v2, v2, v4
1558 ; GFX6-NEXT: v_lshl_b32_e32 v1, s1, v1
1559 ; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2
1560 ; GFX6-NEXT: v_bfe_u32 v3, v0, 8, 8
1561 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
1562 ; GFX6-NEXT: v_and_b32_e32 v2, s9, v0
1563 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3
1564 ; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8
1565 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
1566 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1567 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
1568 ; GFX6-NEXT: v_and_b32_e32 v2, s9, v1
1569 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2
1570 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
1571 ; GFX6-NEXT: v_bfe_u32 v2, v1, 8, 8
1572 ; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8
1573 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
1574 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
1575 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
1576 ; GFX6-NEXT: v_readfirstlane_b32 s1, v1
1577 ; GFX6-NEXT: ; return to shader part epilog
1579 ; GFX8-LABEL: s_fshl_v2i24:
1581 ; GFX8-NEXT: s_lshr_b32 s6, s0, 8
1582 ; GFX8-NEXT: s_movk_i32 s10, 0xff
1583 ; GFX8-NEXT: s_and_b32 s6, s6, s10
1584 ; GFX8-NEXT: s_bfe_u32 s11, 8, 0x100000
1585 ; GFX8-NEXT: s_lshr_b32 s7, s0, 16
1586 ; GFX8-NEXT: s_lshr_b32 s8, s0, 24
1587 ; GFX8-NEXT: s_and_b32 s0, s0, s10
1588 ; GFX8-NEXT: s_lshl_b32 s6, s6, s11
1589 ; GFX8-NEXT: s_or_b32 s0, s0, s6
1590 ; GFX8-NEXT: s_and_b32 s6, s7, s10
1591 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1592 ; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000
1593 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
1594 ; GFX8-NEXT: s_lshr_b32 s9, s1, 8
1595 ; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
1596 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16
1597 ; GFX8-NEXT: s_and_b32 s1, s1, s10
1598 ; GFX8-NEXT: s_or_b32 s0, s0, s6
1599 ; GFX8-NEXT: s_lshl_b32 s1, s1, s11
1600 ; GFX8-NEXT: s_and_b32 s6, s9, s10
1601 ; GFX8-NEXT: s_or_b32 s1, s8, s1
1602 ; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000
1603 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
1604 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16
1605 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1606 ; GFX8-NEXT: s_or_b32 s1, s1, s6
1607 ; GFX8-NEXT: s_lshr_b32 s6, s2, 8
1608 ; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
1609 ; GFX8-NEXT: s_and_b32 s6, s6, s10
1610 ; GFX8-NEXT: s_lshr_b32 s7, s2, 16
1611 ; GFX8-NEXT: s_lshr_b32 s8, s2, 24
1612 ; GFX8-NEXT: s_and_b32 s2, s2, s10
1613 ; GFX8-NEXT: s_lshl_b32 s6, s6, s11
1614 ; GFX8-NEXT: s_or_b32 s2, s2, s6
1615 ; GFX8-NEXT: s_and_b32 s6, s7, s10
1616 ; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1617 ; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000
1618 ; GFX8-NEXT: v_mul_lo_u32 v2, v1, v0
1619 ; GFX8-NEXT: s_lshr_b32 s9, s3, 8
1620 ; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
1621 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16
1622 ; GFX8-NEXT: s_and_b32 s3, s3, s10
1623 ; GFX8-NEXT: s_or_b32 s2, s2, s6
1624 ; GFX8-NEXT: s_lshl_b32 s3, s3, s11
1625 ; GFX8-NEXT: s_and_b32 s6, s9, s10
1626 ; GFX8-NEXT: s_or_b32 s3, s8, s3
1627 ; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000
1628 ; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
1629 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16
1630 ; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2
1631 ; GFX8-NEXT: s_or_b32 s3, s3, s6
1632 ; GFX8-NEXT: s_lshr_b32 s6, s4, 8
1633 ; GFX8-NEXT: s_and_b32 s6, s6, s10
1634 ; GFX8-NEXT: s_lshr_b32 s7, s4, 16
1635 ; GFX8-NEXT: s_lshr_b32 s8, s4, 24
1636 ; GFX8-NEXT: s_and_b32 s4, s4, s10
1637 ; GFX8-NEXT: s_lshl_b32 s6, s6, s11
1638 ; GFX8-NEXT: s_or_b32 s4, s4, s6
1639 ; GFX8-NEXT: s_and_b32 s6, s7, s10
1640 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
1641 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
1642 ; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000
1643 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2
1644 ; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
1645 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16
1646 ; GFX8-NEXT: s_or_b32 s4, s4, s6
1647 ; GFX8-NEXT: v_mul_hi_u32 v0, s4, v0
1648 ; GFX8-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
1649 ; GFX8-NEXT: v_cvt_u32_f32_e32 v2, v2
1650 ; GFX8-NEXT: s_lshr_b32 s9, s5, 8
1651 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
1652 ; GFX8-NEXT: s_and_b32 s5, s5, s10
1653 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, v2
1654 ; GFX8-NEXT: s_lshl_b32 s5, s5, s11
1655 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0
1656 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0
1657 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1658 ; GFX8-NEXT: v_mul_hi_u32 v1, v2, v1
1659 ; GFX8-NEXT: s_and_b32 s6, s9, s10
1660 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1661 ; GFX8-NEXT: s_or_b32 s5, s8, s5
1662 ; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000
1663 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0
1664 ; GFX8-NEXT: s_bfe_u32 s5, s5, 0x100000
1665 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16
1666 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1667 ; GFX8-NEXT: s_or_b32 s5, s5, s6
1668 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1669 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1
1670 ; GFX8-NEXT: v_mul_hi_u32 v1, s5, v1
1671 ; GFX8-NEXT: s_mov_b32 s6, 0xffffff
1672 ; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v0
1673 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24
1674 ; GFX8-NEXT: v_and_b32_e32 v0, s6, v0
1675 ; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s0
1676 ; GFX8-NEXT: s_lshr_b32 s0, s2, 1
1677 ; GFX8-NEXT: v_and_b32_e32 v2, s6, v3
1678 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, v2, s0
1679 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s5, v1
1680 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
1681 ; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
1682 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
1683 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1684 ; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
1685 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
1686 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1687 ; GFX8-NEXT: v_mov_b32_e32 v4, 0xffffff
1688 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v1
1689 ; GFX8-NEXT: v_and_b32_e32 v1, v1, v4
1690 ; GFX8-NEXT: s_lshr_b32 s0, s3, 1
1691 ; GFX8-NEXT: v_and_b32_e32 v2, v2, v4
1692 ; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s1
1693 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, v2, s0
1694 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
1695 ; GFX8-NEXT: v_mov_b32_e32 v2, 8
1696 ; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1697 ; GFX8-NEXT: v_mov_b32_e32 v4, 16
1698 ; GFX8-NEXT: v_or_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1699 ; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1700 ; GFX8-NEXT: v_or_b32_e32 v0, v3, v0
1701 ; GFX8-NEXT: v_and_b32_e32 v3, s10, v1
1702 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3
1703 ; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1704 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
1705 ; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
1706 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
1707 ; GFX8-NEXT: v_readfirstlane_b32 s1, v1
1708 ; GFX8-NEXT: ; return to shader part epilog
1710 ; GFX9-LABEL: s_fshl_v2i24:
1712 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1713 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
1714 ; GFX9-NEXT: s_lshr_b32 s7, s0, 8
1715 ; GFX9-NEXT: s_movk_i32 s12, 0xff
1716 ; GFX9-NEXT: s_and_b32 s7, s7, s12
1717 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1718 ; GFX9-NEXT: s_bfe_u32 s13, 8, 0x100000
1719 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
1720 ; GFX9-NEXT: s_lshr_b32 s9, s0, 16
1721 ; GFX9-NEXT: s_lshr_b32 s10, s0, 24
1722 ; GFX9-NEXT: s_and_b32 s0, s0, s12
1723 ; GFX9-NEXT: s_lshl_b32 s7, s7, s13
1724 ; GFX9-NEXT: s_or_b32 s0, s0, s7
1725 ; GFX9-NEXT: s_and_b32 s7, s9, s12
1726 ; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000
1727 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1728 ; GFX9-NEXT: s_lshr_b32 s11, s1, 8
1729 ; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000
1730 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16
1731 ; GFX9-NEXT: s_and_b32 s1, s1, s12
1732 ; GFX9-NEXT: v_mul_lo_u32 v2, v1, v0
1733 ; GFX9-NEXT: s_or_b32 s0, s0, s7
1734 ; GFX9-NEXT: s_lshl_b32 s1, s1, s13
1735 ; GFX9-NEXT: s_and_b32 s7, s11, s12
1736 ; GFX9-NEXT: s_or_b32 s1, s10, s1
1737 ; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000
1738 ; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
1739 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16
1740 ; GFX9-NEXT: s_or_b32 s1, s1, s7
1741 ; GFX9-NEXT: s_lshr_b32 s7, s2, 8
1742 ; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2
1743 ; GFX9-NEXT: s_and_b32 s7, s7, s12
1744 ; GFX9-NEXT: s_lshr_b32 s9, s2, 16
1745 ; GFX9-NEXT: s_lshr_b32 s10, s2, 24
1746 ; GFX9-NEXT: s_and_b32 s2, s2, s12
1747 ; GFX9-NEXT: s_lshl_b32 s7, s7, s13
1748 ; GFX9-NEXT: s_or_b32 s2, s2, s7
1749 ; GFX9-NEXT: s_and_b32 s7, s9, s12
1750 ; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000
1751 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v2
1752 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
1753 ; GFX9-NEXT: s_lshr_b32 s11, s3, 8
1754 ; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000
1755 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16
1756 ; GFX9-NEXT: s_and_b32 s3, s3, s12
1757 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2
1758 ; GFX9-NEXT: s_or_b32 s2, s2, s7
1759 ; GFX9-NEXT: s_lshl_b32 s3, s3, s13
1760 ; GFX9-NEXT: s_and_b32 s7, s11, s12
1761 ; GFX9-NEXT: s_or_b32 s3, s10, s3
1762 ; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000
1763 ; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000
1764 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16
1765 ; GFX9-NEXT: s_or_b32 s3, s3, s7
1766 ; GFX9-NEXT: s_lshr_b32 s7, s4, 8
1767 ; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
1768 ; GFX9-NEXT: s_and_b32 s7, s7, s12
1769 ; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2
1770 ; GFX9-NEXT: s_lshr_b32 s9, s4, 16
1771 ; GFX9-NEXT: s_lshr_b32 s10, s4, 24
1772 ; GFX9-NEXT: s_and_b32 s4, s4, s12
1773 ; GFX9-NEXT: s_lshl_b32 s7, s7, s13
1774 ; GFX9-NEXT: s_or_b32 s4, s4, s7
1775 ; GFX9-NEXT: s_and_b32 s7, s9, s12
1776 ; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000
1777 ; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000
1778 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16
1779 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, v2
1780 ; GFX9-NEXT: s_or_b32 s4, s4, s7
1781 ; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0
1782 ; GFX9-NEXT: s_lshr_b32 s11, s5, 8
1783 ; GFX9-NEXT: s_and_b32 s5, s5, s12
1784 ; GFX9-NEXT: v_mul_hi_u32 v1, v2, v1
1785 ; GFX9-NEXT: s_lshl_b32 s5, s5, s13
1786 ; GFX9-NEXT: s_and_b32 s7, s11, s12
1787 ; GFX9-NEXT: s_or_b32 s5, s10, s5
1788 ; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000
1789 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
1790 ; GFX9-NEXT: s_bfe_u32 s5, s5, 0x100000
1791 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16
1792 ; GFX9-NEXT: s_or_b32 s5, s5, s7
1793 ; GFX9-NEXT: v_add_u32_e32 v1, v2, v1
1794 ; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1
1795 ; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0
1796 ; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0
1797 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1798 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1799 ; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0
1800 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1801 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24
1802 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1803 ; GFX9-NEXT: s_mov_b32 s7, 0xffffff
1804 ; GFX9-NEXT: v_sub_u32_e32 v3, 23, v0
1805 ; GFX9-NEXT: s_lshr_b32 s2, s2, 1
1806 ; GFX9-NEXT: v_and_b32_e32 v3, s7, v3
1807 ; GFX9-NEXT: v_and_b32_e32 v0, s7, v0
1808 ; GFX9-NEXT: v_lshrrev_b32_e64 v3, v3, s2
1809 ; GFX9-NEXT: v_sub_u32_e32 v1, s5, v1
1810 ; GFX9-NEXT: v_lshl_or_b32 v0, s0, v0, v3
1811 ; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1
1812 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
1813 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
1814 ; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1
1815 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
1816 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
1817 ; GFX9-NEXT: v_mov_b32_e32 v2, 0xffffff
1818 ; GFX9-NEXT: v_sub_u32_e32 v3, 23, v1
1819 ; GFX9-NEXT: v_and_b32_e32 v1, v1, v2
1820 ; GFX9-NEXT: s_lshr_b32 s0, s3, 1
1821 ; GFX9-NEXT: v_and_b32_e32 v2, v3, v2
1822 ; GFX9-NEXT: v_lshrrev_b32_e64 v2, v2, s0
1823 ; GFX9-NEXT: s_mov_b32 s6, 8
1824 ; GFX9-NEXT: v_lshl_or_b32 v1, s1, v1, v2
1825 ; GFX9-NEXT: s_mov_b32 s8, 16
1826 ; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1827 ; GFX9-NEXT: v_and_b32_e32 v3, s12, v1
1828 ; GFX9-NEXT: v_and_or_b32 v2, v0, s12, v2
1829 ; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1830 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3
1831 ; GFX9-NEXT: v_or3_b32 v0, v2, v0, v3
1832 ; GFX9-NEXT: v_bfe_u32 v2, v1, 8, 8
1833 ; GFX9-NEXT: v_bfe_u32 v1, v1, 16, 8
1834 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, 8, v2
1835 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
1836 ; GFX9-NEXT: v_readfirstlane_b32 s1, v1
1837 ; GFX9-NEXT: ; return to shader part epilog
1839 ; GFX10-LABEL: s_fshl_v2i24:
1841 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1842 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, 24
1843 ; GFX10-NEXT: s_movk_i32 s9, 0xff
1844 ; GFX10-NEXT: s_lshr_b32 s10, s1, 8
1845 ; GFX10-NEXT: s_bfe_u32 s11, 8, 0x100000
1846 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
1847 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1
1848 ; GFX10-NEXT: s_and_b32 s1, s1, s9
1849 ; GFX10-NEXT: s_lshr_b32 s6, s0, 8
1850 ; GFX10-NEXT: s_lshr_b32 s8, s0, 24
1851 ; GFX10-NEXT: s_lshl_b32 s1, s1, s11
1852 ; GFX10-NEXT: s_and_b32 s6, s6, s9
1853 ; GFX10-NEXT: s_or_b32 s1, s8, s1
1854 ; GFX10-NEXT: s_lshr_b32 s8, s4, 8
1855 ; GFX10-NEXT: s_lshr_b32 s7, s0, 16
1856 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1857 ; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
1858 ; GFX10-NEXT: s_and_b32 s0, s0, s9
1859 ; GFX10-NEXT: s_lshl_b32 s6, s6, s11
1860 ; GFX10-NEXT: s_and_b32 s8, s8, s9
1861 ; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
1862 ; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1
1863 ; GFX10-NEXT: s_or_b32 s0, s0, s6
1864 ; GFX10-NEXT: s_and_b32 s6, s7, s9
1865 ; GFX10-NEXT: s_and_b32 s7, s10, s9
1866 ; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0
1867 ; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1
1868 ; GFX10-NEXT: s_lshr_b32 s10, s4, 16
1869 ; GFX10-NEXT: s_lshr_b32 s12, s4, 24
1870 ; GFX10-NEXT: s_and_b32 s4, s4, s9
1871 ; GFX10-NEXT: s_lshl_b32 s8, s8, s11
1872 ; GFX10-NEXT: s_lshr_b32 s13, s5, 8
1873 ; GFX10-NEXT: s_or_b32 s4, s4, s8
1874 ; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2
1875 ; GFX10-NEXT: s_and_b32 s8, s10, s9
1876 ; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3
1877 ; GFX10-NEXT: s_bfe_u32 s8, s8, 0x100000
1878 ; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000
1879 ; GFX10-NEXT: s_lshl_b32 s8, s8, 16
1880 ; GFX10-NEXT: s_and_b32 s5, s5, s9
1881 ; GFX10-NEXT: s_or_b32 s4, s4, s8
1882 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2
1883 ; GFX10-NEXT: s_lshl_b32 s5, s5, s11
1884 ; GFX10-NEXT: s_and_b32 s8, s13, s9
1885 ; GFX10-NEXT: s_or_b32 s5, s12, s5
1886 ; GFX10-NEXT: s_bfe_u32 s8, s8, 0x100000
1887 ; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3
1888 ; GFX10-NEXT: v_mul_hi_u32 v0, s4, v0
1889 ; GFX10-NEXT: s_bfe_u32 s5, s5, 0x100000
1890 ; GFX10-NEXT: s_lshl_b32 s8, s8, 16
1891 ; GFX10-NEXT: s_lshr_b32 s10, s2, 16
1892 ; GFX10-NEXT: s_or_b32 s5, s5, s8
1893 ; GFX10-NEXT: s_lshr_b32 s8, s2, 8
1894 ; GFX10-NEXT: v_mul_hi_u32 v1, s5, v1
1895 ; GFX10-NEXT: s_and_b32 s8, s8, s9
1896 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24
1897 ; GFX10-NEXT: s_and_b32 s12, s2, s9
1898 ; GFX10-NEXT: s_lshl_b32 s8, s8, s11
1899 ; GFX10-NEXT: s_and_b32 s10, s10, s9
1900 ; GFX10-NEXT: s_or_b32 s8, s12, s8
1901 ; GFX10-NEXT: s_lshr_b32 s2, s2, 24
1902 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24
1903 ; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000
1904 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, s4, v0
1905 ; GFX10-NEXT: s_bfe_u32 s4, s8, 0x100000
1906 ; GFX10-NEXT: s_bfe_u32 s8, s10, 0x100000
1907 ; GFX10-NEXT: s_bfe_u32 s7, s7, 0x100000
1908 ; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000
1909 ; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
1910 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, s5, v1
1911 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
1912 ; GFX10-NEXT: s_lshl_b32 s5, s8, 16
1913 ; GFX10-NEXT: s_lshr_b32 s8, s3, 8
1914 ; GFX10-NEXT: s_and_b32 s3, s3, s9
1915 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
1916 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1917 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
1918 ; GFX10-NEXT: s_lshl_b32 s3, s3, s11
1919 ; GFX10-NEXT: s_or_b32 s4, s4, s5
1920 ; GFX10-NEXT: s_or_b32 s2, s2, s3
1921 ; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
1922 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
1923 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
1924 ; GFX10-NEXT: s_and_b32 s3, s8, s9
1925 ; GFX10-NEXT: s_mov_b32 s5, 0xffffff
1926 ; GFX10-NEXT: s_bfe_u32 s3, s3, 0x100000
1927 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
1928 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
1929 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
1930 ; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000
1931 ; GFX10-NEXT: s_lshl_b32 s3, s3, 16
1932 ; GFX10-NEXT: s_lshl_b32 s6, s6, 16
1933 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v0
1934 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
1935 ; GFX10-NEXT: v_mov_b32_e32 v3, 0xffffff
1936 ; GFX10-NEXT: s_or_b32 s2, s2, s3
1937 ; GFX10-NEXT: s_lshr_b32 s3, s4, 1
1938 ; GFX10-NEXT: v_and_b32_e32 v2, s5, v2
1939 ; GFX10-NEXT: v_sub_nc_u32_e32 v4, 23, v1
1940 ; GFX10-NEXT: s_lshr_b32 s2, s2, 1
1941 ; GFX10-NEXT: v_and_b32_e32 v0, s5, v0
1942 ; GFX10-NEXT: v_and_b32_e32 v1, v1, v3
1943 ; GFX10-NEXT: v_lshrrev_b32_e64 v2, v2, s3
1944 ; GFX10-NEXT: v_and_b32_e32 v4, v4, v3
1945 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
1946 ; GFX10-NEXT: s_lshl_b32 s7, s7, 16
1947 ; GFX10-NEXT: s_or_b32 s0, s0, s6
1948 ; GFX10-NEXT: s_or_b32 s1, s1, s7
1949 ; GFX10-NEXT: v_lshrrev_b32_e64 v3, v4, s2
1950 ; GFX10-NEXT: v_lshl_or_b32 v0, s0, v0, v2
1951 ; GFX10-NEXT: s_mov_b32 s0, 8
1952 ; GFX10-NEXT: v_lshl_or_b32 v1, s1, v1, v3
1953 ; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
1954 ; GFX10-NEXT: s_mov_b32 s0, 16
1955 ; GFX10-NEXT: v_and_b32_e32 v3, s9, v1
1956 ; GFX10-NEXT: v_and_or_b32 v2, v0, s9, v2
1957 ; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
1958 ; GFX10-NEXT: v_bfe_u32 v4, v1, 8, 8
1959 ; GFX10-NEXT: v_bfe_u32 v1, v1, 16, 8
1960 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3
1961 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 8, v4
1962 ; GFX10-NEXT: v_or3_b32 v0, v2, v0, v3
1963 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1
1964 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
1965 ; GFX10-NEXT: ; return to shader part epilog
1966 %lhs = bitcast i48 %lhs.arg to <2 x i24>
1967 %rhs = bitcast i48 %rhs.arg to <2 x i24>
1968 %amt = bitcast i48 %amt.arg to <2 x i24>
1969 %result = call <2 x i24> @llvm.fshl.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
1970 %cast.result = bitcast <2 x i24> %result to i48
1971 ret i48 %cast.result
; VGPR variant of the <2 x i24> funnel-shift-left test: %lhs, %rhs and %amt are
; ordinary (non-inreg) arguments and the vector result is returned directly.
; In every expected sequence below, each lane's shift amount is masked with
; 0xffffff and reduced modulo 24 using a reciprocal-based estimate
; (v_rcp_iflag_f32, scale by 0x4f7ffffe, multiply by 0xffffffe8, i.e. -24 as a
; 32-bit value) followed by two compare-and-subtract-24 correction steps.
; The lane result is then (lhs << amt) | (bits 1..23 of rhs >> (23 - amt)).
1974 define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
1975 ; GFX6-LABEL: v_fshl_v2i24:
1977 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1978 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
1979 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6
1980 ; GFX6-NEXT: v_mov_b32_e32 v7, 0xffffffe8
1981 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v9, 24
1982 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
1983 ; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
1984 ; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6
1985 ; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 23
1986 ; GFX6-NEXT: v_mul_lo_u32 v8, v7, v6
1987 ; GFX6-NEXT: v_mul_hi_u32 v8, v6, v8
1988 ; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8
1989 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v8, v9
1990 ; GFX6-NEXT: v_mul_hi_u32 v6, v4, v6
1991 ; GFX6-NEXT: v_mov_b32_e32 v9, 0xffffff
1992 ; GFX6-NEXT: v_and_b32_e32 v5, v5, v9
1993 ; GFX6-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
1994 ; GFX6-NEXT: v_cvt_u32_f32_e32 v8, v8
1995 ; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24
1996 ; GFX6-NEXT: v_mul_lo_u32 v7, v7, v8
1997 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v6
1998 ; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4
1999 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2000 ; GFX6-NEXT: v_mul_hi_u32 v7, v8, v7
2001 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2002 ; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4
2003 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2004 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2005 ; GFX6-NEXT: v_add_i32_e32 v7, vcc, v8, v7
2006 ; GFX6-NEXT: v_mul_hi_u32 v7, v5, v7
2007 ; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 23, v4
2008 ; GFX6-NEXT: v_and_b32_e32 v4, v4, v9
2009 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v4, v0
2010 ; GFX6-NEXT: v_and_b32_e32 v4, v6, v9
2011 ; GFX6-NEXT: v_mul_lo_u32 v6, v7, 24
2012 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
2013 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
2014 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6
2015 ; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
2016 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2017 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2018 ; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
2019 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2020 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2021 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
2022 ; GFX6-NEXT: v_and_b32_e32 v2, v2, v9
2023 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1
2024 ; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 23
2025 ; GFX6-NEXT: v_and_b32_e32 v3, v4, v9
2026 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2
2027 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
2028 ; GFX6-NEXT: s_setpc_b64 s[30:31]
; fiji expectations: same instruction order as the tahiti sequence above, with
; the v_*_u32 add/sub mnemonics in place of the v_*_i32 ones.
2030 ; GFX8-LABEL: v_fshl_v2i24:
2032 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2033 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
2034 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v6, v6
2035 ; GFX8-NEXT: v_mov_b32_e32 v7, 0xffffffe8
2036 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v9, 24
2037 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2038 ; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
2039 ; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6
2040 ; GFX8-NEXT: v_bfe_u32 v2, v2, 1, 23
2041 ; GFX8-NEXT: v_mul_lo_u32 v8, v7, v6
2042 ; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8
2043 ; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8
2044 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v8, v9
2045 ; GFX8-NEXT: v_mul_hi_u32 v6, v4, v6
2046 ; GFX8-NEXT: v_mov_b32_e32 v9, 0xffffff
2047 ; GFX8-NEXT: v_and_b32_e32 v5, v5, v9
2048 ; GFX8-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
2049 ; GFX8-NEXT: v_cvt_u32_f32_e32 v8, v8
2050 ; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24
2051 ; GFX8-NEXT: v_mul_lo_u32 v7, v7, v8
2052 ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v6
2053 ; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4
2054 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2055 ; GFX8-NEXT: v_mul_hi_u32 v7, v8, v7
2056 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2057 ; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4
2058 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2059 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2060 ; GFX8-NEXT: v_add_u32_e32 v7, vcc, v8, v7
2061 ; GFX8-NEXT: v_mul_hi_u32 v7, v5, v7
2062 ; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 23, v4
2063 ; GFX8-NEXT: v_and_b32_e32 v4, v4, v9
2064 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v4, v0
2065 ; GFX8-NEXT: v_and_b32_e32 v4, v6, v9
2066 ; GFX8-NEXT: v_mul_lo_u32 v6, v7, 24
2067 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2
2068 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
2069 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6
2070 ; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2
2071 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2072 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2073 ; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2
2074 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2075 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2076 ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 23, v2
2077 ; GFX8-NEXT: v_and_b32_e32 v2, v2, v9
2078 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, v2, v1
2079 ; GFX8-NEXT: v_bfe_u32 v2, v3, 1, 23
2080 ; GFX8-NEXT: v_and_b32_e32 v3, v4, v9
2081 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, v3, v2
2082 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
2083 ; GFX8-NEXT: s_setpc_b64 s[30:31]
; gfx900 expectations: the add/sub forms no longer name vcc as a carry operand,
; and the final shift-left plus OR of each lane is a single v_lshl_or_b32.
2085 ; GFX9-LABEL: v_fshl_v2i24:
2087 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2088 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
2089 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6
2090 ; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8
2091 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v9, 24
2092 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v9, v9
2093 ; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
2094 ; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6
2095 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2096 ; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9
2097 ; GFX9-NEXT: v_bfe_u32 v2, v2, 1, 23
2098 ; GFX9-NEXT: v_mul_lo_u32 v8, v7, v6
2099 ; GFX9-NEXT: v_bfe_u32 v3, v3, 1, 23
2100 ; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8
2101 ; GFX9-NEXT: v_add_u32_e32 v6, v6, v8
2102 ; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v9
2103 ; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6
2104 ; GFX9-NEXT: v_mov_b32_e32 v9, 0xffffff
2105 ; GFX9-NEXT: v_and_b32_e32 v5, v5, v9
2106 ; GFX9-NEXT: v_mul_lo_u32 v7, v7, v8
2107 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
2108 ; GFX9-NEXT: v_mul_hi_u32 v7, v8, v7
2109 ; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6
2110 ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4
2111 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2112 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2113 ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4
2114 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2115 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2116 ; GFX9-NEXT: v_add_u32_e32 v6, v8, v7
2117 ; GFX9-NEXT: v_mul_hi_u32 v6, v5, v6
2118 ; GFX9-NEXT: v_sub_u32_e32 v7, 23, v4
2119 ; GFX9-NEXT: v_and_b32_e32 v7, v7, v9
2120 ; GFX9-NEXT: v_and_b32_e32 v4, v4, v9
2121 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
2122 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v7, v2
2123 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v4, v2
2124 ; GFX9-NEXT: v_sub_u32_e32 v2, v5, v6
2125 ; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2
2126 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2127 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2128 ; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2
2129 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2130 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2131 ; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2
2132 ; GFX9-NEXT: v_and_b32_e32 v4, v4, v9
2133 ; GFX9-NEXT: v_and_b32_e32 v2, v2, v9
2134 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, v4, v3
2135 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, v2, v3
2136 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; gfx1010 expectations: an extra s_waitcnt_vscnt at entry, v_*_nc_u32 add/sub
; forms, 0xffffffe8 passed as a literal operand of v_mul_lo_u32, vcc_lo as the
; condition register, and the two lanes' computations interleaved.
2138 ; GFX10-LABEL: v_fshl_v2i24:
2140 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2141 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2142 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
2143 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v7, 24
2144 ; GFX10-NEXT: v_mov_b32_e32 v10, 0xffffff
2145 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2146 ; GFX10-NEXT: v_bfe_u32 v2, v2, 1, 23
2147 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6
2148 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v7, v7
2149 ; GFX10-NEXT: v_and_b32_e32 v5, v5, v10
2150 ; GFX10-NEXT: v_bfe_u32 v3, v3, 1, 23
2151 ; GFX10-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
2152 ; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7
2153 ; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6
2154 ; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v7
2155 ; GFX10-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6
2156 ; GFX10-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7
2157 ; GFX10-NEXT: v_mul_hi_u32 v8, v6, v8
2158 ; GFX10-NEXT: v_mul_hi_u32 v9, v7, v9
2159 ; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v8
2160 ; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v9
2161 ; GFX10-NEXT: v_mul_hi_u32 v6, v4, v6
2162 ; GFX10-NEXT: v_mul_hi_u32 v7, v5, v7
2163 ; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24
2164 ; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24
2165 ; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v6
2166 ; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v7
2167 ; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
2168 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
2169 ; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
2170 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2171 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
2172 ; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
2173 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2174 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
2175 ; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
2176 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2177 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
2178 ; GFX10-NEXT: v_sub_nc_u32_e32 v6, 23, v4
2179 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2180 ; GFX10-NEXT: v_and_b32_e32 v4, v4, v10
2181 ; GFX10-NEXT: v_and_b32_e32 v6, v6, v10
2182 ; GFX10-NEXT: v_sub_nc_u32_e32 v7, 23, v5
2183 ; GFX10-NEXT: v_and_b32_e32 v5, v5, v10
2184 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, v6, v2
2185 ; GFX10-NEXT: v_and_b32_e32 v7, v7, v10
2186 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, v4, v2
2187 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, v7, v3
2188 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, v5, v3
2189 ; GFX10-NEXT: s_setpc_b64 s[30:31]
; Test body: a single call to the vector funnel-shift-left intrinsic.
2190 %result = call <2 x i24> @llvm.fshl.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
2191 ret <2 x i24> %result
; Scalar i32 funnel shift left with a variable amount: an amdgpu_ps entry point
; whose three operands are all marked inreg. The expected code takes them from
; s0, s1 and s2 (presumably in argument order: lhs, rhs, amt) and hands the
; result back in s0 through v_readfirstlane_b32.
; Expected lowering on every target: v_alignbit_b32 of the s0/s1 pair by 1,
; s0 shifted right by 1, the amount inverted with s_not_b32, then a second
; v_alignbit_b32 using the inverted amount.
2194 define amdgpu_ps i32 @s_fshl_i32(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
2195 ; GFX6-LABEL: s_fshl_i32:
2197 ; GFX6-NEXT: v_mov_b32_e32 v0, s1
2198 ; GFX6-NEXT: s_not_b32 s1, s2
2199 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 1
2200 ; GFX6-NEXT: s_lshr_b32 s0, s0, 1
2201 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2202 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1
2203 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
2204 ; GFX6-NEXT: ; return to shader part epilog
2206 ; GFX8-LABEL: s_fshl_i32:
2208 ; GFX8-NEXT: v_mov_b32_e32 v0, s1
2209 ; GFX8-NEXT: s_not_b32 s1, s2
2210 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 1
2211 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
2212 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2213 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1
2214 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
2215 ; GFX8-NEXT: ; return to shader part epilog
2217 ; GFX9-LABEL: s_fshl_i32:
2219 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
2220 ; GFX9-NEXT: s_not_b32 s1, s2
2221 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 1
2222 ; GFX9-NEXT: s_lshr_b32 s0, s0, 1
2223 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
2224 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1
2225 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
2226 ; GFX9-NEXT: ; return to shader part epilog
; gfx1010 expectations: the SGPRs are used directly as v_alignbit_b32 operands,
; so the two v_mov_b32 copies seen for the other targets are absent.
2228 ; GFX10-LABEL: s_fshl_i32:
2230 ; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 1
2231 ; GFX10-NEXT: s_lshr_b32 s0, s0, 1
2232 ; GFX10-NEXT: s_not_b32 s1, s2
2233 ; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, s1
2234 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2235 ; GFX10-NEXT: ; return to shader part epilog
; Test body: a single call to the i32 funnel-shift-left intrinsic.
2236 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt)
; Scalar i32 funnel shift left by the constant 5 (amdgpu_ps, inreg operands).
; Every target is expected to emit one v_alignbit_b32 with the immediate -5 and
; then move the result to s0 with v_readfirstlane_b32.
; NOTE(review): -5 presumably acts as 32 - 5 = 27 because only the low bits of
; the shift operand are consumed -- confirm against the ISA documentation.
; On gfx1010 s1 is passed straight to v_alignbit_b32; the other targets copy it
; into v0 first.
2240 define amdgpu_ps i32 @s_fshl_i32_5(i32 inreg %lhs, i32 inreg %rhs) {
2241 ; GFX6-LABEL: s_fshl_i32_5:
2243 ; GFX6-NEXT: v_mov_b32_e32 v0, s1
2244 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, -5
2245 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
2246 ; GFX6-NEXT: ; return to shader part epilog
2248 ; GFX8-LABEL: s_fshl_i32_5:
2250 ; GFX8-NEXT: v_mov_b32_e32 v0, s1
2251 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, -5
2252 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
2253 ; GFX8-NEXT: ; return to shader part epilog
2255 ; GFX9-LABEL: s_fshl_i32_5:
2257 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
2258 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, -5
2259 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
2260 ; GFX9-NEXT: ; return to shader part epilog
2262 ; GFX10-LABEL: s_fshl_i32_5:
2264 ; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, -5
2265 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2266 ; GFX10-NEXT: ; return to shader part epilog
; Test body: the shift amount is the literal i32 5.
2267 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 5)
; Scalar i32 funnel shift left by the constant 8 (amdgpu_ps, inreg operands).
; Every target is expected to emit one v_alignbit_b32 with the immediate -8 and
; then move the result to s0 with v_readfirstlane_b32.
; NOTE(review): -8 presumably acts as 32 - 8 = 24 because only the low bits of
; the shift operand are consumed -- confirm against the ISA documentation.
; On gfx1010 s1 is passed straight to v_alignbit_b32; the other targets copy it
; into v0 first.
2271 define amdgpu_ps i32 @s_fshl_i32_8(i32 inreg %lhs, i32 inreg %rhs) {
2272 ; GFX6-LABEL: s_fshl_i32_8:
2274 ; GFX6-NEXT: v_mov_b32_e32 v0, s1
2275 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, -8
2276 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
2277 ; GFX6-NEXT: ; return to shader part epilog
2279 ; GFX8-LABEL: s_fshl_i32_8:
2281 ; GFX8-NEXT: v_mov_b32_e32 v0, s1
2282 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, -8
2283 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
2284 ; GFX8-NEXT: ; return to shader part epilog
2286 ; GFX9-LABEL: s_fshl_i32_8:
2288 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
2289 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, -8
2290 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
2291 ; GFX9-NEXT: ; return to shader part epilog
2293 ; GFX10-LABEL: s_fshl_i32_8:
2295 ; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, -8
2296 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2297 ; GFX10-NEXT: ; return to shader part epilog
; Test body: the shift amount is the literal i32 8.
2298 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 8)
; VGPR i32 funnel shift left with a variable amount (ordinary, non-inreg
; arguments; the expected code uses v0, v1, v2 and returns in v0).
; All four targets expect the same four instructions after the entry wait:
; v_alignbit_b32 of the v0/v1 pair by 1, v0 shifted right by 1, the amount
; inverted via xor with -1, and a second v_alignbit_b32 by the inverted amount.
; The gfx1010 sequence additionally starts with s_waitcnt_vscnt.
2302 define i32 @v_fshl_i32(i32 %lhs, i32 %rhs, i32 %amt) {
2303 ; GFX6-LABEL: v_fshl_i32:
2305 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2306 ; GFX6-NEXT: v_alignbit_b32 v1, v0, v1, 1
2307 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v0
2308 ; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
2309 ; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, v2
2310 ; GFX6-NEXT: s_setpc_b64 s[30:31]
2312 ; GFX8-LABEL: v_fshl_i32:
2314 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2315 ; GFX8-NEXT: v_alignbit_b32 v1, v0, v1, 1
2316 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v0
2317 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
2318 ; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, v2
2319 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2321 ; GFX9-LABEL: v_fshl_i32:
2323 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2324 ; GFX9-NEXT: v_alignbit_b32 v1, v0, v1, 1
2325 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v0
2326 ; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
2327 ; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, v2
2328 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2330 ; GFX10-LABEL: v_fshl_i32:
2332 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2333 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2334 ; GFX10-NEXT: v_alignbit_b32 v1, v0, v1, 1
2335 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
2336 ; GFX10-NEXT: v_xor_b32_e32 v2, -1, v2
2337 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, v2
2338 ; GFX10-NEXT: s_setpc_b64 s[30:31]
; Test body: a single call to the i32 funnel-shift-left intrinsic.
2339 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt)
; i32 funnel-shift-left by the constant 5 on non-inreg operands. Every run
; expects the whole operation to fold into a single v_alignbit_b32 on v0/v1
; with shift operand -5; the gfx1010 run also expects s_waitcnt_vscnt at entry.
2343 define i32 @v_fshl_i32_5(i32 %lhs, i32 %rhs) {
2344 ; GFX6-LABEL: v_fshl_i32_5:
2346 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2347 ; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, -5
2348 ; GFX6-NEXT: s_setpc_b64 s[30:31]
2350 ; GFX8-LABEL: v_fshl_i32_5:
2352 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2353 ; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, -5
2354 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2356 ; GFX9-LABEL: v_fshl_i32_5:
2358 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2359 ; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, -5
2360 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2362 ; GFX10-LABEL: v_fshl_i32_5:
2364 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2365 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2366 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, -5
2367 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2368 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 5)
; i32 funnel-shift-left by the constant 8 on non-inreg operands. Every run
; expects a single v_alignbit_b32 on v0/v1 with shift operand -8; the gfx1010
; run also expects s_waitcnt_vscnt at entry.
2372 define i32 @v_fshl_i32_8(i32 %lhs, i32 %rhs) {
2373 ; GFX6-LABEL: v_fshl_i32_8:
2375 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2376 ; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, -8
2377 ; GFX6-NEXT: s_setpc_b64 s[30:31]
2379 ; GFX8-LABEL: v_fshl_i32_8:
2381 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2382 ; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, -8
2383 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2385 ; GFX9-LABEL: v_fshl_i32_8:
2387 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2388 ; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, -8
2389 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2391 ; GFX10-LABEL: v_fshl_i32_8:
2393 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2394 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2395 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, -8
2396 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2397 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 8)
; Variable-amount i32 funnel-shift-left with %lhs and %rhs inreg and %amt not
; inreg; the result is bitcast to float for the return. The checks read the
; operands from s0/s1 and the amount from v0. tahiti, fiji and gfx900 expect s1
; to be copied into v1 before the first v_alignbit_b32; the gfx1010 run passes
; s1 directly. The extra shift of lhs by one is expected as a scalar
; s_lshr_b32 on every run.
2401 define amdgpu_ps float @v_fshl_i32_ssv(i32 inreg %lhs, i32 inreg %rhs, i32 %amt) {
2402 ; GFX6-LABEL: v_fshl_i32_ssv:
2404 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2405 ; GFX6-NEXT: v_alignbit_b32 v1, s0, v1, 1
2406 ; GFX6-NEXT: s_lshr_b32 s0, s0, 1
2407 ; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
2408 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v1, v0
2409 ; GFX6-NEXT: ; return to shader part epilog
2411 ; GFX8-LABEL: v_fshl_i32_ssv:
2413 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2414 ; GFX8-NEXT: v_alignbit_b32 v1, s0, v1, 1
2415 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
2416 ; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
2417 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v1, v0
2418 ; GFX8-NEXT: ; return to shader part epilog
2420 ; GFX9-LABEL: v_fshl_i32_ssv:
2422 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
2423 ; GFX9-NEXT: v_alignbit_b32 v1, s0, v1, 1
2424 ; GFX9-NEXT: s_lshr_b32 s0, s0, 1
2425 ; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
2426 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v1, v0
2427 ; GFX9-NEXT: ; return to shader part epilog
2429 ; GFX10-LABEL: v_fshl_i32_ssv:
2431 ; GFX10-NEXT: v_alignbit_b32 v1, s0, s1, 1
2432 ; GFX10-NEXT: v_xor_b32_e32 v0, -1, v0
2433 ; GFX10-NEXT: s_lshr_b32 s0, s0, 1
2434 ; GFX10-NEXT: v_alignbit_b32 v0, s0, v1, v0
2435 ; GFX10-NEXT: ; return to shader part epilog
2436 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt)
2437 %cast.result = bitcast i32 %result to float
2438 ret float %cast.result
; Variable-amount i32 funnel-shift-left with %lhs and %amt inreg and %rhs not
; inreg; the result is bitcast to float for the return. The checks read lhs
; from s0, rhs from v0 and the amount from s1. The amount is inverted on the
; scalar side with s_not_b32. tahiti, fiji and gfx900 then copy it into v1 for
; the final v_alignbit_b32, while the gfx1010 run passes s1 directly.
2441 define amdgpu_ps float @v_fshl_i32_svs(i32 inreg %lhs, i32 %rhs, i32 inreg %amt) {
2442 ; GFX6-LABEL: v_fshl_i32_svs:
2444 ; GFX6-NEXT: s_not_b32 s1, s1
2445 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 1
2446 ; GFX6-NEXT: s_lshr_b32 s0, s0, 1
2447 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2448 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1
2449 ; GFX6-NEXT: ; return to shader part epilog
2451 ; GFX8-LABEL: v_fshl_i32_svs:
2453 ; GFX8-NEXT: s_not_b32 s1, s1
2454 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 1
2455 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
2456 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2457 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1
2458 ; GFX8-NEXT: ; return to shader part epilog
2460 ; GFX9-LABEL: v_fshl_i32_svs:
2462 ; GFX9-NEXT: s_not_b32 s1, s1
2463 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 1
2464 ; GFX9-NEXT: s_lshr_b32 s0, s0, 1
2465 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
2466 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1
2467 ; GFX9-NEXT: ; return to shader part epilog
2469 ; GFX10-LABEL: v_fshl_i32_svs:
2471 ; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, 1
2472 ; GFX10-NEXT: s_lshr_b32 s0, s0, 1
2473 ; GFX10-NEXT: s_not_b32 s1, s1
2474 ; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, s1
2475 ; GFX10-NEXT: ; return to shader part epilog
2476 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt)
2477 %cast.result = bitcast i32 %result to float
2478 ret float %cast.result
; Variable-amount i32 funnel-shift-left, result bitcast to float for the
; return.
; NOTE(review): the "_vss" suffix and the sibling @v_fshl_i16_vss (whose %lhs
; is not inreg) suggest %lhs was meant to be a non-inreg argument here. As
; written all three parameters are inreg, so the checks below read lhs/rhs/amt
; from s0/s1/s2 and no operand arrives in a VGPR. Confirm the intent before
; changing the signature; any change requires regenerating these assertions
; with utils/update_llc_test_checks.py.
2481 define amdgpu_ps float @v_fshl_i32_vss(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
2482 ; GFX6-LABEL: v_fshl_i32_vss:
2484 ; GFX6-NEXT: v_mov_b32_e32 v0, s1
2485 ; GFX6-NEXT: s_not_b32 s1, s2
2486 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 1
2487 ; GFX6-NEXT: s_lshr_b32 s0, s0, 1
2488 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2489 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1
2490 ; GFX6-NEXT: ; return to shader part epilog
2492 ; GFX8-LABEL: v_fshl_i32_vss:
2494 ; GFX8-NEXT: v_mov_b32_e32 v0, s1
2495 ; GFX8-NEXT: s_not_b32 s1, s2
2496 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 1
2497 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
2498 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2499 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1
2500 ; GFX8-NEXT: ; return to shader part epilog
2502 ; GFX9-LABEL: v_fshl_i32_vss:
2504 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
2505 ; GFX9-NEXT: s_not_b32 s1, s2
2506 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 1
2507 ; GFX9-NEXT: s_lshr_b32 s0, s0, 1
2508 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
2509 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1
2510 ; GFX9-NEXT: ; return to shader part epilog
2512 ; GFX10-LABEL: v_fshl_i32_vss:
2514 ; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 1
2515 ; GFX10-NEXT: s_lshr_b32 s0, s0, 1
2516 ; GFX10-NEXT: s_not_b32 s1, s2
2517 ; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, s1
2518 ; GFX10-NEXT: ; return to shader part epilog
2519 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt)
2520 %cast.result = bitcast i32 %result to float
2521 ret float %cast.result
; <2 x i32> funnel-shift-left with per-element variable amounts. Each element
; is expected to expand to the same four-instruction sequence as the scalar
; variable-amount case (v_alignbit_b32 by 1, v_lshrrev_b32 by 1, v_xor_b32 -1,
; v_alignbit_b32). tahiti, fiji and gfx900 expect the two element sequences
; back to back; the gfx1010 run expects both final v_alignbit_b32 instructions
; grouped at the end and keeps the intermediates in v2..v5.
2524 define <2 x i32> @v_fshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) {
2525 ; GFX6-LABEL: v_fshl_v2i32:
2527 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2528 ; GFX6-NEXT: v_alignbit_b32 v2, v0, v2, 1
2529 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v0
2530 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
2531 ; GFX6-NEXT: v_alignbit_b32 v0, v0, v2, v4
2532 ; GFX6-NEXT: v_alignbit_b32 v2, v1, v3, 1
2533 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
2534 ; GFX6-NEXT: v_xor_b32_e32 v3, -1, v5
2535 ; GFX6-NEXT: v_alignbit_b32 v1, v1, v2, v3
2536 ; GFX6-NEXT: s_setpc_b64 s[30:31]
2538 ; GFX8-LABEL: v_fshl_v2i32:
2540 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2541 ; GFX8-NEXT: v_alignbit_b32 v2, v0, v2, 1
2542 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v0
2543 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4
2544 ; GFX8-NEXT: v_alignbit_b32 v0, v0, v2, v4
2545 ; GFX8-NEXT: v_alignbit_b32 v2, v1, v3, 1
2546 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 1, v1
2547 ; GFX8-NEXT: v_xor_b32_e32 v3, -1, v5
2548 ; GFX8-NEXT: v_alignbit_b32 v1, v1, v2, v3
2549 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2551 ; GFX9-LABEL: v_fshl_v2i32:
2553 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2554 ; GFX9-NEXT: v_alignbit_b32 v2, v0, v2, 1
2555 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v0
2556 ; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4
2557 ; GFX9-NEXT: v_alignbit_b32 v0, v0, v2, v4
2558 ; GFX9-NEXT: v_alignbit_b32 v2, v1, v3, 1
2559 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 1, v1
2560 ; GFX9-NEXT: v_xor_b32_e32 v3, -1, v5
2561 ; GFX9-NEXT: v_alignbit_b32 v1, v1, v2, v3
2562 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2564 ; GFX10-LABEL: v_fshl_v2i32:
2566 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2567 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2568 ; GFX10-NEXT: v_alignbit_b32 v2, v0, v2, 1
2569 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
2570 ; GFX10-NEXT: v_xor_b32_e32 v4, -1, v4
2571 ; GFX10-NEXT: v_alignbit_b32 v3, v1, v3, 1
2572 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 1, v1
2573 ; GFX10-NEXT: v_xor_b32_e32 v5, -1, v5
2574 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v2, v4
2575 ; GFX10-NEXT: v_alignbit_b32 v1, v1, v3, v5
2576 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2577 %result = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt)
2578 ret <2 x i32> %result
; <3 x i32> funnel-shift-left with per-element variable amounts. The checks
; expect three copies of the four-instruction per-element sequence
; (v_alignbit_b32 by 1, v_lshrrev_b32 by 1, v_xor_b32 -1, v_alignbit_b32).
; tahiti, fiji and gfx900 emit them element by element, reusing v3/v4 as
; temporaries; the gfx1010 run groups the three final v_alignbit_b32
; instructions at the end.
2581 define <3 x i32> @v_fshl_v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) {
2582 ; GFX6-LABEL: v_fshl_v3i32:
2584 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2585 ; GFX6-NEXT: v_alignbit_b32 v3, v0, v3, 1
2586 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v0
2587 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
2588 ; GFX6-NEXT: v_alignbit_b32 v0, v0, v3, v6
2589 ; GFX6-NEXT: v_alignbit_b32 v3, v1, v4, 1
2590 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
2591 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v7
2592 ; GFX6-NEXT: v_alignbit_b32 v1, v1, v3, v4
2593 ; GFX6-NEXT: v_alignbit_b32 v3, v2, v5, 1
2594 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v2
2595 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v8
2596 ; GFX6-NEXT: v_alignbit_b32 v2, v2, v3, v4
2597 ; GFX6-NEXT: s_setpc_b64 s[30:31]
2599 ; GFX8-LABEL: v_fshl_v3i32:
2601 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2602 ; GFX8-NEXT: v_alignbit_b32 v3, v0, v3, 1
2603 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v0
2604 ; GFX8-NEXT: v_xor_b32_e32 v6, -1, v6
2605 ; GFX8-NEXT: v_alignbit_b32 v0, v0, v3, v6
2606 ; GFX8-NEXT: v_alignbit_b32 v3, v1, v4, 1
2607 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 1, v1
2608 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v7
2609 ; GFX8-NEXT: v_alignbit_b32 v1, v1, v3, v4
2610 ; GFX8-NEXT: v_alignbit_b32 v3, v2, v5, 1
2611 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v2
2612 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v8
2613 ; GFX8-NEXT: v_alignbit_b32 v2, v2, v3, v4
2614 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2616 ; GFX9-LABEL: v_fshl_v3i32:
2618 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2619 ; GFX9-NEXT: v_alignbit_b32 v3, v0, v3, 1
2620 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v0
2621 ; GFX9-NEXT: v_xor_b32_e32 v6, -1, v6
2622 ; GFX9-NEXT: v_alignbit_b32 v0, v0, v3, v6
2623 ; GFX9-NEXT: v_alignbit_b32 v3, v1, v4, 1
2624 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 1, v1
2625 ; GFX9-NEXT: v_xor_b32_e32 v4, -1, v7
2626 ; GFX9-NEXT: v_alignbit_b32 v1, v1, v3, v4
2627 ; GFX9-NEXT: v_alignbit_b32 v3, v2, v5, 1
2628 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v2
2629 ; GFX9-NEXT: v_xor_b32_e32 v4, -1, v8
2630 ; GFX9-NEXT: v_alignbit_b32 v2, v2, v3, v4
2631 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2633 ; GFX10-LABEL: v_fshl_v3i32:
2635 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2636 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2637 ; GFX10-NEXT: v_alignbit_b32 v3, v0, v3, 1
2638 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
2639 ; GFX10-NEXT: v_xor_b32_e32 v6, -1, v6
2640 ; GFX10-NEXT: v_alignbit_b32 v4, v1, v4, 1
2641 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 1, v1
2642 ; GFX10-NEXT: v_xor_b32_e32 v7, -1, v7
2643 ; GFX10-NEXT: v_alignbit_b32 v5, v2, v5, 1
2644 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 1, v2
2645 ; GFX10-NEXT: v_xor_b32_e32 v8, -1, v8
2646 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v3, v6
2647 ; GFX10-NEXT: v_alignbit_b32 v1, v1, v4, v7
2648 ; GFX10-NEXT: v_alignbit_b32 v2, v2, v5, v8
2649 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2650 %result = call <3 x i32> @llvm.fshl.v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt)
2651 ret <3 x i32> %result
; <4 x i32> funnel-shift-left with per-element variable amounts. The checks
; expect four copies of the four-instruction per-element sequence
; (v_alignbit_b32 by 1, v_lshrrev_b32 by 1, v_xor_b32 -1, v_alignbit_b32).
; tahiti, fiji and gfx900 emit them element by element, reusing v4/v5 as
; temporaries; the gfx1010 run groups the four final v_alignbit_b32
; instructions at the end.
2654 define <4 x i32> @v_fshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) {
2655 ; GFX6-LABEL: v_fshl_v4i32:
2657 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2658 ; GFX6-NEXT: v_alignbit_b32 v4, v0, v4, 1
2659 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v0
2660 ; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
2661 ; GFX6-NEXT: v_alignbit_b32 v0, v0, v4, v8
2662 ; GFX6-NEXT: v_alignbit_b32 v4, v1, v5, 1
2663 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
2664 ; GFX6-NEXT: v_xor_b32_e32 v5, -1, v9
2665 ; GFX6-NEXT: v_alignbit_b32 v1, v1, v4, v5
2666 ; GFX6-NEXT: v_alignbit_b32 v4, v2, v6, 1
2667 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v2
2668 ; GFX6-NEXT: v_xor_b32_e32 v5, -1, v10
2669 ; GFX6-NEXT: v_alignbit_b32 v2, v2, v4, v5
2670 ; GFX6-NEXT: v_alignbit_b32 v4, v3, v7, 1
2671 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 1, v3
2672 ; GFX6-NEXT: v_xor_b32_e32 v5, -1, v11
2673 ; GFX6-NEXT: v_alignbit_b32 v3, v3, v4, v5
2674 ; GFX6-NEXT: s_setpc_b64 s[30:31]
2676 ; GFX8-LABEL: v_fshl_v4i32:
2678 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2679 ; GFX8-NEXT: v_alignbit_b32 v4, v0, v4, 1
2680 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v0
2681 ; GFX8-NEXT: v_xor_b32_e32 v8, -1, v8
2682 ; GFX8-NEXT: v_alignbit_b32 v0, v0, v4, v8
2683 ; GFX8-NEXT: v_alignbit_b32 v4, v1, v5, 1
2684 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 1, v1
2685 ; GFX8-NEXT: v_xor_b32_e32 v5, -1, v9
2686 ; GFX8-NEXT: v_alignbit_b32 v1, v1, v4, v5
2687 ; GFX8-NEXT: v_alignbit_b32 v4, v2, v6, 1
2688 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v2
2689 ; GFX8-NEXT: v_xor_b32_e32 v5, -1, v10
2690 ; GFX8-NEXT: v_alignbit_b32 v2, v2, v4, v5
2691 ; GFX8-NEXT: v_alignbit_b32 v4, v3, v7, 1
2692 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 1, v3
2693 ; GFX8-NEXT: v_xor_b32_e32 v5, -1, v11
2694 ; GFX8-NEXT: v_alignbit_b32 v3, v3, v4, v5
2695 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2697 ; GFX9-LABEL: v_fshl_v4i32:
2699 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2700 ; GFX9-NEXT: v_alignbit_b32 v4, v0, v4, 1
2701 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v0
2702 ; GFX9-NEXT: v_xor_b32_e32 v8, -1, v8
2703 ; GFX9-NEXT: v_alignbit_b32 v0, v0, v4, v8
2704 ; GFX9-NEXT: v_alignbit_b32 v4, v1, v5, 1
2705 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 1, v1
2706 ; GFX9-NEXT: v_xor_b32_e32 v5, -1, v9
2707 ; GFX9-NEXT: v_alignbit_b32 v1, v1, v4, v5
2708 ; GFX9-NEXT: v_alignbit_b32 v4, v2, v6, 1
2709 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v2
2710 ; GFX9-NEXT: v_xor_b32_e32 v5, -1, v10
2711 ; GFX9-NEXT: v_alignbit_b32 v2, v2, v4, v5
2712 ; GFX9-NEXT: v_alignbit_b32 v4, v3, v7, 1
2713 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 1, v3
2714 ; GFX9-NEXT: v_xor_b32_e32 v5, -1, v11
2715 ; GFX9-NEXT: v_alignbit_b32 v3, v3, v4, v5
2716 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2718 ; GFX10-LABEL: v_fshl_v4i32:
2720 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2721 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2722 ; GFX10-NEXT: v_alignbit_b32 v4, v0, v4, 1
2723 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
2724 ; GFX10-NEXT: v_xor_b32_e32 v8, -1, v8
2725 ; GFX10-NEXT: v_alignbit_b32 v5, v1, v5, 1
2726 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 1, v1
2727 ; GFX10-NEXT: v_xor_b32_e32 v9, -1, v9
2728 ; GFX10-NEXT: v_alignbit_b32 v6, v2, v6, 1
2729 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 1, v2
2730 ; GFX10-NEXT: v_xor_b32_e32 v10, -1, v10
2731 ; GFX10-NEXT: v_alignbit_b32 v7, v3, v7, 1
2732 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 1, v3
2733 ; GFX10-NEXT: v_xor_b32_e32 v11, -1, v11
2734 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v4, v8
2735 ; GFX10-NEXT: v_alignbit_b32 v1, v1, v5, v9
2736 ; GFX10-NEXT: v_alignbit_b32 v2, v2, v6, v10
2737 ; GFX10-NEXT: v_alignbit_b32 v3, v3, v7, v11
2738 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2739 %result = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt)
2740 ret <4 x i32> %result
; i16 funnel-shift-left with a variable amount and all three operands inreg.
; The checks expect a purely scalar expansion: the amount masked with 15
; (s_and_b32) drives the s_lshl_b32 of lhs, the inverted amount masked with 15
; (s_andn2_b32 15, amt) drives the s_lshr_b32 of rhs, and s_or_b32 combines the
; two halves. tahiti pre-shifts rhs with a single s_bfe_u32 0xf0001; the other
; three runs expect s_bfe_u32 0x100000 on rhs followed by a separate
; s_lshr_b32 whose shift amount is itself produced by s_bfe_u32 of the literal
; 1.
2743 define amdgpu_ps i16 @s_fshl_i16(i16 inreg %lhs, i16 inreg %rhs, i16 inreg %amt) {
2744 ; GFX6-LABEL: s_fshl_i16:
2746 ; GFX6-NEXT: s_and_b32 s3, s2, 15
2747 ; GFX6-NEXT: s_andn2_b32 s2, 15, s2
2748 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0x100000
2749 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0xf0001
2750 ; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
2751 ; GFX6-NEXT: s_lshl_b32 s0, s0, s3
2752 ; GFX6-NEXT: s_lshr_b32 s1, s1, s2
2753 ; GFX6-NEXT: s_or_b32 s0, s0, s1
2754 ; GFX6-NEXT: ; return to shader part epilog
2756 ; GFX8-LABEL: s_fshl_i16:
2758 ; GFX8-NEXT: s_and_b32 s3, s2, 15
2759 ; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
2760 ; GFX8-NEXT: s_andn2_b32 s2, 15, s2
2761 ; GFX8-NEXT: s_lshl_b32 s0, s0, s3
2762 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
2763 ; GFX8-NEXT: s_bfe_u32 s3, 1, 0x100000
2764 ; GFX8-NEXT: s_lshr_b32 s1, s1, s3
2765 ; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
2766 ; GFX8-NEXT: s_lshr_b32 s1, s1, s2
2767 ; GFX8-NEXT: s_or_b32 s0, s0, s1
2768 ; GFX8-NEXT: ; return to shader part epilog
2770 ; GFX9-LABEL: s_fshl_i16:
2772 ; GFX9-NEXT: s_and_b32 s3, s2, 15
2773 ; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000
2774 ; GFX9-NEXT: s_andn2_b32 s2, 15, s2
2775 ; GFX9-NEXT: s_lshl_b32 s0, s0, s3
2776 ; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
2777 ; GFX9-NEXT: s_bfe_u32 s3, 1, 0x100000
2778 ; GFX9-NEXT: s_lshr_b32 s1, s1, s3
2779 ; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000
2780 ; GFX9-NEXT: s_lshr_b32 s1, s1, s2
2781 ; GFX9-NEXT: s_or_b32 s0, s0, s1
2782 ; GFX9-NEXT: ; return to shader part epilog
2784 ; GFX10-LABEL: s_fshl_i16:
2786 ; GFX10-NEXT: s_and_b32 s3, s2, 15
2787 ; GFX10-NEXT: s_andn2_b32 s2, 15, s2
2788 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
2789 ; GFX10-NEXT: s_bfe_u32 s4, 1, 0x100000
2790 ; GFX10-NEXT: s_bfe_u32 s3, s3, 0x100000
2791 ; GFX10-NEXT: s_lshr_b32 s1, s1, s4
2792 ; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000
2793 ; GFX10-NEXT: s_lshl_b32 s0, s0, s3
2794 ; GFX10-NEXT: s_lshr_b32 s1, s1, s2
2795 ; GFX10-NEXT: s_or_b32 s0, s0, s1
2796 ; GFX10-NEXT: ; return to shader part epilog
2797 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt)
; i16 funnel-shift-left by the constant 4 with both operands inreg. tahiti is
; expected to emit three instructions: s_lshl_b32 by 4, s_bfe_u32 0x4000c on
; rhs, and s_or_b32. The fiji, gfx900 and gfx1010 runs instead expect the
; shift amounts 4 and 12 to be produced by s_bfe_u32 of a literal with
; 0x100000 before being used by s_lshl_b32 / s_lshr_b32.
; NOTE(review): the s_bfe_u32 of a literal looks like a constant the backend
; could fold; presumably a known codegen gap rather than intended output.
2801 define amdgpu_ps i16 @s_fshl_i16_4(i16 inreg %lhs, i16 inreg %rhs) {
2802 ; GFX6-LABEL: s_fshl_i16_4:
2804 ; GFX6-NEXT: s_lshl_b32 s0, s0, 4
2805 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x4000c
2806 ; GFX6-NEXT: s_or_b32 s0, s0, s1
2807 ; GFX6-NEXT: ; return to shader part epilog
2809 ; GFX8-LABEL: s_fshl_i16_4:
2811 ; GFX8-NEXT: s_bfe_u32 s2, 4, 0x100000
2812 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2
2813 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
2814 ; GFX8-NEXT: s_bfe_u32 s2, 12, 0x100000
2815 ; GFX8-NEXT: s_lshr_b32 s1, s1, s2
2816 ; GFX8-NEXT: s_or_b32 s0, s0, s1
2817 ; GFX8-NEXT: ; return to shader part epilog
2819 ; GFX9-LABEL: s_fshl_i16_4:
2821 ; GFX9-NEXT: s_bfe_u32 s2, 4, 0x100000
2822 ; GFX9-NEXT: s_lshl_b32 s0, s0, s2
2823 ; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
2824 ; GFX9-NEXT: s_bfe_u32 s2, 12, 0x100000
2825 ; GFX9-NEXT: s_lshr_b32 s1, s1, s2
2826 ; GFX9-NEXT: s_or_b32 s0, s0, s1
2827 ; GFX9-NEXT: ; return to shader part epilog
2829 ; GFX10-LABEL: s_fshl_i16_4:
2831 ; GFX10-NEXT: s_bfe_u32 s2, 4, 0x100000
2832 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
2833 ; GFX10-NEXT: s_bfe_u32 s3, 12, 0x100000
2834 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2
2835 ; GFX10-NEXT: s_lshr_b32 s1, s1, s3
2836 ; GFX10-NEXT: s_or_b32 s0, s0, s1
2837 ; GFX10-NEXT: ; return to shader part epilog
2838 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 4)
; i16 funnel-shift-left by the constant 5 with both operands inreg. tahiti is
; expected to emit three instructions: s_lshl_b32 by 5, s_bfe_u32 0x5000b on
; rhs, and s_or_b32. The fiji, gfx900 and gfx1010 runs instead expect the
; shift amounts 5 and 11 to be produced by s_bfe_u32 of a literal with
; 0x100000 before being used by s_lshl_b32 / s_lshr_b32.
; NOTE(review): the s_bfe_u32 of a literal looks like a constant the backend
; could fold; presumably a known codegen gap rather than intended output.
2842 define amdgpu_ps i16 @s_fshl_i16_5(i16 inreg %lhs, i16 inreg %rhs) {
2843 ; GFX6-LABEL: s_fshl_i16_5:
2845 ; GFX6-NEXT: s_lshl_b32 s0, s0, 5
2846 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x5000b
2847 ; GFX6-NEXT: s_or_b32 s0, s0, s1
2848 ; GFX6-NEXT: ; return to shader part epilog
2850 ; GFX8-LABEL: s_fshl_i16_5:
2852 ; GFX8-NEXT: s_bfe_u32 s2, 5, 0x100000
2853 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2
2854 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
2855 ; GFX8-NEXT: s_bfe_u32 s2, 11, 0x100000
2856 ; GFX8-NEXT: s_lshr_b32 s1, s1, s2
2857 ; GFX8-NEXT: s_or_b32 s0, s0, s1
2858 ; GFX8-NEXT: ; return to shader part epilog
2860 ; GFX9-LABEL: s_fshl_i16_5:
2862 ; GFX9-NEXT: s_bfe_u32 s2, 5, 0x100000
2863 ; GFX9-NEXT: s_lshl_b32 s0, s0, s2
2864 ; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
2865 ; GFX9-NEXT: s_bfe_u32 s2, 11, 0x100000
2866 ; GFX9-NEXT: s_lshr_b32 s1, s1, s2
2867 ; GFX9-NEXT: s_or_b32 s0, s0, s1
2868 ; GFX9-NEXT: ; return to shader part epilog
2870 ; GFX10-LABEL: s_fshl_i16_5:
2872 ; GFX10-NEXT: s_bfe_u32 s2, 5, 0x100000
2873 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
2874 ; GFX10-NEXT: s_bfe_u32 s3, 11, 0x100000
2875 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2
2876 ; GFX10-NEXT: s_lshr_b32 s1, s1, s3
2877 ; GFX10-NEXT: s_or_b32 s0, s0, s1
2878 ; GFX10-NEXT: ; return to shader part epilog
2879 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 5)
; i16 funnel-shift-left with a variable amount on non-inreg operands. All runs
; expect the amount and its complement (v_xor_b32 -1) to be masked with 15,
; rhs to be pre-shifted right by one, and the two shifted halves to be merged
; with v_or_b32. tahiti does this with 32-bit shifts plus v_bfe_u32 to
; zero-extend and pre-shift; fiji, gfx900 and gfx1010 expect the 16-bit
; v_lshlrev_b16 / v_lshrrev_b16 instructions instead.
2883 define i16 @v_fshl_i16(i16 %lhs, i16 %rhs, i16 %amt) {
2884 ; GFX6-LABEL: v_fshl_i16:
2886 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2887 ; GFX6-NEXT: v_and_b32_e32 v3, 15, v2
2888 ; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
2889 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v2
2890 ; GFX6-NEXT: v_bfe_u32 v3, v3, 0, 16
2891 ; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 15
2892 ; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16
2893 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0
2894 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
2895 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
2896 ; GFX6-NEXT: s_setpc_b64 s[30:31]
2898 ; GFX8-LABEL: v_fshl_i16:
2900 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2901 ; GFX8-NEXT: v_and_b32_e32 v3, 15, v2
2902 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
2903 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v2
2904 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 1, v1
2905 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0
2906 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1
2907 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
2908 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2910 ; GFX9-LABEL: v_fshl_i16:
2912 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2913 ; GFX9-NEXT: v_and_b32_e32 v3, 15, v2
2914 ; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
2915 ; GFX9-NEXT: v_and_b32_e32 v2, 15, v2
2916 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, 1, v1
2917 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0
2918 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1
2919 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
2920 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2922 ; GFX10-LABEL: v_fshl_i16:
2924 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2925 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2926 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2
2927 ; GFX10-NEXT: v_and_b32_e32 v2, 15, v2
2928 ; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1
2929 ; GFX10-NEXT: v_and_b32_e32 v3, 15, v3
2930 ; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0
2931 ; GFX10-NEXT: v_lshrrev_b16 v1, v3, v1
2932 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
2933 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2934 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt)
; i16 funnel-shift-left by the constant 4 on non-inreg operands. Each run
; expects a left shift of v0 by 4, a right-side extraction of v1, and a
; v_or_b32 to merge them. tahiti uses v_lshlrev_b32 plus v_bfe_u32 v1, 12, 4;
; fiji, gfx900 and gfx1010 use v_lshlrev_b16 by 4 and v_lshrrev_b16 by 12.
2938 define i16 @v_fshl_i16_4(i16 %lhs, i16 %rhs) {
2939 ; GFX6-LABEL: v_fshl_i16_4:
2941 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2942 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0
2943 ; GFX6-NEXT: v_bfe_u32 v1, v1, 12, 4
2944 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
2945 ; GFX6-NEXT: s_setpc_b64 s[30:31]
2947 ; GFX8-LABEL: v_fshl_i16_4:
2949 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2950 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0
2951 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 12, v1
2952 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
2953 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2955 ; GFX9-LABEL: v_fshl_i16_4:
2957 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2958 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 4, v0
2959 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, 12, v1
2960 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
2961 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2963 ; GFX10-LABEL: v_fshl_i16_4:
2965 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2966 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
2967 ; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0
2968 ; GFX10-NEXT: v_lshrrev_b16 v1, 12, v1
2969 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
2970 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2971 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 4)
; i16 funnel-shift-left by the constant 5 on non-inreg operands. Each run
; expects a left shift of v0 by 5, a right-side extraction of v1, and a
; v_or_b32 to merge them. tahiti uses v_lshlrev_b32 plus v_bfe_u32 v1, 11, 5;
; fiji, gfx900 and gfx1010 use v_lshlrev_b16 by 5 and v_lshrrev_b16 by 11.
2975 define i16 @v_fshl_i16_5(i16 %lhs, i16 %rhs) {
2976 ; GFX6-LABEL: v_fshl_i16_5:
2978 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2979 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 5, v0
2980 ; GFX6-NEXT: v_bfe_u32 v1, v1, 11, 5
2981 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
2982 ; GFX6-NEXT: s_setpc_b64 s[30:31]
2984 ; GFX8-LABEL: v_fshl_i16_5:
2986 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2987 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 5, v0
2988 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 11, v1
2989 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
2990 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2992 ; GFX9-LABEL: v_fshl_i16_5:
2994 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2995 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 5, v0
2996 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, 11, v1
2997 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
2998 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3000 ; GFX10-LABEL: v_fshl_i16_5:
3002 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3003 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3004 ; GFX10-NEXT: v_lshlrev_b16 v0, 5, v0
3005 ; GFX10-NEXT: v_lshrrev_b16 v1, 11, v1
3006 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3007 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3008 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 5)
; Variable-amount i16 funnel-shift-left with %lhs and %rhs inreg and %amt not
; inreg; the result is bitcast to half for the return. The checks read the
; operands from s0/s1 and the amount from v0. The amount and its complement
; are masked with 15 on the vector side. rhs is pre-shifted on the scalar side:
; tahiti uses one s_bfe_u32 0xf0001, while the other runs expect s_bfe_u32
; 0x100000 followed by s_lshr_b32 by a 1 produced via s_bfe_u32. The final
; merge is v_or_b32.
3012 define amdgpu_ps half @v_fshl_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt) {
3013 ; GFX6-LABEL: v_fshl_i16_ssv:
3015 ; GFX6-NEXT: v_and_b32_e32 v1, 15, v0
3016 ; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
3017 ; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
3018 ; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
3019 ; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1
3020 ; GFX6-NEXT: s_bfe_u32 s0, s1, 0xf0001
3021 ; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
3022 ; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0
3023 ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
3024 ; GFX6-NEXT: ; return to shader part epilog
3026 ; GFX8-LABEL: v_fshl_i16_ssv:
3028 ; GFX8-NEXT: v_and_b32_e32 v1, 15, v0
3029 ; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
3030 ; GFX8-NEXT: v_lshlrev_b16_e64 v1, v1, s0
3031 ; GFX8-NEXT: s_bfe_u32 s0, s1, 0x100000
3032 ; GFX8-NEXT: s_bfe_u32 s1, 1, 0x100000
3033 ; GFX8-NEXT: v_and_b32_e32 v0, 15, v0
3034 ; GFX8-NEXT: s_lshr_b32 s0, s0, s1
3035 ; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0
3036 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
3037 ; GFX8-NEXT: ; return to shader part epilog
3039 ; GFX9-LABEL: v_fshl_i16_ssv:
3041 ; GFX9-NEXT: v_and_b32_e32 v1, 15, v0
3042 ; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
3043 ; GFX9-NEXT: v_lshlrev_b16_e64 v1, v1, s0
3044 ; GFX9-NEXT: s_bfe_u32 s0, s1, 0x100000
3045 ; GFX9-NEXT: s_bfe_u32 s1, 1, 0x100000
3046 ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0
3047 ; GFX9-NEXT: s_lshr_b32 s0, s0, s1
3048 ; GFX9-NEXT: v_lshrrev_b16_e64 v0, v0, s0
3049 ; GFX9-NEXT: v_or_b32_e32 v0, v1, v0
3050 ; GFX9-NEXT: ; return to shader part epilog
3052 ; GFX10-LABEL: v_fshl_i16_ssv:
3054 ; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0
3055 ; GFX10-NEXT: v_and_b32_e32 v0, 15, v0
3056 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
3057 ; GFX10-NEXT: s_bfe_u32 s2, 1, 0x100000
3058 ; GFX10-NEXT: s_lshr_b32 s1, s1, s2
3059 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v1
3060 ; GFX10-NEXT: v_lshlrev_b16 v0, v0, s0
3061 ; GFX10-NEXT: v_lshrrev_b16 v1, v1, s1
3062 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3063 ; GFX10-NEXT: ; return to shader part epilog
3064 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt)
3065 %cast.result = bitcast i16 %result to half
3066 ret half %cast.result
; Variable-amount i16 funnel-shift-left with %lhs and %amt inreg and %rhs not
; inreg; the result is bitcast to half for the return. The checks read lhs
; from s0, rhs from v0 and the amount from s1. The amount masks (s_and_b32 with
; 15 and s_andn2_b32 15, amt) and the left shift of lhs are scalar; the right
; shift of rhs is a vector shift (v_bfe_u32 + v_lshrrev_b32 on tahiti,
; v_lshrrev_b16 on the other runs); v_or_b32 merges the halves.
3069 define amdgpu_ps half @v_fshl_i16_svs(i16 inreg %lhs, i16 %rhs, i16 inreg %amt) {
3070 ; GFX6-LABEL: v_fshl_i16_svs:
3072 ; GFX6-NEXT: s_and_b32 s2, s1, 15
3073 ; GFX6-NEXT: s_andn2_b32 s1, 15, s1
3074 ; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
3075 ; GFX6-NEXT: v_bfe_u32 v0, v0, 1, 15
3076 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000
3077 ; GFX6-NEXT: s_lshl_b32 s0, s0, s2
3078 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, s1, v0
3079 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
3080 ; GFX6-NEXT: ; return to shader part epilog
3082 ; GFX8-LABEL: v_fshl_i16_svs:
3084 ; GFX8-NEXT: s_and_b32 s2, s1, 15
3085 ; GFX8-NEXT: s_andn2_b32 s1, 15, s1
3086 ; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
3087 ; GFX8-NEXT: v_lshrrev_b16_e32 v0, 1, v0
3088 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2
3089 ; GFX8-NEXT: v_lshrrev_b16_e32 v0, s1, v0
3090 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
3091 ; GFX8-NEXT: ; return to shader part epilog
3093 ; GFX9-LABEL: v_fshl_i16_svs:
3095 ; GFX9-NEXT: s_and_b32 s2, s1, 15
3096 ; GFX9-NEXT: s_andn2_b32 s1, 15, s1
3097 ; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000
3098 ; GFX9-NEXT: v_lshrrev_b16_e32 v0, 1, v0
3099 ; GFX9-NEXT: s_lshl_b32 s0, s0, s2
3100 ; GFX9-NEXT: v_lshrrev_b16_e32 v0, s1, v0
3101 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
3102 ; GFX9-NEXT: ; return to shader part epilog
3104 ; GFX10-LABEL: v_fshl_i16_svs:
3106 ; GFX10-NEXT: v_lshrrev_b16 v0, 1, v0
3107 ; GFX10-NEXT: s_andn2_b32 s2, 15, s1
3108 ; GFX10-NEXT: s_and_b32 s1, s1, 15
3109 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
3110 ; GFX10-NEXT: v_lshrrev_b16 v0, s2, v0
3111 ; GFX10-NEXT: s_lshl_b32 s0, s0, s1
3112 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
3113 ; GFX10-NEXT: ; return to shader part epilog
3114 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt)
3115 %cast.result = bitcast i16 %result to half
3116 ret half %cast.result
; Variable-amount i16 funnel-shift-left with %lhs not inreg and %rhs and %amt
; inreg; the result is bitcast to half for the return. The checks read lhs
; from v0, rhs from s0 and the amount from s1. The amount masks and the whole
; right-shift of rhs are scalar; only the left shift of lhs (v_lshlrev_b32 on
; tahiti, v_lshlrev_b16 on the other runs) and the final v_or_b32 are vector
; instructions. As in the all-scalar i16 case, the non-tahiti runs expect the
; pre-shift by one to use a 1 produced via s_bfe_u32.
3119 define amdgpu_ps half @v_fshl_i16_vss(i16 %lhs, i16 inreg %rhs, i16 inreg %amt) {
3120 ; GFX6-LABEL: v_fshl_i16_vss:
3122 ; GFX6-NEXT: s_and_b32 s2, s1, 15
3123 ; GFX6-NEXT: s_andn2_b32 s1, 15, s1
3124 ; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
3125 ; GFX6-NEXT: s_bfe_u32 s0, s0, 0xf0001
3126 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000
3127 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, s2, v0
3128 ; GFX6-NEXT: s_lshr_b32 s0, s0, s1
3129 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
3130 ; GFX6-NEXT: ; return to shader part epilog
3132 ; GFX8-LABEL: v_fshl_i16_vss:
3134 ; GFX8-NEXT: s_and_b32 s2, s1, 15
3135 ; GFX8-NEXT: s_andn2_b32 s1, 15, s1
3136 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, s2, v0
3137 ; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
3138 ; GFX8-NEXT: s_bfe_u32 s2, 1, 0x100000
3139 ; GFX8-NEXT: s_lshr_b32 s0, s0, s2
3140 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
3141 ; GFX8-NEXT: s_lshr_b32 s0, s0, s1
3142 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
3143 ; GFX8-NEXT: ; return to shader part epilog
3145 ; GFX9-LABEL: v_fshl_i16_vss:
3147 ; GFX9-NEXT: s_and_b32 s2, s1, 15
3148 ; GFX9-NEXT: s_andn2_b32 s1, 15, s1
3149 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, s2, v0
3150 ; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000
3151 ; GFX9-NEXT: s_bfe_u32 s2, 1, 0x100000
3152 ; GFX9-NEXT: s_lshr_b32 s0, s0, s2
3153 ; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000
3154 ; GFX9-NEXT: s_lshr_b32 s0, s0, s1
3155 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
3156 ; GFX9-NEXT: ; return to shader part epilog
3158 ; GFX10-LABEL: v_fshl_i16_vss:
3160 ; GFX10-NEXT: s_and_b32 s2, s1, 15
3161 ; GFX10-NEXT: s_andn2_b32 s1, 15, s1
3162 ; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000
3163 ; GFX10-NEXT: s_bfe_u32 s3, 1, 0x100000
3164 ; GFX10-NEXT: v_lshlrev_b16 v0, s2, v0
3165 ; GFX10-NEXT: s_lshr_b32 s0, s0, s3
3166 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000
3167 ; GFX10-NEXT: s_lshr_b32 s0, s0, s1
3168 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
3169 ; GFX10-NEXT: ; return to shader part epilog
3170 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt)
3171 %cast.result = bitcast i16 %result to half
3172 ret half %cast.result
; s_fshl_v2i16: calls the llvm.fshl.v2i16 (funnel shift left) intrinsic with
; %lhs, %rhs and %amt all passed inreg to an amdgpu_ps function, then bitcasts
; the <2 x i16> result to i32. Every expected instruction below is a scalar
; (s_*) operation; the gfx9 and gfx10 expectations repack the two halves with
; s_pack_ll_b32_b16.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
3175 define amdgpu_ps i32 @s_fshl_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
3176 ; GFX6-LABEL: s_fshl_v2i16:
3178 ; GFX6-NEXT: s_and_b32 s6, s4, 15
3179 ; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000
3180 ; GFX6-NEXT: s_andn2_b32 s4, 15, s4
3181 ; GFX6-NEXT: s_lshl_b32 s0, s0, s6
3182 ; GFX6-NEXT: s_mov_b32 s6, 0xf0001
3183 ; GFX6-NEXT: s_bfe_u32 s2, s2, s6
3184 ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
3185 ; GFX6-NEXT: s_lshr_b32 s2, s2, s4
3186 ; GFX6-NEXT: s_or_b32 s0, s0, s2
3187 ; GFX6-NEXT: s_and_b32 s2, s5, 15
3188 ; GFX6-NEXT: s_andn2_b32 s4, 15, s5
3189 ; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
3190 ; GFX6-NEXT: s_lshl_b32 s1, s1, s2
3191 ; GFX6-NEXT: s_bfe_u32 s2, s3, s6
3192 ; GFX6-NEXT: s_bfe_u32 s3, s4, 0x100000
3193 ; GFX6-NEXT: s_lshr_b32 s2, s2, s3
3194 ; GFX6-NEXT: s_or_b32 s1, s1, s2
3195 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000
3196 ; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000
3197 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
3198 ; GFX6-NEXT: s_or_b32 s0, s0, s1
3199 ; GFX6-NEXT: ; return to shader part epilog
3201 ; GFX8-LABEL: s_fshl_v2i16:
3203 ; GFX8-NEXT: s_and_b32 s6, s2, 15
3204 ; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000
3205 ; GFX8-NEXT: s_lshr_b32 s3, s0, 16
3206 ; GFX8-NEXT: s_lshr_b32 s4, s1, 16
3207 ; GFX8-NEXT: s_lshr_b32 s5, s2, 16
3208 ; GFX8-NEXT: s_andn2_b32 s2, 15, s2
3209 ; GFX8-NEXT: s_lshl_b32 s0, s0, s6
3210 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
3211 ; GFX8-NEXT: s_bfe_u32 s6, 1, 0x100000
3212 ; GFX8-NEXT: s_lshr_b32 s1, s1, s6
3213 ; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
3214 ; GFX8-NEXT: s_lshr_b32 s1, s1, s2
3215 ; GFX8-NEXT: s_or_b32 s0, s0, s1
3216 ; GFX8-NEXT: s_and_b32 s1, s5, 15
3217 ; GFX8-NEXT: s_andn2_b32 s2, 15, s5
3218 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
3219 ; GFX8-NEXT: s_lshl_b32 s1, s3, s1
3220 ; GFX8-NEXT: s_lshr_b32 s3, s4, s6
3221 ; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
3222 ; GFX8-NEXT: s_lshr_b32 s2, s3, s2
3223 ; GFX8-NEXT: s_or_b32 s1, s1, s2
3224 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
3225 ; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
3226 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
3227 ; GFX8-NEXT: s_or_b32 s0, s0, s1
3228 ; GFX8-NEXT: ; return to shader part epilog
3230 ; GFX9-LABEL: s_fshl_v2i16:
3232 ; GFX9-NEXT: s_mov_b32 s3, 0xf000f
3233 ; GFX9-NEXT: s_and_b32 s4, s2, s3
3234 ; GFX9-NEXT: s_andn2_b32 s2, s3, s2
3235 ; GFX9-NEXT: s_lshr_b32 s3, s0, 16
3236 ; GFX9-NEXT: s_lshr_b32 s5, s4, 16
3237 ; GFX9-NEXT: s_lshl_b32 s0, s0, s4
3238 ; GFX9-NEXT: s_lshl_b32 s3, s3, s5
3239 ; GFX9-NEXT: s_mov_b32 s4, 0xffff
3240 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s3
3241 ; GFX9-NEXT: s_lshr_b32 s3, s1, 16
3242 ; GFX9-NEXT: s_and_b32 s1, s1, s4
3243 ; GFX9-NEXT: s_lshr_b32 s1, s1, 0x10001
3244 ; GFX9-NEXT: s_lshr_b32 s3, s3, 1
3245 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s3
3246 ; GFX9-NEXT: s_lshr_b32 s3, s1, 16
3247 ; GFX9-NEXT: s_and_b32 s1, s1, s4
3248 ; GFX9-NEXT: s_lshr_b32 s4, s2, 16
3249 ; GFX9-NEXT: s_lshr_b32 s1, s1, s2
3250 ; GFX9-NEXT: s_lshr_b32 s2, s3, s4
3251 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2
3252 ; GFX9-NEXT: s_or_b32 s0, s0, s1
3253 ; GFX9-NEXT: ; return to shader part epilog
3255 ; GFX10-LABEL: s_fshl_v2i16:
3257 ; GFX10-NEXT: s_mov_b32 s5, 0xffff
3258 ; GFX10-NEXT: s_mov_b32 s3, 0xf000f
3259 ; GFX10-NEXT: s_and_b32 s7, s1, s5
3260 ; GFX10-NEXT: s_lshr_b32 s1, s1, 16
3261 ; GFX10-NEXT: s_lshr_b32 s7, s7, 0x10001
3262 ; GFX10-NEXT: s_lshr_b32 s1, s1, 1
3263 ; GFX10-NEXT: s_and_b32 s4, s2, s3
3264 ; GFX10-NEXT: s_andn2_b32 s2, s3, s2
3265 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s7, s1
3266 ; GFX10-NEXT: s_lshr_b32 s3, s0, 16
3267 ; GFX10-NEXT: s_lshr_b32 s6, s4, 16
3268 ; GFX10-NEXT: s_lshl_b32 s0, s0, s4
3269 ; GFX10-NEXT: s_lshr_b32 s4, s1, 16
3270 ; GFX10-NEXT: s_and_b32 s1, s1, s5
3271 ; GFX10-NEXT: s_lshr_b32 s5, s2, 16
3272 ; GFX10-NEXT: s_lshl_b32 s3, s3, s6
3273 ; GFX10-NEXT: s_lshr_b32 s1, s1, s2
3274 ; GFX10-NEXT: s_lshr_b32 s2, s4, s5
3275 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s3
3276 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s2
3277 ; GFX10-NEXT: s_or_b32 s0, s0, s1
3278 ; GFX10-NEXT: ; return to shader part epilog
3279 %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3280 %cast = bitcast <2 x i16> %result to i32
; v_fshl_v2i16: calls the llvm.fshl.v2i16 (funnel shift left) intrinsic with
; none of %lhs, %rhs, %amt marked inreg, in a function without the amdgpu_ps
; calling convention, and returns the <2 x i16> result directly. The expected
; code works on v registers and returns through s_setpc_b64 s[30:31]; the gfx9
; and gfx10 expectations use the packed v_pk_lshlrev_b16 / v_pk_lshrrev_b16
; shifts with the amount masked by 0xf000f.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
3284 define <2 x i16> @v_fshl_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) {
3285 ; GFX6-LABEL: v_fshl_v2i16:
3287 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3288 ; GFX6-NEXT: v_and_b32_e32 v6, 15, v4
3289 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
3290 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
3291 ; GFX6-NEXT: v_bfe_u32 v6, v6, 0, 16
3292 ; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15
3293 ; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16
3294 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v6, v0
3295 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
3296 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
3297 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v5
3298 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v5
3299 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
3300 ; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16
3301 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1
3302 ; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15
3303 ; GFX6-NEXT: v_bfe_u32 v3, v4, 0, 16
3304 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2
3305 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
3306 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3308 ; GFX8-LABEL: v_fshl_v2i16:
3310 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3311 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v2
3312 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v2
3313 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
3314 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v2
3315 ; GFX8-NEXT: v_lshrrev_b16_e32 v5, 1, v1
3316 ; GFX8-NEXT: v_lshlrev_b16_e32 v4, v4, v0
3317 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v2, v5
3318 ; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
3319 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v3
3320 ; GFX8-NEXT: v_xor_b32_e32 v3, -1, v3
3321 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3322 ; GFX8-NEXT: v_mov_b32_e32 v4, 1
3323 ; GFX8-NEXT: v_and_b32_e32 v3, 15, v3
3324 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3325 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, v3, v1
3326 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
3327 ; GFX8-NEXT: v_mov_b32_e32 v1, 16
3328 ; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3329 ; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3330 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3332 ; GFX9-LABEL: v_fshl_v2i16:
3334 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3335 ; GFX9-NEXT: s_mov_b32 s4, 0xf000f
3336 ; GFX9-NEXT: v_and_b32_e32 v3, s4, v2
3337 ; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
3338 ; GFX9-NEXT: v_and_b32_e32 v2, s4, v2
3339 ; GFX9-NEXT: v_pk_lshrrev_b16 v1, 1, v1 op_sel_hi:[0,1]
3340 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v3, v0
3341 ; GFX9-NEXT: v_pk_lshrrev_b16 v1, v2, v1
3342 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
3343 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3345 ; GFX10-LABEL: v_fshl_v2i16:
3347 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3348 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3349 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2
3350 ; GFX10-NEXT: s_mov_b32 s4, 0xf000f
3351 ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 1, v1 op_sel_hi:[0,1]
3352 ; GFX10-NEXT: v_and_b32_e32 v2, s4, v2
3353 ; GFX10-NEXT: v_and_b32_e32 v3, s4, v3
3354 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v2, v0
3355 ; GFX10-NEXT: v_pk_lshrrev_b16 v1, v3, v1
3356 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3357 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3358 %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3359 ret <2 x i16> %result
; v_fshl_v2i16_4_8: calls the llvm.fshl.v2i16 (funnel shift left) intrinsic
; with the constant shift amounts <i16 4, i16 8> and returns the <2 x i16>
; result directly. No masking of the amount appears in the expected output:
; the gfx9 and gfx10 expectations use the packed constants 0x80004 for the
; left shift and 0x8000c for the right shift, and the gfx8 expectations use
; immediate shifts of 4/12 and 8/8.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
3362 define <2 x i16> @v_fshl_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) {
3363 ; GFX6-LABEL: v_fshl_v2i16_4_8:
3365 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3366 ; GFX6-NEXT: s_bfe_u32 s4, 4, 0x100000
3367 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0
3368 ; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15
3369 ; GFX6-NEXT: s_bfe_u32 s4, 11, 0x100000
3370 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, s4, v2
3371 ; GFX6-NEXT: s_bfe_u32 s4, 8, 0x100000
3372 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
3373 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, s4, v1
3374 ; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15
3375 ; GFX6-NEXT: s_bfe_u32 s4, 7, 0x100000
3376 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, s4, v2
3377 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
3378 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3380 ; GFX8-LABEL: v_fshl_v2i16_4_8:
3382 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3383 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3384 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0
3385 ; GFX8-NEXT: v_lshrrev_b16_e32 v3, 12, v1
3386 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
3387 ; GFX8-NEXT: v_mov_b32_e32 v3, 8
3388 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, 8, v2
3389 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3390 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
3391 ; GFX8-NEXT: v_mov_b32_e32 v2, 16
3392 ; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3393 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3394 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3396 ; GFX9-LABEL: v_fshl_v2i16_4_8:
3398 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3399 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x80004
3400 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v2, v0
3401 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x8000c
3402 ; GFX9-NEXT: v_pk_lshrrev_b16 v1, v2, v1
3403 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
3404 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3406 ; GFX10-LABEL: v_fshl_v2i16_4_8:
3408 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3409 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
3410 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 0x80004, v0
3411 ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 0x8000c, v1
3412 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3413 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3414 %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> <i16 4, i16 8>)
3415 ret <2 x i16> %result
; v_fshl_v2i16_ssv: calls the llvm.fshl.v2i16 (funnel shift left) intrinsic in
; an amdgpu_ps function where %lhs and %rhs are inreg and %amt is not; the
; <2 x i16> result is bitcast to float. In the expected output the amount is
; read from v0 while the shifted values come from s registers, and the result
; ends up in v0.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
3418 define amdgpu_ps float @v_fshl_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> %amt) {
3419 ; GFX6-LABEL: v_fshl_v2i16_ssv:
3421 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v0
3422 ; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
3423 ; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16
3424 ; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
3425 ; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2
3426 ; GFX6-NEXT: s_mov_b32 s0, 0xf0001
3427 ; GFX6-NEXT: s_bfe_u32 s2, s2, s0
3428 ; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
3429 ; GFX6-NEXT: v_lshr_b32_e32 v0, s2, v0
3430 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
3431 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v1
3432 ; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1
3433 ; GFX6-NEXT: v_and_b32_e32 v1, 15, v1
3434 ; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16
3435 ; GFX6-NEXT: s_bfe_u32 s0, s3, s0
3436 ; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
3437 ; GFX6-NEXT: v_lshl_b32_e32 v2, s1, v2
3438 ; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1
3439 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
3440 ; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
3441 ; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
3442 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
3443 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
3444 ; GFX6-NEXT: ; return to shader part epilog
3446 ; GFX8-LABEL: v_fshl_v2i16_ssv:
3448 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v0
3449 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
3450 ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
3451 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
3452 ; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
3453 ; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s0
3454 ; GFX8-NEXT: s_bfe_u32 s0, s1, 0x100000
3455 ; GFX8-NEXT: s_bfe_u32 s1, 1, 0x100000
3456 ; GFX8-NEXT: v_and_b32_e32 v0, 15, v0
3457 ; GFX8-NEXT: s_lshr_b32 s0, s0, s1
3458 ; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0
3459 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
3460 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v1
3461 ; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
3462 ; GFX8-NEXT: v_and_b32_e32 v1, 15, v1
3463 ; GFX8-NEXT: s_lshr_b32 s0, s3, s1
3464 ; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s2
3465 ; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s0
3466 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
3467 ; GFX8-NEXT: v_mov_b32_e32 v2, 16
3468 ; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3469 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3470 ; GFX8-NEXT: ; return to shader part epilog
3472 ; GFX9-LABEL: v_fshl_v2i16_ssv:
3474 ; GFX9-NEXT: s_mov_b32 s2, 0xf000f
3475 ; GFX9-NEXT: v_and_b32_e32 v1, s2, v0
3476 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, v1, s0
3477 ; GFX9-NEXT: s_lshr_b32 s0, s1, 16
3478 ; GFX9-NEXT: s_and_b32 s1, s1, 0xffff
3479 ; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
3480 ; GFX9-NEXT: s_lshr_b32 s1, s1, 0x10001
3481 ; GFX9-NEXT: s_lshr_b32 s0, s0, 1
3482 ; GFX9-NEXT: v_and_b32_e32 v0, s2, v0
3483 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s1, s0
3484 ; GFX9-NEXT: v_pk_lshrrev_b16 v0, v0, s0
3485 ; GFX9-NEXT: v_or_b32_e32 v0, v1, v0
3486 ; GFX9-NEXT: ; return to shader part epilog
3488 ; GFX10-LABEL: v_fshl_v2i16_ssv:
3490 ; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0
3491 ; GFX10-NEXT: s_mov_b32 s2, 0xf000f
3492 ; GFX10-NEXT: s_lshr_b32 s3, s1, 16
3493 ; GFX10-NEXT: s_and_b32 s1, s1, 0xffff
3494 ; GFX10-NEXT: v_and_b32_e32 v0, s2, v0
3495 ; GFX10-NEXT: v_and_b32_e32 v1, s2, v1
3496 ; GFX10-NEXT: s_lshr_b32 s1, s1, 0x10001
3497 ; GFX10-NEXT: s_lshr_b32 s2, s3, 1
3498 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s2
3499 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v0, s0
3500 ; GFX10-NEXT: v_pk_lshrrev_b16 v1, v1, s1
3501 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3502 ; GFX10-NEXT: ; return to shader part epilog
3503 %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3504 %cast = bitcast <2 x i16> %result to float
; v_fshl_v2i16_svs: calls the llvm.fshl.v2i16 (funnel shift left) intrinsic in
; an amdgpu_ps function where %lhs and %amt are inreg and %rhs is not; the
; <2 x i16> result is bitcast to float. In the expected output the amount
; masking and the left shift of the lhs value are scalar (s_*) operations,
; while the right shift of the rhs value is done on v registers.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
3508 define amdgpu_ps float @v_fshl_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <2 x i16> inreg %amt) {
3509 ; GFX6-LABEL: v_fshl_v2i16_svs:
3511 ; GFX6-NEXT: s_and_b32 s4, s2, 15
3512 ; GFX6-NEXT: s_andn2_b32 s2, 15, s2
3513 ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
3514 ; GFX6-NEXT: v_bfe_u32 v0, v0, 1, 15
3515 ; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
3516 ; GFX6-NEXT: s_lshl_b32 s0, s0, s4
3517 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, s2, v0
3518 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
3519 ; GFX6-NEXT: s_and_b32 s0, s3, 15
3520 ; GFX6-NEXT: s_andn2_b32 s2, 15, s3
3521 ; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000
3522 ; GFX6-NEXT: s_lshl_b32 s0, s1, s0
3523 ; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 15
3524 ; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000
3525 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, s1, v1
3526 ; GFX6-NEXT: v_or_b32_e32 v1, s0, v1
3527 ; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
3528 ; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
3529 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
3530 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
3531 ; GFX6-NEXT: ; return to shader part epilog
3533 ; GFX8-LABEL: v_fshl_v2i16_svs:
3535 ; GFX8-NEXT: s_and_b32 s4, s1, 15
3536 ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
3537 ; GFX8-NEXT: s_andn2_b32 s1, 15, s1
3538 ; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
3539 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 1, v0
3540 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
3541 ; GFX8-NEXT: s_lshl_b32 s0, s0, s4
3542 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, s1, v1
3543 ; GFX8-NEXT: v_or_b32_e32 v1, s0, v1
3544 ; GFX8-NEXT: s_and_b32 s0, s3, 15
3545 ; GFX8-NEXT: v_mov_b32_e32 v2, 1
3546 ; GFX8-NEXT: s_andn2_b32 s1, 15, s3
3547 ; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
3548 ; GFX8-NEXT: v_lshrrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3549 ; GFX8-NEXT: s_lshl_b32 s0, s2, s0
3550 ; GFX8-NEXT: v_lshrrev_b16_e32 v0, s1, v0
3551 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
3552 ; GFX8-NEXT: v_mov_b32_e32 v2, 16
3553 ; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3554 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3555 ; GFX8-NEXT: ; return to shader part epilog
3557 ; GFX9-LABEL: v_fshl_v2i16_svs:
3559 ; GFX9-NEXT: s_mov_b32 s2, 0xf000f
3560 ; GFX9-NEXT: s_and_b32 s3, s1, s2
3561 ; GFX9-NEXT: s_andn2_b32 s1, s2, s1
3562 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
3563 ; GFX9-NEXT: s_lshr_b32 s4, s3, 16
3564 ; GFX9-NEXT: s_lshl_b32 s0, s0, s3
3565 ; GFX9-NEXT: s_lshl_b32 s2, s2, s4
3566 ; GFX9-NEXT: v_pk_lshrrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3567 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
3568 ; GFX9-NEXT: v_pk_lshrrev_b16 v0, s1, v0
3569 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
3570 ; GFX9-NEXT: ; return to shader part epilog
3572 ; GFX10-LABEL: v_fshl_v2i16_svs:
3574 ; GFX10-NEXT: s_mov_b32 s2, 0xf000f
3575 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3576 ; GFX10-NEXT: s_and_b32 s3, s1, s2
3577 ; GFX10-NEXT: s_andn2_b32 s1, s2, s1
3578 ; GFX10-NEXT: s_lshr_b32 s2, s0, 16
3579 ; GFX10-NEXT: s_lshr_b32 s4, s3, 16
3580 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, s1, v0
3581 ; GFX10-NEXT: s_lshl_b32 s0, s0, s3
3582 ; GFX10-NEXT: s_lshl_b32 s1, s2, s4
3583 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1
3584 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
3585 ; GFX10-NEXT: ; return to shader part epilog
3586 %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3587 %cast = bitcast <2 x i16> %result to float
; v_fshl_v2i16_vss: calls the llvm.fshl.v2i16 (funnel shift left) intrinsic in
; an amdgpu_ps function where %rhs and %amt are inreg and %lhs is not; the
; <2 x i16> result is bitcast to float. In the expected output the amount
; masking and the right shift of the rhs value are scalar (s_*) operations,
; while the left shift of the lhs value is done on v registers.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
3591 define amdgpu_ps float @v_fshl_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
3592 ; GFX6-LABEL: v_fshl_v2i16_vss:
3594 ; GFX6-NEXT: s_and_b32 s4, s2, 15
3595 ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
3596 ; GFX6-NEXT: s_andn2_b32 s2, 15, s2
3597 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0
3598 ; GFX6-NEXT: s_mov_b32 s4, 0xf0001
3599 ; GFX6-NEXT: s_bfe_u32 s0, s0, s4
3600 ; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000
3601 ; GFX6-NEXT: s_lshr_b32 s0, s0, s2
3602 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
3603 ; GFX6-NEXT: s_and_b32 s0, s3, 15
3604 ; GFX6-NEXT: s_andn2_b32 s2, 15, s3
3605 ; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000
3606 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, s0, v1
3607 ; GFX6-NEXT: s_bfe_u32 s0, s1, s4
3608 ; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000
3609 ; GFX6-NEXT: s_lshr_b32 s0, s0, s1
3610 ; GFX6-NEXT: v_or_b32_e32 v1, s0, v1
3611 ; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16
3612 ; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16
3613 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
3614 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
3615 ; GFX6-NEXT: ; return to shader part epilog
3617 ; GFX8-LABEL: v_fshl_v2i16_vss:
3619 ; GFX8-NEXT: s_and_b32 s4, s1, 15
3620 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
3621 ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
3622 ; GFX8-NEXT: s_andn2_b32 s1, 15, s1
3623 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, s4, v0
3624 ; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
3625 ; GFX8-NEXT: s_bfe_u32 s4, 1, 0x100000
3626 ; GFX8-NEXT: s_lshr_b32 s0, s0, s4
3627 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
3628 ; GFX8-NEXT: s_lshr_b32 s0, s0, s1
3629 ; GFX8-NEXT: v_or_b32_e32 v1, s0, v1
3630 ; GFX8-NEXT: s_and_b32 s0, s3, 15
3631 ; GFX8-NEXT: s_andn2_b32 s1, 15, s3
3632 ; GFX8-NEXT: v_mov_b32_e32 v2, s0
3633 ; GFX8-NEXT: s_lshr_b32 s0, s2, s4
3634 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
3635 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3636 ; GFX8-NEXT: s_lshr_b32 s0, s0, s1
3637 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
3638 ; GFX8-NEXT: v_mov_b32_e32 v2, 16
3639 ; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3640 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3641 ; GFX8-NEXT: ; return to shader part epilog
3643 ; GFX9-LABEL: v_fshl_v2i16_vss:
3645 ; GFX9-NEXT: s_mov_b32 s2, 0xf000f
3646 ; GFX9-NEXT: s_and_b32 s3, s1, s2
3647 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, s3, v0
3648 ; GFX9-NEXT: s_mov_b32 s3, 0xffff
3649 ; GFX9-NEXT: s_andn2_b32 s1, s2, s1
3650 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
3651 ; GFX9-NEXT: s_and_b32 s0, s0, s3
3652 ; GFX9-NEXT: s_lshr_b32 s0, s0, 0x10001
3653 ; GFX9-NEXT: s_lshr_b32 s2, s2, 1
3654 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
3655 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
3656 ; GFX9-NEXT: s_and_b32 s0, s0, s3
3657 ; GFX9-NEXT: s_lshr_b32 s3, s1, 16
3658 ; GFX9-NEXT: s_lshr_b32 s0, s0, s1
3659 ; GFX9-NEXT: s_lshr_b32 s1, s2, s3
3660 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1
3661 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
3662 ; GFX9-NEXT: ; return to shader part epilog
3664 ; GFX10-LABEL: v_fshl_v2i16_vss:
3666 ; GFX10-NEXT: s_mov_b32 s3, 0xffff
3667 ; GFX10-NEXT: s_mov_b32 s2, 0xf000f
3668 ; GFX10-NEXT: s_and_b32 s5, s0, s3
3669 ; GFX10-NEXT: s_lshr_b32 s0, s0, 16
3670 ; GFX10-NEXT: s_lshr_b32 s5, s5, 0x10001
3671 ; GFX10-NEXT: s_lshr_b32 s0, s0, 1
3672 ; GFX10-NEXT: s_and_b32 s4, s1, s2
3673 ; GFX10-NEXT: s_andn2_b32 s1, s2, s1
3674 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s5, s0
3675 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, s4, v0
3676 ; GFX10-NEXT: s_lshr_b32 s2, s0, 16
3677 ; GFX10-NEXT: s_and_b32 s0, s0, s3
3678 ; GFX10-NEXT: s_lshr_b32 s3, s1, 16
3679 ; GFX10-NEXT: s_lshr_b32 s0, s0, s1
3680 ; GFX10-NEXT: s_lshr_b32 s1, s2, s3
3681 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1
3682 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
3683 ; GFX10-NEXT: ; return to shader part epilog
3684 %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3685 %cast = bitcast <2 x i16> %result to float
3690 ; define amdgpu_ps i48 @s_fshl_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <3 x i16> inreg %amt) {
3691 ; %result = call <3 x i16> @llvm.fshl.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
3692 ; %cast = bitcast <3 x i16> %result to i48
3697 ; define <3 x half> @v_fshl_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) {
3698 ; %result = call <3 x i16> @llvm.fshl.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
3699 ; %cast.result = bitcast <3 x i16> %result to <3 x half>
3700 ; ret <3 x half> %cast.result
; s_fshl_v4i16: calls the llvm.fshl.v4i16 (funnel shift left) intrinsic with
; %lhs, %rhs and %amt all passed inreg to an amdgpu_ps function; the
; <4 x i16> result is bitcast to <2 x i32> and returned. Every expected
; instruction below is a scalar (s_*) operation, and the result is left in
; s0 and s1.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
3703 define amdgpu_ps <2 x i32> @s_fshl_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs, <4 x i16> inreg %amt) {
3704 ; GFX6-LABEL: s_fshl_v4i16:
3706 ; GFX6-NEXT: s_and_b32 s12, s8, 15
3707 ; GFX6-NEXT: s_bfe_u32 s12, s12, 0x100000
3708 ; GFX6-NEXT: s_andn2_b32 s8, 15, s8
3709 ; GFX6-NEXT: s_lshl_b32 s0, s0, s12
3710 ; GFX6-NEXT: s_mov_b32 s12, 0xf0001
3711 ; GFX6-NEXT: s_bfe_u32 s4, s4, s12
3712 ; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000
3713 ; GFX6-NEXT: s_lshr_b32 s4, s4, s8
3714 ; GFX6-NEXT: s_or_b32 s0, s0, s4
3715 ; GFX6-NEXT: s_and_b32 s4, s9, 15
3716 ; GFX6-NEXT: s_andn2_b32 s8, 15, s9
3717 ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
3718 ; GFX6-NEXT: s_lshl_b32 s1, s1, s4
3719 ; GFX6-NEXT: s_bfe_u32 s4, s5, s12
3720 ; GFX6-NEXT: s_bfe_u32 s5, s8, 0x100000
3721 ; GFX6-NEXT: s_lshr_b32 s4, s4, s5
3722 ; GFX6-NEXT: s_or_b32 s1, s1, s4
3723 ; GFX6-NEXT: s_and_b32 s4, s10, 15
3724 ; GFX6-NEXT: s_andn2_b32 s5, 15, s10
3725 ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
3726 ; GFX6-NEXT: s_lshl_b32 s2, s2, s4
3727 ; GFX6-NEXT: s_bfe_u32 s4, s6, s12
3728 ; GFX6-NEXT: s_bfe_u32 s5, s5, 0x100000
3729 ; GFX6-NEXT: s_lshr_b32 s4, s4, s5
3730 ; GFX6-NEXT: s_or_b32 s2, s2, s4
3731 ; GFX6-NEXT: s_and_b32 s4, s11, 15
3732 ; GFX6-NEXT: s_andn2_b32 s5, 15, s11
3733 ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000
3734 ; GFX6-NEXT: s_lshl_b32 s3, s3, s4
3735 ; GFX6-NEXT: s_bfe_u32 s4, s7, s12
3736 ; GFX6-NEXT: s_bfe_u32 s5, s5, 0x100000
3737 ; GFX6-NEXT: s_lshr_b32 s4, s4, s5
3738 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000
3739 ; GFX6-NEXT: s_or_b32 s3, s3, s4
3740 ; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000
3741 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
3742 ; GFX6-NEXT: s_or_b32 s0, s0, s1
3743 ; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000
3744 ; GFX6-NEXT: s_bfe_u32 s2, s3, 0x100000
3745 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
3746 ; GFX6-NEXT: s_or_b32 s1, s1, s2
3747 ; GFX6-NEXT: ; return to shader part epilog
3749 ; GFX8-LABEL: s_fshl_v4i16:
3751 ; GFX8-NEXT: s_and_b32 s12, s4, 15
3752 ; GFX8-NEXT: s_bfe_u32 s12, s12, 0x100000
3753 ; GFX8-NEXT: s_lshr_b32 s6, s0, 16
3754 ; GFX8-NEXT: s_lshr_b32 s8, s2, 16
3755 ; GFX8-NEXT: s_lshr_b32 s10, s4, 16
3756 ; GFX8-NEXT: s_andn2_b32 s4, 15, s4
3757 ; GFX8-NEXT: s_lshl_b32 s0, s0, s12
3758 ; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
3759 ; GFX8-NEXT: s_bfe_u32 s12, 1, 0x100000
3760 ; GFX8-NEXT: s_lshr_b32 s2, s2, s12
3761 ; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
3762 ; GFX8-NEXT: s_lshr_b32 s2, s2, s4
3763 ; GFX8-NEXT: s_or_b32 s0, s0, s2
3764 ; GFX8-NEXT: s_and_b32 s2, s10, 15
3765 ; GFX8-NEXT: s_andn2_b32 s4, 15, s10
3766 ; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
3767 ; GFX8-NEXT: s_lshl_b32 s2, s6, s2
3768 ; GFX8-NEXT: s_lshr_b32 s6, s8, s12
3769 ; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
3770 ; GFX8-NEXT: s_lshr_b32 s4, s6, s4
3771 ; GFX8-NEXT: s_or_b32 s2, s2, s4
3772 ; GFX8-NEXT: s_and_b32 s4, s5, 15
3773 ; GFX8-NEXT: s_lshr_b32 s9, s3, 16
3774 ; GFX8-NEXT: s_lshr_b32 s11, s5, 16
3775 ; GFX8-NEXT: s_andn2_b32 s5, 15, s5
3776 ; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
3777 ; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
3778 ; GFX8-NEXT: s_lshr_b32 s7, s1, 16
3779 ; GFX8-NEXT: s_lshl_b32 s1, s1, s4
3780 ; GFX8-NEXT: s_lshr_b32 s3, s3, s12
3781 ; GFX8-NEXT: s_bfe_u32 s4, s5, 0x100000
3782 ; GFX8-NEXT: s_lshr_b32 s3, s3, s4
3783 ; GFX8-NEXT: s_or_b32 s1, s1, s3
3784 ; GFX8-NEXT: s_and_b32 s3, s11, 15
3785 ; GFX8-NEXT: s_andn2_b32 s4, 15, s11
3786 ; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000
3787 ; GFX8-NEXT: s_lshr_b32 s5, s9, s12
3788 ; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000
3789 ; GFX8-NEXT: s_lshl_b32 s3, s7, s3
3790 ; GFX8-NEXT: s_lshr_b32 s4, s5, s4
3791 ; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000
3792 ; GFX8-NEXT: s_or_b32 s3, s3, s4
3793 ; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000
3794 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
3795 ; GFX8-NEXT: s_or_b32 s0, s0, s2
3796 ; GFX8-NEXT: s_bfe_u32 s2, s3, 0x100000
3797 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000
3798 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
3799 ; GFX8-NEXT: s_or_b32 s1, s1, s2
3800 ; GFX8-NEXT: ; return to shader part epilog
3802 ; GFX9-LABEL: s_fshl_v4i16:
3804 ; GFX9-NEXT: s_mov_b32 s6, 0xf000f
3805 ; GFX9-NEXT: s_and_b32 s7, s4, s6
3806 ; GFX9-NEXT: s_lshr_b32 s9, s0, 16
3807 ; GFX9-NEXT: s_lshr_b32 s10, s7, 16
3808 ; GFX9-NEXT: s_lshl_b32 s0, s0, s7
3809 ; GFX9-NEXT: s_lshl_b32 s7, s9, s10
3810 ; GFX9-NEXT: s_mov_b32 s9, 0xffff
3811 ; GFX9-NEXT: s_mov_b32 s8, 0x10001
3812 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s7
3813 ; GFX9-NEXT: s_lshr_b32 s7, s2, 16
3814 ; GFX9-NEXT: s_and_b32 s2, s2, s9
3815 ; GFX9-NEXT: s_lshr_b32 s2, s2, s8
3816 ; GFX9-NEXT: s_lshr_b32 s7, s7, 1
3817 ; GFX9-NEXT: s_andn2_b32 s4, s6, s4
3818 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s7
3819 ; GFX9-NEXT: s_lshr_b32 s7, s2, 16
3820 ; GFX9-NEXT: s_and_b32 s2, s2, s9
3821 ; GFX9-NEXT: s_lshr_b32 s10, s4, 16
3822 ; GFX9-NEXT: s_lshr_b32 s2, s2, s4
3823 ; GFX9-NEXT: s_lshr_b32 s4, s7, s10
3824 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4
3825 ; GFX9-NEXT: s_or_b32 s0, s0, s2
3826 ; GFX9-NEXT: s_and_b32 s2, s5, s6
3827 ; GFX9-NEXT: s_andn2_b32 s4, s6, s5
3828 ; GFX9-NEXT: s_lshr_b32 s5, s1, 16
3829 ; GFX9-NEXT: s_lshr_b32 s6, s2, 16
3830 ; GFX9-NEXT: s_lshl_b32 s1, s1, s2
3831 ; GFX9-NEXT: s_lshl_b32 s2, s5, s6
3832 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2
3833 ; GFX9-NEXT: s_lshr_b32 s2, s3, 16
3834 ; GFX9-NEXT: s_and_b32 s3, s3, s9
3835 ; GFX9-NEXT: s_lshr_b32 s3, s3, s8
3836 ; GFX9-NEXT: s_lshr_b32 s2, s2, 1
3837 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s3, s2
3838 ; GFX9-NEXT: s_lshr_b32 s3, s2, 16
3839 ; GFX9-NEXT: s_and_b32 s2, s2, s9
3840 ; GFX9-NEXT: s_lshr_b32 s5, s4, 16
3841 ; GFX9-NEXT: s_lshr_b32 s2, s2, s4
3842 ; GFX9-NEXT: s_lshr_b32 s3, s3, s5
3843 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s3
3844 ; GFX9-NEXT: s_or_b32 s1, s1, s2
3845 ; GFX9-NEXT: ; return to shader part epilog
3847 ; GFX10-LABEL: s_fshl_v4i16:
3849 ; GFX10-NEXT: s_mov_b32 s10, 0xffff
3850 ; GFX10-NEXT: s_mov_b32 s6, 0xf000f
3851 ; GFX10-NEXT: s_mov_b32 s8, 0x10001
3852 ; GFX10-NEXT: s_and_b32 s12, s2, s10
3853 ; GFX10-NEXT: s_lshr_b32 s2, s2, 16
3854 ; GFX10-NEXT: s_and_b32 s7, s4, s6
3855 ; GFX10-NEXT: s_lshr_b32 s12, s12, s8
3856 ; GFX10-NEXT: s_lshr_b32 s2, s2, 1
3857 ; GFX10-NEXT: s_andn2_b32 s4, s6, s4
3858 ; GFX10-NEXT: s_lshr_b32 s9, s0, 16
3859 ; GFX10-NEXT: s_lshr_b32 s11, s7, 16
3860 ; GFX10-NEXT: s_pack_ll_b32_b16 s2, s12, s2
3861 ; GFX10-NEXT: s_lshl_b32 s0, s0, s7
3862 ; GFX10-NEXT: s_lshl_b32 s7, s9, s11
3863 ; GFX10-NEXT: s_lshr_b32 s9, s2, 16
3864 ; GFX10-NEXT: s_and_b32 s2, s2, s10
3865 ; GFX10-NEXT: s_lshr_b32 s11, s4, 16
3866 ; GFX10-NEXT: s_lshr_b32 s2, s2, s4
3867 ; GFX10-NEXT: s_lshr_b32 s4, s9, s11
3868 ; GFX10-NEXT: s_and_b32 s9, s3, s10
3869 ; GFX10-NEXT: s_lshr_b32 s3, s3, 16
3870 ; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s4
3871 ; GFX10-NEXT: s_and_b32 s4, s5, s6
3872 ; GFX10-NEXT: s_lshr_b32 s8, s9, s8
3873 ; GFX10-NEXT: s_lshr_b32 s3, s3, 1
3874 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s7
3875 ; GFX10-NEXT: s_andn2_b32 s5, s6, s5
3876 ; GFX10-NEXT: s_lshr_b32 s6, s1, 16
3877 ; GFX10-NEXT: s_lshr_b32 s7, s4, 16
3878 ; GFX10-NEXT: s_pack_ll_b32_b16 s3, s8, s3
3879 ; GFX10-NEXT: s_lshl_b32 s1, s1, s4
3880 ; GFX10-NEXT: s_lshl_b32 s4, s6, s7
3881 ; GFX10-NEXT: s_lshr_b32 s6, s3, 16
3882 ; GFX10-NEXT: s_and_b32 s3, s3, s10
3883 ; GFX10-NEXT: s_lshr_b32 s7, s5, 16
3884 ; GFX10-NEXT: s_lshr_b32 s3, s3, s5
3885 ; GFX10-NEXT: s_lshr_b32 s5, s6, s7
3886 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4
3887 ; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s5
3888 ; GFX10-NEXT: s_or_b32 s0, s0, s2
3889 ; GFX10-NEXT: s_or_b32 s1, s1, s3
3890 ; GFX10-NEXT: ; return to shader part epilog
3891 %result = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
3892 %cast.result = bitcast <4 x i16> %result to <2 x i32>
3893 ret <2 x i32> %cast.result
3896 define <4 x half> @v_fshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) {
3897 ; GFX6-LABEL: v_fshl_v4i16:
3899 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3900 ; GFX6-NEXT: v_and_b32_e32 v12, 15, v8
3901 ; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
3902 ; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
3903 ; GFX6-NEXT: v_bfe_u32 v12, v12, 0, 16
3904 ; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15
3905 ; GFX6-NEXT: v_bfe_u32 v8, v8, 0, 16
3906 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v12, v0
3907 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4
3908 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
3909 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v9
3910 ; GFX6-NEXT: v_xor_b32_e32 v8, -1, v9
3911 ; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
3912 ; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16
3913 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1
3914 ; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15
3915 ; GFX6-NEXT: v_bfe_u32 v5, v8, 0, 16
3916 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
3917 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v4
3918 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v10
3919 ; GFX6-NEXT: v_xor_b32_e32 v5, -1, v10
3920 ; GFX6-NEXT: v_and_b32_e32 v5, 15, v5
3921 ; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16
3922 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, v4, v2
3923 ; GFX6-NEXT: v_bfe_u32 v4, v6, 1, 15
3924 ; GFX6-NEXT: v_bfe_u32 v5, v5, 0, 16
3925 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
3926 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
3927 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v11
3928 ; GFX6-NEXT: v_xor_b32_e32 v5, -1, v11
3929 ; GFX6-NEXT: v_and_b32_e32 v5, 15, v5
3930 ; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16
3931 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3
3932 ; GFX6-NEXT: v_bfe_u32 v4, v7, 1, 15
3933 ; GFX6-NEXT: v_bfe_u32 v5, v5, 0, 16
3934 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
3935 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
3936 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3938 ; GFX8-LABEL: v_fshl_v4i16:
3940 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3941 ; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v4
3942 ; GFX8-NEXT: v_and_b32_e32 v8, 15, v4
3943 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4
3944 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v4
3945 ; GFX8-NEXT: v_lshrrev_b16_e32 v9, 1, v2
3946 ; GFX8-NEXT: v_lshlrev_b16_e32 v8, v8, v0
3947 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v9
3948 ; GFX8-NEXT: v_or_b32_e32 v4, v8, v4
3949 ; GFX8-NEXT: v_and_b32_e32 v8, 15, v6
3950 ; GFX8-NEXT: v_xor_b32_e32 v6, -1, v6
3951 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3952 ; GFX8-NEXT: v_mov_b32_e32 v8, 1
3953 ; GFX8-NEXT: v_and_b32_e32 v6, 15, v6
3954 ; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3955 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v6, v2
3956 ; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v5
3957 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
3958 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v5
3959 ; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5
3960 ; GFX8-NEXT: v_and_b32_e32 v5, 15, v5
3961 ; GFX8-NEXT: v_lshrrev_b16_e32 v6, 1, v3
3962 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, v2, v1
3963 ; GFX8-NEXT: v_lshrrev_b16_e32 v5, v5, v6
3964 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v5
3965 ; GFX8-NEXT: v_and_b32_e32 v5, 15, v7
3966 ; GFX8-NEXT: v_xor_b32_e32 v6, -1, v7
3967 ; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3968 ; GFX8-NEXT: v_mov_b32_e32 v5, 1
3969 ; GFX8-NEXT: v_and_b32_e32 v6, 15, v6
3970 ; GFX8-NEXT: v_lshrrev_b16_sdwa v3, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3971 ; GFX8-NEXT: v_lshrrev_b16_e32 v3, v6, v3
3972 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
3973 ; GFX8-NEXT: v_mov_b32_e32 v3, 16
3974 ; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3975 ; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
3976 ; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3977 ; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3978 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3980 ; GFX9-LABEL: v_fshl_v4i16:
3982 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3983 ; GFX9-NEXT: s_mov_b32 s4, 0xf000f
3984 ; GFX9-NEXT: v_and_b32_e32 v6, s4, v4
3985 ; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4
3986 ; GFX9-NEXT: v_and_b32_e32 v4, s4, v4
3987 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1]
3988 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v6, v0
3989 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, v4, v2
3990 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
3991 ; GFX9-NEXT: v_and_b32_e32 v2, s4, v5
3992 ; GFX9-NEXT: v_xor_b32_e32 v4, -1, v5
3993 ; GFX9-NEXT: v_and_b32_e32 v4, s4, v4
3994 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, v2, v1
3995 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, 1, v3 op_sel_hi:[0,1]
3996 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, v4, v2
3997 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v2
3998 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4000 ; GFX10-LABEL: v_fshl_v4i16:
4002 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4003 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4004 ; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4
4005 ; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5
4006 ; GFX10-NEXT: s_mov_b32 s4, 0xf000f
4007 ; GFX10-NEXT: v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1]
4008 ; GFX10-NEXT: v_and_b32_e32 v4, s4, v4
4009 ; GFX10-NEXT: v_and_b32_e32 v6, s4, v6
4010 ; GFX10-NEXT: v_and_b32_e32 v5, s4, v5
4011 ; GFX10-NEXT: v_and_b32_e32 v7, s4, v7
4012 ; GFX10-NEXT: v_pk_lshrrev_b16 v3, 1, v3 op_sel_hi:[0,1]
4013 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v4, v0
4014 ; GFX10-NEXT: v_pk_lshrrev_b16 v2, v6, v2
4015 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, v5, v1
4016 ; GFX10-NEXT: v_pk_lshrrev_b16 v3, v7, v3
4017 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
4018 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v3
4019 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4020 %result = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
4021 %cast.result = bitcast <4 x i16> %result to <4 x half>
4022 ret <4 x half> %cast.result
; s_fshl_i64: i64 funnel shift left with a variable amount where all three
; operands are `inreg` arguments of an amdgpu_ps function.
; All four RUN configurations share the common GCN check prefix here. The
; expected code masks the amount with 63, builds the complementary amount with
; s_andn2 (63 & ~amt), pre-shifts the second operand right by 1, then ORs
; (first << amt) with (second >> inverse amount) using scalar 64-bit shifts.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing by hand.
4025 define amdgpu_ps i64 @s_fshl_i64(i64 inreg %lhs, i64 inreg %rhs, i64 inreg %amt) {
4026 ; GCN-LABEL: s_fshl_i64:
4028 ; GCN-NEXT: s_and_b64 s[6:7], s[4:5], 63
4029 ; GCN-NEXT: s_andn2_b64 s[4:5], 63, s[4:5]
4030 ; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], 1
4031 ; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], s6
4032 ; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], s4
4033 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
4034 ; GCN-NEXT: ; return to shader part epilog
4035 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt)
; s_fshl_i64_5: i64 funnel shift left by the constant 5 with `inreg` operands.
; Expected code (identical on all targets, shared GCN prefix): a 64-bit left
; shift of s[0:1] by 5, a 32-bit right shift of s3 by 27 (64 - 5 = 59 = 32 + 27,
; so only the upper dword of s[2:3] contributes), an explicit zero for the
; upper dword of the shifted-in value, and a 64-bit OR.
; NOTE(review): assumes %lhs arrives in s[0:1] and %rhs in s[2:3]; this matches
; the registers used below but the argument assignment itself is not shown here.
4039 define amdgpu_ps i64 @s_fshl_i64_5(i64 inreg %lhs, i64 inreg %rhs) {
4040 ; GCN-LABEL: s_fshl_i64_5:
4042 ; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
4043 ; GCN-NEXT: s_lshr_b32 s2, s3, 27
4044 ; GCN-NEXT: s_mov_b32 s3, 0
4045 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
4046 ; GCN-NEXT: ; return to shader part epilog
4047 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 5)
; s_fshl_i64_32: i64 funnel shift left by the constant 32 (exactly one dword)
; with `inreg` operands. Shared GCN prefix: all targets expect the same code.
; The expected sequence moves s0 into s1, zeroes s0, moves s3 into s2, zeroes
; s3 (by copying the already-zero s0), and then ORs the two 64-bit pairs.
; The checks pin this five-instruction form; the VGPR variant of the same
; shift (v_fshl_i64_32 in this file) is expected to need only two moves.
4051 define amdgpu_ps i64 @s_fshl_i64_32(i64 inreg %lhs, i64 inreg %rhs) {
4052 ; GCN-LABEL: s_fshl_i64_32:
4054 ; GCN-NEXT: s_mov_b32 s1, s0
4055 ; GCN-NEXT: s_mov_b32 s0, 0
4056 ; GCN-NEXT: s_mov_b32 s2, s3
4057 ; GCN-NEXT: s_mov_b32 s3, s0
4058 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
4059 ; GCN-NEXT: ; return to shader part epilog
4060 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 32)
; s_fshl_i64_48: i64 funnel shift left by the constant 48 with `inreg`
; operands. Shared GCN prefix: all targets expect the same code.
; Because the amount exceeds 32, the left shift of the first operand is done
; as a 32-bit shift of s0 by 16 (48 - 32) written to the upper dword s1, with
; the lower dword s0 zeroed; the second operand s[2:3] is shifted right by
; 16 (64 - 48) as a full 64-bit value, and the two halves are ORed together.
4064 define amdgpu_ps i64 @s_fshl_i64_48(i64 inreg %lhs, i64 inreg %rhs) {
4065 ; GCN-LABEL: s_fshl_i64_48:
4067 ; GCN-NEXT: s_lshl_b32 s1, s0, 16
4068 ; GCN-NEXT: s_mov_b32 s0, 0
4069 ; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], 16
4070 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
4071 ; GCN-NEXT: ; return to shader part epilog
4072 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 48)
; v_fshl_i64: i64 funnel shift left with a variable amount, using the default
; calling convention (no `inreg`), so the expected code works on VGPRs and
; returns with s_setpc_b64.
; Every target expects the same algorithm: amt & 63, (~amt) & 63, second
; operand pre-shifted right by 1, 64-bit left shift of the first operand,
; 64-bit right shift of the second, and a per-dword OR of the results.
; Per-target differences captured by the checks:
;   - GFX6 uses v_lshl_b64 / v_lshr_b64 (shift amount is the last operand).
;   - GFX8 and GFX9 use v_lshlrev_b64 / v_lshrrev_b64 (shift amount first).
;   - GFX10 additionally expects s_waitcnt_vscnt in the prologue and a
;     different instruction order and temporary-register choice.
4076 define i64 @v_fshl_i64(i64 %lhs, i64 %rhs, i64 %amt) {
4077 ; GFX6-LABEL: v_fshl_i64:
4079 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4080 ; GFX6-NEXT: v_and_b32_e32 v5, 63, v4
4081 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
4082 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], 1
4083 ; GFX6-NEXT: v_and_b32_e32 v4, 63, v4
4084 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v5
4085 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], v4
4086 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
4087 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v3
4088 ; GFX6-NEXT: s_setpc_b64 s[30:31]
4090 ; GFX8-LABEL: v_fshl_i64:
4092 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4093 ; GFX8-NEXT: v_and_b32_e32 v5, 63, v4
4094 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4
4095 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
4096 ; GFX8-NEXT: v_and_b32_e32 v4, 63, v4
4097 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1]
4098 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3]
4099 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
4100 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
4101 ; GFX8-NEXT: s_setpc_b64 s[30:31]
4103 ; GFX9-LABEL: v_fshl_i64:
4105 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4106 ; GFX9-NEXT: v_and_b32_e32 v5, 63, v4
4107 ; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4
4108 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
4109 ; GFX9-NEXT: v_and_b32_e32 v4, 63, v4
4110 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1]
4111 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3]
4112 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
4113 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v3
4114 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4116 ; GFX10-LABEL: v_fshl_i64:
4118 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4119 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4120 ; GFX10-NEXT: v_xor_b32_e32 v5, -1, v4
4121 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
4122 ; GFX10-NEXT: v_and_b32_e32 v4, 63, v4
4123 ; GFX10-NEXT: v_and_b32_e32 v5, 63, v5
4124 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
4125 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3]
4126 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
4127 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v3
4128 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4129 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt)
; v_fshl_i64_5: VGPR i64 funnel shift left by the constant 5.
; Expected code on every target: a 64-bit left shift of v[0:1] by 5, a 32-bit
; right shift of v3 by 27, and a single OR into the low result dword v0 (the
; shifted-in bits fit entirely in the low dword, so v1 needs no OR).
; GFX6 uses v_lshl_b64 (amount last); GFX8/GFX9/GFX10 use v_lshlrev_b64
; (amount first); GFX10 also expects s_waitcnt_vscnt in the prologue.
; NOTE(review): assumes %lhs is passed in v[0:1] and %rhs in v[2:3], which is
; consistent with the registers used but not shown explicitly in this file.
4133 define i64 @v_fshl_i64_5(i64 %lhs, i64 %rhs) {
4134 ; GFX6-LABEL: v_fshl_i64_5:
4136 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4137 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 5
4138 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 27, v3
4139 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
4140 ; GFX6-NEXT: s_setpc_b64 s[30:31]
4142 ; GFX8-LABEL: v_fshl_i64_5:
4144 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4145 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1]
4146 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 27, v3
4147 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
4148 ; GFX8-NEXT: s_setpc_b64 s[30:31]
4150 ; GFX9-LABEL: v_fshl_i64_5:
4152 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4153 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1]
4154 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 27, v3
4155 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
4156 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4158 ; GFX10-LABEL: v_fshl_i64_5:
4160 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4161 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4162 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1]
4163 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 27, v3
4164 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
4165 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4166 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 5)
; v_fshl_i64_32: VGPR i64 funnel shift left by the constant 32.
; A shift by exactly one dword needs no shift instructions: every target
; expects just two moves, v1 <- v0 followed by v0 <- v3. The order matters
; because v0 is read by the first move before the second one overwrites it.
; The only per-target difference is the extra s_waitcnt_vscnt on GFX10.
4170 define i64 @v_fshl_i64_32(i64 %lhs, i64 %rhs) {
4171 ; GFX6-LABEL: v_fshl_i64_32:
4173 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4174 ; GFX6-NEXT: v_mov_b32_e32 v1, v0
4175 ; GFX6-NEXT: v_mov_b32_e32 v0, v3
4176 ; GFX6-NEXT: s_setpc_b64 s[30:31]
4178 ; GFX8-LABEL: v_fshl_i64_32:
4180 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4181 ; GFX8-NEXT: v_mov_b32_e32 v1, v0
4182 ; GFX8-NEXT: v_mov_b32_e32 v0, v3
4183 ; GFX8-NEXT: s_setpc_b64 s[30:31]
4185 ; GFX9-LABEL: v_fshl_i64_32:
4187 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4188 ; GFX9-NEXT: v_mov_b32_e32 v1, v0
4189 ; GFX9-NEXT: v_mov_b32_e32 v0, v3
4190 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4192 ; GFX10-LABEL: v_fshl_i64_32:
4194 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4195 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4196 ; GFX10-NEXT: v_mov_b32_e32 v1, v0
4197 ; GFX10-NEXT: v_mov_b32_e32 v0, v3
4198 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4199 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 32)
; v_fshl_i64_48: VGPR i64 funnel shift left by the constant 48.
; Expected code on every target: v0 is first copied to v4 because the 64-bit
; right shift of v[2:3] by 16 writes its result into v[0:1]; the saved dword
; is then shifted left by 16 (48 - 32) and ORed into the high result dword v1.
; GFX6 uses v_lshr_b64 (amount last); GFX8/GFX9/GFX10 use v_lshrrev_b64
; (amount first); GFX10 also expects s_waitcnt_vscnt in the prologue.
4203 define i64 @v_fshl_i64_48(i64 %lhs, i64 %rhs) {
4204 ; GFX6-LABEL: v_fshl_i64_48:
4206 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4207 ; GFX6-NEXT: v_mov_b32_e32 v4, v0
4208 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[2:3], 16
4209 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v4
4210 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
4211 ; GFX6-NEXT: s_setpc_b64 s[30:31]
4213 ; GFX8-LABEL: v_fshl_i64_48:
4215 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4216 ; GFX8-NEXT: v_mov_b32_e32 v4, v0
4217 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3]
4218 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
4219 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
4220 ; GFX8-NEXT: s_setpc_b64 s[30:31]
4222 ; GFX9-LABEL: v_fshl_i64_48:
4224 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4225 ; GFX9-NEXT: v_mov_b32_e32 v4, v0
4226 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3]
4227 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v4
4228 ; GFX9-NEXT: v_or_b32_e32 v1, v2, v1
4229 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4231 ; GFX10-LABEL: v_fshl_i64_48:
4233 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4234 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4235 ; GFX10-NEXT: v_mov_b32_e32 v4, v0
4236 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3]
4237 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v4
4238 ; GFX10-NEXT: v_or_b32_e32 v1, v2, v1
4239 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4240 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 48)
; v_fshl_i64_ssv: mixed-operand i64 funnel shift left in an amdgpu_ps
; function. The "ssv" suffix mirrors the signature: %lhs and %rhs are `inreg`,
; %amt is not. The i64 result is bitcast to <2 x float> before being returned.
; NOTE(review): the bitcast presumably exists so the result is returned in
; VGPRs (v0/v1 are the final destinations below) -- confirm against the
; amdgpu_ps return convention.
; Expected code: the amount masks (amt & 63 and ~amt & 63) are computed with
; vector ALU ops on v0, the pre-shift of s[2:3] by 1 stays a scalar
; s_lshr_b64, and both variable 64-bit shifts are vector instructions that take
; the SGPR pair as their data operand.
; GFX6 uses v_lshl_b64/v_lshr_b64, GFX8/GFX9 the *rev forms; GFX10 keeps the
; pre-shifted value in s[2:3] instead of reusing s[0:1] and orders the
; instructions differently.
4244 define amdgpu_ps <2 x float> @v_fshl_i64_ssv(i64 inreg %lhs, i64 inreg %rhs, i64 %amt) {
4245 ; GFX6-LABEL: v_fshl_i64_ssv:
4247 ; GFX6-NEXT: v_and_b32_e32 v1, 63, v0
4248 ; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
4249 ; GFX6-NEXT: v_and_b32_e32 v2, 63, v0
4250 ; GFX6-NEXT: v_lshl_b64 v[0:1], s[0:1], v1
4251 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[2:3], 1
4252 ; GFX6-NEXT: v_lshr_b64 v[2:3], s[0:1], v2
4253 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
4254 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v3
4255 ; GFX6-NEXT: ; return to shader part epilog
4257 ; GFX8-LABEL: v_fshl_i64_ssv:
4259 ; GFX8-NEXT: v_and_b32_e32 v1, 63, v0
4260 ; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
4261 ; GFX8-NEXT: v_and_b32_e32 v2, 63, v0
4262 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v1, s[0:1]
4263 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[2:3], 1
4264 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1]
4265 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
4266 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
4267 ; GFX8-NEXT: ; return to shader part epilog
4269 ; GFX9-LABEL: v_fshl_i64_ssv:
4271 ; GFX9-NEXT: v_and_b32_e32 v1, 63, v0
4272 ; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
4273 ; GFX9-NEXT: v_and_b32_e32 v2, 63, v0
4274 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v1, s[0:1]
4275 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[2:3], 1
4276 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1]
4277 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
4278 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v3
4279 ; GFX9-NEXT: ; return to shader part epilog
4281 ; GFX10-LABEL: v_fshl_i64_ssv:
4283 ; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0
4284 ; GFX10-NEXT: v_and_b32_e32 v0, 63, v0
4285 ; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], 1
4286 ; GFX10-NEXT: v_and_b32_e32 v2, 63, v1
4287 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1]
4288 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], v2, s[2:3]
4289 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
4290 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v3
4291 ; GFX10-NEXT: ; return to shader part epilog
4292 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt)
4293 %cast = bitcast i64 %result to <2 x float>
4294 ret <2 x float> %cast
; v_fshl_i64_svs: mixed-operand i64 funnel shift left in an amdgpu_ps
; function. The "svs" suffix mirrors the signature: %lhs and %amt are `inreg`,
; %rhs is not. The i64 result is bitcast to <2 x float> before being returned.
; Expected code: both amount masks are computed with scalar ops (s_and_b64 and
; s_andn2_b64 against 63), the shift of the first operand stays a scalar
; s_lshl_b64, while the second operand is shifted right with vector 64-bit
; shifts (by 1, then by the inverse amount held in an SGPR). The final ORs are
; vector instructions with the scalar result as one source.
; GFX6 uses v_lshr_b64, GFX8/GFX9 use v_lshrrev_b64; GFX10 expects a different
; instruction order and swaps which SGPR pair holds each mask.
4297 define amdgpu_ps <2 x float> @v_fshl_i64_svs(i64 inreg %lhs, i64 %rhs, i64 inreg %amt) {
4298 ; GFX6-LABEL: v_fshl_i64_svs:
4300 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], 1
4301 ; GFX6-NEXT: s_and_b64 s[4:5], s[2:3], 63
4302 ; GFX6-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
4303 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], s2
4304 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s4
4305 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
4306 ; GFX6-NEXT: v_or_b32_e32 v1, s1, v1
4307 ; GFX6-NEXT: ; return to shader part epilog
4309 ; GFX8-LABEL: v_fshl_i64_svs:
4311 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
4312 ; GFX8-NEXT: s_and_b64 s[4:5], s[2:3], 63
4313 ; GFX8-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
4314 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], s2, v[0:1]
4315 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s4
4316 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
4317 ; GFX8-NEXT: v_or_b32_e32 v1, s1, v1
4318 ; GFX8-NEXT: ; return to shader part epilog
4320 ; GFX9-LABEL: v_fshl_i64_svs:
4322 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
4323 ; GFX9-NEXT: s_and_b64 s[4:5], s[2:3], 63
4324 ; GFX9-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
4325 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], s2, v[0:1]
4326 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s4
4327 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
4328 ; GFX9-NEXT: v_or_b32_e32 v1, s1, v1
4329 ; GFX9-NEXT: ; return to shader part epilog
4331 ; GFX10-LABEL: v_fshl_i64_svs:
4333 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
4334 ; GFX10-NEXT: s_andn2_b64 s[4:5], 63, s[2:3]
4335 ; GFX10-NEXT: s_and_b64 s[2:3], s[2:3], 63
4336 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s2
4337 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
4338 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
4339 ; GFX10-NEXT: v_or_b32_e32 v1, s1, v1
4340 ; GFX10-NEXT: ; return to shader part epilog
4341 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt)
4342 %cast = bitcast i64 %result to <2 x float>
4343 ret <2 x float> %cast
; v_fshl_i64_vss: mixed-operand i64 funnel shift left in an amdgpu_ps
; function. The "vss" suffix mirrors the signature: %lhs is not `inreg`, while
; %rhs and %amt are. The i64 result is bitcast to <2 x float> before returning.
; Expected code: both amount masks are computed with scalar ops against 63,
; the first operand v[0:1] is shifted left with a vector 64-bit shift whose
; amount comes from an SGPR, and the second operand s[0:1] is shifted right
; entirely on the scalar unit (by 1, then by the inverse amount). The final
; ORs combine the scalar and vector halves with vector instructions.
; The instruction sequence is the same on all four targets except that GFX6
; uses v_lshl_b64 (amount last) where the others use v_lshlrev_b64.
4346 define amdgpu_ps <2 x float> @v_fshl_i64_vss(i64 %lhs, i64 inreg %rhs, i64 inreg %amt) {
4347 ; GFX6-LABEL: v_fshl_i64_vss:
4349 ; GFX6-NEXT: s_and_b64 s[4:5], s[2:3], 63
4350 ; GFX6-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
4351 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s4
4352 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
4353 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
4354 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
4355 ; GFX6-NEXT: v_or_b32_e32 v1, s1, v1
4356 ; GFX6-NEXT: ; return to shader part epilog
4358 ; GFX8-LABEL: v_fshl_i64_vss:
4360 ; GFX8-NEXT: s_and_b64 s[4:5], s[2:3], 63
4361 ; GFX8-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
4362 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1]
4363 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
4364 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
4365 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
4366 ; GFX8-NEXT: v_or_b32_e32 v1, s1, v1
4367 ; GFX8-NEXT: ; return to shader part epilog
4369 ; GFX9-LABEL: v_fshl_i64_vss:
4371 ; GFX9-NEXT: s_and_b64 s[4:5], s[2:3], 63
4372 ; GFX9-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
4373 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1]
4374 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
4375 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
4376 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
4377 ; GFX9-NEXT: v_or_b32_e32 v1, s1, v1
4378 ; GFX9-NEXT: ; return to shader part epilog
4380 ; GFX10-LABEL: v_fshl_i64_vss:
4382 ; GFX10-NEXT: s_and_b64 s[4:5], s[2:3], 63
4383 ; GFX10-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
4384 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1]
4385 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
4386 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
4387 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
4388 ; GFX10-NEXT: v_or_b32_e32 v1, s1, v1
4389 ; GFX10-NEXT: ; return to shader part epilog
4390 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt)
4391 %cast = bitcast i64 %result to <2 x float>
4392 ret <2 x float> %cast
; s_fshl_v2i64: <2 x i64> funnel shift left with all operands `inreg` in an
; amdgpu_ps function.
; The expected code handles the two elements independently, each with the same
; scalar pattern as the single-element case: s_and_b64 / s_andn2_b64 against 63
; for the two shift amounts, a pre-shift of the second operand right by 1, one
; s_lshl_b64, one s_lshr_b64 and an s_or_b64. Results end up in s[0:1] and
; s[2:3].
; GFX6, GFX8 and GFX9 expect the same sequence (element 0 finished before
; element 1 starts); GFX10 expects the two elements to be interleaved and
; uses s[6:7]/s[10:11] directly for the second element.
4395 define amdgpu_ps <2 x i64> @s_fshl_v2i64(<2 x i64> inreg %lhs, <2 x i64> inreg %rhs, <2 x i64> inreg %amt) {
4396 ; GFX6-LABEL: s_fshl_v2i64:
4398 ; GFX6-NEXT: s_and_b64 s[12:13], s[8:9], 63
4399 ; GFX6-NEXT: s_andn2_b64 s[8:9], 63, s[8:9]
4400 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], 1
4401 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s12
4402 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
4403 ; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
4404 ; GFX6-NEXT: s_and_b64 s[4:5], s[10:11], 63
4405 ; GFX6-NEXT: s_andn2_b64 s[8:9], 63, s[10:11]
4406 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], s4
4407 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[6:7], 1
4408 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
4409 ; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
4410 ; GFX6-NEXT: ; return to shader part epilog
4412 ; GFX8-LABEL: s_fshl_v2i64:
4414 ; GFX8-NEXT: s_and_b64 s[12:13], s[8:9], 63
4415 ; GFX8-NEXT: s_andn2_b64 s[8:9], 63, s[8:9]
4416 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], 1
4417 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s12
4418 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
4419 ; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
4420 ; GFX8-NEXT: s_and_b64 s[4:5], s[10:11], 63
4421 ; GFX8-NEXT: s_andn2_b64 s[8:9], 63, s[10:11]
4422 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s4
4423 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[6:7], 1
4424 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
4425 ; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
4426 ; GFX8-NEXT: ; return to shader part epilog
4428 ; GFX9-LABEL: s_fshl_v2i64:
4430 ; GFX9-NEXT: s_and_b64 s[12:13], s[8:9], 63
4431 ; GFX9-NEXT: s_andn2_b64 s[8:9], 63, s[8:9]
4432 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], 1
4433 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s12
4434 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
4435 ; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
4436 ; GFX9-NEXT: s_and_b64 s[4:5], s[10:11], 63
4437 ; GFX9-NEXT: s_andn2_b64 s[8:9], 63, s[10:11]
4438 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s4
4439 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[6:7], 1
4440 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
4441 ; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
4442 ; GFX9-NEXT: ; return to shader part epilog
4444 ; GFX10-LABEL: s_fshl_v2i64:
4446 ; GFX10-NEXT: s_and_b64 s[12:13], s[8:9], 63
4447 ; GFX10-NEXT: s_andn2_b64 s[8:9], 63, s[8:9]
4448 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], 1
4449 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], 1
4450 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
4451 ; GFX10-NEXT: s_and_b64 s[8:9], s[10:11], 63
4452 ; GFX10-NEXT: s_andn2_b64 s[10:11], 63, s[10:11]
4453 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s12
4454 ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], s8
4455 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s10
4456 ; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
4457 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7]
4458 ; GFX10-NEXT: ; return to shader part epilog
4459 %result = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt)
4460 ret <2 x i64> %result
; v_fshl_v2i64: <2 x i64> funnel shift left with the default calling
; convention, so the expected code works on VGPRs and returns via s_setpc_b64.
; Each element is lowered with the same pattern as the single-element VGPR
; case: amt & 63, (~amt) & 63, second operand pre-shifted right by 1, a 64-bit
; left shift, a 64-bit right shift, and per-dword ORs. Only the low dword of
; each 64-bit amount (v8 and v10) is read.
; Per-target differences captured by the checks:
;   - GFX6 uses v_lshl_b64 / v_lshr_b64 (shift amount is the last operand).
;   - GFX8 and GFX9 use v_lshlrev_b64 / v_lshrrev_b64 (shift amount first) in
;     the same order as GFX6.
;   - GFX10 adds s_waitcnt_vscnt, uses v9/v11 for the inverted amounts, and
;     groups the same kinds of operations for both elements together.
4463 define <2 x i64> @v_fshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) {
4464 ; GFX6-LABEL: v_fshl_v2i64:
4466 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4467 ; GFX6-NEXT: v_and_b32_e32 v9, 63, v8
4468 ; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
4469 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[4:5], 1
4470 ; GFX6-NEXT: v_and_b32_e32 v8, 63, v8
4471 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v9
4472 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[4:5], v8
4473 ; GFX6-NEXT: v_xor_b32_e32 v8, -1, v10
4474 ; GFX6-NEXT: v_lshr_b64 v[6:7], v[6:7], 1
4475 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
4476 ; GFX6-NEXT: v_and_b32_e32 v4, 63, v10
4477 ; GFX6-NEXT: v_and_b32_e32 v8, 63, v8
4478 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], v4
4479 ; GFX6-NEXT: v_lshr_b64 v[6:7], v[6:7], v8
4480 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v5
4481 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v6
4482 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v7
4483 ; GFX6-NEXT: s_setpc_b64 s[30:31]
4485 ; GFX8-LABEL: v_fshl_v2i64:
4487 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4488 ; GFX8-NEXT: v_and_b32_e32 v9, 63, v8
4489 ; GFX8-NEXT: v_xor_b32_e32 v8, -1, v8
4490 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
4491 ; GFX8-NEXT: v_and_b32_e32 v8, 63, v8
4492 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1]
4493 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5]
4494 ; GFX8-NEXT: v_xor_b32_e32 v8, -1, v10
4495 ; GFX8-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7]
4496 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
4497 ; GFX8-NEXT: v_and_b32_e32 v4, 63, v10
4498 ; GFX8-NEXT: v_and_b32_e32 v8, 63, v8
4499 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], v4, v[2:3]
4500 ; GFX8-NEXT: v_lshrrev_b64 v[6:7], v8, v[6:7]
4501 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v5
4502 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v6
4503 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v7
4504 ; GFX8-NEXT: s_setpc_b64 s[30:31]
4506 ; GFX9-LABEL: v_fshl_v2i64:
4508 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4509 ; GFX9-NEXT: v_and_b32_e32 v9, 63, v8
4510 ; GFX9-NEXT: v_xor_b32_e32 v8, -1, v8
4511 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
4512 ; GFX9-NEXT: v_and_b32_e32 v8, 63, v8
4513 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1]
4514 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5]
4515 ; GFX9-NEXT: v_xor_b32_e32 v8, -1, v10
4516 ; GFX9-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7]
4517 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v4
4518 ; GFX9-NEXT: v_and_b32_e32 v4, 63, v10
4519 ; GFX9-NEXT: v_and_b32_e32 v8, 63, v8
4520 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v4, v[2:3]
4521 ; GFX9-NEXT: v_lshrrev_b64 v[6:7], v8, v[6:7]
4522 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v5
4523 ; GFX9-NEXT: v_or_b32_e32 v2, v2, v6
4524 ; GFX9-NEXT: v_or_b32_e32 v3, v3, v7
4525 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4527 ; GFX10-LABEL: v_fshl_v2i64:
4529 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4530 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4531 ; GFX10-NEXT: v_xor_b32_e32 v9, -1, v8
4532 ; GFX10-NEXT: v_xor_b32_e32 v11, -1, v10
4533 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
4534 ; GFX10-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7]
4535 ; GFX10-NEXT: v_and_b32_e32 v8, 63, v8
4536 ; GFX10-NEXT: v_and_b32_e32 v9, 63, v9
4537 ; GFX10-NEXT: v_and_b32_e32 v10, 63, v10
4538 ; GFX10-NEXT: v_and_b32_e32 v11, 63, v11
4539 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1]
4540 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5]
4541 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], v10, v[2:3]
4542 ; GFX10-NEXT: v_lshrrev_b64 v[6:7], v11, v[6:7]
4543 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v4
4544 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v5
4545 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v6
4546 ; GFX10-NEXT: v_or_b32_e32 v3, v3, v7
4547 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4548 %result = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt)
4549 ret <2 x i64> %result
; s_fshl_i128: i128 funnel shift left with all operands `inreg` in an
; amdgpu_ps function. There is no 128-bit scalar shift, so the expected code
; expands each 128-bit shift into 64-bit pieces. In outline:
;   1. Load the mask 0x7f into s[10:11]; compute amt & 0x7f (s_and_b64) and
;      ~amt & 0x7f (s_andn2_b64) from the low 64 bits of the amount, s[8:9].
;   2. 128-bit left shift of s[0:3] by the masked amount: compute amt - 64 and
;      64 - amt, record "amt < 64" and "amt == 0" as 0/1 values via
;      s_cselect_b32, form the three candidate 64-bit shifts, and pick the
;      right halves with s_cselect_b64 after re-testing the recorded flags.
;   3. 128-bit right shift of s[4:7] by 1: a 64-bit shift of each half, with
;      bit 0 of s6 moved into bit 31 of the upper dword of the low half
;      (s_lshl_b32 by 31, then s_or_b64). s11, the upper dword of the mask, is
;      reused as the zero for the low dword of that OR operand.
;   4. 128-bit right shift of that value by the inverse amount, using the same
;      "< 64" / "== 0" select pattern as step 2.
;   5. OR the two 128-bit results into s[0:3].
; GFX6, GFX8 and GFX9 expect the same instruction sequence; GFX10 expects the
; same operations with a different order and register assignment.
4552 define amdgpu_ps i128 @s_fshl_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) {
4553 ; GFX6-LABEL: s_fshl_i128:
4555 ; GFX6-NEXT: s_mov_b64 s[10:11], 0x7f
4556 ; GFX6-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11]
4557 ; GFX6-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9]
4558 ; GFX6-NEXT: s_sub_i32 s9, s12, 64
4559 ; GFX6-NEXT: s_sub_i32 s10, 64, s12
4560 ; GFX6-NEXT: s_cmp_lt_u32 s12, 64
4561 ; GFX6-NEXT: s_cselect_b32 s18, 1, 0
4562 ; GFX6-NEXT: s_cmp_eq_u32 s12, 0
4563 ; GFX6-NEXT: s_cselect_b32 s19, 1, 0
4564 ; GFX6-NEXT: s_lshl_b64 s[14:15], s[0:1], s12
4565 ; GFX6-NEXT: s_lshr_b64 s[16:17], s[0:1], s10
4566 ; GFX6-NEXT: s_lshl_b64 s[12:13], s[2:3], s12
4567 ; GFX6-NEXT: s_or_b64 s[12:13], s[16:17], s[12:13]
4568 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s9
4569 ; GFX6-NEXT: s_cmp_lg_u32 s18, 0
4570 ; GFX6-NEXT: s_cselect_b64 s[14:15], s[14:15], 0
4571 ; GFX6-NEXT: s_cselect_b64 s[0:1], s[12:13], s[0:1]
4572 ; GFX6-NEXT: s_cmp_lg_u32 s19, 0
4573 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
4574 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
4575 ; GFX6-NEXT: s_lshl_b32 s5, s6, 31
4576 ; GFX6-NEXT: s_mov_b32 s4, s11
4577 ; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
4578 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[6:7], 1
4579 ; GFX6-NEXT: s_sub_i32 s12, s8, 64
4580 ; GFX6-NEXT: s_sub_i32 s10, 64, s8
4581 ; GFX6-NEXT: s_cmp_lt_u32 s8, 64
4582 ; GFX6-NEXT: s_cselect_b32 s13, 1, 0
4583 ; GFX6-NEXT: s_cmp_eq_u32 s8, 0
4584 ; GFX6-NEXT: s_cselect_b32 s16, 1, 0
4585 ; GFX6-NEXT: s_lshr_b64 s[6:7], s[4:5], s8
4586 ; GFX6-NEXT: s_lshr_b64 s[8:9], s[0:1], s8
4587 ; GFX6-NEXT: s_lshl_b64 s[10:11], s[4:5], s10
4588 ; GFX6-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11]
4589 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s12
4590 ; GFX6-NEXT: s_cmp_lg_u32 s13, 0
4591 ; GFX6-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
4592 ; GFX6-NEXT: s_cmp_lg_u32 s16, 0
4593 ; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
4594 ; GFX6-NEXT: s_cmp_lg_u32 s13, 0
4595 ; GFX6-NEXT: s_cselect_b64 s[4:5], s[6:7], 0
4596 ; GFX6-NEXT: s_or_b64 s[0:1], s[14:15], s[0:1]
4597 ; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
4598 ; GFX6-NEXT: ; return to shader part epilog
4600 ; GFX8-LABEL: s_fshl_i128:
4602 ; GFX8-NEXT: s_mov_b64 s[10:11], 0x7f
4603 ; GFX8-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11]
4604 ; GFX8-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9]
4605 ; GFX8-NEXT: s_sub_i32 s9, s12, 64
4606 ; GFX8-NEXT: s_sub_i32 s10, 64, s12
4607 ; GFX8-NEXT: s_cmp_lt_u32 s12, 64
4608 ; GFX8-NEXT: s_cselect_b32 s18, 1, 0
4609 ; GFX8-NEXT: s_cmp_eq_u32 s12, 0
4610 ; GFX8-NEXT: s_cselect_b32 s19, 1, 0
4611 ; GFX8-NEXT: s_lshl_b64 s[14:15], s[0:1], s12
4612 ; GFX8-NEXT: s_lshr_b64 s[16:17], s[0:1], s10
4613 ; GFX8-NEXT: s_lshl_b64 s[12:13], s[2:3], s12
4614 ; GFX8-NEXT: s_or_b64 s[12:13], s[16:17], s[12:13]
4615 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s9
4616 ; GFX8-NEXT: s_cmp_lg_u32 s18, 0
4617 ; GFX8-NEXT: s_cselect_b64 s[14:15], s[14:15], 0
4618 ; GFX8-NEXT: s_cselect_b64 s[0:1], s[12:13], s[0:1]
4619 ; GFX8-NEXT: s_cmp_lg_u32 s19, 0
4620 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
4621 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
4622 ; GFX8-NEXT: s_lshl_b32 s5, s6, 31
4623 ; GFX8-NEXT: s_mov_b32 s4, s11
4624 ; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
4625 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[6:7], 1
4626 ; GFX8-NEXT: s_sub_i32 s12, s8, 64
4627 ; GFX8-NEXT: s_sub_i32 s10, 64, s8
4628 ; GFX8-NEXT: s_cmp_lt_u32 s8, 64
4629 ; GFX8-NEXT: s_cselect_b32 s13, 1, 0
4630 ; GFX8-NEXT: s_cmp_eq_u32 s8, 0
4631 ; GFX8-NEXT: s_cselect_b32 s16, 1, 0
4632 ; GFX8-NEXT: s_lshr_b64 s[6:7], s[4:5], s8
4633 ; GFX8-NEXT: s_lshr_b64 s[8:9], s[0:1], s8
4634 ; GFX8-NEXT: s_lshl_b64 s[10:11], s[4:5], s10
4635 ; GFX8-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11]
4636 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], s12
4637 ; GFX8-NEXT: s_cmp_lg_u32 s13, 0
4638 ; GFX8-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
4639 ; GFX8-NEXT: s_cmp_lg_u32 s16, 0
4640 ; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
4641 ; GFX8-NEXT: s_cmp_lg_u32 s13, 0
4642 ; GFX8-NEXT: s_cselect_b64 s[4:5], s[6:7], 0
4643 ; GFX8-NEXT: s_or_b64 s[0:1], s[14:15], s[0:1]
4644 ; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
4645 ; GFX8-NEXT: ; return to shader part epilog
4647 ; GFX9-LABEL: s_fshl_i128:
4649 ; GFX9-NEXT: s_mov_b64 s[10:11], 0x7f
4650 ; GFX9-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11]
4651 ; GFX9-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9]
4652 ; GFX9-NEXT: s_sub_i32 s9, s12, 64
4653 ; GFX9-NEXT: s_sub_i32 s10, 64, s12
4654 ; GFX9-NEXT: s_cmp_lt_u32 s12, 64
4655 ; GFX9-NEXT: s_cselect_b32 s18, 1, 0
4656 ; GFX9-NEXT: s_cmp_eq_u32 s12, 0
4657 ; GFX9-NEXT: s_cselect_b32 s19, 1, 0
4658 ; GFX9-NEXT: s_lshl_b64 s[14:15], s[0:1], s12
4659 ; GFX9-NEXT: s_lshr_b64 s[16:17], s[0:1], s10
4660 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[2:3], s12
4661 ; GFX9-NEXT: s_or_b64 s[12:13], s[16:17], s[12:13]
4662 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s9
4663 ; GFX9-NEXT: s_cmp_lg_u32 s18, 0
4664 ; GFX9-NEXT: s_cselect_b64 s[14:15], s[14:15], 0
4665 ; GFX9-NEXT: s_cselect_b64 s[0:1], s[12:13], s[0:1]
4666 ; GFX9-NEXT: s_cmp_lg_u32 s19, 0
4667 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
4668 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
4669 ; GFX9-NEXT: s_lshl_b32 s5, s6, 31
4670 ; GFX9-NEXT: s_mov_b32 s4, s11
4671 ; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
4672 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[6:7], 1
4673 ; GFX9-NEXT: s_sub_i32 s12, s8, 64
4674 ; GFX9-NEXT: s_sub_i32 s10, 64, s8
4675 ; GFX9-NEXT: s_cmp_lt_u32 s8, 64
4676 ; GFX9-NEXT: s_cselect_b32 s13, 1, 0
4677 ; GFX9-NEXT: s_cmp_eq_u32 s8, 0
4678 ; GFX9-NEXT: s_cselect_b32 s16, 1, 0
4679 ; GFX9-NEXT: s_lshr_b64 s[6:7], s[4:5], s8
4680 ; GFX9-NEXT: s_lshr_b64 s[8:9], s[0:1], s8
4681 ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], s10
4682 ; GFX9-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11]
4683 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s12
4684 ; GFX9-NEXT: s_cmp_lg_u32 s13, 0
4685 ; GFX9-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
4686 ; GFX9-NEXT: s_cmp_lg_u32 s16, 0
4687 ; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
4688 ; GFX9-NEXT: s_cmp_lg_u32 s13, 0
4689 ; GFX9-NEXT: s_cselect_b64 s[4:5], s[6:7], 0
4690 ; GFX9-NEXT: s_or_b64 s[0:1], s[14:15], s[0:1]
4691 ; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
4692 ; GFX9-NEXT: ; return to shader part epilog
4694 ; GFX10-LABEL: s_fshl_i128:
4696 ; GFX10-NEXT: s_mov_b64 s[10:11], 0x7f
4697 ; GFX10-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11]
4698 ; GFX10-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9]
4699 ; GFX10-NEXT: s_sub_i32 s9, s12, 64
4700 ; GFX10-NEXT: s_sub_i32 s10, 64, s12
4701 ; GFX10-NEXT: s_cmp_lt_u32 s12, 64
4702 ; GFX10-NEXT: s_cselect_b32 s18, 1, 0
4703 ; GFX10-NEXT: s_cmp_eq_u32 s12, 0
4704 ; GFX10-NEXT: s_cselect_b32 s19, 1, 0
4705 ; GFX10-NEXT: s_lshr_b64 s[14:15], s[0:1], s10
4706 ; GFX10-NEXT: s_lshl_b64 s[16:17], s[2:3], s12
4707 ; GFX10-NEXT: s_lshl_b64 s[12:13], s[0:1], s12
4708 ; GFX10-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17]
4709 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s9
4710 ; GFX10-NEXT: s_cmp_lg_u32 s18, 0
4711 ; GFX10-NEXT: s_cselect_b64 s[12:13], s[12:13], 0
4712 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[14:15], s[0:1]
4713 ; GFX10-NEXT: s_cmp_lg_u32 s19, 0
4714 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
4715 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
4716 ; GFX10-NEXT: s_lshl_b32 s5, s6, 31
4717 ; GFX10-NEXT: s_mov_b32 s4, s11
4718 ; GFX10-NEXT: s_sub_i32 s14, s8, 64
4719 ; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
4720 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[6:7], 1
4721 ; GFX10-NEXT: s_sub_i32 s9, 64, s8
4722 ; GFX10-NEXT: s_cmp_lt_u32 s8, 64
4723 ; GFX10-NEXT: s_cselect_b32 s15, 1, 0
4724 ; GFX10-NEXT: s_cmp_eq_u32 s8, 0
4725 ; GFX10-NEXT: s_cselect_b32 s16, 1, 0
4726 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s8
4727 ; GFX10-NEXT: s_lshl_b64 s[10:11], s[4:5], s9
4728 ; GFX10-NEXT: s_lshr_b64 s[8:9], s[4:5], s8
4729 ; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11]
4730 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s14
4731 ; GFX10-NEXT: s_cmp_lg_u32 s15, 0
4732 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5]
4733 ; GFX10-NEXT: s_cmp_lg_u32 s16, 0
4734 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
4735 ; GFX10-NEXT: s_cmp_lg_u32 s15, 0
4736 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[8:9], 0
4737 ; GFX10-NEXT: s_or_b64 s[0:1], s[12:13], s[0:1]
4738 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
4739 ; GFX10-NEXT: ; return to shader part epilog
4740 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt)
; v_fshl_i128: 128-bit funnel shift left (llvm.fshl.i128) where %lhs, %rhs and
; %amt are all plain (non-inreg) arguments of a default-convention function.
; In the expected code below the amount is masked with 0x7f, %lhs is shifted
; left by (amt & 0x7f), %rhs is shifted right by 1 and then by (~amt & 0x7f),
; and the two partial results are OR'ed together. Each 128-bit shift is
; assembled from 64-bit shifts plus selects on "amount < 64" and "amount == 0".
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
4744 define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
4745 ; GFX6-LABEL: v_fshl_i128:
4747 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4748 ; GFX6-NEXT: s_movk_i32 s4, 0x7f
4749 ; GFX6-NEXT: v_and_b32_e32 v14, s4, v8
4750 ; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
4751 ; GFX6-NEXT: v_and_b32_e32 v15, s4, v8
4752 ; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v14
4753 ; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, 64, v14
4754 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[0:1], v8
4755 ; GFX6-NEXT: v_lshl_b64 v[10:11], v[2:3], v14
4756 ; GFX6-NEXT: v_lshl_b64 v[12:13], v[0:1], v14
4757 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v16
4758 ; GFX6-NEXT: v_or_b32_e32 v8, v8, v10
4759 ; GFX6-NEXT: v_or_b32_e32 v9, v9, v11
4760 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
4761 ; GFX6-NEXT: v_cndmask_b32_e32 v10, 0, v12, vcc
4762 ; GFX6-NEXT: v_cndmask_b32_e32 v11, 0, v13, vcc
4763 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
4764 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
4765 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14
4766 ; GFX6-NEXT: v_cndmask_b32_e32 v12, v0, v2, vcc
4767 ; GFX6-NEXT: v_cndmask_b32_e32 v13, v1, v3, vcc
4768 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], 1
4769 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 31, v6
4770 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
4771 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[6:7], 1
4772 ; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 64, v15
4773 ; GFX6-NEXT: v_subrev_i32_e32 v14, vcc, 64, v15
4774 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], v15
4775 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], v6
4776 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], v15
4777 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], v14
4778 ; GFX6-NEXT: v_or_b32_e32 v4, v4, v6
4779 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v7
4780 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
4781 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
4782 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
4783 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15
4784 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
4785 ; GFX6-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
4786 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
4787 ; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
4788 ; GFX6-NEXT: v_or_b32_e32 v0, v10, v0
4789 ; GFX6-NEXT: v_or_b32_e32 v1, v11, v1
4790 ; GFX6-NEXT: v_or_b32_e32 v2, v12, v2
4791 ; GFX6-NEXT: v_or_b32_e32 v3, v13, v3
4792 ; GFX6-NEXT: s_setpc_b64 s[30:31]
4794 ; GFX8-LABEL: v_fshl_i128:
4796 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4797 ; GFX8-NEXT: s_movk_i32 s4, 0x7f
4798 ; GFX8-NEXT: v_and_b32_e32 v14, s4, v8
4799 ; GFX8-NEXT: v_xor_b32_e32 v8, -1, v8
4800 ; GFX8-NEXT: v_and_b32_e32 v15, s4, v8
4801 ; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v14
4802 ; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, 64, v14
4803 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], v8, v[0:1]
4804 ; GFX8-NEXT: v_lshlrev_b64 v[10:11], v14, v[2:3]
4805 ; GFX8-NEXT: v_lshlrev_b64 v[12:13], v14, v[0:1]
4806 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v16, v[0:1]
4807 ; GFX8-NEXT: v_or_b32_e32 v8, v8, v10
4808 ; GFX8-NEXT: v_or_b32_e32 v9, v9, v11
4809 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
4810 ; GFX8-NEXT: v_cndmask_b32_e32 v10, 0, v12, vcc
4811 ; GFX8-NEXT: v_cndmask_b32_e32 v11, 0, v13, vcc
4812 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
4813 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
4814 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14
4815 ; GFX8-NEXT: v_cndmask_b32_e32 v12, v0, v2, vcc
4816 ; GFX8-NEXT: v_cndmask_b32_e32 v13, v1, v3, vcc
4817 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], 1, v[4:5]
4818 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 31, v6
4819 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
4820 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[6:7]
4821 ; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 64, v15
4822 ; GFX8-NEXT: v_subrev_u32_e32 v14, vcc, 64, v15
4823 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], v15, v[0:1]
4824 ; GFX8-NEXT: v_lshlrev_b64 v[6:7], v6, v[2:3]
4825 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], v15, v[2:3]
4826 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], v14, v[2:3]
4827 ; GFX8-NEXT: v_or_b32_e32 v4, v4, v6
4828 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v7
4829 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
4830 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
4831 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
4832 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15
4833 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
4834 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
4835 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
4836 ; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
4837 ; GFX8-NEXT: v_or_b32_e32 v0, v10, v0
4838 ; GFX8-NEXT: v_or_b32_e32 v1, v11, v1
4839 ; GFX8-NEXT: v_or_b32_e32 v2, v12, v2
4840 ; GFX8-NEXT: v_or_b32_e32 v3, v13, v3
4841 ; GFX8-NEXT: s_setpc_b64 s[30:31]
4843 ; GFX9-LABEL: v_fshl_i128:
4845 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4846 ; GFX9-NEXT: s_movk_i32 s4, 0x7f
4847 ; GFX9-NEXT: v_and_b32_e32 v14, s4, v8
4848 ; GFX9-NEXT: v_xor_b32_e32 v8, -1, v8
4849 ; GFX9-NEXT: v_and_b32_e32 v15, s4, v8
4850 ; GFX9-NEXT: v_sub_u32_e32 v8, 64, v14
4851 ; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v14
4852 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v8, v[0:1]
4853 ; GFX9-NEXT: v_lshlrev_b64 v[10:11], v14, v[2:3]
4854 ; GFX9-NEXT: v_lshlrev_b64 v[12:13], v14, v[0:1]
4855 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v16, v[0:1]
4856 ; GFX9-NEXT: v_or_b32_e32 v8, v8, v10
4857 ; GFX9-NEXT: v_or_b32_e32 v9, v9, v11
4858 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
4859 ; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v12, vcc
4860 ; GFX9-NEXT: v_cndmask_b32_e32 v11, 0, v13, vcc
4861 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
4862 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
4863 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14
4864 ; GFX9-NEXT: v_cndmask_b32_e32 v12, v0, v2, vcc
4865 ; GFX9-NEXT: v_cndmask_b32_e32 v13, v1, v3, vcc
4866 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[4:5]
4867 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 31, v6
4868 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v2
4869 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[6:7]
4870 ; GFX9-NEXT: v_sub_u32_e32 v6, 64, v15
4871 ; GFX9-NEXT: v_subrev_u32_e32 v14, 64, v15
4872 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v15, v[0:1]
4873 ; GFX9-NEXT: v_lshlrev_b64 v[6:7], v6, v[2:3]
4874 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v15, v[2:3]
4875 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], v14, v[2:3]
4876 ; GFX9-NEXT: v_or_b32_e32 v4, v4, v6
4877 ; GFX9-NEXT: v_or_b32_e32 v5, v5, v7
4878 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
4879 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
4880 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
4881 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15
4882 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
4883 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
4884 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
4885 ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
4886 ; GFX9-NEXT: v_or_b32_e32 v0, v10, v0
4887 ; GFX9-NEXT: v_or_b32_e32 v1, v11, v1
4888 ; GFX9-NEXT: v_or_b32_e32 v2, v12, v2
4889 ; GFX9-NEXT: v_or_b32_e32 v3, v13, v3
4890 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4892 ; GFX10-LABEL: v_fshl_i128:
4894 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4895 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4896 ; GFX10-NEXT: s_movk_i32 s4, 0x7f
4897 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
4898 ; GFX10-NEXT: v_and_b32_e32 v18, s4, v8
4899 ; GFX10-NEXT: v_xor_b32_e32 v8, -1, v8
4900 ; GFX10-NEXT: v_lshlrev_b32_e32 v12, 31, v6
4901 ; GFX10-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7]
4902 ; GFX10-NEXT: v_sub_nc_u32_e32 v9, 64, v18
4903 ; GFX10-NEXT: v_and_b32_e32 v19, s4, v8
4904 ; GFX10-NEXT: v_lshlrev_b64 v[10:11], v18, v[2:3]
4905 ; GFX10-NEXT: v_or_b32_e32 v5, v5, v12
4906 ; GFX10-NEXT: v_subrev_nc_u32_e32 v20, 64, v18
4907 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], v9, v[0:1]
4908 ; GFX10-NEXT: v_sub_nc_u32_e32 v16, 64, v19
4909 ; GFX10-NEXT: v_lshlrev_b64 v[12:13], v18, v[0:1]
4910 ; GFX10-NEXT: v_lshrrev_b64 v[14:15], v19, v[4:5]
4911 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1]
4912 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18
4913 ; GFX10-NEXT: v_or_b32_e32 v10, v8, v10
4914 ; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v19
4915 ; GFX10-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7]
4916 ; GFX10-NEXT: v_or_b32_e32 v11, v9, v11
4917 ; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v19
4918 ; GFX10-NEXT: v_cndmask_b32_e32 v10, v0, v10, vcc_lo
4919 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], v8, v[6:7]
4920 ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v19
4921 ; GFX10-NEXT: v_or_b32_e32 v14, v14, v16
4922 ; GFX10-NEXT: v_or_b32_e32 v15, v15, v17
4923 ; GFX10-NEXT: v_cndmask_b32_e32 v11, v1, v11, vcc_lo
4924 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v19, v[6:7]
4925 ; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v18
4926 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v14, s4
4927 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v9, v15, s4
4928 ; GFX10-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc_lo
4929 ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v13, vcc_lo
4930 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s6
4931 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s6
4932 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v4, s5
4933 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v6, v5, s5
4934 ; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v0, s4
4935 ; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v1, s4
4936 ; GFX10-NEXT: v_or_b32_e32 v0, v12, v4
4937 ; GFX10-NEXT: v_or_b32_e32 v1, v7, v5
4938 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v6
4939 ; GFX10-NEXT: v_or_b32_e32 v3, v3, v8
4940 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4941 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt)
; v_fshl_i128_ssv: 128-bit funnel shift left (llvm.fshl.i128) in an amdgpu_ps
; function where %lhs and %rhs are marked inreg and %amt is not. The i128
; result is bitcast to <4 x float> and returned.
; In the expected code below the amount is masked with 0x7f using vector
; instructions, the constant shift of %rhs right by 1 is done with scalar
; instructions (s_lshr_b64 / s_lshl_b32 / s_or_b64), and the variable-amount
; shifts, selects and final ORs are vector instructions that read the scalar
; inputs directly.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
4945 define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, i128 %amt) {
4946 ; GFX6-LABEL: v_fshl_i128_ssv:
4948 ; GFX6-NEXT: s_movk_i32 s8, 0x7f
4949 ; GFX6-NEXT: v_and_b32_e32 v6, s8, v0
4950 ; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
4951 ; GFX6-NEXT: v_and_b32_e32 v7, s8, v0
4952 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v6
4953 ; GFX6-NEXT: v_lshr_b64 v[0:1], s[0:1], v0
4954 ; GFX6-NEXT: v_lshl_b64 v[2:3], s[2:3], v6
4955 ; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 64, v6
4956 ; GFX6-NEXT: v_lshl_b64 v[4:5], s[0:1], v6
4957 ; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
4958 ; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
4959 ; GFX6-NEXT: v_lshl_b64 v[0:1], s[0:1], v8
4960 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
4961 ; GFX6-NEXT: s_mov_b32 s8, 0
4962 ; GFX6-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc
4963 ; GFX6-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
4964 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
4965 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
4966 ; GFX6-NEXT: v_mov_b32_e32 v2, s2
4967 ; GFX6-NEXT: v_mov_b32_e32 v3, s3
4968 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
4969 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
4970 ; GFX6-NEXT: s_lshl_b32 s9, s6, 31
4971 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v0, v2, vcc
4972 ; GFX6-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc
4973 ; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
4974 ; GFX6-NEXT: s_lshr_b64 s[2:3], s[6:7], 1
4975 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v7
4976 ; GFX6-NEXT: v_lshr_b64 v[0:1], s[0:1], v7
4977 ; GFX6-NEXT: v_lshl_b64 v[2:3], s[2:3], v2
4978 ; GFX6-NEXT: v_subrev_i32_e32 v11, vcc, 64, v7
4979 ; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
4980 ; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
4981 ; GFX6-NEXT: v_lshr_b64 v[0:1], s[2:3], v11
4982 ; GFX6-NEXT: v_lshr_b64 v[4:5], s[2:3], v7
4983 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
4984 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
4985 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
4986 ; GFX6-NEXT: v_mov_b32_e32 v2, s0
4987 ; GFX6-NEXT: v_mov_b32_e32 v3, s1
4988 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v7
4989 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
4990 ; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
4991 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
4992 ; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
4993 ; GFX6-NEXT: v_or_b32_e32 v0, v8, v0
4994 ; GFX6-NEXT: v_or_b32_e32 v1, v9, v1
4995 ; GFX6-NEXT: v_or_b32_e32 v2, v6, v2
4996 ; GFX6-NEXT: v_or_b32_e32 v3, v10, v3
4997 ; GFX6-NEXT: ; return to shader part epilog
4999 ; GFX8-LABEL: v_fshl_i128_ssv:
5001 ; GFX8-NEXT: s_movk_i32 s8, 0x7f
5002 ; GFX8-NEXT: v_and_b32_e32 v6, s8, v0
5003 ; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
5004 ; GFX8-NEXT: v_and_b32_e32 v7, s8, v0
5005 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v6
5006 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, s[0:1]
5007 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], v6, s[2:3]
5008 ; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 64, v6
5009 ; GFX8-NEXT: v_lshlrev_b64 v[4:5], v6, s[0:1]
5010 ; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
5011 ; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
5012 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v8, s[0:1]
5013 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
5014 ; GFX8-NEXT: s_mov_b32 s8, 0
5015 ; GFX8-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc
5016 ; GFX8-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
5017 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5018 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
5019 ; GFX8-NEXT: v_mov_b32_e32 v2, s2
5020 ; GFX8-NEXT: v_mov_b32_e32 v3, s3
5021 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
5022 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
5023 ; GFX8-NEXT: s_lshl_b32 s9, s6, 31
5024 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v0, v2, vcc
5025 ; GFX8-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc
5026 ; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
5027 ; GFX8-NEXT: s_lshr_b64 s[2:3], s[6:7], 1
5028 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v7
5029 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v7, s[0:1]
5030 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, s[2:3]
5031 ; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, 64, v7
5032 ; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
5033 ; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
5034 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v11, s[2:3]
5035 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], v7, s[2:3]
5036 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
5037 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5038 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
5039 ; GFX8-NEXT: v_mov_b32_e32 v2, s0
5040 ; GFX8-NEXT: v_mov_b32_e32 v3, s1
5041 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v7
5042 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
5043 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
5044 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
5045 ; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
5046 ; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
5047 ; GFX8-NEXT: v_or_b32_e32 v1, v9, v1
5048 ; GFX8-NEXT: v_or_b32_e32 v2, v6, v2
5049 ; GFX8-NEXT: v_or_b32_e32 v3, v10, v3
5050 ; GFX8-NEXT: ; return to shader part epilog
5052 ; GFX9-LABEL: v_fshl_i128_ssv:
5054 ; GFX9-NEXT: s_movk_i32 s8, 0x7f
5055 ; GFX9-NEXT: v_and_b32_e32 v6, s8, v0
5056 ; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
5057 ; GFX9-NEXT: v_and_b32_e32 v7, s8, v0
5058 ; GFX9-NEXT: v_sub_u32_e32 v0, 64, v6
5059 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, s[0:1]
5060 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v6, s[2:3]
5061 ; GFX9-NEXT: v_subrev_u32_e32 v8, 64, v6
5062 ; GFX9-NEXT: v_lshlrev_b64 v[4:5], v6, s[0:1]
5063 ; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
5064 ; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
5065 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v8, s[0:1]
5066 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
5067 ; GFX9-NEXT: s_mov_b32 s8, 0
5068 ; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc
5069 ; GFX9-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
5070 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5071 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
5072 ; GFX9-NEXT: v_mov_b32_e32 v2, s2
5073 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
5074 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
5075 ; GFX9-NEXT: s_lshl_b32 s9, s6, 31
5076 ; GFX9-NEXT: v_mov_b32_e32 v3, s3
5077 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v2, vcc
5078 ; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
5079 ; GFX9-NEXT: s_lshr_b64 s[2:3], s[6:7], 1
5080 ; GFX9-NEXT: v_sub_u32_e32 v2, 64, v7
5081 ; GFX9-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc
5082 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v7, s[0:1]
5083 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, s[2:3]
5084 ; GFX9-NEXT: v_subrev_u32_e32 v11, 64, v7
5085 ; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
5086 ; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
5087 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v11, s[2:3]
5088 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v7, s[2:3]
5089 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
5090 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
5091 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
5092 ; GFX9-NEXT: v_mov_b32_e32 v2, s0
5093 ; GFX9-NEXT: v_mov_b32_e32 v3, s1
5094 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v7
5095 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
5096 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
5097 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
5098 ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
5099 ; GFX9-NEXT: v_or_b32_e32 v0, v8, v0
5100 ; GFX9-NEXT: v_or_b32_e32 v1, v9, v1
5101 ; GFX9-NEXT: v_or_b32_e32 v2, v6, v2
5102 ; GFX9-NEXT: v_or_b32_e32 v3, v10, v3
5103 ; GFX9-NEXT: ; return to shader part epilog
5105 ; GFX10-LABEL: v_fshl_i128_ssv:
5107 ; GFX10-NEXT: s_movk_i32 s9, 0x7f
5108 ; GFX10-NEXT: v_xor_b32_e32 v4, -1, v0
5109 ; GFX10-NEXT: v_and_b32_e32 v12, s9, v0
5110 ; GFX10-NEXT: s_mov_b32 s8, 0
5111 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], 1
5112 ; GFX10-NEXT: v_and_b32_e32 v13, s9, v4
5113 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v12
5114 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v12, s[2:3]
5115 ; GFX10-NEXT: s_lshl_b32 s9, s6, 31
5116 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], 1
5117 ; GFX10-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9]
5118 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1]
5119 ; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v12
5120 ; GFX10-NEXT: v_subrev_nc_u32_e32 v14, 64, v13
5121 ; GFX10-NEXT: v_lshrrev_b64 v[6:7], v13, s[8:9]
5122 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12
5123 ; GFX10-NEXT: v_lshlrev_b64 v[4:5], v12, s[0:1]
5124 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v0
5125 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, 64, v13
5126 ; GFX10-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1]
5127 ; GFX10-NEXT: v_or_b32_e32 v3, v3, v1
5128 ; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 64, v13
5129 ; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 0, v13
5130 ; GFX10-NEXT: v_lshlrev_b64 v[8:9], v0, s[6:7]
5131 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v14, s[6:7]
5132 ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 0, v12
5133 ; GFX10-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc_lo
5134 ; GFX10-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc_lo
5135 ; GFX10-NEXT: v_or_b32_e32 v6, v6, v8
5136 ; GFX10-NEXT: v_or_b32_e32 v7, v7, v9
5137 ; GFX10-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo
5138 ; GFX10-NEXT: v_cndmask_b32_e32 v10, v11, v3, vcc_lo
5139 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7]
5140 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v6, s0
5141 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v7, s0
5142 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v8, s2, s4
5143 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v10, s3, s4
5144 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s8, s1
5145 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s9, s1
5146 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0
5147 ; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0
5148 ; GFX10-NEXT: v_or_b32_e32 v0, v4, v0
5149 ; GFX10-NEXT: v_or_b32_e32 v1, v5, v1
5150 ; GFX10-NEXT: v_or_b32_e32 v2, v6, v2
5151 ; GFX10-NEXT: v_or_b32_e32 v3, v7, v3
5152 ; GFX10-NEXT: ; return to shader part epilog
5153 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt)
5154 %cast.result = bitcast i128 %result to <4 x float>
5155 ret <4 x float> %cast.result
; v_fshl_i128_svs: 128-bit funnel shift left (llvm.fshl.i128) in an amdgpu_ps
; function where %lhs and %amt are marked inreg and %rhs is not. The i128
; result is bitcast to <4 x float> and returned.
; In the expected code below the two shift amounts (amt & 0x7f and
; ~amt & 0x7f) are formed with s_and_b64 / s_andn2_b64, the %lhs part is
; computed with scalar shifts and s_cselect, the %rhs part with vector shifts
; and v_cndmask, and the final combination uses v_or_b32 with scalar operands.
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
5158 define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) {
5159 ; GFX6-LABEL: v_fshl_i128_svs:
5161 ; GFX6-NEXT: s_mov_b64 s[6:7], 0x7f
5162 ; GFX6-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7]
5163 ; GFX6-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5]
5164 ; GFX6-NEXT: s_sub_i32 s5, s8, 64
5165 ; GFX6-NEXT: s_sub_i32 s9, 64, s8
5166 ; GFX6-NEXT: s_cmp_lt_u32 s8, 64
5167 ; GFX6-NEXT: s_cselect_b32 s12, 1, 0
5168 ; GFX6-NEXT: s_cmp_eq_u32 s8, 0
5169 ; GFX6-NEXT: s_cselect_b32 s13, 1, 0
5170 ; GFX6-NEXT: s_lshl_b64 s[6:7], s[0:1], s8
5171 ; GFX6-NEXT: s_lshr_b64 s[10:11], s[0:1], s9
5172 ; GFX6-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
5173 ; GFX6-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9]
5174 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s5
5175 ; GFX6-NEXT: s_cmp_lg_u32 s12, 0
5176 ; GFX6-NEXT: s_cselect_b64 s[6:7], s[6:7], 0
5177 ; GFX6-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1]
5178 ; GFX6-NEXT: s_cmp_lg_u32 s13, 0
5179 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], 1
5180 ; GFX6-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1]
5181 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 31, v2
5182 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], 1
5183 ; GFX6-NEXT: s_sub_i32 s2, s4, 64
5184 ; GFX6-NEXT: s_sub_i32 s3, 64, s4
5185 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v4
5186 ; GFX6-NEXT: s_cmp_lt_u32 s4, 64
5187 ; GFX6-NEXT: s_cselect_b32 s5, 1, 0
5188 ; GFX6-NEXT: s_cmp_eq_u32 s4, 0
5189 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s4
5190 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s3
5191 ; GFX6-NEXT: s_cselect_b32 s8, 1, 0
5192 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], s4
5193 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], s2
5194 ; GFX6-NEXT: s_and_b32 s2, 1, s5
5195 ; GFX6-NEXT: v_or_b32_e32 v4, v4, v6
5196 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v7
5197 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
5198 ; GFX6-NEXT: s_and_b32 s2, 1, s8
5199 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
5200 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
5201 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
5202 ; GFX6-NEXT: s_and_b32 s2, 1, s5
5203 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
5204 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
5205 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
5206 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
5207 ; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
5208 ; GFX6-NEXT: v_or_b32_e32 v0, s6, v0
5209 ; GFX6-NEXT: v_or_b32_e32 v1, s7, v1
5210 ; GFX6-NEXT: v_or_b32_e32 v2, s0, v2
5211 ; GFX6-NEXT: v_or_b32_e32 v3, s1, v3
5212 ; GFX6-NEXT: ; return to shader part epilog
5214 ; GFX8-LABEL: v_fshl_i128_svs:
5216 ; GFX8-NEXT: s_mov_b64 s[6:7], 0x7f
5217 ; GFX8-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7]
5218 ; GFX8-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5]
5219 ; GFX8-NEXT: s_sub_i32 s5, s8, 64
5220 ; GFX8-NEXT: s_sub_i32 s9, 64, s8
5221 ; GFX8-NEXT: s_cmp_lt_u32 s8, 64
5222 ; GFX8-NEXT: s_cselect_b32 s12, 1, 0
5223 ; GFX8-NEXT: s_cmp_eq_u32 s8, 0
5224 ; GFX8-NEXT: s_cselect_b32 s13, 1, 0
5225 ; GFX8-NEXT: s_lshl_b64 s[6:7], s[0:1], s8
5226 ; GFX8-NEXT: s_lshr_b64 s[10:11], s[0:1], s9
5227 ; GFX8-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
5228 ; GFX8-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9]
5229 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s5
5230 ; GFX8-NEXT: s_cmp_lg_u32 s12, 0
5231 ; GFX8-NEXT: s_cselect_b64 s[6:7], s[6:7], 0
5232 ; GFX8-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1]
5233 ; GFX8-NEXT: s_cmp_lg_u32 s13, 0
5234 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
5235 ; GFX8-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1]
5236 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 31, v2
5237 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
5238 ; GFX8-NEXT: s_sub_i32 s2, s4, 64
5239 ; GFX8-NEXT: s_sub_i32 s3, 64, s4
5240 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
5241 ; GFX8-NEXT: s_cmp_lt_u32 s4, 64
5242 ; GFX8-NEXT: s_cselect_b32 s5, 1, 0
5243 ; GFX8-NEXT: s_cmp_eq_u32 s4, 0
5244 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1]
5245 ; GFX8-NEXT: v_lshlrev_b64 v[6:7], s3, v[2:3]
5246 ; GFX8-NEXT: s_cselect_b32 s8, 1, 0
5247 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], s4, v[2:3]
5248 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], s2, v[2:3]
5249 ; GFX8-NEXT: s_and_b32 s2, 1, s5
5250 ; GFX8-NEXT: v_or_b32_e32 v4, v4, v6
5251 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v7
5252 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
5253 ; GFX8-NEXT: s_and_b32 s2, 1, s8
5254 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
5255 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
5256 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
5257 ; GFX8-NEXT: s_and_b32 s2, 1, s5
5258 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
5259 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
5260 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
5261 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
5262 ; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
5263 ; GFX8-NEXT: v_or_b32_e32 v0, s6, v0
5264 ; GFX8-NEXT: v_or_b32_e32 v1, s7, v1
5265 ; GFX8-NEXT: v_or_b32_e32 v2, s0, v2
5266 ; GFX8-NEXT: v_or_b32_e32 v3, s1, v3
5267 ; GFX8-NEXT: ; return to shader part epilog
5269 ; GFX9-LABEL: v_fshl_i128_svs:
5271 ; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f
5272 ; GFX9-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7]
5273 ; GFX9-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5]
5274 ; GFX9-NEXT: s_sub_i32 s5, s8, 64
5275 ; GFX9-NEXT: s_sub_i32 s9, 64, s8
5276 ; GFX9-NEXT: s_cmp_lt_u32 s8, 64
5277 ; GFX9-NEXT: s_cselect_b32 s12, 1, 0
5278 ; GFX9-NEXT: s_cmp_eq_u32 s8, 0
5279 ; GFX9-NEXT: s_cselect_b32 s13, 1, 0
5280 ; GFX9-NEXT: s_lshl_b64 s[6:7], s[0:1], s8
5281 ; GFX9-NEXT: s_lshr_b64 s[10:11], s[0:1], s9
5282 ; GFX9-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
5283 ; GFX9-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9]
5284 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s5
5285 ; GFX9-NEXT: s_cmp_lg_u32 s12, 0
5286 ; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], 0
5287 ; GFX9-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1]
5288 ; GFX9-NEXT: s_cmp_lg_u32 s13, 0
5289 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
5290 ; GFX9-NEXT: s_cselect_b64 s[0:1], s[2:3], s[0:1]
5291 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 31, v2
5292 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
5293 ; GFX9-NEXT: s_sub_i32 s2, s4, 64
5294 ; GFX9-NEXT: s_sub_i32 s3, 64, s4
5295 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v4
5296 ; GFX9-NEXT: s_cmp_lt_u32 s4, 64
5297 ; GFX9-NEXT: s_cselect_b32 s5, 1, 0
5298 ; GFX9-NEXT: s_cmp_eq_u32 s4, 0
5299 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1]
5300 ; GFX9-NEXT: v_lshlrev_b64 v[6:7], s3, v[2:3]
5301 ; GFX9-NEXT: s_cselect_b32 s8, 1, 0
5302 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], s4, v[2:3]
5303 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], s2, v[2:3]
5304 ; GFX9-NEXT: s_and_b32 s2, 1, s5
5305 ; GFX9-NEXT: v_or_b32_e32 v4, v4, v6
5306 ; GFX9-NEXT: v_or_b32_e32 v5, v5, v7
5307 ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
5308 ; GFX9-NEXT: s_and_b32 s2, 1, s8
5309 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
5310 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
5311 ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
5312 ; GFX9-NEXT: s_and_b32 s2, 1, s5
5313 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
5314 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
5315 ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2
5316 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
5317 ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
5318 ; GFX9-NEXT: v_or_b32_e32 v0, s6, v0
5319 ; GFX9-NEXT: v_or_b32_e32 v1, s7, v1
5320 ; GFX9-NEXT: v_or_b32_e32 v2, s0, v2
5321 ; GFX9-NEXT: v_or_b32_e32 v3, s1, v3
5322 ; GFX9-NEXT: ; return to shader part epilog
5324 ; GFX10-LABEL: v_fshl_i128_svs:
5326 ; GFX10-NEXT: s_mov_b64 s[6:7], 0x7f
5327 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
5328 ; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7]
5329 ; GFX10-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5]
5330 ; GFX10-NEXT: s_sub_i32 s5, s8, 64
5331 ; GFX10-NEXT: s_sub_i32 s6, 64, s8
5332 ; GFX10-NEXT: s_cmp_lt_u32 s8, 64
5333 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 31, v2
5334 ; GFX10-NEXT: s_cselect_b32 s12, 1, 0
5335 ; GFX10-NEXT: s_cmp_eq_u32 s8, 0
5336 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
5337 ; GFX10-NEXT: s_cselect_b32 s13, 1, 0
5338 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s6
5339 ; GFX10-NEXT: s_lshl_b64 s[10:11], s[2:3], s8
5340 ; GFX10-NEXT: s_lshl_b64 s[8:9], s[0:1], s8
5341 ; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11]
5342 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s5
5343 ; GFX10-NEXT: s_cmp_lg_u32 s12, 0
5344 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v4
5345 ; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0
5346 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1]
5347 ; GFX10-NEXT: s_cmp_lg_u32 s13, 0
5348 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
5349 ; GFX10-NEXT: s_sub_i32 s0, 64, s4
5350 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1]
5351 ; GFX10-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3]
5352 ; GFX10-NEXT: s_sub_i32 s0, s4, 64
5353 ; GFX10-NEXT: s_cmp_lt_u32 s4, 64
5354 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3]
5355 ; GFX10-NEXT: s_cselect_b32 vcc_lo, 1, 0
5356 ; GFX10-NEXT: s_cmp_eq_u32 s4, 0
5357 ; GFX10-NEXT: v_or_b32_e32 v4, v4, v6
5358 ; GFX10-NEXT: v_or_b32_e32 v5, v5, v7
5359 ; GFX10-NEXT: s_cselect_b32 s0, 1, 0
5360 ; GFX10-NEXT: s_and_b32 s1, 1, vcc_lo
5361 ; GFX10-NEXT: s_and_b32 s0, 1, s0
5362 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3]
5363 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc_lo
5364 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc_lo
5365 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
5366 ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s1
5367 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo
5368 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
5369 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0
5370 ; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0
5371 ; GFX10-NEXT: v_or_b32_e32 v0, s8, v0
5372 ; GFX10-NEXT: v_or_b32_e32 v1, s9, v1
5373 ; GFX10-NEXT: v_or_b32_e32 v2, s2, v2
5374 ; GFX10-NEXT: v_or_b32_e32 v3, s3, v3
5375 ; GFX10-NEXT: ; return to shader part epilog
5376 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt)
5377 %cast.result = bitcast i128 %result to <4 x float>
5378 ret <4 x float> %cast.result
5381 define amdgpu_ps <4 x float> @v_fshl_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) {
5382 ; GFX6-LABEL: v_fshl_i128_vss:
5384 ; GFX6-NEXT: s_mov_b64 s[6:7], 0x7f
5385 ; GFX6-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7]
5386 ; GFX6-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5]
5387 ; GFX6-NEXT: s_sub_i32 s5, s8, 64
5388 ; GFX6-NEXT: s_sub_i32 s6, 64, s8
5389 ; GFX6-NEXT: s_cmp_lt_u32 s8, 64
5390 ; GFX6-NEXT: s_cselect_b32 s9, 1, 0
5391 ; GFX6-NEXT: s_cmp_eq_u32 s8, 0
5392 ; GFX6-NEXT: s_cselect_b32 s10, 1, 0
5393 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s6
5394 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s8
5395 ; GFX6-NEXT: v_lshl_b64 v[8:9], v[0:1], s8
5396 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s5
5397 ; GFX6-NEXT: s_and_b32 s5, 1, s9
5398 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
5399 ; GFX6-NEXT: s_lshl_b32 s9, s2, 31
5400 ; GFX6-NEXT: s_mov_b32 s8, s7
5401 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5
5402 ; GFX6-NEXT: s_and_b32 s5, 1, s10
5403 ; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
5404 ; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], 1
5405 ; GFX6-NEXT: s_sub_i32 s10, s4, 64
5406 ; GFX6-NEXT: s_sub_i32 s8, 64, s4
5407 ; GFX6-NEXT: s_cmp_lt_u32 s4, 64
5408 ; GFX6-NEXT: v_or_b32_e32 v4, v4, v6
5409 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v7
5410 ; GFX6-NEXT: s_cselect_b32 s11, 1, 0
5411 ; GFX6-NEXT: s_cmp_eq_u32 s4, 0
5412 ; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc
5413 ; GFX6-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc
5414 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
5415 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
5416 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5
5417 ; GFX6-NEXT: s_cselect_b32 s12, 1, 0
5418 ; GFX6-NEXT: s_lshr_b64 s[6:7], s[2:3], s4
5419 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[0:1], s4
5420 ; GFX6-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
5421 ; GFX6-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
5422 ; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
5423 ; GFX6-NEXT: s_cmp_lg_u32 s11, 0
5424 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
5425 ; GFX6-NEXT: s_cmp_lg_u32 s12, 0
5426 ; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
5427 ; GFX6-NEXT: s_cmp_lg_u32 s11, 0
5428 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
5429 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
5430 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[6:7], 0
5431 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v6
5432 ; GFX6-NEXT: v_or_b32_e32 v1, s1, v7
5433 ; GFX6-NEXT: v_or_b32_e32 v2, s2, v2
5434 ; GFX6-NEXT: v_or_b32_e32 v3, s3, v3
5435 ; GFX6-NEXT: ; return to shader part epilog
5437 ; GFX8-LABEL: v_fshl_i128_vss:
5439 ; GFX8-NEXT: s_mov_b64 s[6:7], 0x7f
5440 ; GFX8-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7]
5441 ; GFX8-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5]
5442 ; GFX8-NEXT: s_sub_i32 s5, s8, 64
5443 ; GFX8-NEXT: s_sub_i32 s6, 64, s8
5444 ; GFX8-NEXT: s_cmp_lt_u32 s8, 64
5445 ; GFX8-NEXT: s_cselect_b32 s9, 1, 0
5446 ; GFX8-NEXT: s_cmp_eq_u32 s8, 0
5447 ; GFX8-NEXT: s_cselect_b32 s10, 1, 0
5448 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1]
5449 ; GFX8-NEXT: v_lshlrev_b64 v[6:7], s8, v[2:3]
5450 ; GFX8-NEXT: v_lshlrev_b64 v[8:9], s8, v[0:1]
5451 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1]
5452 ; GFX8-NEXT: s_and_b32 s5, 1, s9
5453 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
5454 ; GFX8-NEXT: s_lshl_b32 s9, s2, 31
5455 ; GFX8-NEXT: s_mov_b32 s8, s7
5456 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5
5457 ; GFX8-NEXT: s_and_b32 s5, 1, s10
5458 ; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
5459 ; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], 1
5460 ; GFX8-NEXT: s_sub_i32 s10, s4, 64
5461 ; GFX8-NEXT: s_sub_i32 s8, 64, s4
5462 ; GFX8-NEXT: s_cmp_lt_u32 s4, 64
5463 ; GFX8-NEXT: v_or_b32_e32 v4, v4, v6
5464 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v7
5465 ; GFX8-NEXT: s_cselect_b32 s11, 1, 0
5466 ; GFX8-NEXT: s_cmp_eq_u32 s4, 0
5467 ; GFX8-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc
5468 ; GFX8-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc
5469 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
5470 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
5471 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5
5472 ; GFX8-NEXT: s_cselect_b32 s12, 1, 0
5473 ; GFX8-NEXT: s_lshr_b64 s[6:7], s[2:3], s4
5474 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[0:1], s4
5475 ; GFX8-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
5476 ; GFX8-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
5477 ; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
5478 ; GFX8-NEXT: s_cmp_lg_u32 s11, 0
5479 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
5480 ; GFX8-NEXT: s_cmp_lg_u32 s12, 0
5481 ; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
5482 ; GFX8-NEXT: s_cmp_lg_u32 s11, 0
5483 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
5484 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
5485 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[6:7], 0
5486 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v6
5487 ; GFX8-NEXT: v_or_b32_e32 v1, s1, v7
5488 ; GFX8-NEXT: v_or_b32_e32 v2, s2, v2
5489 ; GFX8-NEXT: v_or_b32_e32 v3, s3, v3
5490 ; GFX8-NEXT: ; return to shader part epilog
5492 ; GFX9-LABEL: v_fshl_i128_vss:
5494 ; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f
5495 ; GFX9-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7]
5496 ; GFX9-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5]
5497 ; GFX9-NEXT: s_sub_i32 s5, s8, 64
5498 ; GFX9-NEXT: s_sub_i32 s6, 64, s8
5499 ; GFX9-NEXT: s_cmp_lt_u32 s8, 64
5500 ; GFX9-NEXT: s_cselect_b32 s9, 1, 0
5501 ; GFX9-NEXT: s_cmp_eq_u32 s8, 0
5502 ; GFX9-NEXT: s_cselect_b32 s10, 1, 0
5503 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1]
5504 ; GFX9-NEXT: v_lshlrev_b64 v[6:7], s8, v[2:3]
5505 ; GFX9-NEXT: v_lshlrev_b64 v[8:9], s8, v[0:1]
5506 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1]
5507 ; GFX9-NEXT: s_and_b32 s5, 1, s9
5508 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
5509 ; GFX9-NEXT: s_lshl_b32 s9, s2, 31
5510 ; GFX9-NEXT: s_mov_b32 s8, s7
5511 ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5
5512 ; GFX9-NEXT: s_and_b32 s5, 1, s10
5513 ; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
5514 ; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], 1
5515 ; GFX9-NEXT: s_sub_i32 s10, s4, 64
5516 ; GFX9-NEXT: s_sub_i32 s8, 64, s4
5517 ; GFX9-NEXT: s_cmp_lt_u32 s4, 64
5518 ; GFX9-NEXT: v_or_b32_e32 v4, v4, v6
5519 ; GFX9-NEXT: v_or_b32_e32 v5, v5, v7
5520 ; GFX9-NEXT: s_cselect_b32 s11, 1, 0
5521 ; GFX9-NEXT: s_cmp_eq_u32 s4, 0
5522 ; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc
5523 ; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc
5524 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
5525 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
5526 ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5
5527 ; GFX9-NEXT: s_cselect_b32 s12, 1, 0
5528 ; GFX9-NEXT: s_lshr_b64 s[6:7], s[2:3], s4
5529 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[0:1], s4
5530 ; GFX9-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
5531 ; GFX9-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
5532 ; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
5533 ; GFX9-NEXT: s_cmp_lg_u32 s11, 0
5534 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
5535 ; GFX9-NEXT: s_cmp_lg_u32 s12, 0
5536 ; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
5537 ; GFX9-NEXT: s_cmp_lg_u32 s11, 0
5538 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
5539 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
5540 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[6:7], 0
5541 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v6
5542 ; GFX9-NEXT: v_or_b32_e32 v1, s1, v7
5543 ; GFX9-NEXT: v_or_b32_e32 v2, s2, v2
5544 ; GFX9-NEXT: v_or_b32_e32 v3, s3, v3
5545 ; GFX9-NEXT: ; return to shader part epilog
5547 ; GFX10-LABEL: v_fshl_i128_vss:
5549 ; GFX10-NEXT: s_mov_b64 s[6:7], 0x7f
5550 ; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7]
5551 ; GFX10-NEXT: s_andn2_b64 s[10:11], s[6:7], s[4:5]
5552 ; GFX10-NEXT: s_sub_i32 s4, 64, s8
5553 ; GFX10-NEXT: s_sub_i32 s5, s8, 64
5554 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1]
5555 ; GFX10-NEXT: v_lshlrev_b64 v[6:7], s8, v[2:3]
5556 ; GFX10-NEXT: s_cmp_lt_u32 s8, 64
5557 ; GFX10-NEXT: v_lshlrev_b64 v[8:9], s8, v[0:1]
5558 ; GFX10-NEXT: s_cselect_b32 vcc_lo, 1, 0
5559 ; GFX10-NEXT: s_cmp_eq_u32 s8, 0
5560 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1]
5561 ; GFX10-NEXT: s_cselect_b32 s6, 1, 0
5562 ; GFX10-NEXT: s_and_b32 s4, 1, vcc_lo
5563 ; GFX10-NEXT: v_or_b32_e32 v4, v4, v6
5564 ; GFX10-NEXT: v_or_b32_e32 v5, v5, v7
5565 ; GFX10-NEXT: v_cmp_ne_u32_e64 s4, 0, s4
5566 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
5567 ; GFX10-NEXT: s_lshl_b32 s5, s2, 31
5568 ; GFX10-NEXT: s_and_b32 s6, 1, s6
5569 ; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], 1
5570 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s4
5571 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s4
5572 ; GFX10-NEXT: s_mov_b32 s4, s7
5573 ; GFX10-NEXT: s_sub_i32 s11, s10, 64
5574 ; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
5575 ; GFX10-NEXT: s_sub_i32 s7, 64, s10
5576 ; GFX10-NEXT: s_cmp_lt_u32 s10, 64
5577 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc_lo
5578 ; GFX10-NEXT: s_cselect_b32 s12, 1, 0
5579 ; GFX10-NEXT: s_cmp_eq_u32 s10, 0
5580 ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc_lo
5581 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s6
5582 ; GFX10-NEXT: s_cselect_b32 s13, 1, 0
5583 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[0:1], s10
5584 ; GFX10-NEXT: s_lshl_b64 s[6:7], s[2:3], s7
5585 ; GFX10-NEXT: s_lshr_b64 s[8:9], s[2:3], s10
5586 ; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
5587 ; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s11
5588 ; GFX10-NEXT: s_cmp_lg_u32 s12, 0
5589 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc_lo
5590 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
5591 ; GFX10-NEXT: s_cmp_lg_u32 s13, 0
5592 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc_lo
5593 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
5594 ; GFX10-NEXT: s_cmp_lg_u32 s12, 0
5595 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v6
5596 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[8:9], 0
5597 ; GFX10-NEXT: v_or_b32_e32 v1, s1, v7
5598 ; GFX10-NEXT: v_or_b32_e32 v2, s2, v2
5599 ; GFX10-NEXT: v_or_b32_e32 v3, s3, v3
5600 ; GFX10-NEXT: ; return to shader part epilog
5601 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt)
5602 %cast.result = bitcast i128 %result to <4 x float>
5603 ret <4 x float> %cast.result
; Funnel-shift-left of two i128 values by the constant amount 65, with both
; operands passed inreg to an amdgpu_ps shader; every expected instruction
; below is scalar (s_*). With the amount fixed at 65 the expected code has no
; compares or selects: it consists only of 64-bit left shifts by 1, 32-bit
; right shifts by 31, an s_mov_b32 of 0 for the upper half of the register
; pair that is ORed in, and 64-bit ORs.
; The GFX6, GFX8 and GFX9 expectations are the same sequence; the GFX10
; expectation uses the same operations with different registers and ordering.
5606 define amdgpu_ps i128 @s_fshl_i128_65(i128 inreg %lhs, i128 inreg %rhs) {
5607 ; GFX6-LABEL: s_fshl_i128_65:
5609 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
5610 ; GFX6-NEXT: s_lshr_b32 s4, s5, 31
5611 ; GFX6-NEXT: s_mov_b32 s5, 0
5612 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
5613 ; GFX6-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
5614 ; GFX6-NEXT: s_lshr_b32 s4, s7, 31
5615 ; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
5616 ; GFX6-NEXT: ; return to shader part epilog
5618 ; GFX8-LABEL: s_fshl_i128_65:
5620 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
5621 ; GFX8-NEXT: s_lshr_b32 s4, s5, 31
5622 ; GFX8-NEXT: s_mov_b32 s5, 0
5623 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
5624 ; GFX8-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
5625 ; GFX8-NEXT: s_lshr_b32 s4, s7, 31
5626 ; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
5627 ; GFX8-NEXT: ; return to shader part epilog
5629 ; GFX9-LABEL: s_fshl_i128_65:
5631 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
5632 ; GFX9-NEXT: s_lshr_b32 s4, s5, 31
5633 ; GFX9-NEXT: s_mov_b32 s5, 0
5634 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
5635 ; GFX9-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
5636 ; GFX9-NEXT: s_lshr_b32 s4, s7, 31
5637 ; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
5638 ; GFX9-NEXT: ; return to shader part epilog
5640 ; GFX10-LABEL: s_fshl_i128_65:
5642 ; GFX10-NEXT: s_lshr_b32 s2, s5, 31
5643 ; GFX10-NEXT: s_mov_b32 s3, 0
5644 ; GFX10-NEXT: s_lshl_b64 s[4:5], s[6:7], 1
5645 ; GFX10-NEXT: s_lshl_b64 s[8:9], s[0:1], 1
5646 ; GFX10-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
5647 ; GFX10-NEXT: s_lshr_b32 s2, s7, 31
5648 ; GFX10-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
5649 ; GFX10-NEXT: ; return to shader part epilog
5650 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 65)
; Funnel-shift-left of two i128 values by the constant amount 65, declared
; without a shader calling convention and with non-inreg arguments. The
; expected code is made of vector (v_*) shifts and ORs, begins with an
; s_waitcnt and ends with s_setpc_b64 s[30:31].
; As in the expectations below, only v0 and v2 receive a v_or_b32 with the
; single bit produced by the 31-bit right shifts.
; Per-target differences visible in the expectations: GFX6 uses v_lshl_b64
; with the shift amount as the last operand, while GFX8, GFX9 and GFX10 use
; v_lshlrev_b64 with the amount first; GFX10 additionally expects
; s_waitcnt_vscnt null, 0x0 on entry and keeps the two extracted bits in
; separate registers (v4 and v5).
5654 define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
5655 ; GFX6-LABEL: v_fshl_i128_65:
5657 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5658 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[0:1], 1
5659 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[6:7], 1
5660 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v5
5661 ; GFX6-NEXT: v_or_b32_e32 v0, v4, v0
5662 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v7
5663 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
5664 ; GFX6-NEXT: s_setpc_b64 s[30:31]
5666 ; GFX8-LABEL: v_fshl_i128_65:
5668 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5669 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1]
5670 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7]
5671 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v5
5672 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
5673 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v7
5674 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v4
5675 ; GFX8-NEXT: s_setpc_b64 s[30:31]
5677 ; GFX9-LABEL: v_fshl_i128_65:
5679 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5680 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1]
5681 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7]
5682 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v5
5683 ; GFX9-NEXT: v_or_b32_e32 v0, v4, v0
5684 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v7
5685 ; GFX9-NEXT: v_or_b32_e32 v2, v2, v4
5686 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5688 ; GFX10-LABEL: v_fshl_i128_65:
5690 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5691 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
5692 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1]
5693 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7]
5694 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 31, v5
5695 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 31, v7
5696 ; GFX10-NEXT: v_or_b32_e32 v0, v4, v0
5697 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v5
5698 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5699 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 65)
; Funnel-shift-left on <2 x i128> with a variable amount vector, all three
; operands passed inreg to an amdgpu_ps shader; every expected instruction
; below is scalar (s_*). The function returns the result of the
; @llvm.fshl.v2i128 call unchanged.
; What the expectations show, once per vector element:
;   - the amount is ANDed with 0x7f (s_and_b64) and the same mask is applied
;     with s_andn2_b64 to obtain a second, complementary amount;
;   - a 128-bit variable left shift and a 128-bit variable right shift, each
;     built from 64-bit shifts plus compares against 64 and 0 that drive
;     s_cselect_b64;
;   - the right-shifted operand is first shifted right by one bit
;     (s_lshr_b64 by 1 together with s_lshl_b32 by 31);
;   - the two shifted values are merged with s_or_b64.
; The GFX6, GFX8 and GFX9 expectations are the same instruction sequence;
; GFX10 performs the same operations with different register allocation and
; instruction ordering.
5703 define amdgpu_ps <2 x i128> @s_fshl_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) {
5704 ; GFX6-LABEL: s_fshl_v2i128:
5706 ; GFX6-NEXT: s_mov_b64 s[18:19], 0x7f
5707 ; GFX6-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19]
5708 ; GFX6-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17]
5709 ; GFX6-NEXT: s_sub_i32 s17, s22, 64
5710 ; GFX6-NEXT: s_sub_i32 s23, 64, s22
5711 ; GFX6-NEXT: s_cmp_lt_u32 s22, 64
5712 ; GFX6-NEXT: s_cselect_b32 s28, 1, 0
5713 ; GFX6-NEXT: s_cmp_eq_u32 s22, 0
5714 ; GFX6-NEXT: s_cselect_b32 s29, 1, 0
5715 ; GFX6-NEXT: s_lshl_b64 s[24:25], s[0:1], s22
5716 ; GFX6-NEXT: s_lshr_b64 s[26:27], s[0:1], s23
5717 ; GFX6-NEXT: s_lshl_b64 s[22:23], s[2:3], s22
5718 ; GFX6-NEXT: s_or_b64 s[22:23], s[26:27], s[22:23]
5719 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s17
5720 ; GFX6-NEXT: s_cmp_lg_u32 s28, 0
5721 ; GFX6-NEXT: s_cselect_b64 s[24:25], s[24:25], 0
5722 ; GFX6-NEXT: s_cselect_b64 s[0:1], s[22:23], s[0:1]
5723 ; GFX6-NEXT: s_cmp_lg_u32 s29, 0
5724 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
5725 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[8:9], 1
5726 ; GFX6-NEXT: s_lshl_b32 s9, s10, 31
5727 ; GFX6-NEXT: s_mov_b32 s8, s19
5728 ; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
5729 ; GFX6-NEXT: s_lshr_b64 s[8:9], s[10:11], 1
5730 ; GFX6-NEXT: s_sub_i32 s26, s16, 64
5731 ; GFX6-NEXT: s_sub_i32 s22, 64, s16
5732 ; GFX6-NEXT: s_cmp_lt_u32 s16, 64
5733 ; GFX6-NEXT: s_cselect_b32 s27, 1, 0
5734 ; GFX6-NEXT: s_cmp_eq_u32 s16, 0
5735 ; GFX6-NEXT: s_cselect_b32 s28, 1, 0
5736 ; GFX6-NEXT: s_lshr_b64 s[10:11], s[8:9], s16
5737 ; GFX6-NEXT: s_lshr_b64 s[16:17], s[0:1], s16
5738 ; GFX6-NEXT: s_lshl_b64 s[22:23], s[8:9], s22
5739 ; GFX6-NEXT: s_or_b64 s[16:17], s[16:17], s[22:23]
5740 ; GFX6-NEXT: s_lshr_b64 s[8:9], s[8:9], s26
5741 ; GFX6-NEXT: s_cmp_lg_u32 s27, 0
5742 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[16:17], s[8:9]
5743 ; GFX6-NEXT: s_cmp_lg_u32 s28, 0
5744 ; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9]
5745 ; GFX6-NEXT: s_cmp_lg_u32 s27, 0
5746 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[10:11], 0
5747 ; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
5748 ; GFX6-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19]
5749 ; GFX6-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21]
5750 ; GFX6-NEXT: s_or_b64 s[0:1], s[24:25], s[0:1]
5751 ; GFX6-NEXT: s_sub_i32 s11, s8, 64
5752 ; GFX6-NEXT: s_sub_i32 s9, 64, s8
5753 ; GFX6-NEXT: s_cmp_lt_u32 s8, 64
5754 ; GFX6-NEXT: s_cselect_b32 s18, 1, 0
5755 ; GFX6-NEXT: s_cmp_eq_u32 s8, 0
5756 ; GFX6-NEXT: s_cselect_b32 s22, 1, 0
5757 ; GFX6-NEXT: s_lshl_b64 s[16:17], s[4:5], s8
5758 ; GFX6-NEXT: s_lshr_b64 s[20:21], s[4:5], s9
5759 ; GFX6-NEXT: s_lshl_b64 s[8:9], s[6:7], s8
5760 ; GFX6-NEXT: s_or_b64 s[8:9], s[20:21], s[8:9]
5761 ; GFX6-NEXT: s_lshl_b64 s[4:5], s[4:5], s11
5762 ; GFX6-NEXT: s_cmp_lg_u32 s18, 0
5763 ; GFX6-NEXT: s_cselect_b64 s[16:17], s[16:17], 0
5764 ; GFX6-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
5765 ; GFX6-NEXT: s_cmp_lg_u32 s22, 0
5766 ; GFX6-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5]
5767 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[12:13], 1
5768 ; GFX6-NEXT: s_lshl_b32 s9, s14, 31
5769 ; GFX6-NEXT: s_mov_b32 s8, s19
5770 ; GFX6-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
5771 ; GFX6-NEXT: s_lshr_b64 s[8:9], s[14:15], 1
5772 ; GFX6-NEXT: s_sub_i32 s18, s10, 64
5773 ; GFX6-NEXT: s_sub_i32 s14, 64, s10
5774 ; GFX6-NEXT: s_cmp_lt_u32 s10, 64
5775 ; GFX6-NEXT: s_cselect_b32 s19, 1, 0
5776 ; GFX6-NEXT: s_cmp_eq_u32 s10, 0
5777 ; GFX6-NEXT: s_cselect_b32 s20, 1, 0
5778 ; GFX6-NEXT: s_lshr_b64 s[12:13], s[8:9], s10
5779 ; GFX6-NEXT: s_lshr_b64 s[10:11], s[4:5], s10
5780 ; GFX6-NEXT: s_lshl_b64 s[14:15], s[8:9], s14
5781 ; GFX6-NEXT: s_or_b64 s[10:11], s[10:11], s[14:15]
5782 ; GFX6-NEXT: s_lshr_b64 s[8:9], s[8:9], s18
5783 ; GFX6-NEXT: s_cmp_lg_u32 s19, 0
5784 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[10:11], s[8:9]
5785 ; GFX6-NEXT: s_cmp_lg_u32 s20, 0
5786 ; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9]
5787 ; GFX6-NEXT: s_cmp_lg_u32 s19, 0
5788 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[12:13], 0
5789 ; GFX6-NEXT: s_or_b64 s[4:5], s[16:17], s[4:5]
5790 ; GFX6-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
5791 ; GFX6-NEXT: ; return to shader part epilog
5793 ; GFX8-LABEL: s_fshl_v2i128:
5795 ; GFX8-NEXT: s_mov_b64 s[18:19], 0x7f
5796 ; GFX8-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19]
5797 ; GFX8-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17]
5798 ; GFX8-NEXT: s_sub_i32 s17, s22, 64
5799 ; GFX8-NEXT: s_sub_i32 s23, 64, s22
5800 ; GFX8-NEXT: s_cmp_lt_u32 s22, 64
5801 ; GFX8-NEXT: s_cselect_b32 s28, 1, 0
5802 ; GFX8-NEXT: s_cmp_eq_u32 s22, 0
5803 ; GFX8-NEXT: s_cselect_b32 s29, 1, 0
5804 ; GFX8-NEXT: s_lshl_b64 s[24:25], s[0:1], s22
5805 ; GFX8-NEXT: s_lshr_b64 s[26:27], s[0:1], s23
5806 ; GFX8-NEXT: s_lshl_b64 s[22:23], s[2:3], s22
5807 ; GFX8-NEXT: s_or_b64 s[22:23], s[26:27], s[22:23]
5808 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s17
5809 ; GFX8-NEXT: s_cmp_lg_u32 s28, 0
5810 ; GFX8-NEXT: s_cselect_b64 s[24:25], s[24:25], 0
5811 ; GFX8-NEXT: s_cselect_b64 s[0:1], s[22:23], s[0:1]
5812 ; GFX8-NEXT: s_cmp_lg_u32 s29, 0
5813 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
5814 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[8:9], 1
5815 ; GFX8-NEXT: s_lshl_b32 s9, s10, 31
5816 ; GFX8-NEXT: s_mov_b32 s8, s19
5817 ; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
5818 ; GFX8-NEXT: s_lshr_b64 s[8:9], s[10:11], 1
5819 ; GFX8-NEXT: s_sub_i32 s26, s16, 64
5820 ; GFX8-NEXT: s_sub_i32 s22, 64, s16
5821 ; GFX8-NEXT: s_cmp_lt_u32 s16, 64
5822 ; GFX8-NEXT: s_cselect_b32 s27, 1, 0
5823 ; GFX8-NEXT: s_cmp_eq_u32 s16, 0
5824 ; GFX8-NEXT: s_cselect_b32 s28, 1, 0
5825 ; GFX8-NEXT: s_lshr_b64 s[10:11], s[8:9], s16
5826 ; GFX8-NEXT: s_lshr_b64 s[16:17], s[0:1], s16
5827 ; GFX8-NEXT: s_lshl_b64 s[22:23], s[8:9], s22
5828 ; GFX8-NEXT: s_or_b64 s[16:17], s[16:17], s[22:23]
5829 ; GFX8-NEXT: s_lshr_b64 s[8:9], s[8:9], s26
5830 ; GFX8-NEXT: s_cmp_lg_u32 s27, 0
5831 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[16:17], s[8:9]
5832 ; GFX8-NEXT: s_cmp_lg_u32 s28, 0
5833 ; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9]
5834 ; GFX8-NEXT: s_cmp_lg_u32 s27, 0
5835 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[10:11], 0
5836 ; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
5837 ; GFX8-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19]
5838 ; GFX8-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21]
5839 ; GFX8-NEXT: s_or_b64 s[0:1], s[24:25], s[0:1]
5840 ; GFX8-NEXT: s_sub_i32 s11, s8, 64
5841 ; GFX8-NEXT: s_sub_i32 s9, 64, s8
5842 ; GFX8-NEXT: s_cmp_lt_u32 s8, 64
5843 ; GFX8-NEXT: s_cselect_b32 s18, 1, 0
5844 ; GFX8-NEXT: s_cmp_eq_u32 s8, 0
5845 ; GFX8-NEXT: s_cselect_b32 s22, 1, 0
5846 ; GFX8-NEXT: s_lshl_b64 s[16:17], s[4:5], s8
5847 ; GFX8-NEXT: s_lshr_b64 s[20:21], s[4:5], s9
5848 ; GFX8-NEXT: s_lshl_b64 s[8:9], s[6:7], s8
5849 ; GFX8-NEXT: s_or_b64 s[8:9], s[20:21], s[8:9]
5850 ; GFX8-NEXT: s_lshl_b64 s[4:5], s[4:5], s11
5851 ; GFX8-NEXT: s_cmp_lg_u32 s18, 0
5852 ; GFX8-NEXT: s_cselect_b64 s[16:17], s[16:17], 0
5853 ; GFX8-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
5854 ; GFX8-NEXT: s_cmp_lg_u32 s22, 0
5855 ; GFX8-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5]
5856 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[12:13], 1
5857 ; GFX8-NEXT: s_lshl_b32 s9, s14, 31
5858 ; GFX8-NEXT: s_mov_b32 s8, s19
5859 ; GFX8-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
5860 ; GFX8-NEXT: s_lshr_b64 s[8:9], s[14:15], 1
5861 ; GFX8-NEXT: s_sub_i32 s18, s10, 64
5862 ; GFX8-NEXT: s_sub_i32 s14, 64, s10
5863 ; GFX8-NEXT: s_cmp_lt_u32 s10, 64
5864 ; GFX8-NEXT: s_cselect_b32 s19, 1, 0
5865 ; GFX8-NEXT: s_cmp_eq_u32 s10, 0
5866 ; GFX8-NEXT: s_cselect_b32 s20, 1, 0
5867 ; GFX8-NEXT: s_lshr_b64 s[12:13], s[8:9], s10
5868 ; GFX8-NEXT: s_lshr_b64 s[10:11], s[4:5], s10
5869 ; GFX8-NEXT: s_lshl_b64 s[14:15], s[8:9], s14
5870 ; GFX8-NEXT: s_or_b64 s[10:11], s[10:11], s[14:15]
5871 ; GFX8-NEXT: s_lshr_b64 s[8:9], s[8:9], s18
5872 ; GFX8-NEXT: s_cmp_lg_u32 s19, 0
5873 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[10:11], s[8:9]
5874 ; GFX8-NEXT: s_cmp_lg_u32 s20, 0
5875 ; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9]
5876 ; GFX8-NEXT: s_cmp_lg_u32 s19, 0
5877 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[12:13], 0
5878 ; GFX8-NEXT: s_or_b64 s[4:5], s[16:17], s[4:5]
5879 ; GFX8-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
5880 ; GFX8-NEXT: ; return to shader part epilog
5882 ; GFX9-LABEL: s_fshl_v2i128:
5884 ; GFX9-NEXT: s_mov_b64 s[18:19], 0x7f
5885 ; GFX9-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19]
5886 ; GFX9-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17]
5887 ; GFX9-NEXT: s_sub_i32 s17, s22, 64
5888 ; GFX9-NEXT: s_sub_i32 s23, 64, s22
5889 ; GFX9-NEXT: s_cmp_lt_u32 s22, 64
5890 ; GFX9-NEXT: s_cselect_b32 s28, 1, 0
5891 ; GFX9-NEXT: s_cmp_eq_u32 s22, 0
5892 ; GFX9-NEXT: s_cselect_b32 s29, 1, 0
5893 ; GFX9-NEXT: s_lshl_b64 s[24:25], s[0:1], s22
5894 ; GFX9-NEXT: s_lshr_b64 s[26:27], s[0:1], s23
5895 ; GFX9-NEXT: s_lshl_b64 s[22:23], s[2:3], s22
5896 ; GFX9-NEXT: s_or_b64 s[22:23], s[26:27], s[22:23]
5897 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s17
5898 ; GFX9-NEXT: s_cmp_lg_u32 s28, 0
5899 ; GFX9-NEXT: s_cselect_b64 s[24:25], s[24:25], 0
5900 ; GFX9-NEXT: s_cselect_b64 s[0:1], s[22:23], s[0:1]
5901 ; GFX9-NEXT: s_cmp_lg_u32 s29, 0
5902 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
5903 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[8:9], 1
5904 ; GFX9-NEXT: s_lshl_b32 s9, s10, 31
5905 ; GFX9-NEXT: s_mov_b32 s8, s19
5906 ; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
5907 ; GFX9-NEXT: s_lshr_b64 s[8:9], s[10:11], 1
5908 ; GFX9-NEXT: s_sub_i32 s26, s16, 64
5909 ; GFX9-NEXT: s_sub_i32 s22, 64, s16
5910 ; GFX9-NEXT: s_cmp_lt_u32 s16, 64
5911 ; GFX9-NEXT: s_cselect_b32 s27, 1, 0
5912 ; GFX9-NEXT: s_cmp_eq_u32 s16, 0
5913 ; GFX9-NEXT: s_cselect_b32 s28, 1, 0
5914 ; GFX9-NEXT: s_lshr_b64 s[10:11], s[8:9], s16
5915 ; GFX9-NEXT: s_lshr_b64 s[16:17], s[0:1], s16
5916 ; GFX9-NEXT: s_lshl_b64 s[22:23], s[8:9], s22
5917 ; GFX9-NEXT: s_or_b64 s[16:17], s[16:17], s[22:23]
5918 ; GFX9-NEXT: s_lshr_b64 s[8:9], s[8:9], s26
5919 ; GFX9-NEXT: s_cmp_lg_u32 s27, 0
5920 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[16:17], s[8:9]
5921 ; GFX9-NEXT: s_cmp_lg_u32 s28, 0
5922 ; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9]
5923 ; GFX9-NEXT: s_cmp_lg_u32 s27, 0
5924 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[10:11], 0
5925 ; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
5926 ; GFX9-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19]
5927 ; GFX9-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21]
5928 ; GFX9-NEXT: s_or_b64 s[0:1], s[24:25], s[0:1]
5929 ; GFX9-NEXT: s_sub_i32 s11, s8, 64
5930 ; GFX9-NEXT: s_sub_i32 s9, 64, s8
5931 ; GFX9-NEXT: s_cmp_lt_u32 s8, 64
5932 ; GFX9-NEXT: s_cselect_b32 s18, 1, 0
5933 ; GFX9-NEXT: s_cmp_eq_u32 s8, 0
5934 ; GFX9-NEXT: s_cselect_b32 s22, 1, 0
5935 ; GFX9-NEXT: s_lshl_b64 s[16:17], s[4:5], s8
5936 ; GFX9-NEXT: s_lshr_b64 s[20:21], s[4:5], s9
5937 ; GFX9-NEXT: s_lshl_b64 s[8:9], s[6:7], s8
5938 ; GFX9-NEXT: s_or_b64 s[8:9], s[20:21], s[8:9]
5939 ; GFX9-NEXT: s_lshl_b64 s[4:5], s[4:5], s11
5940 ; GFX9-NEXT: s_cmp_lg_u32 s18, 0
5941 ; GFX9-NEXT: s_cselect_b64 s[16:17], s[16:17], 0
5942 ; GFX9-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
5943 ; GFX9-NEXT: s_cmp_lg_u32 s22, 0
5944 ; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5]
5945 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[12:13], 1
5946 ; GFX9-NEXT: s_lshl_b32 s9, s14, 31
5947 ; GFX9-NEXT: s_mov_b32 s8, s19
5948 ; GFX9-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
5949 ; GFX9-NEXT: s_lshr_b64 s[8:9], s[14:15], 1
5950 ; GFX9-NEXT: s_sub_i32 s18, s10, 64
5951 ; GFX9-NEXT: s_sub_i32 s14, 64, s10
5952 ; GFX9-NEXT: s_cmp_lt_u32 s10, 64
5953 ; GFX9-NEXT: s_cselect_b32 s19, 1, 0
5954 ; GFX9-NEXT: s_cmp_eq_u32 s10, 0
5955 ; GFX9-NEXT: s_cselect_b32 s20, 1, 0
5956 ; GFX9-NEXT: s_lshr_b64 s[12:13], s[8:9], s10
5957 ; GFX9-NEXT: s_lshr_b64 s[10:11], s[4:5], s10
5958 ; GFX9-NEXT: s_lshl_b64 s[14:15], s[8:9], s14
5959 ; GFX9-NEXT: s_or_b64 s[10:11], s[10:11], s[14:15]
5960 ; GFX9-NEXT: s_lshr_b64 s[8:9], s[8:9], s18
5961 ; GFX9-NEXT: s_cmp_lg_u32 s19, 0
5962 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[10:11], s[8:9]
5963 ; GFX9-NEXT: s_cmp_lg_u32 s20, 0
5964 ; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9]
5965 ; GFX9-NEXT: s_cmp_lg_u32 s19, 0
5966 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[12:13], 0
5967 ; GFX9-NEXT: s_or_b64 s[4:5], s[16:17], s[4:5]
5968 ; GFX9-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
5969 ; GFX9-NEXT: ; return to shader part epilog
5971 ; GFX10-LABEL: s_fshl_v2i128:
5973 ; GFX10-NEXT: s_mov_b64 s[18:19], 0x7f
5974 ; GFX10-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19]
5975 ; GFX10-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17]
5976 ; GFX10-NEXT: s_sub_i32 s17, s22, 64
5977 ; GFX10-NEXT: s_sub_i32 s23, 64, s22
5978 ; GFX10-NEXT: s_cmp_lt_u32 s22, 64
5979 ; GFX10-NEXT: s_cselect_b32 s28, 1, 0
5980 ; GFX10-NEXT: s_cmp_eq_u32 s22, 0
5981 ; GFX10-NEXT: s_cselect_b32 s29, 1, 0
5982 ; GFX10-NEXT: s_lshr_b64 s[24:25], s[0:1], s23
5983 ; GFX10-NEXT: s_lshl_b64 s[26:27], s[2:3], s22
5984 ; GFX10-NEXT: s_lshl_b64 s[22:23], s[0:1], s22
5985 ; GFX10-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27]
5986 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s17
5987 ; GFX10-NEXT: s_cmp_lg_u32 s28, 0
5988 ; GFX10-NEXT: s_cselect_b64 s[22:23], s[22:23], 0
5989 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1]
5990 ; GFX10-NEXT: s_cmp_lg_u32 s29, 0
5991 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
5992 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[8:9], 1
5993 ; GFX10-NEXT: s_lshl_b32 s9, s10, 31
5994 ; GFX10-NEXT: s_mov_b32 s8, s19
5995 ; GFX10-NEXT: s_sub_i32 s26, s16, 64
5996 ; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
5997 ; GFX10-NEXT: s_lshr_b64 s[8:9], s[10:11], 1
5998 ; GFX10-NEXT: s_sub_i32 s17, 64, s16
5999 ; GFX10-NEXT: s_cmp_lt_u32 s16, 64
6000 ; GFX10-NEXT: s_cselect_b32 s27, 1, 0
6001 ; GFX10-NEXT: s_cmp_eq_u32 s16, 0
6002 ; GFX10-NEXT: s_cselect_b32 s28, 1, 0
6003 ; GFX10-NEXT: s_lshr_b64 s[10:11], s[0:1], s16
6004 ; GFX10-NEXT: s_lshl_b64 s[24:25], s[8:9], s17
6005 ; GFX10-NEXT: s_lshr_b64 s[16:17], s[8:9], s16
6006 ; GFX10-NEXT: s_or_b64 s[10:11], s[10:11], s[24:25]
6007 ; GFX10-NEXT: s_lshr_b64 s[8:9], s[8:9], s26
6008 ; GFX10-NEXT: s_cmp_lg_u32 s27, 0
6009 ; GFX10-NEXT: s_cselect_b64 s[8:9], s[10:11], s[8:9]
6010 ; GFX10-NEXT: s_cmp_lg_u32 s28, 0
6011 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9]
6012 ; GFX10-NEXT: s_cmp_lg_u32 s27, 0
6013 ; GFX10-NEXT: s_cselect_b64 s[8:9], s[16:17], 0
6014 ; GFX10-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21]
6015 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
6016 ; GFX10-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19]
6017 ; GFX10-NEXT: s_or_b64 s[0:1], s[22:23], s[0:1]
6018 ; GFX10-NEXT: s_sub_i32 s11, s8, 64
6019 ; GFX10-NEXT: s_sub_i32 s9, 64, s8
6020 ; GFX10-NEXT: s_cmp_lt_u32 s8, 64
6021 ; GFX10-NEXT: s_cselect_b32 s18, 1, 0
6022 ; GFX10-NEXT: s_cmp_eq_u32 s8, 0
6023 ; GFX10-NEXT: s_cselect_b32 s22, 1, 0
6024 ; GFX10-NEXT: s_lshr_b64 s[16:17], s[4:5], s9
6025 ; GFX10-NEXT: s_lshl_b64 s[20:21], s[6:7], s8
6026 ; GFX10-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
6027 ; GFX10-NEXT: s_or_b64 s[16:17], s[16:17], s[20:21]
6028 ; GFX10-NEXT: s_lshl_b64 s[4:5], s[4:5], s11
6029 ; GFX10-NEXT: s_cmp_lg_u32 s18, 0
6030 ; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0
6031 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[16:17], s[4:5]
6032 ; GFX10-NEXT: s_cmp_lg_u32 s22, 0
6033 ; GFX10-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5]
6034 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[12:13], 1
6035 ; GFX10-NEXT: s_lshl_b32 s13, s14, 31
6036 ; GFX10-NEXT: s_mov_b32 s12, s19
6037 ; GFX10-NEXT: s_sub_i32 s18, s10, 64
6038 ; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[12:13]
6039 ; GFX10-NEXT: s_lshr_b64 s[12:13], s[14:15], 1
6040 ; GFX10-NEXT: s_sub_i32 s11, 64, s10
6041 ; GFX10-NEXT: s_cmp_lt_u32 s10, 64
6042 ; GFX10-NEXT: s_cselect_b32 s19, 1, 0
6043 ; GFX10-NEXT: s_cmp_eq_u32 s10, 0
6044 ; GFX10-NEXT: s_cselect_b32 s20, 1, 0
6045 ; GFX10-NEXT: s_lshr_b64 s[14:15], s[4:5], s10
6046 ; GFX10-NEXT: s_lshl_b64 s[16:17], s[12:13], s11
6047 ; GFX10-NEXT: s_lshr_b64 s[10:11], s[12:13], s10
6048 ; GFX10-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17]
6049 ; GFX10-NEXT: s_lshr_b64 s[12:13], s[12:13], s18
6050 ; GFX10-NEXT: s_cmp_lg_u32 s19, 0
6051 ; GFX10-NEXT: s_cselect_b64 s[12:13], s[14:15], s[12:13]
6052 ; GFX10-NEXT: s_cmp_lg_u32 s20, 0
6053 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], s[12:13]
6054 ; GFX10-NEXT: s_cmp_lg_u32 s19, 0
6055 ; GFX10-NEXT: s_cselect_b64 s[10:11], s[10:11], 0
6056 ; GFX10-NEXT: s_or_b64 s[4:5], s[8:9], s[4:5]
6057 ; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11]
6058 ; GFX10-NEXT: ; return to shader part epilog
6059 %result = call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
6060 ret <2 x i128> %result
6063 define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) {
6064 ; GFX6-LABEL: v_fshl_v2i128:
6066 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6067 ; GFX6-NEXT: s_movk_i32 s6, 0x7f
6068 ; GFX6-NEXT: v_and_b32_e32 v23, s6, v16
6069 ; GFX6-NEXT: v_sub_i32_e32 v17, vcc, 64, v23
6070 ; GFX6-NEXT: v_lshr_b64 v[17:18], v[0:1], v17
6071 ; GFX6-NEXT: v_lshl_b64 v[21:22], v[2:3], v23
6072 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[8:9], 1
6073 ; GFX6-NEXT: v_xor_b32_e32 v16, -1, v16
6074 ; GFX6-NEXT: v_or_b32_e32 v21, v17, v21
6075 ; GFX6-NEXT: v_lshlrev_b32_e32 v17, 31, v10
6076 ; GFX6-NEXT: v_and_b32_e32 v24, s6, v16
6077 ; GFX6-NEXT: v_lshr_b64 v[10:11], v[10:11], 1
6078 ; GFX6-NEXT: v_or_b32_e32 v9, v9, v17
6079 ; GFX6-NEXT: v_sub_i32_e32 v16, vcc, 64, v24
6080 ; GFX6-NEXT: v_or_b32_e32 v22, v18, v22
6081 ; GFX6-NEXT: v_lshl_b64 v[16:17], v[10:11], v16
6082 ; GFX6-NEXT: v_lshr_b64 v[18:19], v[8:9], v24
6083 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23
6084 ; GFX6-NEXT: v_or_b32_e32 v18, v18, v16
6085 ; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, 64, v23
6086 ; GFX6-NEXT: v_or_b32_e32 v19, v19, v17
6087 ; GFX6-NEXT: v_lshl_b64 v[16:17], v[0:1], v16
6088 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v23
6089 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23
6090 ; GFX6-NEXT: v_cndmask_b32_e32 v25, 0, v0, vcc
6091 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v21, vcc
6092 ; GFX6-NEXT: v_cndmask_b32_e32 v16, v17, v22, vcc
6093 ; GFX6-NEXT: v_cndmask_b32_e64 v17, v0, v2, s[4:5]
6094 ; GFX6-NEXT: v_cndmask_b32_e64 v16, v16, v3, s[4:5]
6095 ; GFX6-NEXT: v_subrev_i32_e64 v0, s[4:5], 64, v24
6096 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[10:11], v0
6097 ; GFX6-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v24
6098 ; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[4:5]
6099 ; GFX6-NEXT: v_cndmask_b32_e32 v18, 0, v1, vcc
6100 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[10:11], v24
6101 ; GFX6-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[4:5]
6102 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24
6103 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
6104 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc
6105 ; GFX6-NEXT: v_cndmask_b32_e64 v8, 0, v0, s[4:5]
6106 ; GFX6-NEXT: v_cndmask_b32_e64 v9, 0, v1, s[4:5]
6107 ; GFX6-NEXT: v_or_b32_e32 v0, v25, v2
6108 ; GFX6-NEXT: v_or_b32_e32 v1, v18, v3
6109 ; GFX6-NEXT: v_or_b32_e32 v2, v17, v8
6110 ; GFX6-NEXT: v_or_b32_e32 v3, v16, v9
6111 ; GFX6-NEXT: v_and_b32_e32 v16, s6, v20
6112 ; GFX6-NEXT: v_xor_b32_e32 v8, -1, v20
6113 ; GFX6-NEXT: v_and_b32_e32 v17, s6, v8
6114 ; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v16
6115 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[4:5], v8
6116 ; GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], v16
6117 ; GFX6-NEXT: v_subrev_i32_e32 v18, vcc, 64, v16
6118 ; GFX6-NEXT: v_or_b32_e32 v10, v8, v10
6119 ; GFX6-NEXT: v_or_b32_e32 v11, v9, v11
6120 ; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], v16
6121 ; GFX6-NEXT: v_lshl_b64 v[4:5], v[4:5], v18
6122 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16
6123 ; GFX6-NEXT: v_cndmask_b32_e32 v18, 0, v8, vcc
6124 ; GFX6-NEXT: v_cndmask_b32_e32 v19, 0, v9, vcc
6125 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc
6126 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc
6127 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16
6128 ; GFX6-NEXT: v_cndmask_b32_e32 v16, v4, v6, vcc
6129 ; GFX6-NEXT: v_cndmask_b32_e32 v20, v5, v7, vcc
6130 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[12:13], 1
6131 ; GFX6-NEXT: v_lshlrev_b32_e32 v6, 31, v14
6132 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v6
6133 ; GFX6-NEXT: v_lshr_b64 v[6:7], v[14:15], 1
6134 ; GFX6-NEXT: v_sub_i32_e32 v10, vcc, 64, v17
6135 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[4:5], v17
6136 ; GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], v10
6137 ; GFX6-NEXT: v_subrev_i32_e32 v12, vcc, 64, v17
6138 ; GFX6-NEXT: v_or_b32_e32 v10, v8, v10
6139 ; GFX6-NEXT: v_or_b32_e32 v11, v9, v11
6140 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[6:7], v17
6141 ; GFX6-NEXT: v_lshr_b64 v[6:7], v[6:7], v12
6142 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17
6143 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
6144 ; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc
6145 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v17
6146 ; GFX6-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[4:5]
6147 ; GFX6-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5]
6148 ; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc
6149 ; GFX6-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc
6150 ; GFX6-NEXT: v_or_b32_e32 v4, v18, v4
6151 ; GFX6-NEXT: v_or_b32_e32 v5, v19, v5
6152 ; GFX6-NEXT: v_or_b32_e32 v6, v16, v6
6153 ; GFX6-NEXT: v_or_b32_e32 v7, v20, v7
6154 ; GFX6-NEXT: s_setpc_b64 s[30:31]
6156 ; GFX8-LABEL: v_fshl_v2i128:
6158 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6159 ; GFX8-NEXT: s_movk_i32 s6, 0x7f
6160 ; GFX8-NEXT: v_and_b32_e32 v23, s6, v16
6161 ; GFX8-NEXT: v_sub_u32_e32 v17, vcc, 64, v23
6162 ; GFX8-NEXT: v_lshrrev_b64 v[17:18], v17, v[0:1]
6163 ; GFX8-NEXT: v_lshlrev_b64 v[21:22], v23, v[2:3]
6164 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9]
6165 ; GFX8-NEXT: v_xor_b32_e32 v16, -1, v16
6166 ; GFX8-NEXT: v_or_b32_e32 v21, v17, v21
6167 ; GFX8-NEXT: v_lshlrev_b32_e32 v17, 31, v10
6168 ; GFX8-NEXT: v_and_b32_e32 v24, s6, v16
6169 ; GFX8-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11]
6170 ; GFX8-NEXT: v_or_b32_e32 v9, v9, v17
6171 ; GFX8-NEXT: v_sub_u32_e32 v16, vcc, 64, v24
6172 ; GFX8-NEXT: v_or_b32_e32 v22, v18, v22
6173 ; GFX8-NEXT: v_lshlrev_b64 v[16:17], v16, v[10:11]
6174 ; GFX8-NEXT: v_lshrrev_b64 v[18:19], v24, v[8:9]
6175 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23
6176 ; GFX8-NEXT: v_or_b32_e32 v18, v18, v16
6177 ; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, 64, v23
6178 ; GFX8-NEXT: v_or_b32_e32 v19, v19, v17
6179 ; GFX8-NEXT: v_lshlrev_b64 v[16:17], v16, v[0:1]
6180 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v23, v[0:1]
6181 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23
6182 ; GFX8-NEXT: v_cndmask_b32_e32 v25, 0, v0, vcc
6183 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v21, vcc
6184 ; GFX8-NEXT: v_cndmask_b32_e32 v16, v17, v22, vcc
6185 ; GFX8-NEXT: v_cndmask_b32_e64 v17, v0, v2, s[4:5]
6186 ; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v3, s[4:5]
6187 ; GFX8-NEXT: v_subrev_u32_e64 v0, s[4:5], 64, v24
6188 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], v0, v[10:11]
6189 ; GFX8-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v24
6190 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[4:5]
6191 ; GFX8-NEXT: v_cndmask_b32_e32 v18, 0, v1, vcc
6192 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11]
6193 ; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[4:5]
6194 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24
6195 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
6196 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc
6197 ; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v0, s[4:5]
6198 ; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, v1, s[4:5]
6199 ; GFX8-NEXT: v_or_b32_e32 v0, v25, v2
6200 ; GFX8-NEXT: v_or_b32_e32 v1, v18, v3
6201 ; GFX8-NEXT: v_or_b32_e32 v2, v17, v8
6202 ; GFX8-NEXT: v_or_b32_e32 v3, v16, v9
6203 ; GFX8-NEXT: v_and_b32_e32 v16, s6, v20
6204 ; GFX8-NEXT: v_xor_b32_e32 v8, -1, v20
6205 ; GFX8-NEXT: v_and_b32_e32 v17, s6, v8
6206 ; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v16
6207 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], v8, v[4:5]
6208 ; GFX8-NEXT: v_lshlrev_b64 v[10:11], v16, v[6:7]
6209 ; GFX8-NEXT: v_subrev_u32_e32 v18, vcc, 64, v16
6210 ; GFX8-NEXT: v_or_b32_e32 v10, v8, v10
6211 ; GFX8-NEXT: v_or_b32_e32 v11, v9, v11
6212 ; GFX8-NEXT: v_lshlrev_b64 v[8:9], v16, v[4:5]
6213 ; GFX8-NEXT: v_lshlrev_b64 v[4:5], v18, v[4:5]
6214 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16
6215 ; GFX8-NEXT: v_cndmask_b32_e32 v18, 0, v8, vcc
6216 ; GFX8-NEXT: v_cndmask_b32_e32 v19, 0, v9, vcc
6217 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc
6218 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc
6219 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16
6220 ; GFX8-NEXT: v_cndmask_b32_e32 v16, v4, v6, vcc
6221 ; GFX8-NEXT: v_cndmask_b32_e32 v20, v5, v7, vcc
6222 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], 1, v[12:13]
6223 ; GFX8-NEXT: v_lshlrev_b32_e32 v6, 31, v14
6224 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v6
6225 ; GFX8-NEXT: v_lshrrev_b64 v[6:7], 1, v[14:15]
6226 ; GFX8-NEXT: v_sub_u32_e32 v10, vcc, 64, v17
6227 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], v17, v[4:5]
6228 ; GFX8-NEXT: v_lshlrev_b64 v[10:11], v10, v[6:7]
6229 ; GFX8-NEXT: v_subrev_u32_e32 v12, vcc, 64, v17
6230 ; GFX8-NEXT: v_or_b32_e32 v10, v8, v10
6231 ; GFX8-NEXT: v_or_b32_e32 v11, v9, v11
6232 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], v17, v[6:7]
6233 ; GFX8-NEXT: v_lshrrev_b64 v[6:7], v12, v[6:7]
6234 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17
6235 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
6236 ; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc
6237 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v17
6238 ; GFX8-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[4:5]
6239 ; GFX8-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5]
6240 ; GFX8-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc
6241 ; GFX8-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc
6242 ; GFX8-NEXT: v_or_b32_e32 v4, v18, v4
6243 ; GFX8-NEXT: v_or_b32_e32 v5, v19, v5
6244 ; GFX8-NEXT: v_or_b32_e32 v6, v16, v6
6245 ; GFX8-NEXT: v_or_b32_e32 v7, v20, v7
6246 ; GFX8-NEXT: s_setpc_b64 s[30:31]
6248 ; GFX9-LABEL: v_fshl_v2i128:
6250 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6251 ; GFX9-NEXT: s_movk_i32 s6, 0x7f
6252 ; GFX9-NEXT: v_and_b32_e32 v23, s6, v16
6253 ; GFX9-NEXT: v_sub_u32_e32 v17, 64, v23
6254 ; GFX9-NEXT: v_lshrrev_b64 v[17:18], v17, v[0:1]
6255 ; GFX9-NEXT: v_lshlrev_b64 v[21:22], v23, v[2:3]
6256 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9]
6257 ; GFX9-NEXT: v_xor_b32_e32 v16, -1, v16
6258 ; GFX9-NEXT: v_or_b32_e32 v21, v17, v21
6259 ; GFX9-NEXT: v_lshlrev_b32_e32 v17, 31, v10
6260 ; GFX9-NEXT: v_and_b32_e32 v24, s6, v16
6261 ; GFX9-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11]
6262 ; GFX9-NEXT: v_or_b32_e32 v9, v9, v17
6263 ; GFX9-NEXT: v_sub_u32_e32 v16, 64, v24
6264 ; GFX9-NEXT: v_or_b32_e32 v22, v18, v22
6265 ; GFX9-NEXT: v_lshlrev_b64 v[16:17], v16, v[10:11]
6266 ; GFX9-NEXT: v_lshrrev_b64 v[18:19], v24, v[8:9]
6267 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23
6268 ; GFX9-NEXT: v_or_b32_e32 v18, v18, v16
6269 ; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v23
6270 ; GFX9-NEXT: v_or_b32_e32 v19, v19, v17
6271 ; GFX9-NEXT: v_lshlrev_b64 v[16:17], v16, v[0:1]
6272 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v23, v[0:1]
6273 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23
6274 ; GFX9-NEXT: v_cndmask_b32_e32 v25, 0, v0, vcc
6275 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v21, vcc
6276 ; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v22, vcc
6277 ; GFX9-NEXT: v_cndmask_b32_e64 v17, v0, v2, s[4:5]
6278 ; GFX9-NEXT: v_subrev_u32_e32 v0, 64, v24
6279 ; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v3, s[4:5]
6280 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], v0, v[10:11]
6281 ; GFX9-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v24
6282 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[4:5]
6283 ; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v1, vcc
6284 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11]
6285 ; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[4:5]
6286 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24
6287 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
6288 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc
6289 ; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v0, s[4:5]
6290 ; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, v1, s[4:5]
6291 ; GFX9-NEXT: v_or_b32_e32 v0, v25, v2
6292 ; GFX9-NEXT: v_or_b32_e32 v1, v18, v3
6293 ; GFX9-NEXT: v_or_b32_e32 v2, v17, v8
6294 ; GFX9-NEXT: v_or_b32_e32 v3, v16, v9
6295 ; GFX9-NEXT: v_and_b32_e32 v16, s6, v20
6296 ; GFX9-NEXT: v_xor_b32_e32 v8, -1, v20
6297 ; GFX9-NEXT: v_and_b32_e32 v17, s6, v8
6298 ; GFX9-NEXT: v_sub_u32_e32 v8, 64, v16
6299 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v8, v[4:5]
6300 ; GFX9-NEXT: v_lshlrev_b64 v[10:11], v16, v[6:7]
6301 ; GFX9-NEXT: v_subrev_u32_e32 v18, 64, v16
6302 ; GFX9-NEXT: v_or_b32_e32 v10, v8, v10
6303 ; GFX9-NEXT: v_or_b32_e32 v11, v9, v11
6304 ; GFX9-NEXT: v_lshlrev_b64 v[8:9], v16, v[4:5]
6305 ; GFX9-NEXT: v_lshlrev_b64 v[4:5], v18, v[4:5]
6306 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16
6307 ; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v8, vcc
6308 ; GFX9-NEXT: v_cndmask_b32_e32 v19, 0, v9, vcc
6309 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc
6310 ; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc
6311 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16
6312 ; GFX9-NEXT: v_cndmask_b32_e32 v16, v4, v6, vcc
6313 ; GFX9-NEXT: v_cndmask_b32_e32 v20, v5, v7, vcc
6314 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], 1, v[12:13]
6315 ; GFX9-NEXT: v_lshlrev_b32_e32 v6, 31, v14
6316 ; GFX9-NEXT: v_or_b32_e32 v5, v5, v6
6317 ; GFX9-NEXT: v_lshrrev_b64 v[6:7], 1, v[14:15]
6318 ; GFX9-NEXT: v_sub_u32_e32 v10, 64, v17
6319 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v17, v[4:5]
6320 ; GFX9-NEXT: v_lshlrev_b64 v[10:11], v10, v[6:7]
6321 ; GFX9-NEXT: v_subrev_u32_e32 v12, 64, v17
6322 ; GFX9-NEXT: v_or_b32_e32 v10, v8, v10
6323 ; GFX9-NEXT: v_or_b32_e32 v11, v9, v11
6324 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v17, v[6:7]
6325 ; GFX9-NEXT: v_lshrrev_b64 v[6:7], v12, v[6:7]
6326 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17
6327 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc
6328 ; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc
6329 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v17
6330 ; GFX9-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[4:5]
6331 ; GFX9-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5]
6332 ; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc
6333 ; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc
6334 ; GFX9-NEXT: v_or_b32_e32 v4, v18, v4
6335 ; GFX9-NEXT: v_or_b32_e32 v5, v19, v5
6336 ; GFX9-NEXT: v_or_b32_e32 v6, v16, v6
6337 ; GFX9-NEXT: v_or_b32_e32 v7, v20, v7
6338 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6340 ; GFX10-LABEL: v_fshl_v2i128:
6342 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6343 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
6344 ; GFX10-NEXT: s_movk_i32 s7, 0x7f
6345 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9]
6346 ; GFX10-NEXT: v_and_b32_e32 v27, s7, v16
6347 ; GFX10-NEXT: v_xor_b32_e32 v16, -1, v16
6348 ; GFX10-NEXT: v_lshlrev_b32_e32 v21, 31, v10
6349 ; GFX10-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11]
6350 ; GFX10-NEXT: v_sub_nc_u32_e32 v17, 64, v27
6351 ; GFX10-NEXT: v_and_b32_e32 v28, s7, v16
6352 ; GFX10-NEXT: v_lshlrev_b64 v[18:19], v27, v[2:3]
6353 ; GFX10-NEXT: v_or_b32_e32 v9, v9, v21
6354 ; GFX10-NEXT: v_subrev_nc_u32_e32 v29, 64, v27
6355 ; GFX10-NEXT: v_lshrrev_b64 v[16:17], v17, v[0:1]
6356 ; GFX10-NEXT: v_sub_nc_u32_e32 v25, 64, v28
6357 ; GFX10-NEXT: v_lshlrev_b64 v[21:22], v27, v[0:1]
6358 ; GFX10-NEXT: v_lshrrev_b64 v[23:24], v28, v[8:9]
6359 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v29, v[0:1]
6360 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v27
6361 ; GFX10-NEXT: v_or_b32_e32 v18, v16, v18
6362 ; GFX10-NEXT: v_subrev_nc_u32_e32 v16, 64, v28
6363 ; GFX10-NEXT: v_lshlrev_b64 v[25:26], v25, v[10:11]
6364 ; GFX10-NEXT: v_or_b32_e32 v19, v17, v19
6365 ; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v28
6366 ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v28
6367 ; GFX10-NEXT: v_lshrrev_b64 v[16:17], v16, v[10:11]
6368 ; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v27
6369 ; GFX10-NEXT: v_or_b32_e32 v23, v23, v25
6370 ; GFX10-NEXT: v_or_b32_e32 v24, v24, v26
6371 ; GFX10-NEXT: v_cndmask_b32_e32 v19, v1, v19, vcc_lo
6372 ; GFX10-NEXT: v_cndmask_b32_e32 v18, v0, v18, vcc_lo
6373 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v28, v[10:11]
6374 ; GFX10-NEXT: v_cndmask_b32_e64 v16, v16, v23, s4
6375 ; GFX10-NEXT: v_cndmask_b32_e64 v10, v17, v24, s4
6376 ; GFX10-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc_lo
6377 ; GFX10-NEXT: v_cndmask_b32_e32 v11, 0, v22, vcc_lo
6378 ; GFX10-NEXT: v_cndmask_b32_e64 v22, v19, v3, s6
6379 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v16, v8, s5
6380 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s6
6381 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v10, v9, s5
6382 ; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v0, s4
6383 ; GFX10-NEXT: v_and_b32_e32 v23, s7, v20
6384 ; GFX10-NEXT: v_or_b32_e32 v0, v21, v3
6385 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v20
6386 ; GFX10-NEXT: v_cndmask_b32_e64 v24, 0, v1, s4
6387 ; GFX10-NEXT: v_or_b32_e32 v1, v11, v8
6388 ; GFX10-NEXT: v_sub_nc_u32_e32 v10, 64, v23
6389 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v9
6390 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], 1, v[12:13]
6391 ; GFX10-NEXT: v_lshlrev_b32_e32 v16, 31, v14
6392 ; GFX10-NEXT: v_and_b32_e32 v25, s7, v3
6393 ; GFX10-NEXT: v_lshrrev_b64 v[10:11], v10, v[4:5]
6394 ; GFX10-NEXT: v_lshlrev_b64 v[12:13], v23, v[6:7]
6395 ; GFX10-NEXT: v_lshrrev_b64 v[14:15], 1, v[14:15]
6396 ; GFX10-NEXT: v_or_b32_e32 v9, v9, v16
6397 ; GFX10-NEXT: v_sub_nc_u32_e32 v20, 64, v25
6398 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 64, v23
6399 ; GFX10-NEXT: v_lshlrev_b64 v[16:17], v23, v[4:5]
6400 ; GFX10-NEXT: v_or_b32_e32 v12, v10, v12
6401 ; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v25
6402 ; GFX10-NEXT: v_lshrrev_b64 v[18:19], v25, v[8:9]
6403 ; GFX10-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15]
6404 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v23
6405 ; GFX10-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
6406 ; GFX10-NEXT: v_or_b32_e32 v5, v11, v13
6407 ; GFX10-NEXT: v_lshrrev_b64 v[10:11], v10, v[14:15]
6408 ; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v25
6409 ; GFX10-NEXT: v_cndmask_b32_e32 v13, 0, v16, vcc_lo
6410 ; GFX10-NEXT: v_or_b32_e32 v16, v18, v20
6411 ; GFX10-NEXT: v_or_b32_e32 v18, v19, v21
6412 ; GFX10-NEXT: v_cndmask_b32_e32 v12, v3, v12, vcc_lo
6413 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo
6414 ; GFX10-NEXT: v_lshrrev_b64 v[3:4], v25, v[14:15]
6415 ; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v16, s4
6416 ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v25
6417 ; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v23
6418 ; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v18, s4
6419 ; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo
6420 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, v6, s6
6421 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v5, v7, s6
6422 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v10, v8, s5
6423 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v11, v9, s5
6424 ; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v3, s4
6425 ; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v4, s4
6426 ; GFX10-NEXT: v_or_b32_e32 v3, v22, v24
6427 ; GFX10-NEXT: v_or_b32_e32 v4, v13, v5
6428 ; GFX10-NEXT: v_or_b32_e32 v5, v14, v8
6429 ; GFX10-NEXT: v_or_b32_e32 v6, v6, v9
6430 ; GFX10-NEXT: v_or_b32_e32 v7, v7, v10
6431 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6432 %result = call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
6433 ret <2 x i128> %result
; Declarations of the llvm.fshl (funnel shift left) intrinsic overloads
; referenced by this test file, one per scalar or vector integer type.
; Every overload takes three operands of the same type and returns that
; type; the call in v_fshl_v2i128 passes them as (%lhs, %rhs, %amt).
; All declarations share attribute group #0, defined at the end.
;
; 7-bit and 8-bit element types.
6436 declare i7 @llvm.fshl.i7(i7, i7, i7) #0
6437 declare i8 @llvm.fshl.i8(i8, i8, i8) #0
6438 declare <2 x i8> @llvm.fshl.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0
6439 declare <4 x i8> @llvm.fshl.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0
; 16-bit element types, scalar through 8-element vectors.
6441 declare i16 @llvm.fshl.i16(i16, i16, i16) #0
6442 declare <2 x i16> @llvm.fshl.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0
6443 declare <3 x i16> @llvm.fshl.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0
6444 declare <4 x i16> @llvm.fshl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0
6445 declare <5 x i16> @llvm.fshl.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0
6446 declare <6 x i16> @llvm.fshl.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0
6447 declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0
; 24-bit element types.
6449 declare i24 @llvm.fshl.i24(i24, i24, i24) #0
6450 declare <2 x i24> @llvm.fshl.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) #0
; 32-bit element types, scalar through 16-element vectors.
6452 declare i32 @llvm.fshl.i32(i32, i32, i32) #0
6453 declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0
6454 declare <3 x i32> @llvm.fshl.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0
6455 declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0
6456 declare <5 x i32> @llvm.fshl.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0
6457 declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0
; 48-bit scalar.
6459 declare i48 @llvm.fshl.i48(i48, i48, i48) #0
; 64-bit element types.
6461 declare i64 @llvm.fshl.i64(i64, i64, i64) #0
6462 declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0
; 128-bit element types.
6464 declare i128 @llvm.fshl.i128(i128, i128, i128) #0
6465 declare <2 x i128> @llvm.fshl.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) #0
; Attribute group applied to every intrinsic declaration above.
6467 attributes #0 = { nounwind readnone speculatable willreturn }