1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -o - %s | FileCheck -check-prefixes=GCN,GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GFX11 %s
; Funnel shift left on the non-power-of-two width i7 with a variable amount.
; All three operands are marked `inreg` on an amdgpu_ps entry point, and the
; result is produced by a single llvm.fshl.i7 call.
; The assertion comments that follow are autogenerated (see the NOTE on the
; first line of the file); regenerate them instead of editing by hand.
8 define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
9 ; GFX6-LABEL: s_fshl_i7:
11 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
12 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
13 ; GFX6-NEXT: s_and_b32 s2, s2, 0x7f
14 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x60001
15 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
16 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
17 ; GFX6-NEXT: v_mul_lo_u32 v1, v0, -7
18 ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
19 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
20 ; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
21 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, 7
22 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
23 ; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
24 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
25 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
26 ; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
27 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
28 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
29 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 6, v0
30 ; GFX6-NEXT: v_and_b32_e32 v0, 0x7f, v0
31 ; GFX6-NEXT: v_and_b32_e32 v1, 0x7f, v1
32 ; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
33 ; GFX6-NEXT: v_lshr_b32_e32 v1, s1, v1
34 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
35 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
36 ; GFX6-NEXT: ; return to shader part epilog
38 ; GFX8-LABEL: s_fshl_i7:
40 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
41 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
42 ; GFX8-NEXT: s_and_b32 s2, s2, 0x7f
43 ; GFX8-NEXT: s_and_b32 s1, s1, 0x7f
44 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
45 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
46 ; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
47 ; GFX8-NEXT: s_lshr_b32 s1, s1, 1
48 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, -7
49 ; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
50 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
51 ; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
52 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, 7
53 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
54 ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
55 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
56 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
57 ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
58 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
59 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
60 ; GFX8-NEXT: v_sub_u16_e32 v1, 6, v0
61 ; GFX8-NEXT: v_and_b32_e32 v0, 0x7f, v0
62 ; GFX8-NEXT: v_and_b32_e32 v1, 0x7f, v1
63 ; GFX8-NEXT: v_lshlrev_b16_e64 v0, v0, s0
64 ; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s1
65 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
66 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
67 ; GFX8-NEXT: ; return to shader part epilog
69 ; GFX9-LABEL: s_fshl_i7:
71 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
72 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
73 ; GFX9-NEXT: s_and_b32 s2, s2, 0x7f
74 ; GFX9-NEXT: s_and_b32 s1, s1, 0x7f
75 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
76 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
77 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
78 ; GFX9-NEXT: s_lshr_b32 s1, s1, 1
79 ; GFX9-NEXT: v_mul_lo_u32 v1, v0, -7
80 ; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
81 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
82 ; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0
83 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, 7
84 ; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
85 ; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
86 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
87 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
88 ; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
89 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
90 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
91 ; GFX9-NEXT: v_sub_u16_e32 v1, 6, v0
92 ; GFX9-NEXT: v_and_b32_e32 v0, 0x7f, v0
93 ; GFX9-NEXT: v_and_b32_e32 v1, 0x7f, v1
94 ; GFX9-NEXT: v_lshlrev_b16_e64 v0, v0, s0
95 ; GFX9-NEXT: v_lshrrev_b16_e64 v1, v1, s1
96 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
97 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
98 ; GFX9-NEXT: ; return to shader part epilog
100 ; GFX10-LABEL: s_fshl_i7:
102 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
103 ; GFX10-NEXT: s_and_b32 s2, s2, 0x7f
104 ; GFX10-NEXT: s_and_b32 s1, s1, 0x7f
105 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
106 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
107 ; GFX10-NEXT: s_lshr_b32 s1, s1, 1
108 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
109 ; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
110 ; GFX10-NEXT: v_mul_lo_u32 v1, v0, -7
111 ; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
112 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
113 ; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0
114 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, 7
115 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0
116 ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
117 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
118 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
119 ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
120 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
121 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
122 ; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0
123 ; GFX10-NEXT: v_and_b32_e32 v0, 0x7f, v0
124 ; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1
125 ; GFX10-NEXT: v_lshlrev_b16 v0, v0, s0
126 ; GFX10-NEXT: v_lshrrev_b16 v1, v1, s1
127 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
128 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
129 ; GFX10-NEXT: ; return to shader part epilog
131 ; GFX11-LABEL: s_fshl_i7:
133 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
134 ; GFX11-NEXT: s_and_b32 s2, s2, 0x7f
135 ; GFX11-NEXT: s_and_b32 s1, s1, 0x7f
136 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
137 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
138 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0
139 ; GFX11-NEXT: s_lshr_b32 s1, s1, 1
140 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
141 ; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
142 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
143 ; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
144 ; GFX11-NEXT: v_mul_lo_u32 v1, v0, -7
145 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
146 ; GFX11-NEXT: v_mul_hi_u32 v1, v0, v1
147 ; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1
148 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
149 ; GFX11-NEXT: v_mul_hi_u32 v0, s2, v0
150 ; GFX11-NEXT: v_mul_lo_u32 v0, v0, 7
151 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
152 ; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0
153 ; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
154 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
155 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
156 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
157 ; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
158 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
159 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
160 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
161 ; GFX11-NEXT: v_sub_nc_u16 v1, 6, v0
162 ; GFX11-NEXT: v_and_b32_e32 v0, 0x7f, v0
163 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
164 ; GFX11-NEXT: v_and_b32_e32 v1, 0x7f, v1
165 ; GFX11-NEXT: v_lshlrev_b16 v0, v0, s0
166 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
167 ; GFX11-NEXT: v_lshrrev_b16 v1, v1, s1
168 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
169 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
170 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
171 ; GFX11-NEXT: ; return to shader part epilog
172 %result = call i7 @llvm.fshl.i7(i7 %lhs, i7 %rhs, i7 %amt)
; Funnel shift left on i7 with a variable amount, using the default calling
; convention and no `inreg` markers on the operands. The body is a single
; llvm.fshl.i7 call on the three arguments.
; The assertion comments that follow are autogenerated (see the NOTE on the
; first line of the file); regenerate them instead of editing by hand.
176 define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) {
177 ; GFX6-LABEL: v_fshl_i7:
179 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
181 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3
182 ; GFX6-NEXT: v_and_b32_e32 v2, 0x7f, v2
183 ; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 6
184 ; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
185 ; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3
186 ; GFX6-NEXT: v_mul_lo_u32 v4, v3, -7
187 ; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4
188 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4
189 ; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
190 ; GFX6-NEXT: v_mul_lo_u32 v3, v3, 7
191 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
192 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
193 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
194 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
195 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
196 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
197 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
198 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 6, v2
199 ; GFX6-NEXT: v_and_b32_e32 v2, 0x7f, v2
200 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
201 ; GFX6-NEXT: v_and_b32_e32 v2, 0x7f, v3
202 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
203 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
204 ; GFX6-NEXT: s_setpc_b64 s[30:31]
206 ; GFX8-LABEL: v_fshl_i7:
208 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
209 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
210 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3
211 ; GFX8-NEXT: v_and_b32_e32 v2, 0x7f, v2
212 ; GFX8-NEXT: v_and_b32_e32 v1, 0x7f, v1
213 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 1, v1
214 ; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
215 ; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3
216 ; GFX8-NEXT: v_mul_lo_u32 v4, v3, -7
217 ; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4
218 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4
219 ; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3
220 ; GFX8-NEXT: v_mul_lo_u32 v3, v3, 7
221 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
222 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
223 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
224 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
225 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
226 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
227 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
228 ; GFX8-NEXT: v_sub_u16_e32 v3, 6, v2
229 ; GFX8-NEXT: v_and_b32_e32 v2, 0x7f, v2
230 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0
231 ; GFX8-NEXT: v_and_b32_e32 v2, 0x7f, v3
232 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1
233 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
234 ; GFX8-NEXT: s_setpc_b64 s[30:31]
236 ; GFX9-LABEL: v_fshl_i7:
238 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
239 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
240 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3
241 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2
242 ; GFX9-NEXT: v_and_b32_e32 v1, 0x7f, v1
243 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, 1, v1
244 ; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
245 ; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3
246 ; GFX9-NEXT: v_mul_lo_u32 v4, v3, -7
247 ; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4
248 ; GFX9-NEXT: v_add_u32_e32 v3, v3, v4
249 ; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3
250 ; GFX9-NEXT: v_mul_lo_u32 v3, v3, 7
251 ; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
252 ; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
253 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
254 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
255 ; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
256 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
257 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
258 ; GFX9-NEXT: v_sub_u16_e32 v3, 6, v2
259 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2
260 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0
261 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v3
262 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1
263 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
264 ; GFX9-NEXT: s_setpc_b64 s[30:31]
266 ; GFX10-LABEL: v_fshl_i7:
268 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
269 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
270 ; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2
271 ; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1
272 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3
273 ; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1
274 ; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
275 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3
276 ; GFX10-NEXT: v_mul_lo_u32 v4, v3, -7
277 ; GFX10-NEXT: v_mul_hi_u32 v4, v3, v4
278 ; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4
279 ; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3
280 ; GFX10-NEXT: v_mul_lo_u32 v3, v3, 7
281 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
282 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
283 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
284 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
285 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
286 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
287 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
288 ; GFX10-NEXT: v_sub_nc_u16 v3, 6, v2
289 ; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2
290 ; GFX10-NEXT: v_and_b32_e32 v3, 0x7f, v3
291 ; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0
292 ; GFX10-NEXT: v_lshrrev_b16 v1, v3, v1
293 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
294 ; GFX10-NEXT: s_setpc_b64 s[30:31]
296 ; GFX11-LABEL: v_fshl_i7:
298 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
299 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
300 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7f, v2
301 ; GFX11-NEXT: v_and_b32_e32 v1, 0x7f, v1
302 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
303 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v3, v3
304 ; GFX11-NEXT: v_lshrrev_b16 v1, 1, v1
305 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
306 ; GFX11-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
307 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
308 ; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3
309 ; GFX11-NEXT: v_mul_lo_u32 v4, v3, -7
310 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
311 ; GFX11-NEXT: v_mul_hi_u32 v4, v3, v4
312 ; GFX11-NEXT: v_add_nc_u32_e32 v3, v3, v4
313 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
314 ; GFX11-NEXT: v_mul_hi_u32 v3, v2, v3
315 ; GFX11-NEXT: v_mul_lo_u32 v3, v3, 7
316 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
317 ; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3
318 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
319 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
320 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
321 ; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
322 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
323 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
324 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
325 ; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
326 ; GFX11-NEXT: v_sub_nc_u16 v3, 6, v2
327 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7f, v2
328 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
329 ; GFX11-NEXT: v_and_b32_e32 v3, 0x7f, v3
330 ; GFX11-NEXT: v_lshlrev_b16 v0, v2, v0
331 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
332 ; GFX11-NEXT: v_lshrrev_b16 v1, v3, v1
333 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
334 ; GFX11-NEXT: s_setpc_b64 s[30:31]
335 %result = call i7 @llvm.fshl.i7(i7 %lhs, i7 %rhs, i7 %amt)
; Funnel shift left on i8 with a variable amount. All three operands are
; marked `inreg` on an amdgpu_ps entry point; the body is a single
; llvm.fshl.i8 call.
; The assertion comments that follow are autogenerated (see the NOTE on the
; first line of the file); regenerate them instead of editing by hand.
339 define amdgpu_ps i8 @s_fshl_i8(i8 inreg %lhs, i8 inreg %rhs, i8 inreg %amt) {
340 ; GFX6-LABEL: s_fshl_i8:
342 ; GFX6-NEXT: s_and_b32 s3, s2, 7
343 ; GFX6-NEXT: s_andn2_b32 s2, 7, s2
344 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x70001
345 ; GFX6-NEXT: s_lshl_b32 s0, s0, s3
346 ; GFX6-NEXT: s_lshr_b32 s1, s1, s2
347 ; GFX6-NEXT: s_or_b32 s0, s0, s1
348 ; GFX6-NEXT: ; return to shader part epilog
350 ; GFX8-LABEL: s_fshl_i8:
352 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
353 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
354 ; GFX8-NEXT: s_and_b32 s3, s2, 7
355 ; GFX8-NEXT: s_andn2_b32 s2, 7, s2
356 ; GFX8-NEXT: s_lshr_b32 s1, s1, 1
357 ; GFX8-NEXT: s_lshl_b32 s0, s0, s3
358 ; GFX8-NEXT: s_lshr_b32 s1, s1, s2
359 ; GFX8-NEXT: s_or_b32 s0, s0, s1
360 ; GFX8-NEXT: ; return to shader part epilog
362 ; GFX9-LABEL: s_fshl_i8:
364 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
365 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
366 ; GFX9-NEXT: s_and_b32 s3, s2, 7
367 ; GFX9-NEXT: s_andn2_b32 s2, 7, s2
368 ; GFX9-NEXT: s_lshr_b32 s1, s1, 1
369 ; GFX9-NEXT: s_lshl_b32 s0, s0, s3
370 ; GFX9-NEXT: s_lshr_b32 s1, s1, s2
371 ; GFX9-NEXT: s_or_b32 s0, s0, s1
372 ; GFX9-NEXT: ; return to shader part epilog
374 ; GFX10-LABEL: s_fshl_i8:
376 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
377 ; GFX10-NEXT: s_and_b32 s3, s2, 7
378 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
379 ; GFX10-NEXT: s_andn2_b32 s2, 7, s2
380 ; GFX10-NEXT: s_lshr_b32 s1, s1, 1
381 ; GFX10-NEXT: s_lshl_b32 s0, s0, s3
382 ; GFX10-NEXT: s_lshr_b32 s1, s1, s2
383 ; GFX10-NEXT: s_or_b32 s0, s0, s1
384 ; GFX10-NEXT: ; return to shader part epilog
386 ; GFX11-LABEL: s_fshl_i8:
388 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
389 ; GFX11-NEXT: s_and_b32 s3, s2, 7
390 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
391 ; GFX11-NEXT: s_and_not1_b32 s2, 7, s2
392 ; GFX11-NEXT: s_lshr_b32 s1, s1, 1
393 ; GFX11-NEXT: s_lshl_b32 s0, s0, s3
394 ; GFX11-NEXT: s_lshr_b32 s1, s1, s2
395 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
396 ; GFX11-NEXT: s_or_b32 s0, s0, s1
397 ; GFX11-NEXT: ; return to shader part epilog
398 %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 %amt)
; Funnel shift left on i8 with a variable amount, using the default calling
; convention and no `inreg` markers. The body is a single llvm.fshl.i8 call
; on the three arguments.
; The assertion comments that follow are autogenerated (see the NOTE on the
; first line of the file); regenerate them instead of editing by hand.
402 define i8 @v_fshl_i8(i8 %lhs, i8 %rhs, i8 %amt) {
403 ; GFX6-LABEL: v_fshl_i8:
405 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
406 ; GFX6-NEXT: v_and_b32_e32 v3, 7, v2
407 ; GFX6-NEXT: v_not_b32_e32 v2, v2
408 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v2
409 ; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 7
410 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0
411 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
412 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
413 ; GFX6-NEXT: s_setpc_b64 s[30:31]
415 ; GFX8-LABEL: v_fshl_i8:
417 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
418 ; GFX8-NEXT: v_and_b32_e32 v3, 7, v2
419 ; GFX8-NEXT: v_not_b32_e32 v2, v2
420 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0
421 ; GFX8-NEXT: v_mov_b32_e32 v3, 1
422 ; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
423 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
424 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1
425 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
426 ; GFX8-NEXT: s_setpc_b64 s[30:31]
428 ; GFX9-LABEL: v_fshl_i8:
430 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431 ; GFX9-NEXT: v_and_b32_e32 v3, 7, v2
432 ; GFX9-NEXT: v_not_b32_e32 v2, v2
433 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0
434 ; GFX9-NEXT: v_mov_b32_e32 v3, 1
435 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
436 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
437 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1
438 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
439 ; GFX9-NEXT: s_setpc_b64 s[30:31]
441 ; GFX10-LABEL: v_fshl_i8:
443 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
444 ; GFX10-NEXT: v_not_b32_e32 v3, v2
445 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
446 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
447 ; GFX10-NEXT: v_and_b32_e32 v3, 7, v3
448 ; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1
449 ; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0
450 ; GFX10-NEXT: v_lshrrev_b16 v1, v3, v1
451 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
452 ; GFX10-NEXT: s_setpc_b64 s[30:31]
454 ; GFX11-LABEL: v_fshl_i8:
456 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
457 ; GFX11-NEXT: v_not_b32_e32 v3, v2
458 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
459 ; GFX11-NEXT: v_and_b32_e32 v2, 7, v2
460 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
461 ; GFX11-NEXT: v_and_b32_e32 v3, 7, v3
462 ; GFX11-NEXT: v_lshrrev_b16 v1, 1, v1
463 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
464 ; GFX11-NEXT: v_lshlrev_b16 v0, v2, v0
465 ; GFX11-NEXT: v_lshrrev_b16 v1, v3, v1
466 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
467 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
468 ; GFX11-NEXT: s_setpc_b64 s[30:31]
469 %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 %amt)
; Funnel shift left on i8 by the constant amount 4. Both value operands are
; marked `inreg` on an amdgpu_ps entry point; the amount is the literal
; `i8 4` in the llvm.fshl.i8 call.
; The assertion comments that follow are autogenerated (see the NOTE on the
; first line of the file); regenerate them instead of editing by hand.
473 define amdgpu_ps i8 @s_fshl_i8_4(i8 inreg %lhs, i8 inreg %rhs) {
474 ; GFX6-LABEL: s_fshl_i8_4:
476 ; GFX6-NEXT: s_lshl_b32 s0, s0, 4
477 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x40004
478 ; GFX6-NEXT: s_or_b32 s0, s0, s1
479 ; GFX6-NEXT: ; return to shader part epilog
481 ; GFX8-LABEL: s_fshl_i8_4:
483 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
484 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
485 ; GFX8-NEXT: s_lshl_b32 s0, s0, 4
486 ; GFX8-NEXT: s_lshr_b32 s1, s1, 4
487 ; GFX8-NEXT: s_or_b32 s0, s0, s1
488 ; GFX8-NEXT: ; return to shader part epilog
490 ; GFX9-LABEL: s_fshl_i8_4:
492 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
493 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
494 ; GFX9-NEXT: s_lshl_b32 s0, s0, 4
495 ; GFX9-NEXT: s_lshr_b32 s1, s1, 4
496 ; GFX9-NEXT: s_or_b32 s0, s0, s1
497 ; GFX9-NEXT: ; return to shader part epilog
499 ; GFX10-LABEL: s_fshl_i8_4:
501 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
502 ; GFX10-NEXT: s_lshl_b32 s0, s0, 4
503 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
504 ; GFX10-NEXT: s_lshr_b32 s1, s1, 4
505 ; GFX10-NEXT: s_or_b32 s0, s0, s1
506 ; GFX10-NEXT: ; return to shader part epilog
508 ; GFX11-LABEL: s_fshl_i8_4:
510 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
511 ; GFX11-NEXT: s_lshl_b32 s0, s0, 4
512 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
513 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
514 ; GFX11-NEXT: s_lshr_b32 s1, s1, 4
515 ; GFX11-NEXT: s_or_b32 s0, s0, s1
516 ; GFX11-NEXT: ; return to shader part epilog
517 %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 4)
; Funnel shift left on i8 by the constant amount 4, using the default calling
; convention and no `inreg` markers. The amount is the literal `i8 4` in the
; llvm.fshl.i8 call.
; The assertion comments that follow are autogenerated (see the NOTE on the
; first line of the file); regenerate them instead of editing by hand.
521 define i8 @v_fshl_i8_4(i8 %lhs, i8 %rhs) {
522 ; GFX6-LABEL: v_fshl_i8_4:
524 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
525 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0
526 ; GFX6-NEXT: v_bfe_u32 v1, v1, 4, 4
527 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
528 ; GFX6-NEXT: s_setpc_b64 s[30:31]
530 ; GFX8-LABEL: v_fshl_i8_4:
532 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
533 ; GFX8-NEXT: v_mov_b32_e32 v2, 4
534 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0
535 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
536 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
537 ; GFX8-NEXT: s_setpc_b64 s[30:31]
539 ; GFX9-LABEL: v_fshl_i8_4:
541 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
542 ; GFX9-NEXT: s_mov_b32 s4, 4
543 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 4, v0
544 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
545 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
546 ; GFX9-NEXT: s_setpc_b64 s[30:31]
548 ; GFX10-LABEL: v_fshl_i8_4:
550 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
551 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
552 ; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0
553 ; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1
554 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
555 ; GFX10-NEXT: s_setpc_b64 s[30:31]
557 ; GFX11-LABEL: v_fshl_i8_4:
559 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
560 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
561 ; GFX11-NEXT: v_lshlrev_b16 v0, 4, v0
562 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
563 ; GFX11-NEXT: v_lshrrev_b16 v1, 4, v1
564 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
565 ; GFX11-NEXT: s_setpc_b64 s[30:31]
566 %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 4)
; Funnel shift left on i8 by the constant amount 5. Both value operands are
; marked `inreg` on an amdgpu_ps entry point; the amount is the literal
; `i8 5` in the llvm.fshl.i8 call.
; The assertion comments that follow are autogenerated (see the NOTE on the
; first line of the file); regenerate them instead of editing by hand.
570 define amdgpu_ps i8 @s_fshl_i8_5(i8 inreg %lhs, i8 inreg %rhs) {
571 ; GFX6-LABEL: s_fshl_i8_5:
573 ; GFX6-NEXT: s_lshl_b32 s0, s0, 5
574 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x50003
575 ; GFX6-NEXT: s_or_b32 s0, s0, s1
576 ; GFX6-NEXT: ; return to shader part epilog
578 ; GFX8-LABEL: s_fshl_i8_5:
580 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
581 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
582 ; GFX8-NEXT: s_lshl_b32 s0, s0, 5
583 ; GFX8-NEXT: s_lshr_b32 s1, s1, 3
584 ; GFX8-NEXT: s_or_b32 s0, s0, s1
585 ; GFX8-NEXT: ; return to shader part epilog
587 ; GFX9-LABEL: s_fshl_i8_5:
589 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
590 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
591 ; GFX9-NEXT: s_lshl_b32 s0, s0, 5
592 ; GFX9-NEXT: s_lshr_b32 s1, s1, 3
593 ; GFX9-NEXT: s_or_b32 s0, s0, s1
594 ; GFX9-NEXT: ; return to shader part epilog
596 ; GFX10-LABEL: s_fshl_i8_5:
598 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
599 ; GFX10-NEXT: s_lshl_b32 s0, s0, 5
600 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
601 ; GFX10-NEXT: s_lshr_b32 s1, s1, 3
602 ; GFX10-NEXT: s_or_b32 s0, s0, s1
603 ; GFX10-NEXT: ; return to shader part epilog
605 ; GFX11-LABEL: s_fshl_i8_5:
607 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
608 ; GFX11-NEXT: s_lshl_b32 s0, s0, 5
609 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
610 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
611 ; GFX11-NEXT: s_lshr_b32 s1, s1, 3
612 ; GFX11-NEXT: s_or_b32 s0, s0, s1
613 ; GFX11-NEXT: ; return to shader part epilog
614 %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 5)
; Funnel shift left on i8 by the constant amount 5, using the default calling
; convention and no `inreg` markers. The amount is the literal `i8 5` in the
; llvm.fshl.i8 call.
; The assertion comments that follow are autogenerated (see the NOTE on the
; first line of the file); regenerate them instead of editing by hand.
618 define i8 @v_fshl_i8_5(i8 %lhs, i8 %rhs) {
619 ; GFX6-LABEL: v_fshl_i8_5:
621 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
622 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 5, v0
623 ; GFX6-NEXT: v_bfe_u32 v1, v1, 3, 5
624 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
625 ; GFX6-NEXT: s_setpc_b64 s[30:31]
627 ; GFX8-LABEL: v_fshl_i8_5:
629 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
630 ; GFX8-NEXT: v_mov_b32_e32 v2, 3
631 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 5, v0
632 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
633 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
634 ; GFX8-NEXT: s_setpc_b64 s[30:31]
636 ; GFX9-LABEL: v_fshl_i8_5:
638 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
639 ; GFX9-NEXT: v_mov_b32_e32 v2, 3
640 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 5, v0
641 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
642 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
643 ; GFX9-NEXT: s_setpc_b64 s[30:31]
645 ; GFX10-LABEL: v_fshl_i8_5:
647 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
648 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
649 ; GFX10-NEXT: v_lshlrev_b16 v0, 5, v0
650 ; GFX10-NEXT: v_lshrrev_b16 v1, 3, v1
651 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
652 ; GFX10-NEXT: s_setpc_b64 s[30:31]
654 ; GFX11-LABEL: v_fshl_i8_5:
656 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
657 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
658 ; GFX11-NEXT: v_lshlrev_b16 v0, 5, v0
659 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
660 ; GFX11-NEXT: v_lshrrev_b16 v1, 3, v1
661 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
662 ; GFX11-NEXT: s_setpc_b64 s[30:31]
663 %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 5)
; Funnel shift left on <2 x i8> with a per-lane variable amount. The vectors
; travel as i16 values marked `inreg` on an amdgpu_ps entry point: each
; argument is bitcast to <2 x i8>, passed to llvm.fshl.v2i8, and the result
; is bitcast back to i16.
; The assertion comments that follow are autogenerated (see the NOTE on the
; first line of the file); regenerate them instead of editing by hand.
667 define amdgpu_ps i16 @s_fshl_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 inreg %amt.arg) {
668 ; GFX6-LABEL: s_fshl_v2i8:
670 ; GFX6-NEXT: s_and_b32 s5, s2, 7
671 ; GFX6-NEXT: s_lshr_b32 s3, s0, 8
672 ; GFX6-NEXT: s_lshr_b32 s4, s2, 8
673 ; GFX6-NEXT: s_andn2_b32 s2, 7, s2
674 ; GFX6-NEXT: s_lshl_b32 s0, s0, s5
675 ; GFX6-NEXT: s_bfe_u32 s5, s1, 0x70001
676 ; GFX6-NEXT: s_lshr_b32 s2, s5, s2
677 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80008
678 ; GFX6-NEXT: s_or_b32 s0, s0, s2
679 ; GFX6-NEXT: s_and_b32 s2, s4, 7
680 ; GFX6-NEXT: s_andn2_b32 s4, 7, s4
681 ; GFX6-NEXT: s_lshr_b32 s1, s1, 1
682 ; GFX6-NEXT: s_lshl_b32 s2, s3, s2
683 ; GFX6-NEXT: s_lshr_b32 s1, s1, s4
684 ; GFX6-NEXT: s_or_b32 s1, s2, s1
685 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff
686 ; GFX6-NEXT: s_and_b32 s0, s0, 0xff
687 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8
688 ; GFX6-NEXT: s_or_b32 s0, s0, s1
689 ; GFX6-NEXT: ; return to shader part epilog
691 ; GFX8-LABEL: s_fshl_v2i8:
693 ; GFX8-NEXT: s_lshr_b32 s4, s1, 8
694 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
695 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
696 ; GFX8-NEXT: s_lshr_b32 s5, s2, 8
697 ; GFX8-NEXT: s_and_b32 s6, s2, 7
698 ; GFX8-NEXT: s_andn2_b32 s2, 7, s2
699 ; GFX8-NEXT: s_lshr_b32 s1, s1, 1
700 ; GFX8-NEXT: s_lshr_b32 s3, s0, 8
701 ; GFX8-NEXT: s_lshl_b32 s0, s0, s6
702 ; GFX8-NEXT: s_lshr_b32 s1, s1, s2
703 ; GFX8-NEXT: s_or_b32 s0, s0, s1
704 ; GFX8-NEXT: s_and_b32 s1, s5, 7
705 ; GFX8-NEXT: s_lshl_b32 s1, s3, s1
706 ; GFX8-NEXT: s_and_b32 s3, s4, 0xff
707 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
708 ; GFX8-NEXT: s_andn2_b32 s2, 7, s5
709 ; GFX8-NEXT: s_lshr_b32 s3, s3, 1
710 ; GFX8-NEXT: s_lshr_b32 s2, s3, s2
711 ; GFX8-NEXT: s_or_b32 s1, s1, s2
712 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
713 ; GFX8-NEXT: s_and_b32 s0, s0, 0xff
714 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8
715 ; GFX8-NEXT: s_or_b32 s0, s0, s1
716 ; GFX8-NEXT: ; return to shader part epilog
718 ; GFX9-LABEL: s_fshl_v2i8:
720 ; GFX9-NEXT: s_lshr_b32 s4, s1, 8
721 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
722 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
723 ; GFX9-NEXT: s_lshr_b32 s5, s2, 8
724 ; GFX9-NEXT: s_and_b32 s6, s2, 7
725 ; GFX9-NEXT: s_andn2_b32 s2, 7, s2
726 ; GFX9-NEXT: s_lshr_b32 s1, s1, 1
727 ; GFX9-NEXT: s_lshr_b32 s3, s0, 8
728 ; GFX9-NEXT: s_lshl_b32 s0, s0, s6
729 ; GFX9-NEXT: s_lshr_b32 s1, s1, s2
730 ; GFX9-NEXT: s_or_b32 s0, s0, s1
731 ; GFX9-NEXT: s_and_b32 s1, s5, 7
732 ; GFX9-NEXT: s_lshl_b32 s1, s3, s1
733 ; GFX9-NEXT: s_and_b32 s3, s4, 0xff
734 ; GFX9-NEXT: s_and_b32 s3, 0xffff, s3
735 ; GFX9-NEXT: s_andn2_b32 s2, 7, s5
736 ; GFX9-NEXT: s_lshr_b32 s3, s3, 1
737 ; GFX9-NEXT: s_lshr_b32 s2, s3, s2
738 ; GFX9-NEXT: s_or_b32 s1, s1, s2
739 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
740 ; GFX9-NEXT: s_and_b32 s0, s0, 0xff
741 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8
742 ; GFX9-NEXT: s_or_b32 s0, s0, s1
743 ; GFX9-NEXT: ; return to shader part epilog
745 ; GFX10-LABEL: s_fshl_v2i8:
747 ; GFX10-NEXT: s_lshr_b32 s4, s1, 8
748 ; GFX10-NEXT: s_lshr_b32 s5, s2, 8
749 ; GFX10-NEXT: s_and_b32 s4, s4, 0xff
750 ; GFX10-NEXT: s_and_b32 s6, s2, 7
751 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
752 ; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
753 ; GFX10-NEXT: s_lshr_b32 s3, s0, 8
754 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
755 ; GFX10-NEXT: s_lshl_b32 s0, s0, s6
756 ; GFX10-NEXT: s_and_b32 s6, s5, 7
757 ; GFX10-NEXT: s_andn2_b32 s5, 7, s5
758 ; GFX10-NEXT: s_lshr_b32 s4, s4, 1
759 ; GFX10-NEXT: s_andn2_b32 s2, 7, s2
760 ; GFX10-NEXT: s_lshr_b32 s1, s1, 1
761 ; GFX10-NEXT: s_lshl_b32 s3, s3, s6
762 ; GFX10-NEXT: s_lshr_b32 s4, s4, s5
763 ; GFX10-NEXT: s_lshr_b32 s1, s1, s2
764 ; GFX10-NEXT: s_or_b32 s2, s3, s4
765 ; GFX10-NEXT: s_or_b32 s0, s0, s1
766 ; GFX10-NEXT: s_and_b32 s1, s2, 0xff
767 ; GFX10-NEXT: s_and_b32 s0, s0, 0xff
768 ; GFX10-NEXT: s_lshl_b32 s1, s1, 8
769 ; GFX10-NEXT: s_or_b32 s0, s0, s1
770 ; GFX10-NEXT: ; return to shader part epilog
772 ; GFX11-LABEL: s_fshl_v2i8:
774 ; GFX11-NEXT: s_lshr_b32 s4, s1, 8
775 ; GFX11-NEXT: s_lshr_b32 s5, s2, 8
776 ; GFX11-NEXT: s_and_b32 s4, s4, 0xff
777 ; GFX11-NEXT: s_and_b32 s6, s2, 7
778 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
779 ; GFX11-NEXT: s_and_b32 s4, 0xffff, s4
780 ; GFX11-NEXT: s_lshr_b32 s3, s0, 8
781 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
782 ; GFX11-NEXT: s_lshl_b32 s0, s0, s6
783 ; GFX11-NEXT: s_and_b32 s6, s5, 7
784 ; GFX11-NEXT: s_and_not1_b32 s5, 7, s5
785 ; GFX11-NEXT: s_lshr_b32 s4, s4, 1
786 ; GFX11-NEXT: s_and_not1_b32 s2, 7, s2
787 ; GFX11-NEXT: s_lshr_b32 s1, s1, 1
788 ; GFX11-NEXT: s_lshl_b32 s3, s3, s6
789 ; GFX11-NEXT: s_lshr_b32 s4, s4, s5
790 ; GFX11-NEXT: s_lshr_b32 s1, s1, s2
791 ; GFX11-NEXT: s_or_b32 s2, s3, s4
792 ; GFX11-NEXT: s_or_b32 s0, s0, s1
793 ; GFX11-NEXT: s_and_b32 s1, s2, 0xff
794 ; GFX11-NEXT: s_and_b32 s0, s0, 0xff
795 ; GFX11-NEXT: s_lshl_b32 s1, s1, 8
796 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
797 ; GFX11-NEXT: s_or_b32 s0, s0, s1
798 ; GFX11-NEXT: ; return to shader part epilog
; Reinterpret each i16 argument as a two-element i8 vector before the shift.
799 %lhs = bitcast i16 %lhs.arg to <2 x i8>
800 %rhs = bitcast i16 %rhs.arg to <2 x i8>
801 %amt = bitcast i16 %amt.arg to <2 x i8>
802 %result = call <2 x i8> @llvm.fshl.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt)
; Repack the vector result into the i16 form used by the signature.
803 %cast.result = bitcast <2 x i8> %result to i16
; Test: funnel shift left (llvm.fshl) on <2 x i8> with operands passed as
; plain (non-inreg) i16 arguments in the default calling convention. Each i16
; argument is bitcast to <2 x i8>, passed to @llvm.fshl.v2i8, and the vector
; result is bitcast back to i16. In the expected output the arguments arrive
; in v0 (lhs), v1 (rhs) and v2 (amt) and the packed result is left in v0.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
807 define i16 @v_fshl_v2i8(i16 %lhs.arg, i16 %rhs.arg, i16 %amt.arg) {
808 ; GFX6-LABEL: v_fshl_v2i8:
810 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
811 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 8, v2
812 ; GFX6-NEXT: v_and_b32_e32 v5, 7, v2
813 ; GFX6-NEXT: v_not_b32_e32 v2, v2
814 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0
815 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v2
816 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v5, v0
817 ; GFX6-NEXT: v_bfe_u32 v5, v1, 1, 7
818 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v5
819 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
820 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v4
821 ; GFX6-NEXT: v_not_b32_e32 v4, v4
822 ; GFX6-NEXT: v_bfe_u32 v1, v1, 8, 8
823 ; GFX6-NEXT: v_and_b32_e32 v4, 7, v4
824 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
825 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, v2, v3
826 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v4, v1
827 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
828 ; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1
829 ; GFX6-NEXT: v_and_b32_e32 v0, 0xff, v0
830 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
831 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
832 ; GFX6-NEXT: s_setpc_b64 s[30:31]
834 ; GFX8-LABEL: v_fshl_v2i8:
836 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
837 ; GFX8-NEXT: v_and_b32_e32 v6, 7, v2
838 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0
839 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2
840 ; GFX8-NEXT: v_not_b32_e32 v2, v2
841 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v6, v0
842 ; GFX8-NEXT: v_mov_b32_e32 v6, 1
843 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1
844 ; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
845 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
846 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1
847 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
848 ; GFX8-NEXT: v_and_b32_e32 v1, 7, v5
849 ; GFX8-NEXT: v_not_b32_e32 v2, v5
850 ; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
851 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, v1, v3
852 ; GFX8-NEXT: v_lshrrev_b16_sdwa v3, v6, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
853 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v2, v3
854 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
855 ; GFX8-NEXT: v_and_b32_e32 v1, 0xff, v1
856 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
857 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
858 ; GFX8-NEXT: s_setpc_b64 s[30:31]
860 ; GFX9-LABEL: v_fshl_v2i8:
862 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
863 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2
864 ; GFX9-NEXT: v_and_b32_e32 v6, 7, v2
865 ; GFX9-NEXT: v_not_b32_e32 v2, v2
866 ; GFX9-NEXT: s_mov_b32 s4, 1
867 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1
868 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
869 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
870 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0
871 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v6, v0
872 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1
873 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
874 ; GFX9-NEXT: v_and_b32_e32 v1, 7, v5
875 ; GFX9-NEXT: v_not_b32_e32 v2, v5
876 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
877 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, v1, v3
878 ; GFX9-NEXT: v_lshrrev_b16_sdwa v3, s4, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
879 ; GFX9-NEXT: v_lshrrev_b16_e32 v2, v2, v3
880 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v2
881 ; GFX9-NEXT: v_and_b32_e32 v1, 0xff, v1
882 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
883 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
884 ; GFX9-NEXT: s_setpc_b64 s[30:31]
886 ; GFX10-LABEL: v_fshl_v2i8:
888 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
889 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v2
890 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v1
891 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v0
892 ; GFX10-NEXT: v_not_b32_e32 v7, v2
893 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
894 ; GFX10-NEXT: v_not_b32_e32 v6, v3
895 ; GFX10-NEXT: v_and_b32_e32 v4, 0xff, v4
896 ; GFX10-NEXT: v_and_b32_e32 v3, 7, v3
897 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
898 ; GFX10-NEXT: v_and_b32_e32 v7, 7, v7
899 ; GFX10-NEXT: v_and_b32_e32 v6, 7, v6
900 ; GFX10-NEXT: v_lshrrev_b16 v4, 1, v4
901 ; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1
902 ; GFX10-NEXT: v_lshlrev_b16 v3, v3, v5
903 ; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0
904 ; GFX10-NEXT: s_movk_i32 s4, 0xff
905 ; GFX10-NEXT: v_lshrrev_b16 v4, v6, v4
906 ; GFX10-NEXT: v_lshrrev_b16 v1, v7, v1
907 ; GFX10-NEXT: v_or_b32_e32 v2, v3, v4
908 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
909 ; GFX10-NEXT: v_and_b32_sdwa v1, v2, s4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
910 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
911 ; GFX10-NEXT: s_setpc_b64 s[30:31]
913 ; GFX11-LABEL: v_fshl_v2i8:
915 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
916 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v2
917 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 8, v1
918 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v0
919 ; GFX11-NEXT: v_not_b32_e32 v7, v2
920 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
921 ; GFX11-NEXT: v_not_b32_e32 v6, v3
922 ; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v4
923 ; GFX11-NEXT: v_and_b32_e32 v3, 7, v3
924 ; GFX11-NEXT: v_and_b32_e32 v2, 7, v2
925 ; GFX11-NEXT: v_and_b32_e32 v7, 7, v7
926 ; GFX11-NEXT: v_and_b32_e32 v6, 7, v6
927 ; GFX11-NEXT: v_lshrrev_b16 v4, 1, v4
928 ; GFX11-NEXT: v_lshrrev_b16 v1, 1, v1
929 ; GFX11-NEXT: v_lshlrev_b16 v3, v3, v5
930 ; GFX11-NEXT: v_lshlrev_b16 v0, v2, v0
931 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
932 ; GFX11-NEXT: v_lshrrev_b16 v4, v6, v4
933 ; GFX11-NEXT: v_lshrrev_b16 v1, v7, v1
934 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
935 ; GFX11-NEXT: v_or_b32_e32 v2, v3, v4
936 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
937 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
938 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v2
939 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
940 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
941 ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
942 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
943 ; GFX11-NEXT: s_setpc_b64 s[30:31]
944 %lhs = bitcast i16 %lhs.arg to <2 x i8>
945 %rhs = bitcast i16 %rhs.arg to <2 x i8>
946 %amt = bitcast i16 %amt.arg to <2 x i8>
947 %result = call <2 x i8> @llvm.fshl.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt)
948 %cast.result = bitcast <2 x i8> %result to i16
; Test: funnel shift left (llvm.fshl) on <4 x i8> with operands passed as
; inreg i32 arguments to an amdgpu_ps function. Each i32 argument is bitcast
; to <4 x i8>, passed to @llvm.fshl.v4i8, and the vector result is bitcast
; back to i32 and returned. In the expected output the arguments arrive in
; s0 (lhs), s1 (rhs) and s2 (amt), only scalar instructions are used, and the
; packed result is left in s0.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
952 define amdgpu_ps i32 @s_fshl_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 inreg %amt.arg) {
953 ; GFX6-LABEL: s_fshl_v4i8:
955 ; GFX6-NEXT: s_and_b32 s9, s2, 7
956 ; GFX6-NEXT: s_lshr_b32 s3, s0, 8
957 ; GFX6-NEXT: s_lshr_b32 s4, s0, 16
958 ; GFX6-NEXT: s_lshr_b32 s5, s0, 24
959 ; GFX6-NEXT: s_lshr_b32 s6, s2, 8
960 ; GFX6-NEXT: s_lshr_b32 s7, s2, 16
961 ; GFX6-NEXT: s_lshr_b32 s8, s2, 24
962 ; GFX6-NEXT: s_andn2_b32 s2, 7, s2
963 ; GFX6-NEXT: s_lshl_b32 s0, s0, s9
964 ; GFX6-NEXT: s_bfe_u32 s9, s1, 0x70001
965 ; GFX6-NEXT: s_lshr_b32 s2, s9, s2
966 ; GFX6-NEXT: s_or_b32 s0, s0, s2
967 ; GFX6-NEXT: s_and_b32 s2, s6, 7
968 ; GFX6-NEXT: s_lshl_b32 s2, s3, s2
969 ; GFX6-NEXT: s_bfe_u32 s3, s1, 0x80008
970 ; GFX6-NEXT: s_andn2_b32 s6, 7, s6
971 ; GFX6-NEXT: s_lshr_b32 s3, s3, 1
972 ; GFX6-NEXT: s_lshr_b32 s3, s3, s6
973 ; GFX6-NEXT: s_or_b32 s2, s2, s3
974 ; GFX6-NEXT: s_and_b32 s3, s7, 7
975 ; GFX6-NEXT: s_lshl_b32 s3, s4, s3
976 ; GFX6-NEXT: s_bfe_u32 s4, s1, 0x80010
977 ; GFX6-NEXT: s_andn2_b32 s6, 7, s7
978 ; GFX6-NEXT: s_lshr_b32 s4, s4, 1
979 ; GFX6-NEXT: s_lshr_b32 s4, s4, s6
980 ; GFX6-NEXT: s_or_b32 s3, s3, s4
981 ; GFX6-NEXT: s_and_b32 s4, s8, 7
982 ; GFX6-NEXT: s_andn2_b32 s6, 7, s8
983 ; GFX6-NEXT: s_lshr_b32 s1, s1, 25
984 ; GFX6-NEXT: s_and_b32 s2, s2, 0xff
985 ; GFX6-NEXT: s_lshl_b32 s4, s5, s4
986 ; GFX6-NEXT: s_lshr_b32 s1, s1, s6
987 ; GFX6-NEXT: s_and_b32 s0, s0, 0xff
988 ; GFX6-NEXT: s_lshl_b32 s2, s2, 8
989 ; GFX6-NEXT: s_or_b32 s1, s4, s1
990 ; GFX6-NEXT: s_or_b32 s0, s0, s2
991 ; GFX6-NEXT: s_and_b32 s2, s3, 0xff
992 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
993 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff
994 ; GFX6-NEXT: s_or_b32 s0, s0, s2
995 ; GFX6-NEXT: s_lshl_b32 s1, s1, 24
996 ; GFX6-NEXT: s_or_b32 s0, s0, s1
997 ; GFX6-NEXT: ; return to shader part epilog
999 ; GFX8-LABEL: s_fshl_v4i8:
1001 ; GFX8-NEXT: s_lshr_b32 s6, s1, 8
1002 ; GFX8-NEXT: s_lshr_b32 s7, s1, 16
1003 ; GFX8-NEXT: s_lshr_b32 s8, s1, 24
1004 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
1005 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1006 ; GFX8-NEXT: s_lshr_b32 s9, s2, 8
1007 ; GFX8-NEXT: s_lshr_b32 s10, s2, 16
1008 ; GFX8-NEXT: s_lshr_b32 s11, s2, 24
1009 ; GFX8-NEXT: s_and_b32 s12, s2, 7
1010 ; GFX8-NEXT: s_andn2_b32 s2, 7, s2
1011 ; GFX8-NEXT: s_lshr_b32 s1, s1, 1
1012 ; GFX8-NEXT: s_lshr_b32 s3, s0, 8
1013 ; GFX8-NEXT: s_lshr_b32 s4, s0, 16
1014 ; GFX8-NEXT: s_lshr_b32 s5, s0, 24
1015 ; GFX8-NEXT: s_lshl_b32 s0, s0, s12
1016 ; GFX8-NEXT: s_lshr_b32 s1, s1, s2
1017 ; GFX8-NEXT: s_or_b32 s0, s0, s1
1018 ; GFX8-NEXT: s_and_b32 s1, s9, 7
1019 ; GFX8-NEXT: s_lshl_b32 s1, s3, s1
1020 ; GFX8-NEXT: s_and_b32 s3, s6, 0xff
1021 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
1022 ; GFX8-NEXT: s_andn2_b32 s2, 7, s9
1023 ; GFX8-NEXT: s_lshr_b32 s3, s3, 1
1024 ; GFX8-NEXT: s_lshr_b32 s2, s3, s2
1025 ; GFX8-NEXT: s_or_b32 s1, s1, s2
1026 ; GFX8-NEXT: s_and_b32 s2, s10, 7
1027 ; GFX8-NEXT: s_lshl_b32 s2, s4, s2
1028 ; GFX8-NEXT: s_and_b32 s4, s7, 0xff
1029 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1030 ; GFX8-NEXT: s_andn2_b32 s3, 7, s10
1031 ; GFX8-NEXT: s_lshr_b32 s4, s4, 1
1032 ; GFX8-NEXT: s_lshr_b32 s3, s4, s3
1033 ; GFX8-NEXT: s_or_b32 s2, s2, s3
1034 ; GFX8-NEXT: s_and_b32 s3, s11, 7
1035 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
1036 ; GFX8-NEXT: s_andn2_b32 s4, 7, s11
1037 ; GFX8-NEXT: s_lshl_b32 s3, s5, s3
1038 ; GFX8-NEXT: s_lshr_b32 s5, s8, 1
1039 ; GFX8-NEXT: s_and_b32 s0, s0, 0xff
1040 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8
1041 ; GFX8-NEXT: s_lshr_b32 s4, s5, s4
1042 ; GFX8-NEXT: s_or_b32 s0, s0, s1
1043 ; GFX8-NEXT: s_and_b32 s1, s2, 0xff
1044 ; GFX8-NEXT: s_or_b32 s3, s3, s4
1045 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1046 ; GFX8-NEXT: s_or_b32 s0, s0, s1
1047 ; GFX8-NEXT: s_and_b32 s1, s3, 0xff
1048 ; GFX8-NEXT: s_lshl_b32 s1, s1, 24
1049 ; GFX8-NEXT: s_or_b32 s0, s0, s1
1050 ; GFX8-NEXT: ; return to shader part epilog
1052 ; GFX9-LABEL: s_fshl_v4i8:
1054 ; GFX9-NEXT: s_lshr_b32 s6, s1, 8
1055 ; GFX9-NEXT: s_lshr_b32 s7, s1, 16
1056 ; GFX9-NEXT: s_lshr_b32 s8, s1, 24
1057 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
1058 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
1059 ; GFX9-NEXT: s_lshr_b32 s9, s2, 8
1060 ; GFX9-NEXT: s_lshr_b32 s10, s2, 16
1061 ; GFX9-NEXT: s_lshr_b32 s11, s2, 24
1062 ; GFX9-NEXT: s_and_b32 s12, s2, 7
1063 ; GFX9-NEXT: s_andn2_b32 s2, 7, s2
1064 ; GFX9-NEXT: s_lshr_b32 s1, s1, 1
1065 ; GFX9-NEXT: s_lshr_b32 s3, s0, 8
1066 ; GFX9-NEXT: s_lshr_b32 s4, s0, 16
1067 ; GFX9-NEXT: s_lshr_b32 s5, s0, 24
1068 ; GFX9-NEXT: s_lshl_b32 s0, s0, s12
1069 ; GFX9-NEXT: s_lshr_b32 s1, s1, s2
1070 ; GFX9-NEXT: s_or_b32 s0, s0, s1
1071 ; GFX9-NEXT: s_and_b32 s1, s9, 7
1072 ; GFX9-NEXT: s_lshl_b32 s1, s3, s1
1073 ; GFX9-NEXT: s_and_b32 s3, s6, 0xff
1074 ; GFX9-NEXT: s_and_b32 s3, 0xffff, s3
1075 ; GFX9-NEXT: s_andn2_b32 s2, 7, s9
1076 ; GFX9-NEXT: s_lshr_b32 s3, s3, 1
1077 ; GFX9-NEXT: s_lshr_b32 s2, s3, s2
1078 ; GFX9-NEXT: s_or_b32 s1, s1, s2
1079 ; GFX9-NEXT: s_and_b32 s2, s10, 7
1080 ; GFX9-NEXT: s_lshl_b32 s2, s4, s2
1081 ; GFX9-NEXT: s_and_b32 s4, s7, 0xff
1082 ; GFX9-NEXT: s_and_b32 s4, 0xffff, s4
1083 ; GFX9-NEXT: s_andn2_b32 s3, 7, s10
1084 ; GFX9-NEXT: s_lshr_b32 s4, s4, 1
1085 ; GFX9-NEXT: s_lshr_b32 s3, s4, s3
1086 ; GFX9-NEXT: s_or_b32 s2, s2, s3
1087 ; GFX9-NEXT: s_and_b32 s3, s11, 7
1088 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
1089 ; GFX9-NEXT: s_andn2_b32 s4, 7, s11
1090 ; GFX9-NEXT: s_lshl_b32 s3, s5, s3
1091 ; GFX9-NEXT: s_lshr_b32 s5, s8, 1
1092 ; GFX9-NEXT: s_and_b32 s0, s0, 0xff
1093 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8
1094 ; GFX9-NEXT: s_lshr_b32 s4, s5, s4
1095 ; GFX9-NEXT: s_or_b32 s0, s0, s1
1096 ; GFX9-NEXT: s_and_b32 s1, s2, 0xff
1097 ; GFX9-NEXT: s_or_b32 s3, s3, s4
1098 ; GFX9-NEXT: s_lshl_b32 s1, s1, 16
1099 ; GFX9-NEXT: s_or_b32 s0, s0, s1
1100 ; GFX9-NEXT: s_and_b32 s1, s3, 0xff
1101 ; GFX9-NEXT: s_lshl_b32 s1, s1, 24
1102 ; GFX9-NEXT: s_or_b32 s0, s0, s1
1103 ; GFX9-NEXT: ; return to shader part epilog
1105 ; GFX10-LABEL: s_fshl_v4i8:
1107 ; GFX10-NEXT: s_lshr_b32 s6, s1, 8
1108 ; GFX10-NEXT: s_lshr_b32 s7, s1, 16
1109 ; GFX10-NEXT: s_lshr_b32 s8, s1, 24
1110 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
1111 ; GFX10-NEXT: s_lshr_b32 s9, s2, 8
1112 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
1113 ; GFX10-NEXT: s_lshr_b32 s10, s2, 16
1114 ; GFX10-NEXT: s_lshr_b32 s11, s2, 24
1115 ; GFX10-NEXT: s_and_b32 s12, s2, 7
1116 ; GFX10-NEXT: s_andn2_b32 s2, 7, s2
1117 ; GFX10-NEXT: s_lshr_b32 s1, s1, 1
1118 ; GFX10-NEXT: s_lshr_b32 s3, s0, 8
1119 ; GFX10-NEXT: s_lshr_b32 s1, s1, s2
1120 ; GFX10-NEXT: s_and_b32 s2, s6, 0xff
1121 ; GFX10-NEXT: s_and_b32 s6, s9, 7
1122 ; GFX10-NEXT: s_and_b32 s2, 0xffff, s2
1123 ; GFX10-NEXT: s_andn2_b32 s9, 7, s9
1124 ; GFX10-NEXT: s_lshr_b32 s2, s2, 1
1125 ; GFX10-NEXT: s_lshr_b32 s4, s0, 16
1126 ; GFX10-NEXT: s_lshr_b32 s5, s0, 24
1127 ; GFX10-NEXT: s_lshl_b32 s0, s0, s12
1128 ; GFX10-NEXT: s_lshl_b32 s3, s3, s6
1129 ; GFX10-NEXT: s_lshr_b32 s2, s2, s9
1130 ; GFX10-NEXT: s_or_b32 s0, s0, s1
1131 ; GFX10-NEXT: s_or_b32 s1, s3, s2
1132 ; GFX10-NEXT: s_and_b32 s2, s7, 0xff
1133 ; GFX10-NEXT: s_and_b32 s3, s10, 7
1134 ; GFX10-NEXT: s_and_b32 s2, 0xffff, s2
1135 ; GFX10-NEXT: s_andn2_b32 s6, 7, s10
1136 ; GFX10-NEXT: s_lshr_b32 s2, s2, 1
1137 ; GFX10-NEXT: s_lshl_b32 s3, s4, s3
1138 ; GFX10-NEXT: s_lshr_b32 s2, s2, s6
1139 ; GFX10-NEXT: s_and_b32 s4, s11, 7
1140 ; GFX10-NEXT: s_andn2_b32 s6, 7, s11
1141 ; GFX10-NEXT: s_lshr_b32 s7, s8, 1
1142 ; GFX10-NEXT: s_lshl_b32 s4, s5, s4
1143 ; GFX10-NEXT: s_lshr_b32 s5, s7, s6
1144 ; GFX10-NEXT: s_or_b32 s2, s3, s2
1145 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
1146 ; GFX10-NEXT: s_or_b32 s3, s4, s5
1147 ; GFX10-NEXT: s_and_b32 s0, s0, 0xff
1148 ; GFX10-NEXT: s_lshl_b32 s1, s1, 8
1149 ; GFX10-NEXT: s_and_b32 s2, s2, 0xff
1150 ; GFX10-NEXT: s_or_b32 s0, s0, s1
1151 ; GFX10-NEXT: s_lshl_b32 s1, s2, 16
1152 ; GFX10-NEXT: s_and_b32 s2, s3, 0xff
1153 ; GFX10-NEXT: s_or_b32 s0, s0, s1
1154 ; GFX10-NEXT: s_lshl_b32 s1, s2, 24
1155 ; GFX10-NEXT: s_or_b32 s0, s0, s1
1156 ; GFX10-NEXT: ; return to shader part epilog
1158 ; GFX11-LABEL: s_fshl_v4i8:
1160 ; GFX11-NEXT: s_lshr_b32 s6, s1, 8
1161 ; GFX11-NEXT: s_lshr_b32 s7, s1, 16
1162 ; GFX11-NEXT: s_lshr_b32 s8, s1, 24
1163 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
1164 ; GFX11-NEXT: s_lshr_b32 s9, s2, 8
1165 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
1166 ; GFX11-NEXT: s_lshr_b32 s10, s2, 16
1167 ; GFX11-NEXT: s_lshr_b32 s11, s2, 24
1168 ; GFX11-NEXT: s_and_b32 s12, s2, 7
1169 ; GFX11-NEXT: s_and_not1_b32 s2, 7, s2
1170 ; GFX11-NEXT: s_lshr_b32 s1, s1, 1
1171 ; GFX11-NEXT: s_lshr_b32 s3, s0, 8
1172 ; GFX11-NEXT: s_lshr_b32 s1, s1, s2
1173 ; GFX11-NEXT: s_and_b32 s2, s6, 0xff
1174 ; GFX11-NEXT: s_and_b32 s6, s9, 7
1175 ; GFX11-NEXT: s_and_b32 s2, 0xffff, s2
1176 ; GFX11-NEXT: s_and_not1_b32 s9, 7, s9
1177 ; GFX11-NEXT: s_lshr_b32 s2, s2, 1
1178 ; GFX11-NEXT: s_lshr_b32 s4, s0, 16
1179 ; GFX11-NEXT: s_lshr_b32 s5, s0, 24
1180 ; GFX11-NEXT: s_lshl_b32 s0, s0, s12
1181 ; GFX11-NEXT: s_lshl_b32 s3, s3, s6
1182 ; GFX11-NEXT: s_lshr_b32 s2, s2, s9
1183 ; GFX11-NEXT: s_or_b32 s0, s0, s1
1184 ; GFX11-NEXT: s_or_b32 s1, s3, s2
1185 ; GFX11-NEXT: s_and_b32 s2, s7, 0xff
1186 ; GFX11-NEXT: s_and_b32 s3, s10, 7
1187 ; GFX11-NEXT: s_and_b32 s2, 0xffff, s2
1188 ; GFX11-NEXT: s_and_not1_b32 s6, 7, s10
1189 ; GFX11-NEXT: s_lshr_b32 s2, s2, 1
1190 ; GFX11-NEXT: s_lshl_b32 s3, s4, s3
1191 ; GFX11-NEXT: s_lshr_b32 s2, s2, s6
1192 ; GFX11-NEXT: s_and_b32 s4, s11, 7
1193 ; GFX11-NEXT: s_and_not1_b32 s6, 7, s11
1194 ; GFX11-NEXT: s_lshr_b32 s7, s8, 1
1195 ; GFX11-NEXT: s_lshl_b32 s4, s5, s4
1196 ; GFX11-NEXT: s_lshr_b32 s5, s7, s6
1197 ; GFX11-NEXT: s_or_b32 s2, s3, s2
1198 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
1199 ; GFX11-NEXT: s_or_b32 s3, s4, s5
1200 ; GFX11-NEXT: s_and_b32 s0, s0, 0xff
1201 ; GFX11-NEXT: s_lshl_b32 s1, s1, 8
1202 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff
1203 ; GFX11-NEXT: s_or_b32 s0, s0, s1
1204 ; GFX11-NEXT: s_lshl_b32 s1, s2, 16
1205 ; GFX11-NEXT: s_and_b32 s2, s3, 0xff
1206 ; GFX11-NEXT: s_or_b32 s0, s0, s1
1207 ; GFX11-NEXT: s_lshl_b32 s1, s2, 24
1208 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1209 ; GFX11-NEXT: s_or_b32 s0, s0, s1
1210 ; GFX11-NEXT: ; return to shader part epilog
1211 %lhs = bitcast i32 %lhs.arg to <4 x i8>
1212 %rhs = bitcast i32 %rhs.arg to <4 x i8>
1213 %amt = bitcast i32 %amt.arg to <4 x i8>
1214 %result = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt)
1215 %cast.result = bitcast <4 x i8> %result to i32
1216 ret i32 %cast.result
; Test: funnel shift left (llvm.fshl) on <4 x i8> with operands passed as
; plain (non-inreg) i32 arguments in the default calling convention. Each i32
; argument is bitcast to <4 x i8>, passed to @llvm.fshl.v4i8, and the vector
; result is bitcast back to i32 and returned. In the expected output the
; arguments arrive in v0 (lhs), v1 (rhs) and v2 (amt) and the packed result
; is left in v0.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
1219 define i32 @v_fshl_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) {
1220 ; GFX6-LABEL: v_fshl_v4i8:
1222 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1223 ; GFX6-NEXT: v_lshrrev_b32_e32 v6, 8, v2
1224 ; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v2
1225 ; GFX6-NEXT: v_lshrrev_b32_e32 v8, 24, v2
1226 ; GFX6-NEXT: v_and_b32_e32 v9, 7, v2
1227 ; GFX6-NEXT: v_not_b32_e32 v2, v2
1228 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0
1229 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v0
1230 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 24, v0
1231 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v2
1232 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v9, v0
1233 ; GFX6-NEXT: v_bfe_u32 v9, v1, 1, 7
1234 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v9
1235 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
1236 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v6
1237 ; GFX6-NEXT: v_not_b32_e32 v6, v6
1238 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, v2, v3
1239 ; GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8
1240 ; GFX6-NEXT: v_and_b32_e32 v6, 7, v6
1241 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 1, v3
1242 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3
1243 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
1244 ; GFX6-NEXT: v_and_b32_e32 v3, 7, v7
1245 ; GFX6-NEXT: v_not_b32_e32 v6, v7
1246 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, v3, v4
1247 ; GFX6-NEXT: v_bfe_u32 v4, v1, 16, 8
1248 ; GFX6-NEXT: v_and_b32_e32 v6, 7, v6
1249 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 1, v4
1250 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4
1251 ; GFX6-NEXT: v_not_b32_e32 v6, v8
1252 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
1253 ; GFX6-NEXT: v_and_b32_e32 v4, 7, v8
1254 ; GFX6-NEXT: v_and_b32_e32 v6, 7, v6
1255 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 25, v1
1256 ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v2
1257 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, v4, v5
1258 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v6, v1
1259 ; GFX6-NEXT: v_and_b32_e32 v0, 0xff, v0
1260 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2
1261 ; GFX6-NEXT: v_or_b32_e32 v1, v4, v1
1262 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
1263 ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v3
1264 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1265 ; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1
1266 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
1267 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
1268 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1269 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1271 ; GFX8-LABEL: v_fshl_v4i8:
1273 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1274 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2
1275 ; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1276 ; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v2
1277 ; GFX8-NEXT: v_and_b32_e32 v8, 7, v2
1278 ; GFX8-NEXT: v_not_b32_e32 v2, v2
1279 ; GFX8-NEXT: v_mov_b32_e32 v10, 1
1280 ; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
1281 ; GFX8-NEXT: v_lshrrev_b16_sdwa v11, v10, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1282 ; GFX8-NEXT: v_lshlrev_b16_e32 v8, v8, v0
1283 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v2, v11
1284 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1
1285 ; GFX8-NEXT: v_or_b32_e32 v2, v8, v2
1286 ; GFX8-NEXT: v_and_b32_e32 v8, 7, v5
1287 ; GFX8-NEXT: v_not_b32_e32 v5, v5
1288 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0
1289 ; GFX8-NEXT: v_and_b32_e32 v5, 7, v5
1290 ; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1291 ; GFX8-NEXT: v_mov_b32_e32 v9, 0xff
1292 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, v8, v3
1293 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, v5, v4
1294 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v4
1295 ; GFX8-NEXT: v_and_b32_e32 v4, 7, v6
1296 ; GFX8-NEXT: v_not_b32_e32 v5, v6
1297 ; GFX8-NEXT: v_and_b32_sdwa v6, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1298 ; GFX8-NEXT: v_and_b32_e32 v5, 7, v5
1299 ; GFX8-NEXT: v_lshrrev_b16_e32 v6, 1, v6
1300 ; GFX8-NEXT: v_lshlrev_b16_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1301 ; GFX8-NEXT: v_lshrrev_b16_e32 v5, v5, v6
1302 ; GFX8-NEXT: v_or_b32_e32 v4, v4, v5
1303 ; GFX8-NEXT: v_and_b32_e32 v5, 7, v7
1304 ; GFX8-NEXT: v_not_b32_e32 v6, v7
1305 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1306 ; GFX8-NEXT: v_mov_b32_e32 v5, 1
1307 ; GFX8-NEXT: v_and_b32_e32 v6, 7, v6
1308 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1309 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, v6, v1
1310 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
1311 ; GFX8-NEXT: v_mov_b32_e32 v1, 8
1312 ; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1313 ; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1314 ; GFX8-NEXT: v_and_b32_e32 v2, 0xff, v4
1315 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1316 ; GFX8-NEXT: v_and_b32_e32 v0, 0xff, v0
1317 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
1318 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 24, v0
1319 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
1320 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1322 ; GFX9-LABEL: v_fshl_v4i8:
1324 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1325 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2
1326 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1327 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v2
1328 ; GFX9-NEXT: v_and_b32_e32 v8, 7, v2
1329 ; GFX9-NEXT: v_not_b32_e32 v2, v2
1330 ; GFX9-NEXT: s_mov_b32 s5, 1
1331 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
1332 ; GFX9-NEXT: v_lshrrev_b16_sdwa v10, s5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1333 ; GFX9-NEXT: v_lshlrev_b16_e32 v8, v8, v0
1334 ; GFX9-NEXT: v_lshrrev_b16_e32 v2, v2, v10
1335 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1
1336 ; GFX9-NEXT: v_or_b32_e32 v2, v8, v2
1337 ; GFX9-NEXT: v_and_b32_e32 v8, 7, v5
1338 ; GFX9-NEXT: v_not_b32_e32 v5, v5
1339 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0
1340 ; GFX9-NEXT: v_and_b32_e32 v5, 7, v5
1341 ; GFX9-NEXT: v_lshrrev_b16_sdwa v4, s5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1342 ; GFX9-NEXT: v_mov_b32_e32 v9, 0xff
1343 ; GFX9-NEXT: v_lshlrev_b16_e32 v3, v8, v3
1344 ; GFX9-NEXT: v_lshrrev_b16_e32 v4, v5, v4
1345 ; GFX9-NEXT: v_or_b32_e32 v3, v3, v4
1346 ; GFX9-NEXT: v_and_b32_e32 v4, 7, v6
1347 ; GFX9-NEXT: v_not_b32_e32 v5, v6
1348 ; GFX9-NEXT: v_and_b32_sdwa v6, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1349 ; GFX9-NEXT: v_and_b32_e32 v5, 7, v5
1350 ; GFX9-NEXT: v_lshrrev_b16_e32 v6, 1, v6
1351 ; GFX9-NEXT: v_lshlrev_b16_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1352 ; GFX9-NEXT: v_lshrrev_b16_e32 v5, v5, v6
1353 ; GFX9-NEXT: v_or_b32_e32 v4, v4, v5
1354 ; GFX9-NEXT: v_and_b32_e32 v5, 7, v7
1355 ; GFX9-NEXT: v_not_b32_e32 v6, v7
1356 ; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1357 ; GFX9-NEXT: v_mov_b32_e32 v5, 1
1358 ; GFX9-NEXT: v_and_b32_e32 v6, 7, v6
1359 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1360 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, v6, v1
1361 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
1362 ; GFX9-NEXT: v_mov_b32_e32 v1, 8
1363 ; GFX9-NEXT: s_movk_i32 s4, 0xff
1364 ; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1365 ; GFX9-NEXT: v_and_or_b32 v1, v2, s4, v1
1366 ; GFX9-NEXT: v_and_b32_e32 v2, 0xff, v4
1367 ; GFX9-NEXT: v_and_b32_e32 v0, 0xff, v0
1368 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1369 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 24, v0
1370 ; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0
1371 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1373 ; GFX10-LABEL: v_fshl_v4i8:
1375 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1376 ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v2
1377 ; GFX10-NEXT: v_and_b32_e32 v10, 7, v2
1378 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0
1379 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v0
1380 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0
1381 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v1
1382 ; GFX10-NEXT: v_not_b32_e32 v9, v2
1383 ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v2
1384 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 24, v2
1385 ; GFX10-NEXT: v_lshlrev_b16 v0, v10, v0
1386 ; GFX10-NEXT: v_not_b32_e32 v10, v8
1387 ; GFX10-NEXT: v_and_b32_e32 v8, 7, v8
1388 ; GFX10-NEXT: v_mov_b32_e32 v13, 0xff
1389 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1
1390 ; GFX10-NEXT: v_and_b32_e32 v12, 0xff, v1
1391 ; GFX10-NEXT: v_and_b32_e32 v6, 0xff, v6
1392 ; GFX10-NEXT: v_lshlrev_b16 v3, v8, v3
1393 ; GFX10-NEXT: v_not_b32_e32 v8, v11
1394 ; GFX10-NEXT: v_and_b32_sdwa v1, v1, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1395 ; GFX10-NEXT: v_not_b32_e32 v13, v2
1396 ; GFX10-NEXT: v_and_b32_e32 v10, 7, v10
1397 ; GFX10-NEXT: v_lshrrev_b16 v6, 1, v6
1398 ; GFX10-NEXT: v_and_b32_e32 v11, 7, v11
1399 ; GFX10-NEXT: v_and_b32_e32 v8, 7, v8
1400 ; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1
1401 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
1402 ; GFX10-NEXT: v_and_b32_e32 v13, 7, v13
1403 ; GFX10-NEXT: v_lshrrev_b16 v7, 1, v7
1404 ; GFX10-NEXT: v_and_b32_e32 v9, 7, v9
1405 ; GFX10-NEXT: v_lshrrev_b16 v12, 1, v12
1406 ; GFX10-NEXT: v_lshrrev_b16 v6, v10, v6
1407 ; GFX10-NEXT: v_lshlrev_b16 v4, v11, v4
1408 ; GFX10-NEXT: v_lshrrev_b16 v1, v8, v1
1409 ; GFX10-NEXT: v_lshlrev_b16 v2, v2, v5
1410 ; GFX10-NEXT: v_lshrrev_b16 v5, v13, v7
1411 ; GFX10-NEXT: v_lshrrev_b16 v7, v9, v12
1412 ; GFX10-NEXT: v_or_b32_e32 v3, v3, v6
1413 ; GFX10-NEXT: v_mov_b32_e32 v6, 8
1414 ; GFX10-NEXT: v_or_b32_e32 v1, v4, v1
1415 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v5
1416 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v7
1417 ; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1418 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
1419 ; GFX10-NEXT: v_and_b32_e32 v2, 0xff, v2
1420 ; GFX10-NEXT: v_and_or_b32 v0, v0, 0xff, v3
1421 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1422 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2
1423 ; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2
1424 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1426 ; GFX11-LABEL: v_fshl_v4i8:
1428 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1429 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 8, v1
1430 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 8, v2
1431 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v0
1432 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v1
1433 ; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2
1434 ; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6
1435 ; GFX11-NEXT: v_not_b32_e32 v13, v9
1436 ; GFX11-NEXT: v_lshrrev_b32_e32 v11, 24, v2
1437 ; GFX11-NEXT: v_and_b32_e32 v9, 7, v9
1438 ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 24, v1
1439 ; GFX11-NEXT: v_lshrrev_b16 v6, 1, v6
1440 ; GFX11-NEXT: v_and_b32_e32 v13, 7, v13
1441 ; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v7
1442 ; GFX11-NEXT: v_lshlrev_b16 v3, v9, v3
1443 ; GFX11-NEXT: v_not_b32_e32 v9, v10
1444 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v0
1445 ; GFX11-NEXT: v_lshrrev_b16 v6, v13, v6
1446 ; GFX11-NEXT: v_not_b32_e32 v13, v11
1447 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 24, v0
1448 ; GFX11-NEXT: v_and_b32_e32 v12, 7, v2
1449 ; GFX11-NEXT: v_not_b32_e32 v2, v2
1450 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
1451 ; GFX11-NEXT: v_and_b32_e32 v10, 7, v10
1452 ; GFX11-NEXT: v_and_b32_e32 v9, 7, v9
1453 ; GFX11-NEXT: v_lshrrev_b16 v7, 1, v7
1454 ; GFX11-NEXT: v_and_b32_e32 v11, 7, v11
1455 ; GFX11-NEXT: v_and_b32_e32 v13, 7, v13
1456 ; GFX11-NEXT: v_lshrrev_b16 v8, 1, v8
1457 ; GFX11-NEXT: v_and_b32_e32 v2, 7, v2
1458 ; GFX11-NEXT: v_lshrrev_b16 v1, 1, v1
1459 ; GFX11-NEXT: v_or_b32_e32 v3, v3, v6
1460 ; GFX11-NEXT: v_lshlrev_b16 v4, v10, v4
1461 ; GFX11-NEXT: v_lshrrev_b16 v6, v9, v7
1462 ; GFX11-NEXT: v_lshlrev_b16 v5, v11, v5
1463 ; GFX11-NEXT: v_lshrrev_b16 v7, v13, v8
1464 ; GFX11-NEXT: v_lshlrev_b16 v0, v12, v0
1465 ; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1
1466 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3
1467 ; GFX11-NEXT: v_or_b32_e32 v3, v4, v6
1468 ; GFX11-NEXT: v_or_b32_e32 v4, v5, v7
1469 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1470 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
1471 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 8, v2
1472 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1473 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3
1474 ; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4
1475 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1476 ; GFX11-NEXT: v_and_or_b32 v0, v0, 0xff, v1
1477 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v2
1478 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
1479 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 24, v3
1480 ; GFX11-NEXT: v_or3_b32 v0, v0, v1, v2
1481 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1482 %lhs = bitcast i32 %lhs.arg to <4 x i8>
1483 %rhs = bitcast i32 %rhs.arg to <4 x i8>
1484 %amt = bitcast i32 %amt.arg to <4 x i8>
1485 %result = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt)
1486 %cast.result = bitcast <4 x i8> %result to i32
1487 ret i32 %cast.result
1490 define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt) {
1491 ; GFX6-LABEL: s_fshl_i24:
1493 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1494 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
1495 ; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1496 ; GFX6-NEXT: s_and_b32 s2, s2, 0xffffff
1497 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x170001
1498 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1499 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
1500 ; GFX6-NEXT: v_mul_lo_u32 v1, v0, v1
1501 ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
1502 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
1503 ; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
1504 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
1505 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
1506 ; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0
1507 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1508 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1509 ; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0
1510 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1511 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1512 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 23, v0
1513 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1514 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1515 ; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
1516 ; GFX6-NEXT: v_lshr_b32_e32 v1, s1, v1
1517 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1518 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
1519 ; GFX6-NEXT: ; return to shader part epilog
1521 ; GFX8-LABEL: s_fshl_i24:
1523 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1524 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
1525 ; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1526 ; GFX8-NEXT: s_and_b32 s2, s2, 0xffffff
1527 ; GFX8-NEXT: s_bfe_u32 s1, s1, 0x170001
1528 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1529 ; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
1530 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
1531 ; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
1532 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
1533 ; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
1534 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
1535 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
1536 ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0
1537 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1538 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1539 ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0
1540 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1541 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1542 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0
1543 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1544 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1545 ; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s0
1546 ; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s1
1547 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
1548 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
1549 ; GFX8-NEXT: ; return to shader part epilog
1551 ; GFX9-LABEL: s_fshl_i24:
1553 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1554 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
1555 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1556 ; GFX9-NEXT: s_and_b32 s2, s2, 0xffffff
1557 ; GFX9-NEXT: s_bfe_u32 s1, s1, 0x170001
1558 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1559 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
1560 ; GFX9-NEXT: v_mul_lo_u32 v1, v0, v1
1561 ; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
1562 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
1563 ; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0
1564 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
1565 ; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
1566 ; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0
1567 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1568 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1569 ; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0
1570 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1571 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1572 ; GFX9-NEXT: v_sub_u32_e32 v1, 23, v0
1573 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1574 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1575 ; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s1
1576 ; GFX9-NEXT: v_lshl_or_b32 v0, s0, v0, v1
1577 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
1578 ; GFX9-NEXT: ; return to shader part epilog
1580 ; GFX10-LABEL: s_fshl_i24:
1582 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1583 ; GFX10-NEXT: s_and_b32 s2, s2, 0xffffff
1584 ; GFX10-NEXT: s_bfe_u32 s1, s1, 0x170001
1585 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
1586 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1587 ; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
1588 ; GFX10-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0
1589 ; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
1590 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
1591 ; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0
1592 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24
1593 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0
1594 ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
1595 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
1596 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1597 ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
1598 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
1599 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1600 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, 23, v0
1601 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1602 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1603 ; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s1
1604 ; GFX10-NEXT: v_lshl_or_b32 v0, s0, v0, v1
1605 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
1606 ; GFX10-NEXT: ; return to shader part epilog
1608 ; GFX11-LABEL: s_fshl_i24:
1610 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1611 ; GFX11-NEXT: s_and_b32 s2, s2, 0xffffff
1612 ; GFX11-NEXT: s_bfe_u32 s1, s1, 0x170001
1613 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1614 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0
1615 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
1616 ; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1617 ; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
1618 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1619 ; GFX11-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0
1620 ; GFX11-NEXT: v_mul_hi_u32 v1, v0, v1
1621 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1622 ; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1
1623 ; GFX11-NEXT: v_mul_hi_u32 v0, s2, v0
1624 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1625 ; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24
1626 ; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0
1627 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1628 ; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
1629 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
1630 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1631 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1632 ; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
1633 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
1634 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1635 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1636 ; GFX11-NEXT: v_sub_nc_u32_e32 v1, 23, v0
1637 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1638 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1639 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1640 ; GFX11-NEXT: v_lshrrev_b32_e64 v1, v1, s1
1641 ; GFX11-NEXT: v_lshl_or_b32 v0, s0, v0, v1
1642 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1643 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
1644 ; GFX11-NEXT: ; return to shader part epilog
1645 %result = call i24 @llvm.fshl.i24(i24 %lhs, i24 %rhs, i24 %amt)
; v_fshl_i24 - funnel shift left on the non-power-of-two width i24, written as
; a plain (non-shader) function with non-inreg arguments.
;
; The body calls @llvm.fshl.i24 on %lhs, %rhs and %amt. The assertion lines
; below are produced by utils/update_llc_test_checks.py (see the note on the
; first line of this file); regenerate them with that script instead of
; editing them by hand.
;
; One assertion group exists per check prefix used by the llc invocations at
; the top of the file - GFX6 (tahiti), GFX8 (fiji), GFX9 (gfx900),
; GFX10 (gfx1010) and GFX11 (gfx1100).
;
; Shape of the expected code, shared by every group:
;   * the amount in v2 is masked to 24 bits and reduced modulo 24 with a
;     reciprocal-based sequence (v_rcp_iflag_f32 of 24.0, a multiply by
;     0xffffffe8 (-24) as the refinement step, v_mul_hi_u32 for the
;     quotient, then two compare-and-subtract-24 fixups of the remainder);
;   * v_bfe_u32 v1, v1, 1, 23 pre-shifts %rhs right by one bit, and the
;     remaining right-shift amount is computed as 23 - (amt mod 24);
;   * %lhs is shifted left by the reduced amount and OR-ed with the shifted
;     %rhs. The GFX9, GFX10 and GFX11 groups expect that shift+or pair fused
;     into a single v_lshl_or_b32.
; The GFX11 group additionally expects s_delay_alu / s_waitcnt_depctr
; instructions between the dependent VALU instructions.
1649 define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
1650 ; GFX6-LABEL: v_fshl_i24:
1652 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1653 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1654 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3
1655 ; GFX6-NEXT: v_mov_b32_e32 v4, 0xffffffe8
1656 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1657 ; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 23
1658 ; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1659 ; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3
1660 ; GFX6-NEXT: v_mul_lo_u32 v4, v3, v4
1661 ; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4
1662 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4
1663 ; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
1664 ; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
1665 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
1666 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2
1667 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1668 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1669 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2
1670 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1671 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1672 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2
1673 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1674 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
1675 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v3
1676 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
1677 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1678 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1680 ; GFX8-LABEL: v_fshl_i24:
1682 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1683 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1684 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3
1685 ; GFX8-NEXT: v_mov_b32_e32 v4, 0xffffffe8
1686 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1687 ; GFX8-NEXT: v_bfe_u32 v1, v1, 1, 23
1688 ; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1689 ; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3
1690 ; GFX8-NEXT: v_mul_lo_u32 v4, v3, v4
1691 ; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4
1692 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4
1693 ; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3
1694 ; GFX8-NEXT: v_mul_lo_u32 v3, v3, 24
1695 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
1696 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2
1697 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1698 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1699 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2
1700 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1701 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1702 ; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2
1703 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1704 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v2, v0
1705 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v3
1706 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, v2, v1
1707 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
1708 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1710 ; GFX9-LABEL: v_fshl_i24:
1712 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1713 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1714 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3
1715 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffffe8
1716 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1717 ; GFX9-NEXT: v_bfe_u32 v1, v1, 1, 23
1718 ; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1719 ; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3
1720 ; GFX9-NEXT: v_mul_lo_u32 v4, v3, v4
1721 ; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4
1722 ; GFX9-NEXT: v_add_u32_e32 v3, v3, v4
1723 ; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3
1724 ; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24
1725 ; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
1726 ; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2
1727 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1728 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1729 ; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2
1730 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1731 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1732 ; GFX9-NEXT: v_sub_u32_e32 v3, 23, v2
1733 ; GFX9-NEXT: v_and_b32_e32 v3, 0xffffff, v3
1734 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1735 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, v3, v1
1736 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v2, v1
1737 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1739 ; GFX10-LABEL: v_fshl_i24:
1741 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1742 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1743 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1744 ; GFX10-NEXT: v_bfe_u32 v1, v1, 1, 23
1745 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3
1746 ; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1747 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3
1748 ; GFX10-NEXT: v_mul_lo_u32 v4, 0xffffffe8, v3
1749 ; GFX10-NEXT: v_mul_hi_u32 v4, v3, v4
1750 ; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4
1751 ; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3
1752 ; GFX10-NEXT: v_mul_lo_u32 v3, v3, 24
1753 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
1754 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
1755 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
1756 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1757 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
1758 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
1759 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1760 ; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v2
1761 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1762 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3
1763 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, v3, v1
1764 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, v2, v1
1765 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1767 ; GFX11-LABEL: v_fshl_i24:
1769 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1770 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1771 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1772 ; GFX11-NEXT: v_bfe_u32 v1, v1, 1, 23
1773 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1774 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v3, v3
1775 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
1776 ; GFX11-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1777 ; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3
1778 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1779 ; GFX11-NEXT: v_mul_lo_u32 v4, 0xffffffe8, v3
1780 ; GFX11-NEXT: v_mul_hi_u32 v4, v3, v4
1781 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1782 ; GFX11-NEXT: v_add_nc_u32_e32 v3, v3, v4
1783 ; GFX11-NEXT: v_mul_hi_u32 v3, v2, v3
1784 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1785 ; GFX11-NEXT: v_mul_lo_u32 v3, v3, 24
1786 ; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3
1787 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1788 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
1789 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
1790 ; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1791 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1792 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
1793 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
1794 ; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1795 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1796 ; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v2
1797 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1798 ; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3
1799 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1800 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, v3, v1
1801 ; GFX11-NEXT: v_lshl_or_b32 v0, v0, v2, v1
1802 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1803 %result = call i24 @llvm.fshl.i24(i24 %lhs, i24 %rhs, i24 %amt)
1807 define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) {
1808 ; GFX6-LABEL: s_fshl_v2i24:
1810 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1811 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
1812 ; GFX6-NEXT: s_lshr_b32 s6, s0, 16
1813 ; GFX6-NEXT: s_lshr_b32 s7, s0, 24
1814 ; GFX6-NEXT: s_and_b32 s9, s0, 0xff
1815 ; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80008
1816 ; GFX6-NEXT: s_lshl_b32 s0, s0, 8
1817 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff
1818 ; GFX6-NEXT: s_or_b32 s0, s9, s0
1819 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1820 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1821 ; GFX6-NEXT: s_lshr_b32 s8, s1, 8
1822 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
1823 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1824 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff
1825 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
1826 ; GFX6-NEXT: s_or_b32 s0, s0, s6
1827 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8
1828 ; GFX6-NEXT: s_and_b32 s6, s8, 0xff
1829 ; GFX6-NEXT: s_or_b32 s1, s7, s1
1830 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1831 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
1832 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1833 ; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1834 ; GFX6-NEXT: s_or_b32 s1, s1, s6
1835 ; GFX6-NEXT: s_lshr_b32 s6, s2, 16
1836 ; GFX6-NEXT: s_lshr_b32 s7, s2, 24
1837 ; GFX6-NEXT: s_and_b32 s9, s2, 0xff
1838 ; GFX6-NEXT: s_bfe_u32 s2, s2, 0x80008
1839 ; GFX6-NEXT: v_mul_lo_u32 v2, v0, v1
1840 ; GFX6-NEXT: s_lshl_b32 s2, s2, 8
1841 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff
1842 ; GFX6-NEXT: s_or_b32 s2, s9, s2
1843 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1844 ; GFX6-NEXT: s_lshr_b32 s8, s3, 8
1845 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
1846 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1847 ; GFX6-NEXT: s_and_b32 s3, s3, 0xff
1848 ; GFX6-NEXT: s_or_b32 s2, s2, s6
1849 ; GFX6-NEXT: s_lshl_b32 s3, s3, 8
1850 ; GFX6-NEXT: s_and_b32 s6, s8, 0xff
1851 ; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2
1852 ; GFX6-NEXT: s_or_b32 s3, s7, s3
1853 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1854 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
1855 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1856 ; GFX6-NEXT: s_or_b32 s3, s3, s6
1857 ; GFX6-NEXT: s_lshr_b32 s6, s4, 16
1858 ; GFX6-NEXT: s_lshr_b32 s7, s4, 24
1859 ; GFX6-NEXT: s_and_b32 s9, s4, 0xff
1860 ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x80008
1861 ; GFX6-NEXT: s_lshl_b32 s4, s4, 8
1862 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff
1863 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
1864 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
1865 ; GFX6-NEXT: s_or_b32 s4, s9, s4
1866 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1867 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2
1868 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
1869 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1870 ; GFX6-NEXT: s_or_b32 s4, s4, s6
1871 ; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0
1872 ; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
1873 ; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2
1874 ; GFX6-NEXT: s_lshr_b32 s8, s5, 8
1875 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
1876 ; GFX6-NEXT: s_and_b32 s5, s5, 0xff
1877 ; GFX6-NEXT: v_mul_lo_u32 v1, v2, v1
1878 ; GFX6-NEXT: s_lshl_b32 s5, s5, 8
1879 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0
1880 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0
1881 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1882 ; GFX6-NEXT: v_mul_hi_u32 v1, v2, v1
1883 ; GFX6-NEXT: s_and_b32 s6, s8, 0xff
1884 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1885 ; GFX6-NEXT: s_or_b32 s5, s7, s5
1886 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1887 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0
1888 ; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
1889 ; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1890 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1891 ; GFX6-NEXT: s_or_b32 s5, s5, s6
1892 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1893 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
1894 ; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1
1895 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v0
1896 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1897 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24
1898 ; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
1899 ; GFX6-NEXT: s_lshr_b32 s0, s2, 1
1900 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v3
1901 ; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2
1902 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v1
1903 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
1904 ; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
1905 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
1906 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1907 ; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
1908 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
1909 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1910 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1
1911 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1912 ; GFX6-NEXT: s_lshr_b32 s0, s3, 1
1913 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1914 ; GFX6-NEXT: v_lshl_b32_e32 v1, s1, v1
1915 ; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2
1916 ; GFX6-NEXT: v_bfe_u32 v3, v0, 8, 8
1917 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
1918 ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0
1919 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3
1920 ; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8
1921 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
1922 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1923 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
1924 ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1
1925 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2
1926 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
1927 ; GFX6-NEXT: v_bfe_u32 v2, v1, 8, 8
1928 ; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8
1929 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
1930 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
1931 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
1932 ; GFX6-NEXT: v_readfirstlane_b32 s1, v1
1933 ; GFX6-NEXT: ; return to shader part epilog
1935 ; GFX8-LABEL: s_fshl_v2i24:
1937 ; GFX8-NEXT: s_lshr_b32 s6, s0, 8
1938 ; GFX8-NEXT: s_and_b32 s6, s6, 0xff
1939 ; GFX8-NEXT: s_lshr_b32 s7, s0, 16
1940 ; GFX8-NEXT: s_lshr_b32 s8, s0, 24
1941 ; GFX8-NEXT: s_and_b32 s0, s0, 0xff
1942 ; GFX8-NEXT: s_lshl_b32 s6, s6, 8
1943 ; GFX8-NEXT: s_or_b32 s0, s0, s6
1944 ; GFX8-NEXT: s_and_b32 s6, s7, 0xff
1945 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1946 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
1947 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
1948 ; GFX8-NEXT: s_lshr_b32 s9, s1, 8
1949 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
1950 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16
1951 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
1952 ; GFX8-NEXT: s_or_b32 s0, s0, s6
1953 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8
1954 ; GFX8-NEXT: s_and_b32 s6, s9, 0xff
1955 ; GFX8-NEXT: s_or_b32 s1, s8, s1
1956 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
1957 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1958 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16
1959 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1960 ; GFX8-NEXT: s_or_b32 s1, s1, s6
1961 ; GFX8-NEXT: s_lshr_b32 s6, s2, 8
1962 ; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
1963 ; GFX8-NEXT: s_and_b32 s6, s6, 0xff
1964 ; GFX8-NEXT: s_lshr_b32 s7, s2, 16
1965 ; GFX8-NEXT: s_lshr_b32 s8, s2, 24
1966 ; GFX8-NEXT: s_and_b32 s2, s2, 0xff
1967 ; GFX8-NEXT: s_lshl_b32 s6, s6, 8
1968 ; GFX8-NEXT: s_or_b32 s2, s2, s6
1969 ; GFX8-NEXT: s_and_b32 s6, s7, 0xff
1970 ; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1971 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
1972 ; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1
1973 ; GFX8-NEXT: s_lshr_b32 s9, s3, 8
1974 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1975 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16
1976 ; GFX8-NEXT: s_and_b32 s3, s3, 0xff
1977 ; GFX8-NEXT: s_or_b32 s2, s2, s6
1978 ; GFX8-NEXT: s_lshl_b32 s3, s3, 8
1979 ; GFX8-NEXT: s_and_b32 s6, s9, 0xff
1980 ; GFX8-NEXT: s_or_b32 s3, s8, s3
1981 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
1982 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
1983 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16
1984 ; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2
1985 ; GFX8-NEXT: s_or_b32 s3, s3, s6
1986 ; GFX8-NEXT: s_lshr_b32 s6, s4, 8
1987 ; GFX8-NEXT: s_and_b32 s6, s6, 0xff
1988 ; GFX8-NEXT: s_lshr_b32 s7, s4, 16
1989 ; GFX8-NEXT: s_lshr_b32 s8, s4, 24
1990 ; GFX8-NEXT: s_and_b32 s4, s4, 0xff
1991 ; GFX8-NEXT: s_lshl_b32 s6, s6, 8
1992 ; GFX8-NEXT: s_or_b32 s4, s4, s6
1993 ; GFX8-NEXT: s_and_b32 s6, s7, 0xff
1994 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
1995 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
1996 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
1997 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2
1998 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1999 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16
2000 ; GFX8-NEXT: s_or_b32 s4, s4, s6
2001 ; GFX8-NEXT: v_mul_hi_u32 v0, s4, v0
2002 ; GFX8-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
2003 ; GFX8-NEXT: v_cvt_u32_f32_e32 v2, v2
2004 ; GFX8-NEXT: s_lshr_b32 s9, s5, 8
2005 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
2006 ; GFX8-NEXT: s_and_b32 s5, s5, 0xff
2007 ; GFX8-NEXT: v_mul_lo_u32 v1, v2, v1
2008 ; GFX8-NEXT: s_lshl_b32 s5, s5, 8
2009 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0
2010 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0
2011 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
2012 ; GFX8-NEXT: v_mul_hi_u32 v1, v2, v1
2013 ; GFX8-NEXT: s_and_b32 s6, s9, 0xff
2014 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2015 ; GFX8-NEXT: s_or_b32 s5, s8, s5
2016 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
2017 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0
2018 ; GFX8-NEXT: s_and_b32 s5, 0xffff, s5
2019 ; GFX8-NEXT: s_lshl_b32 s6, s6, 16
2020 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
2021 ; GFX8-NEXT: s_or_b32 s5, s5, s6
2022 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2023 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1
2024 ; GFX8-NEXT: v_mul_hi_u32 v1, s5, v1
2025 ; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v0
2026 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2027 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24
2028 ; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s0
2029 ; GFX8-NEXT: s_lshr_b32 s0, s2, 1
2030 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v3
2031 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, v2, s0
2032 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s5, v1
2033 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
2034 ; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
2035 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
2036 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
2037 ; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
2038 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
2039 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
2040 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v1
2041 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
2042 ; GFX8-NEXT: s_lshr_b32 s0, s3, 1
2043 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2044 ; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s1
2045 ; GFX8-NEXT: v_lshrrev_b32_e64 v2, v2, s0
2046 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
2047 ; GFX8-NEXT: v_mov_b32_e32 v2, 8
2048 ; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2049 ; GFX8-NEXT: v_mov_b32_e32 v4, 16
2050 ; GFX8-NEXT: v_or_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2051 ; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2052 ; GFX8-NEXT: v_or_b32_e32 v0, v3, v0
2053 ; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v1
2054 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3
2055 ; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2056 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
2057 ; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
2058 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
2059 ; GFX8-NEXT: v_readfirstlane_b32 s1, v1
2060 ; GFX8-NEXT: ; return to shader part epilog
2062 ; GFX9-LABEL: s_fshl_v2i24:
2064 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
2065 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
2066 ; GFX9-NEXT: s_lshr_b32 s7, s0, 8
2067 ; GFX9-NEXT: s_and_b32 s7, s7, 0xff
2068 ; GFX9-NEXT: s_lshr_b32 s9, s0, 16
2069 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
2070 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
2071 ; GFX9-NEXT: s_lshr_b32 s10, s0, 24
2072 ; GFX9-NEXT: s_and_b32 s0, s0, 0xff
2073 ; GFX9-NEXT: s_lshl_b32 s7, s7, 8
2074 ; GFX9-NEXT: s_or_b32 s0, s0, s7
2075 ; GFX9-NEXT: s_and_b32 s7, s9, 0xff
2076 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
2077 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8
2078 ; GFX9-NEXT: s_lshr_b32 s11, s1, 8
2079 ; GFX9-NEXT: s_and_b32 s0, 0xffff, s0
2080 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16
2081 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
2082 ; GFX9-NEXT: v_mul_lo_u32 v2, v0, v1
2083 ; GFX9-NEXT: s_or_b32 s0, s0, s7
2084 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8
2085 ; GFX9-NEXT: s_and_b32 s7, s11, 0xff
2086 ; GFX9-NEXT: s_or_b32 s1, s10, s1
2087 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
2088 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
2089 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16
2090 ; GFX9-NEXT: s_or_b32 s1, s1, s7
2091 ; GFX9-NEXT: s_lshr_b32 s7, s2, 8
2092 ; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2
2093 ; GFX9-NEXT: s_and_b32 s7, s7, 0xff
2094 ; GFX9-NEXT: s_lshr_b32 s9, s2, 16
2095 ; GFX9-NEXT: s_lshr_b32 s10, s2, 24
2096 ; GFX9-NEXT: s_and_b32 s2, s2, 0xff
2097 ; GFX9-NEXT: s_lshl_b32 s7, s7, 8
2098 ; GFX9-NEXT: s_or_b32 s2, s2, s7
2099 ; GFX9-NEXT: s_and_b32 s7, s9, 0xff
2100 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
2101 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v2
2102 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
2103 ; GFX9-NEXT: s_lshr_b32 s11, s3, 8
2104 ; GFX9-NEXT: s_and_b32 s2, 0xffff, s2
2105 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16
2106 ; GFX9-NEXT: s_and_b32 s3, s3, 0xff
2107 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2
2108 ; GFX9-NEXT: s_or_b32 s2, s2, s7
2109 ; GFX9-NEXT: s_lshl_b32 s3, s3, 8
2110 ; GFX9-NEXT: s_and_b32 s7, s11, 0xff
2111 ; GFX9-NEXT: s_or_b32 s3, s10, s3
2112 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
2113 ; GFX9-NEXT: s_and_b32 s3, 0xffff, s3
2114 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16
2115 ; GFX9-NEXT: s_or_b32 s3, s3, s7
2116 ; GFX9-NEXT: s_lshr_b32 s7, s4, 8
2117 ; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
2118 ; GFX9-NEXT: s_and_b32 s7, s7, 0xff
2119 ; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2
2120 ; GFX9-NEXT: s_lshr_b32 s9, s4, 16
2121 ; GFX9-NEXT: s_lshr_b32 s10, s4, 24
2122 ; GFX9-NEXT: s_and_b32 s4, s4, 0xff
2123 ; GFX9-NEXT: s_lshl_b32 s7, s7, 8
2124 ; GFX9-NEXT: s_or_b32 s4, s4, s7
2125 ; GFX9-NEXT: s_and_b32 s7, s9, 0xff
2126 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
2127 ; GFX9-NEXT: s_and_b32 s4, 0xffff, s4
2128 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16
2129 ; GFX9-NEXT: v_mul_lo_u32 v1, v2, v1
2130 ; GFX9-NEXT: s_or_b32 s4, s4, s7
2131 ; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0
2132 ; GFX9-NEXT: s_lshr_b32 s11, s5, 8
2133 ; GFX9-NEXT: s_and_b32 s5, s5, 0xff
2134 ; GFX9-NEXT: v_mul_hi_u32 v1, v2, v1
2135 ; GFX9-NEXT: s_lshl_b32 s5, s5, 8
2136 ; GFX9-NEXT: s_and_b32 s7, s11, 0xff
2137 ; GFX9-NEXT: s_or_b32 s5, s10, s5
2138 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
2139 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
2140 ; GFX9-NEXT: s_and_b32 s5, 0xffff, s5
2141 ; GFX9-NEXT: s_lshl_b32 s7, s7, 16
2142 ; GFX9-NEXT: s_or_b32 s5, s5, s7
2143 ; GFX9-NEXT: v_add_u32_e32 v1, v2, v1
2144 ; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1
2145 ; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0
2146 ; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0
2147 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
2148 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2149 ; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0
2150 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
2151 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24
2152 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2153 ; GFX9-NEXT: v_sub_u32_e32 v2, 23, v0
2154 ; GFX9-NEXT: s_lshr_b32 s2, s2, 1
2155 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2156 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2157 ; GFX9-NEXT: v_lshrrev_b32_e64 v2, v2, s2
2158 ; GFX9-NEXT: v_sub_u32_e32 v1, s5, v1
2159 ; GFX9-NEXT: v_lshl_or_b32 v0, s0, v0, v2
2160 ; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1
2161 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
2162 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
2163 ; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1
2164 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
2165 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
2166 ; GFX9-NEXT: v_sub_u32_e32 v2, 23, v1
2167 ; GFX9-NEXT: s_lshr_b32 s0, s3, 1
2168 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2169 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1
2170 ; GFX9-NEXT: v_lshrrev_b32_e64 v2, v2, s0
2171 ; GFX9-NEXT: s_mov_b32 s6, 8
2172 ; GFX9-NEXT: v_lshl_or_b32 v1, s1, v1, v2
2173 ; GFX9-NEXT: s_mov_b32 s8, 16
2174 ; GFX9-NEXT: s_movk_i32 s0, 0xff
2175 ; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2176 ; GFX9-NEXT: v_and_b32_e32 v3, 0xff, v1
2177 ; GFX9-NEXT: v_and_or_b32 v2, v0, s0, v2
2178 ; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2179 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3
2180 ; GFX9-NEXT: v_or3_b32 v0, v2, v0, v3
2181 ; GFX9-NEXT: v_bfe_u32 v2, v1, 8, 8
2182 ; GFX9-NEXT: v_bfe_u32 v1, v1, 16, 8
2183 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, 8, v2
2184 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
2185 ; GFX9-NEXT: v_readfirstlane_b32 s1, v1
2186 ; GFX9-NEXT: ; return to shader part epilog
2188 ; GFX10-LABEL: s_fshl_v2i24:
2190 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
2191 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, 24
2192 ; GFX10-NEXT: s_lshr_b32 s6, s0, 8
2193 ; GFX10-NEXT: s_lshr_b32 s7, s0, 16
2194 ; GFX10-NEXT: s_and_b32 s6, s6, 0xff
2195 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
2196 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1
2197 ; GFX10-NEXT: s_lshr_b32 s8, s0, 24
2198 ; GFX10-NEXT: s_and_b32 s0, s0, 0xff
2199 ; GFX10-NEXT: s_lshl_b32 s6, s6, 8
2200 ; GFX10-NEXT: s_and_b32 s7, s7, 0xff
2201 ; GFX10-NEXT: s_or_b32 s0, s0, s6
2202 ; GFX10-NEXT: s_and_b32 s6, 0xffff, s7
2203 ; GFX10-NEXT: s_lshr_b32 s7, s4, 8
2204 ; GFX10-NEXT: s_lshr_b32 s10, s4, 16
2205 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
2206 ; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
2207 ; GFX10-NEXT: s_and_b32 s7, s7, 0xff
2208 ; GFX10-NEXT: s_lshr_b32 s11, s4, 24
2209 ; GFX10-NEXT: s_and_b32 s4, s4, 0xff
2210 ; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
2211 ; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1
2212 ; GFX10-NEXT: s_lshl_b32 s7, s7, 8
2213 ; GFX10-NEXT: s_lshr_b32 s12, s5, 8
2214 ; GFX10-NEXT: s_or_b32 s4, s4, s7
2215 ; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0
2216 ; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1
2217 ; GFX10-NEXT: s_and_b32 s7, s10, 0xff
2218 ; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
2219 ; GFX10-NEXT: s_and_b32 s7, 0xffff, s7
2220 ; GFX10-NEXT: s_and_b32 s5, s5, 0xff
2221 ; GFX10-NEXT: s_lshl_b32 s7, s7, 16
2222 ; GFX10-NEXT: s_lshl_b32 s5, s5, 8
2223 ; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2
2224 ; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3
2225 ; GFX10-NEXT: s_or_b32 s4, s4, s7
2226 ; GFX10-NEXT: s_and_b32 s7, s12, 0xff
2227 ; GFX10-NEXT: s_or_b32 s5, s11, s5
2228 ; GFX10-NEXT: s_and_b32 s7, 0xffff, s7
2229 ; GFX10-NEXT: s_and_b32 s5, 0xffff, s5
2230 ; GFX10-NEXT: s_lshl_b32 s7, s7, 16
2231 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2
2232 ; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3
2233 ; GFX10-NEXT: s_or_b32 s5, s5, s7
2234 ; GFX10-NEXT: s_lshr_b32 s9, s1, 8
2235 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
2236 ; GFX10-NEXT: v_mul_hi_u32 v0, s4, v0
2237 ; GFX10-NEXT: v_mul_hi_u32 v1, s5, v1
2238 ; GFX10-NEXT: s_lshl_b32 s1, s1, 8
2239 ; GFX10-NEXT: s_and_b32 s7, s9, 0xff
2240 ; GFX10-NEXT: s_or_b32 s1, s8, s1
2241 ; GFX10-NEXT: s_lshr_b32 s8, s2, 8
2242 ; GFX10-NEXT: s_lshr_b32 s9, s2, 16
2243 ; GFX10-NEXT: s_and_b32 s8, s8, 0xff
2244 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24
2245 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24
2246 ; GFX10-NEXT: s_lshr_b32 s10, s2, 24
2247 ; GFX10-NEXT: s_and_b32 s2, s2, 0xff
2248 ; GFX10-NEXT: s_lshl_b32 s8, s8, 8
2249 ; GFX10-NEXT: s_and_b32 s7, 0xffff, s7
2250 ; GFX10-NEXT: s_or_b32 s2, s2, s8
2251 ; GFX10-NEXT: s_and_b32 s0, 0xffff, s0
2252 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, s4, v0
2253 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, s5, v1
2254 ; GFX10-NEXT: s_lshr_b32 s4, s3, 8
2255 ; GFX10-NEXT: s_and_b32 s5, s9, 0xff
2256 ; GFX10-NEXT: s_and_b32 s3, s3, 0xff
2257 ; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
2258 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
2259 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
2260 ; GFX10-NEXT: s_and_b32 s5, 0xffff, s5
2261 ; GFX10-NEXT: s_lshl_b32 s3, s3, 8
2262 ; GFX10-NEXT: s_and_b32 s4, s4, 0xff
2263 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2264 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
2265 ; GFX10-NEXT: s_and_b32 s2, 0xffff, s2
2266 ; GFX10-NEXT: s_lshl_b32 s5, s5, 16
2267 ; GFX10-NEXT: s_or_b32 s3, s10, s3
2268 ; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
2269 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2270 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
2271 ; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
2272 ; GFX10-NEXT: s_or_b32 s2, s2, s5
2273 ; GFX10-NEXT: s_and_b32 s3, 0xffff, s3
2274 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
2275 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2276 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
2277 ; GFX10-NEXT: s_lshl_b32 s4, s4, 16
2278 ; GFX10-NEXT: s_lshr_b32 s2, s2, 1
2279 ; GFX10-NEXT: s_or_b32 s3, s3, s4
2280 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v0
2281 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2282 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2283 ; GFX10-NEXT: s_lshl_b32 s6, s6, 16
2284 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
2285 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2286 ; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v1
2287 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1
2288 ; GFX10-NEXT: s_lshl_b32 s7, s7, 16
2289 ; GFX10-NEXT: s_or_b32 s0, s0, s6
2290 ; GFX10-NEXT: v_lshrrev_b32_e64 v2, v2, s2
2291 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3
2292 ; GFX10-NEXT: s_lshr_b32 s2, s3, 1
2293 ; GFX10-NEXT: s_or_b32 s1, s1, s7
2294 ; GFX10-NEXT: v_lshl_or_b32 v0, s0, v0, v2
2295 ; GFX10-NEXT: v_lshrrev_b32_e64 v3, v3, s2
2296 ; GFX10-NEXT: s_mov_b32 s0, 8
2297 ; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2298 ; GFX10-NEXT: v_lshl_or_b32 v1, s1, v1, v3
2299 ; GFX10-NEXT: s_mov_b32 s0, 16
2300 ; GFX10-NEXT: v_and_or_b32 v2, v0, 0xff, v2
2301 ; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v1
2302 ; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2303 ; GFX10-NEXT: v_bfe_u32 v4, v1, 8, 8
2304 ; GFX10-NEXT: v_bfe_u32 v1, v1, 16, 8
2305 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3
2306 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 8, v4
2307 ; GFX10-NEXT: v_or3_b32 v0, v2, v0, v3
2308 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1
2309 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2310 ; GFX10-NEXT: ; return to shader part epilog
2312 ; GFX11-LABEL: s_fshl_v2i24:
2314 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
2315 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v1, 24
2316 ; GFX11-NEXT: s_lshr_b32 s6, s0, 8
2317 ; GFX11-NEXT: s_lshr_b32 s7, s0, 16
2318 ; GFX11-NEXT: s_and_b32 s6, s6, 0xff
2319 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0
2320 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v1
2321 ; GFX11-NEXT: s_lshr_b32 s8, s0, 24
2322 ; GFX11-NEXT: s_and_b32 s0, s0, 0xff
2323 ; GFX11-NEXT: s_lshl_b32 s6, s6, 8
2324 ; GFX11-NEXT: s_lshr_b32 s10, s4, 24
2325 ; GFX11-NEXT: s_or_b32 s0, s0, s6
2326 ; GFX11-NEXT: s_and_b32 s6, s7, 0xff
2327 ; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
2328 ; GFX11-NEXT: s_and_b32 s6, 0xffff, s6
2329 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
2330 ; GFX11-NEXT: v_dual_mul_f32 v0, 0x4f7ffffe, v0 :: v_dual_mul_f32 v1, 0x4f7ffffe, v1
2331 ; GFX11-NEXT: s_lshl_b32 s6, s6, 16
2332 ; GFX11-NEXT: s_lshr_b32 s7, s4, 16
2333 ; GFX11-NEXT: s_or_b32 s0, s0, s6
2334 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2335 ; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
2336 ; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1
2337 ; GFX11-NEXT: s_lshr_b32 s6, s4, 8
2338 ; GFX11-NEXT: s_and_b32 s4, s4, 0xff
2339 ; GFX11-NEXT: s_and_b32 s6, s6, 0xff
2340 ; GFX11-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0
2341 ; GFX11-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1
2342 ; GFX11-NEXT: s_lshl_b32 s6, s6, 8
2343 ; GFX11-NEXT: s_and_b32 s7, s7, 0xff
2344 ; GFX11-NEXT: s_or_b32 s4, s4, s6
2345 ; GFX11-NEXT: s_and_b32 s6, 0xffff, s7
2346 ; GFX11-NEXT: s_lshr_b32 s11, s5, 8
2347 ; GFX11-NEXT: s_and_b32 s4, 0xffff, s4
2348 ; GFX11-NEXT: v_mul_hi_u32 v2, v0, v2
2349 ; GFX11-NEXT: s_lshl_b32 s6, s6, 16
2350 ; GFX11-NEXT: s_and_b32 s5, s5, 0xff
2351 ; GFX11-NEXT: s_or_b32 s4, s4, s6
2352 ; GFX11-NEXT: s_lshl_b32 s5, s5, 8
2353 ; GFX11-NEXT: s_and_b32 s6, s11, 0xff
2354 ; GFX11-NEXT: s_or_b32 s5, s10, s5
2355 ; GFX11-NEXT: s_and_b32 s6, 0xffff, s6
2356 ; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v2
2357 ; GFX11-NEXT: v_mul_hi_u32 v2, v1, v3
2358 ; GFX11-NEXT: s_and_b32 s5, 0xffff, s5
2359 ; GFX11-NEXT: s_lshl_b32 s6, s6, 16
2360 ; GFX11-NEXT: s_lshr_b32 s9, s1, 8
2361 ; GFX11-NEXT: v_mul_hi_u32 v0, s4, v0
2362 ; GFX11-NEXT: s_or_b32 s5, s5, s6
2363 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
2364 ; GFX11-NEXT: s_and_b32 s7, s9, 0xff
2365 ; GFX11-NEXT: v_add_nc_u32_e32 v1, v1, v2
2366 ; GFX11-NEXT: s_lshl_b32 s1, s1, 8
2367 ; GFX11-NEXT: s_and_b32 s6, 0xffff, s7
2368 ; GFX11-NEXT: s_lshr_b32 s7, s2, 8
2369 ; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24
2370 ; GFX11-NEXT: v_mul_hi_u32 v1, s5, v1
2371 ; GFX11-NEXT: s_or_b32 s1, s8, s1
2372 ; GFX11-NEXT: s_lshr_b32 s8, s2, 16
2373 ; GFX11-NEXT: s_and_b32 s7, s7, 0xff
2374 ; GFX11-NEXT: s_lshr_b32 s9, s3, 8
2375 ; GFX11-NEXT: s_lshl_b32 s7, s7, 8
2376 ; GFX11-NEXT: s_and_b32 s3, s3, 0xff
2377 ; GFX11-NEXT: v_sub_nc_u32_e32 v0, s4, v0
2378 ; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24
2379 ; GFX11-NEXT: s_lshr_b32 s4, s2, 24
2380 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff
2381 ; GFX11-NEXT: s_lshl_b32 s3, s3, 8
2382 ; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
2383 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
2384 ; GFX11-NEXT: s_or_b32 s2, s2, s7
2385 ; GFX11-NEXT: s_or_b32 s3, s4, s3
2386 ; GFX11-NEXT: v_sub_nc_u32_e32 v1, s5, v1
2387 ; GFX11-NEXT: s_and_b32 s5, s8, 0xff
2388 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2389 ; GFX11-NEXT: s_and_b32 s5, 0xffff, s5
2390 ; GFX11-NEXT: s_and_b32 s2, 0xffff, s2
2391 ; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
2392 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
2393 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v0
2394 ; GFX11-NEXT: s_lshl_b32 s5, s5, 16
2395 ; GFX11-NEXT: s_and_b32 s4, s9, 0xff
2396 ; GFX11-NEXT: s_or_b32 s2, s2, s5
2397 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
2398 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
2399 ; GFX11-NEXT: s_lshr_b32 s2, s2, 1
2400 ; GFX11-NEXT: s_and_b32 s4, 0xffff, s4
2401 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
2402 ; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
2403 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2404 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
2405 ; GFX11-NEXT: s_lshl_b32 s6, s6, 16
2406 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2407 ; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v0
2408 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v2 :: v_dual_and_b32 v0, 0xffffff, v0
2409 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3
2410 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
2411 ; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v1
2412 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1
2413 ; GFX11-NEXT: v_lshrrev_b32_e64 v2, v2, s2
2414 ; GFX11-NEXT: s_and_b32 s2, 0xffff, s3
2415 ; GFX11-NEXT: s_lshl_b32 s3, s4, 16
2416 ; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3
2417 ; GFX11-NEXT: s_or_b32 s2, s2, s3
2418 ; GFX11-NEXT: v_lshl_or_b32 v0, s0, v0, v2
2419 ; GFX11-NEXT: s_lshr_b32 s0, s2, 1
2420 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
2421 ; GFX11-NEXT: v_lshrrev_b32_e64 v2, v3, s0
2422 ; GFX11-NEXT: s_or_b32 s0, s1, s6
2423 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2424 ; GFX11-NEXT: v_bfe_u32 v3, v0, 8, 8
2425 ; GFX11-NEXT: v_lshl_or_b32 v1, s0, v1, v2
2426 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
2427 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v3
2428 ; GFX11-NEXT: v_bfe_u32 v3, v0, 16, 8
2429 ; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v1
2430 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2431 ; GFX11-NEXT: v_and_or_b32 v0, v0, 0xff, v2
2432 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v3
2433 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
2434 ; GFX11-NEXT: v_lshlrev_b32_e32 v3, 24, v4
2435 ; GFX11-NEXT: v_bfe_u32 v4, v1, 8, 8
2436 ; GFX11-NEXT: v_bfe_u32 v1, v1, 16, 8
2437 ; GFX11-NEXT: v_or3_b32 v0, v0, v2, v3
2438 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2439 ; GFX11-NEXT: v_lshl_or_b32 v1, v1, 8, v4
2440 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
2441 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
2442 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1
2443 ; GFX11-NEXT: ; return to shader part epilog
2444 %lhs = bitcast i48 %lhs.arg to <2 x i24>
2445 %rhs = bitcast i48 %rhs.arg to <2 x i24>
2446 %amt = bitcast i48 %amt.arg to <2 x i24>
2447 %result = call <2 x i24> @llvm.fshl.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
2448 %cast.result = bitcast <2 x i24> %result to i48
2449 ret i48 %cast.result
; v_fshl_v2i24: funnel shift left on <2 x i24> with a variable, per-element
; amount; the checks below use only VGPRs (v0-v9) for operands and results.
; On every target the expected code first reduces each amount modulo 24
; (reciprocal estimate of 24 via v_rcp_iflag_f32, a v_mul_hi_u32 quotient,
; then two compare/select corrections) and then ORs lhs << amt with the
; pre-shifted rhs (v_bfe_u32 ..., 1, 23) shifted right by (23 - amt).
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file): regenerate them with utils/update_llc_test_checks.py, never hand-edit.
2452 define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
2453 ; GFX6-LABEL: v_fshl_v2i24:
2455 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2456 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
2457 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6
2458 ; GFX6-NEXT: v_mov_b32_e32 v7, 0xffffffe8
2459 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v9, 24
2460 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v9, v9
2461 ; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
2462 ; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6
2463 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2464 ; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2465 ; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 23
2466 ; GFX6-NEXT: v_mul_lo_u32 v8, v6, v7
2467 ; GFX6-NEXT: v_mul_hi_u32 v8, v6, v8
2468 ; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8
2469 ; GFX6-NEXT: v_mul_hi_u32 v6, v4, v6
2470 ; GFX6-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v9
2471 ; GFX6-NEXT: v_cvt_u32_f32_e32 v8, v8
2472 ; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24
2473 ; GFX6-NEXT: v_mul_lo_u32 v7, v8, v7
2474 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v6
2475 ; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4
2476 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2477 ; GFX6-NEXT: v_mul_hi_u32 v7, v8, v7
2478 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2479 ; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4
2480 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2481 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2482 ; GFX6-NEXT: v_add_i32_e32 v7, vcc, v8, v7
2483 ; GFX6-NEXT: v_mul_hi_u32 v7, v5, v7
2484 ; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 23, v4
2485 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2486 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v4, v0
2487 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v6
2488 ; GFX6-NEXT: v_mul_lo_u32 v6, v7, 24
2489 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
2490 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
2491 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6
2492 ; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
2493 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2494 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2495 ; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
2496 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2497 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2498 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
2499 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2500 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1
2501 ; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 23
2502 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v4
2503 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2
2504 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
2505 ; GFX6-NEXT: s_setpc_b64 s[30:31]
2507 ; GFX8-LABEL: v_fshl_v2i24:
2509 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2510 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
2511 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v6, v6
2512 ; GFX8-NEXT: v_mov_b32_e32 v7, 0xffffffe8
2513 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v9, 24
2514 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v9, v9
2515 ; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
2516 ; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6
2517 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2518 ; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2519 ; GFX8-NEXT: v_bfe_u32 v2, v2, 1, 23
2520 ; GFX8-NEXT: v_mul_lo_u32 v8, v6, v7
2521 ; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8
2522 ; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8
2523 ; GFX8-NEXT: v_mul_hi_u32 v6, v4, v6
2524 ; GFX8-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v9
2525 ; GFX8-NEXT: v_cvt_u32_f32_e32 v8, v8
2526 ; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24
2527 ; GFX8-NEXT: v_mul_lo_u32 v7, v8, v7
2528 ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v6
2529 ; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4
2530 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2531 ; GFX8-NEXT: v_mul_hi_u32 v7, v8, v7
2532 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2533 ; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4
2534 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2535 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2536 ; GFX8-NEXT: v_add_u32_e32 v7, vcc, v8, v7
2537 ; GFX8-NEXT: v_mul_hi_u32 v7, v5, v7
2538 ; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 23, v4
2539 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2540 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v4, v0
2541 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v6
2542 ; GFX8-NEXT: v_mul_lo_u32 v6, v7, 24
2543 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2
2544 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
2545 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6
2546 ; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2
2547 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2548 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2549 ; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2
2550 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2551 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2552 ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 23, v2
2553 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2554 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, v2, v1
2555 ; GFX8-NEXT: v_bfe_u32 v2, v3, 1, 23
2556 ; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v4
2557 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, v3, v2
2558 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
2559 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2561 ; GFX9-LABEL: v_fshl_v2i24:
2563 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2564 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
2565 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6
2566 ; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8
2567 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v9, 24
2568 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v9, v9
2569 ; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
2570 ; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6
2571 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2572 ; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9
2573 ; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v9
2574 ; GFX9-NEXT: v_mul_lo_u32 v8, v6, v7
2575 ; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2576 ; GFX9-NEXT: v_bfe_u32 v2, v2, 1, 23
2577 ; GFX9-NEXT: v_mul_lo_u32 v7, v9, v7
2578 ; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8
2579 ; GFX9-NEXT: v_bfe_u32 v3, v3, 1, 23
2580 ; GFX9-NEXT: v_mul_hi_u32 v7, v9, v7
2581 ; GFX9-NEXT: v_add_u32_e32 v6, v6, v8
2582 ; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6
2583 ; GFX9-NEXT: v_add_u32_e32 v7, v9, v7
2584 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
2585 ; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6
2586 ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4
2587 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2588 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2589 ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4
2590 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2591 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2592 ; GFX9-NEXT: v_mul_hi_u32 v6, v5, v7
2593 ; GFX9-NEXT: v_sub_u32_e32 v7, 23, v4
2594 ; GFX9-NEXT: v_and_b32_e32 v7, 0xffffff, v7
2595 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2596 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
2597 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v7, v2
2598 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v4, v2
2599 ; GFX9-NEXT: v_sub_u32_e32 v2, v5, v6
2600 ; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2
2601 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2602 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2603 ; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2
2604 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2605 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2606 ; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2
2607 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2608 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2609 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, v4, v3
2610 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, v2, v3
2611 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2613 ; GFX10-LABEL: v_fshl_v2i24:
2615 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2616 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
2617 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v7, 24
2618 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2619 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2620 ; GFX10-NEXT: v_bfe_u32 v2, v2, 1, 23
2621 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6
2622 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v7, v7
2623 ; GFX10-NEXT: v_bfe_u32 v3, v3, 1, 23
2624 ; GFX10-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
2625 ; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7
2626 ; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6
2627 ; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v7
2628 ; GFX10-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6
2629 ; GFX10-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7
2630 ; GFX10-NEXT: v_mul_hi_u32 v8, v6, v8
2631 ; GFX10-NEXT: v_mul_hi_u32 v9, v7, v9
2632 ; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v8
2633 ; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v9
2634 ; GFX10-NEXT: v_mul_hi_u32 v6, v4, v6
2635 ; GFX10-NEXT: v_mul_hi_u32 v7, v5, v7
2636 ; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24
2637 ; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24
2638 ; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v6
2639 ; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v7
2640 ; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
2641 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
2642 ; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
2643 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2644 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
2645 ; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
2646 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2647 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
2648 ; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
2649 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2650 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
2651 ; GFX10-NEXT: v_sub_nc_u32_e32 v6, 23, v4
2652 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2653 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2654 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffffff, v6
2655 ; GFX10-NEXT: v_sub_nc_u32_e32 v7, 23, v5
2656 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2657 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, v6, v2
2658 ; GFX10-NEXT: v_and_b32_e32 v7, 0xffffff, v7
2659 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, v4, v2
2660 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, v7, v3
2661 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, v5, v3
2662 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2664 ; GFX11-LABEL: v_fshl_v2i24:
2666 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2667 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
2668 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v7, 24
2669 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2670 ; GFX11-NEXT: v_bfe_u32 v2, v2, 1, 23
2671 ; GFX11-NEXT: v_bfe_u32 v3, v3, 1, 23
2672 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v6, v6
2673 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v7, v7
2674 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
2675 ; GFX11-NEXT: v_dual_mul_f32 v6, 0x4f7ffffe, v6 :: v_dual_mul_f32 v7, 0x4f7ffffe, v7
2676 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
2677 ; GFX11-NEXT: v_cvt_u32_f32_e32 v6, v6
2678 ; GFX11-NEXT: v_cvt_u32_f32_e32 v7, v7
2679 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2680 ; GFX11-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6
2681 ; GFX11-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7
2682 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2683 ; GFX11-NEXT: v_mul_hi_u32 v8, v6, v8
2684 ; GFX11-NEXT: v_mul_hi_u32 v9, v7, v9
2685 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2686 ; GFX11-NEXT: v_add_nc_u32_e32 v6, v6, v8
2687 ; GFX11-NEXT: v_add_nc_u32_e32 v7, v7, v9
2688 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2689 ; GFX11-NEXT: v_mul_hi_u32 v7, v5, v7
2690 ; GFX11-NEXT: v_mul_lo_u32 v7, v7, 24
2691 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2692 ; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v7
2693 ; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
2694 ; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2695 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2696 ; GFX11-NEXT: v_mul_hi_u32 v6, v4, v6
2697 ; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24
2698 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2699 ; GFX11-NEXT: v_sub_nc_u32_e32 v4, v4, v6
2700 ; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
2701 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
2702 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2703 ; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2704 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
2705 ; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
2706 ; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2707 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
2708 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
2709 ; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
2710 ; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2711 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
2712 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
2713 ; GFX11-NEXT: v_sub_nc_u32_e32 v6, 23, v4
2714 ; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4
2715 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2716 ; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6
2717 ; GFX11-NEXT: v_sub_nc_u32_e32 v7, 23, v5
2718 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2719 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2720 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, v6, v2
2721 ; GFX11-NEXT: v_and_b32_e32 v7, 0xffffff, v7
2722 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2723 ; GFX11-NEXT: v_lshl_or_b32 v0, v0, v4, v2
2724 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, v7, v3
2725 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2726 ; GFX11-NEXT: v_lshl_or_b32 v1, v1, v5, v3
2727 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2728 %result = call <2 x i24> @llvm.fshl.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
2729 ret <2 x i24> %result
; s_fshl_i32: i32 funnel shift left with a variable amount where lhs, rhs and
; amt are all `inreg`; the result is read back into s0 with
; v_readfirstlane_b32. Every target is expected to use two v_alignbit_b32:
; the first with a shift of 1, the second fed lhs >> 1 (s_lshr_b32) and the
; bitwise NOT of the amount (s_not_b32). GFX6/8/9 first copy the scalar
; operands into VGPRs with v_mov_b32; GFX10/11 pass SGPRs to v_alignbit_b32
; directly. CHECK lines are autogenerated by utils/update_llc_test_checks.py.
2732 define amdgpu_ps i32 @s_fshl_i32(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
2733 ; GFX6-LABEL: s_fshl_i32:
2735 ; GFX6-NEXT: v_mov_b32_e32 v0, s1
2736 ; GFX6-NEXT: s_not_b32 s1, s2
2737 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 1
2738 ; GFX6-NEXT: s_lshr_b32 s0, s0, 1
2739 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2740 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1
2741 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
2742 ; GFX6-NEXT: ; return to shader part epilog
2744 ; GFX8-LABEL: s_fshl_i32:
2746 ; GFX8-NEXT: v_mov_b32_e32 v0, s1
2747 ; GFX8-NEXT: s_not_b32 s1, s2
2748 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 1
2749 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
2750 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2751 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1
2752 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
2753 ; GFX8-NEXT: ; return to shader part epilog
2755 ; GFX9-LABEL: s_fshl_i32:
2757 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
2758 ; GFX9-NEXT: s_not_b32 s1, s2
2759 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 1
2760 ; GFX9-NEXT: s_lshr_b32 s0, s0, 1
2761 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
2762 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1
2763 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
2764 ; GFX9-NEXT: ; return to shader part epilog
2766 ; GFX10-LABEL: s_fshl_i32:
2768 ; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 1
2769 ; GFX10-NEXT: s_lshr_b32 s0, s0, 1
2770 ; GFX10-NEXT: s_not_b32 s1, s2
2771 ; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, s1
2772 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2773 ; GFX10-NEXT: ; return to shader part epilog
2775 ; GFX11-LABEL: s_fshl_i32:
2777 ; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 1
2778 ; GFX11-NEXT: s_lshr_b32 s0, s0, 1
2779 ; GFX11-NEXT: s_not_b32 s1, s2
2780 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2781 ; GFX11-NEXT: v_alignbit_b32 v0, s0, v0, s1
2782 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2783 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
2784 ; GFX11-NEXT: ; return to shader part epilog
2785 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt)
; s_fshl_i32_5: i32 funnel shift left by the constant 5 with `inreg` lhs/rhs.
; Every target is expected to emit a single v_alignbit_b32 with an immediate
; of 27 (= 32 - 5), then v_readfirstlane_b32 to return the value in s0.
; CHECK lines are autogenerated by utils/update_llc_test_checks.py.
2789 define amdgpu_ps i32 @s_fshl_i32_5(i32 inreg %lhs, i32 inreg %rhs) {
2790 ; GFX6-LABEL: s_fshl_i32_5:
2792 ; GFX6-NEXT: v_mov_b32_e32 v0, s1
2793 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 27
2794 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
2795 ; GFX6-NEXT: ; return to shader part epilog
2797 ; GFX8-LABEL: s_fshl_i32_5:
2799 ; GFX8-NEXT: v_mov_b32_e32 v0, s1
2800 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 27
2801 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
2802 ; GFX8-NEXT: ; return to shader part epilog
2804 ; GFX9-LABEL: s_fshl_i32_5:
2806 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
2807 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 27
2808 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
2809 ; GFX9-NEXT: ; return to shader part epilog
2811 ; GFX10-LABEL: s_fshl_i32_5:
2813 ; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 27
2814 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2815 ; GFX10-NEXT: ; return to shader part epilog
2817 ; GFX11-LABEL: s_fshl_i32_5:
2819 ; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 27
2820 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2821 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
2822 ; GFX11-NEXT: ; return to shader part epilog
2823 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 5)
; s_fshl_i32_8: i32 funnel shift left by the constant 8 with `inreg` lhs/rhs.
; Every target is expected to emit a single v_alignbit_b32 with an immediate
; of 24 (= 32 - 8), then v_readfirstlane_b32 to return the value in s0.
; CHECK lines are autogenerated by utils/update_llc_test_checks.py.
2827 define amdgpu_ps i32 @s_fshl_i32_8(i32 inreg %lhs, i32 inreg %rhs) {
2828 ; GFX6-LABEL: s_fshl_i32_8:
2830 ; GFX6-NEXT: v_mov_b32_e32 v0, s1
2831 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 24
2832 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
2833 ; GFX6-NEXT: ; return to shader part epilog
2835 ; GFX8-LABEL: s_fshl_i32_8:
2837 ; GFX8-NEXT: v_mov_b32_e32 v0, s1
2838 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 24
2839 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
2840 ; GFX8-NEXT: ; return to shader part epilog
2842 ; GFX9-LABEL: s_fshl_i32_8:
2844 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
2845 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 24
2846 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
2847 ; GFX9-NEXT: ; return to shader part epilog
2849 ; GFX10-LABEL: s_fshl_i32_8:
2851 ; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 24
2852 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2853 ; GFX10-NEXT: ; return to shader part epilog
2855 ; GFX11-LABEL: s_fshl_i32_8:
2857 ; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 24
2858 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2859 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
2860 ; GFX11-NEXT: ; return to shader part epilog
2861 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 8)
; v_fshl_i32: i32 funnel shift left with a variable amount, default calling
; convention (operands arrive in v0-v2 per the checks). Expected lowering is
; v_alignbit_b32 by 1, lhs >> 1, v_not_b32 of the amount, then a second
; v_alignbit_b32. The tahiti/fiji/gfx900/gfx1010 RUN lines share the GCN
; prefix because their output is identical; GFX11 is checked separately since
; it additionally emits s_delay_alu.
; CHECK lines are autogenerated by utils/update_llc_test_checks.py.
2865 define i32 @v_fshl_i32(i32 %lhs, i32 %rhs, i32 %amt) {
2866 ; GCN-LABEL: v_fshl_i32:
2868 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2869 ; GCN-NEXT: v_alignbit_b32 v1, v0, v1, 1
2870 ; GCN-NEXT: v_lshrrev_b32_e32 v0, 1, v0
2871 ; GCN-NEXT: v_not_b32_e32 v2, v2
2872 ; GCN-NEXT: v_alignbit_b32 v0, v0, v1, v2
2873 ; GCN-NEXT: s_setpc_b64 s[30:31]
2875 ; GFX11-LABEL: v_fshl_i32:
2877 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2878 ; GFX11-NEXT: v_alignbit_b32 v1, v0, v1, 1
2879 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0
2880 ; GFX11-NEXT: v_not_b32_e32 v2, v2
2881 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2882 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, v2
2883 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2884 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt)
; v_fshl_i32_5: i32 funnel shift left by the constant 5, default calling
; convention. All targets are expected to emit one v_alignbit_b32 with an
; immediate of 27 (= 32 - 5) operating on v0/v1.
; CHECK lines are autogenerated by utils/update_llc_test_checks.py.
2888 define i32 @v_fshl_i32_5(i32 %lhs, i32 %rhs) {
2889 ; GCN-LABEL: v_fshl_i32_5:
2891 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2892 ; GCN-NEXT: v_alignbit_b32 v0, v0, v1, 27
2893 ; GCN-NEXT: s_setpc_b64 s[30:31]
2895 ; GFX11-LABEL: v_fshl_i32_5:
2897 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2898 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 27
2899 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2900 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 5)
; v_fshl_i32_8: i32 funnel shift left by the constant 8, default calling
; convention. All targets are expected to emit one v_alignbit_b32 with an
; immediate of 24 (= 32 - 8) operating on v0/v1.
; CHECK lines are autogenerated by utils/update_llc_test_checks.py.
2904 define i32 @v_fshl_i32_8(i32 %lhs, i32 %rhs) {
2905 ; GCN-LABEL: v_fshl_i32_8:
2907 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2908 ; GCN-NEXT: v_alignbit_b32 v0, v0, v1, 24
2909 ; GCN-NEXT: s_setpc_b64 s[30:31]
2911 ; GFX11-LABEL: v_fshl_i32_8:
2913 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2914 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 24
2915 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2916 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 8)
; v_fshl_i32_ssv: mixed-operand i32 funnel shift left: lhs and rhs are
; `inreg` (s0/s1 in the checks) while amt is a plain argument (v0). The i32
; result is bitcast to float, and the checks leave it in v0 with no
; v_readfirstlane_b32. Expected lowering: v_alignbit_b32 by 1, s_lshr_b32 of
; lhs by 1, v_not_b32 of the amount, then a second v_alignbit_b32.
; CHECK lines are autogenerated by utils/update_llc_test_checks.py.
2920 define amdgpu_ps float @v_fshl_i32_ssv(i32 inreg %lhs, i32 inreg %rhs, i32 %amt) {
2921 ; GFX6-LABEL: v_fshl_i32_ssv:
2923 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2924 ; GFX6-NEXT: v_alignbit_b32 v1, s0, v1, 1
2925 ; GFX6-NEXT: s_lshr_b32 s0, s0, 1
2926 ; GFX6-NEXT: v_not_b32_e32 v0, v0
2927 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v1, v0
2928 ; GFX6-NEXT: ; return to shader part epilog
2930 ; GFX8-LABEL: v_fshl_i32_ssv:
2932 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2933 ; GFX8-NEXT: v_alignbit_b32 v1, s0, v1, 1
2934 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
2935 ; GFX8-NEXT: v_not_b32_e32 v0, v0
2936 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v1, v0
2937 ; GFX8-NEXT: ; return to shader part epilog
2939 ; GFX9-LABEL: v_fshl_i32_ssv:
2941 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
2942 ; GFX9-NEXT: v_alignbit_b32 v1, s0, v1, 1
2943 ; GFX9-NEXT: s_lshr_b32 s0, s0, 1
2944 ; GFX9-NEXT: v_not_b32_e32 v0, v0
2945 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v1, v0
2946 ; GFX9-NEXT: ; return to shader part epilog
2948 ; GFX10-LABEL: v_fshl_i32_ssv:
2950 ; GFX10-NEXT: v_alignbit_b32 v1, s0, s1, 1
2951 ; GFX10-NEXT: v_not_b32_e32 v0, v0
2952 ; GFX10-NEXT: s_lshr_b32 s0, s0, 1
2953 ; GFX10-NEXT: v_alignbit_b32 v0, s0, v1, v0
2954 ; GFX10-NEXT: ; return to shader part epilog
2956 ; GFX11-LABEL: v_fshl_i32_ssv:
2958 ; GFX11-NEXT: v_alignbit_b32 v1, s0, s1, 1
2959 ; GFX11-NEXT: v_not_b32_e32 v0, v0
2960 ; GFX11-NEXT: s_lshr_b32 s0, s0, 1
2961 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2962 ; GFX11-NEXT: v_alignbit_b32 v0, s0, v1, v0
2963 ; GFX11-NEXT: ; return to shader part epilog
2964 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt)
2965 %cast.result = bitcast i32 %result to float
2966 ret float %cast.result
; v_fshl_i32_svs: mixed-operand i32 funnel shift left: lhs and amt are
; `inreg` (s0/s1 in the checks) while rhs is a plain argument (v0). The i32
; result is bitcast to float, and the checks leave it in v0 with no
; v_readfirstlane_b32. Expected lowering: s_not_b32 of the amount,
; v_alignbit_b32 by 1, s_lshr_b32 of lhs by 1, then a second v_alignbit_b32
; (GFX6/8/9 copy the inverted amount into a VGPR first; GFX10/11 use s1
; directly). CHECK lines are autogenerated by utils/update_llc_test_checks.py.
2969 define amdgpu_ps float @v_fshl_i32_svs(i32 inreg %lhs, i32 %rhs, i32 inreg %amt) {
2970 ; GFX6-LABEL: v_fshl_i32_svs:
2972 ; GFX6-NEXT: s_not_b32 s1, s1
2973 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 1
2974 ; GFX6-NEXT: s_lshr_b32 s0, s0, 1
2975 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2976 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1
2977 ; GFX6-NEXT: ; return to shader part epilog
2979 ; GFX8-LABEL: v_fshl_i32_svs:
2981 ; GFX8-NEXT: s_not_b32 s1, s1
2982 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 1
2983 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
2984 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2985 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1
2986 ; GFX8-NEXT: ; return to shader part epilog
2988 ; GFX9-LABEL: v_fshl_i32_svs:
2990 ; GFX9-NEXT: s_not_b32 s1, s1
2991 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 1
2992 ; GFX9-NEXT: s_lshr_b32 s0, s0, 1
2993 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
2994 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1
2995 ; GFX9-NEXT: ; return to shader part epilog
2997 ; GFX10-LABEL: v_fshl_i32_svs:
2999 ; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, 1
3000 ; GFX10-NEXT: s_lshr_b32 s0, s0, 1
3001 ; GFX10-NEXT: s_not_b32 s1, s1
3002 ; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, s1
3003 ; GFX10-NEXT: ; return to shader part epilog
3005 ; GFX11-LABEL: v_fshl_i32_svs:
3007 ; GFX11-NEXT: v_alignbit_b32 v0, s0, v0, 1
3008 ; GFX11-NEXT: s_lshr_b32 s0, s0, 1
3009 ; GFX11-NEXT: s_not_b32 s1, s1
3010 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
3011 ; GFX11-NEXT: v_alignbit_b32 v0, s0, v0, s1
3012 ; GFX11-NEXT: ; return to shader part epilog
3013 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt)
3014 %cast.result = bitcast i32 %result to float
3015 ret float %cast.result
; v_fshl_i32_vss: i32 funnel shift left whose result is bitcast to float; the
; checks leave it in v0 with no v_readfirstlane_b32. As written, lhs, rhs and
; amt are all `inreg` (s0/s1/s2 in the checks), so the expected instruction
; sequence matches s_fshl_i32 minus the final v_readfirstlane_b32.
; NOTE(review): going by the sibling tests' naming (ssv = amt in a VGPR,
; svs = rhs in a VGPR), "vss" suggests %lhs was meant to be a non-inreg (VGPR)
; argument. If so, dropping `inreg` from %lhs changes codegen and the checks
; must be regenerated with utils/update_llc_test_checks.py - confirm intent.
3018 define amdgpu_ps float @v_fshl_i32_vss(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
3019 ; GFX6-LABEL: v_fshl_i32_vss:
3021 ; GFX6-NEXT: v_mov_b32_e32 v0, s1
3022 ; GFX6-NEXT: s_not_b32 s1, s2
3023 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 1
3024 ; GFX6-NEXT: s_lshr_b32 s0, s0, 1
3025 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
3026 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1
3027 ; GFX6-NEXT: ; return to shader part epilog
3029 ; GFX8-LABEL: v_fshl_i32_vss:
3031 ; GFX8-NEXT: v_mov_b32_e32 v0, s1
3032 ; GFX8-NEXT: s_not_b32 s1, s2
3033 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 1
3034 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
3035 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
3036 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1
3037 ; GFX8-NEXT: ; return to shader part epilog
3039 ; GFX9-LABEL: v_fshl_i32_vss:
3041 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
3042 ; GFX9-NEXT: s_not_b32 s1, s2
3043 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 1
3044 ; GFX9-NEXT: s_lshr_b32 s0, s0, 1
3045 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
3046 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1
3047 ; GFX9-NEXT: ; return to shader part epilog
3049 ; GFX10-LABEL: v_fshl_i32_vss:
3051 ; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 1
3052 ; GFX10-NEXT: s_lshr_b32 s0, s0, 1
3053 ; GFX10-NEXT: s_not_b32 s1, s2
3054 ; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, s1
3055 ; GFX10-NEXT: ; return to shader part epilog
3057 ; GFX11-LABEL: v_fshl_i32_vss:
3059 ; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 1
3060 ; GFX11-NEXT: s_lshr_b32 s0, s0, 1
3061 ; GFX11-NEXT: s_not_b32 s1, s2
3062 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
3063 ; GFX11-NEXT: v_alignbit_b32 v0, s0, v0, s1
3064 ; GFX11-NEXT: ; return to shader part epilog
3065 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt)
3066 %cast.result = bitcast i32 %result to float
3067 ret float %cast.result
; Vector funnel shift left on <2 x i32> through @llvm.fshl.v2i32, with no
; inreg operands. Every target's expected code handles each element with the
; same four instructions: v_alignbit_b32 by 1, v_lshrrev_b32 by 1, v_not_b32
; of the shift amount, then v_alignbit_b32 by the inverted amount.
; The gfx10 and gfx11 checks interleave the two elements and issue both final
; v_alignbit_b32 instructions last; gfx11 additionally expects an s_delay_alu.
3070 define <2 x i32> @v_fshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) {
3071 ; GFX6-LABEL: v_fshl_v2i32:
3073 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3074 ; GFX6-NEXT: v_alignbit_b32 v2, v0, v2, 1
3075 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v0
3076 ; GFX6-NEXT: v_not_b32_e32 v4, v4
3077 ; GFX6-NEXT: v_alignbit_b32 v0, v0, v2, v4
3078 ; GFX6-NEXT: v_alignbit_b32 v2, v1, v3, 1
3079 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
3080 ; GFX6-NEXT: v_not_b32_e32 v3, v5
3081 ; GFX6-NEXT: v_alignbit_b32 v1, v1, v2, v3
3082 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3084 ; GFX8-LABEL: v_fshl_v2i32:
3086 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3087 ; GFX8-NEXT: v_alignbit_b32 v2, v0, v2, 1
3088 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v0
3089 ; GFX8-NEXT: v_not_b32_e32 v4, v4
3090 ; GFX8-NEXT: v_alignbit_b32 v0, v0, v2, v4
3091 ; GFX8-NEXT: v_alignbit_b32 v2, v1, v3, 1
3092 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 1, v1
3093 ; GFX8-NEXT: v_not_b32_e32 v3, v5
3094 ; GFX8-NEXT: v_alignbit_b32 v1, v1, v2, v3
3095 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3097 ; GFX9-LABEL: v_fshl_v2i32:
3099 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3100 ; GFX9-NEXT: v_alignbit_b32 v2, v0, v2, 1
3101 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v0
3102 ; GFX9-NEXT: v_not_b32_e32 v4, v4
3103 ; GFX9-NEXT: v_alignbit_b32 v0, v0, v2, v4
3104 ; GFX9-NEXT: v_alignbit_b32 v2, v1, v3, 1
3105 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 1, v1
3106 ; GFX9-NEXT: v_not_b32_e32 v3, v5
3107 ; GFX9-NEXT: v_alignbit_b32 v1, v1, v2, v3
3108 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3110 ; GFX10-LABEL: v_fshl_v2i32:
3112 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3113 ; GFX10-NEXT: v_alignbit_b32 v2, v0, v2, 1
3114 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
3115 ; GFX10-NEXT: v_not_b32_e32 v4, v4
3116 ; GFX10-NEXT: v_alignbit_b32 v3, v1, v3, 1
3117 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 1, v1
3118 ; GFX10-NEXT: v_not_b32_e32 v5, v5
3119 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v2, v4
3120 ; GFX10-NEXT: v_alignbit_b32 v1, v1, v3, v5
3121 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3123 ; GFX11-LABEL: v_fshl_v2i32:
3125 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3126 ; GFX11-NEXT: v_alignbit_b32 v2, v0, v2, 1
3127 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0
3128 ; GFX11-NEXT: v_not_b32_e32 v4, v4
3129 ; GFX11-NEXT: v_alignbit_b32 v3, v1, v3, 1
3130 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 1, v1
3131 ; GFX11-NEXT: v_not_b32_e32 v5, v5
3132 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
3133 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v2, v4
3134 ; GFX11-NEXT: v_alignbit_b32 v1, v1, v3, v5
3135 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3136 %result = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt)
3137 ret <2 x i32> %result
; Vector funnel shift left on <3 x i32> through @llvm.fshl.v3i32, with no
; inreg operands. The expected code repeats one pattern per element:
; v_alignbit_b32 by 1, v_lshrrev_b32 by 1, v_not_b32 of the shift amount, then
; v_alignbit_b32 by the inverted amount.
; The gfx6/gfx8/gfx9 checks finish one element before starting the next and
; reuse v3/v4 as temporaries; the gfx10/gfx11 checks do the preparation for
; all three elements first and emit the three final v_alignbit_b32 last.
3140 define <3 x i32> @v_fshl_v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) {
3141 ; GFX6-LABEL: v_fshl_v3i32:
3143 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3144 ; GFX6-NEXT: v_alignbit_b32 v3, v0, v3, 1
3145 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v0
3146 ; GFX6-NEXT: v_not_b32_e32 v6, v6
3147 ; GFX6-NEXT: v_alignbit_b32 v0, v0, v3, v6
3148 ; GFX6-NEXT: v_alignbit_b32 v3, v1, v4, 1
3149 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
3150 ; GFX6-NEXT: v_not_b32_e32 v4, v7
3151 ; GFX6-NEXT: v_alignbit_b32 v1, v1, v3, v4
3152 ; GFX6-NEXT: v_alignbit_b32 v3, v2, v5, 1
3153 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v2
3154 ; GFX6-NEXT: v_not_b32_e32 v4, v8
3155 ; GFX6-NEXT: v_alignbit_b32 v2, v2, v3, v4
3156 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3158 ; GFX8-LABEL: v_fshl_v3i32:
3160 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3161 ; GFX8-NEXT: v_alignbit_b32 v3, v0, v3, 1
3162 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v0
3163 ; GFX8-NEXT: v_not_b32_e32 v6, v6
3164 ; GFX8-NEXT: v_alignbit_b32 v0, v0, v3, v6
3165 ; GFX8-NEXT: v_alignbit_b32 v3, v1, v4, 1
3166 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 1, v1
3167 ; GFX8-NEXT: v_not_b32_e32 v4, v7
3168 ; GFX8-NEXT: v_alignbit_b32 v1, v1, v3, v4
3169 ; GFX8-NEXT: v_alignbit_b32 v3, v2, v5, 1
3170 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v2
3171 ; GFX8-NEXT: v_not_b32_e32 v4, v8
3172 ; GFX8-NEXT: v_alignbit_b32 v2, v2, v3, v4
3173 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3175 ; GFX9-LABEL: v_fshl_v3i32:
3177 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3178 ; GFX9-NEXT: v_alignbit_b32 v3, v0, v3, 1
3179 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v0
3180 ; GFX9-NEXT: v_not_b32_e32 v6, v6
3181 ; GFX9-NEXT: v_alignbit_b32 v0, v0, v3, v6
3182 ; GFX9-NEXT: v_alignbit_b32 v3, v1, v4, 1
3183 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 1, v1
3184 ; GFX9-NEXT: v_not_b32_e32 v4, v7
3185 ; GFX9-NEXT: v_alignbit_b32 v1, v1, v3, v4
3186 ; GFX9-NEXT: v_alignbit_b32 v3, v2, v5, 1
3187 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v2
3188 ; GFX9-NEXT: v_not_b32_e32 v4, v8
3189 ; GFX9-NEXT: v_alignbit_b32 v2, v2, v3, v4
3190 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3192 ; GFX10-LABEL: v_fshl_v3i32:
3194 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3195 ; GFX10-NEXT: v_alignbit_b32 v3, v0, v3, 1
3196 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
3197 ; GFX10-NEXT: v_not_b32_e32 v6, v6
3198 ; GFX10-NEXT: v_alignbit_b32 v4, v1, v4, 1
3199 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 1, v1
3200 ; GFX10-NEXT: v_not_b32_e32 v7, v7
3201 ; GFX10-NEXT: v_alignbit_b32 v5, v2, v5, 1
3202 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 1, v2
3203 ; GFX10-NEXT: v_not_b32_e32 v8, v8
3204 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v3, v6
3205 ; GFX10-NEXT: v_alignbit_b32 v1, v1, v4, v7
3206 ; GFX10-NEXT: v_alignbit_b32 v2, v2, v5, v8
3207 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3209 ; GFX11-LABEL: v_fshl_v3i32:
3211 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3212 ; GFX11-NEXT: v_alignbit_b32 v3, v0, v3, 1
3213 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0
3214 ; GFX11-NEXT: v_not_b32_e32 v6, v6
3215 ; GFX11-NEXT: v_alignbit_b32 v4, v1, v4, 1
3216 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 1, v1
3217 ; GFX11-NEXT: v_not_b32_e32 v7, v7
3218 ; GFX11-NEXT: v_alignbit_b32 v5, v2, v5, 1
3219 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 1, v2
3220 ; GFX11-NEXT: v_not_b32_e32 v8, v8
3221 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v3, v6
3222 ; GFX11-NEXT: v_alignbit_b32 v1, v1, v4, v7
3223 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
3224 ; GFX11-NEXT: v_alignbit_b32 v2, v2, v5, v8
3225 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3226 %result = call <3 x i32> @llvm.fshl.v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt)
3227 ret <3 x i32> %result
; Vector funnel shift left on <4 x i32> through @llvm.fshl.v4i32, with no
; inreg operands. As in the narrower vector tests, each element is expected to
; lower to v_alignbit_b32 by 1, v_lshrrev_b32 by 1, v_not_b32 of the shift
; amount and a final v_alignbit_b32 by the inverted amount.
; The gfx6/gfx8/gfx9 checks process the elements one after another using v4/v5
; as temporaries; the gfx10/gfx11 checks prepare all four elements before the
; four final v_alignbit_b32 instructions.
3230 define <4 x i32> @v_fshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) {
3231 ; GFX6-LABEL: v_fshl_v4i32:
3233 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3234 ; GFX6-NEXT: v_alignbit_b32 v4, v0, v4, 1
3235 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v0
3236 ; GFX6-NEXT: v_not_b32_e32 v8, v8
3237 ; GFX6-NEXT: v_alignbit_b32 v0, v0, v4, v8
3238 ; GFX6-NEXT: v_alignbit_b32 v4, v1, v5, 1
3239 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
3240 ; GFX6-NEXT: v_not_b32_e32 v5, v9
3241 ; GFX6-NEXT: v_alignbit_b32 v1, v1, v4, v5
3242 ; GFX6-NEXT: v_alignbit_b32 v4, v2, v6, 1
3243 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v2
3244 ; GFX6-NEXT: v_not_b32_e32 v5, v10
3245 ; GFX6-NEXT: v_alignbit_b32 v2, v2, v4, v5
3246 ; GFX6-NEXT: v_alignbit_b32 v4, v3, v7, 1
3247 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 1, v3
3248 ; GFX6-NEXT: v_not_b32_e32 v5, v11
3249 ; GFX6-NEXT: v_alignbit_b32 v3, v3, v4, v5
3250 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3252 ; GFX8-LABEL: v_fshl_v4i32:
3254 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3255 ; GFX8-NEXT: v_alignbit_b32 v4, v0, v4, 1
3256 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v0
3257 ; GFX8-NEXT: v_not_b32_e32 v8, v8
3258 ; GFX8-NEXT: v_alignbit_b32 v0, v0, v4, v8
3259 ; GFX8-NEXT: v_alignbit_b32 v4, v1, v5, 1
3260 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 1, v1
3261 ; GFX8-NEXT: v_not_b32_e32 v5, v9
3262 ; GFX8-NEXT: v_alignbit_b32 v1, v1, v4, v5
3263 ; GFX8-NEXT: v_alignbit_b32 v4, v2, v6, 1
3264 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v2
3265 ; GFX8-NEXT: v_not_b32_e32 v5, v10
3266 ; GFX8-NEXT: v_alignbit_b32 v2, v2, v4, v5
3267 ; GFX8-NEXT: v_alignbit_b32 v4, v3, v7, 1
3268 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 1, v3
3269 ; GFX8-NEXT: v_not_b32_e32 v5, v11
3270 ; GFX8-NEXT: v_alignbit_b32 v3, v3, v4, v5
3271 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3273 ; GFX9-LABEL: v_fshl_v4i32:
3275 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3276 ; GFX9-NEXT: v_alignbit_b32 v4, v0, v4, 1
3277 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v0
3278 ; GFX9-NEXT: v_not_b32_e32 v8, v8
3279 ; GFX9-NEXT: v_alignbit_b32 v0, v0, v4, v8
3280 ; GFX9-NEXT: v_alignbit_b32 v4, v1, v5, 1
3281 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 1, v1
3282 ; GFX9-NEXT: v_not_b32_e32 v5, v9
3283 ; GFX9-NEXT: v_alignbit_b32 v1, v1, v4, v5
3284 ; GFX9-NEXT: v_alignbit_b32 v4, v2, v6, 1
3285 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v2
3286 ; GFX9-NEXT: v_not_b32_e32 v5, v10
3287 ; GFX9-NEXT: v_alignbit_b32 v2, v2, v4, v5
3288 ; GFX9-NEXT: v_alignbit_b32 v4, v3, v7, 1
3289 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 1, v3
3290 ; GFX9-NEXT: v_not_b32_e32 v5, v11
3291 ; GFX9-NEXT: v_alignbit_b32 v3, v3, v4, v5
3292 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3294 ; GFX10-LABEL: v_fshl_v4i32:
3296 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3297 ; GFX10-NEXT: v_alignbit_b32 v4, v0, v4, 1
3298 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0
3299 ; GFX10-NEXT: v_not_b32_e32 v8, v8
3300 ; GFX10-NEXT: v_alignbit_b32 v5, v1, v5, 1
3301 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 1, v1
3302 ; GFX10-NEXT: v_not_b32_e32 v9, v9
3303 ; GFX10-NEXT: v_alignbit_b32 v6, v2, v6, 1
3304 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 1, v2
3305 ; GFX10-NEXT: v_not_b32_e32 v10, v10
3306 ; GFX10-NEXT: v_alignbit_b32 v7, v3, v7, 1
3307 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 1, v3
3308 ; GFX10-NEXT: v_not_b32_e32 v11, v11
3309 ; GFX10-NEXT: v_alignbit_b32 v0, v0, v4, v8
3310 ; GFX10-NEXT: v_alignbit_b32 v1, v1, v5, v9
3311 ; GFX10-NEXT: v_alignbit_b32 v2, v2, v6, v10
3312 ; GFX10-NEXT: v_alignbit_b32 v3, v3, v7, v11
3313 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3315 ; GFX11-LABEL: v_fshl_v4i32:
3317 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3318 ; GFX11-NEXT: v_alignbit_b32 v4, v0, v4, 1
3319 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0
3320 ; GFX11-NEXT: v_not_b32_e32 v8, v8
3321 ; GFX11-NEXT: v_alignbit_b32 v5, v1, v5, 1
3322 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 1, v1
3323 ; GFX11-NEXT: v_not_b32_e32 v9, v9
3324 ; GFX11-NEXT: v_alignbit_b32 v6, v2, v6, 1
3325 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 1, v2
3326 ; GFX11-NEXT: v_not_b32_e32 v10, v10
3327 ; GFX11-NEXT: v_alignbit_b32 v7, v3, v7, 1
3328 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 1, v3
3329 ; GFX11-NEXT: v_not_b32_e32 v11, v11
3330 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v4, v8
3331 ; GFX11-NEXT: v_alignbit_b32 v1, v1, v5, v9
3332 ; GFX11-NEXT: v_alignbit_b32 v2, v2, v6, v10
3333 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
3334 ; GFX11-NEXT: v_alignbit_b32 v3, v3, v7, v11
3335 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3336 %result = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt)
3337 ret <4 x i32> %result
; i16 funnel shift left through @llvm.fshl.i16 in an amdgpu_ps function with
; all three operands inreg; the expected code is scalar-only (s_* instructions).
; Expected shape on every target: the amount is masked with 15 and its
; complement is formed with s_andn2_b32 (s_and_not1_b32 on gfx11); %lhs is
; shifted left by the masked amount, %rhs is first shifted right by 1 and then
; by the masked complement, and the two halves are combined with s_or_b32.
; gfx6 folds the 16-bit masking and the shift by 1 of %rhs into one
; s_bfe_u32 (0xf0001); the other targets mask with 0xffff and use s_lshr_b32.
3340 define amdgpu_ps i16 @s_fshl_i16(i16 inreg %lhs, i16 inreg %rhs, i16 inreg %amt) {
3341 ; GFX6-LABEL: s_fshl_i16:
3343 ; GFX6-NEXT: s_and_b32 s3, s2, 15
3344 ; GFX6-NEXT: s_andn2_b32 s2, 15, s2
3345 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
3346 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0xf0001
3347 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
3348 ; GFX6-NEXT: s_lshl_b32 s0, s0, s3
3349 ; GFX6-NEXT: s_lshr_b32 s1, s1, s2
3350 ; GFX6-NEXT: s_or_b32 s0, s0, s1
3351 ; GFX6-NEXT: ; return to shader part epilog
3353 ; GFX8-LABEL: s_fshl_i16:
3355 ; GFX8-NEXT: s_and_b32 s3, s2, 15
3356 ; GFX8-NEXT: s_andn2_b32 s2, 15, s2
3357 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3358 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
3359 ; GFX8-NEXT: s_lshr_b32 s1, s1, 1
3360 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
3361 ; GFX8-NEXT: s_lshl_b32 s0, s0, s3
3362 ; GFX8-NEXT: s_lshr_b32 s1, s1, s2
3363 ; GFX8-NEXT: s_or_b32 s0, s0, s1
3364 ; GFX8-NEXT: ; return to shader part epilog
3366 ; GFX9-LABEL: s_fshl_i16:
3368 ; GFX9-NEXT: s_and_b32 s3, s2, 15
3369 ; GFX9-NEXT: s_andn2_b32 s2, 15, s2
3370 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
3371 ; GFX9-NEXT: s_and_b32 s3, 0xffff, s3
3372 ; GFX9-NEXT: s_lshr_b32 s1, s1, 1
3373 ; GFX9-NEXT: s_and_b32 s2, 0xffff, s2
3374 ; GFX9-NEXT: s_lshl_b32 s0, s0, s3
3375 ; GFX9-NEXT: s_lshr_b32 s1, s1, s2
3376 ; GFX9-NEXT: s_or_b32 s0, s0, s1
3377 ; GFX9-NEXT: ; return to shader part epilog
3379 ; GFX10-LABEL: s_fshl_i16:
3381 ; GFX10-NEXT: s_and_b32 s3, s2, 15
3382 ; GFX10-NEXT: s_andn2_b32 s2, 15, s2
3383 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
3384 ; GFX10-NEXT: s_and_b32 s3, 0xffff, s3
3385 ; GFX10-NEXT: s_lshr_b32 s1, s1, 1
3386 ; GFX10-NEXT: s_and_b32 s2, 0xffff, s2
3387 ; GFX10-NEXT: s_lshl_b32 s0, s0, s3
3388 ; GFX10-NEXT: s_lshr_b32 s1, s1, s2
3389 ; GFX10-NEXT: s_or_b32 s0, s0, s1
3390 ; GFX10-NEXT: ; return to shader part epilog
3392 ; GFX11-LABEL: s_fshl_i16:
3394 ; GFX11-NEXT: s_and_b32 s3, s2, 15
3395 ; GFX11-NEXT: s_and_not1_b32 s2, 15, s2
3396 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
3397 ; GFX11-NEXT: s_and_b32 s3, 0xffff, s3
3398 ; GFX11-NEXT: s_lshr_b32 s1, s1, 1
3399 ; GFX11-NEXT: s_and_b32 s2, 0xffff, s2
3400 ; GFX11-NEXT: s_lshl_b32 s0, s0, s3
3401 ; GFX11-NEXT: s_lshr_b32 s1, s1, s2
3402 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3403 ; GFX11-NEXT: s_or_b32 s0, s0, s1
3404 ; GFX11-NEXT: ; return to shader part epilog
3405 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt)
; i16 funnel shift left by the constant 4 through @llvm.fshl.i16, amdgpu_ps
; with both operands inreg. With a constant amount the expected code has no
; amount masking: %lhs is shifted left by 4, the top 4 bits of the 16-bit %rhs
; are extracted (shift right by 12), and the two are combined with s_or_b32.
; gfx6 extracts the %rhs bits with one s_bfe_u32 (0x4000c); the other targets
; mask %rhs with 0xffff and then use s_lshr_b32 by 12.
3409 define amdgpu_ps i16 @s_fshl_i16_4(i16 inreg %lhs, i16 inreg %rhs) {
3410 ; GFX6-LABEL: s_fshl_i16_4:
3412 ; GFX6-NEXT: s_lshl_b32 s0, s0, 4
3413 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x4000c
3414 ; GFX6-NEXT: s_or_b32 s0, s0, s1
3415 ; GFX6-NEXT: ; return to shader part epilog
3417 ; GFX8-LABEL: s_fshl_i16_4:
3419 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3420 ; GFX8-NEXT: s_lshl_b32 s0, s0, 4
3421 ; GFX8-NEXT: s_lshr_b32 s1, s1, 12
3422 ; GFX8-NEXT: s_or_b32 s0, s0, s1
3423 ; GFX8-NEXT: ; return to shader part epilog
3425 ; GFX9-LABEL: s_fshl_i16_4:
3427 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
3428 ; GFX9-NEXT: s_lshl_b32 s0, s0, 4
3429 ; GFX9-NEXT: s_lshr_b32 s1, s1, 12
3430 ; GFX9-NEXT: s_or_b32 s0, s0, s1
3431 ; GFX9-NEXT: ; return to shader part epilog
3433 ; GFX10-LABEL: s_fshl_i16_4:
3435 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
3436 ; GFX10-NEXT: s_lshl_b32 s0, s0, 4
3437 ; GFX10-NEXT: s_lshr_b32 s1, s1, 12
3438 ; GFX10-NEXT: s_or_b32 s0, s0, s1
3439 ; GFX10-NEXT: ; return to shader part epilog
3441 ; GFX11-LABEL: s_fshl_i16_4:
3443 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
3444 ; GFX11-NEXT: s_lshl_b32 s0, s0, 4
3445 ; GFX11-NEXT: s_lshr_b32 s1, s1, 12
3446 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3447 ; GFX11-NEXT: s_or_b32 s0, s0, s1
3448 ; GFX11-NEXT: ; return to shader part epilog
3449 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 4)
; i16 funnel shift left by the constant 5 through @llvm.fshl.i16, amdgpu_ps
; with both operands inreg. Same shape as the constant-4 scalar test with an
; odd amount: %lhs is shifted left by 5, the top 5 bits of the 16-bit %rhs are
; extracted (shift right by 11), and the two are combined with s_or_b32.
; gfx6 extracts the %rhs bits with one s_bfe_u32 (0x5000b); the other targets
; mask %rhs with 0xffff and then use s_lshr_b32 by 11.
3453 define amdgpu_ps i16 @s_fshl_i16_5(i16 inreg %lhs, i16 inreg %rhs) {
3454 ; GFX6-LABEL: s_fshl_i16_5:
3456 ; GFX6-NEXT: s_lshl_b32 s0, s0, 5
3457 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x5000b
3458 ; GFX6-NEXT: s_or_b32 s0, s0, s1
3459 ; GFX6-NEXT: ; return to shader part epilog
3461 ; GFX8-LABEL: s_fshl_i16_5:
3463 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3464 ; GFX8-NEXT: s_lshl_b32 s0, s0, 5
3465 ; GFX8-NEXT: s_lshr_b32 s1, s1, 11
3466 ; GFX8-NEXT: s_or_b32 s0, s0, s1
3467 ; GFX8-NEXT: ; return to shader part epilog
3469 ; GFX9-LABEL: s_fshl_i16_5:
3471 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
3472 ; GFX9-NEXT: s_lshl_b32 s0, s0, 5
3473 ; GFX9-NEXT: s_lshr_b32 s1, s1, 11
3474 ; GFX9-NEXT: s_or_b32 s0, s0, s1
3475 ; GFX9-NEXT: ; return to shader part epilog
3477 ; GFX10-LABEL: s_fshl_i16_5:
3479 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
3480 ; GFX10-NEXT: s_lshl_b32 s0, s0, 5
3481 ; GFX10-NEXT: s_lshr_b32 s1, s1, 11
3482 ; GFX10-NEXT: s_or_b32 s0, s0, s1
3483 ; GFX10-NEXT: ; return to shader part epilog
3485 ; GFX11-LABEL: s_fshl_i16_5:
3487 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
3488 ; GFX11-NEXT: s_lshl_b32 s0, s0, 5
3489 ; GFX11-NEXT: s_lshr_b32 s1, s1, 11
3490 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3491 ; GFX11-NEXT: s_or_b32 s0, s0, s1
3492 ; GFX11-NEXT: ; return to shader part epilog
3493 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 5)
; i16 funnel shift left with a variable amount through @llvm.fshl.i16, using
; the default calling convention and no inreg operands; the expected code is
; vector-only (v_* instructions on v0 = lhs, v1 = rhs, v2 = amt).
; Expected shape: the amount and its complement (v_xor_b32 with -1) are each
; masked with 15; %lhs is shifted left by the masked amount, %rhs is shifted
; right by 1 and then by the masked complement, and the halves are ORed.
; gfx6 is checked with 32-bit shifts plus explicit 0xffff masks and a
; v_bfe_u32 for the shift of %rhs by 1; the gfx8 and later checks use the
; 16-bit shift instructions v_lshlrev_b16 / v_lshrrev_b16 instead.
3497 define i16 @v_fshl_i16(i16 %lhs, i16 %rhs, i16 %amt) {
3498 ; GFX6-LABEL: v_fshl_i16:
3500 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3501 ; GFX6-NEXT: v_and_b32_e32 v3, 15, v2
3502 ; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
3503 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v2
3504 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
3505 ; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 15
3506 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3507 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0
3508 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
3509 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
3510 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3512 ; GFX8-LABEL: v_fshl_i16:
3514 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3515 ; GFX8-NEXT: v_and_b32_e32 v3, 15, v2
3516 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
3517 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v2
3518 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 1, v1
3519 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0
3520 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1
3521 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
3522 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3524 ; GFX9-LABEL: v_fshl_i16:
3526 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3527 ; GFX9-NEXT: v_and_b32_e32 v3, 15, v2
3528 ; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
3529 ; GFX9-NEXT: v_and_b32_e32 v2, 15, v2
3530 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, 1, v1
3531 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0
3532 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1
3533 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
3534 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3536 ; GFX10-LABEL: v_fshl_i16:
3538 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3539 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2
3540 ; GFX10-NEXT: v_and_b32_e32 v2, 15, v2
3541 ; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1
3542 ; GFX10-NEXT: v_and_b32_e32 v3, 15, v3
3543 ; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0
3544 ; GFX10-NEXT: v_lshrrev_b16 v1, v3, v1
3545 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3546 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3548 ; GFX11-LABEL: v_fshl_i16:
3550 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3551 ; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2
3552 ; GFX11-NEXT: v_and_b32_e32 v2, 15, v2
3553 ; GFX11-NEXT: v_lshrrev_b16 v1, 1, v1
3554 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
3555 ; GFX11-NEXT: v_and_b32_e32 v3, 15, v3
3556 ; GFX11-NEXT: v_lshlrev_b16 v0, v2, v0
3557 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3558 ; GFX11-NEXT: v_lshrrev_b16 v1, v3, v1
3559 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
3560 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3561 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt)
; i16 funnel shift left by the constant 4 through @llvm.fshl.i16, default
; calling convention, no inreg operands (v0 = lhs, v1 = rhs in the checks).
; Expected code: %lhs shifted left by 4, the top 4 bits of the 16-bit %rhs
; moved down (shift right by 12), then v_or_b32.
; gfx6 is checked with a 32-bit v_lshlrev_b32 and a v_bfe_u32 (offset 12,
; width 4); the gfx8 and later checks use 16-bit v_lshlrev_b16 / v_lshrrev_b16.
3565 define i16 @v_fshl_i16_4(i16 %lhs, i16 %rhs) {
3566 ; GFX6-LABEL: v_fshl_i16_4:
3568 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3569 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0
3570 ; GFX6-NEXT: v_bfe_u32 v1, v1, 12, 4
3571 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
3572 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3574 ; GFX8-LABEL: v_fshl_i16_4:
3576 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3577 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0
3578 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 12, v1
3579 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
3580 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3582 ; GFX9-LABEL: v_fshl_i16_4:
3584 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3585 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 4, v0
3586 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, 12, v1
3587 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
3588 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3590 ; GFX10-LABEL: v_fshl_i16_4:
3592 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3593 ; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0
3594 ; GFX10-NEXT: v_lshrrev_b16 v1, 12, v1
3595 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3596 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3598 ; GFX11-LABEL: v_fshl_i16_4:
3600 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3601 ; GFX11-NEXT: v_lshlrev_b16 v0, 4, v0
3602 ; GFX11-NEXT: v_lshrrev_b16 v1, 12, v1
3603 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
3604 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
3605 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3606 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 4)
; i16 funnel shift left by the constant 5 through @llvm.fshl.i16, default
; calling convention, no inreg operands (v0 = lhs, v1 = rhs in the checks).
; Expected code: %lhs shifted left by 5, the top 5 bits of the 16-bit %rhs
; moved down (shift right by 11), then v_or_b32.
; gfx6 is checked with a 32-bit v_lshlrev_b32 and a v_bfe_u32 (offset 11,
; width 5); the gfx8 and later checks use 16-bit v_lshlrev_b16 / v_lshrrev_b16.
3610 define i16 @v_fshl_i16_5(i16 %lhs, i16 %rhs) {
3611 ; GFX6-LABEL: v_fshl_i16_5:
3613 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3614 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 5, v0
3615 ; GFX6-NEXT: v_bfe_u32 v1, v1, 11, 5
3616 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
3617 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3619 ; GFX8-LABEL: v_fshl_i16_5:
3621 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3622 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 5, v0
3623 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 11, v1
3624 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
3625 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3627 ; GFX9-LABEL: v_fshl_i16_5:
3629 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3630 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 5, v0
3631 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, 11, v1
3632 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
3633 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3635 ; GFX10-LABEL: v_fshl_i16_5:
3637 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3638 ; GFX10-NEXT: v_lshlrev_b16 v0, 5, v0
3639 ; GFX10-NEXT: v_lshrrev_b16 v1, 11, v1
3640 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3641 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3643 ; GFX11-LABEL: v_fshl_i16_5:
3645 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3646 ; GFX11-NEXT: v_lshlrev_b16 v0, 5, v0
3647 ; GFX11-NEXT: v_lshrrev_b16 v1, 11, v1
3648 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
3649 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
3650 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3651 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 5)
; Mixed-operand i16 funnel shift left in an amdgpu_ps function: %lhs and %rhs
; are inreg, %amt is not. The checks read %lhs from s0, %rhs from s1 and the
; amount from v0. The i16 result is bitcast to half for the return value.
; Expected shape: the amount and its complement (v_xor_b32 with -1) are masked
; with 15 using vector instructions; %rhs is pre-shifted right by 1 with
; scalar instructions (one s_bfe_u32 on gfx6, s_and_b32 0xffff + s_lshr_b32
; elsewhere); the two variable shifts and the final OR are vector instructions.
3655 define amdgpu_ps half @v_fshl_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt) {
3656 ; GFX6-LABEL: v_fshl_i16_ssv:
3658 ; GFX6-NEXT: v_and_b32_e32 v1, 15, v0
3659 ; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
3660 ; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
3661 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
3662 ; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1
3663 ; GFX6-NEXT: s_bfe_u32 s0, s1, 0xf0001
3664 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
3665 ; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0
3666 ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
3667 ; GFX6-NEXT: ; return to shader part epilog
3669 ; GFX8-LABEL: v_fshl_i16_ssv:
3671 ; GFX8-NEXT: v_and_b32_e32 v1, 15, v0
3672 ; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
3673 ; GFX8-NEXT: v_lshlrev_b16_e64 v1, v1, s0
3674 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s1
3675 ; GFX8-NEXT: v_and_b32_e32 v0, 15, v0
3676 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
3677 ; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0
3678 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
3679 ; GFX8-NEXT: ; return to shader part epilog
3681 ; GFX9-LABEL: v_fshl_i16_ssv:
3683 ; GFX9-NEXT: v_and_b32_e32 v1, 15, v0
3684 ; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
3685 ; GFX9-NEXT: v_lshlrev_b16_e64 v1, v1, s0
3686 ; GFX9-NEXT: s_and_b32 s0, 0xffff, s1
3687 ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0
3688 ; GFX9-NEXT: s_lshr_b32 s0, s0, 1
3689 ; GFX9-NEXT: v_lshrrev_b16_e64 v0, v0, s0
3690 ; GFX9-NEXT: v_or_b32_e32 v0, v1, v0
3691 ; GFX9-NEXT: ; return to shader part epilog
3693 ; GFX10-LABEL: v_fshl_i16_ssv:
3695 ; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0
3696 ; GFX10-NEXT: v_and_b32_e32 v0, 15, v0
3697 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
3698 ; GFX10-NEXT: s_lshr_b32 s1, s1, 1
3699 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v1
3700 ; GFX10-NEXT: v_lshlrev_b16 v0, v0, s0
3701 ; GFX10-NEXT: v_lshrrev_b16 v1, v1, s1
3702 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3703 ; GFX10-NEXT: ; return to shader part epilog
3705 ; GFX11-LABEL: v_fshl_i16_ssv:
3707 ; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0
3708 ; GFX11-NEXT: v_and_b32_e32 v0, 15, v0
3709 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
3710 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
3711 ; GFX11-NEXT: s_lshr_b32 s1, s1, 1
3712 ; GFX11-NEXT: v_and_b32_e32 v1, 15, v1
3713 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
3714 ; GFX11-NEXT: v_lshlrev_b16 v0, v0, s0
3715 ; GFX11-NEXT: v_lshrrev_b16 v1, v1, s1
3716 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
3717 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
3718 ; GFX11-NEXT: ; return to shader part epilog
3719 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt)
; Reinterpret the integer result as half to match the declared return type.
3720 %cast.result = bitcast i16 %result to half
3721 ret half %cast.result
; Mixed-operand i16 funnel shift left in an amdgpu_ps function: %lhs and %amt
; are inreg, %rhs is not. The checks read %lhs from s0, the amount from s1 and
; %rhs from v0. The i16 result is bitcast to half for the return value.
; Expected shape: the amount is masked with 15 and complemented with scalar
; instructions (s_andn2_b32, or s_and_not1_b32 on gfx11); the left shift of
; %lhs is a scalar s_lshl_b32, while the two right shifts of %rhs and the
; final OR are vector instructions.
3724 define amdgpu_ps half @v_fshl_i16_svs(i16 inreg %lhs, i16 %rhs, i16 inreg %amt) {
3725 ; GFX6-LABEL: v_fshl_i16_svs:
3727 ; GFX6-NEXT: s_and_b32 s2, s1, 15
3728 ; GFX6-NEXT: s_andn2_b32 s1, 15, s1
3729 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
3730 ; GFX6-NEXT: v_bfe_u32 v0, v0, 1, 15
3731 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
3732 ; GFX6-NEXT: s_lshl_b32 s0, s0, s2
3733 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, s1, v0
3734 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
3735 ; GFX6-NEXT: ; return to shader part epilog
3737 ; GFX8-LABEL: v_fshl_i16_svs:
3739 ; GFX8-NEXT: s_and_b32 s2, s1, 15
3740 ; GFX8-NEXT: s_andn2_b32 s1, 15, s1
3741 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
3742 ; GFX8-NEXT: v_lshrrev_b16_e32 v0, 1, v0
3743 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2
3744 ; GFX8-NEXT: v_lshrrev_b16_e32 v0, s1, v0
3745 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
3746 ; GFX8-NEXT: ; return to shader part epilog
3748 ; GFX9-LABEL: v_fshl_i16_svs:
3750 ; GFX9-NEXT: s_and_b32 s2, s1, 15
3751 ; GFX9-NEXT: s_andn2_b32 s1, 15, s1
3752 ; GFX9-NEXT: s_and_b32 s2, 0xffff, s2
3753 ; GFX9-NEXT: v_lshrrev_b16_e32 v0, 1, v0
3754 ; GFX9-NEXT: s_lshl_b32 s0, s0, s2
3755 ; GFX9-NEXT: v_lshrrev_b16_e32 v0, s1, v0
3756 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
3757 ; GFX9-NEXT: ; return to shader part epilog
3759 ; GFX10-LABEL: v_fshl_i16_svs:
3761 ; GFX10-NEXT: v_lshrrev_b16 v0, 1, v0
3762 ; GFX10-NEXT: s_andn2_b32 s2, 15, s1
3763 ; GFX10-NEXT: s_and_b32 s1, s1, 15
3764 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
3765 ; GFX10-NEXT: v_lshrrev_b16 v0, s2, v0
3766 ; GFX10-NEXT: s_lshl_b32 s0, s0, s1
3767 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
3768 ; GFX10-NEXT: ; return to shader part epilog
3770 ; GFX11-LABEL: v_fshl_i16_svs:
3772 ; GFX11-NEXT: v_lshrrev_b16 v0, 1, v0
3773 ; GFX11-NEXT: s_and_not1_b32 s2, 15, s1
3774 ; GFX11-NEXT: s_and_b32 s1, s1, 15
3775 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3776 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
3777 ; GFX11-NEXT: v_lshrrev_b16 v0, s2, v0
3778 ; GFX11-NEXT: s_lshl_b32 s0, s0, s1
3779 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
3780 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v0
3781 ; GFX11-NEXT: ; return to shader part epilog
3782 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt)
; Reinterpret the integer result as half to match the declared return type.
3783 %cast.result = bitcast i16 %result to half
3784 ret half %cast.result
; Mixed-operand i16 funnel shift left in an amdgpu_ps function: %rhs and %amt
; are inreg, %lhs is not. The checks read %lhs from v0, %rhs from s0 and the
; amount from s1. The i16 result is bitcast to half for the return value.
; Expected shape: the amount masking/complement and both right shifts of %rhs
; are scalar instructions (gfx6 uses one s_bfe_u32 for the shift by 1, the
; other targets s_and_b32 0xffff + s_lshr_b32); only the left shift of %lhs
; and the final OR are vector instructions.
3787 define amdgpu_ps half @v_fshl_i16_vss(i16 %lhs, i16 inreg %rhs, i16 inreg %amt) {
3788 ; GFX6-LABEL: v_fshl_i16_vss:
3790 ; GFX6-NEXT: s_and_b32 s2, s1, 15
3791 ; GFX6-NEXT: s_andn2_b32 s1, 15, s1
3792 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
3793 ; GFX6-NEXT: s_bfe_u32 s0, s0, 0xf0001
3794 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
3795 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, s2, v0
3796 ; GFX6-NEXT: s_lshr_b32 s0, s0, s1
3797 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
3798 ; GFX6-NEXT: ; return to shader part epilog
3800 ; GFX8-LABEL: v_fshl_i16_vss:
3802 ; GFX8-NEXT: s_and_b32 s2, s1, 15
3803 ; GFX8-NEXT: s_andn2_b32 s1, 15, s1
3804 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
3805 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
3806 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3807 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, s2, v0
3808 ; GFX8-NEXT: s_lshr_b32 s0, s0, s1
3809 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
3810 ; GFX8-NEXT: ; return to shader part epilog
3812 ; GFX9-LABEL: v_fshl_i16_vss:
3814 ; GFX9-NEXT: s_and_b32 s2, s1, 15
3815 ; GFX9-NEXT: s_andn2_b32 s1, 15, s1
3816 ; GFX9-NEXT: s_and_b32 s0, 0xffff, s0
3817 ; GFX9-NEXT: s_lshr_b32 s0, s0, 1
3818 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
3819 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, s2, v0
3820 ; GFX9-NEXT: s_lshr_b32 s0, s0, s1
3821 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
3822 ; GFX9-NEXT: ; return to shader part epilog
3824 ; GFX10-LABEL: v_fshl_i16_vss:
3826 ; GFX10-NEXT: s_and_b32 s2, s1, 15
3827 ; GFX10-NEXT: s_andn2_b32 s1, 15, s1
3828 ; GFX10-NEXT: s_and_b32 s0, 0xffff, s0
3829 ; GFX10-NEXT: v_lshlrev_b16 v0, s2, v0
3830 ; GFX10-NEXT: s_lshr_b32 s0, s0, 1
3831 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
3832 ; GFX10-NEXT: s_lshr_b32 s0, s0, s1
3833 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
3834 ; GFX10-NEXT: ; return to shader part epilog
3836 ; GFX11-LABEL: v_fshl_i16_vss:
3838 ; GFX11-NEXT: s_and_b32 s2, s1, 15
3839 ; GFX11-NEXT: s_and_not1_b32 s1, 15, s1
3840 ; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
3841 ; GFX11-NEXT: v_lshlrev_b16 v0, s2, v0
3842 ; GFX11-NEXT: s_lshr_b32 s0, s0, 1
3843 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
3844 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3845 ; GFX11-NEXT: s_lshr_b32 s0, s0, s1
3846 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
3847 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v0
3848 ; GFX11-NEXT: ; return to shader part epilog
3849 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt)
; Reinterpret the integer result as half to match the declared return type.
3850 %cast.result = bitcast i16 %result to half
3851 ret half %cast.result
; Funnel shift left of <2 x i16> where lhs, rhs and amt are all `inreg`
; arguments of an amdgpu_ps function; the result is bitcast to i32.
; The expected output for every checked target consists only of scalar (s_*)
; instructions. The check lines are autogenerated (see the NOTE at the top of
; the file) and should be regenerated rather than edited by hand.
3854 define amdgpu_ps i32 @s_fshl_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
3855 ; GFX6-LABEL: s_fshl_v2i16:
3857 ; GFX6-NEXT: s_and_b32 s6, s4, 15
3858 ; GFX6-NEXT: s_andn2_b32 s4, 15, s4
3859 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
3860 ; GFX6-NEXT: s_bfe_u32 s2, s2, 0xf0001
3861 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
3862 ; GFX6-NEXT: s_lshl_b32 s0, s0, s6
3863 ; GFX6-NEXT: s_lshr_b32 s2, s2, s4
3864 ; GFX6-NEXT: s_or_b32 s0, s0, s2
3865 ; GFX6-NEXT: s_and_b32 s2, s5, 15
3866 ; GFX6-NEXT: s_andn2_b32 s4, 15, s5
3867 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
3868 ; GFX6-NEXT: s_lshl_b32 s1, s1, s2
3869 ; GFX6-NEXT: s_bfe_u32 s2, s3, 0xf0001
3870 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s4
3871 ; GFX6-NEXT: s_lshr_b32 s2, s2, s3
3872 ; GFX6-NEXT: s_or_b32 s1, s1, s2
3873 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
3874 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
3875 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
3876 ; GFX6-NEXT: s_or_b32 s0, s0, s1
3877 ; GFX6-NEXT: ; return to shader part epilog
3879 ; GFX8-LABEL: s_fshl_v2i16:
3881 ; GFX8-NEXT: s_lshr_b32 s4, s1, 16
3882 ; GFX8-NEXT: s_lshr_b32 s5, s2, 16
3883 ; GFX8-NEXT: s_and_b32 s6, s2, 15
3884 ; GFX8-NEXT: s_andn2_b32 s2, 15, s2
3885 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3886 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
3887 ; GFX8-NEXT: s_lshr_b32 s1, s1, 1
3888 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
3889 ; GFX8-NEXT: s_lshr_b32 s3, s0, 16
3890 ; GFX8-NEXT: s_lshl_b32 s0, s0, s6
3891 ; GFX8-NEXT: s_lshr_b32 s1, s1, s2
3892 ; GFX8-NEXT: s_or_b32 s0, s0, s1
3893 ; GFX8-NEXT: s_and_b32 s1, s5, 15
3894 ; GFX8-NEXT: s_andn2_b32 s2, 15, s5
3895 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3896 ; GFX8-NEXT: s_lshl_b32 s1, s3, s1
3897 ; GFX8-NEXT: s_lshr_b32 s3, s4, 1
3898 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
3899 ; GFX8-NEXT: s_lshr_b32 s2, s3, s2
3900 ; GFX8-NEXT: s_or_b32 s1, s1, s2
3901 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3902 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
3903 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
3904 ; GFX8-NEXT: s_or_b32 s0, s0, s1
3905 ; GFX8-NEXT: ; return to shader part epilog
3907 ; GFX9-LABEL: s_fshl_v2i16:
3909 ; GFX9-NEXT: s_and_b32 s3, s2, 0xf000f
3910 ; GFX9-NEXT: s_lshr_b32 s4, s0, 16
3911 ; GFX9-NEXT: s_lshr_b32 s5, s3, 16
3912 ; GFX9-NEXT: s_lshl_b32 s0, s0, s3
3913 ; GFX9-NEXT: s_lshl_b32 s3, s4, s5
3914 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s3
3915 ; GFX9-NEXT: s_lshr_b32 s3, s1, 16
3916 ; GFX9-NEXT: s_and_b32 s1, s1, 0xffff
3917 ; GFX9-NEXT: s_lshr_b32 s1, s1, 0x10001
3918 ; GFX9-NEXT: s_lshr_b32 s3, s3, 1
3919 ; GFX9-NEXT: s_andn2_b32 s2, 0xf000f, s2
3920 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s3
3921 ; GFX9-NEXT: s_lshr_b32 s3, s1, 16
3922 ; GFX9-NEXT: s_and_b32 s1, s1, 0xffff
3923 ; GFX9-NEXT: s_lshr_b32 s4, s2, 16
3924 ; GFX9-NEXT: s_lshr_b32 s1, s1, s2
3925 ; GFX9-NEXT: s_lshr_b32 s2, s3, s4
3926 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2
3927 ; GFX9-NEXT: s_or_b32 s0, s0, s1
3928 ; GFX9-NEXT: ; return to shader part epilog
3930 ; GFX10-LABEL: s_fshl_v2i16:
3932 ; GFX10-NEXT: s_and_b32 s6, s1, 0xffff
3933 ; GFX10-NEXT: s_lshr_b32 s1, s1, 16
3934 ; GFX10-NEXT: s_and_b32 s3, s2, 0xf000f
3935 ; GFX10-NEXT: s_lshr_b32 s6, s6, 0x10001
3936 ; GFX10-NEXT: s_lshr_b32 s1, s1, 1
3937 ; GFX10-NEXT: s_andn2_b32 s2, 0xf000f, s2
3938 ; GFX10-NEXT: s_lshr_b32 s4, s0, 16
3939 ; GFX10-NEXT: s_lshr_b32 s5, s3, 16
3940 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s6, s1
3941 ; GFX10-NEXT: s_lshl_b32 s0, s0, s3
3942 ; GFX10-NEXT: s_lshl_b32 s3, s4, s5
3943 ; GFX10-NEXT: s_lshr_b32 s4, s1, 16
3944 ; GFX10-NEXT: s_and_b32 s1, s1, 0xffff
3945 ; GFX10-NEXT: s_lshr_b32 s5, s2, 16
3946 ; GFX10-NEXT: s_lshr_b32 s1, s1, s2
3947 ; GFX10-NEXT: s_lshr_b32 s2, s4, s5
3948 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s3
3949 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s2
3950 ; GFX10-NEXT: s_or_b32 s0, s0, s1
3951 ; GFX10-NEXT: ; return to shader part epilog
3953 ; GFX11-LABEL: s_fshl_v2i16:
3955 ; GFX11-NEXT: s_and_b32 s6, s1, 0xffff
3956 ; GFX11-NEXT: s_lshr_b32 s1, s1, 16
3957 ; GFX11-NEXT: s_and_b32 s3, s2, 0xf000f
3958 ; GFX11-NEXT: s_lshr_b32 s6, s6, 0x10001
3959 ; GFX11-NEXT: s_lshr_b32 s1, s1, 1
3960 ; GFX11-NEXT: s_and_not1_b32 s2, 0xf000f, s2
3961 ; GFX11-NEXT: s_lshr_b32 s4, s0, 16
3962 ; GFX11-NEXT: s_lshr_b32 s5, s3, 16
3963 ; GFX11-NEXT: s_pack_ll_b32_b16 s1, s6, s1
3964 ; GFX11-NEXT: s_lshl_b32 s0, s0, s3
3965 ; GFX11-NEXT: s_lshl_b32 s3, s4, s5
3966 ; GFX11-NEXT: s_lshr_b32 s4, s1, 16
3967 ; GFX11-NEXT: s_and_b32 s1, s1, 0xffff
3968 ; GFX11-NEXT: s_lshr_b32 s5, s2, 16
3969 ; GFX11-NEXT: s_lshr_b32 s1, s1, s2
3970 ; GFX11-NEXT: s_lshr_b32 s2, s4, s5
3971 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s3
3972 ; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s2
3973 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3974 ; GFX11-NEXT: s_or_b32 s0, s0, s1
3975 ; GFX11-NEXT: ; return to shader part epilog
3976 %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3977 %cast = bitcast <2 x i16> %result to i32
; Funnel shift left of <2 x i16> with ordinary (non-inreg) arguments in a
; default-calling-convention function that returns the <2 x i16> directly.
; The expected output uses vector (v_*) instructions and returns with
; s_setpc_b64; from GFX9 onward the shifts are the packed
; v_pk_lshlrev_b16 / v_pk_lshrrev_b16 forms.
3981 define <2 x i16> @v_fshl_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) {
3982 ; GFX6-LABEL: v_fshl_v2i16:
3984 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3985 ; GFX6-NEXT: v_and_b32_e32 v6, 15, v4
3986 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
3987 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
3988 ; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
3989 ; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15
3990 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
3991 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v6, v0
3992 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
3993 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
3994 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v5
3995 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v5
3996 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
3997 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3998 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1
3999 ; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15
4000 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v4
4001 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2
4002 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
4003 ; GFX6-NEXT: s_setpc_b64 s[30:31]
4005 ; GFX8-LABEL: v_fshl_v2i16:
4007 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4008 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v2
4009 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v2
4010 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
4011 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v2
4012 ; GFX8-NEXT: v_lshrrev_b16_e32 v5, 1, v1
4013 ; GFX8-NEXT: v_lshlrev_b16_e32 v4, v4, v0
4014 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v2, v5
4015 ; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
4016 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v3
4017 ; GFX8-NEXT: v_xor_b32_e32 v3, -1, v3
4018 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4019 ; GFX8-NEXT: v_mov_b32_e32 v4, 1
4020 ; GFX8-NEXT: v_and_b32_e32 v3, 15, v3
4021 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4022 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, v3, v1
4023 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
4024 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
4025 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
4026 ; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4027 ; GFX8-NEXT: s_setpc_b64 s[30:31]
4029 ; GFX9-LABEL: v_fshl_v2i16:
4031 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4032 ; GFX9-NEXT: v_and_b32_e32 v3, 0xf000f, v2
4033 ; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
4034 ; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v2
4035 ; GFX9-NEXT: v_pk_lshrrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4036 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v3, v0
4037 ; GFX9-NEXT: v_pk_lshrrev_b16 v1, v2, v1
4038 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
4039 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4041 ; GFX10-LABEL: v_fshl_v2i16:
4043 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4044 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2
4045 ; GFX10-NEXT: v_and_b32_e32 v2, 0xf000f, v2
4046 ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4047 ; GFX10-NEXT: v_and_b32_e32 v3, 0xf000f, v3
4048 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v2, v0
4049 ; GFX10-NEXT: v_pk_lshrrev_b16 v1, v3, v1
4050 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
4051 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4053 ; GFX11-LABEL: v_fshl_v2i16:
4055 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4056 ; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2
4057 ; GFX11-NEXT: v_and_b32_e32 v2, 0xf000f, v2
4058 ; GFX11-NEXT: v_pk_lshrrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4059 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
4060 ; GFX11-NEXT: v_and_b32_e32 v3, 0xf000f, v3
4061 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, v2, v0
4062 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4063 ; GFX11-NEXT: v_pk_lshrrev_b16 v1, v3, v1
4064 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
4065 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4066 %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
4067 ret <2 x i16> %result
; Funnel shift left of <2 x i16> by the constant amount vector
; <i16 4, i16 8> (element 0 shifted by 4, element 1 by 8).
; In the expected output no shift amount is computed at run time: every
; amount appears as an immediate, or as a constant moved into a register.
; On GFX9 and later both lanes' amounts are packed into one 32-bit constant
; (0x80004 for the left shift, 0x8000c for the right shift).
4070 define <2 x i16> @v_fshl_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) {
4071 ; GFX6-LABEL: v_fshl_v2i16_4_8:
4073 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4074 ; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15
4075 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0
4076 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 11, v2
4077 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
4078 ; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15
4079 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
4080 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 7, v2
4081 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
4082 ; GFX6-NEXT: s_setpc_b64 s[30:31]
4084 ; GFX8-LABEL: v_fshl_v2i16_4_8:
4086 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4087 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0
4088 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0
4089 ; GFX8-NEXT: v_lshrrev_b16_e32 v3, 12, v1
4090 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
4091 ; GFX8-NEXT: v_mov_b32_e32 v3, 8
4092 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, 8, v2
4093 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4094 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
4095 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
4096 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
4097 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4098 ; GFX8-NEXT: s_setpc_b64 s[30:31]
4100 ; GFX9-LABEL: v_fshl_v2i16_4_8:
4102 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4103 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x80004
4104 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v2, v0
4105 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x8000c
4106 ; GFX9-NEXT: v_pk_lshrrev_b16 v1, v2, v1
4107 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
4108 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4110 ; GFX10-LABEL: v_fshl_v2i16_4_8:
4112 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4113 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 0x80004, v0
4114 ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 0x8000c, v1
4115 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
4116 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4118 ; GFX11-LABEL: v_fshl_v2i16_4_8:
4120 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4121 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, 0x80004, v0
4122 ; GFX11-NEXT: v_pk_lshrrev_b16 v1, 0x8000c, v1
4123 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4124 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
4125 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4126 %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> <i16 4, i16 8>)
4127 ret <2 x i16> %result
; Mixed-operand variant of the <2 x i16> funnel shift left: lhs and rhs are
; `inreg`, amt is an ordinary argument ("ssv" follows the lhs/rhs/amt order).
; The result is bitcast to float.
; In the expected output lhs/rhs are read from s registers and the shift
; amount from v registers, so the shifts themselves are vector instructions.
4130 define amdgpu_ps float @v_fshl_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> %amt) {
4131 ; GFX6-LABEL: v_fshl_v2i16_ssv:
4133 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v0
4134 ; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
4135 ; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
4136 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
4137 ; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2
4138 ; GFX6-NEXT: s_bfe_u32 s0, s2, 0xf0001
4139 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
4140 ; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0
4141 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
4142 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v1
4143 ; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1
4144 ; GFX6-NEXT: v_and_b32_e32 v1, 15, v1
4145 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
4146 ; GFX6-NEXT: s_bfe_u32 s0, s3, 0xf0001
4147 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
4148 ; GFX6-NEXT: v_lshl_b32_e32 v2, s1, v2
4149 ; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1
4150 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
4151 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
4152 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
4153 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
4154 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
4155 ; GFX6-NEXT: ; return to shader part epilog
4157 ; GFX8-LABEL: v_fshl_v2i16_ssv:
4159 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v0
4160 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
4161 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
4162 ; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
4163 ; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s0
4164 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s1
4165 ; GFX8-NEXT: v_and_b32_e32 v0, 15, v0
4166 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
4167 ; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0
4168 ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
4169 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
4170 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v1
4171 ; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
4172 ; GFX8-NEXT: v_and_b32_e32 v1, 15, v1
4173 ; GFX8-NEXT: s_lshr_b32 s0, s3, 1
4174 ; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s2
4175 ; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s0
4176 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
4177 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
4178 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
4179 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4180 ; GFX8-NEXT: ; return to shader part epilog
4182 ; GFX9-LABEL: v_fshl_v2i16_ssv:
4184 ; GFX9-NEXT: v_and_b32_e32 v1, 0xf000f, v0
4185 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, v1, s0
4186 ; GFX9-NEXT: s_lshr_b32 s0, s1, 16
4187 ; GFX9-NEXT: s_and_b32 s1, s1, 0xffff
4188 ; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
4189 ; GFX9-NEXT: s_lshr_b32 s1, s1, 0x10001
4190 ; GFX9-NEXT: s_lshr_b32 s0, s0, 1
4191 ; GFX9-NEXT: v_and_b32_e32 v0, 0xf000f, v0
4192 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s1, s0
4193 ; GFX9-NEXT: v_pk_lshrrev_b16 v0, v0, s0
4194 ; GFX9-NEXT: v_or_b32_e32 v0, v1, v0
4195 ; GFX9-NEXT: ; return to shader part epilog
4197 ; GFX10-LABEL: v_fshl_v2i16_ssv:
4199 ; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0
4200 ; GFX10-NEXT: s_lshr_b32 s2, s1, 16
4201 ; GFX10-NEXT: s_and_b32 s1, s1, 0xffff
4202 ; GFX10-NEXT: v_and_b32_e32 v0, 0xf000f, v0
4203 ; GFX10-NEXT: s_lshr_b32 s1, s1, 0x10001
4204 ; GFX10-NEXT: v_and_b32_e32 v1, 0xf000f, v1
4205 ; GFX10-NEXT: s_lshr_b32 s2, s2, 1
4206 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s2
4207 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v0, s0
4208 ; GFX10-NEXT: v_pk_lshrrev_b16 v1, v1, s1
4209 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
4210 ; GFX10-NEXT: ; return to shader part epilog
4212 ; GFX11-LABEL: v_fshl_v2i16_ssv:
4214 ; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0
4215 ; GFX11-NEXT: s_lshr_b32 s2, s1, 16
4216 ; GFX11-NEXT: s_and_b32 s1, s1, 0xffff
4217 ; GFX11-NEXT: v_and_b32_e32 v0, 0xf000f, v0
4218 ; GFX11-NEXT: s_lshr_b32 s1, s1, 0x10001
4219 ; GFX11-NEXT: v_and_b32_e32 v1, 0xf000f, v1
4220 ; GFX11-NEXT: s_lshr_b32 s2, s2, 1
4221 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
4222 ; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s2
4223 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, v0, s0
4224 ; GFX11-NEXT: v_pk_lshrrev_b16 v1, v1, s1
4225 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4226 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
4227 ; GFX11-NEXT: ; return to shader part epilog
4228 %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
4229 %cast = bitcast <2 x i16> %result to float
; Mixed-operand variant of the <2 x i16> funnel shift left: lhs and amt are
; `inreg`, rhs is an ordinary argument ("svs" follows the lhs/rhs/amt order).
; The result is bitcast to float.
; In the expected output the lhs shift is done with scalar instructions on
; GFX6 and GFX9-GFX11, while the rhs shift is a vector instruction that takes
; its shift amount from an s register.
4233 define amdgpu_ps float @v_fshl_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <2 x i16> inreg %amt) {
4234 ; GFX6-LABEL: v_fshl_v2i16_svs:
4236 ; GFX6-NEXT: s_and_b32 s4, s2, 15
4237 ; GFX6-NEXT: s_andn2_b32 s2, 15, s2
4238 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
4239 ; GFX6-NEXT: v_bfe_u32 v0, v0, 1, 15
4240 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
4241 ; GFX6-NEXT: s_lshl_b32 s0, s0, s4
4242 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, s2, v0
4243 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
4244 ; GFX6-NEXT: s_and_b32 s0, s3, 15
4245 ; GFX6-NEXT: s_andn2_b32 s2, 15, s3
4246 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
4247 ; GFX6-NEXT: s_lshl_b32 s0, s1, s0
4248 ; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 15
4249 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
4250 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, s1, v1
4251 ; GFX6-NEXT: v_or_b32_e32 v1, s0, v1
4252 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
4253 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
4254 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
4255 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
4256 ; GFX6-NEXT: ; return to shader part epilog
4258 ; GFX8-LABEL: v_fshl_v2i16_svs:
4260 ; GFX8-NEXT: s_and_b32 s4, s1, 15
4261 ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
4262 ; GFX8-NEXT: s_andn2_b32 s1, 15, s1
4263 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
4264 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 1, v0
4265 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
4266 ; GFX8-NEXT: s_lshl_b32 s0, s0, s4
4267 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, s1, v1
4268 ; GFX8-NEXT: v_or_b32_e32 v1, s0, v1
4269 ; GFX8-NEXT: s_and_b32 s0, s3, 15
4270 ; GFX8-NEXT: v_mov_b32_e32 v2, 1
4271 ; GFX8-NEXT: s_andn2_b32 s1, 15, s3
4272 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
4273 ; GFX8-NEXT: v_lshrrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4274 ; GFX8-NEXT: s_lshl_b32 s0, s2, s0
4275 ; GFX8-NEXT: v_lshrrev_b16_e32 v0, s1, v0
4276 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
4277 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
4278 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
4279 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4280 ; GFX8-NEXT: ; return to shader part epilog
4282 ; GFX9-LABEL: v_fshl_v2i16_svs:
4284 ; GFX9-NEXT: s_and_b32 s2, s1, 0xf000f
4285 ; GFX9-NEXT: s_lshr_b32 s3, s0, 16
4286 ; GFX9-NEXT: s_lshr_b32 s4, s2, 16
4287 ; GFX9-NEXT: s_andn2_b32 s1, 0xf000f, s1
4288 ; GFX9-NEXT: s_lshl_b32 s0, s0, s2
4289 ; GFX9-NEXT: s_lshl_b32 s2, s3, s4
4290 ; GFX9-NEXT: v_pk_lshrrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4291 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
4292 ; GFX9-NEXT: v_pk_lshrrev_b16 v0, s1, v0
4293 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
4294 ; GFX9-NEXT: ; return to shader part epilog
4296 ; GFX10-LABEL: v_fshl_v2i16_svs:
4298 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4299 ; GFX10-NEXT: s_and_b32 s2, s1, 0xf000f
4300 ; GFX10-NEXT: s_andn2_b32 s1, 0xf000f, s1
4301 ; GFX10-NEXT: s_lshr_b32 s3, s0, 16
4302 ; GFX10-NEXT: s_lshr_b32 s4, s2, 16
4303 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, s1, v0
4304 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2
4305 ; GFX10-NEXT: s_lshl_b32 s1, s3, s4
4306 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1
4307 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
4308 ; GFX10-NEXT: ; return to shader part epilog
4310 ; GFX11-LABEL: v_fshl_v2i16_svs:
4312 ; GFX11-NEXT: v_pk_lshrrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4313 ; GFX11-NEXT: s_and_b32 s2, s1, 0xf000f
4314 ; GFX11-NEXT: s_and_not1_b32 s1, 0xf000f, s1
4315 ; GFX11-NEXT: s_lshr_b32 s3, s0, 16
4316 ; GFX11-NEXT: s_lshr_b32 s4, s2, 16
4317 ; GFX11-NEXT: v_pk_lshrrev_b16 v0, s1, v0
4318 ; GFX11-NEXT: s_lshl_b32 s0, s0, s2
4319 ; GFX11-NEXT: s_lshl_b32 s1, s3, s4
4320 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
4321 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s1
4322 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
4323 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v0
4324 ; GFX11-NEXT: ; return to shader part epilog
4325 %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
4326 %cast = bitcast <2 x i16> %result to float
; Mixed-operand variant of the <2 x i16> funnel shift left: lhs is an
; ordinary argument, rhs and amt are `inreg` ("vss" follows the lhs/rhs/amt
; order). The result is bitcast to float.
; In the expected output the rhs shift is done entirely with scalar
; instructions, while the lhs shift is a vector instruction that takes its
; shift amount from an s register (copied to a VGPR first for the GFX8 SDWA
; shift).
4330 define amdgpu_ps float @v_fshl_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
4331 ; GFX6-LABEL: v_fshl_v2i16_vss:
4333 ; GFX6-NEXT: s_and_b32 s4, s2, 15
4334 ; GFX6-NEXT: s_andn2_b32 s2, 15, s2
4335 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
4336 ; GFX6-NEXT: s_bfe_u32 s0, s0, 0xf0001
4337 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
4338 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0
4339 ; GFX6-NEXT: s_lshr_b32 s0, s0, s2
4340 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
4341 ; GFX6-NEXT: s_and_b32 s0, s3, 15
4342 ; GFX6-NEXT: s_andn2_b32 s2, 15, s3
4343 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
4344 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, s0, v1
4345 ; GFX6-NEXT: s_bfe_u32 s0, s1, 0xf0001
4346 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
4347 ; GFX6-NEXT: s_lshr_b32 s0, s0, s1
4348 ; GFX6-NEXT: v_or_b32_e32 v1, s0, v1
4349 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
4350 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
4351 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
4352 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
4353 ; GFX6-NEXT: ; return to shader part epilog
4355 ; GFX8-LABEL: v_fshl_v2i16_vss:
4357 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
4358 ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
4359 ; GFX8-NEXT: s_and_b32 s4, s1, 15
4360 ; GFX8-NEXT: s_andn2_b32 s1, 15, s1
4361 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
4362 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
4363 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
4364 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, s4, v0
4365 ; GFX8-NEXT: s_lshr_b32 s0, s0, s1
4366 ; GFX8-NEXT: v_or_b32_e32 v1, s0, v1
4367 ; GFX8-NEXT: s_and_b32 s0, s3, 15
4368 ; GFX8-NEXT: s_andn2_b32 s1, 15, s3
4369 ; GFX8-NEXT: v_mov_b32_e32 v2, s0
4370 ; GFX8-NEXT: s_lshr_b32 s0, s2, 1
4371 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
4372 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4373 ; GFX8-NEXT: s_lshr_b32 s0, s0, s1
4374 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
4375 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
4376 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
4377 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4378 ; GFX8-NEXT: ; return to shader part epilog
4380 ; GFX9-LABEL: v_fshl_v2i16_vss:
4382 ; GFX9-NEXT: s_and_b32 s2, s1, 0xf000f
4383 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, s2, v0
4384 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
4385 ; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
4386 ; GFX9-NEXT: s_lshr_b32 s0, s0, 0x10001
4387 ; GFX9-NEXT: s_lshr_b32 s2, s2, 1
4388 ; GFX9-NEXT: s_andn2_b32 s1, 0xf000f, s1
4389 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
4390 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
4391 ; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
4392 ; GFX9-NEXT: s_lshr_b32 s3, s1, 16
4393 ; GFX9-NEXT: s_lshr_b32 s0, s0, s1
4394 ; GFX9-NEXT: s_lshr_b32 s1, s2, s3
4395 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1
4396 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
4397 ; GFX9-NEXT: ; return to shader part epilog
4399 ; GFX10-LABEL: v_fshl_v2i16_vss:
4401 ; GFX10-NEXT: s_and_b32 s3, s0, 0xffff
4402 ; GFX10-NEXT: s_lshr_b32 s0, s0, 16
4403 ; GFX10-NEXT: s_lshr_b32 s3, s3, 0x10001
4404 ; GFX10-NEXT: s_lshr_b32 s0, s0, 1
4405 ; GFX10-NEXT: s_and_b32 s2, s1, 0xf000f
4406 ; GFX10-NEXT: s_andn2_b32 s1, 0xf000f, s1
4407 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s3, s0
4408 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, s2, v0
4409 ; GFX10-NEXT: s_lshr_b32 s2, s0, 16
4410 ; GFX10-NEXT: s_and_b32 s0, s0, 0xffff
4411 ; GFX10-NEXT: s_lshr_b32 s3, s1, 16
4412 ; GFX10-NEXT: s_lshr_b32 s0, s0, s1
4413 ; GFX10-NEXT: s_lshr_b32 s1, s2, s3
4414 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1
4415 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
4416 ; GFX10-NEXT: ; return to shader part epilog
4418 ; GFX11-LABEL: v_fshl_v2i16_vss:
4420 ; GFX11-NEXT: s_and_b32 s3, s0, 0xffff
4421 ; GFX11-NEXT: s_lshr_b32 s0, s0, 16
4422 ; GFX11-NEXT: s_lshr_b32 s3, s3, 0x10001
4423 ; GFX11-NEXT: s_lshr_b32 s0, s0, 1
4424 ; GFX11-NEXT: s_and_b32 s2, s1, 0xf000f
4425 ; GFX11-NEXT: s_and_not1_b32 s1, 0xf000f, s1
4426 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s3, s0
4427 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, s2, v0
4428 ; GFX11-NEXT: s_lshr_b32 s2, s0, 16
4429 ; GFX11-NEXT: s_and_b32 s0, s0, 0xffff
4430 ; GFX11-NEXT: s_lshr_b32 s3, s1, 16
4431 ; GFX11-NEXT: s_lshr_b32 s0, s0, s1
4432 ; GFX11-NEXT: s_lshr_b32 s1, s2, s3
4433 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
4434 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s1
4435 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v0
4436 ; GFX11-NEXT: ; return to shader part epilog
4437 %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
4438 %cast = bitcast <2 x i16> %result to float
; Funnel shift left of the odd-sized vector <3 x i16> where lhs, rhs and amt
; are all `inreg` arguments of an amdgpu_ps function; the result is bitcast
; to i48.
; The expected output for every checked target consists only of scalar (s_*)
; instructions and leaves the result in s0 and s1, with s1 masked down to its
; low 16 bits by the final s_and_b32.
4443 define amdgpu_ps i48 @s_fshl_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <3 x i16> inreg %amt) {
4444 ; GFX6-LABEL: s_fshl_v3i16:
4446 ; GFX6-NEXT: s_and_b32 s9, s6, 15
4447 ; GFX6-NEXT: s_andn2_b32 s6, 15, s6
4448 ; GFX6-NEXT: s_and_b32 s9, 0xffff, s9
4449 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001
4450 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
4451 ; GFX6-NEXT: s_lshl_b32 s0, s0, s9
4452 ; GFX6-NEXT: s_lshr_b32 s3, s3, s6
4453 ; GFX6-NEXT: s_or_b32 s0, s0, s3
4454 ; GFX6-NEXT: s_and_b32 s3, s7, 15
4455 ; GFX6-NEXT: s_andn2_b32 s6, 15, s7
4456 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
4457 ; GFX6-NEXT: s_lshl_b32 s1, s1, s3
4458 ; GFX6-NEXT: s_bfe_u32 s3, s4, 0xf0001
4459 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s6
4460 ; GFX6-NEXT: s_lshr_b32 s3, s3, s4
4461 ; GFX6-NEXT: s_or_b32 s1, s1, s3
4462 ; GFX6-NEXT: s_and_b32 s3, s8, 15
4463 ; GFX6-NEXT: s_andn2_b32 s4, 15, s8
4464 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
4465 ; GFX6-NEXT: s_lshl_b32 s2, s2, s3
4466 ; GFX6-NEXT: s_bfe_u32 s3, s5, 0xf0001
4467 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
4468 ; GFX6-NEXT: s_lshr_b32 s3, s3, s4
4469 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
4470 ; GFX6-NEXT: s_or_b32 s2, s2, s3
4471 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
4472 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
4473 ; GFX6-NEXT: s_or_b32 s0, s0, s1
4474 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
4475 ; GFX6-NEXT: ; return to shader part epilog
4477 ; GFX8-LABEL: s_fshl_v3i16:
4479 ; GFX8-NEXT: s_lshr_b32 s7, s2, 16
4480 ; GFX8-NEXT: s_lshr_b32 s8, s4, 16
4481 ; GFX8-NEXT: s_and_b32 s9, s4, 15
4482 ; GFX8-NEXT: s_andn2_b32 s4, 15, s4
4483 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4484 ; GFX8-NEXT: s_and_b32 s9, 0xffff, s9
4485 ; GFX8-NEXT: s_lshr_b32 s2, s2, 1
4486 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
4487 ; GFX8-NEXT: s_lshr_b32 s6, s0, 16
4488 ; GFX8-NEXT: s_lshl_b32 s0, s0, s9
4489 ; GFX8-NEXT: s_lshr_b32 s2, s2, s4
4490 ; GFX8-NEXT: s_or_b32 s0, s0, s2
4491 ; GFX8-NEXT: s_and_b32 s2, s8, 15
4492 ; GFX8-NEXT: s_andn2_b32 s4, 15, s8
4493 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4494 ; GFX8-NEXT: s_lshl_b32 s2, s6, s2
4495 ; GFX8-NEXT: s_lshr_b32 s6, s7, 1
4496 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
4497 ; GFX8-NEXT: s_lshr_b32 s4, s6, s4
4498 ; GFX8-NEXT: s_or_b32 s2, s2, s4
4499 ; GFX8-NEXT: s_and_b32 s4, s5, 15
4500 ; GFX8-NEXT: s_andn2_b32 s5, 15, s5
4501 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
4502 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
4503 ; GFX8-NEXT: s_lshl_b32 s1, s1, s4
4504 ; GFX8-NEXT: s_lshr_b32 s3, s3, 1
4505 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
4506 ; GFX8-NEXT: s_lshr_b32 s3, s3, s4
4507 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4508 ; GFX8-NEXT: s_or_b32 s1, s1, s3
4509 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
4510 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
4511 ; GFX8-NEXT: s_or_b32 s0, s0, s2
4512 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
4513 ; GFX8-NEXT: ; return to shader part epilog
4515 ; GFX9-LABEL: s_fshl_v3i16:
4517 ; GFX9-NEXT: s_and_b32 s6, s4, 0xf000f
4518 ; GFX9-NEXT: s_lshr_b32 s7, s0, 16
4519 ; GFX9-NEXT: s_lshr_b32 s8, s6, 16
4520 ; GFX9-NEXT: s_lshl_b32 s0, s0, s6
4521 ; GFX9-NEXT: s_lshl_b32 s6, s7, s8
4522 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s6
4523 ; GFX9-NEXT: s_lshr_b32 s6, s2, 16
4524 ; GFX9-NEXT: s_and_b32 s2, s2, 0xffff
4525 ; GFX9-NEXT: s_lshr_b32 s2, s2, 0x10001
4526 ; GFX9-NEXT: s_lshr_b32 s6, s6, 1
4527 ; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s4
4528 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s6
4529 ; GFX9-NEXT: s_lshr_b32 s6, s2, 16
4530 ; GFX9-NEXT: s_and_b32 s2, s2, 0xffff
4531 ; GFX9-NEXT: s_lshr_b32 s7, s4, 16
4532 ; GFX9-NEXT: s_lshr_b32 s2, s2, s4
4533 ; GFX9-NEXT: s_lshr_b32 s4, s6, s7
4534 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4
4535 ; GFX9-NEXT: s_or_b32 s0, s0, s2
4536 ; GFX9-NEXT: s_and_b32 s2, s5, 0xf000f
4537 ; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s5
4538 ; GFX9-NEXT: s_lshr_b32 s5, s1, 16
4539 ; GFX9-NEXT: s_lshr_b32 s6, s2, 16
4540 ; GFX9-NEXT: s_lshl_b32 s1, s1, s2
4541 ; GFX9-NEXT: s_lshl_b32 s2, s5, s6
4542 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2
4543 ; GFX9-NEXT: s_lshr_b32 s2, s3, 16
4544 ; GFX9-NEXT: s_and_b32 s3, s3, 0xffff
4545 ; GFX9-NEXT: s_lshr_b32 s3, s3, 0x10001
4546 ; GFX9-NEXT: s_lshr_b32 s2, s2, 1
4547 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s3, s2
4548 ; GFX9-NEXT: s_lshr_b32 s3, s2, 16
4549 ; GFX9-NEXT: s_and_b32 s2, s2, 0xffff
4550 ; GFX9-NEXT: s_lshr_b32 s5, s4, 16
4551 ; GFX9-NEXT: s_lshr_b32 s2, s2, s4
4552 ; GFX9-NEXT: s_lshr_b32 s3, s3, s5
4553 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s3
4554 ; GFX9-NEXT: s_or_b32 s1, s1, s2
4555 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
4556 ; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
4557 ; GFX9-NEXT: s_lshl_b32 s2, s2, 16
4558 ; GFX9-NEXT: s_or_b32 s0, s0, s2
4559 ; GFX9-NEXT: s_and_b32 s1, s1, 0xffff
4560 ; GFX9-NEXT: ; return to shader part epilog
4562 ; GFX10-LABEL: s_fshl_v3i16:
4564 ; GFX10-NEXT: s_and_b32 s9, s2, 0xffff
4565 ; GFX10-NEXT: s_lshr_b32 s2, s2, 16
4566 ; GFX10-NEXT: s_and_b32 s6, s4, 0xf000f
4567 ; GFX10-NEXT: s_lshr_b32 s9, s9, 0x10001
4568 ; GFX10-NEXT: s_lshr_b32 s2, s2, 1
4569 ; GFX10-NEXT: s_andn2_b32 s4, 0xf000f, s4
4570 ; GFX10-NEXT: s_lshr_b32 s7, s0, 16
4571 ; GFX10-NEXT: s_lshr_b32 s8, s6, 16
4572 ; GFX10-NEXT: s_pack_ll_b32_b16 s2, s9, s2
4573 ; GFX10-NEXT: s_lshl_b32 s0, s0, s6
4574 ; GFX10-NEXT: s_lshl_b32 s6, s7, s8
4575 ; GFX10-NEXT: s_lshr_b32 s7, s2, 16
4576 ; GFX10-NEXT: s_and_b32 s2, s2, 0xffff
4577 ; GFX10-NEXT: s_lshr_b32 s8, s4, 16
4578 ; GFX10-NEXT: s_lshr_b32 s2, s2, s4
4579 ; GFX10-NEXT: s_lshr_b32 s4, s7, s8
4580 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s6
4581 ; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s4
4582 ; GFX10-NEXT: s_and_b32 s7, s3, 0xffff
4583 ; GFX10-NEXT: s_lshr_b32 s3, s3, 16
4584 ; GFX10-NEXT: s_or_b32 s0, s0, s2
4585 ; GFX10-NEXT: s_and_b32 s2, s5, 0xf000f
4586 ; GFX10-NEXT: s_lshr_b32 s7, s7, 0x10001
4587 ; GFX10-NEXT: s_lshr_b32 s3, s3, 1
4588 ; GFX10-NEXT: s_andn2_b32 s4, 0xf000f, s5
4589 ; GFX10-NEXT: s_lshr_b32 s5, s1, 16
4590 ; GFX10-NEXT: s_lshr_b32 s6, s2, 16
4591 ; GFX10-NEXT: s_lshl_b32 s1, s1, s2
4592 ; GFX10-NEXT: s_pack_ll_b32_b16 s2, s7, s3
4593 ; GFX10-NEXT: s_lshl_b32 s3, s5, s6
4594 ; GFX10-NEXT: s_lshr_b32 s5, s2, 16
4595 ; GFX10-NEXT: s_and_b32 s2, s2, 0xffff
4596 ; GFX10-NEXT: s_lshr_b32 s6, s4, 16
4597 ; GFX10-NEXT: s_lshr_b32 s2, s2, s4
4598 ; GFX10-NEXT: s_lshr_b32 s4, s5, s6
4599 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s3
4600 ; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s4
4601 ; GFX10-NEXT: s_lshr_b32 s3, s0, 16
4602 ; GFX10-NEXT: s_and_b32 s0, s0, 0xffff
4603 ; GFX10-NEXT: s_lshl_b32 s3, s3, 16
4604 ; GFX10-NEXT: s_or_b32 s1, s1, s2
4605 ; GFX10-NEXT: s_or_b32 s0, s0, s3
4606 ; GFX10-NEXT: s_and_b32 s1, s1, 0xffff
4607 ; GFX10-NEXT: ; return to shader part epilog
4609 ; GFX11-LABEL: s_fshl_v3i16:
4611 ; GFX11-NEXT: s_and_b32 s9, s2, 0xffff
4612 ; GFX11-NEXT: s_lshr_b32 s2, s2, 16
4613 ; GFX11-NEXT: s_and_b32 s6, s4, 0xf000f
4614 ; GFX11-NEXT: s_lshr_b32 s9, s9, 0x10001
4615 ; GFX11-NEXT: s_lshr_b32 s2, s2, 1
4616 ; GFX11-NEXT: s_and_not1_b32 s4, 0xf000f, s4
4617 ; GFX11-NEXT: s_lshr_b32 s7, s0, 16
4618 ; GFX11-NEXT: s_lshr_b32 s8, s6, 16
4619 ; GFX11-NEXT: s_pack_ll_b32_b16 s2, s9, s2
4620 ; GFX11-NEXT: s_lshl_b32 s0, s0, s6
4621 ; GFX11-NEXT: s_lshl_b32 s6, s7, s8
4622 ; GFX11-NEXT: s_lshr_b32 s7, s2, 16
4623 ; GFX11-NEXT: s_and_b32 s2, s2, 0xffff
4624 ; GFX11-NEXT: s_lshr_b32 s8, s4, 16
4625 ; GFX11-NEXT: s_lshr_b32 s2, s2, s4
4626 ; GFX11-NEXT: s_lshr_b32 s4, s7, s8
4627 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s6
4628 ; GFX11-NEXT: s_pack_ll_b32_b16 s2, s2, s4
4629 ; GFX11-NEXT: s_and_b32 s7, s3, 0xffff
4630 ; GFX11-NEXT: s_lshr_b32 s3, s3, 16
4631 ; GFX11-NEXT: s_or_b32 s0, s0, s2
4632 ; GFX11-NEXT: s_and_b32 s2, s5, 0xf000f
4633 ; GFX11-NEXT: s_lshr_b32 s7, s7, 0x10001
4634 ; GFX11-NEXT: s_lshr_b32 s3, s3, 1
4635 ; GFX11-NEXT: s_and_not1_b32 s4, 0xf000f, s5
4636 ; GFX11-NEXT: s_lshr_b32 s5, s1, 16
4637 ; GFX11-NEXT: s_lshr_b32 s6, s2, 16
4638 ; GFX11-NEXT: s_lshl_b32 s1, s1, s2
4639 ; GFX11-NEXT: s_pack_ll_b32_b16 s2, s7, s3
4640 ; GFX11-NEXT: s_lshl_b32 s3, s5, s6
4641 ; GFX11-NEXT: s_lshr_b32 s5, s2, 16
4642 ; GFX11-NEXT: s_and_b32 s2, s2, 0xffff
4643 ; GFX11-NEXT: s_lshr_b32 s6, s4, 16
4644 ; GFX11-NEXT: s_lshr_b32 s2, s2, s4
4645 ; GFX11-NEXT: s_lshr_b32 s4, s5, s6
4646 ; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s3
4647 ; GFX11-NEXT: s_pack_ll_b32_b16 s2, s2, s4
4648 ; GFX11-NEXT: s_lshr_b32 s3, s0, 16
4649 ; GFX11-NEXT: s_and_b32 s0, s0, 0xffff
4650 ; GFX11-NEXT: s_lshl_b32 s3, s3, 16
4651 ; GFX11-NEXT: s_or_b32 s1, s1, s2
4652 ; GFX11-NEXT: s_or_b32 s0, s0, s3
4653 ; GFX11-NEXT: s_and_b32 s1, s1, 0xffff
4654 ; GFX11-NEXT: ; return to shader part epilog
4655 %result = call <3 x i16> @llvm.fshl.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
4656 %cast = bitcast <3 x i16> %result to i48
; Funnel shift left of <3 x i16> vectors with a variable per-lane amount.
; All three operands are ordinary (non-inreg) arguments, and the expected
; code is made of VALU (v_*) instructions ending in s_setpc_b64.
; Per 16-bit lane the checks follow the pattern
;   (lhs << (amt & 15)) | ((rhs >> 1) >> (~amt & 15))
; GFX6 handles each lane separately in a 32-bit register, GFX8 uses 16-bit
; shifts (SDWA forms for the high half of a dword), and GFX9 and later use
; the packed v_pk_lshlrev_b16 / v_pk_lshrrev_b16 with 0xf000f amount masks.
; The call result is bitcast to <3 x half> before it is returned.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
4660 define <3 x half> @v_fshl_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) {
4661 ; GFX6-LABEL: v_fshl_v3i16:
4663 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4664 ; GFX6-NEXT: v_and_b32_e32 v9, 15, v6
4665 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
4666 ; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4667 ; GFX6-NEXT: v_and_b32_e32 v9, 0xffff, v9
4668 ; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15
4669 ; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
4670 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v9, v0
4671 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3
4672 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
4673 ; GFX6-NEXT: v_and_b32_e32 v3, 15, v7
4674 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7
4675 ; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4676 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
4677 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v3, v1
4678 ; GFX6-NEXT: v_bfe_u32 v3, v4, 1, 15
4679 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v6
4680 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
4681 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v3
4682 ; GFX6-NEXT: v_and_b32_e32 v3, 15, v8
4683 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v8
4684 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
4685 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
4686 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, v3, v2
4687 ; GFX6-NEXT: v_bfe_u32 v3, v5, 1, 15
4688 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
4689 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
4690 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
4691 ; GFX6-NEXT: s_setpc_b64 s[30:31]
4693 ; GFX8-LABEL: v_fshl_v3i16:
4695 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4696 ; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v4
4697 ; GFX8-NEXT: v_and_b32_e32 v7, 15, v4
4698 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4
4699 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v4
4700 ; GFX8-NEXT: v_lshrrev_b16_e32 v8, 1, v2
4701 ; GFX8-NEXT: v_lshlrev_b16_e32 v7, v7, v0
4702 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v8
4703 ; GFX8-NEXT: v_or_b32_e32 v4, v7, v4
4704 ; GFX8-NEXT: v_and_b32_e32 v7, 15, v6
4705 ; GFX8-NEXT: v_xor_b32_e32 v6, -1, v6
4706 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4707 ; GFX8-NEXT: v_mov_b32_e32 v7, 1
4708 ; GFX8-NEXT: v_and_b32_e32 v6, 15, v6
4709 ; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4710 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v6, v2
4711 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
4712 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v5
4713 ; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5
4714 ; GFX8-NEXT: v_and_b32_e32 v5, 15, v5
4715 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, v2, v1
4716 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v3
4717 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v5, v2
4718 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
4719 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
4720 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
4721 ; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4722 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
4723 ; GFX8-NEXT: s_setpc_b64 s[30:31]
4725 ; GFX9-LABEL: v_fshl_v3i16:
4727 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4728 ; GFX9-NEXT: v_and_b32_e32 v6, 0xf000f, v4
4729 ; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4
4730 ; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4
4731 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1]
4732 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v6, v0
4733 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, v4, v2
4734 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
4735 ; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v5
4736 ; GFX9-NEXT: v_xor_b32_e32 v4, -1, v5
4737 ; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4
4738 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, v2, v1
4739 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, 1, v3 op_sel_hi:[0,1]
4740 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, v4, v2
4741 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v2
4742 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4744 ; GFX10-LABEL: v_fshl_v3i16:
4746 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4747 ; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4
4748 ; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5
4749 ; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4
4750 ; GFX10-NEXT: v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1]
4751 ; GFX10-NEXT: v_and_b32_e32 v5, 0xf000f, v5
4752 ; GFX10-NEXT: v_and_b32_e32 v6, 0xf000f, v6
4753 ; GFX10-NEXT: v_pk_lshrrev_b16 v3, 1, v3 op_sel_hi:[0,1]
4754 ; GFX10-NEXT: v_and_b32_e32 v7, 0xf000f, v7
4755 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v4, v0
4756 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, v5, v1
4757 ; GFX10-NEXT: v_pk_lshrrev_b16 v2, v6, v2
4758 ; GFX10-NEXT: v_pk_lshrrev_b16 v3, v7, v3
4759 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
4760 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v3
4761 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4763 ; GFX11-LABEL: v_fshl_v3i16:
4765 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4766 ; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4
4767 ; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5
4768 ; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4
4769 ; GFX11-NEXT: v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1]
4770 ; GFX11-NEXT: v_and_b32_e32 v5, 0xf000f, v5
4771 ; GFX11-NEXT: v_and_b32_e32 v6, 0xf000f, v6
4772 ; GFX11-NEXT: v_pk_lshrrev_b16 v3, 1, v3 op_sel_hi:[0,1]
4773 ; GFX11-NEXT: v_and_b32_e32 v7, 0xf000f, v7
4774 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, v4, v0
4775 ; GFX11-NEXT: v_pk_lshlrev_b16 v1, v5, v1
4776 ; GFX11-NEXT: v_pk_lshrrev_b16 v2, v6, v2
4777 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
4778 ; GFX11-NEXT: v_pk_lshrrev_b16 v3, v7, v3
4779 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v2
4780 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
4781 ; GFX11-NEXT: v_or_b32_e32 v1, v1, v3
4782 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4783 %result = call <3 x i16> @llvm.fshl.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
4784 %cast.result = bitcast <3 x i16> %result to <3 x half>
4785 ret <3 x half> %cast.result
; Funnel shift left of <4 x i16> vectors with a variable per-lane amount in
; an amdgpu_ps function. All three operands are inreg, and the expected code
; consists only of SALU (s_*) instructions.
; Per 16-bit lane the checks follow the pattern
;   (lhs << (amt & 15)) | ((rhs >> 1) >> (~amt & 15))
; GFX6 shifts the four lanes one by one and repacks them into two dwords
; with s_lshl_b32 by 16 plus s_or_b32. GFX8 first splits each dword into
; halves with s_lshr_b32 by 16. GFX9 and later mask the amounts with
; 0xf000f, shift the low and high halves separately and rejoin them with
; s_pack_ll_b32_b16. The GFX11 sequence matches the GFX10 one except for the
; s_and_not1_b32 mnemonic.
; Review note - the s_lshr_b32 by 0x10001 looks like a packed <1, 1> constant
; used as a scalar shift amount; presumably only its low bits take effect
; (TODO confirm against the ISA manual).
; The call result is bitcast to <2 x i32> before it is returned.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
4788 define amdgpu_ps <2 x i32> @s_fshl_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs, <4 x i16> inreg %amt) {
4789 ; GFX6-LABEL: s_fshl_v4i16:
4791 ; GFX6-NEXT: s_and_b32 s12, s8, 15
4792 ; GFX6-NEXT: s_andn2_b32 s8, 15, s8
4793 ; GFX6-NEXT: s_and_b32 s12, 0xffff, s12
4794 ; GFX6-NEXT: s_bfe_u32 s4, s4, 0xf0001
4795 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
4796 ; GFX6-NEXT: s_lshl_b32 s0, s0, s12
4797 ; GFX6-NEXT: s_lshr_b32 s4, s4, s8
4798 ; GFX6-NEXT: s_or_b32 s0, s0, s4
4799 ; GFX6-NEXT: s_and_b32 s4, s9, 15
4800 ; GFX6-NEXT: s_andn2_b32 s8, 15, s9
4801 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
4802 ; GFX6-NEXT: s_lshl_b32 s1, s1, s4
4803 ; GFX6-NEXT: s_bfe_u32 s4, s5, 0xf0001
4804 ; GFX6-NEXT: s_and_b32 s5, 0xffff, s8
4805 ; GFX6-NEXT: s_lshr_b32 s4, s4, s5
4806 ; GFX6-NEXT: s_or_b32 s1, s1, s4
4807 ; GFX6-NEXT: s_and_b32 s4, s10, 15
4808 ; GFX6-NEXT: s_andn2_b32 s5, 15, s10
4809 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
4810 ; GFX6-NEXT: s_lshl_b32 s2, s2, s4
4811 ; GFX6-NEXT: s_bfe_u32 s4, s6, 0xf0001
4812 ; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
4813 ; GFX6-NEXT: s_lshr_b32 s4, s4, s5
4814 ; GFX6-NEXT: s_or_b32 s2, s2, s4
4815 ; GFX6-NEXT: s_and_b32 s4, s11, 15
4816 ; GFX6-NEXT: s_andn2_b32 s5, 15, s11
4817 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
4818 ; GFX6-NEXT: s_lshl_b32 s3, s3, s4
4819 ; GFX6-NEXT: s_bfe_u32 s4, s7, 0xf0001
4820 ; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
4821 ; GFX6-NEXT: s_lshr_b32 s4, s4, s5
4822 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
4823 ; GFX6-NEXT: s_or_b32 s3, s3, s4
4824 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
4825 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
4826 ; GFX6-NEXT: s_or_b32 s0, s0, s1
4827 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
4828 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s3
4829 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
4830 ; GFX6-NEXT: s_or_b32 s1, s1, s2
4831 ; GFX6-NEXT: ; return to shader part epilog
4833 ; GFX8-LABEL: s_fshl_v4i16:
4835 ; GFX8-NEXT: s_lshr_b32 s8, s2, 16
4836 ; GFX8-NEXT: s_lshr_b32 s10, s4, 16
4837 ; GFX8-NEXT: s_and_b32 s12, s4, 15
4838 ; GFX8-NEXT: s_andn2_b32 s4, 15, s4
4839 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4840 ; GFX8-NEXT: s_and_b32 s12, 0xffff, s12
4841 ; GFX8-NEXT: s_lshr_b32 s2, s2, 1
4842 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
4843 ; GFX8-NEXT: s_lshr_b32 s6, s0, 16
4844 ; GFX8-NEXT: s_lshl_b32 s0, s0, s12
4845 ; GFX8-NEXT: s_lshr_b32 s2, s2, s4
4846 ; GFX8-NEXT: s_or_b32 s0, s0, s2
4847 ; GFX8-NEXT: s_and_b32 s2, s10, 15
4848 ; GFX8-NEXT: s_andn2_b32 s4, 15, s10
4849 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4850 ; GFX8-NEXT: s_lshl_b32 s2, s6, s2
4851 ; GFX8-NEXT: s_lshr_b32 s6, s8, 1
4852 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
4853 ; GFX8-NEXT: s_lshr_b32 s4, s6, s4
4854 ; GFX8-NEXT: s_or_b32 s2, s2, s4
4855 ; GFX8-NEXT: s_and_b32 s4, s5, 15
4856 ; GFX8-NEXT: s_lshr_b32 s9, s3, 16
4857 ; GFX8-NEXT: s_lshr_b32 s11, s5, 16
4858 ; GFX8-NEXT: s_andn2_b32 s5, 15, s5
4859 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
4860 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
4861 ; GFX8-NEXT: s_lshr_b32 s7, s1, 16
4862 ; GFX8-NEXT: s_lshl_b32 s1, s1, s4
4863 ; GFX8-NEXT: s_lshr_b32 s3, s3, 1
4864 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
4865 ; GFX8-NEXT: s_lshr_b32 s3, s3, s4
4866 ; GFX8-NEXT: s_or_b32 s1, s1, s3
4867 ; GFX8-NEXT: s_and_b32 s3, s11, 15
4868 ; GFX8-NEXT: s_andn2_b32 s4, 15, s11
4869 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
4870 ; GFX8-NEXT: s_lshr_b32 s5, s9, 1
4871 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
4872 ; GFX8-NEXT: s_lshl_b32 s3, s7, s3
4873 ; GFX8-NEXT: s_lshr_b32 s4, s5, s4
4874 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4875 ; GFX8-NEXT: s_or_b32 s3, s3, s4
4876 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
4877 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
4878 ; GFX8-NEXT: s_or_b32 s0, s0, s2
4879 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
4880 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
4881 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
4882 ; GFX8-NEXT: s_or_b32 s1, s1, s2
4883 ; GFX8-NEXT: ; return to shader part epilog
4885 ; GFX9-LABEL: s_fshl_v4i16:
4887 ; GFX9-NEXT: s_and_b32 s6, s4, 0xf000f
4888 ; GFX9-NEXT: s_lshr_b32 s7, s0, 16
4889 ; GFX9-NEXT: s_lshr_b32 s8, s6, 16
4890 ; GFX9-NEXT: s_lshl_b32 s0, s0, s6
4891 ; GFX9-NEXT: s_lshl_b32 s6, s7, s8
4892 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s6
4893 ; GFX9-NEXT: s_lshr_b32 s6, s2, 16
4894 ; GFX9-NEXT: s_and_b32 s2, s2, 0xffff
4895 ; GFX9-NEXT: s_lshr_b32 s2, s2, 0x10001
4896 ; GFX9-NEXT: s_lshr_b32 s6, s6, 1
4897 ; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s4
4898 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s6
4899 ; GFX9-NEXT: s_lshr_b32 s6, s2, 16
4900 ; GFX9-NEXT: s_and_b32 s2, s2, 0xffff
4901 ; GFX9-NEXT: s_lshr_b32 s7, s4, 16
4902 ; GFX9-NEXT: s_lshr_b32 s2, s2, s4
4903 ; GFX9-NEXT: s_lshr_b32 s4, s6, s7
4904 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4
4905 ; GFX9-NEXT: s_or_b32 s0, s0, s2
4906 ; GFX9-NEXT: s_and_b32 s2, s5, 0xf000f
4907 ; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s5
4908 ; GFX9-NEXT: s_lshr_b32 s5, s1, 16
4909 ; GFX9-NEXT: s_lshr_b32 s6, s2, 16
4910 ; GFX9-NEXT: s_lshl_b32 s1, s1, s2
4911 ; GFX9-NEXT: s_lshl_b32 s2, s5, s6
4912 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2
4913 ; GFX9-NEXT: s_lshr_b32 s2, s3, 16
4914 ; GFX9-NEXT: s_and_b32 s3, s3, 0xffff
4915 ; GFX9-NEXT: s_lshr_b32 s3, s3, 0x10001
4916 ; GFX9-NEXT: s_lshr_b32 s2, s2, 1
4917 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s3, s2
4918 ; GFX9-NEXT: s_lshr_b32 s3, s2, 16
4919 ; GFX9-NEXT: s_and_b32 s2, s2, 0xffff
4920 ; GFX9-NEXT: s_lshr_b32 s5, s4, 16
4921 ; GFX9-NEXT: s_lshr_b32 s2, s2, s4
4922 ; GFX9-NEXT: s_lshr_b32 s3, s3, s5
4923 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s3
4924 ; GFX9-NEXT: s_or_b32 s1, s1, s2
4925 ; GFX9-NEXT: ; return to shader part epilog
4927 ; GFX10-LABEL: s_fshl_v4i16:
4929 ; GFX10-NEXT: s_and_b32 s9, s2, 0xffff
4930 ; GFX10-NEXT: s_lshr_b32 s2, s2, 16
4931 ; GFX10-NEXT: s_and_b32 s6, s4, 0xf000f
4932 ; GFX10-NEXT: s_lshr_b32 s9, s9, 0x10001
4933 ; GFX10-NEXT: s_lshr_b32 s2, s2, 1
4934 ; GFX10-NEXT: s_andn2_b32 s4, 0xf000f, s4
4935 ; GFX10-NEXT: s_lshr_b32 s7, s0, 16
4936 ; GFX10-NEXT: s_lshr_b32 s8, s6, 16
4937 ; GFX10-NEXT: s_pack_ll_b32_b16 s2, s9, s2
4938 ; GFX10-NEXT: s_lshl_b32 s0, s0, s6
4939 ; GFX10-NEXT: s_lshl_b32 s6, s7, s8
4940 ; GFX10-NEXT: s_lshr_b32 s7, s2, 16
4941 ; GFX10-NEXT: s_and_b32 s2, s2, 0xffff
4942 ; GFX10-NEXT: s_lshr_b32 s8, s4, 16
4943 ; GFX10-NEXT: s_lshr_b32 s2, s2, s4
4944 ; GFX10-NEXT: s_lshr_b32 s4, s7, s8
4945 ; GFX10-NEXT: s_and_b32 s8, s3, 0xffff
4946 ; GFX10-NEXT: s_lshr_b32 s3, s3, 16
4947 ; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s4
4948 ; GFX10-NEXT: s_and_b32 s4, s5, 0xf000f
4949 ; GFX10-NEXT: s_lshr_b32 s8, s8, 0x10001
4950 ; GFX10-NEXT: s_lshr_b32 s3, s3, 1
4951 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s6
4952 ; GFX10-NEXT: s_andn2_b32 s5, 0xf000f, s5
4953 ; GFX10-NEXT: s_lshr_b32 s6, s1, 16
4954 ; GFX10-NEXT: s_lshr_b32 s7, s4, 16
4955 ; GFX10-NEXT: s_pack_ll_b32_b16 s3, s8, s3
4956 ; GFX10-NEXT: s_lshl_b32 s1, s1, s4
4957 ; GFX10-NEXT: s_lshl_b32 s4, s6, s7
4958 ; GFX10-NEXT: s_lshr_b32 s6, s3, 16
4959 ; GFX10-NEXT: s_and_b32 s3, s3, 0xffff
4960 ; GFX10-NEXT: s_lshr_b32 s7, s5, 16
4961 ; GFX10-NEXT: s_lshr_b32 s3, s3, s5
4962 ; GFX10-NEXT: s_lshr_b32 s5, s6, s7
4963 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4
4964 ; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s5
4965 ; GFX10-NEXT: s_or_b32 s0, s0, s2
4966 ; GFX10-NEXT: s_or_b32 s1, s1, s3
4967 ; GFX10-NEXT: ; return to shader part epilog
4969 ; GFX11-LABEL: s_fshl_v4i16:
4971 ; GFX11-NEXT: s_and_b32 s9, s2, 0xffff
4972 ; GFX11-NEXT: s_lshr_b32 s2, s2, 16
4973 ; GFX11-NEXT: s_and_b32 s6, s4, 0xf000f
4974 ; GFX11-NEXT: s_lshr_b32 s9, s9, 0x10001
4975 ; GFX11-NEXT: s_lshr_b32 s2, s2, 1
4976 ; GFX11-NEXT: s_and_not1_b32 s4, 0xf000f, s4
4977 ; GFX11-NEXT: s_lshr_b32 s7, s0, 16
4978 ; GFX11-NEXT: s_lshr_b32 s8, s6, 16
4979 ; GFX11-NEXT: s_pack_ll_b32_b16 s2, s9, s2
4980 ; GFX11-NEXT: s_lshl_b32 s0, s0, s6
4981 ; GFX11-NEXT: s_lshl_b32 s6, s7, s8
4982 ; GFX11-NEXT: s_lshr_b32 s7, s2, 16
4983 ; GFX11-NEXT: s_and_b32 s2, s2, 0xffff
4984 ; GFX11-NEXT: s_lshr_b32 s8, s4, 16
4985 ; GFX11-NEXT: s_lshr_b32 s2, s2, s4
4986 ; GFX11-NEXT: s_lshr_b32 s4, s7, s8
4987 ; GFX11-NEXT: s_and_b32 s8, s3, 0xffff
4988 ; GFX11-NEXT: s_lshr_b32 s3, s3, 16
4989 ; GFX11-NEXT: s_pack_ll_b32_b16 s2, s2, s4
4990 ; GFX11-NEXT: s_and_b32 s4, s5, 0xf000f
4991 ; GFX11-NEXT: s_lshr_b32 s8, s8, 0x10001
4992 ; GFX11-NEXT: s_lshr_b32 s3, s3, 1
4993 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s6
4994 ; GFX11-NEXT: s_and_not1_b32 s5, 0xf000f, s5
4995 ; GFX11-NEXT: s_lshr_b32 s6, s1, 16
4996 ; GFX11-NEXT: s_lshr_b32 s7, s4, 16
4997 ; GFX11-NEXT: s_pack_ll_b32_b16 s3, s8, s3
4998 ; GFX11-NEXT: s_lshl_b32 s1, s1, s4
4999 ; GFX11-NEXT: s_lshl_b32 s4, s6, s7
5000 ; GFX11-NEXT: s_lshr_b32 s6, s3, 16
5001 ; GFX11-NEXT: s_and_b32 s3, s3, 0xffff
5002 ; GFX11-NEXT: s_lshr_b32 s7, s5, 16
5003 ; GFX11-NEXT: s_lshr_b32 s3, s3, s5
5004 ; GFX11-NEXT: s_lshr_b32 s5, s6, s7
5005 ; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s4
5006 ; GFX11-NEXT: s_pack_ll_b32_b16 s3, s3, s5
5007 ; GFX11-NEXT: s_or_b32 s0, s0, s2
5008 ; GFX11-NEXT: s_or_b32 s1, s1, s3
5009 ; GFX11-NEXT: ; return to shader part epilog
5010 %result = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
5011 %cast.result = bitcast <4 x i16> %result to <2 x i32>
5012 ret <2 x i32> %cast.result
; Funnel shift left of <4 x i16> vectors with a variable per-lane amount.
; All three operands are ordinary (non-inreg) arguments, and the expected
; code is made of VALU (v_*) instructions ending in s_setpc_b64.
; Per 16-bit lane the checks follow the pattern
;   (lhs << (amt & 15)) | ((rhs >> 1) >> (~amt & 15))
; GFX6 handles each of the four lanes separately in a 32-bit register, GFX8
; uses 16-bit shifts (SDWA forms for the high half of a dword) and merges
; the halves with v_or_b32_sdwa, and GFX9 and later use the packed
; v_pk_lshlrev_b16 / v_pk_lshrrev_b16 with 0xf000f amount masks.
; The call result is bitcast to <4 x half> before it is returned.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5015 define <4 x half> @v_fshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) {
5016 ; GFX6-LABEL: v_fshl_v4i16:
5018 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5019 ; GFX6-NEXT: v_and_b32_e32 v12, 15, v8
5020 ; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
5021 ; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
5022 ; GFX6-NEXT: v_and_b32_e32 v12, 0xffff, v12
5023 ; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15
5024 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8
5025 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v12, v0
5026 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4
5027 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
5028 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v9
5029 ; GFX6-NEXT: v_xor_b32_e32 v8, -1, v9
5030 ; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
5031 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
5032 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1
5033 ; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15
5034 ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v8
5035 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
5036 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v4
5037 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v10
5038 ; GFX6-NEXT: v_xor_b32_e32 v5, -1, v10
5039 ; GFX6-NEXT: v_and_b32_e32 v5, 15, v5
5040 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
5041 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, v4, v2
5042 ; GFX6-NEXT: v_bfe_u32 v4, v6, 1, 15
5043 ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5
5044 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
5045 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
5046 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v11
5047 ; GFX6-NEXT: v_xor_b32_e32 v5, -1, v11
5048 ; GFX6-NEXT: v_and_b32_e32 v5, 15, v5
5049 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
5050 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3
5051 ; GFX6-NEXT: v_bfe_u32 v4, v7, 1, 15
5052 ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5
5053 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
5054 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
5055 ; GFX6-NEXT: s_setpc_b64 s[30:31]
5057 ; GFX8-LABEL: v_fshl_v4i16:
5059 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5060 ; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v4
5061 ; GFX8-NEXT: v_and_b32_e32 v8, 15, v4
5062 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4
5063 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v4
5064 ; GFX8-NEXT: v_lshrrev_b16_e32 v9, 1, v2
5065 ; GFX8-NEXT: v_lshlrev_b16_e32 v8, v8, v0
5066 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v9
5067 ; GFX8-NEXT: v_or_b32_e32 v4, v8, v4
5068 ; GFX8-NEXT: v_and_b32_e32 v8, 15, v6
5069 ; GFX8-NEXT: v_xor_b32_e32 v6, -1, v6
5070 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5071 ; GFX8-NEXT: v_mov_b32_e32 v8, 1
5072 ; GFX8-NEXT: v_and_b32_e32 v6, 15, v6
5073 ; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5074 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v6, v2
5075 ; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v5
5076 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
5077 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v5
5078 ; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5
5079 ; GFX8-NEXT: v_and_b32_e32 v5, 15, v5
5080 ; GFX8-NEXT: v_lshrrev_b16_e32 v6, 1, v3
5081 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, v2, v1
5082 ; GFX8-NEXT: v_lshrrev_b16_e32 v5, v5, v6
5083 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v5
5084 ; GFX8-NEXT: v_and_b32_e32 v5, 15, v7
5085 ; GFX8-NEXT: v_xor_b32_e32 v6, -1, v7
5086 ; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5087 ; GFX8-NEXT: v_mov_b32_e32 v5, 1
5088 ; GFX8-NEXT: v_and_b32_e32 v6, 15, v6
5089 ; GFX8-NEXT: v_lshrrev_b16_sdwa v3, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5090 ; GFX8-NEXT: v_lshrrev_b16_e32 v3, v6, v3
5091 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
5092 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
5093 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
5094 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
5095 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
5096 ; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5097 ; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5098 ; GFX8-NEXT: s_setpc_b64 s[30:31]
5100 ; GFX9-LABEL: v_fshl_v4i16:
5102 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5103 ; GFX9-NEXT: v_and_b32_e32 v6, 0xf000f, v4
5104 ; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4
5105 ; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4
5106 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1]
5107 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v6, v0
5108 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, v4, v2
5109 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
5110 ; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v5
5111 ; GFX9-NEXT: v_xor_b32_e32 v4, -1, v5
5112 ; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4
5113 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, v2, v1
5114 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, 1, v3 op_sel_hi:[0,1]
5115 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, v4, v2
5116 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v2
5117 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5119 ; GFX10-LABEL: v_fshl_v4i16:
5121 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5122 ; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4
5123 ; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5
5124 ; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4
5125 ; GFX10-NEXT: v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1]
5126 ; GFX10-NEXT: v_and_b32_e32 v5, 0xf000f, v5
5127 ; GFX10-NEXT: v_and_b32_e32 v6, 0xf000f, v6
5128 ; GFX10-NEXT: v_pk_lshrrev_b16 v3, 1, v3 op_sel_hi:[0,1]
5129 ; GFX10-NEXT: v_and_b32_e32 v7, 0xf000f, v7
5130 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v4, v0
5131 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, v5, v1
5132 ; GFX10-NEXT: v_pk_lshrrev_b16 v2, v6, v2
5133 ; GFX10-NEXT: v_pk_lshrrev_b16 v3, v7, v3
5134 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
5135 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v3
5136 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5138 ; GFX11-LABEL: v_fshl_v4i16:
5140 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5141 ; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4
5142 ; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5
5143 ; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4
5144 ; GFX11-NEXT: v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1]
5145 ; GFX11-NEXT: v_and_b32_e32 v5, 0xf000f, v5
5146 ; GFX11-NEXT: v_and_b32_e32 v6, 0xf000f, v6
5147 ; GFX11-NEXT: v_pk_lshrrev_b16 v3, 1, v3 op_sel_hi:[0,1]
5148 ; GFX11-NEXT: v_and_b32_e32 v7, 0xf000f, v7
5149 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, v4, v0
5150 ; GFX11-NEXT: v_pk_lshlrev_b16 v1, v5, v1
5151 ; GFX11-NEXT: v_pk_lshrrev_b16 v2, v6, v2
5152 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
5153 ; GFX11-NEXT: v_pk_lshrrev_b16 v3, v7, v3
5154 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v2
5155 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
5156 ; GFX11-NEXT: v_or_b32_e32 v1, v1, v3
5157 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5158 %result = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
5159 %cast.result = bitcast <4 x i16> %result to <4 x half>
5160 ret <4 x half> %cast.result
; 64-bit funnel shift left with a variable amount in an amdgpu_ps function;
; all three operands are inreg and the expected code is pure SALU.
; The checks follow the pattern
;   (lhs << (amt & 63)) | ((rhs >> 1) >> (~amt & 63))
; The shared GCN prefix covers the tahiti, fiji, gfx900 and gfx1010 runs;
; the gfx1100 run is checked separately because it prints s_and_not1_b64 and
; inserts an s_delay_alu.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5163 define amdgpu_ps i64 @s_fshl_i64(i64 inreg %lhs, i64 inreg %rhs, i64 inreg %amt) {
5164 ; GCN-LABEL: s_fshl_i64:
5166 ; GCN-NEXT: s_and_b64 s[6:7], s[4:5], 63
5167 ; GCN-NEXT: s_andn2_b64 s[4:5], 63, s[4:5]
5168 ; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], 1
5169 ; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], s6
5170 ; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], s4
5171 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5172 ; GCN-NEXT: ; return to shader part epilog
5174 ; GFX11-LABEL: s_fshl_i64:
5176 ; GFX11-NEXT: s_and_b64 s[6:7], s[4:5], 63
5177 ; GFX11-NEXT: s_and_not1_b64 s[4:5], 63, s[4:5]
5178 ; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], 1
5179 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s6
5180 ; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], s4
5181 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5182 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5183 ; GFX11-NEXT: ; return to shader part epilog
5184 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt)
; 64-bit funnel shift left by the constant 5 with inreg (scalar) operands.
; With a known amount no masking is expected: lhs is shifted left by 5 and
; the bits coming from rhs are taken from its high dword shifted right by
; 27, with the upper half of that temporary pair zeroed before the 64-bit OR.
; The gfx1100 run differs only by an extra s_delay_alu.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5188 define amdgpu_ps i64 @s_fshl_i64_5(i64 inreg %lhs, i64 inreg %rhs) {
5189 ; GCN-LABEL: s_fshl_i64_5:
5191 ; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
5192 ; GCN-NEXT: s_lshr_b32 s2, s3, 27
5193 ; GCN-NEXT: s_mov_b32 s3, 0
5194 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5195 ; GCN-NEXT: ; return to shader part epilog
5197 ; GFX11-LABEL: s_fshl_i64_5:
5199 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
5200 ; GFX11-NEXT: s_lshr_b32 s2, s3, 27
5201 ; GFX11-NEXT: s_mov_b32 s3, 0
5202 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5203 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5204 ; GFX11-NEXT: ; return to shader part epilog
5205 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 5)
; 64-bit funnel shift left by the constant 32 with inreg (scalar) operands.
; No shift instruction is expected: the low dword of lhs is moved into the
; high result dword, the high dword of rhs supplies the low dword, and the
; remaining halves are zeroed before the 64-bit OR.
; The gfx1100 run differs only by an extra s_delay_alu.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5209 define amdgpu_ps i64 @s_fshl_i64_32(i64 inreg %lhs, i64 inreg %rhs) {
5210 ; GCN-LABEL: s_fshl_i64_32:
5212 ; GCN-NEXT: s_mov_b32 s1, s0
5213 ; GCN-NEXT: s_mov_b32 s0, 0
5214 ; GCN-NEXT: s_mov_b32 s2, s3
5215 ; GCN-NEXT: s_mov_b32 s3, s0
5216 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5217 ; GCN-NEXT: ; return to shader part epilog
5219 ; GFX11-LABEL: s_fshl_i64_32:
5221 ; GFX11-NEXT: s_mov_b32 s1, s0
5222 ; GFX11-NEXT: s_mov_b32 s0, 0
5223 ; GFX11-NEXT: s_mov_b32 s2, s3
5224 ; GFX11-NEXT: s_mov_b32 s3, s0
5225 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5226 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5227 ; GFX11-NEXT: ; return to shader part epilog
5228 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 32)
; 64-bit funnel shift left by the constant 48 with inreg (scalar) operands.
; The expected code builds the lhs part from a 32-bit shift (low dword of
; lhs shifted left by 16 into the high result dword, low dword zeroed) and
; ORs it with rhs shifted right by 16 as a 64-bit value.
; The gfx1100 run differs only by an extra s_delay_alu.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5232 define amdgpu_ps i64 @s_fshl_i64_48(i64 inreg %lhs, i64 inreg %rhs) {
5233 ; GCN-LABEL: s_fshl_i64_48:
5235 ; GCN-NEXT: s_lshl_b32 s1, s0, 16
5236 ; GCN-NEXT: s_mov_b32 s0, 0
5237 ; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], 16
5238 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5239 ; GCN-NEXT: ; return to shader part epilog
5241 ; GFX11-LABEL: s_fshl_i64_48:
5243 ; GFX11-NEXT: s_lshl_b32 s1, s0, 16
5244 ; GFX11-NEXT: s_mov_b32 s0, 0
5245 ; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], 16
5246 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5247 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5248 ; GFX11-NEXT: ; return to shader part epilog
5249 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 48)
; 64-bit funnel shift left with a variable amount and ordinary (non-inreg)
; operands; the expected code is VALU and returns with s_setpc_b64.
; The checks follow the pattern
;   (lhs << (amt & 63)) | ((rhs >> 1) >> (~amt & 63))
; Only the low dword of amt (v4) is masked and inverted. The 64-bit result
; is combined with two 32-bit v_or_b32 instructions. GFX6 prints the 64-bit
; shifts as v_lshl_b64 / v_lshr_b64, later targets as v_lshlrev_b64 /
; v_lshrrev_b64, and the gfx1100 run adds s_delay_alu hints.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5253 define i64 @v_fshl_i64(i64 %lhs, i64 %rhs, i64 %amt) {
5254 ; GFX6-LABEL: v_fshl_i64:
5256 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5257 ; GFX6-NEXT: v_and_b32_e32 v5, 63, v4
5258 ; GFX6-NEXT: v_not_b32_e32 v4, v4
5259 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], 1
5260 ; GFX6-NEXT: v_and_b32_e32 v4, 63, v4
5261 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v5
5262 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], v4
5263 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
5264 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v3
5265 ; GFX6-NEXT: s_setpc_b64 s[30:31]
5267 ; GFX8-LABEL: v_fshl_i64:
5269 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5270 ; GFX8-NEXT: v_and_b32_e32 v5, 63, v4
5271 ; GFX8-NEXT: v_not_b32_e32 v4, v4
5272 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
5273 ; GFX8-NEXT: v_and_b32_e32 v4, 63, v4
5274 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1]
5275 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3]
5276 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
5277 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
5278 ; GFX8-NEXT: s_setpc_b64 s[30:31]
5280 ; GFX9-LABEL: v_fshl_i64:
5282 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5283 ; GFX9-NEXT: v_and_b32_e32 v5, 63, v4
5284 ; GFX9-NEXT: v_not_b32_e32 v4, v4
5285 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
5286 ; GFX9-NEXT: v_and_b32_e32 v4, 63, v4
5287 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1]
5288 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3]
5289 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
5290 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v3
5291 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5293 ; GFX10-LABEL: v_fshl_i64:
5295 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5296 ; GFX10-NEXT: v_not_b32_e32 v5, v4
5297 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
5298 ; GFX10-NEXT: v_and_b32_e32 v4, 63, v4
5299 ; GFX10-NEXT: v_and_b32_e32 v5, 63, v5
5300 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
5301 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3]
5302 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
5303 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v3
5304 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5306 ; GFX11-LABEL: v_fshl_i64:
5308 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5309 ; GFX11-NEXT: v_not_b32_e32 v5, v4
5310 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
5311 ; GFX11-NEXT: v_and_b32_e32 v4, 63, v4
5312 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
5313 ; GFX11-NEXT: v_and_b32_e32 v5, 63, v5
5314 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
5315 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5316 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3]
5317 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v2
5318 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
5319 ; GFX11-NEXT: v_or_b32_e32 v1, v1, v3
5320 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5321 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt)
; 64-bit funnel shift left by the constant 5 with ordinary (non-inreg)
; operands. The expected code shifts lhs left by 5 as a 64-bit value, takes
; the incoming bits from the high dword of rhs shifted right by 27, and ORs
; them into the low result dword only.
; GFX6 prints the 64-bit shift as v_lshl_b64, later targets as
; v_lshlrev_b64; the gfx1100 run adds an s_delay_alu.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5325 define i64 @v_fshl_i64_5(i64 %lhs, i64 %rhs) {
5326 ; GFX6-LABEL: v_fshl_i64_5:
5328 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5329 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 5
5330 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 27, v3
5331 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
5332 ; GFX6-NEXT: s_setpc_b64 s[30:31]
5334 ; GFX8-LABEL: v_fshl_i64_5:
5336 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5337 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1]
5338 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 27, v3
5339 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
5340 ; GFX8-NEXT: s_setpc_b64 s[30:31]
5342 ; GFX9-LABEL: v_fshl_i64_5:
5344 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5345 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1]
5346 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, 27, v3
5347 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
5348 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5350 ; GFX10-LABEL: v_fshl_i64_5:
5352 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5353 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1]
5354 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 27, v3
5355 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
5356 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5358 ; GFX11-LABEL: v_fshl_i64_5:
5360 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5361 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1]
5362 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 27, v3
5363 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
5364 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v2
5365 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5366 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 5)
; 64-bit funnel shift left by the constant 32 with ordinary (non-inreg)
; operands. The expected code is just two register moves: the low dword of
; lhs (v0) becomes the high result dword and the high dword of rhs (v3)
; becomes the low result dword.
; The gfx1100 run is checked separately because it fuses both moves into a
; single v_dual_mov_b32.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5370 define i64 @v_fshl_i64_32(i64 %lhs, i64 %rhs) {
5371 ; GCN-LABEL: v_fshl_i64_32:
5373 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5374 ; GCN-NEXT: v_mov_b32_e32 v1, v0
5375 ; GCN-NEXT: v_mov_b32_e32 v0, v3
5376 ; GCN-NEXT: s_setpc_b64 s[30:31]
5378 ; GFX11-LABEL: v_fshl_i64_32:
5380 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5381 ; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, v3
5382 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5383 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 32)
; 64-bit funnel shift left by the constant 48 with ordinary (non-inreg)
; operands. The expected code saves the low dword of lhs in v4, shifts rhs
; right by 16 as a 64-bit value into the result registers, and then ORs the
; saved dword shifted left by 16 into the high result dword.
; GFX6 and GFX8 use a separate shift and OR for that last step; GFX9 and
; later fold it into one v_lshl_or_b32.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5387 define i64 @v_fshl_i64_48(i64 %lhs, i64 %rhs) {
5388 ; GFX6-LABEL: v_fshl_i64_48:
5390 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5391 ; GFX6-NEXT: v_mov_b32_e32 v4, v0
5392 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[2:3], 16
5393 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v4
5394 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
5395 ; GFX6-NEXT: s_setpc_b64 s[30:31]
5397 ; GFX8-LABEL: v_fshl_i64_48:
5399 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5400 ; GFX8-NEXT: v_mov_b32_e32 v4, v0
5401 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3]
5402 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
5403 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
5404 ; GFX8-NEXT: s_setpc_b64 s[30:31]
5406 ; GFX9-LABEL: v_fshl_i64_48:
5408 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5409 ; GFX9-NEXT: v_mov_b32_e32 v4, v0
5410 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3]
5411 ; GFX9-NEXT: v_lshl_or_b32 v1, v4, 16, v1
5412 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5414 ; GFX10-LABEL: v_fshl_i64_48:
5416 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5417 ; GFX10-NEXT: v_mov_b32_e32 v4, v0
5418 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3]
5419 ; GFX10-NEXT: v_lshl_or_b32 v1, v4, 16, v1
5420 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5422 ; GFX11-LABEL: v_fshl_i64_48:
5424 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5425 ; GFX11-NEXT: v_mov_b32_e32 v4, v0
5426 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3]
5427 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
5428 ; GFX11-NEXT: v_lshl_or_b32 v1, v4, 16, v1
5429 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5430 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 48)
; 64-bit funnel shift left in an amdgpu_ps function with mixed operand
; kinds: lhs and rhs are inreg while amt is an ordinary argument (the "ssv"
; suffix presumably abbreviates scalar, scalar, vector - TODO confirm).
; The expected code masks and inverts the amount with VALU instructions,
; performs the fixed rhs >> 1 as a scalar s_lshr_b64, and does both
; variable 64-bit shifts in the VALU with an SGPR pair as the shifted
; operand. The pattern is
;   (lhs << (amt & 63)) | ((rhs >> 1) >> (~amt & 63))
; The call result is bitcast to <2 x float> before it is returned.
; The check lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
5434 define amdgpu_ps <2 x float> @v_fshl_i64_ssv(i64 inreg %lhs, i64 inreg %rhs, i64 %amt) {
5435 ; GFX6-LABEL: v_fshl_i64_ssv:
5437 ; GFX6-NEXT: v_and_b32_e32 v1, 63, v0
5438 ; GFX6-NEXT: v_not_b32_e32 v0, v0
5439 ; GFX6-NEXT: v_and_b32_e32 v2, 63, v0
5440 ; GFX6-NEXT: v_lshl_b64 v[0:1], s[0:1], v1
5441 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[2:3], 1
5442 ; GFX6-NEXT: v_lshr_b64 v[2:3], s[0:1], v2
5443 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
5444 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v3
5445 ; GFX6-NEXT: ; return to shader part epilog
5447 ; GFX8-LABEL: v_fshl_i64_ssv:
5449 ; GFX8-NEXT: v_and_b32_e32 v1, 63, v0
5450 ; GFX8-NEXT: v_not_b32_e32 v0, v0
5451 ; GFX8-NEXT: v_and_b32_e32 v2, 63, v0
5452 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v1, s[0:1]
5453 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[2:3], 1
5454 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1]
5455 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
5456 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
5457 ; GFX8-NEXT: ; return to shader part epilog
5459 ; GFX9-LABEL: v_fshl_i64_ssv:
5461 ; GFX9-NEXT: v_and_b32_e32 v1, 63, v0
5462 ; GFX9-NEXT: v_not_b32_e32 v0, v0
5463 ; GFX9-NEXT: v_and_b32_e32 v2, 63, v0
5464 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v1, s[0:1]
5465 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[2:3], 1
5466 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1]
5467 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
5468 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v3
5469 ; GFX9-NEXT: ; return to shader part epilog
5471 ; GFX10-LABEL: v_fshl_i64_ssv:
5473 ; GFX10-NEXT: v_not_b32_e32 v1, v0
5474 ; GFX10-NEXT: v_and_b32_e32 v0, 63, v0
5475 ; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], 1
5476 ; GFX10-NEXT: v_and_b32_e32 v2, 63, v1
5477 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1]
5478 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], v2, s[2:3]
5479 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
5480 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v3
5481 ; GFX10-NEXT: ; return to shader part epilog
5483 ; GFX11-LABEL: v_fshl_i64_ssv:
5485 ; GFX11-NEXT: v_not_b32_e32 v1, v0
5486 ; GFX11-NEXT: v_and_b32_e32 v0, 63, v0
5487 ; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], 1
5488 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5489 ; GFX11-NEXT: v_and_b32_e32 v2, 63, v1
5490 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1]
5491 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5492 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], v2, s[2:3]
5493 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v2
5494 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
5495 ; GFX11-NEXT: v_or_b32_e32 v1, v1, v3
5496 ; GFX11-NEXT: ; return to shader part epilog
5497 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt)
5498 %cast = bitcast i64 %result to <2 x float>
5499 ret <2 x float> %cast
; Funnel shift left on i64 where %lhs and %amt are inreg arguments and %rhs is
; a plain (non-inreg) argument. The intrinsic result is bitcast to <2 x float>
; before being returned.
; The expected sequences below mask the amount with 63 and with and-not 63
; using scalar ops, do the left shift as a scalar 64-bit shift, do both right
; shifts (by 1, then by the complemented amount) as vector 64-bit shifts, and
; OR the halves together with vector ORs.
5502 define amdgpu_ps <2 x float> @v_fshl_i64_svs(i64 inreg %lhs, i64 %rhs, i64 inreg %amt) {
5503 ; GFX6-LABEL: v_fshl_i64_svs:
5505 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], 1
5506 ; GFX6-NEXT: s_and_b64 s[4:5], s[2:3], 63
5507 ; GFX6-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5508 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], s2
5509 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s4
5510 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
5511 ; GFX6-NEXT: v_or_b32_e32 v1, s1, v1
5512 ; GFX6-NEXT: ; return to shader part epilog
5514 ; GFX8-LABEL: v_fshl_i64_svs:
5516 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
5517 ; GFX8-NEXT: s_and_b64 s[4:5], s[2:3], 63
5518 ; GFX8-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5519 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], s2, v[0:1]
5520 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s4
5521 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
5522 ; GFX8-NEXT: v_or_b32_e32 v1, s1, v1
5523 ; GFX8-NEXT: ; return to shader part epilog
5525 ; GFX9-LABEL: v_fshl_i64_svs:
5527 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
5528 ; GFX9-NEXT: s_and_b64 s[4:5], s[2:3], 63
5529 ; GFX9-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5530 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], s2, v[0:1]
5531 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s4
5532 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
5533 ; GFX9-NEXT: v_or_b32_e32 v1, s1, v1
5534 ; GFX9-NEXT: ; return to shader part epilog
5536 ; GFX10-LABEL: v_fshl_i64_svs:
5538 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
5539 ; GFX10-NEXT: s_andn2_b64 s[4:5], 63, s[2:3]
5540 ; GFX10-NEXT: s_and_b64 s[2:3], s[2:3], 63
5541 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s2
5542 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
5543 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
5544 ; GFX10-NEXT: v_or_b32_e32 v1, s1, v1
5545 ; GFX10-NEXT: ; return to shader part epilog
5547 ; GFX11-LABEL: v_fshl_i64_svs:
5549 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
5550 ; GFX11-NEXT: s_and_not1_b64 s[4:5], 63, s[2:3]
5551 ; GFX11-NEXT: s_and_b64 s[2:3], s[2:3], 63
5552 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5553 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s2
5554 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
5555 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
5556 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v0
5557 ; GFX11-NEXT: v_or_b32_e32 v1, s1, v1
5558 ; GFX11-NEXT: ; return to shader part epilog
5559 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt)
5560 %cast = bitcast i64 %result to <2 x float>
5561 ret <2 x float> %cast
; Funnel shift left on i64 where %lhs is a plain (non-inreg) argument and %rhs
; and %amt are inreg arguments. The intrinsic result is bitcast to
; <2 x float> before being returned.
; The expected sequences below mask the amount with 63 and with and-not 63
; using scalar ops, do the left shift as a vector 64-bit shift, do both right
; shifts (by 1, then by the complemented amount) as scalar 64-bit shifts, and
; OR the halves together with vector ORs.
5564 define amdgpu_ps <2 x float> @v_fshl_i64_vss(i64 %lhs, i64 inreg %rhs, i64 inreg %amt) {
5565 ; GFX6-LABEL: v_fshl_i64_vss:
5567 ; GFX6-NEXT: s_and_b64 s[4:5], s[2:3], 63
5568 ; GFX6-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5569 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s4
5570 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
5571 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
5572 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
5573 ; GFX6-NEXT: v_or_b32_e32 v1, s1, v1
5574 ; GFX6-NEXT: ; return to shader part epilog
5576 ; GFX8-LABEL: v_fshl_i64_vss:
5578 ; GFX8-NEXT: s_and_b64 s[4:5], s[2:3], 63
5579 ; GFX8-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5580 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1]
5581 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
5582 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
5583 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
5584 ; GFX8-NEXT: v_or_b32_e32 v1, s1, v1
5585 ; GFX8-NEXT: ; return to shader part epilog
5587 ; GFX9-LABEL: v_fshl_i64_vss:
5589 ; GFX9-NEXT: s_and_b64 s[4:5], s[2:3], 63
5590 ; GFX9-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5591 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1]
5592 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
5593 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
5594 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
5595 ; GFX9-NEXT: v_or_b32_e32 v1, s1, v1
5596 ; GFX9-NEXT: ; return to shader part epilog
5598 ; GFX10-LABEL: v_fshl_i64_vss:
5600 ; GFX10-NEXT: s_and_b64 s[4:5], s[2:3], 63
5601 ; GFX10-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5602 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1]
5603 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
5604 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
5605 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
5606 ; GFX10-NEXT: v_or_b32_e32 v1, s1, v1
5607 ; GFX10-NEXT: ; return to shader part epilog
5609 ; GFX11-LABEL: v_fshl_i64_vss:
5611 ; GFX11-NEXT: s_and_b64 s[4:5], s[2:3], 63
5612 ; GFX11-NEXT: s_and_not1_b64 s[2:3], 63, s[2:3]
5613 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1]
5614 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
5615 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5616 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
5617 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
5618 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v0
5619 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
5620 ; GFX11-NEXT: v_or_b32_e32 v1, s1, v1
5621 ; GFX11-NEXT: ; return to shader part epilog
5622 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt)
5623 %cast = bitcast i64 %result to <2 x float>
5624 ret <2 x float> %cast
; Funnel shift left on <2 x i64> with all three operands passed inreg; the
; vector result is returned directly.
; Every expected instruction below is a scalar (s_*) op. Each 64-bit element
; is handled on its own: the amount is masked with 63 and with and-not 63, one
; source is shifted left, the other is shifted right by 1 and then by the
; complemented amount, and the two are combined with a 64-bit scalar OR.
5627 define amdgpu_ps <2 x i64> @s_fshl_v2i64(<2 x i64> inreg %lhs, <2 x i64> inreg %rhs, <2 x i64> inreg %amt) {
5628 ; GFX6-LABEL: s_fshl_v2i64:
5630 ; GFX6-NEXT: s_and_b64 s[12:13], s[8:9], 63
5631 ; GFX6-NEXT: s_andn2_b64 s[8:9], 63, s[8:9]
5632 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], 1
5633 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s12
5634 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
5635 ; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
5636 ; GFX6-NEXT: s_and_b64 s[4:5], s[10:11], 63
5637 ; GFX6-NEXT: s_andn2_b64 s[8:9], 63, s[10:11]
5638 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], s4
5639 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[6:7], 1
5640 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
5641 ; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
5642 ; GFX6-NEXT: ; return to shader part epilog
5644 ; GFX8-LABEL: s_fshl_v2i64:
5646 ; GFX8-NEXT: s_and_b64 s[12:13], s[8:9], 63
5647 ; GFX8-NEXT: s_andn2_b64 s[8:9], 63, s[8:9]
5648 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], 1
5649 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s12
5650 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
5651 ; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
5652 ; GFX8-NEXT: s_and_b64 s[4:5], s[10:11], 63
5653 ; GFX8-NEXT: s_andn2_b64 s[8:9], 63, s[10:11]
5654 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s4
5655 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[6:7], 1
5656 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
5657 ; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
5658 ; GFX8-NEXT: ; return to shader part epilog
5660 ; GFX9-LABEL: s_fshl_v2i64:
5662 ; GFX9-NEXT: s_and_b64 s[12:13], s[8:9], 63
5663 ; GFX9-NEXT: s_andn2_b64 s[8:9], 63, s[8:9]
5664 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], 1
5665 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s12
5666 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
5667 ; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
5668 ; GFX9-NEXT: s_and_b64 s[4:5], s[10:11], 63
5669 ; GFX9-NEXT: s_andn2_b64 s[8:9], 63, s[10:11]
5670 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s4
5671 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[6:7], 1
5672 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
5673 ; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
5674 ; GFX9-NEXT: ; return to shader part epilog
5676 ; GFX10-LABEL: s_fshl_v2i64:
5678 ; GFX10-NEXT: s_and_b64 s[12:13], s[8:9], 63
5679 ; GFX10-NEXT: s_andn2_b64 s[8:9], 63, s[8:9]
5680 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], 1
5681 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], 1
5682 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
5683 ; GFX10-NEXT: s_and_b64 s[8:9], s[10:11], 63
5684 ; GFX10-NEXT: s_andn2_b64 s[10:11], 63, s[10:11]
5685 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s12
5686 ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], s8
5687 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s10
5688 ; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
5689 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7]
5690 ; GFX10-NEXT: ; return to shader part epilog
5692 ; GFX11-LABEL: s_fshl_v2i64:
5694 ; GFX11-NEXT: s_and_b64 s[12:13], s[8:9], 63
5695 ; GFX11-NEXT: s_and_not1_b64 s[8:9], 63, s[8:9]
5696 ; GFX11-NEXT: s_lshr_b64 s[4:5], s[4:5], 1
5697 ; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], 1
5698 ; GFX11-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
5699 ; GFX11-NEXT: s_and_b64 s[8:9], s[10:11], 63
5700 ; GFX11-NEXT: s_and_not1_b64 s[10:11], 63, s[10:11]
5701 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s12
5702 ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], s8
5703 ; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], s10
5704 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
5705 ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7]
5706 ; GFX11-NEXT: ; return to shader part epilog
5707 %result = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt)
5708 ret <2 x i64> %result
; Funnel shift left on <2 x i64> with no inreg operands and no explicit
; calling convention on the define; the vector result is returned directly.
; The expected sequences below begin with s_waitcnt and end with
; s_setpc_b64 s[30:31], and all arithmetic is done with vector (v_*) ops.
; Each 64-bit element is handled on its own: the amount and its bitwise
; complement are masked with 63, one source is shifted left, the other is
; shifted right by 1 and then by the complemented amount, and the results are
; ORed together 32 bits at a time.
5711 define <2 x i64> @v_fshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) {
5712 ; GFX6-LABEL: v_fshl_v2i64:
5714 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5715 ; GFX6-NEXT: v_and_b32_e32 v9, 63, v8
5716 ; GFX6-NEXT: v_not_b32_e32 v8, v8
5717 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[4:5], 1
5718 ; GFX6-NEXT: v_and_b32_e32 v8, 63, v8
5719 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v9
5720 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[4:5], v8
5721 ; GFX6-NEXT: v_not_b32_e32 v8, v10
5722 ; GFX6-NEXT: v_lshr_b64 v[6:7], v[6:7], 1
5723 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
5724 ; GFX6-NEXT: v_and_b32_e32 v4, 63, v10
5725 ; GFX6-NEXT: v_and_b32_e32 v8, 63, v8
5726 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], v4
5727 ; GFX6-NEXT: v_lshr_b64 v[6:7], v[6:7], v8
5728 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v5
5729 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v6
5730 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v7
5731 ; GFX6-NEXT: s_setpc_b64 s[30:31]
5733 ; GFX8-LABEL: v_fshl_v2i64:
5735 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5736 ; GFX8-NEXT: v_and_b32_e32 v9, 63, v8
5737 ; GFX8-NEXT: v_not_b32_e32 v8, v8
5738 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
5739 ; GFX8-NEXT: v_and_b32_e32 v8, 63, v8
5740 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1]
5741 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5]
5742 ; GFX8-NEXT: v_not_b32_e32 v8, v10
5743 ; GFX8-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7]
5744 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
5745 ; GFX8-NEXT: v_and_b32_e32 v4, 63, v10
5746 ; GFX8-NEXT: v_and_b32_e32 v8, 63, v8
5747 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], v4, v[2:3]
5748 ; GFX8-NEXT: v_lshrrev_b64 v[6:7], v8, v[6:7]
5749 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v5
5750 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v6
5751 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v7
5752 ; GFX8-NEXT: s_setpc_b64 s[30:31]
5754 ; GFX9-LABEL: v_fshl_v2i64:
5756 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5757 ; GFX9-NEXT: v_and_b32_e32 v9, 63, v8
5758 ; GFX9-NEXT: v_not_b32_e32 v8, v8
5759 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
5760 ; GFX9-NEXT: v_and_b32_e32 v8, 63, v8
5761 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1]
5762 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5]
5763 ; GFX9-NEXT: v_not_b32_e32 v8, v10
5764 ; GFX9-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7]
5765 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v4
5766 ; GFX9-NEXT: v_and_b32_e32 v4, 63, v10
5767 ; GFX9-NEXT: v_and_b32_e32 v8, 63, v8
5768 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v4, v[2:3]
5769 ; GFX9-NEXT: v_lshrrev_b64 v[6:7], v8, v[6:7]
5770 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v5
5771 ; GFX9-NEXT: v_or_b32_e32 v2, v2, v6
5772 ; GFX9-NEXT: v_or_b32_e32 v3, v3, v7
5773 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5775 ; GFX10-LABEL: v_fshl_v2i64:
5777 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5778 ; GFX10-NEXT: v_not_b32_e32 v9, v8
5779 ; GFX10-NEXT: v_not_b32_e32 v11, v10
5780 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
5781 ; GFX10-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7]
5782 ; GFX10-NEXT: v_and_b32_e32 v8, 63, v8
5783 ; GFX10-NEXT: v_and_b32_e32 v9, 63, v9
5784 ; GFX10-NEXT: v_and_b32_e32 v10, 63, v10
5785 ; GFX10-NEXT: v_and_b32_e32 v11, 63, v11
5786 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1]
5787 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5]
5788 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], v10, v[2:3]
5789 ; GFX10-NEXT: v_lshrrev_b64 v[6:7], v11, v[6:7]
5790 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v4
5791 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v5
5792 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v6
5793 ; GFX10-NEXT: v_or_b32_e32 v3, v3, v7
5794 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5796 ; GFX11-LABEL: v_fshl_v2i64:
5798 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5799 ; GFX11-NEXT: v_not_b32_e32 v9, v8
5800 ; GFX11-NEXT: v_not_b32_e32 v11, v10
5801 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
5802 ; GFX11-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7]
5803 ; GFX11-NEXT: v_and_b32_e32 v8, 63, v8
5804 ; GFX11-NEXT: v_and_b32_e32 v9, 63, v9
5805 ; GFX11-NEXT: v_and_b32_e32 v10, 63, v10
5806 ; GFX11-NEXT: v_and_b32_e32 v11, 63, v11
5807 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
5808 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1]
5809 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5]
5810 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
5811 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], v10, v[2:3]
5812 ; GFX11-NEXT: v_lshrrev_b64 v[6:7], v11, v[6:7]
5813 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
5814 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v4
5815 ; GFX11-NEXT: v_or_b32_e32 v1, v1, v5
5816 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
5817 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v6
5818 ; GFX11-NEXT: v_or_b32_e32 v3, v3, v7
5819 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5820 %result = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt)
5821 ret <2 x i64> %result
; Funnel shift left on i128 with all three operands passed inreg.
; Every expected instruction below is a scalar (s_*) op. The amount is masked
; with 0x7f and with and-not 0x7f, and each 128-bit shift is assembled from
; 64-bit shifts: the sequences compute (amount - 64) and (64 - amount),
; compare the amount against 64 and against 0, and use s_cselect to pick
; between the candidate results before the final 64-bit ORs.
5824 define amdgpu_ps i128 @s_fshl_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) {
5825 ; GFX6-LABEL: s_fshl_i128:
5827 ; GFX6-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f
5828 ; GFX6-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9]
5829 ; GFX6-NEXT: s_sub_i32 s9, s10, 64
5830 ; GFX6-NEXT: s_sub_i32 s11, 64, s10
5831 ; GFX6-NEXT: s_cmp_lt_u32 s10, 64
5832 ; GFX6-NEXT: s_cselect_b32 s13, 1, 0
5833 ; GFX6-NEXT: s_cmp_eq_u32 s10, 0
5834 ; GFX6-NEXT: s_cselect_b32 s18, 1, 0
5835 ; GFX6-NEXT: s_lshl_b64 s[14:15], s[0:1], s10
5836 ; GFX6-NEXT: s_lshr_b64 s[16:17], s[0:1], s11
5837 ; GFX6-NEXT: s_lshl_b64 s[10:11], s[2:3], s10
5838 ; GFX6-NEXT: s_or_b64 s[10:11], s[16:17], s[10:11]
5839 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s9
5840 ; GFX6-NEXT: s_cmp_lg_u32 s13, 0
5841 ; GFX6-NEXT: s_cselect_b64 s[14:15], s[14:15], 0
5842 ; GFX6-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1]
5843 ; GFX6-NEXT: s_cmp_lg_u32 s18, 0
5844 ; GFX6-NEXT: s_mov_b32 s12, 0
5845 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
5846 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
5847 ; GFX6-NEXT: s_lshl_b32 s13, s6, 31
5848 ; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[12:13]
5849 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[6:7], 1
5850 ; GFX6-NEXT: s_sub_i32 s12, s8, 64
5851 ; GFX6-NEXT: s_sub_i32 s10, 64, s8
5852 ; GFX6-NEXT: s_cmp_lt_u32 s8, 64
5853 ; GFX6-NEXT: s_cselect_b32 s13, 1, 0
5854 ; GFX6-NEXT: s_cmp_eq_u32 s8, 0
5855 ; GFX6-NEXT: s_cselect_b32 s16, 1, 0
5856 ; GFX6-NEXT: s_lshr_b64 s[6:7], s[4:5], s8
5857 ; GFX6-NEXT: s_lshr_b64 s[8:9], s[0:1], s8
5858 ; GFX6-NEXT: s_lshl_b64 s[10:11], s[4:5], s10
5859 ; GFX6-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11]
5860 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s12
5861 ; GFX6-NEXT: s_cmp_lg_u32 s13, 0
5862 ; GFX6-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
5863 ; GFX6-NEXT: s_cmp_lg_u32 s16, 0
5864 ; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
5865 ; GFX6-NEXT: s_cmp_lg_u32 s13, 0
5866 ; GFX6-NEXT: s_cselect_b64 s[4:5], s[6:7], 0
5867 ; GFX6-NEXT: s_or_b64 s[0:1], s[14:15], s[0:1]
5868 ; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
5869 ; GFX6-NEXT: ; return to shader part epilog
5871 ; GFX8-LABEL: s_fshl_i128:
5873 ; GFX8-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f
5874 ; GFX8-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9]
5875 ; GFX8-NEXT: s_sub_i32 s9, s10, 64
5876 ; GFX8-NEXT: s_sub_i32 s11, 64, s10
5877 ; GFX8-NEXT: s_cmp_lt_u32 s10, 64
5878 ; GFX8-NEXT: s_cselect_b32 s13, 1, 0
5879 ; GFX8-NEXT: s_cmp_eq_u32 s10, 0
5880 ; GFX8-NEXT: s_cselect_b32 s18, 1, 0
5881 ; GFX8-NEXT: s_lshl_b64 s[14:15], s[0:1], s10
5882 ; GFX8-NEXT: s_lshr_b64 s[16:17], s[0:1], s11
5883 ; GFX8-NEXT: s_lshl_b64 s[10:11], s[2:3], s10
5884 ; GFX8-NEXT: s_or_b64 s[10:11], s[16:17], s[10:11]
5885 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s9
5886 ; GFX8-NEXT: s_cmp_lg_u32 s13, 0
5887 ; GFX8-NEXT: s_cselect_b64 s[14:15], s[14:15], 0
5888 ; GFX8-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1]
5889 ; GFX8-NEXT: s_cmp_lg_u32 s18, 0
5890 ; GFX8-NEXT: s_mov_b32 s12, 0
5891 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
5892 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
5893 ; GFX8-NEXT: s_lshl_b32 s13, s6, 31
5894 ; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[12:13]
5895 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[6:7], 1
5896 ; GFX8-NEXT: s_sub_i32 s12, s8, 64
5897 ; GFX8-NEXT: s_sub_i32 s10, 64, s8
5898 ; GFX8-NEXT: s_cmp_lt_u32 s8, 64
5899 ; GFX8-NEXT: s_cselect_b32 s13, 1, 0
5900 ; GFX8-NEXT: s_cmp_eq_u32 s8, 0
5901 ; GFX8-NEXT: s_cselect_b32 s16, 1, 0
5902 ; GFX8-NEXT: s_lshr_b64 s[6:7], s[4:5], s8
5903 ; GFX8-NEXT: s_lshr_b64 s[8:9], s[0:1], s8
5904 ; GFX8-NEXT: s_lshl_b64 s[10:11], s[4:5], s10
5905 ; GFX8-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11]
5906 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], s12
5907 ; GFX8-NEXT: s_cmp_lg_u32 s13, 0
5908 ; GFX8-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
5909 ; GFX8-NEXT: s_cmp_lg_u32 s16, 0
5910 ; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
5911 ; GFX8-NEXT: s_cmp_lg_u32 s13, 0
5912 ; GFX8-NEXT: s_cselect_b64 s[4:5], s[6:7], 0
5913 ; GFX8-NEXT: s_or_b64 s[0:1], s[14:15], s[0:1]
5914 ; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
5915 ; GFX8-NEXT: ; return to shader part epilog
5917 ; GFX9-LABEL: s_fshl_i128:
5919 ; GFX9-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f
5920 ; GFX9-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9]
5921 ; GFX9-NEXT: s_sub_i32 s9, s10, 64
5922 ; GFX9-NEXT: s_sub_i32 s11, 64, s10
5923 ; GFX9-NEXT: s_cmp_lt_u32 s10, 64
5924 ; GFX9-NEXT: s_cselect_b32 s13, 1, 0
5925 ; GFX9-NEXT: s_cmp_eq_u32 s10, 0
5926 ; GFX9-NEXT: s_cselect_b32 s18, 1, 0
5927 ; GFX9-NEXT: s_lshl_b64 s[14:15], s[0:1], s10
5928 ; GFX9-NEXT: s_lshr_b64 s[16:17], s[0:1], s11
5929 ; GFX9-NEXT: s_lshl_b64 s[10:11], s[2:3], s10
5930 ; GFX9-NEXT: s_or_b64 s[10:11], s[16:17], s[10:11]
5931 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s9
5932 ; GFX9-NEXT: s_cmp_lg_u32 s13, 0
5933 ; GFX9-NEXT: s_cselect_b64 s[14:15], s[14:15], 0
5934 ; GFX9-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1]
5935 ; GFX9-NEXT: s_cmp_lg_u32 s18, 0
5936 ; GFX9-NEXT: s_mov_b32 s12, 0
5937 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
5938 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
5939 ; GFX9-NEXT: s_lshl_b32 s13, s6, 31
5940 ; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[12:13]
5941 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[6:7], 1
5942 ; GFX9-NEXT: s_sub_i32 s12, s8, 64
5943 ; GFX9-NEXT: s_sub_i32 s10, 64, s8
5944 ; GFX9-NEXT: s_cmp_lt_u32 s8, 64
5945 ; GFX9-NEXT: s_cselect_b32 s13, 1, 0
5946 ; GFX9-NEXT: s_cmp_eq_u32 s8, 0
5947 ; GFX9-NEXT: s_cselect_b32 s16, 1, 0
5948 ; GFX9-NEXT: s_lshr_b64 s[6:7], s[4:5], s8
5949 ; GFX9-NEXT: s_lshr_b64 s[8:9], s[0:1], s8
5950 ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], s10
5951 ; GFX9-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11]
5952 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s12
5953 ; GFX9-NEXT: s_cmp_lg_u32 s13, 0
5954 ; GFX9-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
5955 ; GFX9-NEXT: s_cmp_lg_u32 s16, 0
5956 ; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
5957 ; GFX9-NEXT: s_cmp_lg_u32 s13, 0
5958 ; GFX9-NEXT: s_cselect_b64 s[4:5], s[6:7], 0
5959 ; GFX9-NEXT: s_or_b64 s[0:1], s[14:15], s[0:1]
5960 ; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
5961 ; GFX9-NEXT: ; return to shader part epilog
5963 ; GFX10-LABEL: s_fshl_i128:
5965 ; GFX10-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f
5966 ; GFX10-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9]
5967 ; GFX10-NEXT: s_sub_i32 s9, s10, 64
5968 ; GFX10-NEXT: s_sub_i32 s11, 64, s10
5969 ; GFX10-NEXT: s_cmp_lt_u32 s10, 64
5970 ; GFX10-NEXT: s_mov_b32 s12, 0
5971 ; GFX10-NEXT: s_cselect_b32 s13, 1, 0
5972 ; GFX10-NEXT: s_cmp_eq_u32 s10, 0
5973 ; GFX10-NEXT: s_cselect_b32 s18, 1, 0
5974 ; GFX10-NEXT: s_lshr_b64 s[14:15], s[0:1], s11
5975 ; GFX10-NEXT: s_lshl_b64 s[16:17], s[2:3], s10
5976 ; GFX10-NEXT: s_lshl_b64 s[10:11], s[0:1], s10
5977 ; GFX10-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17]
5978 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s9
5979 ; GFX10-NEXT: s_cmp_lg_u32 s13, 0
5980 ; GFX10-NEXT: s_cselect_b64 s[10:11], s[10:11], 0
5981 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[14:15], s[0:1]
5982 ; GFX10-NEXT: s_cmp_lg_u32 s18, 0
5983 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
5984 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
5985 ; GFX10-NEXT: s_lshl_b32 s13, s6, 31
5986 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[6:7], 1
5987 ; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[12:13]
5988 ; GFX10-NEXT: s_sub_i32 s14, s8, 64
5989 ; GFX10-NEXT: s_sub_i32 s9, 64, s8
5990 ; GFX10-NEXT: s_cmp_lt_u32 s8, 64
5991 ; GFX10-NEXT: s_cselect_b32 s15, 1, 0
5992 ; GFX10-NEXT: s_cmp_eq_u32 s8, 0
5993 ; GFX10-NEXT: s_cselect_b32 s16, 1, 0
5994 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s8
5995 ; GFX10-NEXT: s_lshl_b64 s[12:13], s[4:5], s9
5996 ; GFX10-NEXT: s_lshr_b64 s[8:9], s[4:5], s8
5997 ; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[12:13]
5998 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s14
5999 ; GFX10-NEXT: s_cmp_lg_u32 s15, 0
6000 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5]
6001 ; GFX10-NEXT: s_cmp_lg_u32 s16, 0
6002 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
6003 ; GFX10-NEXT: s_cmp_lg_u32 s15, 0
6004 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[8:9], 0
6005 ; GFX10-NEXT: s_or_b64 s[0:1], s[10:11], s[0:1]
6006 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
6007 ; GFX10-NEXT: ; return to shader part epilog
6009 ; GFX11-LABEL: s_fshl_i128:
6011 ; GFX11-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f
6012 ; GFX11-NEXT: s_and_not1_b64 s[8:9], 0x7f, s[8:9]
6013 ; GFX11-NEXT: s_sub_i32 s9, s10, 64
6014 ; GFX11-NEXT: s_sub_i32 s11, 64, s10
6015 ; GFX11-NEXT: s_cmp_lt_u32 s10, 64
6016 ; GFX11-NEXT: s_mov_b32 s12, 0
6017 ; GFX11-NEXT: s_cselect_b32 s13, 1, 0
6018 ; GFX11-NEXT: s_cmp_eq_u32 s10, 0
6019 ; GFX11-NEXT: s_cselect_b32 s18, 1, 0
6020 ; GFX11-NEXT: s_lshr_b64 s[14:15], s[0:1], s11
6021 ; GFX11-NEXT: s_lshl_b64 s[16:17], s[2:3], s10
6022 ; GFX11-NEXT: s_lshl_b64 s[10:11], s[0:1], s10
6023 ; GFX11-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17]
6024 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s9
6025 ; GFX11-NEXT: s_cmp_lg_u32 s13, 0
6026 ; GFX11-NEXT: s_cselect_b64 s[10:11], s[10:11], 0
6027 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[14:15], s[0:1]
6028 ; GFX11-NEXT: s_cmp_lg_u32 s18, 0
6029 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
6030 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
6031 ; GFX11-NEXT: s_lshl_b32 s13, s6, 31
6032 ; GFX11-NEXT: s_lshr_b64 s[4:5], s[6:7], 1
6033 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[12:13]
6034 ; GFX11-NEXT: s_sub_i32 s14, s8, 64
6035 ; GFX11-NEXT: s_sub_i32 s9, 64, s8
6036 ; GFX11-NEXT: s_cmp_lt_u32 s8, 64
6037 ; GFX11-NEXT: s_cselect_b32 s15, 1, 0
6038 ; GFX11-NEXT: s_cmp_eq_u32 s8, 0
6039 ; GFX11-NEXT: s_cselect_b32 s16, 1, 0
6040 ; GFX11-NEXT: s_lshr_b64 s[6:7], s[0:1], s8
6041 ; GFX11-NEXT: s_lshl_b64 s[12:13], s[4:5], s9
6042 ; GFX11-NEXT: s_lshr_b64 s[8:9], s[4:5], s8
6043 ; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[12:13]
6044 ; GFX11-NEXT: s_lshr_b64 s[4:5], s[4:5], s14
6045 ; GFX11-NEXT: s_cmp_lg_u32 s15, 0
6046 ; GFX11-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5]
6047 ; GFX11-NEXT: s_cmp_lg_u32 s16, 0
6048 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
6049 ; GFX11-NEXT: s_cmp_lg_u32 s15, 0
6050 ; GFX11-NEXT: s_cselect_b64 s[4:5], s[8:9], 0
6051 ; GFX11-NEXT: s_or_b64 s[0:1], s[10:11], s[0:1]
6052 ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
6053 ; GFX11-NEXT: ; return to shader part epilog
6054 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt)
6058 define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) {
6059 ; GFX6-LABEL: v_fshl_i128:
6061 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6062 ; GFX6-NEXT: v_and_b32_e32 v14, 0x7f, v8
6063 ; GFX6-NEXT: v_not_b32_e32 v8, v8
6064 ; GFX6-NEXT: v_and_b32_e32 v15, 0x7f, v8
6065 ; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v14
6066 ; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, 64, v14
6067 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[0:1], v8
6068 ; GFX6-NEXT: v_lshl_b64 v[10:11], v[2:3], v14
6069 ; GFX6-NEXT: v_lshl_b64 v[12:13], v[0:1], v14
6070 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v16
6071 ; GFX6-NEXT: v_or_b32_e32 v8, v8, v10
6072 ; GFX6-NEXT: v_or_b32_e32 v9, v9, v11
6073 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
6074 ; GFX6-NEXT: v_cndmask_b32_e32 v10, 0, v12, vcc
6075 ; GFX6-NEXT: v_cndmask_b32_e32 v11, 0, v13, vcc
6076 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
6077 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
6078 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14
6079 ; GFX6-NEXT: v_cndmask_b32_e32 v12, v0, v2, vcc
6080 ; GFX6-NEXT: v_cndmask_b32_e32 v13, v1, v3, vcc
6081 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], 1
6082 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 31, v6
6083 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
6084 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[6:7], 1
6085 ; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 64, v15
6086 ; GFX6-NEXT: v_subrev_i32_e32 v14, vcc, 64, v15
6087 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], v15
6088 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], v6
6089 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], v15
6090 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], v14
6091 ; GFX6-NEXT: v_or_b32_e32 v4, v4, v6
6092 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v7
6093 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
6094 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
6095 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
6096 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15
6097 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
6098 ; GFX6-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
6099 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
6100 ; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
6101 ; GFX6-NEXT: v_or_b32_e32 v0, v10, v0
6102 ; GFX6-NEXT: v_or_b32_e32 v1, v11, v1
6103 ; GFX6-NEXT: v_or_b32_e32 v2, v12, v2
6104 ; GFX6-NEXT: v_or_b32_e32 v3, v13, v3
6105 ; GFX6-NEXT: s_setpc_b64 s[30:31]
6107 ; GFX8-LABEL: v_fshl_i128:
6109 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6110 ; GFX8-NEXT: v_and_b32_e32 v14, 0x7f, v8
6111 ; GFX8-NEXT: v_not_b32_e32 v8, v8
6112 ; GFX8-NEXT: v_and_b32_e32 v15, 0x7f, v8
6113 ; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v14
6114 ; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, 64, v14
6115 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], v8, v[0:1]
6116 ; GFX8-NEXT: v_lshlrev_b64 v[10:11], v14, v[2:3]
6117 ; GFX8-NEXT: v_lshlrev_b64 v[12:13], v14, v[0:1]
6118 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v16, v[0:1]
6119 ; GFX8-NEXT: v_or_b32_e32 v8, v8, v10
6120 ; GFX8-NEXT: v_or_b32_e32 v9, v9, v11
6121 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
6122 ; GFX8-NEXT: v_cndmask_b32_e32 v10, 0, v12, vcc
6123 ; GFX8-NEXT: v_cndmask_b32_e32 v11, 0, v13, vcc
6124 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
6125 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc
6126 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14
6127 ; GFX8-NEXT: v_cndmask_b32_e32 v12, v0, v2, vcc
6128 ; GFX8-NEXT: v_cndmask_b32_e32 v13, v1, v3, vcc
6129 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], 1, v[4:5]
6130 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 31, v6
6131 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
6132 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[6:7]
6133 ; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 64, v15
6134 ; GFX8-NEXT: v_subrev_u32_e32 v14, vcc, 64, v15
6135 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], v15, v[0:1]
6136 ; GFX8-NEXT: v_lshlrev_b64 v[6:7], v6, v[2:3]
6137 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], v15, v[2:3]
6138 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], v14, v[2:3]
6139 ; GFX8-NEXT: v_or_b32_e32 v4, v4, v6
6140 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v7
6141 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
6142 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
6143 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
6144 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15
6145 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
6146 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
6147 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
6148 ; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
6149 ; GFX8-NEXT: v_or_b32_e32 v0, v10, v0
6150 ; GFX8-NEXT: v_or_b32_e32 v1, v11, v1
6151 ; GFX8-NEXT: v_or_b32_e32 v2, v12, v2
6152 ; GFX8-NEXT: v_or_b32_e32 v3, v13, v3
6153 ; GFX8-NEXT: s_setpc_b64 s[30:31]
6155 ; GFX9-LABEL: v_fshl_i128:
6157 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6158 ; GFX9-NEXT: v_and_b32_e32 v14, 0x7f, v8
6159 ; GFX9-NEXT: v_not_b32_e32 v8, v8
6160 ; GFX9-NEXT: v_and_b32_e32 v15, 0x7f, v8
6161 ; GFX9-NEXT: v_sub_u32_e32 v8, 64, v14
6162 ; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v14
6163 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v8, v[0:1]
6164 ; GFX9-NEXT: v_lshlrev_b64 v[10:11], v14, v[2:3]
6165 ; GFX9-NEXT: v_lshlrev_b64 v[12:13], v14, v[0:1]
6166 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v16, v[0:1]
6167 ; GFX9-NEXT: v_or_b32_e32 v8, v8, v10
6168 ; GFX9-NEXT: v_or_b32_e32 v9, v9, v11
6169 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
6170 ; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v12, vcc
6171 ; GFX9-NEXT: v_cndmask_b32_e32 v11, 0, v13, vcc
6172 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
6173 ; GFX9-NEXT: v_cndmask_b32_e32 v8, v1, v9, vcc
6174 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v14
6175 ; GFX9-NEXT: v_cndmask_b32_e32 v12, v0, v2, vcc
6176 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[4:5]
6177 ; GFX9-NEXT: v_cndmask_b32_e32 v13, v8, v3, vcc
6178 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[6:7]
6179 ; GFX9-NEXT: v_lshl_or_b32 v1, v6, 31, v1
6180 ; GFX9-NEXT: v_sub_u32_e32 v6, 64, v15
6181 ; GFX9-NEXT: v_subrev_u32_e32 v14, 64, v15
6182 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v15, v[0:1]
6183 ; GFX9-NEXT: v_lshlrev_b64 v[6:7], v6, v[2:3]
6184 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v15, v[2:3]
6185 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], v14, v[2:3]
6186 ; GFX9-NEXT: v_or_b32_e32 v4, v4, v6
6187 ; GFX9-NEXT: v_or_b32_e32 v5, v5, v7
6188 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
6189 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
6190 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
6191 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v15
6192 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
6193 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
6194 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
6195 ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
6196 ; GFX9-NEXT: v_or_b32_e32 v0, v10, v0
6197 ; GFX9-NEXT: v_or_b32_e32 v1, v11, v1
6198 ; GFX9-NEXT: v_or_b32_e32 v2, v12, v2
6199 ; GFX9-NEXT: v_or_b32_e32 v3, v13, v3
6200 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6202 ; GFX10-LABEL: v_fshl_i128:
6204 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6205 ; GFX10-NEXT: v_and_b32_e32 v18, 0x7f, v8
6206 ; GFX10-NEXT: v_not_b32_e32 v8, v8
6207 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
6208 ; GFX10-NEXT: v_lshrrev_b64 v[12:13], 1, v[6:7]
6209 ; GFX10-NEXT: v_sub_nc_u32_e32 v10, 64, v18
6210 ; GFX10-NEXT: v_and_b32_e32 v19, 0x7f, v8
6211 ; GFX10-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3]
6212 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 31, v5
6213 ; GFX10-NEXT: v_subrev_nc_u32_e32 v20, 64, v18
6214 ; GFX10-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1]
6215 ; GFX10-NEXT: v_sub_nc_u32_e32 v16, 64, v19
6216 ; GFX10-NEXT: v_lshlrev_b64 v[6:7], v18, v[0:1]
6217 ; GFX10-NEXT: v_lshrrev_b64 v[14:15], v19, v[4:5]
6218 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1]
6219 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18
6220 ; GFX10-NEXT: v_or_b32_e32 v10, v10, v8
6221 ; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v19
6222 ; GFX10-NEXT: v_lshlrev_b64 v[16:17], v16, v[12:13]
6223 ; GFX10-NEXT: v_or_b32_e32 v11, v11, v9
6224 ; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v19
6225 ; GFX10-NEXT: v_cndmask_b32_e32 v10, v0, v10, vcc_lo
6226 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], v8, v[12:13]
6227 ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v19
6228 ; GFX10-NEXT: v_or_b32_e32 v14, v14, v16
6229 ; GFX10-NEXT: v_or_b32_e32 v15, v15, v17
6230 ; GFX10-NEXT: v_cndmask_b32_e32 v11, v1, v11, vcc_lo
6231 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v19, v[12:13]
6232 ; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v18
6233 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v14, s4
6234 ; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v15, s4
6235 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo
6236 ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc_lo
6237 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s6
6238 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s6
6239 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v4, s5
6240 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, v5, s5
6241 ; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v0, s4
6242 ; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v1, s4
6243 ; GFX10-NEXT: v_or_b32_e32 v0, v6, v4
6244 ; GFX10-NEXT: v_or_b32_e32 v1, v7, v5
6245 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v8
6246 ; GFX10-NEXT: v_or_b32_e32 v3, v3, v9
6247 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6249 ; GFX11-LABEL: v_fshl_i128:
6251 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6252 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5]
6253 ; GFX11-NEXT: v_and_b32_e32 v18, 0x7f, v8
6254 ; GFX11-NEXT: v_not_b32_e32 v8, v8
6255 ; GFX11-NEXT: v_lshrrev_b64 v[12:13], 1, v[6:7]
6256 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
6257 ; GFX11-NEXT: v_sub_nc_u32_e32 v10, 64, v18
6258 ; GFX11-NEXT: v_lshl_or_b32 v5, v6, 31, v5
6259 ; GFX11-NEXT: v_lshlrev_b64 v[6:7], v18, v[0:1]
6260 ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18
6261 ; GFX11-NEXT: v_and_b32_e32 v19, 0x7f, v8
6262 ; GFX11-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3]
6263 ; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1]
6264 ; GFX11-NEXT: v_subrev_nc_u32_e32 v20, 64, v18
6265 ; GFX11-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo
6266 ; GFX11-NEXT: v_sub_nc_u32_e32 v16, 64, v19
6267 ; GFX11-NEXT: v_lshrrev_b64 v[14:15], v19, v[4:5]
6268 ; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v19
6269 ; GFX11-NEXT: v_or_b32_e32 v10, v10, v8
6270 ; GFX11-NEXT: v_subrev_nc_u32_e32 v8, 64, v19
6271 ; GFX11-NEXT: v_lshlrev_b64 v[16:17], v16, v[12:13]
6272 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1]
6273 ; GFX11-NEXT: v_or_b32_e32 v11, v11, v9
6274 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v19
6275 ; GFX11-NEXT: v_lshrrev_b64 v[8:9], v8, v[12:13]
6276 ; GFX11-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc_lo
6277 ; GFX11-NEXT: v_or_b32_e32 v14, v14, v16
6278 ; GFX11-NEXT: v_or_b32_e32 v15, v15, v17
6279 ; GFX11-NEXT: v_dual_cndmask_b32 v10, v0, v10 :: v_dual_cndmask_b32 v11, v1, v11
6280 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], v19, v[12:13]
6281 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3)
6282 ; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v14, s0
6283 ; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v18
6284 ; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v15, s0
6285 ; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, v4, s1
6286 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
6287 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s2
6288 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s2
6289 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, v5, s1
6290 ; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v0, s0
6291 ; GFX11-NEXT: v_cndmask_b32_e64 v9, 0, v1, s0
6292 ; GFX11-NEXT: v_or_b32_e32 v0, v6, v4
6293 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6294 ; GFX11-NEXT: v_or_b32_e32 v1, v7, v5
6295 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v8
6296 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
6297 ; GFX11-NEXT: v_or_b32_e32 v3, v3, v9
6298 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6299 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt)
; v_fshl_i128_ssv: 128-bit funnel shift left (llvm.fshl.i128) in an amdgpu_ps
; function where %lhs and %rhs are marked inreg and %amt is not. The i128
; result is bitcast to <4 x float> for the return value.
;
; What the expected code below shows, per check prefix (GFX6/8/9/10/11):
;   * %lhs is read from s[0:3], %rhs from s[4:7], and the amount from v0
;     (only the low dword of %amt is consumed).
;   * The amount is masked with 0x7f for the left shift of %lhs; its bitwise
;     NOT, also masked with 0x7f, is the right-shift amount applied to %rhs
;     after %rhs has first been shifted right by 1.
;   * Each 128-bit shift is assembled from 64-bit shifts; the compares
;     against 64 and 0 pick between the partial results, and the two halves
;     are ORed together at the end.
;
; NOTE(review): the "ssv" suffix presumably encodes scalar/scalar/vector
; operand placement for lhs/rhs/amt - confirm against the sibling tests.
; NOTE(review): the file header states these assertions are autogenerated by
; utils/update_llc_test_checks.py; presumably they should be regenerated with
; that script rather than edited by hand.
6303 define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, i128 %amt) {
6304 ; GFX6-LABEL: v_fshl_i128_ssv:
6306 ; GFX6-NEXT: v_and_b32_e32 v6, 0x7f, v0
6307 ; GFX6-NEXT: v_not_b32_e32 v0, v0
6308 ; GFX6-NEXT: v_and_b32_e32 v7, 0x7f, v0
6309 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v6
6310 ; GFX6-NEXT: v_lshr_b64 v[0:1], s[0:1], v0
6311 ; GFX6-NEXT: v_lshl_b64 v[2:3], s[2:3], v6
6312 ; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 64, v6
6313 ; GFX6-NEXT: v_lshl_b64 v[4:5], s[0:1], v6
6314 ; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
6315 ; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
6316 ; GFX6-NEXT: v_lshl_b64 v[0:1], s[0:1], v8
6317 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
6318 ; GFX6-NEXT: s_mov_b32 s8, 0
6319 ; GFX6-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc
6320 ; GFX6-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
6321 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6322 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6323 ; GFX6-NEXT: v_mov_b32_e32 v2, s2
6324 ; GFX6-NEXT: v_mov_b32_e32 v3, s3
6325 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
6326 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
6327 ; GFX6-NEXT: s_lshl_b32 s9, s6, 31
6328 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v0, v2, vcc
6329 ; GFX6-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc
6330 ; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
6331 ; GFX6-NEXT: s_lshr_b64 s[2:3], s[6:7], 1
6332 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v7
6333 ; GFX6-NEXT: v_lshr_b64 v[0:1], s[0:1], v7
6334 ; GFX6-NEXT: v_lshl_b64 v[2:3], s[2:3], v2
6335 ; GFX6-NEXT: v_subrev_i32_e32 v11, vcc, 64, v7
6336 ; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
6337 ; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
6338 ; GFX6-NEXT: v_lshr_b64 v[0:1], s[2:3], v11
6339 ; GFX6-NEXT: v_lshr_b64 v[4:5], s[2:3], v7
6340 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
6341 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6342 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6343 ; GFX6-NEXT: v_mov_b32_e32 v2, s0
6344 ; GFX6-NEXT: v_mov_b32_e32 v3, s1
6345 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v7
6346 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
6347 ; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
6348 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
6349 ; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
6350 ; GFX6-NEXT: v_or_b32_e32 v0, v8, v0
6351 ; GFX6-NEXT: v_or_b32_e32 v1, v9, v1
6352 ; GFX6-NEXT: v_or_b32_e32 v2, v6, v2
6353 ; GFX6-NEXT: v_or_b32_e32 v3, v10, v3
6354 ; GFX6-NEXT: ; return to shader part epilog
6356 ; GFX8-LABEL: v_fshl_i128_ssv:
6358 ; GFX8-NEXT: v_and_b32_e32 v6, 0x7f, v0
6359 ; GFX8-NEXT: v_not_b32_e32 v0, v0
6360 ; GFX8-NEXT: v_and_b32_e32 v7, 0x7f, v0
6361 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v6
6362 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, s[0:1]
6363 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], v6, s[2:3]
6364 ; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 64, v6
6365 ; GFX8-NEXT: v_lshlrev_b64 v[4:5], v6, s[0:1]
6366 ; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
6367 ; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
6368 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v8, s[0:1]
6369 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
6370 ; GFX8-NEXT: s_mov_b32 s8, 0
6371 ; GFX8-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc
6372 ; GFX8-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
6373 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6374 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6375 ; GFX8-NEXT: v_mov_b32_e32 v2, s2
6376 ; GFX8-NEXT: v_mov_b32_e32 v3, s3
6377 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
6378 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
6379 ; GFX8-NEXT: s_lshl_b32 s9, s6, 31
6380 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v0, v2, vcc
6381 ; GFX8-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc
6382 ; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
6383 ; GFX8-NEXT: s_lshr_b64 s[2:3], s[6:7], 1
6384 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v7
6385 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v7, s[0:1]
6386 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, s[2:3]
6387 ; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, 64, v7
6388 ; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
6389 ; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
6390 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v11, s[2:3]
6391 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], v7, s[2:3]
6392 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
6393 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6394 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6395 ; GFX8-NEXT: v_mov_b32_e32 v2, s0
6396 ; GFX8-NEXT: v_mov_b32_e32 v3, s1
6397 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v7
6398 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
6399 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
6400 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
6401 ; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
6402 ; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
6403 ; GFX8-NEXT: v_or_b32_e32 v1, v9, v1
6404 ; GFX8-NEXT: v_or_b32_e32 v2, v6, v2
6405 ; GFX8-NEXT: v_or_b32_e32 v3, v10, v3
6406 ; GFX8-NEXT: ; return to shader part epilog
6408 ; GFX9-LABEL: v_fshl_i128_ssv:
6410 ; GFX9-NEXT: v_and_b32_e32 v6, 0x7f, v0
6411 ; GFX9-NEXT: v_not_b32_e32 v0, v0
6412 ; GFX9-NEXT: v_and_b32_e32 v7, 0x7f, v0
6413 ; GFX9-NEXT: v_sub_u32_e32 v0, 64, v6
6414 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, s[0:1]
6415 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v6, s[2:3]
6416 ; GFX9-NEXT: v_subrev_u32_e32 v8, 64, v6
6417 ; GFX9-NEXT: v_lshlrev_b64 v[4:5], v6, s[0:1]
6418 ; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
6419 ; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
6420 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v8, s[0:1]
6421 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
6422 ; GFX9-NEXT: s_mov_b32 s8, 0
6423 ; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc
6424 ; GFX9-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
6425 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6426 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6427 ; GFX9-NEXT: v_mov_b32_e32 v2, s2
6428 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6
6429 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[4:5], 1
6430 ; GFX9-NEXT: s_lshl_b32 s9, s6, 31
6431 ; GFX9-NEXT: v_mov_b32_e32 v3, s3
6432 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v2, vcc
6433 ; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
6434 ; GFX9-NEXT: s_lshr_b64 s[2:3], s[6:7], 1
6435 ; GFX9-NEXT: v_sub_u32_e32 v2, 64, v7
6436 ; GFX9-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc
6437 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v7, s[0:1]
6438 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, s[2:3]
6439 ; GFX9-NEXT: v_subrev_u32_e32 v11, 64, v7
6440 ; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
6441 ; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
6442 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v11, s[2:3]
6443 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v7, s[2:3]
6444 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
6445 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6446 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6447 ; GFX9-NEXT: v_mov_b32_e32 v2, s0
6448 ; GFX9-NEXT: v_mov_b32_e32 v3, s1
6449 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v7
6450 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
6451 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
6452 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
6453 ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
6454 ; GFX9-NEXT: v_or_b32_e32 v0, v8, v0
6455 ; GFX9-NEXT: v_or_b32_e32 v1, v9, v1
6456 ; GFX9-NEXT: v_or_b32_e32 v2, v6, v2
6457 ; GFX9-NEXT: v_or_b32_e32 v3, v10, v3
6458 ; GFX9-NEXT: ; return to shader part epilog
6460 ; GFX10-LABEL: v_fshl_i128_ssv:
6462 ; GFX10-NEXT: v_and_b32_e32 v12, 0x7f, v0
6463 ; GFX10-NEXT: v_not_b32_e32 v0, v0
6464 ; GFX10-NEXT: s_mov_b32 s8, 0
6465 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], 1
6466 ; GFX10-NEXT: s_lshl_b32 s9, s6, 31
6467 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v12
6468 ; GFX10-NEXT: v_and_b32_e32 v13, 0x7f, v0
6469 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v12, s[2:3]
6470 ; GFX10-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9]
6471 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], 1
6472 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1]
6473 ; GFX10-NEXT: v_sub_nc_u32_e32 v8, 64, v13
6474 ; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v12
6475 ; GFX10-NEXT: v_lshrrev_b64 v[6:7], v13, s[8:9]
6476 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12
6477 ; GFX10-NEXT: v_lshlrev_b64 v[4:5], v12, s[0:1]
6478 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v0
6479 ; GFX10-NEXT: v_subrev_nc_u32_e32 v0, 64, v13
6480 ; GFX10-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7]
6481 ; GFX10-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1]
6482 ; GFX10-NEXT: v_or_b32_e32 v3, v3, v1
6483 ; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 64, v13
6484 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v0, s[6:7]
6485 ; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 0, v13
6486 ; GFX10-NEXT: v_or_b32_e32 v6, v6, v8
6487 ; GFX10-NEXT: v_or_b32_e32 v7, v7, v9
6488 ; GFX10-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo
6489 ; GFX10-NEXT: v_cndmask_b32_e32 v10, v11, v3, vcc_lo
6490 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7]
6491 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v6, s0
6492 ; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 0, v12
6493 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v7, s0
6494 ; GFX10-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc_lo
6495 ; GFX10-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc_lo
6496 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s8, s1
6497 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v8, s2, s4
6498 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v10, s3, s4
6499 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s9, s1
6500 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0
6501 ; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0
6502 ; GFX10-NEXT: v_or_b32_e32 v0, v4, v0
6503 ; GFX10-NEXT: v_or_b32_e32 v1, v5, v1
6504 ; GFX10-NEXT: v_or_b32_e32 v2, v6, v2
6505 ; GFX10-NEXT: v_or_b32_e32 v3, v7, v3
6506 ; GFX10-NEXT: ; return to shader part epilog
6508 ; GFX11-LABEL: v_fshl_i128_ssv:
6510 ; GFX11-NEXT: v_and_b32_e32 v12, 0x7f, v0
6511 ; GFX11-NEXT: v_not_b32_e32 v0, v0
6512 ; GFX11-NEXT: s_mov_b32 s8, 0
6513 ; GFX11-NEXT: s_lshr_b64 s[4:5], s[4:5], 1
6514 ; GFX11-NEXT: s_lshl_b32 s9, s6, 31
6515 ; GFX11-NEXT: v_lshlrev_b64 v[4:5], v12, s[0:1]
6516 ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12
6517 ; GFX11-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9]
6518 ; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], 1
6519 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_4)
6520 ; GFX11-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc_lo
6521 ; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v12
6522 ; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v12
6523 ; GFX11-NEXT: v_cmp_eq_u32_e64 s4, 0, v12
6524 ; GFX11-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc_lo
6525 ; GFX11-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1]
6526 ; GFX11-NEXT: v_and_b32_e32 v13, 0x7f, v0
6527 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v12, s[2:3]
6528 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1]
6529 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
6530 ; GFX11-NEXT: v_sub_nc_u32_e32 v8, 64, v13
6531 ; GFX11-NEXT: v_lshrrev_b64 v[6:7], v13, s[8:9]
6532 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v0
6533 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6534 ; GFX11-NEXT: v_or_b32_e32 v3, v3, v1
6535 ; GFX11-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7]
6536 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
6537 ; GFX11-NEXT: v_or_b32_e32 v6, v6, v8
6538 ; GFX11-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo
6539 ; GFX11-NEXT: v_subrev_nc_u32_e32 v0, 64, v13
6540 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
6541 ; GFX11-NEXT: v_or_b32_e32 v7, v7, v9
6542 ; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v13
6543 ; GFX11-NEXT: v_cndmask_b32_e32 v10, v11, v3, vcc_lo
6544 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7]
6545 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], v0, s[6:7]
6546 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v13
6547 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
6548 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0
6549 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v6, s0
6550 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
6551 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v7, s0
6552 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v8, s2, s4
6553 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v10, s3, s4
6554 ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0
6555 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s8, s1
6556 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s9, s1
6557 ; GFX11-NEXT: v_or_b32_e32 v2, v6, v2
6558 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6559 ; GFX11-NEXT: v_or_b32_e32 v3, v7, v3
6560 ; GFX11-NEXT: v_or_b32_e32 v0, v4, v0
6561 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
6562 ; GFX11-NEXT: v_or_b32_e32 v1, v5, v1
6563 ; GFX11-NEXT: ; return to shader part epilog
; IR under test: one fshl call; the bitcast only changes the value's type to
; match the <4 x float> return type.
6564 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt)
6565 %cast.result = bitcast i128 %result to <4 x float>
6566 ret <4 x float> %cast.result
; v_fshl_i128_svs: 128-bit funnel shift left (llvm.fshl.i128) in an amdgpu_ps
; function where %lhs and %amt are marked inreg and %rhs is not. The i128
; result is bitcast to <4 x float> for the return value.
;
; What the expected code below shows, per check prefix (GFX6/8/9/10/11):
;   * %lhs is read from s[0:3], %amt from s[4:5], and %rhs from v[0:3].
;   * The amount is masked with 0x7f for the left shift of %lhs; the
;     and-not of the amount with 0x7f (s_andn2_b64, spelled s_and_not1_b64
;     on GFX11) is the right-shift amount applied to %rhs after %rhs has
;     first been shifted right by 1.
;   * The %lhs half is computed with scalar shifts and s_cselect_b64, while
;     the %rhs half uses vector shifts and v_cndmask; the scalar compare
;     results are turned into vector condition masks via s_cselect_b32 1, 0
;     followed by v_cmp_ne_u32. The two halves are ORed together at the end.
;
; NOTE(review): the "svs" suffix presumably encodes scalar/vector/scalar
; operand placement for lhs/rhs/amt - confirm against the sibling tests.
; NOTE(review): the file header states these assertions are autogenerated by
; utils/update_llc_test_checks.py; presumably they should be regenerated with
; that script rather than edited by hand.
6569 define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) {
6570 ; GFX6-LABEL: v_fshl_i128_svs:
6572 ; GFX6-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6573 ; GFX6-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6574 ; GFX6-NEXT: s_sub_i32 s5, s6, 64
6575 ; GFX6-NEXT: s_sub_i32 s7, 64, s6
6576 ; GFX6-NEXT: s_cmp_lt_u32 s6, 64
6577 ; GFX6-NEXT: s_cselect_b32 s12, 1, 0
6578 ; GFX6-NEXT: s_cmp_eq_u32 s6, 0
6579 ; GFX6-NEXT: s_cselect_b32 s13, 1, 0
6580 ; GFX6-NEXT: s_lshl_b64 s[8:9], s[0:1], s6
6581 ; GFX6-NEXT: s_lshr_b64 s[10:11], s[0:1], s7
6582 ; GFX6-NEXT: s_lshl_b64 s[6:7], s[2:3], s6
6583 ; GFX6-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7]
6584 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s5
6585 ; GFX6-NEXT: s_cmp_lg_u32 s12, 0
6586 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], 0
6587 ; GFX6-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1]
6588 ; GFX6-NEXT: s_cmp_lg_u32 s13, 0
6589 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], 1
6590 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
6591 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 31, v2
6592 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], 1
6593 ; GFX6-NEXT: s_sub_i32 s0, s4, 64
6594 ; GFX6-NEXT: s_sub_i32 s1, 64, s4
6595 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v4
6596 ; GFX6-NEXT: s_cmp_lt_u32 s4, 64
6597 ; GFX6-NEXT: s_cselect_b32 s5, 1, 0
6598 ; GFX6-NEXT: s_cmp_eq_u32 s4, 0
6599 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s4
6600 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s1
6601 ; GFX6-NEXT: s_cselect_b32 s6, 1, 0
6602 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], s4
6603 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], s0
6604 ; GFX6-NEXT: s_and_b32 s0, 1, s5
6605 ; GFX6-NEXT: v_or_b32_e32 v4, v4, v6
6606 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v7
6607 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
6608 ; GFX6-NEXT: s_and_b32 s0, 1, s6
6609 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
6610 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
6611 ; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
6612 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1]
6613 ; GFX6-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1]
6614 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
6615 ; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
6616 ; GFX6-NEXT: v_or_b32_e32 v0, s8, v0
6617 ; GFX6-NEXT: v_or_b32_e32 v1, s9, v1
6618 ; GFX6-NEXT: v_or_b32_e32 v2, s2, v2
6619 ; GFX6-NEXT: v_or_b32_e32 v3, s3, v3
6620 ; GFX6-NEXT: ; return to shader part epilog
6622 ; GFX8-LABEL: v_fshl_i128_svs:
6624 ; GFX8-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6625 ; GFX8-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6626 ; GFX8-NEXT: s_sub_i32 s5, s6, 64
6627 ; GFX8-NEXT: s_sub_i32 s7, 64, s6
6628 ; GFX8-NEXT: s_cmp_lt_u32 s6, 64
6629 ; GFX8-NEXT: s_cselect_b32 s12, 1, 0
6630 ; GFX8-NEXT: s_cmp_eq_u32 s6, 0
6631 ; GFX8-NEXT: s_cselect_b32 s13, 1, 0
6632 ; GFX8-NEXT: s_lshl_b64 s[8:9], s[0:1], s6
6633 ; GFX8-NEXT: s_lshr_b64 s[10:11], s[0:1], s7
6634 ; GFX8-NEXT: s_lshl_b64 s[6:7], s[2:3], s6
6635 ; GFX8-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7]
6636 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s5
6637 ; GFX8-NEXT: s_cmp_lg_u32 s12, 0
6638 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], 0
6639 ; GFX8-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1]
6640 ; GFX8-NEXT: s_cmp_lg_u32 s13, 0
6641 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
6642 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
6643 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 31, v2
6644 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
6645 ; GFX8-NEXT: s_sub_i32 s0, s4, 64
6646 ; GFX8-NEXT: s_sub_i32 s1, 64, s4
6647 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
6648 ; GFX8-NEXT: s_cmp_lt_u32 s4, 64
6649 ; GFX8-NEXT: s_cselect_b32 s5, 1, 0
6650 ; GFX8-NEXT: s_cmp_eq_u32 s4, 0
6651 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1]
6652 ; GFX8-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3]
6653 ; GFX8-NEXT: s_cselect_b32 s6, 1, 0
6654 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], s4, v[2:3]
6655 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], s0, v[2:3]
6656 ; GFX8-NEXT: s_and_b32 s0, 1, s5
6657 ; GFX8-NEXT: v_or_b32_e32 v4, v4, v6
6658 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v7
6659 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
6660 ; GFX8-NEXT: s_and_b32 s0, 1, s6
6661 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
6662 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
6663 ; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
6664 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1]
6665 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1]
6666 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
6667 ; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
6668 ; GFX8-NEXT: v_or_b32_e32 v0, s8, v0
6669 ; GFX8-NEXT: v_or_b32_e32 v1, s9, v1
6670 ; GFX8-NEXT: v_or_b32_e32 v2, s2, v2
6671 ; GFX8-NEXT: v_or_b32_e32 v3, s3, v3
6672 ; GFX8-NEXT: ; return to shader part epilog
6674 ; GFX9-LABEL: v_fshl_i128_svs:
6676 ; GFX9-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6677 ; GFX9-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6678 ; GFX9-NEXT: s_sub_i32 s5, s6, 64
6679 ; GFX9-NEXT: s_sub_i32 s7, 64, s6
6680 ; GFX9-NEXT: s_cmp_lt_u32 s6, 64
6681 ; GFX9-NEXT: s_cselect_b32 s12, 1, 0
6682 ; GFX9-NEXT: s_cmp_eq_u32 s6, 0
6683 ; GFX9-NEXT: s_cselect_b32 s13, 1, 0
6684 ; GFX9-NEXT: s_lshl_b64 s[8:9], s[0:1], s6
6685 ; GFX9-NEXT: s_lshr_b64 s[10:11], s[0:1], s7
6686 ; GFX9-NEXT: s_lshl_b64 s[6:7], s[2:3], s6
6687 ; GFX9-NEXT: s_or_b64 s[6:7], s[10:11], s[6:7]
6688 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s5
6689 ; GFX9-NEXT: s_cmp_lg_u32 s12, 0
6690 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
6691 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], 0
6692 ; GFX9-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1]
6693 ; GFX9-NEXT: s_cmp_lg_u32 s13, 0
6694 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
6695 ; GFX9-NEXT: v_lshl_or_b32 v1, v2, 31, v1
6696 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
6697 ; GFX9-NEXT: s_sub_i32 s0, s4, 64
6698 ; GFX9-NEXT: s_sub_i32 s1, 64, s4
6699 ; GFX9-NEXT: s_cmp_lt_u32 s4, 64
6700 ; GFX9-NEXT: s_cselect_b32 s5, 1, 0
6701 ; GFX9-NEXT: s_cmp_eq_u32 s4, 0
6702 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1]
6703 ; GFX9-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3]
6704 ; GFX9-NEXT: s_cselect_b32 s6, 1, 0
6705 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], s4, v[2:3]
6706 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], s0, v[2:3]
6707 ; GFX9-NEXT: s_and_b32 s0, 1, s5
6708 ; GFX9-NEXT: v_or_b32_e32 v4, v4, v6
6709 ; GFX9-NEXT: v_or_b32_e32 v5, v5, v7
6710 ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
6711 ; GFX9-NEXT: s_and_b32 s0, 1, s6
6712 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
6713 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
6714 ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
6715 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1]
6716 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1]
6717 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
6718 ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
6719 ; GFX9-NEXT: v_or_b32_e32 v0, s8, v0
6720 ; GFX9-NEXT: v_or_b32_e32 v1, s9, v1
6721 ; GFX9-NEXT: v_or_b32_e32 v2, s2, v2
6722 ; GFX9-NEXT: v_or_b32_e32 v3, s3, v3
6723 ; GFX9-NEXT: ; return to shader part epilog
6725 ; GFX10-LABEL: v_fshl_i128_svs:
6727 ; GFX10-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6728 ; GFX10-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6729 ; GFX10-NEXT: s_sub_i32 s5, s6, 64
6730 ; GFX10-NEXT: s_sub_i32 s7, 64, s6
6731 ; GFX10-NEXT: s_cmp_lt_u32 s6, 64
6732 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
6733 ; GFX10-NEXT: s_cselect_b32 s12, 1, 0
6734 ; GFX10-NEXT: s_cmp_eq_u32 s6, 0
6735 ; GFX10-NEXT: s_cselect_b32 s13, 1, 0
6736 ; GFX10-NEXT: s_lshr_b64 s[8:9], s[0:1], s7
6737 ; GFX10-NEXT: s_lshl_b64 s[10:11], s[2:3], s6
6738 ; GFX10-NEXT: s_lshl_b64 s[6:7], s[0:1], s6
6739 ; GFX10-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11]
6740 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s5
6741 ; GFX10-NEXT: s_cmp_lg_u32 s12, 0
6742 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 31, v1
6743 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
6744 ; GFX10-NEXT: s_cselect_b64 s[6:7], s[6:7], 0
6745 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1]
6746 ; GFX10-NEXT: s_cmp_lg_u32 s13, 0
6747 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1]
6748 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
6749 ; GFX10-NEXT: s_sub_i32 s0, 64, s4
6750 ; GFX10-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3]
6751 ; GFX10-NEXT: s_sub_i32 s0, s4, 64
6752 ; GFX10-NEXT: s_cmp_lt_u32 s4, 64
6753 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3]
6754 ; GFX10-NEXT: s_cselect_b32 s1, 1, 0
6755 ; GFX10-NEXT: s_cmp_eq_u32 s4, 0
6756 ; GFX10-NEXT: v_or_b32_e32 v4, v4, v6
6757 ; GFX10-NEXT: s_cselect_b32 s5, 1, 0
6758 ; GFX10-NEXT: s_and_b32 s0, 1, s1
6759 ; GFX10-NEXT: v_or_b32_e32 v5, v5, v7
6760 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
6761 ; GFX10-NEXT: s_and_b32 s0, 1, s5
6762 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3]
6763 ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
6764 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc_lo
6765 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc_lo
6766 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc_lo
6767 ; GFX10-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc_lo
6768 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, v0, s0
6769 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0
6770 ; GFX10-NEXT: v_or_b32_e32 v2, s2, v2
6771 ; GFX10-NEXT: v_or_b32_e32 v3, s3, v3
6772 ; GFX10-NEXT: v_or_b32_e32 v0, s6, v0
6773 ; GFX10-NEXT: v_or_b32_e32 v1, s7, v1
6774 ; GFX10-NEXT: ; return to shader part epilog
6776 ; GFX11-LABEL: v_fshl_i128_svs:
6778 ; GFX11-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6779 ; GFX11-NEXT: s_and_not1_b64 s[4:5], 0x7f, s[4:5]
6780 ; GFX11-NEXT: s_sub_i32 s5, s6, 64
6781 ; GFX11-NEXT: s_sub_i32 s7, 64, s6
6782 ; GFX11-NEXT: s_cmp_lt_u32 s6, 64
6783 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]
6784 ; GFX11-NEXT: s_cselect_b32 s12, 1, 0
6785 ; GFX11-NEXT: s_cmp_eq_u32 s6, 0
6786 ; GFX11-NEXT: s_cselect_b32 s13, 1, 0
6787 ; GFX11-NEXT: s_lshr_b64 s[8:9], s[0:1], s7
6788 ; GFX11-NEXT: s_lshl_b64 s[10:11], s[2:3], s6
6789 ; GFX11-NEXT: s_lshl_b64 s[6:7], s[0:1], s6
6790 ; GFX11-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11]
6791 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s5
6792 ; GFX11-NEXT: s_cmp_lg_u32 s12, 0
6793 ; GFX11-NEXT: v_lshl_or_b32 v1, v2, 31, v1
6794 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3]
6795 ; GFX11-NEXT: s_cselect_b64 s[6:7], s[6:7], 0
6796 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1]
6797 ; GFX11-NEXT: s_cmp_lg_u32 s13, 0
6798 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], s4, v[0:1]
6799 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
6800 ; GFX11-NEXT: s_sub_i32 s0, 64, s4
6801 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
6802 ; GFX11-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3]
6803 ; GFX11-NEXT: s_sub_i32 s0, s4, 64
6804 ; GFX11-NEXT: s_cmp_lt_u32 s4, 64
6805 ; GFX11-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3]
6806 ; GFX11-NEXT: s_cselect_b32 s1, 1, 0
6807 ; GFX11-NEXT: s_cmp_eq_u32 s4, 0
6808 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v6
6809 ; GFX11-NEXT: s_cselect_b32 s5, 1, 0
6810 ; GFX11-NEXT: s_and_b32 s0, 1, s1
6811 ; GFX11-NEXT: v_or_b32_e32 v5, v5, v7
6812 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
6813 ; GFX11-NEXT: s_and_b32 s0, 1, s5
6814 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3]
6815 ; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
6816 ; GFX11-NEXT: v_dual_cndmask_b32 v4, v8, v4 :: v_dual_cndmask_b32 v5, v9, v5
6817 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
6818 ; GFX11-NEXT: v_dual_cndmask_b32 v2, 0, v2 :: v_dual_cndmask_b32 v3, 0, v3
6819 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, v0, s0
6820 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
6821 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0
6822 ; GFX11-NEXT: v_or_b32_e32 v2, s2, v2
6823 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6824 ; GFX11-NEXT: v_or_b32_e32 v3, s3, v3
6825 ; GFX11-NEXT: v_or_b32_e32 v0, s6, v0
6826 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
6827 ; GFX11-NEXT: v_or_b32_e32 v1, s7, v1
6828 ; GFX11-NEXT: ; return to shader part epilog
; IR under test: one fshl call; the bitcast only changes the value's type to
; match the <4 x float> return type.
6829 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt)
6830 %cast.result = bitcast i128 %result to <4 x float>
6831 ret <4 x float> %cast.result
6834 define amdgpu_ps <4 x float> @v_fshl_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) {
6835 ; GFX6-LABEL: v_fshl_i128_vss:
6837 ; GFX6-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6838 ; GFX6-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6839 ; GFX6-NEXT: s_sub_i32 s5, s6, 64
6840 ; GFX6-NEXT: s_sub_i32 s7, 64, s6
6841 ; GFX6-NEXT: s_cmp_lt_u32 s6, 64
6842 ; GFX6-NEXT: s_cselect_b32 s9, 1, 0
6843 ; GFX6-NEXT: s_cmp_eq_u32 s6, 0
6844 ; GFX6-NEXT: s_mov_b32 s8, 0
6845 ; GFX6-NEXT: s_cselect_b32 s10, 1, 0
6846 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s7
6847 ; GFX6-NEXT: v_lshl_b64 v[8:9], v[0:1], s6
6848 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s5
6849 ; GFX6-NEXT: s_and_b32 s5, 1, s9
6850 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
6851 ; GFX6-NEXT: s_lshl_b32 s9, s2, 31
6852 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s6
6853 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5
6854 ; GFX6-NEXT: s_and_b32 s5, 1, s10
6855 ; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
6856 ; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], 1
6857 ; GFX6-NEXT: s_sub_i32 s10, s4, 64
6858 ; GFX6-NEXT: s_sub_i32 s8, 64, s4
6859 ; GFX6-NEXT: s_cmp_lt_u32 s4, 64
6860 ; GFX6-NEXT: v_or_b32_e32 v4, v4, v6
6861 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v7
6862 ; GFX6-NEXT: s_cselect_b32 s11, 1, 0
6863 ; GFX6-NEXT: s_cmp_eq_u32 s4, 0
6864 ; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc
6865 ; GFX6-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc
6866 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
6867 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
6868 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5
6869 ; GFX6-NEXT: s_cselect_b32 s12, 1, 0
6870 ; GFX6-NEXT: s_lshr_b64 s[6:7], s[2:3], s4
6871 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[0:1], s4
6872 ; GFX6-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
6873 ; GFX6-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
6874 ; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
6875 ; GFX6-NEXT: s_cmp_lg_u32 s11, 0
6876 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
6877 ; GFX6-NEXT: s_cmp_lg_u32 s12, 0
6878 ; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
6879 ; GFX6-NEXT: s_cmp_lg_u32 s11, 0
6880 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
6881 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
6882 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[6:7], 0
6883 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v6
6884 ; GFX6-NEXT: v_or_b32_e32 v1, s1, v7
6885 ; GFX6-NEXT: v_or_b32_e32 v2, s2, v2
6886 ; GFX6-NEXT: v_or_b32_e32 v3, s3, v3
6887 ; GFX6-NEXT: ; return to shader part epilog
6889 ; GFX8-LABEL: v_fshl_i128_vss:
6891 ; GFX8-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6892 ; GFX8-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6893 ; GFX8-NEXT: s_sub_i32 s5, s6, 64
6894 ; GFX8-NEXT: s_sub_i32 s7, 64, s6
6895 ; GFX8-NEXT: s_cmp_lt_u32 s6, 64
6896 ; GFX8-NEXT: s_cselect_b32 s9, 1, 0
6897 ; GFX8-NEXT: s_cmp_eq_u32 s6, 0
6898 ; GFX8-NEXT: s_mov_b32 s8, 0
6899 ; GFX8-NEXT: s_cselect_b32 s10, 1, 0
6900 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], s7, v[0:1]
6901 ; GFX8-NEXT: v_lshlrev_b64 v[8:9], s6, v[0:1]
6902 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1]
6903 ; GFX8-NEXT: s_and_b32 s5, 1, s9
6904 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
6905 ; GFX8-NEXT: s_lshl_b32 s9, s2, 31
6906 ; GFX8-NEXT: v_lshlrev_b64 v[6:7], s6, v[2:3]
6907 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5
6908 ; GFX8-NEXT: s_and_b32 s5, 1, s10
6909 ; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
6910 ; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], 1
6911 ; GFX8-NEXT: s_sub_i32 s10, s4, 64
6912 ; GFX8-NEXT: s_sub_i32 s8, 64, s4
6913 ; GFX8-NEXT: s_cmp_lt_u32 s4, 64
6914 ; GFX8-NEXT: v_or_b32_e32 v4, v4, v6
6915 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v7
6916 ; GFX8-NEXT: s_cselect_b32 s11, 1, 0
6917 ; GFX8-NEXT: s_cmp_eq_u32 s4, 0
6918 ; GFX8-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc
6919 ; GFX8-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc
6920 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
6921 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
6922 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5
6923 ; GFX8-NEXT: s_cselect_b32 s12, 1, 0
6924 ; GFX8-NEXT: s_lshr_b64 s[6:7], s[2:3], s4
6925 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[0:1], s4
6926 ; GFX8-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
6927 ; GFX8-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
6928 ; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
6929 ; GFX8-NEXT: s_cmp_lg_u32 s11, 0
6930 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
6931 ; GFX8-NEXT: s_cmp_lg_u32 s12, 0
6932 ; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
6933 ; GFX8-NEXT: s_cmp_lg_u32 s11, 0
6934 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
6935 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
6936 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[6:7], 0
6937 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v6
6938 ; GFX8-NEXT: v_or_b32_e32 v1, s1, v7
6939 ; GFX8-NEXT: v_or_b32_e32 v2, s2, v2
6940 ; GFX8-NEXT: v_or_b32_e32 v3, s3, v3
6941 ; GFX8-NEXT: ; return to shader part epilog
6943 ; GFX9-LABEL: v_fshl_i128_vss:
6945 ; GFX9-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6946 ; GFX9-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6947 ; GFX9-NEXT: s_sub_i32 s5, s6, 64
6948 ; GFX9-NEXT: s_sub_i32 s7, 64, s6
6949 ; GFX9-NEXT: s_cmp_lt_u32 s6, 64
6950 ; GFX9-NEXT: s_cselect_b32 s9, 1, 0
6951 ; GFX9-NEXT: s_cmp_eq_u32 s6, 0
6952 ; GFX9-NEXT: s_mov_b32 s8, 0
6953 ; GFX9-NEXT: s_cselect_b32 s10, 1, 0
6954 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], s7, v[0:1]
6955 ; GFX9-NEXT: v_lshlrev_b64 v[8:9], s6, v[0:1]
6956 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1]
6957 ; GFX9-NEXT: s_and_b32 s5, 1, s9
6958 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
6959 ; GFX9-NEXT: s_lshl_b32 s9, s2, 31
6960 ; GFX9-NEXT: v_lshlrev_b64 v[6:7], s6, v[2:3]
6961 ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5
6962 ; GFX9-NEXT: s_and_b32 s5, 1, s10
6963 ; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9]
6964 ; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], 1
6965 ; GFX9-NEXT: s_sub_i32 s10, s4, 64
6966 ; GFX9-NEXT: s_sub_i32 s8, 64, s4
6967 ; GFX9-NEXT: s_cmp_lt_u32 s4, 64
6968 ; GFX9-NEXT: v_or_b32_e32 v4, v4, v6
6969 ; GFX9-NEXT: v_or_b32_e32 v5, v5, v7
6970 ; GFX9-NEXT: s_cselect_b32 s11, 1, 0
6971 ; GFX9-NEXT: s_cmp_eq_u32 s4, 0
6972 ; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc
6973 ; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc
6974 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
6975 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
6976 ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5
6977 ; GFX9-NEXT: s_cselect_b32 s12, 1, 0
6978 ; GFX9-NEXT: s_lshr_b64 s[6:7], s[2:3], s4
6979 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[0:1], s4
6980 ; GFX9-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
6981 ; GFX9-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
6982 ; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
6983 ; GFX9-NEXT: s_cmp_lg_u32 s11, 0
6984 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
6985 ; GFX9-NEXT: s_cmp_lg_u32 s12, 0
6986 ; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
6987 ; GFX9-NEXT: s_cmp_lg_u32 s11, 0
6988 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
6989 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
6990 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[6:7], 0
6991 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v6
6992 ; GFX9-NEXT: v_or_b32_e32 v1, s1, v7
6993 ; GFX9-NEXT: v_or_b32_e32 v2, s2, v2
6994 ; GFX9-NEXT: v_or_b32_e32 v3, s3, v3
6995 ; GFX9-NEXT: ; return to shader part epilog
6997 ; GFX10-LABEL: v_fshl_i128_vss:
6999 ; GFX10-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
7000 ; GFX10-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
7001 ; GFX10-NEXT: s_sub_i32 s5, s6, 64
7002 ; GFX10-NEXT: s_sub_i32 s7, 64, s6
7003 ; GFX10-NEXT: s_cmp_lt_u32 s6, 64
7004 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], s7, v[0:1]
7005 ; GFX10-NEXT: s_cselect_b32 s8, 1, 0
7006 ; GFX10-NEXT: s_cmp_eq_u32 s6, 0
7007 ; GFX10-NEXT: v_lshlrev_b64 v[6:7], s6, v[2:3]
7008 ; GFX10-NEXT: s_cselect_b32 s9, 1, 0
7009 ; GFX10-NEXT: v_lshlrev_b64 v[8:9], s6, v[0:1]
7010 ; GFX10-NEXT: s_and_b32 s6, 1, s8
7011 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
7012 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s6
7013 ; GFX10-NEXT: s_mov_b32 s6, 0
7014 ; GFX10-NEXT: s_lshl_b32 s7, s2, 31
7015 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1]
7016 ; GFX10-NEXT: s_and_b32 s5, 1, s9
7017 ; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7]
7018 ; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], 1
7019 ; GFX10-NEXT: s_sub_i32 s10, s4, 64
7020 ; GFX10-NEXT: s_sub_i32 s8, 64, s4
7021 ; GFX10-NEXT: v_or_b32_e32 v4, v4, v6
7022 ; GFX10-NEXT: v_or_b32_e32 v5, v5, v7
7023 ; GFX10-NEXT: s_cmp_lt_u32 s4, 64
7024 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc_lo
7025 ; GFX10-NEXT: s_cselect_b32 s11, 1, 0
7026 ; GFX10-NEXT: s_cmp_eq_u32 s4, 0
7027 ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc_lo
7028 ; GFX10-NEXT: s_cselect_b32 s12, 1, 0
7029 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s4
7030 ; GFX10-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
7031 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
7032 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
7033 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s5
7034 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[2:3], s4
7035 ; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
7036 ; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
7037 ; GFX10-NEXT: s_cmp_lg_u32 s11, 0
7038 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc_lo
7039 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3]
7040 ; GFX10-NEXT: s_cmp_lg_u32 s12, 0
7041 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc_lo
7042 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
7043 ; GFX10-NEXT: s_cmp_lg_u32 s11, 0
7044 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v6
7045 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], 0
7046 ; GFX10-NEXT: v_or_b32_e32 v1, s1, v7
7047 ; GFX10-NEXT: v_or_b32_e32 v2, s2, v2
7048 ; GFX10-NEXT: v_or_b32_e32 v3, s3, v3
7049 ; GFX10-NEXT: ; return to shader part epilog
7051 ; GFX11-LABEL: v_fshl_i128_vss:
7053 ; GFX11-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
7054 ; GFX11-NEXT: s_and_not1_b64 s[4:5], 0x7f, s[4:5]
7055 ; GFX11-NEXT: s_sub_i32 s5, s6, 64
7056 ; GFX11-NEXT: s_sub_i32 s7, 64, s6
7057 ; GFX11-NEXT: s_cmp_lt_u32 s6, 64
7058 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], s7, v[0:1]
7059 ; GFX11-NEXT: s_cselect_b32 s8, 1, 0
7060 ; GFX11-NEXT: s_cmp_eq_u32 s6, 0
7061 ; GFX11-NEXT: v_lshlrev_b64 v[6:7], s6, v[2:3]
7062 ; GFX11-NEXT: s_cselect_b32 s9, 1, 0
7063 ; GFX11-NEXT: v_lshlrev_b64 v[8:9], s6, v[0:1]
7064 ; GFX11-NEXT: s_and_b32 s6, 1, s8
7065 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
7066 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s6
7067 ; GFX11-NEXT: s_mov_b32 s6, 0
7068 ; GFX11-NEXT: s_lshl_b32 s7, s2, 31
7069 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1]
7070 ; GFX11-NEXT: s_and_b32 s5, 1, s9
7071 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7]
7072 ; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], 1
7073 ; GFX11-NEXT: s_sub_i32 s10, s4, 64
7074 ; GFX11-NEXT: s_sub_i32 s8, 64, s4
7075 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v6
7076 ; GFX11-NEXT: v_or_b32_e32 v5, v5, v7
7077 ; GFX11-NEXT: s_cmp_lt_u32 s4, 64
7078 ; GFX11-NEXT: v_dual_cndmask_b32 v6, 0, v8 :: v_dual_cndmask_b32 v7, 0, v9
7079 ; GFX11-NEXT: s_cselect_b32 s11, 1, 0
7080 ; GFX11-NEXT: s_cmp_eq_u32 s4, 0
7081 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
7082 ; GFX11-NEXT: s_cselect_b32 s12, 1, 0
7083 ; GFX11-NEXT: s_lshr_b64 s[6:7], s[0:1], s4
7084 ; GFX11-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
7085 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s5
7086 ; GFX11-NEXT: s_lshr_b64 s[4:5], s[2:3], s4
7087 ; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
7088 ; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
7089 ; GFX11-NEXT: s_cmp_lg_u32 s11, 0
7090 ; GFX11-NEXT: v_dual_cndmask_b32 v2, v0, v2 :: v_dual_cndmask_b32 v3, v1, v3
7091 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3]
7092 ; GFX11-NEXT: s_cmp_lg_u32 s12, 0
7093 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
7094 ; GFX11-NEXT: s_cmp_lg_u32 s11, 0
7095 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v6
7096 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[4:5], 0
7097 ; GFX11-NEXT: v_or_b32_e32 v1, s1, v7
7098 ; GFX11-NEXT: v_or_b32_e32 v2, s2, v2
7099 ; GFX11-NEXT: v_or_b32_e32 v3, s3, v3
7100 ; GFX11-NEXT: ; return to shader part epilog
7101 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt)
7102 %cast.result = bitcast i128 %result to <4 x float>
7103 ret <4 x float> %cast.result
; Funnel-shift-left of two i128 values by the constant amount 65, via
; llvm.fshl.i128. Both operands are inreg arguments of an amdgpu_ps function.
; The expected output on every target is a short all-scalar sequence: each
; 64-bit half is shifted left by 1 and the bit carried out of the neighbouring
; half (s_lshr_b32 ..., 31) is ORed in.
7106 define amdgpu_ps i128 @s_fshl_i128_65(i128 inreg %lhs, i128 inreg %rhs) {
7107 ; GFX6-LABEL: s_fshl_i128_65:
7109 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
7110 ; GFX6-NEXT: s_lshr_b32 s4, s5, 31
7111 ; GFX6-NEXT: s_mov_b32 s5, 0
7112 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
7113 ; GFX6-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
7114 ; GFX6-NEXT: s_lshr_b32 s4, s7, 31
7115 ; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
7116 ; GFX6-NEXT: ; return to shader part epilog
7118 ; GFX8-LABEL: s_fshl_i128_65:
7120 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
7121 ; GFX8-NEXT: s_lshr_b32 s4, s5, 31
7122 ; GFX8-NEXT: s_mov_b32 s5, 0
7123 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
7124 ; GFX8-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
7125 ; GFX8-NEXT: s_lshr_b32 s4, s7, 31
7126 ; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
7127 ; GFX8-NEXT: ; return to shader part epilog
7129 ; GFX9-LABEL: s_fshl_i128_65:
7131 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
7132 ; GFX9-NEXT: s_lshr_b32 s4, s5, 31
7133 ; GFX9-NEXT: s_mov_b32 s5, 0
7134 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
7135 ; GFX9-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
7136 ; GFX9-NEXT: s_lshr_b32 s4, s7, 31
7137 ; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
7138 ; GFX9-NEXT: ; return to shader part epilog
7140 ; GFX10-LABEL: s_fshl_i128_65:
7142 ; GFX10-NEXT: s_lshr_b32 s2, s5, 31
7143 ; GFX10-NEXT: s_mov_b32 s3, 0
7144 ; GFX10-NEXT: s_lshl_b64 s[4:5], s[6:7], 1
7145 ; GFX10-NEXT: s_lshl_b64 s[8:9], s[0:1], 1
7146 ; GFX10-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
7147 ; GFX10-NEXT: s_lshr_b32 s2, s7, 31
7148 ; GFX10-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
7149 ; GFX10-NEXT: ; return to shader part epilog
7151 ; GFX11-LABEL: s_fshl_i128_65:
7153 ; GFX11-NEXT: s_lshr_b32 s2, s5, 31
7154 ; GFX11-NEXT: s_mov_b32 s3, 0
7155 ; GFX11-NEXT: s_lshl_b64 s[4:5], s[6:7], 1
7156 ; GFX11-NEXT: s_lshl_b64 s[8:9], s[0:1], 1
7157 ; GFX11-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
7158 ; GFX11-NEXT: s_lshr_b32 s2, s7, 31
7159 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
7160 ; GFX11-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
7161 ; GFX11-NEXT: ; return to shader part epilog
7162 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 65)
; Funnel-shift-left of two i128 values by the constant amount 65, via
; llvm.fshl.i128, with plain (non-inreg) arguments in a function that uses the
; default calling convention. The expected output is a vector-ALU sequence:
; two 64-bit shifts left by 1, two 32-bit shifts right by 31 to recover the
; carried-over bits, two ORs, then a return through s_setpc_b64.
7166 define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
7167 ; GFX6-LABEL: v_fshl_i128_65:
7169 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7170 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[0:1], 1
7171 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[6:7], 1
7172 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v5
7173 ; GFX6-NEXT: v_or_b32_e32 v0, v4, v0
7174 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v7
7175 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
7176 ; GFX6-NEXT: s_setpc_b64 s[30:31]
7178 ; GFX8-LABEL: v_fshl_i128_65:
7180 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7181 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1]
7182 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7]
7183 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v5
7184 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
7185 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v7
7186 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v4
7187 ; GFX8-NEXT: s_setpc_b64 s[30:31]
7189 ; GFX9-LABEL: v_fshl_i128_65:
7191 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7192 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1]
7193 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7]
7194 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v5
7195 ; GFX9-NEXT: v_or_b32_e32 v0, v4, v0
7196 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v7
7197 ; GFX9-NEXT: v_or_b32_e32 v2, v2, v4
7198 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7200 ; GFX10-LABEL: v_fshl_i128_65:
7202 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7203 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1]
7204 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7]
7205 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 31, v5
7206 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 31, v7
7207 ; GFX10-NEXT: v_or_b32_e32 v0, v4, v0
7208 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v5
7209 ; GFX10-NEXT: s_setpc_b64 s[30:31]
7211 ; GFX11-LABEL: v_fshl_i128_65:
7213 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7214 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1]
7215 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7]
7216 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 31, v5
7217 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 31, v7
7218 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7219 ; GFX11-NEXT: v_or_b32_e32 v0, v4, v0
7220 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v5
7221 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7222 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 65)
; Funnel-shift-left of <2 x i128> vectors by a variable per-element amount, via
; llvm.fshl.v2i128. All three operands are inreg arguments of an amdgpu_ps
; function and the <2 x i128> result is returned. The expected output on every
; target is all-scalar: each shift amount is masked with 0x7f, its complement
; is formed with an and-not against 0x7f, and 64-bit shift / compare / select
; sequences are emitted once for each of the two amounts.
7226 define amdgpu_ps <2 x i128> @s_fshl_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) {
7227 ; GFX6-LABEL: s_fshl_v2i128:
7229 ; GFX6-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f
7230 ; GFX6-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17]
7231 ; GFX6-NEXT: s_sub_i32 s17, s18, 64
7232 ; GFX6-NEXT: s_sub_i32 s19, 64, s18
7233 ; GFX6-NEXT: s_cmp_lt_u32 s18, 64
7234 ; GFX6-NEXT: s_cselect_b32 s23, 1, 0
7235 ; GFX6-NEXT: s_cmp_eq_u32 s18, 0
7236 ; GFX6-NEXT: s_cselect_b32 s28, 1, 0
7237 ; GFX6-NEXT: s_lshl_b64 s[24:25], s[0:1], s18
7238 ; GFX6-NEXT: s_lshr_b64 s[26:27], s[0:1], s19
7239 ; GFX6-NEXT: s_lshl_b64 s[18:19], s[2:3], s18
7240 ; GFX6-NEXT: s_or_b64 s[18:19], s[26:27], s[18:19]
7241 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s17
7242 ; GFX6-NEXT: s_cmp_lg_u32 s23, 0
7243 ; GFX6-NEXT: s_cselect_b64 s[24:25], s[24:25], 0
7244 ; GFX6-NEXT: s_cselect_b64 s[0:1], s[18:19], s[0:1]
7245 ; GFX6-NEXT: s_cmp_lg_u32 s28, 0
7246 ; GFX6-NEXT: s_mov_b32 s22, 0
7247 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
7248 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[8:9], 1
7249 ; GFX6-NEXT: s_lshl_b32 s23, s10, 31
7250 ; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[22:23]
7251 ; GFX6-NEXT: s_lshr_b64 s[8:9], s[10:11], 1
7252 ; GFX6-NEXT: s_sub_i32 s23, s16, 64
7253 ; GFX6-NEXT: s_sub_i32 s18, 64, s16
7254 ; GFX6-NEXT: s_cmp_lt_u32 s16, 64
7255 ; GFX6-NEXT: s_cselect_b32 s26, 1, 0
7256 ; GFX6-NEXT: s_cmp_eq_u32 s16, 0
7257 ; GFX6-NEXT: s_cselect_b32 s27, 1, 0
7258 ; GFX6-NEXT: s_lshr_b64 s[10:11], s[8:9], s16
7259 ; GFX6-NEXT: s_lshr_b64 s[16:17], s[0:1], s16
7260 ; GFX6-NEXT: s_lshl_b64 s[18:19], s[8:9], s18
7261 ; GFX6-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19]
7262 ; GFX6-NEXT: s_lshr_b64 s[8:9], s[8:9], s23
7263 ; GFX6-NEXT: s_cmp_lg_u32 s26, 0
7264 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[16:17], s[8:9]
7265 ; GFX6-NEXT: s_cmp_lg_u32 s27, 0
7266 ; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9]
7267 ; GFX6-NEXT: s_cmp_lg_u32 s26, 0
7268 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[10:11], 0
7269 ; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
7270 ; GFX6-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f
7271 ; GFX6-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21]
7272 ; GFX6-NEXT: s_or_b64 s[0:1], s[24:25], s[0:1]
7273 ; GFX6-NEXT: s_sub_i32 s11, s8, 64
7274 ; GFX6-NEXT: s_sub_i32 s9, 64, s8
7275 ; GFX6-NEXT: s_cmp_lt_u32 s8, 64
7276 ; GFX6-NEXT: s_cselect_b32 s20, 1, 0
7277 ; GFX6-NEXT: s_cmp_eq_u32 s8, 0
7278 ; GFX6-NEXT: s_cselect_b32 s21, 1, 0
7279 ; GFX6-NEXT: s_lshl_b64 s[16:17], s[4:5], s8
7280 ; GFX6-NEXT: s_lshr_b64 s[18:19], s[4:5], s9
7281 ; GFX6-NEXT: s_lshl_b64 s[8:9], s[6:7], s8
7282 ; GFX6-NEXT: s_or_b64 s[8:9], s[18:19], s[8:9]
7283 ; GFX6-NEXT: s_lshl_b64 s[4:5], s[4:5], s11
7284 ; GFX6-NEXT: s_cmp_lg_u32 s20, 0
7285 ; GFX6-NEXT: s_cselect_b64 s[16:17], s[16:17], 0
7286 ; GFX6-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
7287 ; GFX6-NEXT: s_cmp_lg_u32 s21, 0
7288 ; GFX6-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5]
7289 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[12:13], 1
7290 ; GFX6-NEXT: s_lshl_b32 s23, s14, 31
7291 ; GFX6-NEXT: s_or_b64 s[4:5], s[4:5], s[22:23]
7292 ; GFX6-NEXT: s_lshr_b64 s[8:9], s[14:15], 1
7293 ; GFX6-NEXT: s_sub_i32 s18, s10, 64
7294 ; GFX6-NEXT: s_sub_i32 s14, 64, s10
7295 ; GFX6-NEXT: s_cmp_lt_u32 s10, 64
7296 ; GFX6-NEXT: s_cselect_b32 s19, 1, 0
7297 ; GFX6-NEXT: s_cmp_eq_u32 s10, 0
7298 ; GFX6-NEXT: s_cselect_b32 s20, 1, 0
7299 ; GFX6-NEXT: s_lshr_b64 s[12:13], s[8:9], s10
7300 ; GFX6-NEXT: s_lshr_b64 s[10:11], s[4:5], s10
7301 ; GFX6-NEXT: s_lshl_b64 s[14:15], s[8:9], s14
7302 ; GFX6-NEXT: s_or_b64 s[10:11], s[10:11], s[14:15]
7303 ; GFX6-NEXT: s_lshr_b64 s[8:9], s[8:9], s18
7304 ; GFX6-NEXT: s_cmp_lg_u32 s19, 0
7305 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[10:11], s[8:9]
7306 ; GFX6-NEXT: s_cmp_lg_u32 s20, 0
7307 ; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9]
7308 ; GFX6-NEXT: s_cmp_lg_u32 s19, 0
7309 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[12:13], 0
7310 ; GFX6-NEXT: s_or_b64 s[4:5], s[16:17], s[4:5]
7311 ; GFX6-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
7312 ; GFX6-NEXT: ; return to shader part epilog
7314 ; GFX8-LABEL: s_fshl_v2i128:
7316 ; GFX8-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f
7317 ; GFX8-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17]
7318 ; GFX8-NEXT: s_sub_i32 s17, s18, 64
7319 ; GFX8-NEXT: s_sub_i32 s19, 64, s18
7320 ; GFX8-NEXT: s_cmp_lt_u32 s18, 64
7321 ; GFX8-NEXT: s_cselect_b32 s23, 1, 0
7322 ; GFX8-NEXT: s_cmp_eq_u32 s18, 0
7323 ; GFX8-NEXT: s_cselect_b32 s28, 1, 0
7324 ; GFX8-NEXT: s_lshl_b64 s[24:25], s[0:1], s18
7325 ; GFX8-NEXT: s_lshr_b64 s[26:27], s[0:1], s19
7326 ; GFX8-NEXT: s_lshl_b64 s[18:19], s[2:3], s18
7327 ; GFX8-NEXT: s_or_b64 s[18:19], s[26:27], s[18:19]
7328 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s17
7329 ; GFX8-NEXT: s_cmp_lg_u32 s23, 0
7330 ; GFX8-NEXT: s_cselect_b64 s[24:25], s[24:25], 0
7331 ; GFX8-NEXT: s_cselect_b64 s[0:1], s[18:19], s[0:1]
7332 ; GFX8-NEXT: s_cmp_lg_u32 s28, 0
7333 ; GFX8-NEXT: s_mov_b32 s22, 0
7334 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
7335 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[8:9], 1
7336 ; GFX8-NEXT: s_lshl_b32 s23, s10, 31
7337 ; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[22:23]
7338 ; GFX8-NEXT: s_lshr_b64 s[8:9], s[10:11], 1
7339 ; GFX8-NEXT: s_sub_i32 s23, s16, 64
7340 ; GFX8-NEXT: s_sub_i32 s18, 64, s16
7341 ; GFX8-NEXT: s_cmp_lt_u32 s16, 64
7342 ; GFX8-NEXT: s_cselect_b32 s26, 1, 0
7343 ; GFX8-NEXT: s_cmp_eq_u32 s16, 0
7344 ; GFX8-NEXT: s_cselect_b32 s27, 1, 0
7345 ; GFX8-NEXT: s_lshr_b64 s[10:11], s[8:9], s16
7346 ; GFX8-NEXT: s_lshr_b64 s[16:17], s[0:1], s16
7347 ; GFX8-NEXT: s_lshl_b64 s[18:19], s[8:9], s18
7348 ; GFX8-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19]
7349 ; GFX8-NEXT: s_lshr_b64 s[8:9], s[8:9], s23
7350 ; GFX8-NEXT: s_cmp_lg_u32 s26, 0
7351 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[16:17], s[8:9]
7352 ; GFX8-NEXT: s_cmp_lg_u32 s27, 0
7353 ; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9]
7354 ; GFX8-NEXT: s_cmp_lg_u32 s26, 0
7355 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[10:11], 0
7356 ; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
7357 ; GFX8-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f
7358 ; GFX8-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21]
7359 ; GFX8-NEXT: s_or_b64 s[0:1], s[24:25], s[0:1]
7360 ; GFX8-NEXT: s_sub_i32 s11, s8, 64
7361 ; GFX8-NEXT: s_sub_i32 s9, 64, s8
7362 ; GFX8-NEXT: s_cmp_lt_u32 s8, 64
7363 ; GFX8-NEXT: s_cselect_b32 s20, 1, 0
7364 ; GFX8-NEXT: s_cmp_eq_u32 s8, 0
7365 ; GFX8-NEXT: s_cselect_b32 s21, 1, 0
7366 ; GFX8-NEXT: s_lshl_b64 s[16:17], s[4:5], s8
7367 ; GFX8-NEXT: s_lshr_b64 s[18:19], s[4:5], s9
7368 ; GFX8-NEXT: s_lshl_b64 s[8:9], s[6:7], s8
7369 ; GFX8-NEXT: s_or_b64 s[8:9], s[18:19], s[8:9]
7370 ; GFX8-NEXT: s_lshl_b64 s[4:5], s[4:5], s11
7371 ; GFX8-NEXT: s_cmp_lg_u32 s20, 0
7372 ; GFX8-NEXT: s_cselect_b64 s[16:17], s[16:17], 0
7373 ; GFX8-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
7374 ; GFX8-NEXT: s_cmp_lg_u32 s21, 0
7375 ; GFX8-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5]
7376 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[12:13], 1
7377 ; GFX8-NEXT: s_lshl_b32 s23, s14, 31
7378 ; GFX8-NEXT: s_or_b64 s[4:5], s[4:5], s[22:23]
7379 ; GFX8-NEXT: s_lshr_b64 s[8:9], s[14:15], 1
7380 ; GFX8-NEXT: s_sub_i32 s18, s10, 64
7381 ; GFX8-NEXT: s_sub_i32 s14, 64, s10
7382 ; GFX8-NEXT: s_cmp_lt_u32 s10, 64
7383 ; GFX8-NEXT: s_cselect_b32 s19, 1, 0
7384 ; GFX8-NEXT: s_cmp_eq_u32 s10, 0
7385 ; GFX8-NEXT: s_cselect_b32 s20, 1, 0
7386 ; GFX8-NEXT: s_lshr_b64 s[12:13], s[8:9], s10
7387 ; GFX8-NEXT: s_lshr_b64 s[10:11], s[4:5], s10
7388 ; GFX8-NEXT: s_lshl_b64 s[14:15], s[8:9], s14
7389 ; GFX8-NEXT: s_or_b64 s[10:11], s[10:11], s[14:15]
7390 ; GFX8-NEXT: s_lshr_b64 s[8:9], s[8:9], s18
7391 ; GFX8-NEXT: s_cmp_lg_u32 s19, 0
7392 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[10:11], s[8:9]
7393 ; GFX8-NEXT: s_cmp_lg_u32 s20, 0
7394 ; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9]
7395 ; GFX8-NEXT: s_cmp_lg_u32 s19, 0
7396 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[12:13], 0
7397 ; GFX8-NEXT: s_or_b64 s[4:5], s[16:17], s[4:5]
7398 ; GFX8-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
7399 ; GFX8-NEXT: ; return to shader part epilog
7401 ; GFX9-LABEL: s_fshl_v2i128:
7403 ; GFX9-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f
7404 ; GFX9-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17]
7405 ; GFX9-NEXT: s_sub_i32 s17, s18, 64
7406 ; GFX9-NEXT: s_sub_i32 s19, 64, s18
7407 ; GFX9-NEXT: s_cmp_lt_u32 s18, 64
7408 ; GFX9-NEXT: s_cselect_b32 s23, 1, 0
7409 ; GFX9-NEXT: s_cmp_eq_u32 s18, 0
7410 ; GFX9-NEXT: s_cselect_b32 s28, 1, 0
7411 ; GFX9-NEXT: s_lshl_b64 s[24:25], s[0:1], s18
7412 ; GFX9-NEXT: s_lshr_b64 s[26:27], s[0:1], s19
7413 ; GFX9-NEXT: s_lshl_b64 s[18:19], s[2:3], s18
7414 ; GFX9-NEXT: s_or_b64 s[18:19], s[26:27], s[18:19]
7415 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s17
7416 ; GFX9-NEXT: s_cmp_lg_u32 s23, 0
7417 ; GFX9-NEXT: s_cselect_b64 s[24:25], s[24:25], 0
7418 ; GFX9-NEXT: s_cselect_b64 s[0:1], s[18:19], s[0:1]
7419 ; GFX9-NEXT: s_cmp_lg_u32 s28, 0
7420 ; GFX9-NEXT: s_mov_b32 s22, 0
7421 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
7422 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[8:9], 1
7423 ; GFX9-NEXT: s_lshl_b32 s23, s10, 31
7424 ; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[22:23]
7425 ; GFX9-NEXT: s_lshr_b64 s[8:9], s[10:11], 1
7426 ; GFX9-NEXT: s_sub_i32 s23, s16, 64
7427 ; GFX9-NEXT: s_sub_i32 s18, 64, s16
7428 ; GFX9-NEXT: s_cmp_lt_u32 s16, 64
7429 ; GFX9-NEXT: s_cselect_b32 s26, 1, 0
7430 ; GFX9-NEXT: s_cmp_eq_u32 s16, 0
7431 ; GFX9-NEXT: s_cselect_b32 s27, 1, 0
7432 ; GFX9-NEXT: s_lshr_b64 s[10:11], s[8:9], s16
7433 ; GFX9-NEXT: s_lshr_b64 s[16:17], s[0:1], s16
7434 ; GFX9-NEXT: s_lshl_b64 s[18:19], s[8:9], s18
7435 ; GFX9-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19]
7436 ; GFX9-NEXT: s_lshr_b64 s[8:9], s[8:9], s23
7437 ; GFX9-NEXT: s_cmp_lg_u32 s26, 0
7438 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[16:17], s[8:9]
7439 ; GFX9-NEXT: s_cmp_lg_u32 s27, 0
7440 ; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9]
7441 ; GFX9-NEXT: s_cmp_lg_u32 s26, 0
7442 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[10:11], 0
7443 ; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
7444 ; GFX9-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f
7445 ; GFX9-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21]
7446 ; GFX9-NEXT: s_or_b64 s[0:1], s[24:25], s[0:1]
7447 ; GFX9-NEXT: s_sub_i32 s11, s8, 64
7448 ; GFX9-NEXT: s_sub_i32 s9, 64, s8
7449 ; GFX9-NEXT: s_cmp_lt_u32 s8, 64
7450 ; GFX9-NEXT: s_cselect_b32 s20, 1, 0
7451 ; GFX9-NEXT: s_cmp_eq_u32 s8, 0
7452 ; GFX9-NEXT: s_cselect_b32 s21, 1, 0
7453 ; GFX9-NEXT: s_lshl_b64 s[16:17], s[4:5], s8
7454 ; GFX9-NEXT: s_lshr_b64 s[18:19], s[4:5], s9
7455 ; GFX9-NEXT: s_lshl_b64 s[8:9], s[6:7], s8
7456 ; GFX9-NEXT: s_or_b64 s[8:9], s[18:19], s[8:9]
7457 ; GFX9-NEXT: s_lshl_b64 s[4:5], s[4:5], s11
7458 ; GFX9-NEXT: s_cmp_lg_u32 s20, 0
7459 ; GFX9-NEXT: s_cselect_b64 s[16:17], s[16:17], 0
7460 ; GFX9-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5]
7461 ; GFX9-NEXT: s_cmp_lg_u32 s21, 0
7462 ; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5]
7463 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[12:13], 1
7464 ; GFX9-NEXT: s_lshl_b32 s23, s14, 31
7465 ; GFX9-NEXT: s_or_b64 s[4:5], s[4:5], s[22:23]
7466 ; GFX9-NEXT: s_lshr_b64 s[8:9], s[14:15], 1
7467 ; GFX9-NEXT: s_sub_i32 s18, s10, 64
7468 ; GFX9-NEXT: s_sub_i32 s14, 64, s10
7469 ; GFX9-NEXT: s_cmp_lt_u32 s10, 64
7470 ; GFX9-NEXT: s_cselect_b32 s19, 1, 0
7471 ; GFX9-NEXT: s_cmp_eq_u32 s10, 0
7472 ; GFX9-NEXT: s_cselect_b32 s20, 1, 0
7473 ; GFX9-NEXT: s_lshr_b64 s[12:13], s[8:9], s10
7474 ; GFX9-NEXT: s_lshr_b64 s[10:11], s[4:5], s10
7475 ; GFX9-NEXT: s_lshl_b64 s[14:15], s[8:9], s14
7476 ; GFX9-NEXT: s_or_b64 s[10:11], s[10:11], s[14:15]
7477 ; GFX9-NEXT: s_lshr_b64 s[8:9], s[8:9], s18
7478 ; GFX9-NEXT: s_cmp_lg_u32 s19, 0
7479 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[10:11], s[8:9]
7480 ; GFX9-NEXT: s_cmp_lg_u32 s20, 0
7481 ; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9]
7482 ; GFX9-NEXT: s_cmp_lg_u32 s19, 0
7483 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[12:13], 0
7484 ; GFX9-NEXT: s_or_b64 s[4:5], s[16:17], s[4:5]
7485 ; GFX9-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
7486 ; GFX9-NEXT: ; return to shader part epilog
7488 ; GFX10-LABEL: s_fshl_v2i128:
7490 ; GFX10-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f
7491 ; GFX10-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17]
7492 ; GFX10-NEXT: s_sub_i32 s17, s18, 64
7493 ; GFX10-NEXT: s_sub_i32 s19, 64, s18
7494 ; GFX10-NEXT: s_cmp_lt_u32 s18, 64
7495 ; GFX10-NEXT: s_mov_b32 s22, 0
7496 ; GFX10-NEXT: s_cselect_b32 s23, 1, 0
7497 ; GFX10-NEXT: s_cmp_eq_u32 s18, 0
7498 ; GFX10-NEXT: s_cselect_b32 s28, 1, 0
7499 ; GFX10-NEXT: s_lshr_b64 s[24:25], s[0:1], s19
7500 ; GFX10-NEXT: s_lshl_b64 s[26:27], s[2:3], s18
7501 ; GFX10-NEXT: s_lshl_b64 s[18:19], s[0:1], s18
7502 ; GFX10-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27]
7503 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s17
7504 ; GFX10-NEXT: s_cmp_lg_u32 s23, 0
7505 ; GFX10-NEXT: s_cselect_b64 s[18:19], s[18:19], 0
7506 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1]
7507 ; GFX10-NEXT: s_cmp_lg_u32 s28, 0
7508 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
7509 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[8:9], 1
7510 ; GFX10-NEXT: s_lshl_b32 s23, s10, 31
7511 ; GFX10-NEXT: s_lshr_b64 s[8:9], s[10:11], 1
7512 ; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[22:23]
7513 ; GFX10-NEXT: s_sub_i32 s23, s16, 64
7514 ; GFX10-NEXT: s_sub_i32 s17, 64, s16
7515 ; GFX10-NEXT: s_cmp_lt_u32 s16, 64
7516 ; GFX10-NEXT: s_cselect_b32 s26, 1, 0
7517 ; GFX10-NEXT: s_cmp_eq_u32 s16, 0
7518 ; GFX10-NEXT: s_cselect_b32 s27, 1, 0
7519 ; GFX10-NEXT: s_lshr_b64 s[10:11], s[0:1], s16
7520 ; GFX10-NEXT: s_lshl_b64 s[24:25], s[8:9], s17
7521 ; GFX10-NEXT: s_lshr_b64 s[16:17], s[8:9], s16
7522 ; GFX10-NEXT: s_or_b64 s[10:11], s[10:11], s[24:25]
7523 ; GFX10-NEXT: s_lshr_b64 s[8:9], s[8:9], s23
7524 ; GFX10-NEXT: s_cmp_lg_u32 s26, 0
7525 ; GFX10-NEXT: s_cselect_b64 s[8:9], s[10:11], s[8:9]
7526 ; GFX10-NEXT: s_cmp_lg_u32 s27, 0
7527 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9]
7528 ; GFX10-NEXT: s_cmp_lg_u32 s26, 0
7529 ; GFX10-NEXT: s_cselect_b64 s[8:9], s[16:17], 0
7530 ; GFX10-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21]
7531 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
7532 ; GFX10-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f
7533 ; GFX10-NEXT: s_or_b64 s[0:1], s[18:19], s[0:1]
7534 ; GFX10-NEXT: s_sub_i32 s11, s8, 64
7535 ; GFX10-NEXT: s_sub_i32 s9, 64, s8
7536 ; GFX10-NEXT: s_cmp_lt_u32 s8, 64
7537 ; GFX10-NEXT: s_cselect_b32 s20, 1, 0
7538 ; GFX10-NEXT: s_cmp_eq_u32 s8, 0
7539 ; GFX10-NEXT: s_cselect_b32 s21, 1, 0
7540 ; GFX10-NEXT: s_lshr_b64 s[16:17], s[4:5], s9
7541 ; GFX10-NEXT: s_lshl_b64 s[18:19], s[6:7], s8
7542 ; GFX10-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
7543 ; GFX10-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19]
7544 ; GFX10-NEXT: s_lshl_b64 s[4:5], s[4:5], s11
7545 ; GFX10-NEXT: s_cmp_lg_u32 s20, 0
7546 ; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0
7547 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[16:17], s[4:5]
7548 ; GFX10-NEXT: s_cmp_lg_u32 s21, 0
7549 ; GFX10-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5]
7550 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[12:13], 1
7551 ; GFX10-NEXT: s_lshl_b32 s23, s14, 31
7552 ; GFX10-NEXT: s_lshr_b64 s[12:13], s[14:15], 1
7553 ; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[22:23]
7554 ; GFX10-NEXT: s_sub_i32 s18, s10, 64
7555 ; GFX10-NEXT: s_sub_i32 s11, 64, s10
7556 ; GFX10-NEXT: s_cmp_lt_u32 s10, 64
7557 ; GFX10-NEXT: s_cselect_b32 s19, 1, 0
7558 ; GFX10-NEXT: s_cmp_eq_u32 s10, 0
7559 ; GFX10-NEXT: s_cselect_b32 s20, 1, 0
7560 ; GFX10-NEXT: s_lshr_b64 s[14:15], s[4:5], s10
7561 ; GFX10-NEXT: s_lshl_b64 s[16:17], s[12:13], s11
7562 ; GFX10-NEXT: s_lshr_b64 s[10:11], s[12:13], s10
7563 ; GFX10-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17]
7564 ; GFX10-NEXT: s_lshr_b64 s[12:13], s[12:13], s18
7565 ; GFX10-NEXT: s_cmp_lg_u32 s19, 0
7566 ; GFX10-NEXT: s_cselect_b64 s[12:13], s[14:15], s[12:13]
7567 ; GFX10-NEXT: s_cmp_lg_u32 s20, 0
7568 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], s[12:13]
7569 ; GFX10-NEXT: s_cmp_lg_u32 s19, 0
7570 ; GFX10-NEXT: s_cselect_b64 s[10:11], s[10:11], 0
7571 ; GFX10-NEXT: s_or_b64 s[4:5], s[8:9], s[4:5]
7572 ; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11]
7573 ; GFX10-NEXT: ; return to shader part epilog
7575 ; GFX11-LABEL: s_fshl_v2i128:
7577 ; GFX11-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f
7578 ; GFX11-NEXT: s_and_not1_b64 s[16:17], 0x7f, s[16:17]
7579 ; GFX11-NEXT: s_sub_i32 s17, s18, 64
7580 ; GFX11-NEXT: s_sub_i32 s19, 64, s18
7581 ; GFX11-NEXT: s_cmp_lt_u32 s18, 64
7582 ; GFX11-NEXT: s_mov_b32 s22, 0
7583 ; GFX11-NEXT: s_cselect_b32 s23, 1, 0
7584 ; GFX11-NEXT: s_cmp_eq_u32 s18, 0
7585 ; GFX11-NEXT: s_cselect_b32 s28, 1, 0
7586 ; GFX11-NEXT: s_lshr_b64 s[24:25], s[0:1], s19
7587 ; GFX11-NEXT: s_lshl_b64 s[26:27], s[2:3], s18
7588 ; GFX11-NEXT: s_lshl_b64 s[18:19], s[0:1], s18
7589 ; GFX11-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27]
7590 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s17
7591 ; GFX11-NEXT: s_cmp_lg_u32 s23, 0
7592 ; GFX11-NEXT: s_cselect_b64 s[18:19], s[18:19], 0
7593 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1]
7594 ; GFX11-NEXT: s_cmp_lg_u32 s28, 0
7595 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
7596 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[8:9], 1
7597 ; GFX11-NEXT: s_lshl_b32 s23, s10, 31
7598 ; GFX11-NEXT: s_lshr_b64 s[8:9], s[10:11], 1
7599 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[22:23]
7600 ; GFX11-NEXT: s_sub_i32 s23, s16, 64
7601 ; GFX11-NEXT: s_sub_i32 s17, 64, s16
7602 ; GFX11-NEXT: s_cmp_lt_u32 s16, 64
7603 ; GFX11-NEXT: s_cselect_b32 s26, 1, 0
7604 ; GFX11-NEXT: s_cmp_eq_u32 s16, 0
7605 ; GFX11-NEXT: s_cselect_b32 s27, 1, 0
7606 ; GFX11-NEXT: s_lshr_b64 s[10:11], s[0:1], s16
7607 ; GFX11-NEXT: s_lshl_b64 s[24:25], s[8:9], s17
7608 ; GFX11-NEXT: s_lshr_b64 s[16:17], s[8:9], s16
7609 ; GFX11-NEXT: s_or_b64 s[10:11], s[10:11], s[24:25]
7610 ; GFX11-NEXT: s_lshr_b64 s[8:9], s[8:9], s23
7611 ; GFX11-NEXT: s_cmp_lg_u32 s26, 0
7612 ; GFX11-NEXT: s_cselect_b64 s[8:9], s[10:11], s[8:9]
7613 ; GFX11-NEXT: s_cmp_lg_u32 s27, 0
7614 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9]
7615 ; GFX11-NEXT: s_cmp_lg_u32 s26, 0
7616 ; GFX11-NEXT: s_cselect_b64 s[8:9], s[16:17], 0
7617 ; GFX11-NEXT: s_and_not1_b64 s[10:11], 0x7f, s[20:21]
7618 ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
7619 ; GFX11-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f
7620 ; GFX11-NEXT: s_or_b64 s[0:1], s[18:19], s[0:1]
7621 ; GFX11-NEXT: s_sub_i32 s11, s8, 64
7622 ; GFX11-NEXT: s_sub_i32 s9, 64, s8
7623 ; GFX11-NEXT: s_cmp_lt_u32 s8, 64
7624 ; GFX11-NEXT: s_cselect_b32 s20, 1, 0
7625 ; GFX11-NEXT: s_cmp_eq_u32 s8, 0
7626 ; GFX11-NEXT: s_cselect_b32 s21, 1, 0
7627 ; GFX11-NEXT: s_lshr_b64 s[16:17], s[4:5], s9
7628 ; GFX11-NEXT: s_lshl_b64 s[18:19], s[6:7], s8
7629 ; GFX11-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
7630 ; GFX11-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19]
7631 ; GFX11-NEXT: s_lshl_b64 s[4:5], s[4:5], s11
7632 ; GFX11-NEXT: s_cmp_lg_u32 s20, 0
7633 ; GFX11-NEXT: s_cselect_b64 s[8:9], s[8:9], 0
7634 ; GFX11-NEXT: s_cselect_b64 s[4:5], s[16:17], s[4:5]
7635 ; GFX11-NEXT: s_cmp_lg_u32 s21, 0
7636 ; GFX11-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5]
7637 ; GFX11-NEXT: s_lshr_b64 s[4:5], s[12:13], 1
7638 ; GFX11-NEXT: s_lshl_b32 s23, s14, 31
7639 ; GFX11-NEXT: s_lshr_b64 s[12:13], s[14:15], 1
7640 ; GFX11-NEXT: s_or_b64 s[4:5], s[4:5], s[22:23]
7641 ; GFX11-NEXT: s_sub_i32 s18, s10, 64
7642 ; GFX11-NEXT: s_sub_i32 s11, 64, s10
7643 ; GFX11-NEXT: s_cmp_lt_u32 s10, 64
7644 ; GFX11-NEXT: s_cselect_b32 s19, 1, 0
7645 ; GFX11-NEXT: s_cmp_eq_u32 s10, 0
7646 ; GFX11-NEXT: s_cselect_b32 s20, 1, 0
7647 ; GFX11-NEXT: s_lshr_b64 s[14:15], s[4:5], s10
7648 ; GFX11-NEXT: s_lshl_b64 s[16:17], s[12:13], s11
7649 ; GFX11-NEXT: s_lshr_b64 s[10:11], s[12:13], s10
7650 ; GFX11-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17]
7651 ; GFX11-NEXT: s_lshr_b64 s[12:13], s[12:13], s18
7652 ; GFX11-NEXT: s_cmp_lg_u32 s19, 0
7653 ; GFX11-NEXT: s_cselect_b64 s[12:13], s[14:15], s[12:13]
7654 ; GFX11-NEXT: s_cmp_lg_u32 s20, 0
7655 ; GFX11-NEXT: s_cselect_b64 s[4:5], s[4:5], s[12:13]
7656 ; GFX11-NEXT: s_cmp_lg_u32 s19, 0
7657 ; GFX11-NEXT: s_cselect_b64 s[10:11], s[10:11], 0
7658 ; GFX11-NEXT: s_or_b64 s[4:5], s[8:9], s[4:5]
7659 ; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11]
7660 ; GFX11-NEXT: ; return to shader part epilog
7661 %result = call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
7662 ret <2 x i128> %result
; v_fshl_v2i128: funnel shift left on <2 x i128> through @llvm.fshl.v2i128,
; taking plain (non-inreg) arguments with the default calling convention.
; In the expected code below all operands live in VGPRs and every variant
; returns with s_setpc_b64 s[30:31].
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the script instead of editing them by hand.
7665 define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) {
; Expected output under the GFX6 prefix -- 64-bit shifts are v_lshl_b64 /
; v_lshr_b64 with the shift amount as the last operand.
7666 ; GFX6-LABEL: v_fshl_v2i128:
7668 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7669 ; GFX6-NEXT: v_and_b32_e32 v23, 0x7f, v16
7670 ; GFX6-NEXT: v_not_b32_e32 v16, v16
7671 ; GFX6-NEXT: v_and_b32_e32 v24, 0x7f, v16
7672 ; GFX6-NEXT: v_sub_i32_e32 v16, vcc, 64, v23
7673 ; GFX6-NEXT: v_subrev_i32_e32 v25, vcc, 64, v23
7674 ; GFX6-NEXT: v_lshr_b64 v[16:17], v[0:1], v16
7675 ; GFX6-NEXT: v_lshl_b64 v[18:19], v[2:3], v23
7676 ; GFX6-NEXT: v_lshl_b64 v[21:22], v[0:1], v23
7677 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v25
7678 ; GFX6-NEXT: v_or_b32_e32 v16, v16, v18
7679 ; GFX6-NEXT: v_or_b32_e32 v17, v17, v19
7680 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23
7681 ; GFX6-NEXT: v_cndmask_b32_e32 v18, 0, v21, vcc
7682 ; GFX6-NEXT: v_cndmask_b32_e32 v19, 0, v22, vcc
7683 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
7684 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
7685 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v23
7686 ; GFX6-NEXT: v_cndmask_b32_e32 v21, v0, v2, vcc
7687 ; GFX6-NEXT: v_cndmask_b32_e32 v22, v1, v3, vcc
7688 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[8:9], 1
7689 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 31, v10
7690 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
7691 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[10:11], 1
7692 ; GFX6-NEXT: v_sub_i32_e32 v10, vcc, 64, v24
7693 ; GFX6-NEXT: v_subrev_i32_e32 v23, vcc, 64, v24
7694 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[0:1], v24
7695 ; GFX6-NEXT: v_lshl_b64 v[10:11], v[2:3], v10
7696 ; GFX6-NEXT: v_lshr_b64 v[16:17], v[2:3], v24
7697 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], v23
7698 ; GFX6-NEXT: v_or_b32_e32 v8, v8, v10
7699 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v24
7700 ; GFX6-NEXT: v_or_b32_e32 v9, v9, v11
7701 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
7702 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v24
7703 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc
7704 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
7705 ; GFX6-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
7706 ; GFX6-NEXT: v_or_b32_e32 v0, v18, v0
7707 ; GFX6-NEXT: v_and_b32_e32 v18, 0x7f, v20
7708 ; GFX6-NEXT: v_not_b32_e32 v8, v20
7709 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v16, vcc
7710 ; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
7711 ; GFX6-NEXT: v_or_b32_e32 v1, v19, v1
7712 ; GFX6-NEXT: v_and_b32_e32 v19, 0x7f, v8
7713 ; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v18
7714 ; GFX6-NEXT: v_subrev_i32_e32 v20, vcc, 64, v18
7715 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[4:5], v8
7716 ; GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], v18
7717 ; GFX6-NEXT: v_lshl_b64 v[16:17], v[4:5], v18
7718 ; GFX6-NEXT: v_lshl_b64 v[4:5], v[4:5], v20
7719 ; GFX6-NEXT: v_or_b32_e32 v8, v8, v10
7720 ; GFX6-NEXT: v_or_b32_e32 v9, v9, v11
7721 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v18
7722 ; GFX6-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc
7723 ; GFX6-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc
7724 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
7725 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
7726 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v18
7727 ; GFX6-NEXT: v_cndmask_b32_e32 v18, v4, v6, vcc
7728 ; GFX6-NEXT: v_cndmask_b32_e32 v20, v5, v7, vcc
7729 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[12:13], 1
7730 ; GFX6-NEXT: v_lshlrev_b32_e32 v6, 31, v14
7731 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v6
7732 ; GFX6-NEXT: v_lshr_b64 v[6:7], v[14:15], 1
7733 ; GFX6-NEXT: v_sub_i32_e32 v10, vcc, 64, v19
7734 ; GFX6-NEXT: v_subrev_i32_e32 v14, vcc, 64, v19
7735 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[4:5], v19
7736 ; GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], v10
7737 ; GFX6-NEXT: v_lshr_b64 v[12:13], v[6:7], v19
7738 ; GFX6-NEXT: v_lshr_b64 v[6:7], v[6:7], v14
7739 ; GFX6-NEXT: v_or_b32_e32 v8, v8, v10
7740 ; GFX6-NEXT: v_or_b32_e32 v9, v9, v11
7741 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19
7742 ; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc
7743 ; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc
7744 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v19
7745 ; GFX6-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[4:5]
7746 ; GFX6-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5]
7747 ; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v12, vcc
7748 ; GFX6-NEXT: v_cndmask_b32_e32 v7, 0, v13, vcc
7749 ; GFX6-NEXT: v_or_b32_e32 v2, v21, v2
7750 ; GFX6-NEXT: v_or_b32_e32 v3, v22, v3
7751 ; GFX6-NEXT: v_or_b32_e32 v4, v16, v4
7752 ; GFX6-NEXT: v_or_b32_e32 v5, v17, v5
7753 ; GFX6-NEXT: v_or_b32_e32 v6, v18, v6
7754 ; GFX6-NEXT: v_or_b32_e32 v7, v20, v7
7755 ; GFX6-NEXT: s_setpc_b64 s[30:31]
; Expected output under the GFX8 prefix -- the 64-bit shifts are the "rev"
; forms (v_lshlrev_b64 / v_lshrrev_b64) with the shift amount first.
7757 ; GFX8-LABEL: v_fshl_v2i128:
7759 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7760 ; GFX8-NEXT: v_and_b32_e32 v23, 0x7f, v16
7761 ; GFX8-NEXT: v_not_b32_e32 v16, v16
7762 ; GFX8-NEXT: v_and_b32_e32 v24, 0x7f, v16
7763 ; GFX8-NEXT: v_sub_u32_e32 v16, vcc, 64, v23
7764 ; GFX8-NEXT: v_subrev_u32_e32 v25, vcc, 64, v23
7765 ; GFX8-NEXT: v_lshrrev_b64 v[16:17], v16, v[0:1]
7766 ; GFX8-NEXT: v_lshlrev_b64 v[18:19], v23, v[2:3]
7767 ; GFX8-NEXT: v_lshlrev_b64 v[21:22], v23, v[0:1]
7768 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v25, v[0:1]
7769 ; GFX8-NEXT: v_or_b32_e32 v16, v16, v18
7770 ; GFX8-NEXT: v_or_b32_e32 v17, v17, v19
7771 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23
7772 ; GFX8-NEXT: v_cndmask_b32_e32 v18, 0, v21, vcc
7773 ; GFX8-NEXT: v_cndmask_b32_e32 v19, 0, v22, vcc
7774 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
7775 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v17, vcc
7776 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v23
7777 ; GFX8-NEXT: v_cndmask_b32_e32 v21, v0, v2, vcc
7778 ; GFX8-NEXT: v_cndmask_b32_e32 v22, v1, v3, vcc
7779 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], 1, v[8:9]
7780 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 31, v10
7781 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
7782 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[10:11]
7783 ; GFX8-NEXT: v_sub_u32_e32 v10, vcc, 64, v24
7784 ; GFX8-NEXT: v_subrev_u32_e32 v23, vcc, 64, v24
7785 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], v24, v[0:1]
7786 ; GFX8-NEXT: v_lshlrev_b64 v[10:11], v10, v[2:3]
7787 ; GFX8-NEXT: v_lshrrev_b64 v[16:17], v24, v[2:3]
7788 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], v23, v[2:3]
7789 ; GFX8-NEXT: v_or_b32_e32 v8, v8, v10
7790 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v24
7791 ; GFX8-NEXT: v_or_b32_e32 v9, v9, v11
7792 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
7793 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v24
7794 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc
7795 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
7796 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
7797 ; GFX8-NEXT: v_or_b32_e32 v0, v18, v0
7798 ; GFX8-NEXT: v_and_b32_e32 v18, 0x7f, v20
7799 ; GFX8-NEXT: v_not_b32_e32 v8, v20
7800 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v16, vcc
7801 ; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
7802 ; GFX8-NEXT: v_or_b32_e32 v1, v19, v1
7803 ; GFX8-NEXT: v_and_b32_e32 v19, 0x7f, v8
7804 ; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v18
7805 ; GFX8-NEXT: v_subrev_u32_e32 v20, vcc, 64, v18
7806 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], v8, v[4:5]
7807 ; GFX8-NEXT: v_lshlrev_b64 v[10:11], v18, v[6:7]
7808 ; GFX8-NEXT: v_lshlrev_b64 v[16:17], v18, v[4:5]
7809 ; GFX8-NEXT: v_lshlrev_b64 v[4:5], v20, v[4:5]
7810 ; GFX8-NEXT: v_or_b32_e32 v8, v8, v10
7811 ; GFX8-NEXT: v_or_b32_e32 v9, v9, v11
7812 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v18
7813 ; GFX8-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc
7814 ; GFX8-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc
7815 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
7816 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc
7817 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v18
7818 ; GFX8-NEXT: v_cndmask_b32_e32 v18, v4, v6, vcc
7819 ; GFX8-NEXT: v_cndmask_b32_e32 v20, v5, v7, vcc
7820 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], 1, v[12:13]
7821 ; GFX8-NEXT: v_lshlrev_b32_e32 v6, 31, v14
7822 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v6
7823 ; GFX8-NEXT: v_lshrrev_b64 v[6:7], 1, v[14:15]
7824 ; GFX8-NEXT: v_sub_u32_e32 v10, vcc, 64, v19
7825 ; GFX8-NEXT: v_subrev_u32_e32 v14, vcc, 64, v19
7826 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], v19, v[4:5]
7827 ; GFX8-NEXT: v_lshlrev_b64 v[10:11], v10, v[6:7]
7828 ; GFX8-NEXT: v_lshrrev_b64 v[12:13], v19, v[6:7]
7829 ; GFX8-NEXT: v_lshrrev_b64 v[6:7], v14, v[6:7]
7830 ; GFX8-NEXT: v_or_b32_e32 v8, v8, v10
7831 ; GFX8-NEXT: v_or_b32_e32 v9, v9, v11
7832 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19
7833 ; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc
7834 ; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc
7835 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v19
7836 ; GFX8-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[4:5]
7837 ; GFX8-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5]
7838 ; GFX8-NEXT: v_cndmask_b32_e32 v6, 0, v12, vcc
7839 ; GFX8-NEXT: v_cndmask_b32_e32 v7, 0, v13, vcc
7840 ; GFX8-NEXT: v_or_b32_e32 v2, v21, v2
7841 ; GFX8-NEXT: v_or_b32_e32 v3, v22, v3
7842 ; GFX8-NEXT: v_or_b32_e32 v4, v16, v4
7843 ; GFX8-NEXT: v_or_b32_e32 v5, v17, v5
7844 ; GFX8-NEXT: v_or_b32_e32 v6, v18, v6
7845 ; GFX8-NEXT: v_or_b32_e32 v7, v20, v7
7846 ; GFX8-NEXT: s_setpc_b64 s[30:31]
; Expected output under the GFX9 prefix -- the subtracts carry no vcc operand,
; and the shift-left-by-31 plus OR pair is a single v_lshl_or_b32.
7848 ; GFX9-LABEL: v_fshl_v2i128:
7850 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7851 ; GFX9-NEXT: v_and_b32_e32 v23, 0x7f, v16
7852 ; GFX9-NEXT: v_not_b32_e32 v16, v16
7853 ; GFX9-NEXT: v_and_b32_e32 v24, 0x7f, v16
7854 ; GFX9-NEXT: v_sub_u32_e32 v16, 64, v23
7855 ; GFX9-NEXT: v_subrev_u32_e32 v25, 64, v23
7856 ; GFX9-NEXT: v_lshrrev_b64 v[16:17], v16, v[0:1]
7857 ; GFX9-NEXT: v_lshlrev_b64 v[18:19], v23, v[2:3]
7858 ; GFX9-NEXT: v_lshlrev_b64 v[21:22], v23, v[0:1]
7859 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v25, v[0:1]
7860 ; GFX9-NEXT: v_or_b32_e32 v16, v16, v18
7861 ; GFX9-NEXT: v_or_b32_e32 v17, v17, v19
7862 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23
7863 ; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v21, vcc
7864 ; GFX9-NEXT: v_cndmask_b32_e32 v19, 0, v22, vcc
7865 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v16, vcc
7866 ; GFX9-NEXT: v_cndmask_b32_e32 v16, v1, v17, vcc
7867 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v23
7868 ; GFX9-NEXT: v_cndmask_b32_e32 v21, v0, v2, vcc
7869 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[8:9]
7870 ; GFX9-NEXT: v_cndmask_b32_e32 v22, v16, v3, vcc
7871 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[10:11]
7872 ; GFX9-NEXT: v_lshl_or_b32 v1, v10, 31, v1
7873 ; GFX9-NEXT: v_sub_u32_e32 v10, 64, v24
7874 ; GFX9-NEXT: v_subrev_u32_e32 v23, 64, v24
7875 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v24, v[0:1]
7876 ; GFX9-NEXT: v_lshlrev_b64 v[10:11], v10, v[2:3]
7877 ; GFX9-NEXT: v_lshrrev_b64 v[16:17], v24, v[2:3]
7878 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], v23, v[2:3]
7879 ; GFX9-NEXT: v_or_b32_e32 v8, v8, v10
7880 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v24
7881 ; GFX9-NEXT: v_or_b32_e32 v9, v9, v11
7882 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc
7883 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v24
7884 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc
7885 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5]
7886 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
7887 ; GFX9-NEXT: v_or_b32_e32 v0, v18, v0
7888 ; GFX9-NEXT: v_and_b32_e32 v18, 0x7f, v20
7889 ; GFX9-NEXT: v_not_b32_e32 v8, v20
7890 ; GFX9-NEXT: v_or_b32_e32 v1, v19, v1
7891 ; GFX9-NEXT: v_and_b32_e32 v19, 0x7f, v8
7892 ; GFX9-NEXT: v_sub_u32_e32 v8, 64, v18
7893 ; GFX9-NEXT: v_subrev_u32_e32 v20, 64, v18
7894 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v8, v[4:5]
7895 ; GFX9-NEXT: v_lshlrev_b64 v[10:11], v18, v[6:7]
7896 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v16, vcc
7897 ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
7898 ; GFX9-NEXT: v_lshlrev_b64 v[16:17], v18, v[4:5]
7899 ; GFX9-NEXT: v_lshlrev_b64 v[4:5], v20, v[4:5]
7900 ; GFX9-NEXT: v_or_b32_e32 v8, v8, v10
7901 ; GFX9-NEXT: v_or_b32_e32 v9, v9, v11
7902 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v18
7903 ; GFX9-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc
7904 ; GFX9-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc
7905 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc
7906 ; GFX9-NEXT: v_cndmask_b32_e32 v8, v5, v9, vcc
7907 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v18
7908 ; GFX9-NEXT: v_cndmask_b32_e32 v18, v4, v6, vcc
7909 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], 1, v[12:13]
7910 ; GFX9-NEXT: v_cndmask_b32_e32 v20, v8, v7, vcc
7911 ; GFX9-NEXT: v_lshrrev_b64 v[6:7], 1, v[14:15]
7912 ; GFX9-NEXT: v_lshl_or_b32 v5, v14, 31, v5
7913 ; GFX9-NEXT: v_sub_u32_e32 v10, 64, v19
7914 ; GFX9-NEXT: v_subrev_u32_e32 v14, 64, v19
7915 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v19, v[4:5]
7916 ; GFX9-NEXT: v_lshlrev_b64 v[10:11], v10, v[6:7]
7917 ; GFX9-NEXT: v_lshrrev_b64 v[12:13], v19, v[6:7]
7918 ; GFX9-NEXT: v_lshrrev_b64 v[6:7], v14, v[6:7]
7919 ; GFX9-NEXT: v_or_b32_e32 v8, v8, v10
7920 ; GFX9-NEXT: v_or_b32_e32 v9, v9, v11
7921 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19
7922 ; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc
7923 ; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc
7924 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v19
7925 ; GFX9-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[4:5]
7926 ; GFX9-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5]
7927 ; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v12, vcc
7928 ; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v13, vcc
7929 ; GFX9-NEXT: v_or_b32_e32 v2, v21, v2
7930 ; GFX9-NEXT: v_or_b32_e32 v3, v22, v3
7931 ; GFX9-NEXT: v_or_b32_e32 v4, v16, v4
7932 ; GFX9-NEXT: v_or_b32_e32 v5, v17, v5
7933 ; GFX9-NEXT: v_or_b32_e32 v6, v18, v6
7934 ; GFX9-NEXT: v_or_b32_e32 v7, v20, v7
7935 ; GFX9-NEXT: s_setpc_b64 s[30:31]
; Expected output under the GFX10 prefix -- compares write vcc_lo or a single
; SGPR (s4 / s5 / s6) rather than a 64-bit register pair.
7937 ; GFX10-LABEL: v_fshl_v2i128:
7939 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7940 ; GFX10-NEXT: v_and_b32_e32 v27, 0x7f, v16
7941 ; GFX10-NEXT: v_not_b32_e32 v16, v16
7942 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9]
7943 ; GFX10-NEXT: v_sub_nc_u32_e32 v17, 64, v27
7944 ; GFX10-NEXT: v_and_b32_e32 v28, 0x7f, v16
7945 ; GFX10-NEXT: v_lshlrev_b64 v[18:19], v27, v[2:3]
7946 ; GFX10-NEXT: v_lshl_or_b32 v9, v10, 31, v9
7947 ; GFX10-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11]
7948 ; GFX10-NEXT: v_lshrrev_b64 v[16:17], v17, v[0:1]
7949 ; GFX10-NEXT: v_sub_nc_u32_e32 v25, 64, v28
7950 ; GFX10-NEXT: v_subrev_nc_u32_e32 v29, 64, v27
7951 ; GFX10-NEXT: v_lshrrev_b64 v[23:24], v28, v[8:9]
7952 ; GFX10-NEXT: v_lshlrev_b64 v[21:22], v27, v[0:1]
7953 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v27
7954 ; GFX10-NEXT: v_or_b32_e32 v18, v16, v18
7955 ; GFX10-NEXT: v_subrev_nc_u32_e32 v16, 64, v28
7956 ; GFX10-NEXT: v_lshlrev_b64 v[25:26], v25, v[10:11]
7957 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v29, v[0:1]
7958 ; GFX10-NEXT: v_or_b32_e32 v19, v17, v19
7959 ; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v28
7960 ; GFX10-NEXT: v_lshrrev_b64 v[16:17], v16, v[10:11]
7961 ; GFX10-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc_lo
7962 ; GFX10-NEXT: v_or_b32_e32 v23, v23, v25
7963 ; GFX10-NEXT: v_cndmask_b32_e32 v18, v0, v18, vcc_lo
7964 ; GFX10-NEXT: v_or_b32_e32 v0, v24, v26
7965 ; GFX10-NEXT: v_cndmask_b32_e32 v22, 0, v22, vcc_lo
7966 ; GFX10-NEXT: v_cndmask_b32_e32 v19, v1, v19, vcc_lo
7967 ; GFX10-NEXT: v_cndmask_b32_e64 v16, v16, v23, s4
7968 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v27
7969 ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v28
7970 ; GFX10-NEXT: v_cndmask_b32_e64 v17, v17, v0, s4
7971 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v28, v[10:11]
7972 ; GFX10-NEXT: v_and_b32_e32 v24, 0x7f, v20
7973 ; GFX10-NEXT: v_cndmask_b32_e32 v23, v19, v3, vcc_lo
7974 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v16, v8, s5
7975 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v17, v9, s5
7976 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v18, v2, vcc_lo
7977 ; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v0, s4
7978 ; GFX10-NEXT: v_cndmask_b32_e64 v25, 0, v1, s4
7979 ; GFX10-NEXT: v_or_b32_e32 v0, v21, v3
7980 ; GFX10-NEXT: v_not_b32_e32 v3, v20
7981 ; GFX10-NEXT: v_or_b32_e32 v1, v22, v8
7982 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], 1, v[12:13]
7983 ; GFX10-NEXT: v_sub_nc_u32_e32 v11, 64, v24
7984 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v10
7985 ; GFX10-NEXT: v_and_b32_e32 v22, 0x7f, v3
7986 ; GFX10-NEXT: v_lshlrev_b64 v[12:13], v24, v[6:7]
7987 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 64, v24
7988 ; GFX10-NEXT: v_lshrrev_b64 v[10:11], v11, v[4:5]
7989 ; GFX10-NEXT: v_lshl_or_b32 v9, v14, 31, v9
7990 ; GFX10-NEXT: v_lshrrev_b64 v[14:15], 1, v[14:15]
7991 ; GFX10-NEXT: v_sub_nc_u32_e32 v20, 64, v22
7992 ; GFX10-NEXT: v_lshlrev_b64 v[16:17], v24, v[4:5]
7993 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v24
7994 ; GFX10-NEXT: v_or_b32_e32 v12, v10, v12
7995 ; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v22
7996 ; GFX10-NEXT: v_lshrrev_b64 v[18:19], v22, v[8:9]
7997 ; GFX10-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15]
7998 ; GFX10-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
7999 ; GFX10-NEXT: v_or_b32_e32 v5, v11, v13
8000 ; GFX10-NEXT: v_lshrrev_b64 v[10:11], v10, v[14:15]
8001 ; GFX10-NEXT: v_cndmask_b32_e32 v13, 0, v16, vcc_lo
8002 ; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v22
8003 ; GFX10-NEXT: v_or_b32_e32 v16, v18, v20
8004 ; GFX10-NEXT: v_or_b32_e32 v18, v19, v21
8005 ; GFX10-NEXT: v_cndmask_b32_e32 v12, v3, v12, vcc_lo
8006 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo
8007 ; GFX10-NEXT: v_lshrrev_b64 v[3:4], v22, v[14:15]
8008 ; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v16, s4
8009 ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v22
8010 ; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v24
8011 ; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v18, s4
8012 ; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo
8013 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, v6, s6
8014 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v5, v7, s6
8015 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v10, v8, s5
8016 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v11, v9, s5
8017 ; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v3, s4
8018 ; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v4, s4
8019 ; GFX10-NEXT: v_or_b32_e32 v3, v23, v25
8020 ; GFX10-NEXT: v_or_b32_e32 v4, v13, v5
8021 ; GFX10-NEXT: v_or_b32_e32 v5, v14, v8
8022 ; GFX10-NEXT: v_or_b32_e32 v6, v6, v9
8023 ; GFX10-NEXT: v_or_b32_e32 v7, v7, v10
8024 ; GFX10-NEXT: s_setpc_b64 s[30:31]
; Expected output under the GFX11 prefix -- s_delay_alu hints appear between
; instructions and some selects are paired as v_dual_cndmask_b32.
8026 ; GFX11-LABEL: v_fshl_v2i128:
8028 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8029 ; GFX11-NEXT: v_and_b32_e32 v27, 0x7f, v16
8030 ; GFX11-NEXT: v_not_b32_e32 v16, v16
8031 ; GFX11-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9]
8032 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
8033 ; GFX11-NEXT: v_lshlrev_b64 v[21:22], v27, v[0:1]
8034 ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v27
8035 ; GFX11-NEXT: v_and_b32_e32 v28, 0x7f, v16
8036 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
8037 ; GFX11-NEXT: v_lshl_or_b32 v9, v10, 31, v9
8038 ; GFX11-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11]
8039 ; GFX11-NEXT: v_dual_cndmask_b32 v21, 0, v21 :: v_dual_cndmask_b32 v22, 0, v22
8040 ; GFX11-NEXT: v_sub_nc_u32_e32 v17, 64, v27
8041 ; GFX11-NEXT: v_lshlrev_b64 v[18:19], v27, v[2:3]
8042 ; GFX11-NEXT: v_sub_nc_u32_e32 v25, 64, v28
8043 ; GFX11-NEXT: v_subrev_nc_u32_e32 v29, 64, v27
8044 ; GFX11-NEXT: v_lshrrev_b64 v[23:24], v28, v[8:9]
8045 ; GFX11-NEXT: v_lshrrev_b64 v[16:17], v17, v[0:1]
8046 ; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v28
8047 ; GFX11-NEXT: v_lshlrev_b64 v[25:26], v25, v[10:11]
8048 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v29, v[0:1]
8049 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v28
8050 ; GFX11-NEXT: v_or_b32_e32 v18, v16, v18
8051 ; GFX11-NEXT: v_subrev_nc_u32_e32 v16, 64, v28
8052 ; GFX11-NEXT: v_or_b32_e32 v19, v17, v19
8053 ; GFX11-NEXT: v_or_b32_e32 v23, v23, v25
8054 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
8055 ; GFX11-NEXT: v_cndmask_b32_e32 v18, v0, v18, vcc_lo
8056 ; GFX11-NEXT: v_lshrrev_b64 v[16:17], v16, v[10:11]
8057 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_4) | instid1(VALU_DEP_4)
8058 ; GFX11-NEXT: v_cndmask_b32_e32 v19, v1, v19, vcc_lo
8059 ; GFX11-NEXT: v_or_b32_e32 v0, v24, v26
8060 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v27
8061 ; GFX11-NEXT: v_and_b32_e32 v24, 0x7f, v20
8062 ; GFX11-NEXT: v_cndmask_b32_e64 v16, v16, v23, s0
8063 ; GFX11-NEXT: v_cndmask_b32_e64 v17, v17, v0, s0
8064 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], v28, v[10:11]
8065 ; GFX11-NEXT: v_dual_cndmask_b32 v2, v18, v2 :: v_dual_cndmask_b32 v23, v19, v3
8066 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
8067 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v16, v8, s1
8068 ; GFX11-NEXT: v_cndmask_b32_e64 v8, v17, v9, s1
8069 ; GFX11-NEXT: v_sub_nc_u32_e32 v11, 64, v24
8070 ; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v0, s0
8071 ; GFX11-NEXT: v_cndmask_b32_e64 v25, 0, v1, s0
8072 ; GFX11-NEXT: v_or_b32_e32 v0, v21, v3
8073 ; GFX11-NEXT: v_not_b32_e32 v3, v20
8074 ; GFX11-NEXT: v_or_b32_e32 v1, v22, v8
8075 ; GFX11-NEXT: v_lshrrev_b64 v[8:9], 1, v[12:13]
8076 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v10
8077 ; GFX11-NEXT: v_lshrrev_b64 v[10:11], v11, v[4:5]
8078 ; GFX11-NEXT: v_lshlrev_b64 v[12:13], v24, v[6:7]
8079 ; GFX11-NEXT: v_lshlrev_b64 v[16:17], v24, v[4:5]
8080 ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v24
8081 ; GFX11-NEXT: v_and_b32_e32 v22, 0x7f, v3
8082 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 64, v24
8083 ; GFX11-NEXT: v_lshl_or_b32 v9, v14, 31, v9
8084 ; GFX11-NEXT: v_lshrrev_b64 v[14:15], 1, v[14:15]
8085 ; GFX11-NEXT: v_or_b32_e32 v12, v10, v12
8086 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
8087 ; GFX11-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
8088 ; GFX11-NEXT: v_or_b32_e32 v5, v11, v13
8089 ; GFX11-NEXT: v_cndmask_b32_e32 v13, 0, v16, vcc_lo
8090 ; GFX11-NEXT: v_sub_nc_u32_e32 v20, 64, v22
8091 ; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v22
8092 ; GFX11-NEXT: v_lshrrev_b64 v[18:19], v22, v[8:9]
8093 ; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v22
8094 ; GFX11-NEXT: v_cndmask_b32_e32 v12, v3, v12, vcc_lo
8095 ; GFX11-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15]
8096 ; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[14:15]
8097 ; GFX11-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo
8098 ; GFX11-NEXT: v_lshrrev_b64 v[3:4], v22, v[14:15]
8099 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v22
8100 ; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v24
8101 ; GFX11-NEXT: v_or_b32_e32 v16, v18, v20
8102 ; GFX11-NEXT: v_or_b32_e32 v18, v19, v21
8103 ; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo
8104 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
8105 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, v6, s2
8106 ; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v16, s0
8107 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3)
8108 ; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v18, s0
8109 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v5, v7, s2
8110 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v10, v8, s1
8111 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
8112 ; GFX11-NEXT: v_cndmask_b32_e64 v8, v11, v9, s1
8113 ; GFX11-NEXT: v_cndmask_b32_e64 v9, 0, v3, s0
8114 ; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v4, s0
8115 ; GFX11-NEXT: v_or_b32_e32 v3, v23, v25
8116 ; GFX11-NEXT: v_or_b32_e32 v4, v13, v5
8117 ; GFX11-NEXT: v_or_b32_e32 v5, v14, v8
8118 ; GFX11-NEXT: v_or_b32_e32 v6, v6, v9
8119 ; GFX11-NEXT: v_or_b32_e32 v7, v7, v10
8120 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; IR under test: one call to the intrinsic, whose result is returned as-is.
8121 %result = call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
8122 ret <2 x i128> %result
; Declarations of the llvm.fshl intrinsic overloads, one per scalar or vector
; type. Each takes three operands of the same type and returns that type, and
; each carries attribute group #0, which is defined on the last line below.
8125 declare i7 @llvm.fshl.i7(i7, i7, i7) #0
8126 declare i8 @llvm.fshl.i8(i8, i8, i8) #0
8127 declare <2 x i8> @llvm.fshl.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0
8128 declare <4 x i8> @llvm.fshl.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0
8130 declare i16 @llvm.fshl.i16(i16, i16, i16) #0
8131 declare <2 x i16> @llvm.fshl.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0
8132 declare <3 x i16> @llvm.fshl.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0
8133 declare <4 x i16> @llvm.fshl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0
8134 declare <5 x i16> @llvm.fshl.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0
8135 declare <6 x i16> @llvm.fshl.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0
8136 declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0
8138 declare i24 @llvm.fshl.i24(i24, i24, i24) #0
8139 declare <2 x i24> @llvm.fshl.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) #0
8141 declare i32 @llvm.fshl.i32(i32, i32, i32) #0
8142 declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0
8143 declare <3 x i32> @llvm.fshl.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0
8144 declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0
8145 declare <5 x i32> @llvm.fshl.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0
8146 declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0
8148 declare i48 @llvm.fshl.i48(i48, i48, i48) #0
8150 declare i64 @llvm.fshl.i64(i64, i64, i64) #0
8151 declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0
8153 declare i128 @llvm.fshl.i128(i128, i128, i128) #0
8154 declare <2 x i128> @llvm.fshl.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) #0
; Attribute group referenced as #0 by every declaration above.
8156 attributes #0 = { nounwind readnone speculatable willreturn }