1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -o - %s | FileCheck -check-prefixes=GCN,GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GFX11 %s
; Funnel shift right on i7 in an amdgpu_ps function with all three operands
; marked inreg; the expected code reads them from s0, s1 and s2.
; The expected sequence scales by a reciprocal of 7 and conditionally subtracts
; 7 twice, which presumably reduces the shift amount modulo the non-power-of-two
; bit width (reviewer: confirm against the legalizer).
; The assertions below are autogenerated; regenerate them rather than hand-editing.
8 define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
9 ; GFX6-LABEL: s_fshr_i7:
11 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
12 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
13 ; GFX6-NEXT: s_and_b32 s2, s2, 0x7f
14 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
15 ; GFX6-NEXT: s_and_b32 s1, s1, 0x7f
16 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
17 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
18 ; GFX6-NEXT: v_mul_lo_u32 v1, v0, -7
19 ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
20 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
21 ; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
22 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, 7
23 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
24 ; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
25 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
26 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
27 ; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
28 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
29 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
30 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 6, v0
31 ; GFX6-NEXT: v_and_b32_e32 v0, 0x7f, v0
32 ; GFX6-NEXT: v_and_b32_e32 v1, 0x7f, v1
33 ; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1
34 ; GFX6-NEXT: v_lshr_b32_e32 v0, s1, v0
35 ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
36 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
37 ; GFX6-NEXT: ; return to shader part epilog
39 ; GFX8-LABEL: s_fshr_i7:
41 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
42 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
43 ; GFX8-NEXT: s_and_b32 s2, s2, 0x7f
44 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
45 ; GFX8-NEXT: s_and_b32 s1, s1, 0x7f
46 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
47 ; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
48 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, -7
49 ; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
50 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
51 ; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
52 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, 7
53 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
54 ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
55 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
56 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
57 ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
58 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
59 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
60 ; GFX8-NEXT: v_sub_u16_e32 v1, 6, v0
61 ; GFX8-NEXT: v_and_b32_e32 v0, 0x7f, v0
62 ; GFX8-NEXT: v_and_b32_e32 v1, 0x7f, v1
63 ; GFX8-NEXT: v_lshlrev_b16_e64 v1, v1, s0
64 ; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s1
65 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
66 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
67 ; GFX8-NEXT: ; return to shader part epilog
69 ; GFX9-LABEL: s_fshr_i7:
71 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
72 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
73 ; GFX9-NEXT: s_and_b32 s2, s2, 0x7f
74 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
75 ; GFX9-NEXT: s_and_b32 s1, s1, 0x7f
76 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
77 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
78 ; GFX9-NEXT: v_mul_lo_u32 v1, v0, -7
79 ; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
80 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
81 ; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0
82 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, 7
83 ; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
84 ; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
85 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
86 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
87 ; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
88 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
89 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
90 ; GFX9-NEXT: v_sub_u16_e32 v1, 6, v0
91 ; GFX9-NEXT: v_and_b32_e32 v0, 0x7f, v0
92 ; GFX9-NEXT: v_and_b32_e32 v1, 0x7f, v1
93 ; GFX9-NEXT: v_lshlrev_b16_e64 v1, v1, s0
94 ; GFX9-NEXT: v_lshrrev_b16_e64 v0, v0, s1
95 ; GFX9-NEXT: v_or_b32_e32 v0, v1, v0
96 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
97 ; GFX9-NEXT: ; return to shader part epilog
99 ; GFX10-LABEL: s_fshr_i7:
101 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
102 ; GFX10-NEXT: s_and_b32 s2, s2, 0x7f
103 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
104 ; GFX10-NEXT: s_and_b32 s1, s1, 0x7f
105 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
106 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
107 ; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
108 ; GFX10-NEXT: v_mul_lo_u32 v1, v0, -7
109 ; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
110 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
111 ; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0
112 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, 7
113 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0
114 ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
115 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
116 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
117 ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
118 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
119 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
120 ; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0
121 ; GFX10-NEXT: v_and_b32_e32 v0, 0x7f, v0
122 ; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1
123 ; GFX10-NEXT: v_lshrrev_b16 v0, v0, s1
124 ; GFX10-NEXT: v_lshlrev_b16 v1, v1, s0
125 ; GFX10-NEXT: v_or_b32_e32 v0, v1, v0
126 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
127 ; GFX10-NEXT: ; return to shader part epilog
129 ; GFX11-LABEL: s_fshr_i7:
131 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
132 ; GFX11-NEXT: s_and_b32 s2, s2, 0x7f
133 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1
134 ; GFX11-NEXT: s_and_b32 s1, s1, 0x7f
135 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
136 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0
137 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
138 ; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
139 ; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
140 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
141 ; GFX11-NEXT: v_mul_lo_u32 v1, v0, -7
142 ; GFX11-NEXT: v_mul_hi_u32 v1, v0, v1
143 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
144 ; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1
145 ; GFX11-NEXT: v_mul_hi_u32 v0, s2, v0
146 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
147 ; GFX11-NEXT: v_mul_lo_u32 v0, v0, 7
148 ; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0
149 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
150 ; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
151 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
152 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
153 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
154 ; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
155 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
156 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
157 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
158 ; GFX11-NEXT: v_sub_nc_u16 v1, 6, v0
159 ; GFX11-NEXT: v_and_b32_e32 v0, 0x7f, v0
160 ; GFX11-NEXT: v_and_b32_e32 v1, 0x7f, v1
161 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
162 ; GFX11-NEXT: v_lshrrev_b16 v0, v0, s1
163 ; GFX11-NEXT: v_lshlrev_b16 v1, v1, s0
164 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
165 ; GFX11-NEXT: v_or_b32_e32 v0, v1, v0
166 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
167 ; GFX11-NEXT: ; return to shader part epilog
168 %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt)
; Funnel shift right on i7 in a default-calling-convention function; the
; expected code reads the operands from v0, v1 and v2 and returns with s_setpc_b64.
; As in the inreg variant, the expected sequence scales by a reciprocal of 7 and
; conditionally subtracts 7 twice, presumably an unsigned remainder of the
; amount by the bit width (reviewer: confirm against the legalizer).
; The assertions below are autogenerated; regenerate them rather than hand-editing.
172 define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
173 ; GFX6-LABEL: v_fshr_i7:
175 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
177 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3
178 ; GFX6-NEXT: v_and_b32_e32 v2, 0x7f, v2
179 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
180 ; GFX6-NEXT: v_and_b32_e32 v1, 0x7f, v1
181 ; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
182 ; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3
183 ; GFX6-NEXT: v_mul_lo_u32 v4, v3, -7
184 ; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4
185 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4
186 ; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
187 ; GFX6-NEXT: v_mul_lo_u32 v3, v3, 7
188 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
189 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
190 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
191 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
192 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
193 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
194 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
195 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 6, v2
196 ; GFX6-NEXT: v_and_b32_e32 v2, 0x7f, v2
197 ; GFX6-NEXT: v_and_b32_e32 v3, 0x7f, v3
198 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0
199 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
200 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
201 ; GFX6-NEXT: s_setpc_b64 s[30:31]
203 ; GFX8-LABEL: v_fshr_i7:
205 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
207 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3
208 ; GFX8-NEXT: v_and_b32_e32 v2, 0x7f, v2
209 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0
210 ; GFX8-NEXT: v_and_b32_e32 v1, 0x7f, v1
211 ; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
212 ; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3
213 ; GFX8-NEXT: v_mul_lo_u32 v4, v3, -7
214 ; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4
215 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4
216 ; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3
217 ; GFX8-NEXT: v_mul_lo_u32 v3, v3, 7
218 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
219 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
220 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
221 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
222 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
223 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
224 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
225 ; GFX8-NEXT: v_sub_u16_e32 v3, 6, v2
226 ; GFX8-NEXT: v_and_b32_e32 v2, 0x7f, v2
227 ; GFX8-NEXT: v_and_b32_e32 v3, 0x7f, v3
228 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0
229 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1
230 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
231 ; GFX8-NEXT: s_setpc_b64 s[30:31]
233 ; GFX9-LABEL: v_fshr_i7:
235 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
237 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3
238 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2
239 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0
240 ; GFX9-NEXT: v_and_b32_e32 v1, 0x7f, v1
241 ; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
242 ; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3
243 ; GFX9-NEXT: v_mul_lo_u32 v4, v3, -7
244 ; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4
245 ; GFX9-NEXT: v_add_u32_e32 v3, v3, v4
246 ; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3
247 ; GFX9-NEXT: v_mul_lo_u32 v3, v3, 7
248 ; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
249 ; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
250 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
251 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
252 ; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
253 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
254 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
255 ; GFX9-NEXT: v_sub_u16_e32 v3, 6, v2
256 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2
257 ; GFX9-NEXT: v_and_b32_e32 v3, 0x7f, v3
258 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0
259 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1
260 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
261 ; GFX9-NEXT: s_setpc_b64 s[30:31]
263 ; GFX10-LABEL: v_fshr_i7:
265 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
267 ; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2
268 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
269 ; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1
270 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3
271 ; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
272 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3
273 ; GFX10-NEXT: v_mul_lo_u32 v4, v3, -7
274 ; GFX10-NEXT: v_mul_hi_u32 v4, v3, v4
275 ; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4
276 ; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3
277 ; GFX10-NEXT: v_mul_lo_u32 v3, v3, 7
278 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
279 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
280 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
281 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
282 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
283 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
284 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
285 ; GFX10-NEXT: v_sub_nc_u16 v3, 6, v2
286 ; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2
287 ; GFX10-NEXT: v_and_b32_e32 v3, 0x7f, v3
288 ; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1
289 ; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0
290 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
291 ; GFX10-NEXT: s_setpc_b64 s[30:31]
293 ; GFX11-LABEL: v_fshr_i7:
295 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
297 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7f, v2
298 ; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
299 ; GFX11-NEXT: v_and_b32_e32 v1, 0x7f, v1
300 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
301 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v3, v3
302 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
303 ; GFX11-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
304 ; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3
305 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
306 ; GFX11-NEXT: v_mul_lo_u32 v4, v3, -7
307 ; GFX11-NEXT: v_mul_hi_u32 v4, v3, v4
308 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
309 ; GFX11-NEXT: v_add_nc_u32_e32 v3, v3, v4
310 ; GFX11-NEXT: v_mul_hi_u32 v3, v2, v3
311 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
312 ; GFX11-NEXT: v_mul_lo_u32 v3, v3, 7
313 ; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3
314 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
315 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
316 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
317 ; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
318 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
319 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
320 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
321 ; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
322 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
323 ; GFX11-NEXT: v_sub_nc_u16 v3, 6, v2
324 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7f, v2
325 ; GFX11-NEXT: v_and_b32_e32 v3, 0x7f, v3
326 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
327 ; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1
328 ; GFX11-NEXT: v_lshlrev_b16 v0, v3, v0
329 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
330 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
331 ; GFX11-NEXT: s_setpc_b64 s[30:31]
332 %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt)
; Funnel shift right on i8 with a variable amount, in an amdgpu_ps function with
; all operands marked inreg. The expected code masks the amount with 7 for the
; right shift and uses an and-not with 7 for the left shift applied after lhs
; has been pre-shifted left by 1; all of it is scalar (s_*) instructions.
; The assertions below are autogenerated; regenerate them rather than hand-editing.
336 define amdgpu_ps i8 @s_fshr_i8(i8 inreg %lhs, i8 inreg %rhs, i8 inreg %amt) {
337 ; GFX6-LABEL: s_fshr_i8:
339 ; GFX6-NEXT: s_and_b32 s3, s2, 7
340 ; GFX6-NEXT: s_andn2_b32 s2, 7, s2
341 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
342 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff
343 ; GFX6-NEXT: s_lshl_b32 s0, s0, s2
344 ; GFX6-NEXT: s_lshr_b32 s1, s1, s3
345 ; GFX6-NEXT: s_or_b32 s0, s0, s1
346 ; GFX6-NEXT: ; return to shader part epilog
348 ; GFX8-LABEL: s_fshr_i8:
350 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
351 ; GFX8-NEXT: s_and_b32 s3, s2, 7
352 ; GFX8-NEXT: s_andn2_b32 s2, 7, s2
353 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
354 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
355 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2
356 ; GFX8-NEXT: s_lshr_b32 s1, s1, s3
357 ; GFX8-NEXT: s_or_b32 s0, s0, s1
358 ; GFX8-NEXT: ; return to shader part epilog
360 ; GFX9-LABEL: s_fshr_i8:
362 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
363 ; GFX9-NEXT: s_and_b32 s3, s2, 7
364 ; GFX9-NEXT: s_andn2_b32 s2, 7, s2
365 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
366 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
367 ; GFX9-NEXT: s_lshl_b32 s0, s0, s2
368 ; GFX9-NEXT: s_lshr_b32 s1, s1, s3
369 ; GFX9-NEXT: s_or_b32 s0, s0, s1
370 ; GFX9-NEXT: ; return to shader part epilog
372 ; GFX10-LABEL: s_fshr_i8:
374 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
375 ; GFX10-NEXT: s_and_b32 s3, s2, 7
376 ; GFX10-NEXT: s_andn2_b32 s2, 7, s2
377 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
378 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
379 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2
380 ; GFX10-NEXT: s_lshr_b32 s1, s1, s3
381 ; GFX10-NEXT: s_or_b32 s0, s0, s1
382 ; GFX10-NEXT: ; return to shader part epilog
384 ; GFX11-LABEL: s_fshr_i8:
386 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
387 ; GFX11-NEXT: s_and_b32 s3, s2, 7
388 ; GFX11-NEXT: s_and_not1_b32 s2, 7, s2
389 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1
390 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
391 ; GFX11-NEXT: s_lshl_b32 s0, s0, s2
392 ; GFX11-NEXT: s_lshr_b32 s1, s1, s3
393 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
394 ; GFX11-NEXT: s_or_b32 s0, s0, s1
395 ; GFX11-NEXT: ; return to shader part epilog
396 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt)
; Funnel shift right on i8 with a variable amount in a default-calling-convention
; function; the expected code reads the operands from v0, v1 and v2.
; The amount is masked with 7 for the right shift and its bitwise NOT is masked
; with 7 for the left shift, after lhs has been pre-shifted left by 1.
; The assertions below are autogenerated; regenerate them rather than hand-editing.
400 define i8 @v_fshr_i8(i8 %lhs, i8 %rhs, i8 %amt) {
401 ; GFX6-LABEL: v_fshr_i8:
403 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404 ; GFX6-NEXT: v_and_b32_e32 v3, 7, v2
405 ; GFX6-NEXT: v_not_b32_e32 v2, v2
406 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v2
407 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
408 ; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1
409 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
410 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v3, v1
411 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
412 ; GFX6-NEXT: s_setpc_b64 s[30:31]
414 ; GFX8-LABEL: v_fshr_i8:
416 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
417 ; GFX8-NEXT: v_and_b32_e32 v3, 7, v2
418 ; GFX8-NEXT: v_not_b32_e32 v2, v2
419 ; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
420 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0
421 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0
422 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
423 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
424 ; GFX8-NEXT: s_setpc_b64 s[30:31]
426 ; GFX9-LABEL: v_fshr_i8:
428 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
429 ; GFX9-NEXT: v_and_b32_e32 v3, 7, v2
430 ; GFX9-NEXT: v_not_b32_e32 v2, v2
431 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
432 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0
433 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0
434 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
435 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
436 ; GFX9-NEXT: s_setpc_b64 s[30:31]
438 ; GFX10-LABEL: v_fshr_i8:
440 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
441 ; GFX10-NEXT: v_not_b32_e32 v3, v2
442 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
443 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
444 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
445 ; GFX10-NEXT: v_and_b32_e32 v3, 7, v3
446 ; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1
447 ; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0
448 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
449 ; GFX10-NEXT: s_setpc_b64 s[30:31]
451 ; GFX11-LABEL: v_fshr_i8:
453 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
454 ; GFX11-NEXT: v_not_b32_e32 v3, v2
455 ; GFX11-NEXT: v_and_b32_e32 v2, 7, v2
456 ; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
457 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
458 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
459 ; GFX11-NEXT: v_and_b32_e32 v3, 7, v3
460 ; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1
461 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
462 ; GFX11-NEXT: v_lshlrev_b16 v0, v3, v0
463 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
464 ; GFX11-NEXT: s_setpc_b64 s[30:31]
465 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt)
; Funnel shift right on i8 by the constant 4, in an amdgpu_ps function with both
; operands marked inreg. The expected code shifts lhs left by 4 and ORs in
; bits [7:4] of rhs (a bitfield extract on the oldest target, mask + shift on
; the newer ones).
; The assertions below are autogenerated; regenerate them rather than hand-editing.
469 define amdgpu_ps i8 @s_fshr_i8_4(i8 inreg %lhs, i8 inreg %rhs) {
470 ; GFX6-LABEL: s_fshr_i8_4:
472 ; GFX6-NEXT: s_lshl_b32 s0, s0, 4
473 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x40004
474 ; GFX6-NEXT: s_or_b32 s0, s0, s1
475 ; GFX6-NEXT: ; return to shader part epilog
477 ; GFX8-LABEL: s_fshr_i8_4:
479 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
480 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
481 ; GFX8-NEXT: s_lshl_b32 s0, s0, 4
482 ; GFX8-NEXT: s_lshr_b32 s1, s1, 4
483 ; GFX8-NEXT: s_or_b32 s0, s0, s1
484 ; GFX8-NEXT: ; return to shader part epilog
486 ; GFX9-LABEL: s_fshr_i8_4:
488 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
489 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
490 ; GFX9-NEXT: s_lshl_b32 s0, s0, 4
491 ; GFX9-NEXT: s_lshr_b32 s1, s1, 4
492 ; GFX9-NEXT: s_or_b32 s0, s0, s1
493 ; GFX9-NEXT: ; return to shader part epilog
495 ; GFX10-LABEL: s_fshr_i8_4:
497 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
498 ; GFX10-NEXT: s_lshl_b32 s0, s0, 4
499 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
500 ; GFX10-NEXT: s_lshr_b32 s1, s1, 4
501 ; GFX10-NEXT: s_or_b32 s0, s0, s1
502 ; GFX10-NEXT: ; return to shader part epilog
504 ; GFX11-LABEL: s_fshr_i8_4:
506 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
507 ; GFX11-NEXT: s_lshl_b32 s0, s0, 4
508 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
509 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
510 ; GFX11-NEXT: s_lshr_b32 s1, s1, 4
511 ; GFX11-NEXT: s_or_b32 s0, s0, s1
512 ; GFX11-NEXT: ; return to shader part epilog
513 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4)
; Funnel shift right on i8 by the constant 4 in a default-calling-convention
; function; the expected code reads the operands from v0 and v1.
; lhs is shifted left by 4 and ORed with rhs shifted right by 4 (rhs is first
; restricted to its low byte, via bfe, an SDWA byte select, or an explicit mask).
; The assertions below are autogenerated; regenerate them rather than hand-editing.
517 define i8 @v_fshr_i8_4(i8 %lhs, i8 %rhs) {
518 ; GFX6-LABEL: v_fshr_i8_4:
520 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0
522 ; GFX6-NEXT: v_bfe_u32 v1, v1, 4, 4
523 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
524 ; GFX6-NEXT: s_setpc_b64 s[30:31]
526 ; GFX8-LABEL: v_fshr_i8_4:
528 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
529 ; GFX8-NEXT: v_mov_b32_e32 v2, 4
530 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0
531 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
532 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
533 ; GFX8-NEXT: s_setpc_b64 s[30:31]
535 ; GFX9-LABEL: v_fshr_i8_4:
537 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
538 ; GFX9-NEXT: s_mov_b32 s4, 4
539 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 4, v0
540 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
541 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
542 ; GFX9-NEXT: s_setpc_b64 s[30:31]
544 ; GFX10-LABEL: v_fshr_i8_4:
546 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
547 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
548 ; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0
549 ; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1
550 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
551 ; GFX10-NEXT: s_setpc_b64 s[30:31]
553 ; GFX11-LABEL: v_fshr_i8_4:
555 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
556 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
557 ; GFX11-NEXT: v_lshlrev_b16 v0, 4, v0
558 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
559 ; GFX11-NEXT: v_lshrrev_b16 v1, 4, v1
560 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
561 ; GFX11-NEXT: s_setpc_b64 s[30:31]
562 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4)
; Funnel shift right on i8 by the constant 5, in an amdgpu_ps function with both
; operands marked inreg. The expected code shifts lhs left by 3 (8 - 5) and ORs
; in bits [7:5] of rhs (a bitfield extract on the oldest target, mask + shift
; on the newer ones).
; The assertions below are autogenerated; regenerate them rather than hand-editing.
566 define amdgpu_ps i8 @s_fshr_i8_5(i8 inreg %lhs, i8 inreg %rhs) {
567 ; GFX6-LABEL: s_fshr_i8_5:
569 ; GFX6-NEXT: s_lshl_b32 s0, s0, 3
570 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x30005
571 ; GFX6-NEXT: s_or_b32 s0, s0, s1
572 ; GFX6-NEXT: ; return to shader part epilog
574 ; GFX8-LABEL: s_fshr_i8_5:
576 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
577 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
578 ; GFX8-NEXT: s_lshl_b32 s0, s0, 3
579 ; GFX8-NEXT: s_lshr_b32 s1, s1, 5
580 ; GFX8-NEXT: s_or_b32 s0, s0, s1
581 ; GFX8-NEXT: ; return to shader part epilog
583 ; GFX9-LABEL: s_fshr_i8_5:
585 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
586 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
587 ; GFX9-NEXT: s_lshl_b32 s0, s0, 3
588 ; GFX9-NEXT: s_lshr_b32 s1, s1, 5
589 ; GFX9-NEXT: s_or_b32 s0, s0, s1
590 ; GFX9-NEXT: ; return to shader part epilog
592 ; GFX10-LABEL: s_fshr_i8_5:
594 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
595 ; GFX10-NEXT: s_lshl_b32 s0, s0, 3
596 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
597 ; GFX10-NEXT: s_lshr_b32 s1, s1, 5
598 ; GFX10-NEXT: s_or_b32 s0, s0, s1
599 ; GFX10-NEXT: ; return to shader part epilog
601 ; GFX11-LABEL: s_fshr_i8_5:
603 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
604 ; GFX11-NEXT: s_lshl_b32 s0, s0, 3
605 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
606 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
607 ; GFX11-NEXT: s_lshr_b32 s1, s1, 5
608 ; GFX11-NEXT: s_or_b32 s0, s0, s1
609 ; GFX11-NEXT: ; return to shader part epilog
610 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5)
; Funnel shift right on i8 by the constant 5 in a default-calling-convention
; function; the expected code reads the operands from v0 and v1.
; lhs is shifted left by 3 (8 - 5) and ORed with rhs shifted right by 5 (rhs is
; first restricted to its low byte, via bfe, an SDWA byte select, or a mask).
; The assertions below are autogenerated; regenerate them rather than hand-editing.
614 define i8 @v_fshr_i8_5(i8 %lhs, i8 %rhs) {
615 ; GFX6-LABEL: v_fshr_i8_5:
617 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
618 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 3, v0
619 ; GFX6-NEXT: v_bfe_u32 v1, v1, 5, 3
620 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
621 ; GFX6-NEXT: s_setpc_b64 s[30:31]
623 ; GFX8-LABEL: v_fshr_i8_5:
625 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
626 ; GFX8-NEXT: v_mov_b32_e32 v2, 5
627 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 3, v0
628 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
629 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
630 ; GFX8-NEXT: s_setpc_b64 s[30:31]
632 ; GFX9-LABEL: v_fshr_i8_5:
634 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
635 ; GFX9-NEXT: v_mov_b32_e32 v2, 5
636 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 3, v0
637 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
638 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
639 ; GFX9-NEXT: s_setpc_b64 s[30:31]
641 ; GFX10-LABEL: v_fshr_i8_5:
643 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
644 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
645 ; GFX10-NEXT: v_lshlrev_b16 v0, 3, v0
646 ; GFX10-NEXT: v_lshrrev_b16 v1, 5, v1
647 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
648 ; GFX10-NEXT: s_setpc_b64 s[30:31]
650 ; GFX11-LABEL: v_fshr_i8_5:
652 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
653 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
654 ; GFX11-NEXT: v_lshlrev_b16 v0, 3, v0
655 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
656 ; GFX11-NEXT: v_lshrrev_b16 v1, 5, v1
657 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
658 ; GFX11-NEXT: s_setpc_b64 s[30:31]
659 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5)
; Funnel shift right on <2 x i8> with per-element variable amounts, in an
; amdgpu_ps function. The vectors are passed and returned as i16 (inreg) and
; bitcast to and from <2 x i8> around the intrinsic call.
; The expected code splits each i16 into its two bytes with an 8-bit shift,
; applies the and-7 / and-not-7 i8 sequence to each byte, then repacks the two
; results into the low 16 bits of s0.
; The assertions below are autogenerated; regenerate them rather than hand-editing.
663 define amdgpu_ps i16 @s_fshr_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 inreg %amt.arg) {
664 ; GFX6-LABEL: s_fshr_v2i8:
666 ; GFX6-NEXT: s_lshr_b32 s3, s0, 8
667 ; GFX6-NEXT: s_lshr_b32 s4, s2, 8
668 ; GFX6-NEXT: s_and_b32 s5, s2, 7
669 ; GFX6-NEXT: s_andn2_b32 s2, 7, s2
670 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
671 ; GFX6-NEXT: s_lshl_b32 s0, s0, s2
672 ; GFX6-NEXT: s_and_b32 s2, s1, 0xff
673 ; GFX6-NEXT: s_lshr_b32 s2, s2, s5
674 ; GFX6-NEXT: s_or_b32 s0, s0, s2
675 ; GFX6-NEXT: s_and_b32 s2, s4, 7
676 ; GFX6-NEXT: s_andn2_b32 s4, 7, s4
677 ; GFX6-NEXT: s_lshl_b32 s3, s3, 1
678 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80008
679 ; GFX6-NEXT: s_lshl_b32 s3, s3, s4
680 ; GFX6-NEXT: s_lshr_b32 s1, s1, s2
681 ; GFX6-NEXT: s_or_b32 s1, s3, s1
682 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff
683 ; GFX6-NEXT: s_and_b32 s0, s0, 0xff
684 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8
685 ; GFX6-NEXT: s_or_b32 s0, s0, s1
686 ; GFX6-NEXT: ; return to shader part epilog
688 ; GFX8-LABEL: s_fshr_v2i8:
690 ; GFX8-NEXT: s_lshr_b32 s3, s0, 8
691 ; GFX8-NEXT: s_lshr_b32 s4, s1, 8
692 ; GFX8-NEXT: s_lshr_b32 s5, s2, 8
693 ; GFX8-NEXT: s_and_b32 s6, s2, 7
694 ; GFX8-NEXT: s_andn2_b32 s2, 7, s2
695 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
696 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
697 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2
698 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
699 ; GFX8-NEXT: s_andn2_b32 s2, 7, s5
700 ; GFX8-NEXT: s_lshl_b32 s3, s3, 1
701 ; GFX8-NEXT: s_lshr_b32 s1, s1, s6
702 ; GFX8-NEXT: s_lshl_b32 s2, s3, s2
703 ; GFX8-NEXT: s_and_b32 s3, s4, 0xff
704 ; GFX8-NEXT: s_or_b32 s0, s0, s1
705 ; GFX8-NEXT: s_and_b32 s1, s5, 7
706 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
707 ; GFX8-NEXT: s_lshr_b32 s1, s3, s1
708 ; GFX8-NEXT: s_or_b32 s1, s2, s1
709 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
710 ; GFX8-NEXT: s_and_b32 s0, s0, 0xff
711 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8
712 ; GFX8-NEXT: s_or_b32 s0, s0, s1
713 ; GFX8-NEXT: ; return to shader part epilog
715 ; GFX9-LABEL: s_fshr_v2i8:
717 ; GFX9-NEXT: s_lshr_b32 s3, s0, 8
718 ; GFX9-NEXT: s_lshr_b32 s4, s1, 8
719 ; GFX9-NEXT: s_lshr_b32 s5, s2, 8
720 ; GFX9-NEXT: s_and_b32 s6, s2, 7
721 ; GFX9-NEXT: s_andn2_b32 s2, 7, s2
722 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
723 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
724 ; GFX9-NEXT: s_lshl_b32 s0, s0, s2
725 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
726 ; GFX9-NEXT: s_andn2_b32 s2, 7, s5
727 ; GFX9-NEXT: s_lshl_b32 s3, s3, 1
728 ; GFX9-NEXT: s_lshr_b32 s1, s1, s6
729 ; GFX9-NEXT: s_lshl_b32 s2, s3, s2
730 ; GFX9-NEXT: s_and_b32 s3, s4, 0xff
731 ; GFX9-NEXT: s_or_b32 s0, s0, s1
732 ; GFX9-NEXT: s_and_b32 s1, s5, 7
733 ; GFX9-NEXT: s_and_b32 s3, 0xffff, s3
734 ; GFX9-NEXT: s_lshr_b32 s1, s3, s1
735 ; GFX9-NEXT: s_or_b32 s1, s2, s1
736 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
737 ; GFX9-NEXT: s_and_b32 s0, s0, 0xff
738 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8
739 ; GFX9-NEXT: s_or_b32 s0, s0, s1
740 ; GFX9-NEXT: ; return to shader part epilog
742 ; GFX10-LABEL: s_fshr_v2i8:
744 ; GFX10-NEXT: s_lshr_b32 s4, s1, 8
745 ; GFX10-NEXT: s_lshr_b32 s3, s0, 8
746 ; GFX10-NEXT: s_lshr_b32 s5, s2, 8
747 ; GFX10-NEXT: s_and_b32 s6, s2, 7
748 ; GFX10-NEXT: s_andn2_b32 s2, 7, s2
749 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
750 ; GFX10-NEXT: s_and_b32 s4, s4, 0xff
751 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
752 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2
753 ; GFX10-NEXT: s_and_b32 s2, s5, 7
754 ; GFX10-NEXT: s_andn2_b32 s5, 7, s5
755 ; GFX10-NEXT: s_lshl_b32 s3, s3, 1
756 ; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
757 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
758 ; GFX10-NEXT: s_lshl_b32 s3, s3, s5
759 ; GFX10-NEXT: s_lshr_b32 s2, s4, s2
760 ; GFX10-NEXT: s_lshr_b32 s1, s1, s6
761 ; GFX10-NEXT: s_or_b32 s2, s3, s2
762 ; GFX10-NEXT: s_or_b32 s0, s0, s1
763 ; GFX10-NEXT: s_and_b32 s1, s2, 0xff
764 ; GFX10-NEXT: s_and_b32 s0, s0, 0xff
765 ; GFX10-NEXT: s_lshl_b32 s1, s1, 8
766 ; GFX10-NEXT: s_or_b32 s0, s0, s1
767 ; GFX10-NEXT: ; return to shader part epilog
769 ; GFX11-LABEL: s_fshr_v2i8:
771 ; GFX11-NEXT: s_lshr_b32 s4, s1, 8
772 ; GFX11-NEXT: s_lshr_b32 s3, s0, 8
773 ; GFX11-NEXT: s_lshr_b32 s5, s2, 8
774 ; GFX11-NEXT: s_and_b32 s6, s2, 7
775 ; GFX11-NEXT: s_and_not1_b32 s2, 7, s2
776 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1
777 ; GFX11-NEXT: s_and_b32 s4, s4, 0xff
778 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
779 ; GFX11-NEXT: s_lshl_b32 s0, s0, s2
780 ; GFX11-NEXT: s_and_b32 s2, s5, 7
781 ; GFX11-NEXT: s_and_not1_b32 s5, 7, s5
782 ; GFX11-NEXT: s_lshl_b32 s3, s3, 1
783 ; GFX11-NEXT: s_and_b32 s4, 0xffff, s4
784 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
785 ; GFX11-NEXT: s_lshl_b32 s3, s3, s5
786 ; GFX11-NEXT: s_lshr_b32 s2, s4, s2
787 ; GFX11-NEXT: s_lshr_b32 s1, s1, s6
788 ; GFX11-NEXT: s_or_b32 s2, s3, s2
789 ; GFX11-NEXT: s_or_b32 s0, s0, s1
790 ; GFX11-NEXT: s_and_b32 s1, s2, 0xff
791 ; GFX11-NEXT: s_and_b32 s0, s0, 0xff
792 ; GFX11-NEXT: s_lshl_b32 s1, s1, 8
793 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
794 ; GFX11-NEXT: s_or_b32 s0, s0, s1
795 ; GFX11-NEXT: ; return to shader part epilog
796 %lhs = bitcast i16 %lhs.arg to <2 x i8>
797 %rhs = bitcast i16 %rhs.arg to <2 x i8>
798 %amt = bitcast i16 %amt.arg to <2 x i8>
799 %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt)
800 %cast.result = bitcast <2 x i8> %result to i16
; v_fshr_v2i8: funnel shift right on <2 x i8> using the default calling
; convention. Each i16 argument is bitcast to <2 x i8>, the three vectors are
; passed to @llvm.fshr.v2i8, and the result is bitcast back to i16.
;
; In the expected output below the operands are read from v0 (lhs), v1 (rhs)
; and v2 (amt). Per byte lane the code forms
;   ((lhs << 1) << (~amt & 7)) | (rhs_byte >> (amt & 7))
; then masks each lane with 0xff and repacks lane 1 at bit 8.
;
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
804 define i16 @v_fshr_v2i8(i16 %lhs.arg, i16 %rhs.arg, i16 %amt.arg) {
805 ; GFX6-LABEL: v_fshr_v2i8:
807 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
808 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 8, v2
809 ; GFX6-NEXT: v_and_b32_e32 v5, 7, v2
810 ; GFX6-NEXT: v_not_b32_e32 v2, v2
811 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0
812 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v2
813 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
814 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
815 ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1
816 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v5, v2
817 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
818 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v4
819 ; GFX6-NEXT: v_not_b32_e32 v4, v4
820 ; GFX6-NEXT: v_and_b32_e32 v4, 7, v4
821 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
822 ; GFX6-NEXT: v_bfe_u32 v1, v1, 8, 8
823 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3
824 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
825 ; GFX6-NEXT: v_or_b32_e32 v1, v3, v1
826 ; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1
827 ; GFX6-NEXT: v_and_b32_e32 v0, 0xff, v0
828 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
829 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
830 ; GFX6-NEXT: s_setpc_b64 s[30:31]
832 ; GFX8-LABEL: v_fshr_v2i8:
834 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
835 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2
836 ; GFX8-NEXT: v_and_b32_e32 v6, 7, v2
837 ; GFX8-NEXT: v_not_b32_e32 v2, v2
838 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0
839 ; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
840 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0
841 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1
842 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0
843 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
844 ; GFX8-NEXT: v_not_b32_e32 v2, v5
845 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
846 ; GFX8-NEXT: v_and_b32_e32 v1, 7, v5
847 ; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
848 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v3
849 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, v2, v3
850 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
851 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
852 ; GFX8-NEXT: v_and_b32_e32 v1, 0xff, v1
853 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
854 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
855 ; GFX8-NEXT: s_setpc_b64 s[30:31]
857 ; GFX9-LABEL: v_fshr_v2i8:
859 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
860 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2
861 ; GFX9-NEXT: v_and_b32_e32 v6, 7, v2
862 ; GFX9-NEXT: v_not_b32_e32 v2, v2
863 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0
864 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
865 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0
866 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1
867 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0
868 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
869 ; GFX9-NEXT: v_not_b32_e32 v2, v5
870 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
871 ; GFX9-NEXT: v_and_b32_e32 v1, 7, v5
872 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
873 ; GFX9-NEXT: v_lshlrev_b16_e32 v3, 1, v3
874 ; GFX9-NEXT: v_lshlrev_b16_e32 v2, v2, v3
875 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
876 ; GFX9-NEXT: v_or_b32_e32 v1, v2, v1
877 ; GFX9-NEXT: v_and_b32_e32 v1, 0xff, v1
878 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
879 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
880 ; GFX9-NEXT: s_setpc_b64 s[30:31]
882 ; GFX10-LABEL: v_fshr_v2i8:
884 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
885 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v2
886 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v0
887 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
888 ; GFX10-NEXT: v_and_b32_e32 v7, 7, v2
889 ; GFX10-NEXT: v_not_b32_e32 v2, v2
890 ; GFX10-NEXT: v_not_b32_e32 v6, v3
891 ; GFX10-NEXT: v_and_b32_e32 v3, 7, v3
892 ; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4
893 ; GFX10-NEXT: v_and_b32_e32 v5, 0xff, v5
894 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
895 ; GFX10-NEXT: v_and_b32_e32 v6, 7, v6
896 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
897 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
898 ; GFX10-NEXT: v_lshrrev_b16 v3, v3, v5
899 ; GFX10-NEXT: s_movk_i32 s4, 0xff
900 ; GFX10-NEXT: v_lshlrev_b16 v4, v6, v4
901 ; GFX10-NEXT: v_lshrrev_b16 v1, v7, v1
902 ; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0
903 ; GFX10-NEXT: v_or_b32_e32 v2, v4, v3
904 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
905 ; GFX10-NEXT: v_and_b32_sdwa v1, v2, s4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
906 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
907 ; GFX10-NEXT: s_setpc_b64 s[30:31]
909 ; GFX11-LABEL: v_fshr_v2i8:
911 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
912 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v2
913 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 8, v0
914 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1
915 ; GFX11-NEXT: v_and_b32_e32 v7, 7, v2
916 ; GFX11-NEXT: v_not_b32_e32 v2, v2
917 ; GFX11-NEXT: v_not_b32_e32 v6, v3
918 ; GFX11-NEXT: v_and_b32_e32 v3, 7, v3
919 ; GFX11-NEXT: v_lshlrev_b16 v4, 1, v4
920 ; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5
921 ; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
922 ; GFX11-NEXT: v_and_b32_e32 v6, 7, v6
923 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
924 ; GFX11-NEXT: v_and_b32_e32 v2, 7, v2
925 ; GFX11-NEXT: v_lshrrev_b16 v3, v3, v5
926 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
927 ; GFX11-NEXT: v_lshlrev_b16 v4, v6, v4
928 ; GFX11-NEXT: v_lshrrev_b16 v1, v7, v1
929 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
930 ; GFX11-NEXT: v_lshlrev_b16 v0, v2, v0
931 ; GFX11-NEXT: v_or_b32_e32 v2, v4, v3
932 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
933 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
934 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v2
935 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
936 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
937 ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
938 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
939 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
940 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; IR under test: view each i16 as two i8 lanes, funnel-shift lane-wise, then
; view the <2 x i8> result as an i16 again.
941 %lhs = bitcast i16 %lhs.arg to <2 x i8>
942 %rhs = bitcast i16 %rhs.arg to <2 x i8>
943 %amt = bitcast i16 %amt.arg to <2 x i8>
944 %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt)
945 %cast.result = bitcast <2 x i8> %result to i16
; s_fshr_v4i8: funnel shift right on <4 x i8> in an amdgpu_ps function whose
; arguments are all marked inreg. Each i32 argument is bitcast to <4 x i8>,
; the three vectors are passed to @llvm.fshr.v4i8, and the result is bitcast
; back to i32 and returned.
;
; In the expected output below the operands are read from s0 (lhs), s1 (rhs)
; and s2 (amt), and only scalar ALU instructions appear. Per byte lane the
; code forms
;   ((lhs << 1) << (~amt & 7)) | (rhs_byte >> (amt & 7))
; then masks each lane with 0xff and repacks the lanes at bits 0/8/16/24 of s0.
;
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
949 define amdgpu_ps i32 @s_fshr_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 inreg %amt.arg) {
950 ; GFX6-LABEL: s_fshr_v4i8:
952 ; GFX6-NEXT: s_lshr_b32 s3, s0, 8
953 ; GFX6-NEXT: s_lshr_b32 s4, s0, 16
954 ; GFX6-NEXT: s_lshr_b32 s5, s0, 24
955 ; GFX6-NEXT: s_lshr_b32 s7, s2, 8
956 ; GFX6-NEXT: s_lshr_b32 s8, s2, 16
957 ; GFX6-NEXT: s_lshr_b32 s9, s2, 24
958 ; GFX6-NEXT: s_and_b32 s10, s2, 7
959 ; GFX6-NEXT: s_andn2_b32 s2, 7, s2
960 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
961 ; GFX6-NEXT: s_lshl_b32 s0, s0, s2
962 ; GFX6-NEXT: s_and_b32 s2, s1, 0xff
963 ; GFX6-NEXT: s_lshr_b32 s2, s2, s10
964 ; GFX6-NEXT: s_or_b32 s0, s0, s2
965 ; GFX6-NEXT: s_and_b32 s2, s7, 7
966 ; GFX6-NEXT: s_andn2_b32 s7, 7, s7
967 ; GFX6-NEXT: s_lshl_b32 s3, s3, 1
968 ; GFX6-NEXT: s_lshl_b32 s3, s3, s7
969 ; GFX6-NEXT: s_bfe_u32 s7, s1, 0x80008
970 ; GFX6-NEXT: s_lshr_b32 s2, s7, s2
971 ; GFX6-NEXT: s_lshr_b32 s6, s1, 24
972 ; GFX6-NEXT: s_or_b32 s2, s3, s2
973 ; GFX6-NEXT: s_and_b32 s3, s8, 7
974 ; GFX6-NEXT: s_andn2_b32 s7, 7, s8
975 ; GFX6-NEXT: s_lshl_b32 s4, s4, 1
976 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80010
977 ; GFX6-NEXT: s_lshl_b32 s4, s4, s7
978 ; GFX6-NEXT: s_lshr_b32 s1, s1, s3
979 ; GFX6-NEXT: s_or_b32 s1, s4, s1
980 ; GFX6-NEXT: s_and_b32 s3, s9, 7
981 ; GFX6-NEXT: s_andn2_b32 s4, 7, s9
982 ; GFX6-NEXT: s_lshl_b32 s5, s5, 1
983 ; GFX6-NEXT: s_and_b32 s2, s2, 0xff
984 ; GFX6-NEXT: s_lshl_b32 s4, s5, s4
985 ; GFX6-NEXT: s_lshr_b32 s3, s6, s3
986 ; GFX6-NEXT: s_and_b32 s0, s0, 0xff
987 ; GFX6-NEXT: s_lshl_b32 s2, s2, 8
988 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff
989 ; GFX6-NEXT: s_or_b32 s3, s4, s3
990 ; GFX6-NEXT: s_or_b32 s0, s0, s2
991 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
992 ; GFX6-NEXT: s_or_b32 s0, s0, s1
993 ; GFX6-NEXT: s_and_b32 s1, s3, 0xff
994 ; GFX6-NEXT: s_lshl_b32 s1, s1, 24
995 ; GFX6-NEXT: s_or_b32 s0, s0, s1
996 ; GFX6-NEXT: ; return to shader part epilog
998 ; GFX8-LABEL: s_fshr_v4i8:
1000 ; GFX8-NEXT: s_lshr_b32 s3, s0, 8
1001 ; GFX8-NEXT: s_lshr_b32 s4, s0, 16
1002 ; GFX8-NEXT: s_lshr_b32 s5, s0, 24
1003 ; GFX8-NEXT: s_lshr_b32 s6, s1, 8
1004 ; GFX8-NEXT: s_lshr_b32 s7, s1, 16
1005 ; GFX8-NEXT: s_lshr_b32 s8, s1, 24
1006 ; GFX8-NEXT: s_lshr_b32 s9, s2, 8
1007 ; GFX8-NEXT: s_lshr_b32 s10, s2, 16
1008 ; GFX8-NEXT: s_lshr_b32 s11, s2, 24
1009 ; GFX8-NEXT: s_and_b32 s12, s2, 7
1010 ; GFX8-NEXT: s_andn2_b32 s2, 7, s2
1011 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
1012 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
1013 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2
1014 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1015 ; GFX8-NEXT: s_andn2_b32 s2, 7, s9
1016 ; GFX8-NEXT: s_lshl_b32 s3, s3, 1
1017 ; GFX8-NEXT: s_lshr_b32 s1, s1, s12
1018 ; GFX8-NEXT: s_lshl_b32 s2, s3, s2
1019 ; GFX8-NEXT: s_and_b32 s3, s6, 0xff
1020 ; GFX8-NEXT: s_or_b32 s0, s0, s1
1021 ; GFX8-NEXT: s_and_b32 s1, s9, 7
1022 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
1023 ; GFX8-NEXT: s_lshr_b32 s1, s3, s1
1024 ; GFX8-NEXT: s_andn2_b32 s3, 7, s10
1025 ; GFX8-NEXT: s_lshl_b32 s4, s4, 1
1026 ; GFX8-NEXT: s_lshl_b32 s3, s4, s3
1027 ; GFX8-NEXT: s_and_b32 s4, s7, 0xff
1028 ; GFX8-NEXT: s_or_b32 s1, s2, s1
1029 ; GFX8-NEXT: s_and_b32 s2, s10, 7
1030 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1031 ; GFX8-NEXT: s_lshr_b32 s2, s4, s2
1032 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
1033 ; GFX8-NEXT: s_or_b32 s2, s3, s2
1034 ; GFX8-NEXT: s_and_b32 s3, s11, 7
1035 ; GFX8-NEXT: s_andn2_b32 s4, 7, s11
1036 ; GFX8-NEXT: s_lshl_b32 s5, s5, 1
1037 ; GFX8-NEXT: s_and_b32 s0, s0, 0xff
1038 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8
1039 ; GFX8-NEXT: s_lshl_b32 s4, s5, s4
1040 ; GFX8-NEXT: s_lshr_b32 s3, s8, s3
1041 ; GFX8-NEXT: s_or_b32 s0, s0, s1
1042 ; GFX8-NEXT: s_and_b32 s1, s2, 0xff
1043 ; GFX8-NEXT: s_or_b32 s3, s4, s3
1044 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1045 ; GFX8-NEXT: s_or_b32 s0, s0, s1
1046 ; GFX8-NEXT: s_and_b32 s1, s3, 0xff
1047 ; GFX8-NEXT: s_lshl_b32 s1, s1, 24
1048 ; GFX8-NEXT: s_or_b32 s0, s0, s1
1049 ; GFX8-NEXT: ; return to shader part epilog
1051 ; GFX9-LABEL: s_fshr_v4i8:
1053 ; GFX9-NEXT: s_lshr_b32 s3, s0, 8
1054 ; GFX9-NEXT: s_lshr_b32 s4, s0, 16
1055 ; GFX9-NEXT: s_lshr_b32 s5, s0, 24
1056 ; GFX9-NEXT: s_lshr_b32 s6, s1, 8
1057 ; GFX9-NEXT: s_lshr_b32 s7, s1, 16
1058 ; GFX9-NEXT: s_lshr_b32 s8, s1, 24
1059 ; GFX9-NEXT: s_lshr_b32 s9, s2, 8
1060 ; GFX9-NEXT: s_lshr_b32 s10, s2, 16
1061 ; GFX9-NEXT: s_lshr_b32 s11, s2, 24
1062 ; GFX9-NEXT: s_and_b32 s12, s2, 7
1063 ; GFX9-NEXT: s_andn2_b32 s2, 7, s2
1064 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
1065 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
1066 ; GFX9-NEXT: s_lshl_b32 s0, s0, s2
1067 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
1068 ; GFX9-NEXT: s_andn2_b32 s2, 7, s9
1069 ; GFX9-NEXT: s_lshl_b32 s3, s3, 1
1070 ; GFX9-NEXT: s_lshr_b32 s1, s1, s12
1071 ; GFX9-NEXT: s_lshl_b32 s2, s3, s2
1072 ; GFX9-NEXT: s_and_b32 s3, s6, 0xff
1073 ; GFX9-NEXT: s_or_b32 s0, s0, s1
1074 ; GFX9-NEXT: s_and_b32 s1, s9, 7
1075 ; GFX9-NEXT: s_and_b32 s3, 0xffff, s3
1076 ; GFX9-NEXT: s_lshr_b32 s1, s3, s1
1077 ; GFX9-NEXT: s_andn2_b32 s3, 7, s10
1078 ; GFX9-NEXT: s_lshl_b32 s4, s4, 1
1079 ; GFX9-NEXT: s_lshl_b32 s3, s4, s3
1080 ; GFX9-NEXT: s_and_b32 s4, s7, 0xff
1081 ; GFX9-NEXT: s_or_b32 s1, s2, s1
1082 ; GFX9-NEXT: s_and_b32 s2, s10, 7
1083 ; GFX9-NEXT: s_and_b32 s4, 0xffff, s4
1084 ; GFX9-NEXT: s_lshr_b32 s2, s4, s2
1085 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
1086 ; GFX9-NEXT: s_or_b32 s2, s3, s2
1087 ; GFX9-NEXT: s_and_b32 s3, s11, 7
1088 ; GFX9-NEXT: s_andn2_b32 s4, 7, s11
1089 ; GFX9-NEXT: s_lshl_b32 s5, s5, 1
1090 ; GFX9-NEXT: s_and_b32 s0, s0, 0xff
1091 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8
1092 ; GFX9-NEXT: s_lshl_b32 s4, s5, s4
1093 ; GFX9-NEXT: s_lshr_b32 s3, s8, s3
1094 ; GFX9-NEXT: s_or_b32 s0, s0, s1
1095 ; GFX9-NEXT: s_and_b32 s1, s2, 0xff
1096 ; GFX9-NEXT: s_or_b32 s3, s4, s3
1097 ; GFX9-NEXT: s_lshl_b32 s1, s1, 16
1098 ; GFX9-NEXT: s_or_b32 s0, s0, s1
1099 ; GFX9-NEXT: s_and_b32 s1, s3, 0xff
1100 ; GFX9-NEXT: s_lshl_b32 s1, s1, 24
1101 ; GFX9-NEXT: s_or_b32 s0, s0, s1
1102 ; GFX9-NEXT: ; return to shader part epilog
1104 ; GFX10-LABEL: s_fshr_v4i8:
1106 ; GFX10-NEXT: s_lshr_b32 s6, s1, 8
1107 ; GFX10-NEXT: s_lshr_b32 s3, s0, 8
1108 ; GFX10-NEXT: s_lshr_b32 s4, s0, 16
1109 ; GFX10-NEXT: s_lshr_b32 s5, s0, 24
1110 ; GFX10-NEXT: s_lshr_b32 s7, s1, 16
1111 ; GFX10-NEXT: s_lshr_b32 s8, s1, 24
1112 ; GFX10-NEXT: s_lshr_b32 s9, s2, 8
1113 ; GFX10-NEXT: s_lshr_b32 s10, s2, 16
1114 ; GFX10-NEXT: s_lshr_b32 s11, s2, 24
1115 ; GFX10-NEXT: s_and_b32 s12, s2, 7
1116 ; GFX10-NEXT: s_andn2_b32 s2, 7, s2
1117 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
1118 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
1119 ; GFX10-NEXT: s_and_b32 s6, s6, 0xff
1120 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
1121 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2
1122 ; GFX10-NEXT: s_and_b32 s2, s9, 7
1123 ; GFX10-NEXT: s_andn2_b32 s9, 7, s9
1124 ; GFX10-NEXT: s_lshl_b32 s3, s3, 1
1125 ; GFX10-NEXT: s_and_b32 s6, 0xffff, s6
1126 ; GFX10-NEXT: s_lshr_b32 s1, s1, s12
1127 ; GFX10-NEXT: s_lshl_b32 s3, s3, s9
1128 ; GFX10-NEXT: s_lshr_b32 s2, s6, s2
1129 ; GFX10-NEXT: s_and_b32 s6, s7, 0xff
1130 ; GFX10-NEXT: s_or_b32 s0, s0, s1
1131 ; GFX10-NEXT: s_or_b32 s1, s3, s2
1132 ; GFX10-NEXT: s_and_b32 s2, s10, 7
1133 ; GFX10-NEXT: s_andn2_b32 s3, 7, s10
1134 ; GFX10-NEXT: s_lshl_b32 s4, s4, 1
1135 ; GFX10-NEXT: s_and_b32 s6, 0xffff, s6
1136 ; GFX10-NEXT: s_lshl_b32 s3, s4, s3
1137 ; GFX10-NEXT: s_lshr_b32 s2, s6, s2
1138 ; GFX10-NEXT: s_andn2_b32 s4, 7, s11
1139 ; GFX10-NEXT: s_lshl_b32 s5, s5, 1
1140 ; GFX10-NEXT: s_and_b32 s6, s11, 7
1141 ; GFX10-NEXT: s_lshl_b32 s4, s5, s4
1142 ; GFX10-NEXT: s_lshr_b32 s5, s8, s6
1143 ; GFX10-NEXT: s_or_b32 s2, s3, s2
1144 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
1145 ; GFX10-NEXT: s_or_b32 s3, s4, s5
1146 ; GFX10-NEXT: s_and_b32 s0, s0, 0xff
1147 ; GFX10-NEXT: s_lshl_b32 s1, s1, 8
1148 ; GFX10-NEXT: s_and_b32 s2, s2, 0xff
1149 ; GFX10-NEXT: s_or_b32 s0, s0, s1
1150 ; GFX10-NEXT: s_lshl_b32 s1, s2, 16
1151 ; GFX10-NEXT: s_and_b32 s2, s3, 0xff
1152 ; GFX10-NEXT: s_or_b32 s0, s0, s1
1153 ; GFX10-NEXT: s_lshl_b32 s1, s2, 24
1154 ; GFX10-NEXT: s_or_b32 s0, s0, s1
1155 ; GFX10-NEXT: ; return to shader part epilog
1157 ; GFX11-LABEL: s_fshr_v4i8:
1159 ; GFX11-NEXT: s_lshr_b32 s6, s1, 8
1160 ; GFX11-NEXT: s_lshr_b32 s3, s0, 8
1161 ; GFX11-NEXT: s_lshr_b32 s4, s0, 16
1162 ; GFX11-NEXT: s_lshr_b32 s5, s0, 24
1163 ; GFX11-NEXT: s_lshr_b32 s7, s1, 16
1164 ; GFX11-NEXT: s_lshr_b32 s8, s1, 24
1165 ; GFX11-NEXT: s_lshr_b32 s9, s2, 8
1166 ; GFX11-NEXT: s_lshr_b32 s10, s2, 16
1167 ; GFX11-NEXT: s_lshr_b32 s11, s2, 24
1168 ; GFX11-NEXT: s_and_b32 s12, s2, 7
1169 ; GFX11-NEXT: s_and_not1_b32 s2, 7, s2
1170 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
1171 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1
1172 ; GFX11-NEXT: s_and_b32 s6, s6, 0xff
1173 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
1174 ; GFX11-NEXT: s_lshl_b32 s0, s0, s2
1175 ; GFX11-NEXT: s_and_b32 s2, s9, 7
1176 ; GFX11-NEXT: s_and_not1_b32 s9, 7, s9
1177 ; GFX11-NEXT: s_lshl_b32 s3, s3, 1
1178 ; GFX11-NEXT: s_and_b32 s6, 0xffff, s6
1179 ; GFX11-NEXT: s_lshr_b32 s1, s1, s12
1180 ; GFX11-NEXT: s_lshl_b32 s3, s3, s9
1181 ; GFX11-NEXT: s_lshr_b32 s2, s6, s2
1182 ; GFX11-NEXT: s_and_b32 s6, s7, 0xff
1183 ; GFX11-NEXT: s_or_b32 s0, s0, s1
1184 ; GFX11-NEXT: s_or_b32 s1, s3, s2
1185 ; GFX11-NEXT: s_and_b32 s2, s10, 7
1186 ; GFX11-NEXT: s_and_not1_b32 s3, 7, s10
1187 ; GFX11-NEXT: s_lshl_b32 s4, s4, 1
1188 ; GFX11-NEXT: s_and_b32 s6, 0xffff, s6
1189 ; GFX11-NEXT: s_lshl_b32 s3, s4, s3
1190 ; GFX11-NEXT: s_lshr_b32 s2, s6, s2
1191 ; GFX11-NEXT: s_and_not1_b32 s4, 7, s11
1192 ; GFX11-NEXT: s_lshl_b32 s5, s5, 1
1193 ; GFX11-NEXT: s_and_b32 s6, s11, 7
1194 ; GFX11-NEXT: s_lshl_b32 s4, s5, s4
1195 ; GFX11-NEXT: s_lshr_b32 s5, s8, s6
1196 ; GFX11-NEXT: s_or_b32 s2, s3, s2
1197 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
1198 ; GFX11-NEXT: s_or_b32 s3, s4, s5
1199 ; GFX11-NEXT: s_and_b32 s0, s0, 0xff
1200 ; GFX11-NEXT: s_lshl_b32 s1, s1, 8
1201 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff
1202 ; GFX11-NEXT: s_or_b32 s0, s0, s1
1203 ; GFX11-NEXT: s_lshl_b32 s1, s2, 16
1204 ; GFX11-NEXT: s_and_b32 s2, s3, 0xff
1205 ; GFX11-NEXT: s_or_b32 s0, s0, s1
1206 ; GFX11-NEXT: s_lshl_b32 s1, s2, 24
1207 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1208 ; GFX11-NEXT: s_or_b32 s0, s0, s1
1209 ; GFX11-NEXT: ; return to shader part epilog
; IR under test: view each i32 as four i8 lanes, funnel-shift lane-wise, then
; view the <4 x i8> result as an i32 again.
1210 %lhs = bitcast i32 %lhs.arg to <4 x i8>
1211 %rhs = bitcast i32 %rhs.arg to <4 x i8>
1212 %amt = bitcast i32 %amt.arg to <4 x i8>
1213 %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt)
1214 %cast.result = bitcast <4 x i8> %result to i32
1215 ret i32 %cast.result
; v_fshr_v4i8: funnel shift right on <4 x i8> using the default calling
; convention. Each i32 argument is bitcast to <4 x i8>, the three vectors are
; passed to @llvm.fshr.v4i8, and the result is bitcast back to i32 and
; returned.
;
; In the expected output below the operands are read from v0 (lhs), v1 (rhs)
; and v2 (amt), and every variant ends with s_setpc_b64 s[30:31]. Per byte
; lane the code forms
;   ((lhs << 1) << (~amt & 7)) | (rhs_byte >> (amt & 7))
; then masks each lane with 0xff and repacks the lanes at bits 0/8/16/24 of v0.
;
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
1218 define i32 @v_fshr_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) {
1219 ; GFX6-LABEL: v_fshr_v4i8:
1221 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1222 ; GFX6-NEXT: v_lshrrev_b32_e32 v7, 8, v2
1223 ; GFX6-NEXT: v_lshrrev_b32_e32 v8, 16, v2
1224 ; GFX6-NEXT: v_lshrrev_b32_e32 v9, 24, v2
1225 ; GFX6-NEXT: v_and_b32_e32 v10, 7, v2
1226 ; GFX6-NEXT: v_not_b32_e32 v2, v2
1227 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0
1228 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v0
1229 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 24, v0
1230 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v2
1231 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1232 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
1233 ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1
1234 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v10, v2
1235 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
1236 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v7
1237 ; GFX6-NEXT: v_not_b32_e32 v7, v7
1238 ; GFX6-NEXT: v_and_b32_e32 v7, 7, v7
1239 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
1240 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, v7, v3
1241 ; GFX6-NEXT: v_bfe_u32 v7, v1, 8, 8
1242 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v7
1243 ; GFX6-NEXT: v_not_b32_e32 v7, v8
1244 ; GFX6-NEXT: v_lshrrev_b32_e32 v6, 24, v1
1245 ; GFX6-NEXT: v_or_b32_e32 v2, v3, v2
1246 ; GFX6-NEXT: v_and_b32_e32 v3, 7, v8
1247 ; GFX6-NEXT: v_and_b32_e32 v7, 7, v7
1248 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4
1249 ; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8
1250 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, v7, v4
1251 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v3, v1
1252 ; GFX6-NEXT: v_or_b32_e32 v1, v4, v1
1253 ; GFX6-NEXT: v_not_b32_e32 v4, v9
1254 ; GFX6-NEXT: v_and_b32_e32 v3, 7, v9
1255 ; GFX6-NEXT: v_and_b32_e32 v4, 7, v4
1256 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5
1257 ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v2
1258 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, v4, v5
1259 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v3, v6
1260 ; GFX6-NEXT: v_and_b32_e32 v0, 0xff, v0
1261 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2
1262 ; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1
1263 ; GFX6-NEXT: v_or_b32_e32 v3, v4, v3
1264 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
1265 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1266 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1267 ; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v3
1268 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
1269 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1270 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1272 ; GFX8-LABEL: v_fshr_v4i8:
1274 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1275 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2
1276 ; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1277 ; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v2
1278 ; GFX8-NEXT: v_and_b32_e32 v8, 7, v2
1279 ; GFX8-NEXT: v_not_b32_e32 v2, v2
1280 ; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
1281 ; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v0
1282 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, v2, v9
1283 ; GFX8-NEXT: v_lshrrev_b16_sdwa v8, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1284 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0
1285 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v8
1286 ; GFX8-NEXT: v_and_b32_e32 v8, 7, v5
1287 ; GFX8-NEXT: v_not_b32_e32 v5, v5
1288 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1
1289 ; GFX8-NEXT: v_and_b32_e32 v5, 7, v5
1290 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v3
1291 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, v5, v3
1292 ; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1293 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v4
1294 ; GFX8-NEXT: v_and_b32_e32 v4, 7, v6
1295 ; GFX8-NEXT: v_not_b32_e32 v5, v6
1296 ; GFX8-NEXT: v_mov_b32_e32 v6, 1
1297 ; GFX8-NEXT: v_and_b32_e32 v5, 7, v5
1298 ; GFX8-NEXT: v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1299 ; GFX8-NEXT: v_lshlrev_b16_e32 v5, v5, v8
1300 ; GFX8-NEXT: v_mov_b32_e32 v8, 0xff
1301 ; GFX8-NEXT: v_and_b32_sdwa v8, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1302 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v8
1303 ; GFX8-NEXT: v_or_b32_e32 v4, v5, v4
1304 ; GFX8-NEXT: v_and_b32_e32 v5, 7, v7
1305 ; GFX8-NEXT: v_not_b32_e32 v7, v7
1306 ; GFX8-NEXT: v_and_b32_e32 v7, 7, v7
1307 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1308 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v7, v0
1309 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1310 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
1311 ; GFX8-NEXT: v_mov_b32_e32 v1, 8
1312 ; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1313 ; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1314 ; GFX8-NEXT: v_and_b32_e32 v2, 0xff, v4
1315 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1316 ; GFX8-NEXT: v_and_b32_e32 v0, 0xff, v0
1317 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
1318 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 24, v0
1319 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
1320 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1322 ; GFX9-LABEL: v_fshr_v4i8:
1324 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1325 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2
1326 ; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1327 ; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v2
1328 ; GFX9-NEXT: v_and_b32_e32 v8, 7, v2
1329 ; GFX9-NEXT: v_not_b32_e32 v2, v2
1330 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
1331 ; GFX9-NEXT: v_lshlrev_b16_e32 v9, 1, v0
1332 ; GFX9-NEXT: v_lshlrev_b16_e32 v2, v2, v9
1333 ; GFX9-NEXT: v_lshrrev_b16_sdwa v8, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1334 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0
1335 ; GFX9-NEXT: v_or_b32_e32 v2, v2, v8
1336 ; GFX9-NEXT: v_and_b32_e32 v8, 7, v5
1337 ; GFX9-NEXT: v_not_b32_e32 v5, v5
1338 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1
1339 ; GFX9-NEXT: v_and_b32_e32 v5, 7, v5
1340 ; GFX9-NEXT: v_lshlrev_b16_e32 v3, 1, v3
1341 ; GFX9-NEXT: v_lshlrev_b16_e32 v3, v5, v3
1342 ; GFX9-NEXT: v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1343 ; GFX9-NEXT: v_or_b32_e32 v3, v3, v4
1344 ; GFX9-NEXT: v_and_b32_e32 v4, 7, v6
1345 ; GFX9-NEXT: v_not_b32_e32 v5, v6
1346 ; GFX9-NEXT: v_mov_b32_e32 v6, 1
1347 ; GFX9-NEXT: s_movk_i32 s4, 0xff
1348 ; GFX9-NEXT: v_and_b32_e32 v5, 7, v5
1349 ; GFX9-NEXT: v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1350 ; GFX9-NEXT: v_lshlrev_b16_e32 v5, v5, v8
1351 ; GFX9-NEXT: v_and_b32_sdwa v8, v1, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1352 ; GFX9-NEXT: v_lshrrev_b16_e32 v4, v4, v8
1353 ; GFX9-NEXT: v_or_b32_e32 v4, v5, v4
1354 ; GFX9-NEXT: v_and_b32_e32 v5, 7, v7
1355 ; GFX9-NEXT: v_not_b32_e32 v7, v7
1356 ; GFX9-NEXT: v_and_b32_e32 v7, 7, v7
1357 ; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1358 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v7, v0
1359 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1360 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
1361 ; GFX9-NEXT: v_mov_b32_e32 v1, 8
1362 ; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1363 ; GFX9-NEXT: v_and_or_b32 v1, v2, s4, v1
1364 ; GFX9-NEXT: v_and_b32_e32 v2, 0xff, v4
1365 ; GFX9-NEXT: v_and_b32_e32 v0, 0xff, v0
1366 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1367 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 24, v0
1368 ; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0
1369 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1371 ; GFX10-LABEL: v_fshr_v4i8:
1373 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1374 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v2
1375 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0
1376 ; GFX10-NEXT: v_not_b32_e32 v8, v2
1377 ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2
1378 ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v2
1379 ; GFX10-NEXT: v_not_b32_e32 v12, v7
1380 ; GFX10-NEXT: v_lshlrev_b16 v3, 1, v3
1381 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v0
1382 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0
1383 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v1
1384 ; GFX10-NEXT: v_and_b32_e32 v12, 7, v12
1385 ; GFX10-NEXT: v_and_b32_e32 v8, 7, v8
1386 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
1387 ; GFX10-NEXT: v_not_b32_e32 v13, v10
1388 ; GFX10-NEXT: s_movk_i32 s4, 0xff
1389 ; GFX10-NEXT: v_lshlrev_b16 v3, v12, v3
1390 ; GFX10-NEXT: v_not_b32_e32 v12, v11
1391 ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v1
1392 ; GFX10-NEXT: v_lshlrev_b16 v0, v8, v0
1393 ; GFX10-NEXT: v_and_b32_e32 v8, 0xff, v1
1394 ; GFX10-NEXT: v_and_b32_e32 v7, 7, v7
1395 ; GFX10-NEXT: v_and_b32_e32 v6, 0xff, v6
1396 ; GFX10-NEXT: v_and_b32_e32 v10, 7, v10
1397 ; GFX10-NEXT: v_and_b32_e32 v13, 7, v13
1398 ; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4
1399 ; GFX10-NEXT: v_and_b32_sdwa v1, v1, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1400 ; GFX10-NEXT: v_and_b32_e32 v12, 7, v12
1401 ; GFX10-NEXT: v_lshlrev_b16 v5, 1, v5
1402 ; GFX10-NEXT: v_and_b32_e32 v11, 7, v11
1403 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
1404 ; GFX10-NEXT: v_lshrrev_b16 v6, v7, v6
1405 ; GFX10-NEXT: v_lshlrev_b16 v4, v13, v4
1406 ; GFX10-NEXT: v_lshrrev_b16 v1, v10, v1
1407 ; GFX10-NEXT: v_lshlrev_b16 v5, v12, v5
1408 ; GFX10-NEXT: v_lshrrev_b16 v7, v11, v9
1409 ; GFX10-NEXT: v_lshrrev_b16 v2, v2, v8
1410 ; GFX10-NEXT: v_or_b32_e32 v3, v3, v6
1411 ; GFX10-NEXT: v_mov_b32_e32 v6, 8
1412 ; GFX10-NEXT: v_or_b32_e32 v1, v4, v1
1413 ; GFX10-NEXT: v_or_b32_e32 v4, v5, v7
1414 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
1415 ; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1416 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
1417 ; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v4
1418 ; GFX10-NEXT: v_and_or_b32 v0, v0, 0xff, v2
1419 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1420 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3
1421 ; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2
1422 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1424 ; GFX11-LABEL: v_fshr_v4i8:
1426 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1427 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 8, v1
1428 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 8, v2
1429 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v0
1430 ; GFX11-NEXT: v_lshrrev_b32_e32 v11, 16, v2
1431 ; GFX11-NEXT: v_lshrrev_b32_e32 v13, 24, v2
1432 ; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6
1433 ; GFX11-NEXT: v_not_b32_e32 v12, v7
1434 ; GFX11-NEXT: v_and_b32_e32 v7, 7, v7
1435 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v0
1436 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 24, v0
1437 ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v1
1438 ; GFX11-NEXT: v_and_b32_e32 v12, 7, v12
1439 ; GFX11-NEXT: v_lshlrev_b16 v3, 1, v3
1440 ; GFX11-NEXT: v_not_b32_e32 v14, v11
1441 ; GFX11-NEXT: v_lshrrev_b16 v6, v7, v6
1442 ; GFX11-NEXT: v_not_b32_e32 v7, v13
1443 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 24, v1
1444 ; GFX11-NEXT: v_not_b32_e32 v10, v2
1445 ; GFX11-NEXT: v_lshlrev_b16 v3, v12, v3
1446 ; GFX11-NEXT: v_and_b32_e32 v11, 7, v11
1447 ; GFX11-NEXT: v_and_b32_e32 v12, 7, v14
1448 ; GFX11-NEXT: v_lshlrev_b16 v4, 1, v4
1449 ; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8
1450 ; GFX11-NEXT: v_and_b32_e32 v7, 7, v7
1451 ; GFX11-NEXT: v_lshlrev_b16 v5, 1, v5
1452 ; GFX11-NEXT: v_and_b32_e32 v13, 7, v13
1453 ; GFX11-NEXT: v_and_b32_e32 v2, 7, v2
1454 ; GFX11-NEXT: v_and_b32_e32 v10, 7, v10
1455 ; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
1456 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
1457 ; GFX11-NEXT: v_or_b32_e32 v3, v3, v6
1458 ; GFX11-NEXT: v_lshlrev_b16 v4, v12, v4
1459 ; GFX11-NEXT: v_lshrrev_b16 v6, v11, v8
1460 ; GFX11-NEXT: v_lshlrev_b16 v5, v7, v5
1461 ; GFX11-NEXT: v_lshrrev_b16 v7, v13, v9
1462 ; GFX11-NEXT: v_lshlrev_b16 v0, v10, v0
1463 ; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1
1464 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3
1465 ; GFX11-NEXT: v_or_b32_e32 v3, v4, v6
1466 ; GFX11-NEXT: v_or_b32_e32 v4, v5, v7
1467 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1468 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
1469 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 8, v2
1470 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1471 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3
1472 ; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4
1473 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1474 ; GFX11-NEXT: v_and_or_b32 v0, v0, 0xff, v1
1475 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v2
1476 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
1477 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 24, v3
1478 ; GFX11-NEXT: v_or3_b32 v0, v0, v1, v2
1479 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; IR under test: view each i32 as four i8 lanes, funnel-shift lane-wise, then
; view the <4 x i8> result as an i32 again.
1480 %lhs = bitcast i32 %lhs.arg to <4 x i8>
1481 %rhs = bitcast i32 %rhs.arg to <4 x i8>
1482 %amt = bitcast i32 %amt.arg to <4 x i8>
1483 %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt)
1484 %cast.result = bitcast <4 x i8> %result to i32
1485 ret i32 %cast.result
1488 define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt) {
1489 ; GFX6-LABEL: s_fshr_i24:
1491 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1492 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
1493 ; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1494 ; GFX6-NEXT: s_and_b32 s2, s2, 0xffffff
1495 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
1496 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1497 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
1498 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffffff
1499 ; GFX6-NEXT: v_mul_lo_u32 v1, v0, v1
1500 ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
1501 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
1502 ; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
1503 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
1504 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
1505 ; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0
1506 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1507 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1508 ; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0
1509 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1510 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1511 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 23, v0
1512 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1513 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1514 ; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1
1515 ; GFX6-NEXT: v_lshr_b32_e32 v0, s1, v0
1516 ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
1517 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
1518 ; GFX6-NEXT: ; return to shader part epilog
1520 ; GFX8-LABEL: s_fshr_i24:
1522 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1523 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
1524 ; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1525 ; GFX8-NEXT: s_and_b32 s2, s2, 0xffffff
1526 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
1527 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1528 ; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
1529 ; GFX8-NEXT: s_and_b32 s1, s1, 0xffffff
1530 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
1531 ; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
1532 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
1533 ; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
1534 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
1535 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
1536 ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0
1537 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1538 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1539 ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0
1540 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1541 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1542 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0
1543 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1544 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1545 ; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0
1546 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s1
1547 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
1548 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
1549 ; GFX8-NEXT: ; return to shader part epilog
1551 ; GFX9-LABEL: s_fshr_i24:
1553 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1554 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
1555 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1556 ; GFX9-NEXT: s_and_b32 s2, s2, 0xffffff
1557 ; GFX9-NEXT: s_and_b32 s1, s1, 0xffffff
1558 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1559 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
1560 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
1561 ; GFX9-NEXT: v_mul_lo_u32 v1, v0, v1
1562 ; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
1563 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
1564 ; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0
1565 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
1566 ; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
1567 ; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0
1568 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1569 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1570 ; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0
1571 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1572 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1573 ; GFX9-NEXT: v_sub_u32_e32 v1, 23, v0
1574 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1575 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1576 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s1
1577 ; GFX9-NEXT: v_lshl_or_b32 v0, s0, v1, v0
1578 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
1579 ; GFX9-NEXT: ; return to shader part epilog
1581 ; GFX10-LABEL: s_fshr_i24:
1583 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1584 ; GFX10-NEXT: s_and_b32 s2, s2, 0xffffff
1585 ; GFX10-NEXT: s_and_b32 s1, s1, 0xffffff
1586 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
1587 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
1588 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1589 ; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
1590 ; GFX10-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0
1591 ; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
1592 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
1593 ; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0
1594 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24
1595 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0
1596 ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
1597 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
1598 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1599 ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
1600 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
1601 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1602 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, 23, v0
1603 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1604 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1605 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s1
1606 ; GFX10-NEXT: v_lshl_or_b32 v0, s0, v1, v0
1607 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
1608 ; GFX10-NEXT: ; return to shader part epilog
1610 ; GFX11-LABEL: s_fshr_i24:
1612 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1613 ; GFX11-NEXT: s_and_b32 s2, s2, 0xffffff
1614 ; GFX11-NEXT: s_and_b32 s1, s1, 0xffffff
1615 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1
1616 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1617 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0
1618 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
1619 ; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1620 ; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
1621 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1622 ; GFX11-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0
1623 ; GFX11-NEXT: v_mul_hi_u32 v1, v0, v1
1624 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1625 ; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1
1626 ; GFX11-NEXT: v_mul_hi_u32 v0, s2, v0
1627 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1628 ; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24
1629 ; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0
1630 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1631 ; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
1632 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
1633 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1634 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1635 ; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
1636 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
1637 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1638 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1639 ; GFX11-NEXT: v_sub_nc_u32_e32 v1, 23, v0
1640 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1641 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1642 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1643 ; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s1
1644 ; GFX11-NEXT: v_lshl_or_b32 v0, s0, v1, v0
1645 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1646 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
1647 ; GFX11-NEXT: ; return to shader part epilog
1648 %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt)
; v_fshr_i24: funnel-shift-right of two i24 values by a variable i24 amount,
; using the default (non-shader) calling convention, so all three operands
; arrive in VGPRs (v0 = %lhs, v1 = %rhs, v2 = %amt in the expected output) and
; every expected sequence ends with s_setpc_b64 s[30:31].
;
; What the expected code below does on every tested subtarget:
;   * reduces %amt modulo 24 without a divide instruction: 24 is converted to
;     float, v_rcp_iflag_f32 takes its reciprocal, the result is scaled by
;     0x4f7ffffe and converted back to an integer, then adjusted once using
;     0xffffffe8 (-24 as a 32-bit value); the remainder is fixed up with two
;     compare-and-conditionally-subtract-24 steps.
;   * computes 23 - (amt mod 24) and masks both shift amounts with 0xffffff.
;   * shifts (%lhs << 1) left by the first amount, shifts %rhs (masked to its
;     low 24 bits) right by the second, and ORs the two halves. The gfx900,
;     gfx1010 and gfx1100 outputs fuse the left shift and the OR into a single
;     v_lshl_or_b32.
;   * the gfx1100 output additionally contains s_delay_alu / s_waitcnt_depctr
;     scheduling hints between dependent VALU instructions.
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
; NOTE(review): the ret and the closing brace of this function are not visible
; in the reviewed excerpt; presumably %result is returned -- confirm.
1652 define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
1653 ; GFX6-LABEL: v_fshr_i24:
1655 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1656 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1657 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3
1658 ; GFX6-NEXT: v_mov_b32_e32 v4, 0xffffffe8
1659 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1660 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1661 ; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1662 ; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3
1663 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1664 ; GFX6-NEXT: v_mul_lo_u32 v4, v3, v4
1665 ; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4
1666 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4
1667 ; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
1668 ; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
1669 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
1670 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2
1671 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1672 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1673 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2
1674 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1675 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1676 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2
1677 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1678 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
1679 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0
1680 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
1681 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1682 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1684 ; GFX8-LABEL: v_fshr_i24:
1686 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1687 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1688 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3
1689 ; GFX8-NEXT: v_mov_b32_e32 v4, 0xffffffe8
1690 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1691 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1692 ; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1693 ; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3
1694 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1695 ; GFX8-NEXT: v_mul_lo_u32 v4, v3, v4
1696 ; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4
1697 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4
1698 ; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3
1699 ; GFX8-NEXT: v_mul_lo_u32 v3, v3, 24
1700 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
1701 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2
1702 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1703 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1704 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2
1705 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1706 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1707 ; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2
1708 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1709 ; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3
1710 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v3, v0
1711 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, v2, v1
1712 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
1713 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1715 ; GFX9-LABEL: v_fshr_i24:
1717 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1718 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1719 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3
1720 ; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffffe8
1721 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1722 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1723 ; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1724 ; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3
1725 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1726 ; GFX9-NEXT: v_mul_lo_u32 v4, v3, v4
1727 ; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4
1728 ; GFX9-NEXT: v_add_u32_e32 v3, v3, v4
1729 ; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3
1730 ; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24
1731 ; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
1732 ; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2
1733 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1734 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1735 ; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2
1736 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1737 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1738 ; GFX9-NEXT: v_sub_u32_e32 v3, 23, v2
1739 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1740 ; GFX9-NEXT: v_and_b32_e32 v3, 0xffffff, v3
1741 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, v2, v1
1742 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v3, v1
1743 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1745 ; GFX10-LABEL: v_fshr_i24:
1747 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1748 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1749 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1750 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1751 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1752 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3
1753 ; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1754 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3
1755 ; GFX10-NEXT: v_mul_lo_u32 v4, 0xffffffe8, v3
1756 ; GFX10-NEXT: v_mul_hi_u32 v4, v3, v4
1757 ; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4
1758 ; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3
1759 ; GFX10-NEXT: v_mul_lo_u32 v3, v3, 24
1760 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
1761 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
1762 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
1763 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1764 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
1765 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
1766 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1767 ; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v2
1768 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1769 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3
1770 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, v2, v1
1771 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, v3, v1
1772 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1774 ; GFX11-LABEL: v_fshr_i24:
1776 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1777 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1778 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1779 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1780 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1781 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1782 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v3, v3
1783 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
1784 ; GFX11-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1785 ; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3
1786 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1787 ; GFX11-NEXT: v_mul_lo_u32 v4, 0xffffffe8, v3
1788 ; GFX11-NEXT: v_mul_hi_u32 v4, v3, v4
1789 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1790 ; GFX11-NEXT: v_add_nc_u32_e32 v3, v3, v4
1791 ; GFX11-NEXT: v_mul_hi_u32 v3, v2, v3
1792 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1793 ; GFX11-NEXT: v_mul_lo_u32 v3, v3, 24
1794 ; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3
1795 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1796 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
1797 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
1798 ; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1799 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1800 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
1801 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
1802 ; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1803 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1804 ; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v2
1805 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1806 ; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3
1807 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1808 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, v2, v1
1809 ; GFX11-NEXT: v_lshl_or_b32 v0, v0, v3, v1
1810 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; IR under test: one call to the i24 funnel-shift-right intrinsic; all of the
; expected instructions above are the lowering of this single operation.
1811 %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt)
1815 define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) {
1816 ; GFX6-LABEL: s_fshr_v2i24:
1818 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1819 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
1820 ; GFX6-NEXT: s_lshr_b32 s6, s0, 16
1821 ; GFX6-NEXT: s_lshr_b32 s7, s0, 24
1822 ; GFX6-NEXT: s_lshr_b32 s8, s1, 8
1823 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1824 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
1825 ; GFX6-NEXT: s_and_b32 s9, s0, 0xff
1826 ; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80008
1827 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff
1828 ; GFX6-NEXT: s_lshl_b32 s0, s0, 8
1829 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8
1830 ; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1831 ; GFX6-NEXT: s_or_b32 s0, s9, s0
1832 ; GFX6-NEXT: s_or_b32 s1, s7, s1
1833 ; GFX6-NEXT: s_and_b32 s7, s8, 0xff
1834 ; GFX6-NEXT: s_lshr_b32 s8, s2, 16
1835 ; GFX6-NEXT: s_lshr_b32 s9, s2, 24
1836 ; GFX6-NEXT: s_and_b32 s11, s2, 0xff
1837 ; GFX6-NEXT: s_bfe_u32 s2, s2, 0x80008
1838 ; GFX6-NEXT: v_mul_lo_u32 v2, v0, v1
1839 ; GFX6-NEXT: s_lshl_b32 s2, s2, 8
1840 ; GFX6-NEXT: s_and_b32 s8, s8, 0xff
1841 ; GFX6-NEXT: s_or_b32 s2, s11, s2
1842 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
1843 ; GFX6-NEXT: s_lshr_b32 s10, s3, 8
1844 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
1845 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16
1846 ; GFX6-NEXT: s_and_b32 s3, s3, 0xff
1847 ; GFX6-NEXT: s_or_b32 s2, s2, s8
1848 ; GFX6-NEXT: s_lshl_b32 s3, s3, 8
1849 ; GFX6-NEXT: s_and_b32 s8, s10, 0xff
1850 ; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2
1851 ; GFX6-NEXT: s_or_b32 s3, s9, s3
1852 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
1853 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
1854 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16
1855 ; GFX6-NEXT: s_or_b32 s3, s3, s8
1856 ; GFX6-NEXT: s_lshr_b32 s8, s4, 16
1857 ; GFX6-NEXT: s_lshr_b32 s9, s4, 24
1858 ; GFX6-NEXT: s_and_b32 s11, s4, 0xff
1859 ; GFX6-NEXT: s_bfe_u32 s4, s4, 0x80008
1860 ; GFX6-NEXT: s_lshl_b32 s4, s4, 8
1861 ; GFX6-NEXT: s_and_b32 s8, s8, 0xff
1862 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
1863 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
1864 ; GFX6-NEXT: s_or_b32 s4, s11, s4
1865 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
1866 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2
1867 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
1868 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16
1869 ; GFX6-NEXT: s_or_b32 s4, s4, s8
1870 ; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0
1871 ; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
1872 ; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2
1873 ; GFX6-NEXT: s_lshr_b32 s10, s5, 8
1874 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
1875 ; GFX6-NEXT: s_and_b32 s5, s5, 0xff
1876 ; GFX6-NEXT: v_mul_lo_u32 v1, v2, v1
1877 ; GFX6-NEXT: s_lshl_b32 s5, s5, 8
1878 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0
1879 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0
1880 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1881 ; GFX6-NEXT: v_mul_hi_u32 v1, v2, v1
1882 ; GFX6-NEXT: s_and_b32 s8, s10, 0xff
1883 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1884 ; GFX6-NEXT: s_or_b32 s5, s9, s5
1885 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
1886 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0
1887 ; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
1888 ; GFX6-NEXT: s_lshl_b32 s8, s8, 16
1889 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1890 ; GFX6-NEXT: s_or_b32 s5, s5, s8
1891 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1892 ; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1
1893 ; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1
1894 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff
1895 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
1896 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1897 ; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24
1898 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v0
1899 ; GFX6-NEXT: s_lshl_b32 s4, s6, 17
1900 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
1901 ; GFX6-NEXT: s_or_b32 s0, s4, s0
1902 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v3
1903 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1904 ; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2
1905 ; GFX6-NEXT: v_lshr_b32_e32 v0, s2, v0
1906 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v1
1907 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
1908 ; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
1909 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
1910 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1911 ; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
1912 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
1913 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
1914 ; GFX6-NEXT: s_and_b32 s7, 0xffff, s7
1915 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1916 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1
1917 ; GFX6-NEXT: s_lshl_b32 s0, s7, 17
1918 ; GFX6-NEXT: s_lshl_b32 s1, s1, 1
1919 ; GFX6-NEXT: s_or_b32 s0, s0, s1
1920 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1921 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1922 ; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2
1923 ; GFX6-NEXT: v_lshr_b32_e32 v1, s3, v1
1924 ; GFX6-NEXT: v_bfe_u32 v3, v0, 8, 8
1925 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
1926 ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0
1927 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3
1928 ; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8
1929 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
1930 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1931 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
1932 ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1
1933 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2
1934 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
1935 ; GFX6-NEXT: v_bfe_u32 v2, v1, 8, 8
1936 ; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8
1937 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
1938 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
1939 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
1940 ; GFX6-NEXT: v_readfirstlane_b32 s1, v1
1941 ; GFX6-NEXT: ; return to shader part epilog
1943 ; GFX8-LABEL: s_fshr_v2i24:
1945 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1946 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
1947 ; GFX8-NEXT: s_lshr_b32 s9, s1, 8
1948 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
1949 ; GFX8-NEXT: s_lshr_b32 s6, s0, 8
1950 ; GFX8-NEXT: s_lshr_b32 s8, s0, 24
1951 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8
1952 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1953 ; GFX8-NEXT: s_and_b32 s6, s6, 0xff
1954 ; GFX8-NEXT: s_or_b32 s1, s8, s1
1955 ; GFX8-NEXT: s_lshr_b32 s8, s2, 8
1956 ; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
1957 ; GFX8-NEXT: s_lshr_b32 s7, s0, 16
1958 ; GFX8-NEXT: s_and_b32 s0, s0, 0xff
1959 ; GFX8-NEXT: s_lshl_b32 s6, s6, 8
1960 ; GFX8-NEXT: s_and_b32 s8, s8, 0xff
1961 ; GFX8-NEXT: s_or_b32 s0, s0, s6
1962 ; GFX8-NEXT: s_and_b32 s6, s7, 0xff
1963 ; GFX8-NEXT: s_and_b32 s7, s9, 0xff
1964 ; GFX8-NEXT: s_lshr_b32 s9, s2, 16
1965 ; GFX8-NEXT: s_lshr_b32 s10, s2, 24
1966 ; GFX8-NEXT: s_and_b32 s2, s2, 0xff
1967 ; GFX8-NEXT: s_lshl_b32 s8, s8, 8
1968 ; GFX8-NEXT: s_or_b32 s2, s2, s8
1969 ; GFX8-NEXT: s_and_b32 s8, s9, 0xff
1970 ; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1971 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
1972 ; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1
1973 ; GFX8-NEXT: s_lshr_b32 s11, s3, 8
1974 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1975 ; GFX8-NEXT: s_lshl_b32 s8, s8, 16
1976 ; GFX8-NEXT: s_and_b32 s3, s3, 0xff
1977 ; GFX8-NEXT: s_or_b32 s2, s2, s8
1978 ; GFX8-NEXT: s_lshl_b32 s3, s3, 8
1979 ; GFX8-NEXT: s_and_b32 s8, s11, 0xff
1980 ; GFX8-NEXT: s_or_b32 s3, s10, s3
1981 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
1982 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
1983 ; GFX8-NEXT: s_lshl_b32 s8, s8, 16
1984 ; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2
1985 ; GFX8-NEXT: s_or_b32 s3, s3, s8
1986 ; GFX8-NEXT: s_lshr_b32 s8, s4, 8
1987 ; GFX8-NEXT: s_and_b32 s8, s8, 0xff
1988 ; GFX8-NEXT: s_lshr_b32 s9, s4, 16
1989 ; GFX8-NEXT: s_lshr_b32 s10, s4, 24
1990 ; GFX8-NEXT: s_and_b32 s4, s4, 0xff
1991 ; GFX8-NEXT: s_lshl_b32 s8, s8, 8
1992 ; GFX8-NEXT: s_or_b32 s4, s4, s8
1993 ; GFX8-NEXT: s_and_b32 s8, s9, 0xff
1994 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
1995 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
1996 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
1997 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2
1998 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1999 ; GFX8-NEXT: s_lshl_b32 s8, s8, 16
2000 ; GFX8-NEXT: s_or_b32 s4, s4, s8
2001 ; GFX8-NEXT: v_mul_hi_u32 v0, s4, v0
2002 ; GFX8-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
2003 ; GFX8-NEXT: v_cvt_u32_f32_e32 v2, v2
2004 ; GFX8-NEXT: s_lshr_b32 s11, s5, 8
2005 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
2006 ; GFX8-NEXT: s_and_b32 s5, s5, 0xff
2007 ; GFX8-NEXT: v_mul_lo_u32 v1, v2, v1
2008 ; GFX8-NEXT: s_lshl_b32 s5, s5, 8
2009 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0
2010 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0
2011 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
2012 ; GFX8-NEXT: v_mul_hi_u32 v1, v2, v1
2013 ; GFX8-NEXT: s_and_b32 s8, s11, 0xff
2014 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2015 ; GFX8-NEXT: s_or_b32 s5, s10, s5
2016 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
2017 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0
2018 ; GFX8-NEXT: s_and_b32 s5, 0xffff, s5
2019 ; GFX8-NEXT: s_lshl_b32 s8, s8, 16
2020 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
2021 ; GFX8-NEXT: s_or_b32 s5, s5, s8
2022 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2023 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1
2024 ; GFX8-NEXT: v_mul_hi_u32 v1, s5, v1
2025 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
2026 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
2027 ; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v0
2028 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24
2029 ; GFX8-NEXT: s_lshl_b32 s4, s6, 17
2030 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
2031 ; GFX8-NEXT: s_or_b32 s0, s4, s0
2032 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v3
2033 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2034 ; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0
2035 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s2
2036 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s5, v1
2037 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
2038 ; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
2039 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
2040 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
2041 ; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
2042 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
2043 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
2044 ; GFX8-NEXT: s_and_b32 s7, 0xffff, s7
2045 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
2046 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v1
2047 ; GFX8-NEXT: s_lshl_b32 s0, s7, 17
2048 ; GFX8-NEXT: s_lshl_b32 s1, s1, 1
2049 ; GFX8-NEXT: s_or_b32 s0, s0, s1
2050 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2051 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
2052 ; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0
2053 ; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s3
2054 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
2055 ; GFX8-NEXT: v_mov_b32_e32 v2, 8
2056 ; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2057 ; GFX8-NEXT: v_mov_b32_e32 v4, 16
2058 ; GFX8-NEXT: v_or_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2059 ; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2060 ; GFX8-NEXT: v_or_b32_e32 v0, v3, v0
2061 ; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v1
2062 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3
2063 ; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2064 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
2065 ; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
2066 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
2067 ; GFX8-NEXT: v_readfirstlane_b32 s1, v1
2068 ; GFX8-NEXT: ; return to shader part epilog
2070 ; GFX9-LABEL: s_fshr_v2i24:
2072 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
2073 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
2074 ; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8
2075 ; GFX9-NEXT: s_lshr_b32 s11, s1, 8
2076 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
2077 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
2078 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
2079 ; GFX9-NEXT: s_lshr_b32 s7, s0, 8
2080 ; GFX9-NEXT: s_lshr_b32 s10, s0, 24
2081 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8
2082 ; GFX9-NEXT: v_mul_lo_u32 v2, v0, v1
2083 ; GFX9-NEXT: s_and_b32 s7, s7, 0xff
2084 ; GFX9-NEXT: s_or_b32 s1, s10, s1
2085 ; GFX9-NEXT: s_lshr_b32 s10, s2, 8
2086 ; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2
2087 ; GFX9-NEXT: s_lshr_b32 s9, s0, 16
2088 ; GFX9-NEXT: s_and_b32 s0, s0, 0xff
2089 ; GFX9-NEXT: s_lshl_b32 s7, s7, 8
2090 ; GFX9-NEXT: s_and_b32 s10, s10, 0xff
2091 ; GFX9-NEXT: s_or_b32 s0, s0, s7
2092 ; GFX9-NEXT: s_and_b32 s7, s9, 0xff
2093 ; GFX9-NEXT: s_and_b32 s9, s11, 0xff
2094 ; GFX9-NEXT: s_lshr_b32 s11, s2, 16
2095 ; GFX9-NEXT: s_lshr_b32 s12, s2, 24
2096 ; GFX9-NEXT: s_and_b32 s2, s2, 0xff
2097 ; GFX9-NEXT: s_lshl_b32 s10, s10, 8
2098 ; GFX9-NEXT: s_or_b32 s2, s2, s10
2099 ; GFX9-NEXT: s_and_b32 s10, s11, 0xff
2100 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v2
2101 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
2102 ; GFX9-NEXT: s_and_b32 s10, 0xffff, s10
2103 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2
2104 ; GFX9-NEXT: s_lshr_b32 s13, s3, 8
2105 ; GFX9-NEXT: s_and_b32 s2, 0xffff, s2
2106 ; GFX9-NEXT: s_lshl_b32 s10, s10, 16
2107 ; GFX9-NEXT: s_and_b32 s3, s3, 0xff
2108 ; GFX9-NEXT: s_or_b32 s2, s2, s10
2109 ; GFX9-NEXT: s_lshl_b32 s3, s3, 8
2110 ; GFX9-NEXT: s_and_b32 s10, s13, 0xff
2111 ; GFX9-NEXT: s_or_b32 s3, s12, s3
2112 ; GFX9-NEXT: s_and_b32 s10, 0xffff, s10
2113 ; GFX9-NEXT: s_and_b32 s3, 0xffff, s3
2114 ; GFX9-NEXT: s_lshl_b32 s10, s10, 16
2115 ; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
2116 ; GFX9-NEXT: s_or_b32 s3, s3, s10
2117 ; GFX9-NEXT: s_lshr_b32 s10, s4, 8
2118 ; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2
2119 ; GFX9-NEXT: s_and_b32 s10, s10, 0xff
2120 ; GFX9-NEXT: s_lshr_b32 s11, s4, 16
2121 ; GFX9-NEXT: s_lshr_b32 s12, s4, 24
2122 ; GFX9-NEXT: s_and_b32 s4, s4, 0xff
2123 ; GFX9-NEXT: s_lshl_b32 s10, s10, 8
2124 ; GFX9-NEXT: s_or_b32 s4, s4, s10
2125 ; GFX9-NEXT: s_and_b32 s10, s11, 0xff
2126 ; GFX9-NEXT: s_and_b32 s10, 0xffff, s10
2127 ; GFX9-NEXT: v_mul_lo_u32 v1, v2, v1
2128 ; GFX9-NEXT: s_and_b32 s4, 0xffff, s4
2129 ; GFX9-NEXT: s_lshl_b32 s10, s10, 16
2130 ; GFX9-NEXT: s_or_b32 s4, s4, s10
2131 ; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0
2132 ; GFX9-NEXT: s_lshr_b32 s13, s5, 8
2133 ; GFX9-NEXT: s_and_b32 s5, s5, 0xff
2134 ; GFX9-NEXT: v_mul_hi_u32 v1, v2, v1
2135 ; GFX9-NEXT: s_lshl_b32 s5, s5, 8
2136 ; GFX9-NEXT: s_and_b32 s10, s13, 0xff
2137 ; GFX9-NEXT: s_or_b32 s5, s12, s5
2138 ; GFX9-NEXT: s_and_b32 s10, 0xffff, s10
2139 ; GFX9-NEXT: s_and_b32 s5, 0xffff, s5
2140 ; GFX9-NEXT: s_lshl_b32 s10, s10, 16
2141 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
2142 ; GFX9-NEXT: s_or_b32 s5, s5, s10
2143 ; GFX9-NEXT: v_add_u32_e32 v1, v2, v1
2144 ; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1
2145 ; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0
2146 ; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0
2147 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
2148 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2149 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24
2150 ; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0
2151 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
2152 ; GFX9-NEXT: s_and_b32 s0, 0xffff, s0
2153 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
2154 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2155 ; GFX9-NEXT: v_sub_u32_e32 v3, 23, v0
2156 ; GFX9-NEXT: s_lshl_b32 s4, s7, 17
2157 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
2158 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2159 ; GFX9-NEXT: s_or_b32 s0, s4, s0
2160 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v3
2161 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s2
2162 ; GFX9-NEXT: v_sub_u32_e32 v1, s5, v1
2163 ; GFX9-NEXT: v_lshl_or_b32 v0, s0, v2, v0
2164 ; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1
2165 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
2166 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
2167 ; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1
2168 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
2169 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
2170 ; GFX9-NEXT: s_and_b32 s9, 0xffff, s9
2171 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
2172 ; GFX9-NEXT: v_sub_u32_e32 v2, 23, v1
2173 ; GFX9-NEXT: s_lshl_b32 s0, s9, 17
2174 ; GFX9-NEXT: s_lshl_b32 s1, s1, 1
2175 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1
2176 ; GFX9-NEXT: s_or_b32 s0, s0, s1
2177 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2178 ; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s3
2179 ; GFX9-NEXT: s_mov_b32 s6, 8
2180 ; GFX9-NEXT: v_lshl_or_b32 v1, s0, v2, v1
2181 ; GFX9-NEXT: s_mov_b32 s8, 16
2182 ; GFX9-NEXT: s_movk_i32 s0, 0xff
2183 ; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2184 ; GFX9-NEXT: v_and_b32_e32 v3, 0xff, v1
2185 ; GFX9-NEXT: v_and_or_b32 v2, v0, s0, v2
2186 ; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2187 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3
2188 ; GFX9-NEXT: v_or3_b32 v0, v2, v0, v3
2189 ; GFX9-NEXT: v_bfe_u32 v2, v1, 8, 8
2190 ; GFX9-NEXT: v_bfe_u32 v1, v1, 16, 8
2191 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, 8, v2
2192 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
2193 ; GFX9-NEXT: v_readfirstlane_b32 s1, v1
2194 ; GFX9-NEXT: ; return to shader part epilog
2196 ; GFX10-LABEL: s_fshr_v2i24:
2198 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
2199 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, 24
2200 ; GFX10-NEXT: s_lshr_b32 s9, s1, 8
2201 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
2202 ; GFX10-NEXT: s_lshr_b32 s6, s0, 8
2203 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
2204 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1
2205 ; GFX10-NEXT: s_lshr_b32 s8, s0, 24
2206 ; GFX10-NEXT: s_lshl_b32 s1, s1, 8
2207 ; GFX10-NEXT: s_and_b32 s6, s6, 0xff
2208 ; GFX10-NEXT: s_or_b32 s1, s8, s1
2209 ; GFX10-NEXT: s_lshr_b32 s8, s4, 8
2210 ; GFX10-NEXT: s_lshr_b32 s7, s0, 16
2211 ; GFX10-NEXT: s_and_b32 s0, s0, 0xff
2212 ; GFX10-NEXT: s_lshl_b32 s6, s6, 8
2213 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
2214 ; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
2215 ; GFX10-NEXT: s_and_b32 s8, s8, 0xff
2216 ; GFX10-NEXT: s_or_b32 s0, s0, s6
2217 ; GFX10-NEXT: s_and_b32 s6, s7, 0xff
2218 ; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
2219 ; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1
2220 ; GFX10-NEXT: s_and_b32 s7, s9, 0xff
2221 ; GFX10-NEXT: s_lshr_b32 s9, s4, 16
2222 ; GFX10-NEXT: s_lshr_b32 s10, s4, 24
2223 ; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0
2224 ; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1
2225 ; GFX10-NEXT: s_and_b32 s4, s4, 0xff
2226 ; GFX10-NEXT: s_lshl_b32 s8, s8, 8
2227 ; GFX10-NEXT: s_lshr_b32 s11, s5, 8
2228 ; GFX10-NEXT: s_or_b32 s4, s4, s8
2229 ; GFX10-NEXT: s_and_b32 s8, s9, 0xff
2230 ; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
2231 ; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2
2232 ; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3
2233 ; GFX10-NEXT: s_and_b32 s8, 0xffff, s8
2234 ; GFX10-NEXT: s_and_b32 s5, s5, 0xff
2235 ; GFX10-NEXT: s_lshl_b32 s8, s8, 16
2236 ; GFX10-NEXT: s_lshl_b32 s5, s5, 8
2237 ; GFX10-NEXT: s_or_b32 s4, s4, s8
2238 ; GFX10-NEXT: s_and_b32 s8, s11, 0xff
2239 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2
2240 ; GFX10-NEXT: s_or_b32 s5, s10, s5
2241 ; GFX10-NEXT: s_and_b32 s8, 0xffff, s8
2242 ; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3
2243 ; GFX10-NEXT: s_and_b32 s5, 0xffff, s5
2244 ; GFX10-NEXT: v_mul_hi_u32 v0, s4, v0
2245 ; GFX10-NEXT: s_lshl_b32 s8, s8, 16
2246 ; GFX10-NEXT: s_lshr_b32 s9, s2, 8
2247 ; GFX10-NEXT: s_or_b32 s5, s5, s8
2248 ; GFX10-NEXT: s_lshr_b32 s8, s2, 16
2249 ; GFX10-NEXT: v_mul_hi_u32 v1, s5, v1
2250 ; GFX10-NEXT: s_and_b32 s9, s9, 0xff
2251 ; GFX10-NEXT: s_lshr_b32 s10, s2, 24
2252 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24
2253 ; GFX10-NEXT: s_lshr_b32 s11, s3, 8
2254 ; GFX10-NEXT: s_and_b32 s2, s2, 0xff
2255 ; GFX10-NEXT: s_lshl_b32 s9, s9, 8
2256 ; GFX10-NEXT: s_and_b32 s8, s8, 0xff
2257 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24
2258 ; GFX10-NEXT: s_and_b32 s3, s3, 0xff
2259 ; GFX10-NEXT: s_or_b32 s2, s2, s9
2260 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, s4, v0
2261 ; GFX10-NEXT: s_and_b32 s4, 0xffff, s8
2262 ; GFX10-NEXT: s_lshl_b32 s3, s3, 8
2263 ; GFX10-NEXT: s_and_b32 s2, 0xffff, s2
2264 ; GFX10-NEXT: s_lshl_b32 s4, s4, 16
2265 ; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
2266 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, s5, v1
2267 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
2268 ; GFX10-NEXT: s_and_b32 s5, s11, 0xff
2269 ; GFX10-NEXT: s_or_b32 s3, s10, s3
2270 ; GFX10-NEXT: s_and_b32 s5, 0xffff, s5
2271 ; GFX10-NEXT: s_and_b32 s3, 0xffff, s3
2272 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2273 ; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
2274 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
2275 ; GFX10-NEXT: s_lshl_b32 s5, s5, 16
2276 ; GFX10-NEXT: s_or_b32 s2, s2, s4
2277 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0
2278 ; GFX10-NEXT: s_and_b32 s0, 0xffff, s0
2279 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
2280 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
2281 ; GFX10-NEXT: s_and_b32 s6, 0xffff, s6
2282 ; GFX10-NEXT: s_or_b32 s3, s3, s5
2283 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
2284 ; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
2285 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo
2286 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
2287 ; GFX10-NEXT: s_and_b32 s7, 0xffff, s7
2288 ; GFX10-NEXT: s_lshl_b32 s4, s6, 17
2289 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
2290 ; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v0
2291 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
2292 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2293 ; GFX10-NEXT: s_or_b32 s0, s4, s0
2294 ; GFX10-NEXT: s_lshl_b32 s1, s1, 1
2295 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3
2296 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v1
2297 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1
2298 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s2
2299 ; GFX10-NEXT: s_lshl_b32 s2, s7, 17
2300 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2301 ; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s3
2302 ; GFX10-NEXT: v_lshl_or_b32 v0, s0, v3, v0
2303 ; GFX10-NEXT: s_or_b32 s0, s2, s1
2304 ; GFX10-NEXT: v_lshl_or_b32 v1, s0, v2, v1
2305 ; GFX10-NEXT: s_mov_b32 s0, 8
2306 ; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2307 ; GFX10-NEXT: s_mov_b32 s0, 16
2308 ; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v1
2309 ; GFX10-NEXT: v_bfe_u32 v4, v1, 8, 8
2310 ; GFX10-NEXT: v_bfe_u32 v1, v1, 16, 8
2311 ; GFX10-NEXT: v_and_or_b32 v2, v0, 0xff, v2
2312 ; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2313 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3
2314 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 8, v4
2315 ; GFX10-NEXT: v_or3_b32 v0, v2, v0, v3
2316 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1
2317 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2318 ; GFX10-NEXT: ; return to shader part epilog
2320 ; GFX11-LABEL: s_fshr_v2i24:
2322 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
2323 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v1, 24
2324 ; GFX11-NEXT: s_lshr_b32 s6, s0, 8
2325 ; GFX11-NEXT: s_lshr_b32 s7, s0, 16
2326 ; GFX11-NEXT: s_and_b32 s6, s6, 0xff
2327 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0
2328 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v1
2329 ; GFX11-NEXT: s_lshr_b32 s8, s0, 24
2330 ; GFX11-NEXT: s_and_b32 s0, s0, 0xff
2331 ; GFX11-NEXT: s_lshl_b32 s6, s6, 8
2332 ; GFX11-NEXT: s_lshr_b32 s9, s1, 8
2333 ; GFX11-NEXT: s_or_b32 s0, s0, s6
2334 ; GFX11-NEXT: s_and_b32 s6, s7, 0xff
2335 ; GFX11-NEXT: s_and_b32 s7, s9, 0xff
2336 ; GFX11-NEXT: s_lshr_b32 s9, s4, 8
2337 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
2338 ; GFX11-NEXT: v_dual_mul_f32 v0, 0x4f7ffffe, v0 :: v_dual_mul_f32 v1, 0x4f7ffffe, v1
2339 ; GFX11-NEXT: s_lshr_b32 s10, s4, 16
2340 ; GFX11-NEXT: s_and_b32 s9, s9, 0xff
2341 ; GFX11-NEXT: s_and_b32 s11, s4, 0xff
2342 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2343 ; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
2344 ; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1
2345 ; GFX11-NEXT: s_lshl_b32 s9, s9, 8
2346 ; GFX11-NEXT: s_and_b32 s10, s10, 0xff
2347 ; GFX11-NEXT: s_or_b32 s9, s11, s9
2348 ; GFX11-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0
2349 ; GFX11-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1
2350 ; GFX11-NEXT: s_and_b32 s10, 0xffff, s10
2351 ; GFX11-NEXT: s_and_b32 s9, 0xffff, s9
2352 ; GFX11-NEXT: s_lshl_b32 s10, s10, 16
2353 ; GFX11-NEXT: s_lshr_b32 s11, s5, 8
2354 ; GFX11-NEXT: s_or_b32 s9, s9, s10
2355 ; GFX11-NEXT: s_and_b32 s5, s5, 0xff
2356 ; GFX11-NEXT: v_mul_hi_u32 v2, v0, v2
2357 ; GFX11-NEXT: s_lshr_b32 s4, s4, 24
2358 ; GFX11-NEXT: s_lshl_b32 s5, s5, 8
2359 ; GFX11-NEXT: s_and_b32 s10, s11, 0xff
2360 ; GFX11-NEXT: s_or_b32 s4, s4, s5
2361 ; GFX11-NEXT: s_and_b32 s5, 0xffff, s10
2362 ; GFX11-NEXT: s_and_b32 s4, 0xffff, s4
2363 ; GFX11-NEXT: s_lshl_b32 s5, s5, 16
2364 ; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v2
2365 ; GFX11-NEXT: v_mul_hi_u32 v2, v1, v3
2366 ; GFX11-NEXT: s_or_b32 s4, s4, s5
2367 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
2368 ; GFX11-NEXT: s_lshr_b32 s10, s2, 16
2369 ; GFX11-NEXT: v_mul_hi_u32 v0, s9, v0
2370 ; GFX11-NEXT: s_lshl_b32 s1, s1, 8
2371 ; GFX11-NEXT: s_lshr_b32 s5, s2, 24
2372 ; GFX11-NEXT: s_or_b32 s1, s8, s1
2373 ; GFX11-NEXT: v_add_nc_u32_e32 v1, v1, v2
2374 ; GFX11-NEXT: s_lshr_b32 s8, s2, 8
2375 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff
2376 ; GFX11-NEXT: s_and_b32 s8, s8, 0xff
2377 ; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24
2378 ; GFX11-NEXT: v_mul_hi_u32 v1, s4, v1
2379 ; GFX11-NEXT: s_lshl_b32 s8, s8, 8
2380 ; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
2381 ; GFX11-NEXT: s_or_b32 s2, s2, s8
2382 ; GFX11-NEXT: s_and_b32 s8, s10, 0xff
2383 ; GFX11-NEXT: s_and_b32 s2, 0xffff, s2
2384 ; GFX11-NEXT: s_and_b32 s8, 0xffff, s8
2385 ; GFX11-NEXT: v_sub_nc_u32_e32 v0, s9, v0
2386 ; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24
2387 ; GFX11-NEXT: s_lshr_b32 s9, s3, 8
2388 ; GFX11-NEXT: s_and_b32 s3, s3, 0xff
2389 ; GFX11-NEXT: s_lshl_b32 s8, s8, 16
2390 ; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
2391 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
2392 ; GFX11-NEXT: s_lshl_b32 s3, s3, 8
2393 ; GFX11-NEXT: s_or_b32 s2, s2, s8
2394 ; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1
2395 ; GFX11-NEXT: s_and_b32 s4, s9, 0xff
2396 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2397 ; GFX11-NEXT: s_and_b32 s6, 0xffff, s6
2398 ; GFX11-NEXT: s_or_b32 s3, s5, s3
2399 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
2400 ; GFX11-NEXT: s_and_b32 s4, 0xffff, s4
2401 ; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
2402 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
2403 ; GFX11-NEXT: s_and_b32 s3, 0xffff, s3
2404 ; GFX11-NEXT: s_lshl_b32 s4, s4, 16
2405 ; GFX11-NEXT: s_lshl_b32 s5, s6, 17
2406 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1
2407 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2408 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
2409 ; GFX11-NEXT: s_or_b32 s0, s5, s0
2410 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
2411 ; GFX11-NEXT: s_and_b32 s7, 0xffff, s7
2412 ; GFX11-NEXT: s_lshl_b32 s1, s1, 1
2413 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2414 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2415 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
2416 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
2417 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2418 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2)
2419 ; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v1
2420 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1
2421 ; GFX11-NEXT: v_sub_nc_u32_e32 v2, 23, v0
2422 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2423 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2424 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
2425 ; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s2
2426 ; GFX11-NEXT: s_or_b32 s2, s3, s4
2427 ; GFX11-NEXT: v_lshrrev_b32_e64 v1, v1, s2
2428 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
2429 ; GFX11-NEXT: v_lshl_or_b32 v0, s0, v2, v0
2430 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3
2431 ; GFX11-NEXT: s_lshl_b32 s0, s7, 17
2432 ; GFX11-NEXT: s_or_b32 s0, s0, s1
2433 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2434 ; GFX11-NEXT: v_bfe_u32 v3, v0, 8, 8
2435 ; GFX11-NEXT: v_lshl_or_b32 v1, s0, v2, v1
2436 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
2437 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v3
2438 ; GFX11-NEXT: v_bfe_u32 v3, v0, 16, 8
2439 ; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v1
2440 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2441 ; GFX11-NEXT: v_and_or_b32 v0, v0, 0xff, v2
2442 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v3
2443 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
2444 ; GFX11-NEXT: v_lshlrev_b32_e32 v3, 24, v4
2445 ; GFX11-NEXT: v_bfe_u32 v4, v1, 8, 8
2446 ; GFX11-NEXT: v_bfe_u32 v1, v1, 16, 8
2447 ; GFX11-NEXT: v_or3_b32 v0, v0, v2, v3
2448 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2449 ; GFX11-NEXT: v_lshl_or_b32 v1, v1, 8, v4
2450 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
2451 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
2452 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1
2453 ; GFX11-NEXT: ; return to shader part epilog
2454 %lhs = bitcast i48 %lhs.arg to <2 x i24>
2455 %rhs = bitcast i48 %rhs.arg to <2 x i24>
2456 %amt = bitcast i48 %amt.arg to <2 x i24>
2457 %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
2458 %cast.result = bitcast <2 x i24> %result to i48
2459 ret i48 %cast.result
; Funnel shift right on <2 x i24> with all operands passed as ordinary
; (non-inreg) arguments and a variable per-element shift amount.
; In the checks below, each element's amount is masked to 24 bits and then
; reduced with what appears to be an unsigned remainder-by-24 expansion:
; a float reciprocal of 24 scaled by 0x4f7ffffe, a refinement step using
; 0xffffffe8 (-24), a multiply-high/multiply-by-24/subtract, and two
; compare-and-select corrections against 24.
; The result is then formed as ((lhs << 1) << (23 - amt)) | ((rhs & 0xffffff) >> amt).
; The GFX9, GFX10 and GFX11 checks fuse the final shift-left and OR into
; v_lshl_or_b32; the GFX6 and GFX8 checks keep them as separate instructions.
2462 define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
2463 ; GFX6-LABEL: v_fshr_v2i24:
2465 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2466 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
2467 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6
2468 ; GFX6-NEXT: v_mov_b32_e32 v7, 0xffffffe8
2469 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2470 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v9, 24
2471 ; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
2472 ; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6
2473 ; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2474 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
2475 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2476 ; GFX6-NEXT: v_mul_lo_u32 v8, v6, v7
2477 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
2478 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
2479 ; GFX6-NEXT: v_mul_hi_u32 v8, v6, v8
2480 ; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8
2481 ; GFX6-NEXT: v_mul_hi_u32 v6, v4, v6
2482 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v8, v9
2483 ; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24
2484 ; GFX6-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
2485 ; GFX6-NEXT: v_cvt_u32_f32_e32 v8, v8
2486 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v6
2487 ; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4
2488 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2489 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2490 ; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4
2491 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2492 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2493 ; GFX6-NEXT: v_mul_lo_u32 v6, v8, v7
2494 ; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 23, v4
2495 ; GFX6-NEXT: v_and_b32_e32 v7, 0xffffff, v7
2496 ; GFX6-NEXT: v_mul_hi_u32 v6, v8, v6
2497 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2498 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v7, v0
2499 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
2500 ; GFX6-NEXT: v_add_i32_e32 v6, vcc, v8, v6
2501 ; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6
2502 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
2503 ; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24
2504 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6
2505 ; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
2506 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2507 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2508 ; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
2509 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2510 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2511 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
2512 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2513 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2514 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1
2515 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v3
2516 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
2517 ; GFX6-NEXT: s_setpc_b64 s[30:31]
2519 ; GFX8-LABEL: v_fshr_v2i24:
2521 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2522 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
2523 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v6, v6
2524 ; GFX8-NEXT: v_mov_b32_e32 v7, 0xffffffe8
2525 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2526 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v9, 24
2527 ; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
2528 ; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6
2529 ; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2530 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0
2531 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2532 ; GFX8-NEXT: v_mul_lo_u32 v8, v6, v7
2533 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 1, v1
2534 ; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3
2535 ; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8
2536 ; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8
2537 ; GFX8-NEXT: v_mul_hi_u32 v6, v4, v6
2538 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v8, v9
2539 ; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24
2540 ; GFX8-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
2541 ; GFX8-NEXT: v_cvt_u32_f32_e32 v8, v8
2542 ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v6
2543 ; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4
2544 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2545 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2546 ; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4
2547 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2548 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2549 ; GFX8-NEXT: v_mul_lo_u32 v6, v8, v7
2550 ; GFX8-NEXT: v_sub_u32_e32 v7, vcc, 23, v4
2551 ; GFX8-NEXT: v_and_b32_e32 v7, 0xffffff, v7
2552 ; GFX8-NEXT: v_mul_hi_u32 v6, v8, v6
2553 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2554 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v7, v0
2555 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2
2556 ; GFX8-NEXT: v_add_u32_e32 v6, vcc, v8, v6
2557 ; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6
2558 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
2559 ; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24
2560 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6
2561 ; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2
2562 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2563 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2564 ; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2
2565 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2566 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2567 ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 23, v2
2568 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2569 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2570 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, v4, v1
2571 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, v2, v3
2572 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
2573 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2575 ; GFX9-LABEL: v_fshr_v2i24:
2577 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2578 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
2579 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6
2580 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v9, 24
2581 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v9, v9
2582 ; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8
2583 ; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
2584 ; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6
2585 ; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9
2586 ; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v9
2587 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2588 ; GFX9-NEXT: v_mul_lo_u32 v8, v6, v7
2589 ; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2590 ; GFX9-NEXT: v_mul_lo_u32 v7, v9, v7
2591 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2592 ; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8
2593 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
2594 ; GFX9-NEXT: v_mul_hi_u32 v7, v9, v7
2595 ; GFX9-NEXT: v_and_b32_e32 v3, 0xffffff, v3
2596 ; GFX9-NEXT: v_add_u32_e32 v6, v6, v8
2597 ; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6
2598 ; GFX9-NEXT: v_add_u32_e32 v7, v9, v7
2599 ; GFX9-NEXT: v_mul_hi_u32 v7, v5, v7
2600 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 1, v1
2601 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
2602 ; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24
2603 ; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6
2604 ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4
2605 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2606 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2607 ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4
2608 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2609 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2610 ; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4
2611 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2612 ; GFX9-NEXT: v_and_b32_e32 v6, 0xffffff, v6
2613 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v4, v2
2614 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v6, v2
2615 ; GFX9-NEXT: v_sub_u32_e32 v2, v5, v7
2616 ; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2
2617 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2618 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2619 ; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2
2620 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2621 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2622 ; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2
2623 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2624 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2625 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v2, v3
2626 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, v4, v2
2627 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2629 ; GFX10-LABEL: v_fshr_v2i24:
2631 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2632 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
2633 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v7, 24
2634 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2635 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2636 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2637 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6
2638 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v7, v7
2639 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3
2640 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
2641 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 1, v1
2642 ; GFX10-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
2643 ; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7
2644 ; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6
2645 ; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v7
2646 ; GFX10-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6
2647 ; GFX10-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7
2648 ; GFX10-NEXT: v_mul_hi_u32 v8, v6, v8
2649 ; GFX10-NEXT: v_mul_hi_u32 v9, v7, v9
2650 ; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v8
2651 ; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v9
2652 ; GFX10-NEXT: v_mul_hi_u32 v6, v4, v6
2653 ; GFX10-NEXT: v_mul_hi_u32 v7, v5, v7
2654 ; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24
2655 ; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24
2656 ; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v6
2657 ; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v7
2658 ; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
2659 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
2660 ; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
2661 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2662 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
2663 ; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
2664 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2665 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
2666 ; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
2667 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2668 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
2669 ; GFX10-NEXT: v_sub_nc_u32_e32 v6, 23, v4
2670 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2671 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2672 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffffff, v6
2673 ; GFX10-NEXT: v_sub_nc_u32_e32 v7, 23, v5
2674 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2675 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, v4, v2
2676 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v7
2677 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, v5, v3
2678 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, v6, v2
2679 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, v4, v3
2680 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2682 ; GFX11-LABEL: v_fshr_v2i24:
2684 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2685 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
2686 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v7, 24
2687 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2688 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2689 ; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3
2690 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v6, v6
2691 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v7, v7
2692 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
2693 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
2694 ; GFX11-NEXT: v_dual_mul_f32 v6, 0x4f7ffffe, v6 :: v_dual_lshlrev_b32 v1, 1, v1
2695 ; GFX11-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7
2696 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2697 ; GFX11-NEXT: v_cvt_u32_f32_e32 v6, v6
2698 ; GFX11-NEXT: v_cvt_u32_f32_e32 v7, v7
2699 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2700 ; GFX11-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6
2701 ; GFX11-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7
2702 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2703 ; GFX11-NEXT: v_mul_hi_u32 v8, v6, v8
2704 ; GFX11-NEXT: v_mul_hi_u32 v9, v7, v9
2705 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2706 ; GFX11-NEXT: v_add_nc_u32_e32 v6, v6, v8
2707 ; GFX11-NEXT: v_add_nc_u32_e32 v7, v7, v9
2708 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2709 ; GFX11-NEXT: v_mul_hi_u32 v7, v5, v7
2710 ; GFX11-NEXT: v_mul_lo_u32 v7, v7, 24
2711 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2712 ; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v7
2713 ; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
2714 ; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2715 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2716 ; GFX11-NEXT: v_mul_hi_u32 v6, v4, v6
2717 ; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24
2718 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2719 ; GFX11-NEXT: v_sub_nc_u32_e32 v4, v4, v6
2720 ; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
2721 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
2722 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2723 ; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2724 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
2725 ; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
2726 ; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2727 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
2728 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
2729 ; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
2730 ; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2731 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
2732 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
2733 ; GFX11-NEXT: v_sub_nc_u32_e32 v6, 23, v4
2734 ; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4
2735 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2736 ; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6
2737 ; GFX11-NEXT: v_sub_nc_u32_e32 v7, 23, v5
2738 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2739 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
2740 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, v4, v2
2741 ; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v7
2742 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2743 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, v5, v3
2744 ; GFX11-NEXT: v_lshl_or_b32 v0, v0, v6, v2
2745 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
2746 ; GFX11-NEXT: v_lshl_or_b32 v1, v1, v4, v3
2747 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2748 %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
2749 ret <2 x i24> %result
; i32 funnel shift right in an amdgpu_ps shader with all three operands
; marked inreg and a variable shift amount.
; Every check block selects a single v_alignbit_b32 and moves its result
; to s0 with v_readfirstlane_b32. The GFX6, GFX8 and GFX9 checks first copy
; both s1 and s2 into VGPRs; the GFX10 and GFX11 checks copy only s2 and
; use s1 directly as an operand.
2752 define amdgpu_ps i32 @s_fshr_i32(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
2753 ; GFX6-LABEL: s_fshr_i32:
2755 ; GFX6-NEXT: v_mov_b32_e32 v0, s1
2756 ; GFX6-NEXT: v_mov_b32_e32 v1, s2
2757 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1
2758 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
2759 ; GFX6-NEXT: ; return to shader part epilog
2761 ; GFX8-LABEL: s_fshr_i32:
2763 ; GFX8-NEXT: v_mov_b32_e32 v0, s1
2764 ; GFX8-NEXT: v_mov_b32_e32 v1, s2
2765 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1
2766 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
2767 ; GFX8-NEXT: ; return to shader part epilog
2769 ; GFX9-LABEL: s_fshr_i32:
2771 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
2772 ; GFX9-NEXT: v_mov_b32_e32 v1, s2
2773 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1
2774 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
2775 ; GFX9-NEXT: ; return to shader part epilog
2777 ; GFX10-LABEL: s_fshr_i32:
2779 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
2780 ; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0
2781 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2782 ; GFX10-NEXT: ; return to shader part epilog
2784 ; GFX11-LABEL: s_fshr_i32:
2786 ; GFX11-NEXT: v_mov_b32_e32 v0, s2
2787 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2788 ; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, v0
2789 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
2790 ; GFX11-NEXT: ; return to shader part epilog
2791 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
; i32 funnel shift right in an amdgpu_ps shader with inreg lhs/rhs and a
; constant shift amount of 5.
; The checks pass the amount to v_alignbit_b32 as the literal 5 and move
; the result to s0 with v_readfirstlane_b32. The GFX6, GFX8 and GFX9 checks
; copy s1 into v0 first; the GFX10 and GFX11 checks use s0 and s1 directly.
2795 define amdgpu_ps i32 @s_fshr_i32_5(i32 inreg %lhs, i32 inreg %rhs) {
2796 ; GFX6-LABEL: s_fshr_i32_5:
2798 ; GFX6-NEXT: v_mov_b32_e32 v0, s1
2799 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 5
2800 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
2801 ; GFX6-NEXT: ; return to shader part epilog
2803 ; GFX8-LABEL: s_fshr_i32_5:
2805 ; GFX8-NEXT: v_mov_b32_e32 v0, s1
2806 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 5
2807 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
2808 ; GFX8-NEXT: ; return to shader part epilog
2810 ; GFX9-LABEL: s_fshr_i32_5:
2812 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
2813 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 5
2814 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
2815 ; GFX9-NEXT: ; return to shader part epilog
2817 ; GFX10-LABEL: s_fshr_i32_5:
2819 ; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 5
2820 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2821 ; GFX10-NEXT: ; return to shader part epilog
2823 ; GFX11-LABEL: s_fshr_i32_5:
2825 ; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 5
2826 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2827 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
2828 ; GFX11-NEXT: ; return to shader part epilog
2829 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5)
; i32 funnel shift right in an amdgpu_ps shader with inreg lhs/rhs and a
; constant shift amount of 8.
; The checks pass the amount to v_alignbit_b32 as the literal 8 and move
; the result to s0 with v_readfirstlane_b32. The GFX6, GFX8 and GFX9 checks
; copy s1 into v0 first; the GFX10 and GFX11 checks use s0 and s1 directly.
2833 define amdgpu_ps i32 @s_fshr_i32_8(i32 inreg %lhs, i32 inreg %rhs) {
2834 ; GFX6-LABEL: s_fshr_i32_8:
2836 ; GFX6-NEXT: v_mov_b32_e32 v0, s1
2837 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 8
2838 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
2839 ; GFX6-NEXT: ; return to shader part epilog
2841 ; GFX8-LABEL: s_fshr_i32_8:
2843 ; GFX8-NEXT: v_mov_b32_e32 v0, s1
2844 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 8
2845 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
2846 ; GFX8-NEXT: ; return to shader part epilog
2848 ; GFX9-LABEL: s_fshr_i32_8:
2850 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
2851 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 8
2852 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
2853 ; GFX9-NEXT: ; return to shader part epilog
2855 ; GFX10-LABEL: s_fshr_i32_8:
2857 ; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 8
2858 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2859 ; GFX10-NEXT: ; return to shader part epilog
2861 ; GFX11-LABEL: s_fshr_i32_8:
2863 ; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 8
2864 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2865 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
2866 ; GFX11-NEXT: ; return to shader part epilog
2867 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8)
; i32 funnel shift right with all operands passed as ordinary (non-inreg)
; arguments and a variable shift amount.
; The checks expect one v_alignbit_b32 taking v0, v1, v2 in argument order.
; The shared GCN prefix covers the tahiti, fiji, gfx900 and gfx1010 RUN
; lines; the gfx1100 RUN line is checked under its own GFX11 prefix.
2871 define i32 @v_fshr_i32(i32 %lhs, i32 %rhs, i32 %amt) {
2872 ; GCN-LABEL: v_fshr_i32:
2874 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2875 ; GCN-NEXT: v_alignbit_b32 v0, v0, v1, v2
2876 ; GCN-NEXT: s_setpc_b64 s[30:31]
2878 ; GFX11-LABEL: v_fshr_i32:
2880 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2881 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, v2
2882 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2883 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
; i32 funnel shift right with non-inreg lhs/rhs and a constant shift
; amount of 5.
; The checks expect one v_alignbit_b32 with the amount given as the
; literal 5, identical under the GCN and GFX11 prefixes.
2887 define i32 @v_fshr_i32_5(i32 %lhs, i32 %rhs) {
2888 ; GCN-LABEL: v_fshr_i32_5:
2890 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2891 ; GCN-NEXT: v_alignbit_b32 v0, v0, v1, 5
2892 ; GCN-NEXT: s_setpc_b64 s[30:31]
2894 ; GFX11-LABEL: v_fshr_i32_5:
2896 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2897 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 5
2898 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2899 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5)
; i32 funnel shift right with non-inreg lhs/rhs and a constant shift
; amount of 8.
; The checks expect one v_alignbit_b32 with the amount given as the
; literal 8, identical under the GCN and GFX11 prefixes.
2903 define i32 @v_fshr_i32_8(i32 %lhs, i32 %rhs) {
2904 ; GCN-LABEL: v_fshr_i32_8:
2906 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2907 ; GCN-NEXT: v_alignbit_b32 v0, v0, v1, 8
2908 ; GCN-NEXT: s_setpc_b64 s[30:31]
2910 ; GFX11-LABEL: v_fshr_i32_8:
2912 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2913 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 8
2914 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2915 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8)
; i32 funnel shift right in an amdgpu_ps shader with inreg lhs and rhs and
; a non-inreg shift amount. The result is bitcast to float for the return,
; and the checks leave it in v0 with no v_readfirstlane_b32.
; The GFX6, GFX8 and GFX9 checks copy s1 into v1 before v_alignbit_b32;
; the GFX10 and GFX11 checks use s0 and s1 directly.
2919 define amdgpu_ps float @v_fshr_i32_ssv(i32 inreg %lhs, i32 inreg %rhs, i32 %amt) {
2920 ; GFX6-LABEL: v_fshr_i32_ssv:
2922 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2923 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v1, v0
2924 ; GFX6-NEXT: ; return to shader part epilog
2926 ; GFX8-LABEL: v_fshr_i32_ssv:
2928 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2929 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v1, v0
2930 ; GFX8-NEXT: ; return to shader part epilog
2932 ; GFX9-LABEL: v_fshr_i32_ssv:
2934 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
2935 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v1, v0
2936 ; GFX9-NEXT: ; return to shader part epilog
2938 ; GFX10-LABEL: v_fshr_i32_ssv:
2940 ; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0
2941 ; GFX10-NEXT: ; return to shader part epilog
2943 ; GFX11-LABEL: v_fshr_i32_ssv:
2945 ; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, v0
2946 ; GFX11-NEXT: ; return to shader part epilog
2947 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2948 %cast.result = bitcast i32 %result to float
2949 ret float %cast.result
; i32 funnel shift right in an amdgpu_ps shader with inreg lhs, non-inreg
; rhs and inreg shift amount. The result is bitcast to float for the
; return, and the checks leave it in v0 with no v_readfirstlane_b32.
; The GFX6, GFX8 and GFX9 checks copy the amount (s1) into v1 before
; v_alignbit_b32; the GFX10 and GFX11 checks pass s1 directly.
2952 define amdgpu_ps float @v_fshr_i32_svs(i32 inreg %lhs, i32 %rhs, i32 inreg %amt) {
2953 ; GFX6-LABEL: v_fshr_i32_svs:
2955 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2956 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1
2957 ; GFX6-NEXT: ; return to shader part epilog
2959 ; GFX8-LABEL: v_fshr_i32_svs:
2961 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2962 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1
2963 ; GFX8-NEXT: ; return to shader part epilog
2965 ; GFX9-LABEL: v_fshr_i32_svs:
2967 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
2968 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1
2969 ; GFX9-NEXT: ; return to shader part epilog
2971 ; GFX10-LABEL: v_fshr_i32_svs:
2973 ; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, s1
2974 ; GFX10-NEXT: ; return to shader part epilog
2976 ; GFX11-LABEL: v_fshr_i32_svs:
2978 ; GFX11-NEXT: v_alignbit_b32 v0, s0, v0, s1
2979 ; GFX11-NEXT: ; return to shader part epilog
2980 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2981 %cast.result = bitcast i32 %result to float
2982 ret float %cast.result
; i32 funnel shift right in an amdgpu_ps shader; the result is bitcast to
; float for the return, and the checks leave it in v0 with no
; v_readfirstlane_b32.
; NOTE(review): the "_vss" suffix suggests a non-inreg lhs by analogy with
; the _ssv and _svs variants, but all three operands here are declared
; inreg, and the checks (s0, s1, s2 as sources) match that signature.
; As written this exercises the same operand placement as s_fshr_i32, just
; without the readfirstlane -- confirm whether the lhs was meant to be
; non-inreg before changing it, since the checks would need regenerating.
2985 define amdgpu_ps float @v_fshr_i32_vss(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
2986 ; GFX6-LABEL: v_fshr_i32_vss:
2988 ; GFX6-NEXT: v_mov_b32_e32 v0, s1
2989 ; GFX6-NEXT: v_mov_b32_e32 v1, s2
2990 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1
2991 ; GFX6-NEXT: ; return to shader part epilog
2993 ; GFX8-LABEL: v_fshr_i32_vss:
2995 ; GFX8-NEXT: v_mov_b32_e32 v0, s1
2996 ; GFX8-NEXT: v_mov_b32_e32 v1, s2
2997 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1
2998 ; GFX8-NEXT: ; return to shader part epilog
3000 ; GFX9-LABEL: v_fshr_i32_vss:
3002 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
3003 ; GFX9-NEXT: v_mov_b32_e32 v1, s2
3004 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1
3005 ; GFX9-NEXT: ; return to shader part epilog
3007 ; GFX10-LABEL: v_fshr_i32_vss:
3009 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
3010 ; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0
3011 ; GFX10-NEXT: ; return to shader part epilog
3013 ; GFX11-LABEL: v_fshr_i32_vss:
3015 ; GFX11-NEXT: v_mov_b32_e32 v0, s2
3016 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
3017 ; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, v0
3018 ; GFX11-NEXT: ; return to shader part epilog
3019 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
3020 %cast.result = bitcast i32 %result to float
3021 ret float %cast.result
; <2 x i32> funnel shift right with non-inreg operands and variable
; per-element shift amounts.
; The checks expect one v_alignbit_b32 per element, pairing lhs (v0-v1),
; rhs (v2-v3) and amt (v4-v5) registers; GCN and GFX11 output is identical.
3024 define <2 x i32> @v_fshr_v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) {
3025 ; GCN-LABEL: v_fshr_v2i32:
3027 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3028 ; GCN-NEXT: v_alignbit_b32 v0, v0, v2, v4
3029 ; GCN-NEXT: v_alignbit_b32 v1, v1, v3, v5
3030 ; GCN-NEXT: s_setpc_b64 s[30:31]
3032 ; GFX11-LABEL: v_fshr_v2i32:
3034 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3035 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v2, v4
3036 ; GFX11-NEXT: v_alignbit_b32 v1, v1, v3, v5
3037 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3038 %result = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt)
3039 ret <2 x i32> %result
; Funnel shift right on <3 x i32> (a non-power-of-two element count) with all
; operands as ordinary arguments. The expected code is three independent
; v_alignbit_b32 instructions, one per element.
3042 define <3 x i32> @v_fshr_v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) {
3043 ; GCN-LABEL: v_fshr_v3i32:
3045 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3046 ; GCN-NEXT: v_alignbit_b32 v0, v0, v3, v6
3047 ; GCN-NEXT: v_alignbit_b32 v1, v1, v4, v7
3048 ; GCN-NEXT: v_alignbit_b32 v2, v2, v5, v8
3049 ; GCN-NEXT: s_setpc_b64 s[30:31]
3051 ; GFX11-LABEL: v_fshr_v3i32:
3053 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3054 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v3, v6
3055 ; GFX11-NEXT: v_alignbit_b32 v1, v1, v4, v7
3056 ; GFX11-NEXT: v_alignbit_b32 v2, v2, v5, v8
3057 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3058 %result = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt)
3059 ret <3 x i32> %result
; Funnel shift right on <4 x i32> with all operands as ordinary arguments.
; The expected code is four independent v_alignbit_b32 instructions, one per
; element, identical across all run configurations.
3062 define <4 x i32> @v_fshr_v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) {
3063 ; GCN-LABEL: v_fshr_v4i32:
3065 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3066 ; GCN-NEXT: v_alignbit_b32 v0, v0, v4, v8
3067 ; GCN-NEXT: v_alignbit_b32 v1, v1, v5, v9
3068 ; GCN-NEXT: v_alignbit_b32 v2, v2, v6, v10
3069 ; GCN-NEXT: v_alignbit_b32 v3, v3, v7, v11
3070 ; GCN-NEXT: s_setpc_b64 s[30:31]
3072 ; GFX11-LABEL: v_fshr_v4i32:
3074 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3075 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v4, v8
3076 ; GFX11-NEXT: v_alignbit_b32 v1, v1, v5, v9
3077 ; GFX11-NEXT: v_alignbit_b32 v2, v2, v6, v10
3078 ; GFX11-NEXT: v_alignbit_b32 v3, v3, v7, v11
3079 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3080 %result = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt)
3081 ret <4 x i32> %result
; 16-bit funnel shift right with a variable amount; every operand is inreg
; (the expected code reads them from s0, s1 and s2). The expected expansion is
; entirely scalar: %lhs is pre-shifted left by 1 and then by (~%amt & 15),
; %rhs is masked to 16 bits and shifted right by (%amt & 15), and the two
; halves are OR'd together.
3084 define amdgpu_ps i16 @s_fshr_i16(i16 inreg %lhs, i16 inreg %rhs, i16 inreg %amt) {
3085 ; GFX6-LABEL: s_fshr_i16:
3087 ; GFX6-NEXT: s_and_b32 s3, s2, 15
3088 ; GFX6-NEXT: s_andn2_b32 s2, 15, s2
3089 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
3090 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
3091 ; GFX6-NEXT: s_lshl_b32 s0, s0, s2
3092 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s3
3093 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
3094 ; GFX6-NEXT: s_lshr_b32 s1, s1, s2
3095 ; GFX6-NEXT: s_or_b32 s0, s0, s1
3096 ; GFX6-NEXT: ; return to shader part epilog
3098 ; GFX8-LABEL: s_fshr_i16:
3100 ; GFX8-NEXT: s_and_b32 s3, s2, 15
3101 ; GFX8-NEXT: s_andn2_b32 s2, 15, s2
3102 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
3103 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
3104 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2
3105 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3106 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
3107 ; GFX8-NEXT: s_lshr_b32 s1, s1, s2
3108 ; GFX8-NEXT: s_or_b32 s0, s0, s1
3109 ; GFX8-NEXT: ; return to shader part epilog
3111 ; GFX9-LABEL: s_fshr_i16:
3113 ; GFX9-NEXT: s_and_b32 s3, s2, 15
3114 ; GFX9-NEXT: s_andn2_b32 s2, 15, s2
3115 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
3116 ; GFX9-NEXT: s_and_b32 s2, 0xffff, s2
3117 ; GFX9-NEXT: s_lshl_b32 s0, s0, s2
3118 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
3119 ; GFX9-NEXT: s_and_b32 s2, 0xffff, s3
3120 ; GFX9-NEXT: s_lshr_b32 s1, s1, s2
3121 ; GFX9-NEXT: s_or_b32 s0, s0, s1
3122 ; GFX9-NEXT: ; return to shader part epilog
3124 ; GFX10-LABEL: s_fshr_i16:
3126 ; GFX10-NEXT: s_and_b32 s3, s2, 15
3127 ; GFX10-NEXT: s_andn2_b32 s2, 15, s2
3128 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
3129 ; GFX10-NEXT: s_and_b32 s2, 0xffff, s2
3130 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
3131 ; GFX10-NEXT: s_and_b32 s3, 0xffff, s3
3132 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2
3133 ; GFX10-NEXT: s_lshr_b32 s1, s1, s3
3134 ; GFX10-NEXT: s_or_b32 s0, s0, s1
3135 ; GFX10-NEXT: ; return to shader part epilog
3137 ; GFX11-LABEL: s_fshr_i16:
3139 ; GFX11-NEXT: s_and_b32 s3, s2, 15
3140 ; GFX11-NEXT: s_and_not1_b32 s2, 15, s2
3141 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1
3142 ; GFX11-NEXT: s_and_b32 s2, 0xffff, s2
3143 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
3144 ; GFX11-NEXT: s_and_b32 s3, 0xffff, s3
3145 ; GFX11-NEXT: s_lshl_b32 s0, s0, s2
3146 ; GFX11-NEXT: s_lshr_b32 s1, s1, s3
3147 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3148 ; GFX11-NEXT: s_or_b32 s0, s0, s1
3149 ; GFX11-NEXT: ; return to shader part epilog
3150 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
; 16-bit funnel shift right by the constant 4 with both operands inreg.
; The constant amount folds away the masking seen in @s_fshr_i16: the expected
; code is %lhs << 12 OR'd with the zero-extended %rhs shifted right by 4
; (GFX6 uses a single s_bfe_u32 for the right-hand half instead of and+shift).
3154 define amdgpu_ps i16 @s_fshr_i16_4(i16 inreg %lhs, i16 inreg %rhs) {
3155 ; GFX6-LABEL: s_fshr_i16_4:
3157 ; GFX6-NEXT: s_lshl_b32 s0, s0, 12
3158 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0xc0004
3159 ; GFX6-NEXT: s_or_b32 s0, s0, s1
3160 ; GFX6-NEXT: ; return to shader part epilog
3162 ; GFX8-LABEL: s_fshr_i16_4:
3164 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3165 ; GFX8-NEXT: s_lshl_b32 s0, s0, 12
3166 ; GFX8-NEXT: s_lshr_b32 s1, s1, 4
3167 ; GFX8-NEXT: s_or_b32 s0, s0, s1
3168 ; GFX8-NEXT: ; return to shader part epilog
3170 ; GFX9-LABEL: s_fshr_i16_4:
3172 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
3173 ; GFX9-NEXT: s_lshl_b32 s0, s0, 12
3174 ; GFX9-NEXT: s_lshr_b32 s1, s1, 4
3175 ; GFX9-NEXT: s_or_b32 s0, s0, s1
3176 ; GFX9-NEXT: ; return to shader part epilog
3178 ; GFX10-LABEL: s_fshr_i16_4:
3180 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
3181 ; GFX10-NEXT: s_lshl_b32 s0, s0, 12
3182 ; GFX10-NEXT: s_lshr_b32 s1, s1, 4
3183 ; GFX10-NEXT: s_or_b32 s0, s0, s1
3184 ; GFX10-NEXT: ; return to shader part epilog
3186 ; GFX11-LABEL: s_fshr_i16_4:
3188 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
3189 ; GFX11-NEXT: s_lshl_b32 s0, s0, 12
3190 ; GFX11-NEXT: s_lshr_b32 s1, s1, 4
3191 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3192 ; GFX11-NEXT: s_or_b32 s0, s0, s1
3193 ; GFX11-NEXT: ; return to shader part epilog
3194 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4)
; 16-bit funnel shift right by the constant 5 with both operands inreg.
; Same shape as @s_fshr_i16_4 but with an odd shift amount: the expected code
; is %lhs << 11 OR'd with the zero-extended %rhs shifted right by 5.
3198 define amdgpu_ps i16 @s_fshr_i16_5(i16 inreg %lhs, i16 inreg %rhs) {
3199 ; GFX6-LABEL: s_fshr_i16_5:
3201 ; GFX6-NEXT: s_lshl_b32 s0, s0, 11
3202 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0xb0005
3203 ; GFX6-NEXT: s_or_b32 s0, s0, s1
3204 ; GFX6-NEXT: ; return to shader part epilog
3206 ; GFX8-LABEL: s_fshr_i16_5:
3208 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3209 ; GFX8-NEXT: s_lshl_b32 s0, s0, 11
3210 ; GFX8-NEXT: s_lshr_b32 s1, s1, 5
3211 ; GFX8-NEXT: s_or_b32 s0, s0, s1
3212 ; GFX8-NEXT: ; return to shader part epilog
3214 ; GFX9-LABEL: s_fshr_i16_5:
3216 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
3217 ; GFX9-NEXT: s_lshl_b32 s0, s0, 11
3218 ; GFX9-NEXT: s_lshr_b32 s1, s1, 5
3219 ; GFX9-NEXT: s_or_b32 s0, s0, s1
3220 ; GFX9-NEXT: ; return to shader part epilog
3222 ; GFX10-LABEL: s_fshr_i16_5:
3224 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
3225 ; GFX10-NEXT: s_lshl_b32 s0, s0, 11
3226 ; GFX10-NEXT: s_lshr_b32 s1, s1, 5
3227 ; GFX10-NEXT: s_or_b32 s0, s0, s1
3228 ; GFX10-NEXT: ; return to shader part epilog
3230 ; GFX11-LABEL: s_fshr_i16_5:
3232 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
3233 ; GFX11-NEXT: s_lshl_b32 s0, s0, 11
3234 ; GFX11-NEXT: s_lshr_b32 s1, s1, 5
3235 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3236 ; GFX11-NEXT: s_or_b32 s0, s0, s1
3237 ; GFX11-NEXT: ; return to shader part epilog
3238 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5)
; 16-bit funnel shift right with a variable amount and all operands as
; ordinary (non-inreg) arguments, arriving in v0, v1 and v2. The expected
; expansion mirrors @s_fshr_i16 on the vector ALU: GFX6 uses 32-bit shifts
; with explicit 0xffff masking, while the GFX8 and later runs use the 16-bit
; shift instructions and need no extra masking.
3242 define i16 @v_fshr_i16(i16 %lhs, i16 %rhs, i16 %amt) {
3243 ; GFX6-LABEL: v_fshr_i16:
3245 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3246 ; GFX6-NEXT: v_and_b32_e32 v3, 15, v2
3247 ; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
3248 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v2
3249 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
3250 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3251 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
3252 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
3253 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
3254 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
3255 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
3256 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3258 ; GFX8-LABEL: v_fshr_i16:
3260 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3261 ; GFX8-NEXT: v_and_b32_e32 v3, 15, v2
3262 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
3263 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v2
3264 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0
3265 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0
3266 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, v3, v1
3267 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
3268 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3270 ; GFX9-LABEL: v_fshr_i16:
3272 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3273 ; GFX9-NEXT: v_and_b32_e32 v3, 15, v2
3274 ; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
3275 ; GFX9-NEXT: v_and_b32_e32 v2, 15, v2
3276 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0
3277 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0
3278 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, v3, v1
3279 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
3280 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3282 ; GFX10-LABEL: v_fshr_i16:
3284 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3285 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2
3286 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
3287 ; GFX10-NEXT: v_and_b32_e32 v2, 15, v2
3288 ; GFX10-NEXT: v_and_b32_e32 v3, 15, v3
3289 ; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1
3290 ; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0
3291 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3292 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3294 ; GFX11-LABEL: v_fshr_i16:
3296 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3297 ; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2
3298 ; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
3299 ; GFX11-NEXT: v_and_b32_e32 v2, 15, v2
3300 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
3301 ; GFX11-NEXT: v_and_b32_e32 v3, 15, v3
3302 ; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1
3303 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3304 ; GFX11-NEXT: v_lshlrev_b16 v0, v3, v0
3305 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
3306 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3307 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
; 16-bit funnel shift right by the constant 4 with both operands as ordinary
; arguments (v0, v1). Expected code: %lhs shifted left by 12 OR'd with %rhs
; shifted right by 4; GFX6 extracts the right-hand half with v_bfe_u32 while
; the later targets use 16-bit shifts.
3311 define i16 @v_fshr_i16_4(i16 %lhs, i16 %rhs) {
3312 ; GFX6-LABEL: v_fshr_i16_4:
3314 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3315 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 12, v0
3316 ; GFX6-NEXT: v_bfe_u32 v1, v1, 4, 12
3317 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
3318 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3320 ; GFX8-LABEL: v_fshr_i16_4:
3322 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3323 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 12, v0
3324 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 4, v1
3325 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
3326 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3328 ; GFX9-LABEL: v_fshr_i16_4:
3330 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3331 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 12, v0
3332 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, 4, v1
3333 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
3334 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3336 ; GFX10-LABEL: v_fshr_i16_4:
3338 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3339 ; GFX10-NEXT: v_lshlrev_b16 v0, 12, v0
3340 ; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1
3341 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3342 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3344 ; GFX11-LABEL: v_fshr_i16_4:
3346 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3347 ; GFX11-NEXT: v_lshlrev_b16 v0, 12, v0
3348 ; GFX11-NEXT: v_lshrrev_b16 v1, 4, v1
3349 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
3350 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
3351 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3352 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4)
; 16-bit funnel shift right by the constant 5 with both operands as ordinary
; arguments (v0, v1). Same shape as @v_fshr_i16_4 with an odd amount: %lhs
; shifted left by 11 OR'd with %rhs shifted right by 5.
3356 define i16 @v_fshr_i16_5(i16 %lhs, i16 %rhs) {
3357 ; GFX6-LABEL: v_fshr_i16_5:
3359 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3360 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 11, v0
3361 ; GFX6-NEXT: v_bfe_u32 v1, v1, 5, 11
3362 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
3363 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3365 ; GFX8-LABEL: v_fshr_i16_5:
3367 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3368 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 11, v0
3369 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 5, v1
3370 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
3371 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3373 ; GFX9-LABEL: v_fshr_i16_5:
3375 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3376 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 11, v0
3377 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, 5, v1
3378 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
3379 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3381 ; GFX10-LABEL: v_fshr_i16_5:
3383 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3384 ; GFX10-NEXT: v_lshlrev_b16 v0, 11, v0
3385 ; GFX10-NEXT: v_lshrrev_b16 v1, 5, v1
3386 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3387 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3389 ; GFX11-LABEL: v_fshr_i16_5:
3391 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3392 ; GFX11-NEXT: v_lshlrev_b16 v0, 11, v0
3393 ; GFX11-NEXT: v_lshrrev_b16 v1, 5, v1
3394 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
3395 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
3396 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3397 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5)
; 16-bit funnel shift right with mixed operand placement: %lhs and %rhs are
; inreg (read from s0 and s1 in the expected code) and only %amt is a regular
; argument (v0). The result is bitcast to half so that it is returned in a
; vector register. The amount masking is done on the vector ALU while the
; constant pre-shift of %lhs by 1 stays scalar.
3401 define amdgpu_ps half @v_fshr_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt) {
3402 ; GFX6-LABEL: v_fshr_i16_ssv:
3404 ; GFX6-NEXT: v_and_b32_e32 v1, 15, v0
3405 ; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
3406 ; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
3407 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
3408 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
3409 ; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
3410 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
3411 ; GFX6-NEXT: s_and_b32 s0, s1, 0xffff
3412 ; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1
3413 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
3414 ; GFX6-NEXT: ; return to shader part epilog
3416 ; GFX8-LABEL: v_fshr_i16_ssv:
3418 ; GFX8-NEXT: v_and_b32_e32 v1, 15, v0
3419 ; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
3420 ; GFX8-NEXT: v_and_b32_e32 v0, 15, v0
3421 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
3422 ; GFX8-NEXT: v_lshlrev_b16_e64 v0, v0, s0
3423 ; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s1
3424 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
3425 ; GFX8-NEXT: ; return to shader part epilog
3427 ; GFX9-LABEL: v_fshr_i16_ssv:
3429 ; GFX9-NEXT: v_and_b32_e32 v1, 15, v0
3430 ; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
3431 ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0
3432 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
3433 ; GFX9-NEXT: v_lshlrev_b16_e64 v0, v0, s0
3434 ; GFX9-NEXT: v_lshrrev_b16_e64 v1, v1, s1
3435 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
3436 ; GFX9-NEXT: ; return to shader part epilog
3438 ; GFX10-LABEL: v_fshr_i16_ssv:
3440 ; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0
3441 ; GFX10-NEXT: v_and_b32_e32 v0, 15, v0
3442 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
3443 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v1
3444 ; GFX10-NEXT: v_lshrrev_b16 v0, v0, s1
3445 ; GFX10-NEXT: v_lshlrev_b16 v1, v1, s0
3446 ; GFX10-NEXT: v_or_b32_e32 v0, v1, v0
3447 ; GFX10-NEXT: ; return to shader part epilog
3449 ; GFX11-LABEL: v_fshr_i16_ssv:
3451 ; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0
3452 ; GFX11-NEXT: v_and_b32_e32 v0, 15, v0
3453 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1
3454 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
3455 ; GFX11-NEXT: v_and_b32_e32 v1, 15, v1
3456 ; GFX11-NEXT: v_lshrrev_b16 v0, v0, s1
3457 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3458 ; GFX11-NEXT: v_lshlrev_b16 v1, v1, s0
3459 ; GFX11-NEXT: v_or_b32_e32 v0, v1, v0
3460 ; GFX11-NEXT: ; return to shader part epilog
3461 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
3462 %cast.result = bitcast i16 %result to half
3463 ret half %cast.result
; 16-bit funnel shift right with mixed operand placement: %lhs and %amt are
; inreg (read from s0 and s1 in the expected code) and only %rhs is a regular
; argument (v0). The result is bitcast to half. Because the amount is
; scalar, the %lhs half of the computation is expected to stay entirely on
; the scalar ALU; only the right shift of %rhs and the final OR are vector ops.
3466 define amdgpu_ps half @v_fshr_i16_svs(i16 inreg %lhs, i16 %rhs, i16 inreg %amt) {
3467 ; GFX6-LABEL: v_fshr_i16_svs:
3469 ; GFX6-NEXT: s_and_b32 s2, s1, 15
3470 ; GFX6-NEXT: s_andn2_b32 s1, 15, s1
3471 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
3472 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
3473 ; GFX6-NEXT: s_lshl_b32 s0, s0, s1
3474 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
3475 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
3476 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, s1, v0
3477 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
3478 ; GFX6-NEXT: ; return to shader part epilog
3480 ; GFX8-LABEL: v_fshr_i16_svs:
3482 ; GFX8-NEXT: s_and_b32 s2, s1, 15
3483 ; GFX8-NEXT: s_andn2_b32 s1, 15, s1
3484 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
3485 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3486 ; GFX8-NEXT: s_lshl_b32 s0, s0, s1
3487 ; GFX8-NEXT: v_lshrrev_b16_e32 v0, s2, v0
3488 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
3489 ; GFX8-NEXT: ; return to shader part epilog
3491 ; GFX9-LABEL: v_fshr_i16_svs:
3493 ; GFX9-NEXT: s_and_b32 s2, s1, 15
3494 ; GFX9-NEXT: s_andn2_b32 s1, 15, s1
3495 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
3496 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
3497 ; GFX9-NEXT: s_lshl_b32 s0, s0, s1
3498 ; GFX9-NEXT: v_lshrrev_b16_e32 v0, s2, v0
3499 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
3500 ; GFX9-NEXT: ; return to shader part epilog
3502 ; GFX10-LABEL: v_fshr_i16_svs:
3504 ; GFX10-NEXT: s_and_b32 s2, s1, 15
3505 ; GFX10-NEXT: s_andn2_b32 s1, 15, s1
3506 ; GFX10-NEXT: v_lshrrev_b16 v0, s2, v0
3507 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
3508 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
3509 ; GFX10-NEXT: s_lshl_b32 s0, s0, s1
3510 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
3511 ; GFX10-NEXT: ; return to shader part epilog
3513 ; GFX11-LABEL: v_fshr_i16_svs:
3515 ; GFX11-NEXT: s_and_b32 s2, s1, 15
3516 ; GFX11-NEXT: s_and_not1_b32 s1, 15, s1
3517 ; GFX11-NEXT: v_lshrrev_b16 v0, s2, v0
3518 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1
3519 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
3520 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3521 ; GFX11-NEXT: s_lshl_b32 s0, s0, s1
3522 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
3523 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v0
3524 ; GFX11-NEXT: ; return to shader part epilog
3525 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
3526 %cast.result = bitcast i16 %result to half
3527 ret half %cast.result
; 16-bit funnel shift right with mixed operand placement: %lhs is a regular
; argument (v0) while %rhs and %amt are inreg (read from s0 and s1 in the
; expected code). The result is bitcast to half. Here the %rhs half of the
; computation (mask, right shift) is expected to stay scalar, and only the
; left shifts of %lhs and the final OR are vector ops.
3530 define amdgpu_ps half @v_fshr_i16_vss(i16 %lhs, i16 inreg %rhs, i16 inreg %amt) {
3531 ; GFX6-LABEL: v_fshr_i16_vss:
3533 ; GFX6-NEXT: s_and_b32 s2, s1, 15
3534 ; GFX6-NEXT: s_andn2_b32 s1, 15, s1
3535 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
3536 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
3537 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, s1, v0
3538 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
3539 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
3540 ; GFX6-NEXT: s_lshr_b32 s0, s0, s1
3541 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
3542 ; GFX6-NEXT: ; return to shader part epilog
3544 ; GFX8-LABEL: v_fshr_i16_vss:
3546 ; GFX8-NEXT: s_and_b32 s2, s1, 15
3547 ; GFX8-NEXT: s_andn2_b32 s1, 15, s1
3548 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0
3549 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, s1, v0
3550 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
3551 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s2
3552 ; GFX8-NEXT: s_lshr_b32 s0, s0, s1
3553 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
3554 ; GFX8-NEXT: ; return to shader part epilog
3556 ; GFX9-LABEL: v_fshr_i16_vss:
3558 ; GFX9-NEXT: s_and_b32 s2, s1, 15
3559 ; GFX9-NEXT: s_andn2_b32 s1, 15, s1
3560 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0
3561 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, s1, v0
3562 ; GFX9-NEXT: s_and_b32 s0, 0xffff, s0
3563 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s2
3564 ; GFX9-NEXT: s_lshr_b32 s0, s0, s1
3565 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
3566 ; GFX9-NEXT: ; return to shader part epilog
3568 ; GFX10-LABEL: v_fshr_i16_vss:
3570 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
3571 ; GFX10-NEXT: s_andn2_b32 s2, 15, s1
3572 ; GFX10-NEXT: s_and_b32 s1, s1, 15
3573 ; GFX10-NEXT: s_and_b32 s0, 0xffff, s0
3574 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
3575 ; GFX10-NEXT: v_lshlrev_b16 v0, s2, v0
3576 ; GFX10-NEXT: s_lshr_b32 s0, s0, s1
3577 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
3578 ; GFX10-NEXT: ; return to shader part epilog
3580 ; GFX11-LABEL: v_fshr_i16_vss:
3582 ; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
3583 ; GFX11-NEXT: s_and_not1_b32 s2, 15, s1
3584 ; GFX11-NEXT: s_and_b32 s1, s1, 15
3585 ; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
3586 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
3587 ; GFX11-NEXT: v_lshlrev_b16 v0, s2, v0
3588 ; GFX11-NEXT: s_lshr_b32 s0, s0, s1
3589 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
3590 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v0
3591 ; GFX11-NEXT: ; return to shader part epilog
3592 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
3593 %cast.result = bitcast i16 %result to half
3594 ret half %cast.result
; Funnel shift right on <2 x i16> with a variable per-element amount; every
; operand is inreg and the result is bitcast to i32. The expected code is
; entirely scalar on all targets. On GFX6 each vector operand arrives as two
; separate registers (s0..s5), whereas the GFX8 and later runs receive one
; packed 32-bit register per operand (s0, s1, s2) and must split the halves
; themselves; from GFX9 onward the halves are recombined with
; s_pack_ll_b32_b16 and the amount is masked with the packed constant 0xf000f.
3597 define amdgpu_ps i32 @s_fshr_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
3598 ; GFX6-LABEL: s_fshr_v2i16:
3600 ; GFX6-NEXT: s_lshl_b32 s5, s5, 16
3601 ; GFX6-NEXT: s_and_b32 s4, s4, 0xffff
3602 ; GFX6-NEXT: s_or_b32 s4, s5, s4
3603 ; GFX6-NEXT: s_bfe_u32 s5, s2, 0xf0001
3604 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
3605 ; GFX6-NEXT: s_lshr_b32 s5, s5, 14
3606 ; GFX6-NEXT: s_or_b32 s0, s0, s5
3607 ; GFX6-NEXT: s_bfe_u32 s5, s3, 0xf0001
3608 ; GFX6-NEXT: s_lshl_b32 s1, s1, 1
3609 ; GFX6-NEXT: s_lshr_b32 s5, s5, 14
3610 ; GFX6-NEXT: s_xor_b32 s4, s4, -1
3611 ; GFX6-NEXT: s_or_b32 s1, s1, s5
3612 ; GFX6-NEXT: s_lshl_b32 s2, s2, 1
3613 ; GFX6-NEXT: s_lshr_b32 s5, s4, 16
3614 ; GFX6-NEXT: s_and_b32 s6, s4, 15
3615 ; GFX6-NEXT: s_andn2_b32 s4, 15, s4
3616 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
3617 ; GFX6-NEXT: s_bfe_u32 s2, s2, 0xf0001
3618 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
3619 ; GFX6-NEXT: s_lshl_b32 s0, s0, s6
3620 ; GFX6-NEXT: s_lshr_b32 s2, s2, s4
3621 ; GFX6-NEXT: s_or_b32 s0, s0, s2
3622 ; GFX6-NEXT: s_and_b32 s2, s5, 15
3623 ; GFX6-NEXT: s_lshl_b32 s3, s3, 1
3624 ; GFX6-NEXT: s_andn2_b32 s4, 15, s5
3625 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
3626 ; GFX6-NEXT: s_lshl_b32 s1, s1, s2
3627 ; GFX6-NEXT: s_bfe_u32 s2, s3, 0xf0001
3628 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s4
3629 ; GFX6-NEXT: s_lshr_b32 s2, s2, s3
3630 ; GFX6-NEXT: s_or_b32 s1, s1, s2
3631 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
3632 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
3633 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
3634 ; GFX6-NEXT: s_or_b32 s0, s0, s1
3635 ; GFX6-NEXT: ; return to shader part epilog
3637 ; GFX8-LABEL: s_fshr_v2i16:
3639 ; GFX8-NEXT: s_and_b32 s5, 0xffff, s1
3640 ; GFX8-NEXT: s_lshr_b32 s3, s0, 16
3641 ; GFX8-NEXT: s_lshr_b32 s4, s1, 16
3642 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
3643 ; GFX8-NEXT: s_lshr_b32 s5, s5, 15
3644 ; GFX8-NEXT: s_or_b32 s0, s0, s5
3645 ; GFX8-NEXT: s_lshl_b32 s3, s3, 1
3646 ; GFX8-NEXT: s_lshr_b32 s5, s4, 15
3647 ; GFX8-NEXT: s_lshl_b32 s1, s1, 1
3648 ; GFX8-NEXT: s_xor_b32 s2, s2, -1
3649 ; GFX8-NEXT: s_or_b32 s3, s3, s5
3650 ; GFX8-NEXT: s_lshr_b32 s5, s2, 16
3651 ; GFX8-NEXT: s_and_b32 s6, s2, 15
3652 ; GFX8-NEXT: s_andn2_b32 s2, 15, s2
3653 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3654 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
3655 ; GFX8-NEXT: s_lshr_b32 s1, s1, 1
3656 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
3657 ; GFX8-NEXT: s_lshl_b32 s0, s0, s6
3658 ; GFX8-NEXT: s_lshr_b32 s1, s1, s2
3659 ; GFX8-NEXT: s_or_b32 s0, s0, s1
3660 ; GFX8-NEXT: s_and_b32 s1, s5, 15
3661 ; GFX8-NEXT: s_lshl_b32 s4, s4, 1
3662 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3663 ; GFX8-NEXT: s_andn2_b32 s2, 15, s5
3664 ; GFX8-NEXT: s_lshl_b32 s1, s3, s1
3665 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s4
3666 ; GFX8-NEXT: s_lshr_b32 s3, s3, 1
3667 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
3668 ; GFX8-NEXT: s_lshr_b32 s2, s3, s2
3669 ; GFX8-NEXT: s_or_b32 s1, s1, s2
3670 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3671 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
3672 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
3673 ; GFX8-NEXT: s_or_b32 s0, s0, s1
3674 ; GFX8-NEXT: ; return to shader part epilog
3676 ; GFX9-LABEL: s_fshr_v2i16:
3678 ; GFX9-NEXT: s_lshr_b32 s4, s0, 16
3679 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001
3680 ; GFX9-NEXT: s_lshl_b32 s4, s4, 1
3681 ; GFX9-NEXT: s_and_b32 s3, s2, 0xf000f
3682 ; GFX9-NEXT: s_andn2_b32 s2, 0xf000f, s2
3683 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4
3684 ; GFX9-NEXT: s_lshr_b32 s4, s0, 16
3685 ; GFX9-NEXT: s_lshr_b32 s5, s2, 16
3686 ; GFX9-NEXT: s_lshl_b32 s0, s0, s2
3687 ; GFX9-NEXT: s_lshl_b32 s2, s4, s5
3688 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
3689 ; GFX9-NEXT: s_lshr_b32 s2, s1, 16
3690 ; GFX9-NEXT: s_and_b32 s1, s1, 0xffff
3691 ; GFX9-NEXT: s_lshr_b32 s4, s3, 16
3692 ; GFX9-NEXT: s_lshr_b32 s1, s1, s3
3693 ; GFX9-NEXT: s_lshr_b32 s2, s2, s4
3694 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2
3695 ; GFX9-NEXT: s_or_b32 s0, s0, s1
3696 ; GFX9-NEXT: ; return to shader part epilog
3698 ; GFX10-LABEL: s_fshr_v2i16:
3700 ; GFX10-NEXT: s_lshr_b32 s3, s0, 16
3701 ; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001
3702 ; GFX10-NEXT: s_lshl_b32 s3, s3, 1
3703 ; GFX10-NEXT: s_and_b32 s4, s2, 0xf000f
3704 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s3
3705 ; GFX10-NEXT: s_andn2_b32 s2, 0xf000f, s2
3706 ; GFX10-NEXT: s_lshr_b32 s3, s0, 16
3707 ; GFX10-NEXT: s_lshr_b32 s5, s2, 16
3708 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2
3709 ; GFX10-NEXT: s_lshl_b32 s2, s3, s5
3710 ; GFX10-NEXT: s_lshr_b32 s3, s1, 16
3711 ; GFX10-NEXT: s_and_b32 s1, s1, 0xffff
3712 ; GFX10-NEXT: s_lshr_b32 s5, s4, 16
3713 ; GFX10-NEXT: s_lshr_b32 s1, s1, s4
3714 ; GFX10-NEXT: s_lshr_b32 s3, s3, s5
3715 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2
3716 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s3
3717 ; GFX10-NEXT: s_or_b32 s0, s0, s1
3718 ; GFX10-NEXT: ; return to shader part epilog
3720 ; GFX11-LABEL: s_fshr_v2i16:
3722 ; GFX11-NEXT: s_lshr_b32 s3, s0, 16
3723 ; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001
3724 ; GFX11-NEXT: s_lshl_b32 s3, s3, 1
3725 ; GFX11-NEXT: s_and_b32 s4, s2, 0xf000f
3726 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s3
3727 ; GFX11-NEXT: s_and_not1_b32 s2, 0xf000f, s2
3728 ; GFX11-NEXT: s_lshr_b32 s3, s0, 16
3729 ; GFX11-NEXT: s_lshr_b32 s5, s2, 16
3730 ; GFX11-NEXT: s_lshl_b32 s0, s0, s2
3731 ; GFX11-NEXT: s_lshl_b32 s2, s3, s5
3732 ; GFX11-NEXT: s_lshr_b32 s3, s1, 16
3733 ; GFX11-NEXT: s_and_b32 s1, s1, 0xffff
3734 ; GFX11-NEXT: s_lshr_b32 s5, s4, 16
3735 ; GFX11-NEXT: s_lshr_b32 s1, s1, s4
3736 ; GFX11-NEXT: s_lshr_b32 s3, s3, s5
3737 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s2
3738 ; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s3
3739 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3740 ; GFX11-NEXT: s_or_b32 s0, s0, s1
3741 ; GFX11-NEXT: ; return to shader part epilog
3742 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3743 %cast = bitcast <2 x i16> %result to i32
; Funnel shift right on <2 x i16> with a variable per-element amount and all
; operands as ordinary arguments. The expected code differs sharply by
; target: GFX6 receives each element in its own register (v0..v5) and expands
; per element with 32-bit ops; GFX8 receives packed registers and uses 16-bit
; shifts plus SDWA forms to reach the high half; GFX9 and later collapse the
; whole operation into packed v_pk_lshlrev_b16 / v_pk_lshrrev_b16 with the
; amount masked by the packed constant 0xf000f.
3747 define <2 x i16> @v_fshr_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) {
3748 ; GFX6-LABEL: v_fshr_v2i16:
3750 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3751 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
3752 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
3753 ; GFX6-NEXT: v_or_b32_e32 v4, v5, v4
3754 ; GFX6-NEXT: v_bfe_u32 v5, v2, 1, 15
3755 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
3756 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 14, v5
3757 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v5
3758 ; GFX6-NEXT: v_bfe_u32 v5, v3, 1, 15
3759 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
3760 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 14, v5
3761 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
3762 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v5
3763 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v4
3764 ; GFX6-NEXT: v_and_b32_e32 v6, 15, v4
3765 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
3766 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2
3767 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
3768 ; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
3769 ; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15
3770 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
3771 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v6, v0
3772 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
3773 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
3774 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v5
3775 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v5
3776 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
3777 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
3778 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3779 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1
3780 ; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15
3781 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v4
3782 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2
3783 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
3784 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3786 ; GFX8-LABEL: v_fshr_v2i16:
3788 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3789 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v0
3790 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, 15, v1
3791 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v4
3792 ; GFX8-NEXT: v_mov_b32_e32 v4, 1
3793 ; GFX8-NEXT: v_mov_b32_e32 v5, 15
3794 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3795 ; GFX8-NEXT: v_lshrrev_b16_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3796 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
3797 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
3798 ; GFX8-NEXT: v_lshlrev_b16_e32 v5, 1, v1
3799 ; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3800 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
3801 ; GFX8-NEXT: v_and_b32_e32 v6, 15, v2
3802 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
3803 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v2
3804 ; GFX8-NEXT: v_lshrrev_b16_e32 v5, 1, v5
3805 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, v6, v3
3806 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v2, v5
3807 ; GFX8-NEXT: v_or_b32_e32 v2, v3, v2
3808 ; GFX8-NEXT: v_and_b32_e32 v3, 15, v4
3809 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4
3810 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v4
3811 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 1, v1
3812 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0
3813 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, v4, v1
3814 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
3815 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
3816 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
3817 ; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3818 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3820 ; GFX9-LABEL: v_fshr_v2i16:
3822 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3823 ; GFX9-NEXT: v_and_b32_e32 v3, 0xf000f, v2
3824 ; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
3825 ; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v2
3826 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3827 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v2, v0
3828 ; GFX9-NEXT: v_pk_lshrrev_b16 v1, v3, v1
3829 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
3830 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3832 ; GFX10-LABEL: v_fshr_v2i16:
3834 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3835 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2
3836 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3837 ; GFX10-NEXT: v_and_b32_e32 v2, 0xf000f, v2
3838 ; GFX10-NEXT: v_and_b32_e32 v3, 0xf000f, v3
3839 ; GFX10-NEXT: v_pk_lshrrev_b16 v1, v2, v1
3840 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v3, v0
3841 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3842 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3844 ; GFX11-LABEL: v_fshr_v2i16:
3846 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3847 ; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2
3848 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3849 ; GFX11-NEXT: v_and_b32_e32 v2, 0xf000f, v2
3850 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
3851 ; GFX11-NEXT: v_and_b32_e32 v3, 0xf000f, v3
3852 ; GFX11-NEXT: v_pk_lshrrev_b16 v1, v2, v1
3853 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3854 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, v3, v0
3855 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
3856 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3857 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3858 ret <2 x i16> %result
; Funnel shift right on <2 x i16> by the constant vector <4, 8>, i.e. a
; different constant amount per element. With constant amounts the masking
; folds away: GFX6 and GFX8 are expected to shift each element by its own
; immediate, while GFX9 and later use one packed left shift and one packed
; right shift whose amounts are the packed constants 0x8000c (12 in the low
; half, 8 in the high half) and 0x80004 (4 in the low half, 8 in the high).
3861 define <2 x i16> @v_fshr_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) {
3862 ; GFX6-LABEL: v_fshr_v2i16_4_8:
3864 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3865 ; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15
3866 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 12, v0
3867 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 3, v2
3868 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
3869 ; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15
3870 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
3871 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 7, v2
3872 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
3873 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3875 ; GFX8-LABEL: v_fshr_v2i16_4_8:
3877 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3878 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3879 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 12, v0
3880 ; GFX8-NEXT: v_lshrrev_b16_e32 v3, 4, v1
3881 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
3882 ; GFX8-NEXT: v_mov_b32_e32 v3, 8
3883 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, 8, v2
3884 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3885 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
3886 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
3887 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
3888 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3889 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3891 ; GFX9-LABEL: v_fshr_v2i16_4_8:
3893 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3894 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x8000c
3895 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v2, v0
3896 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x80004
3897 ; GFX9-NEXT: v_pk_lshrrev_b16 v1, v2, v1
3898 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
3899 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3901 ; GFX10-LABEL: v_fshr_v2i16_4_8:
3903 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3904 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 0x8000c, v0
3905 ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 0x80004, v1
3906 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3907 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3909 ; GFX11-LABEL: v_fshr_v2i16_4_8:
3911 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3912 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, 0x8000c, v0
3913 ; GFX11-NEXT: v_pk_lshrrev_b16 v1, 0x80004, v1
3914 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
3915 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
3916 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3917 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> <i16 4, i16 8>)
3918 ret <2 x i16> %result
; Funnel shift right of <2 x i16> in an amdgpu_ps function where %lhs and %rhs
; are inreg and %amt is not (the "ssv" suffix matches that operand order).
; In the expectations below, lhs/rhs are read from s registers and the shift
; amount from v registers, so the variable shifts are VALU instructions with
; scalar data operands. The result is bitcast to float.
3921 define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> %amt) {
3922 ; GFX6-LABEL: v_fshr_v2i16_ssv:
3924 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
3925 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
3926 ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
3927 ; GFX6-NEXT: s_bfe_u32 s4, s2, 0xf0001
3928 ; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
3929 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
3930 ; GFX6-NEXT: s_lshr_b32 s4, s4, 14
3931 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
3932 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v0
3933 ; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
3934 ; GFX6-NEXT: s_or_b32 s0, s0, s4
3935 ; GFX6-NEXT: s_lshl_b32 s2, s2, 1
3936 ; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
3937 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3938 ; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2
3939 ; GFX6-NEXT: s_bfe_u32 s0, s2, 0xf0001
3940 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
3941 ; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0
3942 ; GFX6-NEXT: s_bfe_u32 s4, s3, 0xf0001
3943 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
3944 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v1
3945 ; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1
3946 ; GFX6-NEXT: s_lshl_b32 s1, s1, 1
3947 ; GFX6-NEXT: s_lshr_b32 s4, s4, 14
3948 ; GFX6-NEXT: s_lshl_b32 s3, s3, 1
3949 ; GFX6-NEXT: v_and_b32_e32 v1, 15, v1
3950 ; GFX6-NEXT: s_or_b32 s1, s1, s4
3951 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3952 ; GFX6-NEXT: s_bfe_u32 s0, s3, 0xf0001
3953 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
3954 ; GFX6-NEXT: v_lshl_b32_e32 v2, s1, v2
3955 ; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1
3956 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
3957 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
3958 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
3959 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
3960 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
3961 ; GFX6-NEXT: ; return to shader part epilog
3963 ; GFX8-LABEL: v_fshr_v2i16_ssv:
3965 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s1
3966 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
3967 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
3968 ; GFX8-NEXT: s_lshr_b32 s4, s4, 15
3969 ; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
3970 ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
3971 ; GFX8-NEXT: s_or_b32 s0, s0, s4
3972 ; GFX8-NEXT: s_lshl_b32 s1, s1, 1
3973 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v0
3974 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
3975 ; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
3976 ; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s0
3977 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s1
3978 ; GFX8-NEXT: v_and_b32_e32 v0, 15, v0
3979 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
3980 ; GFX8-NEXT: s_lshr_b32 s4, s3, 15
3981 ; GFX8-NEXT: s_lshl_b32 s3, s3, 1
3982 ; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0
3983 ; GFX8-NEXT: s_lshl_b32 s2, s2, 1
3984 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
3985 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v1
3986 ; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1
3987 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s3
3988 ; GFX8-NEXT: s_or_b32 s2, s2, s4
3989 ; GFX8-NEXT: v_and_b32_e32 v1, 15, v1
3990 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
3991 ; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s2
3992 ; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s0
3993 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
3994 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
3995 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
3996 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3997 ; GFX8-NEXT: ; return to shader part epilog
3999 ; GFX9-LABEL: v_fshr_v2i16_ssv:
4001 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
4002 ; GFX9-NEXT: v_and_b32_e32 v1, 0xf000f, v0
4003 ; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
4004 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001
4005 ; GFX9-NEXT: s_lshl_b32 s2, s2, 1
4006 ; GFX9-NEXT: v_and_b32_e32 v0, 0xf000f, v0
4007 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
4008 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v0, s0
4009 ; GFX9-NEXT: v_pk_lshrrev_b16 v1, v1, s1
4010 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
4011 ; GFX9-NEXT: ; return to shader part epilog
4013 ; GFX10-LABEL: v_fshr_v2i16_ssv:
4015 ; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0
4016 ; GFX10-NEXT: s_lshr_b32 s2, s0, 16
4017 ; GFX10-NEXT: v_and_b32_e32 v0, 0xf000f, v0
4018 ; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001
4019 ; GFX10-NEXT: s_lshl_b32 s2, s2, 1
4020 ; GFX10-NEXT: v_and_b32_e32 v1, 0xf000f, v1
4021 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2
4022 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, v0, s1
4023 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, v1, s0
4024 ; GFX10-NEXT: v_or_b32_e32 v0, v1, v0
4025 ; GFX10-NEXT: ; return to shader part epilog
4027 ; GFX11-LABEL: v_fshr_v2i16_ssv:
4029 ; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0
4030 ; GFX11-NEXT: s_lshr_b32 s2, s0, 16
4031 ; GFX11-NEXT: v_and_b32_e32 v0, 0xf000f, v0
4032 ; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001
4033 ; GFX11-NEXT: s_lshl_b32 s2, s2, 1
4034 ; GFX11-NEXT: v_and_b32_e32 v1, 0xf000f, v1
4035 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s2
4036 ; GFX11-NEXT: v_pk_lshrrev_b16 v0, v0, s1
4037 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4038 ; GFX11-NEXT: v_pk_lshlrev_b16 v1, v1, s0
4039 ; GFX11-NEXT: v_or_b32_e32 v0, v1, v0
4040 ; GFX11-NEXT: ; return to shader part epilog
; IR under test: variable-amount funnel shift, then reinterpret the 32 result
; bits as float.
4041 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
4042 %cast = bitcast <2 x i16> %result to float
; Funnel shift right of <2 x i16> in an amdgpu_ps function where %lhs and %amt
; are inreg and %rhs is not (the "svs" suffix matches that operand order).
; In the expectations below, rhs is read from v registers while lhs and the
; shift amount come from s registers; on gfx900 and newer the lhs shift is done
; with scalar instructions and only the rhs shift is a packed VALU shift.
; The result is bitcast to float.
4046 define amdgpu_ps float @v_fshr_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <2 x i16> inreg %amt) {
4047 ; GFX6-LABEL: v_fshr_v2i16_svs:
4049 ; GFX6-NEXT: v_bfe_u32 v2, v0, 1, 15
4050 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
4051 ; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
4052 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
4053 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 14, v2
4054 ; GFX6-NEXT: v_bfe_u32 v3, v1, 1, 15
4055 ; GFX6-NEXT: s_or_b32 s2, s3, s2
4056 ; GFX6-NEXT: v_or_b32_e32 v2, s0, v2
4057 ; GFX6-NEXT: s_lshl_b32 s0, s1, 1
4058 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 14, v3
4059 ; GFX6-NEXT: v_or_b32_e32 v3, s0, v3
4060 ; GFX6-NEXT: s_xor_b32 s0, s2, -1
4061 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
4062 ; GFX6-NEXT: s_lshr_b32 s1, s0, 16
4063 ; GFX6-NEXT: s_and_b32 s2, s0, 15
4064 ; GFX6-NEXT: s_andn2_b32 s0, 15, s0
4065 ; GFX6-NEXT: v_bfe_u32 v0, v0, 1, 15
4066 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
4067 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
4068 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, s0, v0
4069 ; GFX6-NEXT: s_and_b32 s0, s1, 15
4070 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
4071 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, s2, v2
4072 ; GFX6-NEXT: s_andn2_b32 s1, 15, s1
4073 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
4074 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
4075 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, s0, v3
4076 ; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 15
4077 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s1
4078 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, s0, v1
4079 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
4080 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
4081 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
4082 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
4083 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
4084 ; GFX6-NEXT: ; return to shader part epilog
4086 ; GFX8-LABEL: v_fshr_v2i16_svs:
4088 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
4089 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
4090 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 15, v0
4091 ; GFX8-NEXT: v_mov_b32_e32 v2, 15
4092 ; GFX8-NEXT: v_or_b32_e32 v1, s0, v1
4093 ; GFX8-NEXT: s_lshl_b32 s0, s2, 1
4094 ; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4095 ; GFX8-NEXT: v_or_b32_e32 v2, s0, v2
4096 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v0
4097 ; GFX8-NEXT: v_mov_b32_e32 v4, 1
4098 ; GFX8-NEXT: s_xor_b32 s0, s1, -1
4099 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4100 ; GFX8-NEXT: s_lshr_b32 s1, s0, 16
4101 ; GFX8-NEXT: s_and_b32 s2, s0, 15
4102 ; GFX8-NEXT: s_andn2_b32 s0, 15, s0
4103 ; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3
4104 ; GFX8-NEXT: v_lshrrev_b16_e32 v3, s0, v3
4105 ; GFX8-NEXT: s_and_b32 s0, s1, 15
4106 ; GFX8-NEXT: s_andn2_b32 s1, 15, s1
4107 ; GFX8-NEXT: v_lshrrev_b16_e32 v0, 1, v0
4108 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, s0, v2
4109 ; GFX8-NEXT: v_lshrrev_b16_e32 v0, s1, v0
4110 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
4111 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, s2, v1
4112 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
4113 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
4114 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
4115 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4116 ; GFX8-NEXT: ; return to shader part epilog
4118 ; GFX9-LABEL: v_fshr_v2i16_svs:
4120 ; GFX9-NEXT: s_lshr_b32 s3, s0, 16
4121 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001
4122 ; GFX9-NEXT: s_lshl_b32 s3, s3, 1
4123 ; GFX9-NEXT: s_and_b32 s2, s1, 0xf000f
4124 ; GFX9-NEXT: s_andn2_b32 s1, 0xf000f, s1
4125 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s3
4126 ; GFX9-NEXT: s_lshr_b32 s3, s0, 16
4127 ; GFX9-NEXT: s_lshr_b32 s4, s1, 16
4128 ; GFX9-NEXT: s_lshl_b32 s0, s0, s1
4129 ; GFX9-NEXT: s_lshl_b32 s1, s3, s4
4130 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1
4131 ; GFX9-NEXT: v_pk_lshrrev_b16 v0, s2, v0
4132 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
4133 ; GFX9-NEXT: ; return to shader part epilog
4135 ; GFX10-LABEL: v_fshr_v2i16_svs:
4137 ; GFX10-NEXT: s_lshr_b32 s2, s0, 16
4138 ; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001
4139 ; GFX10-NEXT: s_lshl_b32 s2, s2, 1
4140 ; GFX10-NEXT: s_and_b32 s3, s1, 0xf000f
4141 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2
4142 ; GFX10-NEXT: s_andn2_b32 s1, 0xf000f, s1
4143 ; GFX10-NEXT: s_lshr_b32 s2, s0, 16
4144 ; GFX10-NEXT: s_lshr_b32 s4, s1, 16
4145 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, s3, v0
4146 ; GFX10-NEXT: s_lshl_b32 s0, s0, s1
4147 ; GFX10-NEXT: s_lshl_b32 s1, s2, s4
4148 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1
4149 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
4150 ; GFX10-NEXT: ; return to shader part epilog
4152 ; GFX11-LABEL: v_fshr_v2i16_svs:
4154 ; GFX11-NEXT: s_lshr_b32 s2, s0, 16
4155 ; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001
4156 ; GFX11-NEXT: s_lshl_b32 s2, s2, 1
4157 ; GFX11-NEXT: s_and_b32 s3, s1, 0xf000f
4158 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s2
4159 ; GFX11-NEXT: s_and_not1_b32 s1, 0xf000f, s1
4160 ; GFX11-NEXT: s_lshr_b32 s2, s0, 16
4161 ; GFX11-NEXT: s_lshr_b32 s4, s1, 16
4162 ; GFX11-NEXT: v_pk_lshrrev_b16 v0, s3, v0
4163 ; GFX11-NEXT: s_lshl_b32 s0, s0, s1
4164 ; GFX11-NEXT: s_lshl_b32 s1, s2, s4
4165 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
4166 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s1
4167 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
4168 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v0
4169 ; GFX11-NEXT: ; return to shader part epilog
; IR under test: variable-amount funnel shift, then reinterpret the 32 result
; bits as float.
4170 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
4171 %cast = bitcast <2 x i16> %result to float
; Funnel shift right of <2 x i16> in an amdgpu_ps function where %rhs and %amt
; are inreg and %lhs is not (the "vss" suffix matches that operand order).
; In the expectations below, lhs is read from v registers while rhs and the
; shift amount come from s registers; on gfx900 and newer the rhs shift is done
; with scalar instructions and only the lhs shift is a packed VALU shift.
; The result is bitcast to float.
4175 define amdgpu_ps float @v_fshr_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
4176 ; GFX6-LABEL: v_fshr_v2i16_vss:
4178 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
4179 ; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
4180 ; GFX6-NEXT: s_or_b32 s2, s3, s2
4181 ; GFX6-NEXT: s_bfe_u32 s3, s0, 0xf0001
4182 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
4183 ; GFX6-NEXT: s_lshr_b32 s3, s3, 14
4184 ; GFX6-NEXT: v_or_b32_e32 v0, s3, v0
4185 ; GFX6-NEXT: s_bfe_u32 s3, s1, 0xf0001
4186 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
4187 ; GFX6-NEXT: s_lshr_b32 s3, s3, 14
4188 ; GFX6-NEXT: s_xor_b32 s2, s2, -1
4189 ; GFX6-NEXT: v_or_b32_e32 v1, s3, v1
4190 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
4191 ; GFX6-NEXT: s_lshr_b32 s3, s2, 16
4192 ; GFX6-NEXT: s_and_b32 s4, s2, 15
4193 ; GFX6-NEXT: s_andn2_b32 s2, 15, s2
4194 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
4195 ; GFX6-NEXT: s_bfe_u32 s0, s0, 0xf0001
4196 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
4197 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0
4198 ; GFX6-NEXT: s_lshr_b32 s0, s0, s2
4199 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
4200 ; GFX6-NEXT: s_and_b32 s0, s3, 15
4201 ; GFX6-NEXT: s_lshl_b32 s1, s1, 1
4202 ; GFX6-NEXT: s_andn2_b32 s2, 15, s3
4203 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
4204 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, s0, v1
4205 ; GFX6-NEXT: s_bfe_u32 s0, s1, 0xf0001
4206 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
4207 ; GFX6-NEXT: s_lshr_b32 s0, s0, s1
4208 ; GFX6-NEXT: v_or_b32_e32 v1, s0, v1
4209 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
4210 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
4211 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
4212 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
4213 ; GFX6-NEXT: ; return to shader part epilog
4215 ; GFX8-LABEL: v_fshr_v2i16_vss:
4217 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s0
4218 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
4219 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 1, v0
4220 ; GFX8-NEXT: s_lshr_b32 s3, s3, 15
4221 ; GFX8-NEXT: v_mov_b32_e32 v2, 1
4222 ; GFX8-NEXT: v_or_b32_e32 v1, s3, v1
4223 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4224 ; GFX8-NEXT: s_lshr_b32 s3, s2, 15
4225 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
4226 ; GFX8-NEXT: s_xor_b32 s1, s1, -1
4227 ; GFX8-NEXT: v_or_b32_e32 v0, s3, v0
4228 ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
4229 ; GFX8-NEXT: s_and_b32 s4, s1, 15
4230 ; GFX8-NEXT: s_andn2_b32 s1, 15, s1
4231 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
4232 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
4233 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
4234 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, s4, v1
4235 ; GFX8-NEXT: s_lshr_b32 s0, s0, s1
4236 ; GFX8-NEXT: s_lshl_b32 s2, s2, 1
4237 ; GFX8-NEXT: v_or_b32_e32 v1, s0, v1
4238 ; GFX8-NEXT: s_and_b32 s0, s3, 15
4239 ; GFX8-NEXT: s_andn2_b32 s1, 15, s3
4240 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, s0, v0
4241 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s2
4242 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
4243 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
4244 ; GFX8-NEXT: s_lshr_b32 s0, s0, s1
4245 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
4246 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
4247 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
4248 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4249 ; GFX8-NEXT: ; return to shader part epilog
4251 ; GFX9-LABEL: v_fshr_v2i16_vss:
4253 ; GFX9-NEXT: s_and_b32 s2, s1, 0xf000f
4254 ; GFX9-NEXT: s_andn2_b32 s1, 0xf000f, s1
4255 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4256 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, s1, v0
4257 ; GFX9-NEXT: s_lshr_b32 s1, s0, 16
4258 ; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
4259 ; GFX9-NEXT: s_lshr_b32 s3, s2, 16
4260 ; GFX9-NEXT: s_lshr_b32 s0, s0, s2
4261 ; GFX9-NEXT: s_lshr_b32 s1, s1, s3
4262 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1
4263 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
4264 ; GFX9-NEXT: ; return to shader part epilog
4266 ; GFX10-LABEL: v_fshr_v2i16_vss:
4268 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4269 ; GFX10-NEXT: s_and_b32 s2, s1, 0xf000f
4270 ; GFX10-NEXT: s_andn2_b32 s1, 0xf000f, s1
4271 ; GFX10-NEXT: s_lshr_b32 s3, s0, 16
4272 ; GFX10-NEXT: s_and_b32 s0, s0, 0xffff
4273 ; GFX10-NEXT: s_lshr_b32 s4, s2, 16
4274 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, s1, v0
4275 ; GFX10-NEXT: s_lshr_b32 s0, s0, s2
4276 ; GFX10-NEXT: s_lshr_b32 s1, s3, s4
4277 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1
4278 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
4279 ; GFX10-NEXT: ; return to shader part epilog
4281 ; GFX11-LABEL: v_fshr_v2i16_vss:
4283 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4284 ; GFX11-NEXT: s_and_b32 s2, s1, 0xf000f
4285 ; GFX11-NEXT: s_and_not1_b32 s1, 0xf000f, s1
4286 ; GFX11-NEXT: s_lshr_b32 s3, s0, 16
4287 ; GFX11-NEXT: s_and_b32 s0, s0, 0xffff
4288 ; GFX11-NEXT: s_lshr_b32 s4, s2, 16
4289 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, s1, v0
4290 ; GFX11-NEXT: s_lshr_b32 s0, s0, s2
4291 ; GFX11-NEXT: s_lshr_b32 s1, s3, s4
4292 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
4293 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s1
4294 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
4295 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v0
4296 ; GFX11-NEXT: ; return to shader part epilog
; IR under test: variable-amount funnel shift, then reinterpret the 32 result
; bits as float.
4297 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
4298 %cast = bitcast <2 x i16> %result to float
; Funnel shift right of <3 x i16> in an amdgpu_ps function with all three
; operands marked inreg. Every instruction in the expectations below is a
; scalar (s_*) instruction operating on s registers. The 48-bit result is
; bitcast to i48; each target's sequence ends by masking s1 with 0xffff.
4302 define amdgpu_ps i48 @s_fshr_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <3 x i16> inreg %amt) {
4303 ; GFX6-LABEL: s_fshr_v3i16:
4305 ; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
4306 ; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
4307 ; GFX6-NEXT: s_lshl_b32 s7, s7, 16
4308 ; GFX6-NEXT: s_or_b32 s6, s6, s7
4309 ; GFX6-NEXT: s_and_b32 s7, s8, 0xffff
4310 ; GFX6-NEXT: s_bfe_u32 s8, s3, 0xf0001
4311 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
4312 ; GFX6-NEXT: s_lshr_b32 s8, s8, 14
4313 ; GFX6-NEXT: s_or_b32 s0, s0, s8
4314 ; GFX6-NEXT: s_bfe_u32 s8, s4, 0xf0001
4315 ; GFX6-NEXT: s_lshl_b32 s1, s1, 1
4316 ; GFX6-NEXT: s_lshr_b32 s8, s8, 14
4317 ; GFX6-NEXT: s_xor_b32 s6, s6, -1
4318 ; GFX6-NEXT: s_or_b32 s1, s1, s8
4319 ; GFX6-NEXT: s_lshl_b32 s3, s3, 1
4320 ; GFX6-NEXT: s_lshr_b32 s8, s6, 16
4321 ; GFX6-NEXT: s_and_b32 s9, s6, 15
4322 ; GFX6-NEXT: s_andn2_b32 s6, 15, s6
4323 ; GFX6-NEXT: s_and_b32 s9, 0xffff, s9
4324 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001
4325 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
4326 ; GFX6-NEXT: s_lshl_b32 s0, s0, s9
4327 ; GFX6-NEXT: s_lshr_b32 s3, s3, s6
4328 ; GFX6-NEXT: s_or_b32 s0, s0, s3
4329 ; GFX6-NEXT: s_and_b32 s3, s8, 15
4330 ; GFX6-NEXT: s_lshl_b32 s4, s4, 1
4331 ; GFX6-NEXT: s_andn2_b32 s6, 15, s8
4332 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
4333 ; GFX6-NEXT: s_lshl_b32 s1, s1, s3
4334 ; GFX6-NEXT: s_bfe_u32 s3, s4, 0xf0001
4335 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s6
4336 ; GFX6-NEXT: s_lshr_b32 s3, s3, s4
4337 ; GFX6-NEXT: s_or_b32 s1, s1, s3
4338 ; GFX6-NEXT: s_bfe_u32 s3, s5, 0xf0001
4339 ; GFX6-NEXT: s_lshl_b32 s2, s2, 1
4340 ; GFX6-NEXT: s_lshr_b32 s3, s3, 14
4341 ; GFX6-NEXT: s_xor_b32 s4, s7, -1
4342 ; GFX6-NEXT: s_or_b32 s2, s2, s3
4343 ; GFX6-NEXT: s_lshl_b32 s3, s5, 1
4344 ; GFX6-NEXT: s_and_b32 s5, s4, 15
4345 ; GFX6-NEXT: s_andn2_b32 s4, 15, s4
4346 ; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
4347 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001
4348 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
4349 ; GFX6-NEXT: s_lshl_b32 s2, s2, s5
4350 ; GFX6-NEXT: s_lshr_b32 s3, s3, s4
4351 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
4352 ; GFX6-NEXT: s_or_b32 s2, s2, s3
4353 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
4354 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
4355 ; GFX6-NEXT: s_or_b32 s0, s0, s1
4356 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
4357 ; GFX6-NEXT: ; return to shader part epilog
4359 ; GFX8-LABEL: s_fshr_v3i16:
4361 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s2
4362 ; GFX8-NEXT: s_lshr_b32 s6, s0, 16
4363 ; GFX8-NEXT: s_lshr_b32 s7, s2, 16
4364 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
4365 ; GFX8-NEXT: s_lshr_b32 s8, s8, 15
4366 ; GFX8-NEXT: s_or_b32 s0, s0, s8
4367 ; GFX8-NEXT: s_lshl_b32 s6, s6, 1
4368 ; GFX8-NEXT: s_lshr_b32 s8, s7, 15
4369 ; GFX8-NEXT: s_lshl_b32 s2, s2, 1
4370 ; GFX8-NEXT: s_xor_b32 s4, s4, -1
4371 ; GFX8-NEXT: s_or_b32 s6, s6, s8
4372 ; GFX8-NEXT: s_lshr_b32 s8, s4, 16
4373 ; GFX8-NEXT: s_and_b32 s9, s4, 15
4374 ; GFX8-NEXT: s_andn2_b32 s4, 15, s4
4375 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4376 ; GFX8-NEXT: s_and_b32 s9, 0xffff, s9
4377 ; GFX8-NEXT: s_lshr_b32 s2, s2, 1
4378 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
4379 ; GFX8-NEXT: s_lshl_b32 s0, s0, s9
4380 ; GFX8-NEXT: s_lshr_b32 s2, s2, s4
4381 ; GFX8-NEXT: s_or_b32 s0, s0, s2
4382 ; GFX8-NEXT: s_and_b32 s2, s8, 15
4383 ; GFX8-NEXT: s_lshl_b32 s7, s7, 1
4384 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4385 ; GFX8-NEXT: s_andn2_b32 s4, 15, s8
4386 ; GFX8-NEXT: s_lshl_b32 s2, s6, s2
4387 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s7
4388 ; GFX8-NEXT: s_lshr_b32 s6, s6, 1
4389 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
4390 ; GFX8-NEXT: s_lshr_b32 s4, s6, s4
4391 ; GFX8-NEXT: s_or_b32 s2, s2, s4
4392 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s3
4393 ; GFX8-NEXT: s_lshl_b32 s1, s1, 1
4394 ; GFX8-NEXT: s_lshr_b32 s4, s4, 15
4395 ; GFX8-NEXT: s_or_b32 s1, s1, s4
4396 ; GFX8-NEXT: s_lshl_b32 s3, s3, 1
4397 ; GFX8-NEXT: s_xor_b32 s4, s5, -1
4398 ; GFX8-NEXT: s_and_b32 s5, s4, 15
4399 ; GFX8-NEXT: s_andn2_b32 s4, 15, s4
4400 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
4401 ; GFX8-NEXT: s_and_b32 s5, 0xffff, s5
4402 ; GFX8-NEXT: s_lshr_b32 s3, s3, 1
4403 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
4404 ; GFX8-NEXT: s_lshl_b32 s1, s1, s5
4405 ; GFX8-NEXT: s_lshr_b32 s3, s3, s4
4406 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4407 ; GFX8-NEXT: s_or_b32 s1, s1, s3
4408 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
4409 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
4410 ; GFX8-NEXT: s_or_b32 s0, s0, s2
4411 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
4412 ; GFX8-NEXT: ; return to shader part epilog
4414 ; GFX9-LABEL: s_fshr_v3i16:
4416 ; GFX9-NEXT: s_lshr_b32 s7, s0, 16
4417 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001
4418 ; GFX9-NEXT: s_lshl_b32 s7, s7, 1
4419 ; GFX9-NEXT: s_and_b32 s6, s4, 0xf000f
4420 ; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s4
4421 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s7
4422 ; GFX9-NEXT: s_lshr_b32 s7, s0, 16
4423 ; GFX9-NEXT: s_lshr_b32 s8, s4, 16
4424 ; GFX9-NEXT: s_lshl_b32 s0, s0, s4
4425 ; GFX9-NEXT: s_lshl_b32 s4, s7, s8
4426 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4
4427 ; GFX9-NEXT: s_lshr_b32 s4, s2, 16
4428 ; GFX9-NEXT: s_and_b32 s2, s2, 0xffff
4429 ; GFX9-NEXT: s_lshr_b32 s7, s6, 16
4430 ; GFX9-NEXT: s_lshr_b32 s2, s2, s6
4431 ; GFX9-NEXT: s_lshr_b32 s4, s4, s7
4432 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4
4433 ; GFX9-NEXT: s_or_b32 s0, s0, s2
4434 ; GFX9-NEXT: s_and_b32 s2, s5, 0xf000f
4435 ; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s5
4436 ; GFX9-NEXT: s_lshr_b32 s5, s1, 16
4437 ; GFX9-NEXT: s_lshl_b32 s1, s1, 0x10001
4438 ; GFX9-NEXT: s_lshl_b32 s5, s5, 1
4439 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5
4440 ; GFX9-NEXT: s_lshr_b32 s5, s1, 16
4441 ; GFX9-NEXT: s_lshr_b32 s6, s4, 16
4442 ; GFX9-NEXT: s_lshl_b32 s1, s1, s4
4443 ; GFX9-NEXT: s_lshl_b32 s4, s5, s6
4444 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4
4445 ; GFX9-NEXT: s_lshr_b32 s4, s3, 16
4446 ; GFX9-NEXT: s_and_b32 s3, s3, 0xffff
4447 ; GFX9-NEXT: s_lshr_b32 s5, s2, 16
4448 ; GFX9-NEXT: s_lshr_b32 s2, s3, s2
4449 ; GFX9-NEXT: s_lshr_b32 s3, s4, s5
4450 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s3
4451 ; GFX9-NEXT: s_or_b32 s1, s1, s2
4452 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
4453 ; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
4454 ; GFX9-NEXT: s_lshl_b32 s2, s2, 16
4455 ; GFX9-NEXT: s_or_b32 s0, s0, s2
4456 ; GFX9-NEXT: s_and_b32 s1, s1, 0xffff
4457 ; GFX9-NEXT: ; return to shader part epilog
4459 ; GFX10-LABEL: s_fshr_v3i16:
4461 ; GFX10-NEXT: s_lshr_b32 s6, s0, 16
4462 ; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001
4463 ; GFX10-NEXT: s_lshl_b32 s6, s6, 1
4464 ; GFX10-NEXT: s_and_b32 s7, s4, 0xf000f
4465 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s6
4466 ; GFX10-NEXT: s_andn2_b32 s4, 0xf000f, s4
4467 ; GFX10-NEXT: s_lshr_b32 s6, s0, 16
4468 ; GFX10-NEXT: s_lshr_b32 s8, s4, 16
4469 ; GFX10-NEXT: s_lshl_b32 s0, s0, s4
4470 ; GFX10-NEXT: s_lshl_b32 s4, s6, s8
4471 ; GFX10-NEXT: s_lshr_b32 s6, s2, 16
4472 ; GFX10-NEXT: s_and_b32 s2, s2, 0xffff
4473 ; GFX10-NEXT: s_lshr_b32 s8, s7, 16
4474 ; GFX10-NEXT: s_lshr_b32 s2, s2, s7
4475 ; GFX10-NEXT: s_lshr_b32 s6, s6, s8
4476 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4
4477 ; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s6
4478 ; GFX10-NEXT: s_and_b32 s4, s5, 0xf000f
4479 ; GFX10-NEXT: s_or_b32 s0, s0, s2
4480 ; GFX10-NEXT: s_lshr_b32 s2, s1, 16
4481 ; GFX10-NEXT: s_lshl_b32 s1, s1, 0x10001
4482 ; GFX10-NEXT: s_lshl_b32 s2, s2, 1
4483 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s2
4484 ; GFX10-NEXT: s_andn2_b32 s2, 0xf000f, s5
4485 ; GFX10-NEXT: s_lshr_b32 s5, s1, 16
4486 ; GFX10-NEXT: s_lshr_b32 s6, s2, 16
4487 ; GFX10-NEXT: s_lshl_b32 s1, s1, s2
4488 ; GFX10-NEXT: s_lshl_b32 s2, s5, s6
4489 ; GFX10-NEXT: s_lshr_b32 s5, s3, 16
4490 ; GFX10-NEXT: s_and_b32 s3, s3, 0xffff
4491 ; GFX10-NEXT: s_lshr_b32 s6, s4, 16
4492 ; GFX10-NEXT: s_lshr_b32 s3, s3, s4
4493 ; GFX10-NEXT: s_lshr_b32 s4, s5, s6
4494 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s2
4495 ; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4
4496 ; GFX10-NEXT: s_lshr_b32 s3, s0, 16
4497 ; GFX10-NEXT: s_and_b32 s0, s0, 0xffff
4498 ; GFX10-NEXT: s_lshl_b32 s3, s3, 16
4499 ; GFX10-NEXT: s_or_b32 s1, s1, s2
4500 ; GFX10-NEXT: s_or_b32 s0, s0, s3
4501 ; GFX10-NEXT: s_and_b32 s1, s1, 0xffff
4502 ; GFX10-NEXT: ; return to shader part epilog
4504 ; GFX11-LABEL: s_fshr_v3i16:
4506 ; GFX11-NEXT: s_lshr_b32 s6, s0, 16
4507 ; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001
4508 ; GFX11-NEXT: s_lshl_b32 s6, s6, 1
4509 ; GFX11-NEXT: s_and_b32 s7, s4, 0xf000f
4510 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s6
4511 ; GFX11-NEXT: s_and_not1_b32 s4, 0xf000f, s4
4512 ; GFX11-NEXT: s_lshr_b32 s6, s0, 16
4513 ; GFX11-NEXT: s_lshr_b32 s8, s4, 16
4514 ; GFX11-NEXT: s_lshl_b32 s0, s0, s4
4515 ; GFX11-NEXT: s_lshl_b32 s4, s6, s8
4516 ; GFX11-NEXT: s_lshr_b32 s6, s2, 16
4517 ; GFX11-NEXT: s_and_b32 s2, s2, 0xffff
4518 ; GFX11-NEXT: s_lshr_b32 s8, s7, 16
4519 ; GFX11-NEXT: s_lshr_b32 s2, s2, s7
4520 ; GFX11-NEXT: s_lshr_b32 s6, s6, s8
4521 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s4
4522 ; GFX11-NEXT: s_pack_ll_b32_b16 s2, s2, s6
4523 ; GFX11-NEXT: s_and_b32 s4, s5, 0xf000f
4524 ; GFX11-NEXT: s_or_b32 s0, s0, s2
4525 ; GFX11-NEXT: s_lshr_b32 s2, s1, 16
4526 ; GFX11-NEXT: s_lshl_b32 s1, s1, 0x10001
4527 ; GFX11-NEXT: s_lshl_b32 s2, s2, 1
4528 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
4529 ; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s2
4530 ; GFX11-NEXT: s_and_not1_b32 s2, 0xf000f, s5
4531 ; GFX11-NEXT: s_lshr_b32 s5, s1, 16
4532 ; GFX11-NEXT: s_lshr_b32 s6, s2, 16
4533 ; GFX11-NEXT: s_lshl_b32 s1, s1, s2
4534 ; GFX11-NEXT: s_lshl_b32 s2, s5, s6
4535 ; GFX11-NEXT: s_lshr_b32 s5, s3, 16
4536 ; GFX11-NEXT: s_and_b32 s3, s3, 0xffff
4537 ; GFX11-NEXT: s_lshr_b32 s6, s4, 16
4538 ; GFX11-NEXT: s_lshr_b32 s3, s3, s4
4539 ; GFX11-NEXT: s_lshr_b32 s4, s5, s6
4540 ; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s2
4541 ; GFX11-NEXT: s_pack_ll_b32_b16 s2, s3, s4
4542 ; GFX11-NEXT: s_lshr_b32 s3, s0, 16
4543 ; GFX11-NEXT: s_and_b32 s0, s0, 0xffff
4544 ; GFX11-NEXT: s_lshl_b32 s3, s3, 16
4545 ; GFX11-NEXT: s_or_b32 s1, s1, s2
4546 ; GFX11-NEXT: s_or_b32 s0, s0, s3
4547 ; GFX11-NEXT: s_and_b32 s1, s1, 0xffff
4548 ; GFX11-NEXT: ; return to shader part epilog
; IR under test: variable-amount funnel shift on three i16 lanes, then
; reinterpret the 48 result bits as a single i48.
4549 %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
4550 %cast = bitcast <3 x i16> %result to i48
4554 define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) {
4555 ; GFX6-LABEL: v_fshr_v3i16:
4557 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4558 ; GFX6-NEXT: v_and_b32_e32 v7, 0xffff, v7
4559 ; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
4560 ; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7
4561 ; GFX6-NEXT: v_or_b32_e32 v6, v6, v7
4562 ; GFX6-NEXT: v_and_b32_e32 v7, 0xffff, v8
4563 ; GFX6-NEXT: v_bfe_u32 v8, v3, 1, 15
4564 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
4565 ; GFX6-NEXT: v_lshrrev_b32_e32 v8, 14, v8
4566 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v8
4567 ; GFX6-NEXT: v_bfe_u32 v8, v4, 1, 15
4568 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
4569 ; GFX6-NEXT: v_lshrrev_b32_e32 v8, 14, v8
4570 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
4571 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v8
4572 ; GFX6-NEXT: v_lshrrev_b32_e32 v8, 16, v6
4573 ; GFX6-NEXT: v_and_b32_e32 v9, 15, v6
4574 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
4575 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
4576 ; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4577 ; GFX6-NEXT: v_and_b32_e32 v9, 0xffff, v9
4578 ; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15
4579 ; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
4580 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v9, v0
4581 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3
4582 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
4583 ; GFX6-NEXT: v_and_b32_e32 v3, 15, v8
4584 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v8
4585 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4
4586 ; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4587 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
4588 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v3, v1
4589 ; GFX6-NEXT: v_bfe_u32 v3, v4, 1, 15
4590 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v6
4591 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
4592 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v3
4593 ; GFX6-NEXT: v_bfe_u32 v3, v5, 1, 15
4594 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2
4595 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 14, v3
4596 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v7
4597 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
4598 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v5
4599 ; GFX6-NEXT: v_and_b32_e32 v5, 15, v4
4600 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
4601 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
4602 ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5
4603 ; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15
4604 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
4605 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, v5, v2
4606 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
4607 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
4608 ; GFX6-NEXT: s_setpc_b64 s[30:31]
4610 ; GFX8-LABEL: v_fshr_v3i16:
4612 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4613 ; GFX8-NEXT: v_lshlrev_b16_e32 v7, 1, v0
4614 ; GFX8-NEXT: v_lshrrev_b16_e32 v8, 15, v2
4615 ; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v2
4616 ; GFX8-NEXT: v_or_b32_e32 v7, v7, v8
4617 ; GFX8-NEXT: v_mov_b32_e32 v8, 1
4618 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4619 ; GFX8-NEXT: v_lshrrev_b16_e32 v8, 15, v6
4620 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4
4621 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v8
4622 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, 1, v2
4623 ; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v4
4624 ; GFX8-NEXT: v_and_b32_e32 v9, 15, v4
4625 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4
4626 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v4
4627 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2
4628 ; GFX8-NEXT: v_lshlrev_b16_e32 v7, v9, v7
4629 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v4, v2
4630 ; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v6
4631 ; GFX8-NEXT: v_or_b32_e32 v2, v7, v2
4632 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v8
4633 ; GFX8-NEXT: v_xor_b32_e32 v7, -1, v8
4634 ; GFX8-NEXT: v_and_b32_e32 v7, 15, v7
4635 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v4, v0
4636 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, 1, v6
4637 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, v7, v4
4638 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
4639 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 1, v1
4640 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, 15, v3
4641 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
4642 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v5
4643 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v3
4644 ; GFX8-NEXT: v_and_b32_e32 v5, 15, v4
4645 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4
4646 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v4
4647 ; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3
4648 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, v5, v1
4649 ; GFX8-NEXT: v_lshrrev_b16_e32 v3, v4, v3
4650 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
4651 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
4652 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
4653 ; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4654 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
4655 ; GFX8-NEXT: s_setpc_b64 s[30:31]
4657 ; GFX9-LABEL: v_fshr_v3i16:
4659 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4660 ; GFX9-NEXT: v_and_b32_e32 v6, 0xf000f, v4
4661 ; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4
4662 ; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4
4663 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4664 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v4, v0
4665 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, v6, v2
4666 ; GFX9-NEXT: v_xor_b32_e32 v4, -1, v5
4667 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
4668 ; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v5
4669 ; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4
4670 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4671 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, v4, v1
4672 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, v2, v3
4673 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v2
4674 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4676 ; GFX10-LABEL: v_fshr_v3i16:
4678 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4679 ; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4
4680 ; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5
4681 ; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4
4682 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4683 ; GFX10-NEXT: v_and_b32_e32 v5, 0xf000f, v5
4684 ; GFX10-NEXT: v_and_b32_e32 v6, 0xf000f, v6
4685 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4686 ; GFX10-NEXT: v_and_b32_e32 v7, 0xf000f, v7
4687 ; GFX10-NEXT: v_pk_lshrrev_b16 v2, v4, v2
4688 ; GFX10-NEXT: v_pk_lshrrev_b16 v3, v5, v3
4689 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v6, v0
4690 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, v7, v1
4691 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
4692 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v3
4693 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4695 ; GFX11-LABEL: v_fshr_v3i16:
4697 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4698 ; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4
4699 ; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5
4700 ; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4
4701 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4702 ; GFX11-NEXT: v_and_b32_e32 v5, 0xf000f, v5
4703 ; GFX11-NEXT: v_and_b32_e32 v6, 0xf000f, v6
4704 ; GFX11-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4705 ; GFX11-NEXT: v_and_b32_e32 v7, 0xf000f, v7
4706 ; GFX11-NEXT: v_pk_lshrrev_b16 v2, v4, v2
4707 ; GFX11-NEXT: v_pk_lshrrev_b16 v3, v5, v3
4708 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, v6, v0
4709 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
4710 ; GFX11-NEXT: v_pk_lshlrev_b16 v1, v7, v1
4711 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v2
4712 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
4713 ; GFX11-NEXT: v_or_b32_e32 v1, v1, v3
4714 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4715 %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
4716 %cast.result = bitcast <3 x i16> %result to <3 x half>
4717 ret <3 x half> %cast.result
; s_fshr_v4i16: funnel shift right (llvm.fshr.v4i16) of <4 x i16> where %lhs,
; %rhs and %amt are all `inreg` arguments of an amdgpu_ps function. The result
; is returned bitcast to <2 x i32>.
; The expected code for every target consists only of scalar (s_*) instructions:
;   * GFX6 / GFX8: each 16-bit element is shifted with 32-bit scalar ops, then
;     pairs are repacked with s_and_b32 0xffff / s_lshl_b32 16 / s_or_b32.
;   * GFX9 / GFX10 / GFX11: both shift amounts of a pair are masked at once with
;     0xf000f and the halves are repacked with s_pack_ll_b32_b16.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
4720 define amdgpu_ps <2 x i32> @s_fshr_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs, <4 x i16> inreg %amt) {
4721 ; GFX6-LABEL: s_fshr_v4i16:
4723 ; GFX6-NEXT: s_lshl_b32 s9, s9, 16
4724 ; GFX6-NEXT: s_and_b32 s8, s8, 0xffff
4725 ; GFX6-NEXT: s_or_b32 s8, s9, s8
4726 ; GFX6-NEXT: s_lshl_b32 s9, s11, 16
4727 ; GFX6-NEXT: s_and_b32 s10, s10, 0xffff
4728 ; GFX6-NEXT: s_or_b32 s9, s9, s10
4729 ; GFX6-NEXT: s_bfe_u32 s10, s4, 0xf0001
4730 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
4731 ; GFX6-NEXT: s_lshr_b32 s10, s10, 14
4732 ; GFX6-NEXT: s_or_b32 s0, s0, s10
4733 ; GFX6-NEXT: s_bfe_u32 s10, s5, 0xf0001
4734 ; GFX6-NEXT: s_lshl_b32 s1, s1, 1
4735 ; GFX6-NEXT: s_lshr_b32 s10, s10, 14
4736 ; GFX6-NEXT: s_xor_b32 s8, s8, -1
4737 ; GFX6-NEXT: s_or_b32 s1, s1, s10
4738 ; GFX6-NEXT: s_lshl_b32 s4, s4, 1
4739 ; GFX6-NEXT: s_lshr_b32 s10, s8, 16
4740 ; GFX6-NEXT: s_and_b32 s11, s8, 15
4741 ; GFX6-NEXT: s_andn2_b32 s8, 15, s8
4742 ; GFX6-NEXT: s_and_b32 s11, 0xffff, s11
4743 ; GFX6-NEXT: s_bfe_u32 s4, s4, 0xf0001
4744 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
4745 ; GFX6-NEXT: s_lshl_b32 s0, s0, s11
4746 ; GFX6-NEXT: s_lshr_b32 s4, s4, s8
4747 ; GFX6-NEXT: s_or_b32 s0, s0, s4
4748 ; GFX6-NEXT: s_and_b32 s4, s10, 15
4749 ; GFX6-NEXT: s_lshl_b32 s5, s5, 1
4750 ; GFX6-NEXT: s_andn2_b32 s8, 15, s10
4751 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
4752 ; GFX6-NEXT: s_lshl_b32 s1, s1, s4
4753 ; GFX6-NEXT: s_bfe_u32 s4, s5, 0xf0001
4754 ; GFX6-NEXT: s_and_b32 s5, 0xffff, s8
4755 ; GFX6-NEXT: s_lshr_b32 s4, s4, s5
4756 ; GFX6-NEXT: s_or_b32 s1, s1, s4
4757 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
4758 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
4759 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
4760 ; GFX6-NEXT: s_or_b32 s0, s0, s1
4761 ; GFX6-NEXT: s_lshl_b32 s1, s2, 1
4762 ; GFX6-NEXT: s_bfe_u32 s2, s6, 0xf0001
4763 ; GFX6-NEXT: s_lshr_b32 s2, s2, 14
4764 ; GFX6-NEXT: s_or_b32 s1, s1, s2
4765 ; GFX6-NEXT: s_lshl_b32 s2, s3, 1
4766 ; GFX6-NEXT: s_bfe_u32 s3, s7, 0xf0001
4767 ; GFX6-NEXT: s_lshr_b32 s3, s3, 14
4768 ; GFX6-NEXT: s_xor_b32 s5, s9, -1
4769 ; GFX6-NEXT: s_or_b32 s2, s2, s3
4770 ; GFX6-NEXT: s_lshl_b32 s3, s6, 1
4771 ; GFX6-NEXT: s_lshl_b32 s4, s7, 1
4772 ; GFX6-NEXT: s_lshr_b32 s6, s5, 16
4773 ; GFX6-NEXT: s_and_b32 s7, s5, 15
4774 ; GFX6-NEXT: s_andn2_b32 s5, 15, s5
4775 ; GFX6-NEXT: s_and_b32 s7, 0xffff, s7
4776 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001
4777 ; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
4778 ; GFX6-NEXT: s_lshl_b32 s1, s1, s7
4779 ; GFX6-NEXT: s_lshr_b32 s3, s3, s5
4780 ; GFX6-NEXT: s_or_b32 s1, s1, s3
4781 ; GFX6-NEXT: s_and_b32 s3, s6, 15
4782 ; GFX6-NEXT: s_andn2_b32 s5, 15, s6
4783 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
4784 ; GFX6-NEXT: s_lshl_b32 s2, s2, s3
4785 ; GFX6-NEXT: s_bfe_u32 s3, s4, 0xf0001
4786 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s5
4787 ; GFX6-NEXT: s_lshr_b32 s3, s3, s4
4788 ; GFX6-NEXT: s_or_b32 s2, s2, s3
4789 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
4790 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
4791 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
4792 ; GFX6-NEXT: s_or_b32 s1, s1, s2
4793 ; GFX6-NEXT: ; return to shader part epilog
4795 ; GFX8-LABEL: s_fshr_v4i16:
4797 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s2
4798 ; GFX8-NEXT: s_lshr_b32 s6, s0, 16
4799 ; GFX8-NEXT: s_lshr_b32 s7, s2, 16
4800 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
4801 ; GFX8-NEXT: s_lshr_b32 s8, s8, 15
4802 ; GFX8-NEXT: s_or_b32 s0, s0, s8
4803 ; GFX8-NEXT: s_lshl_b32 s6, s6, 1
4804 ; GFX8-NEXT: s_lshr_b32 s8, s7, 15
4805 ; GFX8-NEXT: s_lshl_b32 s2, s2, 1
4806 ; GFX8-NEXT: s_xor_b32 s4, s4, -1
4807 ; GFX8-NEXT: s_or_b32 s6, s6, s8
4808 ; GFX8-NEXT: s_lshr_b32 s8, s4, 16
4809 ; GFX8-NEXT: s_and_b32 s9, s4, 15
4810 ; GFX8-NEXT: s_andn2_b32 s4, 15, s4
4811 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4812 ; GFX8-NEXT: s_and_b32 s9, 0xffff, s9
4813 ; GFX8-NEXT: s_lshr_b32 s2, s2, 1
4814 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
4815 ; GFX8-NEXT: s_lshl_b32 s0, s0, s9
4816 ; GFX8-NEXT: s_lshr_b32 s2, s2, s4
4817 ; GFX8-NEXT: s_or_b32 s0, s0, s2
4818 ; GFX8-NEXT: s_and_b32 s2, s8, 15
4819 ; GFX8-NEXT: s_lshl_b32 s7, s7, 1
4820 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4821 ; GFX8-NEXT: s_andn2_b32 s4, 15, s8
4822 ; GFX8-NEXT: s_lshl_b32 s2, s6, s2
4823 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s7
4824 ; GFX8-NEXT: s_lshr_b32 s6, s6, 1
4825 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
4826 ; GFX8-NEXT: s_lshr_b32 s4, s6, s4
4827 ; GFX8-NEXT: s_or_b32 s2, s2, s4
4828 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4829 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
4830 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
4831 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s3
4832 ; GFX8-NEXT: s_or_b32 s0, s0, s2
4833 ; GFX8-NEXT: s_lshr_b32 s2, s1, 16
4834 ; GFX8-NEXT: s_lshr_b32 s4, s3, 16
4835 ; GFX8-NEXT: s_lshl_b32 s1, s1, 1
4836 ; GFX8-NEXT: s_lshr_b32 s6, s6, 15
4837 ; GFX8-NEXT: s_or_b32 s1, s1, s6
4838 ; GFX8-NEXT: s_lshl_b32 s2, s2, 1
4839 ; GFX8-NEXT: s_lshr_b32 s6, s4, 15
4840 ; GFX8-NEXT: s_lshl_b32 s3, s3, 1
4841 ; GFX8-NEXT: s_xor_b32 s5, s5, -1
4842 ; GFX8-NEXT: s_or_b32 s2, s2, s6
4843 ; GFX8-NEXT: s_lshr_b32 s6, s5, 16
4844 ; GFX8-NEXT: s_and_b32 s7, s5, 15
4845 ; GFX8-NEXT: s_andn2_b32 s5, 15, s5
4846 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
4847 ; GFX8-NEXT: s_and_b32 s7, 0xffff, s7
4848 ; GFX8-NEXT: s_lshr_b32 s3, s3, 1
4849 ; GFX8-NEXT: s_and_b32 s5, 0xffff, s5
4850 ; GFX8-NEXT: s_lshl_b32 s1, s1, s7
4851 ; GFX8-NEXT: s_lshr_b32 s3, s3, s5
4852 ; GFX8-NEXT: s_or_b32 s1, s1, s3
4853 ; GFX8-NEXT: s_and_b32 s3, s6, 15
4854 ; GFX8-NEXT: s_lshl_b32 s4, s4, 1
4855 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
4856 ; GFX8-NEXT: s_andn2_b32 s5, 15, s6
4857 ; GFX8-NEXT: s_lshl_b32 s2, s2, s3
4858 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s4
4859 ; GFX8-NEXT: s_lshr_b32 s3, s3, 1
4860 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
4861 ; GFX8-NEXT: s_lshr_b32 s3, s3, s4
4862 ; GFX8-NEXT: s_or_b32 s2, s2, s3
4863 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4864 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
4865 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
4866 ; GFX8-NEXT: s_or_b32 s1, s1, s2
4867 ; GFX8-NEXT: ; return to shader part epilog
4869 ; GFX9-LABEL: s_fshr_v4i16:
4871 ; GFX9-NEXT: s_lshr_b32 s7, s0, 16
4872 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001
4873 ; GFX9-NEXT: s_lshl_b32 s7, s7, 1
4874 ; GFX9-NEXT: s_and_b32 s6, s4, 0xf000f
4875 ; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s4
4876 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s7
4877 ; GFX9-NEXT: s_lshr_b32 s7, s0, 16
4878 ; GFX9-NEXT: s_lshr_b32 s8, s4, 16
4879 ; GFX9-NEXT: s_lshl_b32 s0, s0, s4
4880 ; GFX9-NEXT: s_lshl_b32 s4, s7, s8
4881 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4
4882 ; GFX9-NEXT: s_lshr_b32 s4, s2, 16
4883 ; GFX9-NEXT: s_and_b32 s2, s2, 0xffff
4884 ; GFX9-NEXT: s_lshr_b32 s7, s6, 16
4885 ; GFX9-NEXT: s_lshr_b32 s2, s2, s6
4886 ; GFX9-NEXT: s_lshr_b32 s4, s4, s7
4887 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4
4888 ; GFX9-NEXT: s_or_b32 s0, s0, s2
4889 ; GFX9-NEXT: s_and_b32 s2, s5, 0xf000f
4890 ; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s5
4891 ; GFX9-NEXT: s_lshr_b32 s5, s1, 16
4892 ; GFX9-NEXT: s_lshl_b32 s1, s1, 0x10001
4893 ; GFX9-NEXT: s_lshl_b32 s5, s5, 1
4894 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5
4895 ; GFX9-NEXT: s_lshr_b32 s5, s1, 16
4896 ; GFX9-NEXT: s_lshr_b32 s6, s4, 16
4897 ; GFX9-NEXT: s_lshl_b32 s1, s1, s4
4898 ; GFX9-NEXT: s_lshl_b32 s4, s5, s6
4899 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4
4900 ; GFX9-NEXT: s_lshr_b32 s4, s3, 16
4901 ; GFX9-NEXT: s_and_b32 s3, s3, 0xffff
4902 ; GFX9-NEXT: s_lshr_b32 s5, s2, 16
4903 ; GFX9-NEXT: s_lshr_b32 s2, s3, s2
4904 ; GFX9-NEXT: s_lshr_b32 s3, s4, s5
4905 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s3
4906 ; GFX9-NEXT: s_or_b32 s1, s1, s2
4907 ; GFX9-NEXT: ; return to shader part epilog
4909 ; GFX10-LABEL: s_fshr_v4i16:
4911 ; GFX10-NEXT: s_lshr_b32 s6, s0, 16
4912 ; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001
4913 ; GFX10-NEXT: s_lshl_b32 s6, s6, 1
4914 ; GFX10-NEXT: s_and_b32 s7, s4, 0xf000f
4915 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s6
4916 ; GFX10-NEXT: s_andn2_b32 s4, 0xf000f, s4
4917 ; GFX10-NEXT: s_lshr_b32 s6, s0, 16
4918 ; GFX10-NEXT: s_lshr_b32 s8, s4, 16
4919 ; GFX10-NEXT: s_lshl_b32 s0, s0, s4
4920 ; GFX10-NEXT: s_lshl_b32 s4, s6, s8
4921 ; GFX10-NEXT: s_lshr_b32 s6, s2, 16
4922 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4
4923 ; GFX10-NEXT: s_lshr_b32 s4, s1, 16
4924 ; GFX10-NEXT: s_and_b32 s2, s2, 0xffff
4925 ; GFX10-NEXT: s_lshr_b32 s8, s7, 16
4926 ; GFX10-NEXT: s_lshl_b32 s1, s1, 0x10001
4927 ; GFX10-NEXT: s_lshl_b32 s4, s4, 1
4928 ; GFX10-NEXT: s_lshr_b32 s2, s2, s7
4929 ; GFX10-NEXT: s_lshr_b32 s6, s6, s8
4930 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4
4931 ; GFX10-NEXT: s_andn2_b32 s4, 0xf000f, s5
4932 ; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s6
4933 ; GFX10-NEXT: s_and_b32 s6, s5, 0xf000f
4934 ; GFX10-NEXT: s_lshr_b32 s5, s1, 16
4935 ; GFX10-NEXT: s_lshr_b32 s7, s4, 16
4936 ; GFX10-NEXT: s_lshl_b32 s1, s1, s4
4937 ; GFX10-NEXT: s_lshl_b32 s4, s5, s7
4938 ; GFX10-NEXT: s_lshr_b32 s5, s3, 16
4939 ; GFX10-NEXT: s_and_b32 s3, s3, 0xffff
4940 ; GFX10-NEXT: s_lshr_b32 s7, s6, 16
4941 ; GFX10-NEXT: s_lshr_b32 s3, s3, s6
4942 ; GFX10-NEXT: s_lshr_b32 s5, s5, s7
4943 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4
4944 ; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s5
4945 ; GFX10-NEXT: s_or_b32 s0, s0, s2
4946 ; GFX10-NEXT: s_or_b32 s1, s1, s3
4947 ; GFX10-NEXT: ; return to shader part epilog
4949 ; GFX11-LABEL: s_fshr_v4i16:
4951 ; GFX11-NEXT: s_lshr_b32 s6, s0, 16
4952 ; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001
4953 ; GFX11-NEXT: s_lshl_b32 s6, s6, 1
4954 ; GFX11-NEXT: s_and_b32 s7, s4, 0xf000f
4955 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s6
4956 ; GFX11-NEXT: s_and_not1_b32 s4, 0xf000f, s4
4957 ; GFX11-NEXT: s_lshr_b32 s6, s0, 16
4958 ; GFX11-NEXT: s_lshr_b32 s8, s4, 16
4959 ; GFX11-NEXT: s_lshl_b32 s0, s0, s4
4960 ; GFX11-NEXT: s_lshl_b32 s4, s6, s8
4961 ; GFX11-NEXT: s_lshr_b32 s6, s2, 16
4962 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s4
4963 ; GFX11-NEXT: s_lshr_b32 s4, s1, 16
4964 ; GFX11-NEXT: s_and_b32 s2, s2, 0xffff
4965 ; GFX11-NEXT: s_lshr_b32 s8, s7, 16
4966 ; GFX11-NEXT: s_lshl_b32 s1, s1, 0x10001
4967 ; GFX11-NEXT: s_lshl_b32 s4, s4, 1
4968 ; GFX11-NEXT: s_lshr_b32 s2, s2, s7
4969 ; GFX11-NEXT: s_lshr_b32 s6, s6, s8
4970 ; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s4
4971 ; GFX11-NEXT: s_and_not1_b32 s4, 0xf000f, s5
4972 ; GFX11-NEXT: s_pack_ll_b32_b16 s2, s2, s6
4973 ; GFX11-NEXT: s_and_b32 s6, s5, 0xf000f
4974 ; GFX11-NEXT: s_lshr_b32 s5, s1, 16
4975 ; GFX11-NEXT: s_lshr_b32 s7, s4, 16
4976 ; GFX11-NEXT: s_lshl_b32 s1, s1, s4
4977 ; GFX11-NEXT: s_lshl_b32 s4, s5, s7
4978 ; GFX11-NEXT: s_lshr_b32 s5, s3, 16
4979 ; GFX11-NEXT: s_and_b32 s3, s3, 0xffff
4980 ; GFX11-NEXT: s_lshr_b32 s7, s6, 16
4981 ; GFX11-NEXT: s_lshr_b32 s3, s3, s6
4982 ; GFX11-NEXT: s_lshr_b32 s5, s5, s7
4983 ; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s4
4984 ; GFX11-NEXT: s_pack_ll_b32_b16 s3, s3, s5
4985 ; GFX11-NEXT: s_or_b32 s0, s0, s2
4986 ; GFX11-NEXT: s_or_b32 s1, s1, s3
4987 ; GFX11-NEXT: ; return to shader part epilog
4988 %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
4989 %cast.result = bitcast <4 x i16> %result to <2 x i32>
4990 ret <2 x i32> %cast.result
; v_fshr_v4i16: funnel shift right (llvm.fshr.v4i16) of <4 x i16> with ordinary
; (non-inreg) arguments; the result is returned bitcast to <4 x half>.
; Expected code operates on v* registers and returns with s_setpc_b64:
;   * GFX6: 32-bit ops per element (v_bfe_u32 / v_lshlrev_b32 / v_lshrrev_b32).
;   * GFX8: 16-bit shifts (v_lshlrev_b16 / v_lshrrev_b16); the upper element of
;     each dword is reached through SDWA forms with src1_sel:WORD_1.
;   * GFX9 / GFX10 / GFX11: packed v_pk_lshlrev_b16 / v_pk_lshrrev_b16, with the
;     shift amounts of both halves masked at once by 0xf000f.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
4993 define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) {
4994 ; GFX6-LABEL: v_fshr_v4i16:
4996 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4997 ; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v9
4998 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8
4999 ; GFX6-NEXT: v_or_b32_e32 v8, v9, v8
5000 ; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v11
5001 ; GFX6-NEXT: v_and_b32_e32 v10, 0xffff, v10
5002 ; GFX6-NEXT: v_or_b32_e32 v9, v9, v10
5003 ; GFX6-NEXT: v_bfe_u32 v10, v4, 1, 15
5004 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
5005 ; GFX6-NEXT: v_lshrrev_b32_e32 v10, 14, v10
5006 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v10
5007 ; GFX6-NEXT: v_bfe_u32 v10, v5, 1, 15
5008 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
5009 ; GFX6-NEXT: v_lshrrev_b32_e32 v10, 14, v10
5010 ; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
5011 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v10
5012 ; GFX6-NEXT: v_lshrrev_b32_e32 v10, 16, v8
5013 ; GFX6-NEXT: v_and_b32_e32 v11, 15, v8
5014 ; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
5015 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4
5016 ; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
5017 ; GFX6-NEXT: v_and_b32_e32 v11, 0xffff, v11
5018 ; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15
5019 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8
5020 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v11, v0
5021 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4
5022 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
5023 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v10
5024 ; GFX6-NEXT: v_xor_b32_e32 v8, -1, v10
5025 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5
5026 ; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
5027 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
5028 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1
5029 ; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15
5030 ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v8
5031 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
5032 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v4
5033 ; GFX6-NEXT: v_bfe_u32 v4, v6, 1, 15
5034 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2
5035 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 14, v4
5036 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
5037 ; GFX6-NEXT: v_bfe_u32 v4, v7, 1, 15
5038 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
5039 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 14, v4
5040 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
5041 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v6
5042 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v9
5043 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v7
5044 ; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v6
5045 ; GFX6-NEXT: v_and_b32_e32 v8, 15, v6
5046 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
5047 ; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
5048 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8
5049 ; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15
5050 ; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
5051 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, v8, v2
5052 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4
5053 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
5054 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v7
5055 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7
5056 ; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
5057 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
5058 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3
5059 ; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15
5060 ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v6
5061 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
5062 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
5063 ; GFX6-NEXT: s_setpc_b64 s[30:31]
5065 ; GFX8-LABEL: v_fshr_v4i16:
5067 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5068 ; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v0
5069 ; GFX8-NEXT: v_lshrrev_b16_e32 v7, 15, v2
5070 ; GFX8-NEXT: v_or_b32_e32 v6, v6, v7
5071 ; GFX8-NEXT: v_mov_b32_e32 v7, 1
5072 ; GFX8-NEXT: v_mov_b32_e32 v8, 15
5073 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5074 ; GFX8-NEXT: v_lshrrev_b16_sdwa v9, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5075 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4
5076 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v9
5077 ; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v2
5078 ; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5079 ; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v4
5080 ; GFX8-NEXT: v_and_b32_e32 v10, 15, v4
5081 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4
5082 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v4
5083 ; GFX8-NEXT: v_lshrrev_b16_e32 v9, 1, v9
5084 ; GFX8-NEXT: v_lshlrev_b16_e32 v6, v10, v6
5085 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v9
5086 ; GFX8-NEXT: v_or_b32_e32 v4, v6, v4
5087 ; GFX8-NEXT: v_and_b32_e32 v6, 15, v7
5088 ; GFX8-NEXT: v_xor_b32_e32 v7, -1, v7
5089 ; GFX8-NEXT: v_and_b32_e32 v7, 15, v7
5090 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2
5091 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v6, v0
5092 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v7, v2
5093 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
5094 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
5095 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
5096 ; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5097 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, 1, v1
5098 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, 15, v3
5099 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v4
5100 ; GFX8-NEXT: v_mov_b32_e32 v4, 1
5101 ; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5102 ; GFX8-NEXT: v_lshrrev_b16_sdwa v6, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5103 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v6
5104 ; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v3
5105 ; GFX8-NEXT: v_lshlrev_b16_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5106 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v5
5107 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v4
5108 ; GFX8-NEXT: v_and_b32_e32 v7, 15, v4
5109 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4
5110 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v4
5111 ; GFX8-NEXT: v_lshrrev_b16_e32 v6, 1, v6
5112 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, v7, v2
5113 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v6
5114 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v4
5115 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v5
5116 ; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5
5117 ; GFX8-NEXT: v_and_b32_e32 v5, 15, v5
5118 ; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3
5119 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, v4, v1
5120 ; GFX8-NEXT: v_lshrrev_b16_e32 v3, v5, v3
5121 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
5122 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
5123 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
5124 ; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5125 ; GFX8-NEXT: s_setpc_b64 s[30:31]
5127 ; GFX9-LABEL: v_fshr_v4i16:
5129 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5130 ; GFX9-NEXT: v_and_b32_e32 v6, 0xf000f, v4
5131 ; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4
5132 ; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4
5133 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
5134 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v4, v0
5135 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, v6, v2
5136 ; GFX9-NEXT: v_xor_b32_e32 v4, -1, v5
5137 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
5138 ; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v5
5139 ; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4
5140 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
5141 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, v4, v1
5142 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, v2, v3
5143 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v2
5144 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5146 ; GFX10-LABEL: v_fshr_v4i16:
5148 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5149 ; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4
5150 ; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5
5151 ; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4
5152 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
5153 ; GFX10-NEXT: v_and_b32_e32 v5, 0xf000f, v5
5154 ; GFX10-NEXT: v_and_b32_e32 v6, 0xf000f, v6
5155 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
5156 ; GFX10-NEXT: v_and_b32_e32 v7, 0xf000f, v7
5157 ; GFX10-NEXT: v_pk_lshrrev_b16 v2, v4, v2
5158 ; GFX10-NEXT: v_pk_lshrrev_b16 v3, v5, v3
5159 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v6, v0
5160 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, v7, v1
5161 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
5162 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v3
5163 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5165 ; GFX11-LABEL: v_fshr_v4i16:
5167 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5168 ; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4
5169 ; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5
5170 ; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4
5171 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
5172 ; GFX11-NEXT: v_and_b32_e32 v5, 0xf000f, v5
5173 ; GFX11-NEXT: v_and_b32_e32 v6, 0xf000f, v6
5174 ; GFX11-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
5175 ; GFX11-NEXT: v_and_b32_e32 v7, 0xf000f, v7
5176 ; GFX11-NEXT: v_pk_lshrrev_b16 v2, v4, v2
5177 ; GFX11-NEXT: v_pk_lshrrev_b16 v3, v5, v3
5178 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, v6, v0
5179 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
5180 ; GFX11-NEXT: v_pk_lshlrev_b16 v1, v7, v1
5181 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v2
5182 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
5183 ; GFX11-NEXT: v_or_b32_e32 v1, v1, v3
5184 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5185 %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
5186 %cast.result = bitcast <4 x i16> %result to <4 x half>
5187 ret <4 x half> %cast.result
; s_fshr_i64: funnel shift right (llvm.fshr.i64) by a variable amount, with
; %lhs, %rhs and %amt all `inreg` in an amdgpu_ps function.
; Every target expects the same scalar sequence (only the instruction order and
; the GFX11 mnemonic s_and_not1_b64 / s_delay_alu differ):
;   * amt & 63 is the right-shift amount applied to rhs,
;   * ~amt & 63 (s_andn2_b64 63, amt) is the left-shift amount applied to lhs
;     after lhs has first been shifted left by a constant 1,
;   * the two 64-bit partial results are combined with s_or_b64.
; The check lines are autogenerated; regenerate rather than hand-edit.
5190 define amdgpu_ps i64 @s_fshr_i64(i64 inreg %lhs, i64 inreg %rhs, i64 inreg %amt) {
5191 ; GFX6-LABEL: s_fshr_i64:
5193 ; GFX6-NEXT: s_and_b64 s[6:7], s[4:5], 63
5194 ; GFX6-NEXT: s_andn2_b64 s[4:5], 63, s[4:5]
5195 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5196 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s4
5197 ; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s6
5198 ; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5199 ; GFX6-NEXT: ; return to shader part epilog
5201 ; GFX8-LABEL: s_fshr_i64:
5203 ; GFX8-NEXT: s_and_b64 s[6:7], s[4:5], 63
5204 ; GFX8-NEXT: s_andn2_b64 s[4:5], 63, s[4:5]
5205 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5206 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s4
5207 ; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], s6
5208 ; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5209 ; GFX8-NEXT: ; return to shader part epilog
5211 ; GFX9-LABEL: s_fshr_i64:
5213 ; GFX9-NEXT: s_and_b64 s[6:7], s[4:5], 63
5214 ; GFX9-NEXT: s_andn2_b64 s[4:5], 63, s[4:5]
5215 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5216 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s4
5217 ; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s6
5218 ; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5219 ; GFX9-NEXT: ; return to shader part epilog
5221 ; GFX10-LABEL: s_fshr_i64:
5223 ; GFX10-NEXT: s_andn2_b64 s[6:7], 63, s[4:5]
5224 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5225 ; GFX10-NEXT: s_and_b64 s[4:5], s[4:5], 63
5226 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s6
5227 ; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s4
5228 ; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5229 ; GFX10-NEXT: ; return to shader part epilog
5231 ; GFX11-LABEL: s_fshr_i64:
5233 ; GFX11-NEXT: s_and_not1_b64 s[6:7], 63, s[4:5]
5234 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5235 ; GFX11-NEXT: s_and_b64 s[4:5], s[4:5], 63
5236 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s6
5237 ; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], s4
5238 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5239 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5240 ; GFX11-NEXT: ; return to shader part epilog
5241 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
; s_fshr_i64_5: scalar i64 funnel shift right by the constant 5.
; Expected code: the low dword of lhs is shifted left by 27 (= 32 - 5) into the
; high result dword (s1), the low dword (s0) is zeroed, rhs is shifted right by
; 5 as a 64-bit value, and the two are OR'd with s_or_b64.
; The GCN prefix is shared by the GFX6/8/9/10 RUN lines; GFX11 has its own
; block because of the extra s_delay_alu.
5245 define amdgpu_ps i64 @s_fshr_i64_5(i64 inreg %lhs, i64 inreg %rhs) {
5246 ; GCN-LABEL: s_fshr_i64_5:
5248 ; GCN-NEXT: s_lshl_b32 s1, s0, 27
5249 ; GCN-NEXT: s_mov_b32 s0, 0
5250 ; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], 5
5251 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5252 ; GCN-NEXT: ; return to shader part epilog
5254 ; GFX11-LABEL: s_fshr_i64_5:
5256 ; GFX11-NEXT: s_lshl_b32 s1, s0, 27
5257 ; GFX11-NEXT: s_mov_b32 s0, 0
5258 ; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], 5
5259 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5260 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5261 ; GFX11-NEXT: ; return to shader part epilog
5262 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5)
; s_fshr_i64_32: scalar i64 funnel shift right by the constant 32.
; Expected code uses no shift instructions: s1 takes the low dword of lhs (s0),
; s0 is zeroed, s2 takes the high dword of rhs (s3), s3 is zeroed by copying
; s0, and the two 64-bit pairs are OR'd with s_or_b64.
; The GCN prefix is shared by the GFX6/8/9/10 RUN lines; GFX11 differs only by
; the s_delay_alu before the s_or_b64.
5266 define amdgpu_ps i64 @s_fshr_i64_32(i64 inreg %lhs, i64 inreg %rhs) {
5267 ; GCN-LABEL: s_fshr_i64_32:
5269 ; GCN-NEXT: s_mov_b32 s1, s0
5270 ; GCN-NEXT: s_mov_b32 s0, 0
5271 ; GCN-NEXT: s_mov_b32 s2, s3
5272 ; GCN-NEXT: s_mov_b32 s3, s0
5273 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5274 ; GCN-NEXT: ; return to shader part epilog
5276 ; GFX11-LABEL: s_fshr_i64_32:
5278 ; GFX11-NEXT: s_mov_b32 s1, s0
5279 ; GFX11-NEXT: s_mov_b32 s0, 0
5280 ; GFX11-NEXT: s_mov_b32 s2, s3
5281 ; GFX11-NEXT: s_mov_b32 s3, s0
5282 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5283 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5284 ; GFX11-NEXT: ; return to shader part epilog
5285 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32)
; s_fshr_i64_48: scalar i64 funnel shift right by the constant 48.
; Expected code: lhs is shifted left by 16 (= 64 - 48) as a 64-bit value, the
; high dword of rhs (s3) is shifted right by 16 into s2 with s3 zeroed, and the
; two 64-bit pairs are OR'd with s_or_b64.
; The GCN prefix is shared by the GFX6/8/9/10 RUN lines; GFX11 differs only by
; the s_delay_alu before the s_or_b64.
5289 define amdgpu_ps i64 @s_fshr_i64_48(i64 inreg %lhs, i64 inreg %rhs) {
5290 ; GCN-LABEL: s_fshr_i64_48:
5292 ; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], 16
5293 ; GCN-NEXT: s_lshr_b32 s2, s3, 16
5294 ; GCN-NEXT: s_mov_b32 s3, 0
5295 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5296 ; GCN-NEXT: ; return to shader part epilog
5298 ; GFX11-LABEL: s_fshr_i64_48:
5300 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 16
5301 ; GFX11-NEXT: s_lshr_b32 s2, s3, 16
5302 ; GFX11-NEXT: s_mov_b32 s3, 0
5303 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5304 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5305 ; GFX11-NEXT: ; return to shader part epilog
5306 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48)
; v_fshr_i64: i64 funnel shift right by a variable amount with ordinary
; (non-inreg) arguments; expected code operates on v* registers.
; All targets expect the same shape: amt & 63 shifts rhs right, (~amt) & 63
; shifts lhs left after a constant pre-shift of lhs by 1, and the low and high
; dwords are OR'd separately with two v_or_b32.
; GFX6 checks use v_lshl_b64 / v_lshr_b64 (value operand first); GFX8 and later
; use v_lshlrev_b64 / v_lshrrev_b64 (shift amount first). GFX11 additionally
; expects s_delay_alu lines.
; The check lines are autogenerated; regenerate rather than hand-edit.
5310 define i64 @v_fshr_i64(i64 %lhs, i64 %rhs, i64 %amt) {
5311 ; GFX6-LABEL: v_fshr_i64:
5313 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5314 ; GFX6-NEXT: v_and_b32_e32 v5, 63, v4
5315 ; GFX6-NEXT: v_not_b32_e32 v4, v4
5316 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
5317 ; GFX6-NEXT: v_and_b32_e32 v4, 63, v4
5318 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v4
5319 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], v5
5320 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
5321 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v3
5322 ; GFX6-NEXT: s_setpc_b64 s[30:31]
5324 ; GFX8-LABEL: v_fshr_i64:
5326 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5327 ; GFX8-NEXT: v_and_b32_e32 v5, 63, v4
5328 ; GFX8-NEXT: v_not_b32_e32 v4, v4
5329 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5330 ; GFX8-NEXT: v_and_b32_e32 v4, 63, v4
5331 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
5332 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3]
5333 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
5334 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
5335 ; GFX8-NEXT: s_setpc_b64 s[30:31]
5337 ; GFX9-LABEL: v_fshr_i64:
5339 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5340 ; GFX9-NEXT: v_and_b32_e32 v5, 63, v4
5341 ; GFX9-NEXT: v_not_b32_e32 v4, v4
5342 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5343 ; GFX9-NEXT: v_and_b32_e32 v4, 63, v4
5344 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
5345 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3]
5346 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
5347 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v3
5348 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5350 ; GFX10-LABEL: v_fshr_i64:
5352 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5353 ; GFX10-NEXT: v_not_b32_e32 v5, v4
5354 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5355 ; GFX10-NEXT: v_and_b32_e32 v4, 63, v4
5356 ; GFX10-NEXT: v_and_b32_e32 v5, 63, v5
5357 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3]
5358 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1]
5359 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
5360 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v3
5361 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5363 ; GFX11-LABEL: v_fshr_i64:
5365 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5366 ; GFX11-NEXT: v_not_b32_e32 v5, v4
5367 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5368 ; GFX11-NEXT: v_and_b32_e32 v4, 63, v4
5369 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
5370 ; GFX11-NEXT: v_and_b32_e32 v5, 63, v5
5371 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3]
5372 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5373 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1]
5374 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v2
5375 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
5376 ; GFX11-NEXT: v_or_b32_e32 v1, v1, v3
5377 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5378 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
; v_fshr_i64_5: i64 funnel shift right by the constant 5, vector-register form.
; Expected code: the low dword of lhs (v0) is saved in v4, rhs is shifted right
; by 5 as a 64-bit value into v[0:1], and (saved lhs low dword << 27) is OR'd
; into the high result dword v1.
; GFX6 / GFX8 expect a separate v_lshlrev_b32 + v_or_b32; GFX9 / GFX10 / GFX11
; expect the shift and OR combined in a single v_lshl_or_b32.
5382 define i64 @v_fshr_i64_5(i64 %lhs, i64 %rhs) {
5383 ; GFX6-LABEL: v_fshr_i64_5:
5385 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5386 ; GFX6-NEXT: v_mov_b32_e32 v4, v0
5387 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[2:3], 5
5388 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 27, v4
5389 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
5390 ; GFX6-NEXT: s_setpc_b64 s[30:31]
5392 ; GFX8-LABEL: v_fshr_i64_5:
5394 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5395 ; GFX8-NEXT: v_mov_b32_e32 v4, v0
5396 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3]
5397 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 27, v4
5398 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
5399 ; GFX8-NEXT: s_setpc_b64 s[30:31]
5401 ; GFX9-LABEL: v_fshr_i64_5:
5403 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5404 ; GFX9-NEXT: v_mov_b32_e32 v4, v0
5405 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3]
5406 ; GFX9-NEXT: v_lshl_or_b32 v1, v4, 27, v1
5407 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5409 ; GFX10-LABEL: v_fshr_i64_5:
5411 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5412 ; GFX10-NEXT: v_mov_b32_e32 v4, v0
5413 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3]
5414 ; GFX10-NEXT: v_lshl_or_b32 v1, v4, 27, v1
5415 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5417 ; GFX11-LABEL: v_fshr_i64_5:
5419 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5420 ; GFX11-NEXT: v_mov_b32_e32 v4, v0
5421 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3]
5422 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
5423 ; GFX11-NEXT: v_lshl_or_b32 v1, v4, 27, v1
5424 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5425 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5)
; v_fshr_i64_32: i64 funnel shift right by the constant 32, vector-register form.
; Expected code is two register moves and no shifts: the high result dword v1
; takes the low dword of lhs (v0) and the low result dword v0 takes the high
; dword of rhs (v3).
; GCN (the GFX6/8/9/10 RUN lines) expects two v_mov_b32; GFX11 expects both
; moves in a single v_dual_mov_b32.
5429 define i64 @v_fshr_i64_32(i64 %lhs, i64 %rhs) {
5430 ; GCN-LABEL: v_fshr_i64_32:
5432 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5433 ; GCN-NEXT: v_mov_b32_e32 v1, v0
5434 ; GCN-NEXT: v_mov_b32_e32 v0, v3
5435 ; GCN-NEXT: s_setpc_b64 s[30:31]
5437 ; GFX11-LABEL: v_fshr_i64_32:
5439 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5440 ; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, v3
5441 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5442 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32)
; v_fshr_i64_48: i64 funnel shift right by the constant 48, vector-register form.
; Expected code: lhs is shifted left by 16 (= 64 - 48) as a 64-bit value, and
; the upper 16 bits of rhs's high dword (v3) are OR'd into the low result
; dword v0.
; GFX6 and GFX11 expect an explicit v_lshrrev_b32 by 16 followed by v_or_b32;
; GFX8 / GFX9 / GFX10 expect a single v_or_b32_sdwa that selects WORD_1 of v3.
5446 define i64 @v_fshr_i64_48(i64 %lhs, i64 %rhs) {
5447 ; GFX6-LABEL: v_fshr_i64_48:
5449 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5450 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 16
5451 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v3
5452 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
5453 ; GFX6-NEXT: s_setpc_b64 s[30:31]
5455 ; GFX8-LABEL: v_fshr_i64_48:
5457 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5458 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1]
5459 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5460 ; GFX8-NEXT: s_setpc_b64 s[30:31]
5462 ; GFX9-LABEL: v_fshr_i64_48:
5464 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5465 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1]
5466 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5467 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5469 ; GFX10-LABEL: v_fshr_i64_48:
5471 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5472 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1]
5473 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5474 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5476 ; GFX11-LABEL: v_fshr_i64_48:
5478 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5479 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1]
5480 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v3
5481 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
5482 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v2
5483 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5484 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48)
; fshr.i64 in an amdgpu_ps function where %lhs and %rhs are inreg and %amt
; is not; the i64 result is bitcast to <2 x float> for the return.
; In the expected code %lhs is s[0:1], %rhs is s[2:3] and the amount is
; read from v0. The checks mask the amount with 63 for the right shift of
; %rhs, mask the inverted amount with 63 for the left shift of (%lhs << 1),
; and OR the two 64-bit results together with VALU instructions.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
5488 define amdgpu_ps <2 x float> @v_fshr_i64_ssv(i64 inreg %lhs, i64 inreg %rhs, i64 %amt) {
5489 ; GFX6-LABEL: v_fshr_i64_ssv:
5491 ; GFX6-NEXT: v_and_b32_e32 v2, 63, v0
5492 ; GFX6-NEXT: v_not_b32_e32 v0, v0
5493 ; GFX6-NEXT: v_and_b32_e32 v0, 63, v0
5494 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5495 ; GFX6-NEXT: v_lshl_b64 v[0:1], s[0:1], v0
5496 ; GFX6-NEXT: v_lshr_b64 v[2:3], s[2:3], v2
5497 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
5498 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v3
5499 ; GFX6-NEXT: ; return to shader part epilog
5501 ; GFX8-LABEL: v_fshr_i64_ssv:
5503 ; GFX8-NEXT: v_and_b32_e32 v2, 63, v0
5504 ; GFX8-NEXT: v_not_b32_e32 v0, v0
5505 ; GFX8-NEXT: v_and_b32_e32 v0, 63, v0
5506 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5507 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1]
5508 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], v2, s[2:3]
5509 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
5510 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
5511 ; GFX8-NEXT: ; return to shader part epilog
5513 ; GFX9-LABEL: v_fshr_i64_ssv:
5515 ; GFX9-NEXT: v_and_b32_e32 v2, 63, v0
5516 ; GFX9-NEXT: v_not_b32_e32 v0, v0
5517 ; GFX9-NEXT: v_and_b32_e32 v0, 63, v0
5518 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5519 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1]
5520 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], v2, s[2:3]
5521 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
5522 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v3
5523 ; GFX9-NEXT: ; return to shader part epilog
5525 ; GFX10-LABEL: v_fshr_i64_ssv:
5527 ; GFX10-NEXT: v_not_b32_e32 v1, v0
5528 ; GFX10-NEXT: v_and_b32_e32 v0, 63, v0
5529 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5530 ; GFX10-NEXT: v_and_b32_e32 v2, 63, v1
5531 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v0, s[2:3]
5532 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], v2, s[0:1]
5533 ; GFX10-NEXT: v_or_b32_e32 v0, v2, v0
5534 ; GFX10-NEXT: v_or_b32_e32 v1, v3, v1
5535 ; GFX10-NEXT: ; return to shader part epilog
5537 ; GFX11-LABEL: v_fshr_i64_ssv:
5539 ; GFX11-NEXT: v_not_b32_e32 v1, v0
5540 ; GFX11-NEXT: v_and_b32_e32 v0, 63, v0
5541 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5542 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5543 ; GFX11-NEXT: v_and_b32_e32 v2, 63, v1
5544 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], v0, s[2:3]
5545 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5546 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], v2, s[0:1]
5547 ; GFX11-NEXT: v_or_b32_e32 v0, v2, v0
5548 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
5549 ; GFX11-NEXT: v_or_b32_e32 v1, v3, v1
5550 ; GFX11-NEXT: ; return to shader part epilog
5551 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
5552 %cast = bitcast i64 %result to <2 x float>
5553 ret <2 x float> %cast
; fshr.i64 in an amdgpu_ps function where %lhs and %amt are inreg and %rhs
; is not; the i64 result is bitcast to <2 x float> for the return.
; In the expected code %lhs is s[0:1], the amount is s[2:3] and %rhs is
; v[0:1]. Both shift amounts (amt & 63 and ~amt & 63) and the whole %lhs
; path ((%lhs << 1) << (~amt & 63)) use scalar instructions; only the right
; shift of %rhs and the final ORs are VALU instructions.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
5556 define amdgpu_ps <2 x float> @v_fshr_i64_svs(i64 inreg %lhs, i64 %rhs, i64 inreg %amt) {
5557 ; GFX6-LABEL: v_fshr_i64_svs:
5559 ; GFX6-NEXT: s_and_b64 s[4:5], s[2:3], 63
5560 ; GFX6-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5561 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5562 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], s4
5563 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s2
5564 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
5565 ; GFX6-NEXT: v_or_b32_e32 v1, s1, v1
5566 ; GFX6-NEXT: ; return to shader part epilog
5568 ; GFX8-LABEL: v_fshr_i64_svs:
5570 ; GFX8-NEXT: s_and_b64 s[4:5], s[2:3], 63
5571 ; GFX8-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5572 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5573 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
5574 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s2
5575 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
5576 ; GFX8-NEXT: v_or_b32_e32 v1, s1, v1
5577 ; GFX8-NEXT: ; return to shader part epilog
5579 ; GFX9-LABEL: v_fshr_i64_svs:
5581 ; GFX9-NEXT: s_and_b64 s[4:5], s[2:3], 63
5582 ; GFX9-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5583 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5584 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
5585 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s2
5586 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
5587 ; GFX9-NEXT: v_or_b32_e32 v1, s1, v1
5588 ; GFX9-NEXT: ; return to shader part epilog
5590 ; GFX10-LABEL: v_fshr_i64_svs:
5592 ; GFX10-NEXT: s_and_b64 s[4:5], s[2:3], 63
5593 ; GFX10-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5594 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
5595 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5596 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s2
5597 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
5598 ; GFX10-NEXT: v_or_b32_e32 v1, s1, v1
5599 ; GFX10-NEXT: ; return to shader part epilog
5601 ; GFX11-LABEL: v_fshr_i64_svs:
5603 ; GFX11-NEXT: s_and_b64 s[4:5], s[2:3], 63
5604 ; GFX11-NEXT: s_and_not1_b64 s[2:3], 63, s[2:3]
5605 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
5606 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5607 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5608 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s2
5609 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
5610 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v0
5611 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
5612 ; GFX11-NEXT: v_or_b32_e32 v1, s1, v1
5613 ; GFX11-NEXT: ; return to shader part epilog
5614 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
5615 %cast = bitcast i64 %result to <2 x float>
5616 ret <2 x float> %cast
; fshr.i64 in an amdgpu_ps function where %rhs and %amt are inreg and %lhs
; is not; the i64 result is bitcast to <2 x float> for the return.
; In the expected code %lhs is v[0:1], %rhs is s[0:1] and the amount is
; s[2:3]. The two masked amounts (amt & 63 and ~amt & 63) and the right
; shift of %rhs use scalar instructions; the left shifts of %lhs (by 1,
; then by ~amt & 63) and the final ORs are VALU instructions.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
5619 define amdgpu_ps <2 x float> @v_fshr_i64_vss(i64 %lhs, i64 inreg %rhs, i64 inreg %amt) {
5620 ; GFX6-LABEL: v_fshr_i64_vss:
5622 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
5623 ; GFX6-NEXT: s_and_b64 s[4:5], s[2:3], 63
5624 ; GFX6-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5625 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s2
5626 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s4
5627 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
5628 ; GFX6-NEXT: v_or_b32_e32 v1, s1, v1
5629 ; GFX6-NEXT: ; return to shader part epilog
5631 ; GFX8-LABEL: v_fshr_i64_vss:
5633 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5634 ; GFX8-NEXT: s_and_b64 s[4:5], s[2:3], 63
5635 ; GFX8-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5636 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], s2, v[0:1]
5637 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s4
5638 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
5639 ; GFX8-NEXT: v_or_b32_e32 v1, s1, v1
5640 ; GFX8-NEXT: ; return to shader part epilog
5642 ; GFX9-LABEL: v_fshr_i64_vss:
5644 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5645 ; GFX9-NEXT: s_and_b64 s[4:5], s[2:3], 63
5646 ; GFX9-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5647 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], s2, v[0:1]
5648 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s4
5649 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
5650 ; GFX9-NEXT: v_or_b32_e32 v1, s1, v1
5651 ; GFX9-NEXT: ; return to shader part epilog
5653 ; GFX10-LABEL: v_fshr_i64_vss:
5655 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5656 ; GFX10-NEXT: s_andn2_b64 s[4:5], 63, s[2:3]
5657 ; GFX10-NEXT: s_and_b64 s[2:3], s[2:3], 63
5658 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
5659 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1]
5660 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
5661 ; GFX10-NEXT: v_or_b32_e32 v1, s1, v1
5662 ; GFX10-NEXT: ; return to shader part epilog
5664 ; GFX11-LABEL: v_fshr_i64_vss:
5666 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5667 ; GFX11-NEXT: s_and_not1_b64 s[4:5], 63, s[2:3]
5668 ; GFX11-NEXT: s_and_b64 s[2:3], s[2:3], 63
5669 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5670 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
5671 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1]
5672 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
5673 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v0
5674 ; GFX11-NEXT: v_or_b32_e32 v1, s1, v1
5675 ; GFX11-NEXT: ; return to shader part epilog
5676 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
5677 %cast = bitcast i64 %result to <2 x float>
5678 ret <2 x float> %cast
; fshr.v2i64 in an amdgpu_ps function with all three operands inreg.
; The expected code is entirely scalar and treats the two elements
; independently (%lhs in s[0:3], %rhs in s[4:7], amounts in s[8:11]).
; For each element the amount is masked with 63 (s_and_b64) and its
; complement is masked with 63 (s_andn2_b64, spelled s_and_not1_b64 on
; GFX11); the lhs element is shifted left by 1 and then by the masked
; complement, the rhs element is shifted right by the masked amount, and
; the two results are combined with s_or_b64.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
5681 define amdgpu_ps <2 x i64> @s_fshr_v2i64(<2 x i64> inreg %lhs, <2 x i64> inreg %rhs, <2 x i64> inreg %amt) {
5682 ; GFX6-LABEL: s_fshr_v2i64:
5684 ; GFX6-NEXT: s_and_b64 s[12:13], s[8:9], 63
5685 ; GFX6-NEXT: s_andn2_b64 s[8:9], 63, s[8:9]
5686 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5687 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s8
5688 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s12
5689 ; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
5690 ; GFX6-NEXT: s_and_b64 s[4:5], s[10:11], 63
5691 ; GFX6-NEXT: s_andn2_b64 s[8:9], 63, s[10:11]
5692 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
5693 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], s8
5694 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[6:7], s4
5695 ; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
5696 ; GFX6-NEXT: ; return to shader part epilog
5698 ; GFX8-LABEL: s_fshr_v2i64:
5700 ; GFX8-NEXT: s_and_b64 s[12:13], s[8:9], 63
5701 ; GFX8-NEXT: s_andn2_b64 s[8:9], 63, s[8:9]
5702 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5703 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s8
5704 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], s12
5705 ; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
5706 ; GFX8-NEXT: s_and_b64 s[4:5], s[10:11], 63
5707 ; GFX8-NEXT: s_andn2_b64 s[8:9], 63, s[10:11]
5708 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
5709 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s8
5710 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[6:7], s4
5711 ; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
5712 ; GFX8-NEXT: ; return to shader part epilog
5714 ; GFX9-LABEL: s_fshr_v2i64:
5716 ; GFX9-NEXT: s_and_b64 s[12:13], s[8:9], 63
5717 ; GFX9-NEXT: s_andn2_b64 s[8:9], 63, s[8:9]
5718 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5719 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s8
5720 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s12
5721 ; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
5722 ; GFX9-NEXT: s_and_b64 s[4:5], s[10:11], 63
5723 ; GFX9-NEXT: s_andn2_b64 s[8:9], 63, s[10:11]
5724 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
5725 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s8
5726 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[6:7], s4
5727 ; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
5728 ; GFX9-NEXT: ; return to shader part epilog
5730 ; GFX10-LABEL: s_fshr_v2i64:
5732 ; GFX10-NEXT: s_andn2_b64 s[12:13], 63, s[8:9]
5733 ; GFX10-NEXT: s_and_b64 s[8:9], s[8:9], 63
5734 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5735 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
5736 ; GFX10-NEXT: s_andn2_b64 s[8:9], 63, s[10:11]
5737 ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
5738 ; GFX10-NEXT: s_and_b64 s[10:11], s[10:11], 63
5739 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s12
5740 ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], s8
5741 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s10
5742 ; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
5743 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7]
5744 ; GFX10-NEXT: ; return to shader part epilog
5746 ; GFX11-LABEL: s_fshr_v2i64:
5748 ; GFX11-NEXT: s_and_not1_b64 s[12:13], 63, s[8:9]
5749 ; GFX11-NEXT: s_and_b64 s[8:9], s[8:9], 63
5750 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5751 ; GFX11-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
5752 ; GFX11-NEXT: s_and_not1_b64 s[8:9], 63, s[10:11]
5753 ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
5754 ; GFX11-NEXT: s_and_b64 s[10:11], s[10:11], 63
5755 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s12
5756 ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], s8
5757 ; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], s10
5758 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
5759 ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7]
5760 ; GFX11-NEXT: ; return to shader part epilog
5761 %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt)
5762 ret <2 x i64> %result
; fshr.v2i64 with the default calling convention; the expected code uses
; only VALU instructions (%lhs in v[0:3], %rhs in v[4:7], amounts read from
; v8 and v10) and returns with s_setpc_b64 s[30:31].
; For each element the checks mask the amount with 63 and the inverted
; amount with 63, shift the lhs element left by 1 and then by the masked
; inverted amount, shift the rhs element right by the masked amount, and OR
; the low and high halves separately.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
5765 define <2 x i64> @v_fshr_v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) {
5766 ; GFX6-LABEL: v_fshr_v2i64:
5768 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5769 ; GFX6-NEXT: v_and_b32_e32 v9, 63, v8
5770 ; GFX6-NEXT: v_not_b32_e32 v8, v8
5771 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
5772 ; GFX6-NEXT: v_and_b32_e32 v8, 63, v8
5773 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v8
5774 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[4:5], v9
5775 ; GFX6-NEXT: v_not_b32_e32 v8, v10
5776 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
5777 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
5778 ; GFX6-NEXT: v_and_b32_e32 v4, 63, v10
5779 ; GFX6-NEXT: v_and_b32_e32 v8, 63, v8
5780 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], v8
5781 ; GFX6-NEXT: v_lshr_b64 v[6:7], v[6:7], v4
5782 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v5
5783 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v6
5784 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v7
5785 ; GFX6-NEXT: s_setpc_b64 s[30:31]
5787 ; GFX8-LABEL: v_fshr_v2i64:
5789 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5790 ; GFX8-NEXT: v_and_b32_e32 v9, 63, v8
5791 ; GFX8-NEXT: v_not_b32_e32 v8, v8
5792 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5793 ; GFX8-NEXT: v_and_b32_e32 v8, 63, v8
5794 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1]
5795 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5]
5796 ; GFX8-NEXT: v_not_b32_e32 v8, v10
5797 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
5798 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
5799 ; GFX8-NEXT: v_and_b32_e32 v4, 63, v10
5800 ; GFX8-NEXT: v_and_b32_e32 v8, 63, v8
5801 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], v8, v[2:3]
5802 ; GFX8-NEXT: v_lshrrev_b64 v[6:7], v4, v[6:7]
5803 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v5
5804 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v6
5805 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v7
5806 ; GFX8-NEXT: s_setpc_b64 s[30:31]
5808 ; GFX9-LABEL: v_fshr_v2i64:
5810 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5811 ; GFX9-NEXT: v_and_b32_e32 v9, 63, v8
5812 ; GFX9-NEXT: v_not_b32_e32 v8, v8
5813 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5814 ; GFX9-NEXT: v_and_b32_e32 v8, 63, v8
5815 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1]
5816 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5]
5817 ; GFX9-NEXT: v_not_b32_e32 v8, v10
5818 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
5819 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v4
5820 ; GFX9-NEXT: v_and_b32_e32 v4, 63, v10
5821 ; GFX9-NEXT: v_and_b32_e32 v8, 63, v8
5822 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v8, v[2:3]
5823 ; GFX9-NEXT: v_lshrrev_b64 v[6:7], v4, v[6:7]
5824 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v5
5825 ; GFX9-NEXT: v_or_b32_e32 v2, v2, v6
5826 ; GFX9-NEXT: v_or_b32_e32 v3, v3, v7
5827 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5829 ; GFX10-LABEL: v_fshr_v2i64:
5831 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5832 ; GFX10-NEXT: v_not_b32_e32 v9, v8
5833 ; GFX10-NEXT: v_not_b32_e32 v11, v10
5834 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5835 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
5836 ; GFX10-NEXT: v_and_b32_e32 v8, 63, v8
5837 ; GFX10-NEXT: v_and_b32_e32 v9, 63, v9
5838 ; GFX10-NEXT: v_and_b32_e32 v11, 63, v11
5839 ; GFX10-NEXT: v_and_b32_e32 v10, 63, v10
5840 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5]
5841 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1]
5842 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], v11, v[2:3]
5843 ; GFX10-NEXT: v_lshrrev_b64 v[6:7], v10, v[6:7]
5844 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v4
5845 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v5
5846 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v6
5847 ; GFX10-NEXT: v_or_b32_e32 v3, v3, v7
5848 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5850 ; GFX11-LABEL: v_fshr_v2i64:
5852 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5853 ; GFX11-NEXT: v_not_b32_e32 v9, v8
5854 ; GFX11-NEXT: v_not_b32_e32 v11, v10
5855 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5856 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
5857 ; GFX11-NEXT: v_and_b32_e32 v8, 63, v8
5858 ; GFX11-NEXT: v_and_b32_e32 v9, 63, v9
5859 ; GFX11-NEXT: v_and_b32_e32 v11, 63, v11
5860 ; GFX11-NEXT: v_and_b32_e32 v10, 63, v10
5861 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
5862 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5]
5863 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1]
5864 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
5865 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], v11, v[2:3]
5866 ; GFX11-NEXT: v_lshrrev_b64 v[6:7], v10, v[6:7]
5867 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
5868 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v4
5869 ; GFX11-NEXT: v_or_b32_e32 v1, v1, v5
5870 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
5871 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v6
5872 ; GFX11-NEXT: v_or_b32_e32 v3, v3, v7
5873 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5874 %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt)
5875 ret <2 x i64> %result
; fshr.i128 in an amdgpu_ps function with all three operands inreg.
; The expected code is entirely scalar (%lhs in s[0:3], %rhs in s[4:7],
; amount taken from s[8:9]). The amount is masked with 0x7f and its
; complement is masked with 0x7f. %lhs is first shifted left by one bit as
; a 128-bit value: both 64-bit halves are shifted by 1 and bit 31 of s1 is
; ORed into the high half via s_lshr_b32 ..., 31.
; Each variable 128-bit shift is then expanded into 64-bit shifts by the
; amount, by (64 - amount) and by (amount - 64), with s_cmp_lt_u32 ..., 64
; and s_cmp_eq_u32 ..., 0 feeding s_cselect_b64 to pick the right pieces.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
5878 define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) {
5879 ; GFX6-LABEL: s_fshr_i128:
5881 ; GFX6-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f
5882 ; GFX6-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9]
5883 ; GFX6-NEXT: s_lshl_b64 s[12:13], s[0:1], 1
5884 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
5885 ; GFX6-NEXT: s_lshr_b32 s0, s1, 31
5886 ; GFX6-NEXT: s_mov_b32 s1, 0
5887 ; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
5888 ; GFX6-NEXT: s_sub_i32 s11, s8, 64
5889 ; GFX6-NEXT: s_sub_i32 s9, 64, s8
5890 ; GFX6-NEXT: s_cmp_lt_u32 s8, 64
5891 ; GFX6-NEXT: s_cselect_b32 s16, 1, 0
5892 ; GFX6-NEXT: s_cmp_eq_u32 s8, 0
5893 ; GFX6-NEXT: s_cselect_b32 s17, 1, 0
5894 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[12:13], s8
5895 ; GFX6-NEXT: s_lshr_b64 s[14:15], s[12:13], s9
5896 ; GFX6-NEXT: s_lshl_b64 s[8:9], s[0:1], s8
5897 ; GFX6-NEXT: s_or_b64 s[8:9], s[14:15], s[8:9]
5898 ; GFX6-NEXT: s_lshl_b64 s[12:13], s[12:13], s11
5899 ; GFX6-NEXT: s_cmp_lg_u32 s16, 0
5900 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
5901 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[12:13]
5902 ; GFX6-NEXT: s_cmp_lg_u32 s17, 0
5903 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9]
5904 ; GFX6-NEXT: s_sub_i32 s14, s10, 64
5905 ; GFX6-NEXT: s_sub_i32 s12, 64, s10
5906 ; GFX6-NEXT: s_cmp_lt_u32 s10, 64
5907 ; GFX6-NEXT: s_cselect_b32 s15, 1, 0
5908 ; GFX6-NEXT: s_cmp_eq_u32 s10, 0
5909 ; GFX6-NEXT: s_cselect_b32 s16, 1, 0
5910 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[6:7], s10
5911 ; GFX6-NEXT: s_lshr_b64 s[10:11], s[4:5], s10
5912 ; GFX6-NEXT: s_lshl_b64 s[12:13], s[6:7], s12
5913 ; GFX6-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13]
5914 ; GFX6-NEXT: s_lshr_b64 s[6:7], s[6:7], s14
5915 ; GFX6-NEXT: s_cmp_lg_u32 s15, 0
5916 ; GFX6-NEXT: s_cselect_b64 s[6:7], s[10:11], s[6:7]
5917 ; GFX6-NEXT: s_cmp_lg_u32 s16, 0
5918 ; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
5919 ; GFX6-NEXT: s_cmp_lg_u32 s15, 0
5920 ; GFX6-NEXT: s_cselect_b64 s[6:7], s[0:1], 0
5921 ; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
5922 ; GFX6-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7]
5923 ; GFX6-NEXT: ; return to shader part epilog
5925 ; GFX8-LABEL: s_fshr_i128:
5927 ; GFX8-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f
5928 ; GFX8-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9]
5929 ; GFX8-NEXT: s_lshl_b64 s[12:13], s[0:1], 1
5930 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
5931 ; GFX8-NEXT: s_lshr_b32 s0, s1, 31
5932 ; GFX8-NEXT: s_mov_b32 s1, 0
5933 ; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
5934 ; GFX8-NEXT: s_sub_i32 s11, s8, 64
5935 ; GFX8-NEXT: s_sub_i32 s9, 64, s8
5936 ; GFX8-NEXT: s_cmp_lt_u32 s8, 64
5937 ; GFX8-NEXT: s_cselect_b32 s16, 1, 0
5938 ; GFX8-NEXT: s_cmp_eq_u32 s8, 0
5939 ; GFX8-NEXT: s_cselect_b32 s17, 1, 0
5940 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[12:13], s8
5941 ; GFX8-NEXT: s_lshr_b64 s[14:15], s[12:13], s9
5942 ; GFX8-NEXT: s_lshl_b64 s[8:9], s[0:1], s8
5943 ; GFX8-NEXT: s_or_b64 s[8:9], s[14:15], s[8:9]
5944 ; GFX8-NEXT: s_lshl_b64 s[12:13], s[12:13], s11
5945 ; GFX8-NEXT: s_cmp_lg_u32 s16, 0
5946 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
5947 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[12:13]
5948 ; GFX8-NEXT: s_cmp_lg_u32 s17, 0
5949 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9]
5950 ; GFX8-NEXT: s_sub_i32 s14, s10, 64
5951 ; GFX8-NEXT: s_sub_i32 s12, 64, s10
5952 ; GFX8-NEXT: s_cmp_lt_u32 s10, 64
5953 ; GFX8-NEXT: s_cselect_b32 s15, 1, 0
5954 ; GFX8-NEXT: s_cmp_eq_u32 s10, 0
5955 ; GFX8-NEXT: s_cselect_b32 s16, 1, 0
5956 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[6:7], s10
5957 ; GFX8-NEXT: s_lshr_b64 s[10:11], s[4:5], s10
5958 ; GFX8-NEXT: s_lshl_b64 s[12:13], s[6:7], s12
5959 ; GFX8-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13]
5960 ; GFX8-NEXT: s_lshr_b64 s[6:7], s[6:7], s14
5961 ; GFX8-NEXT: s_cmp_lg_u32 s15, 0
5962 ; GFX8-NEXT: s_cselect_b64 s[6:7], s[10:11], s[6:7]
5963 ; GFX8-NEXT: s_cmp_lg_u32 s16, 0
5964 ; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
5965 ; GFX8-NEXT: s_cmp_lg_u32 s15, 0
5966 ; GFX8-NEXT: s_cselect_b64 s[6:7], s[0:1], 0
5967 ; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
5968 ; GFX8-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7]
5969 ; GFX8-NEXT: ; return to shader part epilog
5971 ; GFX9-LABEL: s_fshr_i128:
5973 ; GFX9-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f
5974 ; GFX9-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9]
5975 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[0:1], 1
5976 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
5977 ; GFX9-NEXT: s_lshr_b32 s0, s1, 31
5978 ; GFX9-NEXT: s_mov_b32 s1, 0
5979 ; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
5980 ; GFX9-NEXT: s_sub_i32 s11, s8, 64
5981 ; GFX9-NEXT: s_sub_i32 s9, 64, s8
5982 ; GFX9-NEXT: s_cmp_lt_u32 s8, 64
5983 ; GFX9-NEXT: s_cselect_b32 s16, 1, 0
5984 ; GFX9-NEXT: s_cmp_eq_u32 s8, 0
5985 ; GFX9-NEXT: s_cselect_b32 s17, 1, 0
5986 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[12:13], s8
5987 ; GFX9-NEXT: s_lshr_b64 s[14:15], s[12:13], s9
5988 ; GFX9-NEXT: s_lshl_b64 s[8:9], s[0:1], s8
5989 ; GFX9-NEXT: s_or_b64 s[8:9], s[14:15], s[8:9]
5990 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[12:13], s11
5991 ; GFX9-NEXT: s_cmp_lg_u32 s16, 0
5992 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
5993 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[12:13]
5994 ; GFX9-NEXT: s_cmp_lg_u32 s17, 0
5995 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9]
5996 ; GFX9-NEXT: s_sub_i32 s14, s10, 64
5997 ; GFX9-NEXT: s_sub_i32 s12, 64, s10
5998 ; GFX9-NEXT: s_cmp_lt_u32 s10, 64
5999 ; GFX9-NEXT: s_cselect_b32 s15, 1, 0
6000 ; GFX9-NEXT: s_cmp_eq_u32 s10, 0
6001 ; GFX9-NEXT: s_cselect_b32 s16, 1, 0
6002 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[6:7], s10
6003 ; GFX9-NEXT: s_lshr_b64 s[10:11], s[4:5], s10
6004 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[6:7], s12
6005 ; GFX9-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13]
6006 ; GFX9-NEXT: s_lshr_b64 s[6:7], s[6:7], s14
6007 ; GFX9-NEXT: s_cmp_lg_u32 s15, 0
6008 ; GFX9-NEXT: s_cselect_b64 s[6:7], s[10:11], s[6:7]
6009 ; GFX9-NEXT: s_cmp_lg_u32 s16, 0
6010 ; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
6011 ; GFX9-NEXT: s_cmp_lg_u32 s15, 0
6012 ; GFX9-NEXT: s_cselect_b64 s[6:7], s[0:1], 0
6013 ; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
6014 ; GFX9-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7]
6015 ; GFX9-NEXT: ; return to shader part epilog
6017 ; GFX10-LABEL: s_fshr_i128:
6019 ; GFX10-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f
6020 ; GFX10-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9]
6021 ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6022 ; GFX10-NEXT: s_lshr_b32 s12, s1, 31
6023 ; GFX10-NEXT: s_mov_b32 s13, 0
6024 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
6025 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[12:13]
6026 ; GFX10-NEXT: s_sub_i32 s11, s8, 64
6027 ; GFX10-NEXT: s_sub_i32 s9, 64, s8
6028 ; GFX10-NEXT: s_cmp_lt_u32 s8, 64
6029 ; GFX10-NEXT: s_cselect_b32 s16, 1, 0
6030 ; GFX10-NEXT: s_cmp_eq_u32 s8, 0
6031 ; GFX10-NEXT: s_cselect_b32 s17, 1, 0
6032 ; GFX10-NEXT: s_lshr_b64 s[12:13], s[0:1], s9
6033 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], s8
6034 ; GFX10-NEXT: s_lshl_b64 s[8:9], s[0:1], s8
6035 ; GFX10-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15]
6036 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s11
6037 ; GFX10-NEXT: s_cmp_lg_u32 s16, 0
6038 ; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0
6039 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[12:13], s[0:1]
6040 ; GFX10-NEXT: s_cmp_lg_u32 s17, 0
6041 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
6042 ; GFX10-NEXT: s_sub_i32 s14, s10, 64
6043 ; GFX10-NEXT: s_sub_i32 s11, 64, s10
6044 ; GFX10-NEXT: s_cmp_lt_u32 s10, 64
6045 ; GFX10-NEXT: s_cselect_b32 s15, 1, 0
6046 ; GFX10-NEXT: s_cmp_eq_u32 s10, 0
6047 ; GFX10-NEXT: s_cselect_b32 s16, 1, 0
6048 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[4:5], s10
6049 ; GFX10-NEXT: s_lshl_b64 s[12:13], s[6:7], s11
6050 ; GFX10-NEXT: s_lshr_b64 s[10:11], s[6:7], s10
6051 ; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[12:13]
6052 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s14
6053 ; GFX10-NEXT: s_cmp_lg_u32 s15, 0
6054 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[6:7]
6055 ; GFX10-NEXT: s_cmp_lg_u32 s16, 0
6056 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1]
6057 ; GFX10-NEXT: s_cmp_lg_u32 s15, 0
6058 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[10:11], 0
6059 ; GFX10-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
6060 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
6061 ; GFX10-NEXT: ; return to shader part epilog
6063 ; GFX11-LABEL: s_fshr_i128:
6065 ; GFX11-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f
6066 ; GFX11-NEXT: s_and_not1_b64 s[8:9], 0x7f, s[8:9]
6067 ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6068 ; GFX11-NEXT: s_lshr_b32 s12, s1, 31
6069 ; GFX11-NEXT: s_mov_b32 s13, 0
6070 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
6071 ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[12:13]
6072 ; GFX11-NEXT: s_sub_i32 s11, s8, 64
6073 ; GFX11-NEXT: s_sub_i32 s9, 64, s8
6074 ; GFX11-NEXT: s_cmp_lt_u32 s8, 64
6075 ; GFX11-NEXT: s_cselect_b32 s16, 1, 0
6076 ; GFX11-NEXT: s_cmp_eq_u32 s8, 0
6077 ; GFX11-NEXT: s_cselect_b32 s17, 1, 0
6078 ; GFX11-NEXT: s_lshr_b64 s[12:13], s[0:1], s9
6079 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], s8
6080 ; GFX11-NEXT: s_lshl_b64 s[8:9], s[0:1], s8
6081 ; GFX11-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15]
6082 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s11
6083 ; GFX11-NEXT: s_cmp_lg_u32 s16, 0
6084 ; GFX11-NEXT: s_cselect_b64 s[8:9], s[8:9], 0
6085 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[12:13], s[0:1]
6086 ; GFX11-NEXT: s_cmp_lg_u32 s17, 0
6087 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
6088 ; GFX11-NEXT: s_sub_i32 s14, s10, 64
6089 ; GFX11-NEXT: s_sub_i32 s11, 64, s10
6090 ; GFX11-NEXT: s_cmp_lt_u32 s10, 64
6091 ; GFX11-NEXT: s_cselect_b32 s15, 1, 0
6092 ; GFX11-NEXT: s_cmp_eq_u32 s10, 0
6093 ; GFX11-NEXT: s_cselect_b32 s16, 1, 0
6094 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[4:5], s10
6095 ; GFX11-NEXT: s_lshl_b64 s[12:13], s[6:7], s11
6096 ; GFX11-NEXT: s_lshr_b64 s[10:11], s[6:7], s10
6097 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[12:13]
6098 ; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], s14
6099 ; GFX11-NEXT: s_cmp_lg_u32 s15, 0
6100 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[6:7]
6101 ; GFX11-NEXT: s_cmp_lg_u32 s16, 0
6102 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1]
6103 ; GFX11-NEXT: s_cmp_lg_u32 s15, 0
6104 ; GFX11-NEXT: s_cselect_b64 s[4:5], s[10:11], 0
6105 ; GFX11-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
6106 ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
6107 ; GFX11-NEXT: ; return to shader part epilog
6108 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
6112 define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
6113 ; GFX6-LABEL: v_fshr_i128:
6115 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6116 ; GFX6-NEXT: v_and_b32_e32 v14, 0x7f, v8
6117 ; GFX6-NEXT: v_not_b32_e32 v8, v8
6118 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
6119 ; GFX6-NEXT: v_and_b32_e32 v15, 0x7f, v8
6120 ; GFX6-NEXT: v_lshl_b64 v[8:9], v[0:1], 1
6121 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 31, v1
6122 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v0
6123 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v15
6124 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[8:9], v0
6125 ; GFX6-NEXT: v_lshl_b64 v[10:11], v[2:3], v15
6126 ; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, 64, v15
6127 ; GFX6-NEXT: v_lshl_b64 v[12:13], v[8:9], v15
6128 ; GFX6-NEXT: v_or_b32_e32 v10, v0, v10
6129 ; GFX6-NEXT: v_or_b32_e32 v11, v1, v11
6130 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[8:9], v16
6131 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
6132 ; GFX6-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc
6133 ; GFX6-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc
6134 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
6135 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
6136 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15
6137 ; GFX6-NEXT: v_cndmask_b32_e32 v10, v0, v2, vcc
6138 ; GFX6-NEXT: v_cndmask_b32_e32 v11, v1, v3, vcc
6139 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v14
6140 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], v14
6141 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[6:7], v2
6142 ; GFX6-NEXT: v_subrev_i32_e32 v15, vcc, 64, v14
6143 ; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
6144 ; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
6145 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[6:7], v15
6146 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[6:7], v14
6147 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
6148 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6149 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6150 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
6151 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5]
6152 ; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5]
6153 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
6154 ; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
6155 ; GFX6-NEXT: v_or_b32_e32 v0, v12, v0
6156 ; GFX6-NEXT: v_or_b32_e32 v1, v13, v1
6157 ; GFX6-NEXT: v_or_b32_e32 v2, v10, v2
6158 ; GFX6-NEXT: v_or_b32_e32 v3, v11, v3
6159 ; GFX6-NEXT: s_setpc_b64 s[30:31]
6161 ; GFX8-LABEL: v_fshr_i128:
6163 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6164 ; GFX8-NEXT: v_and_b32_e32 v14, 0x7f, v8
6165 ; GFX8-NEXT: v_not_b32_e32 v8, v8
6166 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
6167 ; GFX8-NEXT: v_and_b32_e32 v15, 0x7f, v8
6168 ; GFX8-NEXT: v_lshlrev_b64 v[8:9], 1, v[0:1]
6169 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 31, v1
6170 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v0
6171 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v15
6172 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, v[8:9]
6173 ; GFX8-NEXT: v_lshlrev_b64 v[10:11], v15, v[2:3]
6174 ; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, 64, v15
6175 ; GFX8-NEXT: v_lshlrev_b64 v[12:13], v15, v[8:9]
6176 ; GFX8-NEXT: v_or_b32_e32 v10, v0, v10
6177 ; GFX8-NEXT: v_or_b32_e32 v11, v1, v11
6178 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v16, v[8:9]
6179 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
6180 ; GFX8-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc
6181 ; GFX8-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc
6182 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
6183 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
6184 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15
6185 ; GFX8-NEXT: v_cndmask_b32_e32 v10, v0, v2, vcc
6186 ; GFX8-NEXT: v_cndmask_b32_e32 v11, v1, v3, vcc
6187 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v14
6188 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v14, v[4:5]
6189 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7]
6190 ; GFX8-NEXT: v_subrev_u32_e32 v15, vcc, 64, v14
6191 ; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
6192 ; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
6193 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v15, v[6:7]
6194 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], v14, v[6:7]
6195 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
6196 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6197 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6198 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
6199 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5]
6200 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5]
6201 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
6202 ; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
6203 ; GFX8-NEXT: v_or_b32_e32 v0, v12, v0
6204 ; GFX8-NEXT: v_or_b32_e32 v1, v13, v1
6205 ; GFX8-NEXT: v_or_b32_e32 v2, v10, v2
6206 ; GFX8-NEXT: v_or_b32_e32 v3, v11, v3
6207 ; GFX8-NEXT: s_setpc_b64 s[30:31]
6209 ; GFX9-LABEL: v_fshr_i128:
6211 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6212 ; GFX9-NEXT: v_and_b32_e32 v14, 0x7f, v8
6213 ; GFX9-NEXT: v_not_b32_e32 v8, v8
6214 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
6215 ; GFX9-NEXT: v_and_b32_e32 v15, 0x7f, v8
6216 ; GFX9-NEXT: v_lshlrev_b64 v[8:9], 1, v[0:1]
6217 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 31, v1
6218 ; GFX9-NEXT: v_or_b32_e32 v2, v2, v0
6219 ; GFX9-NEXT: v_sub_u32_e32 v0, 64, v15
6220 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, v[8:9]
6221 ; GFX9-NEXT: v_lshlrev_b64 v[10:11], v15, v[2:3]
6222 ; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v15
6223 ; GFX9-NEXT: v_lshlrev_b64 v[12:13], v15, v[8:9]
6224 ; GFX9-NEXT: v_or_b32_e32 v10, v0, v10
6225 ; GFX9-NEXT: v_or_b32_e32 v11, v1, v11
6226 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v16, v[8:9]
6227 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
6228 ; GFX9-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc
6229 ; GFX9-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc
6230 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
6231 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
6232 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15
6233 ; GFX9-NEXT: v_cndmask_b32_e32 v10, v0, v2, vcc
6234 ; GFX9-NEXT: v_sub_u32_e32 v2, 64, v14
6235 ; GFX9-NEXT: v_cndmask_b32_e32 v11, v1, v3, vcc
6236 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v14, v[4:5]
6237 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7]
6238 ; GFX9-NEXT: v_subrev_u32_e32 v15, 64, v14
6239 ; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
6240 ; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
6241 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v15, v[6:7]
6242 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v14, v[6:7]
6243 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
6244 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6245 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6246 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
6247 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5]
6248 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5]
6249 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
6250 ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
6251 ; GFX9-NEXT: v_or_b32_e32 v0, v12, v0
6252 ; GFX9-NEXT: v_or_b32_e32 v1, v13, v1
6253 ; GFX9-NEXT: v_or_b32_e32 v2, v10, v2
6254 ; GFX9-NEXT: v_or_b32_e32 v3, v11, v3
6255 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6257 ; GFX10-LABEL: v_fshr_i128:
6259 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6260 ; GFX10-NEXT: v_not_b32_e32 v9, v8
6261 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
6262 ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 31, v1
6263 ; GFX10-NEXT: v_and_b32_e32 v19, 0x7f, v8
6264 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
6265 ; GFX10-NEXT: v_and_b32_e32 v18, 0x7f, v9
6266 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v10
6267 ; GFX10-NEXT: v_sub_nc_u32_e32 v16, 64, v19
6268 ; GFX10-NEXT: v_subrev_nc_u32_e32 v21, 64, v19
6269 ; GFX10-NEXT: v_sub_nc_u32_e32 v10, 64, v18
6270 ; GFX10-NEXT: v_subrev_nc_u32_e32 v20, 64, v18
6271 ; GFX10-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3]
6272 ; GFX10-NEXT: v_lshrrev_b64 v[12:13], v19, v[4:5]
6273 ; GFX10-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7]
6274 ; GFX10-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1]
6275 ; GFX10-NEXT: v_lshlrev_b64 v[14:15], v18, v[0:1]
6276 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1]
6277 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18
6278 ; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v19
6279 ; GFX10-NEXT: v_or_b32_e32 v12, v12, v16
6280 ; GFX10-NEXT: v_or_b32_e32 v10, v10, v8
6281 ; GFX10-NEXT: v_or_b32_e32 v11, v11, v9
6282 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], v21, v[6:7]
6283 ; GFX10-NEXT: v_or_b32_e32 v13, v13, v17
6284 ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v19
6285 ; GFX10-NEXT: v_cndmask_b32_e32 v10, v0, v10, vcc_lo
6286 ; GFX10-NEXT: v_cndmask_b32_e32 v11, v1, v11, vcc_lo
6287 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v19, v[6:7]
6288 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v12, s4
6289 ; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v18
6290 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v9, v13, s4
6291 ; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v14, vcc_lo
6292 ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v15, vcc_lo
6293 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v4, s5
6294 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s6
6295 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s6
6296 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v6, v5, s5
6297 ; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v0, s4
6298 ; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v1, s4
6299 ; GFX10-NEXT: v_or_b32_e32 v0, v14, v4
6300 ; GFX10-NEXT: v_or_b32_e32 v1, v7, v5
6301 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v6
6302 ; GFX10-NEXT: v_or_b32_e32 v3, v3, v8
6303 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6305 ; GFX11-LABEL: v_fshr_i128:
6307 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6308 ; GFX11-NEXT: v_not_b32_e32 v9, v8
6309 ; GFX11-NEXT: v_lshrrev_b32_e32 v10, 31, v1
6310 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
6311 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
6312 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
6313 ; GFX11-NEXT: v_and_b32_e32 v18, 0x7f, v9
6314 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v10
6315 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
6316 ; GFX11-NEXT: v_lshlrev_b64 v[14:15], v18, v[0:1]
6317 ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18
6318 ; GFX11-NEXT: v_and_b32_e32 v19, 0x7f, v8
6319 ; GFX11-NEXT: v_sub_nc_u32_e32 v10, 64, v18
6320 ; GFX11-NEXT: v_subrev_nc_u32_e32 v20, 64, v18
6321 ; GFX11-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3]
6322 ; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v14, vcc_lo
6323 ; GFX11-NEXT: v_sub_nc_u32_e32 v16, 64, v19
6324 ; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1]
6325 ; GFX11-NEXT: v_subrev_nc_u32_e32 v21, 64, v19
6326 ; GFX11-NEXT: v_lshrrev_b64 v[12:13], v19, v[4:5]
6327 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1]
6328 ; GFX11-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7]
6329 ; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v19
6330 ; GFX11-NEXT: v_or_b32_e32 v10, v10, v8
6331 ; GFX11-NEXT: v_or_b32_e32 v11, v11, v9
6332 ; GFX11-NEXT: v_lshrrev_b64 v[8:9], v21, v[6:7]
6333 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v19
6334 ; GFX11-NEXT: v_or_b32_e32 v12, v12, v16
6335 ; GFX11-NEXT: v_or_b32_e32 v13, v13, v17
6336 ; GFX11-NEXT: v_dual_cndmask_b32 v10, v0, v10 :: v_dual_cndmask_b32 v11, v1, v11
6337 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], v19, v[6:7]
6338 ; GFX11-NEXT: v_cndmask_b32_e32 v7, 0, v15, vcc_lo
6339 ; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v12, s0
6340 ; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v18
6341 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v9, v13, s0
6342 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
6343 ; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, v4, s1
6344 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s2
6345 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s2
6346 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
6347 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v6, v5, s1
6348 ; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v0, s0
6349 ; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v1, s0
6350 ; GFX11-NEXT: v_or_b32_e32 v0, v14, v4
6351 ; GFX11-NEXT: v_or_b32_e32 v1, v7, v5
6352 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6353 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v6
6354 ; GFX11-NEXT: v_or_b32_e32 v3, v3, v8
6355 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6356 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
; v_fshr_i128_ssv: calls @llvm.fshr.i128 with %lhs and %rhs marked inreg and a
; non-inreg %amt, then bitcasts the i128 result to <4 x float> for the return.
; In the checked output the amount is read from v0 and masked with 0x7f (both
; the amount itself and its bitwise NOT), and the inreg operands are read from
; scalar registers s[0:7].
; The GFX6/GFX8/GFX9/GFX10/GFX11 check lines are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
6360 define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, i128 %amt) {
6361 ; GFX6-LABEL: v_fshr_i128_ssv:
6363 ; GFX6-NEXT: v_and_b32_e32 v6, 0x7f, v0
6364 ; GFX6-NEXT: v_not_b32_e32 v0, v0
6365 ; GFX6-NEXT: s_mov_b32 s9, 0
6366 ; GFX6-NEXT: v_and_b32_e32 v7, 0x7f, v0
6367 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6368 ; GFX6-NEXT: s_lshr_b32 s8, s1, 31
6369 ; GFX6-NEXT: s_lshl_b64 s[10:11], s[0:1], 1
6370 ; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9]
6371 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v7
6372 ; GFX6-NEXT: v_lshr_b64 v[0:1], s[10:11], v0
6373 ; GFX6-NEXT: v_lshl_b64 v[2:3], s[0:1], v7
6374 ; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 64, v7
6375 ; GFX6-NEXT: v_lshl_b64 v[4:5], s[10:11], v7
6376 ; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
6377 ; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
6378 ; GFX6-NEXT: v_lshl_b64 v[0:1], s[10:11], v8
6379 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
6380 ; GFX6-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc
6381 ; GFX6-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
6382 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6383 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6384 ; GFX6-NEXT: v_mov_b32_e32 v2, s0
6385 ; GFX6-NEXT: v_mov_b32_e32 v3, s1
6386 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
6387 ; GFX6-NEXT: v_cndmask_b32_e32 v7, v0, v2, vcc
6388 ; GFX6-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc
6389 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v6
6390 ; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v6
6391 ; GFX6-NEXT: v_lshl_b64 v[2:3], s[6:7], v2
6392 ; GFX6-NEXT: v_subrev_i32_e32 v11, vcc, 64, v6
6393 ; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
6394 ; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
6395 ; GFX6-NEXT: v_lshr_b64 v[0:1], s[6:7], v11
6396 ; GFX6-NEXT: v_lshr_b64 v[4:5], s[6:7], v6
6397 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
6398 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6399 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6400 ; GFX6-NEXT: v_mov_b32_e32 v2, s4
6401 ; GFX6-NEXT: v_mov_b32_e32 v3, s5
6402 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6
6403 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
6404 ; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
6405 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
6406 ; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
6407 ; GFX6-NEXT: v_or_b32_e32 v0, v8, v0
6408 ; GFX6-NEXT: v_or_b32_e32 v1, v9, v1
6409 ; GFX6-NEXT: v_or_b32_e32 v2, v7, v2
6410 ; GFX6-NEXT: v_or_b32_e32 v3, v10, v3
6411 ; GFX6-NEXT: ; return to shader part epilog
6413 ; GFX8-LABEL: v_fshr_i128_ssv:
6415 ; GFX8-NEXT: v_and_b32_e32 v6, 0x7f, v0
6416 ; GFX8-NEXT: v_not_b32_e32 v0, v0
6417 ; GFX8-NEXT: s_mov_b32 s9, 0
6418 ; GFX8-NEXT: v_and_b32_e32 v7, 0x7f, v0
6419 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6420 ; GFX8-NEXT: s_lshr_b32 s8, s1, 31
6421 ; GFX8-NEXT: s_lshl_b64 s[10:11], s[0:1], 1
6422 ; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9]
6423 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v7
6424 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, s[10:11]
6425 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], v7, s[0:1]
6426 ; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 64, v7
6427 ; GFX8-NEXT: v_lshlrev_b64 v[4:5], v7, s[10:11]
6428 ; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
6429 ; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
6430 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v8, s[10:11]
6431 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
6432 ; GFX8-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc
6433 ; GFX8-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
6434 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6435 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6436 ; GFX8-NEXT: v_mov_b32_e32 v2, s0
6437 ; GFX8-NEXT: v_mov_b32_e32 v3, s1
6438 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
6439 ; GFX8-NEXT: v_cndmask_b32_e32 v7, v0, v2, vcc
6440 ; GFX8-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc
6441 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v6
6442 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v6, s[4:5]
6443 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, s[6:7]
6444 ; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, 64, v6
6445 ; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
6446 ; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
6447 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v11, s[6:7]
6448 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], v6, s[6:7]
6449 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
6450 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6451 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6452 ; GFX8-NEXT: v_mov_b32_e32 v2, s4
6453 ; GFX8-NEXT: v_mov_b32_e32 v3, s5
6454 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6
6455 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
6456 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
6457 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
6458 ; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
6459 ; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
6460 ; GFX8-NEXT: v_or_b32_e32 v1, v9, v1
6461 ; GFX8-NEXT: v_or_b32_e32 v2, v7, v2
6462 ; GFX8-NEXT: v_or_b32_e32 v3, v10, v3
6463 ; GFX8-NEXT: ; return to shader part epilog
6465 ; GFX9-LABEL: v_fshr_i128_ssv:
6467 ; GFX9-NEXT: v_and_b32_e32 v6, 0x7f, v0
6468 ; GFX9-NEXT: v_not_b32_e32 v0, v0
6469 ; GFX9-NEXT: s_mov_b32 s9, 0
6470 ; GFX9-NEXT: v_and_b32_e32 v7, 0x7f, v0
6471 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6472 ; GFX9-NEXT: s_lshr_b32 s8, s1, 31
6473 ; GFX9-NEXT: s_lshl_b64 s[10:11], s[0:1], 1
6474 ; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9]
6475 ; GFX9-NEXT: v_sub_u32_e32 v0, 64, v7
6476 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, s[10:11]
6477 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v7, s[0:1]
6478 ; GFX9-NEXT: v_subrev_u32_e32 v8, 64, v7
6479 ; GFX9-NEXT: v_lshlrev_b64 v[4:5], v7, s[10:11]
6480 ; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
6481 ; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
6482 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v8, s[10:11]
6483 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
6484 ; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc
6485 ; GFX9-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
6486 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6487 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6488 ; GFX9-NEXT: v_mov_b32_e32 v2, s0
6489 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
6490 ; GFX9-NEXT: v_mov_b32_e32 v3, s1
6491 ; GFX9-NEXT: v_cndmask_b32_e32 v7, v0, v2, vcc
6492 ; GFX9-NEXT: v_sub_u32_e32 v2, 64, v6
6493 ; GFX9-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc
6494 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v6, s[4:5]
6495 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, s[6:7]
6496 ; GFX9-NEXT: v_subrev_u32_e32 v11, 64, v6
6497 ; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
6498 ; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
6499 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v11, s[6:7]
6500 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v6, s[6:7]
6501 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
6502 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6503 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6504 ; GFX9-NEXT: v_mov_b32_e32 v2, s4
6505 ; GFX9-NEXT: v_mov_b32_e32 v3, s5
6506 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6
6507 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
6508 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
6509 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
6510 ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
6511 ; GFX9-NEXT: v_or_b32_e32 v0, v8, v0
6512 ; GFX9-NEXT: v_or_b32_e32 v1, v9, v1
6513 ; GFX9-NEXT: v_or_b32_e32 v2, v7, v2
6514 ; GFX9-NEXT: v_or_b32_e32 v3, v10, v3
6515 ; GFX9-NEXT: ; return to shader part epilog
6517 ; GFX10-LABEL: v_fshr_i128_ssv:
6519 ; GFX10-NEXT: v_not_b32_e32 v1, v0
6520 ; GFX10-NEXT: v_and_b32_e32 v13, 0x7f, v0
6521 ; GFX10-NEXT: s_mov_b32 s9, 0
6522 ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6523 ; GFX10-NEXT: s_lshr_b32 s8, s1, 31
6524 ; GFX10-NEXT: v_and_b32_e32 v12, 0x7f, v1
6525 ; GFX10-NEXT: v_sub_nc_u32_e32 v8, 64, v13
6526 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
6527 ; GFX10-NEXT: s_or_b64 s[8:9], s[2:3], s[8:9]
6528 ; GFX10-NEXT: v_subrev_nc_u32_e32 v14, 64, v13
6529 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v12
6530 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v12, s[8:9]
6531 ; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v12
6532 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], v13, s[4:5]
6533 ; GFX10-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7]
6534 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1]
6535 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12
6536 ; GFX10-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1]
6537 ; GFX10-NEXT: v_lshlrev_b64 v[6:7], v12, s[0:1]
6538 ; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 64, v13
6539 ; GFX10-NEXT: v_or_b32_e32 v4, v4, v8
6540 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v0
6541 ; GFX10-NEXT: v_or_b32_e32 v3, v3, v1
6542 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v14, s[6:7]
6543 ; GFX10-NEXT: v_or_b32_e32 v5, v5, v9
6544 ; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 0, v13
6545 ; GFX10-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo
6546 ; GFX10-NEXT: v_cndmask_b32_e32 v10, v11, v3, vcc_lo
6547 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7]
6548 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0
6549 ; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v12
6550 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s0
6551 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo
6552 ; GFX10-NEXT: v_cndmask_b32_e32 v4, 0, v7, vcc_lo
6553 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, s1
6554 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v8, s8, s2
6555 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v10, s9, s2
6556 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s5, s1
6557 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0
6558 ; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0
6559 ; GFX10-NEXT: v_or_b32_e32 v0, v6, v0
6560 ; GFX10-NEXT: v_or_b32_e32 v1, v4, v1
6561 ; GFX10-NEXT: v_or_b32_e32 v2, v5, v2
6562 ; GFX10-NEXT: v_or_b32_e32 v3, v7, v3
6563 ; GFX10-NEXT: ; return to shader part epilog
6565 ; GFX11-LABEL: v_fshr_i128_ssv:
6567 ; GFX11-NEXT: v_not_b32_e32 v1, v0
6568 ; GFX11-NEXT: s_lshr_b32 s8, s1, 31
6569 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
6570 ; GFX11-NEXT: s_mov_b32 s9, 0
6571 ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6572 ; GFX11-NEXT: v_and_b32_e32 v12, 0x7f, v1
6573 ; GFX11-NEXT: s_or_b64 s[8:9], s[2:3], s[8:9]
6574 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6575 ; GFX11-NEXT: v_lshlrev_b64 v[6:7], v12, s[0:1]
6576 ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12
6577 ; GFX11-NEXT: v_and_b32_e32 v13, 0x7f, v0
6578 ; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v12
6579 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v12, s[8:9]
6580 ; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v12
6581 ; GFX11-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo
6582 ; GFX11-NEXT: v_sub_nc_u32_e32 v8, 64, v13
6583 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1]
6584 ; GFX11-NEXT: v_subrev_nc_u32_e32 v14, 64, v13
6585 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], v13, s[4:5]
6586 ; GFX11-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1]
6587 ; GFX11-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7]
6588 ; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v13
6589 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v0
6590 ; GFX11-NEXT: v_or_b32_e32 v3, v3, v1
6591 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], v14, s[6:7]
6592 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v13
6593 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v8
6594 ; GFX11-NEXT: v_or_b32_e32 v5, v5, v9
6595 ; GFX11-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo
6596 ; GFX11-NEXT: v_cndmask_b32_e32 v10, v11, v3, vcc_lo
6597 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7]
6598 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0
6599 ; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v12
6600 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v5, s0
6601 ; GFX11-NEXT: v_cndmask_b32_e32 v4, 0, v7, vcc_lo
6602 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6603 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s4, s1
6604 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v8, s8, s2
6605 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v10, s9, s2
6606 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s5, s1
6607 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0
6608 ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0
6609 ; GFX11-NEXT: v_or_b32_e32 v0, v6, v0
6610 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6611 ; GFX11-NEXT: v_or_b32_e32 v1, v4, v1
6612 ; GFX11-NEXT: v_or_b32_e32 v2, v5, v2
6613 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
6614 ; GFX11-NEXT: v_or_b32_e32 v3, v7, v3
6615 ; GFX11-NEXT: ; return to shader part epilog
6616 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
6617 %cast.result = bitcast i128 %result to <4 x float>
6618 ret <4 x float> %cast.result
; v_fshr_i128_svs: calls @llvm.fshr.i128 with %lhs and %amt marked inreg and a
; non-inreg %rhs, then bitcasts the i128 result to <4 x float> for the return.
; In the checked output the amount is read from s[4:5] and masked with 0x7f
; (s_and for the amount, s_andn2 / s_and_not1 for its complement), the
; left-shifted operand comes from s[0:3], and the right-shifted operand from
; v[0:3].
; The GFX6/GFX8/GFX9/GFX10/GFX11 check lines are autogenerated by
; utils/update_llc_test_checks.py; regenerate them rather than editing by hand.
6621 define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) {
6622 ; GFX6-LABEL: v_fshr_i128_svs:
6624 ; GFX6-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6625 ; GFX6-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6626 ; GFX6-NEXT: s_lshl_b64 s[8:9], s[0:1], 1
6627 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6628 ; GFX6-NEXT: s_lshr_b32 s0, s1, 31
6629 ; GFX6-NEXT: s_mov_b32 s1, 0
6630 ; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
6631 ; GFX6-NEXT: s_sub_i32 s7, s4, 64
6632 ; GFX6-NEXT: s_sub_i32 s5, 64, s4
6633 ; GFX6-NEXT: s_cmp_lt_u32 s4, 64
6634 ; GFX6-NEXT: s_cselect_b32 s12, 1, 0
6635 ; GFX6-NEXT: s_cmp_eq_u32 s4, 0
6636 ; GFX6-NEXT: s_cselect_b32 s13, 1, 0
6637 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[8:9], s4
6638 ; GFX6-NEXT: s_lshr_b64 s[10:11], s[8:9], s5
6639 ; GFX6-NEXT: s_lshl_b64 s[4:5], s[0:1], s4
6640 ; GFX6-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5]
6641 ; GFX6-NEXT: s_lshl_b64 s[8:9], s[8:9], s7
6642 ; GFX6-NEXT: s_cmp_lg_u32 s12, 0
6643 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
6644 ; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9]
6645 ; GFX6-NEXT: s_cmp_lg_u32 s13, 0
6646 ; GFX6-NEXT: s_cselect_b64 s[4:5], s[0:1], s[4:5]
6647 ; GFX6-NEXT: s_sub_i32 s0, s6, 64
6648 ; GFX6-NEXT: s_sub_i32 s1, 64, s6
6649 ; GFX6-NEXT: s_cmp_lt_u32 s6, 64
6650 ; GFX6-NEXT: s_cselect_b32 s7, 1, 0
6651 ; GFX6-NEXT: s_cmp_eq_u32 s6, 0
6652 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s6
6653 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s1
6654 ; GFX6-NEXT: s_cselect_b32 s8, 1, 0
6655 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], s6
6656 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], s0
6657 ; GFX6-NEXT: s_and_b32 s0, 1, s7
6658 ; GFX6-NEXT: v_or_b32_e32 v4, v4, v6
6659 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v7
6660 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
6661 ; GFX6-NEXT: s_and_b32 s0, 1, s8
6662 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
6663 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
6664 ; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
6665 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1]
6666 ; GFX6-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1]
6667 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
6668 ; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
6669 ; GFX6-NEXT: v_or_b32_e32 v0, s2, v0
6670 ; GFX6-NEXT: v_or_b32_e32 v1, s3, v1
6671 ; GFX6-NEXT: v_or_b32_e32 v2, s4, v2
6672 ; GFX6-NEXT: v_or_b32_e32 v3, s5, v3
6673 ; GFX6-NEXT: ; return to shader part epilog
6675 ; GFX8-LABEL: v_fshr_i128_svs:
6677 ; GFX8-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6678 ; GFX8-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6679 ; GFX8-NEXT: s_lshl_b64 s[8:9], s[0:1], 1
6680 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6681 ; GFX8-NEXT: s_lshr_b32 s0, s1, 31
6682 ; GFX8-NEXT: s_mov_b32 s1, 0
6683 ; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
6684 ; GFX8-NEXT: s_sub_i32 s7, s4, 64
6685 ; GFX8-NEXT: s_sub_i32 s5, 64, s4
6686 ; GFX8-NEXT: s_cmp_lt_u32 s4, 64
6687 ; GFX8-NEXT: s_cselect_b32 s12, 1, 0
6688 ; GFX8-NEXT: s_cmp_eq_u32 s4, 0
6689 ; GFX8-NEXT: s_cselect_b32 s13, 1, 0
6690 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[8:9], s4
6691 ; GFX8-NEXT: s_lshr_b64 s[10:11], s[8:9], s5
6692 ; GFX8-NEXT: s_lshl_b64 s[4:5], s[0:1], s4
6693 ; GFX8-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5]
6694 ; GFX8-NEXT: s_lshl_b64 s[8:9], s[8:9], s7
6695 ; GFX8-NEXT: s_cmp_lg_u32 s12, 0
6696 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
6697 ; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9]
6698 ; GFX8-NEXT: s_cmp_lg_u32 s13, 0
6699 ; GFX8-NEXT: s_cselect_b64 s[4:5], s[0:1], s[4:5]
6700 ; GFX8-NEXT: s_sub_i32 s0, s6, 64
6701 ; GFX8-NEXT: s_sub_i32 s1, 64, s6
6702 ; GFX8-NEXT: s_cmp_lt_u32 s6, 64
6703 ; GFX8-NEXT: s_cselect_b32 s7, 1, 0
6704 ; GFX8-NEXT: s_cmp_eq_u32 s6, 0
6705 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1]
6706 ; GFX8-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3]
6707 ; GFX8-NEXT: s_cselect_b32 s8, 1, 0
6708 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], s6, v[2:3]
6709 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], s0, v[2:3]
6710 ; GFX8-NEXT: s_and_b32 s0, 1, s7
6711 ; GFX8-NEXT: v_or_b32_e32 v4, v4, v6
6712 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v7
6713 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
6714 ; GFX8-NEXT: s_and_b32 s0, 1, s8
6715 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
6716 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
6717 ; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
6718 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1]
6719 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1]
6720 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
6721 ; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
6722 ; GFX8-NEXT: v_or_b32_e32 v0, s2, v0
6723 ; GFX8-NEXT: v_or_b32_e32 v1, s3, v1
6724 ; GFX8-NEXT: v_or_b32_e32 v2, s4, v2
6725 ; GFX8-NEXT: v_or_b32_e32 v3, s5, v3
6726 ; GFX8-NEXT: ; return to shader part epilog
6728 ; GFX9-LABEL: v_fshr_i128_svs:
6730 ; GFX9-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6731 ; GFX9-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6732 ; GFX9-NEXT: s_lshl_b64 s[8:9], s[0:1], 1
6733 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6734 ; GFX9-NEXT: s_lshr_b32 s0, s1, 31
6735 ; GFX9-NEXT: s_mov_b32 s1, 0
6736 ; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
6737 ; GFX9-NEXT: s_sub_i32 s7, s4, 64
6738 ; GFX9-NEXT: s_sub_i32 s5, 64, s4
6739 ; GFX9-NEXT: s_cmp_lt_u32 s4, 64
6740 ; GFX9-NEXT: s_cselect_b32 s12, 1, 0
6741 ; GFX9-NEXT: s_cmp_eq_u32 s4, 0
6742 ; GFX9-NEXT: s_cselect_b32 s13, 1, 0
6743 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[8:9], s4
6744 ; GFX9-NEXT: s_lshr_b64 s[10:11], s[8:9], s5
6745 ; GFX9-NEXT: s_lshl_b64 s[4:5], s[0:1], s4
6746 ; GFX9-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5]
6747 ; GFX9-NEXT: s_lshl_b64 s[8:9], s[8:9], s7
6748 ; GFX9-NEXT: s_cmp_lg_u32 s12, 0
6749 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
6750 ; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9]
6751 ; GFX9-NEXT: s_cmp_lg_u32 s13, 0
6752 ; GFX9-NEXT: s_cselect_b64 s[4:5], s[0:1], s[4:5]
6753 ; GFX9-NEXT: s_sub_i32 s0, s6, 64
6754 ; GFX9-NEXT: s_sub_i32 s1, 64, s6
6755 ; GFX9-NEXT: s_cmp_lt_u32 s6, 64
6756 ; GFX9-NEXT: s_cselect_b32 s7, 1, 0
6757 ; GFX9-NEXT: s_cmp_eq_u32 s6, 0
6758 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1]
6759 ; GFX9-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3]
6760 ; GFX9-NEXT: s_cselect_b32 s8, 1, 0
6761 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], s6, v[2:3]
6762 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], s0, v[2:3]
6763 ; GFX9-NEXT: s_and_b32 s0, 1, s7
6764 ; GFX9-NEXT: v_or_b32_e32 v4, v4, v6
6765 ; GFX9-NEXT: v_or_b32_e32 v5, v5, v7
6766 ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
6767 ; GFX9-NEXT: s_and_b32 s0, 1, s8
6768 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
6769 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
6770 ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
6771 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1]
6772 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1]
6773 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
6774 ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
6775 ; GFX9-NEXT: v_or_b32_e32 v0, s2, v0
6776 ; GFX9-NEXT: v_or_b32_e32 v1, s3, v1
6777 ; GFX9-NEXT: v_or_b32_e32 v2, s4, v2
6778 ; GFX9-NEXT: v_or_b32_e32 v3, s5, v3
6779 ; GFX9-NEXT: ; return to shader part epilog
6781 ; GFX10-LABEL: v_fshr_i128_svs:
6783 ; GFX10-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6784 ; GFX10-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6785 ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6786 ; GFX10-NEXT: s_lshr_b32 s8, s1, 31
6787 ; GFX10-NEXT: s_mov_b32 s9, 0
6788 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
6789 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
6790 ; GFX10-NEXT: s_sub_i32 s7, s4, 64
6791 ; GFX10-NEXT: s_sub_i32 s5, 64, s4
6792 ; GFX10-NEXT: s_cmp_lt_u32 s4, 64
6793 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1]
6794 ; GFX10-NEXT: s_cselect_b32 s12, 1, 0
6795 ; GFX10-NEXT: s_cmp_eq_u32 s4, 0
6796 ; GFX10-NEXT: s_cselect_b32 s13, 1, 0
6797 ; GFX10-NEXT: s_lshr_b64 s[8:9], s[0:1], s5
6798 ; GFX10-NEXT: s_lshl_b64 s[10:11], s[2:3], s4
6799 ; GFX10-NEXT: s_lshl_b64 s[4:5], s[0:1], s4
6800 ; GFX10-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11]
6801 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s7
6802 ; GFX10-NEXT: s_cmp_lg_u32 s12, 0
6803 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], 0
6804 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1]
6805 ; GFX10-NEXT: s_cmp_lg_u32 s13, 0
6806 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
6807 ; GFX10-NEXT: s_sub_i32 s0, 64, s6
6808 ; GFX10-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3]
6809 ; GFX10-NEXT: s_sub_i32 s0, s6, 64
6810 ; GFX10-NEXT: s_cmp_lt_u32 s6, 64
6811 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3]
6812 ; GFX10-NEXT: s_cselect_b32 s1, 1, 0
6813 ; GFX10-NEXT: s_cmp_eq_u32 s6, 0
6814 ; GFX10-NEXT: v_or_b32_e32 v4, v4, v6
6815 ; GFX10-NEXT: s_cselect_b32 s7, 1, 0
6816 ; GFX10-NEXT: s_and_b32 s0, 1, s1
6817 ; GFX10-NEXT: v_or_b32_e32 v5, v5, v7
6818 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
6819 ; GFX10-NEXT: s_and_b32 s0, 1, s7
6820 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], s6, v[2:3]
6821 ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
6822 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc_lo
6823 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc_lo
6824 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc_lo
6825 ; GFX10-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc_lo
6826 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, v0, s0
6827 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0
6828 ; GFX10-NEXT: v_or_b32_e32 v2, s2, v2
6829 ; GFX10-NEXT: v_or_b32_e32 v3, s3, v3
6830 ; GFX10-NEXT: v_or_b32_e32 v0, s4, v0
6831 ; GFX10-NEXT: v_or_b32_e32 v1, s5, v1
6832 ; GFX10-NEXT: ; return to shader part epilog
6834 ; GFX11-LABEL: v_fshr_i128_svs:
6836 ; GFX11-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6837 ; GFX11-NEXT: s_and_not1_b64 s[4:5], 0x7f, s[4:5]
6838 ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6839 ; GFX11-NEXT: s_lshr_b32 s8, s1, 31
6840 ; GFX11-NEXT: s_mov_b32 s9, 0
6841 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
6842 ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
6843 ; GFX11-NEXT: s_sub_i32 s7, s4, 64
6844 ; GFX11-NEXT: s_sub_i32 s5, 64, s4
6845 ; GFX11-NEXT: s_cmp_lt_u32 s4, 64
6846 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1]
6847 ; GFX11-NEXT: s_cselect_b32 s12, 1, 0
6848 ; GFX11-NEXT: s_cmp_eq_u32 s4, 0
6849 ; GFX11-NEXT: s_cselect_b32 s13, 1, 0
6850 ; GFX11-NEXT: s_lshr_b64 s[8:9], s[0:1], s5
6851 ; GFX11-NEXT: s_lshl_b64 s[10:11], s[2:3], s4
6852 ; GFX11-NEXT: s_lshl_b64 s[4:5], s[0:1], s4
6853 ; GFX11-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11]
6854 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s7
6855 ; GFX11-NEXT: s_cmp_lg_u32 s12, 0
6856 ; GFX11-NEXT: s_cselect_b64 s[4:5], s[4:5], 0
6857 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1]
6858 ; GFX11-NEXT: s_cmp_lg_u32 s13, 0
6859 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
6860 ; GFX11-NEXT: s_sub_i32 s0, 64, s6
6861 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
6862 ; GFX11-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3]
6863 ; GFX11-NEXT: s_sub_i32 s0, s6, 64
6864 ; GFX11-NEXT: s_cmp_lt_u32 s6, 64
6865 ; GFX11-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3]
6866 ; GFX11-NEXT: s_cselect_b32 s1, 1, 0
6867 ; GFX11-NEXT: s_cmp_eq_u32 s6, 0
6868 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v6
6869 ; GFX11-NEXT: s_cselect_b32 s7, 1, 0
6870 ; GFX11-NEXT: s_and_b32 s0, 1, s1
6871 ; GFX11-NEXT: v_or_b32_e32 v5, v5, v7
6872 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
6873 ; GFX11-NEXT: s_and_b32 s0, 1, s7
6874 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], s6, v[2:3]
6875 ; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
6876 ; GFX11-NEXT: v_dual_cndmask_b32 v4, v8, v4 :: v_dual_cndmask_b32 v5, v9, v5
6877 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
6878 ; GFX11-NEXT: v_dual_cndmask_b32 v2, 0, v2 :: v_dual_cndmask_b32 v3, 0, v3
6879 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, v0, s0
6880 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
6881 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0
6882 ; GFX11-NEXT: v_or_b32_e32 v2, s2, v2
6883 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6884 ; GFX11-NEXT: v_or_b32_e32 v3, s3, v3
6885 ; GFX11-NEXT: v_or_b32_e32 v0, s4, v0
6886 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
6887 ; GFX11-NEXT: v_or_b32_e32 v1, s5, v1
6888 ; GFX11-NEXT: ; return to shader part epilog
6889 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
6890 %cast.result = bitcast i128 %result to <4 x float>
6891 ret <4 x float> %cast.result
; i128 funnel shift right with mixed operand placement, returned as <4 x float>
; through a bitcast. %lhs is passed without inreg and the expected code reads it
; from v[0:3]; %rhs and %amt are inreg and the expected code reads them from
; s[0:3] and s[4:5].
;
; Shape of the expected lowering, as shown by the check lines for every target:
;   * the amount is masked with 0x7f, and a second copy is inverted and masked
;     with 0x7f (s_and_b64 / s_andn2_b64, spelled s_and_not1_b64 on gfx11);
;   * %lhs is first shifted left by one, then shifted left by the inverted
;     amount using vector shifts;
;   * %rhs is shifted right by the masked amount using scalar shifts;
;   * each 128-bit shift is built from 64-bit pieces, selected by comparing the
;     shift count against 64 and against 0;
;   * the two halves are combined with four v_or_b32 instructions.
;
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
6894 define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) {
6895 ; GFX6-LABEL: v_fshr_i128_vss:
6897 ; GFX6-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6898 ; GFX6-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6899 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
6900 ; GFX6-NEXT: s_sub_i32 s5, s4, 64
6901 ; GFX6-NEXT: s_sub_i32 s7, 64, s4
6902 ; GFX6-NEXT: v_lshl_b64 v[4:5], v[0:1], 1
6903 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 31, v1
6904 ; GFX6-NEXT: s_cmp_lt_u32 s4, 64
6905 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v0
6906 ; GFX6-NEXT: s_cselect_b32 s8, 1, 0
6907 ; GFX6-NEXT: s_cmp_eq_u32 s4, 0
6908 ; GFX6-NEXT: s_cselect_b32 s9, 1, 0
6909 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], s7
6910 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s4
6911 ; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], s4
6912 ; GFX6-NEXT: s_and_b32 s4, 1, s8
6913 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
6914 ; GFX6-NEXT: s_and_b32 s4, 1, s9
6915 ; GFX6-NEXT: s_sub_i32 s10, s6, 64
6916 ; GFX6-NEXT: s_sub_i32 s8, 64, s6
6917 ; GFX6-NEXT: v_or_b32_e32 v6, v0, v6
6918 ; GFX6-NEXT: v_or_b32_e32 v7, v1, v7
6919 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[4:5], s5
6920 ; GFX6-NEXT: s_cmp_lt_u32 s6, 64
6921 ; GFX6-NEXT: s_cselect_b32 s11, 1, 0
6922 ; GFX6-NEXT: s_cmp_eq_u32 s6, 0
6923 ; GFX6-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
6924 ; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc
6925 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
6926 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
6927 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
6928 ; GFX6-NEXT: s_cselect_b32 s12, 1, 0
6929 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[2:3], s6
6930 ; GFX6-NEXT: s_lshr_b64 s[6:7], s[0:1], s6
6931 ; GFX6-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
6932 ; GFX6-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
6933 ; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
6934 ; GFX6-NEXT: s_cmp_lg_u32 s11, 0
6935 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3]
6936 ; GFX6-NEXT: s_cmp_lg_u32 s12, 0
6937 ; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
6938 ; GFX6-NEXT: s_cmp_lg_u32 s11, 0
6939 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
6940 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
6941 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[4:5], 0
6942 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v4
6943 ; GFX6-NEXT: v_or_b32_e32 v1, s1, v5
6944 ; GFX6-NEXT: v_or_b32_e32 v2, s2, v2
6945 ; GFX6-NEXT: v_or_b32_e32 v3, s3, v3
6946 ; GFX6-NEXT: ; return to shader part epilog
6948 ; GFX8-LABEL: v_fshr_i128_vss:
6950 ; GFX8-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6951 ; GFX8-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6952 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
6953 ; GFX8-NEXT: s_sub_i32 s5, s4, 64
6954 ; GFX8-NEXT: s_sub_i32 s7, 64, s4
6955 ; GFX8-NEXT: v_lshlrev_b64 v[4:5], 1, v[0:1]
6956 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 31, v1
6957 ; GFX8-NEXT: s_cmp_lt_u32 s4, 64
6958 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v0
6959 ; GFX8-NEXT: s_cselect_b32 s8, 1, 0
6960 ; GFX8-NEXT: s_cmp_eq_u32 s4, 0
6961 ; GFX8-NEXT: s_cselect_b32 s9, 1, 0
6962 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], s7, v[4:5]
6963 ; GFX8-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3]
6964 ; GFX8-NEXT: v_lshlrev_b64 v[8:9], s4, v[4:5]
6965 ; GFX8-NEXT: s_and_b32 s4, 1, s8
6966 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
6967 ; GFX8-NEXT: s_and_b32 s4, 1, s9
6968 ; GFX8-NEXT: s_sub_i32 s10, s6, 64
6969 ; GFX8-NEXT: s_sub_i32 s8, 64, s6
6970 ; GFX8-NEXT: v_or_b32_e32 v6, v0, v6
6971 ; GFX8-NEXT: v_or_b32_e32 v7, v1, v7
6972 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], s5, v[4:5]
6973 ; GFX8-NEXT: s_cmp_lt_u32 s6, 64
6974 ; GFX8-NEXT: s_cselect_b32 s11, 1, 0
6975 ; GFX8-NEXT: s_cmp_eq_u32 s6, 0
6976 ; GFX8-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
6977 ; GFX8-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc
6978 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
6979 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
6980 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
6981 ; GFX8-NEXT: s_cselect_b32 s12, 1, 0
6982 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[2:3], s6
6983 ; GFX8-NEXT: s_lshr_b64 s[6:7], s[0:1], s6
6984 ; GFX8-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
6985 ; GFX8-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
6986 ; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
6987 ; GFX8-NEXT: s_cmp_lg_u32 s11, 0
6988 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3]
6989 ; GFX8-NEXT: s_cmp_lg_u32 s12, 0
6990 ; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
6991 ; GFX8-NEXT: s_cmp_lg_u32 s11, 0
6992 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
6993 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
6994 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[4:5], 0
6995 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v4
6996 ; GFX8-NEXT: v_or_b32_e32 v1, s1, v5
6997 ; GFX8-NEXT: v_or_b32_e32 v2, s2, v2
6998 ; GFX8-NEXT: v_or_b32_e32 v3, s3, v3
6999 ; GFX8-NEXT: ; return to shader part epilog
7001 ; GFX9-LABEL: v_fshr_i128_vss:
7003 ; GFX9-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
7004 ; GFX9-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
7005 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
7006 ; GFX9-NEXT: s_sub_i32 s5, s4, 64
7007 ; GFX9-NEXT: s_sub_i32 s7, 64, s4
7008 ; GFX9-NEXT: v_lshlrev_b64 v[4:5], 1, v[0:1]
7009 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 31, v1
7010 ; GFX9-NEXT: s_cmp_lt_u32 s4, 64
7011 ; GFX9-NEXT: v_or_b32_e32 v2, v2, v0
7012 ; GFX9-NEXT: s_cselect_b32 s8, 1, 0
7013 ; GFX9-NEXT: s_cmp_eq_u32 s4, 0
7014 ; GFX9-NEXT: s_cselect_b32 s9, 1, 0
7015 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], s7, v[4:5]
7016 ; GFX9-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3]
7017 ; GFX9-NEXT: v_lshlrev_b64 v[8:9], s4, v[4:5]
7018 ; GFX9-NEXT: s_and_b32 s4, 1, s8
7019 ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
7020 ; GFX9-NEXT: s_and_b32 s4, 1, s9
7021 ; GFX9-NEXT: s_sub_i32 s10, s6, 64
7022 ; GFX9-NEXT: s_sub_i32 s8, 64, s6
7023 ; GFX9-NEXT: v_or_b32_e32 v6, v0, v6
7024 ; GFX9-NEXT: v_or_b32_e32 v7, v1, v7
7025 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], s5, v[4:5]
7026 ; GFX9-NEXT: s_cmp_lt_u32 s6, 64
7027 ; GFX9-NEXT: s_cselect_b32 s11, 1, 0
7028 ; GFX9-NEXT: s_cmp_eq_u32 s6, 0
7029 ; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
7030 ; GFX9-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc
7031 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
7032 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
7033 ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
7034 ; GFX9-NEXT: s_cselect_b32 s12, 1, 0
7035 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[2:3], s6
7036 ; GFX9-NEXT: s_lshr_b64 s[6:7], s[0:1], s6
7037 ; GFX9-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
7038 ; GFX9-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
7039 ; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
7040 ; GFX9-NEXT: s_cmp_lg_u32 s11, 0
7041 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3]
7042 ; GFX9-NEXT: s_cmp_lg_u32 s12, 0
7043 ; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
7044 ; GFX9-NEXT: s_cmp_lg_u32 s11, 0
7045 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
7046 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
7047 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[4:5], 0
7048 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v4
7049 ; GFX9-NEXT: v_or_b32_e32 v1, s1, v5
7050 ; GFX9-NEXT: v_or_b32_e32 v2, s2, v2
7051 ; GFX9-NEXT: v_or_b32_e32 v3, s3, v3
7052 ; GFX9-NEXT: ; return to shader part epilog
7054 ; GFX10-LABEL: v_fshr_i128_vss:
7056 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
7057 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 31, v1
7058 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
7059 ; GFX10-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
7060 ; GFX10-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
7061 ; GFX10-NEXT: s_sub_i32 s7, 64, s4
7062 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v4
7063 ; GFX10-NEXT: s_sub_i32 s5, s4, 64
7064 ; GFX10-NEXT: s_cmp_lt_u32 s4, 64
7065 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], s7, v[0:1]
7066 ; GFX10-NEXT: s_cselect_b32 s8, 1, 0
7067 ; GFX10-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3]
7068 ; GFX10-NEXT: s_cmp_eq_u32 s4, 0
7069 ; GFX10-NEXT: v_lshlrev_b64 v[8:9], s4, v[0:1]
7070 ; GFX10-NEXT: s_cselect_b32 s9, 1, 0
7071 ; GFX10-NEXT: s_and_b32 s4, 1, s8
7072 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1]
7073 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4
7074 ; GFX10-NEXT: v_or_b32_e32 v4, v4, v6
7075 ; GFX10-NEXT: v_or_b32_e32 v5, v5, v7
7076 ; GFX10-NEXT: s_and_b32 s4, 1, s9
7077 ; GFX10-NEXT: s_sub_i32 s10, s6, 64
7078 ; GFX10-NEXT: s_sub_i32 s7, 64, s6
7079 ; GFX10-NEXT: s_cmp_lt_u32 s6, 64
7080 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc_lo
7081 ; GFX10-NEXT: s_cselect_b32 s11, 1, 0
7082 ; GFX10-NEXT: s_cmp_eq_u32 s6, 0
7083 ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc_lo
7084 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
7085 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
7086 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4
7087 ; GFX10-NEXT: s_cselect_b32 s12, 1, 0
7088 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[0:1], s6
7089 ; GFX10-NEXT: s_lshl_b64 s[8:9], s[2:3], s7
7090 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[2:3], s6
7091 ; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
7092 ; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
7093 ; GFX10-NEXT: s_cmp_lg_u32 s11, 0
7094 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc_lo
7095 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
7096 ; GFX10-NEXT: s_cmp_lg_u32 s12, 0
7097 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc_lo
7098 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
7099 ; GFX10-NEXT: s_cmp_lg_u32 s11, 0
7100 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v6
7101 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[6:7], 0
7102 ; GFX10-NEXT: v_or_b32_e32 v1, s1, v7
7103 ; GFX10-NEXT: v_or_b32_e32 v2, s2, v2
7104 ; GFX10-NEXT: v_or_b32_e32 v3, s3, v3
7105 ; GFX10-NEXT: ; return to shader part epilog
7107 ; GFX11-LABEL: v_fshr_i128_vss:
7109 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
7110 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 31, v1
7111 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
7112 ; GFX11-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
7113 ; GFX11-NEXT: s_and_not1_b64 s[4:5], 0x7f, s[4:5]
7114 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
7115 ; GFX11-NEXT: s_sub_i32 s7, 64, s4
7116 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v4
7117 ; GFX11-NEXT: s_sub_i32 s5, s4, 64
7118 ; GFX11-NEXT: s_cmp_lt_u32 s4, 64
7119 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], s7, v[0:1]
7120 ; GFX11-NEXT: s_cselect_b32 s8, 1, 0
7121 ; GFX11-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3]
7122 ; GFX11-NEXT: s_cmp_eq_u32 s4, 0
7123 ; GFX11-NEXT: v_lshlrev_b64 v[8:9], s4, v[0:1]
7124 ; GFX11-NEXT: s_cselect_b32 s9, 1, 0
7125 ; GFX11-NEXT: s_and_b32 s4, 1, s8
7126 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1]
7127 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4
7128 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v6
7129 ; GFX11-NEXT: v_or_b32_e32 v5, v5, v7
7130 ; GFX11-NEXT: s_and_b32 s4, 1, s9
7131 ; GFX11-NEXT: s_sub_i32 s10, s6, 64
7132 ; GFX11-NEXT: s_sub_i32 s7, 64, s6
7133 ; GFX11-NEXT: s_cmp_lt_u32 s6, 64
7134 ; GFX11-NEXT: v_dual_cndmask_b32 v6, 0, v8 :: v_dual_cndmask_b32 v7, 0, v9
7135 ; GFX11-NEXT: s_cselect_b32 s11, 1, 0
7136 ; GFX11-NEXT: s_cmp_eq_u32 s6, 0
7137 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
7138 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4
7139 ; GFX11-NEXT: s_cselect_b32 s12, 1, 0
7140 ; GFX11-NEXT: s_lshr_b64 s[4:5], s[0:1], s6
7141 ; GFX11-NEXT: s_lshl_b64 s[8:9], s[2:3], s7
7142 ; GFX11-NEXT: s_lshr_b64 s[6:7], s[2:3], s6
7143 ; GFX11-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
7144 ; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
7145 ; GFX11-NEXT: s_cmp_lg_u32 s11, 0
7146 ; GFX11-NEXT: v_dual_cndmask_b32 v2, v0, v2 :: v_dual_cndmask_b32 v3, v1, v3
7147 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
7148 ; GFX11-NEXT: s_cmp_lg_u32 s12, 0
7149 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
7150 ; GFX11-NEXT: s_cmp_lg_u32 s11, 0
7151 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v6
7152 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[6:7], 0
7153 ; GFX11-NEXT: v_or_b32_e32 v1, s1, v7
7154 ; GFX11-NEXT: v_or_b32_e32 v2, s2, v2
7155 ; GFX11-NEXT: v_or_b32_e32 v3, s3, v3
7156 ; GFX11-NEXT: ; return to shader part epilog
7157 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
7158 %cast.result = bitcast i128 %result to <4 x float>
7159 ret <4 x float> %cast.result
; i128 funnel shift right by the constant 65, with both operands inreg.
;
; Because the amount is a compile-time constant, the expected code for every
; target contains no compares or selects: it is built only from constant
; shifts (left by 31 on a 32-bit register, right by 1 on a 64-bit pair), one
; zero materialisation (s_mov_b32 s4, 0) and two 64-bit ORs. The gfx6/gfx8/gfx9
; expectations are identical to each other; gfx10 and gfx11 compute the same
; values but use s[6:9] as temporaries and order the shifts and ORs differently.
;
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
7162 define amdgpu_ps i128 @s_fshr_i128_65(i128 inreg %lhs, i128 inreg %rhs) {
7163 ; GFX6-LABEL: s_fshr_i128_65:
7165 ; GFX6-NEXT: s_mov_b32 s4, 0
7166 ; GFX6-NEXT: s_lshl_b32 s5, s0, 31
7167 ; GFX6-NEXT: s_lshl_b32 s3, s2, 31
7168 ; GFX6-NEXT: s_mov_b32 s2, s4
7169 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
7170 ; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[0:1]
7171 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[6:7], 1
7172 ; GFX6-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
7173 ; GFX6-NEXT: ; return to shader part epilog
7175 ; GFX8-LABEL: s_fshr_i128_65:
7177 ; GFX8-NEXT: s_mov_b32 s4, 0
7178 ; GFX8-NEXT: s_lshl_b32 s5, s0, 31
7179 ; GFX8-NEXT: s_lshl_b32 s3, s2, 31
7180 ; GFX8-NEXT: s_mov_b32 s2, s4
7181 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
7182 ; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[0:1]
7183 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[6:7], 1
7184 ; GFX8-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
7185 ; GFX8-NEXT: ; return to shader part epilog
7187 ; GFX9-LABEL: s_fshr_i128_65:
7189 ; GFX9-NEXT: s_mov_b32 s4, 0
7190 ; GFX9-NEXT: s_lshl_b32 s5, s0, 31
7191 ; GFX9-NEXT: s_lshl_b32 s3, s2, 31
7192 ; GFX9-NEXT: s_mov_b32 s2, s4
7193 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
7194 ; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[0:1]
7195 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[6:7], 1
7196 ; GFX9-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
7197 ; GFX9-NEXT: ; return to shader part epilog
7199 ; GFX10-LABEL: s_fshr_i128_65:
7201 ; GFX10-NEXT: s_mov_b32 s4, 0
7202 ; GFX10-NEXT: s_lshl_b32 s5, s0, 31
7203 ; GFX10-NEXT: s_lshl_b32 s3, s2, 31
7204 ; GFX10-NEXT: s_mov_b32 s2, s4
7205 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], 1
7206 ; GFX10-NEXT: s_lshr_b64 s[8:9], s[0:1], 1
7207 ; GFX10-NEXT: s_or_b64 s[0:1], s[4:5], s[6:7]
7208 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
7209 ; GFX10-NEXT: ; return to shader part epilog
7211 ; GFX11-LABEL: s_fshr_i128_65:
7213 ; GFX11-NEXT: s_mov_b32 s4, 0
7214 ; GFX11-NEXT: s_lshl_b32 s5, s0, 31
7215 ; GFX11-NEXT: s_lshl_b32 s3, s2, 31
7216 ; GFX11-NEXT: s_mov_b32 s2, s4
7217 ; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], 1
7218 ; GFX11-NEXT: s_lshr_b64 s[8:9], s[0:1], 1
7219 ; GFX11-NEXT: s_or_b64 s[0:1], s[4:5], s[6:7]
7220 ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
7221 ; GFX11-NEXT: ; return to shader part epilog
7222 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65)
; i128 funnel shift right by the constant 65, using the default calling
; convention with neither operand marked inreg. Each target's expected code
; starts with an s_waitcnt and ends with s_setpc_b64 s[30:31].
;
; The expected code differs by target in how the shifted-in bit is merged:
;   * gfx6 and gfx8 use a separate v_lshlrev_b32 by 31 followed by v_or_b32;
;   * gfx9, gfx10 and gfx11 fold the two into v_lshl_or_b32, at the cost of
;     two v_mov_b32 copies (v8 <- v2 before the shifts, v0 <- v4 at the end).
; In all cases the 64-bit halves are shifted right by one with a 64-bit shift.
;
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
7226 define i128 @v_fshr_i128_65(i128 %lhs, i128 %rhs) {
7227 ; GFX6-LABEL: v_fshr_i128_65:
7229 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7230 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 31, v0
7231 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 31, v2
7232 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[0:1], 1
7233 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[6:7], 1
7234 ; GFX6-NEXT: v_or_b32_e32 v3, v5, v3
7235 ; GFX6-NEXT: v_or_b32_e32 v1, v4, v1
7236 ; GFX6-NEXT: s_setpc_b64 s[30:31]
7238 ; GFX8-LABEL: v_fshr_i128_65:
7240 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7241 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 31, v0
7242 ; GFX8-NEXT: v_lshlrev_b32_e32 v5, 31, v2
7243 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1]
7244 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], 1, v[6:7]
7245 ; GFX8-NEXT: v_or_b32_e32 v3, v5, v3
7246 ; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
7247 ; GFX8-NEXT: s_setpc_b64 s[30:31]
7249 ; GFX9-LABEL: v_fshr_i128_65:
7251 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7252 ; GFX9-NEXT: v_mov_b32_e32 v8, v2
7253 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1]
7254 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7]
7255 ; GFX9-NEXT: v_lshl_or_b32 v3, v8, 31, v3
7256 ; GFX9-NEXT: v_lshl_or_b32 v1, v0, 31, v5
7257 ; GFX9-NEXT: v_mov_b32_e32 v0, v4
7258 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7260 ; GFX10-LABEL: v_fshr_i128_65:
7262 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7263 ; GFX10-NEXT: v_mov_b32_e32 v8, v2
7264 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7]
7265 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1]
7266 ; GFX10-NEXT: v_lshl_or_b32 v1, v0, 31, v5
7267 ; GFX10-NEXT: v_lshl_or_b32 v3, v8, 31, v3
7268 ; GFX10-NEXT: v_mov_b32_e32 v0, v4
7269 ; GFX10-NEXT: s_setpc_b64 s[30:31]
7271 ; GFX11-LABEL: v_fshr_i128_65:
7273 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7274 ; GFX11-NEXT: v_mov_b32_e32 v8, v2
7275 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7]
7276 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1]
7277 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7278 ; GFX11-NEXT: v_lshl_or_b32 v1, v0, 31, v5
7279 ; GFX11-NEXT: v_lshl_or_b32 v3, v8, 31, v3
7280 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
7281 ; GFX11-NEXT: v_mov_b32_e32 v0, v4
7282 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7283 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65)
7287 define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) {
7288 ; GFX6-LABEL: s_fshr_v2i128:
7290 ; GFX6-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f
7291 ; GFX6-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17]
7292 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
7293 ; GFX6-NEXT: s_lshr_b32 s24, s1, 31
7294 ; GFX6-NEXT: s_mov_b32 s25, 0
7295 ; GFX6-NEXT: s_lshl_b64 s[22:23], s[0:1], 1
7296 ; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[24:25]
7297 ; GFX6-NEXT: s_sub_i32 s19, s16, 64
7298 ; GFX6-NEXT: s_sub_i32 s17, 64, s16
7299 ; GFX6-NEXT: s_cmp_lt_u32 s16, 64
7300 ; GFX6-NEXT: s_cselect_b32 s24, 1, 0
7301 ; GFX6-NEXT: s_cmp_eq_u32 s16, 0
7302 ; GFX6-NEXT: s_cselect_b32 s28, 1, 0
7303 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[22:23], s16
7304 ; GFX6-NEXT: s_lshr_b64 s[26:27], s[22:23], s17
7305 ; GFX6-NEXT: s_lshl_b64 s[16:17], s[0:1], s16
7306 ; GFX6-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17]
7307 ; GFX6-NEXT: s_lshl_b64 s[22:23], s[22:23], s19
7308 ; GFX6-NEXT: s_cmp_lg_u32 s24, 0
7309 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
7310 ; GFX6-NEXT: s_cselect_b64 s[16:17], s[16:17], s[22:23]
7311 ; GFX6-NEXT: s_cmp_lg_u32 s28, 0
7312 ; GFX6-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17]
7313 ; GFX6-NEXT: s_sub_i32 s24, s18, 64
7314 ; GFX6-NEXT: s_sub_i32 s22, 64, s18
7315 ; GFX6-NEXT: s_cmp_lt_u32 s18, 64
7316 ; GFX6-NEXT: s_cselect_b32 s26, 1, 0
7317 ; GFX6-NEXT: s_cmp_eq_u32 s18, 0
7318 ; GFX6-NEXT: s_cselect_b32 s27, 1, 0
7319 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[10:11], s18
7320 ; GFX6-NEXT: s_lshr_b64 s[18:19], s[8:9], s18
7321 ; GFX6-NEXT: s_lshl_b64 s[22:23], s[10:11], s22
7322 ; GFX6-NEXT: s_or_b64 s[18:19], s[18:19], s[22:23]
7323 ; GFX6-NEXT: s_lshr_b64 s[10:11], s[10:11], s24
7324 ; GFX6-NEXT: s_cmp_lg_u32 s26, 0
7325 ; GFX6-NEXT: s_cselect_b64 s[10:11], s[18:19], s[10:11]
7326 ; GFX6-NEXT: s_cmp_lg_u32 s27, 0
7327 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11]
7328 ; GFX6-NEXT: s_cmp_lg_u32 s26, 0
7329 ; GFX6-NEXT: s_cselect_b64 s[10:11], s[0:1], 0
7330 ; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9]
7331 ; GFX6-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11]
7332 ; GFX6-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f
7333 ; GFX6-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21]
7334 ; GFX6-NEXT: s_lshl_b64 s[6:7], s[6:7], 1
7335 ; GFX6-NEXT: s_lshr_b32 s24, s5, 31
7336 ; GFX6-NEXT: s_lshl_b64 s[16:17], s[4:5], 1
7337 ; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[24:25]
7338 ; GFX6-NEXT: s_sub_i32 s9, s10, 64
7339 ; GFX6-NEXT: s_sub_i32 s11, 64, s10
7340 ; GFX6-NEXT: s_cmp_lt_u32 s10, 64
7341 ; GFX6-NEXT: s_cselect_b32 s20, 1, 0
7342 ; GFX6-NEXT: s_cmp_eq_u32 s10, 0
7343 ; GFX6-NEXT: s_cselect_b32 s21, 1, 0
7344 ; GFX6-NEXT: s_lshl_b64 s[6:7], s[16:17], s10
7345 ; GFX6-NEXT: s_lshr_b64 s[18:19], s[16:17], s11
7346 ; GFX6-NEXT: s_lshl_b64 s[10:11], s[4:5], s10
7347 ; GFX6-NEXT: s_or_b64 s[10:11], s[18:19], s[10:11]
7348 ; GFX6-NEXT: s_lshl_b64 s[16:17], s[16:17], s9
7349 ; GFX6-NEXT: s_cmp_lg_u32 s20, 0
7350 ; GFX6-NEXT: s_cselect_b64 s[6:7], s[6:7], 0
7351 ; GFX6-NEXT: s_cselect_b64 s[10:11], s[10:11], s[16:17]
7352 ; GFX6-NEXT: s_cmp_lg_u32 s21, 0
7353 ; GFX6-NEXT: s_cselect_b64 s[10:11], s[4:5], s[10:11]
7354 ; GFX6-NEXT: s_sub_i32 s18, s8, 64
7355 ; GFX6-NEXT: s_sub_i32 s16, 64, s8
7356 ; GFX6-NEXT: s_cmp_lt_u32 s8, 64
7357 ; GFX6-NEXT: s_cselect_b32 s19, 1, 0
7358 ; GFX6-NEXT: s_cmp_eq_u32 s8, 0
7359 ; GFX6-NEXT: s_cselect_b32 s20, 1, 0
7360 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[14:15], s8
7361 ; GFX6-NEXT: s_lshr_b64 s[8:9], s[12:13], s8
7362 ; GFX6-NEXT: s_lshl_b64 s[16:17], s[14:15], s16
7363 ; GFX6-NEXT: s_or_b64 s[8:9], s[8:9], s[16:17]
7364 ; GFX6-NEXT: s_lshr_b64 s[14:15], s[14:15], s18
7365 ; GFX6-NEXT: s_cmp_lg_u32 s19, 0
7366 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[14:15]
7367 ; GFX6-NEXT: s_cmp_lg_u32 s20, 0
7368 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9]
7369 ; GFX6-NEXT: s_cmp_lg_u32 s19, 0
7370 ; GFX6-NEXT: s_cselect_b64 s[12:13], s[4:5], 0
7371 ; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[8:9]
7372 ; GFX6-NEXT: s_or_b64 s[6:7], s[10:11], s[12:13]
7373 ; GFX6-NEXT: ; return to shader part epilog
7375 ; GFX8-LABEL: s_fshr_v2i128:
7377 ; GFX8-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f
7378 ; GFX8-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17]
7379 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
7380 ; GFX8-NEXT: s_lshr_b32 s24, s1, 31
7381 ; GFX8-NEXT: s_mov_b32 s25, 0
7382 ; GFX8-NEXT: s_lshl_b64 s[22:23], s[0:1], 1
7383 ; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[24:25]
7384 ; GFX8-NEXT: s_sub_i32 s19, s16, 64
7385 ; GFX8-NEXT: s_sub_i32 s17, 64, s16
7386 ; GFX8-NEXT: s_cmp_lt_u32 s16, 64
7387 ; GFX8-NEXT: s_cselect_b32 s24, 1, 0
7388 ; GFX8-NEXT: s_cmp_eq_u32 s16, 0
7389 ; GFX8-NEXT: s_cselect_b32 s28, 1, 0
7390 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[22:23], s16
7391 ; GFX8-NEXT: s_lshr_b64 s[26:27], s[22:23], s17
7392 ; GFX8-NEXT: s_lshl_b64 s[16:17], s[0:1], s16
7393 ; GFX8-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17]
7394 ; GFX8-NEXT: s_lshl_b64 s[22:23], s[22:23], s19
7395 ; GFX8-NEXT: s_cmp_lg_u32 s24, 0
7396 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
7397 ; GFX8-NEXT: s_cselect_b64 s[16:17], s[16:17], s[22:23]
7398 ; GFX8-NEXT: s_cmp_lg_u32 s28, 0
7399 ; GFX8-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17]
7400 ; GFX8-NEXT: s_sub_i32 s24, s18, 64
7401 ; GFX8-NEXT: s_sub_i32 s22, 64, s18
7402 ; GFX8-NEXT: s_cmp_lt_u32 s18, 64
7403 ; GFX8-NEXT: s_cselect_b32 s26, 1, 0
7404 ; GFX8-NEXT: s_cmp_eq_u32 s18, 0
7405 ; GFX8-NEXT: s_cselect_b32 s27, 1, 0
7406 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[10:11], s18
7407 ; GFX8-NEXT: s_lshr_b64 s[18:19], s[8:9], s18
7408 ; GFX8-NEXT: s_lshl_b64 s[22:23], s[10:11], s22
7409 ; GFX8-NEXT: s_or_b64 s[18:19], s[18:19], s[22:23]
7410 ; GFX8-NEXT: s_lshr_b64 s[10:11], s[10:11], s24
7411 ; GFX8-NEXT: s_cmp_lg_u32 s26, 0
7412 ; GFX8-NEXT: s_cselect_b64 s[10:11], s[18:19], s[10:11]
7413 ; GFX8-NEXT: s_cmp_lg_u32 s27, 0
7414 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11]
7415 ; GFX8-NEXT: s_cmp_lg_u32 s26, 0
7416 ; GFX8-NEXT: s_cselect_b64 s[10:11], s[0:1], 0
7417 ; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9]
7418 ; GFX8-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11]
7419 ; GFX8-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f
7420 ; GFX8-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21]
7421 ; GFX8-NEXT: s_lshl_b64 s[6:7], s[6:7], 1
7422 ; GFX8-NEXT: s_lshr_b32 s24, s5, 31
7423 ; GFX8-NEXT: s_lshl_b64 s[16:17], s[4:5], 1
7424 ; GFX8-NEXT: s_or_b64 s[4:5], s[6:7], s[24:25]
7425 ; GFX8-NEXT: s_sub_i32 s9, s10, 64
7426 ; GFX8-NEXT: s_sub_i32 s11, 64, s10
7427 ; GFX8-NEXT: s_cmp_lt_u32 s10, 64
7428 ; GFX8-NEXT: s_cselect_b32 s20, 1, 0
7429 ; GFX8-NEXT: s_cmp_eq_u32 s10, 0
7430 ; GFX8-NEXT: s_cselect_b32 s21, 1, 0
7431 ; GFX8-NEXT: s_lshl_b64 s[6:7], s[16:17], s10
7432 ; GFX8-NEXT: s_lshr_b64 s[18:19], s[16:17], s11
7433 ; GFX8-NEXT: s_lshl_b64 s[10:11], s[4:5], s10
7434 ; GFX8-NEXT: s_or_b64 s[10:11], s[18:19], s[10:11]
7435 ; GFX8-NEXT: s_lshl_b64 s[16:17], s[16:17], s9
7436 ; GFX8-NEXT: s_cmp_lg_u32 s20, 0
7437 ; GFX8-NEXT: s_cselect_b64 s[6:7], s[6:7], 0
7438 ; GFX8-NEXT: s_cselect_b64 s[10:11], s[10:11], s[16:17]
7439 ; GFX8-NEXT: s_cmp_lg_u32 s21, 0
7440 ; GFX8-NEXT: s_cselect_b64 s[10:11], s[4:5], s[10:11]
7441 ; GFX8-NEXT: s_sub_i32 s18, s8, 64
7442 ; GFX8-NEXT: s_sub_i32 s16, 64, s8
7443 ; GFX8-NEXT: s_cmp_lt_u32 s8, 64
7444 ; GFX8-NEXT: s_cselect_b32 s19, 1, 0
7445 ; GFX8-NEXT: s_cmp_eq_u32 s8, 0
7446 ; GFX8-NEXT: s_cselect_b32 s20, 1, 0
7447 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[14:15], s8
7448 ; GFX8-NEXT: s_lshr_b64 s[8:9], s[12:13], s8
7449 ; GFX8-NEXT: s_lshl_b64 s[16:17], s[14:15], s16
7450 ; GFX8-NEXT: s_or_b64 s[8:9], s[8:9], s[16:17]
7451 ; GFX8-NEXT: s_lshr_b64 s[14:15], s[14:15], s18
7452 ; GFX8-NEXT: s_cmp_lg_u32 s19, 0
7453 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[14:15]
7454 ; GFX8-NEXT: s_cmp_lg_u32 s20, 0
7455 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9]
7456 ; GFX8-NEXT: s_cmp_lg_u32 s19, 0
7457 ; GFX8-NEXT: s_cselect_b64 s[12:13], s[4:5], 0
7458 ; GFX8-NEXT: s_or_b64 s[4:5], s[6:7], s[8:9]
7459 ; GFX8-NEXT: s_or_b64 s[6:7], s[10:11], s[12:13]
7460 ; GFX8-NEXT: ; return to shader part epilog
7462 ; GFX9-LABEL: s_fshr_v2i128:
7464 ; GFX9-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f
7465 ; GFX9-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17]
7466 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
7467 ; GFX9-NEXT: s_lshr_b32 s24, s1, 31
7468 ; GFX9-NEXT: s_mov_b32 s25, 0
7469 ; GFX9-NEXT: s_lshl_b64 s[22:23], s[0:1], 1
7470 ; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[24:25]
7471 ; GFX9-NEXT: s_sub_i32 s19, s16, 64
7472 ; GFX9-NEXT: s_sub_i32 s17, 64, s16
7473 ; GFX9-NEXT: s_cmp_lt_u32 s16, 64
7474 ; GFX9-NEXT: s_cselect_b32 s24, 1, 0
7475 ; GFX9-NEXT: s_cmp_eq_u32 s16, 0
7476 ; GFX9-NEXT: s_cselect_b32 s28, 1, 0
7477 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[22:23], s16
7478 ; GFX9-NEXT: s_lshr_b64 s[26:27], s[22:23], s17
7479 ; GFX9-NEXT: s_lshl_b64 s[16:17], s[0:1], s16
7480 ; GFX9-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17]
7481 ; GFX9-NEXT: s_lshl_b64 s[22:23], s[22:23], s19
7482 ; GFX9-NEXT: s_cmp_lg_u32 s24, 0
7483 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
7484 ; GFX9-NEXT: s_cselect_b64 s[16:17], s[16:17], s[22:23]
7485 ; GFX9-NEXT: s_cmp_lg_u32 s28, 0
7486 ; GFX9-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17]
7487 ; GFX9-NEXT: s_sub_i32 s24, s18, 64
7488 ; GFX9-NEXT: s_sub_i32 s22, 64, s18
7489 ; GFX9-NEXT: s_cmp_lt_u32 s18, 64
7490 ; GFX9-NEXT: s_cselect_b32 s26, 1, 0
7491 ; GFX9-NEXT: s_cmp_eq_u32 s18, 0
7492 ; GFX9-NEXT: s_cselect_b32 s27, 1, 0
7493 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[10:11], s18
7494 ; GFX9-NEXT: s_lshr_b64 s[18:19], s[8:9], s18
7495 ; GFX9-NEXT: s_lshl_b64 s[22:23], s[10:11], s22
7496 ; GFX9-NEXT: s_or_b64 s[18:19], s[18:19], s[22:23]
7497 ; GFX9-NEXT: s_lshr_b64 s[10:11], s[10:11], s24
7498 ; GFX9-NEXT: s_cmp_lg_u32 s26, 0
7499 ; GFX9-NEXT: s_cselect_b64 s[10:11], s[18:19], s[10:11]
7500 ; GFX9-NEXT: s_cmp_lg_u32 s27, 0
7501 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11]
7502 ; GFX9-NEXT: s_cmp_lg_u32 s26, 0
7503 ; GFX9-NEXT: s_cselect_b64 s[10:11], s[0:1], 0
7504 ; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9]
7505 ; GFX9-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11]
7506 ; GFX9-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f
7507 ; GFX9-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21]
7508 ; GFX9-NEXT: s_lshl_b64 s[6:7], s[6:7], 1
7509 ; GFX9-NEXT: s_lshr_b32 s24, s5, 31
7510 ; GFX9-NEXT: s_lshl_b64 s[16:17], s[4:5], 1
7511 ; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[24:25]
7512 ; GFX9-NEXT: s_sub_i32 s9, s10, 64
7513 ; GFX9-NEXT: s_sub_i32 s11, 64, s10
7514 ; GFX9-NEXT: s_cmp_lt_u32 s10, 64
7515 ; GFX9-NEXT: s_cselect_b32 s20, 1, 0
7516 ; GFX9-NEXT: s_cmp_eq_u32 s10, 0
7517 ; GFX9-NEXT: s_cselect_b32 s21, 1, 0
7518 ; GFX9-NEXT: s_lshl_b64 s[6:7], s[16:17], s10
7519 ; GFX9-NEXT: s_lshr_b64 s[18:19], s[16:17], s11
7520 ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], s10
7521 ; GFX9-NEXT: s_or_b64 s[10:11], s[18:19], s[10:11]
7522 ; GFX9-NEXT: s_lshl_b64 s[16:17], s[16:17], s9
7523 ; GFX9-NEXT: s_cmp_lg_u32 s20, 0
7524 ; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], 0
7525 ; GFX9-NEXT: s_cselect_b64 s[10:11], s[10:11], s[16:17]
7526 ; GFX9-NEXT: s_cmp_lg_u32 s21, 0
7527 ; GFX9-NEXT: s_cselect_b64 s[10:11], s[4:5], s[10:11]
7528 ; GFX9-NEXT: s_sub_i32 s18, s8, 64
7529 ; GFX9-NEXT: s_sub_i32 s16, 64, s8
7530 ; GFX9-NEXT: s_cmp_lt_u32 s8, 64
7531 ; GFX9-NEXT: s_cselect_b32 s19, 1, 0
7532 ; GFX9-NEXT: s_cmp_eq_u32 s8, 0
7533 ; GFX9-NEXT: s_cselect_b32 s20, 1, 0
7534 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[14:15], s8
7535 ; GFX9-NEXT: s_lshr_b64 s[8:9], s[12:13], s8
7536 ; GFX9-NEXT: s_lshl_b64 s[16:17], s[14:15], s16
7537 ; GFX9-NEXT: s_or_b64 s[8:9], s[8:9], s[16:17]
7538 ; GFX9-NEXT: s_lshr_b64 s[14:15], s[14:15], s18
7539 ; GFX9-NEXT: s_cmp_lg_u32 s19, 0
7540 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[14:15]
7541 ; GFX9-NEXT: s_cmp_lg_u32 s20, 0
7542 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9]
7543 ; GFX9-NEXT: s_cmp_lg_u32 s19, 0
7544 ; GFX9-NEXT: s_cselect_b64 s[12:13], s[4:5], 0
7545 ; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[8:9]
7546 ; GFX9-NEXT: s_or_b64 s[6:7], s[10:11], s[12:13]
7547 ; GFX9-NEXT: ; return to shader part epilog
7549 ; GFX10-LABEL: s_fshr_v2i128:
7551 ; GFX10-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f
7552 ; GFX10-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17]
7553 ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
7554 ; GFX10-NEXT: s_lshr_b32 s22, s1, 31
7555 ; GFX10-NEXT: s_mov_b32 s23, 0
7556 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
7557 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[22:23]
7558 ; GFX10-NEXT: s_sub_i32 s19, s16, 64
7559 ; GFX10-NEXT: s_sub_i32 s17, 64, s16
7560 ; GFX10-NEXT: s_cmp_lt_u32 s16, 64
7561 ; GFX10-NEXT: s_cselect_b32 s22, 1, 0
7562 ; GFX10-NEXT: s_cmp_eq_u32 s16, 0
7563 ; GFX10-NEXT: s_cselect_b32 s28, 1, 0
7564 ; GFX10-NEXT: s_lshr_b64 s[24:25], s[0:1], s17
7565 ; GFX10-NEXT: s_lshl_b64 s[26:27], s[2:3], s16
7566 ; GFX10-NEXT: s_lshl_b64 s[16:17], s[0:1], s16
7567 ; GFX10-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27]
7568 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s19
7569 ; GFX10-NEXT: s_cmp_lg_u32 s22, 0
7570 ; GFX10-NEXT: s_cselect_b64 s[16:17], s[16:17], 0
7571 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1]
7572 ; GFX10-NEXT: s_cmp_lg_u32 s28, 0
7573 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
7574 ; GFX10-NEXT: s_sub_i32 s22, s18, 64
7575 ; GFX10-NEXT: s_sub_i32 s19, 64, s18
7576 ; GFX10-NEXT: s_cmp_lt_u32 s18, 64
7577 ; GFX10-NEXT: s_cselect_b32 s26, 1, 0
7578 ; GFX10-NEXT: s_cmp_eq_u32 s18, 0
7579 ; GFX10-NEXT: s_cselect_b32 s27, 1, 0
7580 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[8:9], s18
7581 ; GFX10-NEXT: s_lshl_b64 s[24:25], s[10:11], s19
7582 ; GFX10-NEXT: s_lshr_b64 s[18:19], s[10:11], s18
7583 ; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[24:25]
7584 ; GFX10-NEXT: s_lshr_b64 s[10:11], s[10:11], s22
7585 ; GFX10-NEXT: s_cmp_lg_u32 s26, 0
7586 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[10:11]
7587 ; GFX10-NEXT: s_cmp_lg_u32 s27, 0
7588 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1]
7589 ; GFX10-NEXT: s_cmp_lg_u32 s26, 0
7590 ; GFX10-NEXT: s_cselect_b64 s[8:9], s[18:19], 0
7591 ; GFX10-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21]
7592 ; GFX10-NEXT: s_lshl_b64 s[6:7], s[6:7], 1
7593 ; GFX10-NEXT: s_lshr_b32 s22, s5, 31
7594 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
7595 ; GFX10-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f
7596 ; GFX10-NEXT: s_or_b64 s[0:1], s[16:17], s[0:1]
7597 ; GFX10-NEXT: s_lshl_b64 s[4:5], s[4:5], 1
7598 ; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[22:23]
7599 ; GFX10-NEXT: s_sub_i32 s9, s10, 64
7600 ; GFX10-NEXT: s_sub_i32 s11, 64, s10
7601 ; GFX10-NEXT: s_cmp_lt_u32 s10, 64
7602 ; GFX10-NEXT: s_cselect_b32 s20, 1, 0
7603 ; GFX10-NEXT: s_cmp_eq_u32 s10, 0
7604 ; GFX10-NEXT: s_cselect_b32 s21, 1, 0
7605 ; GFX10-NEXT: s_lshr_b64 s[16:17], s[4:5], s11
7606 ; GFX10-NEXT: s_lshl_b64 s[18:19], s[6:7], s10
7607 ; GFX10-NEXT: s_lshl_b64 s[10:11], s[4:5], s10
7608 ; GFX10-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19]
7609 ; GFX10-NEXT: s_lshl_b64 s[4:5], s[4:5], s9
7610 ; GFX10-NEXT: s_cmp_lg_u32 s20, 0
7611 ; GFX10-NEXT: s_cselect_b64 s[10:11], s[10:11], 0
7612 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[16:17], s[4:5]
7613 ; GFX10-NEXT: s_cmp_lg_u32 s21, 0
7614 ; GFX10-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5]
7615 ; GFX10-NEXT: s_sub_i32 s18, s8, 64
7616 ; GFX10-NEXT: s_sub_i32 s9, 64, s8
7617 ; GFX10-NEXT: s_cmp_lt_u32 s8, 64
7618 ; GFX10-NEXT: s_cselect_b32 s19, 1, 0
7619 ; GFX10-NEXT: s_cmp_eq_u32 s8, 0
7620 ; GFX10-NEXT: s_cselect_b32 s20, 1, 0
7621 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[12:13], s8
7622 ; GFX10-NEXT: s_lshl_b64 s[16:17], s[14:15], s9
7623 ; GFX10-NEXT: s_lshr_b64 s[8:9], s[14:15], s8
7624 ; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[16:17]
7625 ; GFX10-NEXT: s_lshr_b64 s[14:15], s[14:15], s18
7626 ; GFX10-NEXT: s_cmp_lg_u32 s19, 0
7627 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], s[14:15]
7628 ; GFX10-NEXT: s_cmp_lg_u32 s20, 0
7629 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[12:13], s[4:5]
7630 ; GFX10-NEXT: s_cmp_lg_u32 s19, 0
7631 ; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0
7632 ; GFX10-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5]
7633 ; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
7634 ; GFX10-NEXT: ; return to shader part epilog
7636 ; GFX11-LABEL: s_fshr_v2i128:
7638 ; GFX11-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f
7639 ; GFX11-NEXT: s_and_not1_b64 s[16:17], 0x7f, s[16:17]
7640 ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
7641 ; GFX11-NEXT: s_lshr_b32 s22, s1, 31
7642 ; GFX11-NEXT: s_mov_b32 s23, 0
7643 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
7644 ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[22:23]
7645 ; GFX11-NEXT: s_sub_i32 s19, s16, 64
7646 ; GFX11-NEXT: s_sub_i32 s17, 64, s16
7647 ; GFX11-NEXT: s_cmp_lt_u32 s16, 64
7648 ; GFX11-NEXT: s_cselect_b32 s22, 1, 0
7649 ; GFX11-NEXT: s_cmp_eq_u32 s16, 0
7650 ; GFX11-NEXT: s_cselect_b32 s28, 1, 0
7651 ; GFX11-NEXT: s_lshr_b64 s[24:25], s[0:1], s17
7652 ; GFX11-NEXT: s_lshl_b64 s[26:27], s[2:3], s16
7653 ; GFX11-NEXT: s_lshl_b64 s[16:17], s[0:1], s16
7654 ; GFX11-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27]
7655 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s19
7656 ; GFX11-NEXT: s_cmp_lg_u32 s22, 0
7657 ; GFX11-NEXT: s_cselect_b64 s[16:17], s[16:17], 0
7658 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1]
7659 ; GFX11-NEXT: s_cmp_lg_u32 s28, 0
7660 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
7661 ; GFX11-NEXT: s_sub_i32 s22, s18, 64
7662 ; GFX11-NEXT: s_sub_i32 s19, 64, s18
7663 ; GFX11-NEXT: s_cmp_lt_u32 s18, 64
7664 ; GFX11-NEXT: s_cselect_b32 s26, 1, 0
7665 ; GFX11-NEXT: s_cmp_eq_u32 s18, 0
7666 ; GFX11-NEXT: s_cselect_b32 s27, 1, 0
7667 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[8:9], s18
7668 ; GFX11-NEXT: s_lshl_b64 s[24:25], s[10:11], s19
7669 ; GFX11-NEXT: s_lshr_b64 s[18:19], s[10:11], s18
7670 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[24:25]
7671 ; GFX11-NEXT: s_lshr_b64 s[10:11], s[10:11], s22
7672 ; GFX11-NEXT: s_cmp_lg_u32 s26, 0
7673 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[10:11]
7674 ; GFX11-NEXT: s_cmp_lg_u32 s27, 0
7675 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1]
7676 ; GFX11-NEXT: s_cmp_lg_u32 s26, 0
7677 ; GFX11-NEXT: s_cselect_b64 s[8:9], s[18:19], 0
7678 ; GFX11-NEXT: s_and_not1_b64 s[10:11], 0x7f, s[20:21]
7679 ; GFX11-NEXT: s_lshl_b64 s[6:7], s[6:7], 1
7680 ; GFX11-NEXT: s_lshr_b32 s22, s5, 31
7681 ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
7682 ; GFX11-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f
7683 ; GFX11-NEXT: s_or_b64 s[0:1], s[16:17], s[0:1]
7684 ; GFX11-NEXT: s_lshl_b64 s[4:5], s[4:5], 1
7685 ; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[22:23]
7686 ; GFX11-NEXT: s_sub_i32 s9, s10, 64
7687 ; GFX11-NEXT: s_sub_i32 s11, 64, s10
7688 ; GFX11-NEXT: s_cmp_lt_u32 s10, 64
7689 ; GFX11-NEXT: s_cselect_b32 s20, 1, 0
7690 ; GFX11-NEXT: s_cmp_eq_u32 s10, 0
7691 ; GFX11-NEXT: s_cselect_b32 s21, 1, 0
7692 ; GFX11-NEXT: s_lshr_b64 s[16:17], s[4:5], s11
7693 ; GFX11-NEXT: s_lshl_b64 s[18:19], s[6:7], s10
7694 ; GFX11-NEXT: s_lshl_b64 s[10:11], s[4:5], s10
7695 ; GFX11-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19]
7696 ; GFX11-NEXT: s_lshl_b64 s[4:5], s[4:5], s9
7697 ; GFX11-NEXT: s_cmp_lg_u32 s20, 0
7698 ; GFX11-NEXT: s_cselect_b64 s[10:11], s[10:11], 0
7699 ; GFX11-NEXT: s_cselect_b64 s[4:5], s[16:17], s[4:5]
7700 ; GFX11-NEXT: s_cmp_lg_u32 s21, 0
7701 ; GFX11-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5]
7702 ; GFX11-NEXT: s_sub_i32 s18, s8, 64
7703 ; GFX11-NEXT: s_sub_i32 s9, 64, s8
7704 ; GFX11-NEXT: s_cmp_lt_u32 s8, 64
7705 ; GFX11-NEXT: s_cselect_b32 s19, 1, 0
7706 ; GFX11-NEXT: s_cmp_eq_u32 s8, 0
7707 ; GFX11-NEXT: s_cselect_b32 s20, 1, 0
7708 ; GFX11-NEXT: s_lshr_b64 s[4:5], s[12:13], s8
7709 ; GFX11-NEXT: s_lshl_b64 s[16:17], s[14:15], s9
7710 ; GFX11-NEXT: s_lshr_b64 s[8:9], s[14:15], s8
7711 ; GFX11-NEXT: s_or_b64 s[4:5], s[4:5], s[16:17]
7712 ; GFX11-NEXT: s_lshr_b64 s[14:15], s[14:15], s18
7713 ; GFX11-NEXT: s_cmp_lg_u32 s19, 0
7714 ; GFX11-NEXT: s_cselect_b64 s[4:5], s[4:5], s[14:15]
7715 ; GFX11-NEXT: s_cmp_lg_u32 s20, 0
7716 ; GFX11-NEXT: s_cselect_b64 s[4:5], s[12:13], s[4:5]
7717 ; GFX11-NEXT: s_cmp_lg_u32 s19, 0
7718 ; GFX11-NEXT: s_cselect_b64 s[8:9], s[8:9], 0
7719 ; GFX11-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5]
7720 ; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
7721 ; GFX11-NEXT: ; return to shader part epilog
7722 %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
7723 ret <2 x i128> %result
; Funnel-shift-right of a <2 x i128> vector using the default calling
; convention (this define carries no amdgpu_ps or inreg markers). The IR body
; is a single call to @llvm.fshr.v2i128 whose result is returned unchanged.
;
; The check lines below are autogenerated by utils/update_llc_test_checks.py
; (see the note on the first line of this file). Regenerate them with that
; script instead of editing them by hand. One instruction sequence is recorded
; for each of the GFX6, GFX8, GFX9, GFX10 and GFX11 prefixes. In every
; sequence the registers that feed the shift counts (v16 and v20) are masked
; with 0x7f, both directly and after v_not_b32, and the routine returns with
; s_setpc_b64 s[30:31].
7726 define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) {
7727 ; GFX6-LABEL: v_fshr_v2i128:
7729 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7730 ; GFX6-NEXT: v_and_b32_e32 v23, 0x7f, v16
7731 ; GFX6-NEXT: v_not_b32_e32 v16, v16
7732 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
7733 ; GFX6-NEXT: v_and_b32_e32 v24, 0x7f, v16
7734 ; GFX6-NEXT: v_lshl_b64 v[16:17], v[0:1], 1
7735 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 31, v1
7736 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v0
7737 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v24
7738 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[16:17], v0
7739 ; GFX6-NEXT: v_lshl_b64 v[18:19], v[2:3], v24
7740 ; GFX6-NEXT: v_subrev_i32_e32 v25, vcc, 64, v24
7741 ; GFX6-NEXT: v_lshl_b64 v[21:22], v[16:17], v24
7742 ; GFX6-NEXT: v_or_b32_e32 v18, v0, v18
7743 ; GFX6-NEXT: v_or_b32_e32 v19, v1, v19
7744 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[16:17], v25
7745 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v24
7746 ; GFX6-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc
7747 ; GFX6-NEXT: v_cndmask_b32_e32 v22, 0, v22, vcc
7748 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v18, vcc
7749 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v19, vcc
7750 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24
7751 ; GFX6-NEXT: v_cndmask_b32_e32 v18, v0, v2, vcc
7752 ; GFX6-NEXT: v_cndmask_b32_e32 v19, v1, v3, vcc
7753 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v23
7754 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[8:9], v23
7755 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[10:11], v2
7756 ; GFX6-NEXT: v_subrev_i32_e32 v24, vcc, 64, v23
7757 ; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
7758 ; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
7759 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[10:11], v24
7760 ; GFX6-NEXT: v_lshr_b64 v[16:17], v[10:11], v23
7761 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23
7762 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
7763 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23
7764 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
7765 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
7766 ; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
7767 ; GFX6-NEXT: v_not_b32_e32 v8, v20
7768 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[6:7], 1
7769 ; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
7770 ; GFX6-NEXT: v_or_b32_e32 v3, v19, v3
7771 ; GFX6-NEXT: v_and_b32_e32 v19, 0x7f, v8
7772 ; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], 1
7773 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v5
7774 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v16, vcc
7775 ; GFX6-NEXT: v_or_b32_e32 v6, v6, v4
7776 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 64, v19
7777 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[8:9], v4
7778 ; GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], v19
7779 ; GFX6-NEXT: v_or_b32_e32 v2, v18, v2
7780 ; GFX6-NEXT: v_and_b32_e32 v18, 0x7f, v20
7781 ; GFX6-NEXT: v_subrev_i32_e32 v20, vcc, 64, v19
7782 ; GFX6-NEXT: v_lshl_b64 v[16:17], v[8:9], v19
7783 ; GFX6-NEXT: v_or_b32_e32 v10, v4, v10
7784 ; GFX6-NEXT: v_or_b32_e32 v11, v5, v11
7785 ; GFX6-NEXT: v_lshl_b64 v[4:5], v[8:9], v20
7786 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19
7787 ; GFX6-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc
7788 ; GFX6-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc
7789 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc
7790 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc
7791 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v19
7792 ; GFX6-NEXT: v_cndmask_b32_e32 v10, v4, v6, vcc
7793 ; GFX6-NEXT: v_cndmask_b32_e32 v11, v5, v7, vcc
7794 ; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 64, v18
7795 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[12:13], v18
7796 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[14:15], v6
7797 ; GFX6-NEXT: v_subrev_i32_e32 v19, vcc, 64, v18
7798 ; GFX6-NEXT: v_or_b32_e32 v6, v4, v6
7799 ; GFX6-NEXT: v_or_b32_e32 v7, v5, v7
7800 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[14:15], v19
7801 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[14:15], v18
7802 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v18
7803 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
7804 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
7805 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v18
7806 ; GFX6-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[4:5]
7807 ; GFX6-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[4:5]
7808 ; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc
7809 ; GFX6-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc
7810 ; GFX6-NEXT: v_or_b32_e32 v0, v21, v0
7811 ; GFX6-NEXT: v_or_b32_e32 v1, v22, v1
7812 ; GFX6-NEXT: v_or_b32_e32 v4, v16, v4
7813 ; GFX6-NEXT: v_or_b32_e32 v5, v17, v5
7814 ; GFX6-NEXT: v_or_b32_e32 v6, v10, v6
7815 ; GFX6-NEXT: v_or_b32_e32 v7, v11, v7
7816 ; GFX6-NEXT: s_setpc_b64 s[30:31]
7818 ; GFX8-LABEL: v_fshr_v2i128:
7820 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7821 ; GFX8-NEXT: v_and_b32_e32 v23, 0x7f, v16
7822 ; GFX8-NEXT: v_not_b32_e32 v16, v16
7823 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
7824 ; GFX8-NEXT: v_and_b32_e32 v24, 0x7f, v16
7825 ; GFX8-NEXT: v_lshlrev_b64 v[16:17], 1, v[0:1]
7826 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 31, v1
7827 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v0
7828 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v24
7829 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, v[16:17]
7830 ; GFX8-NEXT: v_lshlrev_b64 v[18:19], v24, v[2:3]
7831 ; GFX8-NEXT: v_subrev_u32_e32 v25, vcc, 64, v24
7832 ; GFX8-NEXT: v_lshlrev_b64 v[21:22], v24, v[16:17]
7833 ; GFX8-NEXT: v_or_b32_e32 v18, v0, v18
7834 ; GFX8-NEXT: v_or_b32_e32 v19, v1, v19
7835 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v25, v[16:17]
7836 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v24
7837 ; GFX8-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc
7838 ; GFX8-NEXT: v_cndmask_b32_e32 v22, 0, v22, vcc
7839 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v18, vcc
7840 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v19, vcc
7841 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24
7842 ; GFX8-NEXT: v_cndmask_b32_e32 v18, v0, v2, vcc
7843 ; GFX8-NEXT: v_cndmask_b32_e32 v19, v1, v3, vcc
7844 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v23
7845 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v23, v[8:9]
7846 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, v[10:11]
7847 ; GFX8-NEXT: v_subrev_u32_e32 v24, vcc, 64, v23
7848 ; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
7849 ; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
7850 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11]
7851 ; GFX8-NEXT: v_lshrrev_b64 v[16:17], v23, v[10:11]
7852 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23
7853 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
7854 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23
7855 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
7856 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
7857 ; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
7858 ; GFX8-NEXT: v_not_b32_e32 v8, v20
7859 ; GFX8-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7]
7860 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
7861 ; GFX8-NEXT: v_or_b32_e32 v3, v19, v3
7862 ; GFX8-NEXT: v_and_b32_e32 v19, 0x7f, v8
7863 ; GFX8-NEXT: v_lshlrev_b64 v[8:9], 1, v[4:5]
7864 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v5
7865 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v16, vcc
7866 ; GFX8-NEXT: v_or_b32_e32 v6, v6, v4
7867 ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 64, v19
7868 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], v4, v[8:9]
7869 ; GFX8-NEXT: v_lshlrev_b64 v[10:11], v19, v[6:7]
7870 ; GFX8-NEXT: v_or_b32_e32 v2, v18, v2
7871 ; GFX8-NEXT: v_and_b32_e32 v18, 0x7f, v20
7872 ; GFX8-NEXT: v_subrev_u32_e32 v20, vcc, 64, v19
7873 ; GFX8-NEXT: v_lshlrev_b64 v[16:17], v19, v[8:9]
7874 ; GFX8-NEXT: v_or_b32_e32 v10, v4, v10
7875 ; GFX8-NEXT: v_or_b32_e32 v11, v5, v11
7876 ; GFX8-NEXT: v_lshlrev_b64 v[4:5], v20, v[8:9]
7877 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19
7878 ; GFX8-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc
7879 ; GFX8-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc
7880 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc
7881 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc
7882 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v19
7883 ; GFX8-NEXT: v_cndmask_b32_e32 v10, v4, v6, vcc
7884 ; GFX8-NEXT: v_cndmask_b32_e32 v11, v5, v7, vcc
7885 ; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 64, v18
7886 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], v18, v[12:13]
7887 ; GFX8-NEXT: v_lshlrev_b64 v[6:7], v6, v[14:15]
7888 ; GFX8-NEXT: v_subrev_u32_e32 v19, vcc, 64, v18
7889 ; GFX8-NEXT: v_or_b32_e32 v6, v4, v6
7890 ; GFX8-NEXT: v_or_b32_e32 v7, v5, v7
7891 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], v19, v[14:15]
7892 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], v18, v[14:15]
7893 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v18
7894 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
7895 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
7896 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v18
7897 ; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[4:5]
7898 ; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[4:5]
7899 ; GFX8-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc
7900 ; GFX8-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc
7901 ; GFX8-NEXT: v_or_b32_e32 v0, v21, v0
7902 ; GFX8-NEXT: v_or_b32_e32 v1, v22, v1
7903 ; GFX8-NEXT: v_or_b32_e32 v4, v16, v4
7904 ; GFX8-NEXT: v_or_b32_e32 v5, v17, v5
7905 ; GFX8-NEXT: v_or_b32_e32 v6, v10, v6
7906 ; GFX8-NEXT: v_or_b32_e32 v7, v11, v7
7907 ; GFX8-NEXT: s_setpc_b64 s[30:31]
7909 ; GFX9-LABEL: v_fshr_v2i128:
7911 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7912 ; GFX9-NEXT: v_and_b32_e32 v23, 0x7f, v16
7913 ; GFX9-NEXT: v_not_b32_e32 v16, v16
7914 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
7915 ; GFX9-NEXT: v_and_b32_e32 v24, 0x7f, v16
7916 ; GFX9-NEXT: v_lshlrev_b64 v[16:17], 1, v[0:1]
7917 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 31, v1
7918 ; GFX9-NEXT: v_or_b32_e32 v2, v2, v0
7919 ; GFX9-NEXT: v_sub_u32_e32 v0, 64, v24
7920 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, v[16:17]
7921 ; GFX9-NEXT: v_lshlrev_b64 v[18:19], v24, v[2:3]
7922 ; GFX9-NEXT: v_subrev_u32_e32 v25, 64, v24
7923 ; GFX9-NEXT: v_lshlrev_b64 v[21:22], v24, v[16:17]
7924 ; GFX9-NEXT: v_or_b32_e32 v18, v0, v18
7925 ; GFX9-NEXT: v_or_b32_e32 v19, v1, v19
7926 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v25, v[16:17]
7927 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v24
7928 ; GFX9-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc
7929 ; GFX9-NEXT: v_cndmask_b32_e32 v22, 0, v22, vcc
7930 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v18, vcc
7931 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v19, vcc
7932 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24
7933 ; GFX9-NEXT: v_cndmask_b32_e32 v18, v0, v2, vcc
7934 ; GFX9-NEXT: v_sub_u32_e32 v2, 64, v23
7935 ; GFX9-NEXT: v_cndmask_b32_e32 v19, v1, v3, vcc
7936 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v23, v[8:9]
7937 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, v[10:11]
7938 ; GFX9-NEXT: v_subrev_u32_e32 v24, 64, v23
7939 ; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
7940 ; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
7941 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11]
7942 ; GFX9-NEXT: v_lshrrev_b64 v[16:17], v23, v[10:11]
7943 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23
7944 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
7945 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23
7946 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
7947 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
7948 ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
7949 ; GFX9-NEXT: v_not_b32_e32 v8, v20
7950 ; GFX9-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7]
7951 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
7952 ; GFX9-NEXT: v_or_b32_e32 v3, v19, v3
7953 ; GFX9-NEXT: v_and_b32_e32 v19, 0x7f, v8
7954 ; GFX9-NEXT: v_lshlrev_b64 v[8:9], 1, v[4:5]
7955 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v5
7956 ; GFX9-NEXT: v_or_b32_e32 v6, v6, v4
7957 ; GFX9-NEXT: v_sub_u32_e32 v4, 64, v19
7958 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v16, vcc
7959 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v4, v[8:9]
7960 ; GFX9-NEXT: v_lshlrev_b64 v[10:11], v19, v[6:7]
7961 ; GFX9-NEXT: v_or_b32_e32 v2, v18, v2
7962 ; GFX9-NEXT: v_and_b32_e32 v18, 0x7f, v20
7963 ; GFX9-NEXT: v_subrev_u32_e32 v20, 64, v19
7964 ; GFX9-NEXT: v_lshlrev_b64 v[16:17], v19, v[8:9]
7965 ; GFX9-NEXT: v_or_b32_e32 v10, v4, v10
7966 ; GFX9-NEXT: v_or_b32_e32 v11, v5, v11
7967 ; GFX9-NEXT: v_lshlrev_b64 v[4:5], v20, v[8:9]
7968 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19
7969 ; GFX9-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc
7970 ; GFX9-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc
7971 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc
7972 ; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc
7973 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v19
7974 ; GFX9-NEXT: v_cndmask_b32_e32 v10, v4, v6, vcc
7975 ; GFX9-NEXT: v_sub_u32_e32 v6, 64, v18
7976 ; GFX9-NEXT: v_cndmask_b32_e32 v11, v5, v7, vcc
7977 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v18, v[12:13]
7978 ; GFX9-NEXT: v_lshlrev_b64 v[6:7], v6, v[14:15]
7979 ; GFX9-NEXT: v_subrev_u32_e32 v19, 64, v18
7980 ; GFX9-NEXT: v_or_b32_e32 v6, v4, v6
7981 ; GFX9-NEXT: v_or_b32_e32 v7, v5, v7
7982 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v19, v[14:15]
7983 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v18, v[14:15]
7984 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v18
7985 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
7986 ; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
7987 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v18
7988 ; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[4:5]
7989 ; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[4:5]
7990 ; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc
7991 ; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc
7992 ; GFX9-NEXT: v_or_b32_e32 v0, v21, v0
7993 ; GFX9-NEXT: v_or_b32_e32 v1, v22, v1
7994 ; GFX9-NEXT: v_or_b32_e32 v4, v16, v4
7995 ; GFX9-NEXT: v_or_b32_e32 v5, v17, v5
7996 ; GFX9-NEXT: v_or_b32_e32 v6, v10, v6
7997 ; GFX9-NEXT: v_or_b32_e32 v7, v11, v7
7998 ; GFX9-NEXT: s_setpc_b64 s[30:31]
8000 ; GFX10-LABEL: v_fshr_v2i128:
8002 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8003 ; GFX10-NEXT: v_not_b32_e32 v17, v16
8004 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
8005 ; GFX10-NEXT: v_and_b32_e32 v26, 0x7f, v16
8006 ; GFX10-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7]
8007 ; GFX10-NEXT: v_and_b32_e32 v25, 0x7f, v17
8008 ; GFX10-NEXT: v_lshrrev_b32_e32 v17, 31, v1
8009 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
8010 ; GFX10-NEXT: v_subrev_nc_u32_e32 v27, 64, v26
8011 ; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v26
8012 ; GFX10-NEXT: v_sub_nc_u32_e32 v18, 64, v25
8013 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v17
8014 ; GFX10-NEXT: v_subrev_nc_u32_e32 v19, 64, v25
8015 ; GFX10-NEXT: v_lshlrev_b64 v[23:24], v25, v[0:1]
8016 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25
8017 ; GFX10-NEXT: v_lshrrev_b64 v[17:18], v18, v[0:1]
8018 ; GFX10-NEXT: v_lshlrev_b64 v[21:22], v25, v[2:3]
8019 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v19, v[0:1]
8020 ; GFX10-NEXT: v_cndmask_b32_e32 v23, 0, v23, vcc_lo
8021 ; GFX10-NEXT: v_cndmask_b32_e32 v24, 0, v24, vcc_lo
8022 ; GFX10-NEXT: v_or_b32_e32 v22, v18, v22
8023 ; GFX10-NEXT: v_sub_nc_u32_e32 v18, 64, v26
8024 ; GFX10-NEXT: v_or_b32_e32 v21, v17, v21
8025 ; GFX10-NEXT: v_lshrrev_b64 v[16:17], v26, v[8:9]
8026 ; GFX10-NEXT: v_cndmask_b32_e32 v22, v1, v22, vcc_lo
8027 ; GFX10-NEXT: v_lshlrev_b64 v[18:19], v18, v[10:11]
8028 ; GFX10-NEXT: v_cndmask_b32_e32 v21, v0, v21, vcc_lo
8029 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v27, v[10:11]
8030 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v25
8031 ; GFX10-NEXT: v_or_b32_e32 v16, v16, v18
8032 ; GFX10-NEXT: v_or_b32_e32 v17, v17, v19
8033 ; GFX10-NEXT: v_cndmask_b32_e32 v18, v21, v2, vcc_lo
8034 ; GFX10-NEXT: v_cndmask_b32_e32 v22, v22, v3, vcc_lo
8035 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v26
8036 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v16, s4
8037 ; GFX10-NEXT: v_not_b32_e32 v16, v20
8038 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v17, s4
8039 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], v26, v[10:11]
8040 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
8041 ; GFX10-NEXT: v_and_b32_e32 v25, 0x7f, v16
8042 ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 31, v5
8043 ; GFX10-NEXT: v_lshlrev_b64 v[4:5], 1, v[4:5]
8044 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
8045 ; GFX10-NEXT: v_or_b32_e32 v0, v23, v0
8046 ; GFX10-NEXT: v_sub_nc_u32_e32 v9, 64, v25
8047 ; GFX10-NEXT: v_or_b32_e32 v6, v6, v8
8048 ; GFX10-NEXT: v_and_b32_e32 v23, 0x7f, v20
8049 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s4
8050 ; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, v3, s4
8051 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], v9, v[4:5]
8052 ; GFX10-NEXT: v_lshlrev_b64 v[10:11], v25, v[6:7]
8053 ; GFX10-NEXT: v_sub_nc_u32_e32 v20, 64, v23
8054 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 64, v25
8055 ; GFX10-NEXT: v_or_b32_e32 v2, v18, v2
8056 ; GFX10-NEXT: v_lshlrev_b64 v[16:17], v25, v[4:5]
8057 ; GFX10-NEXT: v_lshrrev_b64 v[18:19], v23, v[12:13]
8058 ; GFX10-NEXT: v_or_b32_e32 v10, v8, v10
8059 ; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v23
8060 ; GFX10-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15]
8061 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25
8062 ; GFX10-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
8063 ; GFX10-NEXT: v_or_b32_e32 v5, v9, v11
8064 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], v8, v[14:15]
8065 ; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v23
8066 ; GFX10-NEXT: v_cndmask_b32_e32 v11, 0, v16, vcc_lo
8067 ; GFX10-NEXT: v_or_b32_e32 v16, v18, v20
8068 ; GFX10-NEXT: v_or_b32_e32 v18, v19, v21
8069 ; GFX10-NEXT: v_cndmask_b32_e32 v10, v3, v10, vcc_lo
8070 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo
8071 ; GFX10-NEXT: v_lshrrev_b64 v[3:4], v23, v[14:15]
8072 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v16, s4
8073 ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v23
8074 ; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v25
8075 ; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v18, s4
8076 ; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo
8077 ; GFX10-NEXT: v_or_b32_e32 v1, v24, v1
8078 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v10, v6, s6
8079 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v5, v7, s6
8080 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v8, v12, s5
8081 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v9, v13, s5
8082 ; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v3, s4
8083 ; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v4, s4
8084 ; GFX10-NEXT: v_or_b32_e32 v3, v22, v26
8085 ; GFX10-NEXT: v_or_b32_e32 v4, v11, v5
8086 ; GFX10-NEXT: v_or_b32_e32 v5, v14, v8
8087 ; GFX10-NEXT: v_or_b32_e32 v6, v6, v9
8088 ; GFX10-NEXT: v_or_b32_e32 v7, v7, v10
8089 ; GFX10-NEXT: s_setpc_b64 s[30:31]
8091 ; GFX11-LABEL: v_fshr_v2i128:
8093 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8094 ; GFX11-NEXT: v_not_b32_e32 v17, v16
8095 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
8096 ; GFX11-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7]
8097 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
8098 ; GFX11-NEXT: v_and_b32_e32 v25, 0x7f, v17
8099 ; GFX11-NEXT: v_lshrrev_b32_e32 v17, 31, v1
8100 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
8101 ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25
8102 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
8103 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v17
8104 ; GFX11-NEXT: v_lshlrev_b64 v[23:24], v25, v[0:1]
8105 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
8106 ; GFX11-NEXT: v_dual_cndmask_b32 v23, 0, v23 :: v_dual_and_b32 v26, 0x7f, v16
8107 ; GFX11-NEXT: v_cndmask_b32_e32 v24, 0, v24, vcc_lo
8108 ; GFX11-NEXT: v_sub_nc_u32_e32 v18, 64, v25
8109 ; GFX11-NEXT: v_lshlrev_b64 v[21:22], v25, v[2:3]
8110 ; GFX11-NEXT: v_subrev_nc_u32_e32 v19, 64, v25
8111 ; GFX11-NEXT: v_subrev_nc_u32_e32 v27, 64, v26
8112 ; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v26
8113 ; GFX11-NEXT: v_lshrrev_b64 v[17:18], v18, v[0:1]
8114 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
8115 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v19, v[0:1]
8116 ; GFX11-NEXT: v_or_b32_e32 v22, v18, v22
8117 ; GFX11-NEXT: v_sub_nc_u32_e32 v18, 64, v26
8118 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
8119 ; GFX11-NEXT: v_or_b32_e32 v21, v17, v21
8120 ; GFX11-NEXT: v_lshrrev_b64 v[16:17], v26, v[8:9]
8121 ; GFX11-NEXT: v_cndmask_b32_e32 v22, v1, v22, vcc_lo
8122 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
8123 ; GFX11-NEXT: v_lshlrev_b64 v[18:19], v18, v[10:11]
8124 ; GFX11-NEXT: v_cndmask_b32_e32 v21, v0, v21, vcc_lo
8125 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], v27, v[10:11]
8126 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v25
8127 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
8128 ; GFX11-NEXT: v_or_b32_e32 v16, v16, v18
8129 ; GFX11-NEXT: v_or_b32_e32 v17, v17, v19
8130 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v16, s0
8131 ; GFX11-NEXT: v_not_b32_e32 v16, v20
8132 ; GFX11-NEXT: v_cndmask_b32_e32 v18, v21, v2, vcc_lo
8133 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
8134 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v17, s0
8135 ; GFX11-NEXT: v_cndmask_b32_e32 v22, v22, v3, vcc_lo
8136 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v26
8137 ; GFX11-NEXT: v_and_b32_e32 v25, 0x7f, v16
8138 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], v26, v[10:11]
8139 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8
8140 ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 31, v5
8141 ; GFX11-NEXT: v_lshlrev_b64 v[4:5], 1, v[4:5]
8142 ; GFX11-NEXT: v_sub_nc_u32_e32 v9, 64, v25
8143 ; GFX11-NEXT: v_cndmask_b32_e64 v26, 0, v3, s0
8144 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 64, v25
8145 ; GFX11-NEXT: v_or_b32_e32 v6, v6, v8
8146 ; GFX11-NEXT: v_or_b32_e32 v0, v23, v0
8147 ; GFX11-NEXT: v_lshrrev_b64 v[8:9], v9, v[4:5]
8148 ; GFX11-NEXT: v_lshlrev_b64 v[16:17], v25, v[4:5]
8149 ; GFX11-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
8150 ; GFX11-NEXT: v_lshlrev_b64 v[10:11], v25, v[6:7]
8151 ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25
8152 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0
8153 ; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v25
8154 ; GFX11-NEXT: v_or_b32_e32 v1, v24, v1
8155 ; GFX11-NEXT: v_or_b32_e32 v10, v8, v10
8156 ; GFX11-NEXT: v_and_b32_e32 v23, 0x7f, v20
8157 ; GFX11-NEXT: v_or_b32_e32 v2, v18, v2
8158 ; GFX11-NEXT: v_or_b32_e32 v5, v9, v11
8159 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
8160 ; GFX11-NEXT: v_dual_cndmask_b32 v11, 0, v16 :: v_dual_cndmask_b32 v10, v3, v10
8161 ; GFX11-NEXT: v_sub_nc_u32_e32 v20, 64, v23
8162 ; GFX11-NEXT: v_subrev_nc_u32_e32 v8, 64, v23
8163 ; GFX11-NEXT: v_lshrrev_b64 v[18:19], v23, v[12:13]
8164 ; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v23
8165 ; GFX11-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo
8166 ; GFX11-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15]
8167 ; GFX11-NEXT: v_lshrrev_b64 v[8:9], v8, v[14:15]
8168 ; GFX11-NEXT: v_lshrrev_b64 v[3:4], v23, v[14:15]
8169 ; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo
8170 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v23
8171 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v10, v6, s2
8172 ; GFX11-NEXT: v_or_b32_e32 v16, v18, v20
8173 ; GFX11-NEXT: v_or_b32_e32 v18, v19, v21
8174 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v5, v7, s2
8175 ; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v4, s0
8176 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
8177 ; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v16, s0
8178 ; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v18, s0
8179 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
8180 ; GFX11-NEXT: v_or_b32_e32 v7, v7, v10
8181 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v8, v12, s1
8182 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
8183 ; GFX11-NEXT: v_cndmask_b32_e64 v8, v9, v13, s1
8184 ; GFX11-NEXT: v_cndmask_b32_e64 v9, 0, v3, s0
8185 ; GFX11-NEXT: v_or_b32_e32 v3, v22, v26
8186 ; GFX11-NEXT: v_or_b32_e32 v4, v11, v5
8187 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
8188 ; GFX11-NEXT: v_or_b32_e32 v5, v14, v8
8189 ; GFX11-NEXT: v_or_b32_e32 v6, v6, v9
8190 ; GFX11-NEXT: s_setpc_b64 s[30:31]
8191 %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
8192 ret <2 x i128> %result
; Declarations of the llvm.fshr intrinsic overloads, listed by element width
; (i7, i8, i16, i24, i32, i48, i64, i128) with the scalar form first and the
; vector forms after it. Every declaration refers to attribute group #0.
8195 declare i7 @llvm.fshr.i7(i7, i7, i7) #0
8196 declare i8 @llvm.fshr.i8(i8, i8, i8) #0
8197 declare <2 x i8> @llvm.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0
8198 declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0
8200 declare i16 @llvm.fshr.i16(i16, i16, i16) #0
8201 declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0
8202 declare <3 x i16> @llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0
8203 declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0
8204 declare <5 x i16> @llvm.fshr.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0
8205 declare <6 x i16> @llvm.fshr.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0
8206 declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0
8208 declare i24 @llvm.fshr.i24(i24, i24, i24) #0
8209 declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) #0
8211 declare i32 @llvm.fshr.i32(i32, i32, i32) #0
8212 declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0
8213 declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0
8214 declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0
8215 declare <5 x i32> @llvm.fshr.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0
8216 declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0
8218 declare i48 @llvm.fshr.i48(i48, i48, i48) #0
8220 declare i64 @llvm.fshr.i64(i64, i64, i64) #0
8221 declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0
8223 declare i128 @llvm.fshr.i128(i128, i128, i128) #0
8224 declare <2 x i128> @llvm.fshr.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) #0
; Attribute group #0, referenced by every llvm.fshr declaration above. The
; LLVM Language Reference defines the exact meaning of each listed attribute.
8226 attributes #0 = { nounwind readnone speculatable willreturn }