1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -o - %s | FileCheck -check-prefixes=GCN,GFX6 %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GFX11 %s
; s_fshr_i7 - llvm.fshr.i7 with a variable shift amount. %lhs, %rhs and %amt
; are all `inreg` arguments of an amdgpu_ps function (s0, s1, s2 in the checks).
;
; Expected lowering, as pinned by the assertions below:
;   * The amount is masked to 7 bits and reduced modulo 7. A float reciprocal
;     of 7 (v_rcp_iflag_f32) is scaled and converted to an integer estimate,
;     refined with v_mul_lo_u32 / v_mul_hi_u32, and the resulting remainder is
;     fixed up by two "subtract 7 if >= 7" compare/select steps.
;   * With r = amt mod 7 the result is
;       ((lhs << 1) << (6 - r)) | ((rhs & 0x7f) >> r).
;   * GFX6 performs the final shifts with 32-bit VALU shifts; GFX8 and newer
;     use 16-bit shifts (and a 16-bit subtract for 6 - r).
;   * The remainder is computed in a VGPR, so the result is copied back to s0
;     with v_readfirstlane_b32 before the shader part returns.
8 define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) {
9 ; GFX6-LABEL: s_fshr_i7:
11 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
12 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
13 ; GFX6-NEXT: s_and_b32 s2, s2, 0x7f
14 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
15 ; GFX6-NEXT: s_and_b32 s1, s1, 0x7f
16 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
17 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
18 ; GFX6-NEXT: v_mul_lo_u32 v1, v0, -7
19 ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
20 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
21 ; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
22 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, 7
23 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
24 ; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
25 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
26 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
27 ; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0
28 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
29 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
30 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 6, v0
31 ; GFX6-NEXT: v_and_b32_e32 v0, 0x7f, v0
32 ; GFX6-NEXT: v_and_b32_e32 v1, 0x7f, v1
33 ; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1
34 ; GFX6-NEXT: v_lshr_b32_e32 v0, s1, v0
35 ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
36 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
37 ; GFX6-NEXT: ; return to shader part epilog
39 ; GFX8-LABEL: s_fshr_i7:
41 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
42 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
43 ; GFX8-NEXT: s_and_b32 s2, s2, 0x7f
44 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
45 ; GFX8-NEXT: s_and_b32 s1, s1, 0x7f
46 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
47 ; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
48 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, -7
49 ; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
50 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
51 ; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
52 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, 7
53 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
54 ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
55 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
56 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
57 ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0
58 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
59 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
60 ; GFX8-NEXT: v_sub_u16_e32 v1, 6, v0
61 ; GFX8-NEXT: v_and_b32_e32 v0, 0x7f, v0
62 ; GFX8-NEXT: v_and_b32_e32 v1, 0x7f, v1
63 ; GFX8-NEXT: v_lshlrev_b16_e64 v1, v1, s0
64 ; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s1
65 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
66 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
67 ; GFX8-NEXT: ; return to shader part epilog
69 ; GFX9-LABEL: s_fshr_i7:
71 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
72 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
73 ; GFX9-NEXT: s_and_b32 s2, s2, 0x7f
74 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
75 ; GFX9-NEXT: s_and_b32 s1, s1, 0x7f
76 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
77 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
78 ; GFX9-NEXT: v_mul_lo_u32 v1, v0, -7
79 ; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
80 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
81 ; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0
82 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, 7
83 ; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
84 ; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
85 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
86 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
87 ; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0
88 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0
89 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
90 ; GFX9-NEXT: v_sub_u16_e32 v1, 6, v0
91 ; GFX9-NEXT: v_and_b32_e32 v0, 0x7f, v0
92 ; GFX9-NEXT: v_and_b32_e32 v1, 0x7f, v1
93 ; GFX9-NEXT: v_lshlrev_b16_e64 v1, v1, s0
94 ; GFX9-NEXT: v_lshrrev_b16_e64 v0, v0, s1
95 ; GFX9-NEXT: v_or_b32_e32 v0, v1, v0
96 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
97 ; GFX9-NEXT: ; return to shader part epilog
99 ; GFX10-LABEL: s_fshr_i7:
101 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
102 ; GFX10-NEXT: s_and_b32 s2, s2, 0x7f
103 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
104 ; GFX10-NEXT: s_and_b32 s1, s1, 0x7f
105 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
106 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
107 ; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
108 ; GFX10-NEXT: v_mul_lo_u32 v1, v0, -7
109 ; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
110 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
111 ; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0
112 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, 7
113 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0
114 ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
115 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
116 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
117 ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
118 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
119 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
120 ; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0
121 ; GFX10-NEXT: v_and_b32_e32 v0, 0x7f, v0
122 ; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1
123 ; GFX10-NEXT: v_lshrrev_b16 v0, v0, s1
124 ; GFX10-NEXT: v_lshlrev_b16 v1, v1, s0
125 ; GFX10-NEXT: v_or_b32_e32 v0, v1, v0
126 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
127 ; GFX10-NEXT: ; return to shader part epilog
129 ; GFX11-LABEL: s_fshr_i7:
131 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 7
132 ; GFX11-NEXT: s_and_b32 s2, s2, 0x7f
133 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1
134 ; GFX11-NEXT: s_and_b32 s1, s1, 0x7f
135 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
136 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0
137 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
138 ; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
139 ; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
140 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
141 ; GFX11-NEXT: v_mul_lo_u32 v1, v0, -7
142 ; GFX11-NEXT: v_mul_hi_u32 v1, v0, v1
143 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
144 ; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1
145 ; GFX11-NEXT: v_mul_hi_u32 v0, s2, v0
146 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
147 ; GFX11-NEXT: v_mul_lo_u32 v0, v0, 7
148 ; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0
149 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
150 ; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
151 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
152 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
153 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
154 ; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0
155 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0
156 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
157 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
158 ; GFX11-NEXT: v_sub_nc_u16 v1, 6, v0
159 ; GFX11-NEXT: v_and_b32_e32 v0, 0x7f, v0
160 ; GFX11-NEXT: v_and_b32_e32 v1, 0x7f, v1
161 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
162 ; GFX11-NEXT: v_lshrrev_b16 v0, v0, s1
163 ; GFX11-NEXT: v_lshlrev_b16 v1, v1, s0
164 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
165 ; GFX11-NEXT: v_or_b32_e32 v0, v1, v0
166 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
167 ; GFX11-NEXT: ; return to shader part epilog
168 %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt)
; v_fshr_i7 - llvm.fshr.i7 with a variable shift amount and non-inreg
; arguments. In the checks lhs, rhs and amt are in v0, v1 and v2, and the
; function returns through s_setpc_b64 s[30:31].
;
; Expected lowering, as pinned by the assertions below:
;   * The amount is masked to 7 bits and reduced modulo 7 using a reciprocal
;     of 7 (v_rcp_iflag_f32), a v_mul_lo_u32 / v_mul_hi_u32 refinement and two
;     "subtract 7 if >= 7" compare/select corrections.
;   * With r = amt mod 7 the result is
;       ((lhs << 1) << (6 - r)) | ((rhs & 0x7f) >> r).
;   * GFX6 uses 32-bit shifts throughout; GFX8 and newer use 16-bit shifts and
;     a 16-bit subtract for 6 - r.
;   * The GFX11 checks additionally contain s_delay_alu and s_waitcnt_depctr lines.
172 define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) {
173 ; GFX6-LABEL: v_fshr_i7:
175 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
177 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3
178 ; GFX6-NEXT: v_and_b32_e32 v2, 0x7f, v2
179 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
180 ; GFX6-NEXT: v_and_b32_e32 v1, 0x7f, v1
181 ; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
182 ; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3
183 ; GFX6-NEXT: v_mul_lo_u32 v4, v3, -7
184 ; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4
185 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4
186 ; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
187 ; GFX6-NEXT: v_mul_lo_u32 v3, v3, 7
188 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
189 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
190 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
191 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
192 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2
193 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
194 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
195 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 6, v2
196 ; GFX6-NEXT: v_and_b32_e32 v2, 0x7f, v2
197 ; GFX6-NEXT: v_and_b32_e32 v3, 0x7f, v3
198 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0
199 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
200 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
201 ; GFX6-NEXT: s_setpc_b64 s[30:31]
203 ; GFX8-LABEL: v_fshr_i7:
205 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
207 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3
208 ; GFX8-NEXT: v_and_b32_e32 v2, 0x7f, v2
209 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0
210 ; GFX8-NEXT: v_and_b32_e32 v1, 0x7f, v1
211 ; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
212 ; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3
213 ; GFX8-NEXT: v_mul_lo_u32 v4, v3, -7
214 ; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4
215 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4
216 ; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3
217 ; GFX8-NEXT: v_mul_lo_u32 v3, v3, 7
218 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
219 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
220 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
221 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
222 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2
223 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
224 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
225 ; GFX8-NEXT: v_sub_u16_e32 v3, 6, v2
226 ; GFX8-NEXT: v_and_b32_e32 v2, 0x7f, v2
227 ; GFX8-NEXT: v_and_b32_e32 v3, 0x7f, v3
228 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0
229 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1
230 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
231 ; GFX8-NEXT: s_setpc_b64 s[30:31]
233 ; GFX9-LABEL: v_fshr_i7:
235 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
237 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3
238 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2
239 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0
240 ; GFX9-NEXT: v_and_b32_e32 v1, 0x7f, v1
241 ; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
242 ; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3
243 ; GFX9-NEXT: v_mul_lo_u32 v4, v3, -7
244 ; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4
245 ; GFX9-NEXT: v_add_u32_e32 v3, v3, v4
246 ; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3
247 ; GFX9-NEXT: v_mul_lo_u32 v3, v3, 7
248 ; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
249 ; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
250 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
251 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
252 ; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2
253 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2
254 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
255 ; GFX9-NEXT: v_sub_u16_e32 v3, 6, v2
256 ; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2
257 ; GFX9-NEXT: v_and_b32_e32 v3, 0x7f, v3
258 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0
259 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1
260 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
261 ; GFX9-NEXT: s_setpc_b64 s[30:31]
263 ; GFX10-LABEL: v_fshr_i7:
265 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
267 ; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2
268 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
269 ; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1
270 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3
271 ; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
272 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3
273 ; GFX10-NEXT: v_mul_lo_u32 v4, v3, -7
274 ; GFX10-NEXT: v_mul_hi_u32 v4, v3, v4
275 ; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4
276 ; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3
277 ; GFX10-NEXT: v_mul_lo_u32 v3, v3, 7
278 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
279 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
280 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
281 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
282 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
283 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
284 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
285 ; GFX10-NEXT: v_sub_nc_u16 v3, 6, v2
286 ; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2
287 ; GFX10-NEXT: v_and_b32_e32 v3, 0x7f, v3
288 ; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1
289 ; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0
290 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
291 ; GFX10-NEXT: s_setpc_b64 s[30:31]
293 ; GFX11-LABEL: v_fshr_i7:
295 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 7
297 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7f, v2
298 ; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
299 ; GFX11-NEXT: v_and_b32_e32 v1, 0x7f, v1
300 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
301 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v3, v3
302 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
303 ; GFX11-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
304 ; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3
305 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
306 ; GFX11-NEXT: v_mul_lo_u32 v4, v3, -7
307 ; GFX11-NEXT: v_mul_hi_u32 v4, v3, v4
308 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
309 ; GFX11-NEXT: v_add_nc_u32_e32 v3, v3, v4
310 ; GFX11-NEXT: v_mul_hi_u32 v3, v2, v3
311 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
312 ; GFX11-NEXT: v_mul_lo_u32 v3, v3, 7
313 ; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3
314 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
315 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
316 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
317 ; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
318 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
319 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 7, v2
320 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2
321 ; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
322 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
323 ; GFX11-NEXT: v_sub_nc_u16 v3, 6, v2
324 ; GFX11-NEXT: v_and_b32_e32 v2, 0x7f, v2
325 ; GFX11-NEXT: v_and_b32_e32 v3, 0x7f, v3
326 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
327 ; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1
328 ; GFX11-NEXT: v_lshlrev_b16 v0, v3, v0
329 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
330 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
331 ; GFX11-NEXT: s_setpc_b64 s[30:31]
332 %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt)
; s_fshr_i8 - llvm.fshr.i8 with a variable shift amount; all three operands
; are `inreg` arguments of an amdgpu_ps function (s0, s1, s2 in the checks).
;
; The assertions expect a scalar-ALU-only sequence on every target:
;   * right-shift amount = amt & 7 (s_and_b32)
;   * left-shift amount  = 7 & ~amt (s_andn2_b32, printed as s_and_not1_b32
;     in the GFX11 checks)
;   * result = ((lhs << 1) << left) | ((rhs & 0xff) >> right)
; GFX8 and newer additionally mask rhs with 0xffff after the 0xff mask; GFX6
; emits only the 0xff mask.
336 define amdgpu_ps i8 @s_fshr_i8(i8 inreg %lhs, i8 inreg %rhs, i8 inreg %amt) {
337 ; GFX6-LABEL: s_fshr_i8:
339 ; GFX6-NEXT: s_and_b32 s3, s2, 7
340 ; GFX6-NEXT: s_andn2_b32 s2, 7, s2
341 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
342 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff
343 ; GFX6-NEXT: s_lshl_b32 s0, s0, s2
344 ; GFX6-NEXT: s_lshr_b32 s1, s1, s3
345 ; GFX6-NEXT: s_or_b32 s0, s0, s1
346 ; GFX6-NEXT: ; return to shader part epilog
348 ; GFX8-LABEL: s_fshr_i8:
350 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
351 ; GFX8-NEXT: s_and_b32 s3, s2, 7
352 ; GFX8-NEXT: s_andn2_b32 s2, 7, s2
353 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
354 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
355 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2
356 ; GFX8-NEXT: s_lshr_b32 s1, s1, s3
357 ; GFX8-NEXT: s_or_b32 s0, s0, s1
358 ; GFX8-NEXT: ; return to shader part epilog
360 ; GFX9-LABEL: s_fshr_i8:
362 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
363 ; GFX9-NEXT: s_and_b32 s3, s2, 7
364 ; GFX9-NEXT: s_andn2_b32 s2, 7, s2
365 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
366 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
367 ; GFX9-NEXT: s_lshl_b32 s0, s0, s2
368 ; GFX9-NEXT: s_lshr_b32 s1, s1, s3
369 ; GFX9-NEXT: s_or_b32 s0, s0, s1
370 ; GFX9-NEXT: ; return to shader part epilog
372 ; GFX10-LABEL: s_fshr_i8:
374 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
375 ; GFX10-NEXT: s_and_b32 s3, s2, 7
376 ; GFX10-NEXT: s_andn2_b32 s2, 7, s2
377 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
378 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
379 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2
380 ; GFX10-NEXT: s_lshr_b32 s1, s1, s3
381 ; GFX10-NEXT: s_or_b32 s0, s0, s1
382 ; GFX10-NEXT: ; return to shader part epilog
384 ; GFX11-LABEL: s_fshr_i8:
386 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
387 ; GFX11-NEXT: s_and_b32 s3, s2, 7
388 ; GFX11-NEXT: s_and_not1_b32 s2, 7, s2
389 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1
390 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
391 ; GFX11-NEXT: s_lshl_b32 s0, s0, s2
392 ; GFX11-NEXT: s_lshr_b32 s1, s1, s3
393 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
394 ; GFX11-NEXT: s_or_b32 s0, s0, s1
395 ; GFX11-NEXT: ; return to shader part epilog
396 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt)
; v_fshr_i8 - llvm.fshr.i8 with a variable shift amount and non-inreg
; arguments (v0 = lhs, v1 = rhs, v2 = amt in the checks).
;
; Expected shape on all targets: right amount = amt & 7, left amount =
; ~amt & 7 (v_not_b32 followed by v_and_b32), and
;   result = ((lhs << 1) << left) | (low byte of rhs >> right).
; Differences between targets, as pinned by the assertions:
;   * GFX6 uses 32-bit shifts and masks rhs with 0xff.
;   * GFX8/GFX9 use 16-bit shifts and select byte 0 of rhs through the SDWA
;     operand selector (src1_sel:BYTE_0) instead of emitting a separate mask.
;   * GFX10/GFX11 use 16-bit shifts with an explicit 0xff mask on rhs.
400 define i8 @v_fshr_i8(i8 %lhs, i8 %rhs, i8 %amt) {
401 ; GFX6-LABEL: v_fshr_i8:
403 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404 ; GFX6-NEXT: v_and_b32_e32 v3, 7, v2
405 ; GFX6-NEXT: v_not_b32_e32 v2, v2
406 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v2
407 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
408 ; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1
409 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
410 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v3, v1
411 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
412 ; GFX6-NEXT: s_setpc_b64 s[30:31]
414 ; GFX8-LABEL: v_fshr_i8:
416 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
417 ; GFX8-NEXT: v_and_b32_e32 v3, 7, v2
418 ; GFX8-NEXT: v_not_b32_e32 v2, v2
419 ; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
420 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0
421 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0
422 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
423 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
424 ; GFX8-NEXT: s_setpc_b64 s[30:31]
426 ; GFX9-LABEL: v_fshr_i8:
428 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
429 ; GFX9-NEXT: v_and_b32_e32 v3, 7, v2
430 ; GFX9-NEXT: v_not_b32_e32 v2, v2
431 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
432 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0
433 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0
434 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
435 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
436 ; GFX9-NEXT: s_setpc_b64 s[30:31]
438 ; GFX10-LABEL: v_fshr_i8:
440 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
441 ; GFX10-NEXT: v_not_b32_e32 v3, v2
442 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
443 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
444 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
445 ; GFX10-NEXT: v_and_b32_e32 v3, 7, v3
446 ; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1
447 ; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0
448 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
449 ; GFX10-NEXT: s_setpc_b64 s[30:31]
451 ; GFX11-LABEL: v_fshr_i8:
453 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
454 ; GFX11-NEXT: v_not_b32_e32 v3, v2
455 ; GFX11-NEXT: v_and_b32_e32 v2, 7, v2
456 ; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
457 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
458 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
459 ; GFX11-NEXT: v_and_b32_e32 v3, 7, v3
460 ; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1
461 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
462 ; GFX11-NEXT: v_lshlrev_b16 v0, v3, v0
463 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
464 ; GFX11-NEXT: s_setpc_b64 s[30:31]
465 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt)
; s_fshr_i8_4 - llvm.fshr.i8 with the constant shift amount 4 and `inreg`
; operands in an amdgpu_ps function (s0 = lhs, s1 = rhs in the checks).
;
; With a constant amount no masking of the amount is emitted; the expected
; result is (lhs << 4) | (rhs part):
;   * GFX6 obtains the rhs part with a single s_bfe_u32 whose control operand
;     is 0x40004 (presumably width 4 at bit offset 4 - confirm against the ISA
;     encoding of s_bfe_u32).
;   * GFX8 and newer mask rhs with 0xff and then 0xffff, and shift it right by 4.
469 define amdgpu_ps i8 @s_fshr_i8_4(i8 inreg %lhs, i8 inreg %rhs) {
470 ; GFX6-LABEL: s_fshr_i8_4:
472 ; GFX6-NEXT: s_lshl_b32 s0, s0, 4
473 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x40004
474 ; GFX6-NEXT: s_or_b32 s0, s0, s1
475 ; GFX6-NEXT: ; return to shader part epilog
477 ; GFX8-LABEL: s_fshr_i8_4:
479 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
480 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
481 ; GFX8-NEXT: s_lshl_b32 s0, s0, 4
482 ; GFX8-NEXT: s_lshr_b32 s1, s1, 4
483 ; GFX8-NEXT: s_or_b32 s0, s0, s1
484 ; GFX8-NEXT: ; return to shader part epilog
486 ; GFX9-LABEL: s_fshr_i8_4:
488 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
489 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
490 ; GFX9-NEXT: s_lshl_b32 s0, s0, 4
491 ; GFX9-NEXT: s_lshr_b32 s1, s1, 4
492 ; GFX9-NEXT: s_or_b32 s0, s0, s1
493 ; GFX9-NEXT: ; return to shader part epilog
495 ; GFX10-LABEL: s_fshr_i8_4:
497 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
498 ; GFX10-NEXT: s_lshl_b32 s0, s0, 4
499 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
500 ; GFX10-NEXT: s_lshr_b32 s1, s1, 4
501 ; GFX10-NEXT: s_or_b32 s0, s0, s1
502 ; GFX10-NEXT: ; return to shader part epilog
504 ; GFX11-LABEL: s_fshr_i8_4:
506 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
507 ; GFX11-NEXT: s_lshl_b32 s0, s0, 4
508 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
509 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
510 ; GFX11-NEXT: s_lshr_b32 s1, s1, 4
511 ; GFX11-NEXT: s_or_b32 s0, s0, s1
512 ; GFX11-NEXT: ; return to shader part epilog
513 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4)
; v_fshr_i8_4 - llvm.fshr.i8 with the constant shift amount 4 and non-inreg
; arguments (v0 = lhs, v1 = rhs in the checks).
;
; Expected result is (lhs << 4) | (rhs part), where the rhs part is formed
; differently per target:
;   * GFX6 uses v_bfe_u32 v1, v1, 4, 4 and a 32-bit left shift.
;   * GFX8/GFX9 load the constant 4 into v2 and use it as the amount of an
;     SDWA 16-bit right shift that selects byte 0 of rhs (src1_sel:BYTE_0).
;   * GFX10/GFX11 mask rhs with 0xff and shift right by the immediate 4 with
;     a 16-bit shift.
517 define i8 @v_fshr_i8_4(i8 %lhs, i8 %rhs) {
518 ; GFX6-LABEL: v_fshr_i8_4:
520 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0
522 ; GFX6-NEXT: v_bfe_u32 v1, v1, 4, 4
523 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
524 ; GFX6-NEXT: s_setpc_b64 s[30:31]
526 ; GFX8-LABEL: v_fshr_i8_4:
528 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
529 ; GFX8-NEXT: v_mov_b32_e32 v2, 4
530 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0
531 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
532 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
533 ; GFX8-NEXT: s_setpc_b64 s[30:31]
535 ; GFX9-LABEL: v_fshr_i8_4:
537 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
538 ; GFX9-NEXT: v_mov_b32_e32 v2, 4
539 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 4, v0
540 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
541 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
542 ; GFX9-NEXT: s_setpc_b64 s[30:31]
544 ; GFX10-LABEL: v_fshr_i8_4:
546 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
547 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
548 ; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0
549 ; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1
550 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
551 ; GFX10-NEXT: s_setpc_b64 s[30:31]
553 ; GFX11-LABEL: v_fshr_i8_4:
555 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
556 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
557 ; GFX11-NEXT: v_lshlrev_b16 v0, 4, v0
558 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
559 ; GFX11-NEXT: v_lshrrev_b16 v1, 4, v1
560 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
561 ; GFX11-NEXT: s_setpc_b64 s[30:31]
562 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4)
; s_fshr_i8_5 - llvm.fshr.i8 with the constant shift amount 5 and `inreg`
; operands in an amdgpu_ps function (s0 = lhs, s1 = rhs in the checks).
;
; The expected result is (lhs << 3) | (rhs part); note the left shift uses
; 8 - 5 = 3:
;   * GFX6 obtains the rhs part with a single s_bfe_u32 whose control operand
;     is 0x30005 (presumably width 3 at bit offset 5 - confirm against the ISA
;     encoding of s_bfe_u32).
;   * GFX8 and newer mask rhs with 0xff and then 0xffff, and shift it right by 5.
566 define amdgpu_ps i8 @s_fshr_i8_5(i8 inreg %lhs, i8 inreg %rhs) {
567 ; GFX6-LABEL: s_fshr_i8_5:
569 ; GFX6-NEXT: s_lshl_b32 s0, s0, 3
570 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x30005
571 ; GFX6-NEXT: s_or_b32 s0, s0, s1
572 ; GFX6-NEXT: ; return to shader part epilog
574 ; GFX8-LABEL: s_fshr_i8_5:
576 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
577 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
578 ; GFX8-NEXT: s_lshl_b32 s0, s0, 3
579 ; GFX8-NEXT: s_lshr_b32 s1, s1, 5
580 ; GFX8-NEXT: s_or_b32 s0, s0, s1
581 ; GFX8-NEXT: ; return to shader part epilog
583 ; GFX9-LABEL: s_fshr_i8_5:
585 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
586 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
587 ; GFX9-NEXT: s_lshl_b32 s0, s0, 3
588 ; GFX9-NEXT: s_lshr_b32 s1, s1, 5
589 ; GFX9-NEXT: s_or_b32 s0, s0, s1
590 ; GFX9-NEXT: ; return to shader part epilog
592 ; GFX10-LABEL: s_fshr_i8_5:
594 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
595 ; GFX10-NEXT: s_lshl_b32 s0, s0, 3
596 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
597 ; GFX10-NEXT: s_lshr_b32 s1, s1, 5
598 ; GFX10-NEXT: s_or_b32 s0, s0, s1
599 ; GFX10-NEXT: ; return to shader part epilog
601 ; GFX11-LABEL: s_fshr_i8_5:
603 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
604 ; GFX11-NEXT: s_lshl_b32 s0, s0, 3
605 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
606 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
607 ; GFX11-NEXT: s_lshr_b32 s1, s1, 5
608 ; GFX11-NEXT: s_or_b32 s0, s0, s1
609 ; GFX11-NEXT: ; return to shader part epilog
610 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5)
; v_fshr_i8_5 - llvm.fshr.i8 with the constant shift amount 5 and non-inreg
; arguments (v0 = lhs, v1 = rhs in the checks).
;
; Expected result is (lhs << 3) | (rhs part), where the rhs part is formed
; differently per target:
;   * GFX6 uses v_bfe_u32 v1, v1, 5, 3 and a 32-bit left shift.
;   * GFX8/GFX9 load the constant 5 into v2 and use it as the amount of an
;     SDWA 16-bit right shift that selects byte 0 of rhs (src1_sel:BYTE_0).
;   * GFX10/GFX11 mask rhs with 0xff and shift right by the immediate 5 with
;     a 16-bit shift.
614 define i8 @v_fshr_i8_5(i8 %lhs, i8 %rhs) {
615 ; GFX6-LABEL: v_fshr_i8_5:
617 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
618 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 3, v0
619 ; GFX6-NEXT: v_bfe_u32 v1, v1, 5, 3
620 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
621 ; GFX6-NEXT: s_setpc_b64 s[30:31]
623 ; GFX8-LABEL: v_fshr_i8_5:
625 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
626 ; GFX8-NEXT: v_mov_b32_e32 v2, 5
627 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 3, v0
628 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
629 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
630 ; GFX8-NEXT: s_setpc_b64 s[30:31]
632 ; GFX9-LABEL: v_fshr_i8_5:
634 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
635 ; GFX9-NEXT: v_mov_b32_e32 v2, 5
636 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 3, v0
637 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
638 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
639 ; GFX9-NEXT: s_setpc_b64 s[30:31]
641 ; GFX10-LABEL: v_fshr_i8_5:
643 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
644 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
645 ; GFX10-NEXT: v_lshlrev_b16 v0, 3, v0
646 ; GFX10-NEXT: v_lshrrev_b16 v1, 5, v1
647 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
648 ; GFX10-NEXT: s_setpc_b64 s[30:31]
650 ; GFX11-LABEL: v_fshr_i8_5:
652 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
653 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
654 ; GFX11-NEXT: v_lshlrev_b16 v0, 3, v0
655 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
656 ; GFX11-NEXT: v_lshrrev_b16 v1, 5, v1
657 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
658 ; GFX11-NEXT: s_setpc_b64 s[30:31]
659 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5)
; s_fshr_v2i8 - llvm.fshr.v2i8 on `inreg` operands of an amdgpu_ps function.
; The <2 x i8> vectors are passed and returned as i16 values and bitcast
; to/from the vector type inside the function body.
;
; The assertions expect a scalar-ALU-only sequence on every target:
;   * The high byte of each operand is extracted with s_lshr_b32 by 8 (GFX6
;     uses s_bfe_u32 with control operand 0x80008 for the high byte of rhs).
;   * Each byte lane is shifted independently: right amount = amt & 7, left
;     amount = 7 & ~amt (s_andn2_b32, printed as s_and_not1_b32 on GFX11),
;     lane result = ((lhs << 1) << left) | (zero-extended rhs byte >> right).
;   * The two lane results are recombined as (lo & 0xff) | ((hi & 0xff) << 8).
; GFX8 and newer additionally mask the rhs bytes with 0xffff after the 0xff mask.
663 define amdgpu_ps i16 @s_fshr_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 inreg %amt.arg) {
664 ; GFX6-LABEL: s_fshr_v2i8:
666 ; GFX6-NEXT: s_lshr_b32 s3, s0, 8
667 ; GFX6-NEXT: s_lshr_b32 s4, s2, 8
668 ; GFX6-NEXT: s_and_b32 s5, s2, 7
669 ; GFX6-NEXT: s_andn2_b32 s2, 7, s2
670 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
671 ; GFX6-NEXT: s_lshl_b32 s0, s0, s2
672 ; GFX6-NEXT: s_and_b32 s2, s1, 0xff
673 ; GFX6-NEXT: s_lshr_b32 s2, s2, s5
674 ; GFX6-NEXT: s_or_b32 s0, s0, s2
675 ; GFX6-NEXT: s_and_b32 s2, s4, 7
676 ; GFX6-NEXT: s_andn2_b32 s4, 7, s4
677 ; GFX6-NEXT: s_lshl_b32 s3, s3, 1
678 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80008
679 ; GFX6-NEXT: s_lshl_b32 s3, s3, s4
680 ; GFX6-NEXT: s_lshr_b32 s1, s1, s2
681 ; GFX6-NEXT: s_or_b32 s1, s3, s1
682 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff
683 ; GFX6-NEXT: s_and_b32 s0, s0, 0xff
684 ; GFX6-NEXT: s_lshl_b32 s1, s1, 8
685 ; GFX6-NEXT: s_or_b32 s0, s0, s1
686 ; GFX6-NEXT: ; return to shader part epilog
688 ; GFX8-LABEL: s_fshr_v2i8:
690 ; GFX8-NEXT: s_lshr_b32 s3, s0, 8
691 ; GFX8-NEXT: s_lshr_b32 s4, s1, 8
692 ; GFX8-NEXT: s_lshr_b32 s5, s2, 8
693 ; GFX8-NEXT: s_and_b32 s6, s2, 7
694 ; GFX8-NEXT: s_andn2_b32 s2, 7, s2
695 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
696 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
697 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2
698 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
699 ; GFX8-NEXT: s_andn2_b32 s2, 7, s5
700 ; GFX8-NEXT: s_lshl_b32 s3, s3, 1
701 ; GFX8-NEXT: s_lshr_b32 s1, s1, s6
702 ; GFX8-NEXT: s_lshl_b32 s2, s3, s2
703 ; GFX8-NEXT: s_and_b32 s3, s4, 0xff
704 ; GFX8-NEXT: s_or_b32 s0, s0, s1
705 ; GFX8-NEXT: s_and_b32 s1, s5, 7
706 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
707 ; GFX8-NEXT: s_lshr_b32 s1, s3, s1
708 ; GFX8-NEXT: s_or_b32 s1, s2, s1
709 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
710 ; GFX8-NEXT: s_and_b32 s0, s0, 0xff
711 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8
712 ; GFX8-NEXT: s_or_b32 s0, s0, s1
713 ; GFX8-NEXT: ; return to shader part epilog
715 ; GFX9-LABEL: s_fshr_v2i8:
717 ; GFX9-NEXT: s_lshr_b32 s3, s0, 8
718 ; GFX9-NEXT: s_lshr_b32 s4, s1, 8
719 ; GFX9-NEXT: s_lshr_b32 s5, s2, 8
720 ; GFX9-NEXT: s_and_b32 s6, s2, 7
721 ; GFX9-NEXT: s_andn2_b32 s2, 7, s2
722 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
723 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
724 ; GFX9-NEXT: s_lshl_b32 s0, s0, s2
725 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
726 ; GFX9-NEXT: s_andn2_b32 s2, 7, s5
727 ; GFX9-NEXT: s_lshl_b32 s3, s3, 1
728 ; GFX9-NEXT: s_lshr_b32 s1, s1, s6
729 ; GFX9-NEXT: s_lshl_b32 s2, s3, s2
730 ; GFX9-NEXT: s_and_b32 s3, s4, 0xff
731 ; GFX9-NEXT: s_or_b32 s0, s0, s1
732 ; GFX9-NEXT: s_and_b32 s1, s5, 7
733 ; GFX9-NEXT: s_and_b32 s3, 0xffff, s3
734 ; GFX9-NEXT: s_lshr_b32 s1, s3, s1
735 ; GFX9-NEXT: s_or_b32 s1, s2, s1
736 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
737 ; GFX9-NEXT: s_and_b32 s0, s0, 0xff
738 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8
739 ; GFX9-NEXT: s_or_b32 s0, s0, s1
740 ; GFX9-NEXT: ; return to shader part epilog
742 ; GFX10-LABEL: s_fshr_v2i8:
744 ; GFX10-NEXT: s_lshr_b32 s4, s1, 8
745 ; GFX10-NEXT: s_lshr_b32 s3, s0, 8
746 ; GFX10-NEXT: s_lshr_b32 s5, s2, 8
747 ; GFX10-NEXT: s_and_b32 s6, s2, 7
748 ; GFX10-NEXT: s_andn2_b32 s2, 7, s2
749 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
750 ; GFX10-NEXT: s_and_b32 s4, s4, 0xff
751 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
752 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2
753 ; GFX10-NEXT: s_and_b32 s2, s5, 7
754 ; GFX10-NEXT: s_andn2_b32 s5, 7, s5
755 ; GFX10-NEXT: s_lshl_b32 s3, s3, 1
756 ; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
757 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
758 ; GFX10-NEXT: s_lshl_b32 s3, s3, s5
759 ; GFX10-NEXT: s_lshr_b32 s2, s4, s2
760 ; GFX10-NEXT: s_lshr_b32 s1, s1, s6
761 ; GFX10-NEXT: s_or_b32 s2, s3, s2
762 ; GFX10-NEXT: s_or_b32 s0, s0, s1
763 ; GFX10-NEXT: s_and_b32 s1, s2, 0xff
764 ; GFX10-NEXT: s_and_b32 s0, s0, 0xff
765 ; GFX10-NEXT: s_lshl_b32 s1, s1, 8
766 ; GFX10-NEXT: s_or_b32 s0, s0, s1
767 ; GFX10-NEXT: ; return to shader part epilog
769 ; GFX11-LABEL: s_fshr_v2i8:
771 ; GFX11-NEXT: s_lshr_b32 s4, s1, 8
772 ; GFX11-NEXT: s_lshr_b32 s3, s0, 8
773 ; GFX11-NEXT: s_lshr_b32 s5, s2, 8
774 ; GFX11-NEXT: s_and_b32 s6, s2, 7
775 ; GFX11-NEXT: s_and_not1_b32 s2, 7, s2
776 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1
777 ; GFX11-NEXT: s_and_b32 s4, s4, 0xff
778 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
779 ; GFX11-NEXT: s_lshl_b32 s0, s0, s2
780 ; GFX11-NEXT: s_and_b32 s2, s5, 7
781 ; GFX11-NEXT: s_and_not1_b32 s5, 7, s5
782 ; GFX11-NEXT: s_lshl_b32 s3, s3, 1
783 ; GFX11-NEXT: s_and_b32 s4, 0xffff, s4
784 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
785 ; GFX11-NEXT: s_lshl_b32 s3, s3, s5
786 ; GFX11-NEXT: s_lshr_b32 s2, s4, s2
787 ; GFX11-NEXT: s_lshr_b32 s1, s1, s6
788 ; GFX11-NEXT: s_or_b32 s2, s3, s2
789 ; GFX11-NEXT: s_or_b32 s0, s0, s1
790 ; GFX11-NEXT: s_and_b32 s1, s2, 0xff
791 ; GFX11-NEXT: s_and_b32 s0, s0, 0xff
792 ; GFX11-NEXT: s_lshl_b32 s1, s1, 8
793 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
794 ; GFX11-NEXT: s_or_b32 s0, s0, s1
795 ; GFX11-NEXT: ; return to shader part epilog
796 %lhs = bitcast i16 %lhs.arg to <2 x i8>
797 %rhs = bitcast i16 %rhs.arg to <2 x i8>
798 %amt = bitcast i16 %amt.arg to <2 x i8>
799 %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt)
800 %cast.result = bitcast <2 x i8> %result to i16
; v_fshr_v2i8: funnel-shift-right over two i8 lanes, where each <2 x i8>
; operand is passed (and the result returned) packed in an i16. No calling
; convention or inreg markers are used; the expected code below reads
; lhs/rhs/amt from v0/v1/v2, leaves the packed result in v0 and returns with
; s_setpc_b64.
; For every lane the expected sequences form
;   ((lhs << 1) << (~amt & 7)) | (rhs >> (amt & 7))
; on the extracted bytes, keep only the low 8 bits of each lane result, and OR
; the two lanes back together at bit offsets 0 and 8.
; The per-target blocks are autogenerated (see the NOTE on the first line of
; this file); refresh them with utils/update_llc_test_checks.py rather than
; editing them by hand.
804 define i16 @v_fshr_v2i8(i16 %lhs.arg, i16 %rhs.arg, i16 %amt.arg) {
805 ; GFX6-LABEL: v_fshr_v2i8:
807 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
808 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 8, v2
809 ; GFX6-NEXT: v_and_b32_e32 v5, 7, v2
810 ; GFX6-NEXT: v_not_b32_e32 v2, v2
811 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0
812 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v2
813 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
814 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
815 ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1
816 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v5, v2
817 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
818 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v4
819 ; GFX6-NEXT: v_not_b32_e32 v4, v4
820 ; GFX6-NEXT: v_and_b32_e32 v4, 7, v4
821 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
822 ; GFX6-NEXT: v_bfe_u32 v1, v1, 8, 8
823 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3
824 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
825 ; GFX6-NEXT: v_or_b32_e32 v1, v3, v1
826 ; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1
827 ; GFX6-NEXT: v_and_b32_e32 v0, 0xff, v0
828 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
829 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
830 ; GFX6-NEXT: s_setpc_b64 s[30:31]
832 ; GFX8-LABEL: v_fshr_v2i8:
834 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
835 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2
836 ; GFX8-NEXT: v_and_b32_e32 v6, 7, v2
837 ; GFX8-NEXT: v_not_b32_e32 v2, v2
838 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0
839 ; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
840 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0
841 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1
842 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0
843 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
844 ; GFX8-NEXT: v_not_b32_e32 v2, v5
845 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
846 ; GFX8-NEXT: v_and_b32_e32 v1, 7, v5
847 ; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
848 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v3
849 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, v2, v3
850 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
851 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
852 ; GFX8-NEXT: v_and_b32_e32 v1, 0xff, v1
853 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1
854 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
855 ; GFX8-NEXT: s_setpc_b64 s[30:31]
857 ; GFX9-LABEL: v_fshr_v2i8:
859 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
860 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2
861 ; GFX9-NEXT: v_and_b32_e32 v6, 7, v2
862 ; GFX9-NEXT: v_not_b32_e32 v2, v2
863 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0
864 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
865 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0
866 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1
867 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0
868 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
869 ; GFX9-NEXT: v_not_b32_e32 v2, v5
870 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
871 ; GFX9-NEXT: v_and_b32_e32 v1, 7, v5
872 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
873 ; GFX9-NEXT: v_lshlrev_b16_e32 v3, 1, v3
874 ; GFX9-NEXT: v_lshlrev_b16_e32 v2, v2, v3
875 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
876 ; GFX9-NEXT: v_or_b32_e32 v1, v2, v1
877 ; GFX9-NEXT: v_and_b32_e32 v1, 0xff, v1
878 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
879 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
880 ; GFX9-NEXT: s_setpc_b64 s[30:31]
882 ; GFX10-LABEL: v_fshr_v2i8:
884 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
885 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v2
886 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v0
887 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1
888 ; GFX10-NEXT: v_and_b32_e32 v6, 7, v2
889 ; GFX10-NEXT: v_not_b32_e32 v2, v2
890 ; GFX10-NEXT: v_not_b32_e32 v7, v3
891 ; GFX10-NEXT: v_and_b32_e32 v3, 7, v3
892 ; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4
893 ; GFX10-NEXT: v_and_b32_e32 v5, 0xff, v5
894 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
895 ; GFX10-NEXT: v_and_b32_e32 v7, 7, v7
896 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
897 ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2
898 ; GFX10-NEXT: v_lshrrev_b16 v3, v3, v5
899 ; GFX10-NEXT: v_lshlrev_b16 v4, v7, v4
900 ; GFX10-NEXT: v_lshrrev_b16 v1, v6, v1
901 ; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0
902 ; GFX10-NEXT: v_or_b32_e32 v2, v4, v3
903 ; GFX10-NEXT: v_mov_b32_e32 v3, 0xff
904 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
905 ; GFX10-NEXT: v_and_b32_sdwa v1, v2, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
906 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
907 ; GFX10-NEXT: s_setpc_b64 s[30:31]
909 ; GFX11-LABEL: v_fshr_v2i8:
911 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
912 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v2
913 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 8, v0
914 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1
915 ; GFX11-NEXT: v_and_b32_e32 v7, 7, v2
916 ; GFX11-NEXT: v_not_b32_e32 v2, v2
917 ; GFX11-NEXT: v_not_b32_e32 v6, v3
918 ; GFX11-NEXT: v_and_b32_e32 v3, 7, v3
919 ; GFX11-NEXT: v_lshlrev_b16 v4, 1, v4
920 ; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5
921 ; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
922 ; GFX11-NEXT: v_and_b32_e32 v6, 7, v6
923 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
924 ; GFX11-NEXT: v_and_b32_e32 v2, 7, v2
925 ; GFX11-NEXT: v_lshrrev_b16 v3, v3, v5
926 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
927 ; GFX11-NEXT: v_lshlrev_b16 v4, v6, v4
928 ; GFX11-NEXT: v_lshrrev_b16 v1, v7, v1
929 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
930 ; GFX11-NEXT: v_lshlrev_b16 v0, v2, v0
931 ; GFX11-NEXT: v_or_b32_e32 v2, v4, v3
932 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
933 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
934 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v2
935 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
936 ; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0
937 ; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1
938 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
939 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
940 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; Reinterpret each packed i16 operand as a <2 x i8> vector.
941 %lhs = bitcast i16 %lhs.arg to <2 x i8>
942 %rhs = bitcast i16 %rhs.arg to <2 x i8>
943 %amt = bitcast i16 %amt.arg to <2 x i8>
; The operation under test: the vector funnel-shift-right intrinsic.
944 %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt)
; Repack the two result lanes into a single i16.
945 %cast.result = bitcast <2 x i8> %result to i16
; s_fshr_v4i8: funnel-shift-right over four i8 lanes, where each <4 x i8>
; operand is passed (and the result returned) packed in an i32. This is the
; amdgpu_ps variant with inreg arguments; the expected code below takes
; lhs/rhs/amt from s0/s1/s2 and leaves the packed result in s0.
; For every lane the expected sequences form
;   ((lhs << 1) << (~amt & 7)) | (rhs >> (amt & 7))
; on the extracted bytes, mask each lane result with 0xff, and OR the lanes
; back together at bit offsets 0, 8, 16 and 24.
; The per-target blocks are autogenerated (see the NOTE on the first line of
; this file); refresh them with utils/update_llc_test_checks.py rather than
; editing them by hand.
949 define amdgpu_ps i32 @s_fshr_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 inreg %amt.arg) {
950 ; GFX6-LABEL: s_fshr_v4i8:
952 ; GFX6-NEXT: s_lshr_b32 s3, s0, 8
953 ; GFX6-NEXT: s_lshr_b32 s4, s0, 16
954 ; GFX6-NEXT: s_lshr_b32 s5, s0, 24
955 ; GFX6-NEXT: s_lshr_b32 s7, s2, 8
956 ; GFX6-NEXT: s_lshr_b32 s8, s2, 16
957 ; GFX6-NEXT: s_lshr_b32 s9, s2, 24
958 ; GFX6-NEXT: s_and_b32 s10, s2, 7
959 ; GFX6-NEXT: s_andn2_b32 s2, 7, s2
960 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
961 ; GFX6-NEXT: s_lshl_b32 s0, s0, s2
962 ; GFX6-NEXT: s_and_b32 s2, s1, 0xff
963 ; GFX6-NEXT: s_lshr_b32 s2, s2, s10
964 ; GFX6-NEXT: s_or_b32 s0, s0, s2
965 ; GFX6-NEXT: s_and_b32 s2, s7, 7
966 ; GFX6-NEXT: s_andn2_b32 s7, 7, s7
967 ; GFX6-NEXT: s_lshl_b32 s3, s3, 1
968 ; GFX6-NEXT: s_lshl_b32 s3, s3, s7
969 ; GFX6-NEXT: s_bfe_u32 s7, s1, 0x80008
970 ; GFX6-NEXT: s_lshr_b32 s2, s7, s2
971 ; GFX6-NEXT: s_lshr_b32 s6, s1, 24
972 ; GFX6-NEXT: s_or_b32 s2, s3, s2
973 ; GFX6-NEXT: s_and_b32 s3, s8, 7
974 ; GFX6-NEXT: s_andn2_b32 s7, 7, s8
975 ; GFX6-NEXT: s_lshl_b32 s4, s4, 1
976 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80010
977 ; GFX6-NEXT: s_lshl_b32 s4, s4, s7
978 ; GFX6-NEXT: s_lshr_b32 s1, s1, s3
979 ; GFX6-NEXT: s_or_b32 s1, s4, s1
980 ; GFX6-NEXT: s_and_b32 s3, s9, 7
981 ; GFX6-NEXT: s_andn2_b32 s4, 7, s9
982 ; GFX6-NEXT: s_lshl_b32 s5, s5, 1
983 ; GFX6-NEXT: s_and_b32 s2, s2, 0xff
984 ; GFX6-NEXT: s_lshl_b32 s4, s5, s4
985 ; GFX6-NEXT: s_lshr_b32 s3, s6, s3
986 ; GFX6-NEXT: s_and_b32 s0, s0, 0xff
987 ; GFX6-NEXT: s_lshl_b32 s2, s2, 8
988 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff
989 ; GFX6-NEXT: s_or_b32 s3, s4, s3
990 ; GFX6-NEXT: s_or_b32 s0, s0, s2
991 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
992 ; GFX6-NEXT: s_or_b32 s0, s0, s1
993 ; GFX6-NEXT: s_and_b32 s1, s3, 0xff
994 ; GFX6-NEXT: s_lshl_b32 s1, s1, 24
995 ; GFX6-NEXT: s_or_b32 s0, s0, s1
996 ; GFX6-NEXT: ; return to shader part epilog
998 ; GFX8-LABEL: s_fshr_v4i8:
1000 ; GFX8-NEXT: s_lshr_b32 s3, s0, 8
1001 ; GFX8-NEXT: s_lshr_b32 s4, s0, 16
1002 ; GFX8-NEXT: s_lshr_b32 s5, s0, 24
1003 ; GFX8-NEXT: s_lshr_b32 s6, s1, 8
1004 ; GFX8-NEXT: s_lshr_b32 s7, s1, 16
1005 ; GFX8-NEXT: s_lshr_b32 s8, s1, 24
1006 ; GFX8-NEXT: s_lshr_b32 s9, s2, 8
1007 ; GFX8-NEXT: s_lshr_b32 s10, s2, 16
1008 ; GFX8-NEXT: s_lshr_b32 s11, s2, 24
1009 ; GFX8-NEXT: s_and_b32 s12, s2, 7
1010 ; GFX8-NEXT: s_andn2_b32 s2, 7, s2
1011 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
1012 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
1013 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2
1014 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
1015 ; GFX8-NEXT: s_andn2_b32 s2, 7, s9
1016 ; GFX8-NEXT: s_lshl_b32 s3, s3, 1
1017 ; GFX8-NEXT: s_lshr_b32 s1, s1, s12
1018 ; GFX8-NEXT: s_lshl_b32 s2, s3, s2
1019 ; GFX8-NEXT: s_and_b32 s3, s6, 0xff
1020 ; GFX8-NEXT: s_or_b32 s0, s0, s1
1021 ; GFX8-NEXT: s_and_b32 s1, s9, 7
1022 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
1023 ; GFX8-NEXT: s_lshr_b32 s1, s3, s1
1024 ; GFX8-NEXT: s_andn2_b32 s3, 7, s10
1025 ; GFX8-NEXT: s_lshl_b32 s4, s4, 1
1026 ; GFX8-NEXT: s_lshl_b32 s3, s4, s3
1027 ; GFX8-NEXT: s_and_b32 s4, s7, 0xff
1028 ; GFX8-NEXT: s_or_b32 s1, s2, s1
1029 ; GFX8-NEXT: s_and_b32 s2, s10, 7
1030 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1031 ; GFX8-NEXT: s_lshr_b32 s2, s4, s2
1032 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
1033 ; GFX8-NEXT: s_or_b32 s2, s3, s2
1034 ; GFX8-NEXT: s_and_b32 s3, s11, 7
1035 ; GFX8-NEXT: s_andn2_b32 s4, 7, s11
1036 ; GFX8-NEXT: s_lshl_b32 s5, s5, 1
1037 ; GFX8-NEXT: s_and_b32 s0, s0, 0xff
1038 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8
1039 ; GFX8-NEXT: s_lshl_b32 s4, s5, s4
1040 ; GFX8-NEXT: s_lshr_b32 s3, s8, s3
1041 ; GFX8-NEXT: s_or_b32 s0, s0, s1
1042 ; GFX8-NEXT: s_and_b32 s1, s2, 0xff
1043 ; GFX8-NEXT: s_or_b32 s3, s4, s3
1044 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
1045 ; GFX8-NEXT: s_or_b32 s0, s0, s1
1046 ; GFX8-NEXT: s_and_b32 s1, s3, 0xff
1047 ; GFX8-NEXT: s_lshl_b32 s1, s1, 24
1048 ; GFX8-NEXT: s_or_b32 s0, s0, s1
1049 ; GFX8-NEXT: ; return to shader part epilog
1051 ; GFX9-LABEL: s_fshr_v4i8:
1053 ; GFX9-NEXT: s_lshr_b32 s3, s0, 8
1054 ; GFX9-NEXT: s_lshr_b32 s4, s0, 16
1055 ; GFX9-NEXT: s_lshr_b32 s5, s0, 24
1056 ; GFX9-NEXT: s_lshr_b32 s6, s1, 8
1057 ; GFX9-NEXT: s_lshr_b32 s7, s1, 16
1058 ; GFX9-NEXT: s_lshr_b32 s8, s1, 24
1059 ; GFX9-NEXT: s_lshr_b32 s9, s2, 8
1060 ; GFX9-NEXT: s_lshr_b32 s10, s2, 16
1061 ; GFX9-NEXT: s_lshr_b32 s11, s2, 24
1062 ; GFX9-NEXT: s_and_b32 s12, s2, 7
1063 ; GFX9-NEXT: s_andn2_b32 s2, 7, s2
1064 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
1065 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
1066 ; GFX9-NEXT: s_lshl_b32 s0, s0, s2
1067 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
1068 ; GFX9-NEXT: s_andn2_b32 s2, 7, s9
1069 ; GFX9-NEXT: s_lshl_b32 s3, s3, 1
1070 ; GFX9-NEXT: s_lshr_b32 s1, s1, s12
1071 ; GFX9-NEXT: s_lshl_b32 s2, s3, s2
1072 ; GFX9-NEXT: s_and_b32 s3, s6, 0xff
1073 ; GFX9-NEXT: s_or_b32 s0, s0, s1
1074 ; GFX9-NEXT: s_and_b32 s1, s9, 7
1075 ; GFX9-NEXT: s_and_b32 s3, 0xffff, s3
1076 ; GFX9-NEXT: s_lshr_b32 s1, s3, s1
1077 ; GFX9-NEXT: s_andn2_b32 s3, 7, s10
1078 ; GFX9-NEXT: s_lshl_b32 s4, s4, 1
1079 ; GFX9-NEXT: s_lshl_b32 s3, s4, s3
1080 ; GFX9-NEXT: s_and_b32 s4, s7, 0xff
1081 ; GFX9-NEXT: s_or_b32 s1, s2, s1
1082 ; GFX9-NEXT: s_and_b32 s2, s10, 7
1083 ; GFX9-NEXT: s_and_b32 s4, 0xffff, s4
1084 ; GFX9-NEXT: s_lshr_b32 s2, s4, s2
1085 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
1086 ; GFX9-NEXT: s_or_b32 s2, s3, s2
1087 ; GFX9-NEXT: s_and_b32 s3, s11, 7
1088 ; GFX9-NEXT: s_andn2_b32 s4, 7, s11
1089 ; GFX9-NEXT: s_lshl_b32 s5, s5, 1
1090 ; GFX9-NEXT: s_and_b32 s0, s0, 0xff
1091 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8
1092 ; GFX9-NEXT: s_lshl_b32 s4, s5, s4
1093 ; GFX9-NEXT: s_lshr_b32 s3, s8, s3
1094 ; GFX9-NEXT: s_or_b32 s0, s0, s1
1095 ; GFX9-NEXT: s_and_b32 s1, s2, 0xff
1096 ; GFX9-NEXT: s_or_b32 s3, s4, s3
1097 ; GFX9-NEXT: s_lshl_b32 s1, s1, 16
1098 ; GFX9-NEXT: s_or_b32 s0, s0, s1
1099 ; GFX9-NEXT: s_and_b32 s1, s3, 0xff
1100 ; GFX9-NEXT: s_lshl_b32 s1, s1, 24
1101 ; GFX9-NEXT: s_or_b32 s0, s0, s1
1102 ; GFX9-NEXT: ; return to shader part epilog
1104 ; GFX10-LABEL: s_fshr_v4i8:
1106 ; GFX10-NEXT: s_lshr_b32 s6, s1, 8
1107 ; GFX10-NEXT: s_lshr_b32 s3, s0, 8
1108 ; GFX10-NEXT: s_lshr_b32 s4, s0, 16
1109 ; GFX10-NEXT: s_lshr_b32 s5, s0, 24
1110 ; GFX10-NEXT: s_lshr_b32 s7, s1, 16
1111 ; GFX10-NEXT: s_lshr_b32 s8, s1, 24
1112 ; GFX10-NEXT: s_lshr_b32 s9, s2, 8
1113 ; GFX10-NEXT: s_lshr_b32 s10, s2, 16
1114 ; GFX10-NEXT: s_lshr_b32 s11, s2, 24
1115 ; GFX10-NEXT: s_and_b32 s12, s2, 7
1116 ; GFX10-NEXT: s_andn2_b32 s2, 7, s2
1117 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
1118 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
1119 ; GFX10-NEXT: s_and_b32 s6, s6, 0xff
1120 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
1121 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2
1122 ; GFX10-NEXT: s_and_b32 s2, s9, 7
1123 ; GFX10-NEXT: s_andn2_b32 s9, 7, s9
1124 ; GFX10-NEXT: s_lshl_b32 s3, s3, 1
1125 ; GFX10-NEXT: s_and_b32 s6, 0xffff, s6
1126 ; GFX10-NEXT: s_lshr_b32 s1, s1, s12
1127 ; GFX10-NEXT: s_lshl_b32 s3, s3, s9
1128 ; GFX10-NEXT: s_lshr_b32 s2, s6, s2
1129 ; GFX10-NEXT: s_and_b32 s6, s7, 0xff
1130 ; GFX10-NEXT: s_or_b32 s0, s0, s1
1131 ; GFX10-NEXT: s_or_b32 s1, s3, s2
1132 ; GFX10-NEXT: s_and_b32 s2, s10, 7
1133 ; GFX10-NEXT: s_andn2_b32 s3, 7, s10
1134 ; GFX10-NEXT: s_lshl_b32 s4, s4, 1
1135 ; GFX10-NEXT: s_and_b32 s6, 0xffff, s6
1136 ; GFX10-NEXT: s_lshl_b32 s3, s4, s3
1137 ; GFX10-NEXT: s_lshr_b32 s2, s6, s2
1138 ; GFX10-NEXT: s_andn2_b32 s4, 7, s11
1139 ; GFX10-NEXT: s_lshl_b32 s5, s5, 1
1140 ; GFX10-NEXT: s_and_b32 s6, s11, 7
1141 ; GFX10-NEXT: s_lshl_b32 s4, s5, s4
1142 ; GFX10-NEXT: s_lshr_b32 s5, s8, s6
1143 ; GFX10-NEXT: s_or_b32 s2, s3, s2
1144 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
1145 ; GFX10-NEXT: s_or_b32 s3, s4, s5
1146 ; GFX10-NEXT: s_and_b32 s0, s0, 0xff
1147 ; GFX10-NEXT: s_lshl_b32 s1, s1, 8
1148 ; GFX10-NEXT: s_and_b32 s2, s2, 0xff
1149 ; GFX10-NEXT: s_or_b32 s0, s0, s1
1150 ; GFX10-NEXT: s_lshl_b32 s1, s2, 16
1151 ; GFX10-NEXT: s_and_b32 s2, s3, 0xff
1152 ; GFX10-NEXT: s_or_b32 s0, s0, s1
1153 ; GFX10-NEXT: s_lshl_b32 s1, s2, 24
1154 ; GFX10-NEXT: s_or_b32 s0, s0, s1
1155 ; GFX10-NEXT: ; return to shader part epilog
1157 ; GFX11-LABEL: s_fshr_v4i8:
1159 ; GFX11-NEXT: s_lshr_b32 s6, s1, 8
1160 ; GFX11-NEXT: s_lshr_b32 s3, s0, 8
1161 ; GFX11-NEXT: s_lshr_b32 s4, s0, 16
1162 ; GFX11-NEXT: s_lshr_b32 s5, s0, 24
1163 ; GFX11-NEXT: s_lshr_b32 s7, s1, 16
1164 ; GFX11-NEXT: s_lshr_b32 s8, s1, 24
1165 ; GFX11-NEXT: s_lshr_b32 s9, s2, 8
1166 ; GFX11-NEXT: s_lshr_b32 s10, s2, 16
1167 ; GFX11-NEXT: s_lshr_b32 s11, s2, 24
1168 ; GFX11-NEXT: s_and_b32 s12, s2, 7
1169 ; GFX11-NEXT: s_and_not1_b32 s2, 7, s2
1170 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
1171 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1
1172 ; GFX11-NEXT: s_and_b32 s6, s6, 0xff
1173 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
1174 ; GFX11-NEXT: s_lshl_b32 s0, s0, s2
1175 ; GFX11-NEXT: s_and_b32 s2, s9, 7
1176 ; GFX11-NEXT: s_and_not1_b32 s9, 7, s9
1177 ; GFX11-NEXT: s_lshl_b32 s3, s3, 1
1178 ; GFX11-NEXT: s_and_b32 s6, 0xffff, s6
1179 ; GFX11-NEXT: s_lshr_b32 s1, s1, s12
1180 ; GFX11-NEXT: s_lshl_b32 s3, s3, s9
1181 ; GFX11-NEXT: s_lshr_b32 s2, s6, s2
1182 ; GFX11-NEXT: s_and_b32 s6, s7, 0xff
1183 ; GFX11-NEXT: s_or_b32 s0, s0, s1
1184 ; GFX11-NEXT: s_or_b32 s1, s3, s2
1185 ; GFX11-NEXT: s_and_b32 s2, s10, 7
1186 ; GFX11-NEXT: s_and_not1_b32 s3, 7, s10
1187 ; GFX11-NEXT: s_lshl_b32 s4, s4, 1
1188 ; GFX11-NEXT: s_and_b32 s6, 0xffff, s6
1189 ; GFX11-NEXT: s_lshl_b32 s3, s4, s3
1190 ; GFX11-NEXT: s_lshr_b32 s2, s6, s2
1191 ; GFX11-NEXT: s_and_not1_b32 s4, 7, s11
1192 ; GFX11-NEXT: s_lshl_b32 s5, s5, 1
1193 ; GFX11-NEXT: s_and_b32 s6, s11, 7
1194 ; GFX11-NEXT: s_lshl_b32 s4, s5, s4
1195 ; GFX11-NEXT: s_lshr_b32 s5, s8, s6
1196 ; GFX11-NEXT: s_or_b32 s2, s3, s2
1197 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
1198 ; GFX11-NEXT: s_or_b32 s3, s4, s5
1199 ; GFX11-NEXT: s_and_b32 s0, s0, 0xff
1200 ; GFX11-NEXT: s_lshl_b32 s1, s1, 8
1201 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff
1202 ; GFX11-NEXT: s_or_b32 s0, s0, s1
1203 ; GFX11-NEXT: s_lshl_b32 s1, s2, 16
1204 ; GFX11-NEXT: s_and_b32 s2, s3, 0xff
1205 ; GFX11-NEXT: s_or_b32 s0, s0, s1
1206 ; GFX11-NEXT: s_lshl_b32 s1, s2, 24
1207 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1208 ; GFX11-NEXT: s_or_b32 s0, s0, s1
1209 ; GFX11-NEXT: ; return to shader part epilog
; Reinterpret each packed i32 operand as a <4 x i8> vector.
1210 %lhs = bitcast i32 %lhs.arg to <4 x i8>
1211 %rhs = bitcast i32 %rhs.arg to <4 x i8>
1212 %amt = bitcast i32 %amt.arg to <4 x i8>
; The operation under test: the vector funnel-shift-right intrinsic.
1213 %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt)
; Repack the four result lanes into a single i32 and return it.
1214 %cast.result = bitcast <4 x i8> %result to i32
1215 ret i32 %cast.result
; v_fshr_v4i8: funnel-shift-right over four i8 lanes, where each <4 x i8>
; operand is passed (and the result returned) packed in an i32. No calling
; convention or inreg markers are used; the expected code below reads
; lhs/rhs/amt from v0/v1/v2, leaves the packed result in v0 and returns with
; s_setpc_b64.
; For every lane the expected sequences form
;   ((lhs << 1) << (~amt & 7)) | (rhs >> (amt & 7))
; on the extracted bytes, keep only the low 8 bits of each lane result, and OR
; the lanes back together at bit offsets 0, 8, 16 and 24.
; The per-target blocks are autogenerated (see the NOTE on the first line of
; this file); refresh them with utils/update_llc_test_checks.py rather than
; editing them by hand.
1218 define i32 @v_fshr_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) {
1219 ; GFX6-LABEL: v_fshr_v4i8:
1221 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1222 ; GFX6-NEXT: v_lshrrev_b32_e32 v7, 8, v2
1223 ; GFX6-NEXT: v_lshrrev_b32_e32 v8, 16, v2
1224 ; GFX6-NEXT: v_lshrrev_b32_e32 v9, 24, v2
1225 ; GFX6-NEXT: v_and_b32_e32 v10, 7, v2
1226 ; GFX6-NEXT: v_not_b32_e32 v2, v2
1227 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0
1228 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v0
1229 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 24, v0
1230 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v2
1231 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1232 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
1233 ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1
1234 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v10, v2
1235 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
1236 ; GFX6-NEXT: v_and_b32_e32 v2, 7, v7
1237 ; GFX6-NEXT: v_not_b32_e32 v7, v7
1238 ; GFX6-NEXT: v_and_b32_e32 v7, 7, v7
1239 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
1240 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, v7, v3
1241 ; GFX6-NEXT: v_bfe_u32 v7, v1, 8, 8
1242 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v7
1243 ; GFX6-NEXT: v_not_b32_e32 v7, v8
1244 ; GFX6-NEXT: v_lshrrev_b32_e32 v6, 24, v1
1245 ; GFX6-NEXT: v_or_b32_e32 v2, v3, v2
1246 ; GFX6-NEXT: v_and_b32_e32 v3, 7, v8
1247 ; GFX6-NEXT: v_and_b32_e32 v7, 7, v7
1248 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4
1249 ; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8
1250 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, v7, v4
1251 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v3, v1
1252 ; GFX6-NEXT: v_or_b32_e32 v1, v4, v1
1253 ; GFX6-NEXT: v_not_b32_e32 v4, v9
1254 ; GFX6-NEXT: v_and_b32_e32 v3, 7, v9
1255 ; GFX6-NEXT: v_and_b32_e32 v4, 7, v4
1256 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5
1257 ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v2
1258 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, v4, v5
1259 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v3, v6
1260 ; GFX6-NEXT: v_and_b32_e32 v0, 0xff, v0
1261 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2
1262 ; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1
1263 ; GFX6-NEXT: v_or_b32_e32 v3, v4, v3
1264 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
1265 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1266 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1267 ; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v3
1268 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1
1269 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1270 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1272 ; GFX8-LABEL: v_fshr_v4i8:
1274 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1275 ; GFX8-NEXT: v_not_b32_e32 v7, v2
1276 ; GFX8-NEXT: v_and_b32_e32 v6, 7, v2
1277 ; GFX8-NEXT: v_and_b32_e32 v7, 7, v7
1278 ; GFX8-NEXT: v_lshlrev_b16_e32 v8, 1, v0
1279 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2
1280 ; GFX8-NEXT: v_lshlrev_b16_e32 v7, v7, v8
1281 ; GFX8-NEXT: v_lshrrev_b16_sdwa v6, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1282 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0
1283 ; GFX8-NEXT: v_or_b32_e32 v6, v7, v6
1284 ; GFX8-NEXT: v_and_b32_e32 v7, 7, v5
1285 ; GFX8-NEXT: v_not_b32_e32 v5, v5
1286 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1
1287 ; GFX8-NEXT: v_and_b32_e32 v5, 7, v5
1288 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v3
1289 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, v5, v3
1290 ; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1291 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v4
1292 ; GFX8-NEXT: v_mov_b32_e32 v4, 7
1293 ; GFX8-NEXT: v_mov_b32_e32 v8, 0xff
1294 ; GFX8-NEXT: v_and_b32_sdwa v5, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1295 ; GFX8-NEXT: v_not_b32_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1296 ; GFX8-NEXT: v_mov_b32_e32 v9, 1
1297 ; GFX8-NEXT: v_and_b32_sdwa v4, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
1298 ; GFX8-NEXT: v_not_b32_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3
1299 ; GFX8-NEXT: v_and_b32_e32 v7, 7, v7
1300 ; GFX8-NEXT: v_lshlrev_b16_sdwa v10, v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1301 ; GFX8-NEXT: v_and_b32_sdwa v8, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1302 ; GFX8-NEXT: v_and_b32_e32 v2, 7, v2
1303 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1304 ; GFX8-NEXT: v_lshlrev_b16_e32 v7, v7, v10
1305 ; GFX8-NEXT: v_lshrrev_b16_e32 v5, v5, v8
1306 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0
1307 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1308 ; GFX8-NEXT: v_or_b32_e32 v5, v7, v5
1309 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
1310 ; GFX8-NEXT: v_mov_b32_e32 v1, 8
1311 ; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1312 ; GFX8-NEXT: v_and_b32_e32 v2, 0xff, v5
1313 ; GFX8-NEXT: v_or_b32_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1314 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1315 ; GFX8-NEXT: v_and_b32_e32 v0, 0xff, v0
1316 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
1317 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 24, v0
1318 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
1319 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1321 ; GFX9-LABEL: v_fshr_v4i8:
1323 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1324 ; GFX9-NEXT: v_not_b32_e32 v7, v2
1325 ; GFX9-NEXT: v_and_b32_e32 v6, 7, v2
1326 ; GFX9-NEXT: v_and_b32_e32 v7, 7, v7
1327 ; GFX9-NEXT: v_lshlrev_b16_e32 v8, 1, v0
1328 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2
1329 ; GFX9-NEXT: v_lshlrev_b16_e32 v7, v7, v8
1330 ; GFX9-NEXT: v_lshrrev_b16_sdwa v6, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1331 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0
1332 ; GFX9-NEXT: v_or_b32_e32 v6, v7, v6
1333 ; GFX9-NEXT: v_and_b32_e32 v7, 7, v5
1334 ; GFX9-NEXT: v_not_b32_e32 v5, v5
1335 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1
1336 ; GFX9-NEXT: v_and_b32_e32 v5, 7, v5
1337 ; GFX9-NEXT: v_lshlrev_b16_e32 v3, 1, v3
1338 ; GFX9-NEXT: v_lshlrev_b16_e32 v3, v5, v3
1339 ; GFX9-NEXT: v_lshrrev_b16_sdwa v4, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1340 ; GFX9-NEXT: v_or_b32_e32 v3, v3, v4
1341 ; GFX9-NEXT: v_mov_b32_e32 v4, 7
1342 ; GFX9-NEXT: v_not_b32_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1343 ; GFX9-NEXT: v_mov_b32_e32 v9, 1
1344 ; GFX9-NEXT: v_mov_b32_e32 v8, 0xff
1345 ; GFX9-NEXT: v_and_b32_sdwa v5, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1346 ; GFX9-NEXT: v_and_b32_e32 v7, 7, v7
1347 ; GFX9-NEXT: v_lshlrev_b16_sdwa v10, v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
1348 ; GFX9-NEXT: v_and_b32_sdwa v4, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
1349 ; GFX9-NEXT: v_not_b32_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3
1350 ; GFX9-NEXT: v_lshlrev_b16_e32 v7, v7, v10
1351 ; GFX9-NEXT: v_and_b32_sdwa v10, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1352 ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2
1353 ; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1354 ; GFX9-NEXT: v_lshrrev_b16_e32 v5, v5, v10
1355 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0
1356 ; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
1357 ; GFX9-NEXT: v_or_b32_e32 v5, v7, v5
1358 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
1359 ; GFX9-NEXT: v_mov_b32_e32 v1, 8
1360 ; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1361 ; GFX9-NEXT: v_and_b32_e32 v2, 0xff, v5
1362 ; GFX9-NEXT: v_and_b32_e32 v0, 0xff, v0
1363 ; GFX9-NEXT: v_and_or_b32 v1, v6, v8, v1
1364 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1365 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 24, v0
1366 ; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0
1367 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1369 ; GFX10-LABEL: v_fshr_v4i8:
1371 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1372 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v2
1373 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v0
1374 ; GFX10-NEXT: v_not_b32_e32 v8, v2
1375 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0
1376 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v0
1377 ; GFX10-NEXT: v_not_b32_e32 v10, v5
1378 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
1379 ; GFX10-NEXT: v_and_b32_e32 v8, 7, v8
1380 ; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4
1381 ; GFX10-NEXT: v_mov_b32_e32 v3, 7
1382 ; GFX10-NEXT: v_and_b32_e32 v10, 7, v10
1383 ; GFX10-NEXT: v_lshrrev_b32_e32 v9, 8, v1
1384 ; GFX10-NEXT: v_lshlrev_b16 v0, v8, v0
1385 ; GFX10-NEXT: v_not_b32_sdwa v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1386 ; GFX10-NEXT: v_not_b32_sdwa v14, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3
1387 ; GFX10-NEXT: v_lshlrev_b16 v4, v10, v4
1388 ; GFX10-NEXT: v_mov_b32_e32 v10, 0xff
1389 ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v1
1390 ; GFX10-NEXT: v_and_b32_e32 v12, 7, v2
1391 ; GFX10-NEXT: v_and_b32_e32 v13, 0xff, v1
1392 ; GFX10-NEXT: v_and_b32_e32 v5, 7, v5
1393 ; GFX10-NEXT: v_and_b32_e32 v9, 0xff, v9
1394 ; GFX10-NEXT: v_and_b32_sdwa v15, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1395 ; GFX10-NEXT: v_and_b32_e32 v8, 7, v8
1396 ; GFX10-NEXT: v_lshlrev_b16 v6, 1, v6
1397 ; GFX10-NEXT: v_and_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1398 ; GFX10-NEXT: v_and_b32_e32 v10, 7, v14
1399 ; GFX10-NEXT: v_lshlrev_b16 v7, 1, v7
1400 ; GFX10-NEXT: v_and_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD
1401 ; GFX10-NEXT: v_lshrrev_b16 v3, v5, v9
1402 ; GFX10-NEXT: v_lshlrev_b16 v5, v8, v6
1403 ; GFX10-NEXT: v_lshrrev_b16 v1, v15, v1
1404 ; GFX10-NEXT: v_lshlrev_b16 v6, v10, v7
1405 ; GFX10-NEXT: v_lshrrev_b16 v2, v2, v11
1406 ; GFX10-NEXT: v_lshrrev_b16 v7, v12, v13
1407 ; GFX10-NEXT: v_or_b32_e32 v3, v4, v3
1408 ; GFX10-NEXT: v_mov_b32_e32 v4, 8
1409 ; GFX10-NEXT: v_or_b32_e32 v1, v5, v1
1410 ; GFX10-NEXT: v_or_b32_e32 v2, v6, v2
1411 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v7
1412 ; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
1413 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1
1414 ; GFX10-NEXT: v_and_b32_e32 v2, 0xff, v2
1415 ; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v3
1416 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1417 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2
1418 ; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2
1419 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1421 ; GFX11-LABEL: v_fshr_v4i8:
1423 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1424 ; GFX11-NEXT: v_lshrrev_b32_e32 v6, 8, v1
1425 ; GFX11-NEXT: v_lshrrev_b32_e32 v7, 8, v2
1426 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v0
1427 ; GFX11-NEXT: v_lshrrev_b32_e32 v11, 16, v2
1428 ; GFX11-NEXT: v_lshrrev_b32_e32 v13, 24, v2
1429 ; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6
1430 ; GFX11-NEXT: v_not_b32_e32 v12, v7
1431 ; GFX11-NEXT: v_and_b32_e32 v7, 7, v7
1432 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v0
1433 ; GFX11-NEXT: v_lshrrev_b32_e32 v5, 24, v0
1434 ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v1
1435 ; GFX11-NEXT: v_and_b32_e32 v12, 7, v12
1436 ; GFX11-NEXT: v_lshlrev_b16 v3, 1, v3
1437 ; GFX11-NEXT: v_not_b32_e32 v14, v11
1438 ; GFX11-NEXT: v_lshrrev_b16 v6, v7, v6
1439 ; GFX11-NEXT: v_not_b32_e32 v7, v13
1440 ; GFX11-NEXT: v_lshrrev_b32_e32 v9, 24, v1
1441 ; GFX11-NEXT: v_not_b32_e32 v10, v2
1442 ; GFX11-NEXT: v_lshlrev_b16 v3, v12, v3
1443 ; GFX11-NEXT: v_and_b32_e32 v11, 7, v11
1444 ; GFX11-NEXT: v_and_b32_e32 v12, 7, v14
1445 ; GFX11-NEXT: v_lshlrev_b16 v4, 1, v4
1446 ; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8
1447 ; GFX11-NEXT: v_and_b32_e32 v7, 7, v7
1448 ; GFX11-NEXT: v_lshlrev_b16 v5, 1, v5
1449 ; GFX11-NEXT: v_and_b32_e32 v13, 7, v13
1450 ; GFX11-NEXT: v_and_b32_e32 v2, 7, v2
1451 ; GFX11-NEXT: v_and_b32_e32 v10, 7, v10
1452 ; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
1453 ; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1
1454 ; GFX11-NEXT: v_or_b32_e32 v3, v3, v6
1455 ; GFX11-NEXT: v_lshlrev_b16 v4, v12, v4
1456 ; GFX11-NEXT: v_lshrrev_b16 v6, v11, v8
1457 ; GFX11-NEXT: v_lshlrev_b16 v5, v7, v5
1458 ; GFX11-NEXT: v_lshrrev_b16 v7, v13, v9
1459 ; GFX11-NEXT: v_lshlrev_b16 v0, v10, v0
1460 ; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1
1461 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3
1462 ; GFX11-NEXT: v_or_b32_e32 v3, v4, v6
1463 ; GFX11-NEXT: v_or_b32_e32 v4, v5, v7
1464 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1465 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
1466 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 8, v2
1467 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
1468 ; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3
1469 ; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4
1470 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
1471 ; GFX11-NEXT: v_and_or_b32 v0, 0xff, v0, v1
1472 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v2
1473 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
1474 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 24, v3
1475 ; GFX11-NEXT: v_or3_b32 v0, v0, v1, v2
1476 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; Reinterpret each packed i32 operand as a <4 x i8> vector.
1477 %lhs = bitcast i32 %lhs.arg to <4 x i8>
1478 %rhs = bitcast i32 %rhs.arg to <4 x i8>
1479 %amt = bitcast i32 %amt.arg to <4 x i8>
; The operation under test: the vector funnel-shift-right intrinsic.
1480 %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt)
; Repack the four result lanes into a single i32 and return it.
1481 %cast.result = bitcast <4 x i8> %result to i32
1482 ret i32 %cast.result
; s_fshr_i24: funnel shift right on i24 with all three operands marked inreg
; in an amdgpu_ps function. In the expected output below s0, s1 and s2 are
; used in the roles of %lhs, %rhs and %amt respectively.
;
; Shape of the expected lowering, shared by every target checked here:
;   * 24 is not a power of two, so %amt (masked to 24 bits) is reduced modulo
;     24 with a reciprocal-based unsigned remainder: convert 24 to float,
;     v_rcp_iflag_f32, scale by 0x4f7ffffe, convert back to u32, refine the
;     estimate with a multiply by -24 (v_not_b32 of 23, or the literal
;     0xffffffe8 in the GFX10 and GFX11 outputs), then mul_hi / mul_lo by 24 /
;     subtract, followed by two compare-and-select "subtract 24" corrections.
;   * The lhs is pre-shifted left by 1 and then shifted left by
;     (23 - remainder); the rhs is masked to 24 bits and shifted right by the
;     remainder. Both shift amounts are masked with 0xffffff first.
;   * The two halves are OR'd together (separate shift + or in the GFX6 and
;     GFX8 outputs, v_lshl_or_b32 in the GFX9, GFX10 and GFX11 outputs) and
;     the VGPR result is copied back to s0 with v_readfirstlane_b32.
;
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
1485 define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt) {
1486 ; GFX6-LABEL: s_fshr_i24:
1488 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1489 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
1490 ; GFX6-NEXT: v_not_b32_e32 v1, 23
1491 ; GFX6-NEXT: s_and_b32 s2, s2, 0xffffff
1492 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
1493 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1494 ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
1495 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffffff
1496 ; GFX6-NEXT: v_mul_lo_u32 v1, v0, v1
1497 ; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
1498 ; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
1499 ; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
1500 ; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
1501 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0
1502 ; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0
1503 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1504 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1505 ; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0
1506 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1507 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1508 ; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 23, v0
1509 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1510 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1511 ; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1
1512 ; GFX6-NEXT: v_lshr_b32_e32 v0, s1, v0
1513 ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
1514 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
1515 ; GFX6-NEXT: ; return to shader part epilog
1517 ; GFX8-LABEL: s_fshr_i24:
1519 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1520 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
1521 ; GFX8-NEXT: v_not_b32_e32 v1, 23
1522 ; GFX8-NEXT: s_and_b32 s2, s2, 0xffffff
1523 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
1524 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1525 ; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
1526 ; GFX8-NEXT: s_and_b32 s1, s1, 0xffffff
1527 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
1528 ; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
1529 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
1530 ; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
1531 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
1532 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0
1533 ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0
1534 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1535 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1536 ; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0
1537 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1538 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1539 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0
1540 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1541 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1542 ; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0
1543 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s1
1544 ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
1545 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
1546 ; GFX8-NEXT: ; return to shader part epilog
1548 ; GFX9-LABEL: s_fshr_i24:
1550 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1551 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
1552 ; GFX9-NEXT: v_not_b32_e32 v1, 23
1553 ; GFX9-NEXT: s_and_b32 s2, s2, 0xffffff
1554 ; GFX9-NEXT: s_and_b32 s1, s1, 0xffffff
1555 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1556 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
1557 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
1558 ; GFX9-NEXT: v_mul_lo_u32 v1, v0, v1
1559 ; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
1560 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
1561 ; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0
1562 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
1563 ; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0
1564 ; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0
1565 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1566 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1567 ; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0
1568 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1569 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1570 ; GFX9-NEXT: v_sub_u32_e32 v1, 23, v0
1571 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1572 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1573 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s1
1574 ; GFX9-NEXT: v_lshl_or_b32 v0, s0, v1, v0
1575 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
1576 ; GFX9-NEXT: ; return to shader part epilog
1578 ; GFX10-LABEL: s_fshr_i24:
1580 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1581 ; GFX10-NEXT: s_and_b32 s2, s2, 0xffffff
1582 ; GFX10-NEXT: s_and_b32 s1, s1, 0xffffff
1583 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
1584 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
1585 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1586 ; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
1587 ; GFX10-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0
1588 ; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
1589 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
1590 ; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0
1591 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24
1592 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0
1593 ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
1594 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
1595 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1596 ; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
1597 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
1598 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1599 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, 23, v0
1600 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1601 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1602 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s1
1603 ; GFX10-NEXT: v_lshl_or_b32 v0, s0, v1, v0
1604 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
1605 ; GFX10-NEXT: ; return to shader part epilog
1607 ; GFX11-LABEL: s_fshr_i24:
1609 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1610 ; GFX11-NEXT: s_and_b32 s2, s2, 0xffffff
1611 ; GFX11-NEXT: s_and_b32 s1, s1, 0xffffff
1612 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1
1613 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1614 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0
1615 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
1616 ; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1617 ; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
1618 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1619 ; GFX11-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0
1620 ; GFX11-NEXT: v_mul_hi_u32 v1, v0, v1
1621 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1622 ; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1
1623 ; GFX11-NEXT: v_mul_hi_u32 v0, s2, v0
1624 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1625 ; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24
1626 ; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0
1627 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1628 ; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
1629 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
1630 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1631 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1632 ; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 24, v0
1633 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
1634 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
1635 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1636 ; GFX11-NEXT: v_sub_nc_u32_e32 v1, 23, v0
1637 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1638 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1639 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1640 ; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s1
1641 ; GFX11-NEXT: v_lshl_or_b32 v0, s0, v1, v0
1642 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
1643 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
1644 ; GFX11-NEXT: ; return to shader part epilog
1645 %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt)
; v_fshr_i24: the same i24 funnel shift right as the scalar variant, but with
; plain (non-inreg) arguments and no amdgpu_ps calling convention. In the
; expected output below v0, v1 and v2 are used in the roles of %lhs, %rhs and
; %amt, the result is left in v0, and every body starts with a full s_waitcnt
; and ends with s_setpc_b64 s[30:31].
;
; Shape of the expected lowering, shared by every target checked here:
;   * %amt (masked to 24 bits) is reduced modulo 24 with a reciprocal-based
;     unsigned remainder: v_rcp_iflag_f32 of 24.0 scaled by 0x4f7ffffe and
;     converted to u32, refined with a multiply by -24 (v_not_b32 of 23, or
;     the literal 0xffffffe8 in the GFX10 and GFX11 outputs), then mul_hi /
;     mul_lo by 24 / subtract, followed by two compare-and-select
;     "subtract 24" corrections.
;   * The lhs is pre-shifted left by 1 and then shifted left by
;     (23 - remainder); the rhs is masked to 24 bits and shifted right by the
;     remainder. Both shift amounts are masked with 0xffffff first.
;   * The halves are combined with a separate v_or_b32 in the GFX6 and GFX8
;     outputs and with v_lshl_or_b32 in the GFX9, GFX10 and GFX11 outputs.
;
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the update script instead of editing them by hand.
1649 define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
1650 ; GFX6-LABEL: v_fshr_i24:
1652 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1653 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1654 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3
1655 ; GFX6-NEXT: v_not_b32_e32 v4, 23
1656 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1657 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1658 ; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1659 ; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3
1660 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1661 ; GFX6-NEXT: v_mul_lo_u32 v4, v3, v4
1662 ; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4
1663 ; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4
1664 ; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
1665 ; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
1666 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3
1667 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2
1668 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1669 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1670 ; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2
1671 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1672 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1673 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2
1674 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1675 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
1676 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0
1677 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
1678 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1679 ; GFX6-NEXT: s_setpc_b64 s[30:31]
1681 ; GFX8-LABEL: v_fshr_i24:
1683 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1684 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1685 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3
1686 ; GFX8-NEXT: v_not_b32_e32 v4, 23
1687 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1688 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1689 ; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1690 ; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3
1691 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1692 ; GFX8-NEXT: v_mul_lo_u32 v4, v3, v4
1693 ; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4
1694 ; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4
1695 ; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3
1696 ; GFX8-NEXT: v_mul_lo_u32 v3, v3, 24
1697 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3
1698 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2
1699 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1700 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1701 ; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2
1702 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1703 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1704 ; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2
1705 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1706 ; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3
1707 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v3, v0
1708 ; GFX8-NEXT: v_lshrrev_b32_e32 v1, v2, v1
1709 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
1710 ; GFX8-NEXT: s_setpc_b64 s[30:31]
1712 ; GFX9-LABEL: v_fshr_i24:
1714 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1715 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1716 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3
1717 ; GFX9-NEXT: v_not_b32_e32 v4, 23
1718 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1719 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1720 ; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1721 ; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3
1722 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1723 ; GFX9-NEXT: v_mul_lo_u32 v4, v3, v4
1724 ; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4
1725 ; GFX9-NEXT: v_add_u32_e32 v3, v3, v4
1726 ; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3
1727 ; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24
1728 ; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3
1729 ; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2
1730 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1731 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1732 ; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2
1733 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1734 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1735 ; GFX9-NEXT: v_sub_u32_e32 v3, 23, v2
1736 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1737 ; GFX9-NEXT: v_and_b32_e32 v3, 0xffffff, v3
1738 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, v2, v1
1739 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v3, v1
1740 ; GFX9-NEXT: s_setpc_b64 s[30:31]
1742 ; GFX10-LABEL: v_fshr_i24:
1744 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1745 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1746 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1747 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1748 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1749 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3
1750 ; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1751 ; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3
1752 ; GFX10-NEXT: v_mul_lo_u32 v4, 0xffffffe8, v3
1753 ; GFX10-NEXT: v_mul_hi_u32 v4, v3, v4
1754 ; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4
1755 ; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3
1756 ; GFX10-NEXT: v_mul_lo_u32 v3, v3, 24
1757 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3
1758 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
1759 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
1760 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1761 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
1762 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
1763 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1764 ; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v2
1765 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1766 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3
1767 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, v2, v1
1768 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, v3, v1
1769 ; GFX10-NEXT: s_setpc_b64 s[30:31]
1771 ; GFX11-LABEL: v_fshr_i24:
1773 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1774 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
1775 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1776 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1777 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1778 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
1779 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v3, v3
1780 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
1781 ; GFX11-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
1782 ; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3
1783 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1784 ; GFX11-NEXT: v_mul_lo_u32 v4, 0xffffffe8, v3
1785 ; GFX11-NEXT: v_mul_hi_u32 v4, v3, v4
1786 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1787 ; GFX11-NEXT: v_add_nc_u32_e32 v3, v3, v4
1788 ; GFX11-NEXT: v_mul_hi_u32 v3, v2, v3
1789 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1790 ; GFX11-NEXT: v_mul_lo_u32 v3, v3, 24
1791 ; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3
1792 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1793 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
1794 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
1795 ; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1796 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1797 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v2
1798 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2
1799 ; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo
1800 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1801 ; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v2
1802 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1803 ; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3
1804 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1805 ; GFX11-NEXT: v_lshrrev_b32_e32 v1, v2, v1
1806 ; GFX11-NEXT: v_lshl_or_b32 v0, v0, v3, v1
1807 ; GFX11-NEXT: s_setpc_b64 s[30:31]
1808 %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt)
1812 define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) {
1813 ; GFX6-LABEL: s_fshr_v2i24:
1815 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
1816 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2
1817 ; GFX6-NEXT: s_lshr_b32 s7, s1, 8
1818 ; GFX6-NEXT: s_bfe_u32 s9, s0, 0x80008
1819 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff
1820 ; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
1821 ; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2
1822 ; GFX6-NEXT: v_mov_b32_e32 v0, s0
1823 ; GFX6-NEXT: v_not_b32_e32 v3, 23
1824 ; GFX6-NEXT: s_lshr_b32 s6, s0, 16
1825 ; GFX6-NEXT: s_and_b32 s8, s0, 0xff
1826 ; GFX6-NEXT: s_lshl_b32 s9, s9, 8
1827 ; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 24
1828 ; GFX6-NEXT: s_and_b32 s0, s7, 0xff
1829 ; GFX6-NEXT: s_lshr_b32 s1, s2, 16
1830 ; GFX6-NEXT: s_lshr_b32 s7, s3, 8
1831 ; GFX6-NEXT: s_bfe_u32 s10, s2, 0x80008
1832 ; GFX6-NEXT: v_mul_lo_u32 v3, v2, v3
1833 ; GFX6-NEXT: s_or_b32 s8, s8, s9
1834 ; GFX6-NEXT: s_and_b32 s9, s2, 0xff
1835 ; GFX6-NEXT: s_lshl_b32 s10, s10, 8
1836 ; GFX6-NEXT: s_and_b32 s1, s1, 0xff
1837 ; GFX6-NEXT: s_and_b32 s3, s3, 0xff
1838 ; GFX6-NEXT: v_mov_b32_e32 v1, s2
1839 ; GFX6-NEXT: s_and_b32 s2, s7, 0xff
1840 ; GFX6-NEXT: s_or_b32 s9, s9, s10
1841 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
1842 ; GFX6-NEXT: v_alignbit_b32 v1, s3, v1, 24
1843 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
1844 ; GFX6-NEXT: s_and_b32 s9, 0xffff, s9
1845 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
1846 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
1847 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
1848 ; GFX6-NEXT: s_or_b32 s1, s9, s1
1849 ; GFX6-NEXT: v_or_b32_e32 v1, s2, v1
1850 ; GFX6-NEXT: s_lshr_b32 s2, s4, 16
1851 ; GFX6-NEXT: s_bfe_u32 s9, s4, 0x80008
1852 ; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
1853 ; GFX6-NEXT: s_and_b32 s7, s4, 0xff
1854 ; GFX6-NEXT: s_lshl_b32 s9, s9, 8
1855 ; GFX6-NEXT: s_and_b32 s2, s2, 0xff
1856 ; GFX6-NEXT: s_or_b32 s7, s7, s9
1857 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
1858 ; GFX6-NEXT: s_and_b32 s7, 0xffff, s7
1859 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
1860 ; GFX6-NEXT: s_or_b32 s2, s7, s2
1861 ; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
1862 ; GFX6-NEXT: v_mul_hi_u32 v3, s2, v2
1863 ; GFX6-NEXT: s_lshr_b32 s3, s5, 8
1864 ; GFX6-NEXT: s_and_b32 s5, s5, 0xff
1865 ; GFX6-NEXT: v_mov_b32_e32 v4, s4
1866 ; GFX6-NEXT: s_and_b32 s3, s3, 0xff
1867 ; GFX6-NEXT: v_alignbit_b32 v4, s5, v4, 24
1868 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
1869 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
1870 ; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
1871 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
1872 ; GFX6-NEXT: v_or_b32_e32 v4, s3, v4
1873 ; GFX6-NEXT: v_mul_hi_u32 v2, v4, v2
1874 ; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s2, v3
1875 ; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
1876 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
1877 ; GFX6-NEXT: v_mul_lo_u32 v2, v2, 24
1878 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
1879 ; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
1880 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
1881 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
1882 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v4, v2
1883 ; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 23, v3
1884 ; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
1885 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1886 ; GFX6-NEXT: s_and_b32 s6, s6, 0xff
1887 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
1888 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
1889 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1890 ; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
1891 ; GFX6-NEXT: s_lshl_b32 s2, s6, 17
1892 ; GFX6-NEXT: s_lshl_b32 s3, s8, 1
1893 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1894 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
1895 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
1896 ; GFX6-NEXT: s_or_b32 s2, s2, s3
1897 ; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5
1898 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
1899 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
1900 ; GFX6-NEXT: v_lshl_b32_e32 v5, s2, v5
1901 ; GFX6-NEXT: v_lshr_b32_e32 v3, s1, v3
1902 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
1903 ; GFX6-NEXT: s_lshl_b32 s0, s0, 17
1904 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
1905 ; GFX6-NEXT: v_or_b32_e32 v3, v5, v3
1906 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
1907 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
1908 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1909 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v4, v0
1910 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
1911 ; GFX6-NEXT: v_bfe_u32 v2, v3, 8, 8
1912 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1913 ; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v3
1914 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2
1915 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
1916 ; GFX6-NEXT: v_bfe_u32 v2, v3, 16, 8
1917 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1918 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
1919 ; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0
1920 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2
1921 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
1922 ; GFX6-NEXT: v_bfe_u32 v2, v0, 8, 8
1923 ; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8
1924 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 8, v0
1925 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
1926 ; GFX6-NEXT: v_readfirstlane_b32 s0, v1
1927 ; GFX6-NEXT: v_readfirstlane_b32 s1, v0
1928 ; GFX6-NEXT: ; return to shader part epilog
1930 ; GFX8-LABEL: s_fshr_v2i24:
1932 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1933 ; GFX8-NEXT: s_lshr_b32 s9, s1, 8
1934 ; GFX8-NEXT: s_and_b32 s1, s1, 0xff
1935 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
1936 ; GFX8-NEXT: s_lshr_b32 s6, s0, 8
1937 ; GFX8-NEXT: s_lshr_b32 s8, s0, 24
1938 ; GFX8-NEXT: s_lshl_b32 s1, s1, 8
1939 ; GFX8-NEXT: s_and_b32 s6, s6, 0xff
1940 ; GFX8-NEXT: s_or_b32 s1, s8, s1
1941 ; GFX8-NEXT: s_lshr_b32 s8, s2, 8
1942 ; GFX8-NEXT: s_lshr_b32 s7, s0, 16
1943 ; GFX8-NEXT: s_and_b32 s0, s0, 0xff
1944 ; GFX8-NEXT: s_lshl_b32 s6, s6, 8
1945 ; GFX8-NEXT: s_and_b32 s8, s8, 0xff
1946 ; GFX8-NEXT: s_or_b32 s0, s0, s6
1947 ; GFX8-NEXT: s_and_b32 s6, s7, 0xff
1948 ; GFX8-NEXT: s_and_b32 s7, s9, 0xff
1949 ; GFX8-NEXT: s_lshr_b32 s9, s2, 16
1950 ; GFX8-NEXT: s_lshr_b32 s10, s2, 24
1951 ; GFX8-NEXT: s_and_b32 s2, s2, 0xff
1952 ; GFX8-NEXT: s_lshl_b32 s8, s8, 8
1953 ; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1954 ; GFX8-NEXT: s_or_b32 s2, s2, s8
1955 ; GFX8-NEXT: s_and_b32 s8, s9, 0xff
1956 ; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0
1957 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
1958 ; GFX8-NEXT: s_lshr_b32 s11, s3, 8
1959 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
1960 ; GFX8-NEXT: s_lshl_b32 s8, s8, 16
1961 ; GFX8-NEXT: s_and_b32 s3, s3, 0xff
1962 ; GFX8-NEXT: s_or_b32 s2, s2, s8
1963 ; GFX8-NEXT: s_lshl_b32 s3, s3, 8
1964 ; GFX8-NEXT: s_and_b32 s8, s11, 0xff
1965 ; GFX8-NEXT: v_not_b32_e32 v1, 23
1966 ; GFX8-NEXT: s_or_b32 s3, s10, s3
1967 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
1968 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
1969 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
1970 ; GFX8-NEXT: s_lshl_b32 s8, s8, 16
1971 ; GFX8-NEXT: s_or_b32 s3, s3, s8
1972 ; GFX8-NEXT: s_lshr_b32 s8, s4, 8
1973 ; GFX8-NEXT: s_and_b32 s8, s8, 0xff
1974 ; GFX8-NEXT: s_lshr_b32 s9, s4, 16
1975 ; GFX8-NEXT: s_lshr_b32 s10, s4, 24
1976 ; GFX8-NEXT: s_and_b32 s4, s4, 0xff
1977 ; GFX8-NEXT: s_lshl_b32 s8, s8, 8
1978 ; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1
1979 ; GFX8-NEXT: s_or_b32 s4, s4, s8
1980 ; GFX8-NEXT: s_and_b32 s8, s9, 0xff
1981 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
1982 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
1983 ; GFX8-NEXT: s_lshl_b32 s8, s8, 16
1984 ; GFX8-NEXT: s_or_b32 s4, s4, s8
1985 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
1986 ; GFX8-NEXT: v_mul_hi_u32 v1, s4, v0
1987 ; GFX8-NEXT: s_lshr_b32 s11, s5, 8
1988 ; GFX8-NEXT: s_and_b32 s5, s5, 0xff
1989 ; GFX8-NEXT: s_lshl_b32 s5, s5, 8
1990 ; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24
1991 ; GFX8-NEXT: s_and_b32 s8, s11, 0xff
1992 ; GFX8-NEXT: s_or_b32 s5, s10, s5
1993 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
1994 ; GFX8-NEXT: s_and_b32 s5, 0xffff, s5
1995 ; GFX8-NEXT: s_lshl_b32 s8, s8, 16
1996 ; GFX8-NEXT: s_or_b32 s5, s5, s8
1997 ; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s4, v1
1998 ; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
1999 ; GFX8-NEXT: v_mul_hi_u32 v0, s5, v0
2000 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
2001 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
2002 ; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1
2003 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
2004 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24
2005 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
2006 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
2007 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
2008 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v1
2009 ; GFX8-NEXT: s_lshl_b32 s4, s6, 17
2010 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
2011 ; GFX8-NEXT: s_or_b32 s0, s4, s0
2012 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2013 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1
2014 ; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0
2015 ; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s2
2016 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s5, v0
2017 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
2018 ; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0
2019 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
2020 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2021 ; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v0
2022 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
2023 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
2024 ; GFX8-NEXT: s_and_b32 s7, 0xffff, s7
2025 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2026 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v0
2027 ; GFX8-NEXT: s_lshl_b32 s0, s7, 17
2028 ; GFX8-NEXT: s_lshl_b32 s1, s1, 1
2029 ; GFX8-NEXT: s_or_b32 s0, s0, s1
2030 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2031 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2032 ; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0
2033 ; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s3
2034 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
2035 ; GFX8-NEXT: v_mov_b32_e32 v2, 8
2036 ; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2037 ; GFX8-NEXT: v_mov_b32_e32 v4, 16
2038 ; GFX8-NEXT: v_or_b32_sdwa v3, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
2039 ; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2040 ; GFX8-NEXT: v_or_b32_e32 v1, v3, v1
2041 ; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v0
2042 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3
2043 ; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2044 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
2045 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
2046 ; GFX8-NEXT: v_readfirstlane_b32 s0, v1
2047 ; GFX8-NEXT: v_readfirstlane_b32 s1, v0
2048 ; GFX8-NEXT: ; return to shader part epilog
2050 ; GFX9-LABEL: s_fshr_v2i24:
2052 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
2053 ; GFX9-NEXT: s_lshr_b32 s9, s1, 8
2054 ; GFX9-NEXT: s_and_b32 s1, s1, 0xff
2055 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
2056 ; GFX9-NEXT: s_lshr_b32 s6, s0, 8
2057 ; GFX9-NEXT: s_lshr_b32 s8, s0, 24
2058 ; GFX9-NEXT: s_lshl_b32 s1, s1, 8
2059 ; GFX9-NEXT: s_and_b32 s6, s6, 0xff
2060 ; GFX9-NEXT: s_or_b32 s1, s8, s1
2061 ; GFX9-NEXT: s_lshr_b32 s8, s2, 8
2062 ; GFX9-NEXT: s_lshr_b32 s7, s0, 16
2063 ; GFX9-NEXT: s_and_b32 s0, s0, 0xff
2064 ; GFX9-NEXT: s_lshl_b32 s6, s6, 8
2065 ; GFX9-NEXT: s_and_b32 s8, s8, 0xff
2066 ; GFX9-NEXT: s_or_b32 s0, s0, s6
2067 ; GFX9-NEXT: s_and_b32 s6, s7, 0xff
2068 ; GFX9-NEXT: s_and_b32 s7, s9, 0xff
2069 ; GFX9-NEXT: s_lshr_b32 s9, s2, 16
2070 ; GFX9-NEXT: s_lshr_b32 s10, s2, 24
2071 ; GFX9-NEXT: s_and_b32 s2, s2, 0xff
2072 ; GFX9-NEXT: s_lshl_b32 s8, s8, 8
2073 ; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
2074 ; GFX9-NEXT: s_or_b32 s2, s2, s8
2075 ; GFX9-NEXT: s_and_b32 s8, s9, 0xff
2076 ; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
2077 ; GFX9-NEXT: s_and_b32 s8, 0xffff, s8
2078 ; GFX9-NEXT: s_lshr_b32 s11, s3, 8
2079 ; GFX9-NEXT: s_and_b32 s2, 0xffff, s2
2080 ; GFX9-NEXT: s_lshl_b32 s8, s8, 16
2081 ; GFX9-NEXT: s_and_b32 s3, s3, 0xff
2082 ; GFX9-NEXT: s_or_b32 s2, s2, s8
2083 ; GFX9-NEXT: s_lshl_b32 s3, s3, 8
2084 ; GFX9-NEXT: s_and_b32 s8, s11, 0xff
2085 ; GFX9-NEXT: v_not_b32_e32 v1, 23
2086 ; GFX9-NEXT: s_or_b32 s3, s10, s3
2087 ; GFX9-NEXT: s_and_b32 s8, 0xffff, s8
2088 ; GFX9-NEXT: v_mul_lo_u32 v1, v0, v1
2089 ; GFX9-NEXT: s_and_b32 s3, 0xffff, s3
2090 ; GFX9-NEXT: s_lshl_b32 s8, s8, 16
2091 ; GFX9-NEXT: s_or_b32 s3, s3, s8
2092 ; GFX9-NEXT: s_lshr_b32 s8, s4, 8
2093 ; GFX9-NEXT: s_and_b32 s8, s8, 0xff
2094 ; GFX9-NEXT: s_lshr_b32 s9, s4, 16
2095 ; GFX9-NEXT: s_lshr_b32 s10, s4, 24
2096 ; GFX9-NEXT: s_and_b32 s4, s4, 0xff
2097 ; GFX9-NEXT: s_lshl_b32 s8, s8, 8
2098 ; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
2099 ; GFX9-NEXT: s_or_b32 s4, s4, s8
2100 ; GFX9-NEXT: s_and_b32 s8, s9, 0xff
2101 ; GFX9-NEXT: s_and_b32 s8, 0xffff, s8
2102 ; GFX9-NEXT: s_and_b32 s4, 0xffff, s4
2103 ; GFX9-NEXT: s_lshl_b32 s8, s8, 16
2104 ; GFX9-NEXT: s_or_b32 s4, s4, s8
2105 ; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
2106 ; GFX9-NEXT: v_mul_hi_u32 v1, s4, v0
2107 ; GFX9-NEXT: s_lshr_b32 s11, s5, 8
2108 ; GFX9-NEXT: s_and_b32 s5, s5, 0xff
2109 ; GFX9-NEXT: s_lshl_b32 s5, s5, 8
2110 ; GFX9-NEXT: s_and_b32 s8, s11, 0xff
2111 ; GFX9-NEXT: s_or_b32 s5, s10, s5
2112 ; GFX9-NEXT: s_and_b32 s8, 0xffff, s8
2113 ; GFX9-NEXT: s_and_b32 s5, 0xffff, s5
2114 ; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24
2115 ; GFX9-NEXT: s_lshl_b32 s8, s8, 16
2116 ; GFX9-NEXT: s_or_b32 s5, s5, s8
2117 ; GFX9-NEXT: v_mul_hi_u32 v0, s5, v0
2118 ; GFX9-NEXT: v_sub_u32_e32 v1, s4, v1
2119 ; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1
2120 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
2121 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
2122 ; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24
2123 ; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1
2124 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
2125 ; GFX9-NEXT: s_and_b32 s0, 0xffff, s0
2126 ; GFX9-NEXT: s_and_b32 s6, 0xffff, s6
2127 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
2128 ; GFX9-NEXT: v_sub_u32_e32 v2, 23, v1
2129 ; GFX9-NEXT: s_lshl_b32 s4, s6, 17
2130 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
2131 ; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1
2132 ; GFX9-NEXT: s_or_b32 s0, s4, s0
2133 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2134 ; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s2
2135 ; GFX9-NEXT: v_sub_u32_e32 v0, s5, v0
2136 ; GFX9-NEXT: v_lshl_or_b32 v1, s0, v2, v1
2137 ; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0
2138 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
2139 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2140 ; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v0
2141 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
2142 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
2143 ; GFX9-NEXT: s_and_b32 s7, 0xffff, s7
2144 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2145 ; GFX9-NEXT: v_sub_u32_e32 v2, 23, v0
2146 ; GFX9-NEXT: s_lshl_b32 s0, s7, 17
2147 ; GFX9-NEXT: s_lshl_b32 s1, s1, 1
2148 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2149 ; GFX9-NEXT: s_or_b32 s0, s0, s1
2150 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2151 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s3
2152 ; GFX9-NEXT: v_mov_b32_e32 v3, 8
2153 ; GFX9-NEXT: v_lshl_or_b32 v0, s0, v2, v0
2154 ; GFX9-NEXT: v_mov_b32_e32 v2, 0xff
2155 ; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2156 ; GFX9-NEXT: v_and_or_b32 v2, v1, v2, v3
2157 ; GFX9-NEXT: v_mov_b32_e32 v3, 16
2158 ; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2159 ; GFX9-NEXT: v_and_b32_e32 v3, 0xff, v0
2160 ; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3
2161 ; GFX9-NEXT: v_or3_b32 v1, v2, v1, v3
2162 ; GFX9-NEXT: v_bfe_u32 v2, v0, 8, 8
2163 ; GFX9-NEXT: v_bfe_u32 v0, v0, 16, 8
2164 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, 8, v2
2165 ; GFX9-NEXT: v_readfirstlane_b32 s0, v1
2166 ; GFX9-NEXT: v_readfirstlane_b32 s1, v0
2167 ; GFX9-NEXT: ; return to shader part epilog
2169 ; GFX10-LABEL: s_fshr_v2i24:
2171 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
2172 ; GFX10-NEXT: s_lshr_b32 s14, s4, 8
2173 ; GFX10-NEXT: s_lshr_b32 s15, s4, 16
2174 ; GFX10-NEXT: s_and_b32 s14, s14, 0xff
2175 ; GFX10-NEXT: s_lshr_b32 s16, s4, 24
2176 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0
2177 ; GFX10-NEXT: s_and_b32 s4, s4, 0xff
2178 ; GFX10-NEXT: s_and_b32 s15, s15, 0xff
2179 ; GFX10-NEXT: s_lshl_b32 s14, s14, 8
2180 ; GFX10-NEXT: s_and_b32 s15, 0xffff, s15
2181 ; GFX10-NEXT: s_or_b32 s4, s4, s14
2182 ; GFX10-NEXT: s_lshr_b32 s17, s5, 8
2183 ; GFX10-NEXT: s_and_b32 s5, s5, 0xff
2184 ; GFX10-NEXT: s_lshl_b32 s14, s15, 16
2185 ; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
2186 ; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
2187 ; GFX10-NEXT: s_lshl_b32 s5, s5, 8
2188 ; GFX10-NEXT: s_and_b32 s15, s17, 0xff
2189 ; GFX10-NEXT: s_or_b32 s4, s4, s14
2190 ; GFX10-NEXT: s_or_b32 s5, s16, s5
2191 ; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0
2192 ; GFX10-NEXT: s_and_b32 s14, 0xffff, s15
2193 ; GFX10-NEXT: s_and_b32 s5, 0xffff, s5
2194 ; GFX10-NEXT: s_lshl_b32 s14, s14, 16
2195 ; GFX10-NEXT: s_lshr_b32 s9, s1, 8
2196 ; GFX10-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0
2197 ; GFX10-NEXT: s_or_b32 s5, s5, s14
2198 ; GFX10-NEXT: s_and_b32 s1, s1, 0xff
2199 ; GFX10-NEXT: s_lshr_b32 s10, s2, 8
2200 ; GFX10-NEXT: s_lshr_b32 s8, s0, 24
2201 ; GFX10-NEXT: s_lshr_b32 s11, s2, 16
2202 ; GFX10-NEXT: s_lshl_b32 s1, s1, 8
2203 ; GFX10-NEXT: s_and_b32 s9, s9, 0xff
2204 ; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
2205 ; GFX10-NEXT: s_and_b32 s10, s10, 0xff
2206 ; GFX10-NEXT: s_lshr_b32 s12, s2, 24
2207 ; GFX10-NEXT: s_and_b32 s2, s2, 0xff
2208 ; GFX10-NEXT: s_or_b32 s1, s8, s1
2209 ; GFX10-NEXT: s_and_b32 s8, 0xffff, s9
2210 ; GFX10-NEXT: s_lshl_b32 s9, s10, 8
2211 ; GFX10-NEXT: s_lshr_b32 s6, s0, 8
2212 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
2213 ; GFX10-NEXT: s_or_b32 s2, s2, s9
2214 ; GFX10-NEXT: s_lshr_b32 s13, s3, 8
2215 ; GFX10-NEXT: s_and_b32 s2, 0xffff, s2
2216 ; GFX10-NEXT: s_and_b32 s3, s3, 0xff
2217 ; GFX10-NEXT: v_mul_hi_u32 v1, s4, v0
2218 ; GFX10-NEXT: v_mul_hi_u32 v0, s5, v0
2219 ; GFX10-NEXT: s_and_b32 s6, s6, 0xff
2220 ; GFX10-NEXT: s_lshl_b32 s3, s3, 8
2221 ; GFX10-NEXT: s_lshr_b32 s7, s0, 16
2222 ; GFX10-NEXT: s_and_b32 s0, s0, 0xff
2223 ; GFX10-NEXT: s_lshl_b32 s6, s6, 8
2224 ; GFX10-NEXT: s_or_b32 s3, s12, s3
2225 ; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24
2226 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24
2227 ; GFX10-NEXT: s_and_b32 s7, s7, 0xff
2228 ; GFX10-NEXT: s_or_b32 s0, s0, s6
2229 ; GFX10-NEXT: s_and_b32 s3, 0xffff, s3
2230 ; GFX10-NEXT: s_and_b32 s7, 0xffff, s7
2231 ; GFX10-NEXT: s_and_b32 s0, 0xffff, s0
2232 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
2233 ; GFX10-NEXT: v_sub_nc_u32_e32 v1, s4, v1
2234 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, s5, v0
2235 ; GFX10-NEXT: s_and_b32 s4, s11, 0xff
2236 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
2237 ; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
2238 ; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
2239 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
2240 ; GFX10-NEXT: s_lshl_b32 s4, s4, 16
2241 ; GFX10-NEXT: s_lshl_b32 s1, s1, 1
2242 ; GFX10-NEXT: s_or_b32 s2, s2, s4
2243 ; GFX10-NEXT: s_and_b32 s4, s13, 0xff
2244 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
2245 ; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
2246 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
2247 ; GFX10-NEXT: s_and_b32 s4, 0xffff, s4
2248 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
2249 ; GFX10-NEXT: s_lshl_b32 s4, s4, 16
2250 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2251 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
2252 ; GFX10-NEXT: s_or_b32 s3, s3, s4
2253 ; GFX10-NEXT: s_lshl_b32 s4, s7, 17
2254 ; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
2255 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2256 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
2257 ; GFX10-NEXT: s_or_b32 s0, s4, s0
2258 ; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v1
2259 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2260 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1
2261 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v3
2262 ; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v0
2263 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2264 ; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s2
2265 ; GFX10-NEXT: s_lshl_b32 s2, s8, 17
2266 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3
2267 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s3
2268 ; GFX10-NEXT: v_lshl_or_b32 v1, s0, v2, v1
2269 ; GFX10-NEXT: s_or_b32 s0, s2, s1
2270 ; GFX10-NEXT: v_mov_b32_e32 v2, 8
2271 ; GFX10-NEXT: v_lshl_or_b32 v0, s0, v3, v0
2272 ; GFX10-NEXT: v_mov_b32_e32 v3, 16
2273 ; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
2274 ; GFX10-NEXT: v_and_b32_e32 v4, 0xff, v0
2275 ; GFX10-NEXT: v_and_or_b32 v2, 0xff, v1, v2
2276 ; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2
2277 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v4
2278 ; GFX10-NEXT: v_bfe_u32 v4, v0, 8, 8
2279 ; GFX10-NEXT: v_bfe_u32 v0, v0, 16, 8
2280 ; GFX10-NEXT: v_or3_b32 v1, v2, v1, v3
2281 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, 8, v4
2282 ; GFX10-NEXT: v_readfirstlane_b32 s0, v1
2283 ; GFX10-NEXT: v_readfirstlane_b32 s1, v0
2284 ; GFX10-NEXT: ; return to shader part epilog
2286 ; GFX11-LABEL: s_fshr_v2i24:
2288 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
2289 ; GFX11-NEXT: s_lshr_b32 s14, s4, 8
2290 ; GFX11-NEXT: s_lshr_b32 s15, s4, 16
2291 ; GFX11-NEXT: s_and_b32 s14, s14, 0xff
2292 ; GFX11-NEXT: s_lshr_b32 s16, s4, 24
2293 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0
2294 ; GFX11-NEXT: s_and_b32 s4, s4, 0xff
2295 ; GFX11-NEXT: s_and_b32 s15, s15, 0xff
2296 ; GFX11-NEXT: s_lshl_b32 s14, s14, 8
2297 ; GFX11-NEXT: s_and_b32 s15, 0xffff, s15
2298 ; GFX11-NEXT: s_or_b32 s4, s4, s14
2299 ; GFX11-NEXT: s_lshr_b32 s17, s5, 8
2300 ; GFX11-NEXT: s_and_b32 s5, s5, 0xff
2301 ; GFX11-NEXT: s_lshl_b32 s14, s15, 16
2302 ; GFX11-NEXT: s_and_b32 s4, 0xffff, s4
2303 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
2304 ; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
2305 ; GFX11-NEXT: s_lshl_b32 s5, s5, 8
2306 ; GFX11-NEXT: s_and_b32 s15, s17, 0xff
2307 ; GFX11-NEXT: s_or_b32 s4, s4, s14
2308 ; GFX11-NEXT: s_or_b32 s5, s16, s5
2309 ; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
2310 ; GFX11-NEXT: s_and_b32 s14, 0xffff, s15
2311 ; GFX11-NEXT: s_and_b32 s5, 0xffff, s5
2312 ; GFX11-NEXT: s_lshl_b32 s14, s14, 16
2313 ; GFX11-NEXT: s_lshr_b32 s10, s2, 8
2314 ; GFX11-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0
2315 ; GFX11-NEXT: s_or_b32 s5, s5, s14
2316 ; GFX11-NEXT: s_lshr_b32 s9, s1, 8
2317 ; GFX11-NEXT: s_and_b32 s1, s1, 0xff
2318 ; GFX11-NEXT: s_lshr_b32 s11, s2, 16
2319 ; GFX11-NEXT: s_and_b32 s10, s10, 0xff
2320 ; GFX11-NEXT: s_lshr_b32 s6, s0, 8
2321 ; GFX11-NEXT: s_lshr_b32 s8, s0, 24
2322 ; GFX11-NEXT: v_mul_hi_u32 v1, v0, v1
2323 ; GFX11-NEXT: s_lshr_b32 s12, s2, 24
2324 ; GFX11-NEXT: s_and_b32 s2, s2, 0xff
2325 ; GFX11-NEXT: s_lshl_b32 s1, s1, 8
2326 ; GFX11-NEXT: s_and_b32 s9, s9, 0xff
2327 ; GFX11-NEXT: s_and_b32 s11, s11, 0xff
2328 ; GFX11-NEXT: s_and_b32 s6, s6, 0xff
2329 ; GFX11-NEXT: s_or_b32 s1, s8, s1
2330 ; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1
2331 ; GFX11-NEXT: s_and_b32 s8, 0xffff, s9
2332 ; GFX11-NEXT: s_and_b32 s9, 0xffff, s11
2333 ; GFX11-NEXT: s_lshr_b32 s7, s0, 16
2334 ; GFX11-NEXT: s_and_b32 s0, s0, 0xff
2335 ; GFX11-NEXT: v_mul_hi_u32 v1, s4, v0
2336 ; GFX11-NEXT: v_mul_hi_u32 v0, s5, v0
2337 ; GFX11-NEXT: s_lshl_b32 s6, s6, 8
2338 ; GFX11-NEXT: s_and_b32 s7, s7, 0xff
2339 ; GFX11-NEXT: s_or_b32 s0, s0, s6
2340 ; GFX11-NEXT: s_and_b32 s7, 0xffff, s7
2341 ; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
2342 ; GFX11-NEXT: s_lshr_b32 s13, s3, 8
2343 ; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24
2344 ; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24
2345 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1
2346 ; GFX11-NEXT: s_and_b32 s3, s3, 0xff
2347 ; GFX11-NEXT: s_and_b32 s13, s13, 0xff
2348 ; GFX11-NEXT: s_lshl_b32 s3, s3, 8
2349 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
2350 ; GFX11-NEXT: s_or_b32 s3, s12, s3
2351 ; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1
2352 ; GFX11-NEXT: v_sub_nc_u32_e32 v0, s5, v0
2353 ; GFX11-NEXT: s_lshl_b32 s4, s10, 8
2354 ; GFX11-NEXT: s_and_b32 s10, 0xffff, s13
2355 ; GFX11-NEXT: s_or_b32 s2, s2, s4
2356 ; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v1
2357 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
2358 ; GFX11-NEXT: s_lshl_b32 s4, s9, 16
2359 ; GFX11-NEXT: s_and_b32 s2, 0xffff, s2
2360 ; GFX11-NEXT: s_and_b32 s3, 0xffff, s3
2361 ; GFX11-NEXT: s_or_b32 s2, s2, s4
2362 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
2363 ; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
2364 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
2365 ; GFX11-NEXT: s_lshl_b32 s4, s7, 17
2366 ; GFX11-NEXT: s_lshl_b32 s5, s10, 16
2367 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v1
2368 ; GFX11-NEXT: s_or_b32 s0, s4, s0
2369 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2370 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1
2371 ; GFX11-NEXT: s_lshl_b32 s1, s1, 1
2372 ; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo
2373 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
2374 ; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v1
2375 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1
2376 ; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0
2377 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0
2378 ; GFX11-NEXT: v_lshrrev_b32_e64 v1, v1, s2
2379 ; GFX11-NEXT: s_or_b32 s2, s3, s5
2380 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2381 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo
2382 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3
2383 ; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v0
2384 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
2385 ; GFX11-NEXT: v_lshl_or_b32 v1, s0, v2, v1
2386 ; GFX11-NEXT: s_lshl_b32 s0, s8, 17
2387 ; GFX11-NEXT: s_or_b32 s0, s0, s1
2388 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2389 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3
2390 ; GFX11-NEXT: v_bfe_u32 v3, v1, 16, 8
2391 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
2392 ; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3
2393 ; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2394 ; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s2
2395 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2396 ; GFX11-NEXT: v_lshl_or_b32 v0, s0, v2, v0
2397 ; GFX11-NEXT: v_bfe_u32 v2, v1, 8, 8
2398 ; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v0
2399 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2400 ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v2
2401 ; GFX11-NEXT: v_lshlrev_b32_e32 v4, 24, v4
2402 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
2403 ; GFX11-NEXT: v_and_or_b32 v1, 0xff, v1, v2
2404 ; GFX11-NEXT: v_bfe_u32 v2, v0, 8, 8
2405 ; GFX11-NEXT: v_bfe_u32 v0, v0, 16, 8
2406 ; GFX11-NEXT: v_or3_b32 v1, v1, v3, v4
2407 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2408 ; GFX11-NEXT: v_lshl_or_b32 v0, v0, 8, v2
2409 ; GFX11-NEXT: v_readfirstlane_b32 s0, v1
2410 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
2411 ; GFX11-NEXT: v_readfirstlane_b32 s1, v0
2412 ; GFX11-NEXT: ; return to shader part epilog
2413 %lhs = bitcast i48 %lhs.arg to <2 x i24>
2414 %rhs = bitcast i48 %rhs.arg to <2 x i24>
2415 %amt = bitcast i48 %amt.arg to <2 x i24>
2416 %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
2417 %cast.result = bitcast <2 x i24> %result to i48
2418 ret i48 %cast.result
; Funnel shift right of a <2 x i24> vector with a per-element variable amount,
; using the default calling convention. In the expected code lhs is read from
; v0/v1, rhs from v2/v3 and the amounts from v4/v5.
; Every target's expected sequence has the same shape:
;   * each amount is masked to 24 bits and reduced modulo 24 (reciprocal
;     estimate of 24, multiply-high, multiply by 24 and subtract, then two
;     conditional subtractions of 24);
;   * lhs is pre-shifted left by 1 and then shifted left by (23 - amount);
;   * rhs is masked to 24 bits and shifted right by the reduced amount;
;   * the two halves are OR'd (a separate v_or_b32 on GFX6 and GFX8, a fused
;     v_lshl_or_b32 on GFX9 and newer).
2421 define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
2422 ; GFX6-LABEL: v_fshr_v2i24:
2424 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2425 ; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
2426 ; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6
2427 ; GFX6-NEXT: v_not_b32_e32 v7, 23
2428 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2429 ; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2430 ; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
2431 ; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6
2432 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
2433 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2434 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
2435 ; GFX6-NEXT: v_mul_lo_u32 v7, v6, v7
2436 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
2437 ; GFX6-NEXT: v_mul_hi_u32 v7, v6, v7
2438 ; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v7
2439 ; GFX6-NEXT: v_mul_hi_u32 v7, v4, v6
2440 ; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6
2441 ; GFX6-NEXT: v_mul_lo_u32 v7, v7, 24
2442 ; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24
2443 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v7
2444 ; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4
2445 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2446 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
2447 ; GFX6-NEXT: v_subrev_i32_e32 v7, vcc, 24, v4
2448 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2449 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
2450 ; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 23, v4
2451 ; GFX6-NEXT: v_and_b32_e32 v7, 0xffffff, v7
2452 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2453 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v7, v0
2454 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
2455 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
2456 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6
2457 ; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
2458 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2459 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2460 ; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
2461 ; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2462 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2463 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
2464 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2465 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2466 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1
2467 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v3
2468 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
2469 ; GFX6-NEXT: s_setpc_b64 s[30:31]
2471 ; GFX8-LABEL: v_fshr_v2i24:
2473 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2474 ; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
2475 ; GFX8-NEXT: v_rcp_iflag_f32_e32 v6, v6
2476 ; GFX8-NEXT: v_not_b32_e32 v7, 23
2477 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2478 ; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2479 ; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
2480 ; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6
2481 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0
2482 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2483 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 1, v1
2484 ; GFX8-NEXT: v_mul_lo_u32 v7, v6, v7
2485 ; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3
2486 ; GFX8-NEXT: v_mul_hi_u32 v7, v6, v7
2487 ; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v7
2488 ; GFX8-NEXT: v_mul_hi_u32 v7, v4, v6
2489 ; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6
2490 ; GFX8-NEXT: v_mul_lo_u32 v7, v7, 24
2491 ; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24
2492 ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v7
2493 ; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4
2494 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2495 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
2496 ; GFX8-NEXT: v_subrev_u32_e32 v7, vcc, 24, v4
2497 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2498 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc
2499 ; GFX8-NEXT: v_sub_u32_e32 v7, vcc, 23, v4
2500 ; GFX8-NEXT: v_and_b32_e32 v7, 0xffffff, v7
2501 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2502 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v7, v0
2503 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2
2504 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
2505 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6
2506 ; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2
2507 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2508 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2509 ; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2
2510 ; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2511 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2512 ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 23, v2
2513 ; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2514 ; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2515 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, v4, v1
2516 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, v2, v3
2517 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
2518 ; GFX8-NEXT: s_setpc_b64 s[30:31]
2520 ; GFX9-LABEL: v_fshr_v2i24:
2522 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2523 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
2524 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6
2525 ; GFX9-NEXT: v_not_b32_e32 v7, 23
2526 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2527 ; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2528 ; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
2529 ; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6
2530 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2531 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
2532 ; GFX9-NEXT: v_and_b32_e32 v3, 0xffffff, v3
2533 ; GFX9-NEXT: v_mul_lo_u32 v7, v6, v7
2534 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 1, v1
2535 ; GFX9-NEXT: v_mul_hi_u32 v7, v6, v7
2536 ; GFX9-NEXT: v_add_u32_e32 v6, v6, v7
2537 ; GFX9-NEXT: v_mul_hi_u32 v7, v4, v6
2538 ; GFX9-NEXT: v_mul_hi_u32 v6, v5, v6
2539 ; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24
2540 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24
2541 ; GFX9-NEXT: v_sub_u32_e32 v4, v4, v7
2542 ; GFX9-NEXT: v_sub_u32_e32 v5, v5, v6
2543 ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4
2544 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2545 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2546 ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4
2547 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4
2548 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
2549 ; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4
2550 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2551 ; GFX9-NEXT: v_and_b32_e32 v6, 0xffffff, v6
2552 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v4, v2
2553 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v6, v2
2554 ; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v5
2555 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v5
2556 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
2557 ; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2
2558 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
2559 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2560 ; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2
2561 ; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2562 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2563 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v2, v3
2564 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, v4, v2
2565 ; GFX9-NEXT: s_setpc_b64 s[30:31]
2567 ; GFX10-LABEL: v_fshr_v2i24:
2569 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2570 ; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
2571 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2572 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2573 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2574 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3
2575 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6
2576 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0
2577 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 1, v1
2578 ; GFX10-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
2579 ; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6
2580 ; GFX10-NEXT: v_mul_lo_u32 v7, 0xffffffe8, v6
2581 ; GFX10-NEXT: v_mul_hi_u32 v7, v6, v7
2582 ; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v7
2583 ; GFX10-NEXT: v_mul_hi_u32 v7, v4, v6
2584 ; GFX10-NEXT: v_mul_hi_u32 v6, v5, v6
2585 ; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24
2586 ; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24
2587 ; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v7
2588 ; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v6
2589 ; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
2590 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
2591 ; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
2592 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2593 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
2594 ; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
2595 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2596 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
2597 ; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
2598 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2599 ; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
2600 ; GFX10-NEXT: v_sub_nc_u32_e32 v6, 23, v4
2601 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2602 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2603 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffffff, v6
2604 ; GFX10-NEXT: v_sub_nc_u32_e32 v7, 23, v5
2605 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2606 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, v4, v2
2607 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v7
2608 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, v5, v3
2609 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, v6, v2
2610 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, v4, v3
2611 ; GFX10-NEXT: s_setpc_b64 s[30:31]
2613 ; GFX11-LABEL: v_fshr_v2i24:
2615 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2616 ; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
2617 ; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4
2618 ; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2619 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0
2620 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 1, v1
2621 ; GFX11-NEXT: v_rcp_iflag_f32_e32 v6, v6
2622 ; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3
2623 ; GFX11-NEXT: s_waitcnt_depctr 0xfff
2624 ; GFX11-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
2625 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2626 ; GFX11-NEXT: v_cvt_u32_f32_e32 v6, v6
2627 ; GFX11-NEXT: v_mul_lo_u32 v7, 0xffffffe8, v6
2628 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2629 ; GFX11-NEXT: v_mul_hi_u32 v7, v6, v7
2630 ; GFX11-NEXT: v_add_nc_u32_e32 v6, v6, v7
2631 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2632 ; GFX11-NEXT: v_mul_hi_u32 v7, v4, v6
2633 ; GFX11-NEXT: v_mul_lo_u32 v7, v7, 24
2634 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2635 ; GFX11-NEXT: v_sub_nc_u32_e32 v4, v4, v7
2636 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
2637 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2638 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2639 ; GFX11-NEXT: v_mul_hi_u32 v6, v5, v6
2640 ; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24
2641 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2642 ; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v6
2643 ; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
2644 ; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
2645 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2646 ; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2647 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
2648 ; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4
2649 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2650 ; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo
2651 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4
2652 ; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5
2653 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2654 ; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo
2655 ; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5
2656 ; GFX11-NEXT: v_sub_nc_u32_e32 v6, 23, v4
2657 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
2658 ; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4
2659 ; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6
2660 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_4)
2661 ; GFX11-NEXT: v_sub_nc_u32_e32 v7, 23, v5
2662 ; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5
2663 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, v4, v2
2664 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
2665 ; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v7
2666 ; GFX11-NEXT: v_lshrrev_b32_e32 v3, v5, v3
2667 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
2668 ; GFX11-NEXT: v_lshl_or_b32 v0, v0, v6, v2
2669 ; GFX11-NEXT: v_lshl_or_b32 v1, v1, v4, v3
2670 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2671 %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt)
2672 ret <2 x i24> %result
; Funnel shift right of i32 in an amdgpu_ps function where lhs, rhs and the
; amount are all inreg (read from s0, s1 and s2 in the expected code).
; Every target is expected to use v_alignbit_b32 and then move the result back
; to s0 with v_readfirstlane_b32. GFX6, GFX8 and GFX9 first copy both s1 and s2
; into VGPRs; GFX10 and GFX11 copy only the amount (s2) and pass s1 directly.
2675 define amdgpu_ps i32 @s_fshr_i32(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
2676 ; GFX6-LABEL: s_fshr_i32:
2678 ; GFX6-NEXT: v_mov_b32_e32 v0, s1
2679 ; GFX6-NEXT: v_mov_b32_e32 v1, s2
2680 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1
2681 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
2682 ; GFX6-NEXT: ; return to shader part epilog
2684 ; GFX8-LABEL: s_fshr_i32:
2686 ; GFX8-NEXT: v_mov_b32_e32 v0, s1
2687 ; GFX8-NEXT: v_mov_b32_e32 v1, s2
2688 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1
2689 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
2690 ; GFX8-NEXT: ; return to shader part epilog
2692 ; GFX9-LABEL: s_fshr_i32:
2694 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
2695 ; GFX9-NEXT: v_mov_b32_e32 v1, s2
2696 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1
2697 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
2698 ; GFX9-NEXT: ; return to shader part epilog
2700 ; GFX10-LABEL: s_fshr_i32:
2702 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
2703 ; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0
2704 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2705 ; GFX10-NEXT: ; return to shader part epilog
2707 ; GFX11-LABEL: s_fshr_i32:
2709 ; GFX11-NEXT: v_mov_b32_e32 v0, s2
2710 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2711 ; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, v0
2712 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
2713 ; GFX11-NEXT: ; return to shader part epilog
2714 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
; Funnel shift right of inreg i32 operands by the constant amount 5 in an
; amdgpu_ps function. The expected code passes 5 as an immediate operand of
; v_alignbit_b32 and returns the result in s0 through v_readfirstlane_b32.
; GFX6, GFX8 and GFX9 copy s1 to a VGPR first; GFX10 and GFX11 use s0 and s1
; directly.
2718 define amdgpu_ps i32 @s_fshr_i32_5(i32 inreg %lhs, i32 inreg %rhs) {
2719 ; GFX6-LABEL: s_fshr_i32_5:
2721 ; GFX6-NEXT: v_mov_b32_e32 v0, s1
2722 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 5
2723 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
2724 ; GFX6-NEXT: ; return to shader part epilog
2726 ; GFX8-LABEL: s_fshr_i32_5:
2728 ; GFX8-NEXT: v_mov_b32_e32 v0, s1
2729 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 5
2730 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
2731 ; GFX8-NEXT: ; return to shader part epilog
2733 ; GFX9-LABEL: s_fshr_i32_5:
2735 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
2736 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 5
2737 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
2738 ; GFX9-NEXT: ; return to shader part epilog
2740 ; GFX10-LABEL: s_fshr_i32_5:
2742 ; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 5
2743 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2744 ; GFX10-NEXT: ; return to shader part epilog
2746 ; GFX11-LABEL: s_fshr_i32_5:
2748 ; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 5
2749 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2750 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
2751 ; GFX11-NEXT: ; return to shader part epilog
2752 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5)
; Funnel shift right of inreg i32 operands by the constant amount 8 in an
; amdgpu_ps function. The expected code passes 8 as an immediate operand of
; v_alignbit_b32 and returns the result in s0 through v_readfirstlane_b32.
; GFX6, GFX8 and GFX9 copy s1 to a VGPR first; GFX10 and GFX11 use s0 and s1
; directly.
2756 define amdgpu_ps i32 @s_fshr_i32_8(i32 inreg %lhs, i32 inreg %rhs) {
2757 ; GFX6-LABEL: s_fshr_i32_8:
2759 ; GFX6-NEXT: v_mov_b32_e32 v0, s1
2760 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 8
2761 ; GFX6-NEXT: v_readfirstlane_b32 s0, v0
2762 ; GFX6-NEXT: ; return to shader part epilog
2764 ; GFX8-LABEL: s_fshr_i32_8:
2766 ; GFX8-NEXT: v_mov_b32_e32 v0, s1
2767 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 8
2768 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0
2769 ; GFX8-NEXT: ; return to shader part epilog
2771 ; GFX9-LABEL: s_fshr_i32_8:
2773 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
2774 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 8
2775 ; GFX9-NEXT: v_readfirstlane_b32 s0, v0
2776 ; GFX9-NEXT: ; return to shader part epilog
2778 ; GFX10-LABEL: s_fshr_i32_8:
2780 ; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 8
2781 ; GFX10-NEXT: v_readfirstlane_b32 s0, v0
2782 ; GFX10-NEXT: ; return to shader part epilog
2784 ; GFX11-LABEL: s_fshr_i32_8:
2786 ; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 8
2787 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2788 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0
2789 ; GFX11-NEXT: ; return to shader part epilog
2790 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8)
; Funnel shift right of i32 with a variable amount, default calling convention.
; All targets are expected to emit a single v_alignbit_b32 on v0 (lhs),
; v1 (rhs) and v2 (amount), writing the result to v0.
2794 define i32 @v_fshr_i32(i32 %lhs, i32 %rhs, i32 %amt) {
2795 ; GCN-LABEL: v_fshr_i32:
2797 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2798 ; GCN-NEXT: v_alignbit_b32 v0, v0, v1, v2
2799 ; GCN-NEXT: s_setpc_b64 s[30:31]
2801 ; GFX11-LABEL: v_fshr_i32:
2803 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2804 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, v2
2805 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2806 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
; Funnel shift right of i32 by the constant amount 5, default calling
; convention. All targets are expected to emit a single v_alignbit_b32 with 5
; as an immediate operand.
2810 define i32 @v_fshr_i32_5(i32 %lhs, i32 %rhs) {
2811 ; GCN-LABEL: v_fshr_i32_5:
2813 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2814 ; GCN-NEXT: v_alignbit_b32 v0, v0, v1, 5
2815 ; GCN-NEXT: s_setpc_b64 s[30:31]
2817 ; GFX11-LABEL: v_fshr_i32_5:
2819 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2820 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 5
2821 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2822 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5)
; Funnel shift right of i32 by the constant amount 8, default calling
; convention. All targets are expected to emit a single v_alignbit_b32 with 8
; as an immediate operand.
2826 define i32 @v_fshr_i32_8(i32 %lhs, i32 %rhs) {
2827 ; GCN-LABEL: v_fshr_i32_8:
2829 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2830 ; GCN-NEXT: v_alignbit_b32 v0, v0, v1, 8
2831 ; GCN-NEXT: s_setpc_b64 s[30:31]
2833 ; GFX11-LABEL: v_fshr_i32_8:
2835 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2836 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 8
2837 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2838 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8)
; Funnel shift right of i32 in an amdgpu_ps function with inreg lhs and rhs
; (s0, s1 in the expected code) and a non-inreg amount (v0).
; The result is bitcast to float and returned; the expected code leaves it in
; v0 with no v_readfirstlane_b32. GFX6, GFX8 and GFX9 copy s1 into v1 before
; the v_alignbit_b32; GFX10 and GFX11 pass s0 and s1 directly.
2842 define amdgpu_ps float @v_fshr_i32_ssv(i32 inreg %lhs, i32 inreg %rhs, i32 %amt) {
2843 ; GFX6-LABEL: v_fshr_i32_ssv:
2845 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2846 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v1, v0
2847 ; GFX6-NEXT: ; return to shader part epilog
2849 ; GFX8-LABEL: v_fshr_i32_ssv:
2851 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2852 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v1, v0
2853 ; GFX8-NEXT: ; return to shader part epilog
2855 ; GFX9-LABEL: v_fshr_i32_ssv:
2857 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
2858 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v1, v0
2859 ; GFX9-NEXT: ; return to shader part epilog
2861 ; GFX10-LABEL: v_fshr_i32_ssv:
2863 ; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0
2864 ; GFX10-NEXT: ; return to shader part epilog
2866 ; GFX11-LABEL: v_fshr_i32_ssv:
2868 ; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, v0
2869 ; GFX11-NEXT: ; return to shader part epilog
2870 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2871 %cast.result = bitcast i32 %result to float
2872 ret float %cast.result
; Funnel shift right of i32 in an amdgpu_ps function with inreg lhs (s0 in the
; expected code), non-inreg rhs (v0) and inreg amount (s1).
; The result is bitcast to float and returned; the expected code leaves it in
; v0 with no v_readfirstlane_b32. GFX6, GFX8 and GFX9 copy the amount from s1
; into v1 before the v_alignbit_b32; GFX10 and GFX11 pass s1 directly.
2875 define amdgpu_ps float @v_fshr_i32_svs(i32 inreg %lhs, i32 %rhs, i32 inreg %amt) {
2876 ; GFX6-LABEL: v_fshr_i32_svs:
2878 ; GFX6-NEXT: v_mov_b32_e32 v1, s1
2879 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1
2880 ; GFX6-NEXT: ; return to shader part epilog
2882 ; GFX8-LABEL: v_fshr_i32_svs:
2884 ; GFX8-NEXT: v_mov_b32_e32 v1, s1
2885 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1
2886 ; GFX8-NEXT: ; return to shader part epilog
2888 ; GFX9-LABEL: v_fshr_i32_svs:
2890 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
2891 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1
2892 ; GFX9-NEXT: ; return to shader part epilog
2894 ; GFX10-LABEL: v_fshr_i32_svs:
2896 ; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, s1
2897 ; GFX10-NEXT: ; return to shader part epilog
2899 ; GFX11-LABEL: v_fshr_i32_svs:
2901 ; GFX11-NEXT: v_alignbit_b32 v0, s0, v0, s1
2902 ; GFX11-NEXT: ; return to shader part epilog
2903 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2904 %cast.result = bitcast i32 %result to float
2905 ret float %cast.result
; Funnel shift right of i32 in an amdgpu_ps function whose result is bitcast
; to float and left in v0 (no v_readfirstlane_b32 in the expected code).
; NOTE(review): the "_vss" suffix, by analogy with the _ssv and _svs siblings,
; suggests lhs was meant to be a non-inreg (VGPR) argument, but all three
; arguments are declared inreg and the expected code reads lhs from s0. As
; written, this covers the same all-SGPR operand mix as s_fshr_i32, differing
; only in the float return. Confirm the intent before changing the signature;
; the assertions would have to be regenerated with update_llc_test_checks.py.
2908 define amdgpu_ps float @v_fshr_i32_vss(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) {
2909 ; GFX6-LABEL: v_fshr_i32_vss:
2911 ; GFX6-NEXT: v_mov_b32_e32 v0, s1
2912 ; GFX6-NEXT: v_mov_b32_e32 v1, s2
2913 ; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1
2914 ; GFX6-NEXT: ; return to shader part epilog
2916 ; GFX8-LABEL: v_fshr_i32_vss:
2918 ; GFX8-NEXT: v_mov_b32_e32 v0, s1
2919 ; GFX8-NEXT: v_mov_b32_e32 v1, s2
2920 ; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1
2921 ; GFX8-NEXT: ; return to shader part epilog
2923 ; GFX9-LABEL: v_fshr_i32_vss:
2925 ; GFX9-NEXT: v_mov_b32_e32 v0, s1
2926 ; GFX9-NEXT: v_mov_b32_e32 v1, s2
2927 ; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1
2928 ; GFX9-NEXT: ; return to shader part epilog
2930 ; GFX10-LABEL: v_fshr_i32_vss:
2932 ; GFX10-NEXT: v_mov_b32_e32 v0, s2
2933 ; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0
2934 ; GFX10-NEXT: ; return to shader part epilog
2936 ; GFX11-LABEL: v_fshr_i32_vss:
2938 ; GFX11-NEXT: v_mov_b32_e32 v0, s2
2939 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
2940 ; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, v0
2941 ; GFX11-NEXT: ; return to shader part epilog
2942 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt)
2943 %cast.result = bitcast i32 %result to float
2944 ret float %cast.result
; Funnel shift right of a <2 x i32> vector with per-element variable amounts,
; default calling convention. All targets are expected to emit one
; v_alignbit_b32 per element (lhs in v0-v1, rhs in v2-v3, amounts in v4-v5).
2947 define <2 x i32> @v_fshr_v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) {
2948 ; GCN-LABEL: v_fshr_v2i32:
2950 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2951 ; GCN-NEXT: v_alignbit_b32 v0, v0, v2, v4
2952 ; GCN-NEXT: v_alignbit_b32 v1, v1, v3, v5
2953 ; GCN-NEXT: s_setpc_b64 s[30:31]
2955 ; GFX11-LABEL: v_fshr_v2i32:
2957 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2958 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v2, v4
2959 ; GFX11-NEXT: v_alignbit_b32 v1, v1, v3, v5
2960 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2961 %result = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt)
2962 ret <2 x i32> %result
; Funnel shift right of a <3 x i32> vector with per-element variable amounts,
; default calling convention. All targets are expected to emit one
; v_alignbit_b32 per element (lhs in v0-v2, rhs in v3-v5, amounts in v6-v8).
2965 define <3 x i32> @v_fshr_v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) {
2966 ; GCN-LABEL: v_fshr_v3i32:
2968 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2969 ; GCN-NEXT: v_alignbit_b32 v0, v0, v3, v6
2970 ; GCN-NEXT: v_alignbit_b32 v1, v1, v4, v7
2971 ; GCN-NEXT: v_alignbit_b32 v2, v2, v5, v8
2972 ; GCN-NEXT: s_setpc_b64 s[30:31]
2974 ; GFX11-LABEL: v_fshr_v3i32:
2976 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2977 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v3, v6
2978 ; GFX11-NEXT: v_alignbit_b32 v1, v1, v4, v7
2979 ; GFX11-NEXT: v_alignbit_b32 v2, v2, v5, v8
2980 ; GFX11-NEXT: s_setpc_b64 s[30:31]
2981 %result = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt)
2982 ret <3 x i32> %result
; Funnel shift right of a <4 x i32> vector with per-element variable amounts,
; default calling convention. All targets are expected to emit one
; v_alignbit_b32 per element (lhs in v0-v3, rhs in v4-v7, amounts in v8-v11).
2985 define <4 x i32> @v_fshr_v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) {
2986 ; GCN-LABEL: v_fshr_v4i32:
2988 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2989 ; GCN-NEXT: v_alignbit_b32 v0, v0, v4, v8
2990 ; GCN-NEXT: v_alignbit_b32 v1, v1, v5, v9
2991 ; GCN-NEXT: v_alignbit_b32 v2, v2, v6, v10
2992 ; GCN-NEXT: v_alignbit_b32 v3, v3, v7, v11
2993 ; GCN-NEXT: s_setpc_b64 s[30:31]
2995 ; GFX11-LABEL: v_fshr_v4i32:
2997 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2998 ; GFX11-NEXT: v_alignbit_b32 v0, v0, v4, v8
2999 ; GFX11-NEXT: v_alignbit_b32 v1, v1, v5, v9
3000 ; GFX11-NEXT: v_alignbit_b32 v2, v2, v6, v10
3001 ; GFX11-NEXT: v_alignbit_b32 v3, v3, v7, v11
3002 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3003 %result = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt)
3004 ret <4 x i32> %result
; Funnel shift right of i16 in an amdgpu_ps function with all operands inreg
; (lhs in s0, rhs in s1, amount in s2 in the expected code).
; Every target is expected to expand this entirely in scalar instructions:
;   * right-shift amount = amount & 15, left-shift amount = 15 & ~amount
;     (s_andn2_b32, spelled s_and_not1_b32 on GFX11), both masked with 0xffff;
;   * lhs is pre-shifted left by 1 and then shifted left by the left amount;
;   * rhs is masked to 16 bits and shifted right by the right amount;
;   * the two halves are combined with s_or_b32 into s0.
3007 define amdgpu_ps i16 @s_fshr_i16(i16 inreg %lhs, i16 inreg %rhs, i16 inreg %amt) {
3008 ; GFX6-LABEL: s_fshr_i16:
3010 ; GFX6-NEXT: s_and_b32 s3, s2, 15
3011 ; GFX6-NEXT: s_andn2_b32 s2, 15, s2
3012 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
3013 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
3014 ; GFX6-NEXT: s_lshl_b32 s0, s0, s2
3015 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s3
3016 ; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
3017 ; GFX6-NEXT: s_lshr_b32 s1, s1, s2
3018 ; GFX6-NEXT: s_or_b32 s0, s0, s1
3019 ; GFX6-NEXT: ; return to shader part epilog
3021 ; GFX8-LABEL: s_fshr_i16:
3023 ; GFX8-NEXT: s_and_b32 s3, s2, 15
3024 ; GFX8-NEXT: s_andn2_b32 s2, 15, s2
3025 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
3026 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
3027 ; GFX8-NEXT: s_lshl_b32 s0, s0, s2
3028 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3029 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s3
3030 ; GFX8-NEXT: s_lshr_b32 s1, s1, s2
3031 ; GFX8-NEXT: s_or_b32 s0, s0, s1
3032 ; GFX8-NEXT: ; return to shader part epilog
3034 ; GFX9-LABEL: s_fshr_i16:
3036 ; GFX9-NEXT: s_and_b32 s3, s2, 15
3037 ; GFX9-NEXT: s_andn2_b32 s2, 15, s2
3038 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
3039 ; GFX9-NEXT: s_and_b32 s2, 0xffff, s2
3040 ; GFX9-NEXT: s_lshl_b32 s0, s0, s2
3041 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
3042 ; GFX9-NEXT: s_and_b32 s2, 0xffff, s3
3043 ; GFX9-NEXT: s_lshr_b32 s1, s1, s2
3044 ; GFX9-NEXT: s_or_b32 s0, s0, s1
3045 ; GFX9-NEXT: ; return to shader part epilog
3047 ; GFX10-LABEL: s_fshr_i16:
3049 ; GFX10-NEXT: s_and_b32 s3, s2, 15
3050 ; GFX10-NEXT: s_andn2_b32 s2, 15, s2
3051 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
3052 ; GFX10-NEXT: s_and_b32 s2, 0xffff, s2
3053 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
3054 ; GFX10-NEXT: s_and_b32 s3, 0xffff, s3
3055 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2
3056 ; GFX10-NEXT: s_lshr_b32 s1, s1, s3
3057 ; GFX10-NEXT: s_or_b32 s0, s0, s1
3058 ; GFX10-NEXT: ; return to shader part epilog
3060 ; GFX11-LABEL: s_fshr_i16:
3062 ; GFX11-NEXT: s_and_b32 s3, s2, 15
3063 ; GFX11-NEXT: s_and_not1_b32 s2, 15, s2
3064 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1
3065 ; GFX11-NEXT: s_and_b32 s2, 0xffff, s2
3066 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
3067 ; GFX11-NEXT: s_and_b32 s3, 0xffff, s3
3068 ; GFX11-NEXT: s_lshl_b32 s0, s0, s2
3069 ; GFX11-NEXT: s_lshr_b32 s1, s1, s3
3070 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3071 ; GFX11-NEXT: s_or_b32 s0, s0, s1
3072 ; GFX11-NEXT: ; return to shader part epilog
3073 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
; i16 fshr by the constant 4 with inreg operands in an amdgpu_ps function.
; The checks expect (lhs << 12) | (rhs >> 4). GFX6 produces the right-shift
; half with a single s_bfe_u32; the other targets mask rhs to 16 bits and use
; s_lshr_b32.
3077 define amdgpu_ps i16 @s_fshr_i16_4(i16 inreg %lhs, i16 inreg %rhs) {
3078 ; GFX6-LABEL: s_fshr_i16_4:
3080 ; GFX6-NEXT: s_lshl_b32 s0, s0, 12
3081 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0xc0004
3082 ; GFX6-NEXT: s_or_b32 s0, s0, s1
3083 ; GFX6-NEXT: ; return to shader part epilog
3085 ; GFX8-LABEL: s_fshr_i16_4:
3087 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3088 ; GFX8-NEXT: s_lshl_b32 s0, s0, 12
3089 ; GFX8-NEXT: s_lshr_b32 s1, s1, 4
3090 ; GFX8-NEXT: s_or_b32 s0, s0, s1
3091 ; GFX8-NEXT: ; return to shader part epilog
3093 ; GFX9-LABEL: s_fshr_i16_4:
3095 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
3096 ; GFX9-NEXT: s_lshl_b32 s0, s0, 12
3097 ; GFX9-NEXT: s_lshr_b32 s1, s1, 4
3098 ; GFX9-NEXT: s_or_b32 s0, s0, s1
3099 ; GFX9-NEXT: ; return to shader part epilog
3101 ; GFX10-LABEL: s_fshr_i16_4:
3103 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
3104 ; GFX10-NEXT: s_lshl_b32 s0, s0, 12
3105 ; GFX10-NEXT: s_lshr_b32 s1, s1, 4
3106 ; GFX10-NEXT: s_or_b32 s0, s0, s1
3107 ; GFX10-NEXT: ; return to shader part epilog
3109 ; GFX11-LABEL: s_fshr_i16_4:
3111 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
3112 ; GFX11-NEXT: s_lshl_b32 s0, s0, 12
3113 ; GFX11-NEXT: s_lshr_b32 s1, s1, 4
3114 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3115 ; GFX11-NEXT: s_or_b32 s0, s0, s1
3116 ; GFX11-NEXT: ; return to shader part epilog
3117 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4)
; i16 fshr by the constant 5 with inreg operands in an amdgpu_ps function.
; The checks expect (lhs << 11) | (rhs >> 5). GFX6 produces the right-shift
; half with a single s_bfe_u32; the other targets mask rhs to 16 bits and use
; s_lshr_b32.
3121 define amdgpu_ps i16 @s_fshr_i16_5(i16 inreg %lhs, i16 inreg %rhs) {
3122 ; GFX6-LABEL: s_fshr_i16_5:
3124 ; GFX6-NEXT: s_lshl_b32 s0, s0, 11
3125 ; GFX6-NEXT: s_bfe_u32 s1, s1, 0xb0005
3126 ; GFX6-NEXT: s_or_b32 s0, s0, s1
3127 ; GFX6-NEXT: ; return to shader part epilog
3129 ; GFX8-LABEL: s_fshr_i16_5:
3131 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3132 ; GFX8-NEXT: s_lshl_b32 s0, s0, 11
3133 ; GFX8-NEXT: s_lshr_b32 s1, s1, 5
3134 ; GFX8-NEXT: s_or_b32 s0, s0, s1
3135 ; GFX8-NEXT: ; return to shader part epilog
3137 ; GFX9-LABEL: s_fshr_i16_5:
3139 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
3140 ; GFX9-NEXT: s_lshl_b32 s0, s0, 11
3141 ; GFX9-NEXT: s_lshr_b32 s1, s1, 5
3142 ; GFX9-NEXT: s_or_b32 s0, s0, s1
3143 ; GFX9-NEXT: ; return to shader part epilog
3145 ; GFX10-LABEL: s_fshr_i16_5:
3147 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
3148 ; GFX10-NEXT: s_lshl_b32 s0, s0, 11
3149 ; GFX10-NEXT: s_lshr_b32 s1, s1, 5
3150 ; GFX10-NEXT: s_or_b32 s0, s0, s1
3151 ; GFX10-NEXT: ; return to shader part epilog
3153 ; GFX11-LABEL: s_fshr_i16_5:
3155 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
3156 ; GFX11-NEXT: s_lshl_b32 s0, s0, 11
3157 ; GFX11-NEXT: s_lshr_b32 s1, s1, 5
3158 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3159 ; GFX11-NEXT: s_or_b32 s0, s0, s1
3160 ; GFX11-NEXT: ; return to shader part epilog
3161 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5)
; i16 fshr with ordinary (non-inreg) arguments and a variable shift amount.
; The GFX6 checks use 32-bit VALU shifts with explicit 0xffff masking of the
; amounts and of rhs; GFX8 and later use the 16-bit shift instructions
; (v_lshlrev_b16 / v_lshrrev_b16) and need no such masking.
3165 define i16 @v_fshr_i16(i16 %lhs, i16 %rhs, i16 %amt) {
3166 ; GFX6-LABEL: v_fshr_i16:
3168 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3169 ; GFX6-NEXT: v_and_b32_e32 v3, 15, v2
3170 ; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2
3171 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v2
3172 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
3173 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3174 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
3175 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
3176 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
3177 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
3178 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
3179 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3181 ; GFX8-LABEL: v_fshr_i16:
3183 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3184 ; GFX8-NEXT: v_and_b32_e32 v3, 15, v2
3185 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
3186 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v2
3187 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0
3188 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0
3189 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, v3, v1
3190 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
3191 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3193 ; GFX9-LABEL: v_fshr_i16:
3195 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3196 ; GFX9-NEXT: v_and_b32_e32 v3, 15, v2
3197 ; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
3198 ; GFX9-NEXT: v_and_b32_e32 v2, 15, v2
3199 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0
3200 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0
3201 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, v3, v1
3202 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
3203 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3205 ; GFX10-LABEL: v_fshr_i16:
3207 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3208 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2
3209 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
3210 ; GFX10-NEXT: v_and_b32_e32 v2, 15, v2
3211 ; GFX10-NEXT: v_and_b32_e32 v3, 15, v3
3212 ; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1
3213 ; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0
3214 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3215 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3217 ; GFX11-LABEL: v_fshr_i16:
3219 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3220 ; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2
3221 ; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
3222 ; GFX11-NEXT: v_and_b32_e32 v2, 15, v2
3223 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
3224 ; GFX11-NEXT: v_and_b32_e32 v3, 15, v3
3225 ; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1
3226 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3227 ; GFX11-NEXT: v_lshlrev_b16 v0, v3, v0
3228 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
3229 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3230 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
; i16 fshr by the constant 4 with ordinary (non-inreg) arguments. The checks
; expect (lhs << 12) | (rhs >> 4). GFX6 uses a 32-bit shift plus v_bfe_u32
; (offset 4, width 12); GFX8 and later use the 16-bit shift instructions.
3234 define i16 @v_fshr_i16_4(i16 %lhs, i16 %rhs) {
3235 ; GFX6-LABEL: v_fshr_i16_4:
3237 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3238 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 12, v0
3239 ; GFX6-NEXT: v_bfe_u32 v1, v1, 4, 12
3240 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
3241 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3243 ; GFX8-LABEL: v_fshr_i16_4:
3245 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3246 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 12, v0
3247 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 4, v1
3248 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
3249 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3251 ; GFX9-LABEL: v_fshr_i16_4:
3253 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3254 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 12, v0
3255 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, 4, v1
3256 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
3257 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3259 ; GFX10-LABEL: v_fshr_i16_4:
3261 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3262 ; GFX10-NEXT: v_lshlrev_b16 v0, 12, v0
3263 ; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1
3264 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3265 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3267 ; GFX11-LABEL: v_fshr_i16_4:
3269 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3270 ; GFX11-NEXT: v_lshlrev_b16 v0, 12, v0
3271 ; GFX11-NEXT: v_lshrrev_b16 v1, 4, v1
3272 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
3273 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
3274 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3275 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4)
; i16 fshr by the constant 5 with ordinary (non-inreg) arguments. The checks
; expect (lhs << 11) | (rhs >> 5). GFX6 uses a 32-bit shift plus v_bfe_u32
; (offset 5, width 11); GFX8 and later use the 16-bit shift instructions.
3279 define i16 @v_fshr_i16_5(i16 %lhs, i16 %rhs) {
3280 ; GFX6-LABEL: v_fshr_i16_5:
3282 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3283 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 11, v0
3284 ; GFX6-NEXT: v_bfe_u32 v1, v1, 5, 11
3285 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
3286 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3288 ; GFX8-LABEL: v_fshr_i16_5:
3290 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3291 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 11, v0
3292 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 5, v1
3293 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
3294 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3296 ; GFX9-LABEL: v_fshr_i16_5:
3298 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3299 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 11, v0
3300 ; GFX9-NEXT: v_lshrrev_b16_e32 v1, 5, v1
3301 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
3302 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3304 ; GFX10-LABEL: v_fshr_i16_5:
3306 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3307 ; GFX10-NEXT: v_lshlrev_b16 v0, 11, v0
3308 ; GFX10-NEXT: v_lshrrev_b16 v1, 5, v1
3309 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3310 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3312 ; GFX11-LABEL: v_fshr_i16_5:
3314 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3315 ; GFX11-NEXT: v_lshlrev_b16 v0, 11, v0
3316 ; GFX11-NEXT: v_lshrrev_b16 v1, 5, v1
3317 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
3318 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
3319 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3320 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5)
; Mixed-operand i16 fshr in an amdgpu_ps function: lhs and rhs are inreg, amt
; is an ordinary argument, and the result is bitcast to half for the return.
; In every check block the amount masking and the variable shifts are VALU
; instructions, while the constant shift of lhs by 1 stays s_lshl_b32.
3324 define amdgpu_ps half @v_fshr_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt) {
3325 ; GFX6-LABEL: v_fshr_i16_ssv:
3327 ; GFX6-NEXT: v_and_b32_e32 v1, 15, v0
3328 ; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
3329 ; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
3330 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
3331 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
3332 ; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
3333 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
3334 ; GFX6-NEXT: s_and_b32 s0, s1, 0xffff
3335 ; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1
3336 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
3337 ; GFX6-NEXT: ; return to shader part epilog
3339 ; GFX8-LABEL: v_fshr_i16_ssv:
3341 ; GFX8-NEXT: v_and_b32_e32 v1, 15, v0
3342 ; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
3343 ; GFX8-NEXT: v_and_b32_e32 v0, 15, v0
3344 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
3345 ; GFX8-NEXT: v_lshlrev_b16_e64 v0, v0, s0
3346 ; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s1
3347 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
3348 ; GFX8-NEXT: ; return to shader part epilog
3350 ; GFX9-LABEL: v_fshr_i16_ssv:
3352 ; GFX9-NEXT: v_and_b32_e32 v1, 15, v0
3353 ; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
3354 ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0
3355 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
3356 ; GFX9-NEXT: v_lshlrev_b16_e64 v0, v0, s0
3357 ; GFX9-NEXT: v_lshrrev_b16_e64 v1, v1, s1
3358 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
3359 ; GFX9-NEXT: ; return to shader part epilog
3361 ; GFX10-LABEL: v_fshr_i16_ssv:
3363 ; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0
3364 ; GFX10-NEXT: v_and_b32_e32 v0, 15, v0
3365 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
3366 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v1
3367 ; GFX10-NEXT: v_lshrrev_b16 v0, v0, s1
3368 ; GFX10-NEXT: v_lshlrev_b16 v1, v1, s0
3369 ; GFX10-NEXT: v_or_b32_e32 v0, v1, v0
3370 ; GFX10-NEXT: ; return to shader part epilog
3372 ; GFX11-LABEL: v_fshr_i16_ssv:
3374 ; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0
3375 ; GFX11-NEXT: v_and_b32_e32 v0, 15, v0
3376 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1
3377 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
3378 ; GFX11-NEXT: v_and_b32_e32 v1, 15, v1
3379 ; GFX11-NEXT: v_lshrrev_b16 v0, v0, s1
3380 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3381 ; GFX11-NEXT: v_lshlrev_b16 v1, v1, s0
3382 ; GFX11-NEXT: v_or_b32_e32 v0, v1, v0
3383 ; GFX11-NEXT: ; return to shader part epilog
3384 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
3385 %cast.result = bitcast i16 %result to half
3386 ret half %cast.result
; Mixed-operand i16 fshr in an amdgpu_ps function: lhs and amt are inreg, rhs
; is an ordinary argument, and the result is bitcast to half for the return.
; In every check block the amount masking and the lhs shifts are SALU
; instructions; only the rhs right shift and the final OR are VALU.
3389 define amdgpu_ps half @v_fshr_i16_svs(i16 inreg %lhs, i16 %rhs, i16 inreg %amt) {
3390 ; GFX6-LABEL: v_fshr_i16_svs:
3392 ; GFX6-NEXT: s_and_b32 s2, s1, 15
3393 ; GFX6-NEXT: s_andn2_b32 s1, 15, s1
3394 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
3395 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
3396 ; GFX6-NEXT: s_lshl_b32 s0, s0, s1
3397 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
3398 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
3399 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, s1, v0
3400 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
3401 ; GFX6-NEXT: ; return to shader part epilog
3403 ; GFX8-LABEL: v_fshr_i16_svs:
3405 ; GFX8-NEXT: s_and_b32 s2, s1, 15
3406 ; GFX8-NEXT: s_andn2_b32 s1, 15, s1
3407 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
3408 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3409 ; GFX8-NEXT: s_lshl_b32 s0, s0, s1
3410 ; GFX8-NEXT: v_lshrrev_b16_e32 v0, s2, v0
3411 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
3412 ; GFX8-NEXT: ; return to shader part epilog
3414 ; GFX9-LABEL: v_fshr_i16_svs:
3416 ; GFX9-NEXT: s_and_b32 s2, s1, 15
3417 ; GFX9-NEXT: s_andn2_b32 s1, 15, s1
3418 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1
3419 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s1
3420 ; GFX9-NEXT: s_lshl_b32 s0, s0, s1
3421 ; GFX9-NEXT: v_lshrrev_b16_e32 v0, s2, v0
3422 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
3423 ; GFX9-NEXT: ; return to shader part epilog
3425 ; GFX10-LABEL: v_fshr_i16_svs:
3427 ; GFX10-NEXT: s_and_b32 s2, s1, 15
3428 ; GFX10-NEXT: s_andn2_b32 s1, 15, s1
3429 ; GFX10-NEXT: v_lshrrev_b16 v0, s2, v0
3430 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1
3431 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
3432 ; GFX10-NEXT: s_lshl_b32 s0, s0, s1
3433 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
3434 ; GFX10-NEXT: ; return to shader part epilog
3436 ; GFX11-LABEL: v_fshr_i16_svs:
3438 ; GFX11-NEXT: s_and_b32 s2, s1, 15
3439 ; GFX11-NEXT: s_and_not1_b32 s1, 15, s1
3440 ; GFX11-NEXT: v_lshrrev_b16 v0, s2, v0
3441 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1
3442 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
3443 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3444 ; GFX11-NEXT: s_lshl_b32 s0, s0, s1
3445 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
3446 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v0
3447 ; GFX11-NEXT: ; return to shader part epilog
3448 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
3449 %cast.result = bitcast i16 %result to half
3450 ret half %cast.result
; Mixed-operand i16 fshr in an amdgpu_ps function: lhs is an ordinary
; argument, rhs and amt are inreg, and the result is bitcast to half for the
; return. In every check block the lhs left shifts are VALU instructions while
; the amount masking and the rhs right shift are SALU; the final OR is VALU.
3453 define amdgpu_ps half @v_fshr_i16_vss(i16 %lhs, i16 inreg %rhs, i16 inreg %amt) {
3454 ; GFX6-LABEL: v_fshr_i16_vss:
3456 ; GFX6-NEXT: s_and_b32 s2, s1, 15
3457 ; GFX6-NEXT: s_andn2_b32 s1, 15, s1
3458 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
3459 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
3460 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, s1, v0
3461 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
3462 ; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
3463 ; GFX6-NEXT: s_lshr_b32 s0, s0, s1
3464 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
3465 ; GFX6-NEXT: ; return to shader part epilog
3467 ; GFX8-LABEL: v_fshr_i16_vss:
3469 ; GFX8-NEXT: s_and_b32 s2, s1, 15
3470 ; GFX8-NEXT: s_andn2_b32 s1, 15, s1
3471 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0
3472 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, s1, v0
3473 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
3474 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s2
3475 ; GFX8-NEXT: s_lshr_b32 s0, s0, s1
3476 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
3477 ; GFX8-NEXT: ; return to shader part epilog
3479 ; GFX9-LABEL: v_fshr_i16_vss:
3481 ; GFX9-NEXT: s_and_b32 s2, s1, 15
3482 ; GFX9-NEXT: s_andn2_b32 s1, 15, s1
3483 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0
3484 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, s1, v0
3485 ; GFX9-NEXT: s_and_b32 s0, 0xffff, s0
3486 ; GFX9-NEXT: s_and_b32 s1, 0xffff, s2
3487 ; GFX9-NEXT: s_lshr_b32 s0, s0, s1
3488 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
3489 ; GFX9-NEXT: ; return to shader part epilog
3491 ; GFX10-LABEL: v_fshr_i16_vss:
3493 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0
3494 ; GFX10-NEXT: s_andn2_b32 s2, 15, s1
3495 ; GFX10-NEXT: s_and_b32 s1, s1, 15
3496 ; GFX10-NEXT: s_and_b32 s0, 0xffff, s0
3497 ; GFX10-NEXT: s_and_b32 s1, 0xffff, s1
3498 ; GFX10-NEXT: v_lshlrev_b16 v0, s2, v0
3499 ; GFX10-NEXT: s_lshr_b32 s0, s0, s1
3500 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
3501 ; GFX10-NEXT: ; return to shader part epilog
3503 ; GFX11-LABEL: v_fshr_i16_vss:
3505 ; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0
3506 ; GFX11-NEXT: s_and_not1_b32 s2, 15, s1
3507 ; GFX11-NEXT: s_and_b32 s1, s1, 15
3508 ; GFX11-NEXT: s_and_b32 s0, 0xffff, s0
3509 ; GFX11-NEXT: s_and_b32 s1, 0xffff, s1
3510 ; GFX11-NEXT: v_lshlrev_b16 v0, s2, v0
3511 ; GFX11-NEXT: s_lshr_b32 s0, s0, s1
3512 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
3513 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v0
3514 ; GFX11-NEXT: ; return to shader part epilog
3515 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt)
3516 %cast.result = bitcast i16 %result to half
3517 ret half %cast.result
; <2 x i16> fshr in an amdgpu_ps function with all three operands inreg; the
; vector result is bitcast to i32. The GFX6 and GFX8 checks process the two
; 16-bit elements separately and recombine them with a shift by 16 and an OR.
; GFX9 and later mask both amounts at once with 0xf000f and rebuild the packed
; values with s_pack_ll_b32_b16. All check blocks are SALU-only.
3520 define amdgpu_ps i32 @s_fshr_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
3521 ; GFX6-LABEL: s_fshr_v2i16:
3523 ; GFX6-NEXT: s_lshl_b32 s5, s5, 16
3524 ; GFX6-NEXT: s_and_b32 s4, s4, 0xffff
3525 ; GFX6-NEXT: s_or_b32 s4, s5, s4
3526 ; GFX6-NEXT: s_bfe_u32 s5, s2, 0xf0001
3527 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
3528 ; GFX6-NEXT: s_lshr_b32 s5, s5, 14
3529 ; GFX6-NEXT: s_or_b32 s0, s0, s5
3530 ; GFX6-NEXT: s_bfe_u32 s5, s3, 0xf0001
3531 ; GFX6-NEXT: s_lshl_b32 s1, s1, 1
3532 ; GFX6-NEXT: s_lshr_b32 s5, s5, 14
3533 ; GFX6-NEXT: s_xor_b32 s4, s4, -1
3534 ; GFX6-NEXT: s_or_b32 s1, s1, s5
3535 ; GFX6-NEXT: s_lshl_b32 s2, s2, 1
3536 ; GFX6-NEXT: s_lshr_b32 s5, s4, 16
3537 ; GFX6-NEXT: s_and_b32 s6, s4, 15
3538 ; GFX6-NEXT: s_andn2_b32 s4, 15, s4
3539 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
3540 ; GFX6-NEXT: s_bfe_u32 s2, s2, 0xf0001
3541 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
3542 ; GFX6-NEXT: s_lshl_b32 s0, s0, s6
3543 ; GFX6-NEXT: s_lshr_b32 s2, s2, s4
3544 ; GFX6-NEXT: s_or_b32 s0, s0, s2
3545 ; GFX6-NEXT: s_and_b32 s2, s5, 15
3546 ; GFX6-NEXT: s_lshl_b32 s3, s3, 1
3547 ; GFX6-NEXT: s_andn2_b32 s4, 15, s5
3548 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
3549 ; GFX6-NEXT: s_lshl_b32 s1, s1, s2
3550 ; GFX6-NEXT: s_bfe_u32 s2, s3, 0xf0001
3551 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s4
3552 ; GFX6-NEXT: s_lshr_b32 s2, s2, s3
3553 ; GFX6-NEXT: s_or_b32 s1, s1, s2
3554 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
3555 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
3556 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
3557 ; GFX6-NEXT: s_or_b32 s0, s0, s1
3558 ; GFX6-NEXT: ; return to shader part epilog
3560 ; GFX8-LABEL: s_fshr_v2i16:
3562 ; GFX8-NEXT: s_and_b32 s5, 0xffff, s1
3563 ; GFX8-NEXT: s_lshr_b32 s3, s0, 16
3564 ; GFX8-NEXT: s_lshr_b32 s4, s1, 16
3565 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
3566 ; GFX8-NEXT: s_lshr_b32 s5, s5, 15
3567 ; GFX8-NEXT: s_or_b32 s0, s0, s5
3568 ; GFX8-NEXT: s_lshl_b32 s3, s3, 1
3569 ; GFX8-NEXT: s_lshr_b32 s5, s4, 15
3570 ; GFX8-NEXT: s_lshl_b32 s1, s1, 1
3571 ; GFX8-NEXT: s_xor_b32 s2, s2, -1
3572 ; GFX8-NEXT: s_or_b32 s3, s3, s5
3573 ; GFX8-NEXT: s_lshr_b32 s5, s2, 16
3574 ; GFX8-NEXT: s_and_b32 s6, s2, 15
3575 ; GFX8-NEXT: s_andn2_b32 s2, 15, s2
3576 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3577 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
3578 ; GFX8-NEXT: s_lshr_b32 s1, s1, 1
3579 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
3580 ; GFX8-NEXT: s_lshl_b32 s0, s0, s6
3581 ; GFX8-NEXT: s_lshr_b32 s1, s1, s2
3582 ; GFX8-NEXT: s_or_b32 s0, s0, s1
3583 ; GFX8-NEXT: s_and_b32 s1, s5, 15
3584 ; GFX8-NEXT: s_lshl_b32 s4, s4, 1
3585 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3586 ; GFX8-NEXT: s_andn2_b32 s2, 15, s5
3587 ; GFX8-NEXT: s_lshl_b32 s1, s3, s1
3588 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s4
3589 ; GFX8-NEXT: s_lshr_b32 s3, s3, 1
3590 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
3591 ; GFX8-NEXT: s_lshr_b32 s2, s3, s2
3592 ; GFX8-NEXT: s_or_b32 s1, s1, s2
3593 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
3594 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
3595 ; GFX8-NEXT: s_lshl_b32 s1, s1, 16
3596 ; GFX8-NEXT: s_or_b32 s0, s0, s1
3597 ; GFX8-NEXT: ; return to shader part epilog
3599 ; GFX9-LABEL: s_fshr_v2i16:
3601 ; GFX9-NEXT: s_lshr_b32 s4, s0, 16
3602 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001
3603 ; GFX9-NEXT: s_lshl_b32 s4, s4, 1
3604 ; GFX9-NEXT: s_and_b32 s3, s2, 0xf000f
3605 ; GFX9-NEXT: s_andn2_b32 s2, 0xf000f, s2
3606 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4
3607 ; GFX9-NEXT: s_lshr_b32 s4, s0, 16
3608 ; GFX9-NEXT: s_lshr_b32 s5, s2, 16
3609 ; GFX9-NEXT: s_lshl_b32 s0, s0, s2
3610 ; GFX9-NEXT: s_lshl_b32 s2, s4, s5
3611 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
3612 ; GFX9-NEXT: s_lshr_b32 s2, s1, 16
3613 ; GFX9-NEXT: s_and_b32 s1, s1, 0xffff
3614 ; GFX9-NEXT: s_lshr_b32 s4, s3, 16
3615 ; GFX9-NEXT: s_lshr_b32 s1, s1, s3
3616 ; GFX9-NEXT: s_lshr_b32 s2, s2, s4
3617 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2
3618 ; GFX9-NEXT: s_or_b32 s0, s0, s1
3619 ; GFX9-NEXT: ; return to shader part epilog
3621 ; GFX10-LABEL: s_fshr_v2i16:
3623 ; GFX10-NEXT: s_lshr_b32 s3, s0, 16
3624 ; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001
3625 ; GFX10-NEXT: s_lshl_b32 s3, s3, 1
3626 ; GFX10-NEXT: s_and_b32 s4, s2, 0xf000f
3627 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s3
3628 ; GFX10-NEXT: s_andn2_b32 s2, 0xf000f, s2
3629 ; GFX10-NEXT: s_lshr_b32 s3, s0, 16
3630 ; GFX10-NEXT: s_lshr_b32 s5, s2, 16
3631 ; GFX10-NEXT: s_lshl_b32 s0, s0, s2
3632 ; GFX10-NEXT: s_lshl_b32 s2, s3, s5
3633 ; GFX10-NEXT: s_lshr_b32 s3, s1, 16
3634 ; GFX10-NEXT: s_and_b32 s1, s1, 0xffff
3635 ; GFX10-NEXT: s_lshr_b32 s5, s4, 16
3636 ; GFX10-NEXT: s_lshr_b32 s1, s1, s4
3637 ; GFX10-NEXT: s_lshr_b32 s3, s3, s5
3638 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2
3639 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s3
3640 ; GFX10-NEXT: s_or_b32 s0, s0, s1
3641 ; GFX10-NEXT: ; return to shader part epilog
3643 ; GFX11-LABEL: s_fshr_v2i16:
3645 ; GFX11-NEXT: s_lshr_b32 s3, s0, 16
3646 ; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001
3647 ; GFX11-NEXT: s_lshl_b32 s3, s3, 1
3648 ; GFX11-NEXT: s_and_b32 s4, s2, 0xf000f
3649 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s3
3650 ; GFX11-NEXT: s_and_not1_b32 s2, 0xf000f, s2
3651 ; GFX11-NEXT: s_lshr_b32 s3, s0, 16
3652 ; GFX11-NEXT: s_lshr_b32 s5, s2, 16
3653 ; GFX11-NEXT: s_lshl_b32 s0, s0, s2
3654 ; GFX11-NEXT: s_lshl_b32 s2, s3, s5
3655 ; GFX11-NEXT: s_lshr_b32 s3, s1, 16
3656 ; GFX11-NEXT: s_and_b32 s1, s1, 0xffff
3657 ; GFX11-NEXT: s_lshr_b32 s5, s4, 16
3658 ; GFX11-NEXT: s_lshr_b32 s1, s1, s4
3659 ; GFX11-NEXT: s_lshr_b32 s3, s3, s5
3660 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s2
3661 ; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s3
3662 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
3663 ; GFX11-NEXT: s_or_b32 s0, s0, s1
3664 ; GFX11-NEXT: ; return to shader part epilog
3665 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3666 %cast = bitcast <2 x i16> %result to i32
; <2 x i16> fshr with ordinary (non-inreg) arguments and variable amounts.
; The GFX6 checks handle the two elements with separate 32-bit VALU
; operations. The GFX8 checks use 16-bit shifts, with SDWA forms selecting
; WORD_1 for the high element. GFX9 and later mask both amounts at once with
; 0xf000f and use the packed v_pk_lshlrev_b16 / v_pk_lshrrev_b16 shifts.
3670 define <2 x i16> @v_fshr_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) {
3671 ; GFX6-LABEL: v_fshr_v2i16:
3673 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3674 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5
3675 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
3676 ; GFX6-NEXT: v_or_b32_e32 v4, v5, v4
3677 ; GFX6-NEXT: v_bfe_u32 v5, v2, 1, 15
3678 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
3679 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 14, v5
3680 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v5
3681 ; GFX6-NEXT: v_bfe_u32 v5, v3, 1, 15
3682 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
3683 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 14, v5
3684 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
3685 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v5
3686 ; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v4
3687 ; GFX6-NEXT: v_and_b32_e32 v6, 15, v4
3688 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
3689 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2
3690 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
3691 ; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
3692 ; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15
3693 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
3694 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v6, v0
3695 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2
3696 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
3697 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v5
3698 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v5
3699 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
3700 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
3701 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3702 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1
3703 ; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15
3704 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v4
3705 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2
3706 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
3707 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3709 ; GFX8-LABEL: v_fshr_v2i16:
3711 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3712 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v0
3713 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, 15, v1
3714 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v4
3715 ; GFX8-NEXT: v_mov_b32_e32 v4, 1
3716 ; GFX8-NEXT: v_mov_b32_e32 v5, 15
3717 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3718 ; GFX8-NEXT: v_lshrrev_b16_sdwa v6, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3719 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2
3720 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v6
3721 ; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v1
3722 ; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3723 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v2
3724 ; GFX8-NEXT: v_xor_b32_e32 v7, -1, v2
3725 ; GFX8-NEXT: v_and_b32_e32 v7, 15, v7
3726 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, v4, v3
3727 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, 1, v6
3728 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, v7, v4
3729 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v4
3730 ; GFX8-NEXT: v_and_b32_sdwa v4, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3731 ; GFX8-NEXT: v_mov_b32_e32 v5, -1
3732 ; GFX8-NEXT: v_xor_b32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3733 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v2
3734 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 1, v1
3735 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v4, v0
3736 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1
3737 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
3738 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
3739 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
3740 ; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3741 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3743 ; GFX9-LABEL: v_fshr_v2i16:
3745 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3746 ; GFX9-NEXT: v_and_b32_e32 v3, 0xf000f, v2
3747 ; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2
3748 ; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v2
3749 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3750 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v2, v0
3751 ; GFX9-NEXT: v_pk_lshrrev_b16 v1, v3, v1
3752 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
3753 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3755 ; GFX10-LABEL: v_fshr_v2i16:
3757 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3758 ; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2
3759 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3760 ; GFX10-NEXT: v_and_b32_e32 v2, 0xf000f, v2
3761 ; GFX10-NEXT: v_and_b32_e32 v3, 0xf000f, v3
3762 ; GFX10-NEXT: v_pk_lshrrev_b16 v1, v2, v1
3763 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v3, v0
3764 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3765 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3767 ; GFX11-LABEL: v_fshr_v2i16:
3769 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3770 ; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2
3771 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
3772 ; GFX11-NEXT: v_and_b32_e32 v2, 0xf000f, v2
3773 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
3774 ; GFX11-NEXT: v_and_b32_e32 v3, 0xf000f, v3
3775 ; GFX11-NEXT: v_pk_lshrrev_b16 v1, v2, v1
3776 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3777 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, v3, v0
3778 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
3779 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3780 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3781 ret <2 x i16> %result
; <2 x i16> fshr with ordinary (non-inreg) arguments and the constant
; per-element amounts <4, 8>. GFX9 and later encode both element amounts in
; one packed constant per shift: 0x8000c for the left shift and 0x80004 for
; the right shift. The GFX6 and GFX8 checks shift each element separately.
3784 define <2 x i16> @v_fshr_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) {
3785 ; GFX6-LABEL: v_fshr_v2i16_4_8:
3787 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3788 ; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15
3789 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 12, v0
3790 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 3, v2
3791 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
3792 ; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15
3793 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1
3794 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 7, v2
3795 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
3796 ; GFX6-NEXT: s_setpc_b64 s[30:31]
3798 ; GFX8-LABEL: v_fshr_v2i16_4_8:
3800 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3801 ; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0
3802 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 12, v0
3803 ; GFX8-NEXT: v_lshrrev_b16_e32 v3, 4, v1
3804 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
3805 ; GFX8-NEXT: v_mov_b32_e32 v3, 8
3806 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, 8, v2
3807 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
3808 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
3809 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
3810 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
3811 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3812 ; GFX8-NEXT: s_setpc_b64 s[30:31]
3814 ; GFX9-LABEL: v_fshr_v2i16_4_8:
3816 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3817 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x8000c
3818 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v2, v0
3819 ; GFX9-NEXT: v_mov_b32_e32 v2, 0x80004
3820 ; GFX9-NEXT: v_pk_lshrrev_b16 v1, v2, v1
3821 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
3822 ; GFX9-NEXT: s_setpc_b64 s[30:31]
3824 ; GFX10-LABEL: v_fshr_v2i16_4_8:
3826 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3827 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 0x8000c, v0
3828 ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 0x80004, v1
3829 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v1
3830 ; GFX10-NEXT: s_setpc_b64 s[30:31]
3832 ; GFX11-LABEL: v_fshr_v2i16_4_8:
3834 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3835 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, 0x8000c, v0
3836 ; GFX11-NEXT: v_pk_lshrrev_b16 v1, 0x80004, v1
3837 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
3838 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v1
3839 ; GFX11-NEXT: s_setpc_b64 s[30:31]
3840 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> <i16 4, i16 8>)
3841 ret <2 x i16> %result
; Funnel shift right (llvm.fshr) on <2 x i16> in an amdgpu_ps function where
; %lhs and %rhs are marked inreg and %amt is not. In the expected output below
; %lhs/%rhs are read from s-registers and %amt from v-registers, so the shifts
; themselves are VALU instructions. The <2 x i16> result is bitcast to float.
; The assertion lines are autogenerated (see the NOTE at the top of the file):
; rerun utils/update_llc_test_checks.py rather than editing them by hand.
3844 define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> %amt) {
3845 ; GFX6-LABEL: v_fshr_v2i16_ssv:
3847 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
3848 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
3849 ; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
3850 ; GFX6-NEXT: s_bfe_u32 s4, s2, 0xf0001
3851 ; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
3852 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
3853 ; GFX6-NEXT: s_lshr_b32 s4, s4, 14
3854 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
3855 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v0
3856 ; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0
3857 ; GFX6-NEXT: s_or_b32 s0, s0, s4
3858 ; GFX6-NEXT: s_lshl_b32 s2, s2, 1
3859 ; GFX6-NEXT: v_and_b32_e32 v0, 15, v0
3860 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3861 ; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2
3862 ; GFX6-NEXT: s_bfe_u32 s0, s2, 0xf0001
3863 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
3864 ; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0
3865 ; GFX6-NEXT: s_bfe_u32 s4, s3, 0xf0001
3866 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
3867 ; GFX6-NEXT: v_and_b32_e32 v2, 15, v1
3868 ; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1
3869 ; GFX6-NEXT: s_lshl_b32 s1, s1, 1
3870 ; GFX6-NEXT: s_lshr_b32 s4, s4, 14
3871 ; GFX6-NEXT: s_lshl_b32 s3, s3, 1
3872 ; GFX6-NEXT: v_and_b32_e32 v1, 15, v1
3873 ; GFX6-NEXT: s_or_b32 s1, s1, s4
3874 ; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
3875 ; GFX6-NEXT: s_bfe_u32 s0, s3, 0xf0001
3876 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
3877 ; GFX6-NEXT: v_lshl_b32_e32 v2, s1, v2
3878 ; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1
3879 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
3880 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
3881 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
3882 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
3883 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
3884 ; GFX6-NEXT: ; return to shader part epilog
3886 ; GFX8-LABEL: v_fshr_v2i16_ssv:
3888 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s1
3889 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
3890 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
3891 ; GFX8-NEXT: s_lshr_b32 s4, s4, 15
3892 ; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
3893 ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
3894 ; GFX8-NEXT: s_or_b32 s0, s0, s4
3895 ; GFX8-NEXT: s_lshl_b32 s1, s1, 1
3896 ; GFX8-NEXT: v_and_b32_e32 v1, 15, v0
3897 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v0
3898 ; GFX8-NEXT: v_lshlrev_b16_e64 v1, v1, s0
3899 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s1
3900 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v2
3901 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
3902 ; GFX8-NEXT: v_lshrrev_b16_e64 v2, v2, s0
3903 ; GFX8-NEXT: s_lshr_b32 s4, s3, 15
3904 ; GFX8-NEXT: s_lshl_b32 s3, s3, 1
3905 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
3906 ; GFX8-NEXT: v_mov_b32_e32 v2, 15
3907 ; GFX8-NEXT: v_mov_b32_e32 v3, -1
3908 ; GFX8-NEXT: s_lshl_b32 s2, s2, 1
3909 ; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3910 ; GFX8-NEXT: v_xor_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3911 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s3
3912 ; GFX8-NEXT: s_or_b32 s2, s2, s4
3913 ; GFX8-NEXT: v_and_b32_e32 v0, 15, v0
3914 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
3915 ; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s2
3916 ; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0
3917 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
3918 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
3919 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
3920 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
3921 ; GFX8-NEXT: ; return to shader part epilog
3923 ; GFX9-LABEL: v_fshr_v2i16_ssv:
3925 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
3926 ; GFX9-NEXT: v_and_b32_e32 v1, 0xf000f, v0
3927 ; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
3928 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001
3929 ; GFX9-NEXT: s_lshl_b32 s2, s2, 1
3930 ; GFX9-NEXT: v_and_b32_e32 v0, 0xf000f, v0
3931 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
3932 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v0, s0
3933 ; GFX9-NEXT: v_pk_lshrrev_b16 v1, v1, s1
3934 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1
3935 ; GFX9-NEXT: ; return to shader part epilog
3937 ; GFX10-LABEL: v_fshr_v2i16_ssv:
3939 ; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0
3940 ; GFX10-NEXT: s_lshr_b32 s2, s0, 16
3941 ; GFX10-NEXT: v_and_b32_e32 v0, 0xf000f, v0
3942 ; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001
3943 ; GFX10-NEXT: s_lshl_b32 s2, s2, 1
3944 ; GFX10-NEXT: v_and_b32_e32 v1, 0xf000f, v1
3945 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2
3946 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, v0, s1
3947 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, v1, s0
3948 ; GFX10-NEXT: v_or_b32_e32 v0, v1, v0
3949 ; GFX10-NEXT: ; return to shader part epilog
3951 ; GFX11-LABEL: v_fshr_v2i16_ssv:
3953 ; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0
3954 ; GFX11-NEXT: s_lshr_b32 s2, s0, 16
3955 ; GFX11-NEXT: v_and_b32_e32 v0, 0xf000f, v0
3956 ; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001
3957 ; GFX11-NEXT: s_lshl_b32 s2, s2, 1
3958 ; GFX11-NEXT: v_and_b32_e32 v1, 0xf000f, v1
3959 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s2
3960 ; GFX11-NEXT: v_pk_lshrrev_b16 v0, v0, s1
3961 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3962 ; GFX11-NEXT: v_pk_lshlrev_b16 v1, v1, s0
3963 ; GFX11-NEXT: v_or_b32_e32 v0, v1, v0
3964 ; GFX11-NEXT: ; return to shader part epilog
3965 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
3966 %cast = bitcast <2 x i16> %result to float
; Funnel shift right (llvm.fshr) on <2 x i16> in an amdgpu_ps function where
; %lhs and %amt are marked inreg and %rhs is not. In the expected output below
; %rhs is read from v-registers while %lhs and %amt are read from s-registers,
; so the shift-amount masking is done with scalar instructions. The <2 x i16>
; result is bitcast to float.
; The assertion lines are autogenerated (see the NOTE at the top of the file):
; rerun utils/update_llc_test_checks.py rather than editing them by hand.
3970 define amdgpu_ps float @v_fshr_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <2 x i16> inreg %amt) {
3971 ; GFX6-LABEL: v_fshr_v2i16_svs:
3973 ; GFX6-NEXT: v_bfe_u32 v2, v0, 1, 15
3974 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
3975 ; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
3976 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
3977 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 14, v2
3978 ; GFX6-NEXT: v_bfe_u32 v3, v1, 1, 15
3979 ; GFX6-NEXT: s_or_b32 s2, s3, s2
3980 ; GFX6-NEXT: v_or_b32_e32 v2, s0, v2
3981 ; GFX6-NEXT: s_lshl_b32 s0, s1, 1
3982 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 14, v3
3983 ; GFX6-NEXT: v_or_b32_e32 v3, s0, v3
3984 ; GFX6-NEXT: s_xor_b32 s0, s2, -1
3985 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
3986 ; GFX6-NEXT: s_lshr_b32 s1, s0, 16
3987 ; GFX6-NEXT: s_and_b32 s2, s0, 15
3988 ; GFX6-NEXT: s_andn2_b32 s0, 15, s0
3989 ; GFX6-NEXT: v_bfe_u32 v0, v0, 1, 15
3990 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
3991 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
3992 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, s0, v0
3993 ; GFX6-NEXT: s_and_b32 s0, s1, 15
3994 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
3995 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, s2, v2
3996 ; GFX6-NEXT: s_andn2_b32 s1, 15, s1
3997 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
3998 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
3999 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, s0, v3
4000 ; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 15
4001 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s1
4002 ; GFX6-NEXT: v_lshrrev_b32_e32 v1, s0, v1
4003 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
4004 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
4005 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
4006 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
4007 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
4008 ; GFX6-NEXT: ; return to shader part epilog
4010 ; GFX8-LABEL: v_fshr_v2i16_svs:
4012 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
4013 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
4014 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 15, v0
4015 ; GFX8-NEXT: v_mov_b32_e32 v2, 15
4016 ; GFX8-NEXT: v_or_b32_e32 v1, s0, v1
4017 ; GFX8-NEXT: s_lshl_b32 s0, s2, 1
4018 ; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4019 ; GFX8-NEXT: v_or_b32_e32 v2, s0, v2
4020 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v0
4021 ; GFX8-NEXT: v_mov_b32_e32 v4, 1
4022 ; GFX8-NEXT: s_xor_b32 s0, s1, -1
4023 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4024 ; GFX8-NEXT: s_lshr_b32 s1, s0, 16
4025 ; GFX8-NEXT: s_and_b32 s2, s0, 15
4026 ; GFX8-NEXT: s_andn2_b32 s0, 15, s0
4027 ; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3
4028 ; GFX8-NEXT: v_lshrrev_b16_e32 v3, s0, v3
4029 ; GFX8-NEXT: s_and_b32 s0, s1, 15
4030 ; GFX8-NEXT: s_andn2_b32 s1, 15, s1
4031 ; GFX8-NEXT: v_lshrrev_b16_e32 v0, 1, v0
4032 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, s0, v2
4033 ; GFX8-NEXT: v_lshrrev_b16_e32 v0, s1, v0
4034 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
4035 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, s2, v1
4036 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
4037 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
4038 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
4039 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4040 ; GFX8-NEXT: ; return to shader part epilog
4042 ; GFX9-LABEL: v_fshr_v2i16_svs:
4044 ; GFX9-NEXT: s_lshr_b32 s3, s0, 16
4045 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001
4046 ; GFX9-NEXT: s_lshl_b32 s3, s3, 1
4047 ; GFX9-NEXT: s_and_b32 s2, s1, 0xf000f
4048 ; GFX9-NEXT: s_andn2_b32 s1, 0xf000f, s1
4049 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s3
4050 ; GFX9-NEXT: s_lshr_b32 s3, s0, 16
4051 ; GFX9-NEXT: s_lshr_b32 s4, s1, 16
4052 ; GFX9-NEXT: s_lshl_b32 s0, s0, s1
4053 ; GFX9-NEXT: s_lshl_b32 s1, s3, s4
4054 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1
4055 ; GFX9-NEXT: v_pk_lshrrev_b16 v0, s2, v0
4056 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
4057 ; GFX9-NEXT: ; return to shader part epilog
4059 ; GFX10-LABEL: v_fshr_v2i16_svs:
4061 ; GFX10-NEXT: s_lshr_b32 s2, s0, 16
4062 ; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001
4063 ; GFX10-NEXT: s_lshl_b32 s2, s2, 1
4064 ; GFX10-NEXT: s_and_b32 s3, s1, 0xf000f
4065 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2
4066 ; GFX10-NEXT: s_andn2_b32 s1, 0xf000f, s1
4067 ; GFX10-NEXT: s_lshr_b32 s2, s0, 16
4068 ; GFX10-NEXT: s_lshr_b32 s4, s1, 16
4069 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, s3, v0
4070 ; GFX10-NEXT: s_lshl_b32 s0, s0, s1
4071 ; GFX10-NEXT: s_lshl_b32 s1, s2, s4
4072 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1
4073 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
4074 ; GFX10-NEXT: ; return to shader part epilog
4076 ; GFX11-LABEL: v_fshr_v2i16_svs:
4078 ; GFX11-NEXT: s_lshr_b32 s2, s0, 16
4079 ; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001
4080 ; GFX11-NEXT: s_lshl_b32 s2, s2, 1
4081 ; GFX11-NEXT: s_and_b32 s3, s1, 0xf000f
4082 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s2
4083 ; GFX11-NEXT: s_and_not1_b32 s1, 0xf000f, s1
4084 ; GFX11-NEXT: s_lshr_b32 s2, s0, 16
4085 ; GFX11-NEXT: s_lshr_b32 s4, s1, 16
4086 ; GFX11-NEXT: v_pk_lshrrev_b16 v0, s3, v0
4087 ; GFX11-NEXT: s_lshl_b32 s0, s0, s1
4088 ; GFX11-NEXT: s_lshl_b32 s1, s2, s4
4089 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
4090 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s1
4091 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
4092 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v0
4093 ; GFX11-NEXT: ; return to shader part epilog
4094 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
4095 %cast = bitcast <2 x i16> %result to float
; Funnel shift right (llvm.fshr) on <2 x i16> in an amdgpu_ps function where
; %rhs and %amt are marked inreg and %lhs is not. In the expected output below
; %lhs is read from v-registers while %rhs and %amt are read from s-registers;
; from GFX8 on the %rhs right-shift half is emitted as scalar s_lshr_b32
; instructions. The <2 x i16> result is bitcast to float.
; The assertion lines are autogenerated (see the NOTE at the top of the file):
; rerun utils/update_llc_test_checks.py rather than editing them by hand.
4099 define amdgpu_ps float @v_fshr_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) {
4100 ; GFX6-LABEL: v_fshr_v2i16_vss:
4102 ; GFX6-NEXT: s_lshl_b32 s3, s3, 16
4103 ; GFX6-NEXT: s_and_b32 s2, s2, 0xffff
4104 ; GFX6-NEXT: s_or_b32 s2, s3, s2
4105 ; GFX6-NEXT: s_bfe_u32 s3, s0, 0xf0001
4106 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
4107 ; GFX6-NEXT: s_lshr_b32 s3, s3, 14
4108 ; GFX6-NEXT: v_or_b32_e32 v0, s3, v0
4109 ; GFX6-NEXT: s_bfe_u32 s3, s1, 0xf0001
4110 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
4111 ; GFX6-NEXT: s_lshr_b32 s3, s3, 14
4112 ; GFX6-NEXT: s_xor_b32 s2, s2, -1
4113 ; GFX6-NEXT: v_or_b32_e32 v1, s3, v1
4114 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
4115 ; GFX6-NEXT: s_lshr_b32 s3, s2, 16
4116 ; GFX6-NEXT: s_and_b32 s4, s2, 15
4117 ; GFX6-NEXT: s_andn2_b32 s2, 15, s2
4118 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
4119 ; GFX6-NEXT: s_bfe_u32 s0, s0, 0xf0001
4120 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
4121 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0
4122 ; GFX6-NEXT: s_lshr_b32 s0, s0, s2
4123 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
4124 ; GFX6-NEXT: s_and_b32 s0, s3, 15
4125 ; GFX6-NEXT: s_lshl_b32 s1, s1, 1
4126 ; GFX6-NEXT: s_andn2_b32 s2, 15, s3
4127 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
4128 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, s0, v1
4129 ; GFX6-NEXT: s_bfe_u32 s0, s1, 0xf0001
4130 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
4131 ; GFX6-NEXT: s_lshr_b32 s0, s0, s1
4132 ; GFX6-NEXT: v_or_b32_e32 v1, s0, v1
4133 ; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
4134 ; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
4135 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
4136 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
4137 ; GFX6-NEXT: ; return to shader part epilog
4139 ; GFX8-LABEL: v_fshr_v2i16_vss:
4141 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s0
4142 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16
4143 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 1, v0
4144 ; GFX8-NEXT: s_lshr_b32 s3, s3, 15
4145 ; GFX8-NEXT: v_mov_b32_e32 v2, 1
4146 ; GFX8-NEXT: v_or_b32_e32 v1, s3, v1
4147 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4148 ; GFX8-NEXT: s_lshr_b32 s3, s2, 15
4149 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
4150 ; GFX8-NEXT: s_xor_b32 s1, s1, -1
4151 ; GFX8-NEXT: v_or_b32_e32 v0, s3, v0
4152 ; GFX8-NEXT: s_lshr_b32 s3, s1, 16
4153 ; GFX8-NEXT: s_and_b32 s4, s1, 15
4154 ; GFX8-NEXT: s_andn2_b32 s1, 15, s1
4155 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
4156 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
4157 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
4158 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, s4, v1
4159 ; GFX8-NEXT: s_lshr_b32 s0, s0, s1
4160 ; GFX8-NEXT: s_lshl_b32 s2, s2, 1
4161 ; GFX8-NEXT: v_or_b32_e32 v1, s0, v1
4162 ; GFX8-NEXT: s_and_b32 s0, s3, 15
4163 ; GFX8-NEXT: s_andn2_b32 s1, 15, s3
4164 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, s0, v0
4165 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s2
4166 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1
4167 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
4168 ; GFX8-NEXT: s_lshr_b32 s0, s0, s1
4169 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
4170 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
4171 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
4172 ; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4173 ; GFX8-NEXT: ; return to shader part epilog
4175 ; GFX9-LABEL: v_fshr_v2i16_vss:
4177 ; GFX9-NEXT: s_and_b32 s2, s1, 0xf000f
4178 ; GFX9-NEXT: s_andn2_b32 s1, 0xf000f, s1
4179 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4180 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, s1, v0
4181 ; GFX9-NEXT: s_lshr_b32 s1, s0, 16
4182 ; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
4183 ; GFX9-NEXT: s_lshr_b32 s3, s2, 16
4184 ; GFX9-NEXT: s_lshr_b32 s0, s0, s2
4185 ; GFX9-NEXT: s_lshr_b32 s1, s1, s3
4186 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1
4187 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
4188 ; GFX9-NEXT: ; return to shader part epilog
4190 ; GFX10-LABEL: v_fshr_v2i16_vss:
4192 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4193 ; GFX10-NEXT: s_and_b32 s2, s1, 0xf000f
4194 ; GFX10-NEXT: s_andn2_b32 s1, 0xf000f, s1
4195 ; GFX10-NEXT: s_lshr_b32 s3, s0, 16
4196 ; GFX10-NEXT: s_and_b32 s0, s0, 0xffff
4197 ; GFX10-NEXT: s_lshr_b32 s4, s2, 16
4198 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, s1, v0
4199 ; GFX10-NEXT: s_lshr_b32 s0, s0, s2
4200 ; GFX10-NEXT: s_lshr_b32 s1, s3, s4
4201 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1
4202 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
4203 ; GFX10-NEXT: ; return to shader part epilog
4205 ; GFX11-LABEL: v_fshr_v2i16_vss:
4207 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4208 ; GFX11-NEXT: s_and_b32 s2, s1, 0xf000f
4209 ; GFX11-NEXT: s_and_not1_b32 s1, 0xf000f, s1
4210 ; GFX11-NEXT: s_lshr_b32 s3, s0, 16
4211 ; GFX11-NEXT: s_and_b32 s0, s0, 0xffff
4212 ; GFX11-NEXT: s_lshr_b32 s4, s2, 16
4213 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, s1, v0
4214 ; GFX11-NEXT: s_lshr_b32 s0, s0, s2
4215 ; GFX11-NEXT: s_lshr_b32 s1, s3, s4
4216 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
4217 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s1
4218 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
4219 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v0
4220 ; GFX11-NEXT: ; return to shader part epilog
4221 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt)
4222 %cast = bitcast <2 x i16> %result to float
; Funnel shift right (llvm.fshr) on <3 x i16> in an amdgpu_ps function with all
; three operands marked inreg. Every expected instruction below is a scalar
; (s_*) instruction operating on s-registers. The <3 x i16> result is bitcast
; to i48, matching the function's i48 return type; the last expected
; instruction for each target masks s1 with 0xffff.
; The assertion lines are autogenerated (see the NOTE at the top of the file):
; rerun utils/update_llc_test_checks.py rather than editing them by hand.
4226 define amdgpu_ps i48 @s_fshr_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <3 x i16> inreg %amt) {
4227 ; GFX6-LABEL: s_fshr_v3i16:
4229 ; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
4230 ; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
4231 ; GFX6-NEXT: s_lshl_b32 s7, s7, 16
4232 ; GFX6-NEXT: s_or_b32 s6, s6, s7
4233 ; GFX6-NEXT: s_and_b32 s7, s8, 0xffff
4234 ; GFX6-NEXT: s_bfe_u32 s8, s3, 0xf0001
4235 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
4236 ; GFX6-NEXT: s_lshr_b32 s8, s8, 14
4237 ; GFX6-NEXT: s_or_b32 s0, s0, s8
4238 ; GFX6-NEXT: s_bfe_u32 s8, s4, 0xf0001
4239 ; GFX6-NEXT: s_lshl_b32 s1, s1, 1
4240 ; GFX6-NEXT: s_lshr_b32 s8, s8, 14
4241 ; GFX6-NEXT: s_xor_b32 s6, s6, -1
4242 ; GFX6-NEXT: s_or_b32 s1, s1, s8
4243 ; GFX6-NEXT: s_lshl_b32 s3, s3, 1
4244 ; GFX6-NEXT: s_lshr_b32 s8, s6, 16
4245 ; GFX6-NEXT: s_and_b32 s9, s6, 15
4246 ; GFX6-NEXT: s_andn2_b32 s6, 15, s6
4247 ; GFX6-NEXT: s_and_b32 s9, 0xffff, s9
4248 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001
4249 ; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
4250 ; GFX6-NEXT: s_lshl_b32 s0, s0, s9
4251 ; GFX6-NEXT: s_lshr_b32 s3, s3, s6
4252 ; GFX6-NEXT: s_or_b32 s0, s0, s3
4253 ; GFX6-NEXT: s_and_b32 s3, s8, 15
4254 ; GFX6-NEXT: s_lshl_b32 s4, s4, 1
4255 ; GFX6-NEXT: s_andn2_b32 s6, 15, s8
4256 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
4257 ; GFX6-NEXT: s_lshl_b32 s1, s1, s3
4258 ; GFX6-NEXT: s_bfe_u32 s3, s4, 0xf0001
4259 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s6
4260 ; GFX6-NEXT: s_lshr_b32 s3, s3, s4
4261 ; GFX6-NEXT: s_or_b32 s1, s1, s3
4262 ; GFX6-NEXT: s_bfe_u32 s3, s5, 0xf0001
4263 ; GFX6-NEXT: s_lshl_b32 s2, s2, 1
4264 ; GFX6-NEXT: s_lshr_b32 s3, s3, 14
4265 ; GFX6-NEXT: s_xor_b32 s4, s7, -1
4266 ; GFX6-NEXT: s_or_b32 s2, s2, s3
4267 ; GFX6-NEXT: s_lshl_b32 s3, s5, 1
4268 ; GFX6-NEXT: s_and_b32 s5, s4, 15
4269 ; GFX6-NEXT: s_andn2_b32 s4, 15, s4
4270 ; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
4271 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001
4272 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
4273 ; GFX6-NEXT: s_lshl_b32 s2, s2, s5
4274 ; GFX6-NEXT: s_lshr_b32 s3, s3, s4
4275 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
4276 ; GFX6-NEXT: s_or_b32 s2, s2, s3
4277 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
4278 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
4279 ; GFX6-NEXT: s_or_b32 s0, s0, s1
4280 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s2
4281 ; GFX6-NEXT: ; return to shader part epilog
4283 ; GFX8-LABEL: s_fshr_v3i16:
4285 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s2
4286 ; GFX8-NEXT: s_lshr_b32 s6, s0, 16
4287 ; GFX8-NEXT: s_lshr_b32 s7, s2, 16
4288 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
4289 ; GFX8-NEXT: s_lshr_b32 s8, s8, 15
4290 ; GFX8-NEXT: s_or_b32 s0, s0, s8
4291 ; GFX8-NEXT: s_lshl_b32 s6, s6, 1
4292 ; GFX8-NEXT: s_lshr_b32 s8, s7, 15
4293 ; GFX8-NEXT: s_lshl_b32 s2, s2, 1
4294 ; GFX8-NEXT: s_xor_b32 s4, s4, -1
4295 ; GFX8-NEXT: s_or_b32 s6, s6, s8
4296 ; GFX8-NEXT: s_lshr_b32 s8, s4, 16
4297 ; GFX8-NEXT: s_and_b32 s9, s4, 15
4298 ; GFX8-NEXT: s_andn2_b32 s4, 15, s4
4299 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4300 ; GFX8-NEXT: s_and_b32 s9, 0xffff, s9
4301 ; GFX8-NEXT: s_lshr_b32 s2, s2, 1
4302 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
4303 ; GFX8-NEXT: s_lshl_b32 s0, s0, s9
4304 ; GFX8-NEXT: s_lshr_b32 s2, s2, s4
4305 ; GFX8-NEXT: s_or_b32 s0, s0, s2
4306 ; GFX8-NEXT: s_and_b32 s2, s8, 15
4307 ; GFX8-NEXT: s_lshl_b32 s7, s7, 1
4308 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4309 ; GFX8-NEXT: s_andn2_b32 s4, 15, s8
4310 ; GFX8-NEXT: s_lshl_b32 s2, s6, s2
4311 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s7
4312 ; GFX8-NEXT: s_lshr_b32 s6, s6, 1
4313 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
4314 ; GFX8-NEXT: s_lshr_b32 s4, s6, s4
4315 ; GFX8-NEXT: s_or_b32 s2, s2, s4
4316 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s3
4317 ; GFX8-NEXT: s_lshl_b32 s1, s1, 1
4318 ; GFX8-NEXT: s_lshr_b32 s4, s4, 15
4319 ; GFX8-NEXT: s_or_b32 s1, s1, s4
4320 ; GFX8-NEXT: s_lshl_b32 s3, s3, 1
4321 ; GFX8-NEXT: s_xor_b32 s4, s5, -1
4322 ; GFX8-NEXT: s_and_b32 s5, s4, 15
4323 ; GFX8-NEXT: s_andn2_b32 s4, 15, s4
4324 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
4325 ; GFX8-NEXT: s_and_b32 s5, 0xffff, s5
4326 ; GFX8-NEXT: s_lshr_b32 s3, s3, 1
4327 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
4328 ; GFX8-NEXT: s_lshl_b32 s1, s1, s5
4329 ; GFX8-NEXT: s_lshr_b32 s3, s3, s4
4330 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4331 ; GFX8-NEXT: s_or_b32 s1, s1, s3
4332 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
4333 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
4334 ; GFX8-NEXT: s_or_b32 s0, s0, s2
4335 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
4336 ; GFX8-NEXT: ; return to shader part epilog
4338 ; GFX9-LABEL: s_fshr_v3i16:
4340 ; GFX9-NEXT: s_lshr_b32 s7, s0, 16
4341 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001
4342 ; GFX9-NEXT: s_lshl_b32 s7, s7, 1
4343 ; GFX9-NEXT: s_and_b32 s6, s4, 0xf000f
4344 ; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s4
4345 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s7
4346 ; GFX9-NEXT: s_lshr_b32 s7, s0, 16
4347 ; GFX9-NEXT: s_lshr_b32 s8, s4, 16
4348 ; GFX9-NEXT: s_lshl_b32 s0, s0, s4
4349 ; GFX9-NEXT: s_lshl_b32 s4, s7, s8
4350 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4
4351 ; GFX9-NEXT: s_lshr_b32 s4, s2, 16
4352 ; GFX9-NEXT: s_and_b32 s2, s2, 0xffff
4353 ; GFX9-NEXT: s_lshr_b32 s7, s6, 16
4354 ; GFX9-NEXT: s_lshr_b32 s2, s2, s6
4355 ; GFX9-NEXT: s_lshr_b32 s4, s4, s7
4356 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4
4357 ; GFX9-NEXT: s_or_b32 s0, s0, s2
4358 ; GFX9-NEXT: s_and_b32 s2, s5, 0xf000f
4359 ; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s5
4360 ; GFX9-NEXT: s_lshr_b32 s5, s1, 16
4361 ; GFX9-NEXT: s_lshl_b32 s1, s1, 0x10001
4362 ; GFX9-NEXT: s_lshl_b32 s5, s5, 1
4363 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5
4364 ; GFX9-NEXT: s_lshr_b32 s5, s1, 16
4365 ; GFX9-NEXT: s_lshr_b32 s6, s4, 16
4366 ; GFX9-NEXT: s_lshl_b32 s1, s1, s4
4367 ; GFX9-NEXT: s_lshl_b32 s4, s5, s6
4368 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4
4369 ; GFX9-NEXT: s_lshr_b32 s4, s3, 16
4370 ; GFX9-NEXT: s_and_b32 s3, s3, 0xffff
4371 ; GFX9-NEXT: s_lshr_b32 s5, s2, 16
4372 ; GFX9-NEXT: s_lshr_b32 s2, s3, s2
4373 ; GFX9-NEXT: s_lshr_b32 s3, s4, s5
4374 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s3
4375 ; GFX9-NEXT: s_or_b32 s1, s1, s2
4376 ; GFX9-NEXT: s_lshr_b32 s2, s0, 16
4377 ; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
4378 ; GFX9-NEXT: s_lshl_b32 s2, s2, 16
4379 ; GFX9-NEXT: s_or_b32 s0, s0, s2
4380 ; GFX9-NEXT: s_and_b32 s1, s1, 0xffff
4381 ; GFX9-NEXT: ; return to shader part epilog
4383 ; GFX10-LABEL: s_fshr_v3i16:
4385 ; GFX10-NEXT: s_lshr_b32 s6, s0, 16
4386 ; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001
4387 ; GFX10-NEXT: s_lshl_b32 s6, s6, 1
4388 ; GFX10-NEXT: s_and_b32 s7, s4, 0xf000f
4389 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s6
4390 ; GFX10-NEXT: s_andn2_b32 s4, 0xf000f, s4
4391 ; GFX10-NEXT: s_lshr_b32 s6, s0, 16
4392 ; GFX10-NEXT: s_lshr_b32 s8, s4, 16
4393 ; GFX10-NEXT: s_lshl_b32 s0, s0, s4
4394 ; GFX10-NEXT: s_lshl_b32 s4, s6, s8
4395 ; GFX10-NEXT: s_lshr_b32 s6, s2, 16
4396 ; GFX10-NEXT: s_and_b32 s2, s2, 0xffff
4397 ; GFX10-NEXT: s_lshr_b32 s8, s7, 16
4398 ; GFX10-NEXT: s_lshr_b32 s2, s2, s7
4399 ; GFX10-NEXT: s_lshr_b32 s6, s6, s8
4400 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4
4401 ; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s6
4402 ; GFX10-NEXT: s_and_b32 s4, s5, 0xf000f
4403 ; GFX10-NEXT: s_or_b32 s0, s0, s2
4404 ; GFX10-NEXT: s_lshr_b32 s2, s1, 16
4405 ; GFX10-NEXT: s_lshl_b32 s1, s1, 0x10001
4406 ; GFX10-NEXT: s_lshl_b32 s2, s2, 1
4407 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s2
4408 ; GFX10-NEXT: s_andn2_b32 s2, 0xf000f, s5
4409 ; GFX10-NEXT: s_lshr_b32 s5, s1, 16
4410 ; GFX10-NEXT: s_lshr_b32 s6, s2, 16
4411 ; GFX10-NEXT: s_lshl_b32 s1, s1, s2
4412 ; GFX10-NEXT: s_lshl_b32 s2, s5, s6
4413 ; GFX10-NEXT: s_lshr_b32 s5, s3, 16
4414 ; GFX10-NEXT: s_and_b32 s3, s3, 0xffff
4415 ; GFX10-NEXT: s_lshr_b32 s6, s4, 16
4416 ; GFX10-NEXT: s_lshr_b32 s3, s3, s4
4417 ; GFX10-NEXT: s_lshr_b32 s4, s5, s6
4418 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s2
4419 ; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4
4420 ; GFX10-NEXT: s_lshr_b32 s3, s0, 16
4421 ; GFX10-NEXT: s_and_b32 s0, s0, 0xffff
4422 ; GFX10-NEXT: s_lshl_b32 s3, s3, 16
4423 ; GFX10-NEXT: s_or_b32 s1, s1, s2
4424 ; GFX10-NEXT: s_or_b32 s0, s0, s3
4425 ; GFX10-NEXT: s_and_b32 s1, s1, 0xffff
4426 ; GFX10-NEXT: ; return to shader part epilog
4428 ; GFX11-LABEL: s_fshr_v3i16:
4430 ; GFX11-NEXT: s_lshr_b32 s6, s0, 16
4431 ; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001
4432 ; GFX11-NEXT: s_lshl_b32 s6, s6, 1
4433 ; GFX11-NEXT: s_and_b32 s7, s4, 0xf000f
4434 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s6
4435 ; GFX11-NEXT: s_and_not1_b32 s4, 0xf000f, s4
4436 ; GFX11-NEXT: s_lshr_b32 s6, s0, 16
4437 ; GFX11-NEXT: s_lshr_b32 s8, s4, 16
4438 ; GFX11-NEXT: s_lshl_b32 s0, s0, s4
4439 ; GFX11-NEXT: s_lshl_b32 s4, s6, s8
4440 ; GFX11-NEXT: s_lshr_b32 s6, s2, 16
4441 ; GFX11-NEXT: s_and_b32 s2, s2, 0xffff
4442 ; GFX11-NEXT: s_lshr_b32 s8, s7, 16
4443 ; GFX11-NEXT: s_lshr_b32 s2, s2, s7
4444 ; GFX11-NEXT: s_lshr_b32 s6, s6, s8
4445 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s4
4446 ; GFX11-NEXT: s_pack_ll_b32_b16 s2, s2, s6
4447 ; GFX11-NEXT: s_and_b32 s4, s5, 0xf000f
4448 ; GFX11-NEXT: s_or_b32 s0, s0, s2
4449 ; GFX11-NEXT: s_lshr_b32 s2, s1, 16
4450 ; GFX11-NEXT: s_lshl_b32 s1, s1, 0x10001
4451 ; GFX11-NEXT: s_lshl_b32 s2, s2, 1
4452 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
4453 ; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s2
4454 ; GFX11-NEXT: s_and_not1_b32 s2, 0xf000f, s5
4455 ; GFX11-NEXT: s_lshr_b32 s5, s1, 16
4456 ; GFX11-NEXT: s_lshr_b32 s6, s2, 16
4457 ; GFX11-NEXT: s_lshl_b32 s1, s1, s2
4458 ; GFX11-NEXT: s_lshl_b32 s2, s5, s6
4459 ; GFX11-NEXT: s_lshr_b32 s5, s3, 16
4460 ; GFX11-NEXT: s_and_b32 s3, s3, 0xffff
4461 ; GFX11-NEXT: s_lshr_b32 s6, s4, 16
4462 ; GFX11-NEXT: s_lshr_b32 s3, s3, s4
4463 ; GFX11-NEXT: s_lshr_b32 s4, s5, s6
4464 ; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s2
4465 ; GFX11-NEXT: s_pack_ll_b32_b16 s2, s3, s4
4466 ; GFX11-NEXT: s_lshr_b32 s3, s0, 16
4467 ; GFX11-NEXT: s_and_b32 s0, s0, 0xffff
4468 ; GFX11-NEXT: s_lshl_b32 s3, s3, 16
4469 ; GFX11-NEXT: s_or_b32 s1, s1, s2
4470 ; GFX11-NEXT: s_or_b32 s0, s0, s3
4471 ; GFX11-NEXT: s_and_b32 s1, s1, 0xffff
4472 ; GFX11-NEXT: ; return to shader part epilog
4473 %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
4474 %cast = bitcast <3 x i16> %result to i48
; Funnel shift right (llvm.fshr) on <3 x i16> in a function with no explicit
; calling convention and no inreg operands. In the expected output below all
; operands are read from v-registers, and each sequence starts with s_waitcnt
; and ends with s_setpc_b64 s[30:31]. The <3 x i16> result is bitcast to
; <3 x half> and returned.
; The assertion lines are autogenerated (see the NOTE at the top of the file):
; rerun utils/update_llc_test_checks.py rather than editing them by hand.
4478 define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) {
4479 ; GFX6-LABEL: v_fshr_v3i16:
4481 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4482 ; GFX6-NEXT: v_and_b32_e32 v7, 0xffff, v7
4483 ; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
4484 ; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7
4485 ; GFX6-NEXT: v_or_b32_e32 v6, v6, v7
4486 ; GFX6-NEXT: v_and_b32_e32 v7, 0xffff, v8
4487 ; GFX6-NEXT: v_bfe_u32 v8, v3, 1, 15
4488 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
4489 ; GFX6-NEXT: v_lshrrev_b32_e32 v8, 14, v8
4490 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v8
4491 ; GFX6-NEXT: v_bfe_u32 v8, v4, 1, 15
4492 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
4493 ; GFX6-NEXT: v_lshrrev_b32_e32 v8, 14, v8
4494 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
4495 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v8
4496 ; GFX6-NEXT: v_lshrrev_b32_e32 v8, 16, v6
4497 ; GFX6-NEXT: v_and_b32_e32 v9, 15, v6
4498 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
4499 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
4500 ; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4501 ; GFX6-NEXT: v_and_b32_e32 v9, 0xffff, v9
4502 ; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15
4503 ; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
4504 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v9, v0
4505 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3
4506 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v3
4507 ; GFX6-NEXT: v_and_b32_e32 v3, 15, v8
4508 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v8
4509 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4
4510 ; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4511 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
4512 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v3, v1
4513 ; GFX6-NEXT: v_bfe_u32 v3, v4, 1, 15
4514 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v6
4515 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
4516 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v3
4517 ; GFX6-NEXT: v_bfe_u32 v3, v5, 1, 15
4518 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2
4519 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, 14, v3
4520 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v7
4521 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
4522 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v5
4523 ; GFX6-NEXT: v_and_b32_e32 v5, 15, v4
4524 ; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4
4525 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v4
4526 ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5
4527 ; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15
4528 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
4529 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, v5, v2
4530 ; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3
4531 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
4532 ; GFX6-NEXT: s_setpc_b64 s[30:31]
4534 ; GFX8-LABEL: v_fshr_v3i16:
4536 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4537 ; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v0
4538 ; GFX8-NEXT: v_lshrrev_b16_e32 v7, 15, v2
4539 ; GFX8-NEXT: v_or_b32_e32 v6, v6, v7
4540 ; GFX8-NEXT: v_mov_b32_e32 v7, 1
4541 ; GFX8-NEXT: v_mov_b32_e32 v8, 15
4542 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4543 ; GFX8-NEXT: v_lshrrev_b16_sdwa v9, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4544 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4
4545 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v9
4546 ; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v2
4547 ; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4548 ; GFX8-NEXT: v_and_b32_e32 v7, 15, v4
4549 ; GFX8-NEXT: v_xor_b32_e32 v10, -1, v4
4550 ; GFX8-NEXT: v_and_b32_e32 v10, 15, v10
4551 ; GFX8-NEXT: v_lshlrev_b16_e32 v6, v7, v6
4552 ; GFX8-NEXT: v_lshrrev_b16_e32 v7, 1, v9
4553 ; GFX8-NEXT: v_lshrrev_b16_e32 v7, v10, v7
4554 ; GFX8-NEXT: v_or_b32_e32 v6, v6, v7
4555 ; GFX8-NEXT: v_and_b32_sdwa v7, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
4556 ; GFX8-NEXT: v_mov_b32_e32 v8, -1
4557 ; GFX8-NEXT: v_xor_b32_sdwa v4, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
4558 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v4
4559 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2
4560 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v7, v0
4561 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v4, v2
4562 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
4563 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 1, v1
4564 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, 15, v3
4565 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
4566 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, 1, v3
4567 ; GFX8-NEXT: v_xor_b32_e32 v3, -1, v5
4568 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v3
4569 ; GFX8-NEXT: v_xor_b32_e32 v3, -1, v3
4570 ; GFX8-NEXT: v_and_b32_e32 v3, 15, v3
4571 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2
4572 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, v4, v1
4573 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v3, v2
4574 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
4575 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
4576 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
4577 ; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
4578 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
4579 ; GFX8-NEXT: s_setpc_b64 s[30:31]
4581 ; GFX9-LABEL: v_fshr_v3i16:
4583 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4584 ; GFX9-NEXT: v_and_b32_e32 v6, 0xf000f, v4
4585 ; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4
4586 ; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4
4587 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4588 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v4, v0
4589 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, v6, v2
4590 ; GFX9-NEXT: v_xor_b32_e32 v4, -1, v5
4591 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
4592 ; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v5
4593 ; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4
4594 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4595 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, v4, v1
4596 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, v2, v3
4597 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v2
4598 ; GFX9-NEXT: s_setpc_b64 s[30:31]
4600 ; GFX10-LABEL: v_fshr_v3i16:
4602 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4603 ; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4
4604 ; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5
4605 ; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4
4606 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4607 ; GFX10-NEXT: v_and_b32_e32 v5, 0xf000f, v5
4608 ; GFX10-NEXT: v_and_b32_e32 v6, 0xf000f, v6
4609 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4610 ; GFX10-NEXT: v_and_b32_e32 v7, 0xf000f, v7
4611 ; GFX10-NEXT: v_pk_lshrrev_b16 v2, v4, v2
4612 ; GFX10-NEXT: v_pk_lshrrev_b16 v3, v5, v3
4613 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v6, v0
4614 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, v7, v1
4615 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
4616 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v3
4617 ; GFX10-NEXT: s_setpc_b64 s[30:31]
4619 ; GFX11-LABEL: v_fshr_v3i16:
4621 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4622 ; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4
4623 ; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5
4624 ; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4
4625 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
4626 ; GFX11-NEXT: v_and_b32_e32 v5, 0xf000f, v5
4627 ; GFX11-NEXT: v_and_b32_e32 v6, 0xf000f, v6
4628 ; GFX11-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
4629 ; GFX11-NEXT: v_and_b32_e32 v7, 0xf000f, v7
4630 ; GFX11-NEXT: v_pk_lshrrev_b16 v2, v4, v2
4631 ; GFX11-NEXT: v_pk_lshrrev_b16 v3, v5, v3
4632 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, v6, v0
4633 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
4634 ; GFX11-NEXT: v_pk_lshlrev_b16 v1, v7, v1
4635 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v2
4636 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
4637 ; GFX11-NEXT: v_or_b32_e32 v1, v1, v3
4638 ; GFX11-NEXT: s_setpc_b64 s[30:31]
4639 %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt)
4640 %cast.result = bitcast <3 x i16> %result to <3 x half>
4641 ret <3 x half> %cast.result
; s_fshr_v4i16: funnel shift right of <4 x i16> in an amdgpu_ps function where
; lhs, rhs and the shift amount are all passed inreg. The IR at the end of the
; block calls @llvm.fshr.v4i16 and bitcasts the result to <2 x i32> for return.
; The per-target check lines are autogenerated (see the note on the first line
; of this file); regenerate them with the update script instead of hand-editing.
4644 define amdgpu_ps <2 x i32> @s_fshr_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs, <4 x i16> inreg %amt) {
4645 ; GFX6-LABEL: s_fshr_v4i16:
4647 ; GFX6-NEXT: s_lshl_b32 s9, s9, 16
4648 ; GFX6-NEXT: s_and_b32 s8, s8, 0xffff
4649 ; GFX6-NEXT: s_or_b32 s8, s9, s8
4650 ; GFX6-NEXT: s_lshl_b32 s9, s11, 16
4651 ; GFX6-NEXT: s_and_b32 s10, s10, 0xffff
4652 ; GFX6-NEXT: s_or_b32 s9, s9, s10
4653 ; GFX6-NEXT: s_bfe_u32 s10, s4, 0xf0001
4654 ; GFX6-NEXT: s_lshl_b32 s0, s0, 1
4655 ; GFX6-NEXT: s_lshr_b32 s10, s10, 14
4656 ; GFX6-NEXT: s_or_b32 s0, s0, s10
4657 ; GFX6-NEXT: s_bfe_u32 s10, s5, 0xf0001
4658 ; GFX6-NEXT: s_lshl_b32 s1, s1, 1
4659 ; GFX6-NEXT: s_lshr_b32 s10, s10, 14
4660 ; GFX6-NEXT: s_xor_b32 s8, s8, -1
4661 ; GFX6-NEXT: s_or_b32 s1, s1, s10
4662 ; GFX6-NEXT: s_lshl_b32 s4, s4, 1
4663 ; GFX6-NEXT: s_lshr_b32 s10, s8, 16
4664 ; GFX6-NEXT: s_and_b32 s11, s8, 15
4665 ; GFX6-NEXT: s_andn2_b32 s8, 15, s8
4666 ; GFX6-NEXT: s_and_b32 s11, 0xffff, s11
4667 ; GFX6-NEXT: s_bfe_u32 s4, s4, 0xf0001
4668 ; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
4669 ; GFX6-NEXT: s_lshl_b32 s0, s0, s11
4670 ; GFX6-NEXT: s_lshr_b32 s4, s4, s8
4671 ; GFX6-NEXT: s_or_b32 s0, s0, s4
4672 ; GFX6-NEXT: s_and_b32 s4, s10, 15
4673 ; GFX6-NEXT: s_lshl_b32 s5, s5, 1
4674 ; GFX6-NEXT: s_andn2_b32 s8, 15, s10
4675 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
4676 ; GFX6-NEXT: s_lshl_b32 s1, s1, s4
4677 ; GFX6-NEXT: s_bfe_u32 s4, s5, 0xf0001
4678 ; GFX6-NEXT: s_and_b32 s5, 0xffff, s8
4679 ; GFX6-NEXT: s_lshr_b32 s4, s4, s5
4680 ; GFX6-NEXT: s_or_b32 s1, s1, s4
4681 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
4682 ; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
4683 ; GFX6-NEXT: s_lshl_b32 s1, s1, 16
4684 ; GFX6-NEXT: s_or_b32 s0, s0, s1
4685 ; GFX6-NEXT: s_lshl_b32 s1, s2, 1
4686 ; GFX6-NEXT: s_bfe_u32 s2, s6, 0xf0001
4687 ; GFX6-NEXT: s_lshr_b32 s2, s2, 14
4688 ; GFX6-NEXT: s_or_b32 s1, s1, s2
4689 ; GFX6-NEXT: s_lshl_b32 s2, s3, 1
4690 ; GFX6-NEXT: s_bfe_u32 s3, s7, 0xf0001
4691 ; GFX6-NEXT: s_lshr_b32 s3, s3, 14
4692 ; GFX6-NEXT: s_xor_b32 s5, s9, -1
4693 ; GFX6-NEXT: s_or_b32 s2, s2, s3
4694 ; GFX6-NEXT: s_lshl_b32 s3, s6, 1
4695 ; GFX6-NEXT: s_lshl_b32 s4, s7, 1
4696 ; GFX6-NEXT: s_lshr_b32 s6, s5, 16
4697 ; GFX6-NEXT: s_and_b32 s7, s5, 15
4698 ; GFX6-NEXT: s_andn2_b32 s5, 15, s5
4699 ; GFX6-NEXT: s_and_b32 s7, 0xffff, s7
4700 ; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001
4701 ; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
4702 ; GFX6-NEXT: s_lshl_b32 s1, s1, s7
4703 ; GFX6-NEXT: s_lshr_b32 s3, s3, s5
4704 ; GFX6-NEXT: s_or_b32 s1, s1, s3
4705 ; GFX6-NEXT: s_and_b32 s3, s6, 15
4706 ; GFX6-NEXT: s_andn2_b32 s5, 15, s6
4707 ; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
4708 ; GFX6-NEXT: s_lshl_b32 s2, s2, s3
4709 ; GFX6-NEXT: s_bfe_u32 s3, s4, 0xf0001
4710 ; GFX6-NEXT: s_and_b32 s4, 0xffff, s5
4711 ; GFX6-NEXT: s_lshr_b32 s3, s3, s4
4712 ; GFX6-NEXT: s_or_b32 s2, s2, s3
4713 ; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
4714 ; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
4715 ; GFX6-NEXT: s_lshl_b32 s2, s2, 16
4716 ; GFX6-NEXT: s_or_b32 s1, s1, s2
4717 ; GFX6-NEXT: ; return to shader part epilog
4719 ; GFX8-LABEL: s_fshr_v4i16:
4721 ; GFX8-NEXT: s_and_b32 s8, 0xffff, s2
4722 ; GFX8-NEXT: s_lshr_b32 s6, s0, 16
4723 ; GFX8-NEXT: s_lshr_b32 s7, s2, 16
4724 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1
4725 ; GFX8-NEXT: s_lshr_b32 s8, s8, 15
4726 ; GFX8-NEXT: s_or_b32 s0, s0, s8
4727 ; GFX8-NEXT: s_lshl_b32 s6, s6, 1
4728 ; GFX8-NEXT: s_lshr_b32 s8, s7, 15
4729 ; GFX8-NEXT: s_lshl_b32 s2, s2, 1
4730 ; GFX8-NEXT: s_xor_b32 s4, s4, -1
4731 ; GFX8-NEXT: s_or_b32 s6, s6, s8
4732 ; GFX8-NEXT: s_lshr_b32 s8, s4, 16
4733 ; GFX8-NEXT: s_and_b32 s9, s4, 15
4734 ; GFX8-NEXT: s_andn2_b32 s4, 15, s4
4735 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4736 ; GFX8-NEXT: s_and_b32 s9, 0xffff, s9
4737 ; GFX8-NEXT: s_lshr_b32 s2, s2, 1
4738 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
4739 ; GFX8-NEXT: s_lshl_b32 s0, s0, s9
4740 ; GFX8-NEXT: s_lshr_b32 s2, s2, s4
4741 ; GFX8-NEXT: s_or_b32 s0, s0, s2
4742 ; GFX8-NEXT: s_and_b32 s2, s8, 15
4743 ; GFX8-NEXT: s_lshl_b32 s7, s7, 1
4744 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4745 ; GFX8-NEXT: s_andn2_b32 s4, 15, s8
4746 ; GFX8-NEXT: s_lshl_b32 s2, s6, s2
4747 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s7
4748 ; GFX8-NEXT: s_lshr_b32 s6, s6, 1
4749 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
4750 ; GFX8-NEXT: s_lshr_b32 s4, s6, s4
4751 ; GFX8-NEXT: s_or_b32 s2, s2, s4
4752 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4753 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s0
4754 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
4755 ; GFX8-NEXT: s_and_b32 s6, 0xffff, s3
4756 ; GFX8-NEXT: s_or_b32 s0, s0, s2
4757 ; GFX8-NEXT: s_lshr_b32 s2, s1, 16
4758 ; GFX8-NEXT: s_lshr_b32 s4, s3, 16
4759 ; GFX8-NEXT: s_lshl_b32 s1, s1, 1
4760 ; GFX8-NEXT: s_lshr_b32 s6, s6, 15
4761 ; GFX8-NEXT: s_or_b32 s1, s1, s6
4762 ; GFX8-NEXT: s_lshl_b32 s2, s2, 1
4763 ; GFX8-NEXT: s_lshr_b32 s6, s4, 15
4764 ; GFX8-NEXT: s_lshl_b32 s3, s3, 1
4765 ; GFX8-NEXT: s_xor_b32 s5, s5, -1
4766 ; GFX8-NEXT: s_or_b32 s2, s2, s6
4767 ; GFX8-NEXT: s_lshr_b32 s6, s5, 16
4768 ; GFX8-NEXT: s_and_b32 s7, s5, 15
4769 ; GFX8-NEXT: s_andn2_b32 s5, 15, s5
4770 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
4771 ; GFX8-NEXT: s_and_b32 s7, 0xffff, s7
4772 ; GFX8-NEXT: s_lshr_b32 s3, s3, 1
4773 ; GFX8-NEXT: s_and_b32 s5, 0xffff, s5
4774 ; GFX8-NEXT: s_lshl_b32 s1, s1, s7
4775 ; GFX8-NEXT: s_lshr_b32 s3, s3, s5
4776 ; GFX8-NEXT: s_or_b32 s1, s1, s3
4777 ; GFX8-NEXT: s_and_b32 s3, s6, 15
4778 ; GFX8-NEXT: s_lshl_b32 s4, s4, 1
4779 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s3
4780 ; GFX8-NEXT: s_andn2_b32 s5, 15, s6
4781 ; GFX8-NEXT: s_lshl_b32 s2, s2, s3
4782 ; GFX8-NEXT: s_and_b32 s3, 0xffff, s4
4783 ; GFX8-NEXT: s_lshr_b32 s3, s3, 1
4784 ; GFX8-NEXT: s_and_b32 s4, 0xffff, s5
4785 ; GFX8-NEXT: s_lshr_b32 s3, s3, s4
4786 ; GFX8-NEXT: s_or_b32 s2, s2, s3
4787 ; GFX8-NEXT: s_and_b32 s2, 0xffff, s2
4788 ; GFX8-NEXT: s_and_b32 s1, 0xffff, s1
4789 ; GFX8-NEXT: s_lshl_b32 s2, s2, 16
4790 ; GFX8-NEXT: s_or_b32 s1, s1, s2
4791 ; GFX8-NEXT: ; return to shader part epilog
4793 ; GFX9-LABEL: s_fshr_v4i16:
4795 ; GFX9-NEXT: s_lshr_b32 s7, s0, 16
4796 ; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001
4797 ; GFX9-NEXT: s_lshl_b32 s7, s7, 1
4798 ; GFX9-NEXT: s_and_b32 s6, s4, 0xf000f
4799 ; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s4
4800 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s7
4801 ; GFX9-NEXT: s_lshr_b32 s7, s0, 16
4802 ; GFX9-NEXT: s_lshr_b32 s8, s4, 16
4803 ; GFX9-NEXT: s_lshl_b32 s0, s0, s4
4804 ; GFX9-NEXT: s_lshl_b32 s4, s7, s8
4805 ; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4
4806 ; GFX9-NEXT: s_lshr_b32 s4, s2, 16
4807 ; GFX9-NEXT: s_and_b32 s2, s2, 0xffff
4808 ; GFX9-NEXT: s_lshr_b32 s7, s6, 16
4809 ; GFX9-NEXT: s_lshr_b32 s2, s2, s6
4810 ; GFX9-NEXT: s_lshr_b32 s4, s4, s7
4811 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4
4812 ; GFX9-NEXT: s_or_b32 s0, s0, s2
4813 ; GFX9-NEXT: s_and_b32 s2, s5, 0xf000f
4814 ; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s5
4815 ; GFX9-NEXT: s_lshr_b32 s5, s1, 16
4816 ; GFX9-NEXT: s_lshl_b32 s1, s1, 0x10001
4817 ; GFX9-NEXT: s_lshl_b32 s5, s5, 1
4818 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5
4819 ; GFX9-NEXT: s_lshr_b32 s5, s1, 16
4820 ; GFX9-NEXT: s_lshr_b32 s6, s4, 16
4821 ; GFX9-NEXT: s_lshl_b32 s1, s1, s4
4822 ; GFX9-NEXT: s_lshl_b32 s4, s5, s6
4823 ; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4
4824 ; GFX9-NEXT: s_lshr_b32 s4, s3, 16
4825 ; GFX9-NEXT: s_and_b32 s3, s3, 0xffff
4826 ; GFX9-NEXT: s_lshr_b32 s5, s2, 16
4827 ; GFX9-NEXT: s_lshr_b32 s2, s3, s2
4828 ; GFX9-NEXT: s_lshr_b32 s3, s4, s5
4829 ; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s3
4830 ; GFX9-NEXT: s_or_b32 s1, s1, s2
4831 ; GFX9-NEXT: ; return to shader part epilog
4833 ; GFX10-LABEL: s_fshr_v4i16:
4835 ; GFX10-NEXT: s_lshr_b32 s6, s0, 16
4836 ; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001
4837 ; GFX10-NEXT: s_lshl_b32 s6, s6, 1
4838 ; GFX10-NEXT: s_and_b32 s7, s4, 0xf000f
4839 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s6
4840 ; GFX10-NEXT: s_andn2_b32 s4, 0xf000f, s4
4841 ; GFX10-NEXT: s_lshr_b32 s6, s0, 16
4842 ; GFX10-NEXT: s_lshr_b32 s8, s4, 16
4843 ; GFX10-NEXT: s_lshl_b32 s0, s0, s4
4844 ; GFX10-NEXT: s_lshl_b32 s4, s6, s8
4845 ; GFX10-NEXT: s_lshr_b32 s6, s2, 16
4846 ; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4
4847 ; GFX10-NEXT: s_lshr_b32 s4, s1, 16
4848 ; GFX10-NEXT: s_and_b32 s2, s2, 0xffff
4849 ; GFX10-NEXT: s_lshr_b32 s8, s7, 16
4850 ; GFX10-NEXT: s_lshl_b32 s1, s1, 0x10001
4851 ; GFX10-NEXT: s_lshl_b32 s4, s4, 1
4852 ; GFX10-NEXT: s_lshr_b32 s2, s2, s7
4853 ; GFX10-NEXT: s_lshr_b32 s6, s6, s8
4854 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4
4855 ; GFX10-NEXT: s_andn2_b32 s4, 0xf000f, s5
4856 ; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s6
4857 ; GFX10-NEXT: s_and_b32 s6, s5, 0xf000f
4858 ; GFX10-NEXT: s_lshr_b32 s5, s1, 16
4859 ; GFX10-NEXT: s_lshr_b32 s7, s4, 16
4860 ; GFX10-NEXT: s_lshl_b32 s1, s1, s4
4861 ; GFX10-NEXT: s_lshl_b32 s4, s5, s7
4862 ; GFX10-NEXT: s_lshr_b32 s5, s3, 16
4863 ; GFX10-NEXT: s_and_b32 s3, s3, 0xffff
4864 ; GFX10-NEXT: s_lshr_b32 s7, s6, 16
4865 ; GFX10-NEXT: s_lshr_b32 s3, s3, s6
4866 ; GFX10-NEXT: s_lshr_b32 s5, s5, s7
4867 ; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4
4868 ; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s5
4869 ; GFX10-NEXT: s_or_b32 s0, s0, s2
4870 ; GFX10-NEXT: s_or_b32 s1, s1, s3
4871 ; GFX10-NEXT: ; return to shader part epilog
4873 ; GFX11-LABEL: s_fshr_v4i16:
4875 ; GFX11-NEXT: s_lshr_b32 s6, s0, 16
4876 ; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001
4877 ; GFX11-NEXT: s_lshl_b32 s6, s6, 1
4878 ; GFX11-NEXT: s_and_b32 s7, s4, 0xf000f
4879 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s6
4880 ; GFX11-NEXT: s_and_not1_b32 s4, 0xf000f, s4
4881 ; GFX11-NEXT: s_lshr_b32 s6, s0, 16
4882 ; GFX11-NEXT: s_lshr_b32 s8, s4, 16
4883 ; GFX11-NEXT: s_lshl_b32 s0, s0, s4
4884 ; GFX11-NEXT: s_lshl_b32 s4, s6, s8
4885 ; GFX11-NEXT: s_lshr_b32 s6, s2, 16
4886 ; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s4
4887 ; GFX11-NEXT: s_lshr_b32 s4, s1, 16
4888 ; GFX11-NEXT: s_and_b32 s2, s2, 0xffff
4889 ; GFX11-NEXT: s_lshr_b32 s8, s7, 16
4890 ; GFX11-NEXT: s_lshl_b32 s1, s1, 0x10001
4891 ; GFX11-NEXT: s_lshl_b32 s4, s4, 1
4892 ; GFX11-NEXT: s_lshr_b32 s2, s2, s7
4893 ; GFX11-NEXT: s_lshr_b32 s6, s6, s8
4894 ; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s4
4895 ; GFX11-NEXT: s_and_not1_b32 s4, 0xf000f, s5
4896 ; GFX11-NEXT: s_pack_ll_b32_b16 s2, s2, s6
4897 ; GFX11-NEXT: s_and_b32 s6, s5, 0xf000f
4898 ; GFX11-NEXT: s_lshr_b32 s5, s1, 16
4899 ; GFX11-NEXT: s_lshr_b32 s7, s4, 16
4900 ; GFX11-NEXT: s_lshl_b32 s1, s1, s4
4901 ; GFX11-NEXT: s_lshl_b32 s4, s5, s7
4902 ; GFX11-NEXT: s_lshr_b32 s5, s3, 16
4903 ; GFX11-NEXT: s_and_b32 s3, s3, 0xffff
4904 ; GFX11-NEXT: s_lshr_b32 s7, s6, 16
4905 ; GFX11-NEXT: s_lshr_b32 s3, s3, s6
4906 ; GFX11-NEXT: s_lshr_b32 s5, s5, s7
4907 ; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s4
4908 ; GFX11-NEXT: s_pack_ll_b32_b16 s3, s3, s5
4909 ; GFX11-NEXT: s_or_b32 s0, s0, s2
4910 ; GFX11-NEXT: s_or_b32 s1, s1, s3
4911 ; GFX11-NEXT: ; return to shader part epilog
4912 %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
4913 %cast.result = bitcast <4 x i16> %result to <2 x i32>
4914 ret <2 x i32> %cast.result
; v_fshr_v4i16: funnel shift right of <4 x i16> in a function with the default
; calling convention and no inreg arguments. The IR at the end of the block
; calls @llvm.fshr.v4i16 and bitcasts the result to <4 x half> for return.
; In the expected output below, the gfx9/gfx10/gfx11 sequences use packed
; 16-bit shifts (v_pk_lshlrev_b16 / v_pk_lshrrev_b16), while the gfx6 and gfx8
; sequences handle the 16-bit elements with separate shift instructions.
; Check lines are autogenerated (see the note on the first line of this file).
4917 define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) {
4918 ; GFX6-LABEL: v_fshr_v4i16:
4920 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4921 ; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v9
4922 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8
4923 ; GFX6-NEXT: v_or_b32_e32 v8, v9, v8
4924 ; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v11
4925 ; GFX6-NEXT: v_and_b32_e32 v10, 0xffff, v10
4926 ; GFX6-NEXT: v_or_b32_e32 v9, v9, v10
4927 ; GFX6-NEXT: v_bfe_u32 v10, v4, 1, 15
4928 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
4929 ; GFX6-NEXT: v_lshrrev_b32_e32 v10, 14, v10
4930 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v10
4931 ; GFX6-NEXT: v_bfe_u32 v10, v5, 1, 15
4932 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1
4933 ; GFX6-NEXT: v_lshrrev_b32_e32 v10, 14, v10
4934 ; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
4935 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v10
4936 ; GFX6-NEXT: v_lshrrev_b32_e32 v10, 16, v8
4937 ; GFX6-NEXT: v_and_b32_e32 v11, 15, v8
4938 ; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8
4939 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4
4940 ; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
4941 ; GFX6-NEXT: v_and_b32_e32 v11, 0xffff, v11
4942 ; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15
4943 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8
4944 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, v11, v0
4945 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4
4946 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
4947 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v10
4948 ; GFX6-NEXT: v_xor_b32_e32 v8, -1, v10
4949 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5
4950 ; GFX6-NEXT: v_and_b32_e32 v8, 15, v8
4951 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
4952 ; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1
4953 ; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15
4954 ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v8
4955 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
4956 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v4
4957 ; GFX6-NEXT: v_bfe_u32 v4, v6, 1, 15
4958 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2
4959 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 14, v4
4960 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
4961 ; GFX6-NEXT: v_bfe_u32 v4, v7, 1, 15
4962 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3
4963 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 14, v4
4964 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
4965 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v6
4966 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v9
4967 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v7
4968 ; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v6
4969 ; GFX6-NEXT: v_and_b32_e32 v8, 15, v6
4970 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6
4971 ; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4972 ; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8
4973 ; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15
4974 ; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6
4975 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, v8, v2
4976 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4
4977 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
4978 ; GFX6-NEXT: v_and_b32_e32 v4, 15, v7
4979 ; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7
4980 ; GFX6-NEXT: v_and_b32_e32 v6, 15, v6
4981 ; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
4982 ; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3
4983 ; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15
4984 ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v6
4985 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4
4986 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
4987 ; GFX6-NEXT: s_setpc_b64 s[30:31]
4989 ; GFX8-LABEL: v_fshr_v4i16:
4991 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4992 ; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v0
4993 ; GFX8-NEXT: v_lshrrev_b16_e32 v7, 15, v2
4994 ; GFX8-NEXT: v_or_b32_e32 v6, v6, v7
4995 ; GFX8-NEXT: v_mov_b32_e32 v7, 1
4996 ; GFX8-NEXT: v_mov_b32_e32 v8, 15
4997 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4998 ; GFX8-NEXT: v_lshrrev_b16_sdwa v9, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4999 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4
5000 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v9
5001 ; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v2
5002 ; GFX8-NEXT: v_xor_b32_e32 v11, -1, v4
5003 ; GFX8-NEXT: v_and_b32_e32 v10, 15, v4
5004 ; GFX8-NEXT: v_and_b32_e32 v11, 15, v11
5005 ; GFX8-NEXT: v_lshrrev_b16_e32 v9, 1, v9
5006 ; GFX8-NEXT: v_lshlrev_b16_e32 v6, v10, v6
5007 ; GFX8-NEXT: v_lshrrev_b16_e32 v9, v11, v9
5008 ; GFX8-NEXT: v_mov_b32_e32 v10, -1
5009 ; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5010 ; GFX8-NEXT: v_or_b32_e32 v6, v6, v9
5011 ; GFX8-NEXT: v_and_b32_sdwa v9, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
5012 ; GFX8-NEXT: v_xor_b32_sdwa v4, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
5013 ; GFX8-NEXT: v_and_b32_e32 v4, 15, v4
5014 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2
5015 ; GFX8-NEXT: v_lshlrev_b16_e32 v0, v9, v0
5016 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v4, v2
5017 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
5018 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, 1, v1
5019 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, 15, v3
5020 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
5021 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v4
5022 ; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5023 ; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5024 ; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5
5025 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0
5026 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
5027 ; GFX8-NEXT: v_lshlrev_b16_e32 v4, 1, v3
5028 ; GFX8-NEXT: v_lshlrev_b16_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5029 ; GFX8-NEXT: v_xor_b32_e32 v7, -1, v5
5030 ; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5031 ; GFX8-NEXT: v_and_b32_e32 v6, 15, v5
5032 ; GFX8-NEXT: v_and_b32_e32 v7, 15, v7
5033 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, 1, v4
5034 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, v6, v2
5035 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, v7, v4
5036 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v4
5037 ; GFX8-NEXT: v_and_b32_sdwa v4, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
5038 ; GFX8-NEXT: v_xor_b32_sdwa v5, v5, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
5039 ; GFX8-NEXT: v_and_b32_e32 v5, 15, v5
5040 ; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3
5041 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, v4, v1
5042 ; GFX8-NEXT: v_lshrrev_b16_e32 v3, v5, v3
5043 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
5044 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1
5045 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
5046 ; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
5047 ; GFX8-NEXT: s_setpc_b64 s[30:31]
5049 ; GFX9-LABEL: v_fshr_v4i16:
5051 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5052 ; GFX9-NEXT: v_and_b32_e32 v6, 0xf000f, v4
5053 ; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4
5054 ; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4
5055 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
5056 ; GFX9-NEXT: v_pk_lshlrev_b16 v0, v4, v0
5057 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, v6, v2
5058 ; GFX9-NEXT: v_xor_b32_e32 v4, -1, v5
5059 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
5060 ; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v5
5061 ; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4
5062 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
5063 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, v4, v1
5064 ; GFX9-NEXT: v_pk_lshrrev_b16 v2, v2, v3
5065 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v2
5066 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5068 ; GFX10-LABEL: v_fshr_v4i16:
5070 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5071 ; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4
5072 ; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5
5073 ; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4
5074 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
5075 ; GFX10-NEXT: v_and_b32_e32 v5, 0xf000f, v5
5076 ; GFX10-NEXT: v_and_b32_e32 v6, 0xf000f, v6
5077 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
5078 ; GFX10-NEXT: v_and_b32_e32 v7, 0xf000f, v7
5079 ; GFX10-NEXT: v_pk_lshrrev_b16 v2, v4, v2
5080 ; GFX10-NEXT: v_pk_lshrrev_b16 v3, v5, v3
5081 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v6, v0
5082 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, v7, v1
5083 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
5084 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v3
5085 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5087 ; GFX11-LABEL: v_fshr_v4i16:
5089 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5090 ; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4
5091 ; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5
5092 ; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4
5093 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1]
5094 ; GFX11-NEXT: v_and_b32_e32 v5, 0xf000f, v5
5095 ; GFX11-NEXT: v_and_b32_e32 v6, 0xf000f, v6
5096 ; GFX11-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1]
5097 ; GFX11-NEXT: v_and_b32_e32 v7, 0xf000f, v7
5098 ; GFX11-NEXT: v_pk_lshrrev_b16 v2, v4, v2
5099 ; GFX11-NEXT: v_pk_lshrrev_b16 v3, v5, v3
5100 ; GFX11-NEXT: v_pk_lshlrev_b16 v0, v6, v0
5101 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
5102 ; GFX11-NEXT: v_pk_lshlrev_b16 v1, v7, v1
5103 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v2
5104 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
5105 ; GFX11-NEXT: v_or_b32_e32 v1, v1, v3
5106 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5107 %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt)
5108 %cast.result = bitcast <4 x i16> %result to <4 x half>
5109 ret <4 x half> %cast.result
; s_fshr_i64: 64-bit funnel shift right in an amdgpu_ps function with lhs, rhs
; and a variable shift amount all passed inreg; calls @llvm.fshr.i64.
; Every target's expected sequence has the same shape: the amount is ANDed
; with 63, its complement is ANDed with 63 (andn2 / and_not1), lhs is shifted
; left by 1 and then by the complemented amount, rhs is shifted right by the
; masked amount, and the two halves are ORed together.
; Check lines are autogenerated (see the note on the first line of this file).
5112 define amdgpu_ps i64 @s_fshr_i64(i64 inreg %lhs, i64 inreg %rhs, i64 inreg %amt) {
5113 ; GFX6-LABEL: s_fshr_i64:
5115 ; GFX6-NEXT: s_and_b64 s[6:7], s[4:5], 63
5116 ; GFX6-NEXT: s_andn2_b64 s[4:5], 63, s[4:5]
5117 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5118 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s4
5119 ; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s6
5120 ; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5121 ; GFX6-NEXT: ; return to shader part epilog
5123 ; GFX8-LABEL: s_fshr_i64:
5125 ; GFX8-NEXT: s_and_b64 s[6:7], s[4:5], 63
5126 ; GFX8-NEXT: s_andn2_b64 s[4:5], 63, s[4:5]
5127 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5128 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s4
5129 ; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], s6
5130 ; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5131 ; GFX8-NEXT: ; return to shader part epilog
5133 ; GFX9-LABEL: s_fshr_i64:
5135 ; GFX9-NEXT: s_and_b64 s[6:7], s[4:5], 63
5136 ; GFX9-NEXT: s_andn2_b64 s[4:5], 63, s[4:5]
5137 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5138 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s4
5139 ; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s6
5140 ; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5141 ; GFX9-NEXT: ; return to shader part epilog
5143 ; GFX10-LABEL: s_fshr_i64:
5145 ; GFX10-NEXT: s_andn2_b64 s[6:7], 63, s[4:5]
5146 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5147 ; GFX10-NEXT: s_and_b64 s[4:5], s[4:5], 63
5148 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s6
5149 ; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s4
5150 ; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5151 ; GFX10-NEXT: ; return to shader part epilog
5153 ; GFX11-LABEL: s_fshr_i64:
5155 ; GFX11-NEXT: s_and_not1_b64 s[6:7], 63, s[4:5]
5156 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5157 ; GFX11-NEXT: s_and_b64 s[4:5], s[4:5], 63
5158 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s6
5159 ; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], s4
5160 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5161 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5162 ; GFX11-NEXT: ; return to shader part epilog
5163 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
; s_fshr_i64_5: inreg 64-bit funnel shift right by the constant 5 in an
; amdgpu_ps function; calls @llvm.fshr.i64 with an immediate amount.
; Expected code: the low lhs word is shifted left by 27 into s1 with s0 zeroed,
; rhs is shifted right by 5, and the two 64-bit values are ORed.
; The tahiti/fiji/gfx900/gfx1010 RUN lines share one set of checks; the gfx1100
; checks differ only by the extra s_delay_alu before the final OR.
5167 define amdgpu_ps i64 @s_fshr_i64_5(i64 inreg %lhs, i64 inreg %rhs) {
5168 ; GCN-LABEL: s_fshr_i64_5:
5170 ; GCN-NEXT: s_lshl_b32 s1, s0, 27
5171 ; GCN-NEXT: s_mov_b32 s0, 0
5172 ; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], 5
5173 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5174 ; GCN-NEXT: ; return to shader part epilog
5176 ; GFX11-LABEL: s_fshr_i64_5:
5178 ; GFX11-NEXT: s_lshl_b32 s1, s0, 27
5179 ; GFX11-NEXT: s_mov_b32 s0, 0
5180 ; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], 5
5181 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5182 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5183 ; GFX11-NEXT: ; return to shader part epilog
5184 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5)
; s_fshr_i64_32: inreg 64-bit funnel shift right by the constant 32 in an
; amdgpu_ps function; calls @llvm.fshr.i64 with an immediate amount.
; Expected code uses no shift instructions: s_mov_b32 copies place the low lhs
; word in s1 and the high rhs word in s2 (with s0 and s3 zero), followed by a
; 64-bit OR of the two register pairs.
; The tahiti/fiji/gfx900/gfx1010 RUN lines share one set of checks; the gfx1100
; checks differ only by the extra s_delay_alu before the final OR.
5188 define amdgpu_ps i64 @s_fshr_i64_32(i64 inreg %lhs, i64 inreg %rhs) {
5189 ; GCN-LABEL: s_fshr_i64_32:
5191 ; GCN-NEXT: s_mov_b32 s1, s0
5192 ; GCN-NEXT: s_mov_b32 s0, 0
5193 ; GCN-NEXT: s_mov_b32 s2, s3
5194 ; GCN-NEXT: s_mov_b32 s3, s0
5195 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5196 ; GCN-NEXT: ; return to shader part epilog
5198 ; GFX11-LABEL: s_fshr_i64_32:
5200 ; GFX11-NEXT: s_mov_b32 s1, s0
5201 ; GFX11-NEXT: s_mov_b32 s0, 0
5202 ; GFX11-NEXT: s_mov_b32 s2, s3
5203 ; GFX11-NEXT: s_mov_b32 s3, s0
5204 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5205 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5206 ; GFX11-NEXT: ; return to shader part epilog
5207 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32)
; s_fshr_i64_48: inreg 64-bit funnel shift right by the constant 48 in an
; amdgpu_ps function; calls @llvm.fshr.i64 with an immediate amount.
; Expected code: lhs is shifted left by 16 as a 64-bit value, the high rhs word
; is shifted right by 16 into s2 with s3 zeroed, and the two pairs are ORed.
; The tahiti/fiji/gfx900/gfx1010 RUN lines share one set of checks; the gfx1100
; checks differ only by the extra s_delay_alu before the final OR.
5211 define amdgpu_ps i64 @s_fshr_i64_48(i64 inreg %lhs, i64 inreg %rhs) {
5212 ; GCN-LABEL: s_fshr_i64_48:
5214 ; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], 16
5215 ; GCN-NEXT: s_lshr_b32 s2, s3, 16
5216 ; GCN-NEXT: s_mov_b32 s3, 0
5217 ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5218 ; GCN-NEXT: ; return to shader part epilog
5220 ; GFX11-LABEL: s_fshr_i64_48:
5222 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 16
5223 ; GFX11-NEXT: s_lshr_b32 s2, s3, 16
5224 ; GFX11-NEXT: s_mov_b32 s3, 0
5225 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5226 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
5227 ; GFX11-NEXT: ; return to shader part epilog
5228 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48)
; v_fshr_i64: 64-bit funnel shift right with a variable amount in a function
; with the default calling convention (no inreg arguments); calls
; @llvm.fshr.i64.
; Every target's expected sequence masks the low amount word with 63, masks its
; bitwise NOT with 63, shifts lhs left by 1 and then by the inverted amount,
; shifts rhs right by the masked amount, and ORs the result word by word.
; gfx6 spells the 64-bit shifts v_lshl_b64 / v_lshr_b64; the later targets use
; the operand-reversed v_lshlrev_b64 / v_lshrrev_b64 forms.
; Check lines are autogenerated (see the note on the first line of this file).
5232 define i64 @v_fshr_i64(i64 %lhs, i64 %rhs, i64 %amt) {
5233 ; GFX6-LABEL: v_fshr_i64:
5235 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5236 ; GFX6-NEXT: v_and_b32_e32 v5, 63, v4
5237 ; GFX6-NEXT: v_not_b32_e32 v4, v4
5238 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
5239 ; GFX6-NEXT: v_and_b32_e32 v4, 63, v4
5240 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v4
5241 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], v5
5242 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
5243 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v3
5244 ; GFX6-NEXT: s_setpc_b64 s[30:31]
5246 ; GFX8-LABEL: v_fshr_i64:
5248 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5249 ; GFX8-NEXT: v_and_b32_e32 v5, 63, v4
5250 ; GFX8-NEXT: v_not_b32_e32 v4, v4
5251 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5252 ; GFX8-NEXT: v_and_b32_e32 v4, 63, v4
5253 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
5254 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3]
5255 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
5256 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
5257 ; GFX8-NEXT: s_setpc_b64 s[30:31]
5259 ; GFX9-LABEL: v_fshr_i64:
5261 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5262 ; GFX9-NEXT: v_and_b32_e32 v5, 63, v4
5263 ; GFX9-NEXT: v_not_b32_e32 v4, v4
5264 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5265 ; GFX9-NEXT: v_and_b32_e32 v4, 63, v4
5266 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
5267 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3]
5268 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
5269 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v3
5270 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5272 ; GFX10-LABEL: v_fshr_i64:
5274 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5275 ; GFX10-NEXT: v_not_b32_e32 v5, v4
5276 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5277 ; GFX10-NEXT: v_and_b32_e32 v4, 63, v4
5278 ; GFX10-NEXT: v_and_b32_e32 v5, 63, v5
5279 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3]
5280 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1]
5281 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
5282 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v3
5283 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5285 ; GFX11-LABEL: v_fshr_i64:
5287 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5288 ; GFX11-NEXT: v_not_b32_e32 v5, v4
5289 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5290 ; GFX11-NEXT: v_and_b32_e32 v4, 63, v4
5291 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
5292 ; GFX11-NEXT: v_and_b32_e32 v5, 63, v5
5293 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3]
5294 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5295 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1]
5296 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v2
5297 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
5298 ; GFX11-NEXT: v_or_b32_e32 v1, v1, v3
5299 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5300 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
; v_fshr_i64_5: 64-bit funnel shift right by the constant 5 in a function with
; the default calling convention; calls @llvm.fshr.i64 with an immediate
; amount.
; Expected code: rhs is shifted right by 5 as a 64-bit value and the low lhs
; word, shifted left by 27, is ORed into the high result word. gfx6 and gfx8
; use a separate shift and OR for that last step; gfx9, gfx10 and gfx11 use a
; single v_lshl_or_b32.
; Check lines are autogenerated (see the note on the first line of this file).
5304 define i64 @v_fshr_i64_5(i64 %lhs, i64 %rhs) {
5305 ; GFX6-LABEL: v_fshr_i64_5:
5307 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5308 ; GFX6-NEXT: v_mov_b32_e32 v4, v0
5309 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[2:3], 5
5310 ; GFX6-NEXT: v_lshlrev_b32_e32 v2, 27, v4
5311 ; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
5312 ; GFX6-NEXT: s_setpc_b64 s[30:31]
5314 ; GFX8-LABEL: v_fshr_i64_5:
5316 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5317 ; GFX8-NEXT: v_mov_b32_e32 v4, v0
5318 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3]
5319 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 27, v4
5320 ; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
5321 ; GFX8-NEXT: s_setpc_b64 s[30:31]
5323 ; GFX9-LABEL: v_fshr_i64_5:
5325 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5326 ; GFX9-NEXT: v_mov_b32_e32 v4, v0
5327 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3]
5328 ; GFX9-NEXT: v_lshl_or_b32 v1, v4, 27, v1
5329 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5331 ; GFX10-LABEL: v_fshr_i64_5:
5333 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5334 ; GFX10-NEXT: v_mov_b32_e32 v4, v0
5335 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3]
5336 ; GFX10-NEXT: v_lshl_or_b32 v1, v4, 27, v1
5337 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5339 ; GFX11-LABEL: v_fshr_i64_5:
5341 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5342 ; GFX11-NEXT: v_mov_b32_e32 v4, v0
5343 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3]
5344 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
5345 ; GFX11-NEXT: v_lshl_or_b32 v1, v4, 27, v1
5346 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5347 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5)
; v_fshr_i64_32: 64-bit funnel shift right by the constant 32 in a function
; with the default calling convention; calls @llvm.fshr.i64 with an immediate
; amount.
; Expected code is two register copies and no shifts: v1 receives v0 (low lhs
; word) and v0 receives v3 (high rhs word). The gfx1100 checks express both
; copies as a single v_dual_mov_b32; the other four RUN lines share one set of
; checks with two v_mov_b32 instructions.
5351 define i64 @v_fshr_i64_32(i64 %lhs, i64 %rhs) {
5352 ; GCN-LABEL: v_fshr_i64_32:
5354 ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5355 ; GCN-NEXT: v_mov_b32_e32 v1, v0
5356 ; GCN-NEXT: v_mov_b32_e32 v0, v3
5357 ; GCN-NEXT: s_setpc_b64 s[30:31]
5359 ; GFX11-LABEL: v_fshr_i64_32:
5361 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5362 ; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, v3
5363 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5364 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32)
; v_fshr_i64_48: 64-bit funnel shift right by the constant 48 in a function
; with the default calling convention; calls @llvm.fshr.i64 with an immediate
; amount.
; Expected code: lhs is shifted left by 16 as a 64-bit value and the upper 16
; bits of the high rhs word are ORed into the low result word. gfx6 and gfx11
; use an explicit v_lshrrev_b32 by 16 followed by v_or_b32; gfx8, gfx9 and
; gfx10 instead select WORD_1 of v3 directly via an SDWA v_or_b32.
; Check lines are autogenerated (see the note on the first line of this file).
5368 define i64 @v_fshr_i64_48(i64 %lhs, i64 %rhs) {
5369 ; GFX6-LABEL: v_fshr_i64_48:
5371 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5372 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 16
5373 ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v3
5374 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
5375 ; GFX6-NEXT: s_setpc_b64 s[30:31]
5377 ; GFX8-LABEL: v_fshr_i64_48:
5379 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5380 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1]
5381 ; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5382 ; GFX8-NEXT: s_setpc_b64 s[30:31]
5384 ; GFX9-LABEL: v_fshr_i64_48:
5386 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5387 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1]
5388 ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5389 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5391 ; GFX10-LABEL: v_fshr_i64_48:
5393 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5394 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1]
5395 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
5396 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5398 ; GFX11-LABEL: v_fshr_i64_48:
5400 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5401 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1]
5402 ; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v3
5403 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
5404 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v2
5405 ; GFX11-NEXT: s_setpc_b64 s[30:31]
5406 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48)
; 64-bit funnel shift right with mixed operand placement: %lhs and %rhs are
; marked inreg (the checks read them from s[0:1] and s[2:3]); %amt is a plain
; argument whose low dword is read from v0.
; Expected expansion in the checks:
;   ((lhs << 1) << (~amt & 63)) | (rhs >> (amt & 63))
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them instead of editing by hand.
5410 define amdgpu_ps <2 x float> @v_fshr_i64_ssv(i64 inreg %lhs, i64 inreg %rhs, i64 %amt) {
5411 ; GFX6-LABEL: v_fshr_i64_ssv:
5413 ; GFX6-NEXT: v_and_b32_e32 v2, 63, v0
5414 ; GFX6-NEXT: v_not_b32_e32 v0, v0
5415 ; GFX6-NEXT: v_and_b32_e32 v0, 63, v0
5416 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5417 ; GFX6-NEXT: v_lshl_b64 v[0:1], s[0:1], v0
5418 ; GFX6-NEXT: v_lshr_b64 v[2:3], s[2:3], v2
5419 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
5420 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v3
5421 ; GFX6-NEXT: ; return to shader part epilog
5423 ; GFX8-LABEL: v_fshr_i64_ssv:
5425 ; GFX8-NEXT: v_and_b32_e32 v2, 63, v0
5426 ; GFX8-NEXT: v_not_b32_e32 v0, v0
5427 ; GFX8-NEXT: v_and_b32_e32 v0, 63, v0
5428 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5429 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1]
5430 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], v2, s[2:3]
5431 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
5432 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3
5433 ; GFX8-NEXT: ; return to shader part epilog
5435 ; GFX9-LABEL: v_fshr_i64_ssv:
5437 ; GFX9-NEXT: v_and_b32_e32 v2, 63, v0
5438 ; GFX9-NEXT: v_not_b32_e32 v0, v0
5439 ; GFX9-NEXT: v_and_b32_e32 v0, 63, v0
5440 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5441 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1]
5442 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], v2, s[2:3]
5443 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
5444 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v3
5445 ; GFX9-NEXT: ; return to shader part epilog
5447 ; GFX10-LABEL: v_fshr_i64_ssv:
5449 ; GFX10-NEXT: v_not_b32_e32 v1, v0
5450 ; GFX10-NEXT: v_and_b32_e32 v0, 63, v0
5451 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5452 ; GFX10-NEXT: v_and_b32_e32 v2, 63, v1
5453 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v0, s[2:3]
5454 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], v2, s[0:1]
5455 ; GFX10-NEXT: v_or_b32_e32 v0, v2, v0
5456 ; GFX10-NEXT: v_or_b32_e32 v1, v3, v1
5457 ; GFX10-NEXT: ; return to shader part epilog
5459 ; GFX11-LABEL: v_fshr_i64_ssv:
5461 ; GFX11-NEXT: v_not_b32_e32 v1, v0
5462 ; GFX11-NEXT: v_and_b32_e32 v0, 63, v0
5463 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5464 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
5465 ; GFX11-NEXT: v_and_b32_e32 v2, 63, v1
5466 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], v0, s[2:3]
5467 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
5468 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], v2, s[0:1]
5469 ; GFX11-NEXT: v_or_b32_e32 v0, v2, v0
5470 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
5471 ; GFX11-NEXT: v_or_b32_e32 v1, v3, v1
5472 ; GFX11-NEXT: ; return to shader part epilog
; IR under test: the i64 result is bitcast to <2 x float> before being returned.
5473 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
5474 %cast = bitcast i64 %result to <2 x float>
5475 ret <2 x float> %cast
; 64-bit funnel shift right with mixed operand placement: %lhs and %amt are
; marked inreg (the checks read them from s[0:1] and s[2:3]); %rhs is a plain
; argument read from v[0:1].
; Expected expansion in the checks: the lhs half ((lhs << 1) << (63 & ~amt))
; is computed with scalar shifts, the rhs half (rhs >> (amt & 63)) with a
; vector shift, and the two are combined with per-dword vector ORs.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them instead of editing by hand.
5478 define amdgpu_ps <2 x float> @v_fshr_i64_svs(i64 inreg %lhs, i64 %rhs, i64 inreg %amt) {
5479 ; GFX6-LABEL: v_fshr_i64_svs:
5481 ; GFX6-NEXT: s_and_b64 s[4:5], s[2:3], 63
5482 ; GFX6-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5483 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5484 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], s4
5485 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s2
5486 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
5487 ; GFX6-NEXT: v_or_b32_e32 v1, s1, v1
5488 ; GFX6-NEXT: ; return to shader part epilog
5490 ; GFX8-LABEL: v_fshr_i64_svs:
5492 ; GFX8-NEXT: s_and_b64 s[4:5], s[2:3], 63
5493 ; GFX8-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5494 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5495 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
5496 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s2
5497 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
5498 ; GFX8-NEXT: v_or_b32_e32 v1, s1, v1
5499 ; GFX8-NEXT: ; return to shader part epilog
5501 ; GFX9-LABEL: v_fshr_i64_svs:
5503 ; GFX9-NEXT: s_and_b64 s[4:5], s[2:3], 63
5504 ; GFX9-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5505 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5506 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
5507 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s2
5508 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
5509 ; GFX9-NEXT: v_or_b32_e32 v1, s1, v1
5510 ; GFX9-NEXT: ; return to shader part epilog
5512 ; GFX10-LABEL: v_fshr_i64_svs:
5514 ; GFX10-NEXT: s_and_b64 s[4:5], s[2:3], 63
5515 ; GFX10-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5516 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
5517 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5518 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s2
5519 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
5520 ; GFX10-NEXT: v_or_b32_e32 v1, s1, v1
5521 ; GFX10-NEXT: ; return to shader part epilog
5523 ; GFX11-LABEL: v_fshr_i64_svs:
5525 ; GFX11-NEXT: s_and_b64 s[4:5], s[2:3], 63
5526 ; GFX11-NEXT: s_and_not1_b64 s[2:3], 63, s[2:3]
5527 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1]
5528 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5529 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5530 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s2
5531 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
5532 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v0
5533 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
5534 ; GFX11-NEXT: v_or_b32_e32 v1, s1, v1
5535 ; GFX11-NEXT: ; return to shader part epilog
; IR under test: the i64 result is bitcast to <2 x float> before being returned.
5536 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
5537 %cast = bitcast i64 %result to <2 x float>
5538 ret <2 x float> %cast
; 64-bit funnel shift right with mixed operand placement: %lhs is a plain
; argument read from v[0:1]; %rhs and %amt are marked inreg (the checks read
; them from s[0:1] and s[2:3]).
; Expected expansion in the checks: the lhs half ((lhs << 1) << (63 & ~amt))
; is computed with vector shifts, the rhs half (rhs >> (amt & 63)) with a
; scalar shift, and the two are combined with per-dword vector ORs.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them instead of editing by hand.
5541 define amdgpu_ps <2 x float> @v_fshr_i64_vss(i64 %lhs, i64 inreg %rhs, i64 inreg %amt) {
5542 ; GFX6-LABEL: v_fshr_i64_vss:
5544 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
5545 ; GFX6-NEXT: s_and_b64 s[4:5], s[2:3], 63
5546 ; GFX6-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5547 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s2
5548 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s4
5549 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
5550 ; GFX6-NEXT: v_or_b32_e32 v1, s1, v1
5551 ; GFX6-NEXT: ; return to shader part epilog
5553 ; GFX8-LABEL: v_fshr_i64_vss:
5555 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5556 ; GFX8-NEXT: s_and_b64 s[4:5], s[2:3], 63
5557 ; GFX8-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5558 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], s2, v[0:1]
5559 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s4
5560 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v0
5561 ; GFX8-NEXT: v_or_b32_e32 v1, s1, v1
5562 ; GFX8-NEXT: ; return to shader part epilog
5564 ; GFX9-LABEL: v_fshr_i64_vss:
5566 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5567 ; GFX9-NEXT: s_and_b64 s[4:5], s[2:3], 63
5568 ; GFX9-NEXT: s_andn2_b64 s[2:3], 63, s[2:3]
5569 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], s2, v[0:1]
5570 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s4
5571 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v0
5572 ; GFX9-NEXT: v_or_b32_e32 v1, s1, v1
5573 ; GFX9-NEXT: ; return to shader part epilog
5575 ; GFX10-LABEL: v_fshr_i64_vss:
5577 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5578 ; GFX10-NEXT: s_andn2_b64 s[4:5], 63, s[2:3]
5579 ; GFX10-NEXT: s_and_b64 s[2:3], s[2:3], 63
5580 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
5581 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1]
5582 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v0
5583 ; GFX10-NEXT: v_or_b32_e32 v1, s1, v1
5584 ; GFX10-NEXT: ; return to shader part epilog
5586 ; GFX11-LABEL: v_fshr_i64_vss:
5588 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5589 ; GFX11-NEXT: s_and_not1_b64 s[4:5], 63, s[2:3]
5590 ; GFX11-NEXT: s_and_b64 s[2:3], s[2:3], 63
5591 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
5592 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
5593 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1]
5594 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
5595 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v0
5596 ; GFX11-NEXT: v_or_b32_e32 v1, s1, v1
5597 ; GFX11-NEXT: ; return to shader part epilog
; IR under test: the i64 result is bitcast to <2 x float> before being returned.
5598 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt)
5599 %cast = bitcast i64 %result to <2 x float>
5600 ret <2 x float> %cast
; Vector funnel shift right on <2 x i64> with every operand marked inreg.
; In the checks %lhs is read from s[0:3], %rhs from s[4:7] and %amt from
; s[8:11]; each 64-bit element is expanded independently with scalar
; instructions as ((lhs << 1) << (63 & ~amt)) | (rhs >> (amt & 63)), and the
; results are left in s[0:1] and s[2:3].
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them instead of editing by hand.
5603 define amdgpu_ps <2 x i64> @s_fshr_v2i64(<2 x i64> inreg %lhs, <2 x i64> inreg %rhs, <2 x i64> inreg %amt) {
5604 ; GFX6-LABEL: s_fshr_v2i64:
5606 ; GFX6-NEXT: s_and_b64 s[12:13], s[8:9], 63
5607 ; GFX6-NEXT: s_andn2_b64 s[8:9], 63, s[8:9]
5608 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5609 ; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s8
5610 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s12
5611 ; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
5612 ; GFX6-NEXT: s_and_b64 s[4:5], s[10:11], 63
5613 ; GFX6-NEXT: s_andn2_b64 s[8:9], 63, s[10:11]
5614 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
5615 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], s8
5616 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[6:7], s4
5617 ; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
5618 ; GFX6-NEXT: ; return to shader part epilog
5620 ; GFX8-LABEL: s_fshr_v2i64:
5622 ; GFX8-NEXT: s_and_b64 s[12:13], s[8:9], 63
5623 ; GFX8-NEXT: s_andn2_b64 s[8:9], 63, s[8:9]
5624 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5625 ; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s8
5626 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], s12
5627 ; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
5628 ; GFX8-NEXT: s_and_b64 s[4:5], s[10:11], 63
5629 ; GFX8-NEXT: s_andn2_b64 s[8:9], 63, s[10:11]
5630 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
5631 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s8
5632 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[6:7], s4
5633 ; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
5634 ; GFX8-NEXT: ; return to shader part epilog
5636 ; GFX9-LABEL: s_fshr_v2i64:
5638 ; GFX9-NEXT: s_and_b64 s[12:13], s[8:9], 63
5639 ; GFX9-NEXT: s_andn2_b64 s[8:9], 63, s[8:9]
5640 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5641 ; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s8
5642 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s12
5643 ; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
5644 ; GFX9-NEXT: s_and_b64 s[4:5], s[10:11], 63
5645 ; GFX9-NEXT: s_andn2_b64 s[8:9], 63, s[10:11]
5646 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
5647 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s8
5648 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[6:7], s4
5649 ; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
5650 ; GFX9-NEXT: ; return to shader part epilog
5652 ; GFX10-LABEL: s_fshr_v2i64:
5654 ; GFX10-NEXT: s_andn2_b64 s[12:13], 63, s[8:9]
5655 ; GFX10-NEXT: s_and_b64 s[8:9], s[8:9], 63
5656 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5657 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
5658 ; GFX10-NEXT: s_andn2_b64 s[8:9], 63, s[10:11]
5659 ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
5660 ; GFX10-NEXT: s_and_b64 s[10:11], s[10:11], 63
5661 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s12
5662 ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], s8
5663 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s10
5664 ; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
5665 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7]
5666 ; GFX10-NEXT: ; return to shader part epilog
5668 ; GFX11-LABEL: s_fshr_v2i64:
5670 ; GFX11-NEXT: s_and_not1_b64 s[12:13], 63, s[8:9]
5671 ; GFX11-NEXT: s_and_b64 s[8:9], s[8:9], 63
5672 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5673 ; GFX11-NEXT: s_lshr_b64 s[4:5], s[4:5], s8
5674 ; GFX11-NEXT: s_and_not1_b64 s[8:9], 63, s[10:11]
5675 ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
5676 ; GFX11-NEXT: s_and_b64 s[10:11], s[10:11], 63
5677 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s12
5678 ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], s8
5679 ; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], s10
5680 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
5681 ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7]
5682 ; GFX11-NEXT: ; return to shader part epilog
; IR under test: a single vector fshr call whose result is returned directly.
5683 %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt)
5684 ret <2 x i64> %result
; Vector funnel shift right on <2 x i64> with no inreg operands and no
; amdgpu_ps calling convention; the checks end with s_setpc_b64 s[30:31].
; In the checks %lhs is read from v[0:3], %rhs from v[4:7], and the low dword
; of each shift-amount element from v8 and v10. Each 64-bit element is
; expanded as ((lhs << 1) << (~amt & 63)) | (rhs >> (amt & 63)).
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them instead of editing by hand.
5687 define <2 x i64> @v_fshr_v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) {
5688 ; GFX6-LABEL: v_fshr_v2i64:
5690 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5691 ; GFX6-NEXT: v_and_b32_e32 v9, 63, v8
5692 ; GFX6-NEXT: v_not_b32_e32 v8, v8
5693 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
5694 ; GFX6-NEXT: v_and_b32_e32 v8, 63, v8
5695 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v8
5696 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[4:5], v9
5697 ; GFX6-NEXT: v_not_b32_e32 v8, v10
5698 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
5699 ; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
5700 ; GFX6-NEXT: v_and_b32_e32 v4, 63, v10
5701 ; GFX6-NEXT: v_and_b32_e32 v8, 63, v8
5702 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], v8
5703 ; GFX6-NEXT: v_lshr_b64 v[6:7], v[6:7], v4
5704 ; GFX6-NEXT: v_or_b32_e32 v1, v1, v5
5705 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v6
5706 ; GFX6-NEXT: v_or_b32_e32 v3, v3, v7
5707 ; GFX6-NEXT: s_setpc_b64 s[30:31]
5709 ; GFX8-LABEL: v_fshr_v2i64:
5711 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5712 ; GFX8-NEXT: v_and_b32_e32 v9, 63, v8
5713 ; GFX8-NEXT: v_not_b32_e32 v8, v8
5714 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5715 ; GFX8-NEXT: v_and_b32_e32 v8, 63, v8
5716 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1]
5717 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5]
5718 ; GFX8-NEXT: v_not_b32_e32 v8, v10
5719 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
5720 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
5721 ; GFX8-NEXT: v_and_b32_e32 v4, 63, v10
5722 ; GFX8-NEXT: v_and_b32_e32 v8, 63, v8
5723 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], v8, v[2:3]
5724 ; GFX8-NEXT: v_lshrrev_b64 v[6:7], v4, v[6:7]
5725 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v5
5726 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v6
5727 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v7
5728 ; GFX8-NEXT: s_setpc_b64 s[30:31]
5730 ; GFX9-LABEL: v_fshr_v2i64:
5732 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5733 ; GFX9-NEXT: v_and_b32_e32 v9, 63, v8
5734 ; GFX9-NEXT: v_not_b32_e32 v8, v8
5735 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5736 ; GFX9-NEXT: v_and_b32_e32 v8, 63, v8
5737 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1]
5738 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5]
5739 ; GFX9-NEXT: v_not_b32_e32 v8, v10
5740 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
5741 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v4
5742 ; GFX9-NEXT: v_and_b32_e32 v4, 63, v10
5743 ; GFX9-NEXT: v_and_b32_e32 v8, 63, v8
5744 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v8, v[2:3]
5745 ; GFX9-NEXT: v_lshrrev_b64 v[6:7], v4, v[6:7]
5746 ; GFX9-NEXT: v_or_b32_e32 v1, v1, v5
5747 ; GFX9-NEXT: v_or_b32_e32 v2, v2, v6
5748 ; GFX9-NEXT: v_or_b32_e32 v3, v3, v7
5749 ; GFX9-NEXT: s_setpc_b64 s[30:31]
5751 ; GFX10-LABEL: v_fshr_v2i64:
5753 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5754 ; GFX10-NEXT: v_not_b32_e32 v9, v8
5755 ; GFX10-NEXT: v_not_b32_e32 v11, v10
5756 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5757 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
5758 ; GFX10-NEXT: v_and_b32_e32 v8, 63, v8
5759 ; GFX10-NEXT: v_and_b32_e32 v9, 63, v9
5760 ; GFX10-NEXT: v_and_b32_e32 v11, 63, v11
5761 ; GFX10-NEXT: v_and_b32_e32 v10, 63, v10
5762 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5]
5763 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1]
5764 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], v11, v[2:3]
5765 ; GFX10-NEXT: v_lshrrev_b64 v[6:7], v10, v[6:7]
5766 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v4
5767 ; GFX10-NEXT: v_or_b32_e32 v1, v1, v5
5768 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v6
5769 ; GFX10-NEXT: v_or_b32_e32 v3, v3, v7
5770 ; GFX10-NEXT: s_setpc_b64 s[30:31]
5772 ; GFX11-LABEL: v_fshr_v2i64:
5774 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5775 ; GFX11-NEXT: v_not_b32_e32 v9, v8
5776 ; GFX11-NEXT: v_not_b32_e32 v11, v10
5777 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
5778 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
5779 ; GFX11-NEXT: v_and_b32_e32 v8, 63, v8
5780 ; GFX11-NEXT: v_and_b32_e32 v9, 63, v9
5781 ; GFX11-NEXT: v_and_b32_e32 v11, 63, v11
5782 ; GFX11-NEXT: v_and_b32_e32 v10, 63, v10
5783 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
5784 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5]
5785 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1]
5786 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
5787 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], v11, v[2:3]
5788 ; GFX11-NEXT: v_lshrrev_b64 v[6:7], v10, v[6:7]
5789 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
5790 ; GFX11-NEXT: v_or_b32_e32 v0, v0, v4
5791 ; GFX11-NEXT: v_or_b32_e32 v1, v1, v5
5792 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
5793 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v6
5794 ; GFX11-NEXT: v_or_b32_e32 v3, v3, v7
5795 ; GFX11-NEXT: s_setpc_b64 s[30:31]
; IR under test: a single vector fshr call whose result is returned directly.
5796 %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt)
5797 ret <2 x i64> %result
; 128-bit funnel shift right with every operand marked inreg.
; In the checks %lhs is read from s[0:3], %rhs from s[4:7], and the shift
; amount from s[8:9]; the amount is masked with 0x7f (and its complement with
; 0x7f for the lhs side). lhs is first shifted left by one across both 64-bit
; halves (the bit carried between halves comes from "s_lshr_b32 ..., s1, 31").
; Each 128-bit variable shift is then built from 64-bit shifts plus
; s_cselect on "amount < 64" and "amount == 0", and the two halves of the
; result are ORed into s[0:1] and s[2:3].
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them instead of editing by hand.
5800 define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) {
5801 ; GFX6-LABEL: s_fshr_i128:
5803 ; GFX6-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f
5804 ; GFX6-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9]
5805 ; GFX6-NEXT: s_lshl_b64 s[12:13], s[0:1], 1
5806 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
5807 ; GFX6-NEXT: s_lshr_b32 s0, s1, 31
5808 ; GFX6-NEXT: s_mov_b32 s1, 0
5809 ; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
5810 ; GFX6-NEXT: s_sub_i32 s11, s8, 64
5811 ; GFX6-NEXT: s_sub_i32 s9, 64, s8
5812 ; GFX6-NEXT: s_cmp_lt_u32 s8, 64
5813 ; GFX6-NEXT: s_cselect_b32 s16, 1, 0
5814 ; GFX6-NEXT: s_cmp_eq_u32 s8, 0
5815 ; GFX6-NEXT: s_cselect_b32 s17, 1, 0
5816 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[12:13], s8
5817 ; GFX6-NEXT: s_lshr_b64 s[14:15], s[12:13], s9
5818 ; GFX6-NEXT: s_lshl_b64 s[8:9], s[0:1], s8
5819 ; GFX6-NEXT: s_or_b64 s[8:9], s[14:15], s[8:9]
5820 ; GFX6-NEXT: s_lshl_b64 s[12:13], s[12:13], s11
5821 ; GFX6-NEXT: s_cmp_lg_u32 s16, 0
5822 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
5823 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[12:13]
5824 ; GFX6-NEXT: s_cmp_lg_u32 s17, 0
5825 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9]
5826 ; GFX6-NEXT: s_sub_i32 s14, s10, 64
5827 ; GFX6-NEXT: s_sub_i32 s12, 64, s10
5828 ; GFX6-NEXT: s_cmp_lt_u32 s10, 64
5829 ; GFX6-NEXT: s_cselect_b32 s15, 1, 0
5830 ; GFX6-NEXT: s_cmp_eq_u32 s10, 0
5831 ; GFX6-NEXT: s_cselect_b32 s16, 1, 0
5832 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[6:7], s10
5833 ; GFX6-NEXT: s_lshr_b64 s[10:11], s[4:5], s10
5834 ; GFX6-NEXT: s_lshl_b64 s[12:13], s[6:7], s12
5835 ; GFX6-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13]
5836 ; GFX6-NEXT: s_lshr_b64 s[6:7], s[6:7], s14
5837 ; GFX6-NEXT: s_cmp_lg_u32 s15, 0
5838 ; GFX6-NEXT: s_cselect_b64 s[6:7], s[10:11], s[6:7]
5839 ; GFX6-NEXT: s_cmp_lg_u32 s16, 0
5840 ; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
5841 ; GFX6-NEXT: s_cmp_lg_u32 s15, 0
5842 ; GFX6-NEXT: s_cselect_b64 s[6:7], s[0:1], 0
5843 ; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
5844 ; GFX6-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7]
5845 ; GFX6-NEXT: ; return to shader part epilog
5847 ; GFX8-LABEL: s_fshr_i128:
5849 ; GFX8-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f
5850 ; GFX8-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9]
5851 ; GFX8-NEXT: s_lshl_b64 s[12:13], s[0:1], 1
5852 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
5853 ; GFX8-NEXT: s_lshr_b32 s0, s1, 31
5854 ; GFX8-NEXT: s_mov_b32 s1, 0
5855 ; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
5856 ; GFX8-NEXT: s_sub_i32 s11, s8, 64
5857 ; GFX8-NEXT: s_sub_i32 s9, 64, s8
5858 ; GFX8-NEXT: s_cmp_lt_u32 s8, 64
5859 ; GFX8-NEXT: s_cselect_b32 s16, 1, 0
5860 ; GFX8-NEXT: s_cmp_eq_u32 s8, 0
5861 ; GFX8-NEXT: s_cselect_b32 s17, 1, 0
5862 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[12:13], s8
5863 ; GFX8-NEXT: s_lshr_b64 s[14:15], s[12:13], s9
5864 ; GFX8-NEXT: s_lshl_b64 s[8:9], s[0:1], s8
5865 ; GFX8-NEXT: s_or_b64 s[8:9], s[14:15], s[8:9]
5866 ; GFX8-NEXT: s_lshl_b64 s[12:13], s[12:13], s11
5867 ; GFX8-NEXT: s_cmp_lg_u32 s16, 0
5868 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
5869 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[12:13]
5870 ; GFX8-NEXT: s_cmp_lg_u32 s17, 0
5871 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9]
5872 ; GFX8-NEXT: s_sub_i32 s14, s10, 64
5873 ; GFX8-NEXT: s_sub_i32 s12, 64, s10
5874 ; GFX8-NEXT: s_cmp_lt_u32 s10, 64
5875 ; GFX8-NEXT: s_cselect_b32 s15, 1, 0
5876 ; GFX8-NEXT: s_cmp_eq_u32 s10, 0
5877 ; GFX8-NEXT: s_cselect_b32 s16, 1, 0
5878 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[6:7], s10
5879 ; GFX8-NEXT: s_lshr_b64 s[10:11], s[4:5], s10
5880 ; GFX8-NEXT: s_lshl_b64 s[12:13], s[6:7], s12
5881 ; GFX8-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13]
5882 ; GFX8-NEXT: s_lshr_b64 s[6:7], s[6:7], s14
5883 ; GFX8-NEXT: s_cmp_lg_u32 s15, 0
5884 ; GFX8-NEXT: s_cselect_b64 s[6:7], s[10:11], s[6:7]
5885 ; GFX8-NEXT: s_cmp_lg_u32 s16, 0
5886 ; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
5887 ; GFX8-NEXT: s_cmp_lg_u32 s15, 0
5888 ; GFX8-NEXT: s_cselect_b64 s[6:7], s[0:1], 0
5889 ; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
5890 ; GFX8-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7]
5891 ; GFX8-NEXT: ; return to shader part epilog
5893 ; GFX9-LABEL: s_fshr_i128:
5895 ; GFX9-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f
5896 ; GFX9-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9]
5897 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[0:1], 1
5898 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
5899 ; GFX9-NEXT: s_lshr_b32 s0, s1, 31
5900 ; GFX9-NEXT: s_mov_b32 s1, 0
5901 ; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
5902 ; GFX9-NEXT: s_sub_i32 s11, s8, 64
5903 ; GFX9-NEXT: s_sub_i32 s9, 64, s8
5904 ; GFX9-NEXT: s_cmp_lt_u32 s8, 64
5905 ; GFX9-NEXT: s_cselect_b32 s16, 1, 0
5906 ; GFX9-NEXT: s_cmp_eq_u32 s8, 0
5907 ; GFX9-NEXT: s_cselect_b32 s17, 1, 0
5908 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[12:13], s8
5909 ; GFX9-NEXT: s_lshr_b64 s[14:15], s[12:13], s9
5910 ; GFX9-NEXT: s_lshl_b64 s[8:9], s[0:1], s8
5911 ; GFX9-NEXT: s_or_b64 s[8:9], s[14:15], s[8:9]
5912 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[12:13], s11
5913 ; GFX9-NEXT: s_cmp_lg_u32 s16, 0
5914 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
5915 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[12:13]
5916 ; GFX9-NEXT: s_cmp_lg_u32 s17, 0
5917 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9]
5918 ; GFX9-NEXT: s_sub_i32 s14, s10, 64
5919 ; GFX9-NEXT: s_sub_i32 s12, 64, s10
5920 ; GFX9-NEXT: s_cmp_lt_u32 s10, 64
5921 ; GFX9-NEXT: s_cselect_b32 s15, 1, 0
5922 ; GFX9-NEXT: s_cmp_eq_u32 s10, 0
5923 ; GFX9-NEXT: s_cselect_b32 s16, 1, 0
5924 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[6:7], s10
5925 ; GFX9-NEXT: s_lshr_b64 s[10:11], s[4:5], s10
5926 ; GFX9-NEXT: s_lshl_b64 s[12:13], s[6:7], s12
5927 ; GFX9-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13]
5928 ; GFX9-NEXT: s_lshr_b64 s[6:7], s[6:7], s14
5929 ; GFX9-NEXT: s_cmp_lg_u32 s15, 0
5930 ; GFX9-NEXT: s_cselect_b64 s[6:7], s[10:11], s[6:7]
5931 ; GFX9-NEXT: s_cmp_lg_u32 s16, 0
5932 ; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
5933 ; GFX9-NEXT: s_cmp_lg_u32 s15, 0
5934 ; GFX9-NEXT: s_cselect_b64 s[6:7], s[0:1], 0
5935 ; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
5936 ; GFX9-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7]
5937 ; GFX9-NEXT: ; return to shader part epilog
5939 ; GFX10-LABEL: s_fshr_i128:
5941 ; GFX10-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f
5942 ; GFX10-NEXT: s_andn2_b64 s[8:9], 0x7f, s[8:9]
5943 ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
5944 ; GFX10-NEXT: s_lshr_b32 s12, s1, 31
5945 ; GFX10-NEXT: s_mov_b32 s13, 0
5946 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5947 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[12:13]
5948 ; GFX10-NEXT: s_sub_i32 s11, s8, 64
5949 ; GFX10-NEXT: s_sub_i32 s9, 64, s8
5950 ; GFX10-NEXT: s_cmp_lt_u32 s8, 64
5951 ; GFX10-NEXT: s_cselect_b32 s16, 1, 0
5952 ; GFX10-NEXT: s_cmp_eq_u32 s8, 0
5953 ; GFX10-NEXT: s_cselect_b32 s17, 1, 0
5954 ; GFX10-NEXT: s_lshr_b64 s[12:13], s[0:1], s9
5955 ; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], s8
5956 ; GFX10-NEXT: s_lshl_b64 s[8:9], s[0:1], s8
5957 ; GFX10-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15]
5958 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s11
5959 ; GFX10-NEXT: s_cmp_lg_u32 s16, 0
5960 ; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0
5961 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[12:13], s[0:1]
5962 ; GFX10-NEXT: s_cmp_lg_u32 s17, 0
5963 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
5964 ; GFX10-NEXT: s_sub_i32 s14, s10, 64
5965 ; GFX10-NEXT: s_sub_i32 s11, 64, s10
5966 ; GFX10-NEXT: s_cmp_lt_u32 s10, 64
5967 ; GFX10-NEXT: s_cselect_b32 s15, 1, 0
5968 ; GFX10-NEXT: s_cmp_eq_u32 s10, 0
5969 ; GFX10-NEXT: s_cselect_b32 s16, 1, 0
5970 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[4:5], s10
5971 ; GFX10-NEXT: s_lshl_b64 s[12:13], s[6:7], s11
5972 ; GFX10-NEXT: s_lshr_b64 s[10:11], s[6:7], s10
5973 ; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[12:13]
5974 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s14
5975 ; GFX10-NEXT: s_cmp_lg_u32 s15, 0
5976 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[6:7]
5977 ; GFX10-NEXT: s_cmp_lg_u32 s16, 0
5978 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1]
5979 ; GFX10-NEXT: s_cmp_lg_u32 s15, 0
5980 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[10:11], 0
5981 ; GFX10-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
5982 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
5983 ; GFX10-NEXT: ; return to shader part epilog
5985 ; GFX11-LABEL: s_fshr_i128:
5987 ; GFX11-NEXT: s_and_b64 s[10:11], s[8:9], 0x7f
5988 ; GFX11-NEXT: s_and_not1_b64 s[8:9], 0x7f, s[8:9]
5989 ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
5990 ; GFX11-NEXT: s_lshr_b32 s12, s1, 31
5991 ; GFX11-NEXT: s_mov_b32 s13, 0
5992 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
5993 ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[12:13]
5994 ; GFX11-NEXT: s_sub_i32 s11, s8, 64
5995 ; GFX11-NEXT: s_sub_i32 s9, 64, s8
5996 ; GFX11-NEXT: s_cmp_lt_u32 s8, 64
5997 ; GFX11-NEXT: s_cselect_b32 s16, 1, 0
5998 ; GFX11-NEXT: s_cmp_eq_u32 s8, 0
5999 ; GFX11-NEXT: s_cselect_b32 s17, 1, 0
6000 ; GFX11-NEXT: s_lshr_b64 s[12:13], s[0:1], s9
6001 ; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], s8
6002 ; GFX11-NEXT: s_lshl_b64 s[8:9], s[0:1], s8
6003 ; GFX11-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15]
6004 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s11
6005 ; GFX11-NEXT: s_cmp_lg_u32 s16, 0
6006 ; GFX11-NEXT: s_cselect_b64 s[8:9], s[8:9], 0
6007 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[12:13], s[0:1]
6008 ; GFX11-NEXT: s_cmp_lg_u32 s17, 0
6009 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
6010 ; GFX11-NEXT: s_sub_i32 s14, s10, 64
6011 ; GFX11-NEXT: s_sub_i32 s11, 64, s10
6012 ; GFX11-NEXT: s_cmp_lt_u32 s10, 64
6013 ; GFX11-NEXT: s_cselect_b32 s15, 1, 0
6014 ; GFX11-NEXT: s_cmp_eq_u32 s10, 0
6015 ; GFX11-NEXT: s_cselect_b32 s16, 1, 0
6016 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[4:5], s10
6017 ; GFX11-NEXT: s_lshl_b64 s[12:13], s[6:7], s11
6018 ; GFX11-NEXT: s_lshr_b64 s[10:11], s[6:7], s10
6019 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[12:13]
6020 ; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], s14
6021 ; GFX11-NEXT: s_cmp_lg_u32 s15, 0
6022 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[6:7]
6023 ; GFX11-NEXT: s_cmp_lg_u32 s16, 0
6024 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1]
6025 ; GFX11-NEXT: s_cmp_lg_u32 s15, 0
6026 ; GFX11-NEXT: s_cselect_b64 s[4:5], s[10:11], 0
6027 ; GFX11-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1]
6028 ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
6029 ; GFX11-NEXT: ; return to shader part epilog
; IR under test: a single i128 fshr call with a variable shift amount.
6030 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
6034 define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) {
6035 ; GFX6-LABEL: v_fshr_i128:
6037 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6038 ; GFX6-NEXT: v_and_b32_e32 v14, 0x7f, v8
6039 ; GFX6-NEXT: v_not_b32_e32 v8, v8
6040 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
6041 ; GFX6-NEXT: v_and_b32_e32 v15, 0x7f, v8
6042 ; GFX6-NEXT: v_lshl_b64 v[8:9], v[0:1], 1
6043 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 31, v1
6044 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v0
6045 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v15
6046 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[8:9], v0
6047 ; GFX6-NEXT: v_lshl_b64 v[10:11], v[2:3], v15
6048 ; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, 64, v15
6049 ; GFX6-NEXT: v_lshl_b64 v[12:13], v[8:9], v15
6050 ; GFX6-NEXT: v_or_b32_e32 v10, v0, v10
6051 ; GFX6-NEXT: v_or_b32_e32 v11, v1, v11
6052 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[8:9], v16
6053 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
6054 ; GFX6-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc
6055 ; GFX6-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc
6056 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
6057 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
6058 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15
6059 ; GFX6-NEXT: v_cndmask_b32_e32 v10, v0, v2, vcc
6060 ; GFX6-NEXT: v_cndmask_b32_e32 v11, v1, v3, vcc
6061 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v14
6062 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], v14
6063 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[6:7], v2
6064 ; GFX6-NEXT: v_subrev_i32_e32 v15, vcc, 64, v14
6065 ; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
6066 ; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
6067 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[6:7], v15
6068 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[6:7], v14
6069 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
6070 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6071 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6072 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
6073 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5]
6074 ; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5]
6075 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
6076 ; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
6077 ; GFX6-NEXT: v_or_b32_e32 v0, v12, v0
6078 ; GFX6-NEXT: v_or_b32_e32 v1, v13, v1
6079 ; GFX6-NEXT: v_or_b32_e32 v2, v10, v2
6080 ; GFX6-NEXT: v_or_b32_e32 v3, v11, v3
6081 ; GFX6-NEXT: s_setpc_b64 s[30:31]
6083 ; GFX8-LABEL: v_fshr_i128:
6085 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6086 ; GFX8-NEXT: v_and_b32_e32 v14, 0x7f, v8
6087 ; GFX8-NEXT: v_not_b32_e32 v8, v8
6088 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
6089 ; GFX8-NEXT: v_and_b32_e32 v15, 0x7f, v8
6090 ; GFX8-NEXT: v_lshlrev_b64 v[8:9], 1, v[0:1]
6091 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 31, v1
6092 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v0
6093 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v15
6094 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, v[8:9]
6095 ; GFX8-NEXT: v_lshlrev_b64 v[10:11], v15, v[2:3]
6096 ; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, 64, v15
6097 ; GFX8-NEXT: v_lshlrev_b64 v[12:13], v15, v[8:9]
6098 ; GFX8-NEXT: v_or_b32_e32 v10, v0, v10
6099 ; GFX8-NEXT: v_or_b32_e32 v11, v1, v11
6100 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v16, v[8:9]
6101 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
6102 ; GFX8-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc
6103 ; GFX8-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc
6104 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
6105 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
6106 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15
6107 ; GFX8-NEXT: v_cndmask_b32_e32 v10, v0, v2, vcc
6108 ; GFX8-NEXT: v_cndmask_b32_e32 v11, v1, v3, vcc
6109 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v14
6110 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v14, v[4:5]
6111 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7]
6112 ; GFX8-NEXT: v_subrev_u32_e32 v15, vcc, 64, v14
6113 ; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
6114 ; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
6115 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v15, v[6:7]
6116 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], v14, v[6:7]
6117 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
6118 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6119 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6120 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
6121 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5]
6122 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5]
6123 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
6124 ; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
6125 ; GFX8-NEXT: v_or_b32_e32 v0, v12, v0
6126 ; GFX8-NEXT: v_or_b32_e32 v1, v13, v1
6127 ; GFX8-NEXT: v_or_b32_e32 v2, v10, v2
6128 ; GFX8-NEXT: v_or_b32_e32 v3, v11, v3
6129 ; GFX8-NEXT: s_setpc_b64 s[30:31]
6131 ; GFX9-LABEL: v_fshr_i128:
6133 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6134 ; GFX9-NEXT: v_and_b32_e32 v14, 0x7f, v8
6135 ; GFX9-NEXT: v_not_b32_e32 v8, v8
6136 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
6137 ; GFX9-NEXT: v_and_b32_e32 v15, 0x7f, v8
6138 ; GFX9-NEXT: v_lshlrev_b64 v[8:9], 1, v[0:1]
6139 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 31, v1
6140 ; GFX9-NEXT: v_or_b32_e32 v2, v2, v0
6141 ; GFX9-NEXT: v_sub_u32_e32 v0, 64, v15
6142 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, v[8:9]
6143 ; GFX9-NEXT: v_lshlrev_b64 v[10:11], v15, v[2:3]
6144 ; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v15
6145 ; GFX9-NEXT: v_lshlrev_b64 v[12:13], v15, v[8:9]
6146 ; GFX9-NEXT: v_or_b32_e32 v10, v0, v10
6147 ; GFX9-NEXT: v_or_b32_e32 v11, v1, v11
6148 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v16, v[8:9]
6149 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15
6150 ; GFX9-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc
6151 ; GFX9-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc
6152 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
6153 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
6154 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15
6155 ; GFX9-NEXT: v_cndmask_b32_e32 v10, v0, v2, vcc
6156 ; GFX9-NEXT: v_sub_u32_e32 v2, 64, v14
6157 ; GFX9-NEXT: v_cndmask_b32_e32 v11, v1, v3, vcc
6158 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v14, v[4:5]
6159 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7]
6160 ; GFX9-NEXT: v_subrev_u32_e32 v15, 64, v14
6161 ; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
6162 ; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
6163 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v15, v[6:7]
6164 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v14, v[6:7]
6165 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
6166 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6167 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6168 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
6169 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5]
6170 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5]
6171 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
6172 ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
6173 ; GFX9-NEXT: v_or_b32_e32 v0, v12, v0
6174 ; GFX9-NEXT: v_or_b32_e32 v1, v13, v1
6175 ; GFX9-NEXT: v_or_b32_e32 v2, v10, v2
6176 ; GFX9-NEXT: v_or_b32_e32 v3, v11, v3
6177 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6179 ; GFX10-LABEL: v_fshr_i128:
6181 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6182 ; GFX10-NEXT: v_not_b32_e32 v9, v8
6183 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
6184 ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 31, v1
6185 ; GFX10-NEXT: v_and_b32_e32 v19, 0x7f, v8
6186 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
6187 ; GFX10-NEXT: v_and_b32_e32 v18, 0x7f, v9
6188 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v10
6189 ; GFX10-NEXT: v_sub_nc_u32_e32 v16, 64, v19
6190 ; GFX10-NEXT: v_subrev_nc_u32_e32 v21, 64, v19
6191 ; GFX10-NEXT: v_sub_nc_u32_e32 v10, 64, v18
6192 ; GFX10-NEXT: v_subrev_nc_u32_e32 v20, 64, v18
6193 ; GFX10-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3]
6194 ; GFX10-NEXT: v_lshrrev_b64 v[12:13], v19, v[4:5]
6195 ; GFX10-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7]
6196 ; GFX10-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1]
6197 ; GFX10-NEXT: v_lshlrev_b64 v[14:15], v18, v[0:1]
6198 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1]
6199 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18
6200 ; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v19
6201 ; GFX10-NEXT: v_or_b32_e32 v12, v12, v16
6202 ; GFX10-NEXT: v_or_b32_e32 v10, v10, v8
6203 ; GFX10-NEXT: v_or_b32_e32 v11, v11, v9
6204 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], v21, v[6:7]
6205 ; GFX10-NEXT: v_or_b32_e32 v13, v13, v17
6206 ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v19
6207 ; GFX10-NEXT: v_cndmask_b32_e32 v10, v0, v10, vcc_lo
6208 ; GFX10-NEXT: v_cndmask_b32_e32 v11, v1, v11, vcc_lo
6209 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v19, v[6:7]
6210 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v12, s4
6211 ; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v18
6212 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v9, v13, s4
6213 ; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v14, vcc_lo
6214 ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v15, vcc_lo
6215 ; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v4, s5
6216 ; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s6
6217 ; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s6
6218 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v6, v5, s5
6219 ; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v0, s4
6220 ; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v1, s4
6221 ; GFX10-NEXT: v_or_b32_e32 v0, v14, v4
6222 ; GFX10-NEXT: v_or_b32_e32 v1, v7, v5
6223 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v6
6224 ; GFX10-NEXT: v_or_b32_e32 v3, v3, v8
6225 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6227 ; GFX11-LABEL: v_fshr_i128:
6229 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6230 ; GFX11-NEXT: v_not_b32_e32 v9, v8
6231 ; GFX11-NEXT: v_lshrrev_b32_e32 v10, 31, v1
6232 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
6233 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
6234 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
6235 ; GFX11-NEXT: v_and_b32_e32 v18, 0x7f, v9
6236 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v10
6237 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
6238 ; GFX11-NEXT: v_lshlrev_b64 v[14:15], v18, v[0:1]
6239 ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18
6240 ; GFX11-NEXT: v_and_b32_e32 v19, 0x7f, v8
6241 ; GFX11-NEXT: v_sub_nc_u32_e32 v10, 64, v18
6242 ; GFX11-NEXT: v_subrev_nc_u32_e32 v20, 64, v18
6243 ; GFX11-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3]
6244 ; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v14, vcc_lo
6245 ; GFX11-NEXT: v_sub_nc_u32_e32 v16, 64, v19
6246 ; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1]
6247 ; GFX11-NEXT: v_subrev_nc_u32_e32 v21, 64, v19
6248 ; GFX11-NEXT: v_lshrrev_b64 v[12:13], v19, v[4:5]
6249 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1]
6250 ; GFX11-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7]
6251 ; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v19
6252 ; GFX11-NEXT: v_or_b32_e32 v10, v10, v8
6253 ; GFX11-NEXT: v_or_b32_e32 v11, v11, v9
6254 ; GFX11-NEXT: v_lshrrev_b64 v[8:9], v21, v[6:7]
6255 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v19
6256 ; GFX11-NEXT: v_or_b32_e32 v12, v12, v16
6257 ; GFX11-NEXT: v_or_b32_e32 v13, v13, v17
6258 ; GFX11-NEXT: v_dual_cndmask_b32 v10, v0, v10 :: v_dual_cndmask_b32 v11, v1, v11
6259 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], v19, v[6:7]
6260 ; GFX11-NEXT: v_cndmask_b32_e32 v7, 0, v15, vcc_lo
6261 ; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v12, s0
6262 ; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v18
6263 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v9, v13, s0
6264 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
6265 ; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, v4, s1
6266 ; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s2
6267 ; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s2
6268 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
6269 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v6, v5, s1
6270 ; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v0, s0
6271 ; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v1, s0
6272 ; GFX11-NEXT: v_or_b32_e32 v0, v14, v4
6273 ; GFX11-NEXT: v_or_b32_e32 v1, v7, v5
6274 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6275 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v6
6276 ; GFX11-NEXT: v_or_b32_e32 v3, v3, v8
6277 ; GFX11-NEXT: s_setpc_b64 s[30:31]
6278 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
; v_fshr_i128_ssv: 128-bit funnel shift right via llvm.fshr.i128; the i128
; result is bitcast to <4 x float> and returned. %lhs and %rhs are marked
; inreg, %amt is not. In the expected output below the inreg operands are read
; from s[0:3] and s[4:7], while the shift amount is read from v0 and masked
; with 0x7f (both directly and after a bitwise NOT) before it is used.
; The assertion lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
6282 define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, i128 %amt) {
6283 ; GFX6-LABEL: v_fshr_i128_ssv:
6285 ; GFX6-NEXT: v_and_b32_e32 v6, 0x7f, v0
6286 ; GFX6-NEXT: v_not_b32_e32 v0, v0
6287 ; GFX6-NEXT: s_mov_b32 s9, 0
6288 ; GFX6-NEXT: v_and_b32_e32 v7, 0x7f, v0
6289 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6290 ; GFX6-NEXT: s_lshr_b32 s8, s1, 31
6291 ; GFX6-NEXT: s_lshl_b64 s[10:11], s[0:1], 1
6292 ; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9]
6293 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v7
6294 ; GFX6-NEXT: v_lshr_b64 v[0:1], s[10:11], v0
6295 ; GFX6-NEXT: v_lshl_b64 v[2:3], s[0:1], v7
6296 ; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 64, v7
6297 ; GFX6-NEXT: v_lshl_b64 v[4:5], s[10:11], v7
6298 ; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
6299 ; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
6300 ; GFX6-NEXT: v_lshl_b64 v[0:1], s[10:11], v8
6301 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
6302 ; GFX6-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc
6303 ; GFX6-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
6304 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6305 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6306 ; GFX6-NEXT: v_mov_b32_e32 v2, s0
6307 ; GFX6-NEXT: v_mov_b32_e32 v3, s1
6308 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
6309 ; GFX6-NEXT: v_cndmask_b32_e32 v7, v0, v2, vcc
6310 ; GFX6-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc
6311 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v6
6312 ; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v6
6313 ; GFX6-NEXT: v_lshl_b64 v[2:3], s[6:7], v2
6314 ; GFX6-NEXT: v_subrev_i32_e32 v11, vcc, 64, v6
6315 ; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
6316 ; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
6317 ; GFX6-NEXT: v_lshr_b64 v[0:1], s[6:7], v11
6318 ; GFX6-NEXT: v_lshr_b64 v[4:5], s[6:7], v6
6319 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
6320 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6321 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6322 ; GFX6-NEXT: v_mov_b32_e32 v2, s4
6323 ; GFX6-NEXT: v_mov_b32_e32 v3, s5
6324 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6
6325 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
6326 ; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
6327 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
6328 ; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
6329 ; GFX6-NEXT: v_or_b32_e32 v0, v8, v0
6330 ; GFX6-NEXT: v_or_b32_e32 v1, v9, v1
6331 ; GFX6-NEXT: v_or_b32_e32 v2, v7, v2
6332 ; GFX6-NEXT: v_or_b32_e32 v3, v10, v3
6333 ; GFX6-NEXT: ; return to shader part epilog
6335 ; GFX8-LABEL: v_fshr_i128_ssv:
6337 ; GFX8-NEXT: v_and_b32_e32 v6, 0x7f, v0
6338 ; GFX8-NEXT: v_not_b32_e32 v0, v0
6339 ; GFX8-NEXT: s_mov_b32 s9, 0
6340 ; GFX8-NEXT: v_and_b32_e32 v7, 0x7f, v0
6341 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6342 ; GFX8-NEXT: s_lshr_b32 s8, s1, 31
6343 ; GFX8-NEXT: s_lshl_b64 s[10:11], s[0:1], 1
6344 ; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9]
6345 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v7
6346 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, s[10:11]
6347 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], v7, s[0:1]
6348 ; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 64, v7
6349 ; GFX8-NEXT: v_lshlrev_b64 v[4:5], v7, s[10:11]
6350 ; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
6351 ; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
6352 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v8, s[10:11]
6353 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
6354 ; GFX8-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc
6355 ; GFX8-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
6356 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6357 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6358 ; GFX8-NEXT: v_mov_b32_e32 v2, s0
6359 ; GFX8-NEXT: v_mov_b32_e32 v3, s1
6360 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
6361 ; GFX8-NEXT: v_cndmask_b32_e32 v7, v0, v2, vcc
6362 ; GFX8-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc
6363 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v6
6364 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v6, s[4:5]
6365 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, s[6:7]
6366 ; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, 64, v6
6367 ; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
6368 ; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
6369 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v11, s[6:7]
6370 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], v6, s[6:7]
6371 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
6372 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6373 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6374 ; GFX8-NEXT: v_mov_b32_e32 v2, s4
6375 ; GFX8-NEXT: v_mov_b32_e32 v3, s5
6376 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6
6377 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
6378 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
6379 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
6380 ; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
6381 ; GFX8-NEXT: v_or_b32_e32 v0, v8, v0
6382 ; GFX8-NEXT: v_or_b32_e32 v1, v9, v1
6383 ; GFX8-NEXT: v_or_b32_e32 v2, v7, v2
6384 ; GFX8-NEXT: v_or_b32_e32 v3, v10, v3
6385 ; GFX8-NEXT: ; return to shader part epilog
6387 ; GFX9-LABEL: v_fshr_i128_ssv:
6389 ; GFX9-NEXT: v_and_b32_e32 v6, 0x7f, v0
6390 ; GFX9-NEXT: v_not_b32_e32 v0, v0
6391 ; GFX9-NEXT: s_mov_b32 s9, 0
6392 ; GFX9-NEXT: v_and_b32_e32 v7, 0x7f, v0
6393 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6394 ; GFX9-NEXT: s_lshr_b32 s8, s1, 31
6395 ; GFX9-NEXT: s_lshl_b64 s[10:11], s[0:1], 1
6396 ; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9]
6397 ; GFX9-NEXT: v_sub_u32_e32 v0, 64, v7
6398 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, s[10:11]
6399 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v7, s[0:1]
6400 ; GFX9-NEXT: v_subrev_u32_e32 v8, 64, v7
6401 ; GFX9-NEXT: v_lshlrev_b64 v[4:5], v7, s[10:11]
6402 ; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
6403 ; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
6404 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v8, s[10:11]
6405 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7
6406 ; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc
6407 ; GFX9-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc
6408 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6409 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6410 ; GFX9-NEXT: v_mov_b32_e32 v2, s0
6411 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7
6412 ; GFX9-NEXT: v_mov_b32_e32 v3, s1
6413 ; GFX9-NEXT: v_cndmask_b32_e32 v7, v0, v2, vcc
6414 ; GFX9-NEXT: v_sub_u32_e32 v2, 64, v6
6415 ; GFX9-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc
6416 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v6, s[4:5]
6417 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, s[6:7]
6418 ; GFX9-NEXT: v_subrev_u32_e32 v11, 64, v6
6419 ; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
6420 ; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
6421 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v11, s[6:7]
6422 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v6, s[6:7]
6423 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6
6424 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
6425 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
6426 ; GFX9-NEXT: v_mov_b32_e32 v2, s4
6427 ; GFX9-NEXT: v_mov_b32_e32 v3, s5
6428 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6
6429 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1]
6430 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1]
6431 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc
6432 ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc
6433 ; GFX9-NEXT: v_or_b32_e32 v0, v8, v0
6434 ; GFX9-NEXT: v_or_b32_e32 v1, v9, v1
6435 ; GFX9-NEXT: v_or_b32_e32 v2, v7, v2
6436 ; GFX9-NEXT: v_or_b32_e32 v3, v10, v3
6437 ; GFX9-NEXT: ; return to shader part epilog
6439 ; GFX10-LABEL: v_fshr_i128_ssv:
6441 ; GFX10-NEXT: v_not_b32_e32 v1, v0
6442 ; GFX10-NEXT: v_and_b32_e32 v13, 0x7f, v0
6443 ; GFX10-NEXT: s_mov_b32 s9, 0
6444 ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6445 ; GFX10-NEXT: s_lshr_b32 s8, s1, 31
6446 ; GFX10-NEXT: v_and_b32_e32 v12, 0x7f, v1
6447 ; GFX10-NEXT: v_sub_nc_u32_e32 v8, 64, v13
6448 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
6449 ; GFX10-NEXT: s_or_b64 s[8:9], s[2:3], s[8:9]
6450 ; GFX10-NEXT: v_subrev_nc_u32_e32 v14, 64, v13
6451 ; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v12
6452 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v12, s[8:9]
6453 ; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v12
6454 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], v13, s[4:5]
6455 ; GFX10-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7]
6456 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1]
6457 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12
6458 ; GFX10-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1]
6459 ; GFX10-NEXT: v_lshlrev_b64 v[6:7], v12, s[0:1]
6460 ; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 64, v13
6461 ; GFX10-NEXT: v_or_b32_e32 v4, v4, v8
6462 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v0
6463 ; GFX10-NEXT: v_or_b32_e32 v3, v3, v1
6464 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v14, s[6:7]
6465 ; GFX10-NEXT: v_or_b32_e32 v5, v5, v9
6466 ; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 0, v13
6467 ; GFX10-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo
6468 ; GFX10-NEXT: v_cndmask_b32_e32 v10, v11, v3, vcc_lo
6469 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7]
6470 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0
6471 ; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v12
6472 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s0
6473 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo
6474 ; GFX10-NEXT: v_cndmask_b32_e32 v4, 0, v7, vcc_lo
6475 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, s1
6476 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v8, s8, s2
6477 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v10, s9, s2
6478 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s5, s1
6479 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0
6480 ; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0
6481 ; GFX10-NEXT: v_or_b32_e32 v0, v6, v0
6482 ; GFX10-NEXT: v_or_b32_e32 v1, v4, v1
6483 ; GFX10-NEXT: v_or_b32_e32 v2, v5, v2
6484 ; GFX10-NEXT: v_or_b32_e32 v3, v7, v3
6485 ; GFX10-NEXT: ; return to shader part epilog
6487 ; GFX11-LABEL: v_fshr_i128_ssv:
6489 ; GFX11-NEXT: v_not_b32_e32 v1, v0
6490 ; GFX11-NEXT: s_lshr_b32 s8, s1, 31
6491 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
6492 ; GFX11-NEXT: s_mov_b32 s9, 0
6493 ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6494 ; GFX11-NEXT: v_and_b32_e32 v12, 0x7f, v1
6495 ; GFX11-NEXT: s_or_b64 s[8:9], s[2:3], s[8:9]
6496 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
6497 ; GFX11-NEXT: v_lshlrev_b64 v[6:7], v12, s[0:1]
6498 ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12
6499 ; GFX11-NEXT: v_and_b32_e32 v13, 0x7f, v0
6500 ; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v12
6501 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v12, s[8:9]
6502 ; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v12
6503 ; GFX11-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo
6504 ; GFX11-NEXT: v_sub_nc_u32_e32 v8, 64, v13
6505 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1]
6506 ; GFX11-NEXT: v_subrev_nc_u32_e32 v14, 64, v13
6507 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], v13, s[4:5]
6508 ; GFX11-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1]
6509 ; GFX11-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7]
6510 ; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v13
6511 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v0
6512 ; GFX11-NEXT: v_or_b32_e32 v3, v3, v1
6513 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], v14, s[6:7]
6514 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v13
6515 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v8
6516 ; GFX11-NEXT: v_or_b32_e32 v5, v5, v9
6517 ; GFX11-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo
6518 ; GFX11-NEXT: v_cndmask_b32_e32 v10, v11, v3, vcc_lo
6519 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7]
6520 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0
6521 ; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v12
6522 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v5, s0
6523 ; GFX11-NEXT: v_cndmask_b32_e32 v4, 0, v7, vcc_lo
6524 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6525 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s4, s1
6526 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v8, s8, s2
6527 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v10, s9, s2
6528 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s5, s1
6529 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0
6530 ; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0
6531 ; GFX11-NEXT: v_or_b32_e32 v0, v6, v0
6532 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6533 ; GFX11-NEXT: v_or_b32_e32 v1, v4, v1
6534 ; GFX11-NEXT: v_or_b32_e32 v2, v5, v2
6535 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
6536 ; GFX11-NEXT: v_or_b32_e32 v3, v7, v3
6537 ; GFX11-NEXT: ; return to shader part epilog
6538 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
6539 %cast.result = bitcast i128 %result to <4 x float>
6540 ret <4 x float> %cast.result
; v_fshr_i128_svs: 128-bit funnel shift right via llvm.fshr.i128; the i128
; result is bitcast to <4 x float> and returned. %lhs and %amt are marked
; inreg, %rhs is not. In the expected output below %lhs is read from s[0:3],
; %rhs from v[0:3], and the shift amount from s[4:5], which is masked with 0x7f
; (s_and_b64 for the right-shift amount, s_andn2_b64 / s_and_not1_b64 for the
; complementary left-shift amount).
; The assertion lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
6543 define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) {
6544 ; GFX6-LABEL: v_fshr_i128_svs:
6546 ; GFX6-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6547 ; GFX6-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6548 ; GFX6-NEXT: s_lshl_b64 s[8:9], s[0:1], 1
6549 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6550 ; GFX6-NEXT: s_lshr_b32 s0, s1, 31
6551 ; GFX6-NEXT: s_mov_b32 s1, 0
6552 ; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
6553 ; GFX6-NEXT: s_sub_i32 s7, s4, 64
6554 ; GFX6-NEXT: s_sub_i32 s5, 64, s4
6555 ; GFX6-NEXT: s_cmp_lt_u32 s4, 64
6556 ; GFX6-NEXT: s_cselect_b32 s12, 1, 0
6557 ; GFX6-NEXT: s_cmp_eq_u32 s4, 0
6558 ; GFX6-NEXT: s_cselect_b32 s13, 1, 0
6559 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[8:9], s4
6560 ; GFX6-NEXT: s_lshr_b64 s[10:11], s[8:9], s5
6561 ; GFX6-NEXT: s_lshl_b64 s[4:5], s[0:1], s4
6562 ; GFX6-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5]
6563 ; GFX6-NEXT: s_lshl_b64 s[8:9], s[8:9], s7
6564 ; GFX6-NEXT: s_cmp_lg_u32 s12, 0
6565 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
6566 ; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9]
6567 ; GFX6-NEXT: s_cmp_lg_u32 s13, 0
6568 ; GFX6-NEXT: s_cselect_b64 s[4:5], s[0:1], s[4:5]
6569 ; GFX6-NEXT: s_sub_i32 s0, s6, 64
6570 ; GFX6-NEXT: s_sub_i32 s1, 64, s6
6571 ; GFX6-NEXT: s_cmp_lt_u32 s6, 64
6572 ; GFX6-NEXT: s_cselect_b32 s7, 1, 0
6573 ; GFX6-NEXT: s_cmp_eq_u32 s6, 0
6574 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s6
6575 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s1
6576 ; GFX6-NEXT: s_cselect_b32 s8, 1, 0
6577 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], s6
6578 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], s0
6579 ; GFX6-NEXT: s_and_b32 s0, 1, s7
6580 ; GFX6-NEXT: v_or_b32_e32 v4, v4, v6
6581 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v7
6582 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
6583 ; GFX6-NEXT: s_and_b32 s0, 1, s8
6584 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
6585 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
6586 ; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
6587 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1]
6588 ; GFX6-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1]
6589 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
6590 ; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
6591 ; GFX6-NEXT: v_or_b32_e32 v0, s2, v0
6592 ; GFX6-NEXT: v_or_b32_e32 v1, s3, v1
6593 ; GFX6-NEXT: v_or_b32_e32 v2, s4, v2
6594 ; GFX6-NEXT: v_or_b32_e32 v3, s5, v3
6595 ; GFX6-NEXT: ; return to shader part epilog
6597 ; GFX8-LABEL: v_fshr_i128_svs:
6599 ; GFX8-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6600 ; GFX8-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6601 ; GFX8-NEXT: s_lshl_b64 s[8:9], s[0:1], 1
6602 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6603 ; GFX8-NEXT: s_lshr_b32 s0, s1, 31
6604 ; GFX8-NEXT: s_mov_b32 s1, 0
6605 ; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
6606 ; GFX8-NEXT: s_sub_i32 s7, s4, 64
6607 ; GFX8-NEXT: s_sub_i32 s5, 64, s4
6608 ; GFX8-NEXT: s_cmp_lt_u32 s4, 64
6609 ; GFX8-NEXT: s_cselect_b32 s12, 1, 0
6610 ; GFX8-NEXT: s_cmp_eq_u32 s4, 0
6611 ; GFX8-NEXT: s_cselect_b32 s13, 1, 0
6612 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[8:9], s4
6613 ; GFX8-NEXT: s_lshr_b64 s[10:11], s[8:9], s5
6614 ; GFX8-NEXT: s_lshl_b64 s[4:5], s[0:1], s4
6615 ; GFX8-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5]
6616 ; GFX8-NEXT: s_lshl_b64 s[8:9], s[8:9], s7
6617 ; GFX8-NEXT: s_cmp_lg_u32 s12, 0
6618 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
6619 ; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9]
6620 ; GFX8-NEXT: s_cmp_lg_u32 s13, 0
6621 ; GFX8-NEXT: s_cselect_b64 s[4:5], s[0:1], s[4:5]
6622 ; GFX8-NEXT: s_sub_i32 s0, s6, 64
6623 ; GFX8-NEXT: s_sub_i32 s1, 64, s6
6624 ; GFX8-NEXT: s_cmp_lt_u32 s6, 64
6625 ; GFX8-NEXT: s_cselect_b32 s7, 1, 0
6626 ; GFX8-NEXT: s_cmp_eq_u32 s6, 0
6627 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1]
6628 ; GFX8-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3]
6629 ; GFX8-NEXT: s_cselect_b32 s8, 1, 0
6630 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], s6, v[2:3]
6631 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], s0, v[2:3]
6632 ; GFX8-NEXT: s_and_b32 s0, 1, s7
6633 ; GFX8-NEXT: v_or_b32_e32 v4, v4, v6
6634 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v7
6635 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
6636 ; GFX8-NEXT: s_and_b32 s0, 1, s8
6637 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
6638 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
6639 ; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
6640 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1]
6641 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1]
6642 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
6643 ; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
6644 ; GFX8-NEXT: v_or_b32_e32 v0, s2, v0
6645 ; GFX8-NEXT: v_or_b32_e32 v1, s3, v1
6646 ; GFX8-NEXT: v_or_b32_e32 v2, s4, v2
6647 ; GFX8-NEXT: v_or_b32_e32 v3, s5, v3
6648 ; GFX8-NEXT: ; return to shader part epilog
6650 ; GFX9-LABEL: v_fshr_i128_svs:
6652 ; GFX9-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6653 ; GFX9-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6654 ; GFX9-NEXT: s_lshl_b64 s[8:9], s[0:1], 1
6655 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6656 ; GFX9-NEXT: s_lshr_b32 s0, s1, 31
6657 ; GFX9-NEXT: s_mov_b32 s1, 0
6658 ; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
6659 ; GFX9-NEXT: s_sub_i32 s7, s4, 64
6660 ; GFX9-NEXT: s_sub_i32 s5, 64, s4
6661 ; GFX9-NEXT: s_cmp_lt_u32 s4, 64
6662 ; GFX9-NEXT: s_cselect_b32 s12, 1, 0
6663 ; GFX9-NEXT: s_cmp_eq_u32 s4, 0
6664 ; GFX9-NEXT: s_cselect_b32 s13, 1, 0
6665 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[8:9], s4
6666 ; GFX9-NEXT: s_lshr_b64 s[10:11], s[8:9], s5
6667 ; GFX9-NEXT: s_lshl_b64 s[4:5], s[0:1], s4
6668 ; GFX9-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5]
6669 ; GFX9-NEXT: s_lshl_b64 s[8:9], s[8:9], s7
6670 ; GFX9-NEXT: s_cmp_lg_u32 s12, 0
6671 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
6672 ; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9]
6673 ; GFX9-NEXT: s_cmp_lg_u32 s13, 0
6674 ; GFX9-NEXT: s_cselect_b64 s[4:5], s[0:1], s[4:5]
6675 ; GFX9-NEXT: s_sub_i32 s0, s6, 64
6676 ; GFX9-NEXT: s_sub_i32 s1, 64, s6
6677 ; GFX9-NEXT: s_cmp_lt_u32 s6, 64
6678 ; GFX9-NEXT: s_cselect_b32 s7, 1, 0
6679 ; GFX9-NEXT: s_cmp_eq_u32 s6, 0
6680 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1]
6681 ; GFX9-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3]
6682 ; GFX9-NEXT: s_cselect_b32 s8, 1, 0
6683 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], s6, v[2:3]
6684 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], s0, v[2:3]
6685 ; GFX9-NEXT: s_and_b32 s0, 1, s7
6686 ; GFX9-NEXT: v_or_b32_e32 v4, v4, v6
6687 ; GFX9-NEXT: v_or_b32_e32 v5, v5, v7
6688 ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
6689 ; GFX9-NEXT: s_and_b32 s0, 1, s8
6690 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
6691 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
6692 ; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
6693 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1]
6694 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1]
6695 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc
6696 ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc
6697 ; GFX9-NEXT: v_or_b32_e32 v0, s2, v0
6698 ; GFX9-NEXT: v_or_b32_e32 v1, s3, v1
6699 ; GFX9-NEXT: v_or_b32_e32 v2, s4, v2
6700 ; GFX9-NEXT: v_or_b32_e32 v3, s5, v3
6701 ; GFX9-NEXT: ; return to shader part epilog
6703 ; GFX10-LABEL: v_fshr_i128_svs:
6705 ; GFX10-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6706 ; GFX10-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6707 ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6708 ; GFX10-NEXT: s_lshr_b32 s8, s1, 31
6709 ; GFX10-NEXT: s_mov_b32 s9, 0
6710 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
6711 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
6712 ; GFX10-NEXT: s_sub_i32 s7, s4, 64
6713 ; GFX10-NEXT: s_sub_i32 s5, 64, s4
6714 ; GFX10-NEXT: s_cmp_lt_u32 s4, 64
6715 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1]
6716 ; GFX10-NEXT: s_cselect_b32 s12, 1, 0
6717 ; GFX10-NEXT: s_cmp_eq_u32 s4, 0
6718 ; GFX10-NEXT: s_cselect_b32 s13, 1, 0
6719 ; GFX10-NEXT: s_lshr_b64 s[8:9], s[0:1], s5
6720 ; GFX10-NEXT: s_lshl_b64 s[10:11], s[2:3], s4
6721 ; GFX10-NEXT: s_lshl_b64 s[4:5], s[0:1], s4
6722 ; GFX10-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11]
6723 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s7
6724 ; GFX10-NEXT: s_cmp_lg_u32 s12, 0
6725 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], 0
6726 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1]
6727 ; GFX10-NEXT: s_cmp_lg_u32 s13, 0
6728 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
6729 ; GFX10-NEXT: s_sub_i32 s0, 64, s6
6730 ; GFX10-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3]
6731 ; GFX10-NEXT: s_sub_i32 s0, s6, 64
6732 ; GFX10-NEXT: s_cmp_lt_u32 s6, 64
6733 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3]
6734 ; GFX10-NEXT: s_cselect_b32 s1, 1, 0
6735 ; GFX10-NEXT: s_cmp_eq_u32 s6, 0
6736 ; GFX10-NEXT: v_or_b32_e32 v4, v4, v6
6737 ; GFX10-NEXT: s_cselect_b32 s7, 1, 0
6738 ; GFX10-NEXT: s_and_b32 s0, 1, s1
6739 ; GFX10-NEXT: v_or_b32_e32 v5, v5, v7
6740 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
6741 ; GFX10-NEXT: s_and_b32 s0, 1, s7
6742 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], s6, v[2:3]
6743 ; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
6744 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc_lo
6745 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc_lo
6746 ; GFX10-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc_lo
6747 ; GFX10-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc_lo
6748 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, v0, s0
6749 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0
6750 ; GFX10-NEXT: v_or_b32_e32 v2, s2, v2
6751 ; GFX10-NEXT: v_or_b32_e32 v3, s3, v3
6752 ; GFX10-NEXT: v_or_b32_e32 v0, s4, v0
6753 ; GFX10-NEXT: v_or_b32_e32 v1, s5, v1
6754 ; GFX10-NEXT: ; return to shader part epilog
6756 ; GFX11-LABEL: v_fshr_i128_svs:
6758 ; GFX11-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6759 ; GFX11-NEXT: s_and_not1_b64 s[4:5], 0x7f, s[4:5]
6760 ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
6761 ; GFX11-NEXT: s_lshr_b32 s8, s1, 31
6762 ; GFX11-NEXT: s_mov_b32 s9, 0
6763 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
6764 ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
6765 ; GFX11-NEXT: s_sub_i32 s7, s4, 64
6766 ; GFX11-NEXT: s_sub_i32 s5, 64, s4
6767 ; GFX11-NEXT: s_cmp_lt_u32 s4, 64
6768 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1]
6769 ; GFX11-NEXT: s_cselect_b32 s12, 1, 0
6770 ; GFX11-NEXT: s_cmp_eq_u32 s4, 0
6771 ; GFX11-NEXT: s_cselect_b32 s13, 1, 0
6772 ; GFX11-NEXT: s_lshr_b64 s[8:9], s[0:1], s5
6773 ; GFX11-NEXT: s_lshl_b64 s[10:11], s[2:3], s4
6774 ; GFX11-NEXT: s_lshl_b64 s[4:5], s[0:1], s4
6775 ; GFX11-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11]
6776 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s7
6777 ; GFX11-NEXT: s_cmp_lg_u32 s12, 0
6778 ; GFX11-NEXT: s_cselect_b64 s[4:5], s[4:5], 0
6779 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1]
6780 ; GFX11-NEXT: s_cmp_lg_u32 s13, 0
6781 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
6782 ; GFX11-NEXT: s_sub_i32 s0, 64, s6
6783 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
6784 ; GFX11-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3]
6785 ; GFX11-NEXT: s_sub_i32 s0, s6, 64
6786 ; GFX11-NEXT: s_cmp_lt_u32 s6, 64
6787 ; GFX11-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3]
6788 ; GFX11-NEXT: s_cselect_b32 s1, 1, 0
6789 ; GFX11-NEXT: s_cmp_eq_u32 s6, 0
6790 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v6
6791 ; GFX11-NEXT: s_cselect_b32 s7, 1, 0
6792 ; GFX11-NEXT: s_and_b32 s0, 1, s1
6793 ; GFX11-NEXT: v_or_b32_e32 v5, v5, v7
6794 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
6795 ; GFX11-NEXT: s_and_b32 s0, 1, s7
6796 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], s6, v[2:3]
6797 ; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
6798 ; GFX11-NEXT: v_dual_cndmask_b32 v4, v8, v4 :: v_dual_cndmask_b32 v5, v9, v5
6799 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
6800 ; GFX11-NEXT: v_dual_cndmask_b32 v2, 0, v2 :: v_dual_cndmask_b32 v3, 0, v3
6801 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, v0, s0
6802 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
6803 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0
6804 ; GFX11-NEXT: v_or_b32_e32 v2, s2, v2
6805 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
6806 ; GFX11-NEXT: v_or_b32_e32 v3, s3, v3
6807 ; GFX11-NEXT: v_or_b32_e32 v0, s4, v0
6808 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
6809 ; GFX11-NEXT: v_or_b32_e32 v1, s5, v1
6810 ; GFX11-NEXT: ; return to shader part epilog
6811 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
6812 %cast.result = bitcast i128 %result to <4 x float>
6813 ret <4 x float> %cast.result
6816 define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) {
6817 ; GFX6-LABEL: v_fshr_i128_vss:
6819 ; GFX6-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6820 ; GFX6-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6821 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
6822 ; GFX6-NEXT: s_sub_i32 s5, s4, 64
6823 ; GFX6-NEXT: s_sub_i32 s7, 64, s4
6824 ; GFX6-NEXT: v_lshl_b64 v[4:5], v[0:1], 1
6825 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 31, v1
6826 ; GFX6-NEXT: s_cmp_lt_u32 s4, 64
6827 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v0
6828 ; GFX6-NEXT: s_cselect_b32 s8, 1, 0
6829 ; GFX6-NEXT: s_cmp_eq_u32 s4, 0
6830 ; GFX6-NEXT: s_cselect_b32 s9, 1, 0
6831 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], s7
6832 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s4
6833 ; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], s4
6834 ; GFX6-NEXT: s_and_b32 s4, 1, s8
6835 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
6836 ; GFX6-NEXT: s_and_b32 s4, 1, s9
6837 ; GFX6-NEXT: s_sub_i32 s10, s6, 64
6838 ; GFX6-NEXT: s_sub_i32 s8, 64, s6
6839 ; GFX6-NEXT: v_or_b32_e32 v6, v0, v6
6840 ; GFX6-NEXT: v_or_b32_e32 v7, v1, v7
6841 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[4:5], s5
6842 ; GFX6-NEXT: s_cmp_lt_u32 s6, 64
6843 ; GFX6-NEXT: s_cselect_b32 s11, 1, 0
6844 ; GFX6-NEXT: s_cmp_eq_u32 s6, 0
6845 ; GFX6-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
6846 ; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc
6847 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
6848 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
6849 ; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
6850 ; GFX6-NEXT: s_cselect_b32 s12, 1, 0
6851 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[2:3], s6
6852 ; GFX6-NEXT: s_lshr_b64 s[6:7], s[0:1], s6
6853 ; GFX6-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
6854 ; GFX6-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
6855 ; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
6856 ; GFX6-NEXT: s_cmp_lg_u32 s11, 0
6857 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3]
6858 ; GFX6-NEXT: s_cmp_lg_u32 s12, 0
6859 ; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
6860 ; GFX6-NEXT: s_cmp_lg_u32 s11, 0
6861 ; GFX6-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
6862 ; GFX6-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
6863 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[4:5], 0
6864 ; GFX6-NEXT: v_or_b32_e32 v0, s0, v4
6865 ; GFX6-NEXT: v_or_b32_e32 v1, s1, v5
6866 ; GFX6-NEXT: v_or_b32_e32 v2, s2, v2
6867 ; GFX6-NEXT: v_or_b32_e32 v3, s3, v3
6868 ; GFX6-NEXT: ; return to shader part epilog
6870 ; GFX8-LABEL: v_fshr_i128_vss:
6872 ; GFX8-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6873 ; GFX8-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6874 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
6875 ; GFX8-NEXT: s_sub_i32 s5, s4, 64
6876 ; GFX8-NEXT: s_sub_i32 s7, 64, s4
6877 ; GFX8-NEXT: v_lshlrev_b64 v[4:5], 1, v[0:1]
6878 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 31, v1
6879 ; GFX8-NEXT: s_cmp_lt_u32 s4, 64
6880 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v0
6881 ; GFX8-NEXT: s_cselect_b32 s8, 1, 0
6882 ; GFX8-NEXT: s_cmp_eq_u32 s4, 0
6883 ; GFX8-NEXT: s_cselect_b32 s9, 1, 0
6884 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], s7, v[4:5]
6885 ; GFX8-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3]
6886 ; GFX8-NEXT: v_lshlrev_b64 v[8:9], s4, v[4:5]
6887 ; GFX8-NEXT: s_and_b32 s4, 1, s8
6888 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
6889 ; GFX8-NEXT: s_and_b32 s4, 1, s9
6890 ; GFX8-NEXT: s_sub_i32 s10, s6, 64
6891 ; GFX8-NEXT: s_sub_i32 s8, 64, s6
6892 ; GFX8-NEXT: v_or_b32_e32 v6, v0, v6
6893 ; GFX8-NEXT: v_or_b32_e32 v7, v1, v7
6894 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], s5, v[4:5]
6895 ; GFX8-NEXT: s_cmp_lt_u32 s6, 64
6896 ; GFX8-NEXT: s_cselect_b32 s11, 1, 0
6897 ; GFX8-NEXT: s_cmp_eq_u32 s6, 0
6898 ; GFX8-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
6899 ; GFX8-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc
6900 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
6901 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
6902 ; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
6903 ; GFX8-NEXT: s_cselect_b32 s12, 1, 0
6904 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[2:3], s6
6905 ; GFX8-NEXT: s_lshr_b64 s[6:7], s[0:1], s6
6906 ; GFX8-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
6907 ; GFX8-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
6908 ; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
6909 ; GFX8-NEXT: s_cmp_lg_u32 s11, 0
6910 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3]
6911 ; GFX8-NEXT: s_cmp_lg_u32 s12, 0
6912 ; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
6913 ; GFX8-NEXT: s_cmp_lg_u32 s11, 0
6914 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
6915 ; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
6916 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[4:5], 0
6917 ; GFX8-NEXT: v_or_b32_e32 v0, s0, v4
6918 ; GFX8-NEXT: v_or_b32_e32 v1, s1, v5
6919 ; GFX8-NEXT: v_or_b32_e32 v2, s2, v2
6920 ; GFX8-NEXT: v_or_b32_e32 v3, s3, v3
6921 ; GFX8-NEXT: ; return to shader part epilog
6923 ; GFX9-LABEL: v_fshr_i128_vss:
6925 ; GFX9-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6926 ; GFX9-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6927 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
6928 ; GFX9-NEXT: s_sub_i32 s5, s4, 64
6929 ; GFX9-NEXT: s_sub_i32 s7, 64, s4
6930 ; GFX9-NEXT: v_lshlrev_b64 v[4:5], 1, v[0:1]
6931 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 31, v1
6932 ; GFX9-NEXT: s_cmp_lt_u32 s4, 64
6933 ; GFX9-NEXT: v_or_b32_e32 v2, v2, v0
6934 ; GFX9-NEXT: s_cselect_b32 s8, 1, 0
6935 ; GFX9-NEXT: s_cmp_eq_u32 s4, 0
6936 ; GFX9-NEXT: s_cselect_b32 s9, 1, 0
6937 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], s7, v[4:5]
6938 ; GFX9-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3]
6939 ; GFX9-NEXT: v_lshlrev_b64 v[8:9], s4, v[4:5]
6940 ; GFX9-NEXT: s_and_b32 s4, 1, s8
6941 ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
6942 ; GFX9-NEXT: s_and_b32 s4, 1, s9
6943 ; GFX9-NEXT: s_sub_i32 s10, s6, 64
6944 ; GFX9-NEXT: s_sub_i32 s8, 64, s6
6945 ; GFX9-NEXT: v_or_b32_e32 v6, v0, v6
6946 ; GFX9-NEXT: v_or_b32_e32 v7, v1, v7
6947 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], s5, v[4:5]
6948 ; GFX9-NEXT: s_cmp_lt_u32 s6, 64
6949 ; GFX9-NEXT: s_cselect_b32 s11, 1, 0
6950 ; GFX9-NEXT: s_cmp_eq_u32 s6, 0
6951 ; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc
6952 ; GFX9-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc
6953 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
6954 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
6955 ; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
6956 ; GFX9-NEXT: s_cselect_b32 s12, 1, 0
6957 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[2:3], s6
6958 ; GFX9-NEXT: s_lshr_b64 s[6:7], s[0:1], s6
6959 ; GFX9-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
6960 ; GFX9-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
6961 ; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
6962 ; GFX9-NEXT: s_cmp_lg_u32 s11, 0
6963 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3]
6964 ; GFX9-NEXT: s_cmp_lg_u32 s12, 0
6965 ; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
6966 ; GFX9-NEXT: s_cmp_lg_u32 s11, 0
6967 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc
6968 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
6969 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[4:5], 0
6970 ; GFX9-NEXT: v_or_b32_e32 v0, s0, v4
6971 ; GFX9-NEXT: v_or_b32_e32 v1, s1, v5
6972 ; GFX9-NEXT: v_or_b32_e32 v2, s2, v2
6973 ; GFX9-NEXT: v_or_b32_e32 v3, s3, v3
6974 ; GFX9-NEXT: ; return to shader part epilog
6976 ; GFX10-LABEL: v_fshr_i128_vss:
6978 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
6979 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 31, v1
6980 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
6981 ; GFX10-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
6982 ; GFX10-NEXT: s_andn2_b64 s[4:5], 0x7f, s[4:5]
6983 ; GFX10-NEXT: s_sub_i32 s7, 64, s4
6984 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v4
6985 ; GFX10-NEXT: s_sub_i32 s5, s4, 64
6986 ; GFX10-NEXT: s_cmp_lt_u32 s4, 64
6987 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], s7, v[0:1]
6988 ; GFX10-NEXT: s_cselect_b32 s8, 1, 0
6989 ; GFX10-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3]
6990 ; GFX10-NEXT: s_cmp_eq_u32 s4, 0
6991 ; GFX10-NEXT: v_lshlrev_b64 v[8:9], s4, v[0:1]
6992 ; GFX10-NEXT: s_cselect_b32 s9, 1, 0
6993 ; GFX10-NEXT: s_and_b32 s4, 1, s8
6994 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1]
6995 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4
6996 ; GFX10-NEXT: v_or_b32_e32 v4, v4, v6
6997 ; GFX10-NEXT: v_or_b32_e32 v5, v5, v7
6998 ; GFX10-NEXT: s_and_b32 s4, 1, s9
6999 ; GFX10-NEXT: s_sub_i32 s10, s6, 64
7000 ; GFX10-NEXT: s_sub_i32 s7, 64, s6
7001 ; GFX10-NEXT: s_cmp_lt_u32 s6, 64
7002 ; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc_lo
7003 ; GFX10-NEXT: s_cselect_b32 s11, 1, 0
7004 ; GFX10-NEXT: s_cmp_eq_u32 s6, 0
7005 ; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc_lo
7006 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
7007 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo
7008 ; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4
7009 ; GFX10-NEXT: s_cselect_b32 s12, 1, 0
7010 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[0:1], s6
7011 ; GFX10-NEXT: s_lshl_b64 s[8:9], s[2:3], s7
7012 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[2:3], s6
7013 ; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
7014 ; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
7015 ; GFX10-NEXT: s_cmp_lg_u32 s11, 0
7016 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc_lo
7017 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
7018 ; GFX10-NEXT: s_cmp_lg_u32 s12, 0
7019 ; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc_lo
7020 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
7021 ; GFX10-NEXT: s_cmp_lg_u32 s11, 0
7022 ; GFX10-NEXT: v_or_b32_e32 v0, s0, v6
7023 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[6:7], 0
7024 ; GFX10-NEXT: v_or_b32_e32 v1, s1, v7
7025 ; GFX10-NEXT: v_or_b32_e32 v2, s2, v2
7026 ; GFX10-NEXT: v_or_b32_e32 v3, s3, v3
7027 ; GFX10-NEXT: ; return to shader part epilog
7029 ; GFX11-LABEL: v_fshr_i128_vss:
7031 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
7032 ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 31, v1
7033 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
7034 ; GFX11-NEXT: s_and_b64 s[6:7], s[4:5], 0x7f
7035 ; GFX11-NEXT: s_and_not1_b64 s[4:5], 0x7f, s[4:5]
7036 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
7037 ; GFX11-NEXT: s_sub_i32 s7, 64, s4
7038 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v4
7039 ; GFX11-NEXT: s_sub_i32 s5, s4, 64
7040 ; GFX11-NEXT: s_cmp_lt_u32 s4, 64
7041 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], s7, v[0:1]
7042 ; GFX11-NEXT: s_cselect_b32 s8, 1, 0
7043 ; GFX11-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3]
7044 ; GFX11-NEXT: s_cmp_eq_u32 s4, 0
7045 ; GFX11-NEXT: v_lshlrev_b64 v[8:9], s4, v[0:1]
7046 ; GFX11-NEXT: s_cselect_b32 s9, 1, 0
7047 ; GFX11-NEXT: s_and_b32 s4, 1, s8
7048 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1]
7049 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4
7050 ; GFX11-NEXT: v_or_b32_e32 v4, v4, v6
7051 ; GFX11-NEXT: v_or_b32_e32 v5, v5, v7
7052 ; GFX11-NEXT: s_and_b32 s4, 1, s9
7053 ; GFX11-NEXT: s_sub_i32 s10, s6, 64
7054 ; GFX11-NEXT: s_sub_i32 s7, 64, s6
7055 ; GFX11-NEXT: s_cmp_lt_u32 s6, 64
7056 ; GFX11-NEXT: v_dual_cndmask_b32 v6, 0, v8 :: v_dual_cndmask_b32 v7, 0, v9
7057 ; GFX11-NEXT: s_cselect_b32 s11, 1, 0
7058 ; GFX11-NEXT: s_cmp_eq_u32 s6, 0
7059 ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
7060 ; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4
7061 ; GFX11-NEXT: s_cselect_b32 s12, 1, 0
7062 ; GFX11-NEXT: s_lshr_b64 s[4:5], s[0:1], s6
7063 ; GFX11-NEXT: s_lshl_b64 s[8:9], s[2:3], s7
7064 ; GFX11-NEXT: s_lshr_b64 s[6:7], s[2:3], s6
7065 ; GFX11-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
7066 ; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
7067 ; GFX11-NEXT: s_cmp_lg_u32 s11, 0
7068 ; GFX11-NEXT: v_dual_cndmask_b32 v2, v0, v2 :: v_dual_cndmask_b32 v3, v1, v3
7069 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
7070 ; GFX11-NEXT: s_cmp_lg_u32 s12, 0
7071 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
7072 ; GFX11-NEXT: s_cmp_lg_u32 s11, 0
7073 ; GFX11-NEXT: v_or_b32_e32 v0, s0, v6
7074 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[6:7], 0
7075 ; GFX11-NEXT: v_or_b32_e32 v1, s1, v7
7076 ; GFX11-NEXT: v_or_b32_e32 v2, s2, v2
7077 ; GFX11-NEXT: v_or_b32_e32 v3, s3, v3
7078 ; GFX11-NEXT: ; return to shader part epilog
7079 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt)
7080 %cast.result = bitcast i128 %result to <4 x float>
7081 ret <4 x float> %cast.result
; s_fshr_i128_65 checks codegen for an i128 funnel shift right (llvm.fshr.i128)
; by the constant amount 65, in an amdgpu_ps function whose two operands are
; both passed `inreg`.
;
; Every expected instruction below is a scalar (s_*) op. On all five targets
; the expected sequence consists of two 64-bit right shifts by 1 and two 32-bit
; left shifts by 31 whose results land in the high dword of a register pair
; with a zeroed low dword, joined by 64-bit ORs. No compare/select chain is
; expected for this constant amount. GFX10 and GFX11 expect the same
; operations as GFX6/GFX8/GFX9 but in a different order and with extra
; temporaries (s[8:9]).
;
; NOTE(review): s[0:3] = %lhs and s[4:7] = %rhs is inferred from register use
; in the checks, not shown by the IR itself. s4 and s5 are overwritten before
; being read, which is consistent with the low 64 bits of %rhs not
; contributing to a shift by 65 - confirm against the calling convention.
;
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with the update script instead of editing them by hand.
7084 define amdgpu_ps i128 @s_fshr_i128_65(i128 inreg %lhs, i128 inreg %rhs) {
7085 ; GFX6-LABEL: s_fshr_i128_65:
7087 ; GFX6-NEXT: s_mov_b32 s4, 0
7088 ; GFX6-NEXT: s_lshl_b32 s5, s0, 31
7089 ; GFX6-NEXT: s_lshl_b32 s3, s2, 31
7090 ; GFX6-NEXT: s_mov_b32 s2, s4
7091 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
7092 ; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[0:1]
7093 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[6:7], 1
7094 ; GFX6-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
7095 ; GFX6-NEXT: ; return to shader part epilog
7097 ; GFX8-LABEL: s_fshr_i128_65:
7099 ; GFX8-NEXT: s_mov_b32 s4, 0
7100 ; GFX8-NEXT: s_lshl_b32 s5, s0, 31
7101 ; GFX8-NEXT: s_lshl_b32 s3, s2, 31
7102 ; GFX8-NEXT: s_mov_b32 s2, s4
7103 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
7104 ; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[0:1]
7105 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[6:7], 1
7106 ; GFX8-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
7107 ; GFX8-NEXT: ; return to shader part epilog
7109 ; GFX9-LABEL: s_fshr_i128_65:
7111 ; GFX9-NEXT: s_mov_b32 s4, 0
7112 ; GFX9-NEXT: s_lshl_b32 s5, s0, 31
7113 ; GFX9-NEXT: s_lshl_b32 s3, s2, 31
7114 ; GFX9-NEXT: s_mov_b32 s2, s4
7115 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], 1
7116 ; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[0:1]
7117 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[6:7], 1
7118 ; GFX9-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
7119 ; GFX9-NEXT: ; return to shader part epilog
7121 ; GFX10-LABEL: s_fshr_i128_65:
7123 ; GFX10-NEXT: s_mov_b32 s4, 0
7124 ; GFX10-NEXT: s_lshl_b32 s5, s0, 31
7125 ; GFX10-NEXT: s_lshl_b32 s3, s2, 31
7126 ; GFX10-NEXT: s_mov_b32 s2, s4
7127 ; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], 1
7128 ; GFX10-NEXT: s_lshr_b64 s[8:9], s[0:1], 1
7129 ; GFX10-NEXT: s_or_b64 s[0:1], s[4:5], s[6:7]
7130 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
7131 ; GFX10-NEXT: ; return to shader part epilog
7133 ; GFX11-LABEL: s_fshr_i128_65:
7135 ; GFX11-NEXT: s_mov_b32 s4, 0
7136 ; GFX11-NEXT: s_lshl_b32 s5, s0, 31
7137 ; GFX11-NEXT: s_lshl_b32 s3, s2, 31
7138 ; GFX11-NEXT: s_mov_b32 s2, s4
7139 ; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], 1
7140 ; GFX11-NEXT: s_lshr_b64 s[8:9], s[0:1], 1
7141 ; GFX11-NEXT: s_or_b64 s[0:1], s[4:5], s[6:7]
7142 ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
7143 ; GFX11-NEXT: ; return to shader part epilog
7144 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65)
; v_fshr_i128_65 checks codegen for an i128 funnel shift right (llvm.fshr.i128)
; by the constant amount 65, in a function with the default calling convention
; and no `inreg` operands.
;
; The expected code is all vector (v_*) ALU work, bracketed by an entry
; s_waitcnt and a return through s_setpc_b64 s[30:31]. Per-target differences
; visible in the checks:
;   - GFX6 uses v_lshr_b64 (shift amount as last operand); GFX8 and later use
;     v_lshrrev_b64 (shift amount first).
;   - GFX6 and GFX8 build each high dword with a separate v_lshlrev_b32 by 31
;     followed by v_or_b32; GFX9 and later fold that pair into v_lshl_or_b32,
;     at the cost of two extra v_mov_b32 copies.
;   - GFX11 additionally expects s_delay_alu dependency hints.
;
; NOTE(review): v[0:3] = %lhs and v[4:7] = %rhs is inferred from register use
; in the checks. v[4:5] is never read as an input, which is consistent with
; the low 64 bits of %rhs not contributing to a shift by 65 - confirm.
;
; The check lines are autogenerated (see the note at the top of the file);
; regenerate them with the update script instead of editing them by hand.
7148 define i128 @v_fshr_i128_65(i128 %lhs, i128 %rhs) {
7149 ; GFX6-LABEL: v_fshr_i128_65:
7151 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7152 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 31, v0
7153 ; GFX6-NEXT: v_lshlrev_b32_e32 v5, 31, v2
7154 ; GFX6-NEXT: v_lshr_b64 v[2:3], v[0:1], 1
7155 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[6:7], 1
7156 ; GFX6-NEXT: v_or_b32_e32 v3, v5, v3
7157 ; GFX6-NEXT: v_or_b32_e32 v1, v4, v1
7158 ; GFX6-NEXT: s_setpc_b64 s[30:31]
7160 ; GFX8-LABEL: v_fshr_i128_65:
7162 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7163 ; GFX8-NEXT: v_lshlrev_b32_e32 v4, 31, v0
7164 ; GFX8-NEXT: v_lshlrev_b32_e32 v5, 31, v2
7165 ; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1]
7166 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], 1, v[6:7]
7167 ; GFX8-NEXT: v_or_b32_e32 v3, v5, v3
7168 ; GFX8-NEXT: v_or_b32_e32 v1, v4, v1
7169 ; GFX8-NEXT: s_setpc_b64 s[30:31]
7171 ; GFX9-LABEL: v_fshr_i128_65:
7173 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7174 ; GFX9-NEXT: v_mov_b32_e32 v8, v2
7175 ; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1]
7176 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7]
7177 ; GFX9-NEXT: v_lshl_or_b32 v3, v8, 31, v3
7178 ; GFX9-NEXT: v_lshl_or_b32 v1, v0, 31, v5
7179 ; GFX9-NEXT: v_mov_b32_e32 v0, v4
7180 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7182 ; GFX10-LABEL: v_fshr_i128_65:
7184 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7185 ; GFX10-NEXT: v_mov_b32_e32 v8, v2
7186 ; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7]
7187 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1]
7188 ; GFX10-NEXT: v_lshl_or_b32 v1, v0, 31, v5
7189 ; GFX10-NEXT: v_lshl_or_b32 v3, v8, 31, v3
7190 ; GFX10-NEXT: v_mov_b32_e32 v0, v4
7191 ; GFX10-NEXT: s_setpc_b64 s[30:31]
7193 ; GFX11-LABEL: v_fshr_i128_65:
7195 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7196 ; GFX11-NEXT: v_mov_b32_e32 v8, v2
7197 ; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7]
7198 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1]
7199 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
7200 ; GFX11-NEXT: v_lshl_or_b32 v1, v0, 31, v5
7201 ; GFX11-NEXT: v_lshl_or_b32 v3, v8, 31, v3
7202 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
7203 ; GFX11-NEXT: v_mov_b32_e32 v0, v4
7204 ; GFX11-NEXT: s_setpc_b64 s[30:31]
7205 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65)
7209 define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) {
7210 ; GFX6-LABEL: s_fshr_v2i128:
7212 ; GFX6-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f
7213 ; GFX6-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17]
7214 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
7215 ; GFX6-NEXT: s_lshr_b32 s24, s1, 31
7216 ; GFX6-NEXT: s_mov_b32 s25, 0
7217 ; GFX6-NEXT: s_lshl_b64 s[22:23], s[0:1], 1
7218 ; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[24:25]
7219 ; GFX6-NEXT: s_sub_i32 s19, s16, 64
7220 ; GFX6-NEXT: s_sub_i32 s17, 64, s16
7221 ; GFX6-NEXT: s_cmp_lt_u32 s16, 64
7222 ; GFX6-NEXT: s_cselect_b32 s24, 1, 0
7223 ; GFX6-NEXT: s_cmp_eq_u32 s16, 0
7224 ; GFX6-NEXT: s_cselect_b32 s28, 1, 0
7225 ; GFX6-NEXT: s_lshl_b64 s[2:3], s[22:23], s16
7226 ; GFX6-NEXT: s_lshr_b64 s[26:27], s[22:23], s17
7227 ; GFX6-NEXT: s_lshl_b64 s[16:17], s[0:1], s16
7228 ; GFX6-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17]
7229 ; GFX6-NEXT: s_lshl_b64 s[22:23], s[22:23], s19
7230 ; GFX6-NEXT: s_cmp_lg_u32 s24, 0
7231 ; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
7232 ; GFX6-NEXT: s_cselect_b64 s[16:17], s[16:17], s[22:23]
7233 ; GFX6-NEXT: s_cmp_lg_u32 s28, 0
7234 ; GFX6-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17]
7235 ; GFX6-NEXT: s_sub_i32 s24, s18, 64
7236 ; GFX6-NEXT: s_sub_i32 s22, 64, s18
7237 ; GFX6-NEXT: s_cmp_lt_u32 s18, 64
7238 ; GFX6-NEXT: s_cselect_b32 s26, 1, 0
7239 ; GFX6-NEXT: s_cmp_eq_u32 s18, 0
7240 ; GFX6-NEXT: s_cselect_b32 s27, 1, 0
7241 ; GFX6-NEXT: s_lshr_b64 s[0:1], s[10:11], s18
7242 ; GFX6-NEXT: s_lshr_b64 s[18:19], s[8:9], s18
7243 ; GFX6-NEXT: s_lshl_b64 s[22:23], s[10:11], s22
7244 ; GFX6-NEXT: s_or_b64 s[18:19], s[18:19], s[22:23]
7245 ; GFX6-NEXT: s_lshr_b64 s[10:11], s[10:11], s24
7246 ; GFX6-NEXT: s_cmp_lg_u32 s26, 0
7247 ; GFX6-NEXT: s_cselect_b64 s[10:11], s[18:19], s[10:11]
7248 ; GFX6-NEXT: s_cmp_lg_u32 s27, 0
7249 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11]
7250 ; GFX6-NEXT: s_cmp_lg_u32 s26, 0
7251 ; GFX6-NEXT: s_cselect_b64 s[10:11], s[0:1], 0
7252 ; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9]
7253 ; GFX6-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11]
7254 ; GFX6-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f
7255 ; GFX6-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21]
7256 ; GFX6-NEXT: s_lshl_b64 s[6:7], s[6:7], 1
7257 ; GFX6-NEXT: s_lshr_b32 s24, s5, 31
7258 ; GFX6-NEXT: s_lshl_b64 s[16:17], s[4:5], 1
7259 ; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[24:25]
7260 ; GFX6-NEXT: s_sub_i32 s9, s10, 64
7261 ; GFX6-NEXT: s_sub_i32 s11, 64, s10
7262 ; GFX6-NEXT: s_cmp_lt_u32 s10, 64
7263 ; GFX6-NEXT: s_cselect_b32 s20, 1, 0
7264 ; GFX6-NEXT: s_cmp_eq_u32 s10, 0
7265 ; GFX6-NEXT: s_cselect_b32 s21, 1, 0
7266 ; GFX6-NEXT: s_lshl_b64 s[6:7], s[16:17], s10
7267 ; GFX6-NEXT: s_lshr_b64 s[18:19], s[16:17], s11
7268 ; GFX6-NEXT: s_lshl_b64 s[10:11], s[4:5], s10
7269 ; GFX6-NEXT: s_or_b64 s[10:11], s[18:19], s[10:11]
7270 ; GFX6-NEXT: s_lshl_b64 s[16:17], s[16:17], s9
7271 ; GFX6-NEXT: s_cmp_lg_u32 s20, 0
7272 ; GFX6-NEXT: s_cselect_b64 s[6:7], s[6:7], 0
7273 ; GFX6-NEXT: s_cselect_b64 s[10:11], s[10:11], s[16:17]
7274 ; GFX6-NEXT: s_cmp_lg_u32 s21, 0
7275 ; GFX6-NEXT: s_cselect_b64 s[10:11], s[4:5], s[10:11]
7276 ; GFX6-NEXT: s_sub_i32 s18, s8, 64
7277 ; GFX6-NEXT: s_sub_i32 s16, 64, s8
7278 ; GFX6-NEXT: s_cmp_lt_u32 s8, 64
7279 ; GFX6-NEXT: s_cselect_b32 s19, 1, 0
7280 ; GFX6-NEXT: s_cmp_eq_u32 s8, 0
7281 ; GFX6-NEXT: s_cselect_b32 s20, 1, 0
7282 ; GFX6-NEXT: s_lshr_b64 s[4:5], s[14:15], s8
7283 ; GFX6-NEXT: s_lshr_b64 s[8:9], s[12:13], s8
7284 ; GFX6-NEXT: s_lshl_b64 s[16:17], s[14:15], s16
7285 ; GFX6-NEXT: s_or_b64 s[8:9], s[8:9], s[16:17]
7286 ; GFX6-NEXT: s_lshr_b64 s[14:15], s[14:15], s18
7287 ; GFX6-NEXT: s_cmp_lg_u32 s19, 0
7288 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[14:15]
7289 ; GFX6-NEXT: s_cmp_lg_u32 s20, 0
7290 ; GFX6-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9]
7291 ; GFX6-NEXT: s_cmp_lg_u32 s19, 0
7292 ; GFX6-NEXT: s_cselect_b64 s[12:13], s[4:5], 0
7293 ; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[8:9]
7294 ; GFX6-NEXT: s_or_b64 s[6:7], s[10:11], s[12:13]
7295 ; GFX6-NEXT: ; return to shader part epilog
7297 ; GFX8-LABEL: s_fshr_v2i128:
7299 ; GFX8-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f
7300 ; GFX8-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17]
7301 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
7302 ; GFX8-NEXT: s_lshr_b32 s24, s1, 31
7303 ; GFX8-NEXT: s_mov_b32 s25, 0
7304 ; GFX8-NEXT: s_lshl_b64 s[22:23], s[0:1], 1
7305 ; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[24:25]
7306 ; GFX8-NEXT: s_sub_i32 s19, s16, 64
7307 ; GFX8-NEXT: s_sub_i32 s17, 64, s16
7308 ; GFX8-NEXT: s_cmp_lt_u32 s16, 64
7309 ; GFX8-NEXT: s_cselect_b32 s24, 1, 0
7310 ; GFX8-NEXT: s_cmp_eq_u32 s16, 0
7311 ; GFX8-NEXT: s_cselect_b32 s28, 1, 0
7312 ; GFX8-NEXT: s_lshl_b64 s[2:3], s[22:23], s16
7313 ; GFX8-NEXT: s_lshr_b64 s[26:27], s[22:23], s17
7314 ; GFX8-NEXT: s_lshl_b64 s[16:17], s[0:1], s16
7315 ; GFX8-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17]
7316 ; GFX8-NEXT: s_lshl_b64 s[22:23], s[22:23], s19
7317 ; GFX8-NEXT: s_cmp_lg_u32 s24, 0
7318 ; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
7319 ; GFX8-NEXT: s_cselect_b64 s[16:17], s[16:17], s[22:23]
7320 ; GFX8-NEXT: s_cmp_lg_u32 s28, 0
7321 ; GFX8-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17]
7322 ; GFX8-NEXT: s_sub_i32 s24, s18, 64
7323 ; GFX8-NEXT: s_sub_i32 s22, 64, s18
7324 ; GFX8-NEXT: s_cmp_lt_u32 s18, 64
7325 ; GFX8-NEXT: s_cselect_b32 s26, 1, 0
7326 ; GFX8-NEXT: s_cmp_eq_u32 s18, 0
7327 ; GFX8-NEXT: s_cselect_b32 s27, 1, 0
7328 ; GFX8-NEXT: s_lshr_b64 s[0:1], s[10:11], s18
7329 ; GFX8-NEXT: s_lshr_b64 s[18:19], s[8:9], s18
7330 ; GFX8-NEXT: s_lshl_b64 s[22:23], s[10:11], s22
7331 ; GFX8-NEXT: s_or_b64 s[18:19], s[18:19], s[22:23]
7332 ; GFX8-NEXT: s_lshr_b64 s[10:11], s[10:11], s24
7333 ; GFX8-NEXT: s_cmp_lg_u32 s26, 0
7334 ; GFX8-NEXT: s_cselect_b64 s[10:11], s[18:19], s[10:11]
7335 ; GFX8-NEXT: s_cmp_lg_u32 s27, 0
7336 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11]
7337 ; GFX8-NEXT: s_cmp_lg_u32 s26, 0
7338 ; GFX8-NEXT: s_cselect_b64 s[10:11], s[0:1], 0
7339 ; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9]
7340 ; GFX8-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11]
7341 ; GFX8-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f
7342 ; GFX8-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21]
7343 ; GFX8-NEXT: s_lshl_b64 s[6:7], s[6:7], 1
7344 ; GFX8-NEXT: s_lshr_b32 s24, s5, 31
7345 ; GFX8-NEXT: s_lshl_b64 s[16:17], s[4:5], 1
7346 ; GFX8-NEXT: s_or_b64 s[4:5], s[6:7], s[24:25]
7347 ; GFX8-NEXT: s_sub_i32 s9, s10, 64
7348 ; GFX8-NEXT: s_sub_i32 s11, 64, s10
7349 ; GFX8-NEXT: s_cmp_lt_u32 s10, 64
7350 ; GFX8-NEXT: s_cselect_b32 s20, 1, 0
7351 ; GFX8-NEXT: s_cmp_eq_u32 s10, 0
7352 ; GFX8-NEXT: s_cselect_b32 s21, 1, 0
7353 ; GFX8-NEXT: s_lshl_b64 s[6:7], s[16:17], s10
7354 ; GFX8-NEXT: s_lshr_b64 s[18:19], s[16:17], s11
7355 ; GFX8-NEXT: s_lshl_b64 s[10:11], s[4:5], s10
7356 ; GFX8-NEXT: s_or_b64 s[10:11], s[18:19], s[10:11]
7357 ; GFX8-NEXT: s_lshl_b64 s[16:17], s[16:17], s9
7358 ; GFX8-NEXT: s_cmp_lg_u32 s20, 0
7359 ; GFX8-NEXT: s_cselect_b64 s[6:7], s[6:7], 0
7360 ; GFX8-NEXT: s_cselect_b64 s[10:11], s[10:11], s[16:17]
7361 ; GFX8-NEXT: s_cmp_lg_u32 s21, 0
7362 ; GFX8-NEXT: s_cselect_b64 s[10:11], s[4:5], s[10:11]
7363 ; GFX8-NEXT: s_sub_i32 s18, s8, 64
7364 ; GFX8-NEXT: s_sub_i32 s16, 64, s8
7365 ; GFX8-NEXT: s_cmp_lt_u32 s8, 64
7366 ; GFX8-NEXT: s_cselect_b32 s19, 1, 0
7367 ; GFX8-NEXT: s_cmp_eq_u32 s8, 0
7368 ; GFX8-NEXT: s_cselect_b32 s20, 1, 0
7369 ; GFX8-NEXT: s_lshr_b64 s[4:5], s[14:15], s8
7370 ; GFX8-NEXT: s_lshr_b64 s[8:9], s[12:13], s8
7371 ; GFX8-NEXT: s_lshl_b64 s[16:17], s[14:15], s16
7372 ; GFX8-NEXT: s_or_b64 s[8:9], s[8:9], s[16:17]
7373 ; GFX8-NEXT: s_lshr_b64 s[14:15], s[14:15], s18
7374 ; GFX8-NEXT: s_cmp_lg_u32 s19, 0
7375 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[14:15]
7376 ; GFX8-NEXT: s_cmp_lg_u32 s20, 0
7377 ; GFX8-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9]
7378 ; GFX8-NEXT: s_cmp_lg_u32 s19, 0
7379 ; GFX8-NEXT: s_cselect_b64 s[12:13], s[4:5], 0
7380 ; GFX8-NEXT: s_or_b64 s[4:5], s[6:7], s[8:9]
7381 ; GFX8-NEXT: s_or_b64 s[6:7], s[10:11], s[12:13]
7382 ; GFX8-NEXT: ; return to shader part epilog
7384 ; GFX9-LABEL: s_fshr_v2i128:
7386 ; GFX9-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f
7387 ; GFX9-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17]
7388 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
7389 ; GFX9-NEXT: s_lshr_b32 s24, s1, 31
7390 ; GFX9-NEXT: s_mov_b32 s25, 0
7391 ; GFX9-NEXT: s_lshl_b64 s[22:23], s[0:1], 1
7392 ; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[24:25]
7393 ; GFX9-NEXT: s_sub_i32 s19, s16, 64
7394 ; GFX9-NEXT: s_sub_i32 s17, 64, s16
7395 ; GFX9-NEXT: s_cmp_lt_u32 s16, 64
7396 ; GFX9-NEXT: s_cselect_b32 s24, 1, 0
7397 ; GFX9-NEXT: s_cmp_eq_u32 s16, 0
7398 ; GFX9-NEXT: s_cselect_b32 s28, 1, 0
7399 ; GFX9-NEXT: s_lshl_b64 s[2:3], s[22:23], s16
7400 ; GFX9-NEXT: s_lshr_b64 s[26:27], s[22:23], s17
7401 ; GFX9-NEXT: s_lshl_b64 s[16:17], s[0:1], s16
7402 ; GFX9-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17]
7403 ; GFX9-NEXT: s_lshl_b64 s[22:23], s[22:23], s19
7404 ; GFX9-NEXT: s_cmp_lg_u32 s24, 0
7405 ; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
7406 ; GFX9-NEXT: s_cselect_b64 s[16:17], s[16:17], s[22:23]
7407 ; GFX9-NEXT: s_cmp_lg_u32 s28, 0
7408 ; GFX9-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17]
7409 ; GFX9-NEXT: s_sub_i32 s24, s18, 64
7410 ; GFX9-NEXT: s_sub_i32 s22, 64, s18
7411 ; GFX9-NEXT: s_cmp_lt_u32 s18, 64
7412 ; GFX9-NEXT: s_cselect_b32 s26, 1, 0
7413 ; GFX9-NEXT: s_cmp_eq_u32 s18, 0
7414 ; GFX9-NEXT: s_cselect_b32 s27, 1, 0
7415 ; GFX9-NEXT: s_lshr_b64 s[0:1], s[10:11], s18
7416 ; GFX9-NEXT: s_lshr_b64 s[18:19], s[8:9], s18
7417 ; GFX9-NEXT: s_lshl_b64 s[22:23], s[10:11], s22
7418 ; GFX9-NEXT: s_or_b64 s[18:19], s[18:19], s[22:23]
7419 ; GFX9-NEXT: s_lshr_b64 s[10:11], s[10:11], s24
7420 ; GFX9-NEXT: s_cmp_lg_u32 s26, 0
7421 ; GFX9-NEXT: s_cselect_b64 s[10:11], s[18:19], s[10:11]
7422 ; GFX9-NEXT: s_cmp_lg_u32 s27, 0
7423 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11]
7424 ; GFX9-NEXT: s_cmp_lg_u32 s26, 0
7425 ; GFX9-NEXT: s_cselect_b64 s[10:11], s[0:1], 0
7426 ; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9]
7427 ; GFX9-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11]
7428 ; GFX9-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f
7429 ; GFX9-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21]
7430 ; GFX9-NEXT: s_lshl_b64 s[6:7], s[6:7], 1
7431 ; GFX9-NEXT: s_lshr_b32 s24, s5, 31
7432 ; GFX9-NEXT: s_lshl_b64 s[16:17], s[4:5], 1
7433 ; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[24:25]
7434 ; GFX9-NEXT: s_sub_i32 s9, s10, 64
7435 ; GFX9-NEXT: s_sub_i32 s11, 64, s10
7436 ; GFX9-NEXT: s_cmp_lt_u32 s10, 64
7437 ; GFX9-NEXT: s_cselect_b32 s20, 1, 0
7438 ; GFX9-NEXT: s_cmp_eq_u32 s10, 0
7439 ; GFX9-NEXT: s_cselect_b32 s21, 1, 0
7440 ; GFX9-NEXT: s_lshl_b64 s[6:7], s[16:17], s10
7441 ; GFX9-NEXT: s_lshr_b64 s[18:19], s[16:17], s11
7442 ; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], s10
7443 ; GFX9-NEXT: s_or_b64 s[10:11], s[18:19], s[10:11]
7444 ; GFX9-NEXT: s_lshl_b64 s[16:17], s[16:17], s9
7445 ; GFX9-NEXT: s_cmp_lg_u32 s20, 0
7446 ; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], 0
7447 ; GFX9-NEXT: s_cselect_b64 s[10:11], s[10:11], s[16:17]
7448 ; GFX9-NEXT: s_cmp_lg_u32 s21, 0
7449 ; GFX9-NEXT: s_cselect_b64 s[10:11], s[4:5], s[10:11]
7450 ; GFX9-NEXT: s_sub_i32 s18, s8, 64
7451 ; GFX9-NEXT: s_sub_i32 s16, 64, s8
7452 ; GFX9-NEXT: s_cmp_lt_u32 s8, 64
7453 ; GFX9-NEXT: s_cselect_b32 s19, 1, 0
7454 ; GFX9-NEXT: s_cmp_eq_u32 s8, 0
7455 ; GFX9-NEXT: s_cselect_b32 s20, 1, 0
7456 ; GFX9-NEXT: s_lshr_b64 s[4:5], s[14:15], s8
7457 ; GFX9-NEXT: s_lshr_b64 s[8:9], s[12:13], s8
7458 ; GFX9-NEXT: s_lshl_b64 s[16:17], s[14:15], s16
7459 ; GFX9-NEXT: s_or_b64 s[8:9], s[8:9], s[16:17]
7460 ; GFX9-NEXT: s_lshr_b64 s[14:15], s[14:15], s18
7461 ; GFX9-NEXT: s_cmp_lg_u32 s19, 0
7462 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[14:15]
7463 ; GFX9-NEXT: s_cmp_lg_u32 s20, 0
7464 ; GFX9-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9]
7465 ; GFX9-NEXT: s_cmp_lg_u32 s19, 0
7466 ; GFX9-NEXT: s_cselect_b64 s[12:13], s[4:5], 0
7467 ; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[8:9]
7468 ; GFX9-NEXT: s_or_b64 s[6:7], s[10:11], s[12:13]
7469 ; GFX9-NEXT: ; return to shader part epilog
7471 ; GFX10-LABEL: s_fshr_v2i128:
7473 ; GFX10-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f
7474 ; GFX10-NEXT: s_andn2_b64 s[16:17], 0x7f, s[16:17]
7475 ; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
7476 ; GFX10-NEXT: s_lshr_b32 s22, s1, 31
7477 ; GFX10-NEXT: s_mov_b32 s23, 0
7478 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
7479 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[22:23]
7480 ; GFX10-NEXT: s_sub_i32 s19, s16, 64
7481 ; GFX10-NEXT: s_sub_i32 s17, 64, s16
7482 ; GFX10-NEXT: s_cmp_lt_u32 s16, 64
7483 ; GFX10-NEXT: s_cselect_b32 s22, 1, 0
7484 ; GFX10-NEXT: s_cmp_eq_u32 s16, 0
7485 ; GFX10-NEXT: s_cselect_b32 s28, 1, 0
7486 ; GFX10-NEXT: s_lshr_b64 s[24:25], s[0:1], s17
7487 ; GFX10-NEXT: s_lshl_b64 s[26:27], s[2:3], s16
7488 ; GFX10-NEXT: s_lshl_b64 s[16:17], s[0:1], s16
7489 ; GFX10-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27]
7490 ; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s19
7491 ; GFX10-NEXT: s_cmp_lg_u32 s22, 0
7492 ; GFX10-NEXT: s_cselect_b64 s[16:17], s[16:17], 0
7493 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1]
7494 ; GFX10-NEXT: s_cmp_lg_u32 s28, 0
7495 ; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
7496 ; GFX10-NEXT: s_sub_i32 s22, s18, 64
7497 ; GFX10-NEXT: s_sub_i32 s19, 64, s18
7498 ; GFX10-NEXT: s_cmp_lt_u32 s18, 64
7499 ; GFX10-NEXT: s_cselect_b32 s26, 1, 0
7500 ; GFX10-NEXT: s_cmp_eq_u32 s18, 0
7501 ; GFX10-NEXT: s_cselect_b32 s27, 1, 0
7502 ; GFX10-NEXT: s_lshr_b64 s[0:1], s[8:9], s18
7503 ; GFX10-NEXT: s_lshl_b64 s[24:25], s[10:11], s19
7504 ; GFX10-NEXT: s_lshr_b64 s[18:19], s[10:11], s18
7505 ; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[24:25]
7506 ; GFX10-NEXT: s_lshr_b64 s[10:11], s[10:11], s22
7507 ; GFX10-NEXT: s_cmp_lg_u32 s26, 0
7508 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[10:11]
7509 ; GFX10-NEXT: s_cmp_lg_u32 s27, 0
7510 ; GFX10-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1]
7511 ; GFX10-NEXT: s_cmp_lg_u32 s26, 0
7512 ; GFX10-NEXT: s_cselect_b64 s[8:9], s[18:19], 0
7513 ; GFX10-NEXT: s_andn2_b64 s[10:11], 0x7f, s[20:21]
7514 ; GFX10-NEXT: s_lshl_b64 s[6:7], s[6:7], 1
7515 ; GFX10-NEXT: s_lshr_b32 s22, s5, 31
7516 ; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
7517 ; GFX10-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f
7518 ; GFX10-NEXT: s_or_b64 s[0:1], s[16:17], s[0:1]
7519 ; GFX10-NEXT: s_lshl_b64 s[4:5], s[4:5], 1
7520 ; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[22:23]
7521 ; GFX10-NEXT: s_sub_i32 s9, s10, 64
7522 ; GFX10-NEXT: s_sub_i32 s11, 64, s10
7523 ; GFX10-NEXT: s_cmp_lt_u32 s10, 64
7524 ; GFX10-NEXT: s_cselect_b32 s20, 1, 0
7525 ; GFX10-NEXT: s_cmp_eq_u32 s10, 0
7526 ; GFX10-NEXT: s_cselect_b32 s21, 1, 0
7527 ; GFX10-NEXT: s_lshr_b64 s[16:17], s[4:5], s11
7528 ; GFX10-NEXT: s_lshl_b64 s[18:19], s[6:7], s10
7529 ; GFX10-NEXT: s_lshl_b64 s[10:11], s[4:5], s10
7530 ; GFX10-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19]
7531 ; GFX10-NEXT: s_lshl_b64 s[4:5], s[4:5], s9
7532 ; GFX10-NEXT: s_cmp_lg_u32 s20, 0
7533 ; GFX10-NEXT: s_cselect_b64 s[10:11], s[10:11], 0
7534 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[16:17], s[4:5]
7535 ; GFX10-NEXT: s_cmp_lg_u32 s21, 0
7536 ; GFX10-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5]
7537 ; GFX10-NEXT: s_sub_i32 s18, s8, 64
7538 ; GFX10-NEXT: s_sub_i32 s9, 64, s8
7539 ; GFX10-NEXT: s_cmp_lt_u32 s8, 64
7540 ; GFX10-NEXT: s_cselect_b32 s19, 1, 0
7541 ; GFX10-NEXT: s_cmp_eq_u32 s8, 0
7542 ; GFX10-NEXT: s_cselect_b32 s20, 1, 0
7543 ; GFX10-NEXT: s_lshr_b64 s[4:5], s[12:13], s8
7544 ; GFX10-NEXT: s_lshl_b64 s[16:17], s[14:15], s9
7545 ; GFX10-NEXT: s_lshr_b64 s[8:9], s[14:15], s8
7546 ; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[16:17]
7547 ; GFX10-NEXT: s_lshr_b64 s[14:15], s[14:15], s18
7548 ; GFX10-NEXT: s_cmp_lg_u32 s19, 0
7549 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], s[14:15]
7550 ; GFX10-NEXT: s_cmp_lg_u32 s20, 0
7551 ; GFX10-NEXT: s_cselect_b64 s[4:5], s[12:13], s[4:5]
7552 ; GFX10-NEXT: s_cmp_lg_u32 s19, 0
7553 ; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0
7554 ; GFX10-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5]
7555 ; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
7556 ; GFX10-NEXT: ; return to shader part epilog
7558 ; GFX11-LABEL: s_fshr_v2i128:
7560 ; GFX11-NEXT: s_and_b64 s[18:19], s[16:17], 0x7f
7561 ; GFX11-NEXT: s_and_not1_b64 s[16:17], 0x7f, s[16:17]
7562 ; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1
7563 ; GFX11-NEXT: s_lshr_b32 s22, s1, 31
7564 ; GFX11-NEXT: s_mov_b32 s23, 0
7565 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1
7566 ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[22:23]
7567 ; GFX11-NEXT: s_sub_i32 s19, s16, 64
7568 ; GFX11-NEXT: s_sub_i32 s17, 64, s16
7569 ; GFX11-NEXT: s_cmp_lt_u32 s16, 64
7570 ; GFX11-NEXT: s_cselect_b32 s22, 1, 0
7571 ; GFX11-NEXT: s_cmp_eq_u32 s16, 0
7572 ; GFX11-NEXT: s_cselect_b32 s28, 1, 0
7573 ; GFX11-NEXT: s_lshr_b64 s[24:25], s[0:1], s17
7574 ; GFX11-NEXT: s_lshl_b64 s[26:27], s[2:3], s16
7575 ; GFX11-NEXT: s_lshl_b64 s[16:17], s[0:1], s16
7576 ; GFX11-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27]
7577 ; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s19
7578 ; GFX11-NEXT: s_cmp_lg_u32 s22, 0
7579 ; GFX11-NEXT: s_cselect_b64 s[16:17], s[16:17], 0
7580 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1]
7581 ; GFX11-NEXT: s_cmp_lg_u32 s28, 0
7582 ; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1]
7583 ; GFX11-NEXT: s_sub_i32 s22, s18, 64
7584 ; GFX11-NEXT: s_sub_i32 s19, 64, s18
7585 ; GFX11-NEXT: s_cmp_lt_u32 s18, 64
7586 ; GFX11-NEXT: s_cselect_b32 s26, 1, 0
7587 ; GFX11-NEXT: s_cmp_eq_u32 s18, 0
7588 ; GFX11-NEXT: s_cselect_b32 s27, 1, 0
7589 ; GFX11-NEXT: s_lshr_b64 s[0:1], s[8:9], s18
7590 ; GFX11-NEXT: s_lshl_b64 s[24:25], s[10:11], s19
7591 ; GFX11-NEXT: s_lshr_b64 s[18:19], s[10:11], s18
7592 ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[24:25]
7593 ; GFX11-NEXT: s_lshr_b64 s[10:11], s[10:11], s22
7594 ; GFX11-NEXT: s_cmp_lg_u32 s26, 0
7595 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[10:11]
7596 ; GFX11-NEXT: s_cmp_lg_u32 s27, 0
7597 ; GFX11-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1]
7598 ; GFX11-NEXT: s_cmp_lg_u32 s26, 0
7599 ; GFX11-NEXT: s_cselect_b64 s[8:9], s[18:19], 0
7600 ; GFX11-NEXT: s_and_not1_b64 s[10:11], 0x7f, s[20:21]
7601 ; GFX11-NEXT: s_lshl_b64 s[6:7], s[6:7], 1
7602 ; GFX11-NEXT: s_lshr_b32 s22, s5, 31
7603 ; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
7604 ; GFX11-NEXT: s_and_b64 s[8:9], s[20:21], 0x7f
7605 ; GFX11-NEXT: s_or_b64 s[0:1], s[16:17], s[0:1]
7606 ; GFX11-NEXT: s_lshl_b64 s[4:5], s[4:5], 1
7607 ; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[22:23]
7608 ; GFX11-NEXT: s_sub_i32 s9, s10, 64
7609 ; GFX11-NEXT: s_sub_i32 s11, 64, s10
7610 ; GFX11-NEXT: s_cmp_lt_u32 s10, 64
7611 ; GFX11-NEXT: s_cselect_b32 s20, 1, 0
7612 ; GFX11-NEXT: s_cmp_eq_u32 s10, 0
7613 ; GFX11-NEXT: s_cselect_b32 s21, 1, 0
7614 ; GFX11-NEXT: s_lshr_b64 s[16:17], s[4:5], s11
7615 ; GFX11-NEXT: s_lshl_b64 s[18:19], s[6:7], s10
7616 ; GFX11-NEXT: s_lshl_b64 s[10:11], s[4:5], s10
7617 ; GFX11-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19]
7618 ; GFX11-NEXT: s_lshl_b64 s[4:5], s[4:5], s9
7619 ; GFX11-NEXT: s_cmp_lg_u32 s20, 0
7620 ; GFX11-NEXT: s_cselect_b64 s[10:11], s[10:11], 0
7621 ; GFX11-NEXT: s_cselect_b64 s[4:5], s[16:17], s[4:5]
7622 ; GFX11-NEXT: s_cmp_lg_u32 s21, 0
7623 ; GFX11-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5]
7624 ; GFX11-NEXT: s_sub_i32 s18, s8, 64
7625 ; GFX11-NEXT: s_sub_i32 s9, 64, s8
7626 ; GFX11-NEXT: s_cmp_lt_u32 s8, 64
7627 ; GFX11-NEXT: s_cselect_b32 s19, 1, 0
7628 ; GFX11-NEXT: s_cmp_eq_u32 s8, 0
7629 ; GFX11-NEXT: s_cselect_b32 s20, 1, 0
7630 ; GFX11-NEXT: s_lshr_b64 s[4:5], s[12:13], s8
7631 ; GFX11-NEXT: s_lshl_b64 s[16:17], s[14:15], s9
7632 ; GFX11-NEXT: s_lshr_b64 s[8:9], s[14:15], s8
7633 ; GFX11-NEXT: s_or_b64 s[4:5], s[4:5], s[16:17]
7634 ; GFX11-NEXT: s_lshr_b64 s[14:15], s[14:15], s18
7635 ; GFX11-NEXT: s_cmp_lg_u32 s19, 0
7636 ; GFX11-NEXT: s_cselect_b64 s[4:5], s[4:5], s[14:15]
7637 ; GFX11-NEXT: s_cmp_lg_u32 s20, 0
7638 ; GFX11-NEXT: s_cselect_b64 s[4:5], s[12:13], s[4:5]
7639 ; GFX11-NEXT: s_cmp_lg_u32 s19, 0
7640 ; GFX11-NEXT: s_cselect_b64 s[8:9], s[8:9], 0
7641 ; GFX11-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5]
7642 ; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
7643 ; GFX11-NEXT: ; return to shader part epilog
7644 %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
7645 ret <2 x i128> %result
; v_fshr_v2i128: funnel shift right on <2 x i128> operands.
;
; The IR body is a single call to @llvm.fshr.v2i128(%lhs, %rhs, %amt) whose
; result is returned unchanged. The arguments carry no inreg marking and the
; function uses the default calling convention; every expected sequence below
; works on v registers and returns through s_setpc_b64 s[30:31].
;
; One expected instruction sequence is recorded per check prefix (GFX6, GFX8,
; GFX9, GFX10 and GFX11), matching the llc invocations at the top of the file.
; In each of them the shift amount and its bitwise complement are masked with
; 0x7f before the 64-bit shift / select / or sequences are emitted for each
; vector element.
;
; The check lines are autogenerated (see the note on the first line of this
; file). Regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
7648 define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) {
7649 ; GFX6-LABEL: v_fshr_v2i128:
7651 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7652 ; GFX6-NEXT: v_and_b32_e32 v23, 0x7f, v16
7653 ; GFX6-NEXT: v_not_b32_e32 v16, v16
7654 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
7655 ; GFX6-NEXT: v_and_b32_e32 v24, 0x7f, v16
7656 ; GFX6-NEXT: v_lshl_b64 v[16:17], v[0:1], 1
7657 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 31, v1
7658 ; GFX6-NEXT: v_or_b32_e32 v2, v2, v0
7659 ; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v24
7660 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[16:17], v0
7661 ; GFX6-NEXT: v_lshl_b64 v[18:19], v[2:3], v24
7662 ; GFX6-NEXT: v_subrev_i32_e32 v25, vcc, 64, v24
7663 ; GFX6-NEXT: v_lshl_b64 v[21:22], v[16:17], v24
7664 ; GFX6-NEXT: v_or_b32_e32 v18, v0, v18
7665 ; GFX6-NEXT: v_or_b32_e32 v19, v1, v19
7666 ; GFX6-NEXT: v_lshl_b64 v[0:1], v[16:17], v25
7667 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v24
7668 ; GFX6-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc
7669 ; GFX6-NEXT: v_cndmask_b32_e32 v22, 0, v22, vcc
7670 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v18, vcc
7671 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v19, vcc
7672 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24
7673 ; GFX6-NEXT: v_cndmask_b32_e32 v18, v0, v2, vcc
7674 ; GFX6-NEXT: v_cndmask_b32_e32 v19, v1, v3, vcc
7675 ; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v23
7676 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[8:9], v23
7677 ; GFX6-NEXT: v_lshl_b64 v[2:3], v[10:11], v2
7678 ; GFX6-NEXT: v_subrev_i32_e32 v24, vcc, 64, v23
7679 ; GFX6-NEXT: v_or_b32_e32 v2, v0, v2
7680 ; GFX6-NEXT: v_or_b32_e32 v3, v1, v3
7681 ; GFX6-NEXT: v_lshr_b64 v[0:1], v[10:11], v24
7682 ; GFX6-NEXT: v_lshr_b64 v[16:17], v[10:11], v23
7683 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23
7684 ; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
7685 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23
7686 ; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
7687 ; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
7688 ; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
7689 ; GFX6-NEXT: v_not_b32_e32 v8, v20
7690 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[6:7], 1
7691 ; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
7692 ; GFX6-NEXT: v_or_b32_e32 v3, v19, v3
7693 ; GFX6-NEXT: v_and_b32_e32 v19, 0x7f, v8
7694 ; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], 1
7695 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v5
7696 ; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v16, vcc
7697 ; GFX6-NEXT: v_or_b32_e32 v6, v6, v4
7698 ; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 64, v19
7699 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[8:9], v4
7700 ; GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], v19
7701 ; GFX6-NEXT: v_or_b32_e32 v2, v18, v2
7702 ; GFX6-NEXT: v_and_b32_e32 v18, 0x7f, v20
7703 ; GFX6-NEXT: v_subrev_i32_e32 v20, vcc, 64, v19
7704 ; GFX6-NEXT: v_lshl_b64 v[16:17], v[8:9], v19
7705 ; GFX6-NEXT: v_or_b32_e32 v10, v4, v10
7706 ; GFX6-NEXT: v_or_b32_e32 v11, v5, v11
7707 ; GFX6-NEXT: v_lshl_b64 v[4:5], v[8:9], v20
7708 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19
7709 ; GFX6-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc
7710 ; GFX6-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc
7711 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc
7712 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc
7713 ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v19
7714 ; GFX6-NEXT: v_cndmask_b32_e32 v10, v4, v6, vcc
7715 ; GFX6-NEXT: v_cndmask_b32_e32 v11, v5, v7, vcc
7716 ; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 64, v18
7717 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[12:13], v18
7718 ; GFX6-NEXT: v_lshl_b64 v[6:7], v[14:15], v6
7719 ; GFX6-NEXT: v_subrev_i32_e32 v19, vcc, 64, v18
7720 ; GFX6-NEXT: v_or_b32_e32 v6, v4, v6
7721 ; GFX6-NEXT: v_or_b32_e32 v7, v5, v7
7722 ; GFX6-NEXT: v_lshr_b64 v[4:5], v[14:15], v19
7723 ; GFX6-NEXT: v_lshr_b64 v[8:9], v[14:15], v18
7724 ; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v18
7725 ; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
7726 ; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
7727 ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v18
7728 ; GFX6-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[4:5]
7729 ; GFX6-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[4:5]
7730 ; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc
7731 ; GFX6-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc
7732 ; GFX6-NEXT: v_or_b32_e32 v0, v21, v0
7733 ; GFX6-NEXT: v_or_b32_e32 v1, v22, v1
7734 ; GFX6-NEXT: v_or_b32_e32 v4, v16, v4
7735 ; GFX6-NEXT: v_or_b32_e32 v5, v17, v5
7736 ; GFX6-NEXT: v_or_b32_e32 v6, v10, v6
7737 ; GFX6-NEXT: v_or_b32_e32 v7, v11, v7
7738 ; GFX6-NEXT: s_setpc_b64 s[30:31]
7740 ; GFX8-LABEL: v_fshr_v2i128:
7742 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7743 ; GFX8-NEXT: v_and_b32_e32 v23, 0x7f, v16
7744 ; GFX8-NEXT: v_not_b32_e32 v16, v16
7745 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
7746 ; GFX8-NEXT: v_and_b32_e32 v24, 0x7f, v16
7747 ; GFX8-NEXT: v_lshlrev_b64 v[16:17], 1, v[0:1]
7748 ; GFX8-NEXT: v_lshrrev_b32_e32 v0, 31, v1
7749 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v0
7750 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v24
7751 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, v[16:17]
7752 ; GFX8-NEXT: v_lshlrev_b64 v[18:19], v24, v[2:3]
7753 ; GFX8-NEXT: v_subrev_u32_e32 v25, vcc, 64, v24
7754 ; GFX8-NEXT: v_lshlrev_b64 v[21:22], v24, v[16:17]
7755 ; GFX8-NEXT: v_or_b32_e32 v18, v0, v18
7756 ; GFX8-NEXT: v_or_b32_e32 v19, v1, v19
7757 ; GFX8-NEXT: v_lshlrev_b64 v[0:1], v25, v[16:17]
7758 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v24
7759 ; GFX8-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc
7760 ; GFX8-NEXT: v_cndmask_b32_e32 v22, 0, v22, vcc
7761 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v18, vcc
7762 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v19, vcc
7763 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24
7764 ; GFX8-NEXT: v_cndmask_b32_e32 v18, v0, v2, vcc
7765 ; GFX8-NEXT: v_cndmask_b32_e32 v19, v1, v3, vcc
7766 ; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v23
7767 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v23, v[8:9]
7768 ; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, v[10:11]
7769 ; GFX8-NEXT: v_subrev_u32_e32 v24, vcc, 64, v23
7770 ; GFX8-NEXT: v_or_b32_e32 v2, v0, v2
7771 ; GFX8-NEXT: v_or_b32_e32 v3, v1, v3
7772 ; GFX8-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11]
7773 ; GFX8-NEXT: v_lshrrev_b64 v[16:17], v23, v[10:11]
7774 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23
7775 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
7776 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23
7777 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
7778 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
7779 ; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
7780 ; GFX8-NEXT: v_not_b32_e32 v8, v20
7781 ; GFX8-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7]
7782 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
7783 ; GFX8-NEXT: v_or_b32_e32 v3, v19, v3
7784 ; GFX8-NEXT: v_and_b32_e32 v19, 0x7f, v8
7785 ; GFX8-NEXT: v_lshlrev_b64 v[8:9], 1, v[4:5]
7786 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v5
7787 ; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v16, vcc
7788 ; GFX8-NEXT: v_or_b32_e32 v6, v6, v4
7789 ; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 64, v19
7790 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], v4, v[8:9]
7791 ; GFX8-NEXT: v_lshlrev_b64 v[10:11], v19, v[6:7]
7792 ; GFX8-NEXT: v_or_b32_e32 v2, v18, v2
7793 ; GFX8-NEXT: v_and_b32_e32 v18, 0x7f, v20
7794 ; GFX8-NEXT: v_subrev_u32_e32 v20, vcc, 64, v19
7795 ; GFX8-NEXT: v_lshlrev_b64 v[16:17], v19, v[8:9]
7796 ; GFX8-NEXT: v_or_b32_e32 v10, v4, v10
7797 ; GFX8-NEXT: v_or_b32_e32 v11, v5, v11
7798 ; GFX8-NEXT: v_lshlrev_b64 v[4:5], v20, v[8:9]
7799 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19
7800 ; GFX8-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc
7801 ; GFX8-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc
7802 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc
7803 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc
7804 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v19
7805 ; GFX8-NEXT: v_cndmask_b32_e32 v10, v4, v6, vcc
7806 ; GFX8-NEXT: v_cndmask_b32_e32 v11, v5, v7, vcc
7807 ; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 64, v18
7808 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], v18, v[12:13]
7809 ; GFX8-NEXT: v_lshlrev_b64 v[6:7], v6, v[14:15]
7810 ; GFX8-NEXT: v_subrev_u32_e32 v19, vcc, 64, v18
7811 ; GFX8-NEXT: v_or_b32_e32 v6, v4, v6
7812 ; GFX8-NEXT: v_or_b32_e32 v7, v5, v7
7813 ; GFX8-NEXT: v_lshrrev_b64 v[4:5], v19, v[14:15]
7814 ; GFX8-NEXT: v_lshrrev_b64 v[8:9], v18, v[14:15]
7815 ; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v18
7816 ; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
7817 ; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
7818 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v18
7819 ; GFX8-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[4:5]
7820 ; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[4:5]
7821 ; GFX8-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc
7822 ; GFX8-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc
7823 ; GFX8-NEXT: v_or_b32_e32 v0, v21, v0
7824 ; GFX8-NEXT: v_or_b32_e32 v1, v22, v1
7825 ; GFX8-NEXT: v_or_b32_e32 v4, v16, v4
7826 ; GFX8-NEXT: v_or_b32_e32 v5, v17, v5
7827 ; GFX8-NEXT: v_or_b32_e32 v6, v10, v6
7828 ; GFX8-NEXT: v_or_b32_e32 v7, v11, v7
7829 ; GFX8-NEXT: s_setpc_b64 s[30:31]
7831 ; GFX9-LABEL: v_fshr_v2i128:
7833 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7834 ; GFX9-NEXT: v_and_b32_e32 v23, 0x7f, v16
7835 ; GFX9-NEXT: v_not_b32_e32 v16, v16
7836 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
7837 ; GFX9-NEXT: v_and_b32_e32 v24, 0x7f, v16
7838 ; GFX9-NEXT: v_lshlrev_b64 v[16:17], 1, v[0:1]
7839 ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 31, v1
7840 ; GFX9-NEXT: v_or_b32_e32 v2, v2, v0
7841 ; GFX9-NEXT: v_sub_u32_e32 v0, 64, v24
7842 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, v[16:17]
7843 ; GFX9-NEXT: v_lshlrev_b64 v[18:19], v24, v[2:3]
7844 ; GFX9-NEXT: v_subrev_u32_e32 v25, 64, v24
7845 ; GFX9-NEXT: v_lshlrev_b64 v[21:22], v24, v[16:17]
7846 ; GFX9-NEXT: v_or_b32_e32 v18, v0, v18
7847 ; GFX9-NEXT: v_or_b32_e32 v19, v1, v19
7848 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], v25, v[16:17]
7849 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v24
7850 ; GFX9-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc
7851 ; GFX9-NEXT: v_cndmask_b32_e32 v22, 0, v22, vcc
7852 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v18, vcc
7853 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v19, vcc
7854 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24
7855 ; GFX9-NEXT: v_cndmask_b32_e32 v18, v0, v2, vcc
7856 ; GFX9-NEXT: v_sub_u32_e32 v2, 64, v23
7857 ; GFX9-NEXT: v_cndmask_b32_e32 v19, v1, v3, vcc
7858 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v23, v[8:9]
7859 ; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, v[10:11]
7860 ; GFX9-NEXT: v_subrev_u32_e32 v24, 64, v23
7861 ; GFX9-NEXT: v_or_b32_e32 v2, v0, v2
7862 ; GFX9-NEXT: v_or_b32_e32 v3, v1, v3
7863 ; GFX9-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11]
7864 ; GFX9-NEXT: v_lshrrev_b64 v[16:17], v23, v[10:11]
7865 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23
7866 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
7867 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23
7868 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
7869 ; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5]
7870 ; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v17, vcc
7871 ; GFX9-NEXT: v_not_b32_e32 v8, v20
7872 ; GFX9-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7]
7873 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5]
7874 ; GFX9-NEXT: v_or_b32_e32 v3, v19, v3
7875 ; GFX9-NEXT: v_and_b32_e32 v19, 0x7f, v8
7876 ; GFX9-NEXT: v_lshlrev_b64 v[8:9], 1, v[4:5]
7877 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v5
7878 ; GFX9-NEXT: v_or_b32_e32 v6, v6, v4
7879 ; GFX9-NEXT: v_sub_u32_e32 v4, 64, v19
7880 ; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v16, vcc
7881 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v4, v[8:9]
7882 ; GFX9-NEXT: v_lshlrev_b64 v[10:11], v19, v[6:7]
7883 ; GFX9-NEXT: v_or_b32_e32 v2, v18, v2
7884 ; GFX9-NEXT: v_and_b32_e32 v18, 0x7f, v20
7885 ; GFX9-NEXT: v_subrev_u32_e32 v20, 64, v19
7886 ; GFX9-NEXT: v_lshlrev_b64 v[16:17], v19, v[8:9]
7887 ; GFX9-NEXT: v_or_b32_e32 v10, v4, v10
7888 ; GFX9-NEXT: v_or_b32_e32 v11, v5, v11
7889 ; GFX9-NEXT: v_lshlrev_b64 v[4:5], v20, v[8:9]
7890 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19
7891 ; GFX9-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc
7892 ; GFX9-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc
7893 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc
7894 ; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc
7895 ; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v19
7896 ; GFX9-NEXT: v_cndmask_b32_e32 v10, v4, v6, vcc
7897 ; GFX9-NEXT: v_sub_u32_e32 v6, 64, v18
7898 ; GFX9-NEXT: v_cndmask_b32_e32 v11, v5, v7, vcc
7899 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v18, v[12:13]
7900 ; GFX9-NEXT: v_lshlrev_b64 v[6:7], v6, v[14:15]
7901 ; GFX9-NEXT: v_subrev_u32_e32 v19, 64, v18
7902 ; GFX9-NEXT: v_or_b32_e32 v6, v4, v6
7903 ; GFX9-NEXT: v_or_b32_e32 v7, v5, v7
7904 ; GFX9-NEXT: v_lshrrev_b64 v[4:5], v19, v[14:15]
7905 ; GFX9-NEXT: v_lshrrev_b64 v[8:9], v18, v[14:15]
7906 ; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v18
7907 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
7908 ; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
7909 ; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v18
7910 ; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v12, s[4:5]
7911 ; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[4:5]
7912 ; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc
7913 ; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc
7914 ; GFX9-NEXT: v_or_b32_e32 v0, v21, v0
7915 ; GFX9-NEXT: v_or_b32_e32 v1, v22, v1
7916 ; GFX9-NEXT: v_or_b32_e32 v4, v16, v4
7917 ; GFX9-NEXT: v_or_b32_e32 v5, v17, v5
7918 ; GFX9-NEXT: v_or_b32_e32 v6, v10, v6
7919 ; GFX9-NEXT: v_or_b32_e32 v7, v11, v7
7920 ; GFX9-NEXT: s_setpc_b64 s[30:31]
7922 ; GFX10-LABEL: v_fshr_v2i128:
7924 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7925 ; GFX10-NEXT: v_not_b32_e32 v17, v16
7926 ; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
7927 ; GFX10-NEXT: v_and_b32_e32 v26, 0x7f, v16
7928 ; GFX10-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7]
7929 ; GFX10-NEXT: v_and_b32_e32 v25, 0x7f, v17
7930 ; GFX10-NEXT: v_lshrrev_b32_e32 v17, 31, v1
7931 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
7932 ; GFX10-NEXT: v_subrev_nc_u32_e32 v27, 64, v26
7933 ; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v26
7934 ; GFX10-NEXT: v_sub_nc_u32_e32 v18, 64, v25
7935 ; GFX10-NEXT: v_or_b32_e32 v2, v2, v17
7936 ; GFX10-NEXT: v_subrev_nc_u32_e32 v19, 64, v25
7937 ; GFX10-NEXT: v_lshlrev_b64 v[23:24], v25, v[0:1]
7938 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25
7939 ; GFX10-NEXT: v_lshrrev_b64 v[17:18], v18, v[0:1]
7940 ; GFX10-NEXT: v_lshlrev_b64 v[21:22], v25, v[2:3]
7941 ; GFX10-NEXT: v_lshlrev_b64 v[0:1], v19, v[0:1]
7942 ; GFX10-NEXT: v_cndmask_b32_e32 v23, 0, v23, vcc_lo
7943 ; GFX10-NEXT: v_cndmask_b32_e32 v24, 0, v24, vcc_lo
7944 ; GFX10-NEXT: v_or_b32_e32 v22, v18, v22
7945 ; GFX10-NEXT: v_sub_nc_u32_e32 v18, 64, v26
7946 ; GFX10-NEXT: v_or_b32_e32 v21, v17, v21
7947 ; GFX10-NEXT: v_lshrrev_b64 v[16:17], v26, v[8:9]
7948 ; GFX10-NEXT: v_cndmask_b32_e32 v22, v1, v22, vcc_lo
7949 ; GFX10-NEXT: v_lshlrev_b64 v[18:19], v18, v[10:11]
7950 ; GFX10-NEXT: v_cndmask_b32_e32 v21, v0, v21, vcc_lo
7951 ; GFX10-NEXT: v_lshrrev_b64 v[0:1], v27, v[10:11]
7952 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v25
7953 ; GFX10-NEXT: v_or_b32_e32 v16, v16, v18
7954 ; GFX10-NEXT: v_or_b32_e32 v17, v17, v19
7955 ; GFX10-NEXT: v_cndmask_b32_e32 v18, v21, v2, vcc_lo
7956 ; GFX10-NEXT: v_cndmask_b32_e32 v22, v22, v3, vcc_lo
7957 ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v26
7958 ; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v16, s4
7959 ; GFX10-NEXT: v_not_b32_e32 v16, v20
7960 ; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v17, s4
7961 ; GFX10-NEXT: v_lshrrev_b64 v[2:3], v26, v[10:11]
7962 ; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo
7963 ; GFX10-NEXT: v_and_b32_e32 v25, 0x7f, v16
7964 ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 31, v5
7965 ; GFX10-NEXT: v_lshlrev_b64 v[4:5], 1, v[4:5]
7966 ; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo
7967 ; GFX10-NEXT: v_or_b32_e32 v0, v23, v0
7968 ; GFX10-NEXT: v_sub_nc_u32_e32 v9, 64, v25
7969 ; GFX10-NEXT: v_or_b32_e32 v6, v6, v8
7970 ; GFX10-NEXT: v_and_b32_e32 v23, 0x7f, v20
7971 ; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s4
7972 ; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, v3, s4
7973 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], v9, v[4:5]
7974 ; GFX10-NEXT: v_lshlrev_b64 v[10:11], v25, v[6:7]
7975 ; GFX10-NEXT: v_sub_nc_u32_e32 v20, 64, v23
7976 ; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 64, v25
7977 ; GFX10-NEXT: v_or_b32_e32 v2, v18, v2
7978 ; GFX10-NEXT: v_lshlrev_b64 v[16:17], v25, v[4:5]
7979 ; GFX10-NEXT: v_lshrrev_b64 v[18:19], v23, v[12:13]
7980 ; GFX10-NEXT: v_or_b32_e32 v10, v8, v10
7981 ; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v23
7982 ; GFX10-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15]
7983 ; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25
7984 ; GFX10-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
7985 ; GFX10-NEXT: v_or_b32_e32 v5, v9, v11
7986 ; GFX10-NEXT: v_lshrrev_b64 v[8:9], v8, v[14:15]
7987 ; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v23
7988 ; GFX10-NEXT: v_cndmask_b32_e32 v11, 0, v16, vcc_lo
7989 ; GFX10-NEXT: v_or_b32_e32 v16, v18, v20
7990 ; GFX10-NEXT: v_or_b32_e32 v18, v19, v21
7991 ; GFX10-NEXT: v_cndmask_b32_e32 v10, v3, v10, vcc_lo
7992 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo
7993 ; GFX10-NEXT: v_lshrrev_b64 v[3:4], v23, v[14:15]
7994 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v16, s4
7995 ; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v23
7996 ; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v25
7997 ; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v18, s4
7998 ; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo
7999 ; GFX10-NEXT: v_or_b32_e32 v1, v24, v1
8000 ; GFX10-NEXT: v_cndmask_b32_e64 v6, v10, v6, s6
8001 ; GFX10-NEXT: v_cndmask_b32_e64 v7, v5, v7, s6
8002 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v8, v12, s5
8003 ; GFX10-NEXT: v_cndmask_b32_e64 v8, v9, v13, s5
8004 ; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v3, s4
8005 ; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v4, s4
8006 ; GFX10-NEXT: v_or_b32_e32 v3, v22, v26
8007 ; GFX10-NEXT: v_or_b32_e32 v4, v11, v5
8008 ; GFX10-NEXT: v_or_b32_e32 v5, v14, v8
8009 ; GFX10-NEXT: v_or_b32_e32 v6, v6, v9
8010 ; GFX10-NEXT: v_or_b32_e32 v7, v7, v10
8011 ; GFX10-NEXT: s_setpc_b64 s[30:31]
8013 ; GFX11-LABEL: v_fshr_v2i128:
8015 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8016 ; GFX11-NEXT: v_not_b32_e32 v17, v16
8017 ; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
8018 ; GFX11-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7]
8019 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3)
8020 ; GFX11-NEXT: v_and_b32_e32 v25, 0x7f, v17
8021 ; GFX11-NEXT: v_lshrrev_b32_e32 v17, 31, v1
8022 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
8023 ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25
8024 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
8025 ; GFX11-NEXT: v_or_b32_e32 v2, v2, v17
8026 ; GFX11-NEXT: v_lshlrev_b64 v[23:24], v25, v[0:1]
8027 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
8028 ; GFX11-NEXT: v_dual_cndmask_b32 v23, 0, v23 :: v_dual_and_b32 v26, 0x7f, v16
8029 ; GFX11-NEXT: v_cndmask_b32_e32 v24, 0, v24, vcc_lo
8030 ; GFX11-NEXT: v_sub_nc_u32_e32 v18, 64, v25
8031 ; GFX11-NEXT: v_lshlrev_b64 v[21:22], v25, v[2:3]
8032 ; GFX11-NEXT: v_subrev_nc_u32_e32 v19, 64, v25
8033 ; GFX11-NEXT: v_subrev_nc_u32_e32 v27, 64, v26
8034 ; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v26
8035 ; GFX11-NEXT: v_lshrrev_b64 v[17:18], v18, v[0:1]
8036 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
8037 ; GFX11-NEXT: v_lshlrev_b64 v[0:1], v19, v[0:1]
8038 ; GFX11-NEXT: v_or_b32_e32 v22, v18, v22
8039 ; GFX11-NEXT: v_sub_nc_u32_e32 v18, 64, v26
8040 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
8041 ; GFX11-NEXT: v_or_b32_e32 v21, v17, v21
8042 ; GFX11-NEXT: v_lshrrev_b64 v[16:17], v26, v[8:9]
8043 ; GFX11-NEXT: v_cndmask_b32_e32 v22, v1, v22, vcc_lo
8044 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
8045 ; GFX11-NEXT: v_lshlrev_b64 v[18:19], v18, v[10:11]
8046 ; GFX11-NEXT: v_cndmask_b32_e32 v21, v0, v21, vcc_lo
8047 ; GFX11-NEXT: v_lshrrev_b64 v[0:1], v27, v[10:11]
8048 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v25
8049 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
8050 ; GFX11-NEXT: v_or_b32_e32 v16, v16, v18
8051 ; GFX11-NEXT: v_or_b32_e32 v17, v17, v19
8052 ; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v16, s0
8053 ; GFX11-NEXT: v_not_b32_e32 v16, v20
8054 ; GFX11-NEXT: v_cndmask_b32_e32 v18, v21, v2, vcc_lo
8055 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
8056 ; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v17, s0
8057 ; GFX11-NEXT: v_cndmask_b32_e32 v22, v22, v3, vcc_lo
8058 ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v26
8059 ; GFX11-NEXT: v_and_b32_e32 v25, 0x7f, v16
8060 ; GFX11-NEXT: v_lshrrev_b64 v[2:3], v26, v[10:11]
8061 ; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8
8062 ; GFX11-NEXT: v_lshrrev_b32_e32 v8, 31, v5
8063 ; GFX11-NEXT: v_lshlrev_b64 v[4:5], 1, v[4:5]
8064 ; GFX11-NEXT: v_sub_nc_u32_e32 v9, 64, v25
8065 ; GFX11-NEXT: v_cndmask_b32_e64 v26, 0, v3, s0
8066 ; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 64, v25
8067 ; GFX11-NEXT: v_or_b32_e32 v6, v6, v8
8068 ; GFX11-NEXT: v_or_b32_e32 v0, v23, v0
8069 ; GFX11-NEXT: v_lshrrev_b64 v[8:9], v9, v[4:5]
8070 ; GFX11-NEXT: v_lshlrev_b64 v[16:17], v25, v[4:5]
8071 ; GFX11-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
8072 ; GFX11-NEXT: v_lshlrev_b64 v[10:11], v25, v[6:7]
8073 ; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25
8074 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0
8075 ; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v25
8076 ; GFX11-NEXT: v_or_b32_e32 v1, v24, v1
8077 ; GFX11-NEXT: v_or_b32_e32 v10, v8, v10
8078 ; GFX11-NEXT: v_and_b32_e32 v23, 0x7f, v20
8079 ; GFX11-NEXT: v_or_b32_e32 v2, v18, v2
8080 ; GFX11-NEXT: v_or_b32_e32 v5, v9, v11
8081 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
8082 ; GFX11-NEXT: v_dual_cndmask_b32 v11, 0, v16 :: v_dual_cndmask_b32 v10, v3, v10
8083 ; GFX11-NEXT: v_sub_nc_u32_e32 v20, 64, v23
8084 ; GFX11-NEXT: v_subrev_nc_u32_e32 v8, 64, v23
8085 ; GFX11-NEXT: v_lshrrev_b64 v[18:19], v23, v[12:13]
8086 ; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v23
8087 ; GFX11-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo
8088 ; GFX11-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15]
8089 ; GFX11-NEXT: v_lshrrev_b64 v[8:9], v8, v[14:15]
8090 ; GFX11-NEXT: v_lshrrev_b64 v[3:4], v23, v[14:15]
8091 ; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo
8092 ; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v23
8093 ; GFX11-NEXT: v_cndmask_b32_e64 v6, v10, v6, s2
8094 ; GFX11-NEXT: v_or_b32_e32 v16, v18, v20
8095 ; GFX11-NEXT: v_or_b32_e32 v18, v19, v21
8096 ; GFX11-NEXT: v_cndmask_b32_e64 v7, v5, v7, s2
8097 ; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v4, s0
8098 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
8099 ; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v16, s0
8100 ; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v18, s0
8101 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
8102 ; GFX11-NEXT: v_or_b32_e32 v7, v7, v10
8103 ; GFX11-NEXT: v_cndmask_b32_e64 v5, v8, v12, s1
8104 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4)
8105 ; GFX11-NEXT: v_cndmask_b32_e64 v8, v9, v13, s1
8106 ; GFX11-NEXT: v_cndmask_b32_e64 v9, 0, v3, s0
8107 ; GFX11-NEXT: v_or_b32_e32 v3, v22, v26
8108 ; GFX11-NEXT: v_or_b32_e32 v4, v11, v5
8109 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
8110 ; GFX11-NEXT: v_or_b32_e32 v5, v14, v8
8111 ; GFX11-NEXT: v_or_b32_e32 v6, v6, v9
8112 ; GFX11-NEXT: s_setpc_b64 s[30:31]
8113 %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
8114 ret <2 x i128> %result
; Declarations of the llvm.fshr intrinsic overloads for this test file, listed
; by element width: i7/i8, i16, i24, i32, i48, i64 and i128, in scalar and
; vector forms. Every overload takes three operands of the same type and
; returns that type, and every declaration references attribute group #0.
8117 declare i7 @llvm.fshr.i7(i7, i7, i7) #0
8118 declare i8 @llvm.fshr.i8(i8, i8, i8) #0
8119 declare <2 x i8> @llvm.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0
8120 declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0
8122 declare i16 @llvm.fshr.i16(i16, i16, i16) #0
8123 declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0
8124 declare <3 x i16> @llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0
8125 declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0
8126 declare <5 x i16> @llvm.fshr.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0
8127 declare <6 x i16> @llvm.fshr.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0
8128 declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0
8130 declare i24 @llvm.fshr.i24(i24, i24, i24) #0
8131 declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) #0
8133 declare i32 @llvm.fshr.i32(i32, i32, i32) #0
8134 declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0
8135 declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0
8136 declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0
8137 declare <5 x i32> @llvm.fshr.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0
8138 declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0
8140 declare i48 @llvm.fshr.i48(i48, i48, i48) #0
8142 declare i64 @llvm.fshr.i64(i64, i64, i64) #0
8143 declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0
8145 declare i128 @llvm.fshr.i128(i128, i128, i128) #0
8146 declare <2 x i128> @llvm.fshr.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) #0
; Attribute group shared by all of the intrinsic declarations above.
; NOTE(review): newer LLVM releases appear to spell `readnone` as
; `memory(none)`; confirm against the LLVM version this test targets before
; changing the spelling here.
8148 attributes #0 = { nounwind readnone speculatable willreturn }