1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3 ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,CGP %s
5 ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
; Signed 32-bit remainder of two ordinary (non-inreg) function arguments.
; The expected code is recorded separately under the GISEL and CGP prefixes.
; In the sequences below the two variants differ only in the reciprocal
; instruction: v_rcp_iflag_f32 under GISEL, v_rcp_f32 under CGP.
; Shape of the expected code: take the sign masks with an arithmetic shift by
; 31, form absolute values with add+xor, run the unsigned reciprocal-based
; remainder with two conditional subtract steps, then reapply the numerator's
; sign mask (v2) with xor+sub.
7 define i32 @v_srem_i32(i32 %num, i32 %den) {
8 ; GISEL-LABEL: v_srem_i32:
10 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
12 ; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1
13 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
14 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
15 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
16 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
17 ; GISEL-NEXT: v_cvt_f32_u32_e32 v3, v1
18 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
19 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3
20 ; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
21 ; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
22 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, v3
23 ; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
24 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4
25 ; GISEL-NEXT: v_mul_hi_u32 v3, v0, v3
26 ; GISEL-NEXT: v_mul_lo_u32 v3, v3, v1
27 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
28 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
29 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
30 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
31 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
32 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
33 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
34 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
35 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
36 ; GISEL-NEXT: s_setpc_b64 s[30:31]
38 ; CGP-LABEL: v_srem_i32:
40 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41 ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
42 ; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v1
43 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
44 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v3
45 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
46 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v3
47 ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v1
48 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
49 ; CGP-NEXT: v_rcp_f32_e32 v3, v3
50 ; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
51 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
52 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
53 ; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
54 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
55 ; CGP-NEXT: v_mul_hi_u32 v3, v0, v3
56 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v1
57 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
58 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
59 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
60 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
61 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
62 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
63 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
64 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
65 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
66 ; CGP-NEXT: s_setpc_b64 s[30:31]
67 %result = srem i32 %num, %den
71 ; FIXME: This is a workaround for not handling the uniform VGPR case.
72 declare i32 @llvm.amdgcn.readfirstlane(i32)
; Signed 32-bit remainder of two inreg arguments in an amdgpu_ps function.
; The srem result is passed through llvm.amdgcn.readfirstlane in the IR.
; In the expected code the sign masks and absolute values are computed with
; scalar instructions (s_ashr_i32 / s_add_i32 / s_xor_b32), the reciprocal-based
; remainder itself is computed with vector instructions, and the value is moved
; to s0 with v_readfirstlane_b32.
; As recorded below, the GISEL and CGP expectations differ only in the
; reciprocal instruction (v_rcp_iflag_f32 versus v_rcp_f32).
74 define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) {
75 ; GISEL-LABEL: s_srem_i32:
77 ; GISEL-NEXT: s_ashr_i32 s2, s0, 31
78 ; GISEL-NEXT: s_ashr_i32 s3, s1, 31
79 ; GISEL-NEXT: s_add_i32 s0, s0, s2
80 ; GISEL-NEXT: s_add_i32 s1, s1, s3
81 ; GISEL-NEXT: s_xor_b32 s0, s0, s2
82 ; GISEL-NEXT: s_xor_b32 s1, s1, s3
83 ; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s1
84 ; GISEL-NEXT: s_sub_i32 s3, 0, s1
85 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0
86 ; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
87 ; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0
88 ; GISEL-NEXT: v_mul_lo_u32 v1, s3, v0
89 ; GISEL-NEXT: v_mul_hi_u32 v1, v0, v1
90 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
91 ; GISEL-NEXT: v_mul_hi_u32 v0, s0, v0
92 ; GISEL-NEXT: v_mul_lo_u32 v0, v0, s1
93 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
94 ; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
95 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
96 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
97 ; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
98 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
99 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
100 ; GISEL-NEXT: v_xor_b32_e32 v0, s2, v0
101 ; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0
102 ; GISEL-NEXT: v_readfirstlane_b32 s0, v0
103 ; GISEL-NEXT: ; return to shader part epilog
105 ; CGP-LABEL: s_srem_i32:
107 ; CGP-NEXT: s_ashr_i32 s2, s0, 31
108 ; CGP-NEXT: s_ashr_i32 s3, s1, 31
109 ; CGP-NEXT: s_add_i32 s0, s0, s2
110 ; CGP-NEXT: s_add_i32 s1, s1, s3
111 ; CGP-NEXT: s_xor_b32 s0, s0, s2
112 ; CGP-NEXT: s_xor_b32 s1, s1, s3
113 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, s1
114 ; CGP-NEXT: s_sub_i32 s3, 0, s1
115 ; CGP-NEXT: v_rcp_f32_e32 v0, v0
116 ; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
117 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
118 ; CGP-NEXT: v_mul_lo_u32 v1, s3, v0
119 ; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
120 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
121 ; CGP-NEXT: v_mul_hi_u32 v0, s0, v0
122 ; CGP-NEXT: v_mul_lo_u32 v0, v0, s1
123 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
124 ; CGP-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
125 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
126 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
127 ; CGP-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
128 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
129 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
130 ; CGP-NEXT: v_xor_b32_e32 v0, s2, v0
131 ; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0
132 ; CGP-NEXT: v_readfirstlane_b32 s0, v0
133 ; CGP-NEXT: ; return to shader part epilog
134 %result = srem i32 %num, %den
135 %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
; Signed remainder of two <2 x i32> vectors.
; The expected code performs the scalar i32 sequence once per element, with the
; instructions of the two elements interleaved. Element 0 uses v0 (numerator)
; and v2 (denominator); element 1 uses v1 and v3. Results are left in v0 and v1.
; As recorded below, the GISEL and CGP expectations differ only in the
; reciprocal instructions (v_rcp_iflag_f32 versus v_rcp_f32).
139 define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) {
140 ; GISEL-LABEL: v_srem_v2i32:
142 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143 ; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0
144 ; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v2
145 ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
146 ; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3
147 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
148 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5
149 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v6
150 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
151 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
152 ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v5
153 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
154 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
155 ; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v2
156 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
157 ; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3
158 ; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
159 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
160 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8
161 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
162 ; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
163 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
164 ; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
165 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v5
166 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8
167 ; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7
168 ; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9
169 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
170 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9
171 ; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5
172 ; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
173 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v2
174 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3
175 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
176 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
177 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
178 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
179 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
180 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
181 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
182 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
183 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
184 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
185 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
186 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
187 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
188 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
189 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
190 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
191 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
192 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
193 ; GISEL-NEXT: s_setpc_b64 s[30:31]
195 ; CGP-LABEL: v_srem_v2i32:
197 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
198 ; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v0
199 ; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v2
200 ; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
201 ; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3
202 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
203 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5
204 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
205 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
206 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
207 ; CGP-NEXT: v_xor_b32_e32 v2, v2, v5
208 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
209 ; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
210 ; CGP-NEXT: v_cvt_f32_u32_e32 v5, v2
211 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
212 ; CGP-NEXT: v_cvt_f32_u32_e32 v8, v3
213 ; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
214 ; CGP-NEXT: v_rcp_f32_e32 v5, v5
215 ; CGP-NEXT: v_rcp_f32_e32 v8, v8
216 ; CGP-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
217 ; CGP-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
218 ; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
219 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
220 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v5
221 ; CGP-NEXT: v_mul_lo_u32 v9, v9, v8
222 ; CGP-NEXT: v_mul_hi_u32 v7, v5, v7
223 ; CGP-NEXT: v_mul_hi_u32 v9, v8, v9
224 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7
225 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9
226 ; CGP-NEXT: v_mul_hi_u32 v5, v0, v5
227 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7
228 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v2
229 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v3
230 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
231 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
232 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
233 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
234 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
235 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
236 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
237 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
238 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
239 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
240 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
241 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
242 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
243 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
244 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
245 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
246 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
247 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
248 ; CGP-NEXT: s_setpc_b64 s[30:31]
249 %result = srem <2 x i32> %num, %den
250 ret <2 x i32> %result
; Signed remainder of an i32 by the constant 4096 (0x1000).
; The expectations use the shared CHECK prefix, i.e. one sequence is expected
; for both run configurations.
; In the expected code only the numerator needs sign handling (v1 is its sign
; mask), the reciprocal is taken of the constant operand 0x45800000, and the
; quotient estimate is multiplied back by the denominator with a left shift by
; 12 rather than a v_mul_lo_u32.
253 define i32 @v_srem_i32_pow2k_denom(i32 %num) {
254 ; CHECK-LABEL: v_srem_i32_pow2k_denom:
256 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
257 ; CHECK-NEXT: s_movk_i32 s4, 0x1000
258 ; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
259 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, 0x45800000
260 ; CHECK-NEXT: v_mov_b32_e32 v3, 0xfffff000
261 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
262 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
263 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
264 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
265 ; CHECK-NEXT: v_mul_lo_u32 v3, v2, v3
266 ; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
267 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
268 ; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
269 ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 12, v2
270 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
271 ; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
272 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
273 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
274 ; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
275 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
276 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
277 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
278 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
279 ; CHECK-NEXT: s_setpc_b64 s[30:31]
280 %result = srem i32 %num, 4096
; Signed remainder of a <2 x i32> by a splat of the constant 4096 (0x1000).
; Unlike the scalar pow2k test, the two run configurations have separate
; expectations here:
;  - under the GISEL prefix the constant is converted with v_cvt_f32_u32 and a
;    single v_rcp_iflag_f32 result (v3) feeds both elements;
;  - under the CGP prefix v_rcp_iflag_f32 is applied to the constant operand
;    0x45800000 once per element, and the denominator and its negation are
;    materialized in additional registers (s5, v4, v5).
; Both variants multiply back by the denominator with a left shift by 12.
284 define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
285 ; GISEL-LABEL: v_srem_v2i32_pow2k_denom:
287 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
288 ; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
289 ; GISEL-NEXT: s_movk_i32 s4, 0x1000
290 ; GISEL-NEXT: v_cvt_f32_u32_e32 v3, 0x1000
291 ; GISEL-NEXT: v_mov_b32_e32 v4, 0xfffff000
292 ; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v1
293 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
294 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3
295 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5
296 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
297 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v3
298 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
299 ; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
300 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
301 ; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
302 ; GISEL-NEXT: v_mul_lo_u32 v7, v6, v4
303 ; GISEL-NEXT: v_mul_lo_u32 v4, v3, v4
304 ; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
305 ; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
306 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
307 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4
308 ; GISEL-NEXT: v_mul_hi_u32 v4, v0, v6
309 ; GISEL-NEXT: v_mul_hi_u32 v3, v1, v3
310 ; GISEL-NEXT: v_lshlrev_b32_e32 v4, 12, v4
311 ; GISEL-NEXT: v_lshlrev_b32_e32 v3, 12, v3
312 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
313 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
314 ; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
315 ; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v1
316 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
317 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
318 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
319 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
320 ; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
321 ; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v1
322 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
323 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
324 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
325 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
326 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
327 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
328 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
329 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
330 ; GISEL-NEXT: s_setpc_b64 s[30:31]
332 ; CGP-LABEL: v_srem_v2i32_pow2k_denom:
334 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335 ; CGP-NEXT: s_movk_i32 s4, 0x1000
336 ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
337 ; CGP-NEXT: v_rcp_iflag_f32_e32 v3, 0x45800000
338 ; CGP-NEXT: s_movk_i32 s5, 0xf000
339 ; CGP-NEXT: v_mov_b32_e32 v4, 0xfffff000
340 ; CGP-NEXT: v_mov_b32_e32 v5, 0x1000
341 ; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
342 ; CGP-NEXT: v_rcp_iflag_f32_e32 v7, 0x45800000
343 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
344 ; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
345 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
346 ; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7
347 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
348 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
349 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
350 ; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
351 ; CGP-NEXT: v_mul_lo_u32 v8, v3, s5
352 ; CGP-NEXT: v_mul_lo_u32 v4, v7, v4
353 ; CGP-NEXT: v_mul_hi_u32 v8, v3, v8
354 ; CGP-NEXT: v_mul_hi_u32 v4, v7, v4
355 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8
356 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
357 ; CGP-NEXT: v_mul_hi_u32 v3, v0, v3
358 ; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
359 ; CGP-NEXT: v_lshlrev_b32_e32 v3, 12, v3
360 ; CGP-NEXT: v_lshlrev_b32_e32 v4, 12, v4
361 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
362 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
363 ; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
364 ; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 0x1000, v1
365 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
366 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
367 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
368 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
369 ; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
370 ; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 0x1000, v1
371 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
372 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
373 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
374 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
375 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
376 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
377 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
378 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
379 ; CGP-NEXT: s_setpc_b64 s[30:31]
380 %result = srem <2 x i32> %num, <i32 4096, i32 4096>
381 ret <2 x i32> %result
; Signed remainder of an i32 by the odd constant 1235195 (0x12d8fb).
; The expectations use the shared CHECK prefix, i.e. one sequence is expected
; for both run configurations.
; The expected code has the same shape as the power-of-two constant test,
; except that the quotient estimate is multiplied back by the denominator with
; v_mul_lo_u32 (denominator held in s4) instead of a shift.
384 define i32 @v_srem_i32_oddk_denom(i32 %num) {
385 ; CHECK-LABEL: v_srem_i32_oddk_denom:
387 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
388 ; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb
389 ; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
390 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, 0x4996c7d8
391 ; CHECK-NEXT: v_mov_b32_e32 v3, 0xffed2705
392 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
393 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
394 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
395 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
396 ; CHECK-NEXT: v_mul_lo_u32 v3, v2, v3
397 ; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
398 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
399 ; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
400 ; CHECK-NEXT: v_mul_lo_u32 v2, v2, s4
401 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
402 ; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
403 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
404 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
405 ; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
406 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
407 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
408 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
409 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
410 ; CHECK-NEXT: s_setpc_b64 s[30:31]
411 %result = srem i32 %num, 1235195
; Signed remainder of a <2 x i32> by a splat of the odd constant 1235195
; (0x12d8fb).
; The two run configurations have separate expectations:
;  - under the GISEL prefix the constant is converted with v_cvt_f32_u32 and a
;    single v_rcp_iflag_f32 result (v3) feeds both elements;
;  - under the CGP prefix v_rcp_iflag_f32 is applied to the constant operand
;    0x4996c7d8 once per element, and the denominator and its negation are
;    materialized in additional registers (s5, v4, v5).
; Both variants multiply back by the denominator with v_mul_lo_u32 using s4.
415 define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
416 ; GISEL-LABEL: v_srem_v2i32_oddk_denom:
418 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
419 ; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
420 ; GISEL-NEXT: s_mov_b32 s4, 0x12d8fb
421 ; GISEL-NEXT: v_cvt_f32_u32_e32 v3, 0x12d8fb
422 ; GISEL-NEXT: v_mov_b32_e32 v4, 0xffed2705
423 ; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v1
424 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
425 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3
426 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5
427 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
428 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v3
429 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
430 ; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
431 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
432 ; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
433 ; GISEL-NEXT: v_mul_lo_u32 v7, v6, v4
434 ; GISEL-NEXT: v_mul_lo_u32 v4, v3, v4
435 ; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
436 ; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
437 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
438 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4
439 ; GISEL-NEXT: v_mul_hi_u32 v4, v0, v6
440 ; GISEL-NEXT: v_mul_hi_u32 v3, v1, v3
441 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, s4
442 ; GISEL-NEXT: v_mul_lo_u32 v3, v3, s4
443 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
444 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
445 ; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
446 ; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v1
447 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
448 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
449 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
450 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
451 ; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
452 ; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v1
453 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
454 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
455 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
456 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
457 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
458 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
459 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
460 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
461 ; GISEL-NEXT: s_setpc_b64 s[30:31]
463 ; CGP-LABEL: v_srem_v2i32_oddk_denom:
465 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
466 ; CGP-NEXT: s_mov_b32 s4, 0x12d8fb
467 ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
468 ; CGP-NEXT: v_rcp_iflag_f32_e32 v3, 0x4996c7d8
469 ; CGP-NEXT: s_mov_b32 s5, 0xffed2705
470 ; CGP-NEXT: v_mov_b32_e32 v4, 0xffed2705
471 ; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb
472 ; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
473 ; CGP-NEXT: v_rcp_iflag_f32_e32 v7, 0x4996c7d8
474 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
475 ; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
476 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
477 ; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7
478 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
479 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
480 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
481 ; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
482 ; CGP-NEXT: v_mul_lo_u32 v8, v3, s5
483 ; CGP-NEXT: v_mul_lo_u32 v4, v7, v4
484 ; CGP-NEXT: v_mul_hi_u32 v8, v3, v8
485 ; CGP-NEXT: v_mul_hi_u32 v4, v7, v4
486 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8
487 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
488 ; CGP-NEXT: v_mul_hi_u32 v3, v0, v3
489 ; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
490 ; CGP-NEXT: v_mul_lo_u32 v3, v3, s4
491 ; CGP-NEXT: v_mul_lo_u32 v4, v4, s4
492 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
493 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
494 ; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
495 ; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
496 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
497 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
498 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
499 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
500 ; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
501 ; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
502 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
503 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
504 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
505 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
506 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
507 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
508 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
509 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
510 ; CGP-NEXT: s_setpc_b64 s[30:31]
511 %result = srem <2 x i32> %num, <i32 1235195, i32 1235195>
512 ret <2 x i32> %result
; Signed remainder of %x by a denominator computed at run time as 4096 << %y.
; The expectations use the shared CHECK prefix, i.e. one sequence is expected
; for both run configurations.
; In the expected code the shift is performed first (v_lshl_b32 into v1) and
; the result is then treated like an arbitrary denominator: it gets its own
; sign mask and absolute value, and the multiply-back uses v_mul_lo_u32.
515 define i32 @v_srem_i32_pow2_shl_denom(i32 %x, i32 %y) {
516 ; CHECK-LABEL: v_srem_i32_pow2_shl_denom:
518 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
519 ; CHECK-NEXT: v_lshl_b32_e32 v1, 0x1000, v1
520 ; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v0
521 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1
522 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
523 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3
524 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2
525 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3
526 ; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v1
527 ; CHECK-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
528 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
529 ; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
530 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
531 ; CHECK-NEXT: v_mul_lo_u32 v4, v4, v3
532 ; CHECK-NEXT: v_mul_hi_u32 v4, v3, v4
533 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4
534 ; CHECK-NEXT: v_mul_hi_u32 v3, v0, v3
535 ; CHECK-NEXT: v_mul_lo_u32 v3, v3, v1
536 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
537 ; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
538 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
539 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
540 ; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
541 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
542 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
543 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2
544 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
545 ; CHECK-NEXT: s_setpc_b64 s[30:31]
546 %shl.y = shl i32 4096, %y
547 %r = srem i32 %x, %shl.y
; Signed remainder of <2 x i32> %x by a per-element denominator computed at
; run time as 4096 << %y.
; The expected code shifts both denominator elements first (v2, v3) and then
; runs the general two-element sequence with the elements interleaved.
; As recorded below, the GISEL and CGP expectations differ only in the
; reciprocal instructions (v_rcp_iflag_f32 versus v_rcp_f32).
551 define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
552 ; GISEL-LABEL: v_srem_v2i32_pow2_shl_denom:
554 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
555 ; GISEL-NEXT: v_lshl_b32_e32 v2, 0x1000, v2
556 ; GISEL-NEXT: v_lshl_b32_e32 v3, 0x1000, v3
557 ; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0
558 ; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v1
559 ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v2
560 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
561 ; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3
562 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5
563 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6
564 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
565 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
566 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
567 ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v6
568 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
569 ; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v2
570 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
571 ; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3
572 ; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
573 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
574 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8
575 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
576 ; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
577 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
578 ; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
579 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
580 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8
581 ; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
582 ; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9
583 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
584 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9
585 ; GISEL-NEXT: v_mul_hi_u32 v6, v0, v6
586 ; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
587 ; GISEL-NEXT: v_mul_lo_u32 v6, v6, v2
588 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3
589 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
590 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
591 ; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
592 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
593 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
594 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
595 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
596 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
597 ; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
598 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
599 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
600 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
601 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
602 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
603 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
604 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
605 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
606 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
607 ; GISEL-NEXT: s_setpc_b64 s[30:31]
609 ; CGP-LABEL: v_srem_v2i32_pow2_shl_denom:
611 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
612 ; CGP-NEXT: v_lshl_b32_e32 v2, 0x1000, v2
613 ; CGP-NEXT: v_lshl_b32_e32 v3, 0x1000, v3
614 ; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v0
615 ; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1
616 ; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v2
617 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
618 ; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3
619 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5
620 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
621 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
622 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
623 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
624 ; CGP-NEXT: v_xor_b32_e32 v2, v2, v6
625 ; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
626 ; CGP-NEXT: v_cvt_f32_u32_e32 v6, v2
627 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
628 ; CGP-NEXT: v_cvt_f32_u32_e32 v8, v3
629 ; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
630 ; CGP-NEXT: v_rcp_f32_e32 v6, v6
631 ; CGP-NEXT: v_rcp_f32_e32 v8, v8
632 ; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
633 ; CGP-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
634 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
635 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
636 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v6
637 ; CGP-NEXT: v_mul_lo_u32 v9, v9, v8
638 ; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
639 ; CGP-NEXT: v_mul_hi_u32 v9, v8, v9
640 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7
641 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9
642 ; CGP-NEXT: v_mul_hi_u32 v6, v0, v6
643 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7
644 ; CGP-NEXT: v_mul_lo_u32 v6, v6, v2
645 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v3
646 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
647 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
648 ; CGP-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
649 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
650 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
651 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
652 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
653 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
654 ; CGP-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
655 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
656 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
657 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
658 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
659 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
660 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
661 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
662 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
663 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
664 ; CGP-NEXT: s_setpc_b64 s[30:31]
665 %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
666 %r = srem <2 x i32> %x, %shl.y
; Signed remainder where both operands are first masked to their low 24 bits
; (and with 16777215 = 0xffffff).
; The two run configurations have visibly different expectations:
;  - under the GISEL prefix the full signed sequence is expected, including the
;    sign-mask extraction (shift by 31, add, xor) and the final xor+sub;
;  - under the CGP prefix no sign-handling instructions are expected at all;
;    the unsigned reciprocal-based remainder is applied directly to the masked
;    values.
; NOTE(review): presumably the CGP variant drops the sign handling because the
; masked operands are known to be non-negative -- confirm against
; AMDGPUCodeGenPrepare.
670 define i32 @v_srem_i32_24bit(i32 %num, i32 %den) {
671 ; GISEL-LABEL: v_srem_i32_24bit:
673 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
674 ; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
675 ; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1
676 ; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
677 ; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1
678 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
679 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
680 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
681 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
682 ; GISEL-NEXT: v_cvt_f32_u32_e32 v3, v1
683 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
684 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3
685 ; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
686 ; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
687 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, v3
688 ; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
689 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4
690 ; GISEL-NEXT: v_mul_hi_u32 v3, v0, v3
691 ; GISEL-NEXT: v_mul_lo_u32 v3, v3, v1
692 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
693 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
694 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
695 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
696 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
697 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
698 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
699 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
700 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
701 ; GISEL-NEXT: s_setpc_b64 s[30:31]
703 ; CGP-LABEL: v_srem_i32_24bit:
705 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
706 ; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
707 ; CGP-NEXT: v_and_b32_e32 v1, 0xffffff, v1
708 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1
709 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
710 ; CGP-NEXT: v_rcp_f32_e32 v2, v2
711 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
712 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
713 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
714 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
715 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
716 ; CGP-NEXT: v_mul_hi_u32 v2, v0, v2
717 ; CGP-NEXT: v_mul_lo_u32 v2, v2, v1
718 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
719 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
720 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
721 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
722 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
723 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
724 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
725 ; CGP-NEXT: s_setpc_b64 s[30:31]
726 %num.mask = and i32 %num, 16777215
727 %den.mask = and i32 %den, 16777215
728 %result = srem i32 %num.mask, %den.mask
; Two-element version of the 24-bit test: both <2 x i32> operands are masked
; element-wise with 16777215 (0xffffff) before the signed remainder.
; The two run configurations have visibly different expectations:
;  - under the GISEL prefix the full signed two-element sequence is expected,
;    including sign-mask extraction and the final xor+sub per element;
;  - under the CGP prefix no sign-handling instructions are expected; the
;    unsigned reciprocal-based remainder is applied directly to the masked
;    values of each element.
; NOTE(review): presumably the CGP variant drops the sign handling because the
; masked operands are known to be non-negative -- confirm against
; AMDGPUCodeGenPrepare.
732 define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
733 ; GISEL-LABEL: v_srem_v2i32_24bit:
735 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
736 ; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
737 ; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1
738 ; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v2
739 ; GISEL-NEXT: v_and_b32_e32 v3, 0xffffff, v3
740 ; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0
741 ; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v2
742 ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
743 ; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3
744 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
745 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5
746 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v6
747 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
748 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
749 ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v5
750 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
751 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
752 ; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v2
753 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
754 ; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3
755 ; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
756 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
757 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8
758 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
759 ; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
760 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
761 ; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
762 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v5
763 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8
764 ; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7
765 ; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9
766 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
767 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9
768 ; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5
769 ; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
770 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v2
771 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3
772 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
773 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
774 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
775 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
776 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
777 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
778 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
779 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
780 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
781 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
782 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
783 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
784 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
785 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
786 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
787 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
788 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
789 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
790 ; GISEL-NEXT: s_setpc_b64 s[30:31]
792 ; CGP-LABEL: v_srem_v2i32_24bit:
794 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
795 ; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
796 ; CGP-NEXT: v_and_b32_e32 v1, 0xffffff, v1
797 ; CGP-NEXT: v_and_b32_e32 v2, 0xffffff, v2
798 ; CGP-NEXT: v_and_b32_e32 v3, 0xffffff, v3
799 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
800 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
801 ; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
802 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
803 ; CGP-NEXT: v_rcp_f32_e32 v4, v4
804 ; CGP-NEXT: v_rcp_f32_e32 v6, v6
805 ; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
806 ; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
807 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
808 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
809 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v4
810 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v6
811 ; CGP-NEXT: v_mul_hi_u32 v5, v4, v5
812 ; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
813 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
814 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7
815 ; CGP-NEXT: v_mul_hi_u32 v4, v0, v4
816 ; CGP-NEXT: v_mul_hi_u32 v5, v1, v5
817 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v2
818 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v3
819 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
820 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
821 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
822 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
823 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
824 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
825 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
826 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
827 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
828 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
829 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
830 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
831 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
832 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
833 ; CGP-NEXT: s_setpc_b64 s[30:31]
834 %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
835 %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
836 %result = srem <2 x i32> %num.mask, %den.mask
837 ret <2 x i32> %result