1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3 ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,CGP %s
5 ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
; Signed remainder of two i32 arguments (%num srem %den).
; The expected output takes the absolute value of both operands (ashr 31,
; add, xor), runs an unsigned reciprocal-based remainder sequence with two
; compare/select correction steps, then re-applies the numerator's sign mask
; (v2). The two check bodies differ only in the reciprocal instruction:
; v_rcp_iflag_f32 (GISEL) versus v_rcp_f32 (CGP).
7 define i32 @v_srem_i32(i32 %num, i32 %den) {
8 ; GISEL-LABEL: v_srem_i32:
10 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
12 ; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1
13 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
14 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
15 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
16 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
17 ; GISEL-NEXT: v_cvt_f32_u32_e32 v3, v1
18 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
19 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3
20 ; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
21 ; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
22 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, v3
23 ; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
24 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4
25 ; GISEL-NEXT: v_mul_hi_u32 v3, v0, v3
26 ; GISEL-NEXT: v_mul_lo_u32 v3, v3, v1
27 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
28 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
29 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
30 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
31 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
32 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
33 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
34 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
35 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
36 ; GISEL-NEXT: s_setpc_b64 s[30:31]
38 ; CGP-LABEL: v_srem_i32:
40 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41 ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
42 ; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v1
43 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
44 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v3
45 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
46 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v3
47 ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v1
48 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
49 ; CGP-NEXT: v_rcp_f32_e32 v3, v3
50 ; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
51 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
52 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
53 ; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
54 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
55 ; CGP-NEXT: v_mul_hi_u32 v3, v0, v3
56 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v1
57 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
58 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
59 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
60 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
61 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
62 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
63 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
64 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
65 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
66 ; CGP-NEXT: s_setpc_b64 s[30:31]
67 %result = srem i32 %num, %den
71 ; FIXME: This is a workaround for not handling the uniform VGPR case.
72 declare i32 @llvm.amdgcn.readfirstlane(i32)
; Signed remainder of two i32 inreg arguments in an amdgpu_ps function.
; The srem result is fed to llvm.amdgcn.readfirstlane; in the expected output
; this shows up as the trailing v_readfirstlane_b32 into s0.
; The absolute-value setup uses scalar (s_*) instructions, while the
; reciprocal-based remainder sequence and the final sign fix-up use VALU
; instructions with SGPR operands. The two check bodies differ only in the
; reciprocal instruction: v_rcp_iflag_f32 (GISEL) versus v_rcp_f32 (CGP).
74 define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) {
75 ; GISEL-LABEL: s_srem_i32:
77 ; GISEL-NEXT: s_ashr_i32 s2, s0, 31
78 ; GISEL-NEXT: s_ashr_i32 s3, s1, 31
79 ; GISEL-NEXT: s_add_i32 s0, s0, s2
80 ; GISEL-NEXT: s_add_i32 s1, s1, s3
81 ; GISEL-NEXT: s_xor_b32 s0, s0, s2
82 ; GISEL-NEXT: s_xor_b32 s1, s1, s3
83 ; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s1
84 ; GISEL-NEXT: s_sub_i32 s3, 0, s1
85 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0
86 ; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
87 ; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0
88 ; GISEL-NEXT: v_mul_lo_u32 v1, s3, v0
89 ; GISEL-NEXT: v_mul_hi_u32 v1, v0, v1
90 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
91 ; GISEL-NEXT: v_mul_hi_u32 v0, s0, v0
92 ; GISEL-NEXT: v_mul_lo_u32 v0, v0, s1
93 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
94 ; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
95 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
96 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
97 ; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
98 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
99 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
100 ; GISEL-NEXT: v_xor_b32_e32 v0, s2, v0
101 ; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0
102 ; GISEL-NEXT: v_readfirstlane_b32 s0, v0
103 ; GISEL-NEXT: ; return to shader part epilog
105 ; CGP-LABEL: s_srem_i32:
107 ; CGP-NEXT: s_ashr_i32 s2, s0, 31
108 ; CGP-NEXT: s_ashr_i32 s3, s1, 31
109 ; CGP-NEXT: s_add_i32 s0, s0, s2
110 ; CGP-NEXT: s_add_i32 s1, s1, s3
111 ; CGP-NEXT: s_xor_b32 s0, s0, s2
112 ; CGP-NEXT: s_xor_b32 s1, s1, s3
113 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, s1
114 ; CGP-NEXT: s_sub_i32 s3, 0, s1
115 ; CGP-NEXT: v_rcp_f32_e32 v0, v0
116 ; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
117 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
118 ; CGP-NEXT: v_mul_lo_u32 v1, s3, v0
119 ; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
120 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
121 ; CGP-NEXT: v_mul_hi_u32 v0, s0, v0
122 ; CGP-NEXT: v_mul_lo_u32 v0, v0, s1
123 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
124 ; CGP-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
125 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
126 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
127 ; CGP-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
128 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
129 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
130 ; CGP-NEXT: v_xor_b32_e32 v0, s2, v0
131 ; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0
132 ; CGP-NEXT: v_readfirstlane_b32 s0, v0
133 ; CGP-NEXT: ; return to shader part epilog
134 %result = srem i32 %num, %den
135 %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
; Element-wise signed remainder of two <2 x i32> arguments.
; The expected output interleaves two copies of the single-element sequence:
; v0 is divided by v2 and v1 by v3, with v4 and v6 holding the sign masks that
; are re-applied to the two results. The two check bodies differ only in the
; reciprocal instruction: v_rcp_iflag_f32 (GISEL) versus v_rcp_f32 (CGP).
139 define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) {
140 ; GISEL-LABEL: v_srem_v2i32:
142 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143 ; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0
144 ; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v2
145 ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
146 ; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3
147 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
148 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5
149 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v6
150 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
151 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
152 ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v5
153 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
154 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
155 ; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v2
156 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
157 ; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3
158 ; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
159 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
160 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8
161 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
162 ; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
163 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
164 ; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
165 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v5
166 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8
167 ; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7
168 ; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9
169 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
170 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9
171 ; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5
172 ; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
173 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v2
174 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3
175 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
176 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
177 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
178 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
179 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
180 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
181 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
182 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
183 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
184 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
185 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
186 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
187 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
188 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
189 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
190 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
191 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
192 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
193 ; GISEL-NEXT: s_setpc_b64 s[30:31]
195 ; CGP-LABEL: v_srem_v2i32:
197 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
198 ; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v0
199 ; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v2
200 ; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
201 ; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3
202 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
203 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5
204 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
205 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
206 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
207 ; CGP-NEXT: v_xor_b32_e32 v2, v2, v5
208 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
209 ; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
210 ; CGP-NEXT: v_cvt_f32_u32_e32 v5, v2
211 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
212 ; CGP-NEXT: v_cvt_f32_u32_e32 v8, v3
213 ; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
214 ; CGP-NEXT: v_rcp_f32_e32 v5, v5
215 ; CGP-NEXT: v_rcp_f32_e32 v8, v8
216 ; CGP-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
217 ; CGP-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
218 ; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
219 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
220 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v5
221 ; CGP-NEXT: v_mul_lo_u32 v9, v9, v8
222 ; CGP-NEXT: v_mul_hi_u32 v7, v5, v7
223 ; CGP-NEXT: v_mul_hi_u32 v9, v8, v9
224 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7
225 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9
226 ; CGP-NEXT: v_mul_hi_u32 v5, v0, v5
227 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7
228 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v2
229 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v3
230 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
231 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
232 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
233 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
234 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
235 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
236 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
237 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
238 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
239 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
240 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
241 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
242 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
243 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
244 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
245 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
246 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
247 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
248 ; CGP-NEXT: s_setpc_b64 s[30:31]
249 %result = srem <2 x i32> %num, %den
250 ret <2 x i32> %result
; Signed remainder of an i32 by the constant 4096 (0x1000, a power of two).
; Checked under the common CHECK prefix only, so one expected body covers both
; RUN lines. The output still uses the reciprocal-based sequence: the
; reciprocal operand is the literal 0x45800000 (the f32 bit pattern of 4096.0),
; 0xfffff000 is -4096, and the multiply of the quotient estimate by the
; denominator appears as a left shift by 12.
253 define i32 @v_srem_i32_pow2k_denom(i32 %num) {
254 ; CHECK-LABEL: v_srem_i32_pow2k_denom:
256 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
257 ; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
258 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, 0x45800000
259 ; CHECK-NEXT: v_mov_b32_e32 v3, 0xfffff000
260 ; CHECK-NEXT: v_mov_b32_e32 v4, 0x1000
261 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
262 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
263 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
264 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
265 ; CHECK-NEXT: v_mul_lo_u32 v3, v2, v3
266 ; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
267 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
268 ; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
269 ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 12, v2
270 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
271 ; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x1000, v0
272 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
273 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
274 ; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x1000, v0
275 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
276 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
277 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
278 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
279 ; CHECK-NEXT: s_setpc_b64 s[30:31]
280 %result = srem i32 %num, 4096
; Element-wise signed remainder of a <2 x i32> by the splat constant 4096.
; A single reciprocal estimate is computed and shared by both elements
; (one v_rcp_iflag_f32, two v_mul_hi_u32 against it).
; The two expected bodies differ in how the reciprocal operand is produced:
; the GISEL body converts the integer 0x1000 with v_cvt_f32_u32 first, while
; the CGP body passes the f32 literal 0x45800000 (4096.0) directly. Register
; numbering and instruction order differ accordingly.
284 define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
285 ; GISEL-LABEL: v_srem_v2i32_pow2k_denom:
287 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
288 ; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
289 ; GISEL-NEXT: v_mov_b32_e32 v3, 0x1000
290 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x1000
291 ; GISEL-NEXT: v_mov_b32_e32 v5, 0xfffff000
292 ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
293 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
294 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
295 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v6
296 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
297 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
298 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
299 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
300 ; GISEL-NEXT: v_mul_lo_u32 v5, v4, v5
301 ; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
302 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
303 ; GISEL-NEXT: v_mul_hi_u32 v5, v0, v4
304 ; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
305 ; GISEL-NEXT: v_lshlrev_b32_e32 v5, 12, v5
306 ; GISEL-NEXT: v_lshlrev_b32_e32 v4, 12, v4
307 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
308 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
309 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v3
310 ; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 0x1000, v1
311 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
312 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
313 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
314 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
315 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v3
316 ; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 0x1000, v1
317 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
318 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
319 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
320 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
321 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
322 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
323 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
324 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
325 ; GISEL-NEXT: s_setpc_b64 s[30:31]
327 ; CGP-LABEL: v_srem_v2i32_pow2k_denom:
329 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
330 ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
331 ; CGP-NEXT: v_rcp_iflag_f32_e32 v3, 0x45800000
332 ; CGP-NEXT: v_mov_b32_e32 v4, 0xfffff000
333 ; CGP-NEXT: v_mov_b32_e32 v5, 0x1000
334 ; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
335 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
336 ; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
337 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
338 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
339 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
340 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
341 ; CGP-NEXT: v_mul_lo_u32 v4, v3, v4
342 ; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
343 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
344 ; CGP-NEXT: v_mul_hi_u32 v4, v0, v3
345 ; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
346 ; CGP-NEXT: v_lshlrev_b32_e32 v4, 12, v4
347 ; CGP-NEXT: v_lshlrev_b32_e32 v3, 12, v3
348 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
349 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
350 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v5
351 ; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 0x1000, v1
352 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
353 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
354 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
355 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
356 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v5
357 ; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 0x1000, v1
358 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
359 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
360 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
361 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
362 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
363 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
364 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
365 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
366 ; CGP-NEXT: s_setpc_b64 s[30:31]
367 %result = srem <2 x i32> %num, <i32 4096, i32 4096>
368 ret <2 x i32> %result
; Signed remainder of an i32 by the odd constant 1235195 (0x12d8fb).
; Checked under the common CHECK prefix only, so one expected body covers both
; RUN lines. Constants in the output: 0x4996c7d8 is the f32 bit pattern of
; 1235195.0 (reciprocal operand) and 0xffed2705 is -1235195. Unlike the
; power-of-two case, the quotient estimate is multiplied by the denominator
; with a real v_mul_lo_u32.
371 define i32 @v_srem_i32_oddk_denom(i32 %num) {
372 ; CHECK-LABEL: v_srem_i32_oddk_denom:
374 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
375 ; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
376 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, 0x4996c7d8
377 ; CHECK-NEXT: v_mov_b32_e32 v3, 0xffed2705
378 ; CHECK-NEXT: v_mov_b32_e32 v4, 0x12d8fb
379 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
380 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
381 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
382 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
383 ; CHECK-NEXT: v_mul_lo_u32 v3, v2, v3
384 ; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
385 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
386 ; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
387 ; CHECK-NEXT: v_mul_lo_u32 v2, v2, v4
388 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
389 ; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0
390 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
391 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
392 ; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, 0x12d8fb, v0
393 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
394 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
395 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
396 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
397 ; CHECK-NEXT: s_setpc_b64 s[30:31]
398 %result = srem i32 %num, 1235195
; Element-wise signed remainder of a <2 x i32> by the splat constant 1235195
; (0x12d8fb). A single reciprocal estimate is computed and shared by both
; elements.
; The two expected bodies differ in how the reciprocal operand is produced:
; the GISEL body converts the integer 0x12d8fb with v_cvt_f32_u32 first, while
; the CGP body passes the f32 literal 0x4996c7d8 (1235195.0) directly.
; Register numbering and instruction order differ accordingly.
402 define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
403 ; GISEL-LABEL: v_srem_v2i32_oddk_denom:
405 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
406 ; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
407 ; GISEL-NEXT: v_mov_b32_e32 v3, 0x12d8fb
408 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb
409 ; GISEL-NEXT: v_mov_b32_e32 v5, 0xffed2705
410 ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
411 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
412 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
413 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v6
414 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
415 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
416 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
417 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
418 ; GISEL-NEXT: v_mul_lo_u32 v5, v4, v5
419 ; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
420 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
421 ; GISEL-NEXT: v_mul_hi_u32 v5, v0, v4
422 ; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
423 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3
424 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, v3
425 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
426 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
427 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v3
428 ; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 0x12d8fb, v1
429 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
430 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
431 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
432 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
433 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v3
434 ; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 0x12d8fb, v1
435 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
436 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
437 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
438 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
439 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
440 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
441 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
442 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
443 ; GISEL-NEXT: s_setpc_b64 s[30:31]
445 ; CGP-LABEL: v_srem_v2i32_oddk_denom:
447 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
448 ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
449 ; CGP-NEXT: v_rcp_iflag_f32_e32 v3, 0x4996c7d8
450 ; CGP-NEXT: v_mov_b32_e32 v4, 0xffed2705
451 ; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb
452 ; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
453 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
454 ; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
455 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
456 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
457 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
458 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
459 ; CGP-NEXT: v_mul_lo_u32 v4, v3, v4
460 ; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
461 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
462 ; CGP-NEXT: v_mul_hi_u32 v4, v0, v3
463 ; CGP-NEXT: v_mul_hi_u32 v3, v1, v3
464 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v5
465 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v5
466 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
467 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
468 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v5
469 ; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
470 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
471 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
472 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
473 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
474 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v5
475 ; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
476 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
477 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
478 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
479 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
480 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
481 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
482 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
483 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
484 ; CGP-NEXT: s_setpc_b64 s[30:31]
485 %result = srem <2 x i32> %num, <i32 1235195, i32 1235195>
486 ret <2 x i32> %result
; Signed remainder of %x by a run-time denominator computed as (4096 << %y).
; Checked under the common CHECK prefix only, so one expected body covers both
; RUN lines. The shift appears as v_lshl_b32 with the literal 0x1000, followed
; by the same sign handling and reciprocal-based sequence used for a fully
; variable denominator.
489 define i32 @v_srem_i32_pow2_shl_denom(i32 %x, i32 %y) {
490 ; CHECK-LABEL: v_srem_i32_pow2_shl_denom:
492 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
493 ; CHECK-NEXT: v_lshl_b32_e32 v1, 0x1000, v1
494 ; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v0
495 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1
496 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
497 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3
498 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2
499 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3
500 ; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v1
501 ; CHECK-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
502 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
503 ; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
504 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
505 ; CHECK-NEXT: v_mul_lo_u32 v4, v4, v3
506 ; CHECK-NEXT: v_mul_hi_u32 v4, v3, v4
507 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4
508 ; CHECK-NEXT: v_mul_hi_u32 v3, v0, v3
509 ; CHECK-NEXT: v_mul_lo_u32 v3, v3, v1
510 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
511 ; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
512 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
513 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
514 ; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
515 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
516 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
517 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2
518 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
519 ; CHECK-NEXT: s_setpc_b64 s[30:31]
520 %shl.y = shl i32 4096, %y
521 %r = srem i32 %x, %shl.y
; Element-wise signed remainder of %x by a run-time denominator computed as
; (<4096, 4096> << %y).
; Each element's shift appears as v_lshl_b32 with the literal 0x1000, followed
; by two interleaved copies of the variable-denominator sequence (v0 with v2,
; v1 with v3). The two check bodies differ only in the reciprocal instruction:
; v_rcp_iflag_f32 (GISEL) versus v_rcp_f32 (CGP).
525 define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
526 ; GISEL-LABEL: v_srem_v2i32_pow2_shl_denom:
528 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
529 ; GISEL-NEXT: v_lshl_b32_e32 v2, 0x1000, v2
530 ; GISEL-NEXT: v_lshl_b32_e32 v3, 0x1000, v3
531 ; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0
532 ; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v1
533 ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v2
534 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
535 ; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3
536 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5
537 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6
538 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
539 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
540 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
541 ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v6
542 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
543 ; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v2
544 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
545 ; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3
546 ; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
547 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
548 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8
549 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
550 ; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
551 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
552 ; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
553 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
554 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8
555 ; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
556 ; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9
557 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
558 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9
559 ; GISEL-NEXT: v_mul_hi_u32 v6, v0, v6
560 ; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
561 ; GISEL-NEXT: v_mul_lo_u32 v6, v6, v2
562 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3
563 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
564 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
565 ; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
566 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
567 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
568 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
569 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
570 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
571 ; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
572 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
573 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
574 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
575 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
576 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
577 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
578 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
579 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
580 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
581 ; GISEL-NEXT: s_setpc_b64 s[30:31]
583 ; CGP-LABEL: v_srem_v2i32_pow2_shl_denom:
585 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
586 ; CGP-NEXT: v_lshl_b32_e32 v2, 0x1000, v2
587 ; CGP-NEXT: v_lshl_b32_e32 v3, 0x1000, v3
588 ; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v0
589 ; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1
590 ; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v2
591 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
592 ; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3
593 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5
594 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
595 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
596 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
597 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
598 ; CGP-NEXT: v_xor_b32_e32 v2, v2, v6
599 ; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
600 ; CGP-NEXT: v_cvt_f32_u32_e32 v6, v2
601 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
602 ; CGP-NEXT: v_cvt_f32_u32_e32 v8, v3
603 ; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
604 ; CGP-NEXT: v_rcp_f32_e32 v6, v6
605 ; CGP-NEXT: v_rcp_f32_e32 v8, v8
606 ; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
607 ; CGP-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
608 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
609 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
610 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v6
611 ; CGP-NEXT: v_mul_lo_u32 v9, v9, v8
612 ; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
613 ; CGP-NEXT: v_mul_hi_u32 v9, v8, v9
614 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7
615 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9
616 ; CGP-NEXT: v_mul_hi_u32 v6, v0, v6
617 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7
618 ; CGP-NEXT: v_mul_lo_u32 v6, v6, v2
619 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v3
620 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
621 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
622 ; CGP-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
623 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
624 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
625 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
626 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
627 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
628 ; CGP-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
629 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
630 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
631 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
632 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
633 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
634 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
635 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
636 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
637 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
638 ; CGP-NEXT: s_setpc_b64 s[30:31]
639 %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
640 %r = srem <2 x i32> %x, %shl.y
; Signed remainder where both operands are first masked with 16777215
; (0xffffff), so only the low 24 bits of each input are used.
; The two expected bodies differ beyond the reciprocal opcode: the GISEL body
; keeps the sign handling (ashr 31 / add / xor before, xor / sub after) around
; the unsigned sequence, while the CGP body omits it and runs the unsigned
; sequence directly on the masked values with v_rcp_f32.
644 define i32 @v_srem_i32_24bit(i32 %num, i32 %den) {
645 ; GISEL-LABEL: v_srem_i32_24bit:
647 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
648 ; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
649 ; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1
650 ; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
651 ; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1
652 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
653 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
654 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
655 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
656 ; GISEL-NEXT: v_cvt_f32_u32_e32 v3, v1
657 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
658 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3
659 ; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
660 ; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
661 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, v3
662 ; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
663 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4
664 ; GISEL-NEXT: v_mul_hi_u32 v3, v0, v3
665 ; GISEL-NEXT: v_mul_lo_u32 v3, v3, v1
666 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
667 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
668 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
669 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
670 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
671 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
672 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
673 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
674 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
675 ; GISEL-NEXT: s_setpc_b64 s[30:31]
677 ; CGP-LABEL: v_srem_i32_24bit:
679 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
680 ; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
681 ; CGP-NEXT: v_and_b32_e32 v1, 0xffffff, v1
682 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1
683 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
684 ; CGP-NEXT: v_rcp_f32_e32 v2, v2
685 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
686 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
687 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
688 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
689 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
690 ; CGP-NEXT: v_mul_hi_u32 v2, v0, v2
691 ; CGP-NEXT: v_mul_lo_u32 v2, v2, v1
692 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
693 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
694 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
695 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
696 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
697 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
698 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
699 ; CGP-NEXT: s_setpc_b64 s[30:31]
700 %num.mask = and i32 %num, 16777215
701 %den.mask = and i32 %den, 16777215
702 %result = srem i32 %num.mask, %den.mask
; Element-wise signed remainder where every element of both operands is first
; masked with 16777215 (0xffffff), so only the low 24 bits are used.
; The two expected bodies differ beyond the reciprocal opcode: the GISEL body
; keeps the per-element sign handling (ashr 31 / add / xor before, xor / sub
; after), while the CGP body omits it and runs two interleaved unsigned
; sequences directly on the masked values with v_rcp_f32.
706 define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
707 ; GISEL-LABEL: v_srem_v2i32_24bit:
709 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
710 ; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
711 ; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1
712 ; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v2
713 ; GISEL-NEXT: v_and_b32_e32 v3, 0xffffff, v3
714 ; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0
715 ; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v2
716 ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
717 ; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3
718 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
719 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5
720 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v6
721 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
722 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
723 ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v5
724 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
725 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
726 ; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v2
727 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
728 ; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3
729 ; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
730 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
731 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8
732 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
733 ; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
734 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
735 ; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
736 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v5
737 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8
738 ; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7
739 ; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9
740 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
741 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9
742 ; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5
743 ; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
744 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v2
745 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3
746 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
747 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
748 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
749 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
750 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
751 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
752 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
753 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
754 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
755 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
756 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
757 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
758 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
759 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
760 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
761 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
762 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
763 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
764 ; GISEL-NEXT: s_setpc_b64 s[30:31]
766 ; CGP-LABEL: v_srem_v2i32_24bit:
768 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
769 ; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
770 ; CGP-NEXT: v_and_b32_e32 v1, 0xffffff, v1
771 ; CGP-NEXT: v_and_b32_e32 v2, 0xffffff, v2
772 ; CGP-NEXT: v_and_b32_e32 v3, 0xffffff, v3
773 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
774 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
775 ; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
776 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
777 ; CGP-NEXT: v_rcp_f32_e32 v4, v4
778 ; CGP-NEXT: v_rcp_f32_e32 v6, v6
779 ; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
780 ; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
781 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
782 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
783 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v4
784 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v6
785 ; CGP-NEXT: v_mul_hi_u32 v5, v4, v5
786 ; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
787 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
788 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7
789 ; CGP-NEXT: v_mul_hi_u32 v4, v0, v4
790 ; CGP-NEXT: v_mul_hi_u32 v5, v1, v5
791 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v2
792 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v3
793 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
794 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
795 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
796 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
797 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
798 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
799 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
800 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
801 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
802 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
803 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
804 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
805 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
806 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
807 ; CGP-NEXT: s_setpc_b64 s[30:31]
808 %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
809 %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
810 %result = srem <2 x i32> %num.mask, %den.mask
811 ret <2 x i32> %result