1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3 ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,CGP %s
5 ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
; Scalar case: unsigned remainder of two i32 arguments (%num urem %den).
; The GISEL and CGP expectations below are the same instruction sequence
; except for the reciprocal step (v_rcp_iflag_f32_e32 vs. v_rcp_f32_e32).
7 define i32 @v_urem_i32(i32 %num, i32 %den) {
8 ; GISEL-LABEL: v_urem_i32:
10 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
12 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
13 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
14 ; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
15 ; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
16 ; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
17 ; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
18 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
19 ; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
20 ; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1
21 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
22 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
23 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
24 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
25 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
26 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
27 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
28 ; GISEL-NEXT: s_setpc_b64 s[30:31]
30 ; CGP-LABEL: v_urem_i32:
32 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1
34 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
35 ; CGP-NEXT: v_rcp_f32_e32 v2, v2
36 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
37 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
38 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
39 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
40 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
41 ; CGP-NEXT: v_mul_hi_u32 v2, v0, v2
42 ; CGP-NEXT: v_mul_lo_u32 v2, v2, v1
43 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
44 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
45 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
46 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
47 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
48 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
49 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
50 ; CGP-NEXT: s_setpc_b64 s[30:31]
51 %result = urem i32 %num, %den
55 ; FIXME: This is a workaround for not handling the uniform VGPR case.
56 declare i32 @llvm.amdgcn.readfirstlane(i32)
; Scalar-input case: an amdgpu_ps function whose operands are both passed
; `inreg`. The expected code reads the operands from s0/s1, performs the
; expansion in VGPRs and moves the result back with v_readfirstlane_b32.
; The GISEL and CGP expectations differ only in the reciprocal instruction
; (v_rcp_iflag_f32_e32 vs. v_rcp_f32_e32).
58 define amdgpu_ps i32 @s_urem_i32(i32 inreg %num, i32 inreg %den) {
59 ; GISEL-LABEL: s_urem_i32:
61 ; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s1
62 ; GISEL-NEXT: s_sub_i32 s2, 0, s1
63 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0
64 ; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
65 ; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0
66 ; GISEL-NEXT: v_mul_lo_u32 v1, s2, v0
67 ; GISEL-NEXT: v_mul_hi_u32 v1, v0, v1
68 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
69 ; GISEL-NEXT: v_mul_hi_u32 v0, s0, v0
70 ; GISEL-NEXT: v_mul_lo_u32 v0, v0, s1
71 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
72 ; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
73 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
74 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
75 ; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
76 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
77 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
78 ; GISEL-NEXT: v_readfirstlane_b32 s0, v0
79 ; GISEL-NEXT: ; return to shader part epilog
81 ; CGP-LABEL: s_urem_i32:
83 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, s1
84 ; CGP-NEXT: s_sub_i32 s2, 0, s1
85 ; CGP-NEXT: v_rcp_f32_e32 v0, v0
86 ; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
87 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
88 ; CGP-NEXT: v_mul_lo_u32 v1, s2, v0
89 ; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
90 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
91 ; CGP-NEXT: v_mul_hi_u32 v0, s0, v0
92 ; CGP-NEXT: v_mul_lo_u32 v0, v0, s1
93 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
94 ; CGP-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
95 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
96 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
97 ; CGP-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
98 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
99 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
100 ; CGP-NEXT: v_readfirstlane_b32 s0, v0
101 ; CGP-NEXT: ; return to shader part epilog
102 %result = urem i32 %num, %den
; The urem result is routed through the readfirstlane intrinsic declared
; above this function (see the FIXME next to that declaration).
103 %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
; Vector case: element-wise unsigned remainder of two <2 x i32> arguments.
; The expected code is the scalar sequence emitted once per element and
; interleaved (element 0 uses v0/v2, element 1 uses v1/v3).
; The GISEL and CGP expectations differ only in the reciprocal instructions
; (v_rcp_iflag_f32_e32 vs. v_rcp_f32_e32).
107 define <2 x i32> @v_urem_v2i32(<2 x i32> %num, <2 x i32> %den) {
108 ; GISEL-LABEL: v_urem_v2i32:
110 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
111 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2
112 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
113 ; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3
114 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
115 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
116 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
117 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
118 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
119 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
120 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
121 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
122 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
123 ; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
124 ; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
125 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
126 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7
127 ; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
128 ; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
129 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2
130 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3
131 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
132 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
133 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
134 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
135 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
136 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
137 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
138 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
139 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
140 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
141 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
142 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
143 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
144 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
145 ; GISEL-NEXT: s_setpc_b64 s[30:31]
147 ; CGP-LABEL: v_urem_v2i32:
149 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
151 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
152 ; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
153 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
154 ; CGP-NEXT: v_rcp_f32_e32 v4, v4
155 ; CGP-NEXT: v_rcp_f32_e32 v6, v6
156 ; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
157 ; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
158 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
159 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
160 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v4
161 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v6
162 ; CGP-NEXT: v_mul_hi_u32 v5, v4, v5
163 ; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
164 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
165 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7
166 ; CGP-NEXT: v_mul_hi_u32 v4, v0, v4
167 ; CGP-NEXT: v_mul_hi_u32 v5, v1, v5
168 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v2
169 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v3
170 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
171 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
172 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
173 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
174 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
175 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
176 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
177 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
178 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
179 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
180 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
181 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
182 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
183 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
184 ; CGP-NEXT: s_setpc_b64 s[30:31]
185 %result = urem <2 x i32> %num, %den
186 ret <2 x i32> %result
; Power-of-two constant divisor: %num urem 4096.
; Both RUN configurations are checked with the shared CHECK prefix; the
; expected code is a single AND with 0xfff (4096 - 1), with no division
; expansion.
189 define i32 @v_urem_i32_pow2k_denom(i32 %num) {
190 ; CHECK-LABEL: v_urem_i32_pow2k_denom:
192 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193 ; CHECK-NEXT: v_and_b32_e32 v0, 0xfff, v0
194 ; CHECK-NEXT: s_setpc_b64 s[30:31]
195 %result = urem i32 %num, 4096
; Vector form of the power-of-two constant divisor case: each element of
; %num is taken urem 4096 (splat constant).
; Checked with the shared CHECK prefix; the expected code is one AND with
; 0xfff per element.
199 define <2 x i32> @v_urem_v2i32_pow2k_denom(<2 x i32> %num) {
200 ; CHECK-LABEL: v_urem_v2i32_pow2k_denom:
202 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203 ; CHECK-NEXT: v_and_b32_e32 v0, 0xfff, v0
204 ; CHECK-NEXT: v_and_b32_e32 v1, 0xfff, v1
205 ; CHECK-NEXT: s_setpc_b64 s[30:31]
206 %result = urem <2 x i32> %num, <i32 4096, i32 4096>
207 ret <2 x i32> %result
; Odd (non-power-of-two) constant divisor: %num urem 1235195.
; Checked with the shared CHECK prefix, i.e. the same output is expected from
; both RUN configurations. Constants appearing in the expected code:
;   0x12d8fb   = 1235195 (the divisor)
;   0xffed2705 = 0 - 1235195 as a 32-bit value
;   0x4996c7d8 = 1235195 as an f32 bit pattern (operand of the reciprocal)
210 define i32 @v_urem_i32_oddk_denom(i32 %num) {
211 ; CHECK-LABEL: v_urem_i32_oddk_denom:
213 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
214 ; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb
215 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, 0x4996c7d8
216 ; CHECK-NEXT: v_mov_b32_e32 v2, 0xffed2705
217 ; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1
218 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
219 ; CHECK-NEXT: v_mul_lo_u32 v2, v1, v2
220 ; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
221 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
222 ; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
223 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, s4
224 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
225 ; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0
226 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
227 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
228 ; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0
229 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
230 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
231 ; CHECK-NEXT: s_setpc_b64 s[30:31]
232 %result = urem i32 %num, 1235195
; Vector form of the odd constant divisor case: each element of %num is taken
; urem 1235195 (0x12d8fb, splat constant).
; Unlike the scalar variant, the two RUN configurations are checked separately
; because the expected code differs in how the constant is materialized: the
; GISEL expectations convert 0x12d8fb with v_cvt_f32_u32_e32 and keep a VGPR
; copy of the divisor, while the CGP expectations use the f32 literal
; 0x4996c7d8 directly and keep the negated divisor in s5.
; In both sequences the reciprocal estimate is computed once and shared by the
; two elements.
236 define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) {
237 ; GISEL-LABEL: v_urem_v2i32_oddk_denom:
239 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
240 ; GISEL-NEXT: s_mov_b32 s4, 0x12d8fb
241 ; GISEL-NEXT: v_mov_b32_e32 v2, 0x12d8fb
242 ; GISEL-NEXT: v_cvt_f32_u32_e32 v3, 0x12d8fb
243 ; GISEL-NEXT: v_mov_b32_e32 v4, 0xffed2705
244 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3
245 ; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
246 ; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
247 ; GISEL-NEXT: v_mul_lo_u32 v4, v3, v4
248 ; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
249 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4
250 ; GISEL-NEXT: v_mul_hi_u32 v4, v0, v3
251 ; GISEL-NEXT: v_mul_hi_u32 v3, v1, v3
252 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, s4
253 ; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
254 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
255 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
256 ; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
257 ; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
258 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
259 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
260 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
261 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
262 ; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
263 ; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, 0x12d8fb, v1
264 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
265 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
266 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
267 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
268 ; GISEL-NEXT: s_setpc_b64 s[30:31]
270 ; CGP-LABEL: v_urem_v2i32_oddk_denom:
272 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273 ; CGP-NEXT: s_mov_b32 s4, 0x12d8fb
274 ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, 0x4996c7d8
275 ; CGP-NEXT: s_mov_b32 s5, 0xffed2705
276 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
277 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
278 ; CGP-NEXT: v_mul_lo_u32 v3, v2, s5
279 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
280 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
281 ; CGP-NEXT: v_mul_hi_u32 v3, v0, v2
282 ; CGP-NEXT: v_mul_hi_u32 v2, v1, v2
283 ; CGP-NEXT: v_mul_lo_u32 v3, v3, s4
284 ; CGP-NEXT: v_mul_lo_u32 v2, v2, s4
285 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
286 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v2
287 ; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
288 ; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1
289 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
290 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
291 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
292 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
293 ; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
294 ; CGP-NEXT: v_subrev_i32_e32 v3, vcc, 0x12d8fb, v1
295 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
296 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
297 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
298 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
299 ; CGP-NEXT: s_setpc_b64 s[30:31]
300 %result = urem <2 x i32> %num, <i32 1235195, i32 1235195>
301 ret <2 x i32> %result
; Variable power-of-two divisor: %x urem (4096 << %y).
; Checked with the shared CHECK prefix. The expected code computes the
; shifted divisor with v_lshl_b32_e32 and then runs the full reciprocal-based
; expansion; no AND-mask shortcut appears in the expected output.
304 define i32 @v_urem_i32_pow2_shl_denom(i32 %x, i32 %y) {
305 ; CHECK-LABEL: v_urem_i32_pow2_shl_denom:
307 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308 ; CHECK-NEXT: v_lshl_b32_e32 v1, 0x1000, v1
309 ; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v1
310 ; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
311 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2
312 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
313 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
314 ; CHECK-NEXT: v_mul_lo_u32 v3, v3, v2
315 ; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
316 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
317 ; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
318 ; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1
319 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
320 ; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
321 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
322 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
323 ; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
324 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
325 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
326 ; CHECK-NEXT: s_setpc_b64 s[30:31]
; The divisor is 4096 shifted left by the second argument.
327 %shl.y = shl i32 4096, %y
328 %r = urem i32 %x, %shl.y
; Vector form of the variable power-of-two divisor case: each element of %x
; is taken urem (4096 << the matching element of %y).
; The expected code shifts both divisors with v_lshl_b32_e32 and then runs the
; per-element expansion. The GISEL and CGP expectations differ only in the
; reciprocal instructions (v_rcp_iflag_f32_e32 vs. v_rcp_f32_e32).
332 define <2 x i32> @v_urem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
333 ; GISEL-LABEL: v_urem_v2i32_pow2_shl_denom:
335 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
336 ; GISEL-NEXT: v_lshl_b32_e32 v2, 0x1000, v2
337 ; GISEL-NEXT: v_lshl_b32_e32 v3, 0x1000, v3
338 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2
339 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
340 ; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3
341 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
342 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
343 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
344 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
345 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
346 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
347 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
348 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
349 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
350 ; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
351 ; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
352 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
353 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7
354 ; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
355 ; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
356 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2
357 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3
358 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
359 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
360 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
361 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
362 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
363 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
364 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
365 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
366 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
367 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
368 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
369 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
370 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
371 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
372 ; GISEL-NEXT: s_setpc_b64 s[30:31]
374 ; CGP-LABEL: v_urem_v2i32_pow2_shl_denom:
376 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377 ; CGP-NEXT: v_lshl_b32_e32 v2, 0x1000, v2
378 ; CGP-NEXT: v_lshl_b32_e32 v3, 0x1000, v3
379 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
380 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
381 ; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
382 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
383 ; CGP-NEXT: v_rcp_f32_e32 v4, v4
384 ; CGP-NEXT: v_rcp_f32_e32 v6, v6
385 ; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
386 ; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
387 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
388 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
389 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v4
390 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v6
391 ; CGP-NEXT: v_mul_hi_u32 v5, v4, v5
392 ; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
393 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
394 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7
395 ; CGP-NEXT: v_mul_hi_u32 v4, v0, v4
396 ; CGP-NEXT: v_mul_hi_u32 v5, v1, v5
397 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v2
398 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v3
399 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
400 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
401 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
402 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
403 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
404 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
405 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
406 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
407 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
408 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
409 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
410 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
411 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
412 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
413 ; CGP-NEXT: s_setpc_b64 s[30:31]
; Each divisor element is 4096 shifted left by the matching element of %y.
414 %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
415 %r = urem <2 x i32> %x, %shl.y
; 24-bit operand case: both operands are masked to their low 24 bits before
; the urem.
; The expected code applies the two AND masks and then runs the same sequence
; as the unmasked 32-bit case. The GISEL and CGP expectations differ only in
; the reciprocal instruction (v_rcp_iflag_f32_e32 vs. v_rcp_f32_e32).
419 define i32 @v_urem_i32_24bit(i32 %num, i32 %den) {
420 ; GISEL-LABEL: v_urem_i32_24bit:
422 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
423 ; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
424 ; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1
425 ; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
426 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
427 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
428 ; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
429 ; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
430 ; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
431 ; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
432 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
433 ; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
434 ; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1
435 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
436 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
437 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
438 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
439 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
440 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
441 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
442 ; GISEL-NEXT: s_setpc_b64 s[30:31]
444 ; CGP-LABEL: v_urem_i32_24bit:
446 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
447 ; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
448 ; CGP-NEXT: v_and_b32_e32 v1, 0xffffff, v1
449 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1
450 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
451 ; CGP-NEXT: v_rcp_f32_e32 v2, v2
452 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
453 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
454 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
455 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
456 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
457 ; CGP-NEXT: v_mul_hi_u32 v2, v0, v2
458 ; CGP-NEXT: v_mul_lo_u32 v2, v2, v1
459 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
460 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
461 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
462 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
463 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
464 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
465 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
466 ; CGP-NEXT: s_setpc_b64 s[30:31]
; 16777215 == 0xffffff: keep only the low 24 bits of each operand.
467 %num.mask = and i32 %num, 16777215
468 %den.mask = and i32 %den, 16777215
469 %result = urem i32 %num.mask, %den.mask
; Vector form of the 24-bit operand case: every element of both <2 x i32>
; operands is masked to its low 24 bits before the element-wise urem.
; The expected code applies four AND masks and then runs the per-element
; sequence of the unmasked vector case. The GISEL and CGP expectations differ
; only in the reciprocal instructions (v_rcp_iflag_f32_e32 vs. v_rcp_f32_e32).
473 define <2 x i32> @v_urem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
474 ; GISEL-LABEL: v_urem_v2i32_24bit:
476 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
477 ; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
478 ; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1
479 ; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v2
480 ; GISEL-NEXT: v_and_b32_e32 v3, 0xffffff, v3
481 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2
482 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
483 ; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3
484 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
485 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
486 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
487 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
488 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
489 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
490 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
491 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4
492 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
493 ; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5
494 ; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
495 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
496 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7
497 ; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4
498 ; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5
499 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2
500 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3
501 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
502 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
503 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
504 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
505 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
506 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
507 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
508 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
509 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
510 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
511 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
512 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
513 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
514 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
515 ; GISEL-NEXT: s_setpc_b64 s[30:31]
517 ; CGP-LABEL: v_urem_v2i32_24bit:
519 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
520 ; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
521 ; CGP-NEXT: v_and_b32_e32 v1, 0xffffff, v1
522 ; CGP-NEXT: v_and_b32_e32 v2, 0xffffff, v2
523 ; CGP-NEXT: v_and_b32_e32 v3, 0xffffff, v3
524 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
525 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
526 ; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
527 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
528 ; CGP-NEXT: v_rcp_f32_e32 v4, v4
529 ; CGP-NEXT: v_rcp_f32_e32 v6, v6
530 ; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
531 ; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
532 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
533 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
534 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v4
535 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v6
536 ; CGP-NEXT: v_mul_hi_u32 v5, v4, v5
537 ; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
538 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
539 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7
540 ; CGP-NEXT: v_mul_hi_u32 v4, v0, v4
541 ; CGP-NEXT: v_mul_hi_u32 v5, v1, v5
542 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v2
543 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v3
544 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
545 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
546 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
547 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
548 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
549 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
550 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
551 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
552 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
553 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
554 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
555 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
556 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
557 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
558 ; CGP-NEXT: s_setpc_b64 s[30:31]
; 16777215 == 0xffffff: keep only the low 24 bits of every element.
559 %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
560 %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
561 %result = urem <2 x i32> %num.mask, %den.mask
562 ret <2 x i32> %result